diff --git a/docs/.markdownlintignore b/docs/.markdownlintignore
deleted file mode 100644
index 1aea57f9f2..0000000000
--- a/docs/.markdownlintignore
+++ /dev/null
@@ -1,2 +0,0 @@
-node_modules
-.vuepress
diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js
index 5f960c8f3b..46c3afb7a6 100644
--- a/docs/.vuepress/sidebar/en.js
+++ b/docs/.vuepress/sidebar/en.js
@@ -17,7 +17,7 @@
*/
module.exports = [
- {
+ {
title: "Downloads",
directoryPath: "downloads/",
initialOpenGroupIndex: -1,
@@ -25,69 +25,854 @@ module.exports = [
sidebarDepth: 1,
},
{
- title: "Compilation and Deployment",
- directoryPath: "installing/",
+ title: "Getting Started",
+ directoryPath: "get-starting/",
initialOpenGroupIndex: -1,
children: [
- "compilation",
- "compilation-with-ldb-toolchain",
- "compilation-arm",
- "install-deploy",
- "upgrade",
+ "get-starting"
],
},
{
- title: "Getting Started",
- directoryPath: "getting-started/",
+ title: "Doris Architecture",
+ directoryPath: "summary/",
initialOpenGroupIndex: -1,
children: [
+ "basic-summary",
+ "system-architecture"
+ ],
+ },
+ {
+ title: "Install and deploy",
+ directoryPath: "install/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "install-deploy",
+ {
+ title: "Compile",
+ directoryPath: "source-install/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "compilation",
+ "compilation-with-ldb-toolchain",
+ "compilation-arm"
+ ],
+ sidebarDepth: 2,
+ },
+ ]
+ },
+ {
+ title: "Table Design",
+ directoryPath: "data-table/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "data-model",
+ "data-partition",
"basic-usage",
"advance-usage",
- "data-model-rollup",
- "data-partition",
"hit-the-rollup",
"best-practice",
+ {
+ title: "Index",
+ directoryPath: "index/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "bloomfilter",
+ "prefix-index",
+ "bitmap-index"
+ ],
+ },
],
},
{
- title: "Administrator Guide",
- directoryPath: "administrator-guide/",
+ title: "Data Operate",
+ directoryPath: "data-operate/",
initialOpenGroupIndex: -1,
children: [
{
- title: "Load Data",
- directoryPath: "load-data/",
+ title: "Import",
+ directoryPath: "import/",
initialOpenGroupIndex: -1,
children: [
"load-manual",
- "batch-delete-manual",
- "binlog-load-manual",
- "broker-load-manual",
- "routine-load-manual",
- "sequence-column-manual",
- "spark-load-manual",
- "stream-load-manual",
- "s3-load-manual",
- "delete-manual",
- "insert-into-manual",
- "load-json-format",
+ {
+ title: "Import Scenes",
+ directoryPath: "import-scenes/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "local-file-load",
+ "external-storage-load",
+ "kafka-load",
+ "external-table-load",
+ "jdbc-load",
+ "load-atomicity",
+ "load-data-convert",
+ "load-strict-mode",
+ ],
+ },
+ {
+ title: "Import Way",
+ directoryPath: "import-way/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "binlog-load-manual",
+ "broker-load-manual",
+ "routine-load-manual",
+ "spark-load-manual",
+ "stream-load-manual",
+ "s3-load-manual",
+ "insert-into-manual",
+ "load-json-format",
+ ],
+ },
],
- sidebarDepth: 2,
},
{
- title: "Schema Change",
+ title: "Export",
+ directoryPath: "export/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "export-manual",
+ "outfile",
+ "export_with_mysql_dump",
+ ],
+ },
+ {
+ title: "Update and Delete",
+ directoryPath: "update-delete/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "update",
+ "sequence-column-manual",
+ "delete-manual",
+ "batch-delete-manual"
+ ],
+ },
+ ],
+ },
+ {
+ title: "Advanced usage",
+ directoryPath: "advanced/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "materialized-view",
+ {
+ title: "Alter Table",
directoryPath: "alter-table/",
initialOpenGroupIndex: -1,
children: [
- "alter-table-bitmap-index",
- "alter-table-replace-table",
- "alter-table-rollup",
- "alter-table-schema-change",
- "alter-table-temp-partition",
+ "schema-change",
+ "replace-table"
],
- sidebarDepth: 2,
},
- "materialized_view",
+ {
+ title: "Doris Partition",
+ directoryPath: "partition/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "dynamic-partition",
+ "table-temp-partition"
+ ],
+ },
+ {
+ title: "Join Optimization",
+ directoryPath: "join-optimization/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "bucket-shuffle-join",
+ "colocation-join",
+ "runtime-filter"
+ ],
+ },
+ {
+ title: "Date Cache",
+ directoryPath: "cache/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "partition-cache"
+ ],
+ },
+ "vectorized-execution-engine",
+ "broker",
+ "resource",
+ "orthogonal-bitmap-manual",
+ "variables",
+ "time-zone",
+ "small-file-mgr",
+ {
+ title: "Best Practice",
+ directoryPath: "best-practice/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "query-analysis",
+ "import-analysis",
+ "debug-log"
+ ]
+ }
+ ],
+ },
+ {
+ title: "Ecosystem",
+ directoryPath: "ecosystem/",
+ initialOpenGroupIndex: -1,
+ children: [
+ {
+ title: "Expansion table",
+ directoryPath: "external-table/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "doris-on-es",
+ "odbc-of-doris",
+ "hive-of-doris",
+ "iceberg-of-doris"
+ ],
+ },
+ "audit-plugin",
+ "flink-doris-connector",
+ "spark-doris-connector",
+ "datax",
+ "logstash",
+ {
+ title: "Doris Manager",
+ directoryPath: "doris-manager/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "compiling-deploying",
+ "initializing",
+ "cluster-managenent",
+ "space-list",
+ "space-management",
+ "system-settings"
+ ],
+ },
+ {
+ title: "SeaTunnel",
+ directoryPath: "seatunnel/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "flink-sink",
+ "spark-sink"
+ ],
+ },
+ {
+ title: "UDF",
+ directoryPath: "udf/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "native-user-defined-function",
+ "remote-user-defined-function",
+ "contribute-udf"
+ ],
+ },
+ ],
+ },
+ {
+ title: "SQL manual",
+ directoryPath: "sql-manual/",
+ initialOpenGroupIndex: -1,
+ children: [
+ {
+ title: "SQL Functions",
+ directoryPath: "sql-functions/",
+ initialOpenGroupIndex: -1,
+ children: [
+ {
+ title: "Date Functions",
+ directoryPath: "date-time-functions/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "convert_tz",
+ "curdate",
+ "current_timestamp",
+ "curtime",
+ "date_add",
+ "date_format",
+ "date_sub",
+ "datediff",
+ "day",
+ "dayname",
+ "dayofmonth",
+ "dayofweek",
+ "dayofyear",
+ "from_days",
+ "from_unixtime",
+ "hour",
+ "makedate",
+ "minute",
+ "month",
+ "monthname",
+ "now",
+ "second",
+ "str_to_date",
+ "time_round",
+ "timediff",
+ "timestampadd",
+ "timestampdiff",
+ "to_days",
+ "unix_timestamp",
+ "utc_timestamp",
+ "week",
+ "weekday",
+ "weekofyear",
+ "year",
+ "yearweek",
+ ],
+ },
+ {
+ title: "GIS Functions",
+ directoryPath: "spatial-functions/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "st_astext",
+ "st_circle",
+ "st_contains",
+ "st_distance_sphere",
+ "st_geometryfromtext",
+ "st_linefromtext",
+ "st_point",
+ "st_polygon",
+ "st_x",
+ "st_y",
+ ],
+ },
+ {
+ title: "String Functions",
+ directoryPath: "string-functions/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "append_trailing_char_if_absent",
+ "ascii",
+ "bit_length",
+ "char_length",
+ "concat",
+ "concat_ws",
+ "ends_with",
+ "find_in_set",
+ "hex",
+ "instr",
+ "lcase",
+ "left",
+ "length",
+ "locate",
+ "lower",
+ "lpad",
+ "ltrim",
+ "money_format",
+ "null_or_empty",
+ "repeat",
+ "replace",
+ "reverse",
+ "right",
+ "rpad",
+ "split_part",
+ "starts_with",
+ "strleft",
+ "strright",
+ "substring",
+ "unhex",
+ {
+ title: "Fuzzy Match",
+ directoryPath: "like/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "like",
+ "not_like",
+ ],
+ },
+ {
+ title: "Regular Match",
+ directoryPath: "regexp/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "regexp",
+ "regexp_extract",
+ "regexp_replace",
+ "not_regexp",
+ ],
+ },
+ ],
+ },
+ {
+ title: "Aggregate Functions",
+ directoryPath: "aggregate-functions/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "approx_count_distinct",
+ "avg",
+ "bitmap_union",
+ "count",
+ "group_concat",
+ "hll_union_agg",
+ "max",
+ "max_by",
+ "min",
+ "min_by",
+ "percentile",
+ "percentile_approx",
+ "stddev",
+ "stddev_samp",
+ "sum",
+ "topn",
+ "var_samp",
+ "variance",
+ ],
+ },
+ {
+ title: "Bitmap Functions",
+ directoryPath: "bitmap-functions/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "bitmap_and",
+ "bitmap_contains",
+ "bitmap_empty",
+ "bitmap_from_string",
+ "bitmap_has_any",
+ "bitmap_has_all",
+ "bitmap_hash",
+ "bitmap_intersect",
+ "bitmap_or",
+ "bitmap_and_count",
+ "bitmap_or_count",
+ "bitmap_xor",
+ "bitmap_xor_count",
+ "bitmap_not",
+ "bitmap_and_not",
+ "bitmap_and_not_count",
+ "bitmap_subset_in_range",
+ "bitmap_subset_limit",
+ "sub_bitmap",
+ "bitmap_to_string",
+ "bitmap_union",
+ "bitmap_xor",
+ "to_bitmap",
+ "bitmap_max",
+ "orthogonal_bitmap_intersect",
+ "orthogonal_bitmap_intersect_count",
+ "orthogonal_bitmap_union_count",
+ ],
+ },
+ {
+ title: "Bitwise Functions",
+ directoryPath: "bitwise-functions/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "bitand",
+ "bitor",
+ "bitxor",
+ "bitnot"
+ ],
+ },
+ {
+ title: "Condition Functions",
+ directoryPath: "conditional-functions/",
+ children: [
+ "case",
+ "coalesce",
+ "if",
+ "ifnull",
+ "nullif"
+ ],
+ },
+ {
+ title: "JSON Functions",
+ directoryPath: "json-functions/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "get_json_double",
+ "get_json_int",
+ "get_json_string",
+ "json_array",
+ "json_object",
+ "json_quote",
+ ],
+ },
+ {
+ title: "Hash Functions",
+ directoryPath: "hash-functions/",
+ initialOpenGroupIndex: -1,
+ children: ["murmur_hash3_32"],
+ },
+ {
+ title: "Math Functions",
+ directoryPath: "math-functions/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "conv",
+ "pmod"
+ ],
+ },
+ {
+ title: "Encryption Functions",
+ directoryPath: "encrypt-dixgest-functions/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "aes",
+ "md5",
+ "md5sum",
+ "sm4",
+ "sm3",
+ "sm3sum"
+ ],
+ },
+ {
+ title: "Table Functions",
+ directoryPath: "table-functions/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "explode-bitmap",
+ "explode-split",
+ "explode-json-array",
+ "outer-combinator"
+ ],
+ },
+ "window-function",
+ "cast",
+ "digital-masking",
+ ],
+ },
+ {
+ title: "SQL Reference",
+ directoryPath: "sql-reference-v2/",
+ initialOpenGroupIndex: -1,
+ children: [
+ {
+ title: "Account Management",
+ directoryPath: "Account-Management-Statements/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "CREATE-USER",
+ "CREATE-ROLE",
+ "DROP-ROLE",
+ "DROP-USER",
+ "GRANT",
+ "REVOKE",
+ "SET-PASSWORD",
+ "SET-PROPERTY",
+ "LDAP",
+ ],
+ },
+ {
+ title: "Cluster management",
+ directoryPath: "Cluster-Management-Statements/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "ALTER-SYSTEM-ADD-BACKEND",
+ "ALTER-SYSTEM-ADD-FOLLOWER",
+ "ALTER-SYSTEM-ADD-OBSERVER",
+ "ALTER-SYSTEM-DECOMMISSION-BACKEND",
+ "ALTER-SYSTEM-DROP-BACKEND",
+ "ALTER-SYSTEM-DROP-FOLLOWER",
+ "ALTER-SYSTEM-DROP-OBSERVER",
+ "ALTER-SYSTEM-MODIFY-BROKER",
+ "CANCEL-ALTER-SYSTEM",
+ ],
+ },
+ {
+ title: "DDL",
+ directoryPath: "Data-Definition-Statements/",
+ initialOpenGroupIndex: -1,
+ children: [
+ {
+ title: "Alter",
+ directoryPath: "Alter/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "ALTER-DATABASE",
+ "ALTER-SQL-BLOCK-RULE",
+ "ALTER-TABLE-COLUMN",
+ "ALTER-TABLE-PARTITION",
+ "ALTER-TABLE-PROPERTY",
+ "ALTER-TABLE-RENAME",
+ "ALTER-TABLE-REPLACE",
+ "ALTER-TABLE-ROLLUP",
+ "ALTER-VIEW",
+ "CANCEL-ALTER-TABLE",
+ ],
+ },
+ {
+ title: "Backup and Restore",
+ directoryPath: "Backup-and-Restore/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "BACKUP",
+ "CANCEL-BACKUP",
+ "CANCEL-RESTORE",
+ "CREATE-REPOSITORY",
+ "DROP-REPOSITORY",
+ "RESTORE",
+ ],
+ },
+ {
+ title: "Create",
+ directoryPath: "Create/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "CREATE-DATABASE",
+ "CREATE-ENCRYPT-KEY",
+ "CREATE-FILE",
+ "CREATE-FUNCTION",
+ "CREATE-INDEX",
+ "CREATE-MATERIALIZED-VIEW",
+ "CREATE-RESOURCE",
+ "CREATE-SQL-BLOCK-RULE",
+ "CREATE-TABLE-LIKE",
+ "CREATE-TABLE",
+ "CREATE-VIEW",
+ "CREATE-EXTERNAL-TABLE",
+ ],
+ },
+ {
+ title: "Drop",
+ directoryPath: "Drop/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "DROP-DATABASE",
+ "DROP-ENCRYPT-KEY",
+ "DROP-FILE",
+ "DROP-FUNCTION",
+ "DROP-INDEX",
+ "DROP-MATERIALIZED-VIEW",
+ "DROP-RESOURCE",
+ "DROP-SQL-BLOCK-RULE",
+ "DROP-TABLE",
+ "TRUNCATE-TABLE",
+ ],
+ },
+ ],
+ },
+ {
+ title: "DML",
+ directoryPath: "Data-Manipulation-Statements/",
+ initialOpenGroupIndex: -1,
+ children: [
+ {
+ title: "Load",
+ directoryPath: "Load/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "ALTER-ROUTINE-LOAD",
+ "BROKER-LOAD",
+ "CANCEL-LOAD",
+ "CREATE-ROUTINE-LOAD",
+ "PAUSE-ROUTINE-LOAD",
+ "RESUME-ROUTINE-LOAD",
+ "STOP-ROUTINE-LOAD",
+ "STREAM-LOAD",
+ "PAUSE-SYNC-JOB",
+ "RESUME-SYNC-JOB",
+ "STOP-SYNC-JOB",
+ "CREATE-SYNC-JOB",
+ ],
+ },
+ {
+ title: "Manipulation",
+ directoryPath: "Manipulation/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "DELETE",
+ "INSERT",
+ "UPDATE",
+ ],
+ },
+ "OUTFILE"
+ ],
+ },
+ {
+ title: "Database Administration",
+ directoryPath: "Database-Administration-Statements/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "ADMIN-CANCEL-REPAIR",
+ "ADMIN-CHECK-TABLET",
+ "ADMIN-REPAIR-TABLE",
+ "ADMIN-SET-CONFIG",
+ "ADMIN-SET-REPLICA-STATUS",
+ "ADMIN-SHOW-CONFIG",
+ "ADMIN-SHOW-REPLICA-DISTRIBUTION",
+ "ADMIN-SHOW-REPLICA-STATUS",
+ "ENABLE-FEATURE",
+ "INSTALL-PLUGIN",
+ "KILL",
+ "RECOVER",
+ "SET-VARIABLE",
+ "UNINSTALL-PLUGIN",
+ ],
+ },
+ {
+ title: "Show",
+ directoryPath: "Show-Statements/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "SHOW-ALTER",
+ "SHOW-BACKENDS",
+ "SHOW-BACKUP",
+ "SHOW-BROKER",
+ "SHOW-CHARSET",
+ "SHOW-COLLATION",
+ "SHOW-COLUMNS",
+ "SHOW-CREATE-DATABASE",
+ "SHOW-CREATE-FUNCTION",
+ "SHOW-CREATE-ROUTINE-LOAD",
+ "SHOW-CREATE-TABLE",
+ "SHOW-DATA",
+ "SHOW-DATABASE-ID",
+ "SHOW-DATABASES",
+ "SHOW-DELETE",
+ "SHOW-DYNAMIC-PARTITION",
+ "SHOW-ENCRYPT-KEY",
+ "SHOW-ENGINES",
+ "SHOW-EVENTS",
+ "SHOW-EXPORT",
+ "SHOW-FRONTENDS",
+ "SHOW-FUNCTIONS",
+ "SHOW-GRANTS",
+ "SHOW-INDEX",
+ "SHOW-LOAD-PROFILE",
+ "SHOW-LOAD-WARNINGS",
+ "SHOW-LOAD",
+ "SHOW-MIGRATIONS",
+ "SHOW-OPEN-TABLES",
+ "SHOW-PARTITION-ID",
+ "SHOW-PARTITIONS",
+ "SHOW-PLUGINS",
+ "SHOW-PROC",
+ "SHOW-PROCEDURE",
+ "SHOW-PROCESSLIST",
+ "SHOW-PROPERTY",
+ "SHOW-QUERY-PROFILE",
+ "SHOW-REPOSITORIES",
+ "SHOW-RESOURCES",
+ "SHOW-RESTORE",
+ "SHOW-ROLES",
+ "SHOW-ROLLUP",
+ "SHOW-ROUTINE-LOAD-TASK",
+ "SHOW-ROUTINE-LOAD",
+ "SHOW-SMALL-FILES",
+ "SHOW-SNAPSHOT",
+ "SHOW-SQL-BLOCK-RULE",
+ "SHOW-STATUS",
+ "SHOW-STREAM-LOAD",
+ "SHOW-SYNC-JOB",
+ "SHOW-TABLE-ID",
+ "SHOW-TABLE-STATUS",
+ "SHOW-TABLET",
+ "SHOW-TRANSACTION",
+ "SHOW-TRIGGERS",
+ "SHOW-TRASH",
+ "SHOW-USER",
+ "SHOW-VARIABLES",
+ "SHOW-VIEW",
+ "SHOW-WARNING",
+ "SHOW-WHITE-LIST",
+ ],
+ },
+ {
+ title: "Data Types",
+ directoryPath: "Data-Types/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "BIGINT",
+ "BITMAP",
+ "BOOLEAN",
+ "CHAR",
+ "DATE",
+ "DATETIME",
+ "DECIMAL",
+ "DOUBLE",
+ "FLOAT",
+ "HLL",
+ "INT",
+ "SMALLINT",
+ "STRING",
+ "TINYINT",
+ "VARCHAR",
+ ],
+ },
+ {
+ title: "Utility",
+ directoryPath: "Utility-Statements/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "DESCRIBE",
+ "HELP",
+ "USE",
+ ],
+ },
+ ],
+ },
+ ],
+ },
+ {
+ title: "Admin Manual",
+ directoryPath: "admin-manual/",
+ initialOpenGroupIndex: -1,
+ children: [
+ {
+ title: "cluster management",
+ directoryPath: "cluster-management/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "upgrade",
+ "elastic-expansion",
+ "load-balancing"
+ ],
+ },
+ {
+ title: "Data Admin",
+ directoryPath: "data-admin/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "backup",
+ "restore",
+ "delete-recover"
+ ],
+ },
+ "sql-interception",
+ "query-profile",
+ "optimization",
+ {
+ title: "Maintenance and Monitor",
+ directoryPath: "maint-monitor/",
+ initialOpenGroupIndex: -1,
+ children: [
+ {
+ title: "Monitor Metrics",
+ directoryPath: "monitor-metrics/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "fe-metrics",
+ "be-metrics"
+ ],
+ },
+ "disk-capacity",
+ "metadata-operation",
+ "tablet-meta-tool",
+ "tablet-repair-and-balance",
+ "tablet-restore-tool",
+ "monitor-alert",
+ "doris-error-code",
+ "be-olap-error-code"
+ ],
+ },
+ {
+ title: "Config",
+ directoryPath: "config/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "fe-config",
+ "be-config",
+ "user-property"
+ ],
+ },
+ {
+ title: "User Privilege and Ldap",
+ directoryPath: "privilege-ldap/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "user-privilege",
+ "ldap"
+ ],
+ },
+ "multi-tenant",
{
title: "HTTP API",
directoryPath: "http-actions/",
@@ -162,597 +947,28 @@ module.exports = [
],
sidebarDepth: 1,
},
- {
- title: "Maintainence Operation",
- directoryPath: "operation/",
- initialOpenGroupIndex: -1,
- children: [
- "doris-error-code",
- "be-olap-error-code",
- "disk-capacity",
- "metadata-operation",
- "monitor-alert",
- "multi-tenant",
- "tablet-meta-tool",
- "tablet-repair-and-balance",
- "tablet-restore-tool",
- {
- title: "Metrics",
- directoryPath: "monitor-metrics/",
- initialOpenGroupIndex: -1,
- children: [
- "be-metrics",
- "fe-metrics",
- ],
- },
- ],
- sidebarDepth: 2,
- },
- {
- title: "Configuration",
- directoryPath: "config/",
- initialOpenGroupIndex: -1,
- children: [
- "be_config",
- "fe_config",
- "user_property",
- ],
- sidebarDepth: 1,
- },
- {
- title: "Block Rule",
- directoryPath: "block-rule/",
- initialOpenGroupIndex: -1,
- children: [
- "sql-block",
- ],
- sidebarDepth: 1,
- },
- "backup-restore",
- "bloomfilter",
- "broker",
- "colocation-join",
- "bucket-shuffle-join",
- "vectorized-execution-engine",
- "dynamic-partition",
- "export-manual",
- "export_with_mysql_dump",
- "outfile",
- "privilege",
- "ldap",
- "resource-management",
- "running-profile",
- "runtime-filter",
- "small-file-mgr",
- "sql-mode",
- "time-zone",
- "variables",
- "update",
- "multi-tenant",
- "orthogonal-bitmap-manual",
],
- sidebarDepth: 1,
},
{
- title: "Benchmark & Sample",
+ title: "FQA",
+ directoryPath: "faq/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "install-faq",
+ "data-faq",
+ "sql-faq"
+ ],
+ },
+ {
+ title: "Benchmark",
directoryPath: "benchmark/",
initialOpenGroupIndex: -1,
children: [
- "star-schema-benchmark",
- "systemd",
- "samples"
+ "ssb",
+ "tpc-h"
],
},
- {
- title: "Extending Ability",
- directoryPath: "extending-doris/",
- initialOpenGroupIndex: -1,
- children: [
- "audit-plugin",
- "doris-on-es",
- "logstash",
- "odbc-of-doris",
- "hive-of-doris",
- "iceberg-of-doris",
- "plugin-development-manual",
- "spark-doris-connector",
- "flink-doris-connector",
- "datax",
- {
- title: "Seatunnel",
- directoryPath: "seatunnel/",
- initialOpenGroupIndex: -1,
- children: [
- "spark-sink",
- "flink-sink",
- ],
- },
- {
- title: "UDF",
- directoryPath: "udf/",
- initialOpenGroupIndex: -1,
- children: [
- "contribute-udf",
- "native-user-defined-function",
- "remote-user-defined-function",
- "java-user-defined-function",
- ],
- },
- ],
- },
- {
- title: "Design Documents",
- directoryPath: "internal/",
- initialOpenGroupIndex: -1,
- children: [
- "doris_storage_optimization",
- "grouping_sets_design",
- "metadata-design",
- ],
- },
- {
- title: "SQL Manual",
- directoryPath: "sql-reference/",
- initialOpenGroupIndex: -1,
- children: [
- {
- title: "SQL Functions",
- directoryPath: "sql-functions/",
- initialOpenGroupIndex: -1,
- children: [
- {
- title: "Date Time Functions",
- directoryPath: "date-time-functions/",
- initialOpenGroupIndex: -1,
- children: [
- "convert_tz",
- "curdate",
- "current_timestamp",
- "curtime",
- "date_add",
- "date_format",
- "date_sub",
- "datediff",
- "day",
- "dayname",
- "dayofmonth",
- "dayofweek",
- "dayofyear",
- "from_days",
- "from_unixtime",
- "hour",
- "makedate",
- "minute",
- "month",
- "monthname",
- "now",
- "second",
- "str_to_date",
- "time_round",
- "timediff",
- "timestampadd",
- "timestampdiff",
- "to_days",
- "unix_timestamp",
- "utc_timestamp",
- "week",
- "weekday",
- "weekofyear",
- "year",
- "yearweek",
- ],
- },
- {
- title: "Sptial Functions",
- directoryPath: "spatial-functions/",
- initialOpenGroupIndex: -1,
- children: [
- "st_astext",
- "st_circle",
- "st_contains",
- "st_distance_sphere",
- "st_geometryfromtext",
- "st_linefromtext",
- "st_point",
- "st_polygon",
- "st_x",
- "st_y",
- ],
- },
- {
- title: "String Functions",
- directoryPath: "string-functions/",
- initialOpenGroupIndex: -1,
- children: [
- "append_trailing_char_if_absent",
- "ascii",
- "bit_length",
- "char_length",
- "concat",
- "concat_ws",
- "ends_with",
- "find_in_set",
- "hex",
- "instr",
- "lcase",
- "left",
- "length",
- "locate",
- "lower",
- "lpad",
- "ltrim",
- "money_format",
- "null_or_empty",
- "repeat",
- "replace",
- "reverse",
- "right",
- "rpad",
- "split_part",
- "starts_with",
- "strleft",
- "strright",
- "substring",
- "unhex",
- {
- title: "fuzzy match",
- directoryPath: "like/",
- initialOpenGroupIndex: -1,
- children: [
- "like",
- "not_like",
- ],
- },
- {
- title: "regular match",
- directoryPath: "regexp/",
- initialOpenGroupIndex: -1,
- children: [
- "regexp",
- "regexp_extract",
- "regexp_replace",
- "not_regexp",
- ],
- },
- ],
- },
- {
- title: "Aggregate Functions",
- directoryPath: "aggregate-functions/",
- initialOpenGroupIndex: -1,
- children: [
- "approx_count_distinct",
- "avg",
- "bitmap_union",
- "count",
- "group_concat",
- "hll_union_agg",
- "max",
- "min",
- "percentile",
- "percentile_approx",
- "stddev",
- "stddev_samp",
- "sum",
- "topn",
- "var_samp",
- "variance",
- ],
- },
- {
- title: "bitmap functions",
- directoryPath: "bitmap-functions/",
- initialOpenGroupIndex: -1,
- children: [
- "bitmap_and",
- "bitmap_contains",
- "bitmap_empty",
- "bitmap_from_string",
- "bitmap_has_any",
- "bitmap_has_all",
- "bitmap_hash",
- "bitmap_intersect",
- "bitmap_or",
- "bitmap_and_count",
- "bitmap_or_count",
- "bitmap_xor",
- "bitmap_xor_count",
- "bitmap_not",
- "bitmap_and_not",
- "bitmap_and_not_count",
- "bitmap_subset_in_range",
- "bitmap_subset_limit",
- "sub_bitmap",
- "bitmap_to_string",
- "bitmap_union",
- "bitmap_xor",
- "to_bitmap",
- "bitmap_max",
- "orthogonal_bitmap_intersect",
- "orthogonal_bitmap_intersect_count",
- "orthogonal_bitmap_union_count",
- ],
- },
- {
- title: "bitwise function",
- directoryPath: "bitwise-functions/",
- initialOpenGroupIndex: -1,
- children: [
- "bitand",
- "bitor",
- "bitxor",
- "bitnot"
- ],
- },
- {
- title: "conditional function",
- directoryPath: "conditional-functions/",
- children: [
- "case",
- "coalesce",
- "if",
- "ifnull",
- "nullif"
- ],
- },
- {
- title: "json function",
- directoryPath: "json-functions/",
- initialOpenGroupIndex: -1,
- children: [
- "get_json_double",
- "get_json_int",
- "get_json_string",
- "json_array",
- "json_object",
- "json_quote",
- ],
- },
- {
- title: "Encryption and Digest Functions",
- directoryPath: "encrypt-digest-functions/",
- initialOpenGroupIndex: -1,
- children: [
- "aes",
- "md5",
- "md5sum",
- "sm4",
- "sm3",
- "sm3sum"
- ],
- },
- {
- title: "Hash Functions",
- directoryPath: "hash-functions/",
- initialOpenGroupIndex: -1,
- children: ["murmur_hash3_32"],
- },
- {
- title: "Math Functions",
- directoryPath: "math-functions/",
- initialOpenGroupIndex: -1,
- children: [
- "conv",
- "pmod"
- ],
- },
- {
- title: "table functions",
- directoryPath: "table-functions/",
- initialOpenGroupIndex: -1,
- children: [
- "explode-bitmap",
- "explode-split",
- "explode-json-array",
- "explode-numbers",
- "outer-combinator"
- ],
- },
- "window-function",
- "cast",
- "digital-masking",
- ],
- },
- {
- title: "SQL Statements",
- directoryPath: "sql-statements/",
- initialOpenGroupIndex: -1,
- children: [
- {
- title: "Account Management",
- directoryPath: "Account Management/",
- initialOpenGroupIndex: -1,
- children: [
- "CREATE ROLE",
- "CREATE USER",
- "DROP ROLE",
- "DROP USER",
- "GRANT",
- "REVOKE",
- "SET PASSWORD",
- "SET PROPERTY",
- "SHOW GRANTS",
- "SHOW ROLES",
- ],
- },
- {
- title: "Administration",
- directoryPath: "Administration/",
- initialOpenGroupIndex: -1,
- children: [
- "ADMIN CANCEL REBALANCE DISK",
- "ADMIN CANCEL REPAIR",
- "ADMIN CLEAN TRASH",
- "ADMIN CHECK TABLET",
- "ADMIN COMPACT",
- "ADMIN REBALANCE DISK",
- "ADMIN REPAIR",
- "ADMIN SET CONFIG",
- "ADMIN SET REPLICA STATUS",
- "ADMIN SHOW CONFIG",
- "ADMIN SHOW REPLICA DISTRIBUTION",
- "ADMIN SHOW REPLICA STATUS",
- "ADMIN-DIAGNOSE-TABLET",
- "ADMIN SHOW TABLET STORAGE FORMAT",
- "ALTER CLUSTER",
- "ALTER SYSTEM",
- "CANCEL DECOMMISSION",
- "CREATE CLUSTER",
- "CREATE FILE",
- "DROP CLUSTER",
- "DROP FILE",
- "ENTER",
- "INSTALL PLUGIN",
- "LINK DATABASE",
- "MIGRATE DATABASE",
- "SET LDAP_ADMIN_PASSWORD",
- "SHOW BACKENDS",
- "SHOW BROKER",
- "SHOW FILE",
- "SHOW FRONTENDS",
- "SHOW FULL COLUMNS",
- "SHOW INDEX",
- "SHOW MIGRATIONS",
- "SHOW PLUGINS",
- "SHOW TABLE STATUS",
- "SHOW TRASH",
- "UNINSTALL PLUGIN",
- ],
- },
- {
- title: "Data Definition",
- directoryPath: "Data Definition/",
- initialOpenGroupIndex: -1,
- children: [
- "ALTER DATABASE",
- "ALTER RESOURCE",
- "ALTER TABLE",
- "ALTER VIEW",
- "BACKUP",
- "CANCEL ALTER",
- "CANCEL BACKUP",
- "CREATE ENCRYPTKEY",
- "CANCEL RESTORE",
- "CREATE DATABASE",
- "CREATE INDEX",
- "CREATE MATERIALIZED VIEW",
- "CREATE REPOSITORY",
- "CREATE RESOURCE",
- "CREATE TABLE LIKE",
- "CREATE TABLE",
- "CREATE VIEW",
- "Colocate Join",
- "DROP DATABASE",
- "DROP ENCRYPTKEY",
- "DROP INDEX",
- "DROP MATERIALIZED VIEW",
- "DROP REPOSITORY",
- "DROP RESOURCE",
- "DROP TABLE",
- "DROP VIEW",
- "HLL",
- "RECOVER",
- "REFRESH DATABASE",
- "REFRESH TABLE",
- "RESTORE",
- "SHOW ENCRYPTKEYS",
- "SHOW RESOURCES",
- "TRUNCATE TABLE",
- "create-function",
- "drop-function",
- "show-functions",
- ],
- },
- {
- title: "Data Manipulation",
- directoryPath: "Data Manipulation/",
- initialOpenGroupIndex: -1,
- children: [
- "BEGIN",
- "BROKER LOAD",
- "CANCEL DELETE",
- "CANCEL LABEL",
- "CANCEL LOAD",
- "CREATE SYNC JOB",
- "DELETE",
- "EXPORT",
- "GET LABEL STATE",
- "GROUP BY",
- "LOAD",
- "MINI LOAD",
- "MULTI LOAD",
- "PAUSE ROUTINE LOAD",
- "PAUSE SYNC JOB",
- "RESTORE TABLET",
- "RESUME ROUTINE LOAD",
- "RESUME SYNC JOB",
- "ROUTINE LOAD",
- "SHOW ALTER",
- "SHOW BACKUP",
- "SHOW CREATE FUNCTION",
- "SHOW CREATE ROUTINE LOAD",
- "SHOW DATA",
- "SHOW DATA SKEW",
- "SHOW DATABASES",
- "SHOW DELETE",
- "SHOW DYNAMIC PARTITION TABLES",
- "SHOW EXPORT",
- "SHOW LOAD",
- "SHOW PARTITIONS",
- "SHOW PROPERTY",
- "SHOW REPOSITORIES",
- "SHOW RESTORE",
- "SHOW ROUTINE LOAD TASK",
- "SHOW ROUTINE LOAD",
- "SHOW SNAPSHOT",
- "SHOW SYNC JOB",
- "SHOW TABLES",
- "SHOW TABLE CREATION",
- "SHOW TABLET",
- "SHOW TRANSACTION",
- "SHOW-LAST-INSERT",
- "STOP ROUTINE LOAD",
- "STOP SYNC JOB",
- "STREAM LOAD",
- "alter-routine-load",
- "insert",
- "UPDATE",
- "lateral-view",
- ],
- },
- {
- title: "Data Types",
- directoryPath: "Data Types/",
- initialOpenGroupIndex: -1,
- children: [
- "BIGINT",
- "BITMAP",
- "BOOLEAN",
- "CHAR",
- "DATE",
- "DATETIME",
- "DECIMAL",
- "DOUBLE",
- "FLOAT",
- "HLL",
- "INT",
- "SMALLINT",
- "STRING",
- "TINYINT",
- "VARCHAR",
- ],
- },
- {
- title: "Utility",
- directoryPath: "Utility/",
- initialOpenGroupIndex: -1,
- children: ["util_stmt"],
- },
- ],
- },
- ],
- },
- {
+ {
title: "Doris User",
directoryPath: "case-user/",
initialOpenGroupIndex: -1,
@@ -780,15 +996,6 @@ module.exports = [
"bitmap-hll-file-format",
],
},
- {
- title: "FAQ",
- directoryPath: "faq/",
- initialOpenGroupIndex: -1,
- children: [
- "faq",
- "error"
- ],
- },
{
title: "Apache Community",
directoryPath: "community/",
@@ -817,6 +1024,7 @@ module.exports = [
"release-prepare",
"release-doris-core",
"release-doris-connectors",
+ "release-doris-manager",
"release-complete",
"release-verify",
],
@@ -824,4 +1032,4 @@ module.exports = [
"security",
],
},
-]
+];
diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js
index 324b9a7809..0cbc7b0edb 100644
--- a/docs/.vuepress/sidebar/zh-CN.js
+++ b/docs/.vuepress/sidebar/zh-CN.js
@@ -25,288 +25,249 @@ module.exports = [
sidebarDepth: 1,
},
{
- title: "编译与部署",
- directoryPath: "installing/",
+ title: "快速开始",
+ directoryPath: "get-starting/",
initialOpenGroupIndex: -1,
children: [
- "compilation",
- "compilation-with-ldb-toolchain",
- "compilation-arm",
- "install-deploy",
- "upgrade",
+ "get-starting"
],
},
{
- title: "开始使用",
- directoryPath: "getting-started/",
+ title: "Doris架构",
+ directoryPath: "summary/",
initialOpenGroupIndex: -1,
children: [
+ "basic-summary",
+ "system-architecture"
+ ],
+ },
+ {
+ title: "安装部署",
+ directoryPath: "install/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "install-deploy",
+ {
+ title: "源码编译",
+ directoryPath: "source-install/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "compilation",
+ "compilation-with-ldb-toolchain",
+ "compilation-arm"
+ ],
+ sidebarDepth: 2,
+ },
+ ]
+ },
+ {
+ title: "数据表设计",
+ directoryPath: "data-table/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "data-model",
+ "data-partition",
"basic-usage",
"advance-usage",
- "data-model-rollup",
- "data-partition",
"hit-the-rollup",
+ "best-practice",
+ {
+ title: "索引",
+ directoryPath: "index/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "bloomfilter",
+ "prefix-index",
+ "bitmap-index"
+ ],
+ },
],
},
{
- title: "操作手册",
- directoryPath: "administrator-guide/",
+ title: "数据操作",
+ directoryPath: "data-operate/",
initialOpenGroupIndex: -1,
children: [
{
title: "数据导入",
- directoryPath: "load-data/",
+ directoryPath: "import/",
initialOpenGroupIndex: -1,
children: [
"load-manual",
- "batch-delete-manual",
- "binlog-load-manual",
- "broker-load-manual",
- "routine-load-manual",
- "sequence-column-manual",
- "spark-load-manual",
- "stream-load-manual",
- "s3-load-manual",
- "delete-manual",
- "insert-into-manual",
- "load-json-format",
+ {
+ title: "按场景导入",
+ directoryPath: "import-scenes/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "local-file-load",
+ "external-storage-load",
+ "kafka-load",
+ "external-table-load",
+ "jdbc-load",
+ "load-atomicity",
+ "load-data-convert",
+ "load-strict-mode",
+ ],
+ },
+ {
+ title: "按方式导入",
+ directoryPath: "import-way/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "binlog-load-manual",
+ "broker-load-manual",
+ "routine-load-manual",
+ "spark-load-manual",
+ "stream-load-manual",
+ "s3-load-manual",
+ "insert-into-manual",
+ "load-json-format",
+ ],
+ },
],
- sidebarDepth: 2,
},
+ {
+ title: "Export导出",
+ directoryPath: "export/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "export-manual",
+ "outfile",
+ "export_with_mysql_dump",
+ ],
+ },
+ {
+ title: "数据更新及删除",
+ directoryPath: "update-delete/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "update",
+ "sequence-column-manual",
+ "delete-manual",
+ "batch-delete-manual"
+ ],
+ },
+ ],
+ },
+ {
+ title: "进阶使用",
+ directoryPath: "advanced/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "materialized-view",
{
title: "表结构变更",
directoryPath: "alter-table/",
initialOpenGroupIndex: -1,
children: [
- "alter-table-bitmap-index",
- "alter-table-replace-table",
- "alter-table-rollup",
- "alter-table-schema-change",
- "alter-table-temp-partition",
+ "schema-change",
+ "replace-table"
],
- sidebarDepth: 2,
- },
- "materialized_view",
- {
- title: "HTTP API",
- directoryPath: "http-actions/",
- initialOpenGroupIndex: -1,
- children: [
- {
- title: "FE",
- directoryPath: "fe/",
- initialOpenGroupIndex: -1,
- children: [
- {
- title: "MANAGER",
- directoryPath: "manager/",
- initialOpenGroupIndex: -1,
- children: [
- "cluster-action",
- "node-action",
- "query-profile-action",
- ],
- },
- "backends-action",
- "bootstrap-action",
- "cancel-load-action",
- "check-decommission-action",
- "check-storage-type-action",
- "config-action",
- "connection-action",
- "get-ddl-stmt-action",
- "get-load-info-action",
- "get-load-state",
- "get-log-file-action",
- "get-small-file",
- "ha-action",
- "hardware-info-action",
- "health-action",
- "log-action",
- "logout-action",
- "meta-action",
- "meta-info-action",
- "meta-replay-state-action",
- "profile-action",
- "query-detail-action",
- "query-profile-action",
- "row-count-action",
- "session-action",
- "set-config-action",
- "show-data-action",
- "show-meta-info-action",
- "show-proc-action",
- "show-runtime-info-action",
- "statement-execution-action",
- "system-action",
- "table-query-plan-action",
- "table-row-count-action",
- "table-schema-action",
- "upload-action",
- ],
- },
- "cancel-label",
- "check-reset-rpc-cache",
- "compaction-action",
- "connection-action",
- "fe-get-log-file",
- "get-load-state",
- "get-tablets",
- "profile-action",
- "query-detail-action",
- "restore-tablet",
- "show-data-action",
- "tablet-migration-action",
- "tablets_distribution",
- ],
- sidebarDepth: 1,
},
{
- title: "运维操作",
- directoryPath: "operation/",
+ title: "Doris表分区",
+ directoryPath: "partition/",
initialOpenGroupIndex: -1,
children: [
- "doris-error-code",
- "be-olap-error-code",
- "disk-capacity",
- "metadata-operation",
- "monitor-alert",
- "multi-tenant",
- "tablet-meta-tool",
- "tablet-repair-and-balance",
- "tablet-restore-tool",
- {
- title: "监控项",
- directoryPath: "monitor-metrics/",
- initialOpenGroupIndex: -1,
- children: [
- "be-metrics",
- "fe-metrics",
- ],
- },
+ "dynamic-partition",
+ "table-tmp-partition"
],
- sidebarDepth: 2,
},
{
- title: "配置文件",
- directoryPath: "config/",
+ title: "Join优化",
+ directoryPath: "join-optimization/",
initialOpenGroupIndex: -1,
children: [
- "be_config",
- "fe_config",
- "user_property",
+ "bucket-shuffle-join",
+ "colocation-join",
+ "runtime-filter"
],
- sidebarDepth: 1,
},
{
- title: "拦截规则",
- directoryPath: "block-rule/",
+ title: "数据缓存",
+ directoryPath: "cache/",
initialOpenGroupIndex: -1,
children: [
- "sql-block",
+ "partition-cache"
],
- sidebarDepth: 1,
},
- "backup-restore",
- "bloomfilter",
- "broker",
- "colocation-join",
- "bucket-shuffle-join",
"vectorized-execution-engine",
- "dynamic-partition",
- "export-manual",
- "export_with_mysql_dump",
- "outfile",
- "partition_cache",
- "privilege",
- "ldap",
- "resource-management",
- "running-profile",
- "runtime-filter",
- "segment-v2-usage",
- "small-file-mgr",
- "sql-mode",
- "time-zone",
- "variables",
- "update",
- "multi-tenant",
+ "broker",
+ "resource",
"orthogonal-bitmap-manual",
- ],
- sidebarDepth: 1,
- },
- {
- title: "性能测试及示例",
- directoryPath: "benchmark/",
- initialOpenGroupIndex: -1,
- children: [
- "star-schema-benchmark",
- "systemd",
- "samples"
+ "variables",
+ "time-zone",
+ "small-file-mgr",
+ {
+ title: "最佳实践",
+ directoryPath: "best-practice/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "query-analysis",
+ "import-analysis",
+ "debug-log"
+ ],
+ }
],
},
{
- title: "扩展功能",
- directoryPath: "extending-doris/",
+ title: "生态扩展",
+ directoryPath: "ecosystem/",
initialOpenGroupIndex: -1,
children: [
+ {
+ title: "扩展表",
+ directoryPath: "external-table/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "doris-on-es",
+ "odbc-of-doris",
+ "hive-of-doris",
+ "iceberg-of-doris"
+ ],
+ },
"audit-plugin",
- "doris-on-es",
- "logstash",
- "odbc-of-doris",
- "hive-of-doris",
- "iceberg-of-doris",
- "plugin-development-manual",
- "spark-doris-connector",
"flink-doris-connector",
+ "spark-doris-connector",
"datax",
+ "logstash",
{
title: "Doris Manager",
directoryPath: "doris-manager/",
+ initialOpenGroupIndex: -1,
children: [
"compiling-deploying",
"initializing",
- "space-list",
- "system-settings",
"cluster-managenent",
- "space-management"
+ "space-list",
+ "space-management",
+ "system-settings"
],
},
{
- title: "Seatunnel",
+ title: "SeaTunnel",
directoryPath: "seatunnel/",
initialOpenGroupIndex: -1,
children: [
- "spark-sink",
"flink-sink",
+ "spark-sink"
],
},
{
- title: "UDF",
+ title: "自定义函数",
directoryPath: "udf/",
initialOpenGroupIndex: -1,
children: [
- "contribute-udf",
"native-user-defined-function",
"remote-user-defined-function",
- "java-user-defined-function",
+ "contribute-udf"
],
},
],
},
{
- title: "设计文档",
- directoryPath: "internal/",
- initialOpenGroupIndex: -1,
- children: [
- "doris_storage_optimization",
- "grouping_sets_design",
- "metadata-design",
- "spark_load",
- ],
- },
- {
- title: "SQL 手册",
- directoryPath: "sql-reference/",
+ title: "SQL手册",
+ directoryPath: "sql-manual/",
initialOpenGroupIndex: -1,
children: [
{
@@ -442,7 +403,9 @@ module.exports = [
"group_concat",
"hll_union_agg",
"max",
+ "max_by",
"min",
+ "min_by",
"percentile",
"percentile_approx",
"stddev",
@@ -539,7 +502,7 @@ module.exports = [
},
{
title: "加密和信息摘要函数",
- directoryPath: "encrypt-digest-functions/",
+ directoryPath: "encrypt-dixgest-functions/",
initialOpenGroupIndex: -1,
children: [
"aes",
@@ -558,7 +521,6 @@ module.exports = [
"explode-bitmap",
"explode-split",
"explode-json-array",
- "explode-numbers",
"outer-combinator"
],
},
@@ -568,171 +530,244 @@ module.exports = [
],
},
{
- title: "语法帮助",
- directoryPath: "sql-statements/",
+ title: "SQL手册",
+ directoryPath: "sql-reference-v2/",
initialOpenGroupIndex: -1,
children: [
{
- title: "用户账户管理",
- directoryPath: "Account Management/",
+ title: "账户管理",
+ directoryPath: "Account-Management-Statements/",
initialOpenGroupIndex: -1,
children: [
- "CREATE ROLE",
- "CREATE USER",
- "DROP ROLE",
- "DROP USER",
+ "CREATE-USER",
+ "CREATE-ROLE",
+ "DROP-ROLE",
+ "DROP-USER",
"GRANT",
"REVOKE",
- "SET PASSWORD",
- "SET PROPERTY",
- "SHOW GRANTS",
- "SHOW ROLES",
+ "SET-PASSWORD",
+ "SET-PROPERTY",
+ "LDAP",
],
},
{
title: "集群管理",
- directoryPath: "Administration/",
+ directoryPath: "Cluster-Management-Statements/",
initialOpenGroupIndex: -1,
children: [
- "ADMIN CANCEL REBALANCE DISK",
- "ADMIN CANCEL REPAIR",
- "ADMIN CLEAN TRASH",
- "ADMIN CHECK TABLET",
- "ADMIN COMPACT",
- "ADMIN REBALANCE DISK",
- "ADMIN REPAIR",
- "ADMIN SET CONFIG",
- "ADMIN SET REPLICA STATUS",
- "ADMIN SHOW CONFIG",
- "ADMIN SHOW REPLICA DISTRIBUTION",
- "ADMIN SHOW REPLICA STATUS",
- "ADMIN-DIAGNOSE-TABLET",
- "ADMIN SHOW TABLET STORAGE FORMAT",
- "ALTER CLUSTER",
- "ALTER SYSTEM",
- "CANCEL DECOMMISSION",
- "CREATE CLUSTER",
- "CREATE FILE",
- "DROP CLUSTER",
- "DROP FILE",
- "ENTER",
- "INSTALL PLUGIN",
- "LINK DATABASE",
- "MIGRATE DATABASE",
- "SET LDAP_ADMIN_PASSWORD",
- "SHOW BACKENDS",
- "SHOW BROKER",
- "SHOW FILE",
- "SHOW FRONTENDS",
- "SHOW FULL COLUMNS",
- "SHOW INDEX",
- "SHOW MIGRATIONS",
- "SHOW PLUGINS",
- "SHOW TABLE STATUS",
- "SHOW TRASH",
- "UNINSTALL PLUGIN",
+ "ALTER-SYSTEM-ADD-BACKEND",
+ "ALTER-SYSTEM-ADD-FOLLOWER",
+ "ALTER-SYSTEM-ADD-OBSERVER",
+ "ALTER-SYSTEM-DECOMMISSION-BACKEND",
+ "ALTER-SYSTEM-DROP-BACKEND",
+ "ALTER-SYSTEM-DROP-FOLLOWER",
+ "ALTER-SYSTEM-DROP-OBSERVER",
+ "ALTER-SYSTEM-MODIFY-BROKER",
+ "CANCEL-ALTER-SYSTEM",
],
},
{
title: "DDL",
- directoryPath: "Data Definition/",
+ directoryPath: "Data-Definition-Statements/",
initialOpenGroupIndex: -1,
children: [
- "ALTER DATABASE",
- "ALTER RESOURCE",
- "ALTER TABLE",
- "ALTER VIEW",
- "BACKUP",
- "CANCEL ALTER",
- "CANCEL BACKUP",
- "CANCEL RESTORE",
- "CREATE DATABASE",
- "CREATE ENCRYPTKEY",
- "CREATE INDEX",
- "CREATE MATERIALIZED VIEW",
- "CREATE REPOSITORY",
- "CREATE RESOURCE",
- "CREATE TABLE LIKE",
- "CREATE TABLE",
- "CREATE VIEW",
- "DROP DATABASE",
- "DROP ENCRYPTKEY",
- "DROP INDEX",
- "DROP MATERIALIZED VIEW",
- "DROP REPOSITORY",
- "DROP RESOURCE",
- "DROP TABLE",
- "DROP VIEW",
- "HLL",
- "RECOVER",
- "REFRESH DATABASE",
- "REFRESH TABLE",
- "RESTORE",
- "SHOW ENCRYPTKEYS",
- "SHOW RESOURCES",
- "TRUNCATE TABLE",
- "create-function",
- "drop-function",
- "show-functions",
+ {
+ title: "Alter",
+ directoryPath: "Alter/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "ALTER-DATABASE",
+ "ALTER-SQL-BLOCK-RULE",
+ "ALTER-TABLE-COLUMN",
+ "ALTER-TABLE-PARTITION",
+ "ALTER-TABLE-PROPERTY",
+ "ALTER-TABLE-RENAME",
+ "ALTER-TABLE-REPLACE",
+ "ALTER-TABLE-ROLLUP",
+ "ALTER-VIEW",
+ "CANCEL-ALTER-TABLE",
+ ],
+ },
+ {
+ title: "备份及恢复",
+ directoryPath: "Backup-and-Restore/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "BACKUP",
+ "CANCEL-BACKUP",
+ "CANCEL-RESTORE",
+ "CREATE-REPOSITORY",
+ "DROP-REPOSITORY",
+ "RESTORE",
+ ],
+ },
+ {
+ title: "Create",
+ directoryPath: "Create/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "CREATE-DATABASE",
+ "CREATE-ENCRYPT-KEY",
+ "CREATE-FILE",
+ "CREATE-FUNCTION",
+ "CREATE-INDEX",
+ "CREATE-MATERIALIZED-VIEW",
+ "CREATE-RESOURCE",
+ "CREATE-SQL-BLOCK-RULE",
+ "CREATE-TABLE-LIKE",
+ "CREATE-TABLE",
+ "CREATE-VIEW",
+ "CREATE-EXTERNAL-TABLE",
+ ],
+ },
+ {
+ title: "Drop",
+ directoryPath: "Drop/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "DROP-DATABASE",
+ "DROP-ENCRYPT-KEY",
+ "DROP-FILE",
+ "DROP-FUNCTION",
+ "DROP-INDEX",
+ "DROP-MATERIALIZED-VIEW",
+ "DROP-RESOURCE",
+ "DROP-SQL-BLOCK-RULE",
+ "DROP-TABLE",
+ "TRUNCATE-TABLE",
+ ],
+ },
],
},
{
title: "DML",
- directoryPath: "Data Manipulation/",
+ directoryPath: "Data-Manipulation-Statements/",
initialOpenGroupIndex: -1,
children: [
- "BEGIN",
- "BROKER LOAD",
- "CANCEL LOAD",
- "CREATE SYNC JOB",
- "DELETE",
- "EXPORT",
- "GROUP BY",
- "LOAD",
- "MINI LOAD",
- "MULTI LOAD",
- "PAUSE ROUTINE LOAD",
- "PAUSE SYNC JOB",
- "RESUME ROUTINE LOAD",
- "RESUME SYNC JOB",
- "ROUTINE LOAD",
- "SHOW ALTER",
- "SHOW BACKUP",
- "SHOW CREATE FUNCTION",
- "SHOW CREATE ROUTINE LOAD",
- "SHOW DATA",
- "SHOW DATA SKEW",
- "SHOW DATABASES",
- "SHOW DELETE",
- "SHOW DYNAMIC PARTITION TABLES",
- "SHOW EXPORT",
- "SHOW LOAD",
- "SHOW PARTITIONS",
- "SHOW PROPERTY",
- "SHOW REPOSITORIES",
- "SHOW RESTORE",
- "SHOW ROUTINE LOAD TASK",
- "SHOW ROUTINE LOAD",
- "SHOW SNAPSHOT",
- "SHOW SYNC JOB",
- "SHOW TABLES",
- "SHOW TABLE CREATION",
- "SHOW TABLET",
- "SHOW TRANSACTION",
- "SHOW-LAST-INSERT",
- "SPARK LOAD",
- "STOP ROUTINE LOAD",
- "STOP SYNC JOB",
- "STREAM LOAD",
- "alter-routine-load",
- "insert",
- "UPDATE",
- "lateral-view",
+ {
+ title: "Load",
+ directoryPath: "Load/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "ALTER-ROUTINE-LOAD",
+ "BROKER-LOAD",
+ "CANCEL-LOAD",
+ "CREATE-ROUTINE-LOAD",
+ "PAUSE-ROUTINE-LOAD",
+ "RESUME-ROUTINE-LOAD",
+ "STOP-ROUTINE-LOAD",
+ "STREAM-LOAD",
+ "PAUSE-SYNC-JOB",
+ "RESUME-SYNC-JOB",
+ "STOP-SYNC-JOB",
+ "CREATE-SYNC-JOB",
+ ],
+ },
+ {
+ title: "操作",
+ directoryPath: "Manipulation/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "DELETE",
+ "INSERT",
+ "UPDATE",
+ ],
+ },
+ "OUTFILE"
+ ],
+ },
+ {
+ title: "数据库管理",
+ directoryPath: "Database-Administration-Statements/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "ADMIN-CANCEL-REPAIR",
+ "ADMIN-CHECK-TABLET",
+ "ADMIN-REPAIR-TABLE",
+ "ADMIN-SET-CONFIG",
+ "ADMIN-SET-REPLICA-STATUS",
+ "ADMIN-SHOW-CONFIG",
+ "ADMIN-SHOW-REPLICA-DISTRIBUTION",
+ "ADMIN-SHOW-REPLICA-STATUS",
+ "ENABLE-FEATURE",
+ "INSTALL-PLUGIN",
+ "KILL",
+ "RECOVER",
+ "SET-VARIABLE",
+ "UNINSTALL-PLUGIN",
+ ],
+ },
+ {
+ title: "Show",
+ directoryPath: "Show-Statements/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "SHOW-ALTER",
+ "SHOW-BACKENDS",
+ "SHOW-BACKUP",
+ "SHOW-BROKER",
+ "SHOW-CHARSET",
+ "SHOW-COLLATION",
+ "SHOW-COLUMNS",
+ "SHOW-CREATE-DATABASE",
+ "SHOW-CREATE-FUNCTION",
+ "SHOW-CREATE-ROUTINE-LOAD",
+ "SHOW-CREATE-TABLE",
+ "SHOW-DATA",
+ "SHOW-DATABASE-ID",
+ "SHOW-DATABASES",
+ "SHOW-DELETE",
+ "SHOW-DYNAMIC-PARTITION",
+ "SHOW-ENCRYPT-KEY",
+ "SHOW-ENGINES",
+ "SHOW-EVENTS",
+ "SHOW-EXPORT",
+ "SHOW-FRONTENDS",
+ "SHOW-FUNCTIONS",
+ "SHOW-GRANTS",
+ "SHOW-INDEX",
+ "SHOW-LOAD-PROFILE",
+ "SHOW-LOAD-WARNINGS",
+ "SHOW-LOAD",
+ "SHOW-MIGRATIONS",
+ "SHOW-OPEN-TABLES",
+ "SHOW-PARTITION-ID",
+ "SHOW-PARTITIONS",
+ "SHOW-PLUGINS",
+ "SHOW-PROC",
+ "SHOW-PROCEDURE",
+ "SHOW-PROCESSLIST",
+ "SHOW-PROPERTY",
+ "SHOW-QUERY-PROFILE",
+ "SHOW-REPOSITORIES",
+ "SHOW-RESOURCES",
+ "SHOW-RESTORE",
+ "SHOW-ROLES",
+ "SHOW-ROLLUP",
+ "SHOW-ROUTINE-LOAD-TASK",
+ "SHOW-ROUTINE-LOAD",
+ "SHOW-SMALL-FILES",
+ "SHOW-SNAPSHOT",
+ "SHOW-SQL-BLOCK-RULE",
+ "SHOW-STATUS",
+ "SHOW-STREAM-LOAD",
+ "SHOW-SYNC-JOB",
+ "SHOW-TABLE-ID",
+ "SHOW-TABLE-STATUS",
+ "SHOW-TABLET",
+ "SHOW-TRANSACTION",
+ "SHOW-TRIGGERS",
+ "SHOW-TRASH",
+ "SHOW-USER",
+ "SHOW-VARIABLES",
+ "SHOW-VIEW",
+ "SHOW-WARNING",
+ "SHOW-WHITE-LIST",
],
},
{
title: "数据类型",
- directoryPath: "Data Types/",
+ directoryPath: "Data-Types/",
initialOpenGroupIndex: -1,
children: [
"BIGINT",
@@ -746,7 +781,6 @@ module.exports = [
"FLOAT",
"HLL",
"INT",
- "LARGEINT",
"SMALLINT",
"STRING",
"TINYINT",
@@ -755,14 +789,185 @@ module.exports = [
},
{
title: "辅助命令",
- directoryPath: "Utility/",
+ directoryPath: "Utility-Statements/",
initialOpenGroupIndex: -1,
- children: ["DESCRIBE"],
+ children: [
+ "DESCRIBE",
+ "HELP",
+ "USE"
+ ],
},
],
},
],
},
+ {
+ title: "管理手册",
+ directoryPath: "admin-manual/",
+ initialOpenGroupIndex: -1,
+ children: [
+ {
+ title: "集群管理",
+ directoryPath: "cluster-management/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "upgrade",
+ "elastic-expansion",
+ "load-balancing"
+ ],
+ },
+ {
+ title: "数据管理",
+ directoryPath: "data-admin/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "backup",
+ "restore",
+ "delete-recover"
+ ],
+ },
+ "sql-interception",
+ "query-profile",
+ "optimization",
+ {
+ title: "运维监控",
+ directoryPath: "maint-monitor/",
+ initialOpenGroupIndex: -1,
+ children: [
+ {
+ title: "监控指标",
+ directoryPath: "monitor-metrics/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "fe-metrics",
+ "be-metrics"
+ ],
+ },
+ "disk-capacity",
+ "metadata-operation",
+ "tablet-meta-tool",
+ "tablet-repair-and-balance",
+ "tablet-restore-tool",
+ "monitor-alert",
+ "doris-error-code",
+ "be-olap-error-code"
+ ],
+ },
+ {
+ title: "配置管理",
+ directoryPath: "config/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "fe-config",
+ "be-config",
+ "user-property"
+ ],
+ },
+ {
+ title: "用户权限及认证",
+ directoryPath: "privilege-ldap/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "user-privilege",
+ "ldap"
+ ],
+ },
+ "multi-tenant",
+ {
+ title: "HTTP API",
+ directoryPath: "http-actions/",
+ initialOpenGroupIndex: -1,
+ children: [
+ {
+ title: "FE",
+ directoryPath: "fe/",
+ initialOpenGroupIndex: -1,
+ children: [
+ {
+ title: "MANAGER",
+ directoryPath: "manager/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "cluster-action",
+ "node-action",
+ "query-profile-action",
+ ],
+ },
+ "backends-action",
+ "bootstrap-action",
+ "cancel-load-action",
+ "check-decommission-action",
+ "check-storage-type-action",
+ "config-action",
+ "connection-action",
+ "get-ddl-stmt-action",
+ "get-load-info-action",
+ "get-load-state",
+ "get-log-file-action",
+ "get-small-file",
+ "ha-action",
+ "hardware-info-action",
+ "health-action",
+ "log-action",
+ "logout-action",
+ "meta-action",
+ "meta-info-action",
+ "meta-replay-state-action",
+ "profile-action",
+ "query-detail-action",
+ "query-profile-action",
+ "row-count-action",
+ "session-action",
+ "set-config-action",
+ "show-data-action",
+ "show-meta-info-action",
+ "show-proc-action",
+ "show-runtime-info-action",
+ "statement-execution-action",
+ "system-action",
+ "table-query-plan-action",
+ "table-row-count-action",
+ "table-schema-action",
+ "upload-action",
+ ],
+ },
+ "cancel-label",
+ "check-reset-rpc-cache",
+ "compaction-action",
+ "connection-action",
+ "fe-get-log-file",
+ "get-load-state",
+ "get-tablets",
+ "profile-action",
+ "query-detail-action",
+ "restore-tablet",
+ "show-data-action",
+ "tablet-migration-action",
+ "tablets_distribution",
+ ],
+ sidebarDepth: 1,
+ },
+ ],
+ },
+ {
+ title: "常见问题",
+ directoryPath: "faq/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "install-faq",
+ "data-faq",
+ "sql-faq"
+ ],
+ },
+ {
+ title: "性能测试",
+ directoryPath: "benchmark/",
+ initialOpenGroupIndex: -1,
+ children: [
+ "ssb",
+ "tpc-h"
+ ],
+ },
{
title: "Doris用户",
directoryPath: "case-user/",
@@ -792,15 +997,6 @@ module.exports = [
"regression-testing",
],
},
- {
- title: "FAQ 常见问题",
- directoryPath: "faq/",
- initialOpenGroupIndex: -1,
- children: [
- "faq",
- "error"
- ],
- },
{
title: "Apache 社区",
directoryPath: "community/",
@@ -829,6 +1025,7 @@ module.exports = [
"release-prepare",
"release-doris-core",
"release-doris-connectors",
+ "release-doris-manager",
"release-complete",
"release-verify",
],
diff --git a/docs/en/README.md b/docs/en/README.md
index 40040ee870..d5ddba4ad6 100644
--- a/docs/en/README.md
+++ b/docs/en/README.md
@@ -89,7 +89,7 @@ cases:
- logo: /images/home/logo-youdao.png
alt: 有道
actionText: Quick Start →
-actionLink: /en/installing/compilation
+actionLink: /en/get-starting/get-starting
articleText: Latest News
articleLink: /en/article/article-list
---
diff --git a/new-docs/en/admin-manual/cluster-management/elastic-expansion.md b/docs/en/admin-manual/cluster-management/elastic-expansion.md
similarity index 100%
rename from new-docs/en/admin-manual/cluster-management/elastic-expansion.md
rename to docs/en/admin-manual/cluster-management/elastic-expansion.md
diff --git a/new-docs/en/admin-manual/cluster-management/load-balancing.md b/docs/en/admin-manual/cluster-management/load-balancing.md
similarity index 100%
rename from new-docs/en/admin-manual/cluster-management/load-balancing.md
rename to docs/en/admin-manual/cluster-management/load-balancing.md
diff --git a/new-docs/en/admin-manual/cluster-management/upgrade.md b/docs/en/admin-manual/cluster-management/upgrade.md
similarity index 100%
rename from new-docs/en/admin-manual/cluster-management/upgrade.md
rename to docs/en/admin-manual/cluster-management/upgrade.md
diff --git a/new-docs/en/admin-manual/config/be-config.md b/docs/en/admin-manual/config/be-config.md
similarity index 100%
rename from new-docs/en/admin-manual/config/be-config.md
rename to docs/en/admin-manual/config/be-config.md
diff --git a/new-docs/en/admin-manual/config/fe-config.md b/docs/en/admin-manual/config/fe-config.md
similarity index 100%
rename from new-docs/en/admin-manual/config/fe-config.md
rename to docs/en/admin-manual/config/fe-config.md
diff --git a/new-docs/en/admin-manual/config/user-property.md b/docs/en/admin-manual/config/user-property.md
similarity index 100%
rename from new-docs/en/admin-manual/config/user-property.md
rename to docs/en/admin-manual/config/user-property.md
diff --git a/new-docs/en/admin-manual/data-admin/backup.md b/docs/en/admin-manual/data-admin/backup.md
similarity index 100%
rename from new-docs/en/admin-manual/data-admin/backup.md
rename to docs/en/admin-manual/data-admin/backup.md
diff --git a/new-docs/en/admin-manual/data-admin/delete-recover.md b/docs/en/admin-manual/data-admin/delete-recover.md
similarity index 100%
rename from new-docs/en/admin-manual/data-admin/delete-recover.md
rename to docs/en/admin-manual/data-admin/delete-recover.md
diff --git a/new-docs/en/admin-manual/data-admin/restore.md b/docs/en/admin-manual/data-admin/restore.md
similarity index 100%
rename from new-docs/en/admin-manual/data-admin/restore.md
rename to docs/en/admin-manual/data-admin/restore.md
diff --git a/docs/en/administrator-guide/http-actions/cancel-label.md b/docs/en/admin-manual/http-actions/cancel-label.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/cancel-label.md
rename to docs/en/admin-manual/http-actions/cancel-label.md
diff --git a/docs/en/administrator-guide/http-actions/check-reset-rpc-cache.md b/docs/en/admin-manual/http-actions/check-reset-rpc-cache.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/check-reset-rpc-cache.md
rename to docs/en/admin-manual/http-actions/check-reset-rpc-cache.md
diff --git a/docs/en/administrator-guide/http-actions/compaction-action.md b/docs/en/admin-manual/http-actions/compaction-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/compaction-action.md
rename to docs/en/admin-manual/http-actions/compaction-action.md
diff --git a/docs/en/administrator-guide/http-actions/connection-action.md b/docs/en/admin-manual/http-actions/connection-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/connection-action.md
rename to docs/en/admin-manual/http-actions/connection-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe-get-log-file.md b/docs/en/admin-manual/http-actions/fe-get-log-file.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe-get-log-file.md
rename to docs/en/admin-manual/http-actions/fe-get-log-file.md
diff --git a/docs/en/administrator-guide/http-actions/fe/backends-action.md b/docs/en/admin-manual/http-actions/fe/backends-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/backends-action.md
rename to docs/en/admin-manual/http-actions/fe/backends-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/bootstrap-action.md b/docs/en/admin-manual/http-actions/fe/bootstrap-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/bootstrap-action.md
rename to docs/en/admin-manual/http-actions/fe/bootstrap-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/cancel-load-action.md b/docs/en/admin-manual/http-actions/fe/cancel-load-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/cancel-load-action.md
rename to docs/en/admin-manual/http-actions/fe/cancel-load-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/check-decommission-action.md b/docs/en/admin-manual/http-actions/fe/check-decommission-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/check-decommission-action.md
rename to docs/en/admin-manual/http-actions/fe/check-decommission-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/check-storage-type-action.md b/docs/en/admin-manual/http-actions/fe/check-storage-type-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/check-storage-type-action.md
rename to docs/en/admin-manual/http-actions/fe/check-storage-type-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/config-action.md b/docs/en/admin-manual/http-actions/fe/config-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/config-action.md
rename to docs/en/admin-manual/http-actions/fe/config-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/connection-action.md b/docs/en/admin-manual/http-actions/fe/connection-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/connection-action.md
rename to docs/en/admin-manual/http-actions/fe/connection-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/get-ddl-stmt-action.md b/docs/en/admin-manual/http-actions/fe/get-ddl-stmt-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/get-ddl-stmt-action.md
rename to docs/en/admin-manual/http-actions/fe/get-ddl-stmt-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/get-load-info-action.md b/docs/en/admin-manual/http-actions/fe/get-load-info-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/get-load-info-action.md
rename to docs/en/admin-manual/http-actions/fe/get-load-info-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/get-load-state.md b/docs/en/admin-manual/http-actions/fe/get-load-state.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/get-load-state.md
rename to docs/en/admin-manual/http-actions/fe/get-load-state.md
diff --git a/docs/en/administrator-guide/http-actions/fe/get-log-file-action.md b/docs/en/admin-manual/http-actions/fe/get-log-file-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/get-log-file-action.md
rename to docs/en/admin-manual/http-actions/fe/get-log-file-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/get-small-file.md b/docs/en/admin-manual/http-actions/fe/get-small-file.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/get-small-file.md
rename to docs/en/admin-manual/http-actions/fe/get-small-file.md
diff --git a/docs/en/administrator-guide/http-actions/fe/ha-action.md b/docs/en/admin-manual/http-actions/fe/ha-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/ha-action.md
rename to docs/en/admin-manual/http-actions/fe/ha-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/hardware-info-action.md b/docs/en/admin-manual/http-actions/fe/hardware-info-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/hardware-info-action.md
rename to docs/en/admin-manual/http-actions/fe/hardware-info-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/health-action.md b/docs/en/admin-manual/http-actions/fe/health-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/health-action.md
rename to docs/en/admin-manual/http-actions/fe/health-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/log-action.md b/docs/en/admin-manual/http-actions/fe/log-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/log-action.md
rename to docs/en/admin-manual/http-actions/fe/log-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/logout-action.md b/docs/en/admin-manual/http-actions/fe/logout-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/logout-action.md
rename to docs/en/admin-manual/http-actions/fe/logout-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/manager/cluster-action.md b/docs/en/admin-manual/http-actions/fe/manager/cluster-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/manager/cluster-action.md
rename to docs/en/admin-manual/http-actions/fe/manager/cluster-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/manager/node-action.md b/docs/en/admin-manual/http-actions/fe/manager/node-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/manager/node-action.md
rename to docs/en/admin-manual/http-actions/fe/manager/node-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/manager/query-profile-action.md b/docs/en/admin-manual/http-actions/fe/manager/query-profile-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/manager/query-profile-action.md
rename to docs/en/admin-manual/http-actions/fe/manager/query-profile-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/meta-action.md b/docs/en/admin-manual/http-actions/fe/meta-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/meta-action.md
rename to docs/en/admin-manual/http-actions/fe/meta-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/meta-info-action.md b/docs/en/admin-manual/http-actions/fe/meta-info-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/meta-info-action.md
rename to docs/en/admin-manual/http-actions/fe/meta-info-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/meta-replay-state-action.md b/docs/en/admin-manual/http-actions/fe/meta-replay-state-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/meta-replay-state-action.md
rename to docs/en/admin-manual/http-actions/fe/meta-replay-state-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/profile-action.md b/docs/en/admin-manual/http-actions/fe/profile-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/profile-action.md
rename to docs/en/admin-manual/http-actions/fe/profile-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/query-detail-action.md b/docs/en/admin-manual/http-actions/fe/query-detail-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/query-detail-action.md
rename to docs/en/admin-manual/http-actions/fe/query-detail-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/query-profile-action.md b/docs/en/admin-manual/http-actions/fe/query-profile-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/query-profile-action.md
rename to docs/en/admin-manual/http-actions/fe/query-profile-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/row-count-action.md b/docs/en/admin-manual/http-actions/fe/row-count-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/row-count-action.md
rename to docs/en/admin-manual/http-actions/fe/row-count-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/session-action.md b/docs/en/admin-manual/http-actions/fe/session-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/session-action.md
rename to docs/en/admin-manual/http-actions/fe/session-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/set-config-action.md b/docs/en/admin-manual/http-actions/fe/set-config-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/set-config-action.md
rename to docs/en/admin-manual/http-actions/fe/set-config-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/show-data-action.md b/docs/en/admin-manual/http-actions/fe/show-data-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/show-data-action.md
rename to docs/en/admin-manual/http-actions/fe/show-data-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/show-meta-info-action.md b/docs/en/admin-manual/http-actions/fe/show-meta-info-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/show-meta-info-action.md
rename to docs/en/admin-manual/http-actions/fe/show-meta-info-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/show-proc-action.md b/docs/en/admin-manual/http-actions/fe/show-proc-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/show-proc-action.md
rename to docs/en/admin-manual/http-actions/fe/show-proc-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/show-runtime-info-action.md b/docs/en/admin-manual/http-actions/fe/show-runtime-info-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/show-runtime-info-action.md
rename to docs/en/admin-manual/http-actions/fe/show-runtime-info-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/statement-execution-action.md b/docs/en/admin-manual/http-actions/fe/statement-execution-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/statement-execution-action.md
rename to docs/en/admin-manual/http-actions/fe/statement-execution-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/system-action.md b/docs/en/admin-manual/http-actions/fe/system-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/system-action.md
rename to docs/en/admin-manual/http-actions/fe/system-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/table-query-plan-action.md b/docs/en/admin-manual/http-actions/fe/table-query-plan-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/table-query-plan-action.md
rename to docs/en/admin-manual/http-actions/fe/table-query-plan-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/table-row-count-action.md b/docs/en/admin-manual/http-actions/fe/table-row-count-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/table-row-count-action.md
rename to docs/en/admin-manual/http-actions/fe/table-row-count-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/table-schema-action.md b/docs/en/admin-manual/http-actions/fe/table-schema-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/table-schema-action.md
rename to docs/en/admin-manual/http-actions/fe/table-schema-action.md
diff --git a/docs/en/administrator-guide/http-actions/fe/upload-action.md b/docs/en/admin-manual/http-actions/fe/upload-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/fe/upload-action.md
rename to docs/en/admin-manual/http-actions/fe/upload-action.md
diff --git a/docs/en/administrator-guide/http-actions/get-load-state.md b/docs/en/admin-manual/http-actions/get-load-state.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/get-load-state.md
rename to docs/en/admin-manual/http-actions/get-load-state.md
diff --git a/docs/en/administrator-guide/http-actions/get-tablets.md b/docs/en/admin-manual/http-actions/get-tablets.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/get-tablets.md
rename to docs/en/admin-manual/http-actions/get-tablets.md
diff --git a/docs/en/administrator-guide/http-actions/profile-action.md b/docs/en/admin-manual/http-actions/profile-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/profile-action.md
rename to docs/en/admin-manual/http-actions/profile-action.md
diff --git a/docs/en/administrator-guide/http-actions/query-detail-action.md b/docs/en/admin-manual/http-actions/query-detail-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/query-detail-action.md
rename to docs/en/admin-manual/http-actions/query-detail-action.md
diff --git a/docs/en/administrator-guide/http-actions/restore-tablet.md b/docs/en/admin-manual/http-actions/restore-tablet.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/restore-tablet.md
rename to docs/en/admin-manual/http-actions/restore-tablet.md
diff --git a/docs/en/administrator-guide/http-actions/show-data-action.md b/docs/en/admin-manual/http-actions/show-data-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/show-data-action.md
rename to docs/en/admin-manual/http-actions/show-data-action.md
diff --git a/docs/en/administrator-guide/http-actions/tablet-migration-action.md b/docs/en/admin-manual/http-actions/tablet-migration-action.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/tablet-migration-action.md
rename to docs/en/admin-manual/http-actions/tablet-migration-action.md
diff --git a/docs/en/administrator-guide/http-actions/tablets_distribution.md b/docs/en/admin-manual/http-actions/tablets_distribution.md
similarity index 100%
rename from docs/en/administrator-guide/http-actions/tablets_distribution.md
rename to docs/en/admin-manual/http-actions/tablets_distribution.md
diff --git a/new-docs/en/admin-manual/maint-monitor/be-olap-error-code.md b/docs/en/admin-manual/maint-monitor/be-olap-error-code.md
similarity index 100%
rename from new-docs/en/admin-manual/maint-monitor/be-olap-error-code.md
rename to docs/en/admin-manual/maint-monitor/be-olap-error-code.md
diff --git a/new-docs/en/admin-manual/maint-monitor/disk-capacity.md b/docs/en/admin-manual/maint-monitor/disk-capacity.md
similarity index 100%
rename from new-docs/en/admin-manual/maint-monitor/disk-capacity.md
rename to docs/en/admin-manual/maint-monitor/disk-capacity.md
diff --git a/docs/en/administrator-guide/operation/doris-error-code.md b/docs/en/admin-manual/maint-monitor/doris-error-code.md
similarity index 100%
rename from docs/en/administrator-guide/operation/doris-error-code.md
rename to docs/en/admin-manual/maint-monitor/doris-error-code.md
diff --git a/new-docs/en/admin-manual/maint-monitor/metadata-operation.md b/docs/en/admin-manual/maint-monitor/metadata-operation.md
similarity index 100%
rename from new-docs/en/admin-manual/maint-monitor/metadata-operation.md
rename to docs/en/admin-manual/maint-monitor/metadata-operation.md
diff --git a/docs/en/administrator-guide/operation/monitor-alert.md b/docs/en/admin-manual/maint-monitor/monitor-alert.md
similarity index 100%
rename from docs/en/administrator-guide/operation/monitor-alert.md
rename to docs/en/admin-manual/maint-monitor/monitor-alert.md
diff --git a/docs/en/administrator-guide/operation/monitor-metrics/be-metrics.md b/docs/en/admin-manual/maint-monitor/monitor-metrics/be-metrics.md
similarity index 100%
rename from docs/en/administrator-guide/operation/monitor-metrics/be-metrics.md
rename to docs/en/admin-manual/maint-monitor/monitor-metrics/be-metrics.md
diff --git a/docs/en/administrator-guide/operation/monitor-metrics/fe-metrics.md b/docs/en/admin-manual/maint-monitor/monitor-metrics/fe-metrics.md
similarity index 100%
rename from docs/en/administrator-guide/operation/monitor-metrics/fe-metrics.md
rename to docs/en/admin-manual/maint-monitor/monitor-metrics/fe-metrics.md
diff --git a/docs/en/administrator-guide/operation/multi-tenant.md b/docs/en/admin-manual/maint-monitor/multi-tenant.md
similarity index 100%
rename from docs/en/administrator-guide/operation/multi-tenant.md
rename to docs/en/admin-manual/maint-monitor/multi-tenant.md
diff --git a/new-docs/en/admin-manual/maint-monitor/tablet-meta-tool.md b/docs/en/admin-manual/maint-monitor/tablet-meta-tool.md
similarity index 100%
rename from new-docs/en/admin-manual/maint-monitor/tablet-meta-tool.md
rename to docs/en/admin-manual/maint-monitor/tablet-meta-tool.md
diff --git a/new-docs/en/admin-manual/maint-monitor/tablet-repair-and-balance.md b/docs/en/admin-manual/maint-monitor/tablet-repair-and-balance.md
similarity index 100%
rename from new-docs/en/admin-manual/maint-monitor/tablet-repair-and-balance.md
rename to docs/en/admin-manual/maint-monitor/tablet-repair-and-balance.md
diff --git a/docs/en/administrator-guide/operation/tablet-restore-tool.md b/docs/en/admin-manual/maint-monitor/tablet-restore-tool.md
similarity index 100%
rename from docs/en/administrator-guide/operation/tablet-restore-tool.md
rename to docs/en/admin-manual/maint-monitor/tablet-restore-tool.md
diff --git a/new-docs/en/admin-manual/multi-tenant.md b/docs/en/admin-manual/multi-tenant.md
similarity index 100%
rename from new-docs/en/admin-manual/multi-tenant.md
rename to docs/en/admin-manual/multi-tenant.md
diff --git a/new-docs/en/admin-manual/optimization.md b/docs/en/admin-manual/optimization.md
similarity index 100%
rename from new-docs/en/admin-manual/optimization.md
rename to docs/en/admin-manual/optimization.md
diff --git a/new-docs/en/admin-manual/privilege-ldap/ldap.md b/docs/en/admin-manual/privilege-ldap/ldap.md
similarity index 100%
rename from new-docs/en/admin-manual/privilege-ldap/ldap.md
rename to docs/en/admin-manual/privilege-ldap/ldap.md
diff --git a/new-docs/en/admin-manual/privilege-ldap/user-privilege.md b/docs/en/admin-manual/privilege-ldap/user-privilege.md
similarity index 100%
rename from new-docs/en/admin-manual/privilege-ldap/user-privilege.md
rename to docs/en/admin-manual/privilege-ldap/user-privilege.md
diff --git a/new-docs/en/admin-manual/query-profile.md b/docs/en/admin-manual/query-profile.md
similarity index 100%
rename from new-docs/en/admin-manual/query-profile.md
rename to docs/en/admin-manual/query-profile.md
diff --git a/new-docs/en/admin-manual/sql-interception.md b/docs/en/admin-manual/sql-interception.md
similarity index 100%
rename from new-docs/en/admin-manual/sql-interception.md
rename to docs/en/admin-manual/sql-interception.md
diff --git a/docs/en/administrator-guide/alter-table/alter-table-bitmap-index.md b/docs/en/administrator-guide/alter-table/alter-table-bitmap-index.md
deleted file mode 100644
index 34506e5e33..0000000000
--- a/docs/en/administrator-guide/alter-table/alter-table-bitmap-index.md
+++ /dev/null
@@ -1,80 +0,0 @@
----
-{
- "title": "Bitmap Index",
- "language": "en"
-}
----
-
-
-
-# Bitmap Index
-Users can speed up queries by creating a bitmap index.
-This document focuses on how to create an index job, as well as some considerations and frequently asked questions when creating an index.
-
-## Glossary
-* bitmap index: a fast data structure that speeds up queries
-
-## Basic Principles
-Creating and dropping an index is essentially a schema change job. For details, please refer to
-[Schema Change](alter-table-schema-change.html).
-
-## Syntax
-There are two forms of syntax for creating and modifying an index: one is integrated into the ALTER TABLE statement, and the other uses separate CREATE/DROP INDEX statements.
-1. Create Index
-
-    Please refer to [CREATE INDEX](../../sql-reference/sql-statements/Data%20Definition/CREATE%20INDEX.html)
-    or [ALTER TABLE](../../sql-reference/sql-statements/Data%20Definition/ALTER%20TABLE.html).
-    You can also specify a bitmap index when creating a table; please refer to [CREATE TABLE](../../sql-reference/sql-statements/Data%20Definition/CREATE%20TABLE.html).
-
-2. Show Index
-
- Please refer to [SHOW INDEX](../../sql-reference/sql-statements/Administration/SHOW%20INDEX.html)
-
-3. Drop Index
-
- Please refer to [DROP INDEX](../../sql-reference/sql-statements/Data%20Definition/DROP%20INDEX.html) or [ALTER TABLE](../../sql-reference/sql-statements/Data%20Definition/ALTER%20TABLE.html)
-
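-As a brief illustration, here is a minimal sketch using the separate CREATE/DROP INDEX form; the table `example_tbl`, column `city`, and index name `idx_city` are placeholders:
-
-```sql
--- Create a bitmap index on a single column; this submits an asynchronous schema change job.
-CREATE INDEX idx_city ON example_tbl (city) USING BITMAP COMMENT 'bitmap index on city';
-
--- List the indexes defined on the table.
-SHOW INDEX FROM example_tbl;
-
--- Drop the index when it is no longer needed.
-DROP INDEX idx_city ON example_tbl;
-```
-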
-## Create Job
-Please refer to [Schema Change](alter-table-schema-change.html)
-## View Job
-Please refer to [Schema Change](alter-table-schema-change.html)
-
-## Cancel Job
-Please refer to [Schema Change](alter-table-schema-change.html)
-
-## Notice
-* Currently only index of bitmap type is supported.
-* The bitmap index is only created on a single column.
-* Bitmap indexes can be applied to all columns of the `Duplicate` data model and key columns of the `Aggregate` and `Uniq` models.
-* The data types supported by bitmap indexes are as follows:
- * `TINYINT`
- * `SMALLINT`
- * `INT`
- * `UNSIGNEDINT`
- * `BIGINT`
- * `CHAR`
- * `VARCHAR`
- * `DATE`
- * `DATETIME`
- * `LARGEINT`
- * `DECIMAL`
- * `BOOL`
-* The bitmap index takes effect only in segmentV2. The table's storage format will be converted to V2 automatically when an index is created.
diff --git a/docs/en/administrator-guide/alter-table/alter-table-replace-table.md b/docs/en/administrator-guide/alter-table/alter-table-replace-table.md
deleted file mode 100644
index 02532988d8..0000000000
--- a/docs/en/administrator-guide/alter-table/alter-table-replace-table.md
+++ /dev/null
@@ -1,72 +0,0 @@
----
-{
- "title": "Replace Table",
- "language": "en"
-}
----
-
-
-
-# Replace Table
-
-In version 0.14, Doris supports atomic replacement of two tables.
-This operation only applies to OLAP tables.
-
-For partition-level replacement operations, please refer to the [Temporary Partition Document](./alter-table-temp-partition.md).
-
-## Syntax
-
-```
-ALTER TABLE [db.]tbl1 REPLACE WITH tbl2
-[PROPERTIES('swap' = 'true')];
-```
-
-Replace table `tbl1` with table `tbl2`.
-
-If the `swap` parameter is `true`, after replacement, the data in the table named `tbl1` is the data in the original `tbl2` table. The data in the table named `tbl2` is the data in the original table `tbl1`. That is, the data of the two tables are interchanged.
-
-If the `swap` parameter is `false`, after replacement, the data in the table named `tbl1` is the data in the original `tbl2` table. The table named `tbl2` is dropped.
-
-## Principle
-
-The replacement table function actually turns the following set of operations into an atomic operation.
-
-Suppose you want to replace table A with table B, and `swap` is `true`, the operation is as follows:
-
-1. Rename table B to table A.
-2. Rename table A to table B.
-
-If `swap` is `false`, the operation is as follows:
-
-1. Drop table A.
-2. Rename table B to table A.
-
-## Notice
-
-1. The `swap` parameter defaults to `true`. That is, the replacement operation is equivalent to exchanging the data of the two tables.
-2. If the `swap` parameter is set to `false`, the replaced table (table A) will be dropped and cannot be recovered.
-3. The replacement operation can only occur between two OLAP tables, and the table structure of the two tables is not checked for consistency.
-4. The replacement operation will not change the original permission settings, because permission checks are based on table names.
-
-## Best Practices
-
-1. Atomic Overwrite Operation
-
-    In some cases, the user wants to rewrite the data of a certain table, but if the table is dropped and then re-imported, the data cannot be viewed for a period of time. In this case, the user can first use the `CREATE TABLE LIKE` statement to create a new table with the same structure, import the new data into the new table, and then atomically replace the old table through the replacement operation, as sketched below. For partition-level atomic overwrite operations, please refer to the [Temporary partition document](./alter-table-temp-partition.md).
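-
-The following is a minimal sketch of that workflow, using the replacement syntax shown above; the table names `tbl`, `tbl_staging`, and `source_tbl` are placeholders:
-
-```sql
--- 1. Create an empty table with the same structure as the old one.
-CREATE TABLE tbl_staging LIKE tbl;
-
--- 2. Load the new data into the staging table (any load method works; INSERT is shown as an example).
-INSERT INTO tbl_staging SELECT * FROM source_tbl;
-
--- 3. Atomically replace the old table; with 'swap' = 'false' the old data of tbl is dropped.
-ALTER TABLE tbl REPLACE WITH tbl_staging PROPERTIES('swap' = 'false');
-```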
diff --git a/docs/en/administrator-guide/alter-table/alter-table-rollup.md b/docs/en/administrator-guide/alter-table/alter-table-rollup.md
deleted file mode 100644
index a3df4c8dc2..0000000000
--- a/docs/en/administrator-guide/alter-table/alter-table-rollup.md
+++ /dev/null
@@ -1,188 +0,0 @@
----
-{
- "title": "Rollup",
- "language": "en"
-}
----
-
-
-
-# Rollup
-
-Users can speed up queries by creating rollup tables. For the concept and usage of Rollup, please refer to [Data
- Model, ROLLUP and Prefix Index](../../getting-started/data-model-rollup_EN.md) and
- [Rollup and query](../../getting-started/hit-the-rollup_EN.md).
-
-This document focuses on how to create a Rollup job, as well as some considerations and frequently asked questions about creating a Rollup.
-
-## Glossary
-
-* Base Table: When each table is created, it corresponds to a base table. The base table stores the complete data of this table. Rollups are usually created based on the data in the base table (and can also be created from other rollups).
-* Index: Materialized index. Rollup or Base Table are both called materialized indexes.
-* Transaction: Each import task is a transaction, and each transaction has a unique incrementing Transaction ID.
-
-## Basic Principles
-
-The basic process of creating a Rollup is to generate new Rollup data containing the specified columns from the data in the Base table. Two parts of data need to be converted: the existing historical data, and the newly arrived import data during the Rollup execution.
-
-```
-+----------+
-| Load Job |
-+----+-----+
- |
- | Load job generates both base and rollup index data
- |
- | +------------------+ +---------------+
- | | Base Index | | Base Index |
- +------> New Incoming Data| | History Data |
- | +------------------+ +------+--------+
- | |
- | | Convert history data
- | |
- | +------------------+ +------v--------+
- | | Rollup Index | | Rollup Index |
- +------> New Incoming Data| | History Data |
- +------------------+ +---------------+
-```
-
-Before starting the conversion of historical data, Doris obtains the latest transaction ID and waits for all import transactions before this transaction ID to complete. This transaction ID becomes the watershed: Doris guarantees that all import tasks after the watershed also generate data for the Rollup Index. In this way, once the historical data conversion is completed, the data of the Rollup and the Base table is guaranteed to be consistent.
-
-## Create Job
-
-The specific syntax for creating a Rollup can be found in the description of the Rollup section in the help `HELP ALTER TABLE`.
-
-The creation of Rollup is an asynchronous process. After the job is submitted successfully, the user needs to use the `SHOW ALTER TABLE ROLLUP` command to view the progress of the job.
-
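-As a brief sketch, following the syntax described in `HELP ALTER TABLE` (the table `tbl1` and the columns `k2`, `v1` are placeholders):
-
-```sql
--- Create a rollup r1 containing only k2 and v1; the job runs asynchronously.
-ALTER TABLE tbl1 ADD ROLLUP r1 (k2, v1);
-
--- Check the progress of the rollup job.
-SHOW ALTER TABLE ROLLUP;
-```
-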
-## View Job
-
-You can use `SHOW ALTER TABLE ROLLUP` to view rollup jobs that are currently running or have completed. For example:
-
-```
- JobId: 20037
- TableName: tbl1
- CreateTime: 2019-08-06 15:38:49
- FinishedTime: N/A
- BaseIndexName: tbl1
-RollupIndexName: r1
- RollupId: 20038
- TransactionId: 10034
- State: PENDING
- Msg:
- Progress: N/A
- Timeout: 86400
-```
-
-* JobId: A unique ID for each Rollup job.
-* TableName: The table name of the base table corresponding to Rollup.
-* CreateTime: Job creation time.
-* FinishedTime: The end time of the job. If it is not finished, "N/A" is displayed.
-* BaseIndexName: The name of the source Index corresponding to Rollup.
-* RollupIndexName: The name of the Rollup.
-* RollupId: The unique ID of the Rollup.
-* TransactionId: the watershed transaction ID of the conversion history data.
-* State: The phase of the operation.
- * PENDING: The job is waiting in the queue to be scheduled.
- * WAITING_TXN: Wait for the import task before the watershed transaction ID to complete.
- * RUNNING: Historical data conversion.
- * FINISHED: The operation was successful.
- * CANCELLED: The job failed.
-* Msg: If the job fails, a failure message is displayed here.
-* Progress: Job progress. Progress is displayed only in the RUNNING state, in the form M/N, where N is the total number of replicas involved in the Rollup and M is the number of replicas whose historical data conversion has completed.
-* Timeout: Job timeout, in seconds.
-
-## Cancel Job
-
-In the case that the job status is not FINISHED or CANCELLED, you can cancel the Rollup job with the following command:
-
-`CANCEL ALTER TABLE ROLLUP FROM tbl_name;`
-
-## Notice
-
-* A table can have only one Rollup job running at a time, and a single job can create only one rollup.
-
-* Rollup operations do not block import and query operations.
-
-* If a DELETE operation has a Key column in a where condition that does not exist in a Rollup, the DELETE is not allowed.
-
- If a Key column does not exist in a Rollup, the DELETE operation cannot delete data from the Rollup, so the data consistency between the Rollup table and the Base table cannot be guaranteed.
-
-* Rollup columns must exist in the Base table.
-
- Rollup columns are always a subset of the Base table columns. Columns that do not exist in the Base table cannot appear.
-
-* If a rollup contains columns of the REPLACE aggregation type, the rollup must contain all the key columns.
-
- Assume the structure of the Base table is as follows:
-
-    `(k1 INT, k2 INT, v1 INT REPLACE, v2 INT SUM)`
-
-    If you need to create a Rollup that contains the `v1` column, you must also include the `k1` and `k2` columns. Otherwise, the system cannot determine the value of `v1` in the Rollup.
-
- Note that all Value columns in the Unique data model table are of the REPLACE aggregation type.
-
-* For a Rollup of a DUPLICATE data model table, you can specify the DUPLICATE KEY of the rollup.
-
-    The DUPLICATE KEY of a DUPLICATE data model table is actually its sort columns. A Rollup can specify its own sort columns, but they must be a prefix of the Rollup's column order. If not specified, the system checks whether the Rollup contains all the sort columns of the Base table, and reports an error if it does not. For example:
-
- Base table structure: `(k1 INT, k2 INT, k3 INT) DUPLICATE KEY (k1, k2)`
-
- Rollup can be: `(k2 INT, k1 INT) DUPLICATE KEY (k2)`
-
-* A Rollup does not need to include the partition or bucket columns of the Base table.
-
-## FAQ
-
-* How many rollups can a table create
-
-    There is theoretically no limit to the number of rollups a table can create, but too many rollups affect import performance, because data is generated for all rollups during each import. Rollups also take up physical storage space. Usually a table has fewer than 10 rollups.
-
-* Rollup creation speed
-
-    Rollup creation speed is currently estimated at about 10 MB/s in the worst case. To be conservative, users can set the job timeout based on this rate.
-
-* Submitting job error `Table xxx is not stable. ...`
-
-    A Rollup job can start only when the table's data is complete and no replica balancing is in progress. If some tablet replicas of the table are incomplete, or if some replicas are undergoing a balancing operation, the submission is rejected.
-
-    You can check whether the tablet replicas are complete with the following command:
-
-    ```ADMIN SHOW REPLICA STATUS FROM tbl WHERE STATUS != "OK";```
-
- If a result is returned, there is a problem with the copy. These problems are usually fixed automatically by the system. You can also use the following commands to repair this table first:
-
- ```ADMIN REPAIR TABLE tbl1; ```
-
- You can check if there are running balancing tasks with the following command:
-
-    ```SHOW PROC "/cluster_balance/pending_tablets";```
-
- You can wait for the balancing task to complete, or temporarily disable the balancing operation with the following command:
-
- ```ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");```
-
-## Configurations
-
-### FE Configurations
-
-* `alter_table_timeout_second`: The default timeout for the job is 86400 seconds.
-
-### BE Configurations
-
-* `alter_tablet_worker_count`: Number of threads used to perform historical data conversion on the BE side. The default is 3. If you want to speed up the rollup job, you can increase this parameter appropriately and restart the BE. But too many conversion threads can cause increased IO pressure and affect other operations. This thread is shared with the Schema Change job.
diff --git a/docs/en/administrator-guide/alter-table/alter-table-schema-change.md b/docs/en/administrator-guide/alter-table/alter-table-schema-change.md
deleted file mode 100644
index 31755b02a0..0000000000
--- a/docs/en/administrator-guide/alter-table/alter-table-schema-change.md
+++ /dev/null
@@ -1,231 +0,0 @@
----
-{
- "title": "Schema Change",
- "language": "en"
-}
----
-
-
-
-# Schema Change
-
-Users can modify the schema of existing tables through the Schema Change operation. Doris currently supports the following modifications:
-
-* Add and delete columns
-* Modify column type
-* Adjust column order
-* Add and modify Bloom Filter
-* Add and delete bitmap index
-
-This document mainly describes how to create a Schema Change job, as well as some considerations and frequently asked questions about Schema Change.
-## Glossary
-
-* Base Table: When each table is created, it corresponds to a base table. The base table stores the complete data of this table. Rollups are usually created based on the data in the base table (and can also be created from other rollups).
-* Index: Materialized index. Rollup or Base Table are both called materialized indexes.
-* Transaction: Each import task is a transaction, and each transaction has a unique incrementing Transaction ID.
-* Rollup: Roll-up tables based on base tables or other rollups.
-
-## Basic Principles
-
-The basic process of executing a Schema Change is to generate index data for the new schema from the data of the original index. Two parts of data need to be converted: the existing historical data, and the newly arrived import data during the Schema Change execution.
-```
-+----------+
-| Load Job |
-+----+-----+
- |
- | Load job generates both origin and new index data
- |
- | +------------------+ +---------------+
- | | Origin Index | | Origin Index |
- +------> New Incoming Data| | History Data |
- | +------------------+ +------+--------+
- | |
- | | Convert history data
- | |
- | +------------------+ +------v--------+
- | | New Index | | New Index |
- +------> New Incoming Data| | History Data |
- +------------------+ +---------------+
-```
-
-Before starting the conversion of historical data, Doris obtains the latest transaction ID and waits for all import transactions before this transaction ID to complete. This transaction ID becomes the watershed: Doris guarantees that all import tasks after the watershed generate data for both the original Index and the new Index. In this way, once the historical data conversion is completed, the data in the new Index is guaranteed to be complete.
-## Create Job
-
-The specific syntax for creating a Schema Change can be found in the description of the Schema Change section in the help `HELP ALTER TABLE`.
-
-The creation of Schema Change is an asynchronous process. After the job is submitted successfully, the user needs to view the job progress through the `SHOW ALTER TABLE COLUMN` command.
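-
-As a brief sketch, following the syntax described in `HELP ALTER TABLE` and assuming `tbl1` is a Duplicate-model table (the table and column names are placeholders):
-
-```sql
--- Add a new column k4 after k3; this submits an asynchronous schema change job.
-ALTER TABLE tbl1 ADD COLUMN k4 INT DEFAULT "1" AFTER k3;
-
--- Check the progress of the schema change job.
-SHOW ALTER TABLE COLUMN;
-```
-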
-## View Job
-
-You can use `SHOW ALTER TABLE COLUMN` to view Schema Change jobs that are currently running or have completed. When multiple indexes are involved in a Schema Change job, the command displays multiple lines, each corresponding to one index. For example:
-
-```
- JobId: 20021
- TableName: tbl1
- CreateTime: 2019-08-05 23:03:13
- FinishTime: 2019-08-05 23:03:42
- IndexName: tbl1
- IndexId: 20022
-OriginIndexId: 20017
-SchemaVersion: 2:792557838
-TransactionId: 10023
- State: FINISHED
- Msg:
- Progress: N/A
- Timeout: 86400
-```
-
-* JobId: A unique ID for each Schema Change job.
-* TableName: The table name of the base table corresponding to Schema Change.
-* CreateTime: Job creation time.
-* FinishedTime: The end time of the job. If it is not finished, "N/A" is displayed.
-* IndexName: The name of an Index involved in this modification.
-* IndexId: The unique ID of the new Index.
-* OriginIndexId: The unique ID of the old Index.
-* SchemaVersion: Displayed in M: N format. M is the version of this Schema Change, and N is the corresponding hash value. With each Schema Change, the version is incremented.
-* TransactionId: the watershed transaction ID of the conversion history data.
-* State: The phase of the operation.
- * PENDING: The job is waiting in the queue to be scheduled.
- * WAITING_TXN: Wait for the import task before the watershed transaction ID to complete.
- * RUNNING: Historical data conversion.
- * FINISHED: The operation was successful.
- * CANCELLED: The job failed.
-* Msg: If the job fails, a failure message is displayed here.
-* Progress: Job progress. Progress is displayed only in the RUNNING state, in the form M/N, where N is the total number of replicas involved in the Schema Change and M is the number of replicas whose historical data conversion has completed.
-* Timeout: Job timeout, in seconds.
-
-## Cancel Job
-
-In the case that the job status is not FINISHED or CANCELLED, you can cancel the Schema Change job with the following command:
-`CANCEL ALTER TABLE COLUMN FROM tbl_name;`
-
-## Best Practice
-
-Schema Change can make multiple changes to multiple indexes in one job. For example:
-Source Schema:
-
-```
-+-----------+-------+------+------+------+---------+-------+
-| IndexName | Field | Type | Null | Key | Default | Extra |
-+-----------+-------+------+------+------+---------+-------+
-| tbl1 | k1 | INT | No | true | N/A | |
-| | k2 | INT | No | true | N/A | |
-| | k3 | INT | No | true | N/A | |
-| | | | | | | |
-| rollup2 | k2 | INT | No | true | N/A | |
-| | | | | | | |
-| rollup1 | k1 | INT | No | true | N/A | |
-| | k2 | INT | No | true | N/A | |
-+-----------+-------+------+------+------+---------+-------+
-```
-
-You can add a column k4 to both rollup1 and rollup2, and add a column k5 to rollup2, with the following statement:
-```
-ALTER TABLE tbl1
-ADD COLUMN k4 INT default "1" to rollup1,
-ADD COLUMN k4 INT default "1" to rollup2,
-ADD COLUMN k5 INT default "1" to rollup2;
-```
-
-When the job completes, the schema becomes:
-
-```
-+-----------+-------+------+------+------+---------+-------+
-| IndexName | Field | Type | Null | Key | Default | Extra |
-+-----------+-------+------+------+------+---------+-------+
-| tbl1 | k1 | INT | No | true | N/A | |
-| | k2 | INT | No | true | N/A | |
-| | k3 | INT | No | true | N/A | |
-| | k4 | INT | No | true | 1 | |
-| | k5 | INT | No | true | 1 | |
-| | | | | | | |
-| rollup2 | k2 | INT | No | true | N/A | |
-| | k4 | INT | No | true | 1 | |
-| | k5 | INT | No | true | 1 | |
-| | | | | | | |
-| rollup1 | k1 | INT | No | true | N/A | |
-| | k2 | INT | No | true | N/A | |
-| | k4 | INT | No | true | 1 | |
-+-----------+-------+------+------+------+---------+-------+
-```
-
-As you can see, the base table tbl1 also automatically gets the k4 and k5 columns. That is, columns added to any rollup are automatically added to the Base table.
-
-At the same time, columns that already exist in the Base table are not allowed to be added to Rollup. If you need to do this, you can re-create a Rollup with the new columns and then delete the original Rollup.
-## Notice
-
-* Only one Schema Change job can be running on a table at a time.
-
-* Schema Change operation does not block import and query operations.
-
-* The partition column and bucket column cannot be modified.
-
-* If there is a value column aggregated by REPLACE in the schema, the Key column is not allowed to be deleted.
-
- If the Key column is deleted, Doris cannot determine the value of the REPLACE column.
-
- All non-Key columns of the Unique data model table are REPLACE aggregated.
-
-* When adding a value column whose aggregation type is SUM or REPLACE, the default value of this column has no meaning to historical data.
-
- Because the historical data has lost the detailed information, the default value cannot actually reflect the aggregated value.
-
-* When modifying a column's type, all attributes other than the type must be filled in according to the original column definition.
-
-    If you modify the column `k1 INT SUM NULL DEFAULT "1"` to type BIGINT, you need to execute the following command:
-
-```ALTER TABLE tbl1 MODIFY COLUMN `k1` BIGINT SUM NULL DEFAULT "1";```
-
-    Note that, in addition to the new column type, the other attributes, such as the aggregation type, nullable attribute, and default value, must be filled in according to the original column definition.
-
-* Modifying column names, aggregation types, nullable attributes, default values, and column comments is not supported.
-
-## FAQ
-
-* the execution speed of Schema Change
-
-    At present, the execution speed of Schema Change is estimated at about 10 MB/s in the worst case. To be conservative, users can set the job timeout based on this rate.
-
-* Submit job error `Table xxx is not stable. ...`
-
-    A Schema Change job can start only when the table's data is complete and no replica balancing is in progress. If some tablet replicas of the table are incomplete, or if some replicas are undergoing a balancing operation, the submission is rejected.
-
-    You can check whether the tablet replicas are complete with the following command:
- ```ADMIN SHOW REPLICA STATUS FROM tbl WHERE STATUS != "OK";```
-
- If a result is returned, there is a problem with the copy. These problems are usually fixed automatically by the system. You can also use the following commands to repair this table first:
- ```ADMIN REPAIR TABLE tbl1;```
-
- You can check if there are running balancing tasks with the following command:
-
- ```SHOW PROC "/cluster_balance/pending_tablets";```
-
- You can wait for the balancing task to complete, or temporarily disable the balancing operation with the following command:
-
- ```ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");```
-
-## Configurations
-
-### FE Configurations
-
-* `alter_table_timeout_second`: The default timeout for the job is 86400 seconds.
-
-### BE Configurations
-
-* `alter_tablet_worker_count`: Number of threads used to perform historical data conversion on the BE side. The default is 3. If you want to speed up the Schema Change job, you can increase this parameter appropriately and restart the BE. But too many conversion threads can cause increased IO pressure and affect other operations. This thread is shared with the Rollup job.
diff --git a/docs/en/administrator-guide/alter-table/alter-table-temp-partition.md b/docs/en/administrator-guide/alter-table/alter-table-temp-partition.md
deleted file mode 100644
index 94f7440bf7..0000000000
--- a/docs/en/administrator-guide/alter-table/alter-table-temp-partition.md
+++ /dev/null
@@ -1,288 +0,0 @@
----
-{
- "title": "Temporary partition",
- "language": "en"
-}
----
-
-
-
-# Temporary partition
-
-Since version 0.12, Doris supports temporary partitioning.
-
-A temporary partition belongs to a partitioned table. Only partitioned tables can create temporary partitions.
-
-## Rules
-
-* The partition columns of a temporary partition are the same as those of the formal partitions and cannot be modified.
-* The partition ranges of all temporary partitions of a table cannot overlap, but the ranges of temporary partitions and formal partitions can overlap.
-* The name of a temporary partition cannot be the same as that of any formal partition or other temporary partition.
-
-## Supported operations
-
-The temporary partition supports add, delete, and replace operations.
-
-### Add temporary partition
-
-You can add temporary partitions to a table with the `ALTER TABLE ADD TEMPORARY PARTITION` statement:
-
-```
-ALTER TABLE tbl1 ADD TEMPORARY PARTITION tp1 VALUES LESS THAN ("2020-02-01");
-
-ALTER TABLE tbl2 ADD TEMPORARY PARTITION tp1 VALUES [("2020-01-01"), ("2020-02-01"));
-
-ALTER TABLE tbl1 ADD TEMPORARY PARTITION tp1 VALUES LESS THAN ("2020-02-01")
-("in_memory" = "true", "replication_num" = "1")
-DISTRIBUTED BY HASH (k1) BUCKETS 5;
-
-ALTER TABLE tbl3 ADD TEMPORARY PARTITION tp1 VALUES IN ("Beijing", "Shanghai");
-
-ALTER TABLE tbl4 ADD TEMPORARY PARTITION tp1 VALUES IN ((1, "Beijing"), (1, "Shanghai"));
-
-ALTER TABLE tbl3 ADD TEMPORARY PARTITION tp1 VALUES IN ("Beijing", "Shanghai")
-("in_memory" = "true", "replication_num" = "1")
-DISTRIBUTED BY HASH(k1) BUCKETS 5;
-
-```
-
-See `HELP ALTER TABLE;` for more help and examples.
-
-Some instructions for adding operations:
-
-* Adding a temporary partition is similar to adding a formal partition. The partition range of the temporary partition is independent of the formal partition.
-* A temporary partition can independently specify some properties, such as the number of buckets, the number of replicas, whether it is an in-memory table, and the storage medium.
-
-### Delete temporary partition
-
-A table's temporary partition can be dropped with the `ALTER TABLE DROP TEMPORARY PARTITION` statement:
-
-```
-ALTER TABLE tbl1 DROP TEMPORARY PARTITION tp1;
-```
-
-See `HELP ALTER TABLE;` for more help and examples.
-
-Some instructions for the delete operation:
-
-* Deleting the temporary partition will not affect the data of the formal partition.
-
-### Replace partition
-
-You can replace formal partitions of a table with temporary partitions with the `ALTER TABLE REPLACE PARTITION` statement.
-
-```
-ALTER TABLE tbl1 REPLACE PARTITION (p1) WITH TEMPORARY PARTITION (tp1);
-
-ALTER TABLE tbl1 REPLACE PARTITION (p1, p2) WITH TEMPORARY PARTITION (tp1, tp2, tp3);
-
-ALTER TABLE tbl1 REPLACE PARTITION (p1, p2) WITH TEMPORARY PARTITION (tp1, tp2)
-PROPERTIES (
- "strict_range" = "false",
- "use_temp_partition_name" = "true"
-);
-```
-
-See `HELP ALTER TABLE;` for more help and examples.
-
-The replace operation has two special optional parameters:
-
-1. `strict_range`
-
- The default is true.
-
-    For Range partitions, when this parameter is true, the range union of all formal partitions to be replaced must be the same as the range union of the temporary partitions used for replacement. When it is false, you only need to ensure that, after replacement, the ranges of the new formal partitions do not overlap with the other formal partitions.
-
-    For List partitions, this parameter is always true, and the enumeration values of all formal partitions to be replaced must be identical to those of the temporary partitions used for replacement.
-
- Here are some examples:
-
- * Example 1
-
- Range of partitions p1, p2, p3 to be replaced (=> union):
-
- ```
-        [10, 20), [20, 30), [40, 50) => [10, 30), [40, 50)
- ```
-
- Replace the range of partitions tp1, tp2 (=> union):
-
- ```
-        [10, 30), [40, 45), [45, 50) => [10, 30), [40, 50)
- ```
-
- The union of ranges is the same, so you can use tp1 and tp2 to replace p1, p2, p3.
-
- * Example 2
-
- Range of partition p1 to be replaced (=> union):
-
- ```
-        [10, 50) => [10, 50)
- ```
-
- Replace the range of partitions tp1, tp2 (=> union):
-
- ```
-        [10, 30), [40, 50) => [10, 30), [40, 50)
- ```
-
-    The unions of the ranges are not the same. If `strict_range` is true, you cannot use tp1 and tp2 to replace p1. If it is false, and the two partition ranges `[10, 30), [40, 50)` do not overlap with the other formal partitions, they can replace p1.
-
- * Example 3
-
- Enumerated values of partitions p1, p2 to be replaced (=> union).
-
- ```
- (1, 2, 3), (4, 5, 6) => (1, 2, 3, 4, 5, 6)
- ```
-
- Replace the enumerated values of partitions tp1, tp2, tp3 (=> union).
-
- ```
- (1, 2, 3), (4), (5, 6) => (1, 2, 3, 4, 5, 6)
- ```
-
-        The enumeration values are the same, so you can use tp1, tp2, tp3 to replace p1, p2.
-
- * Example 4
-
- Enumerated values of partitions p1, p2, p3 to be replaced (=> union).
-
- ```
-        (("1", "beijing"), ("1", "shanghai")), (("2", "beijing"), ("2", "shanghai")), (("3", "beijing"), ("3", "shanghai")) => (("1", "beijing"), ("1", "shanghai"), ("2", "beijing"), ("2", "shanghai"), ("3", "beijing"), ("3", "shanghai"))
- ```
-
- Replace the enumerated values of partitions tp1, tp2 (=> union).
-
- ```
-        (("1", "beijing"), ("1", "shanghai")), (("2", "beijing"), ("2", "shanghai"), ("3", "beijing"), ("3", "shanghai")) => (("1", "beijing"), ("1", "shanghai"), ("2", "beijing"), ("2", "shanghai"), ("3", "beijing"), ("3", "shanghai"))
- ```
-
-        The enumeration values are the same, so you can use tp1 and tp2 to replace p1, p2, p3.
-
-2. `use_temp_partition_name`
-
- The default is false. When this parameter is false, and the number of partitions to be replaced is the same as the number of replacement partitions, the name of the formal partition after the replacement remains unchanged. If true, after replacement, the name of the formal partition is the name of the replacement partition. Here are some examples:
-
- * Example 1
-
- ```
- ALTER TABLE tbl1 REPLACE PARTITION (p1) WITH TEMPORARY PARTITION (tp1);
- ```
-
- `use_temp_partition_name` is false by default. After replacement, the partition name is still p1, but the related data and attributes are replaced with tp1.
-
-        If `use_temp_partition_name` is set to true, the name of the partition after replacement is tp1, and the p1 partition no longer exists.
-
- * Example 2
-
- ```
- ALTER TABLE tbl1 REPLACE PARTITION (p1, p2) WITH TEMPORARY PARTITION (tp1);
- ```
-
-        `use_temp_partition_name` is false by default, but this parameter has no effect here because the number of partitions to be replaced differs from the number of replacement partitions. After the replacement, the partition name is tp1, and p1 and p2 no longer exist.
-
-Some instructions for the replacement operation:
-
-* After the partition is replaced successfully, the replaced partition will be deleted and cannot be recovered.
-
-## Load and query of temporary partitions
-
-Users can load data into temporary partitions or specify temporary partitions for querying.
-
-1. Load temporary partition
-
- The syntax for specifying a temporary partition is slightly different depending on the load method. Here is a simple illustration through an example:
-
- ```
- INSERT INTO tbl TEMPORARY PARTITION (tp1, tp2, ...) SELECT ....
- ```
-
- ```
-    curl --location-trusted -u root: -H "label:123" -H "temporary_partition: tp1, tp2, ..." -T testData http://host:port/api/testDb/testTbl/_stream_load
- ```
-
- ```
- LOAD LABEL example_db.label1
- (
- DATA INFILE ("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
- INTO TABLE `my_table`
- TEMPORARY PARTITION (tp1, tp2, ...)
- ...
- )
- WITH BROKER hdfs ("username" = "hdfs_user", "password" = "hdfs_password");
- ```
-
- ```
- CREATE ROUTINE LOAD example_db.test1 ON example_tbl
- COLUMNS (k1, k2, k3, v1, v2, v3 = k1 * 100),
- TEMPORARY PARTITIONS (tp1, tp2, ...),
-    WHERE k1 > 100
- PROPERTIES
- (...)
- FROM KAFKA
- (...);
- ```
-
-2. Query the temporary partition
-
- ```
- SELECT ... FROM
- tbl1 TEMPORARY PARTITION (tp1, tp2, ...)
- JOIN
- tbl2 TEMPORARY PARTITION (tp1, tp2, ...)
- ON ...
- WHERE ...;
- ```
-
-## Relationship to other operations
-
-### DROP
-
-* After using the `DROP` operation to directly drop the database or table, you can recover the database or table (within a limited time) through the `RECOVER` command, but the temporary partition will not be recovered.
-* After a formal partition is dropped using the `ALTER` command, it can be recovered with the `RECOVER` command (within a limited time). Operations on formal partitions are unrelated to temporary partitions.
-* After the temporary partition is dropped using the `ALTER` command, the temporary partition cannot be recovered through the `RECOVER` command.
-
-### TRUNCATE
-
-* When the `TRUNCATE` command is used to empty a table, the table's temporary partitions are deleted and cannot be recovered.
-* Emptying a formal partition with the `TRUNCATE` command does not affect temporary partitions.
-* The `TRUNCATE` command cannot be used to empty a temporary partition.
-
-### ALTER
-
-* When the table has a temporary partition, you cannot use the `ALTER` command to perform Schema Change, Rollup, etc. on the table.
-* You cannot add temporary partitions to a table while the table is undergoing an alter operation.
-
-
-## Best Practices
-
-1. Atomic overwrite
-
-    In some cases, the user wants to rewrite the data of a certain partition, but if it is dropped first and then loaded, the data cannot be seen for a period of time. In this case, the user can first create a corresponding temporary partition, load the new data into the temporary partition, and then atomically replace the original partition through the `REPLACE` operation. For atomic overwrite operations on non-partitioned tables, please refer to the [Replace Table Document](./alter-table-replace-table.md).
-
-2. Modify the number of buckets
-
-    In some cases, the user used an inappropriate number of buckets when creating a partition. The user can first create a temporary partition covering the same range and specify a new number of buckets, then use the `INSERT INTO` command to load the data of the formal partition into the temporary partition, and finally replace the original partition atomically; see the sketch after this list.
-
-3. Merge or split partitions
-
-    In some cases, users want to modify the partitioning scheme, such as merging two partitions or splitting a large partition into several smaller ones. The user can first create temporary partitions corresponding to the merged or split ranges, load the data of the formal partitions into the temporary partitions through the `INSERT INTO` command, and then replace the original partitions atomically.
\ No newline at end of file
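-
-As referenced above, here is a minimal sketch of changing the bucket count of a partition; the table `tbl1`, partition `p1`, temporary partition `tp1`, the range values, and the bucket column `k1` are placeholders, and the statements follow the forms shown earlier in this document:
-
-```sql
--- 1. Create a temporary partition covering the same range as p1, with a new bucket count.
-ALTER TABLE tbl1 ADD TEMPORARY PARTITION tp1 VALUES [("2020-01-01"), ("2020-02-01"))
-DISTRIBUTED BY HASH(k1) BUCKETS 10;
-
--- 2. Copy the data of the formal partition into the temporary partition.
-INSERT INTO tbl1 TEMPORARY PARTITION (tp1) SELECT * FROM tbl1 PARTITION (p1);
-
--- 3. Atomically replace p1 with tp1 (the resulting partition keeps the name p1 by default).
-ALTER TABLE tbl1 REPLACE PARTITION (p1) WITH TEMPORARY PARTITION (tp1);
-```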
diff --git a/docs/en/administrator-guide/backup-restore.md b/docs/en/administrator-guide/backup-restore.md
deleted file mode 100644
index 4e9ce83513..0000000000
--- a/docs/en/administrator-guide/backup-restore.md
+++ /dev/null
@@ -1,195 +0,0 @@
----
-{
- "title": "Backup and Recovery",
- "language": "en"
-}
----
-
-
-
-# Backup and Recovery
-
-Doris supports the backup of current data in the form of files to remote storage systems via broker. The data can then be restored from the remote storage system to any Doris cluster by the restore command. With this feature, Doris can support regular snapshot backups of data. It can also be used to migrate data between different clusters.
-
-This feature requires Doris version 0.8.2+
-
-To use this function, brokers corresponding to the remote storage (such as BOS or HDFS) need to be deployed. You can view the currently deployed brokers through `SHOW BROKER;`.
-
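-As a rough sketch of the overall flow (the repository name, broker name, storage location, credentials, snapshot label, and backup timestamp below are placeholders; see the command list at the end of this document and `HELP BACKUP` / `HELP RESTORE` for the exact options):
-
-```sql
--- 1. Create a remote repository through an already-deployed broker.
-CREATE REPOSITORY `example_repo`
-WITH BROKER `hdfs_broker`
-ON LOCATION "hdfs://host:port/repo_dir/backup"
-PROPERTIES ("username" = "user", "password" = "password");
-
--- 2. Back up a table (or specific partitions of it) into the repository.
-BACKUP SNAPSHOT example_db.snapshot_label1
-TO example_repo
-ON (tbl1);
-
--- 3. Check the backup progress, then list the snapshots stored in the repository.
-SHOW BACKUP;
-SHOW SNAPSHOT ON example_repo;
-
--- 4. Restore the snapshot, possibly on another cluster that has the same repository created.
-RESTORE SNAPSHOT example_db.snapshot_label1
-FROM example_repo
-ON (tbl1)
-PROPERTIES ("backup_timestamp" = "2022-01-01-12-00-00");
-```
-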
-## Brief Principle Description
-
-### Backup
-
-The backup operation is to upload the data of the specified table or partition directly to the remote warehouse in the form of files stored by Doris for storage. When a user submits a Backup request, the following actions will be done within the system:
-
-1. Snapshot and snapshot upload
-
-    The snapshot phase takes a snapshot of the data files of the specified table or partitions, and the subsequent backup operates only on the snapshot. After the snapshot, changes to the table, imports, and other operations no longer affect the result of the backup. A snapshot only produces hard links to the current data files, which takes very little time. Once the snapshots are completed, they are uploaded one by one. Snapshot upload is done concurrently by each Backend.
-
-2. Metadata preparation and upload
-
-    After the data file snapshots are uploaded, the Frontend first writes the corresponding metadata to a local file and then uploads the local metadata file to the remote warehouse through the broker, completing the backup job.
-
-3. Dynamic partition table description
-
- If the table is a dynamic partition table, the dynamic partition attribute will be automatically disabled after backup. When restoring, you need to manually enable the dynamic partition attribute of the table. The command is as follows:
-
- ```sql
- ALTER TABLE tbl1 SET ("dynamic_partition.enable"="true")
-    ```
-
-### Restore
-
-Recovery operations need to specify a backup that already exists in a remote repository, and then restore the backup content to the local cluster. When a user submits a Restore request, the following actions will be done within the system:
-
-1. Create corresponding metadata locally
-
-    This step first creates the corresponding table and partition structures in the local cluster. After creation, the table is visible but not accessible.
-
-2. Local snapshot
-
- This step is to take a snapshot of the table created in the previous step. This is actually an empty snapshot (because the tables just created have no data), and its main purpose is to generate the corresponding snapshot directory on the Backend for receiving the snapshot files downloaded from the remote repository later.
-
-3. Download snapshots
-
- The snapshot files in the remote warehouse are downloaded to the corresponding snapshot directory generated in the previous step. This step is done concurrently by each backend.
-
-4. Make the snapshot effective
-
- When the snapshot download is complete, we map each snapshot to the metadata of the current local table. These snapshots are then reloaded to take effect and complete the final recovery operation.
-
-## Best Practices
-
-### Backup
-
-We currently support full backup at the minimum granularity of a partition (incremental backup may be supported in future versions). If data needs to be backed up regularly, you first need to plan the table's partitioning and bucketing reasonably, for example partitioning by time. Then, during subsequent operation, perform periodic backups at partition granularity.
-
-### Data migration
-
-Users can first back up the data to a remote warehouse, and then restore the data from the remote warehouse to another cluster to complete data migration. Because data backup is done in the form of snapshots, data imported after the snapshot phase of the backup job will not be backed up. Therefore, after the snapshot is completed, data imported on the original cluster also needs to be imported on the new cluster until the restore job is completed.
-
-It is suggested to keep importing into both the new and old clusters in parallel for a period of time after the migration, and to move the business to the new cluster only after verifying the correctness of the data and the business.
-
-## Important Notes
-
-1. Backup and recovery-related operations are currently only allowed to be performed by users with ADMIN privileges.
-2. Within a database, only one backup or recovery job is allowed to be performed.
-3. Both backup and recovery support operations at the minimum granularity of a partition. When a table has a large amount of data, it is recommended to back up or restore partition by partition to reduce the cost of failed retries.
-4. Because backup and recovery operations work on the actual data files, a table with too many tablets, or tablets with too many small file versions, may take a long time to back up or restore even if the total amount of data is small. Users can estimate the job execution time by using `SHOW PARTITIONS FROM table_name;` and `SHOW TABLET FROM table_name;` to view the number of partitions and the number of file versions of each partition. The number of files has a great impact on the execution time of the job, so it is recommended to plan partitions and buckets reasonably to avoid creating too many tablets.
-5. When viewing the job status through `SHOW BACKUP` or `SHOW RESTORE`, it is possible to see error messages in the `TaskErrMsg` column. But as long as the `State` column is not `CANCELLED`, the job is still in progress. These tasks may succeed on retry, though some task errors can also directly cause the job to fail.
-6. If the restore is an overwrite operation (restoring data onto an existing table or partition), then starting from the `COMMIT` phase of the restore job, the overwritten data on the current cluster may no longer be recoverable. If the restore job fails or is cancelled at this point, the previous data may be damaged and inaccessible, and the only option is to perform the restore again and wait for the job to complete. Therefore, we recommend not restoring data by overwriting unless it is confirmed that the current data is no longer in use.
-
-## Related Commands
-
-The commands related to the backup and recovery function are as follows. After connecting to Doris through mysql-client, you can use `HELP cmd;` to view the detailed help of each command.
-
-1. CREATE REPOSITORY
-
-    Create a remote repository path for backup or recovery. This command accesses the remote storage through a broker, and different brokers require different parameters; for details, please refer to the [Broker Document](broker.md). You can also back up directly to remote storage that supports the AWS S3 protocol; for details, please refer to the [CREATE REPOSITORY document](../sql-reference/sql-statements/Data%20Definition/CREATE%20REPOSITORY.md).
-
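-    A minimal hedged sketch of creating a repository directly on S3-protocol storage (repository name, bucket and endpoint below are placeholders; the exact property keys may differ between versions, see the CREATE REPOSITORY document):
-
-    ```
-    CREATE REPOSITORY `example_s3_repo`
-    WITH S3
-    ON LOCATION "s3://example-bucket/backup"
-    PROPERTIES
-    (
-        "AWS_ENDPOINT" = "http://s3.example.com",
-        "AWS_ACCESS_KEY" = "your_access_key",
-        "AWS_SECRET_KEY" = "your_secret_key",
-        "AWS_REGION" = "example-region"
-    );
-    ```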
-
-2. BACKUP
-
- Perform a backup operation.
-
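-    A minimal hedged sketch (database, label, repository and table names are placeholders):
-
-    ```
-    BACKUP SNAPSHOT example_db.snapshot_label1
-    TO example_repo
-    ON (example_tbl PARTITION (p1, p2));
-    ```
-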
-3. SHOW BACKUP
-
- View the execution of the last backup job, including:
-
- * JobId: ID of this backup job.
- * SnapshotName: User-specified name of this backup job (Label).
- * DbName: The database corresponding to the backup job.
- * State: The current stage of the backup job:
- * PENDING: The initial state of the job.
- * SNAPSHOTING: Snapshot operation is in progress.
- * UPLOAD_SNAPSHOT: The snapshot is over and ready to upload.
- * UPLOADING: Uploading snapshots.
- * SAVE_META: Metadata files are being generated locally.
- * UPLOAD_INFO: Upload metadata files and information for this backup job.
- * FINISHED: The backup is complete.
- * CANCELLED: Backup failed or cancelled.
- * Backup Objs: List of tables and partitions involved in this backup.
- * CreateTime: Job creation time.
- * Snapshot Finished Time: Snapshot completion time.
- * Upload Finished Time: Snapshot upload completion time.
-    * FinishedTime: The completion time of this job.
-    * Unfinished Tasks: In stages such as `SNAPSHOTTING` and `UPLOADING`, multiple sub-tasks run at the same time. This column shows the task IDs of the unfinished sub-tasks of the current stage.
- * TaskErrMsg: If there is a sub-task execution error, the error message corresponding to the sub-task will be displayed here.
- * Status: It is used to record some status information that may appear during the whole operation.
- * Timeout: The timeout time of a job in seconds.
-
-4. SHOW SNAPSHOT
-
-    View the backups that already exist in the remote repository.
-
- * Snapshot: The name of the backup specified at the time of backup (Label).
- * Timestamp: Backup timestamp.
-    * Status: Whether the backup is normal.
-
-    If a where clause is specified after `SHOW SNAPSHOT`, more detailed backup information can be displayed (see the sketch below).
-
- * Database: The database corresponding to backup.
- * Details: Shows the complete data directory structure of the backup.
-
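-    A hedged sketch (repository and label names are placeholders):
-
-    ```
-    SHOW SNAPSHOT ON example_repo WHERE SNAPSHOT = "snapshot_label1";
-    ```
-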
-5. RESTORE
-
- Perform a recovery operation.
-
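-    A minimal hedged sketch (names and timestamp are placeholders; the `backup_timestamp` can be obtained from `SHOW SNAPSHOT`):
-
-    ```
-    RESTORE SNAPSHOT example_db.snapshot_label1
-    FROM example_repo
-    ON (example_tbl PARTITION (p1, p2))
-    PROPERTIES ("backup_timestamp" = "2022-01-01-12-00-00");
-    ```
-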
-6. SHOW RESTORE
-
- View the execution of the last restore job, including:
-
-    * JobId: ID of this restore job.
-    * Label: The name (Label) of the backup in the user-specified repository.
-    * Timestamp: The timestamp of the backup in the user-specified repository.
-    * DbName: The database corresponding to the restore job.
- * State: The current stage of the recovery operation:
- * PENDING: The initial state of the job.
- * SNAPSHOTING: A snapshot of a new local table is in progress.
- * DOWNLOAD: The download snapshot task is being sent.
- * DOWNLOADING: Snapshot is downloading.
- * COMMIT: Prepare to take effect the downloaded snapshot.
- * COMMITTING: The downloaded snapshot is in effect.
- * FINISHED: Recovery is complete.
- * CANCELLED: Recovery failed or cancelled.
-    * AllowLoad: Whether imports are allowed during the restore.
-    * ReplicationNum: The number of replicas specified for the restore.
- * Restore Objs: List of tables and partitions involved in this recovery.
- * CreateTime: Job creation time.
- * MetaPreparedTime: Completion time of local metadata generation.
- * Snapshot Finished Time: Local snapshot completion time.
- * Download Finished Time: The download completion time of the remote snapshot.
-    * FinishedTime: The completion time of this job.
-    * Unfinished Tasks: In stages such as `SNAPSHOTTING`, `DOWNLOADING` and `COMMITTING`, multiple sub-tasks run at the same time. This column shows the task IDs of the unfinished sub-tasks of the current stage.
- * TaskErrMsg: If there is a sub-task execution error, the error message corresponding to the sub-task will be displayed here.
- * Status: It is used to record some status information that may appear during the whole operation.
- * Timeout: The timeout time of a job in seconds.
-
-7. CANCEL BACKUP
-
- Cancel the backup job currently being performed.
-
-8. CANCEL RESTORE
-
- Cancel the recovery job currently being performed.
-
-9. DROP REPOSITORY
-
-    Delete a created remote repository. Deleting a repository only deletes its mapping in Doris and does not delete the actual repository data.
diff --git a/docs/en/administrator-guide/block-rule/sql-block.md b/docs/en/administrator-guide/block-rule/sql-block.md
deleted file mode 100644
index 0b167ae17c..0000000000
--- a/docs/en/administrator-guide/block-rule/sql-block.md
+++ /dev/null
@@ -1,93 +0,0 @@
----
-{
-"title": "SQL Block Rule",
-"language": "en"
-}
----
-
-
-
-# SQL Block Rule
-
-This function is only used to restrict query statements, and does not restrict the execution of explain statements.
-SQL block rules are supported at the user level:
-
-1. Deny specified SQL by regular expression
-
-2. Check whether a SQL statement reaches one of the limits set by partition_num, tablet_num or cardinality
-   - partition_num, tablet_num and cardinality can be set together; once a query reaches any one of them, it will be blocked.
-
-## Rule
-
-SQL block rules support CRUD operations:
-- create SQL block rule
-  - sql: Regex pattern; special characters need to be escaped, "NULL" by default
-  - sqlHash: SQL hash value, used for exact matching. It is printed in fe.audit.log. Only one of sql and sqlHash can be set in a rule, "NULL" by default
-  - partition_num: Maximum number of partitions a scan node may scan, 0L by default
-  - tablet_num: Maximum number of tablets a scan node may scan, 0L by default
-  - cardinality: An approximate number of rows scanned by a scan node, 0L by default
-  - global: Whether the rule is global (applies to all users), false by default
-  - enable: Whether to enable the block rule, true by default
-```sql
-CREATE SQL_BLOCK_RULE test_rule
-PROPERTIES(
- "sql"="select * from order_analysis",
- "global"="false",
- "enable"="true",
- "sqlHash"=""
-)
-```
-When we execute the SQL defined in the rule just created, an exception is returned. An example is as follows:
-```sql
-mysql> select * from order_analysis;
-ERROR 1064 (HY000): errCode = 2, detailMessage = sql match regex sql block rule: test_rule
-```
-
-- create test_rule2, which limits the maximum number of scanned partitions to 30 and the maximum scan cardinality to 10 billion rows, as shown in the following example:
-```sql
-CREATE SQL_BLOCK_RULE test_rule2 PROPERTIES("partition_num" = "30", "cardinality"="10000000000","global"="false","enable"="true")
-```
-
-- show configured SQL block rules, or show all rules if you do not specify a rule name
-
-```sql
-SHOW SQL_BLOCK_RULE [FOR RULE_NAME]
-```
-- alter SQL block rule: any one of sql/sqlHash/global/enable/partition_num/tablet_num/cardinality can be changed
-  - sql and sqlHash cannot both be set. If sql or sqlHash is set in a rule, the other property can never be altered
-  - sql/sqlHash and partition_num/tablet_num/cardinality cannot be set together. For example, if partition_num is set in a rule, then sql or sqlHash can never be altered.
-```sql
-ALTER SQL_BLOCK_RULE test_rule PROPERTIES("sql"="select \\* from test_table","enable"="true")
-```
-
-```sql
-ALTER SQL_BLOCK_RULE test_rule2 PROPERTIES("partition_num" = "10","tablet_num"="300","enable"="true")
-```
-
-- drop SQL block rule; multiple rules are supported, separated by `,`
-```sql
-DROP SQL_BLOCK_RULE test_rule1,test_rule2
-```
-
-## User bind rules
-If global=false is configured, the rules need to be bound to the specified users, with multiple rules separated by `,`:
-```sql
-SET PROPERTY [FOR 'jack'] 'sql_block_rules' = 'test_rule1,test_rule2'
-```
diff --git a/docs/en/administrator-guide/broker.md b/docs/en/administrator-guide/broker.md
deleted file mode 100644
index 0bac53bdc6..0000000000
--- a/docs/en/administrator-guide/broker.md
+++ /dev/null
@@ -1,289 +0,0 @@
----
-{
- "title": "Broker",
- "language": "en"
-}
----
-
-
-
-# Broker
-
-Broker is an optional process in the Doris cluster. It is mainly used to support Doris to read and write files or directories on remote storage, such as HDFS, BOS, and AFS.
-
-Broker provides services through an RPC service port. It is a stateless JVM process that is responsible for encapsulating some POSIX-like file operations for read and write operations on remote storage, such as open, pread, pwrite, and so on.
-In addition, the Broker does not record any other information, so the connection information, file information, permission information, and so on for remote storage need to be passed to the Broker process through parameters in the RPC call, in order for the Broker to read and write files correctly.
-
-Broker only acts as a data channel and does not participate in any calculations, so it takes up little memory. Usually one or more Broker processes are deployed in a Doris system, and Brokers of the same type form a group identified by a **Broker name**.
-
-Broker's position in the Doris system architecture is as follows:
-
-```
-+----+ +----+
-| FE | | BE |
-+-^--+ +--^-+
- | |
- | |
-+-v---------v-+
-| Broker |
-+------^------+
- |
- |
-+------v------+
-|HDFS/BOS/AFS |
-+-------------+
-```
-
-This document mainly introduces the parameters that Broker needs when accessing different remote storages, such as connection information,
-authorization information, and so on.
-
-## Supported Storage System
-
-Different types of brokers support different storage systems.
-
-1. Community HDFS
-
- * Support simple authentication access
- * Support kerberos authentication access
- * Support HDFS HA mode access
-
-2. Baidu HDFS / AFS (not supported by open source version)
-
- * Support UGI simple authentication access
-
-3. Baidu Object Storage BOS (not supported by open source version)
-
- * Support AK / SK authentication access
-
-## Function provided by Broker
-
-1. Broker Load
-
- The Broker Load function reads the file data on the remote storage through the Broker process and imports it into Doris. Examples are as follows:
-
- ```
- LOAD LABEL example_db.label6
- (
- DATA INFILE("bos://my_bucket/input/file")
- INTO TABLE `my_table`
- )
- WITH BROKER "broker_name"
- (
- "bos_endpoint" = "http://bj.bcebos.com",
- "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx",
- "bos_secret_accesskey" = "yyyyyyyyyyyyyyyyyyyy"
- )
- ```
-
- `WITH BROKER` and following Property Map are used to provide Broker's related information.
-
-2. Export
-
-    The Export function exports the data stored in Doris to files on remote storage in text format through the Broker process. Examples are as follows:
-
- ```
- EXPORT TABLE testTbl
- TO "hdfs://hdfs_host:port/a/b/c"
- WITH BROKER "broker_name"
- (
- "username" = "xxx",
- "password" = "yyy"
- );
- ```
-
- `WITH BROKER` and following Property Map are used to provide Broker's related information.
-
-3. Create Repository
-
-    When users need to use the backup and restore function, they need to first create a "repository" with the `CREATE REPOSITORY` command; the Broker used and related information are recorded in the repository metadata.
-    Subsequent backup and restore operations back up data to this repository through the Broker, or read data from it to restore into Doris. Examples are as follows:
-
- ```
- CREATE REPOSITORY `bos_repo`
- WITH BROKER `broker_name`
- ON LOCATION "bos://doris_backup"
- PROPERTIES
- (
- "bos_endpoint" = "http://gz.bcebos.com",
- "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx",
- "bos_secret_accesskey" = "yyyyyyyyyyyyyyyyyyyy"
- );
- ```
-
- `WITH BROKER` and following Property Map are used to provide Broker's related information.
-
-
-## Broker Information
-
-Broker information includes two parts: **Broker name** and **Authentication information**. The general syntax is as follows:
-
-```
-WITH BROKER "broker_name"
-(
- "username" = "xxx",
- "password" = "yyy",
- "other_prop" = "prop_value",
- ...
-);
-```
-
-### Broker Name
-
-Usually the user needs to specify an existing Broker name through the `WITH BROKER "broker_name"` clause in the operation command.
-The Broker name is the name the user specifies when adding a Broker process with the `ALTER SYSTEM ADD BROKER` command (a hedged sketch follows below).
-A name usually corresponds to one or more Broker processes, and Doris selects available Broker processes based on the name.
-You can use the `SHOW BROKER` command to view the Brokers that currently exist in the cluster.
-
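-A hedged sketch of adding a Broker and checking it (host names and ports are placeholders):
-
-```
-ALTER SYSTEM ADD BROKER broker_name "broker_host1:8000", "broker_host2:8000";
-SHOW BROKER;
-```
-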
-**Note: Broker Name is just a user-defined name and does not represent the type of Broker.**
-
-### Certification Information
-
-Different broker types and different access methods need to provide different authentication information.
-Authentication information is usually provided as key-value pairs in the Property Map after `WITH BROKER "broker_name"`.
-
-#### Community HDFS
-
-1. Simple Authentication
-
-    Simple authentication means that Hadoop configures `hadoop.security.authentication` to `simple`.
-
-    Use the system user to access HDFS, or set the environment variable `HADOOP_USER_NAME` when starting the Broker.
-
- ```
- (
- "username" = "user",
- "password" = ""
- );
- ```
-
- Just leave the password blank.
-
-2. Kerberos Authentication
-
-    This authentication method needs to provide the following information:
-
- * `hadoop.security.authentication`: Specify the authentication method as kerberos.
- * `kerberos_principal`: Specify the principal of kerberos.
- * `kerberos_keytab`: Specify the path to the keytab file for kerberos. The file must be an absolute path to a file on the server where the broker process is located. And can be accessed by the Broker process.
-    * `kerberos_keytab_content`: Specify the base64-encoded content of the kerberos keytab file. Only one of this and the `kerberos_keytab` configuration needs to be set.
-
- Examples are as follows:
-
- ```
- (
- "hadoop.security.authentication" = "kerberos",
- "kerberos_principal" = "doris@YOUR.COM",
- "kerberos_keytab" = "/home/doris/my.keytab"
- )
- ```
- ```
- (
- "hadoop.security.authentication" = "kerberos",
- "kerberos_principal" = "doris@YOUR.COM",
- "kerberos_keytab_content" = "ASDOWHDLAWIDJHWLDKSALDJSDIWALD"
- )
- ```
- If Kerberos authentication is used, the [krb5.conf](https://web.mit.edu/kerberos/krb5-1.12/doc/admin/conf_files/krb5_conf.html) file is required when deploying the Broker process.
- The krb5.conf file contains Kerberos configuration information, Normally, you should install your krb5.conf file in the directory /etc. You can override the default location by setting the environment variable KRB5_CONFIG.
- An example of the contents of the krb5.conf file is as follows:
- ```
- [libdefaults]
- default_realm = DORIS.HADOOP
- default_tkt_enctypes = des3-hmac-sha1 des-cbc-crc
- default_tgs_enctypes = des3-hmac-sha1 des-cbc-crc
- dns_lookup_kdc = true
- dns_lookup_realm = false
-
- [realms]
- DORIS.HADOOP = {
- kdc = kerberos-doris.hadoop.service:7005
- }
- ```
-
-3. HDFS HA Mode
-
- This configuration is used to access HDFS clusters deployed in HA mode.
-
- * `dfs.nameservices`: Specify the name of the hdfs service, custom, such as "dfs.nameservices" = "my_ha".
-    * `dfs.ha.namenodes.xxx`: Custom namenode names. Multiple names are separated by commas, where xxx is the custom name in `dfs.nameservices`, such as "dfs.ha.namenodes.my_ha" = "my_nn".
-    * `dfs.namenode.rpc-address.xxx.nn`: Specify the rpc address information of the namenode, where nn represents the name of the namenode configured in `dfs.ha.namenodes.xxx`, such as: "dfs.namenode.rpc-address.my_ha.my_nn" = "host:port".
- * `dfs.client.failover.proxy.provider`: Specify the provider for the client to connect to the namenode. The default is: org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider.
-
- Examples are as follows:
-
- ```
- (
- "dfs.nameservices" = "my_ha",
- "dfs.ha.namenodes.my_ha" = "my_namenode1, my_namenode2",
- "dfs.namenode.rpc-address.my_ha.my_namenode1" = "nn1_host:rpc_port",
- "dfs.namenode.rpc-address.my_ha.my_namenode2" = "nn2_host:rpc_port",
- "dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
- )
- ```
-
- The HA mode can be combined with the previous two authentication methods for cluster access. If you access HA HDFS with simple authentication:
-
- ```
- (
- "username"="user",
- "password"="passwd",
- "dfs.nameservices" = "my_ha",
- "dfs.ha.namenodes.my_ha" = "my_namenode1, my_namenode2",
- "dfs.namenode.rpc-address.my_ha.my_namenode1" = "nn1_host:rpc_port",
- "dfs.namenode.rpc-address.my_ha.my_namenode2" = "nn2_host:rpc_port",
- "dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
- )
- ```
- The configuration for accessing the HDFS cluster can be written to the hdfs-site.xml file. When users use the Broker process to read data from the HDFS cluster, they only need to fill in the cluster file path and authentication information.
-
-#### Baidu Object Storage BOS
-
-**(Open source version is not supported)**
-
-1. Access via AK / SK
-
- * AK/SK: Access Key and Secret Key. You can check the user's AK / SK in Baidu Cloud Security Certification Center.
- * Region Endpoint: Endpoint of the BOS region.
-    * For the regions supported by BOS and corresponding Endpoints, please see [Get access domain name](https://cloud.baidu.com/doc/BOS/s/Ck1rk80hn#%E8%8E%B7%E5%8F%96%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D)
-
- Examples are as follows:
-
- ```
- (
- "bos_endpoint" = "http://bj.bcebos.com",
- "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx",
- "bos_secret_accesskey" = "yyyyyyyyyyyyyyyyyyyyyyyyyy"
- )
- ```
-
-#### Baidu HDFS/AFS
-
-**(Open source version is not supported)**
-
-Baidu AFS and HDFS only support simple authentication access using UGI. Examples are as follows:
-
-```
-(
- "username" = "user",
- "password" = "passwd"
-);
-```
-
-User and passwd are UGI configurations for Hadoop.
diff --git a/docs/en/administrator-guide/bucket-shuffle-join.md b/docs/en/administrator-guide/bucket-shuffle-join.md
deleted file mode 100644
index a2edaef4fc..0000000000
--- a/docs/en/administrator-guide/bucket-shuffle-join.md
+++ /dev/null
@@ -1,105 +0,0 @@
----
-{
- "title": "Bucket Shuffle Join",
- "language": "en"
-}
----
-
-
-
-# Bucket Shuffle Join
-
-Bucket Shuffle Join is a new function officially added in Doris 0.14. Its purpose is to provide local optimization for some join queries, reducing the time spent transferring data between nodes and speeding up queries.
-
-Its design and implementation can be found in [ISSUE 4394](https://github.com/apache/incubator-doris/issues/4394).
-
-## Noun Interpretation
-
-* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access.
-* BE: Backend, Doris's back-end node. Responsible for query execution and data storage.
-* Left table: the left table in a join query, on which the probe expression is performed. Its order can be adjusted by join reorder.
-* Right table: the right table in a join query, on which the build expression is performed. Its order can be adjusted by join reorder.
-
-## Principle
-The conventional distributed join methods supported by Doris are `Shuffle Join` and `Broadcast Join`. Both of these joins lead to some network overhead.
-
-For example, consider a join query between table A and table B using hash join. The cost of the different join types is as follows:
-* **Broadcast Join**: If, according to the data distribution, table A has three executing HashJoinNodes, table B needs to be sent to all three of them. Its network overhead is `3B`, and its memory overhead is `3B`.
-* **Shuffle Join**: Shuffle Join distributes the data of tables A and B across the nodes of the cluster according to a hash calculation, so its network overhead is `A + B` and its memory overhead is `B`.
-
-The data distribution information of each Doris table is saved in FE. If the join statement hits the data distribution column of the left table, we should use the data distribution information to reduce the network and memory overhead of the join query. This is the source of the idea of bucket shuffle join.
-
-
-
-The picture above shows how the Bucket Shuffle Join works. The SQL query is A table join B table. The equivalent expression of join hits the data distribution column of A. According to the data distribution information of table A. Bucket Shuffle Join sends the data of table B to the corresponding data storage and calculation node of table A. The cost of Bucket Shuffle Join is as follows:
-
-* network cost: ``` B < min(3B, A + B) ```
-
-* memory cost: ``` B <= min(3B, B) ```
-
-Therefore, compared with Broadcast Join and Shuffle Join, Bucket Shuffle Join has obvious performance advantages. It reduces the time spent transferring data between nodes and the memory cost of the join. Compared with Doris's original join methods, it has the following advantages:
-
-* First of all, Bucket Shuffle Join reduces the network and memory cost, which gives some join queries better performance, especially when FE can perform partition pruning and bucket pruning on the left table.
-* Secondly, unlike Colocate Join, it is not intrusive to the data distribution of tables and is transparent to users. There is no mandatory requirement on the data distribution of the tables, so it is less likely to cause data skew.
-* Finally, it can provide more optimization space for join reorder.
-
-## Usage
-
-### Set session variable
-
-Set session variable `enable_bucket_shuffle_join` to `true`, FE will automatically plan queries that can be converted to Bucket Shuffle Join.
-
-```
-set enable_bucket_shuffle_join = true;
-```
-
-In FE's distributed query planning, the priority order is Colocate Join -> Bucket Shuffle Join -> Broadcast Join -> Shuffle Join. However, if the user explicitly hints the type of join, for example:
-
-```
-select * from test join [shuffle] baseall on test.k1 = baseall.k1;
-```
-the above order of preference will not take effect.
-
-The session variable is set to `true` by default in version 0.14, while it needs to be set to `true` manually in version 0.13.
-
-### View the type of join
-
-You can use the `explain` command to check whether the join is a Bucket Shuffle Join
-
-```
-| 2:HASH JOIN |
-| | join op: INNER JOIN (BUCKET_SHUFFLE) |
-| | hash predicates: |
-| | colocate: false, reason: table not in the same group |
-| | equal join conjunct: `test`.`k1` = `baseall`.`k1`
-```
-
-The join type indicates that the join method to be used is:`BUCKET_SHUFFLE`.
-
-## Planning rules of Bucket Shuffle Join
-
-In most scenarios, users only need to turn on the session variable by default to transparently use the performance improvement brought by this join method. However, if we understand the planning rules of Bucket Shuffle Join, we can use it to write more efficient SQL.
-
-* Bucket Shuffle Join only works when the join condition is an equi-join. The reason is similar to Colocate Join: both rely on hashing to compute a deterministic data distribution.
-* The bucket column of the left table needs to be included in the equi-join condition. When the bucket column of the left table appears in the equi-join condition, the query has a high probability of being planned as a Bucket Shuffle Join (see the sketch at the end of this list).
-* Because different data types produce different hash values, Bucket Shuffle Join requires that the type of the left table's bucket column and the type of the right table's equi-join column be consistent; otherwise the corresponding plan cannot be generated.
-* Bucket Shuffle Join only works on Doris native OLAP tables. For ODBC, MySQL and ES external tables, when they are used as the left table, they cannot be planned as a Bucket Shuffle Join.
-* For partitioned tables, because the data distribution rules of each partition may be different, Bucket Shuffle Join can only be guaranteed when the left table hits a single partition. Therefore, try to use `where` conditions in the SQL so that the partition pruning policy takes effect.
-* If the left table is a Colocate table, the data distribution rules of each partition are fixed, so Bucket Shuffle Join can perform better on Colocate tables.
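-
-The following is a hedged sketch of the second rule above (table and column names are hypothetical). Both tables are bucketed by the join column `k1` of the same type, so the query has a good chance of being planned as a Bucket Shuffle Join:
-
-```
-CREATE TABLE t_left (k1 int, v1 int)
-DUPLICATE KEY(k1)
-DISTRIBUTED BY HASH(k1) BUCKETS 8
-PROPERTIES("replication_num" = "1");
-
-CREATE TABLE t_right (k1 int, v2 int)
-DUPLICATE KEY(k1)
-DISTRIBUTED BY HASH(k1) BUCKETS 16
-PROPERTIES("replication_num" = "1");
-
-set enable_bucket_shuffle_join = true;
-EXPLAIN SELECT * FROM t_left JOIN t_right ON t_left.k1 = t_right.k1;
-```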
diff --git a/docs/en/administrator-guide/colocation-join.md b/docs/en/administrator-guide/colocation-join.md
deleted file mode 100644
index 390e2f27e6..0000000000
--- a/docs/en/administrator-guide/colocation-join.md
+++ /dev/null
@@ -1,409 +0,0 @@
----
-{
- "title": "Colocation Join",
- "language": "en"
-}
----
-
-
-
-# Colocation Join
-
-Colocation Join is a new feature introduced in Doris 0.9. Its purpose is to provide local optimization for some join queries, reducing data transmission time between nodes and speeding up queries.
-
-The original design, implementation and effect can be found in [ISSUE 245](https://github.com/apache/incubator-doris/issues/245).
-
-The Colocation Join function has undergone a revision, and its design and use are slightly different from the original design. This document mainly introduces Colocation Join's principle, implementation, usage and precautions.
-
-## Noun Interpretation
-
-* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access.
-* BE: Backend, Doris's back-end node. Responsible for query execution and data storage.
-* Colocation Group (CG): A CG contains one or more tables. Tables within the same group have the same Colocation Group Schema and the same data fragmentation distribution.
-* Colocation Group Schema (CGS): Describes the general schema information related to Colocation for the tables in a CG, including the bucket column type, the number of buckets and the replication number.
-
-## Principle
-
-The Colocation Join function groups a set of tables with the same CGS into a CG and ensures that the corresponding data shards of these tables fall on the same BE nodes. When tables in a CG perform join operations on the bucket columns, local data joins can be performed directly, reducing data transmission time between nodes.
-
-The data of a table is eventually hashed into a bucket according to the bucket column values modulo the number of buckets. Assuming the number of buckets in a table is 8, there are eight buckets `[0, 1, 2, 3, 4, 5, 6, 7]`. We call such a sequence a `Buckets Sequence`. Each bucket has one or more tablets. When a table is a single-partition table, there is only one tablet in a bucket; if it is a multi-partition table, there will be more than one.
-
-In order for the tables to have the same data distribution, tables in the same CG must have the same values for the following attributes:
-
-1. Bucket columns and number of buckets
-
-    Bucket columns are the columns specified in `DISTRIBUTED BY HASH(col1, col2, ...)` in the CREATE TABLE statement. They determine which columns' values are used to hash the data of a table into different tablets. Tables in the same CG must have bucket columns of identical type and number, and an identical number of buckets, so that the data shards of multiple tables map to each other one to one.
-
-2. Replication number
-
-    The replication number of all partitions of all tables in the same CG must be the same. Otherwise, a replica of one tablet may have no corresponding replica of another table's shard on the same BE.
-
-Tables in the same CG do not require consistency in the number, scope, and type of partition columns.
-
-After the bucket columns and the number of buckets are fixed, the tables in the same CG have the same Buckets Sequence. The replication number determines how many replicas of the tablets in each bucket exist, and on which BEs they are stored. Suppose the Buckets Sequence is `[0, 1, 2, 3, 4, 5, 6, 7]` and there are 4 BE nodes `[A, B, C, D]`. A possible data distribution is as follows:
-
-```
-+---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+
-| 0 | | 1 | | 2 | | 3 | | 4 | | 5 | | 6 | | 7 |
-+---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+
-| A | | B | | C | | D | | A | | B | | C | | D |
-| | | | | | | | | | | | | | | |
-| B | | C | | D | | A | | B | | C | | D | | A |
-| | | | | | | | | | | | | | | |
-| C | | D | | A | | B | | C | | D | | A | | B |
-+---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+
-```
-
-The data of all tables in the CG is distributed uniformly according to the above rules, which ensures that data with the same bucket column values is on the same BE nodes, so local data joins can be performed.
-
-## Usage
-
-### Establishment of tables
-
-When creating a table, you can specify the attribute `"colocate_with"="group_name"` in `PROPERTIES`, which means that the table is a Colocation Join table and belongs to a specified Colocation Group.
-
-Examples:
-
-```
-CREATE TABLE tbl (k1 int, v1 int sum)
-DISTRIBUTED BY HASH(k1)
-BUCKETS 8
-PROPERTIES(
- "colocate_with" = "group1"
-);
-```
-
-If the specified group does not exist, Doris automatically creates a group that contains only the current table. If the Group already exists, Doris checks whether the current table satisfies the Colocation Group Schema. If satisfied, the table is created and added to the Group, and the table creates its shards and replicas based on the existing data distribution rules of the Group.
-A Group belongs to a database, and its name is unique within a database. Internally the full name `dbId_groupName` is stored, but users only perceive the groupName.
-
-### Delete table
-
-When the last table in a Group is completely deleted (completely deleted means deleted from the recycle bin; usually, after a table is deleted with the `DROP TABLE` command, it stays in the recycle bin for one day by default before being deleted), the Group is deleted automatically.
-
-### View Group
-
-The following command allows you to view the existing Group information in the cluster.
-
-```
-SHOW PROC '/colocation_group';
-
-+-------------+--------------+--------------+------------+----------------+----------+----------+
-| GroupId | GroupName | TableIds | BucketsNum | ReplicationNum | DistCols | IsStable |
-+-------------+--------------+--------------+------------+----------------+----------+----------+
-| 10005.10008 | 10005_group1 | 10007, 10040 | 10 | 3 | int(11) | true |
-+-------------+--------------+--------------+------------+----------------+----------+----------+
-```
-
-* GroupId: The cluster-wide unique identifier of a group; the first half is the database ID and the second half is the group ID.
-* GroupName: The full name of the Group.
-* TableIds: The list of IDs of the tables contained in the Group.
-* BucketsNum: Number of buckets.
-* ReplicationNum: Replication number.
-* DistCols: Distribution columns, i.e. the types of the bucket columns.
-* IsStable: Whether the Group is stable (for the definition of stability, see the section `Colocation Replica Balancing and Repair`).
-
-You can further view the data distribution of a group by following commands:
-
-```
-SHOW PROC '/colocation_group/10005.10008';
-
-+-------------+---------------------+
-| BucketIndex | BackendIds |
-+-------------+---------------------+
-| 0 | 10004, 10002, 10001 |
-| 1 | 10003, 10002, 10004 |
-| 2 | 10002, 10004, 10001 |
-| 3 | 10003, 10002, 10004 |
-| 4 | 10002, 10004, 10003 |
-| 5 | 10003, 10002, 10001 |
-| 6 | 10003, 10004, 10001 |
-| 7 | 10003, 10004, 10002 |
-+-------------+---------------------+
-```
-
-* BucketIndex: The index within the Buckets Sequence.
-* BackendIds: The list of IDs of the BE nodes where the data shards of the bucket are located.
-
-> The above commands require ADMIN privileges. Normal user view is not supported at this time.
-
-### Modify Colocate Group
-
-You can modify the Colocation Group property of a table that has been created. Examples:
-
-`ALTER TABLE tbl SET ("colocate_with" = "group2");`
-
-* If the table has not previously specified a Group, the command checks the Schema and adds the table to the Group (if the Group does not exist, it will be created).
-* If the table previously belonged to another Group, the command first removes the table from the original Group and then adds it to the new Group (creating the Group if it does not exist).
-
-You can also delete the Colocation attribute of a table by following commands:
-
-`ALTER TABLE tbl SET ("colocate_with" = "");`
-
-### Other related operations
-
-When a partition is added (ADD PARTITION) to a table with the Colocation attribute, or its replication number is modified, Doris checks whether the modification violates the Colocation Group Schema and rejects it if it does. A hedged sketch follows.
-
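-A hedged sketch, using the `tbl1` definition from the Query section below. Adding a partition keeps the Group's bucketing and replication number, so the check passes; by contrast, modifying a partition's replication number to a value different from the Group's would be rejected.
-
-```
-ALTER TABLE tbl1 ADD PARTITION p3 VALUES LESS THAN ('2019-07-31');
-```
-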
-## Colocation Replica Balancing and Repair
-
-Replica distribution of Colocation tables must follow the distribution specified by the Group, so their replica repair and balancing differ from those of ordinary shards.
-
-A Group has a Stable attribute. When Stable is true, no shards of the tables in the Group are being changed and the Colocation feature can be used normally. When Stable is false, some tables in the Group are being repaired or migrated, and the Colocation Join of the related tables degenerates into an ordinary join.
-
-### Replica Repair
-
-Replicas can only be stored on the specified BE nodes. So when a BE becomes unavailable (down, decommissioned, etc.), a new BE is needed to replace it. Doris first looks for the BE with the lowest load as the replacement. After the replacement, all data shards of that bucket on the old BE are repaired. During the migration, the Group is marked Unstable.
-
-### Replica Balancing
-
-Doris tries to distribute the shards of Colocation tables evenly across all BE nodes. For ordinary tables, replica balancing is done at the granularity of a single replica: it is enough to find a lower-load BE node for each replica individually. Balancing of Colocation tables is done at the Bucket level: all replicas within a Bucket migrate together. We adopt a simple balancing algorithm that distributes the Buckets Sequence evenly over all BEs, based only on the number of replicas and regardless of their actual size. The specific algorithm can be found in the code comments of `ColocateTableBalancer.java`.
-
-> Note 1: The current Colocation replica balancing and repair algorithms may not work well for heterogeneously deployed Doris clusters, i.e. clusters in which the BE nodes' disk capacity, disk number or disk type (SSD and HDD) differ. In such deployments, small BE nodes and large BE nodes may store the same number of replicas.
->
-> Note 2: When a group is in an Unstable state, the Join of the table in it will degenerate into a normal Join. At this time, the query performance of the cluster may be greatly reduced. If you do not want the system to balance automatically, you can set the FE configuration item `disable_colocate_balance` to prohibit automatic balancing. Then open it at the right time. (See Section `Advanced Operations` for details)
-
-## Query
-
-The Colocation table is queried in the same way as ordinary tables, and users do not need to perceive Colocation attributes. If the Group in which the Colocation table is located is in an Unstable state, it will automatically degenerate to a normal Join.
-
-Examples are given to illustrate:
-
-Table 1:
-
-```
-CREATE TABLE `tbl1` (
- `k1` date NOT NULL COMMENT "",
- `k2` int(11) NOT NULL COMMENT "",
- `v1` int(11) SUM NOT NULL COMMENT ""
-) ENGINE=OLAP
-AGGREGATE KEY(`k1`, `k2`)
-PARTITION BY RANGE(`k1`)
-(
- PARTITION p1 VALUES LESS THAN ('2019-05-31'),
- PARTITION p2 VALUES LESS THAN ('2019-06-30')
-)
-DISTRIBUTED BY HASH(`k2`) BUCKETS 8
-PROPERTIES (
- "colocate_with" = "group1"
-);
-```
-
-Table 2:
-
-```
-CREATE TABLE `tbl2` (
- `k1` datetime NOT NULL COMMENT "",
- `k2` int(11) NOT NULL COMMENT "",
- `v1` double SUM NOT NULL COMMENT ""
-) ENGINE=OLAP
-AGGREGATE KEY(`k1`, `k2`)
-DISTRIBUTED BY HASH(`k2`) BUCKETS 8
-PROPERTIES (
- "colocate_with" = "group1"
-);
-```
-
-View the query plan:
-
-```
-DESC SELECT * FROM tbl1 INNER JOIN tbl2 ON (tbl1.k2 = tbl2.k2);
-
-+----------------------------------------------------+
-| Explain String |
-+----------------------------------------------------+
-| PLAN FRAGMENT 0 |
-| OUTPUT EXPRS:`tbl1`.`k1` | |
-| PARTITION: RANDOM |
-| |
-| RESULT SINK |
-| |
-| 2:HASH JOIN |
-| | join op: INNER JOIN |
-| | hash predicates: |
-| | colocate: true |
-| | `tbl1`.`k2` = `tbl2`.`k2` |
-| | tuple ids: 0 1 |
-| | |
-| |----1:OlapScanNode |
-| | TABLE: tbl2 |
-| | PREAGGREGATION: OFF. Reason: null |
-| | partitions=0/1 |
-| | rollup: null |
-| | buckets=0/0 |
-| | cardinality=-1 |
-| | avgRowSize=0.0 |
-| | numNodes=0 |
-| | tuple ids: 1 |
-| | |
-| 0:OlapScanNode |
-| TABLE: tbl1 |
-| PREAGGREGATION: OFF. Reason: No AggregateInfo |
-| partitions=0/2 |
-| rollup: null |
-| buckets=0/0 |
-| cardinality=-1 |
-| avgRowSize=0.0 |
-| numNodes=0 |
-| tuple ids: 0 |
-+----------------------------------------------------+
-```
-
-If Colocation Join works, the Hash Join Node will show `colocate: true`.
-
-If not, the query plan is as follows:
-
-```
-+----------------------------------------------------+
-| Explain String |
-+----------------------------------------------------+
-| PLAN FRAGMENT 0 |
-| OUTPUT EXPRS:`tbl1`.`k1` | |
-| PARTITION: RANDOM |
-| |
-| RESULT SINK |
-| |
-| 2:HASH JOIN |
-| | join op: INNER JOIN (BROADCAST) |
-| | hash predicates: |
-| | colocate: false, reason: group is not stable |
-| | `tbl1`.`k2` = `tbl2`.`k2` |
-| | tuple ids: 0 1 |
-| | |
-| |----3:EXCHANGE |
-| | tuple ids: 1 |
-| | |
-| 0:OlapScanNode |
-| TABLE: tbl1 |
-| PREAGGREGATION: OFF. Reason: No AggregateInfo |
-| partitions=0/2 |
-| rollup: null |
-| buckets=0/0 |
-| cardinality=-1 |
-| avgRowSize=0.0 |
-| numNodes=0 |
-| tuple ids: 0 |
-| |
-| PLAN FRAGMENT 1 |
-| OUTPUT EXPRS: |
-| PARTITION: RANDOM |
-| |
-| STREAM DATA SINK |
-| EXCHANGE ID: 03 |
-| UNPARTITIONED |
-| |
-| 1:OlapScanNode |
-| TABLE: tbl2 |
-| PREAGGREGATION: OFF. Reason: null |
-| partitions=0/1 |
-| rollup: null |
-| buckets=0/0 |
-| cardinality=-1 |
-| avgRowSize=0.0 |
-| numNodes=0 |
-| tuple ids: 1 |
-+----------------------------------------------------+
-```
-
-The HASH JOIN node displays the corresponding reason: `colocate: false, reason: group is not stable`. At the same time, an EXCHANGE node will be generated.
-
-
-## Advanced Operations
-
-### FE Configuration Item
-
-* disable\_colocate\_relocate
-
-Whether to disable Doris's automatic Colocation replica repair. The default is false, i.e. not disabled. This parameter only affects the replica repair of Colocation tables, not ordinary tables.
-
-* disable\_colocate\_balance
-
-Whether to disable Doris's automatic Colocation replica balancing. The default is false, i.e. not disabled. This parameter only affects the replica balancing of Colocation tables, not ordinary tables.
-
-Users can set these configurations at runtime. See `HELP ADMIN SHOW CONFIG;` and `HELP ADMIN SET CONFIG;`. A hedged sketch follows.
-
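-For example, a sketch of disabling automatic Colocation balancing at runtime and checking the result:
-
-```
-ADMIN SET FRONTEND CONFIG ("disable_colocate_balance" = "true");
-ADMIN SHOW FRONTEND CONFIG LIKE "%colocate%";
-```
-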
-* disable\_colocate\_join
-
-Whether to disable the Colocation Join function. In 0.10 and earlier versions, the default is true, i.e. disabled. In later versions, it defaults to false, i.e. enabled.
-
-* use\_new\_tablet\_scheduler
-
-In 0.10 and previous versions, the new replica scheduling logic is incompatible with the Colocation Join function, so in 0.10 and previous versions, if `disable_colocate_join = false`, you need to set `use_new_tablet_scheduler = false`, that is, disable the new replica scheduler. In later versions, `use_new_tablet_scheduler` defaults to true.
-
-### HTTP Restful API
-
-Doris provides several HTTP Restful APIs related to Colocation Join for viewing and modifying Colocation Group.
-
-The API is implemented on the FE side and accessed using `fe_host: fe_http_port`. ADMIN privileges are required.
-
-1. View all Colocation information for the cluster
-
- ```
- GET /api/colocate
-
- Return the internal Colocation info in JSON format:
-
- {
- "msg": "success",
- "code": 0,
- "data": {
- "infos": [
- ["10003.12002", "10003_group1", "10037, 10043", "1", "1", "int(11)", "true"]
- ],
- "unstableGroupIds": [],
- "allGroupIds": [{
- "dbId": 10003,
- "grpId": 12002
- }]
- },
- "count": 0
- }
- ```
-2. Mark Group as Stable or Unstable
-
- * Mark as Stable
-
- ```
- POST /api/colocate/group_stable?db_id=10005&group_id=10008
-
- Returns: 200
- ```
-
- * Mark as Unstable
-
- ```
- DELETE /api/colocate/group_stable?db_id=10005&group_id=10008
-
- Returns: 200
- ```
-
-3. Setting Data Distribution for Group
-
-    This interface can force-set the bucket distribution of a Group.
-
- ```
- POST /api/colocate/bucketseq?db_id=10005&group_id=10008
-
- Body:
- [[10004,10002],[10003,10002],[10002,10004],[10003,10002],[10002,10004],[10003,10002],[10003,10004],[10003,10004],[10003,10004],[10002,10004]]
-
- Returns: 200
- ```
-    The body is a Buckets Sequence represented as a nested array, giving the IDs of the BEs on which the shards of each Bucket are distributed.
-
-    Note that to use this command, you may need to set the FE configurations `disable_colocate_relocate` and `disable_colocate_balance` to true, i.e. disable automatic Colocation replica repair and balancing. Otherwise, the distribution may be automatically reset by the system after modification.
diff --git a/docs/en/administrator-guide/config/be_config.md b/docs/en/administrator-guide/config/be_config.md
deleted file mode 100644
index aa5a4dce50..0000000000
--- a/docs/en/administrator-guide/config/be_config.md
+++ /dev/null
@@ -1,1526 +0,0 @@
----
-{
- "title": "BE Configuration",
- "language": "en"
-}
----
-
-
-
-
-
-# BE Configuration
-
-This document mainly introduces the relevant configuration items of BE.
-
-The BE configuration file `be.conf` is usually stored in the `conf/` directory of the BE deployment path. In version 0.14, another configuration file `be_custom.conf` will be introduced. The configuration file is used to record the configuration items that are dynamically configured and persisted by the user during operation.
-
-After the BE process is started, it will read the configuration items in `be.conf` first, and then read the configuration items in `be_custom.conf`. The configuration items in `be_custom.conf` will overwrite the same configuration items in `be.conf`.
-
-The location of the `be_custom.conf` file can be configured in `be.conf` through the `custom_config_dir` configuration item.
-
-## View configuration items
-
-Users can view the current configuration items by visiting BE's web page:
-
-`http://be_host:be_webserver_port/varz`
-
-## Set configuration items
-
-There are two ways to configure BE configuration items:
-
-1. Static configuration
-
- Add and set configuration items in the `conf/be.conf` file. The configuration items in `be.conf` will be read when BE starts. Configuration items not in `be.conf` will use default values.
-
-2. Dynamic configuration
-
- After BE starts, the configuration items can be dynamically set with the following commands.
-
- ```
-    curl -X POST "http://{be_ip}:{be_http_port}/api/update_config?{key}={value}"
- ```
-
- In version 0.13 and before, the configuration items modified in this way will become invalid after the BE process restarts. In 0.14 and later versions, the modified configuration can be persisted through the following command. The modified configuration items are stored in the `be_custom.conf` file.
-
- ```
-    curl -X POST "http://{be_ip}:{be_http_port}/api/update_config?{key}={value}&persist=true"
- ```
-
-## Examples
-
-1. Modify `max_compaction_concurrency` statically
-
- By adding in the `be.conf` file:
-
- ```max_compaction_concurrency=5```
-
-    Then restart the BE process for the configuration to take effect.
-
-2. Modify `streaming_load_max_mb` dynamically
-
- After BE starts, the configuration item `streaming_load_max_mb` is dynamically set by the following command:
-
- ```
- curl -X POST http://{be_ip}:{be_http_port}/api/update_config?streaming_load_max_mb=1024
- ```
-
- The return value is as follows, indicating that the setting is successful.
-
- ```
- {
- "status": "OK",
- "msg": ""
- }
- ```
-
- The configuration will become invalid after the BE restarts. If you want to persist the modified results, use the following command:
-
- ```
- curl -X POST http://{be_ip}:{be_http_port}/api/update_config?streaming_load_max_mb=1024\&persist=true
- ```
-
-## Configurations
-
-### `alter_tablet_worker_count`
-
-Default: 3
-
-The number of threads making schema changes
-
-### `base_compaction_check_interval_seconds`
-
-Default: 60 (s)
-
-BaseCompaction thread polling interval
-
-### `base_compaction_interval_seconds_since_last_operation`
-
-Default: 86400
-
-One of the triggering conditions of BaseCompaction: the interval since the last BaseCompaction
-
-### `base_compaction_num_cumulative_deltas`
-
-Default: 5
-
-One of the triggering conditions of BaseCompaction: The limit of the number of Cumulative files to be reached. After reaching this limit, BaseCompaction will be triggered
-
-### base_compaction_trace_threshold
-
-* Type: int32
-* Description: Threshold to logging base compaction's trace information, in seconds
-* Default value: 10
-
-Base compaction is a long time cost background task, this configuration is the threshold to logging trace information. Trace information in log file looks like:
-
-```
-W0610 11:26:33.804431 56452 storage_engine.cpp:552] Trace:
-0610 11:23:03.727535 (+ 0us) storage_engine.cpp:554] start to perform base compaction
-0610 11:23:03.728961 (+ 1426us) storage_engine.cpp:560] found best tablet 546859
-0610 11:23:03.728963 (+ 2us) base_compaction.cpp:40] got base compaction lock
-0610 11:23:03.729029 (+ 66us) base_compaction.cpp:44] rowsets picked
-0610 11:24:51.784439 (+108055410us) compaction.cpp:46] got concurrency lock and start to do compaction
-0610 11:24:51.784818 (+ 379us) compaction.cpp:74] prepare finished
-0610 11:26:33.359265 (+101574447us) compaction.cpp:87] merge rowsets finished
-0610 11:26:33.484481 (+125216us) compaction.cpp:102] output rowset built
-0610 11:26:33.484482 (+ 1us) compaction.cpp:106] check correctness finished
-0610 11:26:33.513197 (+ 28715us) compaction.cpp:110] modify rowsets finished
-0610 11:26:33.513300 (+ 103us) base_compaction.cpp:49] compaction finished
-0610 11:26:33.513441 (+ 141us) base_compaction.cpp:56] unused rowsets have been moved to GC queue
-Metrics: {"filtered_rows":0,"input_row_num":3346807,"input_rowsets_count":42,"input_rowsets_data_size":1256413170,"input_segments_num":44,"merge_rowsets_latency_us":101574444,"merged_rows":0,"output_row_num":3346807,"output_rowset_data_size":1228439659,"output_segments_num":6}
-```
-
-### `base_compaction_write_mbytes_per_sec`
-
-Default: 5(MB)
-
-Maximum disk write speed per second of BaseCompaction task
-
-### `base_cumulative_delta_ratio`
-
-Default: 0.3 (30%)
-
-One of the trigger conditions of BaseCompaction: Cumulative file size reaches the proportion of Base file
-
-### `be_port`
-
-* Type: int32
-* Description: The port of the thrift server on BE which used to receive requests from FE
-* Default value: 9060
-
-### `be_service_threads`
-
-* Type: int32
-* Description: The number of execution threads of the thrift server service on BE which represents the number of threads that can be used to execute FE requests.
-* Default value: 64
-
-### `brpc_max_body_size`
-
-This configuration is mainly used to modify the parameter `max_body_size` of brpc.
-
-Sometimes the query fails and an error message of `body_size is too large` will appear in the BE log. This may happen when the SQL mode is "multi distinct + no group by + more than 1T of data".
-
-This error indicates that the packet size of brpc exceeds the configured value. At this time, you can avoid this error by increasing the configuration.
-
-### `brpc_socket_max_unwritten_bytes`
-
-This configuration is mainly used to modify the parameter `socket_max_unwritten_bytes` of brpc.
-
-Sometimes the query fails and an error message of `The server is overcrowded` will appear in the BE log. This means there are too many messages to buffer at the sender side, which may happen when the SQL needs to send large bitmap value. You can avoid this error by increasing the configuration.
-
-### `transfer_data_by_brpc_attachment`
-
-* Type: bool
-* Description: This configuration is used to control whether to transfer the RowBatch in the ProtoBuf Request to the Controller Attachment and then send it through brpc. When the length of a ProtoBuf Request exceeds 2G, an error will be reported: `Bad request, error_text=[E1003]Fail to compress request`. Putting the RowBatch in the Controller Attachment is faster and avoids this error.
-* Default value: false
-
-### `brpc_num_threads`
-
-This configuration is mainly used to modify the number of bthreads for brpc. The default value is set to -1, which means the number of bthreads is #cpu-cores.
-
-User can set this configuration to a larger value to get better QPS performance. For more information, please refer to `https://github.com/apache/incubator-brpc/blob/master/docs/cn/benchmark.md`
-
-### `brpc_port`
-
-* Type: int32
-* Description: The port of BRPC on BE, used for communication between BEs
-* Default value: 9060
-
-### `buffer_pool_clean_pages_limit`
-
-Default: 20G
-
-Clean up pages that may be saved by the buffer pool
-
-### `buffer_pool_limit`
-
-* Type: string
-* Description: The largest allocatable memory of the buffer pool
-* Default value: 20%
-
-The maximum amount of memory available in the BE buffer pool. The buffer pool is a new memory management structure of BE, which manages the memory by the buffer page and enables spill data to disk. The memory for all concurrent queries will be allocated from the buffer pool. The current buffer pool only works on **AggregationNode** and **ExchangeNode**.
-
-### `check_auto_compaction_interval_seconds`
-
-* Type: int32
-* Description: Check the configuration of auto compaction in seconds when auto compaction disabled.
-* Default value: 5
-
-### `check_consistency_worker_count`
-
-Default: 1
-
-The number of worker threads to calculate the checksum of the tablet
-
-### `chunk_reserved_bytes_limit`
-
-Default: 2147483648
-
-The reserved bytes limit of the Chunk Allocator, 2GB by default. Increasing this variable can improve performance, but it will hold more free memory that other modules cannot use.
-
-### `clear_transaction_task_worker_count`
-
-Default: 1
-
-Number of threads used to clean up transactions
-
-### `clone_worker_count`
-
-Default: 3
-
-Number of threads used to perform cloning tasks
-
-### `cluster_id`
-
-* Type: int32
-* Description: Configure the cluster id to which the BE belongs.
-* Default value: -1
-
-This value is usually delivered by the FE to the BE via heartbeat and does not need to be configured. It can be configured when it is confirmed that a BE belongs to a certain Doris cluster. In that case, the cluster_id file under the data directory also needs to be modified so that it matches this parameter.
-
-### `column_dictionary_key_ratio_threshold`
-
-Default: 0
-
-The value ratio of a string column; if it is less than this ratio, the dictionary compression algorithm is used.
-
-### `column_dictionary_key_size_threshold`
-
-Default: 0
-
-Dictionary-compressed column size; if it is less than this value, the dictionary compression algorithm is used.
-
-### `compaction_tablet_compaction_score_factor`
-
-* Type: int32
-* Description: Coefficient for compaction score when calculating tablet score to find a tablet for compaction.
-* Default value: 1
-
-### `compaction_tablet_scan_frequency_factor`
-
-* Type: int32
-* Description: Coefficient for tablet scan frequency when calculating tablet score to find a tablet for compaction.
-* Default value: 0
-
-Tablet scan frequency can be taken into consideration when selecting a tablet for compaction, so that compaction is preferentially performed on tablets that have been scanned frequently in the recent period.
-The tablet score can be calculated like this:
-
-tablet_score = compaction_tablet_scan_frequency_factor * tablet_scan_frequency + compaction_tablet_compaction_score_factor * compaction_score
-
-### `compaction_task_num_per_disk`
-
-* Type: int32
-* Description: The number of compaction tasks which execute in parallel for a disk(HDD).
-* Default value: 2
-
-### `compaction_task_num_per_fast_disk`
-
-* Type: int32
-* Description: The number of compaction tasks which execute in parallel for a fast disk(SSD).
-* Default value: 4
-
-### `compress_rowbatches`
-
-* Type: bool
-* Description: enable to use Snappy compression algorithm for data compression when serializing RowBatch
-* Default value: true
-
-### `create_tablet_worker_count`
-
-Default: 3
-
-Number of worker threads for BE to create a tablet
-
-### `cumulative_compaction_rounds_for_each_base_compaction_round`
-
-* Type: int32
-* Description: How many rounds of cumulative compaction for each round of base compaction when compaction tasks generation.
-* Default value: 9
-
-### `disable_auto_compaction`
-
-* Type: bool
-* Description: Whether disable automatic compaction task
-* Default value: false
-
-Generally this should be kept off (false). Turn it on only when you want to manually operate compaction tasks in a debugging or test environment.
-
-### `cumulative_compaction_budgeted_bytes`
-
-Default: 104857600
-
-One of the trigger conditions of BaseCompaction: Singleton file size limit, 100MB
-
-### `cumulative_compaction_check_interval_seconds`
-
-Default: 10 (s)
-
-CumulativeCompaction thread polling interval
-
-### `cumulative_compaction_skip_window_seconds`
-
-Default: 30(s)
-
-CumulativeCompaction skips the most recently released increments to prevent compacting versions that may be queried (in case the query planning phase takes some time). Changing this parameter sets the size of the skipped time window.
-
-### cumulative_compaction_trace_threshold
-
-* Type: int32
-* Description: Threshold to logging cumulative compaction's trace information, in seconds
-* Default value: 10
-
-Similar to `base_compaction_trace_threshold`.
-
-### disable_compaction_trace_log
-
-* Type: bool
-* Description: disable the trace log of compaction
-* Default value: true
-
-If set to true, the `cumulative_compaction_trace_threshold` and `base_compaction_trace_threshold` won't work and log is disabled.
-
-### `cumulative_compaction_policy`
-
-* Type: string
-* Description: Configure the merge policy of the cumulative compaction stage. Currently, two merge policy have been implemented, num_based and size_based.
-* Default value: size_based
-
-In detail, num_based (the original ordinary policy) is the initial version of the cumulative compaction merge policy: after a cumulative compaction, the base compaction process is performed directly. The size_based policy is an optimized version of it: versions are merged only when the rowsets' disk sizes are of the same order of magnitude, and after compaction the output rowsets that satisfy the conditions are promoted to the base compaction stage. In the case of a large number of small-batch imports, this reduces the write amplification of base compaction, trades off read amplification against space amplification, and reduces the amount of file version data.
-
-### `cumulative_size_based_promotion_size_mbytes`
-
-* Type: int64
-* Description: Under the size_based policy, the total disk size of the output rowset of cumulative compaction exceeds this configuration size, and the rowset will be used for base compaction. The unit is m bytes.
-* Default value: 1024
-
-In general, keep this configuration below 2 GB to prevent cumulative compaction from taking too long and causing a version backlog.
-
-### `cumulative_size_based_promotion_ratio`
-
-* Type: double
-* Description: Under the size_based policy, when the total disk size of the cumulative compaction output rowset exceeds the configuration ratio of the base version rowset, the rowset will be used for base compaction.
-* Default value: 0.05
-
-Generally, it is recommended that this configuration be no higher than 0.1 and no lower than 0.02.
-
-### `cumulative_size_based_promotion_min_size_mbytes`
-
-* Type: int64
-* Description: Under the size_based strategy, if the total disk size of the output rowset of the cumulative compaction is lower than this configuration size, the rowset will not undergo base compaction and is still in the cumulative compaction process. The unit is m bytes.
-* Default value: 64
-
-Generally, this configuration is within 512 MB. If it is too large, the early base versions stay too small and base compaction is never performed.
-
-### `cumulative_size_based_compaction_lower_size_mbytes`
-
-* Type: int64
-* Description: Under the size_based strategy, when the cumulative compaction is merged, the selected rowsets to be merged have a larger disk size than this configuration, then they are divided and merged according to the level policy. When it is smaller than this configuration, merge directly. The unit is m bytes.
-* Default value: 64
-
-Generally, this configuration is within 128 MB. Setting it too high will cause more cumulative compaction write amplification.
-
-### `custom_config_dir`
-
-Configure the location of the `be_custom.conf` file. The default is in the `conf/` directory.
-
-In some deployment environments, the `conf/` directory may be overwritten due to system upgrades. This will cause the user modified configuration items to be overwritten. At this time, we can store `be_custom.conf` in another specified directory to prevent the configuration file from being overwritten.
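-
-For example (the directory below is hypothetical), `be.conf` could point to a location outside the deployment tree, and dynamically persisted items would then be written to `be_custom.conf` in that directory:
-
-```
-custom_config_dir = /home/doris/doris_custom_conf
-```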
-
-### `default_num_rows_per_column_file_block`
-
-* Type: int32
-* Description: Configure how many rows of data are contained in a single RowBlock.
-* Default value: 1024
-
-### `default_rowset_type`
-
-* Type: string
-* Description: Identifies the storage format selected by BE by default. The configurable values are: "**ALPHA**", "**BETA**". It mainly plays the following two roles:
-1. When the storage_format of a table is set to Default, select the storage format of BE through this configuration.
-2. Select the storage format used when BE performs compaction.
-* Default value: BETA
-
-### `delete_worker_count`
-
-Default: 3
-
-Number of threads performing data deletion tasks
-
-### `disable_mem_pools`
-
-Default: false
-
-Whether to disable the memory cache pool. It is not disabled by default.
-
-### `disable_storage_page_cache`
-
-* Type: bool
-* Description: Whether to disable the page cache for index caching. This configuration only takes effect for the BETA storage format. It is usually recommended to keep it false.
-* Default value: false
-
-### `disk_stat_monitor_interval`
-
-Default: 5(s)
-
-Disk status check interval
-
-### `doris_cgroups`
-
-Default: empty
-
-Cgroups assigned to doris
-
-### `doris_max_pushdown_conjuncts_return_rate`
-
-* Type: int32
-* Description: When BE performs HashJoin, it will adopt a dynamic partitioning method to push the join condition down to OlapScanner. When the data scanned by OlapScanner exceeds 32768 rows, BE will check the filter condition. If the filter rate of that condition is lower than this configuration, Doris will stop using this dynamic pruning condition for data filtering.
-* Default value: 90
-
-### `doris_max_scan_key_num`
-
-* Type: int
-* Description: Used to limit the maximum number of scan keys that a scan node can split in a query request. When a conditional query request reaches the scan node, the scan node will try to split the conditions related to the key column in the query condition into multiple scan key ranges. After that, these scan key ranges will be assigned to multiple scanner threads for data scanning. A larger value usually means that more scanner threads can be used to increase the parallelism of the scanning operation. However, in high concurrency scenarios, too many threads may bring greater scheduling overhead and system load, and will slow down the query response speed. An empirical value is 50. This configuration can be configured separately at the session level. For details, please refer to the description of `max_scan_key_num` in [Variables](../variables.md).
-* Default value: 1024
-
-When the concurrency cannot be improved in high concurrency scenarios, try to reduce this value and observe the impact.
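-
-For example, a hedged session-level adjustment of the corresponding variable mentioned above might look like:
-
-```
-SET max_scan_key_num = 50;
-```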
-
-### `doris_scan_range_row_count`
-
-* Type: int32
-* Description: When BE performs data scanning, it will split the same scanning range into multiple ScanRanges. This parameter represents the scan data range of each ScanRange. This parameter can limit the time that a single OlapScanner occupies the io thread.
-* Default value: 524288
-
-### `doris_scanner_queue_size`
-
-* Type: int32
-* Description: The length of the RowBatch buffer queue between TransferThread and OlapScanner. When Doris performs data scanning, it is performed asynchronously. The Rowbatch scanned by OlapScanner will be placed in the scanner buffer queue, waiting for the upper TransferThread to take it away.
-* Default value: 1024
-
-### `doris_scanner_row_num`
-
-Default: 16384
-
-The maximum number of data rows returned by each scanning thread in a single execution
-
-### `doris_scanner_thread_pool_queue_size`
-
-* Type: int32
-* Description: The queue length of the Scanner thread pool. In Doris' scanning tasks, each Scanner will be submitted as a thread task to the thread pool waiting to be scheduled. After the number of submitted tasks exceeds the length of the thread pool queue, subsequent submitted tasks will be blocked until there is an empty slot in the queue.
-* Default value: 102400
-
-### `doris_scanner_thread_pool_thread_num`
-
-* Type: int32
-* Description: The number of threads in the Scanner thread pool. In Doris' scanning tasks, each Scanner will be submitted as a thread task to the thread pool to be scheduled. This parameter determines the size of the Scanner thread pool.
-* Default value: 48
-
-### `download_low_speed_limit_kbps`
-
-Default: 50 (KB/s)
-
-Minimum download speed
-
-### `download_low_speed_time`
-
-Default: 300(s)
-
-Download time limit, 300 seconds by default
-
-### `download_worker_count`
-
-Default: 1
-
-The number of download threads, the default is 1
-
-### `drop_tablet_worker_count`
-
-Default: 3
-
-Number of threads to delete tablet
-
-### `enable_metric_calculator`
-
-Default: true
-
-If set to true, the metric calculator will run to collect BE-related indicator information, if set to false, it will not run
-
-### `enable_partitioned_aggregation`
-
-* Type: bool
-* Description: Whether the BE node implements the aggregation operation by PartitionAggregateNode, if false, AggregateNode will be executed to complete the aggregation. It is not recommended to set it to false in non-special demand scenarios.
-* Default value: true
-
-### `enable_prefetch`
-* Type: bool
-* Description: When using PartitionedHashTable for aggregation and join calculations, whether to perform HashBucket prefetch. Recommended to be set to true
-* Default value: true
-
-### `enable_quadratic_probing`
-
-* Type: bool
-* Description: When a hash conflict occurs while using PartitionedHashTable, whether to use quadratic probing to resolve the conflict. If the value is false, linear probing is used instead. For quadratic probing, please refer to: [quadratic_probing](https://en.wikipedia.org/wiki/Quadratic_probing)
-* Default value: true
-
-### `enable_system_metrics`
-
-Default: true
-
-Controls whether to enable the collection of system metrics.
-
-### `enable_token_check`
-
-Default: true
-
-Used for forward compatibility, will be removed later.
-
-### `es_http_timeout_ms`
-
-Default: 5000 (ms)
-
-The timeout period for connecting to ES via http, the default is 5 seconds.
-
-### `es_scroll_keepalive`
-
-Default: 5m
-
-ES scroll keepalive hold time, the default is 5 minutes
-
-### `etl_thread_pool_queue_size`
-
-Default: 256
-
-The queue size of the ETL thread pool
-
-### `etl_thread_pool_size`
-
-### `exchg_node_buffer_size_bytes`
-
-* Type: int32
-* Description: The size of the Buffer queue of the ExchangeNode node, in bytes. After the amount of data sent from the Sender side is larger than the Buffer size of ExchangeNode, subsequent data sent will block until the Buffer frees up space for writing.
-* Default value: 10485760
-
-### `file_descriptor_cache_capacity`
-
-Default: 32768
-
-File handle cache capacity, 32768 file handles are cached by default.
-
-### `cache_clean_interval`
-
-Default: 1800(s)
-
-File handle cache cleaning interval, used to clean up file handles that have not been used for a long time.
-Also the clean interval of Segment Cache.
-
-### `flush_thread_num_per_store`
-
-Default: 2
-
-The number of threads per store used to flush the memtable
-
-### `force_recovery`
-
-### `fragment_pool_queue_size`
-
-Default: 2048
-
-The upper limit of query requests that can be processed on a single node
-
-### `fragment_pool_thread_num_min`
-
-Default: 64
-
-### `fragment_pool_thread_num_max`
-
-Default: 256
-
-The above two parameters are to set the number of query threads. By default, a minimum of 64 threads will be started, subsequent query requests will dynamically create threads, and a maximum of 256 threads will be created.
-
-### `heartbeat_service_port`
-* Type: int32
-* Description: Heartbeat service port (thrift) on BE, used to receive heartbeat from FE
-* Default value: 9050
-
-### `heartbeat_service_thread_count`
-
-* Type: int32
-* Description: The number of threads that execute the heartbeat service on BE. the default is 1, it is not recommended to modify
-* Default value: 1
-
-### `ignore_broken_disk`
-
-Default: false
-
-When BE starts, it will check all the paths under `storage_root_path` in the configuration.
-
-`ignore_broken_disk=true`
-
-If a path does not exist or the files under it cannot be read or written (broken disk), the path will be ignored. If other available paths remain, the startup will not be interrupted.
-
-`ignore_broken_disk=false`
-
-If a path does not exist or the files under it cannot be read or written (broken disk), the startup will fail and the process will exit.
-
-### `ignore_load_tablet_failure`
-
-Default: false
-
-When BE starts, it will load the tablet meta information under each data path. If some tablets fail to load, the startup fails by default with a log message like:
-
-```
-load tablets from header failed, failed tablets size: xxx, path=xxx
-```
-
-This indicates how many tablets in that data directory failed to load. The log will also contain specific information about the tablets that failed to load, and manual intervention is required to troubleshoot the cause. After investigation, there are usually two ways to recover:
-
-1. If the tablet information cannot be repaired and the other replicas are normal, delete the faulty tablets with the `meta_tool` tool.
-2. Set `ignore_load_tablet_failure` to true; BE will then ignore these faulty tablets and start normally.
-
-### ignore_rowset_stale_unconsistent_delete
-
-* Type: boolean
-* Description: It is used to decide whether to delete the outdated merged rowsets if they cannot form a consistent version path.
-* Default: false
-
-The merged expired rowset version paths will be deleted after half an hour. In abnormal situations, deleting these versions can make it impossible to construct a consistent query version path. When this configuration is false, the check is strict and the program will directly report an error and exit.
-When it is true, the program will run normally and ignore the error. In general, ignoring this error does not affect queries; a -230 error only appears when FE dispatches a version that has already been merged.
-
-### inc_rowset_expired_sec
-
-Default: 1800 (s)
-
-Retention time in the storage engine of imported and activated data, used for incremental clone
-
-### `index_stream_cache_capacity`
-
-Default: 10737418240
-
-BloomFilter/Min/Max and other statistical information cache capacity
-
-### `kafka_broker_version_fallback`
-
-Default: 0.10.0
-
-If the actual Kafka broker version is lower than the Kafka client version that routine load depends on, the fallback version set by kafka_broker_version_fallback will be used. Valid values are: 0.9.0, 0.8.2, 0.8.1, 0.8.0.
-
-### `load_data_reserve_hours`
-
-Default: 4(hour)
-
-Used for mini load. The mini load data file will be deleted after this time
-
-### `load_error_log_reserve_hours`
-
-Default: 48 (hour)
-
-The load error log will be deleted after this time
-
-### `load_process_max_memory_limit_bytes`
-
-Default: 107374182400
-
-The upper limit of memory occupied by all load threads on a single node, default value: 100 GB
-
-These defaults are set very large, because we don't want to affect load performance when users upgrade Doris. If necessary, users should set these configurations correctly.
-
-### `load_process_max_memory_limit_percent`
-
-Default: 80 (%)
-
-The percentage upper limit of memory occupied by all load threads on a single node, the default is 80%
-
-These defaults are set very large, because we don't want to affect load performance when users upgrade Doris. If necessary, users should set these configurations correctly.
-
-### `log_buffer_level`
-
-Default: empty
-
-Log flushing strategy; logs are buffered in memory by default
-
-### `madvise_huge_pages`
-
-Default: false
-
-Whether to use linux memory huge pages, not enabled by default
-
-### `make_snapshot_worker_count`
-
-Default: 5
-
-Number of threads making snapshots
-
-### `max_client_cache_size_per_host`
-
-Default: 10
-
-The maximum number of client caches per host. There are multiple client caches in BE, but currently we use the same cache size configuration. If necessary, use different configurations to set up different client-side caches
-
-### `max_compaction_threads`
-
-* Type: int32
-* Description: The maximum of thread number in compaction thread pool.
-* Default value: 10
-
-### `max_consumer_num_per_group`
-
-Default: 3
-
-The maximum number of consumers in a data consumer group, used for routine load
-
-### `min_cumulative_compaction_num_singleton_deltas`
-
-Default: 5
-
-Cumulative compaction strategy: the minimum number of incremental files
-
-### `max_cumulative_compaction_num_singleton_deltas`
-
-Default: 1000
-
-Cumulative compaction strategy: the maximum number of incremental files
-
-### `max_download_speed_kbps`
-
-Default: 50000 (KB/s)
-
-Maximum download speed limit
-
-### `max_free_io_buffers`
-
-Default: 128
-
-For each IO buffer size, the maximum number of buffers that IoMgr will reserve. Buffer sizes range from 1024 B to 8 MB, for a total of up to about 2 GB of reserved buffers.
-
-### `max_garbage_sweep_interval`
-
-Default: 3600
-
-The maximum interval for disk garbage cleaning, the default is one hour
-
-### `max_memory_sink_batch_count`
-
-Default: 20
-
-The maximum external scan cache batch count, which means that max_memory_cache_batch_count * batch_size rows will be cached. The default is 20, and the default batch_size is 1024, so 20 * 1024 rows will be cached.
-
-### `max_percentage_of_error_disk`
-
-* Type: int32
-* Description: The percentage of damaged hard disks that the storage engine allows. If the proportion of damaged hard disks exceeds this ratio, BE will automatically exit.
-* Default value: 0
-
-### `max_pushdown_conditions_per_column`
-
-* Type: int
-* Description: Used to limit the maximum number of conditions that can be pushed down to the storage engine for a single column in a query request. During the execution of the query plan, the filter conditions on some columns can be pushed down to the storage engine, so that the index information in the storage engine can be used for data filtering, reducing the amount of data that needs to be scanned by the query. Examples are equality conditions, conditions in IN predicates, etc. In most cases, this parameter only affects queries containing IN predicates, such as `WHERE colA IN (1,2,3,4, ...)`. A larger number means that more conditions in the IN predicate can be pushed to the storage engine, but too many conditions may cause an increase in random reads, and in some cases may reduce query efficiency. This configuration can also be configured at the session level. For details, please refer to the description of `max_pushdown_conditions_per_column` in [Variables](../variables.md).
-* Default value: 1024
-
-* Example
-
- The table structure is `id INT, col2 INT, col3 varchar (32), ...`.
-
- The query is `... WHERE id IN (v1, v2, v3, ...)`
-
- If the number of conditions in the IN predicate exceeds the configuration, try to increase the configuration value and observe whether the query response has improved.
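-
-For the example above, a hedged session-level adjustment of the corresponding variable might be:
-
-```
-SET max_pushdown_conditions_per_column = 2048;
-```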
-
-### `max_runnings_transactions_per_txn_map`
-
-Default: 100
-
-Max number of txns for every txn_partition_map in txn manager, this is a self protection to avoid too many txns saving in manager
-
-### `max_send_batch_parallelism_per_job`
-
-* Type: int
-* Description: Max send batch parallelism for OlapTableSink. The value set by the user for `send_batch_parallelism` is not allowed to exceed `max_send_batch_parallelism_per_job`, if exceed, the value of `send_batch_parallelism` would be `max_send_batch_parallelism_per_job`.
-* Default value: 5
-
-### `max_tablet_num_per_shard`
-
-Default: 1024
-
-The maximum number of tablets per shard directory, used to plan the tablet layout and avoid having too many tablet subdirectories in a single directory
-
-### `max_tablet_version_num`
-
-* Type: int
-* Description: Limit the number of versions of a single tablet. It is used to prevent a large number of version accumulation problems caused by too frequent import or untimely compaction. When the limit is exceeded, the import task will be rejected.
-* Default value: 500
-
-### `mem_limit`
-
-* Type: string
-* Description: Limit the percentage of the server's maximum memory used by the BE process. It is used to prevent BE from occupying too much of the machine's memory. This parameter must be greater than 0. When the percentage is greater than 100%, the value will default to 100%.
-* Default value: 80%
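-
-For instance, restricting BE to roughly 70% of the machine's memory is a one-line `be.conf` change (the value is illustrative):
-
-```
-mem_limit = 70%
-```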
-
-### `memory_limitation_per_thread_for_schema_change`
-
-Default: 2 (G)
-
-Maximum memory allowed for a single schema change task
-
-### `memory_maintenance_sleep_time_s`
-
-Default: 10
-
-Sleep time (in seconds) between memory maintenance iterations
-
-### `memory_max_alignment`
-
-Default: 16
-
-Maximum alignment memory
-
-### `read_size`
-
-Default: 8388608
-
-The read size is the size of reads sent to the OS. There is a trade-off between latency and throughput: the goal is to keep the disk busy without introducing unnecessary seeks. For 8 MB reads, random IO and sequential IO have similar performance.
-
-### `min_buffer_size`
-
-Default: 1024
-
-Minimum read buffer size (in bytes)
-
-### `min_compaction_failure_interval_sec`
-
-* Type: int32
-* Description: During the cumulative compaction process, when the selected tablet fails to be merged successfully, it will wait for a period of time before it may be selected again. The waiting period is the value of this configuration.
-* Default value: 5
-* Unit: seconds
-
-### `min_compaction_threads`
-
-* Type: int32
-* Description: The minimum of thread number in compaction thread pool.
-* Default value: 10
-
-### `min_file_descriptor_number`
-
-Default: 60000
-
-The lower limit required by the file handle limit of the BE process
-
-### `min_garbage_sweep_interval`
-
-Default: 180
-
-The minimum interval for disk garbage cleaning, in seconds
-
-### `mmap_buffers`
-
-Default: false
-
-Whether to use mmap to allocate memory, not used by default
-
-### `num_cores`
-
-* Type: int32
-* Description: The number of CPU cores that BE can use. When the value is 0, BE will obtain the number of CPU cores of the machine from /proc/cpuinfo.
-* Default value: 0
-
-### `num_disks`
-
-Default: 0
-
-Control the number of disks on the machine. If it is 0, it comes from the system settings
-
-### `num_threads_per_core`
-
-Default: 3
-
-Control the number of threads that each core runs. Usually choose 2 times or 3 times the number of cores. This keeps the core busy without causing excessive jitter
-
-### `num_threads_per_disk`
-
-Default: 0
-
-The maximum number of threads per disk is also the maximum queue depth of each disk
-
-### `number_tablet_writer_threads`
-
-Default: 16
-
-Number of tablet write threads
-
-### `path_gc_check`
-
-Default: true
-
-Whether to enable the recycle scan data thread check, it is enabled by default
-
-### `path_gc_check_interval_second`
-
-Default: 86400
-
-Recycle scan data thread check interval, in seconds
-
-### `path_gc_check_step`
-
-Default: 1000
-
-### `path_gc_check_step_interval_ms`
-
-Default: 10 (ms)
-
-### `path_scan_interval_second`
-
-Default: 86400
-
-### `pending_data_expire_time_sec`
-
-Default: 1800
-
-The maximum duration of unvalidated data retained by the storage engine, the default unit: seconds
-
-### `periodic_counter_update_period_ms`
-
-Default: 500
-
-Update rate counter and sampling counter cycle, default unit: milliseconds
-
-### `plugin_path`
-
-Default: ${DORIS_HOME}/plugin
-
-Plugin path
-
-### `port`
-
-* Type: int32
-* Description: The port used in UT. Meaningless in the actual environment and can be ignored.
-* Default value: 20001
-
-### `pprof_profile_dir`
-
-Default: ${DORIS_HOME}/log
-
-pprof profile save directory
-
-### `priority_networks`
-
-Default: empty
-
-Declare a selection strategy for those servers with many IPs. Note that at most one ip should match this list. This is a semicolon-separated list in CIDR notation, such as 10.10.10.0/24. If there is no IP matching this rule, one will be randomly selected
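-
-An illustrative `be.conf` entry for a host with both a 10.x and a 192.168.x interface (addresses are hypothetical):
-
-```
-priority_networks = 10.10.10.0/24;192.168.0.0/16
-```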
-
-### `priority_queue_remaining_tasks_increased_frequency`
-
-Default: 512
-
-The frequency at which the priority of remaining tasks in the BlockingPriorityQueue is increased
-
-### `publish_version_worker_count`
-
-Default: 8
-
-The number of threads used to publish versions
-
-### `pull_load_task_dir`
-
-Default: ${DORIS_HOME}/var/pull_load
-
-Directory of the pull load task
-
-### `push_worker_count_high_priority`
-
-Default: 3
-
-The number of threads for processing HIGH-priority load tasks
-
-### `push_worker_count_normal_priority`
-
-Default: 3
-
-The number of threads for processing NORMAL-priority load tasks
-
-### `push_write_mbytes_per_sec`
-
-+ Type: int32
-+ Description: Load data speed control, the default is 10 MB per second. Applicable to all load methods.
-+ Unit: MB/s
-+ Default value: 10
-
-### `query_scratch_dirs`
-
-+ Type: string
-+ Description: The directories selected by BE to store temporary data during spill to disk. Similar to the storage path configuration, multiple directories are separated by `;`.
-+ Default value: ${DORIS_HOME}
-
-### `release_snapshot_worker_count`
-
-Default: 5
-
-Number of threads releasing snapshots
-
-### `report_disk_state_interval_seconds`
-
-Default: 60
-
-The interval time for the agent to report the disk status to FE, unit (seconds)
-
-### `report_tablet_interval_seconds`
-
-Default: 60
-
-The interval time for the agent to report the olap table to the FE, in seconds
-
-### `report_task_interval_seconds`
-
-Default: 10
-
-The interval time for the agent to report the task signature to FE, unit (seconds)
-
-### `result_buffer_cancelled_interval_time`
-
-Default: 300
-
-Result buffer cancellation time (unit: second)
-
-### `routine_load_thread_pool_size`
-
-Default: 10
-
-The thread pool size of the routine load task. This should be greater than the FE configuration `max_concurrent_task_num_per_be` (default 5)
-
-### `row_nums_check`
-
-Default: true
-
-Check row nums for BE/CE and schema change. true means enabled, false means disabled
-
-### `row_step_for_compaction_merge_log`
-
-* Type: int64
-* Description: Merge log will be printed for each "row_step_for_compaction_merge_log" rows merged during compaction. If the value is set to 0, merge log will not be printed.
-* Default value: 0
-* Dynamically modify: true
-
-### `scan_context_gc_interval_min`
-
-Default: 5
-
-This configuration is used for the context gc thread scheduling cycle. Note: The unit is minutes, and the default is 5 minutes
-
-### `send_batch_thread_pool_thread_num`
-
-* Type: int32
-* Description: The number of threads in the SendBatch thread pool. In NodeChannels' sending data tasks, the SendBatch operation of each NodeChannel will be submitted as a thread task to the thread pool to be scheduled. This parameter determines the size of the SendBatch thread pool.
-* Default value: 256
-
-### `send_batch_thread_pool_queue_size`
-
-* Type: int32
-* Description: The queue length of the SendBatch thread pool. In NodeChannels' sending data tasks, the SendBatch operation of each NodeChannel will be submitted as a thread task to the thread pool waiting to be scheduled. After the number of submitted tasks exceeds the length of the thread pool queue, subsequent submitted tasks will be blocked until there is an empty slot in the queue.
-
-### `sleep_one_second`
-
-+ Type: int32
-+ Description: Global variable, used for BE threads to sleep for 1 second; should not be modified
-+ Default value: 1
-
-### `small_file_dir`
-
-Default: ${DORIS_HOME}/lib/small_file/
-
-Directory for saving files downloaded by SmallFileMgr
-
-### `snapshot_expire_time_sec`
-
-Default: 172800
-
-Snapshot file cleaning interval, default value: 48 hours
-
-### `status_report_interval`
-
-Default: 5
-
-Interval between profile reports; unit: seconds
-
-### `storage_flood_stage_left_capacity_bytes`
-
-Default: 1073741824
-
-The minimum number of bytes that should be left free in a data directory. Default value: 1 GB
-
-### `storage_flood_stage_usage_percent`
-
-Default: 95 (95%)
-
-The storage_flood_stage_usage_percent and storage_flood_stage_left_capacity_bytes configurations limit the maximum usage of the capacity of the data directory.
-
-### `storage_medium_migrate_count`
-
-Default: 1
-
-The number of threads used for storage medium migration
-
-### `storage_page_cache_limit`
-
-Default: 20%
-
-The size limit of the storage page cache, as a percentage of memory
-
-### `index_page_cache_percentage`
-* Type: int32
-* Description: Index page cache as a percentage of total storage page cache, value range is [0, 100]
-* Default value: 10
-
-### `storage_root_path`
-
-* Type: string
-
-* Description: Data root path, with multiple paths separated by `;`. You can specify the storage medium of each root path (HDD or SSD), and you can append a capacity limit at the end of each root path, separated by `,`.
-
- eg.1: `storage_root_path=/home/disk1/doris.HDD,50;/home/disk2/doris.SSD,1;/home/disk2/doris`
-
- * 1./home/disk1/doris.HDD,50, indicates capacity limit is 50GB, HDD;
- * 2./home/disk2/doris.SSD,1, indicates capacity limit is 1GB, SSD;
- * 3./home/disk2/doris, indicates capacity limit is disk capacity, HDD(default)
-
- eg.2: `storage_root_path=/home/disk1/doris,medium:hdd,capacity:50;/home/disk2/doris,medium:ssd,capacity:50`
-
-    * 1./home/disk1/doris,medium:hdd,capacity:50, capacity limit is 50GB, HDD;
- * 2./home/disk2/doris,medium:ssd,capacity:50,capacity limit is 50GB, SSD;
-
-* Default: ${DORIS_HOME}
-
-### `storage_strict_check_incompatible_old_format`
-* Type: bool
-* Description: Used to check incompatible old format strictly
-* Default value: true
-* Dynamically modify: false
-
-This config is used to decide whether Doris strictly checks for the incompatible old hdr_ format. When it is true and an incompatible old format is found,
-the process will log a fatal error and exit. When it is false, the process will only log a warning.
-
-### `streaming_load_max_mb`
-
-* Type: int64
-* Description: Used to limit the maximum amount of csv data allowed in one Stream load. The unit is MB.
-* Default value: 10240
-* Dynamically modify: yes
-
-Stream Load is generally suitable for loading data of no more than a few GB; it is not suitable for loading very large data.
-
-### `streaming_load_json_max_mb`
-
-* Type: int64
-* Description: it is used to limit the maximum amount of json data allowed in one Stream load. The unit is MB.
-* Default value: 100
-* Dynamically modify: yes
-
-Some data formats, such as JSON, cannot be split. Doris must read all the data into the memory before parsing can begin. Therefore, this value is used to limit the maximum amount of data that can be loaded in a single Stream load.
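-
-As a rough sketch (host, credentials, database and table names are placeholders), a JSON Stream Load subject to this limit is submitted over HTTP, for example:
-
-```
-curl --location-trusted -u user:passwd \
-    -H "format: json" \
-    -T data.json \
-    http://fe_host:8030/api/example_db/example_tbl/_stream_load
-```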
-
-### `streaming_load_rpc_max_alive_time_sec`
-
-Default: 1200
-
-The lifetime of TabletsChannel. If the channel does not receive any data at this time, the channel will be deleted, unit: second
-
-### `sync_tablet_meta`
-
-Default: false
-
-Whether the storage engine syncs the tablet meta to disk when it is updated
-
-### `sys_log_dir`
-
-* Type: string
-* Description: Storage directory of BE log data
-* Default: ${DORIS_HOME}/log
-
-### `sys_log_level`
-
-Default: INFO
-
-Log Level: INFO < WARNING < ERROR < FATAL
-
-### `sys_log_roll_mode`
-
-Default: SIZE-MB-1024
-
-The size of the log split, one log file is split every 1G
-
-### `sys_log_roll_num`
-
-Default: 10
-
-Number of log files kept
-
-### `sys_log_verbose_level`
-
-Default: 10
-
-Log display level, used to control the log output at the beginning of VLOG in the code
-
-### `sys_log_verbose_modules`
-
-Default: empty
-
-Log printing modules; for example, setting it to olap will only print logs under the olap module
-
-### `tablet_map_shard_size`
-
-Default: 1
-
-tablet_map_lock fragment size, the value is 2^n, n=0,1,2,3,4, this is for better tablet management
-
-### `tablet_meta_checkpoint_min_interval_secs`
-
-Default: 600(s)
-
-The polling interval of the TabletMeta Checkpoint thread
-
-### `tablet_meta_checkpoint_min_new_rowsets_num`
-
-The minimum number of new Rowsets required to trigger a TabletMeta Checkpoint
-
-### `tablet_scan_frequency_time_node_interval_second`
-
-* Type: int64
-* Description: Time interval to record the metric 'query_scan_count' and timestamp in second for the purpose of calculating tablet scan frequency during a latest period of time at the present.
-* Default: 300
-
-### `tablet_stat_cache_update_interval_second`
-
-Default: 10
-
-Update interval of the tablet stat cache, unit: second
-
-### `tablet_rowset_stale_sweep_time_sec`
-
-* Type: int64
-* Description: It is used to control the expiration time for cleaning up merged rowset versions. When the current time now() minus the latest create time of the rowsets in a version path is greater than tablet_rowset_stale_sweep_time_sec, the path is cleaned up and these merged rowsets are deleted. The unit is second.
-* Default: 1800
-
-When writes are too frequent and disk space is insufficient, you can set tablet_rowset_stale_sweep_time_sec smaller. However, if it is less than 5 minutes, FE may query versions that have already been merged, causing a -230 query error.
-
-### `tablet_writer_open_rpc_timeout_sec`
-
-Default: 300
-
-The RPC timeout for opening a tablet writer during load, unit: second
-
-### `tablet_writer_ignore_eovercrowded`
-
-* Type: bool
-* Description: Used to ignore brpc error '[E1011]The server is overcrowded' when writing data.
-* Default value: false
-
-When you meet the '[E1011]The server is overcrowded' error, you can tune the configuration `brpc_socket_max_unwritten_bytes`, but it cannot be modified at runtime. Set this to `true` to temporarily avoid write failures. Note that it only affects `write`; other rpc requests will still check whether the server is overcrowded.
-
-### `tc_free_memory_rate`
-
-Default: 20 (%)
-
-Available memory, value range: [0-100]
-
-### `tc_max_total_thread_cache_bytes`
-
-* Type: int64
-* Description: Used to limit the total thread cache size in tcmalloc. This limit is not a hard limit, so the actual thread cache usage may exceed this limit. For details, please refer to [TCMALLOC\_MAX\_TOTAL\_THREAD\_CACHE\_BYTES](https://gperftools.github.io/gperftools/tcmalloc.html)
-* Default: 1073741824
-
-If the system is found to be in a high-stress scenario and a large number of threads are found in the tcmalloc lock competition phase through the BE thread stack, such as a large number of `SpinLock` related stacks, you can try increasing this parameter to improve system performance. [Reference](https://github.com/gperftools/gperftools/issues/1111)
-
-### `tc_use_memory_min`
-
-Default: 10737418240
-
-The minimum memory of TCmalloc, when the memory used is less than this, it is not returned to the operating system
-
-### `thrift_client_retry_interval_ms`
-
-* Type: int64
-* Description: Used to set retry interval for thrift client in be to avoid avalanche disaster in fe thrift server, the unit is ms.
-* Default: 1000
-
-### `thrift_connect_timeout_seconds`
-
-Default: 3
-
-The default thrift client connection timeout time (unit: seconds)
-
-### `thrift_rpc_timeout_ms`
-
-Default: 5000
-
-thrift default timeout time, default: 5 seconds
-
-### `thrift_server_type_of_fe`
-
-This configuration indicates the service model used by FE's Thrift service. The type is string and is case-insensitive. This parameter needs to be consistent with the setting of fe's thrift_server_type parameter. Currently there are two values for this parameter, `THREADED` and `THREAD_POOL`.
-
-If the parameter is `THREADED`, the model is a non-blocking I/O model,
-
-If the parameter is `THREAD_POOL`, the model is a blocking I/O model.
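-
-A hedged sketch of keeping the two sides consistent, assuming the FE side uses the `thrift_server_type` item mentioned above:
-
-```
-# conf/fe.conf
-thrift_server_type = THREAD_POOL
-
-# conf/be.conf (must match the FE setting)
-thrift_server_type_of_fe = THREAD_POOL
-```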
-
-### `total_permits_for_compaction_score`
-
-* Type: int64
-* Description: The upper limit of "permits" held by all compaction tasks. This config can be set to limit memory consumption for compaction.
-* Default: 10000
-* Dynamically modify: true
-
-### `trash_file_expire_time_sec`
-
-Default: 259200
-
-Files in the recycle bin (trash) are kept for 72 hours by default. When disk space is insufficient, the retention period of files under trash may not comply with this parameter
-
-### `txn_commit_rpc_timeout_ms`
-
-Default: 10000
-
-txn submit rpc timeout, the default is 10 seconds
-
-### `txn_map_shard_size`
-
-Default: 128
-
-txn_map_lock fragment size, the value is 2^n, n=0,1,2,3,4. This is an enhancement to improve the performance of managing txn
-
-### `txn_shard_size`
-
-Default: 1024
-
-txn_lock shard size, the value is 2^n, n=0,1,2,3,4, this is an enhancement function that can improve the performance of submitting and publishing txn
-
-### `unused_rowset_monitor_interval`
-
-Default: 30
-
-Time interval for clearing expired Rowset, unit: second
-
-### `upload_worker_count`
-
-Default: 1
-
-Maximum number of threads for uploading files
-
-### `use_mmap_allocate_chunk`
-
-Default: false
-
-Whether to use mmap to allocate chunks. If you enable this feature, it is best to increase the value of vm.max_map_count, whose default value is 65530. You can run "sysctl -w vm.max_map_count=262144" or "echo 262144 > /proc/sys/vm/max_map_count" as root to change max_map_count. When this setting is true, you must set chunk_reserved_bytes_limit to a relatively large number, otherwise the performance will be very poor
-
-### `user_function_dir`
-
-Default: ${DORIS_HOME}/lib/udf
-
-UDF function directory
-
-### `webserver_num_workers`
-
-Default: 48
-
-Webserver default number of worker threads
-
-### `webserver_port`
-
-* Type: int32
-* Description: Service port of http server on BE
-* Default: 8040
-
-### `write_buffer_size`
-
-Default: 104857600
-
-The size of the buffer before flushing
-
-### `zone_map_row_num_threshold`
-
-* Type: int32
-* Description: If the number of rows in a page is less than this value, no zonemap will be created to reduce data expansion
-* Default: 20
-
-### `aws_log_level`
-
-* Type: int32
-
-* Description: log level of AWS SDK,
- ```
- Off = 0,
- Fatal = 1,
- Error = 2,
- Warn = 3,
- Info = 4,
- Debug = 5,
- Trace = 6
- ```
-
-* Default: 3
-
-### `track_new_delete`
-
-* Type: bool
-* Description: Whether to hook TCMalloc new/delete; currently the tls mem tracker is consumed/released in the hook.
-* Default: true
-
-### `mem_tracker_level`
-
-* Type: int16
-* Description: MemTrackers with a level equal to or lower than this value will be displayed on the Web page
- ```
- OVERVIEW = 0
- TASK = 1
- INSTANCE = 2
- VERBOSE = 3
- ```
-* Default: 0
-
-### `mem_tracker_consume_min_size_bytes`
-
-* Type: int32
-* Description: The minimum consumption size at which the TCMalloc hook consumes/releases the MemTracker. Consumption smaller than this value continues to accumulate, to avoid frequent calls to the MemTracker's consume/release. Decreasing this value increases the frequency of consume/release; increasing it makes MemTracker statistics less accurate. Theoretically, the difference between a MemTracker's statistics and the true value is at most (mem_tracker_consume_min_size_bytes * the number of BE threads where the MemTracker is used).
-* Default: 1048576
-
-### `memory_leak_detection`
-
-* Type: bool
-* Description: Whether to enable memory leak detection. When a MemTracker becomes negative, it is considered that a memory leak has occurred; however, inaccurate MemTracker accounting can also produce a negative value, so this feature is still experimental.
-* Default: false
-
-### `max_segment_num_per_rowset`
-
-* Type: int32
-* Description: Used to limit the number of segments in the newly generated rowset when importing. If the threshold is exceeded, the import will fail with error -238. Too many segments will cause compaction to take up a lot of memory and cause OOM errors.
-* Default value: 200
-
-### `remote_storage_read_buffer_mb`
-
-* Type: int32
-* Description: The cache size used when reading files on hdfs or object storage.
-* Default value: 16MB
-
-Increasing this value can reduce the number of calls to read remote data, but it will increase memory overhead.
-
-### `external_table_connect_timeout_sec`
-
-* Type: int32
-* Description: The timeout when establishing connection with external table such as ODBC table.
-* Default value: 5 seconds
-
-### `segment_cache_capacity`
-
-* Type: int32
-* Description: The maximum number of Segments cached by Segment Cache.
-* Default value: 1000000
-
-The default value is currently only an empirical value, and may need to be modified according to actual scenarios. Increasing this value can cache more segments and avoid some IO. Decreasing this value will reduce memory usage.
-
-### `auto_refresh_brpc_channel`
-
-* Type: bool
-* Description: When obtaining a brpc connection, check the availability of the connection through a hand_shake rpc, and re-establish the connection if it is unavailable.
-* Default value: false
-
-### `high_priority_flush_thread_num_per_store`
-
-* Type: int32
-* Description: The number of flush threads per store path allocated for the high priority import task.
-* Default value: 1
-
-### `routine_load_consumer_pool_size`
-
-* Type: int32
-* Description: The number of caches for the data consumer used by the routine load.
-* Default: 10
-
-### `load_task_high_priority_threshold_second`
-
-* Type: int32
-* Description: When the timeout of an import task is less than this threshold, Doris will consider it to be a high priority task. High priority tasks use a separate pool of flush threads.
-* Default: 120
-
-### `min_load_rpc_timeout_ms`
-
-* Type: int32
-* Description: The minimum timeout for each rpc in the load job.
-* Default: 20
-
-### `doris_scan_range_max_mb`
-* Type: int32
-* Description: The maximum amount of data read by each OlapScanner.
-* Default: 1024
-
-### `string_type_length_soft_limit_bytes`
-* Type: int32
-* Description: A soft limit on the length of string type values.
-* Default: 1048576
diff --git a/docs/en/administrator-guide/config/fe_config.md b/docs/en/administrator-guide/config/fe_config.md
deleted file mode 100644
index f34658653c..0000000000
--- a/docs/en/administrator-guide/config/fe_config.md
+++ /dev/null
@@ -1,2210 +0,0 @@
----
-{
- "title": "FE Configuration",
- "language": "en"
-}
----
-
-
-
-
-
-# FE Configuration
-
-This document mainly introduces the relevant configuration items of FE.
-
-The FE configuration file `fe.conf` is usually stored in the `conf/` directory of the FE deployment path. In version 0.14, another configuration file `fe_custom.conf` will be introduced. The configuration file is used to record the configuration items that are dynamically configured and persisted by the user during operation.
-
-After the FE process is started, it will read the configuration items in `fe.conf` first, and then read the configuration items in `fe_custom.conf`. The configuration items in `fe_custom.conf` will overwrite the same configuration items in `fe.conf`.
-
-The location of the `fe_custom.conf` file can be configured in `fe.conf` through the `custom_config_dir` configuration item.
-
-## View configuration items
-
-There are two ways to view the configuration items of FE:
-
-1. FE web page
-
- Open the FE web page `http://fe_host:fe_http_port/variable` in the browser. You can see the currently effective FE configuration items in `Configure Info`.
-
-2. View by command
-
- After the FE is started, you can view the configuration items of the FE in the MySQL client with the following command:
-
- `ADMIN SHOW FRONTEND CONFIG;`
-
- The meanings of the columns in the results are as follows:
-
- * Key: the name of the configuration item.
- * Value: The value of the current configuration item.
- * Type: The configuration item value type, such as integer or string.
- * IsMutable: whether it can be dynamically configured. If true, the configuration item can be dynamically configured at runtime. If false, it means that the configuration item can only be configured in `fe.conf` and takes effect after restarting FE.
- * MasterOnly: Whether it is a unique configuration item of Master FE node. If it is true, it means that the configuration item is meaningful only at the Master FE node, and is meaningless to other types of FE nodes. If false, it means that the configuration item is meaningful in all types of FE nodes.
- * Comment: The description of the configuration item.
-
-## Set configuration items
-
-There are two ways to configure FE configuration items:
-
-1. Static configuration
-
- Add and set configuration items in the `conf/fe.conf` file. The configuration items in `fe.conf` will be read when the FE process starts. Configuration items not in `fe.conf` will use default values.
-
-2. Dynamic configuration via MySQL protocol
-
- After the FE starts, you can set the configuration items dynamically through the following commands. This command requires administrator privilege.
-
- `ADMIN SET FRONTEND CONFIG (" fe_config_name "=" fe_config_value ");`
-
- Not all configuration items support dynamic configuration. You can check whether the dynamic configuration is supported by the `IsMutable` column in the` ADMIN SHOW FRONTEND CONFIG; `command result.
-
- If the configuration item of `MasterOnly` is modified, the command will be directly forwarded to the Master FE and only the corresponding configuration item in the Master FE will be modified.
-
- **Configuration items modified in this way will become invalid after the FE process restarts.**
-
- For more help on this command, you can view it through the `HELP ADMIN SET CONFIG;` command.
-
-3. Dynamic configuration via HTTP protocol
-
- For details, please refer to [Set Config Action](../http-actions/fe/set-config-action.md)
-
- This method can also persist the modified configuration items. The configuration items will be persisted in the `fe_custom.conf` file and will still take effect after FE is restarted.
-
-## Examples
-
-1. Modify `async_pending_load_task_pool_size`
-
- Through `ADMIN SHOW FRONTEND CONFIG;` you can see that this configuration item cannot be dynamically configured (`IsMutable` is false). You need to add in `fe.conf`:
-
- `async_pending_load_task_pool_size = 20`
-
-   Then restart the FE process for the configuration to take effect.
-
-2. Modify `dynamic_partition_enable`
-
- Through `ADMIN SHOW FRONTEND CONFIG;` you can see that the configuration item can be dynamically configured (`IsMutable` is true). And it is the unique configuration of Master FE. Then first we can connect to any FE and execute the following command to modify the configuration:
-
- ```
-    ADMIN SET FRONTEND CONFIG ("dynamic_partition_enable" = "true");
- ```
-
- Afterwards, you can view the modified value with the following command:
-
- ```
- set forward_to_master = true;
- ADMIN SHOW FRONTEND CONFIG;
- ```
-
- After modification in the above manner, if the Master FE restarts or a Master election is performed, the configuration will be invalid. You can add the configuration item directly in `fe.conf` and restart the FE to make the configuration item permanent.
-
-3. Modify `max_distribution_pruner_recursion_depth`
-
- Through `ADMIN SHOW FRONTEND CONFIG;` you can see that the configuration item can be dynamically configured (`IsMutable` is true). It is not unique to Master FE.
-
-   Similarly, we can modify the configuration by dynamically modifying the configuration command. Because this configuration is not unique to the Master FE, users need to connect to different FEs separately to modify the configuration dynamically, so that all FEs use the modified configuration values.
-
-## Configurations
-
-### max_dynamic_partition_num
-
-Default: 500
-
-IsMutable: true
-
-MasterOnly: true
-
-Used to limit the maximum number of partitions that can be created when creating a dynamic partition table, to avoid creating too many partitions at one time. The number is determined by "start" and "end" in the dynamic partition parameters.
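-
-For example, with the commonly used dynamic partition properties below (values are illustrative), the table keeps partitions from start = -7 to end = 3, i.e. about 11 partitions, well under this limit:
-
-```
-PROPERTIES (
-    "dynamic_partition.enable" = "true",
-    "dynamic_partition.time_unit" = "DAY",
-    "dynamic_partition.start" = "-7",
-    "dynamic_partition.end" = "3",
-    "dynamic_partition.prefix" = "p",
-    "dynamic_partition.buckets" = "10"
-);
-```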
-
-### grpc_max_message_size_bytes
-
-Default: 1G
-
-Used to set the initial flow window size of the GRPC client channel, and also used as the max message size. When the result set is large, you may need to increase this value.
-
-### min_replication_num_per_tablet
-
-Default: 1
-
-Used to set the minimum replication number per tablet.
-
-### max_replication_num_per_tablet
-
-Default: 32767
-
-Used to set the maximum replication number per tablet.
-
-### enable_outfile_to_local
-
-Default: false
-
-Whether to allow the outfile function to export the results to the local disk.
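-
-When it is enabled, an export to a local path on the node is written with the `file://` scheme, roughly as follows (table name and path are placeholders):
-
-```
-SELECT * FROM example_tbl
-INTO OUTFILE "file:///home/doris/result_"
-FORMAT AS CSV;
-```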
-
-### enable_access_file_without_broker
-
-Default: false
-
-IsMutable: true
-
-MasterOnly: true
-
-This config is used to try to skip the broker when accessing BOS or other cloud storage via broker
-
-### enable_bdbje_debug_mode
-
-Default: false
-
-If set to true, FE will be started in BDBJE debug mode
-
-### enable_alpha_rowset
-
-Default: false
-
-Whether to support the creation of alpha rowset tables. The default is false and it should only be used in emergency situations. This config should be removed in some future version
-
-### enable_http_server_v2
-
-Default: The default is true after the official 0.14.0 version is released, and the default is false before
-
-HTTP Server V2 is implemented by SpringBoot. It uses an architecture that separates the front and back ends. Only when httpv2 is enabled can users use the new front-end UI interface.
-
-### jetty_server_acceptors
-
-Default: 2
-
-### jetty_server_selectors
-
-Default: 4
-
-### jetty_server_workers
-
-Default: 0
-
-With the above three parameters, Jetty's thread architecture model is very simple, divided into three thread pools: acceptors, selectors and workers. Acceptors are responsible for accepting new connections and then hand them over to selectors, which process the unpacking of the HTTP message protocol; finally workers process the request. The first two thread pools adopt a non-blocking model, and one thread can handle the reads and writes of many sockets, so the number of threads in these pools is small.
-
-For most projects, only 4 acceptor threads are required, and 2 to 4 selector threads are sufficient. Workers handle blocking business logic, often with many database operations, and require a large number of threads. The specific number depends on the ratio of QPS to IO events of the application: the higher the QPS, the more threads are required; the higher the proportion of IO, the more threads are waiting, and the more total threads are required.
-
-The worker thread pool is not set by default; set it according to your needs
-
-### jetty_threadPool_minThreads
-
-The minimum number of threads in the Jetty thread pool, the default is 20
-
-### jetty_threadPool_maxThreads
-
-The maximum number of threads in the Jetty thread pool, the default is 400
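-
-A hedged `fe.conf` sketch that sets the Jetty knobs described above together (values are illustrative only):
-
-```
-jetty_server_acceptors = 2
-jetty_server_selectors = 4
-# 0 lets Jetty size the worker pool itself
-jetty_server_workers = 0
-jetty_threadPool_minThreads = 20
-jetty_threadPool_maxThreads = 400
-```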
-
-### jetty_server_max_http_post_size
-
-Default: 100 * 1024 * 1024 (100MB)
-
-This is the maximum number of bytes of the file uploaded by the put or post method, the default value: 100MB
-
-### `disable_mini_load`
-
-Whether to disable the mini load data import method, the default: true (Disabled)
-
-### frontend_address
-
-Status: Deprecated, not recommended to use. This parameter may be deleted later.
-
-Type: string
-
-Description: Explicitly set the IP address of FE instead of using *InetAddress.getByName* to get the IP address. Usually used when the expected result cannot be obtained through *InetAddress.getByName*. Only an IP address is supported, not a hostname.
-
-Default value: 0.0.0.0
-
-### default_max_filter_ratio
-
-Default: 0
-
-IsMutable: true
-
-MasterOnly: true
-
-Maximum percentage of data that can be filtered out (due to reasons such as irregular data). The default value is 0.
-
-### default_db_data_quota_bytes
-
-Default: 1PB
-
-IsMutable: true
-
-MasterOnly: true
-
-Used to set the default database data quota size. To set the quota size of a single database, you can use:
-
-```
-Set the database data quota, the unit is:B/K/KB/M/MB/G/GB/T/TB/P/PB
-ALTER DATABASE db_name SET DATA QUOTA quota;
-View configuration
-show data (Detail: HELP SHOW DATA)
-```
-
-### default_db_replica_quota_size
-
-Default: 1073741824
-
-IsMutable: true
-
-MasterOnly: true
-
-Used to set the default database replica quota. To set the quota size of a single database, you can use:
-
-```
-Set the database replica quota
-ALTER DATABASE db_name SET REPLICA QUOTA quota;
-View configuration
-show data (Detail: HELP SHOW DATA)
-```
-
-### enable_batch_delete_by_default
-
-Default: false
-
-IsMutable: true
-
-MasterOnly: true
-
-Whether to add a delete sign column when create unique table
-
-### recover_with_empty_tablet
-
-Default: false
-
-IsMutable: true
-
-MasterOnly: true
-
- In some very special circumstances, such as code bugs, or human misoperation, etc., all replicas of some tablets may be lost. In this case, the data has been substantially lost. However, in some scenarios, the business still hopes to ensure that the query will not report errors even if there is data loss, and reduce the perception of the user layer. At this point, we can use the blank Tablet to fill the missing replica to ensure that the query can be executed normally.
-
-Set to true so that Doris will automatically use blank replicas to fill tablets whose replicas have all been damaged or lost
-
-### max_allowed_in_element_num_of_delete
-
-Default: 1024
-
-IsMutable: true
-
-MasterOnly: true
-
-This configuration is used to limit element num of InPredicate in delete statement.
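-
-For illustration, the limit applies to the number of elements in the IN list of a delete statement such as (table and values are hypothetical):
-
-```
-DELETE FROM example_tbl WHERE k1 IN (1, 2, 3);
-```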
-
-### cache_result_max_row_count
-
-Default: 3000
-
-IsMutable: true
-
-MasterOnly: false
-
-In order to avoid occupying too much memory, the number of rows that can be cached is limited by this configuration (3000 by default). If this threshold is exceeded, the result set cannot be cached
-
-### cache_last_version_interval_second
-
-Default: 900
-
-IsMutable: true
-
-MasterOnly: false
-
-The time interval of the latest partitioned version of the table refers to the time interval between the data update and the current version. It is generally set to 900 seconds, which distinguishes offline and real-time import
-
-### cache_enable_partition_mode
-
-Default: true
-
-IsMutable: true
-
-MasterOnly: false
-
-When this switch is turned on, the query result set will be cached according to the partition. If the interval between the query table partition time and the query time is less than cache_last_version_interval_second, the result set will be cached according to the partition.
-
-Part of the data will be obtained from the cache and some data from the disk when querying, and the data will be merged and returned to the client.
-
-### cache_enable_sql_mode
-
-Default: true
-
-IsMutable: true
-
-MasterOnly: false
-
-If this switch is turned on, the SQL query result set will be cached. If the interval between the last visit version time in all partitions of all tables in the query is greater than cache_last_version_interval_second, and the result set is less than cache_result_max_row_count, the result set will be cached, and the next same SQL will hit the cache
-
-If set to true, fe will enable sql result caching. This option is suitable for offline data update scenarios
-
-| | case1 | case2 | case3 | case4 |
-| ---------------------- | ----- | ----- | ----- | ----- |
-| enable_sql_cache | false | true | true | false |
-| enable_partition_cache | false | false | true | true |
-
-### min_clone_task_timeout_sec and max_clone_task_timeout_sec
-
-Default: Minimum 3 minutes, maximum two hours
-
-IsMutable: true
-
-MasterOnly: true
-
-Type: long
-
-Description: Used to control the timeout of a clone task, in seconds. The default maximum is 7200. Dynamically modifiable: yes.
-
-`max_clone_task_timeout_sec` can cooperate with `min_clone_task_timeout_sec` to control the maximum and minimum timeout of a clone task. Under normal circumstances, the timeout of a clone task is estimated from the amount of data and the minimum transfer rate (5 MB/s). In some special cases, these two configurations can be used to set the upper and lower bounds of the clone task timeout to ensure that the clone task can be completed successfully.
-
-### agent_task_resend_wait_time_ms
-
-Default: 5000
-
-IsMutable: true
-
-MasterOnly: true
-
-This configuration decides whether to resend an agent task when create_time for the agent task is set; only when current_time - create_time > agent_task_resend_wait_time_ms will ReportHandler resend the agent task.
-
-This configuration is currently mainly used to solve the problem of repeated sending of `PUBLISH_VERSION` agent tasks. The current default value of this configuration is 5000, which is an experimental value.
-
-Because there is a certain time delay between submitting agent tasks to the AgentTaskQueue and sending them to BE, increasing the value of this configuration can effectively solve the problem of agent tasks being sent repeatedly.
-
-But at the same time, it will delay the re-execution of agent tasks whose submission or execution has failed.
-
-### enable_odbc_table
-
-Default: false
-
-IsMutable: true
-
-MasterOnly: true
-
-Whether to enable the ODBC table. It is not enabled by default; you need to manually enable it when you use it. This parameter can be set dynamically with ADMIN SET FRONTEND CONFIG, as shown below.
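-
-Concretely, enabling it at runtime would be:
-
-```
-ADMIN SET FRONTEND CONFIG ("enable_odbc_table" = "true");
-```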
-
-### enable_spark_load
-
-Default: false
-
-IsMutable: true
-
-MasterOnly: true
-
-Whether to enable spark load temporarily, it is not enabled by default
-
-### disable_storage_medium_check
-
-Default: false
-
-IsMutable: true
-
-MasterOnly: true
-
-If disable_storage_medium_check is true, ReportHandler would not check tablet's storage medium and disable storage cool down function, the default value is false. You can set the value true when you don't care what the storage medium of the tablet is.
-
-### drop_backend_after_decommission
-
-Default: false
-
-IsMutable: true
-
-MasterOnly: true
-
-1. This configuration is used to control whether the system drops the BE after successfully decommissioning the BE. If true, the BE node will be deleted after the BE is successfully offline. If false, after the BE successfully goes offline, the BE will remain in the DECOMMISSION state, but will not be dropped.
-
- This configuration can play a role in certain scenarios. Assume that the initial state of a Doris cluster is one disk per BE node. After running for a period of time, the system has been vertically expanded, that is, each BE node adds 2 new disks. Because Doris currently does not support data balancing among the disks within the BE, the data volume of the initial disk may always be much higher than the data volume of the newly added disk. At this time, we can perform manual inter-disk balancing by the following operations:
-
- 1. Set the configuration item to false.
-    2. Perform a decommission operation on a certain BE node (see the statement sketch after this list). This operation will migrate all data on the BE to other nodes.
- 3. After the decommission operation is completed, the BE will not be dropped. At this time, cancel the decommission status of the BE. Then the data will start to balance from other BE nodes back to this node. At this time, the data will be evenly distributed to all disks of the BE.
- 4. Perform steps 2 and 3 for all BE nodes in sequence, and finally achieve the purpose of disk balancing for all nodes
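-
-A hedged sketch of the statements involved in steps 2 and 3 above (the BE host and heartbeat port are placeholders):
-
-```
-ALTER SYSTEM DECOMMISSION BACKEND "be_host:9050";
-
-CANCEL DECOMMISSION BACKEND "be_host:9050";
-```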
-
-### period_of_auto_resume_min
-
-Default: 5 (s)
-
-IsMutable: true
-
-MasterOnly: true
-
-The period for automatically resuming paused Routine Load jobs
-
-### max_tolerable_backend_down_num
-
-Default: 0
-
-IsMutable: true
-
-MasterOnly: true
-
-The maximum number of down BEs that can be tolerated before Routine Load stops being automatically resumed. With the default value of 0, Routine Load cannot be automatically resumed as long as one BE is down
-
-### enable_materialized_view
-
-Default: true
-
-IsMutable: true
-
-MasterOnly: true
-
-This configuration is used to turn on and off the creation of materialized views. If set to true, the function to create a materialized view is enabled. The user can create a materialized view through the `CREATE MATERIALIZED VIEW` command. If set to false, materialized views cannot be created.
-
-If you get an error `The materialized view is coming soon` or `The materialized view is disabled` when creating the materialized view, it means that the configuration is set to false and the function of creating the materialized view is turned off. You can start to create a materialized view by modifying the configuration to true.
-
-This variable is a dynamic configuration, and users can modify it through commands after the FE process starts. You can also modify the FE configuration file and restart the FE to make it take effect permanently
-
-### check_java_version
-
-Default: true
-
-Doris will check whether the compiled and run Java versions are compatible, if not, it will throw a Java version mismatch exception message and terminate the startup
-
-### max_running_rollup_job_num_per_table
-
-Default: 1
-
-IsMutable: true
-
-MasterOnly: true
-
-Control the concurrency limit of Rollup jobs
-
-### dynamic_partition_enable
-
-Default: true
-
-IsMutable: true
-
-MasterOnly: true
-
-Whether to enable dynamic partition, enabled by default
-
-### dynamic_partition_check_interval_seconds
-
-Default: 600 (s)
-
-IsMutable: true
-
-MasterOnly: true
-
-Decide how often to check dynamic partition
-
-### disable_cluster_feature
-
-Default: true
-
-IsMutable: true
-
-The multi cluster feature will be deprecated in version 0.12. Setting this config to true will disable all operations related to the cluster feature, including:
-
-- create/drop cluster
-- add free backend / add backend to cluster / decommission cluster balance
-- change the backend number of a cluster
-- link/migration db
-
-### force_do_metadata_checkpoint
-
-Default: false
-
-IsMutable: true
-
-MasterOnly: true
-
-If set to true, the checkpoint thread will make the checkpoint regardless of the jvm memory used percent
-
-### metadata_checkpoint_memory_threshold
-
-Default: 60 (60%)
-
-IsMutable: true
-
-MasterOnly: true
-
-If the JVM memory used percent (heap or old mem pool) exceeds this threshold, the checkpoint thread will not work, to avoid OOM.
-
-### max_distribution_pruner_recursion_depth
-
-Default: 100
-
-IsMutable: true
-
-MasterOnly: false
-
-This will limit the max recursion depth of the hash distribution pruner.
- eg: where a in (5 elements) and b in (4 elements) and c in (3 elements) and d in (2 elements).
- a/b/c/d are distribution columns, so the recursion depth will be 5 * 4 * 3 * 2 = 120, larger than 100,
- so the distribution pruner will not work and will just return all buckets.
- Increasing the depth can support distribution pruning for more elements, but may cost more CPU.
-
-### max_backup_restore_job_num_per_db
-
-Default: 10
-
-This configuration is mainly used to control the number of backup/restore tasks recorded in each database.
-
-### using_old_load_usage_pattern
-
-Default: false
-
-IsMutable: true
-
-MasterOnly: true
-
-If set to true, an insert stmt with a processing error will still return a label to the user, and the user can use this label to check the load job's status. The default value is false, which means that if an insert operation encounters errors, an exception will be thrown to the user client directly without a load label.
-
-### small_file_dir
-
-Default: DORIS_HOME_DIR/small_files
-
-Save small files
-
-### max_small_file_size_bytes
-
-Default: 1M
-
-IsMutable: true
-
-MasterOnly: true
-
-The max size of a single file stored in SmallFileMgr.
-
-### max_small_file_number
-
-Default: 100
-
-IsMutable: true
-
-MasterOnly: true
-
-The max number of files stored in SmallFileMgr.
-
-### max_routine_load_task_num_per_be
-
-Default: 5
-
-IsMutable: true
-
-MasterOnly: true
-
-The max concurrent routine load task num per BE. This is to limit the number of routine load tasks sent to a BE, and it should also be less than the BE config 'routine_load_thread_pool_size' (default 10), which is the routine load task thread pool size on the BE.
-
-### max_routine_load_task_concurrent_num
-
-Default: 5
-
-IsMutable: true
-
-MasterOnly: true
-
-the max concurrent routine load task num of a single routine load job
-
-### max_routine_load_job_num
-
-Default: 100
-
-the max routine load job num, including NEED_SCHEDULED, RUNNING, PAUSE
-
-### max_running_txn_num_per_db
-
-Default: 100
-
-IsMutable: true
-
-MasterOnly: true
-
-This configuration is mainly used to control the number of concurrent load jobs of the same database.
-
-When there are too many load jobs running in the cluster, the newly submitted load jobs may report errors:
-
-```text
-current running txns on db xxx is xx, larger than limit xx
-```
-
-When this error is encountered, it means that the number of load jobs currently running in the cluster exceeds the configured value. At this time, it is recommended to wait on the business side and retry the load jobs.
-
-Generally it is not recommended to increase this configuration value. An excessively high concurrency may cause heavy system load.
-
-### enable_metric_calculator
-
-Default: true
-
-If set to true, the metric collector will run as a daemon timer to collect metrics at a fixed interval.
-
-### report_queue_size
-
-Default: 100
-
-IsMutable: true
-
-MasterOnly: true
-
- This threshold is to avoid piling up too many report tasks in FE, which may cause OOM exceptions. In some large Doris clusters, e.g. 100 Backends with ten million replicas, a tablet report may cost several seconds after some modification of metadata (drop partition, etc.). And one Backend will report tablet info every 1 min, so receiving reports without limit is unacceptable. We will optimize the processing speed of tablet reports in the future, but for now the report is simply discarded if the queue size exceeds the limit.
- Some online time costs:
- 1. disk report: 0-1 ms
- 2. task report: 0-1 ms
- 3. tablet report (10000 replicas): about 200 ms
-
-### partition_rebalance_max_moves_num_per_selection
-
-Default: 10
-
-IsMutable: true
-
-MasterOnly: true
-
-Valid only when using the PartitionRebalancer.
-
-### partition_rebalance_move_expire_after_access
-
-Default: 600 (s)
-
-IsMutable: true
-
-MasterOnly: true
-
-Valid only when using the PartitionRebalancer. If this is changed, cached moves will be cleared.
-
-### tablet_rebalancer_type
-
-Default: BeLoad
-
-MasterOnly: true
-
-Rebalancer type (case-insensitive): BeLoad, Partition. If the type fails to parse, BeLoad is used as the default.
-
-### max_balancing_tablets
-
-Default: 100
-
-IsMutable: true
-
-MasterOnly: true
-
-If the number of balancing tablets in TabletScheduler exceeds max_balancing_tablets, no more balance checks will be done.
-
-### max_scheduling_tablets
-
-Default: 2000
-
-IsMutable: true
-
-MasterOnly: true
-
-If the number of scheduled tablets in TabletScheduler exceeds max_scheduling_tablets, checking will be skipped.
-
-### disable_balance
-
-Default: false
-
-IsMutable: true
-
-MasterOnly: true
-
-If set to true, TabletScheduler will not do balancing.
-
-### balance_load_score_threshold
-
-Default: 0.1 (10%)
-
-IsMutable: true
-
-MasterOnly: true
-
-The threshold of the cluster balance score. If a backend's load score is 10% lower than the average score, this backend will be marked as LOW load; if its load score is 10% higher than the average score, it will be marked as HIGH load.
-
-### schedule_slot_num_per_path
-
-Default: 2
-
-The default slot number per path in the tablet scheduler. TODO: remove this config and dynamically adjust it based on clone task statistics.
-
-### tablet_repair_delay_factor_second
-
-Default: 60 (s)
-
-IsMutable: true
-
-MasterOnly: true
-
-The factor of the delay time before deciding to repair a tablet. If the priority is VERY_HIGH, it is repaired immediately.
-
-- HIGH, delay tablet_repair_delay_factor_second * 1;
-- NORMAL: delay tablet_repair_delay_factor_second * 2;
-- LOW: delay tablet_repair_delay_factor_second * 3;
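-
-For example, with the default of 60 s, a HIGH priority tablet waits 60 s, a NORMAL one 120 s, and a LOW one 180 s before the repair is scheduled.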
-
-### es_state_sync_interval_second
-
-Default: 10
-
-FE will call the ES API to get ES index shard info every *es_state_sync_interval_second* seconds.
-
-### disable_hadoop_load
-
-Default: false
-
-IsMutable: true
-
-MasterOnly: true
-
-Load using a Hadoop cluster will be deprecated in the future. Set to true to disable this kind of load.
-
-### db_used_data_quota_update_interval_secs
-
-Default: 300 (s)
-
-IsMutable: true
-
-MasterOnly: true
-
-For better data load performance, when checking whether the amount of data used by a database exceeds its quota before a data load, we do not calculate the amount of data already used by the database in real time, but instead use a value periodically updated by a daemon thread.
-
-This configuration is used to set the time interval for updating the value of the amount of data used by the database.
-
-### disable_load_job
-
-Default: false
-
-IsMutable: true
-
-MasterOnly: true
-
-If this is set to true:
-
-- all pending load jobs will fail when calling the begin txn API
-- all load jobs in the prepare state will fail when calling the commit txn API
-- all committed load jobs will wait to be published
-
-### catalog_try_lock_timeout_ms
-
-Default: 5000 (ms)
-
-IsMutable: true
-
-The tryLock timeout configuration of catalog lock. Normally it does not need to change, unless you need to test something.
-
-### max_query_retry_time
-
-Default: 1
-
-IsMutable: true
-
-The number of query retries. A query may be retried if we encounter an RPC exception and no result has been sent to the user. You may reduce this number to avoid an avalanche disaster.
-
-### remote_fragment_exec_timeout_ms
-
-Default: 5000 (ms)
-
-IsMutable: true
-
-The timeout for executing an async remote fragment. In normal cases, the async remote fragment will be executed in a short time. If the system is under a high load condition, try to set this timeout longer.
-
-### enable_local_replica_selection
-
-Default: false
-
-IsMutable: true
-
-If set to true, the Planner will try to select a replica of the tablet on the same host as this Frontend. This may reduce network transmission in the following case:
-
-- N hosts with N Backends and N Frontends deployed.
-- The data has N replicas.
-- High concurrency queries are sent to all Frontends evenly.
-
-In this case, all Frontends can only use local replicas to do the query. If you want to allow falling back to non-local replicas when no local replicas are available, set enable_local_replica_selection_fallback to true.
-
-### enable_local_replica_selection_fallback
-
-Default: false
-
-IsMutable: true
-
-Used together with enable_local_replica_selection. If local replicas are not available, fall back to non-local replicas.
-
-### max_unfinished_load_job
-
-Default: 1000
-
-IsMutable: true
-
-MasterOnly: true
-
- Max number of unfinished load jobs, including PENDING, ETL, LOADING, and QUORUM_FINISHED. If this number is exceeded, new load jobs are not allowed to be submitted.
-
-### max_bytes_per_broker_scanner
-
-Default: 3 * 1024 * 1024 * 1024L (3G)
-
-IsMutable: true
-
-MasterOnly: true
-
-Max bytes a broker scanner can process in one broker load job. Commonly, each Backend has one broker scanner.
-
-### enable_auth_check
-
-Default: true
-
-If set to false, auth check will be disabled, in case something goes wrong with the new privilege system.
-
-### tablet_stat_update_interval_second
-
-Default: 300 (5min)
-
-The update interval of tablet stats. All frontends will get tablet stats from all backends at each interval.
-
-### storage_flood_stage_usage_percent
-
-Default: 95 (95%)
-
-IsMutable: true
-
-MasterOnly: true
-
-### storage_flood_stage_left_capacity_bytes
-
-Default: 1 * 1024 * 1024 * 1024 (1GB)
-
-IsMutable: true
-
-MasterOnly: true
-
-If the used capacity of a disk reaches 'storage_flood_stage_usage_percent' and its left capacity falls below 'storage_flood_stage_left_capacity_bytes', the following operations will be rejected:
-
-1. load job
-2. restore job
-
-### storage_high_watermark_usage_percent
-
-Default: 85 (85%)
-
-IsMutable: true
-
-MasterOnly: true
-
-### storage_min_left_capacity_bytes
-
-Default: 2 * 1024 * 1024 * 1024 (2GB)
-
-IsMutable: true
-
-MasterOnly: true
-
- 'storage_high_watermark_usage_percent' limits the max capacity usage percent of a Backend storage path, and 'storage_min_left_capacity_bytes' limits the minimum left capacity of a Backend storage path. If both limits are reached, this storage path cannot be chosen as a tablet balance destination. But for tablet recovery, these limits may be exceeded to keep data integrity as much as possible.
-
-### backup_job_default_timeout_ms
-
-Default: 86400 * 1000 (1day)
-
-IsMutable: true
-
-MasterOnly: true
-
-default timeout of backup job
-
-### with_k8s_certs
-
-Default: false
-
-If using the k8s deploy manager locally, set this to true and prepare the cert files.
-
-### dpp_hadoop_client_path
-
-Default: /lib/hadoop-client/hadoop/bin/hadoop
-
-### dpp_bytes_per_reduce
-
-Default: 100 * 1024 * 1024L; // 100M
-
-### dpp_default_cluster
-
-Default: palo-dpp
-
-### dpp_default_config_str
-
-Default: {
- hadoop_configs : 'mapred.job.priority=NORMAL;mapred.job.map.capacity=50;mapred.job.reduce.capacity=50;mapred.hce.replace.streaming=false;abaci.long.stored.job=true;dce.shuffle.enable=false;dfs.client.authserver.force_stop=true;dfs.client.auth.method=0'
- }
-
-### dpp_config_str
-
-Default: {
- palo-dpp : {
- hadoop_palo_path : '/dir',
- hadoop_configs : 'fs.default.name=hdfs://host:port;mapred.job.tracker=host:port;hadoop.job.ugi=user,password'
- }
- }
-
-### enable_deploy_manager
-
-Default: disable
-
- Set this config if you deploy Palo using a third-party deploy manager. Valid options are:
-
-- disable: no deploy manager
-- k8s: Kubernetes
-- ambari: Ambari
-- local: Local File (for test or Boxer2 BCC version)
-
-### enable_token_check
-
-Default: true
-
-For forward compatibility; will be removed later. Check the token when downloading image files.
-
-### expr_depth_limit
-
-Default: 3000
-
-IsMutable: true
-
-Limit on the depth of an expr tree. Exceeding this limit may cause long analysis time while holding the db read lock. Do not change this unless you know what you are doing.
-
-### expr_children_limit
-
-Default: 10000
-
-IsMutable: true
-
-Limit on the number of expr children of an expr tree. Exceeding this limit may cause long analysis time while holding the database read lock.
-
-### proxy_auth_magic_prefix
-
-Default: x@8
-
-### proxy_auth_enable
-
-Default: false
-
-### meta_publish_timeout_ms
-
-Default: 1000 (ms)
-
-The default user resource publishing timeout
-
-### disable_colocate_balance
-
-Default: false
-
-IsMutable: true
-
-MasterOnly: true
-
-This config can be set to true to disable automatic relocation and balancing of colocate tables. If 'disable_colocate_balance' is set to true, ColocateTableBalancer will not relocate and balance colocate tables.
-
-**Attention**:
-
-- Under normal circumstances, there is no need to turn off balancing at all.
-- Once balancing is turned off, an unstable colocate table may not be restored, and eventually the colocate plan cannot be used when querying.
-
-### query_colocate_join_memory_limit_penalty_factor
-
-Default: 1
-
-IsMutable: true
-
-The memory_limit of a colocate join PlanFragment instance = exec_mem_limit / min(query_colocate_join_memory_limit_penalty_factor, instance_num)
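-
-For example, assuming exec_mem_limit = 2 GB, query_colocate_join_memory_limit_penalty_factor = 1 and instance_num = 4, then memory_limit = 2 GB / min(1, 4) = 2 GB per instance.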
-
-### max_connection_scheduler_threads_num
-
-Default: 4096
-
-Maximal number of thread in connection-scheduler-pool.
-
-### qe_max_connection
-
-Default: 1024
-
-Maximal number of connections per FE.
-
-### check_consistency_default_timeout_second
-
-Default: 600 (10 min)
-
-IsMutable: true
-
-MasterOnly: true
-
-Default timeout of a single consistency check task. Set long enough to fit your tablet size
-
-### consistency_check_start_time
-
-Default: 23
-
-IsMutable: true
-
-MasterOnly: true
-
-Consistency checker will run from *consistency_check_start_time* to *consistency_check_end_time*. Default is from 23:00 to 04:00
-
-### consistency_check_end_time
-
-Default: 04
-
-IsMutable: true
-
-MasterOnly: true
-
-Consistency checker will run from *consistency_check_start_time* to *consistency_check_end_time*. Default is from 23:00 to 04:00
-
-### export_tablet_num_per_task
-
-Default: 5
-
-IsMutable: true
-
-MasterOnly: true
-
-Number of tablets per export query plan
-
-### export_task_default_timeout_second
-
-Default: 2 * 3600 (2 hour)
-
-IsMutable: true
-
-MasterOnly: true
-
-Default timeout of export jobs.
-
-### export_running_job_num_limit
-
-Default: 5
-
-IsMutable: true
-
-MasterOnly: true
-
-Limitation of the concurrency of running export jobs. Default is 5. 0 is unlimited
-
-### export_checker_interval_second
-
-Default: 5
-
-Export checker's running interval.
-
-### default_load_parallelism
-
-Default: 1
-
-IsMutable: true
-
-MasterOnly: true
-
-Default parallelism of the broker load execution plan on a single node.
-If the user sets the parallelism when the broker load is submitted, this parameter will be ignored.
-
-### max_broker_concurrency
-
-Default: 10
-
-IsMutable: true
-
-MasterOnly: true
-
-Maximal concurrency of broker scanners.
-
-### min_bytes_per_broker_scanner
-
-Default: 67108864L (64M)
-
-IsMutable: true
-
-MasterOnly: true
-
-Minimum bytes that a single broker scanner will read.
-
-### catalog_trash_expire_second
-
-Default: 86400L (1day)
-
-IsMutable: true
-
-MasterOnly: true
-
-After dropping a database (table/partition), you can recover it by using the RECOVER stmt, and this config specifies the maximal data retention time. After that time, the data will be deleted permanently.
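-
-A minimal sketch of the recovery statements, using hypothetical object names:
-
-```sql
-RECOVER DATABASE example_db;
-RECOVER TABLE example_db.example_tbl;
-RECOVER PARTITION p1 FROM example_db.example_tbl;
-```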
-
-### storage_cooldown_second
-
-Default: 30 * 24 * 3600L (30day)
-
-When creating a table (or partition), you can specify its storage medium (HDD or SSD). If set to SSD, this config specifies the default duration that tablets will stay on SSD; after that, tablets will be moved to HDD automatically. You can set the storage cooldown time in the CREATE TABLE stmt.
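-
-A minimal sketch of setting the medium and cooldown time at table creation, with hypothetical names and dates:
-
-```sql
-CREATE TABLE example_db.example_tbl
-(
-    k1 DATE,
-    k2 INT
-)
-DUPLICATE KEY(k1, k2)
-DISTRIBUTED BY HASH(k2) BUCKETS 8
-PROPERTIES
-(
-    "storage_medium" = "SSD",
-    "storage_cooldown_time" = "2023-01-01 00:00:00"
-);
-```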
-
-### default_storage_medium
-
-Default: HDD
-
-When creating a table (or partition), you can specify its storage medium (HDD or SSD). If not specified, this config determines the default medium used at creation time.
-
-### max_backend_down_time_second
-
-Default: 3600 (1hour)
-
-IsMutable: true
-
-MasterOnly: true
-
-If a backend is down for *max_backend_down_time_second*, a BACKEND_DOWN event will be triggered.
-
-### alter_table_timeout_second
-
-Default: 86400 (1day)
-
-IsMutable: true
-
-MasterOnly: true
-
-Maximal timeout of ALTER TABLE request. Set long enough to fit your table data size.
-
-### capacity_used_percent_high_water
-
-Default: 0.75 (75%)
-
-IsMutable: true
-
-MasterOnly: true
-
-The high watermark of the disk capacity used percent. This is used for calculating the load score of a backend.
-
-### clone_distribution_balance_threshold
-
-Default: 0.2
-
-IsMutable: true
-
-MasterOnly: true
-
-Balance threshold of num of replicas in Backends.
-
-### clone_capacity_balance_threshold
-
-Default: 0.2
-
-IsMutable: true
-
-MasterOnly: true
-
-Balance threshold of data size in BE.
- The balance algorithm is:
-
-1. Calculate the average used capacity(AUC) of the entire cluster. (total data size / total backends num)
-2. The high water level is (AUC * (1 + clone_capacity_balance_threshold))
-3. The low water level is (AUC * (1 - clone_capacity_balance_threshold))
-4. The Clone checker will try to move replica from high water level BE to low water level BE.
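-
-For example, with 10 TB of data across 5 BEs, AUC = 2 TB per BE; with the default threshold of 0.2, the high water level is 2.4 TB and the low water level is 1.6 TB.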
-
-### replica_delay_recovery_second
-
-Default: 0
-
-IsMutable: true
-
-MasterOnly: true
-
-The minimal delay in seconds between a replica failing and FE trying to recover it using clone.
-
-### clone_high_priority_delay_second
-
-Default: 0
-
-IsMutable: true
-
-MasterOnly: true
-
-HIGH priority clone job's delay trigger time.
-
-### clone_normal_priority_delay_second
-
-Default: 300 (5min)
-
-IsMutable: true
-
-MasterOnly: true
-
-NORMAL priority clone job's delay trigger time
-
-### clone_low_priority_delay_second
-
-Default: 600 (10min)
-
-IsMutable: true
-
-MasterOnly: true
-
-LOW priority clone job's delay trigger time. A clone job contains a tablet which need to be cloned(recovery or migration). If the priority is LOW, it will be delayed *clone_low_priority_delay_second* after the job creation and then be executed. This is to avoid a large number of clone jobs running at same time only because a host is down for a short time.
- **NOTICE** that this config (and *clone_normal_priority_delay_second* as well) will not work if it is smaller than *clone_checker_interval_second*.
-
-### clone_max_job_num
-
-Default: 100
-
-IsMutable: true
-
-MasterOnly: true
-
-Concurrency of LOW priority clone jobs. Concurrency of High priority clone jobs is currently unlimited.
-
-### clone_job_timeout_second
-
-Default: 7200 (2 hours)
-
-IsMutable: true
-
-MasterOnly: true
-
-Default timeout of a single clone job. Set it long enough to fit your replica size. The larger the replica data size is, the more time it will take to finish the clone.
-
-### clone_checker_interval_second
-
-Default: 300 (5min)
-
-Clone checker's running interval
-
-### tablet_delete_timeout_second
-
-Default: 2
-
-IsMutable: true
-
-MasterOnly: true
-
-Same meaning as *tablet_create_timeout_second*, but used when deleting a tablet.
-
-### async_loading_load_task_pool_size
-
-Default: 10
-
-IsMutable: false
-
-MasterOnly: true
-
-The loading_load task executor pool size. This pool size limits the max running loading_load tasks.
-
-Currently, it only limits the loading_load task of broker load
-
-### async_pending_load_task_pool_size
-
-Default: 10
-
-IsMutable: false
-
-MasterOnly: true
-
-The pending_load task executor pool size. This pool size limits the max running pending_load tasks.
-
-Currently, it only limits the pending_load task of broker load and spark load.
-
-It should be less than 'max_running_txn_num_per_db'
-
-### async_load_task_pool_size
-
-Default: 10
-
-IsMutable: false
-
-MasterOnly: true
-
-This configuration is just for compatibility with old versions. It has been replaced by async_loading_load_task_pool_size and will be removed in the future.
-
-### disable_show_stream_load
-
-Default: false
-
-IsMutable: true
-
-MasterOnly: true
-
-Whether to disable show stream load and clear stream load records in memory.
-
-### max_stream_load_record_size
-
-Default: 5000
-
-IsMutable: true
-
-MasterOnly: true
-
-Default max number of recent stream load record that can be stored in memory.
-
-### fetch_stream_load_record_interval_second
-
-Default: 120
-
-IsMutable: true
-
-MasterOnly: true
-
-fetch stream load record interval.
-
-### desired_max_waiting_jobs
-
-Default: 100
-
-IsMutable: true
-
-MasterOnly: true
-
-Default max number of waiting jobs for routine load and version 2 of load. This is a desired number. In some situations, such as switching the master, the current number may be more than desired_max_waiting_jobs.
-
-### yarn_config_dir
-
-Default: PaloFe.DORIS_HOME_DIR + "/lib/yarn-config"
-
-Default yarn config file directory. Each time before running the yarn command, we need to check that the config file exists under this path, and if not, create it.
-
-
-### yarn_client_path
-
-Default: DORIS_HOME_DIR + "/lib/yarn-client/hadoop/bin/yarn"
-
-Default yarn client path
-
-### spark_launcher_log_dir
-
-Default: sys_log_dir + "/spark_launcher_log"
-
-The specified spark launcher log dir
-
-### spark_resource_path
-
-Default: none
-
-Default spark dependencies path
-
-### spark_home_default_dir
-
-Default: DORIS_HOME_DIR + "/lib/spark2x"
-
-Default spark home dir
-
-### spark_load_default_timeout_second
-
-Default: 86400 (1 day)
-
-IsMutable: true
-
-MasterOnly: true
-
-Default spark load timeout
-
-### spark_dpp_version
-
-Default: 1.0.0
-
-Default spark dpp version
-
-### hadoop_load_default_timeout_second
-
-Default: 86400 * 3 (3 days)
-
-IsMutable: true
-
-MasterOnly: true
-
-Default hadoop load timeout
-
-### min_load_timeout_second
-
-Default: 1 (1s)
-
-IsMutable: true
-
-MasterOnly: true
-
-Min stream load timeout applicable to all type of load
-
-### max_stream_load_timeout_second
-
-Default: 259200 (3 days)
-
-IsMutable: true
-
-MasterOnly: true
-
-This configuration is specifically used to limit timeout setting for stream load. It is to prevent that failed stream load transactions cannot be canceled within a short time because of the user's large timeout setting
-
-### max_load_timeout_second
-
-Default: 259200 (3 days)
-
-IsMutable: true
-
-MasterOnly: true
-
-Max load timeout applicable to all type of load except for stream load
-
-### stream_load_default_timeout_second
-
-Default: 600 (s)
-
-IsMutable: true
-
-MasterOnly: true
-
-Default stream load and streaming mini load timeout
-
-### insert_load_default_timeout_second
-
-Default: 3600 (1 hour)
-
-IsMutable: true
-
-MasterOnly: true
-
-Default insert load timeout
-
-### mini_load_default_timeout_second
-
-Default: 3600 (1 hour)
-
-IsMutable: true
-
-MasterOnly: true
-
-Default non-streaming mini load timeout
-
-### broker_load_default_timeout_second
-
-Default: 14400 (4 hour)
-
-IsMutable: true
-
-MasterOnly: true
-
-Default broker load timeout
-
-### load_running_job_num_limit
-
-Default: 0
-
-IsMutable: true
-
-MasterOnly: true
-
-Limit on the number of running load jobs. The default is 0, which means no limit.
-
-### load_input_size_limit_gb
-
-Default: 0
-
-IsMutable: true
-
-MasterOnly: true
-
-Limit on the input data size (in GB) of a load job. The default is 0, which means unlimited.
-
-### delete_thread_num
-
-Default: 10
-
-Concurrency of delete jobs.
-
-### load_etl_thread_num_normal_priority
-
-Default: 10
-
-Concurrency of NORMAL priority etl load jobs. Do not change this unless you know what you are doing.
-
-### load_etl_thread_num_high_priority
-
-Default: 3
-
-Concurrency of HIGH priority etl load jobs. Do not change this unless you know what you are doing.
-
-### load_pending_thread_num_normal_priority
-
-Default: 10
-
-Concurrency of NORMAL priority pending load jobs. Do not change this unless you know what you are doing.
-
-### load_pending_thread_num_high_priority
-
-Default: 3
-
-Concurrency of HIGH priority pending load jobs. Load job priority is defined as HIGH or NORMAL. All mini batch load jobs are HIGH priority, and other types of load jobs are NORMAL priority. Priority is used to avoid a slow load job occupying a thread for a long time. This is just an internal optimized scheduling policy. Currently, you cannot specify the job priority manually, and do not change this unless you know what you are doing.
-
-### load_checker_interval_second
-
-Default: 5 (s)
-
-The load scheduler running interval. A load job will transfer its state from PENDING to LOADING to FINISHED. The load scheduler will transfer load job from PENDING to LOADING while the txn callback will transfer load job from LOADING to FINISHED. So a load job will cost at most one interval to finish when the concurrency has not reached the upper limit.
-
-### max_layout_length_per_row
-
-Default: 100000
-
-IsMutable: true
-
-MasterOnly: true
-
-Maximal memory layout length of a row. default is 100 KB. In BE, the maximal size of a RowBlock is 100MB(Configure as max_unpacked_row_block_size in be.conf). And each RowBlock contains 1024 rows. So the maximal size of a row is approximately 100 KB.
- eg.
- schema: k1(int), v1(decimal), v2(varchar(2000))
- then the memory layout length of a row is: 4(int) + 16(decimal) + 2000(varchar) = 2020 (Bytes)
- To see the memory layout length of all types, run 'help create table' in the mysql client.
- If you want to increase this number to support more columns in a row, you also need to increase the
- max_unpacked_row_block_size in be.conf. But the performance impact is unknown.
-
-### load_straggler_wait_second
-
-Default: 300
-
-IsMutable: true
-
-MasterOnly: true
-
-Maximal wait seconds for straggler node in load
- eg.
- there are 3 replicas A, B, C
- load is already quorum finished(A,B) at t1 and C is not finished
- if (current_time - t1) > 300s, then palo will treat C as a failure node
- will call transaction manager to commit the transaction and tell transaction manager
- that C is failed
-
- This is also used when waiting for publish tasks
- this parameter is the default value for all job and the DBA could specify it for separate job
-
-### thrift_server_max_worker_threads
-
-Default: 4096
-
-The thrift server max worker threads
-
-### publish_version_interval_ms
-
-Default: 10 (ms)
-
-minimal intervals between two publish version action
-
-### publish_version_timeout_second
-
-Default: 30 (s)
-
-IsMutable: true
-
-MasterOnly: true
-
-Maximal waiting time for all publish version tasks of one transaction to be finished
-
-### max_create_table_timeout_second
-
-Default: 60 (s)
-
-IsMutable: true
-
-MasterOnly: true
-
-In order not to wait too long for create table(index), set a max timeout.
-
-### tablet_create_timeout_second
-
-Default: 1(s)
-
-IsMutable: true
-
-MasterOnly: true
-
-Maximal waiting time for creating a single replica.
- eg.
- if you create a table with #m tablets and #n replicas for each tablet,
- the create table request will run at most (m * n * tablet_create_timeout_second) before timeout.
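-    For example, with the default value, a create table request for a table with 32 tablets and 3 replicas per tablet will run at most 32 * 3 * 1 = 96 seconds before timing out.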
-
-### max_mysql_service_task_threads_num
-
-Default: 4096
-
-When the FE starts the MySQL server based on the NIO model, this is the number of threads responsible for task events. It takes effect only when `mysql_service_nio_enabled` is true.
-
-### rewrite_count_distinct_to_bitmap_hll
-
-Default: true
-
-This variable is a session variable, and it takes effect at the session level.
-
-- Type: boolean
-- Description: **Only for tables of the AGG model.** When the variable is true and the user query contains aggregate functions such as count(distinct c1): if the type of the c1 column is BITMAP, count distinct will be rewritten as bitmap_union_count(c1); if the type of the c1 column is HLL, count distinct will be rewritten as hll_union_agg(c1). If the variable is false, no rewriting occurs (see the sketch below).
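-
-A minimal sketch of the rewrite, assuming a hypothetical AGGREGATE-model table `example_tbl` whose `c1` column is of type BITMAP:
-
-```sql
-SELECT COUNT(DISTINCT c1) FROM example_tbl;
--- with the variable set to true, the planner rewrites this to the equivalent of:
-SELECT BITMAP_UNION_COUNT(c1) FROM example_tbl;
-```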
-
-### cluster_id
-
-Default: -1
-
-Nodes (FE or BE) will be considered as belonging to the same Palo cluster if they have the same cluster id. The cluster id is usually a random integer generated when the master FE starts for the first time. You can also specify one.
-
-### auth_token
-
-Default: empty
-
-Cluster token used for internal authentication.
-
-### cluster_name
-
-Default: Apache doris
-
-Cluster name will be shown as the title of web page
-
-### mysql_service_io_threads_num
-
-Default: 4
-
-When the FE starts the MySQL server based on the NIO model, this is the number of threads responsible for IO events. It takes effect only when `mysql_service_nio_enabled` is true.
-
-### mysql_service_nio_enabled
-
-Default: true
-
-Whether the FE starts the MySQL server based on the NIO model. It is recommended to turn off this option when the number of query connections is less than 1000 or the concurrency scenario is not high.
-
-### query_port
-
-Default: 9030
-
-FE MySQL server port
-
-### rpc_port
-
-Default: 9020
-
-FE Thrift Server port
-
-### thrift_server_type
-
-This configuration specifies the service model used by the Thrift service of FE. It is of type string and is case-insensitive.
-
-If this parameter is 'SIMPLE', the 'TSimpleServer' model is used. It is generally not suitable for production and is limited to test use.
-
-If this parameter is 'THREADED', the 'TThreadedSelectorServer' model is used. This is a non-blocking I/O model, namely the master-slave Reactor model, which can respond to a large number of concurrent connection requests in a timely manner and performs well in most scenarios.
-
-If this parameter is `THREAD_POOL`, the `TThreadPoolServer` model is used. This is a blocking I/O model that uses a thread pool to handle user connections; the number of simultaneous connections is limited by the size of the thread pool. If the number of concurrent requests can be estimated in advance and enough thread resources can be tolerated, this model performs better. This service model is used by default.
-
-### thrift_backlog_num
-
-Default: 1024
-
-The backlog_num for the thrift server. When you enlarge this backlog_num, you should ensure its value is larger than the linux /proc/sys/net/core/somaxconn config.
-
-### thrift_client_timeout_ms
-
-Default: 0
-
-The connection timeout and socket timeout config for thrift server.
-
-The value for thrift_client_timeout_ms is set to be larger than zero to prevent some hang up problems in java.net.SocketInputStream.socketRead0
-
-### mysql_nio_backlog_num
-
-Default: 1024
-
-The backlog_num for the mysql nio server. When you enlarge this backlog_num, you should enlarge the value in the linux /proc/sys/net/core/somaxconn file at the same time.
-
-### http_backlog_num
-
-Default: 1024
-
-The backlog_num for the netty http server. When you enlarge this backlog_num, you should enlarge the value in the linux /proc/sys/net/core/somaxconn file at the same time.
-
-### http_max_line_length
-
-Default: 4096
-
-The max length of an HTTP URL. The unit of this configuration is BYTE. Defaults to 4096.
-
-### http_max_header_size
-
-Default: 8192
-
-The max size of allowed HTTP headers. The unit of this configuration is BYTE. Defaults to 8192.
-
-### http_max_chunk_size
-
-Default: 8192
-
-### http_port
-
-Default: 8030
-
-HTTP bind port. Defaults to 8030
-
-### http_api_extra_base_path
-
-In some deployment environments, users need to specify an additional base path as the unified prefix of the HTTP API. This parameter is used to specify that prefix. After setting it, users can get the parameter value through the `GET /api/basepath` interface, and the new UI will also try to get this base path first to assemble URLs. Only valid when `enable_http_server_v2` is true.
-
-The default is empty, that is, not set
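-
-For example, a sketch of reading it back (assuming the default http_port of 8030 and a placeholder host):
-
-```text
-curl http://fe_host:8030/api/basepath
-```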
-
-### max_bdbje_clock_delta_ms
-
-Default: 5000 (5s)
-
-Set the maximum acceptable clock skew between non-master FE to Master FE host. This value is checked whenever a non-master FE establishes a connection to master FE via BDBJE. The connection is abandoned if the clock skew is larger than this value.
-
-### ignore_meta_check
-
-Default: false
-
-IsMutable: true
-
-If true, a non-master FE will ignore the metadata delay gap between the Master FE and itself, even if the metadata delay gap exceeds *meta_delay_toleration_second*. Non-master FEs will still offer read service.
-This is helpful when you try to stop the Master FE for a relatively long time for some reason, but still wish the non-master FEs to offer read service.
-
-### metadata_failure_recovery
-
-Default: false
-
-If true, the FE will reset the bdbje replication group (that is, remove all electable nodes info) and is supposed to start as Master. If all the electable nodes cannot start, we can copy the metadata to another node and set this config to true to try to restart the FE.
-
-### priority_networks
-
-Default: none
-
-Declare a selection strategy for servers that have many IPs. Note that at most one IP should match this list. This is a semicolon-delimited list in CIDR notation, e.g. 10.10.10.0/24. If no IP matches this rule, one will be chosen randomly.
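-
-A minimal fe.conf sketch (the CIDR ranges are placeholders):
-
-```text
-priority_networks = 10.10.10.0/24;192.168.0.0/16
-```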
-
-### txn_rollback_limit
-
-Default: 100
-
-the max txn number which bdbje can rollback when trying to rejoin the group
-
-### max_agent_task_threads_num
-
-Default: 4096
-
-MasterOnly: true
-
-max num of thread to handle agent task in agent task thread-pool.
-
-### heartbeat_mgr_blocking_queue_size
-
-Default: 1024
-
-MasterOnly: true
-
-blocking queue size to store heartbeat task in heartbeat_mgr.
-
-### heartbeat_mgr_threads_num
-
-Default: 8
-
-MasterOnly: true
-
-num of thread to handle heartbeat events in heartbeat_mgr.
-
-### bdbje_replica_ack_timeout_second
-
-Default: 10 (s)
-
-The replica ack timeout when writing to bdbje. When writing some relatively large logs, the ack may time out, resulting in log write failure. In this case, you can increase this value appropriately.
-
-### bdbje_lock_timeout_second
-
-Default: 1
-
-The lock timeout of bdbje operations. If there are many LockTimeoutExceptions in the FE WARN log, you can try to increase this value.
-
-### bdbje_heartbeat_timeout_second
-
-Default: 30
-
-The heartbeat timeout of bdbje between master and follower. The default is 30 seconds, which is the same as the default value in bdbje. If the network is experiencing transient problems, or some unexpectedly long Java GC is annoying you, you can try to increase this value to decrease the chance of false timeouts.
-
-### replica_ack_policy
-
-Default: SIMPLE_MAJORITY
-
-OPTION: ALL, NONE, SIMPLE_MAJORITY
-
-Replica ack policy of bdbje. more info, see: http://docs.oracle.com/cd/E17277_02/html/java/com/sleepycat/je/Durability.ReplicaAckPolicy.html
-
-### replica_sync_policy
-
-Default: SYNC
-
-Options: SYNC, NO_SYNC, WRITE_NO_SYNC
-
-Follower FE sync policy of bdbje.
-
-### master_sync_policy
-
-Default: SYNC
-
-Options: SYNC, NO_SYNC, WRITE_NO_SYNC
-
-Master FE sync policy of bdbje. If you only deploy one Follower FE, set this to 'SYNC'. If you deploy more than 3 Follower FE, you can set this and the following 'replica_sync_policy' to WRITE_NO_SYNC. more info, see: http://docs.oracle.com/cd/E17277_02/html/java/com/sleepycat/je/Durability.SyncPolicy.html
-
-### meta_delay_toleration_second
-
-Default: 300 (5 min)
-
-Non-master FE will stop offering service if meta data delay gap exceeds *meta_delay_toleration_second*
-
-### edit_log_roll_num
-
-Default: 50000
-
-IsMutable: true
-
-MasterOnly: true
-
-Master FE will save image every *edit_log_roll_num* meta journals.
-
-### edit_log_port
-
-Default: 9010
-
-bdbje port
-
-### edit_log_type
-
-Default: BDB
-
-Edit log type.
- BDB: write log to bdbje
- LOCAL: deprecated..
-
-### tmp_dir
-
-Default: PaloFe.DORIS_HOME_DIR + "/temp_dir"
-
-The temp dir is used to save intermediate results of some processes, such as backup and restore. Files in this dir will be cleaned after these processes are finished.
-
-### meta_dir
-
-Default: DORIS_HOME_DIR + "/doris-meta"
-
-Type: string. Doris metadata will be saved here. The storage of this dir is highly recommended to be:
-
-- High write performance (SSD)
-- Safe (RAID)
-
-### custom_config_dir
-
-Default: PaloFe.DORIS_HOME_DIR + "/conf"
-
-Configure the location of the `fe_custom.conf` file. The default is in the `conf/` directory.
-
-In some deployment environments, the `conf/` directory may be overwritten due to system upgrades. This will cause the user modified configuration items to be overwritten. At this time, we can store `fe_custom.conf` in another specified directory to prevent the configuration file from being overwritten.
-
-### log_roll_size_mb
-
-Default: 1024 (1G)
-
-The max size of one sys log and audit log
-
-### sys_log_dir
-
-Default: PaloFe.DORIS_HOME_DIR + "/log"
-
-sys_log_dir:
-  This specifies the FE log dir. FE will produce 2 log files:
-  fe.log: all logs of the FE process.
-  fe.warn.log: all WARNING and ERROR logs of the FE process.
-
-### sys_log_level
-
-Default: INFO
-
-log level: INFO, WARNING, ERROR, FATAL
-
-### sys_log_roll_num
-
-Default: 10
-
-Maximal FE log files to be kept within a sys_log_roll_interval. The default is 10, which means there will be at most 10 log files in a day.
-
-### sys_log_verbose_modules
-
-Default: {}
-
-Verbose modules. VERBOSE level is implemented by log4j DEBUG level.
-
-eg:
- sys_log_verbose_modules = org.apache.doris.catalog
- This will only print debug log of files in package org.apache.doris.catalog and all its sub packages.
-
-### sys_log_roll_interval
-
-Default: DAY
-
-sys_log_roll_interval:
-
-- DAY: log suffix is yyyyMMdd
-- HOUR: log suffix is yyyyMMddHH
-
-### sys_log_delete_age
-
-Default: 7d
-
-sys_log_delete_age:
- default is 7 days, if log's last modify time is 7 days ago, it will be deleted.
-
- support format:
- 7d 7 day
- 10h 10 hours
- 60m 60 min
- 120s 120 seconds
-
-
-### audit_log_dir
-
-Default: DORIS_HOME_DIR + "/log"
-
-audit_log_dir:
-  This specifies the FE audit log dir.
-  The audit log fe.audit.log contains all requests with related info such as user, host, cost, status, etc.
-
-### audit_log_roll_num
-
-Default: 90
-
-Maximal FE audit log files to be kept within an audit_log_roll_interval.
-
-### audit_log_modules
-
-Default: {"slow_query", "query", "load", "stream_load"}
-
-Slow queries include all queries whose cost exceeds *qe_slow_log_ms*.
-
-### qe_slow_log_ms
-
-Default: 5000 (5 seconds)
-
-If the response time of a query exceed this threshold, it will be recorded in audit log as slow_query.
-
-### audit_log_roll_interval
-
-Default: DAY
-
-DAY: log suffix is yyyyMMdd
-HOUR: log suffix is yyyyMMddHH
-
-### audit_log_delete_age
-
-Default: 30d
-
-default is 30 days, if log's last modify time is 30 days ago, it will be deleted.
-
- support format:
- 7d 7 day
- 10h 10 hours
- 60m 60 min
- 120s 120 seconds
-
-### plugin_dir
-
-Default: DORIS_HOME + "/plugins"
-
-plugin install directory
-
-### plugin_enable
-
-Default: true
-
-IsMutable: true
-
-MasterOnly: true
-
-Whether the plug-in is enabled, enabled by default
-
-### label_keep_max_second
-
-Default: 3 * 24 * 3600 (3day)
-
-IsMutable: true
-
-MasterOnly: true
-
-Labels of finished or cancelled load jobs will be removed after *label_keep_max_second*. The removed labels can be reused. Setting a short time will lower FE memory usage, because all load jobs' info is kept in memory before being removed.
-
-In the case of highly concurrent writes, if there is a large backlog of jobs and calls to the frontend service fail, check the log. If the metadata write lock is held for too long, you can lower this value, for example to 12 hours or even 6 hours.
-
-### streaming_label_keep_max_second
-
-Default: 43200 (12 hour)
-
-IsMutable: true
-
-MasterOnly: true
-
-The label keep time for some high-frequency load jobs, such as INSERT, STREAMING LOAD, and ROUTINE_LOAD_TASK. Completed jobs or tasks whose labels have expired will be deleted.
-
-### history_job_keep_max_second
-
-Default: 7 * 24 * 3600 (7 day)
-
-IsMutable: true
-
-MasterOnly: true
-
-The max keep time of some kinds of jobs, like schema change jobs and rollup jobs.
-
-### label_clean_interval_second
-
-Default: 4 * 3600 (4 hour)
-
-Load label cleaner will run every *label_clean_interval_second* to clean the outdated jobs.
-
-### delete_info_keep_max_second
-
-Default: 3 * 24 * 3600 (3day)
-
-IsMutable: true
-
-MasterOnly: false
-
-Delete all deleteInfo older than *delete_info_keep_max_second*. Setting a shorter time will reduce FE memory usage and image file size, because all deleteInfo is stored in memory and image files before being deleted.
-
-### transaction_clean_interval_second
-
-Default: 30
-
-The transaction will be cleaned after transaction_clean_interval_second seconds if it is visible or aborted. We should make this interval as short as possible, and each clean cycle should finish as soon as possible.
-
-
-### default_max_query_instances
-
-The default value used when the user property max_query_instances is less than or equal to 0. This config is used to limit the max number of query instances for a user. A value less than or equal to 0 means unlimited.
-
-The default value is -1
-
-### use_compact_thrift_rpc
-
-Default: true
-
-Whether to use compressed format to send query plan structure. After it is turned on, the size of the query plan structure can be reduced by about 50%, thereby avoiding some "send fragment timeout" errors.
-However, in some high-concurrency small query scenarios, the concurrency may be reduced by about 10%.
-
-### enable_force_drop_redundant_replica
-
-Default: false
-
-Dynamically configured: true
-
-Only for Master FE: true
-
-If set to true, the system will immediately drop redundant replicas in the tablet scheduling logic. This may cause some load jobs that are writing to the corresponding replica to fail, but it will speed up the balance and repair speed of the tablet.
-When there are a large number of replicas waiting to be balanced or repaired in the cluster, you can try to set this config to speed up the balance and repair of replicas at the expense of partial load success rate.
-
-### repair_slow_replica
-
-Default: false
-
-IsMutable: true
-
-MasterOnly: true
-
-If set to true, the replica with slower compaction will be automatically detected and migrated to other machines. The detection condition is that the version count of the fastest replica exceeds the value of `min_version_count_indicate_replica_compaction_too_slow`, and the ratio of the version count difference from the fastest replica exceeds the value of `valid_version_count_delta_ratio_between_replicas`
-
-### colocate_group_relocate_delay_second
-
-Default: 1800
-
-Dynamically configured: true
-
-Only for Master FE: true
-
-The relocation of a colocation group may involve a large number of tablets moving within the cluster. Therefore, we should use a more conservative strategy to avoid relocation of colocation groups as much as possible.
-Relocation usually occurs after a BE node goes offline or goes down. This parameter is used to delay the determination of BE node unavailability. The default is 30 minutes, i.e., if a BE node recovers within 30 minutes, relocation of the colocation group will not be triggered.
-
-### allow_replica_on_same_host
-
-Default: false
-
-Dynamically configured: false
-
-Only for Master FE: false
-
-Whether to allow multiple replicas of the same tablet to be distributed on the same host. This parameter is mainly used for local testing, to facilitate building multiple BEs to test certain multi-replica situations. Do not use it for non-test environments.
-
-### min_version_count_indicate_replica_compaction_too_slow
-
-Default: 300
-
-Dynamically configured: true
-
-Only for Master FE: true
-
-The version count threshold used to judge whether replica compaction is too slow
-
-### valid_version_count_delta_ratio_between_replicas
-
-Default: 0.5
-
-Dynamically configured: true
-
-Only for Master FE: true
-
-The valid ratio threshold of the difference between the version count of the slowest replica and the fastest replica. If `repair_slow_replica` is set to true, it is used to determine whether to repair the slowest replica
-
-### min_bytes_indicate_replica_too_large
-
-Default: 2 * 1024 * 1024 * 1024 (2G)
-
-Dynamically configured: true
-
-Only for Master FE: true
-
-The data size threshold used to judge whether replica is too large
-
-### skip_compaction_slower_replica
-
-Default: true
-
-Dynamically configured: true
-
-Only for Master FE: false
-
-If set to true, replicas with slower compaction will be skipped when selecting queryable replicas.
-
-### enable_create_sync_job
-
-Enable the MySQL data synchronization job function. The default is false, i.e. the function is turned off.
-
-Default: false
-
-Is it possible to configure dynamically: true
-
-Whether it is a configuration item unique to the Master FE node: true
-
-### sync_commit_interval_second
-
-The maximum time interval for committing transactions. If there is still data in the channel that has not been submitted after this time, the consumer will notify the channel to submit the transaction.
-
-Default: 10 (seconds)
-
-Is it possible to configure dynamically: true
-
-Whether it is a configuration item unique to the Master FE node: true
-
-### min_sync_commit_size
-
-The minimum number of events that must be satisfied to commit a transaction. If the number of events received by FE is less than this, it will continue to wait for the next batch of data until the time exceeds `sync_commit_interval_second`. The default value is 10000 events. If you want to modify this configuration, please make sure that this value is smaller than the `canal.instance.memory.buffer.size` configuration on the canal side (default 16384); otherwise FE will try to fetch more events than the length of the store queue before acking, causing the store queue to block until it times out.
-
-Default: 10000
-
-Is it possible to configure dynamically: true
-
-Whether it is a configuration item unique to the Master FE node: true
-
-### min_bytes_sync_commit
-
-The minimum data size required to commit a transaction. If the data size received by FE is smaller than this, it will continue to wait for the next batch of data until the time exceeds `sync_commit_interval_second`. The default value is 15 MB. If you want to modify this configuration, please make sure this value is less than the product of `canal.instance.memory.buffer.size` and `canal.instance.memory.buffer.memunit` on the canal side (default 16 MB); otherwise, before acking, FE will try to obtain more data than the store space holds, causing the store queue to block until it times out.
-
-Default: 15*1024*1024 (15M)
-
-Is it possible to configure dynamically: true
-
-Whether it is a configuration item unique to the Master FE node: true
-
-### max_bytes_sync_commit
-
- The maximum number of threads in the data synchronization job thread pool. There is only one thread pool in the entire FE, which is used to process all data synchronization tasks in the FE that send data to the BE. The implementation of the thread pool is in the `SyncTaskPool` class.
-
-Default: 10
-
-Is it possible to dynamically configure: false
-
-Is it a configuration item unique to the Master FE node: false
diff --git a/docs/en/administrator-guide/config/user_property.md b/docs/en/administrator-guide/config/user_property.md
deleted file mode 100644
index 27a2d3653f..0000000000
--- a/docs/en/administrator-guide/config/user_property.md
+++ /dev/null
@@ -1,73 +0,0 @@
----
-{
- "title": "User Property",
- "language": "en"
-}
----
-
-
-
-# User configuration item
-
-This document mainly introduces configuration items at the User level. User-level configuration is effective mainly for a single user. Each user can set their own User properties, which do not affect each other.
-
-## View configuration items
-
-After the FE is started, on the MySQL client, use the following command to view the User configuration items:
-
-`SHOW PROPERTY [FOR user] [LIKE key pattern]`
-
-The specific syntax can be queried through the command: `help show property;`.
-
-## Set configuration items
-
-After FE is started, on the MySQL client, modify the User configuration items with the following command:
-
-`SET PROPERTY [FOR 'user'] 'key' = 'value' [, 'key' = 'value']`
-
-The specific syntax can be queried through the command: `help set property;`.
-
-User-level configuration items will only take effect for the specified users, and will not affect the configuration of other users.
-
-## Application examples
-
-1. Modify the max_user_connections of user Billie
-
-    Use `SHOW PROPERTY FOR 'Billie' LIKE '%max_user_connections%';` to check that the current maximum number of connections for the user Billie is 100.
-
-    Use `SET PROPERTY FOR 'Billie' 'max_user_connections' = '200';` to change the maximum number of connections for the user Billie to 200.
-
-## Configuration item list
-
-### max_user_connections
-
- The maximum number of user connections; the default value is 100. In general, this parameter does not need to be changed unless the number of concurrent queries exceeds the default value.
-
-### max_query_instances
-
- The maximum number of instances that the user can use at a certain point in time. The default value is -1; a negative number means the default_max_query_instances config is used.
-
-### resource
-
-### quota
-
-### default_load_cluster
-
-### load_cluster
diff --git a/docs/en/administrator-guide/dynamic-partition.md b/docs/en/administrator-guide/dynamic-partition.md
deleted file mode 100644
index bf4c1d56ef..0000000000
--- a/docs/en/administrator-guide/dynamic-partition.md
+++ /dev/null
@@ -1,464 +0,0 @@
----
-{
- "title": "Dynamic Partition",
- "language": "en"
-}
----
-
-
-
-# Dynamic Partition
-
-Dynamic partition is a new feature introduced in Doris version 0.12. It is designed to manage a partition's Time-to-Live (TTL), reducing the burden on users.
-
-At present, the functions of dynamically adding and deleting partitions are implemented.
-
-Dynamic partitioning is only supported for Range partitions.
-
-## Noun Interpretation
-
-* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access.
-* BE: Backend, Doris's back-end node. Responsible for query execution and data storage.
-
-## Principle
-
-In some usage scenarios, the user will partition the table according to the day and perform routine tasks regularly every day. At this time, the user needs to manually manage the partition. Otherwise, the data load may fail because the user does not create a partition. This brings additional maintenance costs to the user.
-
-Through the dynamic partitioning feature, users can set the rules of dynamic partitioning when building tables. FE will start a background thread to create or delete partitions according to the rules specified by the user. Users can also change existing rules at runtime.
-
-## Usage
-
-### Establishment of tables
-
-The rules for dynamic partitioning can be specified when the table is created or modified at runtime. Currently, dynamic partition rules can only be set for partitioned tables with a single partition column.
-
-* Specified when creating table
-
- ```
- CREATE TABLE tbl1
- (...)
- PROPERTIES
- (
- "dynamic_partition.prop1" = "value1",
- "dynamic_partition.prop2" = "value2",
- ...
- )
- ```
-
-* Modify at runtime
-
- ```
- ALTER TABLE tbl1 SET
- (
- "dynamic_partition.prop1" = "value1",
- "dynamic_partition.prop2" = "value2",
- ...
- )
- ```
-
-### Dynamic partition rule parameters
-
-The rules of dynamic partition are prefixed with `dynamic_partition.`:
-
-* `dynamic_partition.enable`
-
- Whether to enable the dynamic partition feature. Can be specified as `TRUE` or` FALSE`. If not filled, the default is `TRUE`. If it is `FALSE`, Doris will ignore the dynamic partitioning rules of the table.
-
-* `dynamic_partition.time_unit`
-
-  The unit for dynamic partition scheduling. Can be specified as `HOUR`, `DAY`, `WEEK`, or `MONTH`, meaning to create or delete partitions by hour, day, week, or month, respectively.
-
- When specified as `HOUR`, the suffix format of the dynamically created partition name is `yyyyMMddHH`, for example, `2020032501`. *When the time unit is HOUR, the data type of partition column cannot be DATE.*
-
- When specified as `DAY`, the suffix format of the dynamically created partition name is `yyyyMMdd`, for example, `20200325`.
-
- When specified as `WEEK`, the suffix format of the dynamically created partition name is `yyyy_ww`. That is, the week of the year of current date. For example, the suffix of the partition created for `2020-03-25` is `2020_13`, indicating that it is currently the 13th week of 2020.
-
- When specified as `MONTH`, the suffix format of the dynamically created partition name is `yyyyMM`, for example, `202003`.
-
-* `dynamic_partition.time_zone`
-
-  The time zone of the dynamic partition. If not filled in, it defaults to the time zone of the current machine's system, such as `Asia/Shanghai`. The supported time zones can be found at `https://en.wikipedia.org/wiki/List_of_tz_database_time_zones`.
-
-* `dynamic_partition.start`
-
- The starting offset of the dynamic partition, usually a negative number. Depending on the `time_unit` attribute, based on the current day (week / month), the partitions with a partition range before this offset will be deleted. If not filled, the default is `-2147483648`, that is, the history partition will not be deleted.
-
-* `dynamic_partition.end`
-
- The end offset of the dynamic partition, usually a positive number. According to the difference of the `time_unit` attribute, the partition of the corresponding range is created in advance based on the current day (week / month).
-
-* `dynamic_partition.prefix`
-
- The dynamically created partition name prefix.
-
-* `dynamic_partition.buckets`
-
- The number of buckets corresponding to the dynamically created partitions.
-
-* `dynamic_partition.replication_num`
-
-  The replication number of the dynamically created partitions. If not filled in, it defaults to the table's replication number.
-
-* `dynamic_partition.start_day_of_week`
-
- When `time_unit` is` WEEK`, this parameter is used to specify the starting point of the week. The value ranges from 1 to 7. Where 1 is Monday and 7 is Sunday. The default is 1, which means that every week starts on Monday.
-
-* `dynamic_partition.start_day_of_month`
-
- When `time_unit` is` MONTH`, this parameter is used to specify the start date of each month. The value ranges from 1 to 28. 1 means the 1st of every month, and 28 means the 28th of every month. The default is 1, which means that every month starts at 1st. The 29, 30 and 31 are not supported at the moment to avoid ambiguity caused by lunar years or months.
-
-* `dynamic_partition.create_history_partition`
-
- The default is false. When set to true, Doris will automatically create all partitions, as described in the creation rules below. At the same time, the parameter `max_dynamic_partition_num` of FE will limit the total number of partitions to avoid creating too many partitions at once. When the number of partitions expected to be created is greater than `max_dynamic_partition_num`, the operation will fail.
-
- When the `start` attribute is not specified, this parameter has no effect.
-
-* `dynamic_partition.history_partition_num`
-
- When `create_history_partition` is `true`, this parameter is used to specify the number of history partitions. The default value is -1, which means it is not set.
-
-* `dynamic_partition.hot_partition_num`
-
- Specify how many of the latest partitions are hot partitions. For hot partition, the system will automatically set its `storage_medium` parameter to SSD, and set `storage_cooldown_time`.
-
-  With `hot_partition_num` set to n, the hot partitions are those of the previous n days (including today) plus all partitions in the future.
-
-
- Let us give an example. Suppose today is 2021-05-20, partition by day, and the properties of dynamic partition are set to: hot_partition_num=2, end=3, start=-3. Then the system will automatically create the following partitions, and set the `storage_medium` and `storage_cooldown_time` properties:
-
- ```
- p20210517: ["2021-05-17", "2021-05-18") storage_medium=HDD storage_cooldown_time=9999-12-31 23:59:59
- p20210518: ["2021-05-18", "2021-05-19") storage_medium=HDD storage_cooldown_time=9999-12-31 23:59:59
- p20210519: ["2021-05-19", "2021-05-20") storage_medium=SSD storage_cooldown_time=2021-05-21 00:00:00
- p20210520: ["2021-05-20", "2021-05-21") storage_medium=SSD storage_cooldown_time=2021-05-22 00:00:00
- p20210521: ["2021-05-21", "2021-05-22") storage_medium=SSD storage_cooldown_time=2021-05-23 00:00:00
- p20210522: ["2021-05-22", "2021-05-23") storage_medium=SSD storage_cooldown_time=2021-05-24 00:00:00
- p20210523: ["2021-05-23", "2021-05-24") storage_medium=SSD storage_cooldown_time=2021-05-25 00:00:00
- ```
-
-
-* `dynamic_partition.reserved_history_periods`
-
- The range of reserved history periods. It should be in the form of `[yyyy-MM-dd,yyyy-MM-dd],[...,...]` when `dynamic_partition.time_unit` is "DAY", "WEEK" or "MONTH", and in the form of `[yyyy-MM-dd HH:mm:ss,yyyy-MM-dd HH:mm:ss],[...,...]` when `dynamic_partition.time_unit` is "HOUR". No extra spaces are allowed. The default value is `"NULL"`, which means it is not set.
-
- Let us give an example. Suppose today is 2021-09-06,partitioned by day, and the properties of dynamic partition are set to:
-
- ```time_unit="DAY/WEEK/MONTH", end=3, start=-3, reserved_history_periods="[2020-06-01,2020-06-20],[2020-10-31,2020-11-15]"```.
-
- Then the system will automatically reserve the partitions in the following periods:
-
- ```
- ["2020-06-01","2020-06-20"],
- ["2020-10-31","2020-11-15"]
- ```
- or
-
- ```time_unit="HOUR", end=3, start=-3, reserved_history_periods="[2020-06-01 00:00:00,2020-06-01 03:00:00]"```.
-
- Then the system will automatically reserve the partitions in the following period:
-
- ```
- ["2020-06-01 00:00:00","2020-06-01 03:00:00"]
- ```
-
- Note that each `[...,...]` in `reserved_history_periods` is a pair of values that must be set together, and the first date cannot be later than the second one.
-
-
-#### Create History Partition Rules
-
-When `create_history_partition` is `true`, i.e. history partition creation is enabled, Doris determines the number of history partitions to be created based on `dynamic_partition.start` and `dynamic_partition.history_partition_num`.
-
-Assuming the number of history partitions to be created is `expect_create_partition_num`, the number is as follows according to different settings.
-
-1. `create_history_partition` = `true`
- - `dynamic_partition.history_partition_num` is not set, i.e. -1.
- `expect_create_partition_num` = `end` - `start`;
-
- - `dynamic_partition.history_partition_num` is set
- `expect_create_partition_num` = `end` - max(`start`, `-history_partition_num`);
-
-2. `create_history_partition` = `false`
- No history partition will be created, `expect_create_partition_num` = `end` - 0;
-
-When `expect_create_partition_num` is greater than `max_dynamic_partition_num` (default 500), the operation is prohibited in order to avoid creating too many partitions at once.
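-
-If `expect_create_partition_num` exceeds the limit, you can check the current value and, assuming your version allows changing this FE configuration at runtime, raise it. A minimal sketch; the value 1000 is only an illustration:
-
-```
-ADMIN SHOW FRONTEND CONFIG LIKE 'max_dynamic_partition_num';
-ADMIN SET FRONTEND CONFIG ("max_dynamic_partition_num" = "1000");
-```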
-
-**Examples:**
-
-1. Suppose today is 2021-05-20, partition by day, and the attributes of dynamic partition are set to `create_history_partition=true, end=3, start=-3, history_partition_num=1`, then the system will automatically create the following partitions.
-
- ```
- p20210519
- p20210520
- p20210521
- p20210522
- p20210523
- ```
-
-2. `history_partition_num=5` and keep the rest attributes as in 1, then the system will automatically create the following partitions.
-
- ```
- p20210517
- p20210518
- p20210519
- p20210520
- p20210521
- p20210522
- p20210523
- ```
-
-3. `history_partition_num=-1` i.e., if you do not set the number of history partitions and keep the rest of the attributes as in 1, the system will automatically create the following partitions.
-
- ```
- p20210517
- p20210518
- p20210519
- p20210520
- p20210521
- p20210522
- p20210523
- ```
-
-### Notice
-
-If some partitions between `dynamic_partition.start` and `dynamic_partition.end` are lost due to some unexpected circumstances when using dynamic partition, the lost partitions between the current time and `dynamic_partition.end` will be recreated, but the lost partitions between `dynamic_partition.start` and the current time will not be recreated.
-
-### Example
-
-1. Table `tbl1` has partition column k1 of type DATE. Create a dynamic partition rule: partition by day, keep only the partitions of the last 7 days, and create the partitions of the next 3 days in advance.
-
- ```
- CREATE TABLE tbl1
- (
- k1 DATE,
- ...
- )
- PARTITION BY RANGE(k1) ()
- DISTRIBUTED BY HASH(k1)
- PROPERTIES
- (
- "dynamic_partition.enable" = "true",
- "dynamic_partition.time_unit" = "DAY",
- "dynamic_partition.start" = "-7",
- "dynamic_partition.end" = "3",
- "dynamic_partition.prefix" = "p",
- "dynamic_partition.buckets" = "32"
- );
- ```
-
- Suppose the current date is 2020-05-29. According to the above rules, tbl1 will produce the following partitions:
-
- ```
- p20200529: ["2020-05-29", "2020-05-30")
- p20200530: ["2020-05-30", "2020-05-31")
- p20200531: ["2020-05-31", "2020-06-01")
- p20200601: ["2020-06-01", "2020-06-02")
- ```
-
- On the next day, 2020-05-30, a new partition will be created: `p20200602: ["2020-06-02", "2020-06-03")`
-
- On 2020-06-06, because `dynamic_partition.start` is set to -7, the partition from 7 days ago will be deleted, that is, the partition `p20200529` will be deleted.
-
-2. Table tbl1 has partition column k1 of type DATETIME. Create a dynamic partition rule: partition by week, keep only the partitions of the last 2 weeks, and create the partitions of the next 2 weeks in advance.
-
- ```
- CREATE TABLE tbl1
- (
- k1 DATETIME,
- ...
- )
- PARTITION BY RANGE(k1) ()
- DISTRIBUTED BY HASH(k1)
- PROPERTIES
- (
- "dynamic_partition.enable" = "true",
- "dynamic_partition.time_unit" = "WEEK",
- "dynamic_partition.start" = "-2",
- "dynamic_partition.end" = "2",
- "dynamic_partition.prefix" = "p",
- "dynamic_partition.buckets" = "8"
- );
- ```
-
- Suppose the current date is 2020-05-29, which is the 22nd week of 2020. The default week starts on Monday. Based on the above rules, tbl1 will produce the following partitions:
-
- ```
- p2020_22: ["2020-05-25 00:00:00", "2020-06-01 00:00:00")
- p2020_23: ["2020-06-01 00:00:00", "2020-06-08 00:00:00")
- p2020_24: ["2020-06-08 00:00:00", "2020-06-15 00:00:00")
- ```
-
- The start date of each partition is the Monday of that week. At the same time, because the type of the partition column k1 is DATETIME, the partition values also include the hour, minute and second fields, all of which are 0.
-
- On 2020-06-15, the 25th week, the partition from 2 weeks ago, i.e. `p2020_22`, will be deleted.
-
- In the above example, suppose the user specifies the start day of the week as `"dynamic_partition.start_day_of_week" = "3"`, that is, Wednesday is set as the start of the week. The partitions are as follows:
-
- ```
- p2020_22: ["2020-05-27 00:00:00", "2020-06-03 00:00:00")
- p2020_23: ["2020-06-03 00:00:00", "2020-06-10 00:00:00")
- p2020_24: ["2020-06-10 00:00:00", "2020-06-17 00:00:00")
- ```
-
- That is, the partition ranges from Wednesday of the current week to Tuesday of the next week.
-
- * Note: 2019-12-31 and 2020-01-01 are in the same week. If the starting date of the partition is 2019-12-31, the partition is named `p2019_53`; if the starting date of the partition is 2020-01-01, the partition is named `p2020_01`.
-
-3. Table tbl1 has partition column k1 of type DATE. Create a dynamic partition rule: partition by month, do not delete historical partitions, and create the partitions for the next 2 months in advance. At the same time, set the 3rd of each month as the starting date.
-
- ```
- CREATE TABLE tbl1
- (
- k1 DATE,
- ...
- )
- PARTITION BY RANGE(k1) ()
- DISTRIBUTED BY HASH(k1)
- PROPERTIES
- (
- "dynamic_partition.enable" = "true",
- "dynamic_partition.time_unit" = "MONTH",
- "dynamic_partition.end" = "2",
- "dynamic_partition.prefix" = "p",
- "dynamic_partition.buckets" = "8",
- "dynamic_partition.start_day_of_month" = "3"
- );
- ```
-
- Suppose the current date is 2020-05-29. Based on the above rules, tbl1 will produce the following partitions:
-
- ```
- p202005: ["2020-05-03", "2020-06-03")
- p202006: ["2020-06-03", "2020-07-03")
- p202007: ["2020-07-03", "2020-08-03")
- ```
-
- Because `dynamic_partition.start` is not set, the historical partition will not be deleted.
-
- Assuming that today is 2020-05-20 and the 28th is set as the start day of each month, the partition ranges would be:
-
- ```
- p202004: ["2020-04-28", "2020-05-28")
- p202005: ["2020-05-28", "2020-06-28")
- p202006: ["2020-06-28", "2020-07-28")
- ```
-
-### Modify Dynamic Partition Properties
-
-You can modify the properties of the dynamic partition with the following command
-
-```
-ALTER TABLE tbl1 SET
-(
- "dynamic_partition.prop1" = "value1",
- ...
-);
-```
-
-The modification of certain attributes may cause conflicts. Assume that the partition granularity was DAY and the following partitions have been created:
-
-```
-p20200519: ["2020-05-19", "2020-05-20")
-p20200520: ["2020-05-20", "2020-05-21")
-p20200521: ["2020-05-21", "2020-05-22")
-```
-
-If the partition granularity is changed to MONTH at this time, the system will try to create a partition with the range `["2020-05-01", "2020-06-01")`. This range conflicts with the existing partitions, so it cannot be created, while the partition with the range `["2020-06-01", "2020-07-01")` can be created normally. Therefore, the partitions covering 2020-05-22 to 2020-05-30 need to be created manually.
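-
-A minimal sketch of filling this gap by hand. The partition name is illustrative, and dynamic partitioning is temporarily disabled because, as described later in this document, manual partition operations are not allowed while it is enabled:
-
-```
-ALTER TABLE tbl1 SET ("dynamic_partition.enable" = "false");
-ALTER TABLE tbl1 ADD PARTITION p20200522_0601 VALUES [("2020-05-22"), ("2020-06-01"));
-ALTER TABLE tbl1 SET ("dynamic_partition.enable" = "true");
-```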
-
-### Check Dynamic Partition Table Scheduling Status
-
-You can further view the scheduling of dynamic partitioned tables by using the following command:
-
-```
-mysql> SHOW DYNAMIC PARTITION TABLES;
-+-----------+--------+----------+-------------+------+--------+---------+-----------+----------------+---------------------+--------+------------------------+----------------------+-------------------------+
-| TableName | Enable | TimeUnit | Start | End | Prefix | Buckets | StartOf | LastUpdateTime | LastSchedulerTime | State | LastCreatePartitionMsg | LastDropPartitionMsg | ReservedHistoryPeriods |
-+-----------+--------+----------+-------------+------+--------+---------+-----------+----------------+---------------------+--------+------------------------+----------------------+-------------------------+
-| d3 | true | WEEK | -3 | 3 | p | 1 | MONDAY | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | [2021-12-01,2021-12-31] |
-| d5 | true | DAY | -7 | 3 | p | 32 | N/A | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL |
-| d4 | true | WEEK | -3 | 3 | p | 1 | WEDNESDAY | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL |
-| d6 | true | MONTH | -2147483648 | 2 | p | 8 | 3rd | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL |
-| d2 | true | DAY | -3 | 3 | p | 32 | N/A | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL |
-| d7 | true | MONTH | -2147483648 | 5 | p | 8 | 24th | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL |
-+-----------+--------+----------+-------------+------+--------+---------+-----------+----------------+---------------------+--------+------------------------+----------------------+-------------------------+
-7 rows in set (0.02 sec)
-```
-
-* LastUpdateTime: The last time of modifying dynamic partition properties
-* LastSchedulerTime: The last time of performing dynamic partition scheduling
-* State: The state of the last execution of dynamic partition scheduling
-* LastCreatePartitionMsg: Error message of the last time to dynamically add partition scheduling
-* LastDropPartitionMsg: Error message of the last execution of dynamic deletion partition scheduling
-
-## Advanced Operation
-
-### FE Configuration Item
-
-* dynamic\_partition\_enable
-
- Whether to enable Doris's dynamic partition feature. The default value is false, which is off. This parameter only affects the partitioning operation of dynamic partition tables, not normal tables. You can modify the parameters in `fe.conf` and restart FE to take effect. You can also execute the following commands at runtime to take effect:
-
- MySQL protocol:
-
- `ADMIN SET FRONTEND CONFIG ("dynamic_partition_enable" = "true")`
-
- HTTP protocol:
-
- `curl --location-trusted -u username:password -XGET http://fe_host:fe_http_port/api/_set_config?dynamic_partition_enable=true`
-
- To turn off dynamic partitioning globally, set this parameter to false.
-
-* dynamic\_partition\_check\_interval\_seconds
-
- The execution frequency of the dynamic partition thread defaults to 3600 seconds (1 hour), that is, scheduling is performed every hour. You can modify the parameter in `fe.conf` and restart FE to take effect. You can also modify it at runtime with the following commands:
-
- MySQL protocol:
-
- `ADMIN SET FRONTEND CONFIG ("dynamic_partition_check_interval_seconds" = "7200")`
-
- HTTP protocol:
-
- `curl --location-trusted -u username:password -XGET http://fe_host:fe_http_port/api/_set_config?dynamic_partition_check_interval_seconds=432000`
-
-### Converting dynamic and manual partition tables to each other
-
-For a table, dynamic and manual partitioning can be freely converted, but they cannot be in effect at the same time; a table is always in exactly one of the two states.
-
-#### Converting Manual Partitioning to Dynamic Partitioning
-
-If a table is not dynamically partitioned when it is created, it can be converted to dynamic partitioning at runtime by modifying the dynamic partitioning properties with `ALTER TABLE`, an example of which can be seen with `HELP ALTER TABLE`.
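-
-A minimal sketch of such a conversion, with illustrative property values:
-
-```
-ALTER TABLE tbl1 SET
-(
-    "dynamic_partition.enable" = "true",
-    "dynamic_partition.time_unit" = "DAY",
-    "dynamic_partition.start" = "-7",
-    "dynamic_partition.end" = "3",
-    "dynamic_partition.prefix" = "p",
-    "dynamic_partition.buckets" = "32"
-);
-```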
-
-When dynamic partitioning feature is enabled, Doris will no longer allow users to manage partitions manually, but will automatically manage partitions based on dynamic partition properties.
-
-**NOTICE**: If `dynamic_partition.start` is set, historical partitions with a partition range before the start offset of the dynamic partition will be deleted.
-
-#### Converting Dynamic Partitioning to Manual Partitioning
-
-The dynamic partitioning feature can be disabled by executing `ALTER TABLE tbl_name SET ("dynamic_partition.enable" = "false")`, converting the table to a manually partitioned table.
-
-When dynamic partitioning feature is disabled, Doris will no longer manage partitions automatically, and users will have to create or delete partitions manually by using `ALTER TABLE`.
-
-## Common problem
-
-1. When creating a dynamic partition table, the error ```Could not create table with dynamic partition when fe config dynamic_partition_enable is false``` is reported
-
- This is because the main switch of dynamic partitioning, the FE configuration ```dynamic_partition_enable```, is false, so dynamic partition tables cannot be created.
-
- In this case, please modify the FE configuration file, add the line ```dynamic_partition_enable=true```, and restart FE; or execute the command ```ADMIN SET FRONTEND CONFIG ("dynamic_partition_enable" = "true")``` to turn on the dynamic partition switch.
diff --git a/docs/en/administrator-guide/export-manual.md b/docs/en/administrator-guide/export-manual.md
deleted file mode 100644
index df2dbbb4df..0000000000
--- a/docs/en/administrator-guide/export-manual.md
+++ /dev/null
@@ -1,198 +0,0 @@
----
-{
- "title": "Data export",
- "language": "en"
-}
----
-
-
-
-# Data export
-
-Export is a function provided by Doris to export data. It can export the data of a user-specified table or partition, in text format, through the Broker process to remote storage such as HDFS/BOS.
-
-This document mainly introduces the basic principles, usage, best practices and precautions of Export.
-
-## Noun Interpretation
-
-* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access.
-* BE: Backend, Doris's back-end node. Responsible for query execution and data storage.
-* Broker: Doris can manipulate files for remote storage through the Broker process.
-* Tablet: Data fragmentation. A table is divided into multiple data fragments.
-
-## Principle
-
-After the user submits an Export job, Doris counts all Tablets involved in this job. These tablets are then grouped, and a special query plan is generated for each group. The query plan reads the data on the included tablets and then writes the data to the specified path on the remote storage through the Broker. It can also export directly to remote storage that supports the S3 protocol.
-
-The overall mode of dispatch is as follows:
-
-```
-+--------+
-| Client |
-+---+----+
- | 1. Submit Job
- |
-+---v--------------------+
-| FE |
-| |
-| +-------------------+ |
-| | ExportPendingTask | |
-| +-------------------+ |
-| | 2. Generate Tasks
-| +--------------------+ |
-| | ExportExporingTask | |
-| +--------------------+ |
-| |
-| +-----------+ | +----+ +------+ +---------+
-| | QueryPlan +----------------> BE +--->Broker+---> |
-| +-----------+ | +----+ +------+ | Remote |
-| +-----------+ | +----+ +------+ | Storage |
-| | QueryPlan +----------------> BE +--->Broker+---> |
-| +-----------+ | +----+ +------+ +---------+
-+------------------------+ 3. Execute Tasks
-
-```
-
-1. The user submits an Export job to FE.
-2. FE's Export scheduler performs an Export job in two stages:
- 1. PENDING: FE generates an ExportPendingTask, sends a snapshot command to BE to take a snapshot of all Tablets involved, and generates multiple query plans.
- 2. EXPORTING: FE generates Export ExportingTask and starts executing the query plan.
-
-### query plan splitting
-
-The Export job generates multiple query plans, each of which scans a portion of the Tablet. The number of Tablets scanned by each query plan is specified by the FE configuration parameter `export_tablet_num_per_task`, which defaults to 5. That is, assuming a total of 100 Tablets, 20 query plans will be generated. Users can also specify this number by the job attribute `tablet_num_per_task`, when submitting a job.
-
-Multiple query plans for a job are executed sequentially.
-
-### Query Plan Execution
-
-A query plan scans multiple tablets, organizes the read data in rows, batches every 1024 rows, and calls the Broker to write the data to remote storage.
-
-The query plan will automatically retry three times if it encounters errors. If a query plan fails three retries, the entire job fails.
-
-Doris will first create a temporary directory named `doris_export_tmp_12345` (where `12345` is the job id) in the specified remote storage path. The exported data is first written to this temporary directory. Each query plan generates a file with an example file name:
-
-`export-data-c69fcf2b6db5420f-a96b94c1ff8bccef-1561453713822`
-
-Among them, `c69fcf2b6db5420f-a96b94c1ff8bccef` is the query ID of the query plan, and `1561453713822` is the timestamp when the file was generated.
-
-When all data is exported, Doris will rename these files to the user-specified path.
-
-## Use examples
-
-The detailed syntax of Export can be viewed via `HELP EXPORT;`. An example is as follows:
-
-```
-EXPORT TABLE db1.tbl1
-PARTITION (p1,p2)
-[WHERE [expr]]
-TO "bos://bj-test-cmy/export/"
-PROPERTIES
-(
- "label"="mylabel",
- "column_separator"=",",
- "columns" = "col1,col2",
- "exec_mem_limit"="2147483648",
- "timeout" = "3600"
-)
-WITH BROKER "hdfs"
-(
- "username" = "user",
- "password" = "passwd"
-);
-```
-
-* `label`: The identifier of this export job. You can use this identifier to view the job status later.
-* `column_separator`: Column separator. The default is `\t`. Supports invisible characters, such as `'\x07'`.
-* `columns`: The columns to be exported, separated by commas. If this parameter is not specified, all columns of the table are exported by default.
-* `line_delimiter`: Line separator. The default is `\n`. Supports invisible characters, such as `'\x07'`.
-* `exec_mem_limit`: The memory usage limit of a single query plan on a single BE in an Export job. Default 2GB. Unit: bytes.
-* `timeout`: Job timeout. Default 2 hours. Unit: seconds.
-* `tablet_num_per_task`: The maximum number of fragments allocated per query plan. The default is 5.
-
-After submitting a job, the job status can be viewed with the `SHOW EXPORT` command. The results are as follows:
-
-```
- JobId: 14008
- Label: mylabel
- State: FINISHED
- Progress: 100%
- TaskInfo: {"partitions":["*"],"exec mem limit":2147483648,"column separator":",","line delimiter":"\n","tablet num":1,"broker":"hdfs","coord num":1,"db":"default_cluster:db1","tbl":"tbl3"}
- Path: bos://bj-test-cmy/export/
-CreateTime: 2019-06-25 17:08:24
- StartTime: 2019-06-25 17:08:28
-FinishTime: 2019-06-25 17:08:34
- Timeout: 3600
- ErrorMsg: N/A
-```
-
-
-* JobId: The unique ID of the job
-* Label: Job identifier
-* State: Job status:
- * PENDING: Jobs to be Scheduled
- * EXPORTING: Data Export
- * FINISHED: Operation Successful
- * CANCELLED: Job Failure
-* Progress: Job progress. The progress is measured in query plans. Assuming there are 10 query plans in total and 3 have been completed, the progress is 30%.
-* TaskInfo: Job information in Json format:
- * db: database name
- * tbl: Table name
- * partitions: Specify the exported partition. `*` Represents all partitions.
- * exec mem limit: Query plan memory usage limit. Unit: bytes.
- * column separator: The column separator for the exported file.
- * line delimiter: The line separator for the exported file.
- * tablet num: The total number of tablets involved.
- * broker: The name of the broker used.
- * coord num: The number of query plans.
-* Path: Export path on remote storage.
-* CreateTime/StartTime/FinishTime: Creation time, start scheduling time and end time of jobs.
-* Timeout: Job timeout. The unit is seconds. This time is calculated from CreateTime.
-* ErrorMsg: If there is an error in the job, the cause of the error is shown here.
-
-## Best Practices
-
-### Splitting Query Plans
-
-How many query plans need to be executed for an Export job depends on the total number of Tablets and on how many Tablets one query plan can be assigned at most. Since multiple query plans are executed serially, the job execution time can be reduced if one query plan handles more tablets. However, if a query plan fails (e.g., the RPC call to the Broker fails, or the remote storage jitters), too many tablets lead to a higher retry cost for that query plan. Therefore, the number of query plans and the number of tablets scanned by each query plan need to be arranged to balance execution time against success rate. It is generally recommended that the amount of data scanned by one query plan be within 3-5 GB (the size and number of tablets of a table can be viewed with the `SHOW TABLETS FROM tbl_name;` statement).
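-
-For example, a sketch of tuning the per-plan tablet count for a single job through the `tablet_num_per_task` property; the path, broker name and credentials below are placeholders:
-
-```
-EXPORT TABLE db1.tbl1
-TO "hdfs://host:port/user/doris/export/"
-PROPERTIES
-(
-    "tablet_num_per_task" = "10"
-)
-WITH BROKER "hdfs"
-(
-    "username" = "user",
-    "password" = "passwd"
-);
-```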
-
-### exec\_mem\_limit
-
-Usually, a query plan of an Export job has only two parts, scan and export, and does not involve computing logic that requires much memory. So the default memory limit of 2GB usually satisfies the requirement. But in some scenarios, for example when a query plan needs to scan too many Tablets on the same BE, or when the Tablets have too many data versions, memory may be insufficient. In that case, a larger memory limit needs to be set through this parameter, such as 4GB or 8GB.
-
-## Notes
-
-* It is not recommended to export large amounts of data at one time. The maximum amount of exported data recommended by an Export job is tens of GB. Excessive export results in more junk files and higher retry costs.
-* If the amount of table data is too large, it is recommended to export it by partition.
-* During the operation of an Export job, if the FE restarts or the master switches, the Export job will fail and the user needs to resubmit it.
-* If the Export job fails, the `__doris_export_tmp_xxx` temporary directory generated in the remote storage and the generated files will not be deleted, requiring the user to delete them manually.
-* If the Export job runs successfully, the `__doris_export_tmp_xxx` directory generated in the remote storage may be retained or cleared according to the file system semantics of the remote storage. For example, in Baidu Object Storage (BOS), after removing the last file in a directory through rename operation, the directory will also be deleted. If the directory is not cleared, the user can clear it manually.
-* If the FE restarts or the master switches after an Export job finishes (successfully or not), some of the job information displayed by `SHOW EXPORT` will be lost and cannot be viewed.
-* Export jobs only export data from Base tables, not Rollup Index.
-* Export jobs scan data and occupy IO resources, which may affect the query latency of the system.
-
-## Relevant configuration
-
-### FE
-
-* `export_checker_interval_second`: Scheduling interval of the Export job scheduler, default is 5 seconds. Setting this parameter requires restarting FE.
-* `export_running_job_num_limit`: Limit on the number of running Export jobs. If exceeded, the job will wait in the PENDING state. The default is 5, which can be adjusted at runtime.
-* `export_task_default_timeout_second`: Default timeout of an Export job. The default is 2 hours. It can be adjusted at runtime.
-* `export_tablet_num_per_task`: The maximum number of fragments that a query plan is responsible for. The default is 5.
diff --git a/docs/en/administrator-guide/ldap.md b/docs/en/administrator-guide/ldap.md
deleted file mode 100644
index ceaebb7c05..0000000000
--- a/docs/en/administrator-guide/ldap.md
+++ /dev/null
@@ -1,175 +0,0 @@
----
-{
- "title": "LDAP",
- "language": "en"
-}
----
-
-
-
-# LDAP
-
-Access to third-party LDAP services to provide authentication login and group authorization services for Doris.
-
-LDAP authentication login complements Doris authentication login by accessing the LDAP service for password authentication; Doris uses LDAP to authenticate the user's password first; if the user does not exist in the LDAP service, it continues to use Doris to authenticate the password; if the LDAP password is correct but there is no corresponding account in Doris, a temporary user is created to log in to Doris.
-
-LDAP group authorization maps LDAP groups to Roles in Doris. If a user belongs to multiple user groups in LDAP, after logging in to Doris the user obtains the privileges of all Roles corresponding to those groups. The group name must be the same as the Role name.
-
-## Noun Interpretation
-
-* LDAP: Lightweight directory access protocol that enables centralized management of account passwords.
-* Privilege: Permissions act on nodes, databases or tables. Different permissions represent different permission to operate.
-* Role: Doris can create custom named roles. A role can be thought of as a collection of permissions.
-
-## Enable LDAP Authentication
-### Server-side Configuration
-
-You need to configure the LDAP basic information in the fe/conf/ldap.conf file, and the LDAP administrator password needs to be set using sql statements.
-
-#### Configure the fe/conf/ldap.conf file:
-* ldap_authentication_enabled = false
- Set the value to "true" to enable LDAP authentication; when the value is "false", LDAP authentication is not enabled and all other configuration items in this file are ignored.
-
-* ldap_host = 127.0.0.1
- LDAP service ip.
-
-* ldap_port = 389
- LDAP service port, the default plaintext transfer port is 389, currently Doris' LDAP function only supports plaintext password transfer.
-
-* ldap_admin_name = cn=admin,dc=domain,dc=com
- LDAP administrator account "Distinguished Name". When a user logs into Doris using LDAP authentication, Doris will bind the administrator account to search for user information in LDAP.
-
-* ldap_user_basedn = ou=people,dc=domain,dc=com
- The base dn used by Doris when searching for user information in LDAP.
-
-* ldap_user_filter = (&(uid={login}))
-
- The filter used by Doris when searching for user information in LDAP. The placeholder "{login}" will be replaced with the login username. You must ensure that the user matched by this filter is unique; otherwise Doris will not be able to verify the password through LDAP, and the error message "ERROR 5081 (42000): user is not unique in LDAP server." will appear when logging in.
-
- For example, if you use the LDAP user node uid attribute as the username to log into Doris, you can configure it as:
- ldap_user_filter = (&(uid={login}));
- This item can be configured using the LDAP user mailbox prefix as the user name:
- ldap_user_filter = (&(mail={login}@baidu.com)).
-
-* ldap_group_basedn = ou=group,dc=domain,dc=com
- The base dn used by Doris when searching for group information in LDAP. If this item is not configured, LDAP group authorization will not be enabled.
-
-#### Set the LDAP administrator password:
-After configuring the ldap.conf file, start fe, log in to Doris with the root or admin account, and execute sql:
-```
-set ldap_admin_password = 'ldap_admin_password';
-```
-
-### Client-side configuration
-Client-side LDAP authentication requires the mysql client's cleartext authentication plugin to be enabled. When logging in to Doris from the command line, the plugin can be enabled in one of the following two ways.
-
-* Set the environment variable LIBMYSQL_ENABLE_CLEARTEXT_PLUGIN to value 1.
- For example, in a Linux or macOS environment you can use the command:
- ```
- echo "export LIBMYSQL_ENABLE_CLEARTEXT_PLUGIN=1" >> ~/.bash_profile && source ~/.bash_profile
- ```
-
-* Add the parameter "--enable-cleartext-plugin" each time you log in to Doris.
- ```
- mysql -hDORIS_HOST -PDORIS_PORT -u user -p --enable-cleartext-plugin
-
- Enter ldap password
- ```
-
-## LDAP authentication detailed explanation
-LDAP password authentication and group authorization are complementary to Doris password authentication and authorization. Enabling LDAP functionality does not completely replace Doris password authentication and authorization, but coexists with Doris password authentication and authorization.
-
-### LDAP authentication login details
-When LDAP is enabled, the possible combinations of a user existing in Doris and in LDAP are as follows:
-
-|LDAP User|Doris User|Password|Login Status|Login to Doris users|
-|--|--|--|--|--|
-|Existent|Existent|LDAP Password|Login successful|Doris User|
-|Existent|Existent|Doris Password|Login failure|None|
-|Non-Existent|Existent|Doris Password|Login successful|Doris User|
-|Existent|Non-Existent|LDAP Password|Login successful|Ldap Temporary user|
-
-After LDAP is enabled, when a user logs in via the mysql client, Doris first verifies the user's password through the LDAP service. If the LDAP user exists and the password is correct, Doris logs the user in: if a corresponding Doris account exists, Doris logs in to that account directly; if it does not exist, a temporary account is created for the user and logged in. The temporary account has the appropriate privileges (see LDAP group authorization) and is only valid for the current connection; Doris does not actually create this user or generate any metadata for it.
-If no login user exists in the LDAP service, Doris is used for password authentication.
-
-The following assumes that LDAP authentication is enabled, ldap_user_filter = (&(uid={login})) is configured, and all other configuration items are correct, and the client sets the environment variable LIBMYSQL_ENABLE_CLEARTEXT_PLUGIN=1
-
-For example:
-
-#### 1:Accounts exist in both Doris and LDAP.
-
-Doris account exists: jack@'172.10.1.10', password: 123456
-The LDAP user node has the attribute uid: jack and the user password is abcdef
-The jack@'172.10.1.10' account can be logged in to with the following command:
-```
-mysql -hDoris_HOST -PDoris_PORT -ujack -p abcdef
-```
-
-Login will fail with the following command:
-```
-mysql -hDoris_HOST -PDoris_PORT -ujack -p 123456
-```
-
-#### 2:The user exists in LDAP and the corresponding account does not exist in Doris.
-
-The LDAP user node has the attribute uid: jack and the user password is abcdef
-The following command creates a temporary user and logs in to jack@'%'. The temporary user has the basic privilege DatabasePrivs: Select_priv, and Doris will delete the temporary user after the user logs out:
-```
-mysql -hDoris_HOST -PDoris_PORT -ujack -p abcdef
-```
-
-#### 3: The user does not exist in LDAP.
-
-Doris account exists: jack@'172.10.1.10', password: 123456
-Log in to the account using the Doris password; the login succeeds:
-```
-mysql -hDoris_HOST -PDoris_PORT -ujack -p 123456
-```
-
-### LDAP group authorization details
-
-If an LDAP user's dn is a "member" attribute of an LDAP group node, Doris assumes that the user belongs to that group. Doris will revoke the corresponding Role privileges after the user logs out. Before using LDAP group authorization, you should create the corresponding Roles in Doris and grant privileges to them.
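-
-For example, a sketch of preparing a Role that matches an LDAP group named doris_rd; the database and privilege are illustrative:
-
-```
-CREATE ROLE doris_rd;
-GRANT SELECT_PRIV ON db1.* TO ROLE 'doris_rd';
-```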
-
-Login user Privileges are related to Doris user and group Privileges, as shown in the following table:
-|LDAP Users|Doris Users|Login User Privileges|
-|--|--|--|
-|exist|exist|LDAP group Privileges + Doris user Privileges|
-|Does not exist|Exists|Doris user Privileges|
-|exist|non-exist|LDAP group Privileges|
-
-If the logged-in user is a temporary user and no group privileges exist, the user has the Select_priv privilege on information_schema by default.
-
-Example:
-If an LDAP user's dn is a "member" attribute of an LDAP group node, the user is considered to belong to that group, and Doris will take the first RDN of the group dn as the group name.
-For example, if user dn is "uid=jack,ou=aidp,dc=domain,dc=com", the group information is as follows:
-```
-dn: cn=doris_rd,ou=group,dc=domain,dc=com
-objectClass: groupOfNames
-member: uid=jack,ou=aidp,dc=domain,dc=com
-```
-Then the group name is doris_rd.
-
-If jack also belongs to the LDAP groups doris_qa and doris_pm, and Doris has the Roles doris_rd, doris_qa and doris_pm, then after logging in using LDAP authentication the user will not only have the original privileges of the account, but will also obtain the privileges of the Roles doris_rd, doris_qa and doris_pm.
-
-## Limitations of LDAP authentication
-
-* The current LDAP feature of Doris only supports plaintext password authentication, that is, when a user logs in, the password is transmitted in plaintext between client and fe and between fe and LDAP service.
-* The current LDAP authentication only supports password authentication under mysql protocol. If you use the Http interface, you cannot use LDAP users for authentication.
-* Temporary users do not have user properties.
\ No newline at end of file
diff --git a/docs/en/administrator-guide/load-data/batch-delete-manual.md b/docs/en/administrator-guide/load-data/batch-delete-manual.md
deleted file mode 100644
index 1efc2bcb05..0000000000
--- a/docs/en/administrator-guide/load-data/batch-delete-manual.md
+++ /dev/null
@@ -1,204 +0,0 @@
----
-{
- "title": "Batch Delete",
- "language": "en"
-}
----
-
-
-
-# Batch Delete
-Currently, Doris supports multiple import methods such as Broker Load, Routine Load and Stream Load, while data can only be deleted through the DELETE statement. Each execution of DELETE generates a new data version, and frequent deletion therefore seriously affects query performance. In addition, DELETE is implemented by generating an empty rowset that records the deletion conditions; every read has to filter against these conditions, so performance also suffers when there are many conditions. Compared with other systems, Greenplum's implementation is more like a traditional database product, while Snowflake implements this through the MERGE syntax.
-
-For scenarios such as importing CDC data, inserts and deletes are generally interspersed in the data. The existing import methods cannot handle this scenario well: even if inserts and deletes can be separated, that only solves the import side and still cannot solve the deletion problem. The batch delete feature is designed to meet the needs of these scenarios.
-There are three merge modes for data import:
-1. APPEND: All data are appended to existing data
-2. DELETE: delete all rows with the same key column values as the imported data
-3. MERGE: APPEND or DELETE is decided according to the DELETE ON condition
-
-## Principle
-This is achieved by adding a hidden column `__DORIS_DELETE_SIGN__`. Because batch deletion is only performed on the Unique model, we only need to add a hidden column whose type is bool and whose aggregation function is REPLACE. In BE, the various aggregation write processes are the same as for normal columns, and there are two read schemes:
-
-When FE expands extensions such as `*`, it removes `__DORIS_DELETE_SIGN__` and adds the condition `__DORIS_DELETE_SIGN__ != true` by default.
-When BE reads, the hidden column is added and its value is used to determine whether the row has been deleted.
-
-### Import
-
-When importing, the value of the hidden column is set to the value of the `DELETE ON` expression during FE parsing. The other aggregation behaviors are the same as for a REPLACE aggregation column.
-
-### Read
-
-When reading, the condition `__DORIS_DELETE_SIGN__ != true` is added to all olapScanNodes of tables with the hidden column. BE is not aware of this process and executes normally.
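-
-For illustration only, a query like the first statement below is effectively rewritten by FE into the second one; the table and column names come from the usage example later in this document, and the rewrite happens internally rather than being written by the user:
-
-```
--- what the user writes
-SELECT * FROM table1;
--- what is effectively executed
-SELECT siteid, citycode, username, pv FROM table1 WHERE __DORIS_DELETE_SIGN__ != true;
-```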
-
-### Cumulative Compaction
-
-In Cumulative Compaction, hidden columns are treated as normal columns, and the compaction logic remains unchanged
-
-### Base Compaction
-
-In Base Compaction, delete the rows marked for deletion to reduce the space occupied by data
-
-### Syntax
-The import syntax mainly adds a column mapping that specifies the delete-mark column, and this column must be present in the imported data. The way to set it for each import method is as follows.
-
-#### stream load
-
-For stream load, the delete-mark column is set in the `columns` field in the header. Example:
-`-H "columns: k1, k2, label_c3" -H "merge_type: [MERGE|APPEND|DELETE]" -H "delete: label_c3=1"`
-
-#### broker load
-
-For broker load, the delete-mark column is specified via the `DELETE ON` clause of the load statement:
-
-```
-LOAD LABEL db1.label1
-(
- [MERGE|APPEND|DELETE] DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file1")
- INTO TABLE tbl1
- COLUMNS TERMINATED BY ","
- (tmp_c1,tmp_c2, label_c3)
- SET
- (
- id=tmp_c2,
- name=tmp_c1
- )
- [DELETE ON label=true]
-
-)
-WITH BROKER 'broker'
-(
- "username"="user",
- "password"="pass"
-)
-PROPERTIES
-(
- "timeout" = "3600"
-
-);
-
-```
-
-#### routine load
-
-Routine load adds a mapping in the `columns` field. The mapping method is the same as above. An example is as follows:
-
-```
- CREATE ROUTINE LOAD example_db.test1 ON example_tbl
- [WITH MERGE|APPEND|DELETE]
- COLUMNS(k1, k2, k3, v1, v2, label),
- WHERE k1> 100 and k2 like "%doris%"
- [DELETE ON label=true]
- PROPERTIES
- (
- "desired_concurrent_number"="3",
- "max_batch_interval" = "20",
- "max_batch_rows" = "300000",
- "max_batch_size" = "209715200",
- "strict_mode" = "false"
- )
- FROM KAFKA
- (
- "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092",
- "kafka_topic" = "my_topic",
- "kafka_partitions" = "0,1,2,3",
- "kafka_offsets" = "101,0,0,200"
- );
-```
-
-## Enable bulk delete support
-There are two ways of enabling batch delete support:
-1. Add `enable_batch_delete_by_default=true` in the FE configuration file; all tables created after restarting FE will then support batch deletion. This option defaults to false.
-
-2. For existing tables, or when the above FE configuration has not been changed, you can use the following statement to enable batch delete:
-`ALTER TABLE tablename ENABLE FEATURE "BATCH_DELETE"`.
-
-To determine whether a table supports batch delete, set the session variable `SET show_hidden_columns=true` to display hidden columns and then run `desc tablename`. If the output contains a `__DORIS_DELETE_SIGN__` column, batch delete is supported; otherwise it is not.
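-
-A short sketch of this check; `tablename` is a placeholder:
-
-```
-SET show_hidden_columns=true;
-DESC tablename;
--- batch delete is supported if a __DORIS_DELETE_SIGN__ column appears in the output
-```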
-## Note
-1. Since import operations other than stream load may be executed out of order inside Doris, when using the `MERGE` method with an import method other than stream load, it must be used together with load sequence. For the specific syntax, please refer to the sequence column documentation.
-2. The `DELETE ON` condition can only be used together with the MERGE method.
-
-## Usage example
-Let's take stream load as an example to show how to use it
-1. Import data normally:
-```
-curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv" -H "merge_type: APPEND" -T ~/table1_data http://127.0.0.1:8130/api/test/table1/_stream_load
-```
-The APPEND condition can be omitted, which has the same effect as the following statement:
-```
-curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv" -T ~/table1_data http://127.0.0.1:8130/api/test/table1/_stream_load
-```
-2. Delete all data with the same key as the imported data
-```
-curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv" -H "merge_type: DELETE" -T ~/table1_data http://127.0.0.1:8130/api/test/table1/_stream_load
-```
-Before load:
-```
-+--------+----------+----------+------+
-| siteid | citycode | username | pv |
-+--------+----------+----------+------+
-| 3 | 2 | tom | 2 |
-| 4 | 3 | bush | 3 |
-| 5 | 3 | helen | 3 |
-+--------+----------+----------+------+
-```
-Load data:
-```
-3,2,tom,0
-```
-After load:
-```
-+--------+----------+----------+------+
-| siteid | citycode | username | pv |
-+--------+----------+----------+------+
-| 4 | 3 | bush | 3 |
-| 5 | 3 | helen | 3 |
-+--------+----------+----------+------+
-```
-3. Delete the rows whose key columns are the same as those of the imported rows with `siteid=1`, and load the other imported rows normally
-```
-curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv" -H "merge_type: MERGE" -H "delete: siteid=1" -T ~/table1_data http://127.0.0.1:8130/api/test/table1/_stream_load
-```
-Before load:
-```
-+--------+----------+----------+------+
-| siteid | citycode | username | pv |
-+--------+----------+----------+------+
-| 4 | 3 | bush | 3 |
-| 5 | 3 | helen | 3 |
-| 1 | 1 | jim | 2 |
-+--------+----------+----------+------+
-```
-Load data:
-```
-2,1,grace,2
-3,2,tom,2
-1,1,jim,2
-```
-After load:
-```
-+--------+----------+----------+------+
-| siteid | citycode | username | pv |
-+--------+----------+----------+------+
-| 4 | 3 | bush | 3 |
-| 2 | 1 | grace | 2 |
-| 3 | 2 | tom | 2 |
-| 5 | 3 | helen | 3 |
-+--------+----------+----------+------+
-```
diff --git a/docs/en/administrator-guide/load-data/binlog-load-manual.md b/docs/en/administrator-guide/load-data/binlog-load-manual.md
deleted file mode 100644
index 772162d2c2..0000000000
--- a/docs/en/administrator-guide/load-data/binlog-load-manual.md
+++ /dev/null
@@ -1,523 +0,0 @@
----
-{
- "title": "Binlog Load",
- "language": "en"
-}
----
-
-
-
-# Binlog Load
-
-The Binlog Load feature enables Doris to incrementally synchronize update operations from MySQL, implementing CDC (Change Data Capture) of MySQL data.
-
-## Scenarios
-* Supports insert / update / delete operations
-* Query statements are filtered out
-* Temporarily incompatible with DDL statements
-
-## Glossary
-* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access.
-* BE: Backend, the backend node of Doris. Responsible for query execution and data storage.
-* Canal: Alibaba's open source MySQL binlog parsing tool. Support incremental data subscription & consumption.
-* Batch: A batch of data sent by canal to the client with a globally unique self-incrementing ID.
-* SyncJob: A data synchronization job submitted by the user.
-* Receiver: Responsible for subscribing to and receiving data from canal.
-* Consumer: Responsible for distributing the data received by the Receiver to each channel.
-* Channel: The channel that receives the data distributed by the Consumer. It creates tasks for sending data and controls the beginning, committing and aborting of the transaction on one table.
-* Task: A task created by a Channel, which sends data to BE when executed.
-
-## Principle
-In the phase-one design, Binlog Load relies on canal as an intermediate medium: canal pretends to be a slave node to fetch and parse the binlog on the MySQL master node, and Doris then fetches the parsed data from canal. This process mainly involves MySQL, canal and Doris. The overall data flow is as follows:
-
-```
-+---------------------------------------------+
-| Mysql |
-+----------------------+----------------------+
- | Binlog
-+----------------------v----------------------+
-| Canal Server |
-+-------------------+-----^-------------------+
- Get | | Ack
-+-------------------|-----|-------------------+
-| FE | | |
-| +-----------------|-----|----------------+ |
-| | Sync Job | | | |
-| | +------------v-----+-----------+ | |
-| | | Canal Client | | |
-| | | +-----------------------+ | | |
-| | | | Receiver | | | |
-| | | +-----------------------+ | | |
-| | | +-----------------------+ | | |
-| | | | Consumer | | | |
-| | | +-----------------------+ | | |
-| | +------------------------------+ | |
-| +----+---------------+--------------+----+ |
-| | | | |
-| +----v-----+ +-----v----+ +-----v----+ |
-| | Channel1 | | Channel2 | | Channel3 | |
-| | [Table1] | | [Table2] | | [Table3] | |
-| +----+-----+ +-----+----+ +-----+----+ |
-| | | | |
-| +--|-------+ +---|------+ +---|------+|
-| +---v------+| +----v-----+| +----v-----+||
-| +----------+|+ +----------+|+ +----------+|+|
-| | Task |+ | Task |+ | Task |+ |
-| +----------+ +----------+ +----------+ |
-+----------------------+----------------------+
- | | |
-+----v-----------------v------------------v---+
-| Coordinator |
-| BE |
-+----+-----------------+------------------+---+
- | | |
-+----v---+ +---v----+ +----v---+
-| BE | | BE | | BE |
-+--------+ +--------+ +--------+
-
-```
-
-As shown in the figure above, the user first submits a SyncJob to the Fe.
-
-Then, Fe will start a Canal Client for each SyncJob to subscribe to and get data from the Canal Server.
-
-The Receiver in the Canal Client receives data via GET requests. Every time a Batch is received, the Consumer distributes it to different Channels according to the corresponding target tables. Once a Channel has received data distributed by the Consumer, it submits a send task to send the data.
-
-A Send task is a request from Channel to Be, which contains the data of the same Batch distributed to the current channel.
-
-Channel controls the begin, commit and abort of transaction of single table. In a transaction, the consumer may distribute multiple Batches of data to a channel, so multiple send tasks may be generated. These tasks will not actually take effect until the transaction is committed successfully.
-
-When certain conditions are met (for example, a certain period of time has passed, or the maximum commit data size is reached), the Consumer will block and notify each channel to try to commit the transaction.
-
-If and only if all channels are committed successfully, Canal Server will be notified by the ACK request and Canal Client continue to get and consume data.
-
-If any Channel fails to commit, data will be retrieved again from the location of the last successful consumption and committed again (Channels that have already committed successfully will not commit again, to ensure the idempotency of commits).
-
-During the whole cycle of a SyncJob, the Canal Client continuously receives data from the Canal Server and sends it to BE through the above process to complete data synchronization.
-
-## Configure MySQL Server
-
-In the master-slave synchronization of MySQL Cluster mode, the binary log file (binlog) records all data changes on the master node. Data synchronization and backup among multiple nodes of the cluster should be carried out through binlog logs, so as to improve the availability of the cluster.
-
-The architecture of master-slave synchronization is usually composed of a master node (responsible for writing) and one or more slave nodes (responsible for reading). All data changes on the master node will be copied to the slave node.
-
-**Note that: Currently, you must use MySQL version 5.7 or above to support Binlog Load**
-
-To enable the binlog of MySQL, you need to edit the my.cnf file and set it like:
-
-```
-[mysqld]
-log-bin = mysql-bin # enable binlog
-binlog-format=ROW # choose ROW mode
-```
-
-### Principle Description
-
-On MySQL, the binlog files usually name as mysql-bin.000001, mysql-bin.000002... And MySQL will automatically segment the binlog file when certain conditions are met:
-
-1. MySQL is restarted
-2. The user enters the `flush logs` command
-3. The binlog file size exceeds 1G
-
-To locate the latest consumption position in the binlog, the binlog file name and position (offset) are needed.
-
-For instance, the binlog location of the current consumption so far will be saved on each slave node to prepare for disconnection, reconnection and continued consumption at any time.
-
-```
---------------------- ---------------------
-| Slave | read | Master |
-| FileName/Position | <<<--------------------------- | Binlog Files |
---------------------- ---------------------
-```
-
-For the master node, it is only responsible for writing to the binlog. Multiple slave nodes can be connected to a master node at the same time to consume different parts of the binlog log without affecting each other.
-
-Binlog log supports two main formats (in addition to mixed based mode):
-
-* Statement-based format:
-
- Binlog only records the SQL statements executed on the master node, and the slave node copies them to the local node for re-execution.
-
-* Row-based format:
-
- Binlog will record the data change information of each row and all columns of the master node, and the slave node will copy and execute the change of each row to the local node.
-
-The first format only writes the executed SQL statements. Although the log volume will be small, it has the following disadvantages:
-
-1. The actual data of each row is not recorded
-2. The UDF, random and time functions executed on the master node will have inconsistent results on the slave node
-3. The execution order of limit statements may be inconsistent
-
-Therefore, we need to choose the second format which parses each row of data from the binlog log.
-
-In the row-based format, binlog will record the timestamp, server ID, offset and other information of each binlog event. For instance, the following transaction with two insert statements:
-
-```
-begin;
-insert into canal_test.test_tbl values (3, 300);
-insert into canal_test.test_tbl values (4, 400);
-commit;
-```
-
-There will be four binlog events, including one begin event, two insert events and one commit event:
-
-```
-SET TIMESTAMP=1538238301/*!*/;
-BEGIN
-/*!*/.
-# at 211935643
-# at 211935698
-#180930 0:25:01 server id 1 end_log_pos 211935698 Table_map: 'canal_test'.'test_tbl' mapped to number 25
-#180930 0:25:01 server id 1 end_log_pos 211935744 Write_rows: table-id 25 flags: STMT_END_F
-...
-'/*!*/;
-### INSERT INTO canal_test.test_tbl
-### SET
-### @1=1
-### @2=100
-# at 211935744
-#180930 0:25:01 server id 1 end_log_pos 211935771 Xid = 2681726641
-...
-'/*!*/;
-### INSERT INTO canal_test.test_tbl
-### SET
-### @1=2
-### @2=200
-# at 211935771
-#180930 0:25:01 server id 1 end_log_pos 211939510 Xid = 2681726641
-COMMIT/*!*/;
-```
-
-As shown above, each insert event contains modified data. During delete/update, an event can also contain multiple rows of data, making the binlog more compact.
-
-### Open GTID mode (Optional)
-
-A global transaction ID (global transaction identifier) identifies a transaction that has been committed on the master node, which is unique and valid in global. After binlog is enabled, the gtid will be written to the binlog file.
-
-To open the gtid mode of MySQL, you need to edit the my.cnf configuration file and set it like:
-
-```
-gtid-mode=on // Open gtid mode
-enforce-gtid-consistency=1 // Enforce consistency between gtid and transaction
-```
-
-In gtid mode, the master server can easily track transactions, recover data and replicas without binlog file name and offset.
-
-In gtid mode, due to the global validity of gtid, the slave node will no longer need to locate the binlog location on the master node by saving the file name and offset, but can be located by the data itself. During SyncJob, the slave node will skip the execution of any gtid transaction already executed before.
-
-A GTID is expressed as a pair of coordinates: `source_id` identifies the master node, and `transaction_id` indicates the order in which this transaction was executed on the master node (max 2^63 - 1).
-
-```
-GTID = source_id:transaction_id
-```
-
-For example, the gtid of the 23rd transaction executed on the same master node is:
-
-```
-3E11FA47-71CA-11E1-9E33-C80AA9429562:23
-```
-
-## Configure Canal Server
-
-Canal is a sub project of Alibaba Otter project. Its main purpose is to provide incremental data subscription and consumption based on MySQL database binlog analysis, which is originally used to solve the scenario of cross machine-room synchronization.
-
-Canal version 1.1.5 and above is recommended. [download link](https://github.com/alibaba/canal/releases)
-
-After downloading, please follow the steps below to complete the deployment.
-
-1. Unzip the canal deployer
-2. Create a new directory under the conf folder and rename it as the root directory of instance. The directory name is the destination mentioned later.
-3. Modify the instance configuration file (you can copy from `conf/example/instance.properties`)
-
- ```
- vim conf/{your destination}/instance.properties
- ```
- ```
- ## canal instance serverId
- canal.instance.mysql.slaveId = 1234
- ## mysql address
- canal.instance.master.address = 127.0.0.1:3306
- ## mysql username/password
- canal.instance.dbUsername = canal
- canal.instance.dbPassword = canal
- ```
-4. Start up the canal server
-
- ```
- sh bin/startup.sh
- ```
-
-5. Verify that the startup was successful
-
- ```
- cat logs/{your destination}/{your destination}.log
- ```
- ```
- 2013-02-05 22:50:45.636 [main] INFO c.a.o.c.i.spring.support.PropertyPlaceholderConfigurer - Loading properties file from class path resource [canal.properties]
- 2013-02-05 22:50:45.641 [main] INFO c.a.o.c.i.spring.support.PropertyPlaceholderConfigurer - Loading properties file from class path resource [xxx/instance.properties]
- 2013-02-05 22:50:45.803 [main] INFO c.a.otter.canal.instance.spring.CanalInstanceWithSpring - start CannalInstance for 1-xxx
- 2013-02-05 22:50:45.810 [main] INFO c.a.otter.canal.instance.spring.CanalInstanceWithSpring - start successful....
- ```
-
-### Principle Description
-
-By simulating the MySQL dump protocol, canal disguises itself as a slave node and fetches and parses the binlog of the master node.
-
-Multiple instances can be started on the canal server. An instance can be regarded as a slave node. Each instance consists of the following parts:
-
-```
--------------------------------------------------
-| Server |
-| -------------------------------------------- |
-| | Instance 1 | |
-| | ----------- ----------- ----------- | |
-| | | Parser | | Sink | | Store | | |
-| | ----------- ----------- ----------- | |
-| | ----------------------------------- | |
-| | | MetaManager | | |
-| | ----------------------------------- | |
-| -------------------------------------------- |
--------------------------------------------------
-```
-
-* Parser: Access the data source, simulate the dump protocol, interact with the master, and analyze the protocol
-* Sink: Linker between parser and store, for data filtering, processing and distribution
-* Store: Data store
-* Meta Manager: Metadata management module
-
-Each instance has its own unique ID in the cluster, that is, server ID.
-
-In the canal server, the instance is identified by a unique string named destination. The canal client needs destination to connect to the corresponding instance.
-
-**Note that: canal client and canal instance should correspond to each other one by one**
-
-Binlog Load forbids multiple SyncJobs from connecting to the same destination.
-
-The data flow direction in instance is binlog -> Parser -> sink -> store.
-
-The instance parses the binlog through the parser module, and the parsed data is cached in the store. When a user submits a SyncJob to FE, it starts a Canal Client to subscribe to and fetch the data in the store of the corresponding instance.
-
-The store is actually a ring queue. Users can configure its length and storage space by themselves.
-
-
-
-Store manages the data in the queue through three pointers:
-
-1. Get pointer: the GET pointer points to the last location get by the Canal Client.
-2. Ack pointer: the ACK pointer points to the location of the last successful consumption.
-3. Put pointer: the PUT pointer points to the location where the sink module successfully wrote to the store at last.
-
-```
-canal client asynchronously get data in the store
-
- get 0 get 1 get 2 put
- | | | ...... |
- v v v v
---------------------------------------------------------------------- store ring queue
- ^ ^
- | |
- ack 0 ack 1
-```
-
-When the Canal Client calls the Get command, the Canal Server will generate data batches and send them to the Canal Client, and move the Get pointer to the right. The Canal Client can get multiple batches until the Get pointer catches up with the Put pointer.
-
-When the consumption of data is successful, the Canal Client will return Ack + Batch ID, notify that the consumption has been successful, and move the Ack pointer to the right. The store will delete the data of this batch from the ring queue, make room to get data from the upstream sink module, and then move the Put pointer to the right.
-
-When the data consumption fails, the client will return a rollback notification of the consumption failure, and the store will reset the Get pointer to the left to the Ack pointer's position, so that the next data get by the Canal Client can start from the Ack pointer again.
-
-Like a slave node in MySQL, the Canal Server also needs to save the latest consumption location of the client. All metadata in the Canal Server (such as gtid and binlog location) is managed by the MetaManager. At present, this metadata is persisted by default in JSON format in the meta.dat file in the instance's root directory.
-
-## Basic Operation
-
-### Configure Target Table Properties
-
-The user first needs to create a target table in Doris corresponding to the table on the MySQL side.
-
-Currently, Binlog Load only supports Unique model target tables, and the batch delete feature of the target table must be enabled.
-
-Example:
-
-```
--- create target table
-CREATE TABLE `test1` (
- `a` int(11) NOT NULL COMMENT "",
- `b` int(11) NOT NULL COMMENT ""
-) ENGINE=OLAP
-UNIQUE KEY(`a`)
-COMMENT "OLAP"
-DISTRIBUTED BY HASH(`a`) BUCKETS 8;
-
--- enable batch delete
-ALTER TABLE canal_test.test1 ENABLE FEATURE "BATCH_DELETE";
-```
-
-### Create SyncJob
-
-The detailed syntax for creating a SyncJob can be viewed via the `help create sync job` command. Here we mainly introduce the precautions when creating a SyncJob.
-
-* job_name
-
- `job_name` is the unique identifier of the SyncJob in the current database. Only one SyncJob with the same job name can be running at a time.
-
-* channel_desc
-
- `column_mapping` mainly refers to the mapping relationship between the columns of the MySQL source table and the Doris target table.
-
- If it is not specified, the columns of the source table and the target table will be assumed to correspond one to one in order.
-
- However, we still recommend explicitly specifying the mapping relationship of columns, so that when the schema-change of the target table (such as adding a nullable column), data synchronization can still be carried out.
-
- Otherwise, when a schema change occurs, the column mapping relationship is no longer one-to-one and the SyncJob will report an error.
-
-* binlog_desc
-
- `binlog_desc` defines some necessary information for docking the remote binlog address.
-
- At present, the only supported type is canal. In the canal type, all configuration items need to be prefixed with `canal.` (an example statement is sketched after this list).
-
- 1. canal.server.ip: the address of the canal server
- 2. canal.server.port: the port of canal server
- 3. canal.destination: the identifier of the instance
- 4. canal.batchSize: the maximum batch size fetched from the canal server per batch. Default: 8192
- 5. canal.username: the username of instance
- 6. canal.password: the password of instance
- 7. canal.debug: when set to true, the detailed message of each batch and each row will be printed, which may affect performance.
-
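-Putting the pieces above together, a minimal sketch of a CREATE SYNC JOB statement follows. The MySQL-side names `mysql_db`.`source_tbl` and the canal address/destination are placeholders; check `help create sync job` for the authoritative syntax and the full option list.
-
-```
-CREATE SYNC `canal_test`.`job1`
-(
-    FROM `mysql_db`.`source_tbl` INTO `test1`
-)
-FROM BINLOG
-(
-    "type" = "canal",
-    "canal.server.ip" = "127.0.0.1",
-    "canal.server.port" = "11111",
-    "canal.destination" = "your_destination",
-    "canal.username" = "canal",
-    "canal.password" = "canal"
-);
-```
-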
-### Show Job Status
-
-Specific commands and examples for showing job status can be found via the `HELP SHOW SYNC JOB;` command.
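-
-For a quick look (using the `canal_test` database from the earlier target-table example), the jobs of a database can be listed with:
-
-```
-SHOW SYNC JOB FROM `canal_test`;
-```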
-
-The parameters in the result set have the following meanings:
-
-* State
-
- The current stage of the job. The transition between job states is shown in the following figure:
-
- ```
- +-------------+
- create job | PENDING | resume job
- +-----------+ <-------------+
- | +-------------+ |
- +----v-------+ +-------+----+
- | RUNNING | pause job | PAUSED |
- | +-----------------------> |
- +----+-------+ run error +-------+----+
- | +-------------+ |
- | | CANCELLED | |
- +-----------> <-------------+
- stop job +-------------+ stop job
- system error
- ```
-
- After the SyncJob is submitted, the status is PENDING.
-
- After the FE scheduler starts the canal client, the status becomes RUNNING.
-
- User can control the status of the job by three commands: `stop/pause/resume`. After the operation, the job status is `cancelled/paused/running` respectively.
-
- CANCELLED is the only final state of the job. Once the job status changes to CANCELLED, it cannot be resumed.
-
- When an error occurs while the SyncJob is running, the status will change to CANCELLED if the error is unrecoverable, otherwise to PAUSED.
-
-* Channel
-
- The mapping relationship between all source tables and target tables of the job.
-
-* Status
-
- The latest consumption location of the current binlog (if the gtid mode is on, the gtid will be displayed), and the delay time of the Doris side compared with the MySQL side.
-
-* JobConfig
-
- The remote server information the job connects to, such as the address of the Canal Server and the destination of the connected instance.
-
-### Control Operation
-
-Users can control the status of jobs through `stop/pause/resume` commands.
-
-You can use the `HELP STOP SYNC JOB;`, `HELP PAUSE SYNC JOB;`, and `HELP RESUME SYNC JOB;` commands to view help and examples.
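-
-For instance, assuming a job named `job1` in the database `canal_test` (placeholder names reused from the examples above), the control commands take the job name as their argument; consult the HELP output for the exact syntax:
-
-```
-PAUSE SYNC JOB `canal_test`.`job1`;
-RESUME SYNC JOB `canal_test`.`job1`;
-STOP SYNC JOB `canal_test`.`job1`;
-```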
-
-## Related Parameters
-
-### Canal configuration
-
-* `canal.ip`
-
- canal server's ip address
-
-* `canal.port`
-
- canal server's port
-
-* `canal.instance.memory.buffer.size`
-
- The queue length of the store ring queue. It must be a power of 2, and the default length is 16384. This value equals the maximum number of events that can be cached on the canal side and directly determines the maximum number of events that can be accommodated in one transaction on the Doris side. It is recommended to make it large enough; otherwise the amount of data a single Doris transaction can hold becomes too small, resulting in overly frequent transaction commits and data version accumulation.
-
-* `canal.instance.memory.buffer.memunit`
-
- The default space occupied by one event on the canal side; the default value is 1024 bytes. This value multiplied by `canal.instance.memory.buffer.size` equals the maximum space of the store; for example, with a queue length of 16384, the store space is 16MB. However, the actual size of an event is not equal to this value; it is determined by the number of rows in the event and the length of each row. For instance, an insert event of a table with only two columns may be only 30 bytes, while a delete event may reach thousands of bytes, because a delete event usually contains more rows than an insert event. A combined configuration sketch follows below.
-
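-The two properties above live in the instance configuration file created during deployment. A sketch with the default values described above (the destination directory name is a placeholder):
-
-```
-# conf/{your destination}/instance.properties
-# ring queue length; must be a power of 2
-canal.instance.memory.buffer.size = 16384
-# space reserved per event, in bytes (16384 * 1024 bytes = 16MB of store space)
-canal.instance.memory.buffer.memunit = 1024
-```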
-
-### Fe configuration
-
-The following configurations are system-level configurations of SyncJob. They can be modified in the FE configuration file fe.conf (a combined sketch follows the list below).
-
-* `enable_create_sync_job`
-
- Enables the Binlog Load feature. The default value is false, i.e. the feature is turned off.
-
-* `sync_commit_interval_second`
-
- Maximum interval time between commit transactions. If there is still data in the channel that has not been committed after this time, the consumer will notify the channel to commit the transaction.
-
-* `min_sync_commit_size`
-
- The minimum number of events required to commit a transaction. If the number of events received by Fe is less than it, Fe will continue to wait for the next batch of data until the time exceeds `sync_commit_interval_second`. The default value is 10000 events. If you want to modify this configuration, please ensure that this value is less than the `canal.instance.memory.buffer.size` configuration on the canal side (16384 by default). Otherwise, Fe will try to get more events than the length of the store queue without ack, causing the store queue to block until timeout.
-
-* `min_bytes_sync_commit`
-
- The minimum data size required to commit a transaction. If the data size received by Fe is smaller than it, it will continue to wait for the next batch of data until the time exceeds `sync_commit_interval_second`. The default value is 15MB. If you want to modify this configuration, please ensure that this value is less than the product `canal.instance.memory.buffer.size` and `canal.instance.memory.buffer.memunit` on the canal side (16MB by default). Otherwise, Fe will try to get data from canal larger than the store space without ack, causing the store queue to block until timeout.
-
-* `max_bytes_sync_commit`
-
- The maximum size of the data when the transaction is committed. If the data size received by Fe is larger than this value, it will immediately commit the transaction and send the accumulated data. The default value is 64MB. If you want to modify this configuration, please ensure that this value is greater than the product of `canal.instance.memory.buffer.size` and `canal.instance.memory.buffer.memunit` on the canal side (16MB by default) and greater than `min_bytes_sync_commit`.
-
-* `max_sync_task_threads_num`
-
- The maximum number of threads in the SyncJobs' thread pool. There is only one thread pool in the whole Fe for synchronization, which is used to process the tasks created by all SyncJobs in the Fe.
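-
-As a summary, a minimal fe.conf sketch that enables Binlog Load and keeps the documented defaults for the commit thresholds could look like this (parameters whose defaults are not documented here are omitted):
-
-```
-# enable the Binlog Load (SyncJob) feature
-enable_create_sync_job = true
-# commit thresholds, shown with the defaults described above
-min_sync_commit_size = 10000
-min_bytes_sync_commit = 15728640
-max_bytes_sync_commit = 67108864
-```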
-
-## FAQ
-
-1. Will modifying the table structure affect data synchronization?
-
- Yes. The SyncJob cannot prohibit the `alter table` operation. When the table's schema changes, if the column mapping no longer matches, the job may be paused with an error. It is recommended to reduce such problems by explicitly specifying the column mapping relationship in the SyncJob, or by only adding nullable columns or columns with default values.
-
-2. Will the SyncJob continue to run after the database is deleted?
-
- No. In this case, the SyncJob will be checked by the Fe's scheduler thread and be stopped.
-
-3. Can multiple SyncJobs be configured with the same `IP:Port + destination`?
-
- No. When creating a SyncJob, FE will check whether the `IP:Port + destination` is duplicate with the existing job to prevent multiple jobs from connecting to the same instance.
-
-4. Why is the precision of floating-point type different between MySQL and Doris during data synchronization?
-
- The precision of Doris floating-point type is different from that of MySQL. You can choose to use decimal type instead.
\ No newline at end of file
diff --git a/docs/en/administrator-guide/load-data/broker-load-manual.md b/docs/en/administrator-guide/load-data/broker-load-manual.md
deleted file mode 100644
index 72a1976f2f..0000000000
--- a/docs/en/administrator-guide/load-data/broker-load-manual.md
+++ /dev/null
@@ -1,536 +0,0 @@
----
-{
- "title": "Broker Load",
- "language": "en"
-}
----
-
-
-
-# Broker Load
-
-Broker load is an asynchronous import method, and the data source supported depends on the data source supported by the Broker process.
-
-Users need to create Broker load imports through MySQL protocol and check the import results by viewing the import commands.
-
-## Applicable scenarios
-
-* Source data in Broker accessible storage systems, such as HDFS.
-* Data volumes range from tens to hundreds of GB.
-
-## Terminology
-
-1. Frontend (FE): Metadata and scheduling nodes of Doris system. In the import process, it is mainly responsible for the generation of import plan and the scheduling of import tasks.
-2. Backend (BE): The computing and storage nodes of Doris system. In the import process, it is mainly responsible for ETL and storage of data.
-3. Broker: Broker is an independent stateless process. It encapsulates the file system interface and provides Doris with the ability to read files in the remote storage system.
-4. Plan: Import the execution plan, and BE executes the import execution plan to import data into Doris system.
-
-## Basic Principles
-
-After the user submits the import task, the FE generates the corresponding plan and distributes the plan to several BEs according to the number of BEs and the size of the file. Each BE imports part of the data.
-
-A BE pulls data from the Broker, transforms it, and imports it into the system. After all BEs have completed the import, the FE finally decides whether the import is successful.
-
-```
- +
- | 1. user create broker load
- v
- +----+----+
- | |
- | FE |
- | |
- +----+----+
- |
- | 2. BE etl and load the data
- +--------------------------+
- | | |
-+---v---+ +--v----+ +---v---+
-| | | | | |
-| BE | | BE | | BE |
-| | | | | |
-+---+-^-+ +---+-^-+ +--+-^--+
- | | | | | |
- | | | | | | 3. pull data from broker
-+---v-+-+ +---v-+-+ +--v-+--+
-| | | | | |
-|Broker | |Broker | |Broker |
-| | | | | |
-+---+-^-+ +---+-^-+ +---+-^-+
- | | | | | |
-+---v-+-----------v-+----------v-+-+
-| HDFS/BOS/AFS cluster |
-| |
-+----------------------------------+
-
-```
-
-## Basic operations
-
-### Create a load
-
-Broker load creates a data load job.
-
-Grammar:
-
-```
-LOAD LABEL db_name.label_name
-(data_desc, ...)
-WITH BROKER broker_name broker_properties
-[PROPERTIES (key1=value1, ... )]
-
-* data_desc:
-
- DATA INFILE ('file_path', ...)
- [NEGATIVE]
- INTO TABLE tbl_name
- [PARTITION (p1, p2)]
- [COLUMNS TERMINATED BY separator ]
- [(col1, ...)]
- [PRECEDING FILTER predicate]
- [SET (k1=f1(xx), k2=f2(xx))]
- [WHERE predicate]
-
-* broker_properties:
-
- (key1=value1, ...)
-```
-Examples:
-
-```
-LOAD LABEL db1.label1
-(
- DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file1")
- INTO TABLE tbl1
- COLUMNS TERMINATED BY ","
- (tmp_c1,tmp_c2)
- SET
- (
- id=tmp_c2,
- name=tmp_c1)
- ),
- DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file2")
- INTO TABLE tbl2
- COLUMNS TERMINATED BY ","
- (col1, col2)
- where col1 > 1
-)
-WITH BROKER 'broker'
-(
- "username"="user",
- "password"="pass"
-)
-PROPERTIES
-(
- "timeout" = "3600"
-);
-
-```
-
-For the detailed syntax of creating an import, execute `HELP BROKER LOAD` to view the syntax help. This section mainly introduces the meaning of the parameters in the Broker load creation syntax and the points to pay attention to.
-
-#### Label
-
-Identity of import task. Each import task has a unique Label within a single database. Label is a user-defined name in the import command. With this Label, users can view the execution of the corresponding import task.
-
-Another function of Label is to prevent users from repeatedly importing the same data. **It is strongly recommended that users use the same label for the same batch of data. This way, repeated requests for the same batch of data will only be accepted once, guaranteeing At-Most-Once semantics.**
-
-When the corresponding import job status of Label is CANCELLED, it can be used again to submit the import job.
-
-#### Data Description Class Parameters
-
-Data description class parameters mainly refer to the parameters belonging to ``data_desc`` in Broker load creating import statements. Each group of ```data_desc``` mainly describes the data source address, ETL function, target table and partition information involved in this import.
-
-The following is a detailed explanation of some parameters of the data description class:
-
-+ Multi-table import
-
- Broker load supports a single import task involving multiple tables, and each Broker load import task can implement multiple tables import by declaring multiple tables in multiple ``data_desc``. Each individual ```data_desc``` can also specify the data source address belonging to the table. Broker load guarantees atomic success or failure between multiple tables imported at a single time.
-
-+ negative
-
- `data_desc` can also be set to load data in negative mode. This function is mainly applicable when the aggregate columns in the table are of SUM type. If you want to revoke a batch of imported data, you can load the same batch of data with the `negative` parameter, and Doris will automatically negate this batch of data on the aggregate columns, thereby eliminating the previously imported batch.
-
-+ partition
-
- In `data_desc`, you can specify the partitions of the table to be imported. Data that does not belong to the specified partitions will not be imported and will be counted as error data.
-
-+ preceding filter predicate
-
- Used to filter original data. The original data is the data without column mapping and transformation. The user can filter the data before conversion, select the desired data, and then perform the conversion.
-
-+ where predicate
-
- The where statement in `data_desc` filters the data that has already been transformed. Rows filtered out by the where predicate are not counted towards `max_filter_ratio`. If there are where predicates for the same table in multiple `data_desc`, they are merged with AND semantics.
-
-+ merge\_type
-
- The data merge type. Three types are supported: APPEND, DELETE, and MERGE. APPEND is the default, meaning this batch of data is appended to the existing data. DELETE means deleting all rows with the same key as this batch of data. MERGE semantics need to be used in conjunction with a delete condition: data that meets the delete condition is processed with DELETE semantics and the rest with APPEND semantics.
-
-
-#### Import job parameters
-
-Import job parameters mainly refer to the parameters in Broker load creating import statement that belong to ``opt_properties``. Import operation parameters act on the whole import operation.
-
-The following is a detailed explanation of some parameters of the import operation parameters:
-
-+ timeout
-
- The timeout of the import job, in seconds. The user can set the timeout of each import in `opt_properties`. If the import task is not completed within the set timeout, it will be cancelled by the system and become CANCELLED. The default import timeout for Broker load is 4 hours.
-
- Usually, the user does not need to manually set the timeout of the import task. When the import cannot be completed within the default timeout time, the task timeout can be set manually.
-
- > Recommended timeout
- >
- > Total file size (MB) / Slowest import speed of the cluster (MB/s) > timeout > (Total file size (MB) * Number of tables and related Rollup tables to be imported) / (10 * Number of concurrent imports)
-
- > The concurrency of imports can be seen in the Relevant System Configuration section later in this document. The 10 in the formula is the current default import speed limit of 10MB/s.
-
- > For example, for a 1G file to be imported whose target table includes three Rollup tables, with a current import concurrency of 3, the minimum value of timeout is `(1 * 1024 * 3) / (10 * 3) = 102 seconds`.
-
- Because the machine environment of each Doris cluster is different and the concurrent query load differs, the slowest import speed of the user's Doris cluster needs to be estimated by the user based on the speed of historical import tasks.
-
-+ max\_filter\_ratio
-
- The maximum tolerated error ratio of the import task. The default is 0, and the range of values is 0-1. If the import error rate exceeds this value, the import fails.
-
- If the user wishes to ignore the wrong row, the import can be successful by setting this parameter greater than 0.
-
- The calculation formula is as follows:
-
- ``` (dpp.abnorm.ALL / (dpp.abnorm.ALL + dpp.norm.ALL ) ) > max_filter_ratio ```
-
- ``` dpp.abnorm.ALL``` denotes the number of rows whose data quality is not up to standard. Such as type mismatch, column mismatch, length mismatch and so on.
-
- ``` dpp.norm.ALL ``` refers to the number of correct data in the import process. The correct amount of data for the import task can be queried by the ``SHOW LOAD`` command.
-
- The number of rows in the original file = `dpp.abnorm.ALL + dpp.norm.ALL`
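-
- As a worked example, the `SHOW LOAD` output later in this document reports `dpp.abnorm.ALL=15`, `dpp.norm.ALL=28133376` and `max_filter_ratio:5.0E-5`. Plugging those values into the formula:
-
- ```
- error ratio = 15 / (15 + 28133376) ≈ 5.3e-7
- 5.3e-7 < 5.0e-5 (max_filter_ratio), so that job is not cancelled for data quality reasons.
- ```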
-
-* exec\_mem\_limit
-
- Memory limit. Default is 2GB. Unit is Bytes.
-
-+ strict\_mode
-
- Broker load can use strict mode. Set `properties ("strict_mode" = "true")` to enable it; the default is false.
-
- The strict mode means that the column type conversion in the import process is strictly filtered. The strategy of strict filtering is as follows:
-
- 1. For column type conversion, if strict mode is true, erroneous data will be filtered. Erroneous data here refers to data whose original value is not null but becomes null after column type conversion.
-
- 2. Strict mode does not affect an imported column when it is generated by a function transformation.
-
- 3. For an imported column type that includes range restrictions, strict mode does not affect data that passes type conversion but fails the range restriction. For example, if the type is decimal(1,0) and the original data is 10, it passes type conversion but falls outside the column declaration. Strict mode has no effect on such data.
-
-#### Relationship between strict mode and the import of source data
-
-Here's an example of a column type TinyInt
-
-> Note: When columns in a table allow null values to be imported
-
-|source data | source data example | string to int | strict_mode | result|
-|------------|---------------------|-----------------|--------------------|---------|
-|null | \N | N/A | true or false | NULL|
-|not null | aaa or 2000 | NULL | true | invalid data(filtered)|
-|not null | aaa | NULL | false | NULL|
-|not null | 1 | 1 | true or false | correct data|
-
-Here's an example of column type Decimal (1,0)
-
-> Note: When columns in a table allow null values to be imported
-
-|source data | source data example | string to int | strict_mode | result|
-|------------|---------------------|-----------------|--------------------|--------|
-|null | \N | N/A | true or false | NULL|
-|not null | aaa | NULL | true | invalid data(filtered)|
-|not null | aaa | NULL | false | NULL|
-|not null | 1 or 10 | 1 | true or false | correct data|
-
-> Note: Although 10 is a value beyond the range, strict mode does not affect it because its type meets the requirements of decimal. 10 will eventually be filtered in other ETL processes. But it will not be filtered by strict mode.
-
-### View load
-
-Broker load import is asynchronous, so the user must record the Label used when creating the load and use it in the **SHOW LOAD command to view the import result**. The SHOW LOAD command is common to all import modes. Its specific syntax can be viewed via `HELP SHOW LOAD`.
-
-Examples:
-
-```
-mysql> show load order by createtime desc limit 1\G
-*************************** 1. row ***************************
- JobId: 76391
- Label: label1
- State: FINISHED
- Progress: ETL:100%; LOAD:100%
- Type: BROKER
- EtlInfo: dpp.abnorm.ALL=15; dpp.norm.ALL=28133376
- TaskInfo: cluster:N/A; timeout(s):10800; max_filter_ratio:5.0E-5
- ErrorMsg: N/A
- CreateTime: 2019-07-27 11:46:42
- EtlStartTime: 2019-07-27 11:46:44
- EtlFinishTime: 2019-07-27 11:46:44
- LoadStartTime: 2019-07-27 11:46:44
-LoadFinishTime: 2019-07-27 11:50:16
- URL: http://192.168.1.1:8040/api/_load_error_log?file=__shard_4/error_log_insert_stmt_4bb00753932c491a-a6da6e2725415317_4bb00753932c491a_a6da6e2725415317
- JobDetails: {"Unfinished backends":{"9c3441027ff948a0-8287923329a2b6a7":[10002]},"ScannedRows":2390016,"TaskNumber":1,"All backends":{"9c3441027ff948a0-8287923329a2b6a7":[10002]},"FileNumber":1,"FileSize":1073741824}
-```
-
-The following is mainly about the significance of viewing the parameters in the return result set of the import command:
-
-+ JobId
-
- The unique ID of the import task, automatically generated by the system and different for each import task. Unlike Label, a JobId is never reused, whereas a Label can be reused after the import task fails.
-
-+ Label
-
- Identity of import task.
-
-+ State
-
- The current phase of the import task. In the Broker load import process, PENDING and LOADING are the two main import states. PENDING indicates that the import task is waiting to be executed; LOADING indicates that it is executing.
-
- There are two final stages of the import task: CANCELLED and FINISHED. When Load job is in these two stages, the import is completed. CANCELLED is the import failure, FINISHED is the import success.
-
-+ Progress
-
- The progress description of the import task. There are two kinds of progress: ETL and LOAD, which correspond to the two stages of the import process, ETL and LOADING. At present, Broker load only has the LOADING stage, so ETL is always displayed as `100%`.
-
- The progress range of LOAD is 0-100%.
-
- ``` LOAD Progress = Number of tables that have completed importing / Number of tables involved in this import task * 100%```
-
- **If all tables have finished importing, the progress of LOAD will be 99%**; the import then enters the final effective stage, and the progress of LOAD only changes to 100% after the entire import is completed.
-
- Import progress is not linear. So if there is no change in progress over a period of time, it does not mean that the import is not being implemented.
-
-+ Type
-
- Types of import tasks. The type value of Broker load is only BROKER.
-+ EtlInfo
-
- It mainly shows the imported data quantity indicators `unselected.rows`, `dpp.norm.ALL` and `dpp.abnorm.ALL`. The first value shows the number of rows filtered out by the where predicate. Users can check whether the error rate of the current import task exceeds max\_filter\_ratio based on the latter two indicators.
-
-+ TaskInfo
-
- It mainly shows the current import task parameters, that is, the user-specified import task parameters when creating the Broker load import task, including `cluster`, `timeout`, and `max_filter_ratio`.
-
-+ ErrorMsg
-
- When the import task status is CANCELLED, the reason for the failure is displayed in two parts: type and msg. If the import task succeeds, the `N/A` is displayed.
-
- The value meaning of type:
-
- ```
- USER_CANCEL: User Canceled Tasks
- ETL_RUN_FAIL: Import tasks that failed in the ETL phase
- ETL_QUALITY_UNSATISFIED: Data quality is not up to standard, that is, the error rate exceeds max_filter_ratio
- LOAD_RUN_FAIL: Import tasks that failed in the LOADING phase
- TIMEOUT: Import task not completed within the timeout
- UNKNOWN: Unknown import error
- ```
-
-+ CreateTime /EtlStartTime /EtlFinishTime /LoadStartTime /LoadFinishTime
-
- These values represent the creation time of the import, the beginning time of the ETL phase, the completion time of the ETL phase, the beginning time of the Loading phase and the completion time of the entire import task, respectively.
-
- Broker load import has no ETL stage, so its EtlStartTime, EtlFinishTime, LoadStartTime are set to the same value.
-
- If import tasks stay at CreateTime for a long time while LoadStartTime is N/A, it indicates that import tasks are currently heavily backlogged, and users can reduce the frequency of import submissions.
-
- ```
- LoadFinishTime - CreateTime = Time consumed by the entire import task
- LoadFinishTime - LoadStartTime = The entire Broker load import task execution time = the time consumed by the entire import task - the time the import task waits
- ```
-
-+ URL
-
- The error data sample of the import task can be obtained by accessing the URL address. When there is no error data in this import, the URL field is N/A.
-
-+ JobDetails
-
- Displays some details of the running status of the job, including the number of files, the total file size (bytes), the number of sub-tasks, the number of scanned rows, the related backend IDs and the unfinished backend IDs.
-
- ```
- {"Unfinished backends":{"9c3441027ff948a0-8287923329a2b6a7":[10002]},"ScannedRows":2390016,"TaskNumber":1,"All backends":{"9c3441027ff948a0-8287923329a2b6a7":[10002]},"FileNumber":1,"FileSize":1073741824}
- ```
-
- This information is updated every 5 seconds. ScannedRows is only used to display the job progress and does not indicate the real number of rows.
-
-### Cancel load
-
-When the Broker load job status is not CANCELLED or FINISHED, it can be cancelled manually by the user. When cancelling, you need to specify the Label of the import task to be cancelled. The syntax of the cancel-import command can be viewed by executing `HELP CANCEL LOAD`.
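-
-A minimal sketch, assuming the job was created in database db1 with label label1 as in the creation example above:
-
-```
-CANCEL LOAD FROM db1 WHERE LABEL = "label1";
-```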
-
-## Relevant System Configuration
-
-### FE configuration
-
-The following configurations belong to the Broker load system-level configuration, which acts on all Broker load import tasks. Configuration values are adjusted mainly by modifying `fe.conf`.
-
-+ min\_bytes\_per\_broker\_scanner/max\_bytes\_per\_broker\_scanner/max\_broker\_concurrency
-
- The first two configurations limit the minimum and maximum amount of data processed by a single BE. The third configuration limits the maximum number of concurrent imports for a job. The minimum amount of data processed, the maximum number of concurrency, the size of source files and the number of BEs in the current cluster **together determine the concurrency of this import**.
-
- ```
- Concurrency of this import = Math.min(source file size / min_bytes_per_broker_scanner, max_broker_concurrency, current number of BE nodes)
- Amount of data processed by a single BE for this import = source file size / concurrency of this import
- ```
-
- Usually the maximum amount of data supported by an import job is `max_bytes_per_broker_scanner * number of BE nodes`. If you need to import a larger amount of data, you need to adjust the size of the `max_bytes_per_broker_scanner` parameter appropriately.
-
-Default configuration:
-
-```
-Parameter name: min_bytes_per_broker_scanner, default 64MB, unit bytes.
-Parameter name: max_broker_concurrency, default 10.
-Parameter name: max_bytes_per_broker_scanner, default 3GB, unit bytes.
-```
-
-## Best Practices
-
-### Application scenarios
-
-The most appropriate scenario for Broker load is when the raw data is in a file system (HDFS, BOS, AFS). Secondly, since Broker load is the only asynchronous single-import method, users who need asynchronous import of large files can also consider Broker load.
-
-### Data volume
-
-We only discuss the case of a single BE here. If the user cluster has more than one BE, the data volumes in the headings below should be multiplied by the number of BEs. For example, if the user has three BEs, the values below 3G (inclusive) should be multiplied by 3, that is, below 9G (inclusive).
-
-+ Below 3G (including)
-
- Users can submit Broker load to create import requests directly.
-
-+ Over 3G
-
- Since the maximum processing capacity of a single imported BE is 3G, the imported files over 3G need to be imported by adjusting the import parameters of Broker load to achieve the import of large files.
-
- 1. Modify the maximum number of scans and concurrency of a single BE according to the current number of BEs and the size of the original file.
-
- ```
- Modify the configuration in fe.conf
-
- max_broker_concurrency = BE number
- The amount of data processed by a single BE for the current import task = the original file size / max_broker_concurrency
- max_bytes_per_broker_scanner >= the amount of data processed by a single BE for the current import task
-
- For example, a 100G file with 10 BEs in the cluster
- max_broker_concurrency = 10
- max_bytes_per_broker_scanner >= 10G = 100G / 10
-
- ```
-
- After modification, all BEs process import tasks concurrently, and each BE processes part of the original file.
-
- *Note: The configurations in both FEs are system configurations, that is to say, their modifications work on all Broker load tasks.*
-
- 2. Customize the timeout time of the current import task when creating the import
-
- ```
- Current import task single BE processing data volume / user Doris cluster slowest import speed (MB/s) >= current import task timeout time >= current import task single BE processing data volume / 10M/s
-
- For example, a 100G file with 10 BEs in the cluster
- Timeout >= 1000s = 10G / 10M/s
-
- ```
-
- 3. When the timeout calculated in the second step exceeds the system's default maximum import timeout of 4 hours
-
- At this time, it is not recommended to directly increase the maximum import timeout. If a single import takes more than the default maximum timeout of 4 hours, it is better to split the file to be imported and import it in several batches, mainly because the cost of retrying after a failure is very high when a single import exceeds 4 hours.
-
- The maximum amount of imported file data expected by the Doris cluster can be calculated by the following formula:
-
- ```
- Expected maximum imported file data volume = 14400s * 10M/s * number of BEs
- For example, if the number of BEs in the cluster is 10:
- Expected maximum imported file data volume = 14400s * 10M/s * 10 = 1440000M ≈ 1440G
-
- Note: The average user's environment may not reach the speed of 10M/s, so it is recommended that more than 500G files be split and imported.
-
- ```
-
-### Job Scheduling
-
-The system limits the number of Broker Load jobs running in a cluster to prevent too many Load jobs from running at the same time.
-
-First, the configuration parameter of FE: `desired_max_waiting_jobs` will limit the number of Broker Load jobs that are pending or running (the job status is PENDING or LOADING) in a cluster. The default is 100. If this threshold is exceeded, the newly submitted job will be rejected directly.
-
-A Broker Load job will be divided into pending task and loading task phases. Among them, the pending task is responsible for obtaining the information of the imported file, and the loading task will be sent to BE to perform specific import tasks.
-
-The configuration parameter `async_pending_load_task_pool_size` of FE is used to limit the number of pending tasks running at the same time. It is also equivalent to controlling the number of import tasks that are actually running. This parameter defaults to 10. In other words, assuming that the user submits 100 Load jobs, only 10 jobs will enter the LOADING state and start execution, while other jobs are in the PENDING waiting state.
-
-The FE configuration parameter `async_loading_load_task_pool_size` is used to limit the number of loading tasks that run at the same time. A Broker Load job will have 1 pending task and multiple loading tasks (equal to the number of DATA INFILE clauses in the LOAD statement). So `async_loading_load_task_pool_size` should be greater than or equal to `async_pending_load_task_pool_size`.
-
-Because the work of pending tasks is relatively lightweight (for example, just accessing hdfs to obtain file information), `async_pending_load_task_pool_size` does not need to be large, and the default 10 is usually sufficient. And `async_loading_load_task_pool_size` is really used to limit the import tasks that can be run at the same time. It can be adjusted appropriately according to the cluster size.
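-
-A fe.conf sketch reflecting the defaults described above; the value of `async_loading_load_task_pool_size` is illustrative and only needs to be greater than or equal to `async_pending_load_task_pool_size`:
-
-```
-desired_max_waiting_jobs = 100
-async_pending_load_task_pool_size = 10
-async_loading_load_task_pool_size = 10
-```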
-
-### Performance analysis
-
-You can execute `set enable_profile=true` to enable the load job profile before submitting the import job. After the import job is completed, you can view its profile in the `Queries` tab of the FE web page.
-
-This profile can help analyze the running status of the import job.
-
-Currently, the profile can only be viewed after the job is successfully executed.
-
-### Complete examples
-
-Data situation: the user data is in HDFS, the file address is hdfs://abc.com:8888/store_sales, the HDFS authentication user name is root, the password is password, the data size is about 30G, and it should be imported into the table store_sales in the database bj_sales.
-
-Cluster situation: The number of BEs in the cluster is about 3, and the Broker name is broker.
-
-+ Step 1: According to the calculation method above, the amount of data imported by a single BE is 10G, so the FE configuration needs to be modified first to increase the maximum amount imported by a single BE:
-
- ```
- max_bytes_per_broker_scanner = 10737418240
-
- ```
-
-+ Step 2: By calculation, the import time is about 1000s, which does not exceed the default timeout, so there is no need to configure a custom import timeout.
-
-+ Step 3: Create import statements
-
- ```
- LOAD LABEL bj_sales.store_sales_broker_load_01
- (
- DATA INFILE("hdfs://abc.com:8888/store_sales")
- INTO TABLE store_sales
- )
- WITH BROKER 'broker'
- ("username"="root", "password"="password");
- ```
-
-## Common Questions
-
-* failed with: `Scan bytes per broker scanner exceed limit:xxx`
-
- Refer to the Best Practices section of the document and modify the FE configuration items `max_bytes_per_broker_scanner` and `max_broker_concurrency`.
-
-* failed with: `failed to send batch` or `TabletWriter add batch with unknown id`
-
- Refer to **General System Configuration** in **BE Configuration** in the [Import Manual](./load-manual.md), and modify `query_timeout` and `streaming_load_rpc_max_alive_time_sec` appropriately.
-
-* failed with: `LOAD_RUN_FAIL; msg: Invalid Column Name: xxx`
-
- If it is PARQUET or ORC format data, you need to keep the column names in the file header consistent with the column names in the doris table, such as:
- ```
- (tmp_c1, tmp_c2)
- SET
- (
- id = tmp_c2,
- name = tmp_c1
- )
- ```
- This means taking the columns named (tmp_c1, tmp_c2) in the parquet or orc file and mapping them to the (id, name) columns of the Doris table. If SET is not specified, the column names in the file are used directly as the mapping.
-
- Note: If orc files generated directly by some versions of hive are used, the header in the orc file is not the column names from the hive meta but (_col0, _col1, _col2, ...), which may cause the Invalid Column Name error; in that case you need to use SET for the mapping.
diff --git a/docs/en/administrator-guide/load-data/delete-manual.md b/docs/en/administrator-guide/load-data/delete-manual.md
deleted file mode 100644
index fc0302bb7b..0000000000
--- a/docs/en/administrator-guide/load-data/delete-manual.md
+++ /dev/null
@@ -1,194 +0,0 @@
----
-{
- "title": "Delete",
- "language": "en"
-}
----
-
-
-
-# Delete
-
-Unlike other import methods, delete is a synchronous process. Similar to insert into, all delete operations are independent import jobs in Doris. Generally, a delete statement needs to specify the table, partition and delete condition to indicate which data is to be deleted, and the data on the base index and rollup indexes will be deleted at the same time.
-
-
-## Syntax
-
-The delete statement's syntax is as follows:
-
-```
-DELETE FROM table_name [PARTITION partition_name]
-WHERE
-column_name1 op value[ AND column_name2 op value ...];
-```
-
-example 1:
-
-```
-DELETE FROM my_table PARTITION p1 WHERE k1 = 3;
-```
-
-example 2:
-
-```
-DELETE FROM my_table PARTITION p1 WHERE k1 < 3 AND k2 = "abc";
-```
-
-The following describes the parameters used in the delete statement:
-
-* PARTITION
-
- The target partition of the delete statement. If not specified, the table must be a single partition table, otherwise it cannot be deleted
-
-* WHERE
-
- The condition of the delete statement. All delete statements must specify a where condition.
-
-Explanation:
-
-1. The type of `OP` in the WHERE condition can only include `=, >, <, >=, <=, !=, in, not in`.
-2. The column in the WHERE condition can only be the `key` column.
-3. Cannot delete when the `key` column does not exist in any rollup table.
-4. Each condition in WHERE condition can only be connected by `and`. If you want `or`, you are suggested to write these conditions into two delete statements.
-5. If the specified table is a range or list partitioned table, `PARTITION` must be specified unless the table is a single partition table.
-6. Unlike the insert into command, delete statement cannot specify `label` manually. You can view the concept of `label` in [Insert Into](./insert-into-manual.md)
-
-## Delete Result
-
-The delete command is an SQL command, and the returned results are synchronous. It can be divided into the following types:
-
-1. Successful visible
-
- If delete completes successfully and is visible, the following results will be returned, `query OK` indicates success.
-
- ```
- mysql> delete from test_tbl PARTITION p1 where k1 = 1;
- Query OK, 0 rows affected (0.04 sec)
- {'label':'delete_e7830c72-eb14-4cb9-bbb6-eebd4511d251', 'status':'VISIBLE', 'txnId':'4005'}
- ```
-
-2. Submitted successfully, but not visible
-
-
- Doris transaction commits are divided into two steps: commit and publish version. Only after the publish version step is completed does the result become visible to the user. If the commit has succeeded, it can be assumed that the publish version step will eventually succeed. Doris waits for publishing for a period of time after committing; if the wait times out, it returns to the user first, even though the publish version step has not been completed, and prompts the user that the commit has finished but is not yet visible. If the delete has been committed and executed but has not yet been published and become visible, the following result will be returned:
-
- ```
- mysql> delete from test_tbl PARTITION p1 where k1 = 1;
- Query OK, 0 rows affected (0.04 sec)
- {'label':'delete_e7830c72-eb14-4cb9-bbb6-eebd4511d251', 'status':'COMMITTED', 'txnId':'4005', 'err':'delete job is committed but may be taking effect later' }
- ```
-
- The result will return a JSON string at the same time:
-
- `affected rows`: Indicates the rows affected by this deletion. Since Doris deletion is currently a logical deletion, this value is always 0.
-
- `label`: The label generated automatically to be the signature of the delete jobs. Each job has a unique label within a single database.
-
- `status`: Indicates whether the data deletion is visible. If it is visible, `visible` will be displayed. If it is not visible, `committed` will be displayed.
-
-
- `txnId`: The transaction ID corresponding to the delete job
-
- `err`: Field will display some details of this deletion
-
-3. Commit failed, transaction cancelled
-
- If the delete statement is not submitted successfully, it will be automatically aborted by Doris and the following results will be returned
-
-
- ```
- mysql> delete from test_tbl partition p1 where k1 > 80;
- ERROR 1064 (HY000): errCode = 2, detailMessage = {error reason}
- ```
-
- example:
-
- A timed-out deletion will return the timeout and the unfinished replicas, displayed as `(tablet = replica)`:
-
-
- ```
- mysql> delete from test_tbl partition p1 where k1 > 80;
- ERROR 1064 (HY000): errCode = 2, detailMessage = failed to delete replicas from job: 4005, Unfinished replicas:10000=60000, 10001=60000, 10002=60000
- ```
-
- **The correct processing logic for the returned results of the delete operation is as follows:**
-
- 1. If `Error 1064 (HY000)` is returned, deletion fails
-
- 2. If the returned result is `Query OK`, the deletion is successful
-
- 1. If `status` is `committed`, the data deletion has been committed and will eventually become visible. Users can wait for a while and then use the `show delete` command to view the result.
- 2. If `status` is `visible`, the data have been deleted successfully.
-
-## Relevant Configuration
-
-### FE configuration
-
-**TIMEOUT configuration**
-
-In general, Doris's deletion timeout is limited from 30 seconds to 5 minutes. The specific time can be adjusted through the following configuration items
-
-* `tablet_delete_timeout_second`
-
- The timeout of delete itself can be elastically changed by the number of tablets in the specified partition. This configuration represents the average timeout contributed by a tablet. The default value is 2.
-
- Assuming that there are 5 tablets under the partition specified by this deletion, the timeout calculated for the deletion is 10 seconds. Because this is lower than the minimum timeout of 30 seconds, the final timeout is 30 seconds.
-
-* `load_straggler_wait_second`
-
- If the user estimates a large amount of data, so that the upper limit of 5 minutes is insufficient, the user can adjust the upper limit of timeout through this item, and the default value is 300.
-
- **The specific calculation rule of timeout(seconds)**
-
- `TIMEOUT = MIN(load_straggler_wait_second, MAX(30, tablet_delete_timeout_second * tablet_num))`
-
-* `query_timeout`
-
- Because delete itself is an SQL command, the deletion statement is also limited by the session variables, and the timeout is also affected by the session value `query_timeout`. You can increase the value by `set query_timeout = xxx`.
-
-**InPredicate configuration**
-
-* `max_allowed_in_element_num_of_delete`
-
- If the user needs a large number of elements in the in predicate, this upper limit on the allowed number of in-predicate elements can be adjusted; the default value is 1024. A summary sketch of these settings follows this section.
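-
-Summarizing the knobs above, a sketch of the fe.conf settings with their documented defaults; the session-level timeout can be raised separately with `SET query_timeout = xxx;`:
-
-```
-tablet_delete_timeout_second = 2
-load_straggler_wait_second = 300
-max_allowed_in_element_num_of_delete = 1024
-```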
-
-## Show delete history
-
-1. The user can view the historically completed delete jobs through the show delete statement.
-
- Syntax
-
- ```
- SHOW DELETE [FROM db_name]
- ```
-
- example
-
- ```
- mysql> show delete from test_db;
- +-----------+---------------+---------------------+-----------------+----------+
- | TableName | PartitionName | CreateTime | DeleteCondition | State |
- +-----------+---------------+---------------------+-----------------+----------+
- | empty_tbl | p3 | 2020-04-15 23:09:35 | k1 EQ "1" | FINISHED |
- | test_tbl | p4 | 2020-04-15 23:09:53 | k1 GT "80" | FINISHED |
- +-----------+---------------+---------------------+-----------------+----------+
- 2 rows in set (0.00 sec)
- ```
-
diff --git a/docs/en/administrator-guide/load-data/insert-into-manual.md b/docs/en/administrator-guide/load-data/insert-into-manual.md
deleted file mode 100644
index bdc85f439a..0000000000
--- a/docs/en/administrator-guide/load-data/insert-into-manual.md
+++ /dev/null
@@ -1,297 +0,0 @@
----
-{
- "title": "Insert Into",
- "language": "en"
-}
----
-
-
-
-# Insert Into
-
-The use of Insert Into statements is similar to that of Insert Into statements in databases such as MySQL. But in Doris, all data writing is a separate import job. So Insert Into is also introduced here as an import method.
-
-The Insert Into command mainly includes the following two kinds:
-
-* INSERT INTO tbl SELECT ...
-* INSERT INTO tbl (col1, col2, ...) VALUES (1, 2, ...), (1,3, ...);
-
-The second command is for demonstration only and should not be used in a test or production environment.
-
-## Basic operations
-
-### Create a Load
-
-The Insert Into command needs to be submitted through MySQL protocol. Creating an import request returns the import result synchronously.
-
-Grammar:
-
-```
-INSERT INTO table_name [partition_info] [WITH LABEL label] [col_list] [query_stmt] [VALUES];
-```
-
-Examples:
-
-```
-INSERT INTO tbl2 WITH LABEL label1 SELECT * FROM tbl3;
-INSERT INTO tbl1 VALUES ("qweasdzxcqweasdzxc"), ("a");
-```
-
-**Notice**
-
-When using `CTE(Common Table Expressions)` as the query part of insert operation, the `WITH LABEL` or column list part must be specified.
-For example:
-
-```
-INSERT INTO tbl1 WITH LABEL label1
-WITH cte1 AS (SELECT * FROM tbl1), cte2 AS (SELECT * FROM tbl2)
-SELECT k1 FROM cte1 JOIN cte2 WHERE cte1.k1 = 1;
-
-INSERT INTO tbl1 (k1)
-WITH cte1 AS (SELECT * FROM tbl1), cte2 AS (SELECT * FROM tbl2)
-SELECT k1 FROM cte1 JOIN cte2 WHERE cte1.k1 = 1;
-```
-
-The following is a brief introduction to the parameters used in creating import statements:
-
-+ partition\_info
-
- Import the target partition of the table. If the target partition is specified, only the data that matches the target partition will be imported. If not specified, the default value is all partitions of the table.
-
-+ col\_list
-
- The target column of the import table can exist in any order. If no target column is specified, the default value is all columns in this table. If a column in the table does not exist in the target column, the column needs a default value, otherwise Insert Into will fail.
-
- If the result column type of the query statement is inconsistent with the type of the target column, an implicit type conversion is invoked. If the conversion is not possible, the Insert Into statement will report a parsing error.
-
-+ query\_stmt
-
- Through a query statement, the results of the query statement are imported into other tables in Doris system. Query statements support any SQL query syntax supported by Doris.
-
-+ VALUES
-
- Users can insert one or more rows of data through the VALUES syntax.
-
- *Note: VALUES is only suitable for importing several pieces of data as DEMO. It is totally unsuitable for any test and production environment. Doris system itself is not suitable for single data import scenarios. It is recommended to use INSERT INTO SELECT for batch import.*
-
-* WITH LABEL
-
- Since INSERT is a load job, it can also be assigned a label. If no label is specified, Doris will use a UUID as the label.
-
- This feature needs Doris version 0.11+.
-
- *Note: It is recommended that Label be specified rather than automatically allocated by the system. If the system allocates automatically, but during the execution of the Insert Into statement, the connection is disconnected due to network errors, etc., then it is impossible to know whether Insert Into is successful. If you specify Label, you can view the task results again through Label.*
-
-### Load results
-
-Insert Into itself is a SQL command, and the return result is divided into the following types according to the different execution results:
-
-1. Result set is empty
-
- If the result set of the insert corresponding SELECT statement is empty, it is returned as follows:
-
- ```
- mysql> insert into tbl1 select * from empty_tbl;
- Query OK, 0 rows affected (0.02 sec)
- ```
-
- `Query OK` indicates successful execution. `0 rows affected` means that no data was loaded.
-
-2. The result set is not empty
-
- In the case where the result set is not empty. The returned results are divided into the following situations:
-
- 1. Insert is successful and data is visible:
-
- ```
- mysql> insert into tbl1 select * from tbl2;
- Query OK, 4 rows affected (0.38 sec)
- {'label': 'insert_8510c568-9eda-4173-9e36-6adc7d35291c', 'status': 'visible', 'txnId': '4005'}
-
- mysql> insert into tbl1 with label my_label1 select * from tbl2;
- Query OK, 4 rows affected (0.38 sec)
- {'label': 'my_label1', 'status': 'visible', 'txnId': '4005'}
-
- mysql> insert into tbl1 select * from tbl2;
- Query OK, 2 rows affected, 2 warnings (0.31 sec)
- {'label': 'insert_f0747f0e-7a35-46e2-affa-13a235f4020d', 'status': 'visible', 'txnId': '4005'}
-
- mysql> insert into tbl1 select * from tbl2;
- Query OK, 2 rows affected, 2 warnings (0.31 sec)
- {'label': 'insert_f0747f0e-7a35-46e2-affa-13a235f4020d', 'status': 'committed', 'txnId': '4005'}
- ```
-
- `Query OK` indicates successful execution. `4 rows affected` means that a total of 4 rows of data were imported. `2 warnings` indicates the number of lines to be filtered.
-
- Also returns a json string:
-
- ```
- {'label': 'my_label1', 'status': 'visible', 'txnId': '4005'}
- {'label': 'insert_f0747f0e-7a35-46e2-affa-13a235f4020d', 'status': 'committed', 'txnId': '4005'}
- {'label': 'my_label1', 'status': 'visible', 'txnId': '4005', 'err': 'some other error'}
- ```
-
- `label` is a user-specified label or an automatically generated label. Label is the ID of this Insert Into load job. Each load job has a label that is unique within a single database.
-
- `status` indicates whether the loaded data is visible. If visible, show `visible`, if not, show` committed`.
-
- `txnId` is the id of the load transaction corresponding to this insert.
-
- The `err` field displays some other unexpected errors.
-
- When the user needs to view the filtered rows, the following statement can be used:
-
- ```
- show load where label = "xxx";
- ```
-
- The URL in the returned result can be used to query the wrong data. For details, see the following **View Error Lines** Summary.
-
- **"Data is not visible" is a temporary status, this batch of data must be visible eventually**
-
- You can view the visible status of this batch of data with the following statement:
-
- ```
- show transaction where id = 4005;
- ```
-
- If the `TransactionStatus` column in the returned result is `visible`, the data is visible.
-
- 2. Insert fails
-
- Execution failure indicates that no data was successfully loaded, and returns as follows:
-
- ```
- mysql> insert into tbl1 select * from tbl2 where k1 = "a";
- ERROR 1064 (HY000): all partitions have no load data. Url: http://10.74.167.16:8042/api/_load_error_log?file=__shard_2/error_log_insert_stmt_ba8bb9e158e4879-ae8de8507c0bf8a2_ba8bb9e158e4879_ae8de850e8de850
- ```
-
- Where `ERROR 1064 (HY000): all partitions have no load data` shows the reason for the failure. The latter url can be used to query the wrong data. For details, see the following **View Error Lines** Summary.
-
-**In summary, the correct processing logic for the results returned by the insert operation should be:**
-
-1. If the returned result is `ERROR 1064 (HY000)`, it means that the import failed.
-2. If the returned result is `Query OK`, it means the execution was successful.
-
- 1. If `rows affected` is 0, the result set is empty and no data is loaded.
- 2. If `rows affected` is greater than 0:
- 1. If `status` is` committed`, the data is not yet visible. You need to check the status through the `show transaction` statement until `visible`.
- 2. If `status` is` visible`, the data is loaded successfully.
- 3. If `warnings` is greater than 0, it means that some data is filtered. You can get the url through the `show load` statement to see the filtered rows.
-
-### SHOW LAST INSERT
-
-In the previous section, we described how to follow up on the results of insert operations. However, it is difficult to get the JSON string of the returned result in some MySQL client libraries. Therefore, Doris also provides the `SHOW LAST INSERT` command to explicitly retrieve the result of the last insert operation.
-
-After executing an insert operation, you can execute `SHOW LAST INSERT` on the same session connection. This command returns the result of the most recent insert operation, e.g.
-
-```
-mysql> show last insert\G
-*************************** 1. row ***************************
- TransactionId: 64067
- Label: insert_ba8f33aea9544866-8ed77e2844d0cc9b
- Database: default_cluster:db1
- Table: t1
-TransactionStatus: VISIBLE
- LoadedRows: 2
- FilteredRows: 0
-```
-
-This command returns the insert results and the details of the corresponding transaction. Therefore, you can continue to execute the `show last insert` command after each insert operation to get the insert results.
-
-> Note: This command will only return the results of the last insert operation within the same session connection. If the connection is broken or replaced with a new one, the empty set will be returned.
-
-## Relevant System Configuration
-
-### FE configuration
-
-+ timeout
-
- The timeout of the import task, in seconds. If the import task is not completed within the set timeout, it will be cancelled by the system and become CANCELLED.
-
- At present, Insert Into does not support custom import timeout time. All Insert Into imports have a uniform timeout time. The default timeout time is 1 hour. If the imported source file cannot complete the import within the specified time, the parameter ``insert_load_default_timeout_second`` of FE needs to be adjusted.
-
- At the same time, the Insert Into statement is restricted by the Session variable `query_timeout`. You can increase the timeout by `SET query_timeout = xxx;`, in seconds.
-
-### Session Variables
-
-+ enable\_insert\_strict
-
- The Insert Into import itself cannot control the tolerable error rate of the import. Users can only use the Session parameter `enable_insert_strict`. When this parameter is set to false, it indicates that at least one data has been imported correctly, and then it returns successfully. When this parameter is set to true, the import fails if there is a data error. The default is false. It can be set by `SET enable_insert_strict = true;`.
-
-+ query\_timeout
-
- Insert Into itself is also an SQL command, so the Insert Into statement is also restricted by the Session variable `query_timeout`. You can increase the timeout by `SET query_timeout = xxx;`, in seconds (a combined example follows below).
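-
-Both variables are set at the session level, for example (the 3600-second timeout is only an illustrative value):
-
-```
-SET enable_insert_strict = true;
-SET query_timeout = 3600;
-```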
-
-## Best Practices
-
-### Application scenarios
-1. Users who only want to import a few rows of fake data to verify the functionality of the Doris system can use the INSERT INTO VALUES syntax.
-2. Users who want to ETL the data already in a Doris table and import it into a new Doris table should use the INSERT INTO SELECT syntax.
-3. Users can create an external table, such as MySQL external table mapping a table in MySQL system. Or create Broker external tables to map data files on HDFS. Then the data from the external table is imported into the Doris table for storage through the INSERT INTO SELECT grammar.
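-
-A minimal sketch of the first two forms (the table and column names are illustrative):
-
-```
--- Scenario 1: insert a few test rows directly.
-INSERT INTO test_tbl (id, city) VALUES (1, "beijing"), (2, "shanghai");
-
--- Scenario 2: ETL data from an existing table into a new table.
-INSERT INTO new_tbl SELECT id, upper(city) FROM test_tbl WHERE id > 1;
-```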
-
-### Data volume
-Insert Into has no limit on the amount of data and can also support large imports. However, Insert Into has a default timeout, so if the estimated import volume is too large, the system's Insert Into import timeout needs to be adjusted accordingly:
-
-```
-Importable data volume = 36 GB, i.e., no more than 3600 s * 10 MB/s
-Here, 10 MB/s is the maximum import speed limit. Users should replace the 10 MB/s in the formula with the average import speed calculated from the current cluster situation.
-```
-
-### Complete examples
-
-Users have a table `store_sales` in the database `sales`, and create a new table `bj_store_sales` in the same database. They want to import the data from `store_sales` into the new table `bj_store_sales`. The amount of data to be imported is about 10 GB.
-
-```
-store_sales schema:
-(id, total, user_id, sale_timestamp, region)
-
-bj_store_sales schema:
-(id, total, user_id, sale_timestamp)
-
-```
-
-Cluster situation: The average import speed of the user's current cluster is about 5 MB/s.
-
-+ Step1: Determine whether you want to modify the default timeout of Insert Into
-
- ```
- Calculate the approximate time of import
- 10 GB / 5 MB/s = 2000 s
-
- Modify FE configuration
- insert_load_default_timeout_second = 2000
- ```
-
-+ Step2: Create Import Tasks
-
- Since users want to ETL data from a source table and import it into the target table, they should use the INSERT INTO SELECT (query statement) mode for the import.
-
- ```
- INSERT INTO bj_store_sales SELECT id, total, user_id, sale_timestamp FROM store_sales where region = "bj";
- ```
-
-## Common Questions
-
-* Viewing error rows
-
- Because Insert Into cannot control the error rate, errors can only be fully tolerated or fully rejected through `enable_insert_strict`. If `enable_insert_strict` is set to true, Insert Into may fail when there is erroneous data. If it is set to false, only the qualifying rows may be imported. In either case, Doris currently cannot show the non-conforming data rows, so the user cannot view the specific import errors through the Insert Into statement.
-
- The causes of errors are usually: the source data column length exceeds the destination column length, column type mismatch, partition mismatch, column order mismatch, etc. If the problem still cannot be identified, it is currently recommended to run the SELECT statement contained in the Insert Into statement, export its result to a file, and then import that file through Stream Load to see the specific errors.
diff --git a/docs/en/administrator-guide/load-data/load-json-format.md b/docs/en/administrator-guide/load-data/load-json-format.md
deleted file mode 100644
index 39a82aa239..0000000000
--- a/docs/en/administrator-guide/load-data/load-json-format.md
+++ /dev/null
@@ -1,467 +0,0 @@
----
-{
- "title": "Load Json Format Data",
- "language": "en"
-}
----
-
-
-
-# Load Json Format Data
-
-Doris supports data load in Json format since version 0.12.
-
-## Supported Load Methods
-
-Currently only the following load methods support data import in Json format:
-
-* Stream Load
-* Routine Load
-
-For specific instructions on the above load methods, please refer to the relevant documentation. This document mainly introduces the instructions for using Json in these load methods.
-
-## Supported Json Format
-
-Currently, only the following two Json formats are supported:
-
-1. Multi-line data represented by Array
-
- Json format with Array as the root node. Each element in the Array represents a row of data to be loaded, usually an Object. Examples are as follows:
-
- ```
- [
- { "id": 123, "city" : "beijing"},
- { "id": 456, "city" : "shanghai"},
- ...
- ]
- ```
-
- ```
- [
- { "id": 123, "city" : { "name" : "beijing", "region" : "haidian"}},
- { "id": 456, "city" : { "name" : "beijing", "region" : "chaoyang"}},
- ...
- ]
- ```
-
- This method is usually used for the Stream Load method to represent multiple rows of data in a batch of load data.
-
- This method must be used in conjunction with the setting `strip_outer_array=true`. Doris will expand the array when parsing, and then parse each Object in turn as a row of data.
-
-2. Single row of data represented by Object
-
- Json format with Object as the root node. The entire Object represents a row of data to be loaded. Examples are as follows:
-
- ```
- { "id": 123, "city" : "beijing"}
- ```
-
- ```
- { "id": 123, "city" : { "name" : "beijing", "region" : "haidian" }}
- ```
-
- This method is usually used with the Routine Load method, for example when each message in Kafka represents one row of data.
-
-## Json Path
-
-Doris supports extracting the data specified in Json through Json Path.
-
-**Note: Because for Array type data, Doris will first expand the array, and finally perform single-line processing according to the Object format. Therefore, the examples after this document will be illustrated with Json data in single Object format.**
-
-* Json Path is not specified
-
- If Json Path is not specified, Doris will use the column names in the table to find the elements in Object by default. Examples are as follows:
-
- The table contains two columns: `id`, `city`
-
- Json data is as follows:
-
- ```
- { "id": 123, "city" : "beijing"}
- ```
-
- Then Doris will use `id`, `city` to match, and get the final data `123` and `beijing`.
-
- If the Json data is as follows:
-
- ```
- { "id": 123, "name" : "beijing"}
- ```
-
- Then use `id`, `city` to match and get the final data `123` and `null`.
-
-* Json Path is specified
-
- Specify a set of Json Path in the form of a Json data. Each element in the array represents a column to be extracted. Examples are as follows:
-
- ```
- ["$.id", "$.name"]
- ```
- ```
- ["$.id.sub_id", "$.name[0]", "$.city[0]"]
- ```
-
- Doris will use the specified Json Path for data matching and extraction.
-
-* Match non-primitive types
-
- The values that the previous example finally matched are all primitive types, such as Integer, String, and so on. Doris currently does not support complex types, such as Array, Map, etc. So when a non-primitive type is matched, Doris will convert the type to a Json format string and load it as a string type. Examples are as follows:
-
- ```
- { "id": 123, "city" : { "name" : "beijing", "region" : "haidian" }}
- ```
-
- The Json Path is `["$.city"]`. Then the matched elements are:
-
- ```
- { "name" : "beijing", "region" : "haidian" }
- ```
-
- This element will be converted into a string for subsequent load operations:
-
- ```
- "{'name':'beijing','region':'haidian'}"
- ```
-
-* Match failed
-
- When the match fails, `null` will be returned. Examples are as follows:
-
- Json data is:
-
- ```
- { "id": 123, "name" : "beijing"}
- ```
-
- The Json Path is `["$.id", "$.info"]`. Then the matched elements are `123` and `null`.
-
- Doris currently does not distinguish between the null value represented in the Json data and the null value generated when the match fails. Suppose the Json data is:
-
- ```
- { "id": 123, "name" : null }
- ```
-
- Then using either of the following two Json Paths will get the same result: `123` and `null`.
-
- ```
- ["$.id", "$.name"]
- ```
- ```
- ["$.id", "$.info"]
- ```
-
-* Complete match failed
-
- In order to prevent misoperations caused by incorrect parameter settings, when Doris tries to match a row of data and all columns fail to match, the row is considered an error row. Suppose the Json data is:
-
- ```
- { "id": 123, "city" : "beijing" }
- ```
-
- If Json Path is incorrectly written as (or when Json Path is not specified, the columns in the table do not contain `id` and `city`):
-
- ```
- ["$.ad", "$.infa"]
- ```
-
- This will result in a complete match failure. The line will be marked as an error row instead of producing `null, null`.
-
-## Json Path and Columns
-
-Json Path is used to specify how to extract data in JSON format, and Columns specify the mapping and conversion relationship of columns. The two can be used together.
-
-In other words, it is equivalent to using Json Path to rearrange the data in a Json format according to the column order specified in Json Path. After that, you can use Columns to map the rearranged source data to the columns of the table. Examples are as follows:
-
-Data content:
-
-```
-{"k1": 1, "k2": 2}
-```
-
-Table schema:
-
-`k2 int, k1 int`
-
-Load statement 1 (take Stream Load as an example):
-
-```
-curl -v --location-trusted -u root: -H "format: json" -H "jsonpaths: [\"$.k2\", \"$.k1\"]" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
-```
-
-In Load statement 1, only Json Path is specified, and Columns are not specified. The role of Json Path is to extract the Json data in the order of the fields in the Json Path, and then write it in the order of the table schema. The final loaded data results are as follows:
-
-```
-+------+------+
-| k1 | k2 |
-+------+------+
-| 2 | 1 |
-+------+------+
-```
-
-You will see that the actual k1 column has loaded the value of the "k2" column in the Json data. This is because the field name in Json is not equivalent to the field name in the table schema. We need to explicitly specify the mapping relationship between the two.
-
-Load statement 2:
-
-```
-curl -v --location-trusted -u root: -H "format: json" -H "jsonpaths: [\"$.k2\", \"$.k1\"]" -H "columns: k2, k1" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
-```
-
-Compared to load statement 1, here is the Columns field, which is used to describe the mapping relationship of columns, in the order of `k2, k1`. That is, after extracting in the order of the fields in the Json Path, specify the first column as the value of the k2 column in the table, and the second column as the value of the k1 column in the table. The final loaded data results are as follows:
-
-```
-+------+------+
-| k1 | k2 |
-+------+------+
-| 1 | 2 |
-+------+------+
-```
-
-Of course, like other load methods, you can perform column conversion operations in Columns. Examples are as follows:
-
-```
-curl -v --location-trusted -u root: -H "format: json" -H "jsonpaths: [\"$.k2\", \"$.k1\"]" -H "columns: k2, tmp_k1, k1 = tmp_k1 * 100" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
-```
-
-The above example will multiply the value of k1 by 100 and import it. The final imported data results are as follows:
-
-```
-+------+------+
-| k1 | k2 |
-+------+------+
-| 100 | 2 |
-+------+------+
-```
-
-## NULL and Default value
-
-The sample data is as follows:
-
-```
-[
- {"k1": 1, "k2": "a"},
- {"k1": 2},
- {"k1": 3, "k2": "c"}
-]
-```
-
-The table schema is: `k1 int null, k2 varchar(32) null default "x"`
-
-The load statement is as follows:
-
-```
-curl -v --location-trusted -u root: -H "format: json" -H "strip_outer_array: true" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
-```
-
-The import results that users may expect are as follows, that is, for missing columns, fill in default values.
-
-```
-+------+------+
-| k1 | k2 |
-+------+------+
-| 1 | a |
-+------+------+
-| 2 | x |
-+------+------+
-| 3 | c |
-+------+------+
-```
-
-But the actual load result is as follows, that is, for missing columns, NULL is added.
-
-```
-+------+------+
-| k1 | k2 |
-+------+------+
-| 1 | a |
-+------+------+
-| 2 | NULL |
-+------+------+
-| 3 | c |
-+------+------+
-```
-
-This is because through the information in the load statement, Doris does not know that "the missing column is the k2 column in the table".
-If you want to load the above data as expected, the load statement is as follows:
-
-```
-curl -v --location-trusted -u root: -H "format: json" -H "strip_outer_array: true" -H "jsonpaths: [\"$.k1\", \"$.k2\"]" -H "columns: k1, tmp_k2, k2 = ifnull(tmp_k2,'x')" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
-```
-
-## LargeInt and Decimal
-
-Doris supports data types such as LargeInt and Decimal, which have a larger range and higher precision. However, because the RapidJSON library used by Doris parses numeric types at most as Int64 and double, problems such as precision loss or data conversion errors may occur when LargeInt or Decimal values are imported in JSON format.
-
-For example:
-
-```
-[
- {"k1": 1, "k2":9999999999999.999999 }
-]
-```
-
-
-The imported k2 column type is `Decimal(16,9)` and the imported value is `9999999999999.999999`. During the JSON load, the double conversion causes precision loss and the imported value becomes `10000000000000.0002`, which is an import error.
-
-To solve this problem, Doris provides the parameter `num_as_string`. When it is enabled, Doris converts numeric values to strings while parsing the JSON data, so that they are loaded without losing precision.
-
-```
-curl -v --location-trusted -u root: -H "format: json" -H "num_as_string: true" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
-```
-
-But using this parameter may cause unexpected side effects. Doris currently does not support composite types such as Array and Map, so when a non-primitive type is matched, Doris converts it to a JSON-format string. `num_as_string` will also convert the numbers inside compound types into strings, for example:
-
-JSON Data:
-
- { "id": 123, "city" : { "name" : "beijing", "city_id" : 1 }}
-
-Without `num_as_string`, the data of the city column is:
-
-`{ "name" : "beijing", "city_id" : 1 }`
-
-With `num_as_string`, the data of the city column is:
-
-`{ "name" : "beijing", "city_id" : "1" }`
-
-Note that the parameter causes the numeric `city_id` inside the compound type to be treated as a string column and quoted, which differs from the original data.
-
-Therefore, when using JSON load, try to avoid importing LargeInt, Decimal, and composite types at the same time. If this cannot be avoided, you need to fully understand the **side effects**.
-
-## Examples
-
-### Stream Load
-
-Because of the indivisible nature of the Json format, when using Stream Load to load a Json format file, the file content will be fully loaded into memory before processing. Therefore, if the file is too large, it may occupy more memory.
-
-Suppose the table structure is:
-
-```
-id INT NOT NULL,
-city VARCHAR NULL,
-code INT NULL
-```
-
-1. Load single-line data 1
-
- ```
- {"id": 100, "city": "beijing", "code" : 1}
- ```
-
- * Not specify Json Path
-
- ```
- curl --location-trusted -u user:passwd -H "format: json" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load
- ```
-
- Results:
-
- ```
- 100 beijing 1
- ```
-
- * Specify Json Path
-
- ```
- curl --location-trusted -u user:passwd -H "format: json" -H "jsonpaths: [\"$.id\",\"$.city\",\"$.code\"]" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load
- ```
-
- Results:
-
- ```
- 100 beijing 1
- ```
-
-2. Load single-line data 2
-
- ```
- {"id": 100, "content": {"city": "beijing", "code" : 1}}
- ```
-
- * Specify Json Path
-
- ```
- curl --location-trusted -u user:passwd -H "format: json" -H "jsonpaths: [\"$.id\",\"$.content.city\",\"$.content.code\"]" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load
- ```
-
- Results:
-
- ```
- 100 beijing 1
- ```
-
-3. Load multi-line data
-
- ```
- [
- {"id": 100, "city": "beijing", "code" : 1},
- {"id": 101, "city": "shanghai"},
- {"id": 102, "city": "tianjin", "code" : 3},
- {"id": 103, "city": "chongqing", "code" : 4},
- {"id": 104, "city": ["zhejiang", "guangzhou"], "code" : 5},
- {
- "id": 105,
- "city": {
- "order1": ["guangzhou"]
- },
- "code" : 6
- }
- ]
- ```
-
- * Specify Json Path
-
- ```
- curl --location-trusted -u user:passwd -H "format: json" -H "jsonpaths: [\"$.id\",\"$.city\",\"$.code\"]" -H "strip_outer_array: true" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load
- ```
-
- Results:
-
- ```
- 100 beijing 1
- 101 shanghai NULL
- 102 tianjin 3
- 103 chongqing 4
- 104 ["zhejiang","guangzhou"] 5
- 105 {"order1":["guangzhou"]} 6
- ```
-
-4. Convert load data
-
- The data is still the multi-row data in Example 3. Now you need to add 1 to the `code` column in the loaded data and load it.
-
- ```
- curl --location-trusted -u user:passwd -H "format: json" -H "jsonpaths: [\"$.id\",\"$.city\",\"$.code\"]" -H "strip_outer_array: true" -H "columns: id, city, tmpc, code=tmpc+1" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load
- ```
-
- Results:
-
- ```
- 100 beijing 2
- 101 shanghai NULL
- 102 tianjin 4
- 103 chongqing 5
- 104 ["zhejiang","guangzhou"] 6
- 105 {"order1":["guangzhou"]} 7
- ```
-
-### Routine Load
-
-Routine Load processes Json data in the same way as Stream Load, so it is not repeated here.
-
-For the Kafka data source, the content of each message is treated as a complete piece of Json data. If a message contains multiple rows of data expressed in Array format, all of those rows will be loaded while Kafka's offset only increases by 1. If an Array-format Json represents multiple rows of data but parsing fails because of a Json format error, the error-row count only increases by 1 (since parsing failed, Doris cannot determine how many rows it actually contains and can only record one error row).
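-
-As a hedged sketch, a routine load job that consumes Json messages from Kafka might look like the following (the broker list, topic, table, and column names are illustrative):
-
-```
-CREATE ROUTINE LOAD db1.kafka_json_job ON tbl1
-COLUMNS(id, city, code)
-PROPERTIES
-(
-    "format" = "json",
-    "jsonpaths" = "[\"$.id\",\"$.city\",\"$.code\"]",
-    "strip_outer_array" = "true"
-)
-FROM KAFKA
-(
-    "kafka_broker_list" = "broker1:9092",
-    "kafka_topic" = "my_topic"
-);
-```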
diff --git a/docs/en/administrator-guide/load-data/load-manual.md b/docs/en/administrator-guide/load-data/load-manual.md
deleted file mode 100644
index 8d813a75f5..0000000000
--- a/docs/en/administrator-guide/load-data/load-manual.md
+++ /dev/null
@@ -1,228 +0,0 @@
----
-{
- "title": "Introduction Overview",
- "language": "en"
-}
----
-
-
-
-# Introduction Overview
-
-The Load function is to import the user's raw data into Doris. After successful import, users can query data through Mysql client.
-
-Doris supports multiple imports. It is recommended to read this document in full first, and then to view the detailed documents of their respective import modes according to the selected import mode.
-
-## Basic concepts
-
-1. Frontend (FE): Metadata and scheduling nodes of Doris system. In the import process, it is mainly responsible for the generation of import planning and the scheduling of import tasks.
-2. Backend (BE): The computing and storage nodes of Doris system. In the import process, it is mainly responsible for ETL and storage of data.
-3. Broker: Broker is an independent stateless process. It encapsulates the file system interface and provides Doris with the ability to read files in the remote storage system.
-4. Load job: The import job reads the source data submitted by the user, transforms or cleans it, and imports the data into the Doris system. After the import is completed, the data can be queried by the user.
-5. Label: All import jobs have a Label. Label is unique in a database and can be specified by the user or automatically generated by the system to identify an import job. The same Label can only be used for a successful import job.
-6. MySQL Protocol/HTTP Protocol: Doris provides two kinds of access protocol interfaces. MySQL protocol and HTTP protocol. Part of the import mode uses MySQL protocol interface to submit jobs, and part of the import mode uses HTTP protocol interface to submit jobs.
-
-## Load mode
-
-To adapt to different data import requirements, Doris system provides 6 different import methods. Each import mode supports different data sources and has different usage modes (asynchronous, synchronous).
-
-All import methods support CSV data format. Broker load also supports parquet and orc data format.
-
-For instructions on each import mode, please refer to the operation manual for a single import mode.
-
-* Broker load
-
- Access and read external data sources (such as HDFS) through the Broker process and import them into Doris. The user submits the import job through Mysql protocol and executes it asynchronously. View the import results through the `SHOW LOAD` command.
-
-* Stream load
-
- Users submit requests through HTTP protocol and create imports with raw data. It is mainly used to quickly import data from local files or data streams into Doris. The Import command returns the import result synchronously.
-
-* Insert
-
- Similar to the INSERT statement in MySQL, Doris provides `INSERT INTO tbl SELECT ...;` to read data from one Doris table and import it into another, and `INSERT INTO tbl VALUES (...);` to insert a single row of data.
-
-* Multi load
-
- Users submit multiple import jobs through HTTP protocol. Multi Load guarantees the atomic validity of multiple import jobs.
-
-* Routine load
-
- Users submit routine import jobs through MySQL protocol, generate a resident thread, read and import data from data sources (such as Kafka) uninterruptedly into Doris.
-
-* Load through S3 protocol
-
- Users directly load data through the S3 protocol, and the usage is similar to Broker Load
-
-## Basic Principles
-
-### Import execution process
-
-
-```
-+---------+ +---------+ +----------+ +-----------+
-| | | | | | | |
-| PENDING +----->+ ETL +----->+ LOADING +----->+ FINISHED |
-| | | | | | | |
-+---------+ +---+-----+ +----+-----+ +-----------+
- | | |
- | | |
- | | |
- | | | +-----------+
- | | | | |
- +---------------+-----------------+------------> CANCELLED |
- | |
- +-----------+
-
-```
-
-As shown above, an import operation mainly goes through the four stages above.
-
-+ PENDING (not required): Only Broker Load has this stage. Broker Load is submitted by the user and stays at this stage for a short time until it is scheduled by Scheduler in FE. Scheduler's schedule interval is 5 seconds.
-
-+ ETL (not required): This stage exists before version 0.10.0 (included), mainly for transforming raw data according to user declaration and filtering raw data that does not meet the requirements. In the version after 0.10.0, the ETL phase no longer exists, and the work of data transformation is merged into the LOADING phase.
-
-+ LOADING: In versions before 0.10.0 (inclusive), this stage mainly pushes the transformed data to the corresponding BE storage. In versions after 0.10.0, the data is first cleaned and transformed, and then sent to BE storage. After all the data has been sent, the job enters the phase of waiting for the data to take effect, and the Load job status remains LOADING.
-
-+ FINISHED: After all the data involved in Load Job takes effect, the state of Load Job becomes FINISHED. Data imported after FINISHED can be queried.
-
-+ CANCELLED: Before a job is FINISHED, it may be cancelled and enter the CANCELLED state, for example because the user manually cancels it or an error occurs during the import. CANCELLED is also a final state of the Load Job and cannot be executed again.
-
-In the stages above, except for the transition from PENDING to LOADING, which is scheduled by the Scheduler, the transitions between the other stages are implemented by a callback mechanism.
-
-### Label and Atomicity
-
-Doris provides an atomicity guarantee for all import methods: the data within the same import job takes effect atomically, and there is no case where only part of the data is imported.
-
-At the same time, each import job has a Label that is specified by the user or automatically generated by the system. A Label is unique within a database. Once the import job corresponding to a Label succeeds, that Label cannot be used to submit another import job. If the import job corresponding to a Label fails, the Label can be reused.
-
-Users can rely on the Label mechanism to ensure that the data corresponding to a Label is imported at most once, i.e., At-Most-Once semantics.
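-
-As a hedged sketch, a Label is specified when submitting a Broker Load job; once this job succeeds, submitting another job with the same Label is rejected (the path, broker name, and credentials are placeholders):
-
-```
-LOAD LABEL db1.label_20220101_batch1
-(
-    DATA INFILE("hdfs://host:port/user/data/file.txt")
-    INTO TABLE tbl1
-    COLUMNS TERMINATED BY ","
-)
-WITH BROKER "hdfs_broker"
-(
-    "username" = "user",
-    "password" = "passwd"
-);
-```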
-
-
-## Synchronization and asynchronization
-
-Doris's current import methods fall into two categories, synchronous and asynchronous. If an external program accesses Doris's import function, it is necessary to determine which type of import mode is used and then determine the access logic.
-
-### Synchronization
-
-Synchronous import means that the user creates an import task, Doris executes the import synchronously, and returns the result after execution. The user can determine whether the import succeeded directly from the result returned by the command that created the import task.
-
-The import methods of synchronous type are **Stream load**, **Insert**.
-
-Operation steps:
-
-1. Users (external systems) create import tasks.
-2. Doris returns the import result.
-3. The user (external system) judges the import result and can submit the import task again if it fails.
-
-*Note: For synchronous imports, if the amount of data being imported is too large, it may take a long time for the import request to return a result.*
-
-### Asynchronism
-Asynchronous import means that after the user creates the import task, Doris returns a creation-success response directly. **Successful creation does not mean that the data has been imported.** The import task is executed asynchronously, and after successful creation the user needs to poll to check the status of the import job. If the creation fails, the user can judge from the failure information whether the task needs to be created again.
-
-The ways to import asynchronous types are: **Broker load**, **Multi load**.
-
-Operation steps:
-
-1. Users (external systems) create import tasks.
-2. Doris returns the import creation result.
-3. The user (external system) judges the result of import creation: if it succeeds, go to step 4; if it fails, retry creating the import, i.e., return to step 1.
-4. The user (external system) polls to see the import task until the status changes to FINISHED or CANCELLED.
-
-### Notes
-Neither asynchronous nor synchronous imports should be retried endlessly after Doris returns an import failure or an import-creation failure. **After a limited number of failed retries, the external system should retain the failure information; most persistent failures are caused by incorrect usage or problems in the data itself.**
-
-## Memory Limit
-
-Users can limit the memory usage of a single load by setting parameters to prevent the system from taking up too much memory and causing the system OOM.
-Different load methods restrict memory in a slightly different way. You can refer to the respective load manuals for viewing.
-
-An load job is usually distributed across multiple Backends. The load memory limit is the memory usage of load job on a single Backend, not memory usage across the cluster.
-
-At the same time, each Backend sets the overall upper limit of the memory available for load. See the General System Configuration section below for specific configuration. This configuration limits the overall memory usage limit for all load tasks running on this Backend.
-
-A memory limit that is too small can affect load efficiency, because the load process may frequently flush in-memory data back to disk when the limit is reached. A limit that is too large can cause a system OOM when load concurrency is high. Therefore, the load memory limit needs to be set appropriately according to your needs.
-
-## Best Practices
-
-When users access Doris import, they usually use program access mode to ensure that data is imported into Doris regularly. Below is a brief description of the best practices for program access to Doris.
-
-1. Choose the appropriate import mode: According to the location of the data source, choose the import mode. For example, if raw data is stored on HDFS, import it using Broker load.
-2. Determine the protocol of the import mode: if Broker Load is selected, the external system needs to be able to periodically submit and view import jobs using the MySQL protocol.
-3. Determine the type of import mode: synchronous or asynchronous. For example, Broker Load is asynchronous, so after submitting the import creation, the external system must call the import-checking command and judge whether the import succeeded based on its result (see the sketch after this list).
-4. Label generation strategy: the Label generation strategy must ensure that each batch of data has a unique and fixed Label, so that Doris can guarantee At-Most-Once.
-5. The program itself guarantees At-Least-Once: the external system needs to guarantee At-Least-Once on its side, so that Exactly-Once can be achieved for the import process.
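-
-A minimal sketch of step 3 for an asynchronous Broker Load job: poll by Label until the job state becomes FINISHED or CANCELLED (the database and label names are illustrative):
-
-```
-SHOW LOAD FROM db1 WHERE LABEL = "label_20220101_batch1";
-```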
-
-## General System Configuration
-
-The following sections explain several system-level configurations that are common to all imports.
-
-### FE configuration
-
-The following configuration belongs to the system configuration of FE, which can be modified by modifying the configuration file ``fe.conf``.
-
-+ max\_load\_timeout\_second and min\_load\_timeout\_second
-
- The two configurations mean the maximum import timeout time and the minimum import timeout time in seconds. The default maximum timeout time is 3 days and the default minimum timeout time is 1 second. User-defined import timeouts should not exceed this range. This parameter is applicable to all import modes.
-
-+ desired\_max\_waiting\_jobs
-
- The maximum number of imported tasks in the waiting queue is 100 by default. New import requests are rejected when the number of imports in the PENDING state (i.e. waiting for execution) in FE exceeds that value.
-
- This configuration is only valid for asynchronous execution of imports. When the number of import waiting for asynchronous execution exceeds the default value, subsequent creation of import requests will be rejected.
-
-+ max\_running\_txn\_num\_per\_db
-
- This configuration means the maximum number of running load jobs allowed in each database (regardless of import type; counted uniformly). The default value is 100. When the current database is running more than the maximum number of imports, subsequent imports will not be executed: synchronous import jobs will be rejected, while asynchronous import jobs will wait in the queue.
-
-### BE configuration
-
-The following configuration belongs to the BE system configuration, which can be modified by modifying the BE configuration file `be.conf`.
-
-+ push\_write\_mbytes\_per\_sec
-
- Writing speed limit for a single Tablet on BE. The default is 10, or 10MB/s. Usually the maximum write speed of BE to a single Tablet is between 10 and 30 MB/s, depending on Schema and the system. This parameter can be adjusted appropriately to control the import speed.
-
-+ write\_buffer\_size
-
- The imported data will be written to a memtable on BE, and the memtable will not be written back to disk until it reaches the threshold. The default size is 100MB. Too small threshold may result in a large number of small files on BE. This threshold can be increased appropriately to reduce the number of files. However, excessive thresholds can lead to RPC timeouts, as shown in the configuration instructions below.
-
-+ tablet\_writer\_rpc\_timeout\_sec
-
- The RPC timeout for sending one batch (1024 rows) during the import process. The default is 600 seconds. Because an RPC may involve writes to multiple memtables, it may time out; this value can be adjusted appropriately to reduce timeout errors (such as `send batch fail`). Also, if the `write_buffer_size` configuration is increased, this parameter needs to be adjusted accordingly.
-
-+ streaming\_load\_rpc\_max\_alive\_time\_sec
-
- During the import process, Doris opens a Writer for each Tablet to receive and write data. This parameter specifies Writer's waiting timeout time. If Writer does not receive any data at this time, Writer will be destroyed automatically. When the system processing speed is slow, Writer may not receive the next batch of data for a long time, resulting in import error: `Tablet Writer add batch with unknown id`. This configuration can be increased appropriately at this time. The default is 600 seconds.
-
-+ load\_process\_max\_memory\_limit\_bytes and load\_process\_max\_memory\_limit\_percent
-
- These two parameters limit the total memory that load tasks can use on a single Backend, as a maximum memory size and a maximum memory percentage respectively. `load_process_max_memory_limit_percent` defaults to 80%, which is 80% of the `mem_limit` configuration. That is, if the physical memory is M, the default load memory limit is M * 80% * 80%.
-
- `load_process_max_memory_limit_bytes` defaults to 100GB. The system takes the smaller of the two parameters as the final Backend load memory usage limit.
-
-+ label\_keep\_max\_second
-
- The retention time of load job which is FINISHED or CANCELLED. The record of load job will be kept in Doris system for a period of time which is determined by this parameter. The default time of this parameter is 3 days. This parameter is common to all types of load job.
-
-### Column mapping
-Assuming that the imported data is `1, 2, 3` and the table has the three columns `c1, c2, c3`, if the data is to be imported into the table directly, you can use the statement `COLUMNS(c1,c2,c3)`. This statement is equivalent to `COLUMNS(tmp_c1,tmp_c2,tmp_c3,c1=tmp_c1,c2=tmp_c2,c3=tmp_c3)`.
-If you want to perform transformations or use temporary variables when importing data, the transformations or temporary variables must be specified in the order they are used, for example `COLUMNS(tmp_c1,tmp_c2,tmp_c3, c1 = tmp_c1 +1, c2= c1+1, c3 = c2+1)`. This statement is equivalent to `COLUMNS(tmp_c1,tmp_c2,tmp_c3, c1 = tmp_c1 +1, c2 = tmp_c1 +1+1, c3 =tmp_c1 +1+1+1)`.
-When an expression is used, it must be defined before it is referenced. For example, the following statement is not legal: `COLUMNS(tmp_c1,tmp_c2,tmp_c3, c1 = c1+1, c2 = temp + 1, temp = tmp_c1 +1, c3 =c2+1)`
\ No newline at end of file
diff --git a/docs/en/administrator-guide/load-data/routine-load-manual.md b/docs/en/administrator-guide/load-data/routine-load-manual.md
deleted file mode 100644
index 8d54bc0480..0000000000
--- a/docs/en/administrator-guide/load-data/routine-load-manual.md
+++ /dev/null
@@ -1,334 +0,0 @@
----
-{
- "title": "Routine Load",
- "language": "en"
-}
----
-
-
-
-# Routine Load
-
-The Routine Load feature provides users with a way to automatically load data from a specified data source.
-
-This document describes the implementation principles, usage, and best practices of this feature.
-
-## Glossary
-
-* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access.
-* BE: Backend, the backend node of Doris. Responsible for query execution and data storage.
-* RoutineLoadJob: A routine load job submitted by the user.
-* JobScheduler: A routine load job scheduler for scheduling and dividing a RoutineLoadJob into multiple Tasks.
-* Task: RoutineLoadJob is divided by JobScheduler according to the rules.
-* TaskScheduler: Task Scheduler. Used to schedule the execution of a Task.
-
-## Principle
-
-```
- +---------+
- | Client |
- +----+----+
- |
-+-----------------------------+
-| FE | |
-| +-----------v------------+ |
-| | | |
-| | Routine Load Job | |
-| | | |
-| +---+--------+--------+--+ |
-| | | | |
-| +---v--+ +---v--+ +---v--+ |
-| | task | | task | | task | |
-| +--+---+ +---+--+ +---+--+ |
-| | | | |
-+-----------------------------+
- | | |
- v v v
- +---+--+ +--+---+ ++-----+
- | BE | | BE | | BE |
- +------+ +------+ +------+
-
-```
-
-As shown above, the client submits a routine load job to FE.
-
-FE splits an load job into several Tasks via JobScheduler. Each Task is responsible for loading a specified portion of the data. The Task is assigned by the TaskScheduler to the specified BE.
-
-On the BE, a Task is treated as a normal load task and loaded via the Stream Load load mechanism. After the load is complete, report to FE.
-
-The JobScheduler in the FE continues to generate subsequent new Tasks based on the reported results, or retry the failed Task.
-
-The entire routine load job completes the uninterrupted load of data by continuously generating new Tasks.
-
-## Kafka Routine load
-
-Currently we only support routine load from the Kafka system. This section details Kafka's routine use and best practices.
-
-### Usage restrictions
-
-1. Support unauthenticated Kafka access and Kafka clusters certified by SSL.
-2. The supported message formats are CSV text and Json. For CSV, each message is one line, and the end of the line does **not** contain a line break.
-3. Kafka 0.10.0 (inclusive) or above is supported by default. If you want to use a Kafka version below 0.10.0 (0.9.0, 0.8.2, 0.8.1, 0.8.0), you need to modify the BE configuration by setting `kafka_broker_version_fallback` to the older version, or directly set `property.broker.version.fallback` to the old version when creating the routine load job. The cost of using an old version is that some new features of routine load may be unavailable, such as setting the offset of a Kafka partition by time.
-
-### Create a routine load task
-
-For the detailed syntax of creating a routine load task, connect to Doris and execute `HELP ROUTINE LOAD;` to see the syntax help. Here is a detailed description of the precautions when creating a job.
-
-* columns_mapping
-
- `columns_mapping` is mainly used to specify the mapping between the columns of the table and the columns in the message, as well as conversions of some columns. If it is not specified, Doris defaults to a one-to-one correspondence between the columns in the message and the columns of the table. Although, under normal circumstances, the load can proceed without specifying it when the source data corresponds exactly one-to-one, we still strongly recommend that users **explicitly specify the column mapping**. This way, when the table structure changes (such as adding a nullable column) or the source file changes (such as adding a column), the load task can continue. Otherwise, after such changes occur, the load will report an error because the column mapping relationship is no longer one-to-one.
-
- In `columns_mapping` we can also use some built-in functions for column conversion. But you need to pay attention to the actual column type corresponding to the function parameters. for example:
-
- Suppose the user needs to load a table containing only a column of `k1` with a column type of `int`. And you need to convert the null value in the source file to 0. This feature can be implemented with the `ifnull` function. The correct way to use is as follows:
-
- `COLUMNS (xx, k1=ifnull(xx, "3"))`
-
- Note that we use `"3"` instead of `3`, although `k1` is of type `int`. Because the column types in the source data are `varchar` for the load task, the virtual column `xx` is also of type `varchar`. So we need to use `"3"` for the match; otherwise the `ifnull` function cannot find a function signature with parameters `(varchar, int)`, and an error will occur.
-
- As another example, suppose the user needs to load a table containing only a column of `k1` with a column type of `int`. And you need to process the corresponding column in the source file: convert the negative number to a positive number and the positive number to 100. This function can be implemented with the `case when` function. The correct wording should be as follows:
-
- `COLUMNS (xx, k1 = case when xx < 0 then cast(-xx as varchar) else cast((xx + '100') as varchar) end)`
-
- Note that we need to convert all the parameters in `case when` to varchar in order to get the desired result.
-
-* where_predicates
-
- The type of the column in `where_predicates` is already the actual column type, so there is no need to cast to the varchar type as `columns_mapping`. Write according to the actual column type.
-
-* desired\_concurrent\_number
-
- `desired_concurrent_number` is used to specify the degree of concurrency expected for a routine job. That is, a job, at most how many tasks are executing at the same time. For Kafka load, the current actual concurrency is calculated as follows:
-
- ```
- Min(partition num, desired_concurrent_number, Config.max_routine_load_task_concurrent_num)
- ```
-
- Where `Config.max_routine_load_task_concurrent_num` is the default maximum concurrency limit of the system. This is an FE configuration that can be adjusted by changing the configuration. The default is 5.
-
- Where partition num refers to the number of partitions for the Kafka topic subscribed to.
-
-* max\_batch\_interval/max\_batch\_rows/max\_batch\_size
-
- These three parameters are used to control the execution time of a single task. If any of the thresholds is reached, the task ends. Where `max_batch_rows` is used to record the number of rows of data read from Kafka. `max_batch_size` is used to record the amount of data read from Kafka in bytes. The current consumption rate for a task is approximately 5-10MB/s.
-
- So assume a row of data is 500 bytes and the user wants a task to run every 100 MB or every 10 seconds. The expected processing time for 100 MB is 10-20 seconds, and the corresponding number of rows is about 200,000. Then a reasonable configuration is:
-
- ```
- "max_batch_interval" = "10",
- "max_batch_rows" = "200000",
- "max_batch_size" = "104857600"
- ```
-
- The parameters in the above example are also the default parameters for these configurations.
-
-* max\_error\_number
-
- `max_error_number` is used to control the error rate. When the error rate is too high, the job is automatically paused. Because the entire job is stream-oriented and the data stream is unbounded, we cannot calculate an error ratio the way other load tasks do. So a different method is used to calculate the proportion of errors in the data stream.
-
- We have set up a sampling window. The size of the window is `max_batch_rows * 10`. Within a sampling window, if the number of error lines exceeds `max_error_number`, the job is suspended. If it is not exceeded, the next window restarts counting the number of error lines.
-
- We assume that `max_batch_rows` is 200000 and the window size is 2000000. Let `max_error_number` be 20000, that is, the user tolerates 20000 error rows per 2000000 rows, i.e., an error rate of 1%. But because not every batch of tasks consumes exactly 200000 rows, the actual window size ranges within [2000000, 2200000], which introduces about 10% statistical error.
-
- Error rows do not include rows that are filtered out by the where condition, but do include rows for which no partition exists in the corresponding Doris table.
-
-* data\_source\_properties
-
- The specific Kafka partition can be specified in `data_source_properties`. If not specified, all partitions of the subscribed topic are consumed by default.
-
- Note that when partition is explicitly specified, the load job will no longer dynamically detect changes to Kafka partition. If not specified, the partitions that need to be consumed are dynamically adjusted based on changes in the kafka partition.
-
-* strict\_mode
-
- Routine load can turn on strict mode. The way to enable it is to add ```"strict_mode" = "true"``` to job\_properties. Strict mode is off by default.
-
- Strict mode means strictly filtering column type conversions during the load process. The strict filtering strategy is as follows:
-
- 1. For column type conversion, if strict mode is true, erroneous data will be filtered. Erroneous data here means data whose original value is not null but becomes null after the column type conversion.
-
- 2. When a loaded column is generated by a function transformation, strict mode has no effect on it.
-
- 3. For a loaded column type that has a range limit, if the original data can pass the type conversion normally but cannot pass the range limit, strict mode has no effect on it. For example, if the type is decimal(1,0) and the original data is 10, it can pass the type conversion but violates the column declaration; strict mode has no effect on this data.
-
-* merge\_type
- The data merge type supports three values: APPEND, DELETE, and MERGE. APPEND is the default, meaning that this batch of data should be appended to the existing data. DELETE means deleting all rows with the same key as this batch of data. MERGE semantics must be used together with a delete condition: data that meets the delete condition is processed with DELETE semantics and the rest with APPEND semantics (see the sketch below).
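-
-As a hedged sketch, a MERGE-type routine load job might be declared as follows (the table, column names, delete condition, and Kafka settings are illustrative):
-
-```
-CREATE ROUTINE LOAD db1.merge_job ON tbl1
-WITH MERGE
-COLUMNS(k1, k2, v1),
-DELETE ON v1 = 1
-PROPERTIES
-(
-    "desired_concurrent_number" = "1"
-)
-FROM KAFKA
-(
-    "kafka_broker_list" = "broker1:9092",
-    "kafka_topic" = "my_topic"
-);
-```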
-
-
-#### Relationship between strict mode and source data
-
-Here is an example of a column type of TinyInt.
-
-> Note: When a column in a table allows a null value to be loaded
-
-|source data | source data example | string to int | strict_mode | result|
-|------------|---------------------|-----------------|--------------------|---------|
-|null | \N | N/A | true or false | NULL|
-|not null | aaa or 2000 | NULL | true | invalid data(filtered)|
-|not null | aaa | NULL | false | NULL|
-|not null | 1 | 1 | true or false | correct data|
-
-Here the column type is Decimal(1,0)
-
-> Note: When a column in a table allows a null value to be loaded
-
-|source data | source data example | string to int | strict_mode | result|
-|------------|---------------------|-----------------|--------------------|--------|
-|null | \N | N/A | true or false | NULL|
-|not null | aaa | NULL | true | invalid data(filtered)|
-|not null | aaa | NULL | false | NULL|
-|not null | 1 or 10 | 1 | true or false | correct data|
-
-> Note: Although 10 is a value that is out of range, because its type meets the decimal requirements, strict mode has no effect on it. 10 will eventually be filtered in other ETL processing flows, but it will not be filtered by strict mode.
-
-#### Accessing SSL-certified Kafka clusters
-
-Accessing an SSL-certified Kafka cluster requires the user to provide the certificate file (ca.pem) used to authenticate the Kafka broker's public key. If the Kafka cluster also has client authentication enabled, the client's public key (client.pem), key file (client.key), and key password must be provided as well. The required files need to be uploaded to Doris via the `CREATE FILE` command, **and the catalog name must be `kafka`**. See `HELP CREATE FILE;` for specific help on the `CREATE FILE` command. Here is an example:
-
-1. Upload file
-
- ```
- CREATE FILE "ca.pem" PROPERTIES("url" = "https://example_url/kafka-key/ca.pem", "catalog" = "kafka");
- CREATE FILE "client.key" PROPERTIES("url" = "https://example_url/kafka-key/client.key", "catalog" = "kafka");
- CREATE FILE "client.pem" PROPERTIES("url" = "https://example_url/kafka-key/client.pem", "catalog" = "kafka");
- ```
-
-2. Create a routine load job
-
- ```
- CREATE ROUTINE LOAD db1.job1 on tbl1
- PROPERTIES
- (
- "desired_concurrent_number"="1"
- )
- FROM KAFKA
- (
- "kafka_broker_list"= "broker1:9091,broker2:9091",
- "kafka_topic" = "my_topic",
- "property.security.protocol" = "ssl",
- "property.ssl.ca.location" = "FILE:ca.pem",
- "property.ssl.certificate.location" = "FILE:client.pem",
- "property.ssl.key.location" = "FILE:client.key",
- "property.ssl.key.password" = "abcdefg"
- );
- ```
-
-> Doris accesses Kafka clusters through `librdkafka`, Kafka's C++ client library. The parameters supported by `librdkafka` can be found in its configuration documentation.
-
-### Viewing the status of the load job
-
-Specific commands and examples for viewing the status of the **job** can be viewed with the `HELP SHOW ROUTINE LOAD;` command.
-
-Specific commands and examples for viewing the **Task** status can be viewed with the `HELP SHOW ROUTINE LOAD TASK;` command.
-
-Only currently running tasks can be viewed; tasks that have ended or have not yet started cannot be viewed.
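-
-A minimal sketch (the job name is illustrative):
-
-```
--- Job-level status:
-SHOW ROUTINE LOAD FOR db1.job1;
--- Status of the currently running tasks of that job:
-SHOW ROUTINE LOAD TASK WHERE JobName = "job1";
-```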
-
-### Alter job
-
-Users can modify jobs that have been created. Specific instructions can be viewed through the `HELP ALTER ROUTINE LOAD;` command. Or refer to [ALTER ROUTINE LOAD](../../sql-reference/sql-statements/Data%20Manipulation/alter-routine-load.md).
-
-### Job Control
-
-The user can control the stop, pause and restart of the job by the three commands `STOP/PAUSE/RESUME`. You can view help and examples with the three commands `HELP STOP ROUTINE LOAD;`, `HELP PAUSE ROUTINE LOAD;` and `HELP RESUME ROUTINE LOAD;`.
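-
-For example (the job name is illustrative):
-
-```
-PAUSE ROUTINE LOAD FOR db1.job1;
-RESUME ROUTINE LOAD FOR db1.job1;
-STOP ROUTINE LOAD FOR db1.job1;
-```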
-
-## Other instructions
-
-1. The relationship between a routine load job and an ALTER TABLE operation
-
- * Routine load does not block SCHEMA CHANGE and ROLLUP operations. Note, however, that if the column mappings are not matched after SCHEMA CHANGE is completed, the job's erroneous data will spike and eventually cause the job to pause. It is recommended to reduce this type of problem by explicitly specifying column mappings in routine load jobs and by adding Nullable columns or columns with Default values.
- * Deleting a Partition of a table may cause the loaded data to fail to find the corresponding Partition and the job will be paused.
-
-2. Relationship between routine load jobs and other load jobs (LOAD, DELETE, INSERT)
-
- * Routine load does not conflict with other LOAD jobs and INSERT operations.
- * When performing a DELETE operation, the corresponding table partition cannot have any load tasks being executed. Therefore, before performing the DELETE operation, you may need to pause the routine load job and wait for the delivered task to complete before you can execute DELETE.
-
-3. Relationship between routine load jobs and DROP DATABASE/TABLE operations
-
- When the corresponding database or table is deleted, the job will automatically CANCEL.
-
-4. The relationship between the kafka type routine load job and kafka topic
-
- When a user creates a routine load job and the specified `kafka_topic` does not exist in the Kafka cluster:
-
- * If the broker of the user's Kafka cluster has `auto.create.topics.enable = true` set, `kafka_topic` will be created automatically, and the number of partitions created is determined by the `num.partitions` configuration of the broker in **the user's Kafka cluster**. The routine load job will then continuously read data from the topic as usual.
- * If the broker of the user's Kafka cluster has `auto.create.topics.enable = false` set, the topic will not be created automatically, and the routine load job will be paused before any data is read, with the status `PAUSED`.
-
- So, if you want the Kafka topic to be created automatically by the routine load job when it does not exist, just set `auto.create.topics.enable = true` for the broker in **the user's Kafka cluster**.
-
-5. Problems that may occur in some environments
- In some environments there are isolation measures for network segments and domain name resolution, so pay attention to:
- 1. The broker list specified in the routine load task must be accessible from the Doris environment.
- 2. If `advertised.listeners` is configured in Kafka, the addresses in `advertised.listeners` must be accessible from the Doris environment.
-
-6. About specified Partition and Offset
-
- Doris supports specifying the Partition and Offset from which to start consumption. Newer versions also support starting consumption from a specified point in time. The relationship between the relevant parameters is explained here.
-
- There are three relevant parameters:
-
- * `kafka_partitions`: Specify the list of partitions to be consumed, such as: "0, 1, 2, 3".
- * `kafka_offsets`: Specify the starting offset of each partition, which must correspond to the number of `kafka_partitions` lists. Such as: "1000, 1000, 2000, 2000"
- * `property.kafka_default_offset`: Specify the default starting offset of the partition.
-
- When creating a routine load job, these three parameters can be combined as follows (a sketch of combination 4 follows the table):
-
- | Combinations | `kafka_partitions` | `kafka_offsets` | `property.kafka_default_offset` | Behavior |
- |---|---|---|---|---|
- |1| No | No | No | The system will automatically find all the partitions corresponding to the topic and start consumption from OFFSET_END |
- |2| No | No | Yes | The system will automatically find all the partitions corresponding to the topic and start consumption from the position specified by the default offset |
- |3| Yes | No | No | The system will start consumption from the OFFSET_END of the specified partition |
- |4| Yes | Yes | No | The system will start consumption from the specified offset of the specified partition |
- |5| Yes | No | Yes | The system will start consumption from the specified partition and the location specified by the default offset |
-
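- A hedged sketch of combination 4, starting each listed partition from an explicit offset (the broker list, topic, table name, and values are illustrative):
-
- ```
- CREATE ROUTINE LOAD db1.offset_job ON tbl1
- PROPERTIES
- (
-     "desired_concurrent_number" = "1"
- )
- FROM KAFKA
- (
-     "kafka_broker_list" = "broker1:9092",
-     "kafka_topic" = "my_topic",
-     "kafka_partitions" = "0,1,2,3",
-     "kafka_offsets" = "1000,1000,2000,2000"
- );
- ```
-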
-7. The difference between STOP and PAUSE
-
- The FE will automatically clean up stopped routine load jobs, while paused routine load jobs can be resumed.
-
-## Related parameters
-
-Some system configuration parameters can affect the use of routine loads.
-
-1. max\_routine\_load\_task\_concurrent\_num
-
- The FE configuration item, which defaults to 5, can be modified at runtime. This parameter limits the maximum number of subtask concurrency for a routine load job. It is recommended to maintain the default value. If the setting is too large, it may cause too many concurrent tasks and occupy cluster resources.
-
-2. max\_routine_load\_task\_num\_per\_be
-
- The FE configuration item, which defaults to 5, can be modified at runtime. This parameter limits the number of subtasks that can be executed concurrently by each BE node. It is recommended to maintain the default value. If the setting is too large, it may cause too many concurrent tasks and occupy cluster resources.
-
-3. max\_routine\_load\_job\_num
-
- The FE configuration item, which defaults to 100, can be modified at runtime. This parameter limits the total number of routine load jobs, including those in the NEED_SCHEDULED, RUNNING, and PAUSED states. Once the limit is exceeded, new jobs cannot be submitted.
-
-4. max\_consumer\_num\_per\_group
-
- BE configuration item, the default is 3. This parameter indicates the maximum number of consumers generated in one subtask for data consumption. For a Kafka data source, a consumer may consume one or more Kafka partitions. Suppose a task needs to consume 6 Kafka partitions; then 3 consumers are generated and each consumes 2 partitions. If there are only 2 partitions, only 2 consumers are generated, and each consumes 1 partition.
-
-5. push\_write\_mbytes\_per\_sec
-
- BE configuration item. The default is 10, which is 10MB/s. This parameter is to load common parameters, not limited to routine load jobs. This parameter limits the speed at which loaded data is written to disk. For high-performance storage devices such as SSDs, this speed limit can be appropriately increased.
-
-6. max\_tolerable\_backend\_down\_num
- FE configuration item, the default is 0. Under certain conditions, Doris can automatically reschedule PAUSED jobs back to RUNNING. A value of 0 means that rescheduling is allowed only when all BE nodes are in the alive state.
-
-7. period\_of\_auto\_resume\_min
- FE configuration item, the default is 5 minutes. Automatic rescheduling is attempted at most 3 times within each 5-minute period. If all 3 attempts fail, the current job is locked and no further auto-scheduling is performed; however, manual intervention is still possible.
diff --git a/docs/en/administrator-guide/load-data/s3-load-manual.md b/docs/en/administrator-guide/load-data/s3-load-manual.md
deleted file mode 100644
index b9c2b2a856..0000000000
--- a/docs/en/administrator-guide/load-data/s3-load-manual.md
+++ /dev/null
@@ -1,93 +0,0 @@
----
-{
-"title": "S3 Load",
-"language": "zh-CN"
-}
----
-
-
-
-# S3 Load
-
-Starting from version 0.14, Doris supports the direct import of data from online storage systems that support the S3 protocol through the S3 protocol.
-
-This document mainly introduces how to import data stored in AWS S3. It also supports the import of other object storage systems that support the S3 protocol, such as Baidu Cloud’s BOS, Alibaba Cloud’s OSS and Tencent Cloud’s COS, etc.
-## Applicable scenarios
-
-* Source data in S3 protocol accessible storage systems, such as S3, BOS.
-* Data volumes range from tens to hundreds of GB.
-
-## Preparing
-1. Prepare AK and SK
- First, you need to find or regenerate your AWS `Access keys`. You can find how to generate them under `My Security Credentials` in the AWS console, as shown in the following figure:
- ![AK_SK](/images/aws_ak_sk.png)
- Select `Create New Access Key` and make sure to save the generated AK and SK.
-2. Prepare REGION and ENDPOINT
- REGION can be selected when creating the bucket or viewed in the bucket list. The ENDPOINT can be looked up by REGION on the following page: [AWS Documentation](https://docs.aws.amazon.com/general/latest/gr/s3.html#s3_region)
-
-For other cloud storage systems, the relevant S3-compatible information can be found in their corresponding documentation.
-
-## Start Loading
-As with Broker Load, simply replace `WITH BROKER broker_name ()` with:
-```
- WITH S3
- (
- "AWS_ENDPOINT" = "AWS_ENDPOINT",
- "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY",
- "AWS_SECRET_KEY"="AWS_SECRET_KEY",
- "AWS_REGION" = "AWS_REGION"
- )
-```
-
-example:
-```
-    LOAD LABEL example_db.example_label_1
- (
- DATA INFILE("s3://your_bucket_name/your_file.txt")
- INTO TABLE load_test
- COLUMNS TERMINATED BY ","
- )
- WITH S3
- (
- "AWS_ENDPOINT" = "AWS_ENDPOINT",
- "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY",
- "AWS_SECRET_KEY"="AWS_SECRET_KEY",
- "AWS_REGION" = "AWS_REGION"
- )
- PROPERTIES
- (
- "timeout" = "3600"
- );
-```
-
-## FAQ
-
-The S3 SDK uses virtual-hosted style access by default. However, some object storage systems may not have virtual-hosted style access enabled or may not support it. In this case, we can add the `use_path_style` parameter to force the use of path-style access:
-
-```
- WITH S3
- (
- "AWS_ENDPOINT" = "AWS_ENDPOINT",
- "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY",
- "AWS_SECRET_KEY"="AWS_SECRET_KEY",
- "AWS_REGION" = "AWS_REGION",
- "use_path_style" = "true"
- )
-```
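-
-For instance, a complete load statement against an S3-compatible store that only accepts path-style access might look like the following (the bucket, endpoint and credentials are placeholders):
-
-```
- LOAD LABEL example_db.example_label_2
- (
-     DATA INFILE("s3://your_bucket_name/your_file.txt")
-     INTO TABLE load_test
-     COLUMNS TERMINATED BY ","
- )
- WITH S3
- (
-     "AWS_ENDPOINT" = "AWS_ENDPOINT",
-     "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY",
-     "AWS_SECRET_KEY"="AWS_SECRET_KEY",
-     "AWS_REGION" = "AWS_REGION",
-     "use_path_style" = "true"
- )
- PROPERTIES
- (
-     "timeout" = "3600"
- );
-```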
diff --git a/docs/en/administrator-guide/load-data/sequence-column-manual.md b/docs/en/administrator-guide/load-data/sequence-column-manual.md
deleted file mode 100644
index aeb62e3621..0000000000
--- a/docs/en/administrator-guide/load-data/sequence-column-manual.md
+++ /dev/null
@@ -1,208 +0,0 @@
----
-{
- "title": "Sequence Column",
- "language": "en"
-}
----
-
-
-
-# Sequence Column
-The Sequence Column currently only supports the Uniq model. The Uniq model is mainly for scenarios requiring a unique primary key, where the uniqueness constraint of the primary key can be guaranteed. However, because REPLACE aggregation is used, the replacement order is not guaranteed for data imported in the same batch; see [the data model documentation](../../getting-started/data-model-rollup.md) for details. If the replacement order is not guaranteed, the specific data that finally ends up in the table cannot be determined, which introduces uncertainty.
-
-To solve this problem, Doris supports a sequence column: the user specifies the sequence column when importing. Under the same key columns, columns of the REPLACE aggregate type are replaced according to the value of the sequence column: a row with a larger sequence value replaces a row with a smaller one, but not the other way around. In this way, the replacement order is determined and controlled by the user.
-
-## Principle
-
-This is implemented by adding a hidden column `__DORIS_SEQUENCE_COL__`. The type of the column is specified by the user when creating the table, its value is determined on import, and REPLACE columns are replaced according to that value.
-
-### Create Table
-When you create the Uniq table, a hidden column `__DORIS_SEQUENCE_COL__` is automatically added, with the type specified by the user.
-
-### Import
-
-When importing, the FE sets the value of the hidden column during parsing to the value of the `ORDER BY` expression (Broker Load and Routine Load) or of the `function_column.sequence_col` expression (Stream Load), and the value columns are replaced according to this value. The value of the hidden column `__DORIS_SEQUENCE_COL__` can come from a column in the source data or from a column in the table structure.
-
-### Read
-
-Requests that read value columns also need to read the additional `__DORIS_SEQUENCE_COL__` column. It is used to decide the replacement order of the REPLACE aggregation under the same key columns: the larger value replaces the smaller value, and not the reverse.
-
-### Cumulative Compaction
-
-Cumulative Compaction works in the same way as the reading process
-
-### Base Compaction
-
-Base Compaction works in the same way as the reading process
-
-### Syntax
-For table creation, the syntax adds a property to PROPERTIES that identifies the type of `__DORIS_SEQUENCE_COL__`.
-For import, the syntax mainly adds a mapping from the sequence column to other columns. The settings for each import mode are described below.
-
-#### Create Table
-When you create the Uniq table, you can specify the sequence column type
-```
-PROPERTIES (
-    "function_column.sequence_type" = 'Date'
-);
-```
-The sequence_type is used to specify the type of the sequence column, which can be an integer type or a time type.
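-
-As a fuller sketch (the table mirrors the usage example later in this document; the bucket count and replication settings are illustrative), a Uniq table with a Date-typed sequence column could be created like this:
-
-```
-CREATE TABLE test_table
-(
-    user_id BIGINT,
-    `date` DATE,
-    group_id BIGINT,
-    modify_date DATE,
-    keyword VARCHAR(128)
-)
-UNIQUE KEY(user_id, `date`, group_id)
-DISTRIBUTED BY HASH(user_id) BUCKETS 8
-PROPERTIES (
-    "function_column.sequence_type" = 'Date',
-    "replication_num" = "1"
-);
-```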
-
-#### stream load
-
-For stream load, add the mapping from the hidden column to `source_sequence` via the `function_column.sequence_col` field in the header, for example:
-```
-curl --location-trusted -u root -H "columns: k1,k2,source_sequence,v1,v2" -H "function_column.sequence_col: source_sequence" -T testData http://host:port/api/testDb/testTbl/_stream_load
-```
-
-#### broker load
-
-Map the `source_sequence` column to the hidden sequence column using `ORDER BY`:
-
-```
-LOAD LABEL db1.label1
-(
- DATA INFILE("hdfs://host:port/user/data/*/test.txt")
- INTO TABLE `tbl1`
- COLUMNS TERMINATED BY ","
- (k1,k2,source_sequence,v1,v2)
- ORDER BY source_sequence
-)
-WITH BROKER 'broker'
-(
- "username"="user",
- "password"="pass"
-)
-PROPERTIES
-(
- "timeout" = "3600"
-);
-
-```
-
-#### routine load
-
-The mapping method is the same as above, as shown below
-
-```
- CREATE ROUTINE LOAD example_db.test1 ON example_tbl
- [WITH MERGE|APPEND|DELETE]
- COLUMNS(k1, k2, source_sequence, v1, v2),
- WHERE k1 > 100 and k2 like "%doris%"
- [ORDER BY source_sequence]
- PROPERTIES
- (
- "desired_concurrent_number"="3",
- "max_batch_interval" = "20",
- "max_batch_rows" = "300000",
- "max_batch_size" = "209715200",
- "strict_mode" = "false"
- )
- FROM KAFKA
- (
- "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092",
- "kafka_topic" = "my_topic",
- "kafka_partitions" = "0,1,2,3",
- "kafka_offsets" = "101,0,0,200"
- );
-```
-
-## Enable sequence column support
-If `function_column.sequence_type` is set when creating a new table, the table supports the sequence column.
-For a table that does not yet support the sequence column, the feature can be enabled with the following statement:
-`ALTER TABLE example_db.my_table ENABLE FEATURE "SEQUENCE_LOAD" WITH PROPERTIES ("function_column.sequence_type" = "Date")`.
-To determine whether a table supports the sequence column, set the session variable `SET show_hidden_columns=true` to display hidden columns and then run `desc tablename`; if the output contains the column `__DORIS_SEQUENCE_COL__`, the table supports it, otherwise it does not.
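-
-For example, a quick check might look like this (the table name is illustrative):
-
-```
-SET show_hidden_columns = true;
-DESC example_db.my_table;
-```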
-
-## Usage example
-Let's take the stream Load as an example to show how to use it
-1. Create a table that supports sequence column.
-
-The table structure is shown below
-```
-MySQL > desc test_table;
-+-------------+--------------+------+-------+---------+---------+
-| Field | Type | Null | Key | Default | Extra |
-+-------------+--------------+------+-------+---------+---------+
-| user_id | BIGINT | No | true | NULL | |
-| date | DATE | No | true | NULL | |
-| group_id | BIGINT | No | true | NULL | |
-| modify_date | DATE | No | false | NULL | REPLACE |
-| keyword | VARCHAR(128) | No | false | NULL | REPLACE |
-+-------------+--------------+------+-------+---------+---------+
-```
-
-2. Import data normally:
-
-Import the following data
-```
-1 2020-02-22 1 2020-02-22 a
-1 2020-02-22 1 2020-02-22 b
-1 2020-02-22 1 2020-03-05 c
-1 2020-02-22 1 2020-02-26 d
-1 2020-02-22 1 2020-02-22 e
-1 2020-02-22 1 2020-02-22 b
-```
-Take the Stream Load as an example here and map the sequence column to the modify_date column
-```
-curl --location-trusted -u root: -H "function_column.sequence_col: modify_date" -T testData http://host:port/api/test/test_table/_stream_load
-```
-The result is:
-```
-MySQL > select * from test_table;
-+---------+------------+----------+-------------+---------+
-| user_id | date | group_id | modify_date | keyword |
-+---------+------------+----------+-------------+---------+
-| 1 | 2020-02-22 | 1 | 2020-03-05 | c |
-+---------+------------+----------+-------------+---------+
-```
-In this import, 'c' is eventually retained in the keyword column because, among rows with the same keys, its sequence column value (the value of modify_date) is the maximum: '2020-03-05'.
-
-3. Guarantee of substitution order
-
-After the above steps are completed, import the following data
-```
-1 2020-02-22 1 2020-02-22 a
-1 2020-02-22 1 2020-02-23 b
-```
-Query data
-```
-MySQL [test]> select * from test_table;
-+---------+------------+----------+-------------+---------+
-| user_id | date | group_id | modify_date | keyword |
-+---------+------------+----------+-------------+---------+
-| 1 | 2020-02-22 | 1 | 2020-03-05 | c |
-+---------+------------+----------+-------------+---------+
-```
-Because the sequence column values of the newly imported rows are all smaller than the value already in the table, the existing row is not replaced.
-Try importing the following data again
-```
-1 2020-02-22 1 2020-02-22 a
-1 2020-02-22 1 2020-03-23 w
-```
-Query data
-```
-MySQL [test]> select * from test_table;
-+---------+------------+----------+-------------+---------+
-| user_id | date | group_id | modify_date | keyword |
-+---------+------------+----------+-------------+---------+
-| 1 | 2020-02-22 | 1 | 2020-03-23 | w |
-+---------+------------+----------+-------------+---------+
-```
-This time, the newly imported data replaces the original data in the table.
\ No newline at end of file
diff --git a/docs/en/administrator-guide/load-data/spark-load-manual.md b/docs/en/administrator-guide/load-data/spark-load-manual.md
deleted file mode 100644
index 3534be710d..0000000000
--- a/docs/en/administrator-guide/load-data/spark-load-manual.md
+++ /dev/null
@@ -1,632 +0,0 @@
----
-{
- "title": "Spark Load",
- "language": "en"
-}
----
-
-
-
-# Spark Load
-
-Spark load uses Spark to preprocess the data to be loaded, which improves the performance of loading large volumes of data into Doris and saves computing resources of the Doris cluster. It is mainly used for initial migration and for importing large amounts of data into Doris.
-
-Spark load is an asynchronous load method. Users need to create a spark-type load job via the MySQL protocol and view the load result with `SHOW LOAD`.
-
-## Applicable scenarios
-
-* The source data is in a file storage system that spark can access, such as HDFS.
-
-* The data volume ranges from tens of GB to TB.
-
-## Explanation of terms
-
-1. Frontend (FE): metadata and scheduling node of Doris system. In the load process, it is mainly responsible for the scheduling of load jobs.
-
-2. Backend (BE): the computing and storage node of the Doris system. In the load process, it is mainly responsible for writing and storing data.
-
-3. Spark ETL: in the load process, it is mainly responsible for ETL of data, including global dictionary construction (bitmap type), partition, sorting, aggregation, etc.
-
-4. Broker: broker is an independent stateless process. It encapsulates the file system interface and provides the ability of Doris to read the files in the remote storage system.
-
-5. Global dictionary: a data structure that stores the mapping from original values to encoded values. The original value can be of any data type, while the encoded value is an integer. The global dictionary is mainly used in exact deduplication (count distinct) precomputation scenarios.
-
-## Basic principles
-
-### Basic process
-
-The user submits a spark-type load job through the MySQL client; the FE records the metadata and returns a successful submission to the user.
-
-The execution of a spark load task is mainly divided into the following five stages.
-
-
-1. Fe schedules and submits ETL tasks to spark cluster for execution.
-
-2. Spark cluster executes ETL to complete the preprocessing of load data. It includes global dictionary building (bitmap type), partitioning, sorting, aggregation, etc.
-
-3. After the ETL task is completed, the FE obtains the data path of each preprocessed partition and schedules the related BEs to execute push tasks.
-
-4. The BEs read the data through the broker and convert it into the Doris underlying storage format.
-
-5. The FE schedules the version to take effect and completes the load job.
-
-```
- +
- | 0. User create spark load job
- +----v----+
- | FE |---------------------------------+
- +----+----+ |
- | 3. FE send push tasks |
- | 5. FE publish version |
- +------------+------------+ |
- | | | |
-+---v---+ +---v---+ +---v---+ |
-| BE | | BE | | BE | |1. FE submit Spark ETL job
-+---^---+ +---^---+ +---^---+ |
- |4. BE push with broker | |
-+---+---+ +---+---+ +---+---+ |
-|Broker | |Broker | |Broker | |
-+---^---+ +---^---+ +---^---+ |
- | | | |
-+---+------------+------------+---+ 2.ETL +-------------v---------------+
-| HDFS +-------> Spark cluster |
-| <-------+ |
-+---------------------------------+ +-----------------------------+
-
-```
-
-## Global dictionary
-
-### Applicable scenarios
-
-At present, the bitmap column in Doris is implemented with the `RoaringBitmap` library, and the input data type of `RoaringBitmap` can only be an integer. Therefore, if you want to precompute the bitmap column during the import process, you need to convert the input data type to an integer.
-
-In the existing Doris import process, the global dictionary data structure is implemented based on a Hive table, which stores the mapping from original values to encoded values.
-
-### Build process
-
-1. Read the data from the upstream data source and generate a hive temporary table, which is recorded as `hive_table`.
-
-2. Extract the distinct values of the fields to be deduplicated from `hive_table` and generate a new Hive table, denoted `distinct_value_table`.
-
-3. Create a new global dictionary table named `dict_table`; one column is the original value, and the other is the encoded value.
-
-4. Left join `distinct_value_table` with `dict_table` to compute the set of new distinct values, and then encode this set with a window function. At this point each original value of the deduplicated column has an additional encoded-value column. Finally, the data of these two columns is written back to `dict_table` (see the sketch after this list).
-
-5. Join the `dict_table` with the `hive_table` to replace the original value in the `hive_table` with the integer encoded value.
-
-6. `hive_table` will be read by the next data preprocessing process and imported into Doris after calculation.
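-
-As a rough Spark SQL illustration of step 4 only (all table and column names here are illustrative, not the actual ones Doris generates), encoding the newly seen values could look roughly like this:
-
-```sql
--- Encode values that are in the distinct set but not yet in the dictionary,
--- continuing the code sequence after the current maximum encoded value.
-INSERT INTO TABLE dict_table
-SELECT t.origin_value,
-       m.max_code + ROW_NUMBER() OVER (ORDER BY t.origin_value) AS encoded_value
-FROM (
-    SELECT d.origin_value
-    FROM distinct_value_table d
-    LEFT JOIN dict_table dc ON d.origin_value = dc.origin_value
-    WHERE dc.origin_value IS NULL
-) t
-CROSS JOIN (
-    SELECT COALESCE(MAX(encoded_value), 0) AS max_code FROM dict_table
-) m;
-```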
-
-## Data preprocessing (DPP)
-
-### Basic process
-
-1. Read data from the data source. The upstream data source can be HDFS file or hive table.
-
-2. Map the read data, calculate the expression, and generate the bucket field `bucket_id` according to the partition information.
-
-3. Generate rolluptree according to rollup metadata of Doris table.
-
-4. Traverse rolluptree to perform hierarchical aggregation. The rollup of the next level can be calculated from the rollup of the previous level.
-
-5. After each aggregation calculation, the data is divided into buckets according to `bucket_id` and then written to HDFS.
-
-6. Subsequent brokers pull the files from HDFS and import them into the Doris BEs.
-
-## Hive Bitmap UDF
-
-Spark load supports loading Hive-generated bitmap data directly into Doris; see the [hive-bitmap-udf documentation](../../extending-doris/hive-bitmap-udf.md).
-
-## Basic operation
-
-### Configure ETL cluster
-
-As an external computing resource, spark is used to complete ETL work in Doris. In the future, there may be other external resources that will be used in Doris, such as spark / GPU for query, HDFS / S3 for external storage, MapReduce for ETL, etc. Therefore, we introduce resource management to manage these external resources used by Doris.
-
-Before submitting the spark import task, you need to configure the spark cluster that performs the ETL task.
-
-Grammar:
-
-```sql
--- create spark resource
-CREATE EXTERNAL RESOURCE resource_name
-PROPERTIES
-(
- type = spark,
- spark_conf_key = spark_conf_value,
- working_dir = path,
- broker = broker_name,
- broker.property_key = property_value
-)
-
--- drop spark resource
-DROP RESOURCE resource_name
-
--- show resources
-SHOW RESOURCES
-SHOW PROC "/resources"
-
--- privileges
-GRANT USAGE_PRIV ON RESOURCE resource_name TO user_identity
-GRANT USAGE_PRIV ON RESOURCE resource_name TO ROLE role_name
-
-REVOKE USAGE_PRIV ON RESOURCE resource_name FROM user_identity
-REVOKE USAGE_PRIV ON RESOURCE resource_name FROM ROLE role_name
-```
-
-#### Create resource
-
-`resource_name` is the name of the spark resource configured in Doris.
-
-`Properties` are the parameters related to spark resources, as follows:
-
-- `type`: resource type, required. Currently, only spark is supported.
-
-- Spark related parameters are as follows:
-
-  - `spark.master`: required. `yarn` and `spark://host:port` are supported at present.
-
-  - `spark.submit.deployMode`: the deployment mode of the Spark program; required. Both `cluster` and `client` are supported.
-
-  - `spark.hadoop.yarn.resourcemanager.address`: required when master is yarn.
-
-  - `spark.hadoop.fs.defaultFS`: required when master is yarn.
-
- - Other parameters are optional, refer to `http://spark.apache.org/docs/latest/configuration.html`
-
-- `working_dir`: the directory used by the ETL job. Required when Spark is used as an ETL resource. For example: `hdfs://host:port/tmp/doris`.
-
-- `broker`: the name of the broker. Required when Spark is used as an ETL resource. You need to use the `ALTER SYSTEM ADD BROKER` command to complete the configuration in advance.
-
-- `broker.property_key`: the authentication information that the broker needs to specify when reading the intermediate file generated by ETL.
-
-Example:
-
-```sql
--- yarn cluster mode
-CREATE EXTERNAL RESOURCE "spark0"
-PROPERTIES
-(
- "type" = "spark",
- "spark.master" = "yarn",
- "spark.submit.deployMode" = "cluster",
- "spark.jars" = "xxx.jar,yyy.jar",
- "spark.files" = "/tmp/aaa,/tmp/bbb",
- "spark.executor.memory" = "1g",
- "spark.yarn.queue" = "queue0",
- "spark.hadoop.yarn.resourcemanager.address" = "127.0.0.1:9999",
- "spark.hadoop.fs.defaultFS" = "hdfs://127.0.0.1:10000",
- "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris",
- "broker" = "broker0",
- "broker.username" = "user0",
- "broker.password" = "password0"
-);
-
--- spark standalone client mode
-CREATE EXTERNAL RESOURCE "spark1"
-PROPERTIES
-(
- "type" = "spark",
- "spark.master" = "spark://127.0.0.1:7777",
- "spark.submit.deployMode" = "client",
- "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris",
- "broker" = "broker1"
-);
-```
-
-#### Show resources
-
-Ordinary accounts can only see the resources that they have `USAGE_PRIV` to use.
-
-The root and admin accounts can see all the resources.
-
-#### Resource privilege
-
-Resource permissions are managed by grant revoke. Currently, only `USAGE_PRIV` permission is supported.
-
-The `USAGE_PRIV` permission can be granted to a user or to a role, and roles are used in the same way as before.
-
-```sql
--- Grant permission to the spark0 resource to user user0
-
-GRANT USAGE_PRIV ON RESOURCE "spark0" TO "user0"@"%";
-
-
--- Grant permission to the spark0 resource to role ROLE0
-
-GRANT USAGE_PRIV ON RESOURCE "spark0" TO ROLE "role0";
-
-
--- Grant permission to all resources to user user0
-
-GRANT USAGE_PRIV ON RESOURCE * TO "user0"@"%";
-
-
--- Grant permission to all resources to role ROLE0
-
-GRANT USAGE_PRIV ON RESOURCE * TO ROLE "role0";
-
-
--- Revoke the spark0 resource permission of user user0
-
-REVOKE USAGE_PRIV ON RESOURCE "spark0" FROM "user0"@"%";
-
-```
-
-### Configure spark client
-
-The FE submits the spark job by executing the spark-submit command, so the spark client must be configured for the FE. It is recommended to use an official Spark 2.x release, version 2.4.5 or above ([download spark here](https://archive.apache.org/dist/spark/)). After downloading, please follow the steps below to complete the configuration.
-
-#### Configure SPARK_HOME environment variable
-
-Place the spark client on the same machine as the FE and configure `spark_home_default_dir` in `fe.conf` to point to it. This configuration item defaults to `fe/lib/spark2x` and cannot be empty.
-
-#### Configure spark dependencies
-
-Package all jar files in the `jars` folder under the spark client root path into a zip file, and configure `spark_resource_path` in `fe.conf` as the path of this zip file.
-
-When a spark load task is submitted, this zip file is uploaded to the remote repository. The default repository path is under the `working_dir/{cluster_ID}` directory and is named `__spark_repository__{resource_name}`, indicating that each resource in the cluster corresponds to one remote repository. The directory structure of the remote repository is as follows:
-
-```
-__spark_repository__spark0/
- |-__archive_1.0.0/
- | |-__lib_990325d2c0d1d5e45bf675e54e44fb16_spark-dpp-1.0.0-jar-with-dependencies.jar
- | |-__lib_7670c29daf535efe3c9b923f778f61fc_spark-2x.zip
- |-__archive_1.1.0/
- | |-__lib_64d5696f99c379af2bee28c1c84271d5_spark-dpp-1.1.0-jar-with-dependencies.jar
- | |-__lib_1bbb74bb6b264a270bc7fca3e964160f_spark-2x.zip
- |-__archive_1.2.0/
- | |-...
-```
-
-In addition to the spark dependency (named `spark-2x.zip` by default), the FE also uploads the DPP dependency package to the remote repository. If all the dependency files submitted by spark load already exist in the remote repository, they do not need to be uploaded again, which saves the time of repeatedly uploading a large number of files.
-
-### Configure yarn client
-
-The FE obtains the status of the running application and kills it by executing yarn commands, so a yarn client must be configured for the FE. It is recommended to use an official Hadoop release, version 2.5.2 or above ([download hadoop](https://archive.apache.org/dist/hadoop/common/)). After downloading, please follow the steps below to complete the configuration.
-
-#### Configure the yarn client path
-
-Place the downloaded yarn client on the same machine as the FE, and configure `yarn_client_path` in `fe.conf` to the yarn executable, which defaults to `fe/lib/yarn-client/hadoop/bin/yarn`.
-
-(Optional) When the FE obtains the application status or kills the application through the yarn client, the configuration files required for executing yarn commands are generated by default under the `lib/yarn-config` path in the FE root directory. This path can be changed by configuring `yarn_config_dir` in `fe.conf`. The currently generated yarn configuration files include `core-site.xml` and `yarn-site.xml`.
-
-### Create load
-
-Grammar:
-
-```sql
-LOAD LABEL load_label
- (data_desc, ...)
- WITH RESOURCE resource_name resource_properties
- [PROPERTIES (key1=value1, ... )]
-
-* load_label:
- db_name.label_name
-
-* data_desc:
- DATA INFILE ('file_path', ...)
- [NEGATIVE]
- INTO TABLE tbl_name
- [PARTITION (p1, p2)]
- [COLUMNS TERMINATED BY separator ]
- [(col1, ...)]
- [SET (k1=f1(xx), k2=f2(xx))]
- [WHERE predicate]
-
-* resource_properties:
- (key2=value2, ...)
-```
-
-Example 1: when the upstream data source is HDFS file
-
-```sql
-LOAD LABEL db1.label1
-(
- DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file1")
- INTO TABLE tbl1
- COLUMNS TERMINATED BY ","
- (tmp_c1,tmp_c2)
- SET
- (
- id=tmp_c2,
- name=tmp_c1
- ),
- DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file2")
- INTO TABLE tbl2
- COLUMNS TERMINATED BY ","
- (col1, col2)
- where col1 > 1
-)
-WITH RESOURCE 'spark0'
-(
- "spark.executor.memory" = "2g",
- "spark.shuffle.compress" = "true"
-)
-PROPERTIES
-(
- "timeout" = "3600"
-);
-
-```
-
-Example 2: when the upstream data source is hive table
-
-```sql
-step 1: create a hive external table
-CREATE EXTERNAL TABLE hive_t1
-(
- k1 INT,
- K2 SMALLINT,
- k3 varchar(50),
- uuid varchar(100)
-)
-ENGINE=hive
-properties
-(
-"database" = "tmp",
-"table" = "t1",
-"hive.metastore.uris" = "thrift://0.0.0.0:8080"
-);
-
-step 2: submit the load command
-LOAD LABEL db1.label1
-(
- DATA FROM TABLE hive_t1
- INTO TABLE tbl1
- (k1,k2,k3)
- SET
- (
- uuid=bitmap_dict(uuid)
- )
-)
-WITH RESOURCE 'spark0'
-(
- "spark.executor.memory" = "2g",
- "spark.shuffle.compress" = "true"
-)
-PROPERTIES
-(
- "timeout" = "3600"
-);
-
-```
-
-Example 3: when the upstream data source is hive binary type table
-
-```sql
-step 1: create hive external table
-CREATE EXTERNAL TABLE hive_t1
-(
- k1 INT,
- K2 SMALLINT,
- k3 varchar(50),
- uuid varchar(100)
-)
-ENGINE=hive
-properties
-(
-"database" = "tmp",
-"table" = "t1",
-"hive.metastore.uris" = "thrift://0.0.0.0:8080"
-);
-
-step 2: submit load command
-LOAD LABEL db1.label1
-(
- DATA FROM TABLE hive_t1
- INTO TABLE tbl1
- (k1,k2,k3)
- SET
- (
- uuid=binary_bitmap(uuid)
- )
-)
-WITH RESOURCE 'spark0'
-(
- "spark.executor.memory" = "2g",
- "spark.shuffle.compress" = "true"
-)
-PROPERTIES
-(
- "timeout" = "3600"
-);
-
-```
-
-You can view the detailed syntax for creating a load by executing `HELP SPARK LOAD`. This section mainly introduces the meaning of the parameters and the precautions in the spark load creation syntax.
-
-#### Label
-
-Identification of the import task. Each import task has a unique label within a single database. The specific rules are consistent with `broker load`.
-
-#### Data description parameters
-
-Currently, the supported data sources are CSV and hive table. Other rules are consistent with `broker load`.
-
-#### Load job parameters
-
-Load job parameters mainly refer to the `opt_properties` in the spark load. Load job parameters are applied to the entire load job. The rules are consistent with `broker load`.
-
-#### Spark resource parameters
-
-Spark resources need to be configured in the Doris system in advance, and the user must be granted `USAGE_PRIV` on the resource before spark load can use it.
-
-When users have temporary requirements, such as adding resources for tasks and modifying spark configs, you can set them here. The settings only take effect for this task and do not affect the existing configuration in the Doris cluster.
-
-```sql
-WITH RESOURCE 'spark0'
-(
- "spark.driver.memory" = "1g",
- "spark.executor.memory" = "3g"
-)
-```
-
-#### Load when data source is hive table
-
-At present, if you want to use hive table as a data source in the import process, you need to create an external table of type hive,
-
-Then you can specify the table name of the external table when submitting the Load command.
-
-#### Load process to build global dictionary
-
-The data type applicable to the aggregate columns of the Doris table is of type bitmap.
-
-In the load command, you can specify the field for which to build a global dictionary. The format is: `doris field name=bitmap_dict(hive_table field name)`
-
-It should be noted that the construction of global dictionary is supported only when the upstream data source is hive table.
-
-#### Load when data source is hive binary type table
-
-The applicable aggregate column type of the Doris table is bitmap, and the data type of the corresponding column in the source Hive table is binary (serialized via the org.apache.doris.load.loadv2.dpp.BitmapValue class in FE's spark-dpp module).
-
-There is no need to build a global dictionary; just specify the corresponding field in the load command. The format is: `doris field name=binary_bitmap(hive table field name)`
-
-Similarly, the binary (bitmap) type of data import is currently only supported when the upstream data source is a hive table.
-
-### Show load
-
-Spark load is asynchronous just like broker load, so the user must record the load label and use it in the **SHOW LOAD command to view the load result**. The SHOW LOAD command is common to all load types. The specific syntax can be viewed by executing `HELP SHOW LOAD`.
-
-Example:
-
-```
-mysql> show load order by createtime desc limit 1\G
-*************************** 1. row ***************************
- JobId: 76391
- Label: label1
- State: FINISHED
- Progress: ETL:100%; LOAD:100%
- Type: SPARK
- EtlInfo: unselected.rows=4; dpp.abnorm.ALL=15; dpp.norm.ALL=28133376
- TaskInfo: cluster:cluster0; timeout(s):10800; max_filter_ratio:5.0E-5
- ErrorMsg: N/A
- CreateTime: 2019-07-27 11:46:42
- EtlStartTime: 2019-07-27 11:46:44
- EtlFinishTime: 2019-07-27 11:49:44
- LoadStartTime: 2019-07-27 11:49:44
-LoadFinishTime: 2019-07-27 11:50:16
- URL: http://1.1.1.1:8089/proxy/application_1586619723848_0035/
- JobDetails: {"ScannedRows":28133395,"TaskNumber":1,"FileNumber":1,"FileSize":200000}
-```
-
-Refer to broker load for the meaning of parameters in the returned result set. The differences are as follows:
-
-+ State
-
-The current phase of the load job. After the job is submitted, the status is PENDING. After the spark ETL job is submitted, the status changes to ETL. After ETL is completed, the FE schedules the BEs to execute push operations, and once the push is completed and the version takes effect, the status changes to FINISHED.
-
-There are two final states of a load job: CANCELLED and FINISHED. When the load job is in either of these states, the load is over: CANCELLED means the load failed, and FINISHED means it succeeded.
-
-+ Progress
-
-Progress description of the load job. There are two kinds of progress: ETL and load, corresponding to the two stages of the load process, ETL and loading.
-
-The progress range of load is 0 ~ 100%.
-
-```Load progress = the number of tables that have completed all replica imports / the total number of tables in this import task * 100%```
-
-**When all tables have been loaded, the load progress becomes 99%** and the load enters the final effective stage; after the whole load job is completed, the load progress changes to 100%.
-
-The load progress is not linear. Therefore, if the progress does not change over a period of time, it does not mean that the load is not in execution.
-
-+ Type
-
-Type of load job. Spark load is spark.
-
-+ CreateTime/EtlStartTime/EtlFinishTime/LoadStartTime/LoadFinishTime
-
-These values represent the creation time of the load, the start time of the ETL phase, the completion time of the ETL phase, the start time of the loading phase, and the completion time of the entire load job.
-
-+ JobDetails
-
-Displays detailed running statistics of the job, updated when the ETL phase ends, including the number of loaded files, the total size (in bytes), the number of subtasks, the number of processed raw rows, etc.
-
-```{"ScannedRows":139264,"TaskNumber":1,"FileNumber":1,"FileSize":940754064}```
-
-+ URL
-
-Copy this url to the browser and jump to the web interface of the corresponding application.
-
-### View spark launcher commit log
-
-Sometimes users need to view the detailed logs generated during spark submission. The logs are saved in `log/spark_launcher_log` under the FE root directory and are named `spark_launcher_{load_job_id}_{label}.log`. The logs are kept in this directory for a period of time; when the load information in the FE metadata is cleaned up, the corresponding logs are cleaned up as well. The default retention time is 3 days.
-
-### cancel load
-
-When the spark load job is not in the CANCELLED or FINISHED state, it can be cancelled manually by the user. To cancel it, you need to specify the label of the load job. The syntax of the cancel load command can be viewed by executing `HELP CANCEL LOAD`.
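-
-For example, a job could be cancelled with a statement like the following (the database and label are illustrative):
-
-```sql
-CANCEL LOAD FROM db1 WHERE LABEL = "label1";
-```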
-
-## Related system configuration
-
-### FE configuration
-
-The following configurations are system-level configurations of spark load, that is, they apply to all spark load import jobs. Their values are mainly adjusted by modifying `fe.conf`.
-
-+ `enable_spark_load`
-
-Enables spark load and resource creation. The default value is false, i.e. the feature is turned off.
-
-+ `spark_load_default_timeout_second`
-
-The default timeout for tasks is 259200 seconds (3 days).
-
-+ `spark_home_default_dir`
-
-Spark client path (`Fe/lib/spark2x`).
-
-+ `spark_resource_path`
-
-The path of the packaged spark dependent file (empty by default).
-
-+ `spark_launcher_log_dir`
-
-The directory where the spark client's commit logs are stored (`fe/log/spark_launcher_log`).
-
-+ `yarn_client_path`
-
-The path of the yarn binary executable file (`Fe/lib/yarn-client/Hadoop/bin/yarn`).
-
-+ `yarn_config_dir`
-
-The path to generate the yarn configuration file (`Fe/lib/yarn-config`).
-
-## Best practices
-
-### Application scenarios
-
-The most suitable scenario for spark load is when the raw data is in a file system (HDFS) and the data volume ranges from tens of GB to TB. Stream load or broker load is recommended for smaller amounts of data.
-
-## FAQ
-
-* When using spark load, the `HADOOP_CONF_DIR` environment variable is not set in `spark-env.sh`.
-
-If the `HADOOP_CONF_DIR` environment variable is not set, the error `When running with master 'yarn' either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment` will be reported.
-
-* When using spark load, `spark_home_default_dir` is not specified correctly.
-
-The spark-submit command is used when submitting a spark job. If `spark_home_default_dir` is set incorrectly, an error `Cannot run program 'xxx/bin/spark_submit', error = 2, no such file or directory` will be reported.
-
-* When using spark load, `spark_resource_path` does not point to the packaged zip file.
-
-If `spark_resource_path` is not set correctly, an error `file XXX/jars/spark-2x.zip does not exist` will be reported.
-
-* When using spark load, `yarn_client_path` does not point to an executable yarn file.
-
-If `yarn_client_path` is not set correctly, an error `yarn client does not exist in path: XXX/yarn-client/hadoop/bin/yarn` will be reported.
diff --git a/docs/en/administrator-guide/load-data/stream-load-manual.md b/docs/en/administrator-guide/load-data/stream-load-manual.md
deleted file mode 100644
index 83303c6f35..0000000000
--- a/docs/en/administrator-guide/load-data/stream-load-manual.md
+++ /dev/null
@@ -1,374 +0,0 @@
----
-{
- "title": "Stream load",
- "language": "en"
-}
----
-
-
-
-# Stream load
-
-Stream load is a synchronous way of importing. Users import local files or data streams into Doris by sending HTTP protocol requests. Stream load synchronously executes the import and returns the import result. Users can directly determine whether the import is successful by the return body of the request.
-
-Stream load is mainly suitable for importing local files, or data from data streams, through programs.
-
-## Basic Principles
-
-The following figure shows the main flow of Stream load, omitting some import details.
-
-```
- ^ +
- | |
- | | 1A. User submit load to FE
- | |
- | +--v-----------+
- | | FE |
-5. Return result to user | +--+-----------+
- | |
- | | 2. Redirect to BE
- | |
- | +--v-----------+
- +---+Coordinator BE| 1B. User submit load to BE
- +-+-----+----+-+
- | | |
- +-----+ | +-----+
- | | | 3. Distrbute data
- | | |
- +-v-+ +-v-+ +-v-+
- |BE | |BE | |BE |
- +---+ +---+ +---+
-```
-
-In Stream load, Doris selects a node as the Coordinator node. This node is responsible for receiving data and distributing data to other data nodes.
-
-Users submit import commands through HTTP protocol. If submitted to FE, FE forwards the request to a BE via the HTTP redirect instruction. Users can also submit import commands directly to a specified BE.
-
-The final result of the import is returned to the user by Coordinator BE.
-
-## Support data format
-
-Currently Stream Load supports two data formats: CSV (text) and JSON
-
-## Basic operations
-### Create a Load
-
-Stream load submits and transfers data through HTTP protocol. Here, the `curl` command shows how to submit an import.
-
-Users can also operate through other HTTP clients.
-
-```
-curl --location-trusted -u user:passwd [-H ""...] -T data.file -XPUT http://fe_host:http_port/api/{db}/{table}/_stream_load
-
-The properties supported in the header are described in "Load Parameters" below.
-The format is: -H "key1: value1"
-```
-
-Examples:
-
-```
-curl --location-trusted -u root -T date -H "label:123" http://abc.com:8030/api/test/date/_stream_load
-```
-The detailed syntax for creating an import can be viewed by executing `HELP STREAM LOAD`. The following section focuses on the meaning of some parameters used when creating a Stream load.
-
-#### Signature parameters
-
-+ user/passwd
-
-  Stream load creates the import over the HTTP protocol and signs the request through Basic Access authentication. The Doris system verifies user identity and import permissions based on the signature.
-
-#### Load Parameters
-
-Stream load uses the HTTP protocol, so all parameters related to the import task are set in the header. The meaning of some of the Stream load import task parameters is introduced below.
-
-+ label
-
- Identity of import task. Each import task has a unique label inside a single database. Label is a user-defined name in the import command. With this label, users can view the execution of the corresponding import task.
-
- Another function of label is to prevent users from importing the same data repeatedly. **It is strongly recommended that users use the same label for the same batch of data. This way, repeated requests for the same batch of data will only be accepted once, guaranteeing at-Most-Once**
-
- When the corresponding import operation state of label is CANCELLED, the label can be used again.
-
-
-+ column_separator
-
- Used to specify the column separator in the load file. The default is `\t`. If it is an invisible character, you need to add `\x` as a prefix and hexadecimal to indicate the separator.
-
- For example, the separator `\x01` of the hive file needs to be specified as `-H "column_separator:\x01"`.
-
- You can use a combination of multiple characters as the column separator.
-
-+ line_delimiter
-
- Used to specify the line delimiter in the load file. The default is `\n`.
-
-  You can use a combination of multiple characters as the line delimiter.
-
-+ max\_filter\_ratio
-
- The maximum tolerance rate of the import task is 0 by default, and the range of values is 0-1. When the import error rate exceeds this value, the import fails.
-
- If the user wishes to ignore the wrong row, the import can be successful by setting this parameter greater than 0.
-
- The calculation formula is as follows:
-
- ``` (dpp.abnorm.ALL / (dpp.abnorm.ALL + dpp.norm.ALL ) ) > max_filter_ratio ```
-
- ``` dpp.abnorm.ALL``` denotes the number of rows whose data quality is not up to standard. Such as type mismatch, column mismatch, length mismatch and so on.
-
- ``` dpp.norm.ALL ``` refers to the number of correct data in the import process. The correct amount of data for the import task can be queried by the `SHOW LOAD` command.
-
-The number of rows in the original file = `dpp.abnorm.ALL + dpp.norm.ALL`
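-
-For example, if an import produces `dpp.abnorm.ALL = 2` and `dpp.norm.ALL = 98`, the error rate is 2 / (2 + 98) = 0.02, so the import fails unless `max_filter_ratio` is set to at least 0.02.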
-
-+ where
-
-  The filter condition specified by the import task. Stream load supports specifying a where clause to filter the raw data. The filtered data will not be imported and will not be counted in the filter ratio calculation, but it will be counted in `num_rows_unselected`.
-
-+ partition
-
-  Partition information of the table to be imported. If the data to be imported does not belong to the specified partitions, it will not be imported and will be counted in `dpp.abnorm.ALL`.
-
-+ columns
-
-  The function transformation configuration of the data to be imported. It includes column reordering and expression transformation, where the expression transformation syntax is the same as in query statements.
-
- ```
- Examples of column order transformation: There are three columns of original data (src_c1,src_c2,src_c3), and there are also three columns (dst_c1,dst_c2,dst_c3) in the doris table at present.
- when the first column src_c1 of the original file corresponds to the dst_c1 column of the target table, while the second column src_c2 of the original file corresponds to the dst_c2 column of the target table and the third column src_c3 of the original file corresponds to the dst_c3 column of the target table,which is written as follows:
- columns: dst_c1, dst_c2, dst_c3
-
- when the first column src_c1 of the original file corresponds to the dst_c2 column of the target table, while the second column src_c2 of the original file corresponds to the dst_c3 column of the target table and the third column src_c3 of the original file corresponds to the dst_c1 column of the target table,which is written as follows:
- columns: dst_c2, dst_c3, dst_c1
-
- Example of expression transformation: There are two columns in the original file and two columns in the target table (c1, c2). However, both columns in the original file need to be transformed by functions to correspond to the two columns in the target table.
-    columns: tmp_c1, tmp_c2, c1 = year(tmp_c1), c2 = month(tmp_c2)
- Tmp_* is a placeholder, representing two original columns in the original file.
- ```
-
-+ exec\_mem\_limit
-
- Memory limit. Default is 2GB. Unit is Bytes
-
-+ merge\_type
-  The data merge type. Three types are supported: APPEND, DELETE, and MERGE. APPEND is the default, meaning this batch of data is appended to the existing data. DELETE means all rows with the same keys as this batch of data are deleted. MERGE semantics must be used together with a delete condition: data that meets the delete condition is processed with DELETE semantics, and the rest is processed with APPEND semantics.
-
-+ two\_phase\_commit
-
-  Stream load supports the two-phase commit mode. The mode can be enabled by declaring ```two_phase_commit=true``` in the HTTP header. This mode is disabled by default.
-  The two-phase commit mode means: during Stream load, after the data is written, a message is returned to the client; at this point the data is invisible and the transaction status is PRECOMMITTED. The data becomes visible only after the client triggers a COMMIT.
-
- 1. User can invoke the following interface to trigger commit operations for transaction:
- ```
- curl -X PUT --location-trusted -u user:passwd -H "txn_id:txnId" -H "txn_operation:commit" http://fe_host:http_port/api/{db}/_stream_load_2pc
- ```
- or
- ```
- curl -X PUT --location-trusted -u user:passwd -H "txn_id:txnId" -H "txn_operation:commit" http://be_host:webserver_port/api/{db}/_stream_load_2pc
- ```
-
- 2. User can invoke the following interface to trigger abort operations for transaction:
- ```
- curl -X PUT --location-trusted -u user:passwd -H "txn_id:txnId" -H "txn_operation:abort" http://fe_host:http_port/api/{db}/_stream_load_2pc
- ```
- or
- ```
- curl -X PUT --location-trusted -u user:passwd -H "txn_id:txnId" -H "txn_operation:abort" http://be_host:webserver_port/api/{db}/_stream_load_2pc
- ```
-
-### Return results
-
-Since Stream load is a synchronous import method, the result of the import is directly returned to the user by creating the return value of the import.
-
-Examples:
-
-```
-{
- "TxnId": 1003,
- "Label": "b6f3bc78-0d2c-45d9-9e4c-faa0a0149bee",
- "Status": "Success",
- "ExistingJobStatus": "FINISHED", // optional
- "Message": "OK",
- "NumberTotalRows": 1000000,
- "NumberLoadedRows": 1000000,
- "NumberFilteredRows": 1,
- "NumberUnselectedRows": 0,
- "LoadBytes": 40888898,
- "LoadTimeMs": 2144,
- "BeginTxnTimeMs": 1,
- "StreamLoadPutTimeMs": 2,
- "ReadDataTimeMs": 325,
- "WriteDataTimeMs": 1933,
- "CommitAndPublishTimeMs": 106,
- "ErrorURL": "http://192.168.1.1:8042/api/_load_error_log?file=__shard_0/error_log_insert_stmt_db18266d4d9b4ee5-abb00ddd64bdf005_db18266d4d9b4ee5_abb00ddd64bdf005"
-}
-```
-
-The following main explanations are given for the Stream load import result parameters:
-
-+ TxnId: The transaction ID of the import. Users generally do not need to be concerned with it.
-
-+ Label: Import Label. User specified or automatically generated by the system.
-
-+ Status: Import completion status.
-
- "Success": Indicates successful import.
-
-  "Publish Timeout": This state also indicates that the import has completed; the data may just become visible after a delay, and there is no need to retry.
-
-  "Label Already Exists": The Label is duplicated and needs to be replaced.
-
- "Fail": Import failed.
-
-+ ExistingJobStatus: The state of the load job corresponding to the existing Label.
-
- This field is displayed only when the status is "Label Already Exists". The user can know the status of the load job corresponding to Label through this state. "RUNNING" means that the job is still executing, and "FINISHED" means that the job is successful.
-
-+ Message: Import error messages.
-
-+ NumberTotalRows: Number of rows imported for total processing.
-
-+ NumberLoadedRows: Number of rows successfully imported.
-
-+ NumberFilteredRows: Number of rows whose data quality is not up to standard.
-
-+ NumberUnselectedRows: Number of rows filtered by where condition.
-
-+ LoadBytes: Number of bytes imported.
-
-+ LoadTimeMs: Import completion time. Unit milliseconds.
-
-+ BeginTxnTimeMs: The time cost for RPC to Fe to begin a transaction, Unit milliseconds.
-
-+ StreamLoadPutTimeMs: The time cost for RPC to Fe to get a stream load plan, Unit milliseconds.
-
-+ ReadDataTimeMs: Read data time, Unit milliseconds.
-
-+ WriteDataTimeMs: Write data time, Unit milliseconds.
-
-+ CommitAndPublishTimeMs: The time cost for RPC to Fe to commit and publish a transaction, Unit milliseconds.
-
-+ ErrorURL: If you have data quality problems, visit this URL to see specific error lines.
-
-> Note: Since Stream load is a synchronous import mode, import information is not recorded in the Doris system, and Stream load jobs cannot be viewed asynchronously with import commands. You need to listen for the return value of the import creation request to get the import result.
-
-### Cancel Load
-
-Users can't cancel Stream load manually. Stream load will be cancelled automatically by the system after a timeout or import error.
-
-## Relevant System Configuration
-
-### FE configuration
-
-+ stream\_load\_default\_timeout\_second
-
-  The timeout of the import task, in seconds. If the import task is not completed within this time, it is cancelled by the system and its status becomes CANCELLED.
-
-  At present, Stream load does not support a custom import timeout; all Stream load imports share a uniform timeout, which defaults to 300 seconds. If the source file cannot be imported within the specified time, the FE parameter ```stream_load_default_timeout_second``` needs to be adjusted.
-
-### BE configuration
-
-+ streaming\_load\_max\_mb
-
-  The maximum import size of Stream load, in MB; the default is 10G. If the user's original file exceeds this value, the BE parameter ```streaming_load_max_mb``` needs to be adjusted.
-
-## Best Practices
-
-### Application scenarios
-
-The most appropriate scenario for using Stream load is that the original file is in memory or on disk. Secondly, since Stream load is a synchronous import method, users can also use this import if they want to obtain the import results in a synchronous manner.
-
-### Data volume
-
-Since Stream load imports and distributes data via the initiating BE, the recommended amount of data per import is between 1G and 10G. Since the default maximum Stream load import size is 10G, the BE configuration ```streaming_load_max_mb``` needs to be modified to import files larger than 10G.
-
-```
-For example, the size of the file to be imported is 15G
-Modify the BE configuration streaming_load_max_mb to 16000
-```
-
-The default Stream load timeout is 300 seconds. Given Doris's current maximum import speed limit, files larger than about 3G require the default import task timeout to be modified.
-
-```
-Import task timeout = import data volume / 10M/s (the actual average import speed needs to be calculated by users based on their own cluster conditions)
-For example, to import a 10G file:
-Timeout = 1000s, i.e. 10G / 10M/s
-```
-
-### Complete examples
-Data situation: the data to be imported is on the local disk of the host that sends the import request, in the path /home/store_sales; it is about 15G and is to be imported into the table store\_sales of the database bj\_sales.
-
-Cluster situation: The concurrency of Stream load is not affected by cluster size.
-
-+ Step 1: Does the import file size exceed the default maximum import size of 10G
-
- ```
- BE conf
- streaming_load_max_mb = 16000
- ```
-+ Step 2: Calculate whether the approximate import time exceeds the default timeout value
-
- ```
-  Approximate import time: 15000MB / 10MB/s = 1500s
-  This exceeds the default timeout, so the FE configuration needs to be modified:
- stream_load_default_timeout_second = 1500
- ```
-
-+ Step 3: Create Import Tasks
-
- ```
- curl --location-trusted -u user:password -T /home/store_sales -H "label:abc" http://abc.com:8000/api/bj_sales/store_sales/_stream_load
- ```
-
-## Common Questions
-
-* Label Already Exists
-
-  The steps for troubleshooting a duplicate Label in Stream load are as follows:
-
- 1. Is there an import Label conflict that already exists with other import methods?
-
-     Because import Labels in the Doris system do not distinguish between import methods, another import method may have already used the same Label.
-
-     Run ``SHOW LOAD WHERE LABEL = "xxx"``, where xxx is the duplicate Label string, to check whether a FINISHED import with the same Label as the one being created already exists.
-
- 2. Are Stream loads submitted repeatedly for the same job?
-
-     Since Stream load creates the import task via an HTTP request, HTTP clients in various languages usually have their own request retry logic. After receiving the first request, the Doris system starts processing the Stream load, but because the result is not returned to the client in time, the client retries and creates the request again. At this point the Doris system is already handling the first request, so the second request is reported as Label Already Exists.
-
-     To check for the situation described above: search the FE Master's log for the Label and see whether there are two ``redirect load action to destination=`` entries for the same Label. If so, the request was submitted repeatedly by the client side.
-
-     It is recommended that users estimate the approximate import time based on the amount of data in the current request, and increase the client-side request timeout to a value greater than the import timeout, to avoid the client submitting the request multiple times.
-
-  3. Connection reset exception
-
-     In community version 0.14.0 and earlier, a connection reset exception could occur after HTTP V2 was enabled, because the built-in web container was Tomcat, which has problems in its handling of 307 (Temporary Redirect). When Stream load was used to import a large amount of data, a connection reset exception occurred because Tomcat started data transmission before the 307 redirect, which resulted in missing authentication information when the BE received the data request. Changing the built-in container to Jetty later solved this problem. If you encounter this problem, please upgrade your Doris or disable HTTP V2 (`enable_http_server_v2=false`).
-
-     After the upgrade, also upgrade the HTTP client used by your program to version `4.5.13` by introducing the following dependency in your pom.xml file:
-
- ```xml
-        <dependency>
-            <groupId>org.apache.httpcomponents</groupId>
-            <artifactId>httpclient</artifactId>
-            <version>4.5.13</version>
-        </dependency>
- ```
-
-
-
diff --git a/docs/en/administrator-guide/materialized_view.md b/docs/en/administrator-guide/materialized_view.md
deleted file mode 100644
index 243fcdfeae..0000000000
--- a/docs/en/administrator-guide/materialized_view.md
+++ /dev/null
@@ -1,486 +0,0 @@
----
-{
- "title": "Materialized view",
- "language": "en"
-}
----
-
-
-
-# Materialized view
-A materialized view is a data set that is pre-calculated (according to a defined SELECT statement) and stored in a special table in Doris.
-
-Materialized views mainly exist to let users analyze any dimension of the original detailed data while also being able to quickly query and analyze fixed dimensions.
-
-## When to use materialized view
-
-+ Analyze requirements to cover both detailed data query and fixed-dimensional query.
-+ The query only involves a small part of the columns or rows in the table.
-+ The query contains some time-consuming processing operations, such as long-time aggregation operations.
-+ The query needs to match different prefix indexes.
-
-## Advantage
-
-+ For queries that repeatedly use the same sub-query results, performance is greatly improved.
-+ Doris automatically maintains the data of the materialized view: whether the operation is a new import or a delete, data consistency between the base table and the materialized view table is guaranteed, with no additional manual maintenance cost.
-+ When querying, it will automatically match the optimal materialized view and read data directly from the materialized view.
-
-*Automatic maintenance of materialized view data will cause some maintenance overhead, which will be explained in the limitations of materialized views later.*
-
-## Materialized View VS Rollup
-
-Before the materialized view function, users generally used the Rollup function to improve query efficiency through pre-aggregation. However, Rollup has certain limitations. It cannot do pre-aggregation based on the detailed model.
-
-Materialized views cover the functions of Rollup while also supporting richer aggregate functions. So the materialized view is actually a superset of Rollup.
-
-In other words, the functions previously supported by the `ALTER TABLE ADD ROLLUP` syntax can now be implemented by `CREATE MATERIALIZED VIEW`.
-
-## Use materialized views
-
-The Doris system provides a complete set of DDL syntax for materialized views, including creating, viewing, and deleting. The syntax of DDL is consistent with PostgreSQL and Oracle.
-
-### Create a materialized view
-
-Here you must first decide what kind of materialized view to create based on the characteristics of your query statement. This is not to say that your materialized view definition is exactly the same as one of your query statements. There are two principles here:
-
-1. **Abstract** from the query statement, the grouping and aggregation methods shared by multiple queries are used as the definition of the materialized view.
-2. It is not necessary to create materialized views for all dimension combinations.
-
-Regarding the first point: if a materialized view is abstracted such that multiple queries can be matched to it, that materialized view works best, because maintaining the materialized view itself also consumes resources.
-
-If a materialized view only fits one particular query and other queries do not use it, the materialized view is not cost-effective: it occupies the cluster's storage resources without serving more queries.
-
-Therefore, users need to combine their own query statements and data dimension information to abstract the definition of some materialized views.
-
-Regarding the second point: in real analytical queries, not all dimension combinations are actually used. Therefore, it is enough to create materialized views for the commonly used combinations of dimensions, achieving a balance between space and time.
-
-Creating a materialized view is an asynchronous operation, which means that after the user successfully submits the creation task, Doris will calculate the existing data in the background until the creation is successful.
-
-The specific syntax can be viewed through the following command:
-
-```
-HELP CREATE MATERIALIZED VIEW
-```
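-
-As a minimal sketch (using the `sales_records` table from Best Practice 1 below), a materialized view that pre-aggregates the sales amount per store could be created like this:
-
-```
-CREATE MATERIALIZED VIEW store_amt AS
-SELECT store_id, SUM(sale_amt)
-FROM sales_records
-GROUP BY store_id;
-```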
-
-### Support aggregate functions
-
-The aggregate functions currently supported by the materialized view function are:
-
-+ SUM, MIN, MAX (Version 0.12)
-+ COUNT, BITMAP\_UNION, HLL\_UNION (Version 0.13)
-
-+ The form of BITMAP\_UNION must be `BITMAP_UNION(TO_BITMAP(COLUMN))`, where the column type can only be an integer (LARGEINT is not supported either), or `BITMAP_UNION(COLUMN)` when the base table is an AGG model (see the sketch after this list).
-+ The form of HLL\_UNION must be `HLL_UNION(HLL_HASH(COLUMN))`, where the column type cannot be DECIMAL, or `HLL_UNION(COLUMN)` when the base table is an AGG model.
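-
-For instance, a sketch of a bitmap materialized view over an integer column, consistent with the mv_3 view shown later in this document, might be:
-
-```
-CREATE MATERIALIZED VIEW mv_3 AS
-SELECT k1, BITMAP_UNION(TO_BITMAP(k2))
-FROM mv_test
-GROUP BY k1;
-```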
-
-### Update strategy
-
-To ensure data consistency between the materialized view table and the Base table, Doris synchronizes operations on the Base table, such as import and delete, to the materialized view table. Updates are applied incrementally to improve efficiency, and atomicity is guaranteed through transactions.
-
-For example, if the user inserts data into the base table through the INSERT command, this data will be inserted into the materialized view synchronously. When both the base table and the materialized view table are written successfully, the INSERT command will return successfully.
-
-### Query automatic matching
-
-After the materialized view is successfully created, the user's query does not need to change; it still targets the base table. Doris automatically selects an optimal materialized view based on the query statement, reads data from it, and performs the calculation on it.
-
-Users can use the EXPLAIN command to check whether the current query uses a materialized view.
-
-The matching relationship between the aggregation in the materialized view and the aggregation in the query:
-
-| Materialized View Aggregation | Query Aggregation |
-| ---------- | -------- |
-| sum | sum |
-| min | min |
-| max | max |
-| count | count |
-| bitmap\_union | bitmap\_union, bitmap\_union\_count, count(distinct) |
-| hll\_union | hll\_raw\_agg, hll\_union\_agg, ndv, approx\_count\_distinct |
-
-When a query's bitmap or HLL aggregation function matches a materialized view, the query's aggregation operator is rewritten according to the table structure of the materialized view. See Best Practice 2 for details.
-
-### Query materialized views
-
-To check which materialized views the current table has and what their table structures are, use the following command:
-
-```
-MySQL [test]> desc mv_test all;
-+-----------+---------------+-----------------+----------+------+-------+---------+--------------+
-| IndexName | IndexKeysType | Field | Type | Null | Key | Default | Extra |
-+-----------+---------------+-----------------+----------+------+-------+---------+--------------+
-| mv_test | DUP_KEYS | k1 | INT | Yes | true | NULL | |
-| | | k2 | BIGINT | Yes | true | NULL | |
-| | | k3 | LARGEINT | Yes | true | NULL | |
-| | | k4 | SMALLINT | Yes | false | NULL | NONE |
-| | | | | | | | |
-| mv_2 | AGG_KEYS | k2 | BIGINT | Yes | true | NULL | |
-| | | k4 | SMALLINT | Yes | false | NULL | MIN |
-| | | k1 | INT | Yes | false | NULL | MAX |
-| | | | | | | | |
-| mv_3 | AGG_KEYS | k1 | INT | Yes | true | NULL | |
-| | | to_bitmap(`k2`) | BITMAP | No | false | | BITMAP_UNION |
-| | | | | | | | |
-| mv_1 | AGG_KEYS | k4 | SMALLINT | Yes | true | NULL | |
-| | | k1 | BIGINT | Yes | false | NULL | SUM |
-| | | k3 | LARGEINT | Yes | false | NULL | SUM |
-| | | k2 | BIGINT | Yes | false | NULL | MIN |
-+-----------+---------------+-----------------+----------+------+-------+---------+--------------+
-```
-
-You can see that the current `mv_test` table has three materialized views, mv\_1, mv\_2 and mv\_3, along with their table structures.
-
-### Delete materialized view
-
-If you no longer need a materialized view, you can delete it with the `DROP` command.
-
-The specific syntax can be viewed through the following command:
-
-```
-HELP DROP MATERIALIZED VIEW
-```
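-
-For example, a sketch of dropping the `store_amt` materialized view created in Best Practice 1 below (check the HELP output above for the exact options supported by your version):
-
-```
-DROP MATERIALIZED VIEW store_amt ON sales_records;
-```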
-
-## Best Practice 1
-
-The use of materialized views is generally divided into the following steps:
-
-1. Create a materialized view
-2. Asynchronously check whether the materialized view has been constructed
-3. Query and automatically match materialized views
-
-**First is the first step: Create a materialized view**
-
-Assume that the user has a sales record detail table, which stores the transaction id, salesperson, store, sale date, and amount of each transaction. The table creation statement is:
-
-```
-create table sales_records(record_id int, seller_id int, store_id int, sale_date date, sale_amt bigint) distributed by hash(record_id) properties("replication_num" = "1");
-```
-The table structure of this `sales_records` is as follows:
-
-```
-MySQL [test]> desc sales_records;
-+-----------+--------+------+-------+---------+-------+
-| Field | Type | Null | Key | Default | Extra |
-+-----------+--------+------+-------+---------+-------+
-| record_id | INT | Yes | true | NULL | |
-| seller_id | INT | Yes | true | NULL | |
-| store_id | INT | Yes | true | NULL | |
-| sale_date | DATE | Yes | false | NULL | NONE |
-| sale_amt | BIGINT | Yes | false | NULL | NONE |
-+-----------+--------+------+-------+---------+-------+
-```
-
-If the user often analyzes the sales volume of different stores, you can create a materialized view on the `sales_records` table that groups by store and sums the sales of each store. The creation statement is as follows:
-
-```
-MySQL [test]> create materialized view store_amt as select store_id, sum(sale_amt) from sales_records group by store_id;
-```
-
-The backend returns the following, indicating that the task of creating the materialized view has been submitted successfully.
-
-```
-Query OK, 0 rows affected (0.012 sec)
-```
-
-**Step 2: Check whether the materialized view has been built**
-
-Since creating a materialized view is an asynchronous operation, after submitting the creation task the user needs to check whether the materialized view has been built with the following command:
-
-```
-SHOW ALTER TABLE ROLLUP FROM db_name; (Version 0.12)
-SHOW ALTER TABLE MATERIALIZED VIEW FROM db_name; (Version 0.13)
-```
-
-In this command, `db_name` is a parameter that you need to replace with your real database name. The command displays all materialized view creation tasks for that database. The result looks like this:
-
-```
-+-------+---------------+---------------------+---------------------+---------------+-----------------+----------+---------------+----------+-----+----------+---------+
-| JobId | TableName     | CreateTime          | FinishedTime        | BaseIndexName | RollupIndexName | RollupId | TransactionId | State    | Msg | Progress | Timeout |
-+-------+---------------+---------------------+---------------------+---------------+-----------------+----------+---------------+----------+-----+----------+---------+
-| 22036 | sales_records | 2020-07-30 20:04:28 | 2020-07-30 20:04:57 | sales_records | store_amt       | 22037    | 5008          | FINISHED |     | NULL     | 86400   |
-+-------+---------------+---------------------+---------------------+---------------+-----------------+----------+---------------+----------+-----+----------+---------+
-```
-
-Here, TableName indicates which table the materialized view's data comes from, and RollupIndexName is the name of the materialized view. One of the more important fields is State.
-
-When the State of the creation task becomes FINISHED, the materialized view has been created successfully, and queries can now be automatically matched to it.
-
-**Step 3: Query**
-
-After the materialized view is created, queries on the sales volume of different stores read the aggregated data directly from the newly created materialized view `store_amt`, improving query efficiency.
-
-The user's query still targets the `sales_records` table, for example:
-
-```
-SELECT store_id, sum(sale_amt) FROM sales_records GROUP BY store_id;
-```
-
-The above query will automatically match `store_amt`. The user can use the following command to check whether the current query matches the appropriate materialized view.
-
-```
-EXPLAIN SELECT store_id, sum(sale_amt) FROM sales_records GROUP BY store_id;
-+-----------------------------------------------------------------------------+
-| Explain String |
-+-----------------------------------------------------------------------------+
-| PLAN FRAGMENT 0 |
-| OUTPUT EXPRS: `store_id` | sum(`sale_amt`) |
-| PARTITION: UNPARTITIONED |
-| |
-| RESULT SINK |
-| |
-| 4:EXCHANGE |
-| |
-| PLAN FRAGMENT 1 |
-| OUTPUT EXPRS: |
-| PARTITION: HASH_PARTITIONED: `store_id` |
-| |
-| STREAM DATA SINK |
-| EXCHANGE ID: 04 |
-| UNPARTITIONED |
-| |
-| 3:AGGREGATE (merge finalize) |
-| | output: sum( sum(`sale_amt`)) |
-| | group by: `store_id` |
-| | |
-| 2:EXCHANGE |
-| |
-| PLAN FRAGMENT 2 |
-| OUTPUT EXPRS: |
-| PARTITION: RANDOM |
-| |
-| STREAM DATA SINK |
-| EXCHANGE ID: 02 |
-| HASH_PARTITIONED: `store_id` |
-| |
-| 1:AGGREGATE (update serialize) |
-| | STREAMING |
-| | output: sum(`sale_amt`) |
-| | group by: `store_id` |
-| | |
-| 0:OlapScanNode |
-| TABLE: sales_records |
-| PREAGGREGATION: ON |
-| partitions=1/1 |
-| rollup: store_amt |
-| tabletRatio=10/10 |
-| tabletList=22038,22040,22042,22044,22046,22048,22050,22052,22054,22056 |
-| cardinality=0 |
-| avgRowSize=0.0 |
-| numNodes=1 |
-+-----------------------------------------------------------------------------+
-45 rows in set (0.006 sec)
-```
-The key part is the rollup attribute of OlapScanNode: the rollup of the current query shows `store_amt`. In other words, the query has been correctly matched to the materialized view `store_amt`, and data is read directly from it.
-
-## Best Practice 2: PV, UV
-
-Business scenario: calculating the PV and UV of advertisements.
-
-Assuming that the user's original ad click data is stored in Doris, the speed of ad PV and UV queries can be improved by creating a materialized view with `bitmap_union`.
-
-Use the following statement to create a table that stores the detailed ad click data: when each click happened, which advertisement was clicked, through which channel, and which user clicked it.
-
-```
-MySQL [test]> create table advertiser_view_record(time date, advertiser varchar(10), channel varchar(10), user_id int) distributed by hash(time) properties("replication_num" = "1");
-Query OK, 0 rows affected (0.014 sec)
-```
-The original ad click data table structure is:
-
-```
-MySQL [test]> desc advertiser_view_record;
-+------------+-------------+------+-------+---------+-------+
-| Field | Type | Null | Key | Default | Extra |
-+------------+-------------+------+-------+---------+-------+
-| time | DATE | Yes | true | NULL | |
-| advertiser | VARCHAR(10) | Yes | true | NULL | |
-| channel | VARCHAR(10) | Yes | false | NULL | NONE |
-| user_id | INT | Yes | false | NULL | NONE |
-+------------+-------------+------+-------+---------+-------+
-4 rows in set (0.001 sec)
-```
-
-1. Create a materialized view
-
- Since the user wants to query the UV value of the advertisement, that is, a precise de-duplication of users of the same advertisement is required, the user's query is generally:
-
- ```
- SELECT advertiser, channel, count(distinct user_id) FROM advertiser_view_record GROUP BY advertiser, channel;
- ```
-
- For this kind of UV-seeking scene, we can create a materialized view with `bitmap_union` to achieve a precise deduplication effect in advance.
-
-    In Doris, the result of `count(distinct)` aggregation is exactly the same as that of `bitmap_union_count`, and `bitmap_union_count` equals counting the result of `bitmap_union`. So **if the query involves `count(distinct)`, you can speed it up by creating a materialized view with `bitmap_union` aggregation.**
-
-    In this case, you can create a materialized view that exactly deduplicates `user_id`, grouped by advertiser and channel.
-
- ```
- MySQL [test]> create materialized view advertiser_uv as select advertiser, channel, bitmap_union(to_bitmap(user_id)) from advertiser_view_record group by advertiser, channel;
- Query OK, 0 rows affected (0.012 sec)
- ```
-
-    *Note: Because `user_id` itself is an INT column, it cannot be passed to `bitmap_union` directly in Doris. The field must first be converted to the bitmap type with the `to_bitmap` function, and only then can it be aggregated with `bitmap_union`.*
-
-    After creation, the table structures of the ad click detail table and the materialized view are as follows:
-
- ```
- MySQL [test]> desc advertiser_view_record all;
- +------------------------+---------------+----------------------+-------------+------+-------+---------+--------------+
- | IndexName | IndexKeysType | Field | Type | Null | Key | Default | Extra |
- +------------------------+---------------+----------------------+-------------+------+-------+---------+--------------+
- | advertiser_view_record | DUP_KEYS | time | DATE | Yes | true | NULL | |
- | | | advertiser | VARCHAR(10) | Yes | true | NULL | |
- | | | channel | VARCHAR(10) | Yes | false | NULL | NONE |
- | | | user_id | INT | Yes | false | NULL | NONE |
- | | | | | | | | |
- | advertiser_uv | AGG_KEYS | advertiser | VARCHAR(10) | Yes | true | NULL | |
- | | | channel | VARCHAR(10) | Yes | true | NULL | |
- | | | to_bitmap(`user_id`) | BITMAP | No | false | | BITMAP_UNION |
- +------------------------+---------------+----------------------+-------------+------+-------+---------+--------------+
- ```
-
-2. Automatic query matching
-
-    Once the materialized view is created, when querying the ad UV, Doris automatically reads the data from the newly created materialized view `advertiser_uv`. For example, the original query statement is as follows:
-
- ```
- SELECT advertiser, channel, count(distinct user_id) FROM advertiser_view_record GROUP BY advertiser, channel;
- ```
-
- After the materialized view is selected, the actual query will be transformed into:
-
- ```
- SELECT advertiser, channel, bitmap_union_count(to_bitmap(user_id)) FROM advertiser_uv GROUP BY advertiser, channel;
- ```
-
- Through the EXPLAIN command, you can check whether Doris matches the materialized view:
-
- ```
- MySQL [test]> explain SELECT advertiser, channel, count(distinct user_id) FROM advertiser_view_record GROUP BY advertiser, channel;
- +-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- | Explain String |
- +-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- | PLAN FRAGMENT 0 |
- | OUTPUT EXPRS: `advertiser` | `channel` | bitmap_union_count(`default_cluster:test`.`advertiser_view_record`.`mv_bitmap_union_user_id`) |
- | PARTITION: UNPARTITIONED |
- | |
- | RESULT SINK |
- | |
- | 4:EXCHANGE |
- | |
- | PLAN FRAGMENT 1 |
- | OUTPUT EXPRS: |
- | PARTITION: HASH_PARTITIONED: `advertiser`, `channel` |
- | |
- | STREAM DATA SINK |
- | EXCHANGE ID: 04 |
- | UNPARTITIONED |
- | |
- | 3:AGGREGATE (merge finalize) |
- | | output: bitmap_union_count( bitmap_union_count(`default_cluster:test`.`advertiser_view_record`.`mv_bitmap_union_user_id`)) |
- | | group by: `advertiser`, `channel` |
- | | |
- | 2:EXCHANGE |
- | |
- | PLAN FRAGMENT 2 |
- | OUTPUT EXPRS: |
- | PARTITION: RANDOM |
- | |
- | STREAM DATA SINK |
- | EXCHANGE ID: 02 |
- | HASH_PARTITIONED: `advertiser`, `channel` |
- | |
- | 1:AGGREGATE (update serialize) |
- | | STREAMING |
- | | output: bitmap_union_count(`default_cluster:test`.`advertiser_view_record`.`mv_bitmap_union_user_id`) |
- | | group by: `advertiser`, `channel` |
- | | |
- | 0:OlapScanNode |
- | TABLE: advertiser_view_record |
- | PREAGGREGATION: ON |
- | partitions=1/1 |
- | rollup: advertiser_uv |
- | tabletRatio=10/10 |
- | tabletList=22084,22086,22088,22090,22092,22094,22096,22098,22100,22102 |
- | cardinality=0 |
- | avgRowSize=0.0 |
- | numNodes=1 |
- +-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- 45 rows in set (0.030 sec)
- ```
-
-    In the EXPLAIN result, you can first see that the rollup attribute of OlapScanNode is advertiser_uv. In other words, the query scans the data of the materialized view directly, and the match is successful.
-
-    Second, the `count(distinct)` calculation on the `user_id` field is rewritten as `bitmap_union_count`, achieving exact deduplication through bitmaps.
-
-
-## Best Practice 3
-
-Business scenario: matching a richer prefix index
-
-The user's original table has three columns (k1, k2, k3), of which k1 and k2 are prefix index columns. If the query condition contains `where k1=a and k2=b`, the query can be accelerated by the index.
-
-But in some cases, the user's filter condition cannot match the prefix index, for example `where k3=c`. Then the query cannot be accelerated by the index.
-
-This problem can be solved by creating a materialized view with k3 as the first column.
-
-1. Create a materialized view
-
- ```
- CREATE MATERIALIZED VIEW mv_1 as SELECT k3, k2, k1 FROM tableA ORDER BY k3;
- ```
-
-    After the above statement completes, the materialized view retains the complete detail data, and the prefix index of the materialized view is the k3 column. The table structure is as follows:
-
- ```
- MySQL [test]> desc tableA all;
- +-----------+---------------+-------+------+------+-------+---------+-------+
- | IndexName | IndexKeysType | Field | Type | Null | Key | Default | Extra |
- +-----------+---------------+-------+------+------+-------+---------+-------+
- | tableA | DUP_KEYS | k1 | INT | Yes | true | NULL | |
- | | | k2 | INT | Yes | true | NULL | |
- | | | k3 | INT | Yes | true | NULL | |
- | | | | | | | | |
- | mv_1 | DUP_KEYS | k3 | INT | Yes | true | NULL | |
- | | | k2 | INT | Yes | false | NULL | NONE |
- | | | k1 | INT | Yes | false | NULL | NONE |
- +-----------+---------------+-------+------+------+-------+---------+-------+
- ```
-
-2. Query matching
-
-    Now, if the user's query filters on the k3 column, for example:
-
- ```
-    select k1, k2, k3 from tableA where k3=1;
- ```
-
-    At this time, the query will read data directly from the newly created materialized view mv_1. The materialized view has a prefix index on k3, so query efficiency is improved. You can verify the match with EXPLAIN, as shown below.
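-
-    A sketch of the check (the exact plan output depends on your version; look for `rollup: mv_1` under OlapScanNode, as in the earlier examples):
-
-    ```
-    EXPLAIN select k1, k2, k3 from tableA where k3=1;
-    ```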
-
-
-## Limitations
-
-1. The parameter of an aggregate function in a materialized view must be a single column; expressions are not supported. For example, sum(a+b) is not supported (see the sketch after this list).
-2. If the conditional column of a delete statement does not exist in a materialized view, the delete operation cannot be performed. If you must delete the data, you need to drop the materialized view first and then delete the data.
-3. Too many materialized views on a single table will affect import efficiency: when importing data, the materialized views and the base table are updated synchronously. If a table has more than 10 materialized views, the import may become very slow, just as if a single import had to write to 10 tables at the same time.
-4. The same column with different aggregate functions cannot appear in one materialized view at the same time. For example, select sum(a), min(a) from table is not supported.
-5. For the Unique Key data model, a materialized view can only reorder columns and cannot perform aggregation. Therefore, on the Unique Key model it is not possible to perform coarse-grained aggregation of the data by creating a materialized view.
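-
-For illustration, the following definitions would be rejected under limitations 1 and 4 above (the table `t(a, b)` is hypothetical):
-
-```
--- Rejected: expression as an aggregate argument (limitation 1)
-CREATE MATERIALIZED VIEW mv_bad_expr AS SELECT sum(a + b) FROM t;
-
--- Rejected: the same column with different aggregate functions (limitation 4)
-CREATE MATERIALIZED VIEW mv_bad_agg AS SELECT sum(a), min(a) FROM t;
-```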
-
-## Error
-
-1. DATA_QUALITY_ERR: "The data quality does not satisfy, please check your data"
-   Materialized view creation failed due to data quality issues.
-   Note: The bitmap type only supports positive integers. If there are negative numbers in the original data, the materialized view will fail to be created.
diff --git a/docs/en/administrator-guide/multi-tenant.md b/docs/en/administrator-guide/multi-tenant.md
deleted file mode 100644
index 8c37afad23..0000000000
--- a/docs/en/administrator-guide/multi-tenant.md
+++ /dev/null
@@ -1,222 +0,0 @@
----
-{
- "title": "Multi-tenancy",
- "language": "en"
-}
----
-
-
-
-# Multi-tenancy
-
-The main purpose of Doris's multi-tenant and resource isolation solution is to reduce interference between multiple users when performing data operations in the same Doris cluster, and to allocate cluster resources to each user more reasonably.
-
-The scheme is mainly divided into two parts: the division of resource groups at the node level within the cluster, and the resource limits on a single query.
-
-## Nodes in Doris
-
-First, let's briefly introduce the node composition of Doris. There are two types of nodes in a Doris cluster: Frontend (FE) and Backend (BE).
-
-FE is mainly responsible for metadata management, cluster management, user request access and query plan analysis.
-
-BE is mainly responsible for data storage and execution of query plans.
-
-FE does not participate in the processing and calculation of user data, so it is a node with low resource consumption. The BE is responsible for all data calculations and task processing, and is a resource-consuming node. Therefore, the resource division and resource restriction schemes introduced in this article are all aimed at BE nodes. Because the FE node consumes relatively low resources and can also be scaled horizontally, there is usually no need to isolate and restrict resources, and the FE node can be shared by all users.
-
-## Node resource division
-
-Node resource division refers to setting tags for BE nodes in a Doris cluster, and the BE nodes with the same tags form a resource group. Resource group can be regarded as a management unit of data storage and calculation. Below we use a specific example to introduce the use of resource groups.
-
-1. Set labels for BE nodes
-
- Assume that the current Doris cluster has 6 BE nodes. They are host[1-6] respectively. In the initial situation, all nodes belong to a default resource group (Default).
-
- We can use the following command to divide these 6 nodes into 3 resource groups: group_a, group_b, group_c:
-
- ```sql
- alter system modify backend "host1:9050" set ("tag.location" = "group_a");
- alter system modify backend "host2:9050" set ("tag.location" = "group_a");
- alter system modify backend "host3:9050" set ("tag.location" = "group_b");
- alter system modify backend "host4:9050" set ("tag.location" = "group_b");
- alter system modify backend "host5:9050" set ("tag.location" = "group_c");
- alter system modify backend "host6:9050" set ("tag.location" = "group_c");
- ```
-
- Here we combine `host[1-2]` to form a resource group `group_a`, `host[3-4]` to form a resource group `group_b`, and `host[5-6]` to form a resource group `group_c`.
-
- > Note: One BE only supports setting one Tag.
-
-2. Distribution of data according to resource groups
-
-    After the resource groups are divided, we can distribute different replicas of user data across different resource groups. Assume a user table UserTable for which we want to store one replica in each of the three resource groups. This can be achieved with the following table creation statement:
-
- ```sql
-    create table UserTable
-    (k1 int, k2 int)
-    distributed by hash(k1) buckets 1
-    properties(
-        "replication_allocation" = "tag.location.group_a:1, tag.location.group_b:1, tag.location.group_c:1"
-    );
- ```
-
-    In this way, the data of the UserTable table will be stored as 3 replicas on the nodes of resource groups group_a, group_b, and group_c.
-
- The following figure shows the current node division and data distribution:
-
- ```
- ┌────────────────────────────────────────────────────┐
- │ │
- │ ┌──────────────────┐ ┌──────────────────┐ │
- │ │ host1 │ │ host2 │ │
- │ │ ┌─────────────┐ │ │ │ │
- │ group_a │ │ replica1 │ │ │ │ │
- │ │ └─────────────┘ │ │ │ │
- │ │ │ │ │ │
- │ └──────────────────┘ └──────────────────┘ │
- │ │
- ├────────────────────────────────────────────────────┤
- ├────────────────────────────────────────────────────┤
- │ │
- │ ┌──────────────────┐ ┌──────────────────┐ │
- │ │ host3 │ │ host4 │ │
- │ │ │ │ ┌─────────────┐ │ │
- │ group_b │ │ │ │ replica2 │ │ │
- │ │ │ │ └─────────────┘ │ │
- │ │ │ │ │ │
- │ └──────────────────┘ └──────────────────┘ │
- │ │
- ├────────────────────────────────────────────────────┤
- ├────────────────────────────────────────────────────┤
- │ │
- │ ┌──────────────────┐ ┌──────────────────┐ │
- │ │ host5 │ │ host6 │ │
- │ │ │ │ ┌─────────────┐ │ │
- │ group_c │ │ │ │ replica3 │ │ │
- │ │ │ │ └─────────────┘ │ │
- │ │ │ │ │ │
- │ └──────────────────┘ └──────────────────┘ │
- │ │
- └────────────────────────────────────────────────────┘
- ```
-
-3. Use different resource groups for data query
-
-    After the first two steps are completed, we can restrict a user's queries by setting the user's resource usage permission, so that they are executed only on nodes in the specified resource groups.
-
- For example, we can use the following statement to restrict user1 to only use nodes in the `group_a` resource group for data query, user2 can only use the `group_b` resource group, and user3 can use 3 resource groups at the same time:
-
- ```sql
-    set property for 'user1' 'resource_tags.location' = 'group_a';
-    set property for 'user2' 'resource_tags.location' = 'group_b';
-    set property for 'user3' 'resource_tags.location' = 'group_a, group_b, group_c';
- ```
-
-    After the setting is complete, when user1 initiates a query on the UserTable table, it will only access the data replicas on the nodes of the `group_a` resource group, and the query will only use the computing resources of the nodes in `group_a`. A query from user3 can use replicas and computing resources in any of the resource groups.
-
-    In this way, we achieve physical resource isolation between different users' queries by dividing nodes and restricting user resource usage. Furthermore, we can create different users for different business departments and restrict each user to a different resource group, so that the business departments do not interfere with each other's resources. For example, suppose a business table in the cluster needs to be shared by all 9 business departments, but we want to avoid resource preemption between departments as much as possible. We can create 3 replicas of this table and store them in 3 resource groups, then create 9 users for the 9 departments and limit every 3 users to one resource group. In this way, the degree of resource competition is reduced from 9 to 3.
-
-    On the other hand, resource groups can also be used to isolate online and offline tasks. For example, we can divide the nodes into two resource groups, Online and Offline. The table data is still stored in 3 replicas, of which 2 replicas are placed in the Online resource group and 1 replica in the Offline resource group. The Online resource group is mainly used for online data services with high concurrency and low latency, while large queries or offline ETL operations can be executed on the nodes of the Offline resource group. This makes it possible to provide online and offline services simultaneously in a single cluster, as sketched below.
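-
-    A minimal sketch of this online/offline layout, assuming the BE nodes have been tagged with the hypothetical tags `online` and `offline` in the same way as group_a/b/c above (table and user names are examples only):
-
-    ```sql
-    create table ServiceTable
-    (k1 int, k2 int)
-    distributed by hash(k1) buckets 1
-    properties(
-        "replication_allocation" = "tag.location.online:2, tag.location.offline:1"
-    );
-
-    -- Route accounts: the online service account uses Online nodes, the ETL account uses Offline nodes
-    set property for 'online_user' 'resource_tags.location' = 'online';
-    set property for 'etl_user' 'resource_tags.location' = 'offline';
-    ```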
-
-## Single query resource limit
-
-The resource group method described above provides resource isolation and restriction at the node level. Within a resource group, resource preemption may still occur. For example, the three business departments mentioned above are placed in the same resource group: although the degree of resource competition is reduced, the queries of these three departments may still affect each other.
-
-Therefore, in addition to the resource group solution, Doris also provides a single query resource restriction function.
-
-At present, Doris's resource restrictions on single queries are mainly divided into two aspects: CPU and memory restrictions.
-
-1. Memory Limitation
-
-    Doris can limit the maximum memory a query is allowed to use, to ensure that a single query cannot occupy all of the cluster's memory. The memory limit can be set in the following ways:
-
- ```
- // Set the session variable exec_mem_limit. Then all subsequent queries in the session (within the connection) use this memory limit.
- set exec_mem_limit=1G;
- // Set the global variable exec_mem_limit. Then all subsequent queries of all new sessions (new connections) use this memory limit.
- set global exec_mem_limit=1G;
- // Set the variable exec_mem_limit in SQL. Then the variable only affects this SQL.
- select /*+ SET_VAR(exec_mem_limit=1G) */ id, name from tbl where xxx;
- ```
-
-    Doris's query engine is based on an all-in-memory MPP framework, so when the memory usage of a query exceeds the limit, the query is terminated. If a query cannot run under a reasonable memory limit, it needs to be addressed through SQL optimization or cluster expansion.
-
-2. CPU limitations
-
- Users can limit the CPU resources of the query in the following ways:
-
- ```
- // Set the session variable cpu_resource_limit. Then all queries in the session (within the connection) will use this CPU limit.
-    set cpu_resource_limit = 2;
-    // Set the user property cpu_resource_limit; all queries of this user will then use this CPU limit. This property has a higher priority than the session variable cpu_resource_limit.
-    set property for 'user1' 'cpu_resource_limit' = '3';
- ```
-
- The value of `cpu_resource_limit` is a relative value. The larger the value, the more CPU resources can be used. However, the upper limit of the CPU that can be used by a query also depends on the number of partitions and buckets of the table. In principle, the maximum CPU usage of a query is positively related to the number of tablets involved in the query. In extreme cases, assuming that a query involves only one tablet, even if `cpu_resource_limit` is set to a larger value, only 1 CPU resource can be used.
-
-Through memory and CPU resource limits, we can divide resources among user queries at a finer granularity within a resource group. For example, offline tasks with low timeliness requirements but heavy computation can be given fewer CPU resources and more memory, while latency-sensitive online tasks can be given more CPU resources and a reasonable amount of memory, as sketched below.
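-
-A minimal sketch combining the commands above (the user name and values are hypothetical; tune them to your workload):
-
-```
-// Offline ETL session: small CPU share, large memory limit
-set cpu_resource_limit = 1;
-set exec_mem_limit = 16G;
-
-// Latency-sensitive online account: larger CPU share, set at the user level
-set property for 'online_user' 'cpu_resource_limit' = '4';
-```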
-
-## Best practices and forward compatibility
-
-Tag division and CPU limits are new features in version 0.15. To ensure a smooth upgrade from older versions, Doris provides the following forward compatibility:
-
-1. Each BE node will have a default Tag: `"tag.location": "default"`.
-2. A BE node added through the `alter system add backend` statement will also get the default Tag `"tag.location": "default"`.
-3. The replica distribution of all existing tables is converted by default to `"tag.location.default:xx"`, where xx is the original number of replicas.
-4. Users can still specify the number of replicas in the table creation statement with `"replication_num" = "xx"`; this property is automatically converted to `"tag.location.default:xx"`, so there is no need to modify existing table creation statements (see the sketch after this list).
-5. By default, the memory limit of a single query is 2GB per node and CPU resources are unlimited, which is consistent with the original behavior. The user's `resource_tags.location` property is empty by default, that is, the user can access BEs with any Tag, which is also consistent with the original behavior.
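-
-For example, under the compatibility rules above the following two definitions are equivalent (the table names are hypothetical):
-
-```
-// Old style: still accepted after the upgrade
-create table t1 (k1 int) distributed by hash(k1) buckets 1
-properties("replication_num" = "3");
-
-// What it is converted to internally
-create table t2 (k1 int) distributed by hash(k1) buckets 1
-properties("replication_allocation" = "tag.location.default:3");
-```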
-
-Here we give an example of the steps to start using the resource division function after upgrading from the original cluster to version 0.15:
-
-1. Turn off data repair and balance logic
-
-    After the upgrade, the default Tag of each BE is `"tag.location": "default"`, and the default replica distribution of tables is `"tag.location.default:xx"`. If you modify the Tag of a BE directly, the system will automatically detect the change in replica distribution and start data redistribution, which may occupy some system resources. Therefore, we can turn off the data repair and balance logic before modifying Tags, to ensure that no replica redistribution happens while we are planning resources.
-
- ```
- ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");
- ADMIN SET FRONTEND CONFIG ("disable_tablet_scheduler" = "true");
- ```
-
-2. Set Tag and table copy distribution
-
-    Next, you can set the BE Tags with the `alter system modify backend` statement, and modify the replica distribution strategy of tables with the `alter table` statement. Examples are as follows:
-
- ```
- alter system modify backend "host1:9050, 1212:9050" set ("tag.location" = "group_a");
- alter table my_table modify partition p1 set ("replication_allocation" = "tag.location.group_a:2");
- ```
-
-3. Turn on data repair and balance logic
-
-    After the Tags and replica distribution are set, we can turn the data repair and balance logic back on to trigger data redistribution.
-
- ```
- ADMIN SET FRONTEND CONFIG ("disable_balance" = "false");
- ADMIN SET FRONTEND CONFIG ("disable_tablet_scheduler" = "false");
- ```
-
-    This process will take some time depending on the amount of data involved, and it will cause some colocation tables to temporarily lose their colocation plan (because replicas are being migrated). You can view the progress with `show proc "/cluster_balance/"`, or judge it by the `UnhealthyTabletNum` value in `show proc "/statistic"`. When `UnhealthyTabletNum` drops to 0, the data redistribution is complete.
-
-4. Set the user's resource label permissions.
-
-    After the data is redistributed, we can start setting the users' resource tag permissions. Because the user's `resource_tags.location` property is empty by default, that is, BEs with any Tag can be accessed, the normal queries of existing users are not affected during the previous steps. Once the `resource_tags.location` property is set to a non-empty value, the user is restricted to accessing only the BEs with the specified Tags.
-
-Through the above 4 steps, we can smoothly start using the resource division function after upgrading an existing cluster.
diff --git a/docs/en/administrator-guide/operation/be-olap-error-code.md b/docs/en/administrator-guide/operation/be-olap-error-code.md
deleted file mode 100644
index 5b4514cf76..0000000000
--- a/docs/en/administrator-guide/operation/be-olap-error-code.md
+++ /dev/null
@@ -1,256 +0,0 @@
----
-{
- "title": "Description of the return value of the OLAP function on the BE side",
- "language": "en"
-}
-
----
-
-
-
-# Description of the return value of the OLAP function on the BE side
-
-
-
-| Return value name | Return value | Return value description |
-| ------------------------------------------------ | ------ | ------------------------------------------------------------ |
-| OLAP_SUCCESS | 0 | Success |
-| OLAP_ERR_OTHER_ERROR | -1 | Other errors |
-| OLAP_REQUEST_FAILED | -2 | Request failed |
-| System error codes, such as file system, memory, and other system call failures | | |
-| OLAP_ERR_OS_ERROR | -100 | Operating system error |
-| OLAP_ERR_DIR_NOT_EXIST | -101 | Directory does not exist error |
-| OLAP_ERR_FILE_NOT_EXIST | -102 | File does not exist error |
-| OLAP_ERR_CREATE_FILE_ERROR | -103 | Error creating file |
-| OLAP_ERR_MALLOC_ERROR | -104 | Memory allocation error |
-| OLAP_ERR_STL_ERROR | -105 | Standard template library error |
-| OLAP_ERR_IO_ERROR | -106 | IO error |
-| OLAP_ERR_MUTEX_ERROR | -107 | Mutex error |
-| OLAP_ERR_PTHREAD_ERROR | -108 | POSIX thread error |
-| OLAP_ERR_NETWORK_ERROR | -109 | Network abnormal error |
-| OLAP_ERR_UB_FUNC_ERROR | -110 | |
-| OLAP_ERR_COMPRESS_ERROR | -111 | Data compression error |
-| OLAP_ERR_DECOMPRESS_ERROR | -112 | Data decompression error |
-| OLAP_ERR_UNKNOWN_COMPRESSION_TYPE | -113 | Unknown data compression type |
-| OLAP_ERR_MMAP_ERROR | -114 | Memory mapped file error |
-| OLAP_ERR_RWLOCK_ERROR | -115 | Read-write lock error |
-| OLAP_ERR_READ_UNENOUGH | -116 | Read memory is not enough exception |
-| OLAP_ERR_CANNOT_CREATE_DIR | -117 | Cannot create directory exception |
-| OLAP_ERR_UB_NETWORK_ERROR | -118 | Network exception |
-| OLAP_ERR_FILE_FORMAT_ERROR | -119 | File format abnormal |
-| OLAP_ERR_EVAL_CONJUNCTS_ERROR | -120 | |
-| OLAP_ERR_COPY_FILE_ERROR | -121 | Copy file error |
-| OLAP_ERR_FILE_ALREADY_EXIST | -122 | File already exists error |
-| General error codes | | |
-| OLAP_ERR_NOT_INITED | -200 | Cannot initialize exception |
-| OLAP_ERR_FUNC_NOT_IMPLEMENTED | -201 | Function cannot be executed exception |
-| OLAP_ERR_CALL_SEQUENCE_ERROR | -202 | Call SEQUENCE exception |
-| OLAP_ERR_INPUT_PARAMETER_ERROR | -203 | Input parameter error |
-| OLAP_ERR_BUFFER_OVERFLOW | -204 | Memory buffer overflow error |
-| OLAP_ERR_CONFIG_ERROR | -205 | Configuration error |
-| OLAP_ERR_INIT_FAILED | -206 | Initialization failed |
-| OLAP_ERR_INVALID_SCHEMA | -207 | Invalid Schema |
-| OLAP_ERR_CHECKSUM_ERROR | -208 | Check value error |
-| OLAP_ERR_SIGNATURE_ERROR | -209 | Signature error |
-| OLAP_ERR_CATCH_EXCEPTION | -210 | Exception caught |
-| OLAP_ERR_PARSE_PROTOBUF_ERROR | -211 | Error parsing Protobuf|
-| OLAP_ERR_INVALID_ROOT_PATH | -222 | Invalid root directory |
-| OLAP_ERR_NO_AVAILABLE_ROOT_PATH | -223 | No valid root directory |
-| OLAP_ERR_CHECK_LINES_ERROR | -224 | Check the number of lines error |
-| OLAP_ERR_INVALID_CLUSTER_INFO | -225 | Invalid Cluster Information |
-| OLAP_ERR_TRANSACTION_NOT_EXIST | -226 | Transaction does not exist |
-| OLAP_ERR_DISK_FAILURE | -227 | Disk error |
-| OLAP_ERR_TRANSACTION_ALREADY_COMMITTED | -228 | Transaction already committed |
-| OLAP_ERR_TRANSACTION_ALREADY_VISIBLE | -229 | Transaction already visible |
-| OLAP_ERR_VERSION_ALREADY_MERGED | -230 | Version has been merged |
-| OLAP_ERR_LZO_DISABLED | -231 | LZO is disabled |
-| OLAP_ERR_DISK_REACH_CAPACITY_LIMIT | -232 | Disk reached capacity limit |
-| OLAP_ERR_TOO_MANY_TRANSACTIONS | -233 | Too many transaction backlogs are not completed |
-| OLAP_ERR_INVALID_SNAPSHOT_VERSION | -234 | Invalid snapshot version |
-| OLAP_ERR_TOO_MANY_VERSION | -235 | The tablet data version exceeds the maximum limit (default 500) |
-| OLAP_ERR_NOT_INITIALIZED | -236 | Cannot initialize |
-| OLAP_ERR_ALREADY_CANCELLED | -237 | Has been cancelled |
-| OLAP_ERR_TOO_MANY_SEGMENTS | -238 | usually occurs when the amount of imported data in the same batch is too large, resulting in too many segment files for a tablet |
-| Command execution exception code | | |
-| OLAP_ERR_CE_CMD_PARAMS_ERROR | -300 | Command parameter error |
-| OLAP_ERR_CE_BUFFER_TOO_SMALL | -301 | Too many small files in the buffer |
-| OLAP_ERR_CE_CMD_NOT_VALID | -302 | Invalid command |
-| OLAP_ERR_CE_LOAD_TABLE_ERROR | -303 | Error loading data table |
-| OLAP_ERR_CE_NOT_FINISHED | -304 | The command was not executed successfully |
-| OLAP_ERR_CE_TABLET_ID_EXIST | -305 | Tablet Id does not exist error |
-| OLAP_ERR_CE_TRY_CE_LOCK_ERROR | -306 | Attempt to obtain execution command lock error |
-| Tablet error exception code | | |
-| OLAP_ERR_TABLE_VERSION_DUPLICATE_ERROR | -400 | Tablet copy version error |
-| OLAP_ERR_TABLE_VERSION_INDEX_MISMATCH_ERROR | -401 | Tablet version index mismatch exception |
-| OLAP_ERR_TABLE_INDEX_VALIDATE_ERROR | -402 | The initial version of the tablet is not checked here, because if the BE is restarted during a schema-change of a tablet, we may encounter an empty tablet exception |
-| OLAP_ERR_TABLE_INDEX_FIND_ERROR | -403 | Unable to get the position of the first block or failure to find the last block of the block will cause this exception |
-| OLAP_ERR_TABLE_CREATE_FROM_HEADER_ERROR | -404 | This exception is triggered when the tablet cannot be loaded |
-| OLAP_ERR_TABLE_CREATE_META_ERROR | -405 | Unable to create tablet (change schema), base tablet does not exist, this exception will be triggered |
-| OLAP_ERR_TABLE_ALREADY_DELETED_ERROR | -406 | The tablet has been deleted |
-| Storage Engine Error Code | | |
-| OLAP_ERR_ENGINE_INSERT_EXISTS_TABLE | -500 | Add the same tablet twice, add the tablet to the same data directory twice, the new tablet is empty, and the old tablet exists. Will trigger this exception |
-| OLAP_ERR_ENGINE_DROP_NOEXISTS_TABLE | -501 | Delete non-existent table |
-| OLAP_ERR_ENGINE_LOAD_INDEX_TABLE_ERROR | -502 | Failed to load tablet_meta, segment group meta with invalid cumulative rowset, will cause this exception |
-| OLAP_ERR_TABLE_INSERT_DUPLICATION_ERROR | -503 | Duplicate table insert |
-| OLAP_ERR_DELETE_VERSION_ERROR | -504 | Delete version error |
-| OLAP_ERR_GC_SCAN_PATH_ERROR | -505 | GC scan path error |
-| OLAP_ERR_ENGINE_INSERT_OLD_TABLET | -506 | When BE is restarting, an older tablet may have been added to the garbage collection queue but not yet deleted. Since data_dirs are loaded in parallel, a tablet loaded later may be older than one loaded earlier; this should not be treated as a failure, so this code is returned instead |
-| Fetch Handler error code | | |
-| OLAP_ERR_FETCH_OTHER_ERROR | -600 | FetchHandler other errors |
-| OLAP_ERR_FETCH_TABLE_NOT_EXIST | -601 | FetchHandler table does not exist |
-| OLAP_ERR_FETCH_VERSION_ERROR | -602 | FetchHandler version error |
-| OLAP_ERR_FETCH_SCHEMA_ERROR | -603 | FetchHandler Schema error |
-| OLAP_ERR_FETCH_COMPRESSION_ERROR | -604 | FetchHandler compression error |
-| OLAP_ERR_FETCH_CONTEXT_NOT_EXIST | -605 | FetchHandler context does not exist |
-| OLAP_ERR_FETCH_GET_READER_PARAMS_ERR | -606 | FetchHandler GET read parameter error |
-| OLAP_ERR_FETCH_SAVE_SESSION_ERR | -607 | FetchHandler save session error |
-| OLAP_ERR_FETCH_MEMORY_EXCEEDED | -608 | FetchHandler memory exceeded exception |
-| Read exception error code | | |
-| OLAP_ERR_READER_IS_UNINITIALIZED | -700 | Read cannot be initialized |
-| OLAP_ERR_READER_GET_ITERATOR_ERROR | -701 | Get read iterator error |
-| OLAP_ERR_CAPTURE_ROWSET_READER_ERROR | -702 | Current Rowset read error |
-| OLAP_ERR_READER_READING_ERROR | -703 | Failed to initialize column data, the column data of cumulative rowset is invalid, this exception code will be returned |
-| OLAP_ERR_READER_INITIALIZE_ERROR | -704 | Read initialization failed |
-| BaseCompaction exception code information | | |
-| OLAP_ERR_BE_VERSION_NOT_MATCH | -800 | BE Compaction version mismatch error |
-| OLAP_ERR_BE_REPLACE_VERSIONS_ERROR | -801 | BE Compaction replacement version error |
-| OLAP_ERR_BE_MERGE_ERROR | -802 | BE Compaction merge error |
-| OLAP_ERR_CAPTURE_ROWSET_ERROR | -804 | Cannot find the version corresponding to Rowset |
-| OLAP_ERR_BE_SAVE_HEADER_ERROR | -805 | BE Compaction save header error |
-| OLAP_ERR_BE_INIT_OLAP_DATA | -806 | BE Compaction initialized OLAP data error |
-| OLAP_ERR_BE_TRY_OBTAIN_VERSION_LOCKS | -807 | BE Compaction trying to obtain version lock error |
-| OLAP_ERR_BE_NO_SUITABLE_VERSION | -808 | BE Compaction does not have a suitable version |
-| OLAP_ERR_BE_TRY_BE_LOCK_ERROR | -809 | The other base compaction is running, and the attempt to acquire the lock failed |
-| OLAP_ERR_BE_INVALID_NEED_MERGED_VERSIONS | -810 | Invalid Merge version |
-| OLAP_ERR_BE_ERROR_DELETE_ACTION | -811 | BE performing delete operation error |
-| OLAP_ERR_BE_SEGMENTS_OVERLAPPING | -812 | Rowset exception with overlapping cumulative points |
-| OLAP_ERR_BE_CLONE_OCCURRED | -813 | A clone task may occur after the compaction task is submitted to the thread pool, and the rowsets selected for compaction may change. In this case the current compaction task should not be performed, and this code is returned |
-| PUSH exception code | | |
-| OLAP_ERR_PUSH_INIT_ERROR | -900 | Returned when the reader cannot be initialized, the table descriptor cannot be created, the memory tracker cannot be initialized, the file format type is unsupported, the scanner cannot be opened, the tuple descriptor cannot be obtained, or memory allocation for the tuple fails |
-| OLAP_ERR_PUSH_DELTA_FILE_EOF | -901 | |
-| OLAP_ERR_PUSH_VERSION_INCORRECT | -902 | PUSH version is incorrect |
-| OLAP_ERR_PUSH_SCHEMA_MISMATCH | -903 | PUSH Schema does not match |
-| OLAP_ERR_PUSH_CHECKSUM_ERROR | -904 | PUSH check value error |
-| OLAP_ERR_PUSH_ACQUIRE_DATASOURCE_ERROR | -905 | PUSH get data source error |
-| OLAP_ERR_PUSH_CREAT_CUMULATIVE_ERROR | -906 | PUSH Create CUMULATIVE error code |
-| OLAP_ERR_PUSH_BUILD_DELTA_ERROR | -907 | The pushed incremental file has an incorrect check code |
-| OLAP_ERR_PUSH_VERSION_ALREADY_EXIST | -908 | PUSH version already exists |
-| OLAP_ERR_PUSH_TABLE_NOT_EXIST | -909 | PUSH table does not exist |
-| OLAP_ERR_PUSH_INPUT_DATA_ERROR | -910 | PUSH data is invalid, it may be length, data type and other issues |
-| OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST | -911 | When submitting the transaction to the engine, it is found that Rowset exists, but the Rowset ID is different |
-| OLAP_ERR_PUSH_BATCH_PROCESS_REMOVED | -912 | Deleted the push batch process |
-| OLAP_ERR_PUSH_COMMIT_ROWSET | -913 | PUSH Commit Rowset |
-| OLAP_ERR_PUSH_ROWSET_NOT_FOUND | -914 | PUSH Rowset not found |
-| SegmentGroup exception code | | |
-| OLAP_ERR_INDEX_LOAD_ERROR | -1000 | Load index error |
-| OLAP_ERR_INDEX_EOF | -1001 | |
-| OLAP_ERR_INDEX_CHECKSUM_ERROR | -1002 | Checksum verification error, segment error loading index. |
-| OLAP_ERR_INDEX_DELTA_PRUNING | -1003 | Index incremental pruning |
-| OLAPData exception code information | | |
-| OLAP_ERR_DATA_ROW_BLOCK_ERROR | -1100 | Data row Block block error |
-| OLAP_ERR_DATA_FILE_TYPE_ERROR | -1101 | Data file type error |
-| OLAP_ERR_DATA_EOF | -1102 | |
-| OLAP data write error code | | |
-| OLAP_ERR_WRITER_INDEX_WRITE_ERROR | -1200 | Index write error |
-| OLAP_ERR_WRITER_DATA_WRITE_ERROR | -1201 | Data writing error |
-| OLAP_ERR_WRITER_ROW_BLOCK_ERROR | -1202 | Row Block block write error |
-| OLAP_ERR_WRITER_SEGMENT_NOT_FINALIZED | -1203 | Before adding a new segment, the previous segment was not completed |
-| RowBlock error code | | |
-| OLAP_ERR_ROWBLOCK_DECOMPRESS_ERROR | -1300 | Rowblock decompression error |
-| OLAP_ERR_ROWBLOCK_FIND_ROW_EXCEPTION | -1301 | Failed to obtain Block Entry |
-| OLAP_ERR_ROWBLOCK_READ_INFO_ERROR | -1302 | Error reading Rowblock information |
-| Tablet metadata error | | |
-| OLAP_ERR_HEADER_ADD_VERSION | -1400 | Tablet metadata increase version |
-| OLAP_ERR_HEADER_DELETE_VERSION | -1401 | Tablet metadata deletion version |
-| OLAP_ERR_HEADER_ADD_PENDING_DELTA | -1402 | Tablet metadata add pending increment |
-| OLAP_ERR_HEADER_ADD_INCREMENTAL_VERSION | -1403 | Tablet metadata addition self-increment version |
-| OLAP_ERR_HEADER_INVALID_FLAG | -1404 | Invalid tablet metadata flag |
-| OLAP_ERR_HEADER_PUT | -1405 | tablet metadata PUT operation |
-| OLAP_ERR_HEADER_DELETE | -1406 | tablet metadata DELETE operation |
-| OLAP_ERR_HEADER_GET | -1407 | tablet metadata GET operation |
-| OLAP_ERR_HEADER_LOAD_INVALID_KEY | -1408 | Tablet metadata loading invalid Key |
-| OLAP_ERR_HEADER_FLAG_PUT | -1409 | |
-| OLAP_ERR_HEADER_LOAD_JSON_HEADER | -1410 | tablet metadata loading JSON Header |
-| OLAP_ERR_HEADER_INIT_FAILED | -1411 | Tablet metadata header initialization failed |
-| OLAP_ERR_HEADER_PB_PARSE_FAILED | -1412 | Tablet metadata Protobuf parsing failed |
-| OLAP_ERR_HEADER_HAS_PENDING_DATA | -1413 | Tablet metadata pending data |
-| TabletSchema exception code information | | |
-| OLAP_ERR_SCHEMA_SCHEMA_INVALID | -1500 | Invalid Tablet Schema |
-| OLAP_ERR_SCHEMA_SCHEMA_FIELD_INVALID | -1501 | Tablet Schema field is invalid |
-| SchemaHandler exception code information | | |
-| OLAP_ERR_ALTER_MULTI_TABLE_ERR | -1600 | ALTER multi-table error |
-| OLAP_ERR_ALTER_DELTA_DOES_NOT_EXISTS | -1601 | Failed to get all data sources, Tablet has no version |
-| OLAP_ERR_ALTER_STATUS_ERR | -1602 | Failed to check the row number, internal sorting failed, row block sorting failed, these will return this code |
-| OLAP_ERR_PREVIOUS_SCHEMA_CHANGE_NOT_FINISHED | -1603 | The previous schema change is not completed |
-| OLAP_ERR_SCHEMA_CHANGE_INFO_INVALID | -1604 | Schema change information is invalid |
-| OLAP_ERR_QUERY_SPLIT_KEY_ERR | -1605 | Query Split key error |
-| OLAP_ERR_DATA_QUALITY_ERR | -1606 | Errors caused by data quality issues during schema changes/materialized views |
-| Column File error code | | |
-| OLAP_ERR_COLUMN_DATA_LOAD_BLOCK | -1700 | Error loading column data block |
-| OLAP_ERR_COLUMN_DATA_RECORD_INDEX | -1701 | Load data record index error |
-| OLAP_ERR_COLUMN_DATA_MAKE_FILE_HEADER | -1702 | |
-| OLAP_ERR_COLUMN_DATA_READ_VAR_INT | -1703 | Cannot read column data from Stream |
-| OLAP_ERR_COLUMN_DATA_PATCH_LIST_NUM | -1704 | |
-| OLAP_ERR_COLUMN_STREAM_EOF | -1705 | If the data stream ends, return this code |
-| OLAP_ERR_COLUMN_READ_STREAM | -1706 | The block size is greater than the buffer size, the remaining compressed size is less than the Stream header size, and the read stream fails. This exception will be thrown in these cases |
-| OLAP_ERR_COLUMN_STREAM_NOT_EXIST | -1707 | Stream is empty, does not exist, the data stream is not found, etc. The exception code is returned |
-| OLAP_ERR_COLUMN_VALUE_NULL | -1708 | Column value is empty exception |
-| OLAP_ERR_COLUMN_SEEK_ERROR | -1709 | If you add a column through a schema change, the column index may exist due to the schema change, and this exception code is returned |
-| DeleteHandler error code | | |
-| OLAP_ERR_DELETE_INVALID_CONDITION | -1900 | Invalid delete condition |
-| OLAP_ERR_DELETE_UPDATE_HEADER_FAILED | -1901 | Delete update Header error |
-| OLAP_ERR_DELETE_SAVE_HEADER_FAILED | -1902 | Delete save header error |
-| OLAP_ERR_DELETE_INVALID_PARAMETERS | -1903 | Invalid delete parameter |
-| OLAP_ERR_DELETE_INVALID_VERSION | -1904 | Invalid delete version |
-| Cumulative Handler error code | | |
-| OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSIONS | -2000 | Cumulative does not have a suitable version |
-| OLAP_ERR_CUMULATIVE_REPEAT_INIT | -2001 | Cumulative Repeat initialization error |
-| OLAP_ERR_CUMULATIVE_INVALID_PARAMETERS | -2002 | Invalid Cumulative parameter |
-| OLAP_ERR_CUMULATIVE_FAILED_ACQUIRE_DATA_SOURCE | -2003 | Cumulative failed to obtain data source |
-| OLAP_ERR_CUMULATIVE_INVALID_NEED_MERGED_VERSIONS | -2004 | Cumulative does not have a valid version that needs to be merged |
-| OLAP_ERR_CUMULATIVE_ERROR_DELETE_ACTION | -2005 | Cumulative delete operation error |
-| OLAP_ERR_CUMULATIVE_MISS_VERSION | -2006 | rowsets missing version |
-| OLAP_ERR_CUMULATIVE_CLONE_OCCURRED | -2007 | A clone task may occur after the compaction task is submitted to the thread pool, and the rowsets selected for compaction may change. In this case the current compaction task should not be performed, otherwise an exception is triggered |
-| OLAPMeta exception code | | |
-| OLAP_ERR_META_INVALID_ARGUMENT | -3000 | Invalid metadata parameter |
-| OLAP_ERR_META_OPEN_DB | -3001 | Open DB metadata error |
-| OLAP_ERR_META_KEY_NOT_FOUND | -3002 | Metadata key not found |
-| OLAP_ERR_META_GET | -3003 | GET metadata error |
-| OLAP_ERR_META_PUT | -3004 | PUT metadata error |
-| OLAP_ERR_META_ITERATOR | -3005 | Metadata iterator error |
-| OLAP_ERR_META_DELETE | -3006 | Delete metadata error |
-| OLAP_ERR_META_ALREADY_EXIST | -3007 | Metadata already has an error |
-| Rowset error code | | |
-| OLAP_ERR_ROWSET_WRITER_INIT | -3100 | Rowset write initialization error |
-| OLAP_ERR_ROWSET_SAVE_FAILED | -3101 | Rowset save failed |
-| OLAP_ERR_ROWSET_GENERATE_ID_FAILED | -3102 | Rowset failed to generate ID |
-| OLAP_ERR_ROWSET_DELETE_FILE_FAILED | -3103 | Rowset failed to delete file |
-| OLAP_ERR_ROWSET_BUILDER_INIT | -3104 | Rowset initialization failed to build |
-| OLAP_ERR_ROWSET_TYPE_NOT_FOUND | -3105 | Rowset type not found |
-| OLAP_ERR_ROWSET_ALREADY_EXIST | -3106 | Rowset already exists |
-| OLAP_ERR_ROWSET_CREATE_READER | -3107 | Rowset failed to create read object |
-| OLAP_ERR_ROWSET_INVALID | -3108 | Rowset is invalid |
-| OLAP_ERR_ROWSET_LOAD_FAILED | -3109 | Rowset load failed |
-| OLAP_ERR_ROWSET_READER_INIT | -3110 | Rowset read object initialization failed |
-| OLAP_ERR_ROWSET_READ_FAILED | -3111 | Rowset read failure |
-| OLAP_ERR_ROWSET_INVALID_STATE_TRANSITION | -3112 | Rowset invalid transaction state |
-
-
-
diff --git a/docs/en/administrator-guide/operation/disk-capacity.md b/docs/en/administrator-guide/operation/disk-capacity.md
deleted file mode 100644
index 77473cf775..0000000000
--- a/docs/en/administrator-guide/operation/disk-capacity.md
+++ /dev/null
@@ -1,169 +0,0 @@
----
-{
- "title": "Disk Capacity Management",
- "language": "en"
-}
----
-
-
-
-# Disk Capacity Management
-
-This document mainly introduces system parameters and processing strategies related to disk storage capacity.
-
-If the capacity of Doris's data disks is not controlled, the process will hang when a disk becomes full. Therefore, we monitor disk usage and remaining capacity, control various operations in the Doris system by setting different warning levels, and try to avoid the situation where a disk becomes full.
-
-## Glossary
-
-* FE: Doris Frontend Node. Responsible for metadata management and request access.
-* BE: Doris Backend Node. Responsible for query execution and data storage.
-* Data Dir: Data directory, each data directory specified in the `storage_root_path` of the BE configuration file `be.conf`. Usually a data directory corresponds to a disk, so the following **disk** also refers to a data directory.
-
-## Basic Principles
-
-BE will report disk usage to FE on a regular basis (every minute). FE records these statistical values and restricts various operation requests based on these statistical values.
-
-Two thresholds, **High Watermark** and **Flood Stage**, are set in FE. Flood Stage is higher than High Watermark. When the disk usage is higher than High Watermark, Doris will restrict the execution of certain operations (such as replica balancing, etc.). If it is higher than Flood Stage, certain operations (such as load data) will be prohibited.
-
-A **Flood Stage** threshold is also set on the BE side. Since the FE cannot always detect disk usage on the BE in a timely manner, and cannot control certain BE operations (such as compaction), the Flood Stage on the BE allows the BE to actively refuse and stop certain operations for self-protection.
-
-## FE Parameter
-
-**High Watermark:**
-
-```
-storage_high_watermark_usage_percent: default value is 85 (85%).
-storage_min_left_capacity_bytes: default value is 2GB.
-```
-
-When disk usage is **higher than** `storage_high_watermark_usage_percent`, **or** free disk space is **less than** `storage_min_left_capacity_bytes`, the disk will no longer be used as the destination path for the following operations:
-
-* Tablet Balance
-* Colocation Relocation
-* Decommission
-
-**Flood Stage:**
-
-```
-storage_flood_stage_usage_percent: default value is 95 (95%).
-storage_flood_stage_left_capacity_bytes: default value is 1GB.
-```
-
-When disk usage is **higher than** `storage_flood_stage_usage_percent`, **or** free disk space is **less than** `storage_flood_stage_left_capacity_bytes`, the disk will no longer be used as the destination path for the following operations (see the sketch after this list):
-
-* Tablet Balance
-* Colocation Relocation
-* Replica make up
-* Restore
-* Load/Insert
-
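-If you need to inspect or adjust these thresholds, the FE configuration commands can be used roughly as follows (the value is only an example; if a parameter cannot be changed at runtime in your version, set it in `fe.conf` instead):
-
-```
-ADMIN SHOW FRONTEND CONFIG LIKE "%storage_flood_stage%";
-ADMIN SET FRONTEND CONFIG ("storage_flood_stage_usage_percent" = "98");
-```
-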
-## BE Parameter
-
-**Flood Stage:**
-
-```
-capacity_used_percent_flood_stage: default value is 95 (95%).
-capacity_min_left_bytes_flood_stage: default value is 1GB.
-```
-
-When disk usage is **higher than** `capacity_used_percent_flood_stage`, **and** free disk space is **less than** `capacity_min_left_bytes_flood_stage`, the following operations on this disk will be prohibited:
-
-* Base/Cumulative Compaction
-* Data load
-* Clone Task (Usually occurs when the replica is repaired or balanced.)
-* Push Task (Occurs during the Loading phase of Hadoop import, when files are downloaded.)
-* Alter Task (Schema Change or Rollup Task.)
-* Download Task (The Downloading phase of the recovery operation.)
-
-## Disk Capacity Release
-
-When the disk capacity is higher than High Watermark or even Flood Stage, many operations will be prohibited. At this time, you can try to reduce the disk usage and restore the system in the following ways.
-
-* Delete table or partition
-
- By deleting tables or partitions, you can quickly reduce the disk space usage and restore the cluster.
-    **Note: Only the `DROP` operation can quickly free up disk space; the `DELETE` operation cannot.**
-
- ```
- DROP TABLE tbl;
- ALTER TABLE tbl DROP PARTITION p1;
- ```
-
-* BE expansion
-
- After backend expansion, data tablets will be automatically balanced to BE nodes with lower disk usage. The expansion operation will make the cluster reach a balanced state in a few hours or days depending on the amount of data and the number of nodes.
-
-* Modify replica of a table or partition
-
-  You can reduce the number of replicas of a table or partition. For example, the default 3 replicas can be reduced to 2. Although this method reduces data reliability, it can quickly reduce disk usage and restore the cluster to normal.
-  This method is usually only used for emergency recovery. After recovery, please restore the replica number to 3 once disk usage has been reduced by expanding the cluster or deleting data.
-  Modifying the replica number takes effect immediately, and the BEs will automatically and asynchronously delete the redundant replicas.
-
- ```
- ALTER TABLE tbl MODIFY PARTITION p1 SET("replication_num" = "2");
- ```
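-
-  After the cluster has recovered (by expanding capacity or deleting data), a sketch of restoring the replica number, assuming the same table and partition as in the example above:
-
-  ```
-  ALTER TABLE tbl MODIFY PARTITION p1 SET("replication_num" = "3");
-  ```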
-
-* Delete unnecessary files
-
- When the BE has crashed because the disk is full and cannot be started (this phenomenon may occur due to untimely detection of FE or BE), you need to delete some temporary files in the data directory to ensure that the BE process can start.
- Files in the following directories can be deleted directly:
-
- * log/: Log files in the log directory.
- * snapshot/: Snapshot files in the snapshot directory.
-  * trash/: Trash files in the trash directory.
-
- **This operation will affect [Restore data from BE Recycle Bin](./tablet-restore-tool.md).**
-
-  If the BE can still be started, you can use `ADMIN CLEAN TRASH ON(BackendHost:BackendHeartBeatPort);` to actively clean up temporary files. **All trash files** and expired snapshot files will be cleaned up, **and this will affect the operation of restoring data from the trash bin**.
-
-
-  If you do not manually execute `ADMIN CLEAN TRASH`, the system will still automatically execute the cleanup within a few minutes to tens of minutes. There are two situations:
- * If the disk usage does not reach 90% of the **Flood Stage**, expired trash files and expired snapshot files will be cleaned up. At this time, some recent files will be retained without affecting the recovery of data.
- * If the disk usage has reached 90% of the **Flood Stage**, **all trash files** and expired snapshot files will be cleaned up, **This will affect the operation of restoring data from the trash bin**.
-
-  The time interval of the automatic execution can be changed via the `min_garbage_sweep_interval` and `max_garbage_sweep_interval` configuration items.
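-
-  A minimal be.conf sketch for tuning the automatic cleanup interval; the values below are illustrative only and the defaults may differ between versions:
-
-  ```
-  # be.conf (units: seconds; illustrative values)
-  min_garbage_sweep_interval = 180
-  max_garbage_sweep_interval = 3600
-  ```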
-
- When the recovery fails due to lack of trash files, the following results may be returned:
-
- ```
- {"status": "Fail","msg": "can find tablet path in trash"}
- ```
-
-* Delete data file (dangerous!!!)
-
- When none of the above operations can free up capacity, you need to delete data files to free up space. The data file is in the `data/` directory of the specified data directory. To delete a tablet, you must first ensure that at least one replica of the tablet is normal, otherwise **deleting the only replica will result in data loss**.
-
- Suppose we want to delete the tablet with id 12345:
-
- * Find the directory corresponding to Tablet, usually under `data/shard_id/tablet_id/`. like:
-
- ```data/0/12345/```
-
- * Record the tablet id and schema hash. The schema hash is the name of the next-level directory of the previous step. The following is 352781111:
-
- ```data/0/12345/352781111```
-
- * Delete the data directory:
-
- ```rm -rf data/0/12345/```
-
- * Delete tablet metadata (refer to [Tablet metadata management tool](./tablet-meta-tool.md))
-
-    ```./lib/meta_tool --operation=delete_header --root_path=/path/to/root_path --tablet_id=12345 --schema_hash=352781111```
\ No newline at end of file
diff --git a/docs/en/administrator-guide/operation/metadata-operation.md b/docs/en/administrator-guide/operation/metadata-operation.md
deleted file mode 100644
index df8023cef9..0000000000
--- a/docs/en/administrator-guide/operation/metadata-operation.md
+++ /dev/null
@@ -1,404 +0,0 @@
----
-{
- "title": "Metadata Operations and Maintenance",
- "language": "en"
-}
----
-
-
-
-# Metadata Operations and Maintenance
-
-This document focuses on how to manage Doris metadata in a real production environment. It includes the proposed deployment of FE nodes, some commonly used operational methods, and common error resolution methods.
-
-Before reading this document, please read the [Doris metadata design document](../../internal/metadata-design_EN.md) first to understand how Doris metadata works.
-
-## Important tips
-
-* Current metadata design is not backward compatible. That is, if the new version has a new metadata structure change (you can see whether there is a new VERSION in the `FeMetaVersion.java` file in the FE code), it is usually impossible to roll back to the old version after upgrading to the new version. Therefore, before upgrading FE, be sure to test metadata compatibility according to the operations in the [Upgrade Document](../../installing/upgrade_EN.md).
-
-## Metadata catalog structure
-
-Let's assume that the path of `meta_dir` specified in fe.conf is `path/to/palo-meta`. In a normal Doris cluster, the directory structure of metadata should be as follows:
-
-```
-/path/to/palo-meta/
- |-- bdb/
- | |-- 00000000.jdb
- | |-- je.config.csv
- | |-- je.info.0
- | |-- je.info.0.lck
- | |-- je.lck
- | `-- je.stat.csv
- `-- image/
- |-- ROLE
- |-- VERSION
- `-- image.xxxx
-```
-
-1. bdb
-
-  We use [bdbje](https://www.oracle.com/technetwork/database/berkeleydb/overview/index-093405.html) as a distributed key-value (KV) store for the metadata journal. This bdb directory is equivalent to the "data directory" of bdbje.
-
-  The files with the `.jdb` suffix are bdbje's data files. They grow as metadata journals accumulate. After Doris regularly completes an image, the old journals are deleted, so normally the total size of these data files varies from several MB to several GB (depending on how Doris is used, e.g. import frequency). If the total size of the data files grows beyond 10GB, you may need to check whether image generation failed, or whether historical journals could not be deleted because image distribution failed.
-
-  `je.info.0` is the running log of bdbje. Times in this log are in the UTC+0 time zone. We may fix this in a later version. This log also shows some of bdbje's internal behavior.
-
-2. image directory
-
- The image directory is used to store metadata mirrors generated regularly by Doris. Usually, you will see a `image.xxxxx` mirror file. Where `xxxxx` is a number. This number indicates that the image contains all metadata journal before `xxxx`. And the generation time of this file (viewed through `ls -al`) is usually the generation time of the mirror.
-
-  You may also see an `image.ckpt` file. This is a metadata mirror that is being generated. Using `du -sh` you should see its size increasing, indicating that the mirror content is being written. After the mirror is fully written, it is automatically renamed to a new `image.xxxxx` file and replaces the old image file.
-
-  Only an FE with the Master role actively generates image files on a regular basis. After each generation, the image is pushed to the other non-Master FEs. Once it is confirmed that all other FEs have received the image, the Master FE deletes the corresponding metadata journal in bdbje. Therefore, if image generation fails, or pushing the image to other FEs fails, data accumulates in bdbje.
-
- `ROLE` file records the type of FE (FOLLOWER or OBSERVER), which is a text file.
-
- `VERSION` file records the cluster ID of the Doris cluster and the token used to access authentication between nodes, which is also a text file.
-
-  The `ROLE` file and the `VERSION` file either both exist or both do not exist (e.g. at the first startup).
-
-## Basic operations
-
-### Start single node FE
-
-Single node FE is the most basic deployment mode. A complete Doris cluster requires at least one FE node. When there is only one FE node, the type of the node is Follower and the role is Master.
-
-1. First start-up
-
- 1. Suppose the path of `meta_dir` specified in fe.conf is `path/to/palo-meta`.
- 2. Ensure that `path/to/palo-meta` already exists, that the permissions are correct and that the directory is empty.
- 3. Start directly through `sh bin/start_fe.sh`.
- 4. After booting, you should be able to see the following log in fe.log:
-
- * Palo FE starting...
- * image does not exist: /path/to/palo-meta/image/image.0
- * transfer from INIT to UNKNOWN
- * transfer from UNKNOWN to MASTER
- * the very first time to open bdb, dbname is 1
- * start fencing, epoch number is 1
- * finish replay in xxx msec
- * QE service start
- * thrift server started
-
- The above logs are not necessarily strictly in this order, but they are basically similar.
-
-   5. The first start-up of a single-node FE usually does not encounter problems. If you do not see the above logs, it usually means the documented steps were not followed exactly; please re-read the relevant wiki carefully.
-
-2. Restart
-
- 1. Stopped FE nodes can be restarted by using `sh bin/start_fe.sh`.
- 2. After restarting, you should be able to see the following log in fe.log:
-
- * Palo FE starting...
- * finished to get cluster id: xxxx, role: FOLLOWER and node name: xxxx
- * If no image has been generated before reboot, you will see:
- * image does not exist: /path/to/palo-meta/image/image.0
-
- * If an image is generated before the restart, you will see:
- * start load image from /path/to/palo-meta/image/image.xxx. is ckpt: false
- * finished load image in xxx ms
-
- * transfer from INIT to UNKNOWN
- * replayed journal id is xxxx, replay to journal id is yyyy
- * transfer from UNKNOWN to MASTER
- * finish replay in xxx msec
- * master finish replay journal, can write now.
- * begin to generate new image: image.xxxx
- * start save image to /path/to/palo-meta/image/image.ckpt. is ckpt: true
- * finished save image /path/to/palo-meta/image/image.ckpt in xxx ms. checksum is xxxx
- * push image.xxx to other nodes. totally xx nodes, push successed xx nodes
- * QE service start
- * thrift server started
-
- The above logs are not necessarily strictly in this order, but they are basically similar.
-
-3. Common problems
-
- For the deployment of single-node FE, start-stop usually does not encounter any problems. If you have any questions, please refer to the relevant Wiki and check your operation steps carefully.
-
-### Add FE
-
-Adding FE processes is described in detail in the [Deployment and Upgrade Documents](../../installing/install-deploy.md#Adding%20FE%20nodes) and will not be repeated. Here are some points for attention, as well as common problems.
-
-1. Notes
-
-   * Before adding a new FE, make sure that the current Master FE is running properly (the connection is normal, the JVM is normal, image generation is normal, the bdbje data directory is not too large, etc.)
-   * The first time you start a new FE, you must make sure that the `--helper` parameter is added and points to the Master FE. There is no need to add `--helper` when restarting. (If `--helper` is specified, the FE will directly ask the helper node for its role; if not, the FE will try to obtain the information from the `ROLE` and `VERSION` files in the `palo-meta/image/` directory.) A start-up sketch is given after this list.
-   * The first time you start a new FE, you must make sure that the `meta_dir` of the FE is created, has correct permissions and is empty.
-   * Starting the new FE and executing the `ALTER SYSTEM ADD FOLLOWER/OBSERVER` statement (which adds the FE to the metadata) can be done in either order. If the new FE is started first and the statement has not been executed yet, `current node is not added to the group. Please add it first.` will appear in the new FE log. When the statement is executed, it enters the normal process.
-   * Make sure that the previous FE has been added successfully before adding the next one.
-   * Connect to the MASTER FE and execute the `ALTER SYSTEM ADD FOLLOWER/OBSERVER` statement.
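-
-   A minimal sketch of these two steps, assuming the Master FE runs on `master_fe_host`, the new FOLLOWER runs on `new_fe_host`, and both use the default edit_log_port 9010 (hostnames and port are placeholders):
-
-   ```
-   # 1. On the Master FE, via mysql-client (use ADD OBSERVER for an OBSERVER):
-   ALTER SYSTEM ADD FOLLOWER "new_fe_host:9010";
-
-   # 2. On the new FE node, first start-up only:
-   sh bin/start_fe.sh --helper master_fe_host:9010 --daemon
-   ```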
-
-2. Common problems
-
-   1. this node is DETACHED
-
-      When you first start an FE that is being added, if the data in palo-meta/bdb on the Master FE is large, you may see the words `this node is DETACHED` in the log of the FE being added. At this point, bdbje is copying data, and you can see that the `bdb/` directory of the FE being added is growing. This process usually takes several minutes (depending on the amount of data in bdbje). Later, there may be some bdbje-related error stack traces in fe.log. If `QE service start` and `thrift server started` appear in the final log, the start-up is usually successful, and you can try to connect to this FE via mysql-client. If these words do not appear, the bdbje replication log may have timed out; in that case, restarting the FE directly will usually solve the problem.
-
- 2. Failure to add due to various reasons
-
-      * If adding an OBSERVER fails: since OBSERVER-type FEs do not participate in majority writes of metadata, they can in theory be started and stopped at will. Therefore, when adding an OBSERVER fails, you can kill the OBSERVER FE process directly, clear its metadata directory, and add it again.
-
-      * If adding a FOLLOWER fails: since FOLLOWERs participate in majority writes of metadata, the new FOLLOWER may already have joined the bdbje election group. If there are only two FOLLOWER nodes (including the MASTER), stopping one FE may cause the other to exit because majority writes can no longer succeed. In this case, first delete the newly added FOLLOWER node from the metadata with the `ALTER SYSTEM DROP FOLLOWER` command, then kill the FOLLOWER process, empty its metadata directory, and add the FE again.
-
-
-### Delete FE
-
-The corresponding type of FE can be deleted with the `ALTER SYSTEM DROP FOLLOWER/OBSERVER` command (a hedged example is given at the end of this section). Note the following points:
-
-* For OBSERVER type FE, direct DROP is enough, without risk.
-
-* For FOLLOWER-type FEs: first, make sure that there is an odd number of FOLLOWERs (three or more) before you start deleting.
-
- 1. If the FE of non-MASTER role is deleted, it is recommended to connect to MASTER FE, execute DROP command, and then kill the process.
- 2. If you want to delete MASTER FE, first confirm that there are odd FOLLOWER FE and it works properly. Then kill the MASTER FE process first. At this point, a FE will be elected MASTER. After confirming that the remaining FE is working properly, connect to the new MASTER FE and execute the DROP command to delete the old MASTER FE.
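-
-A hedged example of the DROP statement, assuming an OBSERVER running on `observer_fe_host` with the default edit_log_port 9010 (host and port are placeholders):
-
-```
-ALTER SYSTEM DROP OBSERVER "observer_fe_host:9010";
-```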
-
-## Advanced Operations
-
-### Failure recovery
-
-For some reasons, an FE may fail to start bdbje or fail to synchronize metadata between FEs. Symptoms include the inability to write metadata, the absence of a MASTER, and so on. At this point, we need to manually restore the FE. The general principle is to start a new MASTER from the metadata in the current `meta_dir`, and then add the other FEs one by one. Please follow these steps strictly:
-
-1. First, stop all FE processes and all business access. Make sure that during metadata recovery, external access will not lead to other unexpected problems.
-
-2. Identify which FE node's metadata is up-to-date:
-
- * First of all, **be sure to back up all FE's `meta_dir` directories first.**
- * Usually, Master FE's metadata is up to date. You can see the suffix of image.xxxx file in the `meta_dir/image` directory. The larger the number, the newer the metadata.
- * Usually, by comparing all FOLLOWER FE image files, you can find the latest metadata.
- * After that, we use the FE node with the latest metadata to recover.
-   * Recovering from an OBSERVER node's metadata is more troublesome, so it is recommended to choose a FOLLOWER node whenever possible.
-
-3. The following operations are performed on the FE nodes selected in step 2.
-
-   1. If the node is an OBSERVER, first change `role=OBSERVER` in the `meta_dir/image/ROLE` file to `role=FOLLOWER`. (Recovering from an OBSERVER node is more cumbersome; follow the steps here first, and see the separate description below.)
- 2. Add configuration in fe.conf: `metadata_failure_recovery=true`.
- 3. Run `sh bin/start_fe.sh` to start the FE
- 4. If normal, the FE will start in the role of MASTER, similar to the description in the previous section `Start a single node FE`. You should see the words `transfer from XXXX to MASTER` in fe.log.
-   5. After start-up completes, connect to this FE first and execute some queries and imports to check whether normal access is possible. If access is abnormal, something may have gone wrong; it is recommended to re-read the above steps carefully and retry with the previously backed-up metadata. If it still fails, the problem may be more serious.
- 6. If successful, through the `show frontends;` command, you should see all the FEs you added before, and the current FE is master.
- 7. Delete the `metadata_failure_recovery=true` configuration item in fe.conf, or set it to `false`, and restart the FE (**Important**).
-
-
- > If you are recovering metadata from an OBSERVER node, after completing the above steps, you will find that the current FE role is OBSERVER, but `IsMaster` appears as `true`. This is because the "OBSERVER" seen here is recorded in Doris's metadata, but whether it is master or not, is recorded in bdbje's metadata. Because we recovered from an OBSERVER node, there was inconsistency. Please take the following steps to fix this problem (we will fix it in a later version):
-
- > 1. First, all FE nodes except this "OBSERVER" are DROPed out.
- > 2. A new FOLLOWER FE is added through the `ADD FOLLOWER` command, assuming that it is on hostA.
- > 3. Start a new FE on hostA and join the cluster by `helper`.
- > 4. After successful startup, you should see two FEs through the `show frontends;` statement, one is the previous OBSERVER, the other is the newly added FOLLOWER, and the OBSERVER is the master.
- > 5. After confirming that the new FOLLOWER is working properly, the new FOLLOWER metadata is used to perform a failure recovery operation again.
-   > 6. The purpose of the above steps is to artificially create the metadata of a FOLLOWER node, and then use that metadata to perform the failure recovery again. This avoids the inconsistency that arises when recovering metadata from an OBSERVER node.
-
-  >The meaning of `metadata_failure_recovery = true` is to empty the metadata of bdbje. In this way, bdbje will not contact the other FEs it previously knew about, but will start as a standalone FE. This parameter needs to be set to true only when restoring. After recovery, it must be set back to false; otherwise, once the FE restarts, the metadata of bdbje will be emptied again, which will make the other FEs unable to work properly.
-
-4. After step 3 has been executed successfully, delete the previous FEs from the metadata with the `ALTER SYSTEM DROP FOLLOWER/OBSERVER` command, and then add them again following the procedure for adding new FEs.
-
-5. If the above operations complete normally, the cluster is recovered.
-
-### FE type change
-
-If you need to change the existing FOLLOWER/OBSERVER type FE to OBSERVER/FOLLOWER type, please delete FE in the way described above, and then add the corresponding type FE.
-
-### FE Migration
-
-If you need to migrate one FE from the current node to another, there are several scenarios.
-
-1. FOLLOWER, or OBSERVER migration for non-MASTER nodes
-
- After adding a new FOLLOWER / OBSERVER directly, delete the old FOLLOWER / OBSERVER.
-
-2. Single-node MASTER migration
-
-   When there is only one FE, refer to the `Failure Recovery` section: copy the palo-meta directory of the FE to the new node, then start the new MASTER following Step 3 of the `Failure Recovery` section (a shell sketch is given after this list).
-
-3. A set of FOLLOWER migrates from one set of nodes to another set of new nodes
-
-   Deploy FEs on the new nodes and add them first as FOLLOWERs. Then DROP the old nodes one by one. As the old nodes are dropped one after another, the MASTER role will automatically move to one of the new FOLLOWER nodes.
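-
-A rough shell sketch for scenario 2 (single-node MASTER migration), assuming the metadata directory is `/path/to/palo-meta` and the new node is `new_fe_host`; the paths and hostname are placeholders:
-
-```
-# On the old FE node: stop the FE, then copy the metadata directory
-sh bin/stop_fe.sh
-scp -r /path/to/palo-meta new_fe_host:/path/to/palo-meta
-
-# On the new FE node: deploy FE with meta_dir pointing at the copied directory,
-# temporarily set metadata_failure_recovery=true in fe.conf (see `Failure Recovery`),
-# start the FE, and remove the flag after recovery succeeds
-sh bin/start_fe.sh --daemon
-```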
-
-### Replacement of FE port
-
-FE currently has the following ports
-
-* edit_log_port: bdbje's communication port
-* http_port: HTTP port, also used to push images
-* rpc_port: thrift server port of the Frontend
-* query_port: MySQL connection port
-
-1. edit_log_port
-
- If this port needs to be replaced, it needs to be restored with reference to the operations in the `Failure Recovery` section. Because the port has been persisted into bdbje's own metadata (also recorded in Doris's own metadata), it is necessary to clear bdbje's metadata by setting `metadata_failure_recovery=true`.
-
-2. http_port
-
-   All FE http_ports must be consistent. So if you want to modify this port, all FEs need to be modified and restarted. Modifying this port is more complex when multiple FOLLOWERs are deployed (it becomes a chicken-and-egg problem), so this operation is not recommended. If necessary, follow the operation in the `Failure Recovery` section directly.
-
-3. rpc_port
-
- After modifying the configuration, restart FE directly. Master FE informs BE of the new port through heartbeat. Only this port of Master FE will be used. However, it is still recommended that all FE ports be consistent.
-
-4. query_port
-
- After modifying the configuration, restart FE directly. This only affects mysql's connection target.
-
-### Recover metadata from FE memory
-In some extreme cases, the image file on disk may be damaged while the metadata in memory is intact. In this case, we can dump the metadata from memory and use it to replace the image file on disk to recover the metadata. The whole operation can be performed without stopping the query service. The steps are as follows:
-
-1. Stop all Load, Create, Alter operations.
-
-2. Execute the following command to dump metadata from the Master FE memory: (hereafter called image_mem)
-```
-curl -u $root_user:$password http://$master_hostname:8030/dump
-```
-3. Execute the following command to verify the integrity and correctness of the generated image_mem file:
-```
-sh start_fe.sh --image path_to_image_mem
-```
-
-> note: `path_to_image_mem` is the path to the image_mem file.
->
-> If the file is valid, the output will be `Load image success. Image file /absolute/path/to/image.xxxxxx valid`.
->
-> If the file is invalid, the output will be `Load image failed. Image file /absolute/path/to/image.xxxxxx is invalid`.
-
-4. Replace the image file in the `meta_dir/image` directory on the OBSERVER/FOLLOWER FE node with the image_mem file in turn, restart the FOLLOWER FE node, and confirm that the metadata and query services are normal.
-
-5. Replace the image file in the `meta_dir/image` directory on the Master FE node with the image_mem file, restart the Master FE node, and then confirm that the FE Master switch is normal and that the Master FE node can generate a new image file through checkpoint.
-
-6. Recover all Load, Create, Alter operations.
-
-**Note: If the Image file is large, the entire process can take a long time, so during this time, make sure Master FE does not generate a new image file via checkpoint. When the image.ckpt file in the meta_dir/image directory on the Master FE node is observed to be as large as the image.xxx file, the image.ckpt file can be deleted directly.**
-
-### View data in BDBJE
-
-The metadata log of FE is stored in BDBJE in the form of Key-Value. In some abnormal situations, FE may not be started due to metadata errors. In this case, Doris provides a way to help users query the data stored in BDBJE to facilitate troubleshooting.
-
-First, you need to add configuration in fe.conf: `enable_bdbje_debug_mode=true`, and then start FE through `sh start_fe.sh --daemon`.
-
-At this time, FE will enter the debug mode, only start the http server and MySQL server, and open the BDBJE instance, but will not load any metadata and other subsequent startup processes.
-
-At this point, we can view the data stored in BDBJE by visiting the FE web page, or by connecting to Doris through the MySQL client and executing `show proc "/bdbje";`.
-
-```
-mysql> show proc "/bdbje";
-+----------+---------------+---------+
-| DbNames | JournalNumber | Comment |
-+----------+---------------+---------+
-| 110589 | 4273 | |
-| epochDB | 4 | |
-| metricDB | 430694 | |
-+----------+---------------+---------+
-```
-
-The first level directory will display all the database names in BDBJE and the number of entries in each database.
-
-```
-mysql> show proc "/bdbje/110589";
-+-----------+
-| JournalId |
-+-----------+
-| 1 |
-| 2 |
-
-...
-| 114858 |
-| 114859 |
-| 114860 |
-| 114861 |
-+-----------+
-4273 rows in set (0.06 sec)
-```
-
-Entering the second level, all the entry keys under the specified database will be listed.
-
-```
-mysql> show proc "/bdbje/110589/114861";
-+-----------+--------------+---------------------------------------------+
-| JournalId | OpType | Data |
-+-----------+--------------+---------------------------------------------+
-| 114861 | OP_HEARTBEAT | org.apache.doris.persist.HbPackage@6583d5fb |
-+-----------+--------------+---------------------------------------------+
-1 row in set (0.05 sec)
-```
-
-The third level can display the value information of the specified key.
-
-## Best Practices
-
-FE deployment recommendations are described in the [Installation and Deployment Document](../../installing/install-deploy_EN.md). Here are some supplements.
-
-* **If you do not know the operation logic of FE metadata very well, or do not have enough experience in FE metadata operation and maintenance, we strongly recommend deploying only one FOLLOWER-type FE as MASTER and making the other FEs OBSERVERs, which avoids many complex operation and maintenance problems.** Do not worry too much about the MASTER being a single point of failure for metadata writes. First, if configured properly, an FE java process rarely crashes. Second, if the MASTER's disk is damaged (which is very unlikely), we can still use the metadata on an OBSERVER to recover manually through `Failure Recovery`.
-
-* The JVM of the FE process must have sufficient memory. We **strongly recommend** that the FE's JVM memory be at least 10GB, and 32GB to 64GB is preferred. Also deploy monitoring of the JVM memory usage: if OOM occurs in the FE, metadata writes may fail, resulting in failures that **cannot be recovered**!
-
-* FE nodes should have enough disk space to prevent the excessive metadata from causing insufficient disk space. At the same time, FE logs also take up more than a dozen gigabytes of disk space.
-
-## Other common problems
-
-1. Output `meta out of date. current time: xxx, synchronized time: xxx, has log: xxx, fe type: xxx` in fe.log
-
-   This is usually because the FEs cannot elect a Master. For example, if three FOLLOWERs are configured but only one is started, this problem will occur on that FOLLOWER. Usually, just start the remaining FOLLOWERs. If the problem is still not solved after they start, manual recovery may be required following the `Failure Recovery` section.
-
-2. `Clock delta: xxxx ms. between Feeder: xxxx and this Replica exceeds max permissible delta: xxxx ms.`
-
-   bdbje requires that the clock skew between nodes does not exceed a certain threshold. If it is exceeded, the node exits abnormally. The default threshold is 5000ms, controlled by the FE parameter `max_bdbje_clock_delta_ms`, and can be modified as appropriate. But we suggest using NTP or another clock synchronization method to keep the clocks of the Doris cluster hosts in sync.
-
-
-3. Mirror files in the `image/` directory have not been updated for a long time
-
-   The Master FE generates a mirror file for every 50,000 metadata journals by default. In a frequently used cluster, a new image file is usually generated every half day to a few days. If you find that the image file has not been updated for a long time (e.g. more than a week), you can investigate the causes in the following order:
-
-   1. Search for `memory is not enough to do checkpoint. Committed memroy XXXX Bytes, used memory XXXX Bytes. ` in the fe.log of the Master FE. If found, the FE's current JVM memory is insufficient for image generation (usually half of the FE memory needs to be reserved for image generation). You then need to increase the JVM memory, restart the FE, and continue to observe. Each time the Master FE restarts, a new image is generated directly; this restart method can also be used to actively generate a new image. Note that if there are multiple FOLLOWERs, then when you restart the current Master FE, another FOLLOWER FE will become MASTER, and subsequent image generation will be the responsibility of the new Master. Therefore, you may need to modify the JVM memory configuration of all FOLLOWER FEs.
-
- 2. Search for `begin to generate new image: image.xxxx` in the fe.log of Master FE. If it is found, then the image is generated. Check the subsequent log of this thread, and if `checkpoint finished save image.xxxx` appears, the image is written successfully. If `Exception when generating new image file` occurs, the generation fails and specific error messages need to be viewed.
-
-
-4. The size of the `bdb/` directory is very large, reaching several Gs or more.
-
-   After ruling out the problem that a new image cannot be generated, the bdb directory may still remain large for some time. This may be because the Master FE failed to push the image. You can search for `push image.XXXX to other nodes. totally XX nodes, push successed YY nodes` in the fe.log of the Master FE. If YY is smaller than XX, some FEs were not pushed successfully, and you can find the specific error `Exception when pushing image file.url = xxx` in the fe.log.
-
-   At the same time, you can add the configuration `edit_log_roll_num = xxxx` in the FE configuration file. This parameter sets the number of metadata journals after which an image is generated; the default is 50000. The number can be reduced appropriately to generate images more frequently, thus speeding up the deletion of old journals. A config sketch follows.
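-
-   A minimal fe.conf sketch, assuming you want an image roughly every 20,000 journals (the value is illustrative only):
-
-   ```
-   # fe.conf
-   edit_log_roll_num = 20000
-   ```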
-
-5. FOLLOWER FE hangs up one after another
-
-   Because Doris's metadata adopts a majority-write strategy, a metadata journal must be written to a majority of the FOLLOWER FEs (for example, with three FOLLOWERs, at least two writes must succeed) before it is considered successful. If the write fails, the FE process exits on its own. So suppose there are three FOLLOWERs A, B and C: if C hangs up first and then B hangs up, A will also hang up. So, as described in the `Best Practices` section, if you do not have extensive experience in metadata operation and maintenance, deploying multiple FOLLOWERs is not recommended.
-
-6. `get exception when try to close previously opened bdb database. ignore it` appears in fe.log
-
-   If the message is followed by the words `ignore it`, there is usually no need to deal with it. If you are interested, you can search for this message in `BDBEnvironment.java` and read the comments there.
-
-7. In `show frontends;`, the `Join` column of an FE is `true`, but the FE is actually abnormal.
-
-   The `Join` column shown by `show frontends;` only indicates whether the FE **has ever joined** the cluster: `true` does not mean that the FE is still running normally in the cluster, and `false` means the FE **has never joined** the cluster.
-
-8. Configuration of the FE parameters `master_sync_policy`, `replica_sync_policy`, and `txn_rollback_limit`.
-
-   `master_sync_policy` is used to specify whether fsync() is called when the Leader FE writes a metadata log, and `replica_sync_policy` is used to specify whether the other Follower FEs call fsync() when synchronizing metadata in an FE HA deployment. In earlier versions of Doris, these two parameters defaulted to `WRITE_NO_SYNC`, i.e. fsync() was not called. In the latest version of Doris, the default has been changed to `SYNC`, i.e. fsync() is called. Calling fsync() significantly reduces the efficiency of writing metadata to disk. In some environments, IOPS may drop to several hundred and the latency may increase to 2-3ms (which is still enough for Doris metadata operations). Therefore, we recommend the following configuration:
-
- 1. For a single Follower FE deployment, `master_sync_policy` is set to `SYNC`, which prevents the loss of metadata due to the downtime of the FE system.
- 2. For multi-Follower FE deployment, we can set `master_sync_policy` and `replica_sync_policy` to `WRITE_NO_SYNC`, because we think that the probability of simultaneous outage of multiple systems is very low.
-
-   If `master_sync_policy` is set to `WRITE_NO_SYNC` in a single-Follower FE deployment, an outage of the FE machine may result in metadata loss. At this point, if the other Observer FEs attempt to restart, they may report an error:
-
- ```
- Node xxx must rollback xx total commits(numPassedDurableCommits of which were durable) to the earliest point indicated by transaction xxxx in order to rejoin the replication group, but the transaction rollback limit of xxx prohibits this.
- ```
-
-This means that some transactions that have been persisted need to be rolled back, but the number of entries exceeds the upper limit. Here our default upper limit is 100, which can be changed by setting `txn_rollback_limit`. This operation is only used to attempt to start FE normally, but lost metadata cannot be recovered.
diff --git a/docs/en/administrator-guide/operation/tablet-meta-tool.md b/docs/en/administrator-guide/operation/tablet-meta-tool.md
deleted file mode 100644
index 94badd114b..0000000000
--- a/docs/en/administrator-guide/operation/tablet-meta-tool.md
+++ /dev/null
@@ -1,145 +0,0 @@
----
-{
- "title": "Tablet metadata management tool",
- "language": "en"
-}
----
-
-
-
-# Tablet metadata management tool
-
-## Background
-
-In the latest version of the code, we introduced RocksDB in BE to store meta-information of tablet, in order to solve various functional and performance problems caused by storing meta-information through header file. Currently, each data directory (root path) has a corresponding RocksDB instance, in which all tablets on the corresponding root path are stored in the key-value manner.
-
-To facilitate the maintenance of these metadata, we provide an online HTTP interface and an offline meta tool to complete related management operations.
-
-The HTTP interface is only used to view tablet metadata online, and can be used when the BE process is running.
-
-The meta tool, in contrast, is only used for offline metadata management operations, and the BE must be stopped before it can be used.
-
-The meta_tool is stored in the `lib/` directory of the BE.
-
-## Operation
-
-### View Tablet Meta
-
-Tablet Meta information can be viewed either online or offline.
-
-#### Online
-
-Access BE's HTTP interface to obtain the corresponding Tablet Meta information:
-
-api:
-
-`http://{host}:{port}/api/meta/header/{tablet_id}`
-
-
-> host: the BE's hostname
->
-> port: the BE's HTTP port
->
-> tablet_id: the tablet id
-
-For example:
-
-`http://be_host:8040/api/meta/header/14156`
-
-If the final query is successful, the Tablet Meta will be returned as json.
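-
-For example, a quick sketch of calling this interface with curl, reusing the host and tablet id from the example above (adjust to your environment):
-
-```
-curl http://be_host:8040/api/meta/header/14156
-```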
-
-#### Offline
-
-Use the meta_tool to get the Tablet Meta of a tablet on a disk.
-
-Command:
-
-```
-./lib/meta_tool --root_path=/path/to/root_path --operation=get_meta --tablet_id=xxx --schema_hash=xxx
-```
-
-> root_path: the corresponding root_path configured in be.conf.
-
-The result is also a presentation of Tablet Meta in JSON format.
-
-### Load header
-
-The load header function is provided to implement manual migration of a tablet. It is based on the Tablet Meta in JSON format, so if changes to the shard field or version information are involved, they can be made directly in the JSON content of the Tablet Meta, which is then loaded with the following command.
-
-Command:
-
-```
-./lib/meta_tool --operation=load_meta --root_path=/path/to/root_path --json_header_path=path
-```
-
-### Delete header
-
-This function deletes the meta of a tablet from a disk of a BE. Both single delete and batch delete are supported.
-
-Single delete:
-
-```
-./lib/meta_tool --operation=delete_meta --root_path=/path/to/root_path --tablet_id=xxx --schema_hash=xxx
-```
-
-Batch delete:
-
-```
-./lib/meta_tool --operation=batch_delete_meta --tablet_file=/path/to/tablet_file.txt
-```
-
-Each line in `tablet_file.txt` represents the information of a tablet. The format is:
-
-`root_path,tablet_id,schema_hash`
-
-Columns are separated by commas.
-
-`tablet_file` example:
-
-```
-/output/be/data/,14217,352781111
-/output/be/data/,14219,352781111
-/output/be/data/,14223,352781111
-/output/be/data/,14227,352781111
-/output/be/data/,14233,352781111
-/output/be/data/,14239,352781111
-```
-
-Batch delete will skip lines in `tablet_file` whose tablet information is incorrectly formatted. After execution completes, the number of successful deletions and the number of errors are displayed.
-
-### TabletMeta in Pb format
-
-This command is used to view a Tablet Meta in the old file-based PB format and display it in JSON format.
-
-Command:
-
-```
-./lib/meta_tool --operation=show_meta --root_path=/path/to/root_path --pb_header_path=path
-```
-
-### Segment meta in Pb format
-
-This command is used to view a segment meta in PB format and display it in JSON format.
-
-Command:
-
-```
-./meta_tool --operation=show_segment_footer --file=/path/to/segment/file
-```
diff --git a/docs/en/administrator-guide/operation/tablet-repair-and-balance.md b/docs/en/administrator-guide/operation/tablet-repair-and-balance.md
deleted file mode 100644
index f4f3e21965..0000000000
--- a/docs/en/administrator-guide/operation/tablet-repair-and-balance.md
+++ /dev/null
@@ -1,774 +0,0 @@
----
-{
- "title": "Data replica management",
- "language": "en"
-}
----
-
-
-
-# Data replica management
-
-Beginning with version 0.9.0, Doris introduced an optimized replica management strategy and supported a richer replica status viewing tool. This document focuses on Doris data replica balancing, repair scheduling strategies, and replica management operations and maintenance methods. Help users to more easily master and manage the replica status in the cluster.
-
-> For repairing and balancing replicas of tables with the Colocation attribute, refer to `docs/documentation/cn/administrator-guide/colocation-join.md`.
-
-## Terminology
-
-1. Tablet: a logical shard of a Doris table; a table has multiple tablets.
-2. Replica: a copy of a tablet; each tablet has three replicas by default.
-3. Healthy Replica: a replica that is alive on a Backend and has complete versions.
-4. Tablet Checker (TC): a resident background thread that scans all Tablets regularly, checks their status, and decides whether to send them to the Tablet Scheduler based on the results.
-5. Tablet Scheduler (TS): a resident background thread that handles the Tablets sent by the Tablet Checker that need to be repaired, and also carries out cluster replica balancing.
-6. Tablet SchedCtx (TSC): a wrapper around a tablet. When TC selects a tablet, it wraps it as a TSC and sends it to TS.
-7. Storage Medium: storage medium. Doris supports specifying different storage media at partition granularity, including SSD and HDD. The replica scheduling strategy is also applied separately for different storage media.
-
-```
-
- +--------+ +-----------+
- | Meta | | Backends |
- +---^----+ +------^----+
- | | | 3. Send clone tasks
- 1. Check tablets | | |
- +--------v------+ +-----------------+
- | TabletChecker +--------> TabletScheduler |
- +---------------+ +-----------------+
- 2. Waiting to be scheduled
-
-
-```
-The figure above is a simplified workflow.
-
-
-## Duplicate status
-
-The multiple replicas of a Tablet may become inconsistent in state under certain circumstances. Doris will attempt to automatically repair these inconsistent replicas so that the cluster can recover from the error state as soon as possible.
-
-**The health status of a Replica is as follows:**
-
-1. BAD
-
- That is, the copy is damaged. Includes, but is not limited to, the irrecoverable damaged status of copies caused by disk failures, BUGs, etc.
-
-2. VERSION\_MISSING
-
- Version missing. Each batch of imports in Doris corresponds to a data version. A copy of the data consists of several consecutive versions. However, due to import errors, delays and other reasons, the data version of some copies may be incomplete.
-
-3. HEALTHY
-
-   Healthy replica. The replica's data is normal, and the BE node where the replica is located is in a normal state (its heartbeat is normal and it is not in the process of being decommissioned).
-
-**The health status of a Tablet is determined by the status of all its copies. There are the following categories:**
-
-1. REPLICA\_MISSING
-
- The copy is missing. That is, the number of surviving copies is less than the expected number of copies.
-
-2. VERSION\_INCOMPLETE
-
- The number of surviving copies is greater than or equal to the number of expected copies, but the number of healthy copies is less than the number of expected copies.
-
-3. REPLICA\_RELOCATING
-
-   There are enough surviving replicas with complete versions, but the BE nodes where some of the replicas are located are in an unavailable state (for example, being decommissioned).
-
-4. REPLICA\_MISSING\_IN\_CLUSTER
-
- When using multi-cluster, the number of healthy replicas is greater than or equal to the expected number of replicas, but the number of replicas in the corresponding cluster is less than the expected number of replicas.
-
-5. REDUNDANT
-
-   Replica redundancy. The healthy replicas are all in the corresponding cluster, but the number of replicas exceeds the expected number, or there is a redundant replica on an unavailable BE.
-
-6. FORCE\_REDUNDANT
-
- This is a special state. It only occurs when the number of expected replicas is greater than or equal to the number of available nodes, and when the Tablet is in the state of replica missing. In this case, you need to delete a copy first to ensure that there are available nodes for creating a new copy.
-
-7. COLOCATE\_MISMATCH
-
-   Tablet status of tables with the Colocation attribute: the replica distribution of the tablet is inconsistent with the distribution specified by its Colocation Group.
-
-8. COLOCATE\_REDUNDANT
-
-   Tablet status of tables with the Colocation attribute: the tablet of the Colocation table has redundant replicas.
-
-9. HEALTHY
-
-   A healthy tablet, i.e. none of the conditions above are satisfied.
-
-## Replica Repair
-
-As a resident background process, Tablet Checker regularly checks the status of all fragments. For unhealthy fragmentation, it will be sent to Tablet Scheduler for scheduling and repair. The actual operation of repair is accomplished by clone task on BE. FE is only responsible for generating these clone tasks.
-
-> Note 1: The main idea of replica repair is to make the number of fragmented replicas reach the desired value by creating or completing them first. Then delete the redundant copy.
->
-> Note 2: A clone task is to complete the process of copying specified data from a specified remote end to a specified destination.
-
-For different states, we adopt different repair methods:
-
-1. REPLICA\_MISSING/REPLICA\_RELOCATING
-
- Select a low-load, available BE node as the destination. Choose a healthy copy as the source. Clone tasks copy a complete copy from the source to the destination. For replica completion, we will directly select an available BE node, regardless of the storage medium.
-
-2. VERSION\_INCOMPLETE
-
- Select a relatively complete copy as the destination. Choose a healthy copy as the source. The clone task attempts to copy the missing version from the source to the destination.
-
-3. REPLICA\_MISSING\_IN\_CLUSTER
-
-   This state is handled in the same way as REPLICA\_MISSING.
-
-4. REDUNDANT
-
- Usually, after repair, there will be redundant copies in fragmentation. We select a redundant copy to delete it. The selection of redundant copies follows the following priorities:
- 1. The BE where the copy is located has been offline.
- 2. The copy is damaged
-   3. The BE where the replica is located is down or offline
- 4. The replica is in the CLONE state (which is an intermediate state during clone task execution)
- 5. The copy has version missing
- 6. The cluster where the copy is located is incorrect
- 7. The BE node where the replica is located has a high load
-
-5. FORCE\_REDUNDANT
-
-   Unlike REDUNDANT, at this point the Tablet has a missing replica, but there is no additional available node on which to create a new one. So a replica must be deleted first to free up an available node for creating a new replica.
-   The order of deleting replicas is the same as in REDUNDANT.
-
-6. COLOCATE\_MISMATCH
-
- Select one of the replica distribution BE nodes specified in Colocation Group as the destination node for replica completion.
-
-7. COLOCATE\_REDUNDANT
-
- Delete a copy on a BE node that is distributed by a copy specified in a non-Colocation Group.
-
-   When selecting replica nodes, Doris does not deploy replicas of the same Tablet on different BEs of the same host. This ensures that even if all BEs on one host go down, not all replicas will be lost.
-
-### Scheduling priority
-
-Waiting for the scheduled fragments in Tablet Scheduler gives different priorities depending on the status. High priority fragments will be scheduled first. There are currently several priorities.
-
-1. VERY\_HIGH
-
-   * REDUNDANT. Tablets with redundant replicas are given the highest priority. Logically, redundancy is the least urgent, but because it is the fastest to handle and quickly frees resources (such as disk space), we prioritize it.
- * FORCE\_REDUNDANT. Ditto.
-
-2. HIGH
-
- * REPLICA\_MISSING and most copies are missing (for example, 2 copies are missing in 3 copies)
- * VERSION\_INCOMPLETE and most copies are missing
-   * COLOCATE\_MISMATCH. We hope that the tablets of Colocation tables can be repaired as soon as possible.
- * COLOCATE\_REDUNDANT
-
-3. NORMAL
-
- * REPLICA\_MISSING, but most survive (for example, three copies lost one)
- * VERSION\_INCOMPLETE, but most copies are complete
-   * REPLICA\_RELOCATING, and most replicas need to be relocated (e.g. 2 of 3 replicas)
-
-4. LOW
-
- * REPLICA\_MISSING\_IN\_CLUSTER
-   * REPLICA\_RELOCATING, but most replicas are in a stable state
-
-### Manual priority
-
-The system automatically determines the scheduling priority. Sometimes, however, users want the tablets of certain tables or partitions to be repaired faster. So we provide a command with which the user can specify that the tablets of a table or partition should be repaired first:
-
-`ADMIN REPAIR TABLE tbl [PARTITION (p1, p2, ...)];`
-
-This command tells TC to give VERY HIGH priority to the problematic tables or partitions that need to be repaired first when scanning Tablets.
-
-> Note: This command is only a hint, which does not guarantee that the repair will be successful, and the priority will change with the scheduling of TS. And when Master FE switches or restarts, this information will be lost.
-
-Priority can be cancelled by the following commands:
-
-`ADMIN CANCEL REPAIR TABLE tbl [PARTITION (p1, p2, ...)];`
-
-### Priority scheduling
-
-Priority ensures that severely damaged fragments can be repaired first, and improves system availability. But if the high priority repair task fails all the time, the low priority task will never be scheduled. Therefore, we will dynamically adjust the priority of tasks according to the running status of tasks, so as to ensure that all tasks have the opportunity to be scheduled.
-
-* If the scheduling fails for five consecutive times (e.g., no resources can be obtained, no suitable source or destination can be found, etc.), the priority will be lowered.
-* If not scheduled for 30 minutes, priority will be raised.
-* The priority of the same tablet task is adjusted at least five minutes apart.
-
-At the same time, to preserve the weight of the initial priority, we stipulate that a task whose initial priority is VERY HIGH can be lowered at most to NORMAL, and a task whose initial priority is LOW can be raised at most to HIGH. The priority adjustment here also applies to priorities set manually by the user.
-
-## Duplicate Equilibrium
-
-Doris automatically balances replicas within the cluster. Two rebalance strategies are currently supported: BeLoad and Partition. BeLoad rebalance considers both the disk usage and the replica count of each BE. Partition rebalance only aims at the replica count of each partition, which helps to avoid hot spots; if you want high read/write performance, you may need this strategy. Note that Partition rebalance does not consider disk usage, so pay more attention to disk usage when using it. The strategy selection config is not mutable at runtime.
-
-### BeLoad
-
-The main idea of balancing is to create replicas of some tablets on low-load nodes, and then delete the replicas of these tablets on high-load nodes. Because different storage media exist, one or both storage media may be present on the different BE nodes of the same cluster. We require that, after balancing, tablets on storage medium A remain on storage medium A as far as possible. So we divide the cluster's BE nodes by storage medium, and then perform load-balancing scheduling separately for the set of BE nodes of each storage medium.
-
-Similarly, replica balancing ensures that replicas of the same tablet are not deployed on BEs of the same host.
-
-### BE Node Load
-
-We use Cluster LoadStatistics (CLS) to represent the load statistics of each Backend in a cluster. The Tablet Scheduler triggers cluster balancing based on these statistics. We currently calculate a loadScore for each BE, based on its **disk usage** and **replica count**, as the BE's load score. The higher the score, the heavier the load on the BE.
-
-Disk usage and number of copies have a weight factor, which is **capacityCoefficient** and **replicaNumCoefficient**, respectively. The sum of them is **constant to 1**. Among them, capacityCoefficient will dynamically adjust according to actual disk utilization. When the overall disk utilization of a BE is below 50%, the capacityCoefficient value is 0.5, and if the disk utilization is above 75% (configurable through the FE configuration item `capacity_used_percent_high_water`), the value is 1. If the utilization rate is between 50% and 75%, the weight coefficient increases smoothly. The formula is as follows:
-
-`capacityCoefficient = 2 * Disk Utilization - 0.5`
-
-The weight coefficient ensures that when disk utilization is too high, the backend load score will be higher to ensure that the BE load is reduced as soon as possible.
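-
-For example, plugging two utilization values into the formula above (a simple sanity check of its behavior at the boundaries):
-
-```
-Disk utilization = 60%  ->  capacityCoefficient = 2 * 0.60 - 0.5 = 0.7
-Disk utilization = 75%  ->  capacityCoefficient = 2 * 0.75 - 0.5 = 1.0
-```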
-
-Tablet Scheduler updates CLS every 20 seconds.
-
-### Partition
-
-The main idea of `partition rebalancing` is to decrease the skew of partitions. The skew of a partition is defined as the difference between its maximum replica count over all BEs and its minimum replica count over all BEs.
-
-So we only consider the replica count, not the replica size (disk usage).
-To reduce the number of moves, we use TwoDimensionalGreedyAlgo, whose two dimensions are cluster & partition. When rebalancing a max-skew partition, it prefers a move that also reduces the skew of the cluster.
-
-#### Skew Info
-
-The skew info is represented by `ClusterBalanceInfo`. `partitionInfoBySkew` is a multimap whose key is the partition's skew, so we can easily get the max-skew partitions. `beByTotalReplicaCount` is a multimap whose key is the total replica count of the backend.
-
-`ClusterBalanceInfo` is in CLS, updated every 20 seconds.
-
-When there is more than one max-skew partition, we randomly select one partition to calculate the move.
-
-### Equilibrium strategy
-
-Tablet Scheduler uses Load Balancer to select a certain number of healthy fragments as candidate fragments for balance in each round of scheduling. In the next scheduling, balanced scheduling will be attempted based on these candidate fragments.
-
-## Resource control
-
-Both replica repair and balancing are accomplished by replica copies between BEs. If the same BE performs too many tasks at the same time, it will bring a lot of IO pressure. Therefore, Doris controls the number of tasks that can be performed on each node during scheduling. The smallest resource control unit is the disk (that is, a data path specified in be.conf). By default, we configure two slots per disk for replica repair. A clone task occupies one slot at the source and one slot at the destination. If the number of slots is zero, no more tasks will be assigned to this disk. The number of slots can be configured by FE's `schedule_slot_num_per_path` parameter.
-
-In addition, by default, we provide two separate slots per disk for balancing tasks. The purpose is to prevent a high-load node from being unable to free up space through balancing because all of its slots are occupied by repair tasks.
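-
-A minimal fe.conf sketch for raising the per-disk repair slots on faster disks; the value 4 is illustrative only (the default described above is 2):
-
-```
-# fe.conf
-schedule_slot_num_per_path = 4
-```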
-
-## Replica Status View
-
-Replica status view mainly looks at the status of replicas, as well as the status of replica repair and balancing tasks. Most of these states **exist only in** the Master FE node. Therefore, the following commands need to be executed directly on the Master FE.
-
-### Replica status
-
-1. Global state checking
-
-   The `SHOW PROC '/statistic';` command can be used to view the replica status of the entire cluster.
-
- ```
- +----------+-----------------------------+----------+--------------+----------+-----------+------------+--------------------+-----------------------+
- | DbId | DbName | TableNum | PartitionNum | IndexNum | TabletNum | ReplicaNum | UnhealthyTabletNum | InconsistentTabletNum |
- +----------+-----------------------------+----------+--------------+----------+-----------+------------+--------------------+-----------------------+
- | 35153636 | default_cluster:DF_Newrisk | 3 | 3 | 3 | 96 | 288 | 0 | 0 |
- | 48297972 | default_cluster:PaperData | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
- | 5909381 | default_cluster:UM_TEST | 7 | 7 | 10 | 320 | 960 | 1 | 0 |
- | Total | 240 | 10 | 10 | 13 | 416 | 1248 | 1 | 0 |
- +----------+-----------------------------+----------+--------------+----------+-----------+------------+--------------------+-----------------------+
- ```
-
-   The `UnhealthyTabletNum` column shows how many Tablets are in an unhealthy state in the corresponding database. The `InconsistentTabletNum` column shows how many Tablets are in an inconsistent replica state in the corresponding database. The last `Total` row gives the statistics for the entire cluster. Normally `UnhealthyTabletNum` and `InconsistentTabletNum` should be 0. If they are not zero, you can further check which Tablets are affected. As shown above, one tablet in the UM_TEST database is unhealthy; you can use the following command to see which one it is.
-
- `SHOW PROC '/statistic/5909381';`
-
-   Here `5909381` is the corresponding DbId.
-
- ```
- +------------------+---------------------+
- | UnhealthyTablets | InconsistentTablets |
- +------------------+---------------------+
- | [40467980] | [] |
- +------------------+---------------------+
- ```
-
- The figure above shows the specific unhealthy Tablet ID (40467980). Later we'll show you how to view the status of each copy of a specific Tablet.
-
-2. Table (partition) level status checking
-
-   Users can view the replica status of a specified table or partition with the following command, and filter the status with a WHERE clause. For example, to view replicas with status OK on partitions p1 and p2 of table tbl1:
-
- `ADMIN SHOW REPLICA STATUS FROM tbl1 PARTITION (p1, p2) WHERE STATUS = "OK";`
-
- ```
- +----------+-----------+-----------+---------+-------------------+--------------------+------------------+------------+------------+-------+--------+--------+
- | TabletId | ReplicaId | BackendId | Version | LastFailedVersion | LastSuccessVersion | CommittedVersion | SchemaHash | VersionNum | IsBad | State | Status |
- +----------+-----------+-----------+---------+-------------------+--------------------+------------------+------------+------------+-------+--------+--------+
- | 29502429 | 29502432 | 10006 | 2 | -1 | 2 | 1 | -1 | 2 | false | NORMAL | OK |
- | 29502429 | 36885996 | 10002 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK |
- | 29502429 | 48100551 | 10007 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK |
- | 29502433 | 29502434 | 10001 | 2 | -1 | 2 | 1 | -1 | 2 | false | NORMAL | OK |
- | 29502433 | 44900737 | 10004 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK |
- | 29502433 | 48369135 | 10006 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK |
- +----------+-----------+-----------+---------+-------------------+--------------------+------------------+------------+------------+-------+--------+--------+
- ```
-
-   The status of all replicas is shown here. If `IsBad` is `true`, the replica is damaged. The `Status` column displays other states. For descriptions of the specific statuses, see `HELP ADMIN SHOW REPLICA STATUS`.
-
-    The `ADMIN SHOW REPLICA STATUS` command is mainly used to view the health status of replicas. Users can also view additional information about the replicas of a specified table with the following command:
-
- `SHOW TABLETS FROM tbl1;`
-
- ```
- +----------+-----------+-----------+------------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------------+--------------+----------------------+----------------------+----------------------+
- | TabletId | ReplicaId | BackendId | SchemaHash | Version | VersionHash | LstSuccessVersion | LstSuccessVersionHash | LstFailedVersion | LstFailedVersionHash | LstFailedTime | DataSize | RowCount | State | LstConsistencyCheckTime | CheckVersion | CheckVersionHash | VersionCount | PathHash | MetaUrl | CompactionStatus |
- +----------+-----------+-----------+------------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------------+--------------+----------------------+----------------------+----------------------+
- | 29502429 | 29502432 | 10006 | 1421156361 | 2 | 0 | 2 | 0 | -1 | 0 | N/A | 784 | 0 | NORMAL | N/A | -1 | -1 | 2 | -5822326203532286804 | url | url |
- | 29502429 | 36885996 | 10002 | 1421156361 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | 784 | 0 | NORMAL | N/A | -1 | -1 | 2 | -1441285706148429853 | url | url |
- | 29502429 | 48100551 | 10007 | 1421156361 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | 784 | 0 | NORMAL | N/A | -1 | -1 | 2 | -4784691547051455525 | url | url |
- +----------+-----------+-----------+------------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------------+--------------+----------------------+----------------------+----------------------+
- ```
-
-    The result above shows additional information, including the replica size, row count, version count, and the hash of the data path where the replica is located.
-
- > Note: The contents of the `State` column shown here do not represent the health status of the replica, but the status of the replica under certain tasks, such as CLONE, SCHEMA CHANGE, ROLLUP, etc.
-
-    In addition, users can check the distribution of replicas in a specified table or partition with the following command.
-
- `ADMIN SHOW REPLICA DISTRIBUTION FROM tbl1;`
-
- ```
- +-----------+------------+-------+---------+
- | BackendId | ReplicaNum | Graph | Percent |
- +-----------+------------+-------+---------+
- | 10000 | 7 | | 7.29 % |
- | 10001 | 9 | | 9.38 % |
- | 10002 | 7 | | 7.29 % |
- | 10003 | 7 | | 7.29 % |
- | 10004 | 9 | | 9.38 % |
- | 10005 | 11 | > | 11.46 % |
- | 10006 | 18 | > | 18.75 % |
- | 10007 | 15 | > | 15.62 % |
- | 10008 | 13 | > | 13.54 % |
- +-----------+------------+-------+---------+
- ```
-
- Here we show the number and percentage of replicas of table tbl1 on each BE node, as well as a simple graphical display.
-
-3. Tablet level status checking
-
-    When we want to locate a specific Tablet, we can use the following command to view its status. For example, check the tablet with ID 29502553:
-
- `SHOW TABLET 29502553;`
-
- ```
- +------------------------+-----------+---------------+-----------+----------+----------+-------------+----------+--------+---------------------------------------------------------------------------+
- | DbName | TableName | PartitionName | IndexName | DbId | TableId | PartitionId | IndexId | IsSync | DetailCmd |
- +------------------------+-----------+---------------+-----------+----------+----------+-------------+----------+--------+---------------------------------------------------------------------------+
- | default_cluster:test | test | test | test | 29502391 | 29502428 | 29502427 | 29502428 | true | SHOW PROC '/dbs/29502391/29502428/partitions/29502427/29502428/29502553'; |
- +------------------------+-----------+---------------+-----------+----------+----------+-------------+----------+--------+---------------------------------------------------------------------------+
- ```
-
-    The result above shows the database, table, partition, rollup index, and other information corresponding to this tablet. The user can copy the command in the `DetailCmd` column and continue to execute it:
-
-    `SHOW PROC '/dbs/29502391/29502428/partitions/29502427/29502428/29502553';`
-
- ```
- +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+
- | ReplicaId | BackendId | Version | VersionHash | LstSuccessVersion | LstSuccessVersionHash | LstFailedVersion | LstFailedVersionHash | LstFailedTime | SchemaHash | DataSize | RowCount | State | IsBad | VersionCount | PathHash |
- +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+
- | 43734060 | 10004 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | -1 | 784 | 0 | NORMAL | false | 2 | -8566523878520798656 |
- | 29502555 | 10002 | 2 | 0 | 2 | 0 | -1 | 0 | N/A | -1 | 784 | 0 | NORMAL | false | 2 | 1885826196444191611 |
- | 39279319 | 10007 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | -1 | 784 | 0 | NORMAL | false | 2 | 1656508631294397870 |
- +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+
- ```
-
-    The result above shows all replicas of the corresponding Tablet. The content shown here is the same as that of `SHOW TABLETS FROM tbl1;`, but here you can clearly see the status of all replicas of a specific Tablet.
-
-### Replica Scheduling Tasks
-
-1. View tasks waiting to be scheduled
-
- `SHOW PROC '/cluster_balance/pending_tablets';`
-
- ```
- +----------+--------+-----------------+---------+----------+----------+-------+---------+--------+----------+---------+---------------------+---------------------+---------------------+----------+------+-------------+---------------+---------------------+------------+---------------------+--------+---------------------+-------------------------------+
- | TabletId | Type | Status | State | OrigPrio | DynmPrio | SrcBe | SrcPath | DestBe | DestPath | Timeout | Create | LstSched | LstVisit | Finished | Rate | FailedSched | FailedRunning | LstAdjPrio | VisibleVer | VisibleVerHash | CmtVer | CmtVerHash | ErrMsg |
- +----------+--------+-----------------+---------+----------+----------+-------+---------+--------+----------+---------+---------------------+---------------------+---------------------+----------+------+-------------+---------------+---------------------+------------+---------------------+--------+---------------------+-------------------------------+
- | 4203036 | REPAIR | REPLICA_MISSING | PENDING | HIGH | LOW | -1 | -1 | -1 | -1 | 0 | 2019-02-21 15:00:20 | 2019-02-24 11:18:41 | 2019-02-24 11:18:41 | N/A | N/A | 2 | 0 | 2019-02-21 15:00:43 | 1 | 0 | 2 | 0 | unable to find source replica |
- +----------+--------+-----------------+---------+----------+----------+-------+---------+--------+----------+---------+---------------------+---------------------+---------------------+----------+------+-------------+---------------+---------------------+------------+---------------------+--------+---------------------+-------------------------------+
- ```
-
- The specific meanings of each column are as follows:
-
- * TabletId: The ID of the Tablet waiting to be scheduled. A scheduling task is for only one Tablet
- * Type: Task type, which can be REPAIR (repair) or BALANCE (balance)
-    * Status: The current status of the Tablet, such as REPLICA_MISSING (a replica is missing)
- * State: The status of the scheduling task may be PENDING/RUNNING/FINISHED/CANCELLED/TIMEOUT/UNEXPECTED
- * OrigPrio: Initial Priority
- * DynmPrio: Current dynamically adjusted priority
- * SrcBe: ID of the BE node at the source end
- * SrcPath: hash value of the path of the BE node at the source end
- * DestBe: ID of destination BE node
- * DestPath: hash value of the path of the destination BE node
- * Timeout: When the task is scheduled successfully, the timeout time of the task is displayed here in units of seconds.
- * Create: The time when the task was created
- * LstSched: The last time a task was scheduled
- * LstVisit: The last time a task was accessed. Here "accessed" refers to the processing time points associated with the task, including scheduling, task execution reporting, and so on.
- * Finished: Task End Time
- * Rate: Clone Task Data Copy Rate
-    * FailedSched: Number of times task scheduling failed
-    * FailedRunning: Number of times task execution failed
- * LstAdjPrio: Time of last priority adjustment
- * CmtVer/CmtVerHash/VisibleVer/VisibleVerHash: version information for clone tasks
- * ErrMsg: Error messages that occur when tasks are scheduled and run
-
-2. View running tasks
-
- `SHOW PROC '/cluster_balance/running_tablets';`
-
- The columns in the result have the same meaning as `pending_tablets`.
-
-3. View completed tasks
-
- `SHOW PROC '/cluster_balance/history_tablets';`
-
- By default, we reserve only the last 1,000 completed tasks. The columns in the result have the same meaning as `pending_tablets`. If `State` is listed as `FINISHED`, the task is normally completed. For others, you can see the specific reason based on the error information in the `ErrMsg` column.
-
-## Viewing Cluster Load and Scheduling Resources
-
-1. Cluster load
-
- You can view the current load of the cluster by following commands:
-
- `SHOW PROC '/cluster_balance/cluster_load_stat';`
-
- First of all, we can see the division of different storage media:
-
- ```
- +---------------+
- | StorageMedium |
- +---------------+
- | HDD |
- | SSD |
- +---------------+
- ```
-
-    Going one level further into a storage medium (by appending it to the path), you can see the balance state of the BE nodes that contain that storage medium:
-
- `SHOW PROC '/cluster_balance/cluster_load_stat/HDD';`
-
- ```
- +----------+-----------------+-----------+---------------+----------------+-------------+------------+----------+-----------+--------------------+-------+
- | BeId | Cluster | Available | UsedCapacity | Capacity | UsedPercent | ReplicaNum | CapCoeff | ReplCoeff | Score | Class |
- +----------+-----------------+-----------+---------------+----------------+-------------+------------+----------+-----------+--------------------+-------+
- | 10003 | default_cluster | true | 3477875259079 | 19377459077121 | 17.948 | 493477 | 0.5 | 0.5 | 0.9284678149967587 | MID |
- | 10002 | default_cluster | true | 3607326225443 | 19377459077121 | 18.616 | 496928 | 0.5 | 0.5 | 0.948660871419998 | MID |
- | 10005 | default_cluster | true | 3523518578241 | 19377459077121 | 18.184 | 545331 | 0.5 | 0.5 | 0.9843539990641831 | MID |
- | 10001 | default_cluster | true | 3535547090016 | 19377459077121 | 18.246 | 558067 | 0.5 | 0.5 | 0.9981869446537612 | MID |
- | 10006 | default_cluster | true | 3636050364835 | 19377459077121 | 18.764 | 547543 | 0.5 | 0.5 | 1.0011489897614072 | MID |
- | 10004 | default_cluster | true | 3506558163744 | 15501967261697 | 22.620 | 468957 | 0.5 | 0.5 | 1.0228319835582569 | MID |
- | 10007 | default_cluster | true | 4036460478905 | 19377459077121 | 20.831 | 551645 | 0.5 | 0.5 | 1.057279369420761 | MID |
- | 10000 | default_cluster | true | 4369719923760 | 19377459077121 | 22.551 | 547175 | 0.5 | 0.5 | 1.0964036415787461 | MID |
- +----------+-----------------+-----------+---------------+----------------+-------------+------------+----------+-----------+--------------------+-------+
- ```
-
- Some of these columns have the following meanings:
-
- * Available: True means that BE heartbeat is normal and not offline.
- * UsedCapacity: Bytes, the size of disk space used on BE
- * Capacity: Bytes, the total disk space size on BE
- * UsedPercent: Percentage, disk space utilization on BE
-    * ReplicaNum: Number of replicas on the BE
-    * CapCoeff/ReplCoeff: Weight coefficients of disk space and replica count
-    * Score: Load score. The higher the score, the heavier the load.
-    * Class: Load class, one of LOW/MID/HIGH. Balanced scheduling moves replicas from high-load nodes to low-load nodes
-
- Users can further view the utilization of each path on a BE, such as the BE with ID 10001:
-
- `SHOW PROC '/cluster_balance/cluster_load_stat/HDD/10001';`
-
- ```
- +------------------+------------------+---------------+---------------+---------+--------+----------------------+
- | RootPath | DataUsedCapacity | AvailCapacity | TotalCapacity | UsedPct | State | PathHash |
- +------------------+------------------+---------------+---------------+---------+--------+----------------------+
- | /home/disk4/palo | 498.757 GB | 3.033 TB | 3.525 TB | 13.94 % | ONLINE | 4883406271918338267 |
- | /home/disk3/palo | 704.200 GB | 2.832 TB | 3.525 TB | 19.65 % | ONLINE | -5467083960906519443 |
- | /home/disk1/palo | 512.833 GB | 3.007 TB | 3.525 TB | 14.69 % | ONLINE | -7733211489989964053 |
- | /home/disk2/palo | 881.955 GB | 2.656 TB | 3.525 TB | 24.65 % | ONLINE | 4870995507205544622 |
- | /home/disk5/palo | 694.992 GB | 2.842 TB | 3.525 TB | 19.36 % | ONLINE | 1916696897889786739 |
- +------------------+------------------+---------------+---------------+---------+--------+----------------------+
- ```
-
- The disk usage of each data path on the specified BE is shown here.
-
-2. Scheduling resources
-
- Users can view the current slot usage of each node through the following commands:
-
- `SHOW PROC '/cluster_balance/working_slots';`
-
- ```
- +----------+----------------------+------------+------------+-------------+----------------------+
- | BeId | PathHash | AvailSlots | TotalSlots | BalanceSlot | AvgRate |
- +----------+----------------------+------------+------------+-------------+----------------------+
- | 10000 | 8110346074333016794 | 2 | 2 | 2 | 2.459007474009069E7 |
- | 10000 | -5617618290584731137 | 2 | 2 | 2 | 2.4730105014001578E7 |
- | 10001 | 4883406271918338267 | 2 | 2 | 2 | 1.6711402709780257E7 |
- | 10001 | -5467083960906519443 | 2 | 2 | 2 | 2.7540126380326536E7 |
- | 10002 | 9137404661108133814 | 2 | 2 | 2 | 2.417217089806745E7 |
- | 10002 | 1885826196444191611 | 2 | 2 | 2 | 1.6327378456676323E7 |
- +----------+----------------------+------------+------------+-------------+----------------------+
- ```
-
-    Slot usage is shown here at the granularity of data paths. `AvgRate` is the historical average copy rate of clone tasks on that path, in bytes per second.
-
-3. Priority repair view
-
-    The following command allows you to view the tables or partitions marked for priority repair by the `ADMIN REPAIR TABLE` command.
-
- `SHOW PROC '/cluster_balance/priority_repair';`
-
-    The `RemainingTimeMs` column indicates when these priority repairs will be automatically removed from the priority repair queue, to prevent resources from being occupied indefinitely if a priority repair cannot succeed.
-
-### Scheduler Statistical Status View
-
-We have collected some statistics of Tablet Checker and Tablet Scheduler during their operation, which can be viewed through the following commands:
-
-`SHOW PROC '/cluster_balance/sched_stat';`
-
-```
-+---------------------------------------------------+-------------+
-| Item | Value |
-+---------------------------------------------------+-------------+
-| num of tablet check round | 12041 |
-| cost of tablet check(ms) | 7162342 |
-| num of tablet checked in tablet checker | 18793506362 |
-| num of unhealthy tablet checked in tablet checker | 7043900 |
-| num of tablet being added to tablet scheduler | 1153 |
-| num of tablet schedule round | 49538 |
-| cost of tablet schedule(ms) | 49822 |
-| num of tablet being scheduled | 4356200 |
-| num of tablet being scheduled succeeded | 320 |
-| num of tablet being scheduled failed | 4355594 |
-| num of tablet being scheduled discard | 286 |
-| num of tablet priority upgraded | 0 |
-| num of tablet priority downgraded | 1096 |
-| num of clone task | 230 |
-| num of clone task succeeded | 228 |
-| num of clone task failed | 2 |
-| num of clone task timeout | 2 |
-| num of replica missing error | 4354857 |
-| num of replica version missing error | 967 |
-| num of replica relocating | 0 |
-| num of replica redundant error | 90 |
-| num of replica missing in cluster error | 0 |
-| num of balance scheduled | 0 |
-+---------------------------------------------------+-------------+
-```
-
-The meanings of each line are as follows:
-
-* num of tablet check round: Number of check rounds run by the Tablet Checker
-* cost of tablet check(ms): Total time spent by the Tablet Checker on checks, in milliseconds
-* num of tablet checked in tablet checker: Number of tablets checked by the Tablet Checker
-* num of unhealthy tablet checked in tablet checker: Number of unhealthy tablets found by the Tablet Checker
-* num of tablet being added to tablet scheduler: Number of tablets submitted to the Tablet Scheduler
-* num of tablet schedule round: Number of runs of the Tablet Scheduler
-* cost of tablet schedule(ms): Total time spent by Tablet Scheduler runs, in milliseconds
-* num of tablet being scheduled: Total number of tablets scheduled
-* num of tablet being scheduled succeeded: Total number of tablets scheduled successfully
-* num of tablet being scheduled failed: Total number of tablets that failed to be scheduled
-* num of tablet being scheduled discard: Total number of tablets that failed to be scheduled and were discarded
-* num of tablet priority upgraded: Number of times a tablet's priority was upgraded
-* num of tablet priority downgraded: Number of times a tablet's priority was downgraded
-* num of clone task: Number of clone tasks generated
-* num of clone task succeeded: Number of clone tasks that succeeded
-* num of clone task failed: Number of clone tasks that failed
-* num of clone task timeout: Number of clone tasks that timed out
-* num of replica missing error: Number of tablets whose checked status is replica missing
-* num of replica version missing error: Number of tablets whose checked status is replica version missing (this statistic includes num of replica relocating and num of replica missing in cluster error)
-* num of replica relocating: Number of tablets whose checked status is replica relocating
-* num of replica redundant error: Number of tablets whose checked status is replica redundant
-* num of replica missing in cluster error: Number of tablets whose checked status is not in the corresponding cluster
-* num of balance scheduled: Number of balance schedules
-
-> Note: The above values are only historical accumulative values. We also print these statistics regularly in the FE logs, where the values in parentheses represent the number of changes in each statistical value since the last time the statistics were printed.
-
-## Relevant configuration instructions
-
-### Adjustable parameters
-
-The following adjustable parameters are all configurable parameters in fe.conf.
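-
-Most of these parameters can also be inspected and changed at runtime from a MySQL client. A minimal sketch (note that runtime changes generally do not survive an FE restart unless they are also written to fe.conf):
-
-```
--- View the current value of a configuration item.
-ADMIN SHOW FRONTEND CONFIG LIKE "disable_balance";
-
--- Change it at runtime.
-ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");
-```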
-
-* use\_new\_tablet\_scheduler
-
-    * Description: Whether to enable the new replica scheduling mode. The new replica scheduling mode is the one introduced in this document. If it is enabled, `disable_colocate_join` must be `true`, because the new scheduling strategy does not yet support scheduling the data shards of colocation tables.
-    * Default value: true
- * Importance: High
-
-* tablet\_repair\_delay\_factor\_second
-
-    * Note: For different scheduling priorities, we delay the start of repair by different amounts of time, in order to avoid a large number of unnecessary replica repair tasks during routine restarts and upgrades. This parameter is a reference coefficient. For HIGH priority, the delay is the reference coefficient * 1; for NORMAL priority, * 2; for LOW priority, * 3. That is, the lower the priority, the longer the delay. If the user wants replicas to be repaired as soon as possible, this parameter can be reduced appropriately.
- * Default value: 60 seconds
- * Importance: High
-
-* schedule\_slot\_num\_per\_path
-
-    * Note: The default number of slots allocated to each disk for replica repair. This number represents the number of replica repair tasks that a disk can run simultaneously. If you want replicas to be repaired faster, you can increase this parameter appropriately; the higher the value, the greater the impact on IO.
- * Default value: 2
- * Importance: High
-
-* balance\_load\_score\_threshold
-
-    * Description: Threshold of cluster balance. The default is 0.1, i.e. 10%. When the load score of a BE node is within 10% of the average load score (above or below), we consider the node balanced. If you want the cluster load to be more even, you can lower this parameter appropriately.
- * Default value: 0.1
- * Importance:
-
-* storage\_high\_watermark\_usage\_percent and storage\_min\_left\_capacity\_bytes
-
- * Description: These two parameters represent the upper limit of the maximum space utilization of a disk and the lower limit of the minimum space remaining, respectively. When the space utilization of a disk is greater than the upper limit or the remaining space is less than the lower limit, the disk will no longer be used as the destination address for balanced scheduling.
- * Default values: 0.85 and 1048576000 (1GB)
- * Importance:
-
-* disable\_balance
-
-    * Description: Controls whether to turn off the balancing function. While replicas are being balanced, some operations, such as ALTER TABLE, are forbidden, and balancing can last a long time. Therefore, if the user needs to perform such a forbidden operation as soon as possible, this parameter can be set to true to turn off balanced scheduling.
- * Default value: false
- * Importance:
-
-### Unadjustable parameters
-
-The following parameters do not support modification for the time being, just for illustration.
-
-* Tablet Checker scheduling interval
-
- Tablet Checker schedules checks every 20 seconds.
-
-* Tablet Scheduler scheduling interval
-
-    Tablet Scheduler schedules every 5 seconds.
-
-* Number of Tablet Scheduler Schedules per Batch
-
- Tablet Scheduler schedules up to 50 tablets at a time.
-
-* Tablet Scheduler Maximum Waiting Schedule and Number of Tasks in Operation
-
- The maximum number of waiting tasks and running tasks is 2000. When over 2000, Tablet Checker will no longer generate new scheduling tasks to Tablet Scheduler.
-
-* Tablet Scheduler Maximum Balanced Task Number
-
- The maximum number of balanced tasks is 500. When more than 500, there will be no new balancing tasks.
-
-* Number of slots per disk for balancing tasks
-
- The number of slots per disk for balancing tasks is 2. This slot is independent of the slot used for replica repair.
-
-* Update interval of cluster equilibrium
-
- Tablet Scheduler recalculates the load score of the cluster every 20 seconds.
-
-* Minimum and Maximum Timeout for Clone Tasks
-
-    The timeout of a clone task ranges from 3 minutes to 2 hours. The specific timeout is calculated from the size of the tablet using the formula (tablet size) / (5MB/s); for example, a 10 GB tablet gets a timeout of about 2048 seconds (roughly 34 minutes). When a clone task fails three times, the task terminates.
-
-* Dynamic Priority Adjustment Strategy
-
- The minimum priority adjustment interval is 5 minutes. When a tablet schedule fails five times, priority is lowered. When a tablet is not scheduled for 30 minutes, priority is raised.
-
-## Relevant issues
-
-* In some cases, the default replica repair and balancing strategy may saturate the network (mostly with gigabit network cards and a large number of disks per BE). In that case, some parameters need to be adjusted to reduce the number of simultaneous balancing and repair tasks.
-
-* The current balancing strategy for Colocate Table replicas cannot guarantee that replicas of the same Tablet are not placed on BEs of the same host. However, the repair strategy for Colocate Table replicas detects this distribution error and corrects it. It may then happen that, after the correction, the balancing strategy considers the replicas unbalanced and rebalances them again, so that the Colocate Group never stabilizes because the two states keep alternating. In view of this, when using the Colocate attribute, we suggest keeping the cluster as homogeneous as possible to reduce the probability of replicas being distributed on the same host.
-
-## Best Practices
-
-### Control and manage the progress of replica repair and balancing of clusters
-
-In most cases, Doris can automatically perform replica repair and cluster balancing by default parameter configuration. However, in some cases, we need to manually intervene to adjust the parameters to achieve some special purposes. Such as prioritizing the repair of a table or partition, disabling cluster balancing to reduce cluster load, prioritizing the repair of non-colocation table data, and so on.
-
-This section describes how to control and manage the progress of replica repair and balancing of the cluster by modifying the parameters.
-
-1. Deleting Corrupt Replicas
-
-    In some cases, Doris cannot automatically detect certain corrupted replicas, which causes frequent query or import errors on those replicas. In this case, we need to delete the corrupted replicas manually. This method can be used, for example, to delete a replica with too many versions that causes -235 errors, or to delete a replica whose files are corrupted.
-
-    First, find the tablet id of the relevant replica, say 10001, then use `show tablet 10001;` and execute the returned `show proc` statement to see the details of each replica of the tablet.
-
- Assuming that the backend id of the copy to be deleted is 20001, the following statement is executed to mark the copy as `bad`.
-
- ```
- ADMIN SET REPLICA STATUS PROPERTIES("tablet_id" = "10001", "backend_id" = "20001", "status" = "bad");
- ```
-
- At this point, the `show proc` statement again shows that the `IsBad` column of the corresponding copy has a value of `true`.
-
-    The replica marked as `bad` will no longer participate in imports and queries, and the replica repair logic will automatically create a new replica to replace it.
-
-2. Prioritize repairing a table or partition
-
-    Use `HELP ADMIN REPAIR TABLE;` to view the help for this command, which attempts to repair the tablets of the specified table or partition with priority, as in the sketch below.
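-
-    For example (tbl1 and p1 are placeholder names):
-
-    ```
-    -- Try to repair the tablets of partition p1 of table tbl1 with high priority.
-    ADMIN REPAIR TABLE tbl1 PARTITION (p1);
-
-    -- Cancel the priority repair if it is no longer needed.
-    ADMIN CANCEL REPAIR TABLE tbl1 PARTITION (p1);
-    ```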
-
-3. Stop the balancing task
-
- The balancing task will take up some network bandwidth and IO resources. If you wish to stop the generation of new balancing tasks, you can do so with the following command.
-
- ```
- ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");
- ```
-
-4. Stop all replica scheduling tasks
-
-    Replica scheduling tasks include balancing and repair tasks. These tasks take up some network bandwidth and IO resources. All replica scheduling tasks (excluding those already running), covering both colocation tables and ordinary tables, can be stopped with the following command.
-
- ```
- ADMIN SET FRONTEND CONFIG ("disable_tablet_scheduler" = "true");
- ```
-
-5. Stop replica scheduling for all colocation tables
-
-    Replica scheduling for colocation tables runs separately and independently from that of regular tables. In some cases, users may wish to stop the balancing and repair of colocation tables first and use the cluster resources to repair ordinary tables, with the following command.
-
- ```
- ADMIN SET FRONTEND CONFIG ("disable_colocate_balance" = "true");
- ```
-
-6. Repair replicas using a more conservative strategy
-
- Doris automatically repairs replicas when it detects missing replicas, BE downtime, etc. However, in order to reduce some errors caused by jitter (e.g., BE being down briefly), Doris delays triggering these tasks.
-
-    * The `tablet_repair_delay_factor_second` parameter. Default 60 seconds. Depending on the priority of the repair task, the repair is delayed by 60, 120, or 180 seconds. This time can be extended with the following command, so that longer anomalies can be tolerated without triggering unnecessary repair tasks.
-
- ```
- ADMIN SET FRONTEND CONFIG ("tablet_repair_delay_factor_second" = "120");
- ```
-
-7. Use a more conservative strategy to trigger redistribution of colocation groups
-
-    Redistribution of colocation groups may be accompanied by a large number of tablet migrations. `colocate_group_relocate_delay_second` controls the redistribution trigger delay; the default is 1800 seconds. If a BE node is likely to be offline for a long time, you can try to increase this parameter to avoid unnecessary redistribution:
-
- ```
- ADMIN SET FRONTEND CONFIG ("colocate_group_relocate_delay_second" = "3600");
- ```
-
-8. Faster Replica Balancing
-
-    Doris's replica balancing logic first adds a new replica and then deletes the old one in order to migrate a replica. When deleting the old replica, Doris waits for any import tasks already started on that replica to finish, so that balancing does not affect those imports. However, this slows down the balancing logic. In this case, you can make Doris ignore this wait and delete the old replica directly by modifying the following parameter.
-
- ```
- ADMIN SET FRONTEND CONFIG ("enable_force_drop_redundant_replica" = "true");
- ```
-
- This operation may cause some import tasks to fail during balancing (requiring a retry), but it will speed up balancing significantly.
-
-Overall, when we need to bring the cluster back to a normal state quickly, consider handling it along the following lines.
-
-1. Find the tablets that are causing high-priority tasks to report errors and set the problematic replicas to bad.
-2. Repair certain tables with the `admin repair` statement.
-3. Stop the replica balancing logic to avoid taking up cluster resources, and turn it on again after the cluster has recovered.
-4. Use a more conservative strategy to trigger repair tasks, to deal with the avalanche effect caused by frequent BE downtime.
-5. Turn off scheduling tasks for colocation tables on demand and focus cluster resources on repairing other high-priority data.
diff --git a/docs/en/administrator-guide/outfile.md b/docs/en/administrator-guide/outfile.md
deleted file mode 100644
index f07c6d54e0..0000000000
--- a/docs/en/administrator-guide/outfile.md
+++ /dev/null
@@ -1,195 +0,0 @@
----
-{
- "title": "Export Query Result",
- "language": "en"
-}
----
-
-
-
-# Export Query Result
-
-This document describes how to use the `SELECT INTO OUTFILE` command to export query results.
-
-## Syntax
-
-The `SELECT INTO OUTFILE` statement can export query results to a file. Currently, it supports exporting to remote storage such as HDFS, S3, BOS and COS (Tencent Cloud) through the Broker process, or directly through the S3 or HDFS protocol. The syntax is as follows:
-
-```
-query_stmt
-INTO OUTFILE "file_path"
-[format_as]
-[properties]
-```
-
-* `file_path`
-
-    `file_path` specifies the file path and file name prefix, e.g. `hdfs://path/to/my_file_`.
-
-    The final file name is assembled from `my_file_`, the file sequence number, and the format suffix. The file sequence number starts from 0 and is determined by the number of splits.
-
- ```
- my_file_abcdefg_0.csv
- my_file_abcdefg_1.csv
-    my_file_abcdefg_2.csv
- ```
-
-* `[format_as]`
-
- ```
- FORMAT AS CSV
- ```
-
-    Specify the export format. Supported formats are csv, parquet, csv_with_names and csv_with_names_and_types. The default is csv.
-
-* `[properties]`
-
- Specify the relevant attributes. Currently it supports exporting through the Broker process, or through the S3, HDFS protocol.
-
- + Broker related attributes need to be prefixed with `broker.`. For details, please refer to [Broker Document](./broker.html).
-    + The HDFS protocol can directly use HDFS protocol configuration. `hdfs.fs.defaultFS` is used to fill in the namenode address and port; it is required.
-    + The S3 protocol can directly use S3 protocol configuration.
-
- ```
- PROPERTIES
- ("broker.prop_key" = "broker.prop_val", ...)
- or
- ("hdfs.fs.defaultFS" = "xxx", "hdfs.user" = "xxx")
- or
- ("AWS_ENDPOINT" = "xxx", ...)
- ```
-
- Other properties
-
- ```
- PROPERTIES
- ("key1" = "val1", "key2" = "val2", ...)
- ```
-
- currently supports the following properties:
-
- * `column_separator`: Column separator, only applicable to CSV format. The default is `\t`.
- * `line_delimiter`: Line delimiter, only applicable to CSV format. The default is `\n`.
-    * `max_file_size`: The maximum size of a single file. Default is 1GB. Range: 5MB to 2GB. Files exceeding this size will be split.
-    * `schema`: Schema information for PARQUET, only applicable to the PARQUET format. If the exported file format is PARQUET, `schema` must be specified.
-
-## Concurrent export
-
-By default, the export of the query result set is non-concurrent, that is, a single point of export. If the user wants the query result set to be exported concurrently, the following conditions need to be met:
-
-1. session variable 'enable_parallel_outfile' to enable concurrent export: ```set enable_parallel_outfile = true;```
-2. The export method is S3, HDFS instead of using a broker
-3. The query can meet the needs of concurrent export; for example, the top level must not contain single-point nodes such as sort. (The section below shows how to verify whether a query is exported concurrently.)
-
-If the above three conditions are met, concurrent export of the query result set is triggered. Concurrency = ```be_instance_num * parallel_fragment_exec_instance_num```
-
-### How to verify that the result set is exported concurrently
-
-After the user enables concurrent export through the session variable setting, if you want to verify whether the current query can be exported concurrently, you can use the following method.
-
-```
-explain select xxx from xxx where xxx into outfile "s3://xxx" format as csv properties ("AWS_ENDPOINT" = "xxx", ...);
-```
-
-After explaining the query, Doris will return the plan of the query. If ```RESULT FILE SINK``` appears in ```PLAN FRAGMENT 1```, it means that concurrent export has been enabled successfully.
-If ```RESULT FILE SINK``` appears in ```PLAN FRAGMENT 0```, it means that the current query cannot be exported concurrently (it does not satisfy all three conditions for concurrent export).
-
-```
-Planning example for concurrent export:
-+-----------------------------------------------------------------------------+
-| Explain String |
-+-----------------------------------------------------------------------------+
-| PLAN FRAGMENT 0 |
-| OUTPUT EXPRS: | | | |
-| PARTITION: UNPARTITIONED |
-| |
-| RESULT SINK |
-| |
-| 1:EXCHANGE |
-| |
-| PLAN FRAGMENT 1 |
-| OUTPUT EXPRS:`k1` + `k2` |
-| PARTITION: HASH_PARTITIONED: `default_cluster:test`.`multi_tablet`.`k1` |
-| |
-| RESULT FILE SINK |
-| FILE PATH: s3://ml-bd-repo/bpit_test/outfile_1951_ |
-| STORAGE TYPE: S3 |
-| |
-| 0:OlapScanNode |
-| TABLE: multi_tablet |
-+-----------------------------------------------------------------------------+
-```
-
-## Usage example
-
-For details, please refer to [OUTFILE Document](../sql-reference/sql-statements/Data%20Manipulation/OUTFILE.md).
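-
-As a minimal sketch, an export over the HDFS protocol might look like the following (the table name, path, namenode address, and user are placeholders):
-
-```
-SELECT * FROM tbl1
-INTO OUTFILE "hdfs://path/to/result_"
-FORMAT AS CSV
-PROPERTIES
-(
-    "hdfs.fs.defaultFS" = "hdfs://namenode_host:port",
-    "hdfs.user" = "work",
-    "column_separator" = ","
-);
-```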
-
-## Return result
-
-The command is a synchronous command. When the command returns, the operation is complete.
-At the same time, a row of results will be returned to show the result of the export.
-
-If it exports and returns normally, the result is as follows:
-
-```
-mysql> select * from tbl1 limit 10 into outfile "file:///home/work/path/result_";
-+------------+-----------+----------+--------------------------------------------------------------------+
-| FileNumber | TotalRows | FileSize | URL |
-+------------+-----------+----------+--------------------------------------------------------------------+
-| 1 | 2 | 8 | file:///192.168.1.10/home/work/path/result_{fragment_instance_id}_ |
-+------------+-----------+----------+--------------------------------------------------------------------+
-1 row in set (0.05 sec)
-```
-
-* FileNumber: The number of files finally generated.
-* TotalRows: The number of rows in the result set.
-* FileSize: The total size of the exported file. Unit byte.
-* URL: If it is exported to a local disk, the Compute Node to which it is exported is displayed here.
-
-If a concurrent export is performed, multiple rows of data will be returned.
-
-```
-+------------+-----------+----------+--------------------------------------------------------------------+
-| FileNumber | TotalRows | FileSize | URL |
-+------------+-----------+----------+--------------------------------------------------------------------+
-| 1 | 3 | 7 | file:///192.168.1.10/home/work/path/result_{fragment_instance_id}_ |
-| 1 | 2 | 4 | file:///192.168.1.11/home/work/path/result_{fragment_instance_id}_ |
-+------------+-----------+----------+--------------------------------------------------------------------+
-2 rows in set (2.218 sec)
-```
-
-If the execution is incorrect, an error message will be returned, such as:
-
-```
-mysql> SELECT * FROM tbl INTO OUTFILE ...
-ERROR 1064 (HY000): errCode = 2, detailMessage = Open broker writer failed ...
-```
-
-## Notice
-
-* The CSV format does not support exporting binary types, such as BITMAP and HLL types. These types will be output as `\N`, which is null.
-* If you do not enable concurrent export, the query result is exported by a single BE node in a single thread. Therefore, the export time and the export result set size are positively correlated. Turning on concurrent export can reduce the export time.
-* The export command does not check whether the file and file path exist. Whether the path will be automatically created or whether the existing file will be overwritten is entirely determined by the semantics of the remote storage system.
-* If an error occurs during the export process, the exported file may remain on the remote storage system. Doris will not clean these files. The user needs to manually clean up.
-* The timeout of the export command is the same as the timeout of the query. It can be set by `SET query_timeout = xxx`.
-* For empty result query, there will be an empty file.
-* File splitting ensures that a row of data is stored entirely in a single file. Therefore, the size of a file is not strictly equal to `max_file_size`.
-* For functions whose output is invisible characters, such as BITMAP and HLL types, the output is `\N`, which is NULL.
-* At present, the output type of some geo functions, such as `ST_Point` is VARCHAR, but the actual output value is an encoded binary character. Currently these functions will output garbled characters. For geo functions, use `ST_AsText` for output.
diff --git a/docs/en/administrator-guide/privilege.md b/docs/en/administrator-guide/privilege.md
deleted file mode 100644
index f20316b1f7..0000000000
--- a/docs/en/administrator-guide/privilege.md
+++ /dev/null
@@ -1,224 +0,0 @@
----
-{
- "title": "Authority Management",
- "language": "en"
-}
----
-
-
-
-# Authority Management
-
-Doris's new privilege management system follows MySQL's privilege management mechanism. It provides table-level fine-grained privilege control and role-based access control, and supports a whitelist mechanism.
-
-## Noun Interpretation
-
-1. user_identity
-
-    In the permission system, a user is identified by a User Identity. A user identity consists of two parts: username and userhost. The username is the user name, composed of upper- and lower-case English letters. The userhost indicates the IP from which the user connection comes. A user_identity is presented as username@'userhost', representing the username from userhost.
-
-    Another form of user_identity is username@['domain'], where domain is a domain name that can be resolved into a set of IPs by DNS or BNS (Baidu Name Service). The final form is still a set of username@'userhost', so we use username@'userhost' to represent it.
-
-2. Privilege
-
- The objects of permissions are nodes, databases or tables. Different permissions represent different operating permissions.
-
-3. Role
-
-    Doris can create custom named roles. A role can be seen as a set of permissions. A newly created user can be assigned a role, and the role's permissions are then automatically granted to the user. Subsequent changes to the role's permissions are also reflected in the permissions of all users that belong to the role.
-
-4. user_property
-
-    User attributes are attached directly to a user, not to a user identity. That is, both cmy@'192.%' and cmy@['domain'] have the same set of user attributes, which belong to the user cmy, not to cmy@'192.%' or cmy@['domain'].
-
- User attributes include, but are not limited to, the maximum number of user connections, import cluster configuration, and so on.
-
-## Supported operations
-
-1. Create users: CREATE USER
-2. Delete users: DROP USER
-3. Authorization: GRANT
-4. Withdrawal: REVOKE
-5. Create role: CREATE ROLE
-6. Delete Roles: DROP ROLE
-7. View current user privileges: SHOW GRANTS
-8. View all user privileges: SHOW ALL GRANTS
-9. View the created roles: SHOW ROLES
-10. View user attributes: SHOW PROPERTY
-
-For detailed help with the above commands, you can use help + command to get help after connecting Doris through the MySQL client. For example `HELP CREATE USER`.
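-
-As a minimal sketch, a typical flow combining several of these commands might look like the following (my_role, jack, and db1 are placeholder names):
-
-```
-CREATE ROLE my_role;
-GRANT SELECT_PRIV, LOAD_PRIV ON db1.* TO ROLE 'my_role';
-CREATE USER 'jack'@'192.%' IDENTIFIED BY "12345" DEFAULT ROLE 'my_role';
-SHOW GRANTS FOR 'jack'@'192.%';
-```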
-
-## Permission type
-
-Doris currently supports the following permissions
-
-1. Node_priv
-
-    Node change permission, including adding, deleting, and decommissioning FE, BE, and BROKER nodes. Currently, this permission can only be granted to the Root user.
-
-2. Grant_priv
-
-    Privilege change permission. It allows operations including granting and revoking privileges and adding/deleting/changing users and roles.
-
-3. Select_priv
-
- Read-only access to databases and tables.
-
-4. Load_priv
-
- Write permissions to databases and tables. Including Load, Insert, Delete and so on.
-
-5. Alter_priv
-
- Change permissions on databases and tables. It includes renaming libraries/tables, adding/deleting/changing columns, and adding/deleting partitions.
-
-6. Create_priv
-
- The right to create databases, tables, and views.
-
-7. Drop_priv
-
- Delete permissions for databases, tables, and views.
-
-## Permission hierarchy
-
-At the same time, according to the scope of application of permissions, we divide them into three levels:
-
-1. GLOBAL LEVEL: Global permissions. That is, permissions on `*.*` granted by GRANT statements. The granted permissions apply to any table in any database.
-2. DATABASE LEVEL: Database-level permissions. That is, permissions on `db.*` granted by GRANT statements. The granted permissions apply to any table in the specified database.
-3. TABLE LEVEL: Table-level permissions. That is, permissions on `db.tbl` granted by GRANT statements. The permissions granted apply to the specified tables in the specified database.
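-
-As a minimal sketch, the three levels correspond to the following GRANT targets (jack, db1, and tbl1 are placeholder names):
-
-```
-GRANT SELECT_PRIV ON *.* TO 'jack'@'%';      -- GLOBAL LEVEL
-GRANT SELECT_PRIV ON db1.* TO 'jack'@'%';    -- DATABASE LEVEL
-GRANT SELECT_PRIV ON db1.tbl1 TO 'jack'@'%'; -- TABLE LEVEL
-```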
-
-
-## ADMIN /GRANT
-
-ADMIN\_PRIV and GRANT\_PRIV both carry the authority to **"grant privileges"**, which makes them somewhat special. The operations related to these two privileges are described here one by one.
-
-1. CREATE USER
-
- * Users with ADMIN or GRANT privileges at any level can create new users.
-
-2. DROP USER
-
- * Only ADMIN privileges can delete users.
-
-3. CREATE/DROP ROLE
-
-    * Only users with ADMIN privileges can create or delete roles.
-
-4. GRANT /REVOKE
-
- * Users with ADMIN or GLOBAL GRANT privileges can grant or revoke the privileges of any user.
- * Users with GRANT privileges at the DATABASE level can grant or revoke the privileges of any user on the specified database.
- * Users with GRANT privileges at TABLE level can grant or revoke the privileges of any user on the specified tables in the specified database.
-
-5. SET PASSWORD
-
- * Users with ADMIN or GLOBAL GRANT privileges can set any user's password.
- * Ordinary users can set their corresponding User Identity password. The corresponding User Identity can be viewed by `SELECT CURRENT_USER();`command.
- * Users with GRANT privileges at non-GLOBAL level cannot set the password of existing users, but can only specify the password when creating users.
-
-
-## Some explanations
-
-1. When Doris initializes, the following users and roles are automatically created:
-
-    1. Operator role: This role has Node\_priv and Admin\_priv, i.e. all permissions of Doris. In a subsequent version, we may restrict this role's permissions to Node\_priv only, that is, node change permissions, in order to meet some cloud deployment requirements.
-
- 2. admin role: This role has Admin\_priv, which is all permissions except for node changes.
-
- 3. root@'%': root user, which allows login from any node, with the role of operator.
-
- 4. admin@'%': admin user, allowing login from any node, role admin.
-
-2. It is not supported to delete or change the permissions of default created roles or users.
-
-3. There is one and only one user with the operator role. Multiple users can be created with the admin role.
-
-4. Operational instructions for possible conflicts
-
- 1. Conflict between domain name and ip:
-
- Assume that the following users are created:
-
- CREATE USER cmy@['domain'];
-
- And authorize:
-
- GRANT SELECT_PRIV ON \*.\* TO cmy@['domain']
-
- The domain is resolved into two ips: IP1 and IP2
-
- Let's assume that we have a separate authorization for cmy@'ip1':
-
- GRANT ALTER_PRIV ON \*.\* TO cmy@'ip1';
-
-      The permissions of cmy@'ip1' will be changed to SELECT\_PRIV, ALTER\_PRIV. And when we later change the permissions of cmy@['domain'], cmy@'ip1' will not follow.
-
- 2. duplicate IP conflicts:
-
- Assume that the following users are created:
-
- CREATE USER cmy@'%' IDENTIFIED BY "12345";
-
- CREATE USER cmy@'192.%' IDENTIFIED BY "abcde";
-
-      In terms of priority, '192.%' takes precedence over '%', so when the user cmy tries to log in to Doris with the password '12345' from 192.168.1.1, the login will be rejected.
-
-5. Forget passwords
-
- If you forget your password and cannot log in to Doris, you can log in to Doris without a password using the following command on the machine where the Doris FE node is located:
-
- `mysql-client -h 127.0.0.1 -P query_port -uroot`
-
- After login, the password can be reset through the SET PASSWORD command.
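-
-    A minimal sketch of resetting the password of the current (root) user after logging in this way (the new password is a placeholder):
-
-    ```
-    SET PASSWORD = PASSWORD('new_root_password');
-    ```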
-
-6. No user can reset the password of the root user except the root user himself.
-
-7. ADMIN\_PRIV permissions can only be granted or revoked at the GLOBAL level.
-
-8. Having GRANT\_PRIV at GLOBAL level is actually equivalent to having ADMIN\_PRIV, because GRANT\_PRIV at this level has the right to grant arbitrary permissions, please use it carefully.
-
-9. `current_user()` and `user()`
-
- Users can view `current_user` and `user` respectively by `SELECT current_user();` and `SELECT user();`. Where `current_user` indicates which identity the current user is passing through the authentication system, and `user` is the user's current actual `user_identity`.
-
- For example, suppose the user `user1@'192.%'` is created, and then a user user1 from 192.168.10.1 is logged into the system. At this time, `current_user` is `user1@'192.%'`, and `user` is `user1@'192.168.10.1'`.
-
- All privileges are given to a `current_user`, and the real user has all the privileges of the corresponding `current_user`.
-
-## Best Practices
-
-Here are some usage scenarios of Doris privilege system.
-
-1. Scene 1
-
- The users of Doris cluster are divided into Admin, RD and Client. Administrators have all the rights of the whole cluster, mainly responsible for cluster building, node management and so on. The development engineer is responsible for business modeling, including database building, data import and modification. Users access different databases and tables to get data.
-
- In this scenario, ADMIN or GRANT privileges can be granted to administrators. Give RD CREATE, DROP, ALTER, LOAD, SELECT permissions to any or specified database tables. Give Client SELECT permission to any or specified database table. At the same time, it can also simplify the authorization of multiple users by creating different roles.
-
-2. Scene 2
-
-    There are multiple businesses in a cluster, and each business may use one or more databases. Each business needs to manage its own users. In this scenario, an administrator can create a user with DATABASE-level GRANT privileges for each database. That user can only grant privileges on the specified database to other users.
-
-3. Blacklist
-
-    Doris itself does not support a blacklist, only a whitelist, but we can simulate a blacklist in some way. Suppose a user named `user@'192.%'` is created first, allowing users from `192.*` to log in. If you now want to prohibit logins from `192.168.10.1`, you can create another user `user@'192.168.10.1'` and set a new password. Since `192.168.10.1` has a higher priority than `192.%`, the user can no longer log in from `192.168.10.1` with the old password.
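-
-    A minimal sketch of the two CREATE USER statements described above (the passwords are placeholders):
-
-    ```
-    -- Allow logins from the whole 192.* segment.
-    CREATE USER 'user'@'192.%' IDENTIFIED BY "12345";
-
-    -- "Blacklist" 192.168.10.1 by shadowing it with a different password;
-    -- the more specific host takes priority over '192.%'.
-    CREATE USER 'user'@'192.168.10.1' IDENTIFIED BY "other_password";
-    ```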
-
-
diff --git a/docs/en/administrator-guide/query_cache.md b/docs/en/administrator-guide/query_cache.md
deleted file mode 100644
index bb0cde2e43..0000000000
--- a/docs/en/administrator-guide/query_cache.md
+++ /dev/null
@@ -1,138 +0,0 @@
----
-{
- "title": "QUERY CACHE",
- "language": "en"
-}
----
-
-
-
-# QUERY CACHE
-
-## 1 Demand
-
-Although the database storage layer also has its own cache, that cache is generally keyed on the query content and its granularity is too small: it only takes effect when the data in the table has not changed, so it cannot reduce the huge IO pressure that the business system's inserts, deletes, and queries put on the database. Database caching technology was therefore created to cache hot data, improve application response speed, and greatly relieve the pressure on the back-end database.
-
-- High concurrency scenarios
-  Doris supports high concurrency well, but a single server cannot sustain excessively high QPS.
-
-- Complex Graph Dashboard
-  It is common for the data behind complex dashboards and large-screen applications to come from many tables, with dozens of queries on a single page. Even though each query takes only a few milliseconds, the total time adds up to seconds.
-
-- Trend Analysis
-  In some scenarios, queries cover a given date range and the metrics are shown by date. For example, we want to query the trend of the number of users over the last 7 days. This type of query scans a large amount of data over a wide range of fields, and often takes tens of seconds.
-
-- User repeated query
-  If the product does not have an anti-refresh mechanism, users may accidentally refresh the page repeatedly for various reasons, resulting in a large number of repeated SQL submissions
-
-In the above four scenarios, we have solutions at the application layer: put the query results in Redis and refresh the cache periodically, or let users refresh it manually. However, this solution has the following problems:
-
-- Data inconsistency: we cannot detect data updates, so users often see stale data
-
-- Low hit rate: we usually cache the whole query result. If data is written in real time, the cache is frequently invalidated, resulting in a low hit rate and heavy system load.
-
-- Extra cost: introducing external cache components adds system complexity and additional cost.
-
-## 2 Solutions
-
-At present, we have designed two modules: result cache and partition cache.
-
-## 3 Explanation of terms
-
-1. result cache
-
-Directly caches the result set of a user's SQL query
-
-2. partition cache
-
-Caches the result of each partition's query, at partition granularity
-
-## 4 Design
-
-### 1 `result cache`
-
-result_cache is divided into two types: the first is `result_cache_ttl`, and the second is `result_cache_version`.
-
-#### `result_cache_ttl`
-
-The `result_cache_ttl` variable is set in the user session, and the user can decide whether to turn it on. The TTL is used to determine whether the user's SQL uses the cache; data correctness is not guaranteed when the data changes.
-
-The cache is stored and retrieved according to the connected user and the query SQL. If the expiration time is exceeded, the cache is not hit and the cached entry is cleaned up.
-
-#### ` result_cache_version`
-
-`result_cache_version` stores and fetches the cache according to the signature of the SQL, the partition IDs of the queried table, and the latest version of the partitions. The combination of the three determines a cached dataset. If any one of them changes, such as the SQL changing, the query fields or conditions differing, or the version changing after a data update, the cache will not be hit.
-
-If multiple tables are joined, the latest partition ID and the latest version number are used. If one of the tables is updated, the partition ID or version number will be different, and the cache will not be hit.
-
-### 2 `partition_cache`
-
-1. SQL can be split in parallel, Q = Q1 ∪ Q2 ... ∪ Qn, R= R1 ∪ R2 ... ∪ Rn, Q is the query statement and R is the result set
-2. Split into read-only partition and updatable partition, read-only partition cache, update partition not cache
-
-## 5 usage
-
-|cache type|usage|
-|--|--|
-|result_cache_ttl|Mainly solve the scenario of high QPS and repeated query by users|
-|result_cache_version|It mainly solves the scenario that the whole table has not changed for a long time|
-|partition_cache|It mainly solves the scenario that the historical partition does not change|
-
-## 6 parameter
-
-### fe
-
-1. `cache_per_query_max_row_count`
-- The maximum number of rows cached per query
-- The default value is 3000
-
-2. `cache_per_query_max_size_In_bytes`
-- The maximum cache size per query, in bytes
-- The default value is 1MB
-
-3. `result_cache_ttl_In_milliseconds`
-- Cache duration of result cache
-- The default value is 3S
-
-### be
-
-1. `cache_max_partition_count`
-- The maximum number of partitions cached on BE. `cache_max_partition_count` refers to the maximum number of partitions corresponding to each SQL. If partitions are based on date, more than 2 years of data can be cached. If you want to keep the cache longer, set this parameter larger and also adjust the `cache_result_max_row_count` parameter.
-- Default value : 1024
-
-2. `cache_max_size_in_mb` `cache_elasticity_size_in_mb`
-- The cache memory setting in the backend has two parameters: `cache_max_size_in_mb` (256) and `cache_elasticity_size_in_mb` (128). When memory exceeds `cache_max_size_in_mb` + `cache_elasticity_size_in_mb`, the cache is cleaned up until memory is back under `cache_max_size_in_mb`. These two parameters can be set according to the number of BE nodes, the memory size of the nodes, and the cache hit rate.
-
-## 7 how to use
-
-- use enable_result_cache_ttl
-```
-set `global` enable_result_cache_ttl =true
-```
-
-- use enable_result_cache_version
-```
-set `global` enable_result_cache_version = true
-```
-
-- use enable_partition_cache
-```
-set `global` enable_partition_cache = true
-```
\ No newline at end of file
diff --git a/docs/en/administrator-guide/resource-management.md b/docs/en/administrator-guide/resource-management.md
deleted file mode 100644
index 7d0ed32079..0000000000
--- a/docs/en/administrator-guide/resource-management.md
+++ /dev/null
@@ -1,169 +0,0 @@
----
-{
- "title": "Resource management",
- "language": "en"
-}
----
-
-
-
-# Resource Management
-
-In order to save compute and storage resources in the Doris cluster, Doris needs to reference some external resources to do related work, such as Spark/GPU for queries, HDFS/S3 for external storage, and Spark/MapReduce for ETL, or to connect to external storage through ODBC drivers. Therefore, Doris needs a resource management mechanism to manage these external resources.
-
-## Fundamental Concept
-
-A resource contains basic information such as name and type. The name is globally unique. Different types of resources contain different attributes. Please refer to the introduction of each resource for details.
-
-The creation and deletion of resources can only be performed by users with `admin` permission. A resource belongs to the entire Doris cluster. Users with `admin` permission can grant the permission to use a resource to other users. Please refer to `HELP GRANT` or the Doris documentation.
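-
-For example, granting another user the right to use a resource might look like the following minimal sketch (spark0 and jack are placeholder names; see `HELP GRANT` for the exact syntax supported by your version):
-
-```sql
-GRANT USAGE_PRIV ON RESOURCE 'spark0' TO 'jack'@'%';
-```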
-
-
-## Operation Of Resource
-
-There are three main commands for resource management: `create resource`, `drop resource` and `show resources`. They are to create, delete and check resources. The specific syntax of these three commands can be viewed by executing `help CMD` after MySQL client connects to Doris.
-
-1. CREATE RESOURCE
-
-
- ```sql
- CREATE [EXTERNAL] RESOURCE "resource_name"
- PROPERTIES ("key"="value", ...);
- ```
-
- In the command to create a resource, the user must provide the following information:
-
- * `resource_name` name of the resource
- * `PROPERTIES` related parameters, as follows:
- * `type`: resource type, required. Currently, only spark and odbc_catalog are supported.
- * For other parameters, see the resource introduction
-
-
-
-2. DROP RESOURCE
-
- This command can delete an existing resource. For details, please refer to: `HELP DROP RESOURCE`
-
-3. SHOW RESOURCES
-
- This command can view the resources that the user has permission to use. Please refer to: `HELP SHOW RESOURCES`
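-
-For example (a minimal sketch; `spark0` is the Spark resource created in the example further below), these two commands can be used as follows:
-
-```sql
-SHOW RESOURCES;
-DROP RESOURCE "spark0";
-```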
-
-
-
-## Resources Supported
-
-Currently, Doris can support
-
-* Spark resource: do ETL work
-* ODBC resource: query and import data from external tables
-
-The following shows how the two resources are used.
-
-### Spark
-
-#### Parameter
-
-##### Spark Parameters:
-
-`spark.master`: required. Currently supports yarn and spark://host:port.
-
-`spark.submit.deployMode`: The deployment mode of Spark, required. Supports cluster and client.
-
-`spark.hadoop.yarn.resourcemanager.address`: required when master is yarn.
-
-`spark.hadoop.fs.defaultFS`: required when master is yarn.
-
-Other parameters are optional, refer to: http://spark.apache.org/docs/latest/configuration.html.
-
-##### If spark is used for ETL, also need to specify the following parameters:
-
-`working_dir`: Directory used by ETL. Required when Spark is used as an ETL resource. For example: hdfs://host:port/tmp/doris.
-
-`broker`: The name of the broker. Required when Spark is used as an ETL resource. You need to complete the configuration in advance with the `ALTER SYSTEM ADD BROKER` command.
-
- * `broker.property_key`: The authentication information the broker needs when it reads the intermediate files generated by ETL.
-
-
-
-#### Example
-
-Create a Spark resource named `spark0` in yarn cluster mode.
-
-
-```sql
-CREATE EXTERNAL RESOURCE "spark0"
-PROPERTIES
-(
- "type" = "spark",
- "spark.master" = "yarn",
- "spark.submit.deployMode" = "cluster",
- "spark.jars" = "xxx.jar,yyy.jar",
- "spark.files" = "/tmp/aaa,/tmp/bbb",
- "spark.executor.memory" = "1g",
- "spark.yarn.queue" = "queue0",
- "spark.hadoop.yarn.resourcemanager.address" = "127.0.0.1:9999",
- "spark.hadoop.fs.defaultFS" = "hdfs://127.0.0.1:10000",
- "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris",
- "broker" = "broker0",
- "broker.username" = "user0",
- "broker.password" = "password0"
-);
-```
-
-### ODBC
-
-#### Parameter
-
-##### ODBC Parameters:
-
-`type`: Required, must be `odbc_catalog`. As the type identifier of resource.
-
-`user`: The user name of the external table, required.
-
-`password`: The user password of the external table, required.
-
-`host`: The ip address of the external table, required.
-
-`port`: The port of the external table, required.
-
-`odbc_type`: Indicates the type of the external table. Currently, Doris supports `MySQL` and `Oracle`, and may support more databases in the future. Required for ODBC external tables that refer to this resource; optional for the old MySQL external tables.
-
-`driver`: Indicates the driver dynamic library used by the ODBC external table.
-Required for ODBC external tables that refer to this resource; optional for the old MySQL external tables.
-
-For the usage of ODBC resource, please refer to [ODBC of Doris](../extending-doris/odbc-of-doris.html)
-
-
-#### Example
-
-Create an Oracle ODBC resource named `oracle_odbc`.
-
-```sql
-CREATE EXTERNAL RESOURCE `oracle_odbc`
-PROPERTIES (
-"type" = "odbc_catalog",
-"host" = "192.168.0.1",
-"port" = "8086",
-"user" = "test",
-"password" = "test",
-"database" = "test",
-"odbc_type" = "oracle",
-"driver" = "Oracle 19 ODBC driver"
-);
-```
diff --git a/docs/en/administrator-guide/running-profile.md b/docs/en/administrator-guide/running-profile.md
deleted file mode 100644
index 17ce052f09..0000000000
--- a/docs/en/administrator-guide/running-profile.md
+++ /dev/null
@@ -1,276 +0,0 @@
----
-{
- "title": "Statistics of query execution",
- "language": "en"
-}
----
-
-
-
-# Statistics of query execution
-
-This document introduces the **Running Profile**, which records the runtime status of Doris during query execution. Using this statistical information, we can understand the execution of fragments and become experts at **debugging and tuning** Doris.
-
-## Noun Interpretation
-
-* **FE**: Frontend, frontend node of Doris. Responsible for metadata management and request access.
-
-* **BE**: Backend, backend node of Doris. Responsible for query execution and data storage.
-
-* **Fragment**: FE will convert the execution of specific SQL statements into corresponding fragments and distribute them to BE for execution. BE will execute corresponding fragments and gather the result of RunningProfile to send back FE.
-
-## Basic concepts
-
-FE splits the query plan into fragments and distributes them to BE for execution. BE records the statistics of the **Running State** while executing fragments and prints the statistics of fragment execution into its log. FE can also collect these statistics recorded by each fragment and print the results on FE's web page.
-
-## Specific operation
-
-Turn on the report switch on FE through MySQL command
-
-```
-mysql> set enable_profile=true;
-```
-
-After executing the corresponding SQL statement (in old versions the variable is `is_report_success`), we can see the report information of the corresponding SQL statement on the FE web page like the picture below.
-
-
-The latest **100 statements** executed will be listed here. We can view detailed statistics of RunningProfile.
-```
-Query:
- Summary:
- Query ID: 9664061c57e84404-85ae111b8ba7e83a
- Start Time: 2020-05-02 10:34:57
- End Time: 2020-05-02 10:35:08
- Total: 10s323ms
- Query Type: Query
- Query State: EOF
- Doris Version: trunk
- User: root
- Default Db: default_cluster:test
- Sql Statement: select max(Bid_Price) from quotes group by Symbol
-```
-Here is a detailed list of ```query ID, execution time, execution statement``` and other summary information. The next step is to print the details of each fragment collected from BE.
- ```
- Fragment 0:
- Instance 9664061c57e84404-85ae111b8ba7e83d (host=TNetworkAddress(hostname:192.168.0.1, port:9060)):(Active: 10s270ms, % non-child: 0.14%)
- - MemoryLimit: 2.00 GB
- - BytesReceived: 168.08 KB
- - PeakUsedReservation: 0.00
- - SendersBlockedTimer: 0ns
- - DeserializeRowBatchTimer: 501.975us
- - PeakMemoryUsage: 577.04 KB
- - RowsProduced: 8.322K (8322)
- EXCHANGE_NODE (id=4):(Active: 10s256ms, % non-child: 99.35%)
- - ConvertRowBatchTime: 180.171us
- - PeakMemoryUsage: 0.00
- - RowsReturned: 8.322K (8322)
- - MemoryUsed: 0.00
- - RowsReturnedRate: 811
- ```
-The fragment ID is listed here; ```hostname``` shows the BE node executing the fragment; ```Active: 10s270ms``` shows the total execution time of the node; ```non-child: 0.14%``` means the execution time of the node itself (not including the execution time of child nodes) as a percentage of the total time.
-
-`PeakMemoryUsage` indicates the peak memory usage of `EXCHANGE_NODE`; `RowsReturned` indicates the number of rows returned by `EXCHANGE_NODE`; `RowsReturnedRate` = `RowsReturned` / `ActiveTime`; the meaning of these three statistics is the same in other `NODE`s.
-
-Subsequently, the statistics of the child nodes will be printed in turn. **Here you can distinguish the parent-child relationship by indentation.**
-
-## Profile statistic analysis
-
-A lot of statistical information is collected at BE, so the corresponding meanings of the profile fields are listed below:
-
-#### `Fragment`
- - AverageThreadTokens: Number of threads used to execute the fragment, excluding the usage of the thread pool
- - PeakReservation: Peak memory used by the buffer pool
- - MemoryLimit: Memory limit of the query
- - PeakMemoryUsage: Peak memory usage of the instance
- - RowsProduced: Number of rows produced
-
-#### `BlockMgr`
- - BlocksCreated: Number of Blocks created by BlockMgr
- - BlocksRecycled: Number of Blocks recycled by BlockMgr
- - BytesWritten: Number of bytes written when spilling to disk
- - MaxBlockSize: Maximum size of a single Block
- - TotalReadBlockTime: Total time spent reading blocks from disk
-
-#### `DataStreamSender`
- - BytesSent: Total bytes of data sent
- - IgnoreRows: Number of rows filtered
- - LocalBytesSent: The number of bytes the local node sends to itself during Exchange
- - OverallThroughput: Total throughput = BytesSent / Time
- - SerializeBatchTime: Time spent serializing data before sending
- - UncompressedRowBatchSize: Size of the RowBatch before compression when sending data
-
-#### `ODBC_TABLE_SINK`
- - NumSentRows: Total number of rows written to ODBC table
- - TupleConvertTime: Time spent serializing the data to be sent into insert statements
- - ResultSendTime: Time spent writing data through the ODBC driver
-
-#### `EXCHANGE_NODE`
- - BytesReceived: Size of bytes received by network
- - DataArrivalWaitTime: Total waiting time of sender to push data
- - MergeGetNext: When there is a sort in the lower level node, exchange node will perform a unified merge sort and output an ordered result. This indicator records the total time consumption of merge sorting, including the time consumption of MergeGetNextBatch.
- - MergeGetNextBatch: It takes time for merge node to get data. If it is single-layer merge sort, the object to get data is network queue. For multi-level merge sorting, the data object is child merger.
- - ChildMergeGetNext: When there are too many senders in the lower layer to send data, single thread merge will become a performance bottleneck. Doris will start multiple child merge threads to do merge sort in parallel. The sorting time of child merge is recorded, which is the cumulative value of multiple threads.
- - ChildMergeGetNextBatch: Time it takes for the child merge to get data. If the time consumption is too large, the bottleneck may be the lower-level data sending node.
- - FirstBatchArrivalWaitTime: The time waiting for the first batch come from sender
- - DeserializeRowBatchTimer: Time spent deserializing received data
- - SendersBlockedTotalTimer(*): When the DataStreamRecv's queue buffer is full, wait time of sender
- - ConvertRowBatchTime: Time taken to transfer received data to RowBatch
- - RowsReturned: Number of receiving rows
- - RowsReturnedRate: Rate of rows received
-
-#### `SORT_NODE`
- - InMemorySortTime: In memory sort time
- - InitialRunsCreated: Number of initial sort runs created
- - MergeGetNext: Time cost of MergeSort getting the next batch from multiple sort_runs (only shown when spilled to disk)
- - MergeGetNextBatch: Time cost of MergeSort getting the next batch from one sort_run (only shown when spilled to disk)
- - SortDataSize: Total sorted data
- - TotalMergesPerformed: Number of external sort merges
-
-#### `AGGREGATION_NODE`
- - PartitionsCreated: Number of partition split by aggregate
- - GetResultsTime: Time to get aggregate results from each partition
- - HTResizeTime: Time spent in resizing hashtable
- - HTResize: Number of times hashtable resizes
- - HashBuckets: Number of buckets in hashtable
- - HashBucketsWithDuplicate: Number of buckets with duplicate nodes in the hashtable
- - HashCollisions: Number of hash conflicts generated
- - HashDuplicateNodes: Number of duplicate nodes with the same buckets in hashtable
- - HashFailedProbe: Number of failed probe operations
- - HashFilledBuckets: Number of buckets filled data
- - HashProbe: Number of hashtable probes
- - HashTravelLength: Number of steps moved during hashtable lookups
-
-#### `HASH_JOIN_NODE`
- - ExecOption: The way to construct a HashTable for the right child (synchronous or asynchronous), the right child in Join may be a table or a subquery, the same is true for the left child
- - BuildBuckets: The number of Buckets in HashTable
- - BuildRows: the number of rows of HashTable
- - BuildTime: Time-consuming to construct HashTable
- - LoadFactor: Load factor of HashTable (ie the number of non-empty buckets)
- - ProbeRows: Number of rows of the left child traversed for the Hash Probe
- - ProbeTime: Time spent traversing the left child for the Hash Probe, excluding the time spent calling GetNext on the left child RowBatch
- - PushDownComputeTime: The calculation time of the predicate pushdown condition
- - PushDownTime: The total time consumed by predicate push down; during the Join, qualifying values from the right child are converted into an IN query on the left child
-
-#### `CROSS_JOIN_NODE`
- - ExecOption: The way to construct RowBatchList for the right child (synchronous or asynchronous)
- - BuildRows: The number of rows of RowBatchList (ie the number of rows of the right child)
- - BuildTime: Time-consuming to construct RowBatchList
- - LeftChildRows: the number of rows of the left child
- - LeftChildTime: The time it takes to traverse the left child and find the Cartesian product with the right child, not including the time it takes to call GetNext on the left child RowBatch
-
-#### `UNION_NODE`
- - MaterializeExprsEvaluateTime: When the field types on the two sides of the Union are inconsistent, the time spent evaluating the type conversion exprs and materializing the results
-
-#### `ANALYTIC_EVAL_NODE`
- - EvaluationTime: Total calculation time of the analytic (window) functions
- - GetNewBlockTime: Time spent applying for a new block during initialization. The block caches the window or the entire partition for the analytic function calculation
- - PinTime: Time spent applying for a new block later, or re-reading a block that was written to disk back into memory
- - UnpinTime: Time spent flushing the data of a block to disk when the block is not in use or when the memory pressure of the current operator is high
-
-#### `OLAP_SCAN_NODE`
-
-The `OLAP_SCAN_NODE` is responsible for specific data scanning tasks. One `OLAP_SCAN_NODE` will generate one or more `OlapScanner`. Each Scanner thread is responsible for scanning part of the data.
-
-Some or all of the predicate conditions in the query will be pushed to `OLAP_SCAN_NODE`. Some of these predicate conditions will continue to be pushed down to the storage engine in order to use the storage engine's index for data filtering. The other part will be kept in `OLAP_SCAN_NODE` to filter the data returned from the storage engine.
-
-The profile of the `OLAP_SCAN_NODE` node is usually used to analyze the efficiency of data scanning. It is divided into three layers: `OLAP_SCAN_NODE`, `OlapScanner`, and `SegmentIterator` according to the calling relationship.
-
-The profile of a typical `OLAP_SCAN_NODE` is as follows. Some indicators will have different meanings depending on the storage format (V1 or V2).
-
-```
-OLAP_SCAN_NODE (id=0):(Active: 1.2ms,% non-child: 0.00%)
- - BytesRead: 265.00 B # The amount of data read from the data file. Assuming that 10 32-bit integers are read, the amount of data is 10 * 4B = 40 Bytes. This data only represents the fully expanded size of the data in memory, and does not represent the actual IO size.
- - NumDiskAccess: 1 # The number of disks involved in this ScanNode node.
- - NumScanners: 20 # The number of Scanners generated by this ScanNode.
- - PeakMemoryUsage: 0.00 # Peak memory usage during query, not used yet
- - RowsRead: 7 # The number of rows returned from the storage engine to the Scanner, excluding the number of rows filtered by the Scanner.
- - RowsReturned: 7 # The number of rows returned from ScanNode to the upper node.
- - RowsReturnedRate: 6.979K /sec # RowsReturned/ActiveTime
- - TabletCount: 20 # The number of Tablets involved in this ScanNode.
- - TotalReadThroughput: 74.70 KB/sec # BytesRead divided by the total time spent in this node (from Open to Close). For IO bounded queries, this should be very close to the total throughput of all the disks
- - ScannerBatchWaitTime: 426.886us # To count the time the transfer thread waits for the scanner thread to return a rowbatch.
- - ScannerWorkerWaitTime: 17.745us # To count the time that the scanner thread waits for the available worker threads in the thread pool.
- OlapScanner:
- - BlockConvertTime: 8.941us # The time it takes to convert a vectorized Block into a RowBlock with a row structure. The vectorized Block is VectorizedRowBatch in V1 and RowBlockV2 in V2.
- - BlockFetchTime: 468.974us # Rowset Reader gets the time of the Block.
- - ReaderInitTime: 5.475ms # The time when OlapScanner initializes Reader. V1 includes the time to form MergeHeap. V2 includes the time to generate various Iterators and read the first group of blocks.
- - RowsDelFiltered: 0 # Including the number of rows filtered out according to the Delete information in the Tablet, and the number of rows filtered for marked deleted rows under the unique key model.
- - RowsPushedCondFiltered: 0 # Filter conditions based on the predicates passed down, such as the conditions passed from BuildTable to ProbeTable in Join calculation. This value is not accurate, because if the filtering effect is poor, it will no longer be filtered.
- - ScanTime: 39.24us # The time spent returning data from ScanNode to the upper node.
- - ShowHintsTime_V1: 0ns # Meaningless in V2. In V1, part of the data is read to perform ScanRange splitting.
- SegmentIterator:
- - BitmapIndexFilterTimer: 779ns # Use bitmap index to filter data time-consuming.
- - BlockLoadTime: 415.925us # SegmentReader(V1) or SegmentIterator(V2) gets the time of the block.
- - BlockSeekCount: 12 # The number of block seeks when reading Segment.
- - BlockSeekTime: 222.556us # It takes time to block seek when reading Segment.
- - BlocksLoad: 6 # read the number of blocks
- - CachedPagesNum: 30 # In V2 only, when PageCache is enabled, the number of Pages that hit the Cache.
- - CompressedBytesRead: 0.00 # In V1, the size of the data read from the file before decompression. In V2, the pre-compressed size of the read page that did not hit the PageCache.
- - DecompressorTimer: 0ns # Data decompression takes time.
- - IOTimer: 0ns # IO time for actually reading data from the operating system.
- - IndexLoadTime_V1: 0ns # Only in V1, it takes time to read Index Stream.
- - NumSegmentFiltered: 0 # When generating Segment Iterator, the number of Segments that are completely filtered out through column statistics and query conditions.
- - NumSegmentTotal: 6 # Query the number of all segments involved.
- - RawRowsRead: 7 # The number of raw rows read in the storage engine. See below for details.
- - RowsBitmapIndexFiltered: 0 # Only in V2, the number of rows filtered by the Bitmap index.
- - RowsBloomFilterFiltered: 0 # Only in V2, the number of rows filtered by BloomFilter index.
- - RowsKeyRangeFiltered: 0 # In V2 only, the number of rows filtered out by SortkeyIndex index.
- - RowsStatsFiltered: 0 # In V2, the number of rows filtered by the ZoneMap index, including the deletion condition. V1 also contains the number of rows filtered by BloomFilter.
- - RowsConditionsFiltered: 0 # Only in V2, the number of rows filtered by various column indexes.
- - RowsVectorPredFiltered: 0 # The number of rows filtered by the vectorized condition filtering operation.
- - TotalPagesNum: 30 # Only in V2, the total number of pages read.
- - UncompressedBytesRead: 0.00 # V1 is the decompressed size of the read data file (if the file does not need to be decompressed, the file size is directly counted). In V2, only the decompressed size of the Page that missed PageCache is counted (if the Page does not need to be decompressed, the Page size is directly counted)
- - VectorPredEvalTime: 0ns # Time-consuming of vectorized condition filtering operation.
- - ShortPredEvalTime: 0ns # Time-consuming of short-circuiting predicate condition filtering operations.
- - PredColumnReadTime: 0ns # Time-consuming of predicate column read.
- - LazyReadTime: 0ns # Time-consuming of non-predicate column read.
- - OutputColumnTime: 0ns # Time-consuming of materialize columns.
-```
-
-The predicate push down and index usage can be inferred from the related indicators of the number of data rows in the profile. The following only describes the profile in the reading process of segment V2 format data. In segment V1 format, the meaning of these indicators is slightly different.
-
- - When reading a segment V2, if the query has key_ranges (the query range composed of prefix keys), first filter the data through the SortkeyIndex index, and the number of filtered rows is recorded in `RowsKeyRangeFiltered`.
- - After that, use the Bitmap index to perform precise filtering on the columns containing the bitmap index in the query condition, and the number of filtered rows is recorded in `RowsBitmapIndexFiltered`.
- - After that, according to the equivalent (eq, in, is) condition in the query condition, use the BloomFilter index to filter the data and record it in `RowsBloomFilterFiltered`. The value of `RowsBloomFilterFiltered` is the difference between the total number of rows of the Segment (not the number of rows filtered by the Bitmap index) and the number of remaining rows after BloomFilter, so the data filtered by BloomFilter may overlap with the data filtered by Bitmap.
- - After that, use the ZoneMap index to filter the data according to the query conditions and delete conditions and record it in `RowsStatsFiltered`.
- - `RowsConditionsFiltered` is the number of rows filtered by various indexes, including the values of `RowsBloomFilterFiltered` and `RowsStatsFiltered`.
- - So far, the Init phase is completed, and the number of rows filtered by the condition to be deleted in the Next phase is recorded in `RowsDelFiltered`. Therefore, the number of rows actually filtered by the delete condition are recorded in `RowsStatsFiltered` and `RowsDelFiltered` respectively.
- - `RawRowsRead` is the final number of rows to be read after the above filtering.
- - `RowsRead` is the number of rows finally returned to Scanner. `RowsRead` is usually smaller than `RawRowsRead`, because returning from the storage engine to the Scanner may go through a data aggregation. If the difference between `RawRowsRead` and `RowsRead` is large, it means that a large number of rows are aggregated, and aggregation may be time-consuming.
- - `RowsReturned` is the number of rows finally returned by ScanNode to the upper node. `RowsReturned` is usually smaller than `RowsRead`. Because there will be some predicate conditions on the Scanner that are not pushed down to the storage engine, filtering will be performed once. If the difference between `RowsRead` and `RowsReturned` is large, it means that many rows are filtered in the Scanner. This shows that many highly selective predicate conditions are not pushed to the storage engine. The filtering efficiency in Scanner is worse than that in storage engine.
-
-Through the above indicators, you can roughly analyze the number of rows processed by the storage engine and the size of the final filtered result row. Through the `Rows***Filtered` group of indicators, it is also possible to analyze whether the query conditions are pushed down to the storage engine, and the filtering effects of different indexes. In addition, a simple analysis can be made through the following aspects.
-
- - Many indicators under `OlapScanner`, such as `IOTimer`, `BlockFetchTime`, etc., are the accumulation of all Scanner thread indicators, so the value may be relatively large. And because the Scanner thread reads data asynchronously, these cumulative indicators can only reflect the cumulative working time of the Scanner, and do not directly represent the time consumption of the ScanNode. The time-consuming ratio of ScanNode in the entire query plan is the value recorded in the `Active` field. Sometimes it appears that `IOTimer` has tens of seconds, but `Active` is actually only a few seconds. This situation is usually due to:
- - `IOTimer` is the accumulated time of multiple Scanners, and there are more Scanners.
- - The upper node is time-consuming. For example, the upper node takes 100 seconds, while the lower ScanNode only takes 10 seconds. The field reflected in `Active` may be only a few milliseconds. Because while the upper layer is processing data, ScanNode has performed data scanning asynchronously and prepared the data. When the upper node obtains data from ScanNode, it can obtain the prepared data, so the Active time is very short.
- - `NumScanners` represents the number of Tasks submitted by the Scanner to the thread pool. It is scheduled by the thread pool in `RuntimeState`. The two parameters `doris_scanner_thread_pool_thread_num` and `doris_scanner_thread_pool_queue_size` control the size of the thread pool and the queue length respectively (see the config sketch after this list). Too many or too few threads will affect query efficiency. At the same time, some summary indicators can be divided by the number of threads to roughly estimate the time consumption of each thread.
- - `TabletCount` indicates the number of tablets to be scanned. Too many may mean a lot of random read and data merge operations.
- - `UncompressedBytesRead` indirectly reflects the amount of data read. If the value is large, it means that there may be a lot of IO operations.
- - `CachedPagesNum` and `TotalPagesNum` can check the hitting status of PageCache. The higher the hit rate, the less time-consuming IO and decompression operations.
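-
-As a minimal sketch (illustrative values only; tune according to your hardware), the two scanner thread pool parameters mentioned above are BE configuration items, typically set in `be.conf`:
-
-```
-# be.conf -- illustrative values
-doris_scanner_thread_pool_thread_num = 48
-doris_scanner_thread_pool_queue_size = 102400
-```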
-
-#### `Buffer pool`
- - AllocTime: Memory allocation time
- - CumulativeAllocationBytes: Cumulative amount of memory allocated
- - CumulativeAllocations: Cumulative number of memory allocations
- - PeakReservation: Peak of reservation
- - PeakUnpinnedBytes: Amount of unpinned memory data
- - PeakUsedReservation: Peak usage of reservation
- - ReservationLimit: Limit of reservation of bufferpool
-
diff --git a/docs/en/administrator-guide/runtime-filter.md b/docs/en/administrator-guide/runtime-filter.md
deleted file mode 100644
index 9089e0d2e2..0000000000
--- a/docs/en/administrator-guide/runtime-filter.md
+++ /dev/null
@@ -1,284 +0,0 @@
----
-{
- "title": "Runtime Filter",
- "language": "en"
-}
----
-
-
-
-# Runtime Filter
-
-Runtime Filter is a new feature officially added in Doris 0.15. It is designed to dynamically generate filter conditions for certain Join queries at runtime to reduce the amount of scanned data, avoid unnecessary I/O and network transmission, and speed up the query.
-
-For its design, implementation and effects, please refer to [ISSUE 6116](https://github.com/apache/incubator-doris/issues/6116).
-
-## Noun Interpretation
-
-* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access.
-* BE: Backend, the back-end node of Doris. Responsible for query execution and data storage.
-* Left table: the table on the left during Join query. Perform Probe operation. The order can be adjusted by Join Reorder.
-* Right table: the table on the right during Join query. Perform the Build operation. The order can be adjusted by Join Reorder.
-* Fragment: FE will convert the execution of specific SQL statements into corresponding fragments and send them to BE for execution. The corresponding Fragment is executed on the BE, and the results are aggregated and returned to the FE.
-* Join on clause: `Aa=Bb` in `A join B on Aa=Bb`. During query planning the join conjuncts are generated from it, including the exprs used by the join Build and Probe, where the Build expr is called the src expr and the Probe expr is called the target expr in the Runtime Filter.
-
-## Principle
-
-Runtime Filter is generated during query planning, constructed in HashJoinNode, and applied in ScanNode.
-
-For example, there is currently a Join query between table T1 and table T2. Its Join mode is HashJoin. T1 is a fact table with 100,000 rows of data, and T2 is a dimension table with 2,000 rows of data. The actual situation of the Doris join is:
-```
-| > HashJoinNode <
-| | |
-| | 100000 | 2000
-| | |
-| OlapScanNode OlapScanNode
-| ^ ^
-| | 100000 | 2000
-| T1 T2
-|
-```
-Obviously, scanning T2 is much faster than scanning T1. If we deliberately wait for a while before scanning T1, then after T2 has sent its scanned data to HashJoinNode, HashJoinNode can compute a filter condition based on the data of T2, such as the maximum and minimum values of T2's data, or build a Bloom Filter, and then send this filter condition to the ScanNode waiting to scan T1. The latter applies this filter condition and delivers only the filtered data to HashJoinNode, thereby reducing the number of hash table probes and the network overhead. This filter condition is the Runtime Filter, and the effect is as follows:
-```
-| > HashJoinNode <
-| | |
-| | 6000 | 2000
-| | |
-| OlapScanNode OlapScanNode
-| ^ ^
-| | 100000 | 2000
-| T1 T2
-|
-```
-If the filter condition (Runtime Filter) can be pushed down to the storage engine, in some cases, the index can be used to directly reduce the amount of scanned data, thereby greatly reducing the scanning time. The effect is as follows:
-```
-| > HashJoinNode <
-| | |
-| | 6000 | 2000
-| | |
-| OlapScanNode OlapScanNode
-| ^ ^
-| | 6000 | 2000
-| T1 T2
-|
-```
-It can be seen that, unlike predicate push-down and partition pruning, the Runtime Filter is a filter condition dynamically generated at runtime: when the query runs, the join on clause is parsed to determine the filter expression, and the expression is broadcast to the ScanNode that is reading the left table, thereby reducing the amount of scanned data, which in turn reduces the number of hash table probes and avoids unnecessary I/O and network transmission.
-
-Runtime Filter is mainly used to optimize joins between a large table and a small table. If the amount of data in the left table is too small, or the amount of data in the right table is too large, the Runtime Filter may not achieve the expected effect.
-
-## Usage
-
-### Runtime Filter query options
-
-For query options related to Runtime Filter, please refer to the following sections:
-
-- The first query option is to adjust the type of Runtime Filter used. In most cases, you only need to adjust this option, and keep the other options as default.
-
- - `runtime_filter_type`: Includes Bloom Filter, MinMax Filter, IN predicate and IN Or Bloom Filter. By default, only IN Or Bloom Filter is used. In some cases, performance is higher when Bloom Filter, MinMax Filter and IN predicate are all used at the same time.
-
-- The other query options usually only need to be adjusted further in certain specific scenarios to achieve the best results. They are usually tuned only after performance testing, and only for queries that are resource-intensive, run long enough, and are frequent enough.
-
- - `runtime_filter_mode`: Used to adjust the push-down strategy of the Runtime Filter, including the three strategies OFF, LOCAL, and GLOBAL. The default setting is the GLOBAL strategy.
-
- - `runtime_filter_wait_time_ms`: the time that ScanNode in the left table waits for each Runtime Filter, the default is 1000ms
-
- - `runtime_filters_max_num`: The maximum number of Bloom Filters in the Runtime Filter that can be applied to each query, the default is 10
-
- - `runtime_bloom_filter_min_size`: the minimum length of Bloom Filter in Runtime Filter, default 1048576 (1M)
-
- - `runtime_bloom_filter_max_size`: the maximum length of Bloom Filter in Runtime Filter, the default is 16777216 (16M)
-
- - `runtime_bloom_filter_size`: The default length of Bloom Filter in Runtime Filter, the default is 2097152 (2M)
-
- - `runtime_filter_max_in_num`: If the number of rows in the right table of the join is greater than this value, we will not generate an IN predicate, the default is 1024
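-
-For instance, a minimal sketch of adjusting a few of these options at the session level before running a query (the values here are purely illustrative):
-
-```
-set runtime_filter_type="BLOOM_FILTER,IN,MIN_MAX";
-set runtime_filter_wait_time_ms=2000;
-set runtime_bloom_filter_size=4194304;
-```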
-
-The query options are further explained below.
-
-#### 1.runtime_filter_type
-Type of Runtime Filter used.
-
-**Type**: Number (1, 2, 4, 8) or the corresponding mnemonic string (IN, BLOOM_FILTER, MIN_MAX, ```IN_OR_BLOOM_FILTER```); the default is 8 (```IN_OR_BLOOM_FILTER```). To use multiple types, separate them with commas and note that the value needs to be quoted; any number of types can be combined, for example:
-```
-set runtime_filter_type="BLOOM_FILTER,IN,MIN_MAX";
-```
-Equivalent to:
-```
-set runtime_filter_type=7;
-```
-
-**Precautions for use**
-
-- **IN or Bloom Filter**: According to the actual number of rows in the right table during execution, the system automatically determines whether to use IN predicate or Bloom Filter.
- - By default, the IN predicate is used when the number of data rows in the right table is less than 1024 (which can be adjusted with `runtime_filter_max_in_num` in the session variables); otherwise, the Bloom Filter is used.
-- **Bloom Filter**: Has a certain false positive rate, which results in a little less data being filtered than expected, but it will not make the final result inaccurate. In most cases, the Bloom Filter improves performance or has no significant impact on it, but in some cases it causes performance degradation.
- - Bloom Filter construction and application overhead is high, so when the filtering rate is low, or the amount of data in the left table is small, the Bloom Filter may cause performance degradation.
- - At present, the Bloom Filter can only be pushed down to the storage engine when it is applied to a Key column of the left table, and test results show that the Bloom Filter often reduces performance when it is not pushed down to the storage engine.
- - Currently the Bloom Filter only has short-circuit logic when used for expression filtering on ScanNode, that is, when the false positive rate is too high, the Bloom Filter is no longer used; but there is no such short-circuit logic when the Bloom Filter is pushed down to the storage engine, so when the filtering rate is low, it may cause performance degradation.
-
-- **MinMax Filter**: Contains the maximum value and the minimum value, thereby filtering data smaller than the minimum value and greater than the maximum value. The filtering effect of the MinMax Filter is related to the type of the Key column in the join on clause and the data distribution of the left and right tables.
- - When the type of the Key column in the join on clause is int/bigint/double, etc., in extreme cases, if the maximum and minimum values of the left and right tables are the same, there is no effect; the effect is best when the maximum value of the right table is less than the minimum value of the left table, or the minimum value of the right table is greater than the maximum value of the left table.
- - When the type of the Key column in the join on clause is varchar, etc., applying the MinMax Filter will often cause performance degradation.
-
-- **IN predicate**: Constructs an IN predicate based on all the values of the Key listed in the join on clause for the right table, and uses the constructed IN predicate to filter the left table. Compared with the Bloom Filter, the cost of construction and application is lower, and it tends to perform better when the amount of data in the right table is small.
- - By default, it is pushed down only when the number of data rows in the right table is less than 1024 (which can be adjusted with `runtime_filter_max_in_num` in the session variables).
- - Currently the IN predicate already implements a merge method.
- - When the IN predicate and other filters are specified at the same time, and the number of values of the IN predicate does not reach runtime_filter_max_in_num, Doris will try to remove the other filters. The reason is that the IN predicate is an exact filtering condition, so even without other filters it can filter efficiently; if they are used at the same time, the other filters do useless work. Currently, the non-IN predicates can only be removed when the producer and consumer of the Runtime Filter are in the same fragment.
-
-#### 2.runtime_filter_mode
-Used to control the transmission range of Runtime Filter between instances.
-
-**Type**: Number (0, 1, 2) or corresponding mnemonic string (OFF, LOCAL, GLOBAL), default 2 (GLOBAL).
-
-**Precautions for use**
-
-LOCAL: Relatively conservative. The constructed Runtime Filter can only be used in the same Fragment on the same instance (the smallest unit of query execution), that is, the Runtime Filter producer (the HashJoinNode that constructs the Filter) and the consumer (the ScanNode that uses the Runtime Filter) are in the same Fragment, as in the common case of a broadcast join;
-
-GLOBAL: Relatively radical. In addition to satisfying the scenario of the LOCAL strategy, the Runtime Filter can also be combined and transmitted to different Fragments on different instances via the network. For example, the Runtime Filter producer and consumer are in different Fragments, such as shuffle join.
-
-In most cases, the GLOBAL strategy can optimize queries in a wider range of scenarios, but in some shuffle joins, the cost of generating and merging Runtime Filters exceeds the performance advantage brought to the query, and you can consider changing to the LOCAL strategy.
-
-If the join query involved in the cluster does not improve performance due to Runtime Filter, you can change the setting to OFF to completely turn off the function.
-
-When building and applying Runtime Filters on different Fragments, the reasons and strategies for merging Runtime Filters can be found in [ISSUE 6116](https://github.com/apache/incubator-doris/issues/6116)
-
-#### 3.runtime_filter_wait_time_ms
-The time to wait for the Runtime Filter.
-
-**Type**: integer, default 1000, unit ms
-
-**Precautions for use**
-
-After the Runtime Filter is turned on, the ScanNode in the table on the left will wait for a period of time for each Runtime Filter assigned to itself before scanning the data, that is, if the ScanNode is assigned 3 Runtime Filters, it will wait at most 3000ms.
-
-Because it takes time to build and merge the Runtime Filter, ScanNode will try to push down the Runtime Filter that arrives within the waiting time to the storage engine. If the waiting time is exceeded, ScanNode will directly start scanning data using the Runtime Filter that has arrived.
-
-If the Runtime Filter arrives after ScanNode has started scanning, ScanNode will not push that Runtime Filter down to the storage engine. Instead, ScanNode will apply expression filtering based on the Runtime Filter to the data subsequently scanned from the storage engine; the data already scanned before the Runtime Filter arrived will not have it applied, so the intermediate data size will be larger than the optimal solution, but serious performance degradation can be avoided.
-
-If the cluster is busy and there are many resource-intensive or long-running queries on it, consider increasing the waiting time to avoid missing optimization opportunities for complex queries. If the cluster load is light and there are many small queries that take only a few seconds, you can consider reducing the waiting time to avoid adding up to 1s of latency to each query.
-
-#### 4.runtime_filters_max_num
-The upper limit of the number of Bloom Filters in the Runtime Filter generated by each query.
-
-**Type**: integer, default 10
-
-**Precautions for use**
-Currently, only the number of Bloom Filters is limited, because the construction and application of Bloom Filters are more expensive than MinMax Filter and IN predicate.
-
-If the number of Bloom Filters generated exceeds the maximum allowable number, then the Bloom Filter with a large selectivity is retained. A large selectivity means that more rows are expected to be filtered. This setting can prevent Bloom Filter from consuming too much memory overhead and causing potential problems.
-```
-Selectivity = (HashJoinNode Cardinality / HashJoinNode left child Cardinality)
-- Because the cardinality estimation in FE is currently inaccurate, the selectivity of the Bloom Filter calculated here is inaccurate, so in the end only part of the Bloom Filters may be retained, effectively at random.
-```
-This query option needs to be adjusted only when tuning some long-consuming queries involving joins between large tables.
-
-#### 5. Bloom Filter length related parameters
-Including `runtime_bloom_filter_min_size`, `runtime_bloom_filter_max_size`, `runtime_bloom_filter_size`, used to determine the size (in bytes) of the Bloom Filter data structure used by the Runtime Filter.
-
-**Type**: Integer
-
-**Precautions for use**
-Because it is necessary to ensure that the length of the Bloom Filter constructed by each HashJoinNode is the same to be merged, the length of the Bloom Filter is currently calculated in the FE query planning.
-
-If you can get the number of data rows (Cardinality) in the statistical information of the join right table, it will try to estimate the optimal size of the Bloom Filter based on Cardinality, and round to the nearest power of 2 (log value with the base 2). If the Cardinality of the table on the right cannot be obtained, the default Bloom Filter length `runtime_bloom_filter_size` will be used. `runtime_bloom_filter_min_size` and `runtime_bloom_filter_max_size` are used to limit the minimum and maximum length of the final Bloom Filter.
-
-Larger Bloom Filters are more effective when processing high-cardinality input sets, but require more memory. If the query needs to filter high-cardinality columns (for example, containing millions of distinct values), you can consider increasing the value of `runtime_bloom_filter_size` and running some benchmark tests, which helps make the Bloom Filter more accurate and obtain the expected performance improvement.
-
-The effectiveness of the Bloom Filter depends on the data distribution of the query, so the Bloom Filter length is usually adjusted only for specific queries rather than modified globally; generally this query option only needs to be adjusted for long-running queries involving joins between large tables.
-
-### View Runtime Filter generated by query
-
-The query plan that can be displayed by the `explain` command includes the join on clause information used by each Fragment, as well as comments on the generation and use of the Runtime Filter by the Fragment, so as to confirm whether the Runtime Filter is applied to the desired join on clause.
-- The Fragment that generates the Runtime Filter contains a comment such as `runtime filters: filter_id[type] <- table.column`.
-- The Fragment that uses the Runtime Filter contains a comment such as `runtime filters: filter_id[type] -> table.column`.
-
-The query in the following example uses a Runtime Filter with ID RF000.
-```
-CREATE TABLE test (t1 INT) DISTRIBUTED BY HASH (t1) BUCKETS 2 PROPERTIES("replication_num" = "1");
-INSERT INTO test VALUES (1), (2), (3), (4);
-
-CREATE TABLE test2 (t2 INT) DISTRIBUTED BY HASH (t2) BUCKETS 2 PROPERTIES("replication_num" = "1");
-INSERT INTO test2 VALUES (3), (4), (5);
-
-EXPLAIN SELECT t1 FROM test JOIN test2 where test.t1 = test2.t2;
-+-------------------------------------------------------------------+
-| Explain String |
-+-------------------------------------------------------------------+
-| PLAN FRAGMENT 0 |
-| OUTPUT EXPRS:`t1` |
-| |
-| 4:EXCHANGE |
-| |
-| PLAN FRAGMENT 1 |
-| OUTPUT EXPRS: |
-| PARTITION: HASH_PARTITIONED: `default_cluster:ssb`.`test`.`t1` |
-| |
-| 2:HASH JOIN |
-| | join op: INNER JOIN (BUCKET_SHUFFLE) |
-| | equal join conjunct: `test`.`t1` = `test2`.`t2` |
-| | runtime filters: RF000[in] <- `test2`.`t2` |
-| | |
-| |----3:EXCHANGE |
-| | |
-| 0:OlapScanNode |
-| TABLE: test |
-| runtime filters: RF000[in] -> `test`.`t1` |
-| |
-| PLAN FRAGMENT 2 |
-| OUTPUT EXPRS: |
-| PARTITION: HASH_PARTITIONED: `default_cluster:ssb`.`test2`.`t2` |
-| |
-| 1:OlapScanNode |
-| TABLE: test2 |
-+-------------------------------------------------------------------+
--- The line of `runtime filters` above shows that `2:HASH JOIN` of `PLAN FRAGMENT 1` generates IN predicate with ID RF000,
--- Among them, the key values of `test2`.`t2` are only known at runtime,
--- This IN predicate is used in `0:OlapScanNode` to filter unnecessary data when reading `test`.`t1`.
-
-SELECT t1 FROM test JOIN test2 where test.t1 = test2.t2;
--- Return 2 rows of results [3, 4];
-
--- Through the query profile (set enable_profile=true;) you can view the detailed information of the internal work of the query,
--- Including whether each Runtime Filter is pushed down, waiting time,
--- and the total time from prepare to receiving Runtime Filter for OLAP_SCAN_NODE.
-RuntimeFilter:in:
- - HasPushDownToEngine: true
- - AWaitTimeCost: 0ns
- - EffectTimeCost: 2.76ms
-
--- In addition, in the OLAP_SCAN_NODE of the profile, you can also view the filtering effect
--- and time consumption after the Runtime Filter is pushed down.
- - RowsVectorPredFiltered: 9.320008M (9320008)
- - VectorPredEvalTime: 364.39ms
-```
-
-## Runtime Filter planning rules
-1. Runtime Filters are only generated for equality conditions in the join on clause, excluding Null-safe conditions, because they may filter out null values of the join's left table.
-2. Pushing down a Runtime Filter to the left table of left outer, full outer, and anti joins is not supported;
-3. Cases where src expr or target expr is a constant are not supported;
-4. Cases where src expr and target expr are equal are not supported;
-5. src expr of type `HLL` or `BITMAP` is not supported;
-6. Currently the Runtime Filter can only be pushed down to OlapScanNode;
-7. NULL-checking expressions such as `COALESCE/IFNULL/CASE` are not supported in target expr, because when the join on clause of another join above an outer join contains a NULL-checking expression and a Runtime Filter is generated, pushing this Runtime Filter down to the left table of the outer join may cause incorrect results;
-8. Cases where the column (slot) in target expr cannot find an equivalent column in the original table are not supported;
-9. Column transitivity (propagation) is not supported. This includes two cases:
- - First, when the join on clause contains A.k = B.k and B.k = C.k, currently C.k can only be pushed down to B.k, but not to A.k;
- - Second, for example, if the join on clause contains Aa + Bb = Cc and Aa can be transmitted to Ba, that is, Aa and Ba are equivalent columns, then Aa can be replaced with Ba and the Runtime Filter can then be pushed down to B (if Aa and Ba are not equivalent columns, it cannot be pushed down to B, because the target expr must be bound to a single join left table);
-10. The types of target expr and src expr must be equal, because the Bloom Filter is based on hashing; if the types are not equal, it will try to convert the type of target expr to the type of src expr;
-11. Runtime Filters generated from `PlanNode.Conjuncts` are not supported. Unlike HashJoinNode's `eqJoinConjuncts` and `otherJoinConjuncts`, Runtime Filters generated from `PlanNode.Conjuncts` were found in testing to possibly cause incorrect results: for example, when an `IN` subquery is converted to a join, the automatically generated join on clause is stored in `PlanNode.Conjuncts`, and applying a Runtime Filter in this case may cause some rows to be missing from the result.
diff --git a/docs/en/administrator-guide/small-file-mgr.md b/docs/en/administrator-guide/small-file-mgr.md
deleted file mode 100644
index 2a8226edda..0000000000
--- a/docs/en/administrator-guide/small-file-mgr.md
+++ /dev/null
@@ -1,104 +0,0 @@
----
-{
- "title": "File Manager",
- "language": "en"
-}
----
-
-
-
-# File Manager
-
-Some functions in Doris require some user-defined files. For example, public keys, key files, certificate files and so on are used to access external data sources. The File Manager provides a function that allows users to upload these files in advance and save them in Doris system, which can then be referenced or accessed in other commands.
-
-## Noun Interpretation
-
-* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access.
-* BE: Backend, Doris's back-end node. Responsible for query execution and data storage.
-* BDBJE: Oracle Berkeley DB Java Edition. Distributed embedded database for persistent metadata in FE.
-* SmallFileMgr: File Manager. Responsible for creating and maintaining user files.
-
-## Basic concepts
-
-Files are files created and saved by users in Doris.
-
-A file is located by `database`, `catalog`, `file_name`. At the same time, each file also has a globally unique ID (file_id), which serves as the identification in the system.
-
-File creation and deletion can only be performed by users with `admin` privileges. A file belongs to a database. Users who have access to a database (queries, imports, modifications, etc.) can use the files created under the database.
-
-## Specific operation
-
-File management has three main commands: `CREATE FILE`, `SHOW FILE` and `DROP FILE`, creating, viewing and deleting files respectively. The specific syntax of these three commands can be viewed by connecting to Doris and executing `HELP cmd;`.
-
-1. CREATE FILE
-
- In the command to create a file, the user must provide the following information:
-
- * file_name: File name. User-defined, unique within a catalog.
- * Catalog: Category of files. User-defined, unique within a database.
-
- > Doris also has some special classification names for specific commands.
-
- > 1. Kafka
-
- > When the data source is specified as Kafka in a Routine Load command and a file needs to be referenced, Doris by default looks for the file under the catalog named "kafka".
-
- * url: the download address of the file. Currently, only unauthenticated HTTP download addresses are supported. This download address is only used to download files from this address when executing the create file command. When the file is successfully created and saved in Doris, the address will no longer be used.
- * md5: optional. The MD5 value of the file. If the user provides this value, the MD5 value will be checked after the file is downloaded. File creation fails if validation fails.
-
- When the file is created successfully, the file-related information will be persisted in Doris. Users can view successfully created files through the `SHOW FILE` command; a sample `CREATE FILE` statement is sketched after this list.
-
-2. SHOW FILE
-
- This command allows you to view files that have been created successfully. Specific operations see: `HELP SHOW FILE;`
-
-3. DROP FILE
-
- This command can delete a file that has been created. Specific operations see: `HELP DROP FILE;`
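-
-A minimal sketch of a `CREATE FILE` statement (the file name, catalog and URL are hypothetical placeholders; the optional `md5` property is omitted here):
-
-```sql
-CREATE FILE "ca.pem"
-PROPERTIES
-(
-    "url" = "https://example.com/kafka-key/ca.pem",
-    "catalog" = "kafka"
-);
-```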
-
-## Implementation details
-
-### Create and delete files
-
-When the user executes the `CREATE FILE` command, FE downloads the file from a given URL. The contents of the file are stored in FE memory directly in the form of Base64 encoding. At the same time, the file content and meta-information related to the file will be persisted in BDBJE. All created files, their meta-information and file content reside in FE memory. If the FE goes down and restarts, meta information and file content will also be loaded into memory from the BDBJE. When a file is deleted, the relevant information is deleted directly from FE memory and persistent information is deleted from BDBJE.
-
-### Use of files
-
-If the FE side needs to use the created file, SmallFileMgr will directly save the data in FE memory as a local file, store it in the specified directory, and return the local file path for use.
-
-If the BE side needs to use the created file, BE will download the file content to the specified directory on BE through FE's HTTP interface `api/get_small_file` for use. At the same time, BE also records the information of the files that have been downloaded in memory. When BE requests a file, it first checks whether the local file exists and verifies it. If the validation passes, the local file path is returned directly. If the validation fails, the local file is deleted and downloaded from FE again. When BE restarts, local files are preloaded into memory.
-
-## Use restrictions
-
-Because the file meta-information and content are stored in FE memory, by default only files smaller than 1MB can be uploaded, and the total number of files is limited to 100. These limits can be modified through the configuration items described in the next section.
-
-## Relevant configuration
-
-1. FE configuration
-
-* `small_file_dir`: The path used to store uploaded files, defaulting to the `small_files/` directory of the FE runtime directory.
-* `max_small_file_size_bytes`: A single file size limit in bytes. The default is 1MB. File creation larger than this configuration will be rejected.
-* `max_small_file_number`: The total number of files supported by a Doris cluster. The default is 100. When the number of files created exceeds this value, subsequent creation will be rejected.
-
- > If you need to upload more files or increase the size limit of a single file, you can modify the `max_small_file_size_bytes` and `max_small_file_number` parameters by using the `ADMIN SET CONFIG` command. However, the increase in the number and size of files will lead to an increase in FE memory usage.
-
-2. BE configuration
-
-* `small_file_dir`: The path used to store files downloaded from FE, defaulting to the `lib/small_files/` directory of the BE runtime directory.
diff --git a/docs/en/administrator-guide/sql-mode.md b/docs/en/administrator-guide/sql-mode.md
deleted file mode 100644
index 90ad36783c..0000000000
--- a/docs/en/administrator-guide/sql-mode.md
+++ /dev/null
@@ -1,76 +0,0 @@
----
-{
- "title": "SQL MODE",
- "language": "en"
-}
----
-
-
-
-# SQL MODE
-
-The SQL MODE supported by Doris is modeled on MySQL's sql mode management mechanism. Each client can set its own sql mode, and a database administrator with the admin permission can set the global sql mode.
-
-## Sql mode introduction
-
-SQL MODE enables users to switch between different styles of SQL syntax and data validation strictness, making Doris more compatible with other databases. For example, in some databases the '||' symbol is a string concatenation operator, while in Doris it is equivalent to 'or'. In this case, users only need to use sql mode to switch to the style they want. Each client can set its own sql mode, which is valid in the current session; only users with the admin permission can set the global sql mode.
-
-## Theory
-
-SQL MODE is stored in a session variable as a 64-bit long value. Each bit of this value represents the on/off state (1 for on, 0 for off) of one mode. As long as we know the specific bit of each mode, we can easily and quickly check and manipulate the sql mode through bit operations.
-
-Every time you query sql mode, the long type will be parsed into a user-readable string. Similarly, the sql mode string sent by the user to the server will be parsed into a long type that can be stored in session variables.
-
-The global sql mode is persisted, so it only needs to be set once; even after the program restarts, the last global sql mode can be recovered.
-
-## Operation
-
-1、set sql mode
-
-```
-set global sql_mode = ""
-set session sql_mode = ""
-```
->At present, Doris's default sql mode is empty.
->Setting the global sql mode requires the admin permission and affects all clients that connect afterwards.
->Setting the session sql mode only affects the current session. The default scope is session.
-
-2、select sql mode
-
-```
-select @@global.sql_mode
-select @@session.sql_mode
-```
->In addition to this method, you can also view the current sql mode by returning all session variables as follows
-
-```
-show global variables
-show session variables
-```
-
-## supported mode
-
-1. `PIPES_AS_CONCAT`
-
- Treat '||' as a string concatenation operator (same as CONCAT()) rather than as a synonym for OR. (e.g., `'a'||'b' = 'ab'`, `1||0 = '10'`)
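-
-   A minimal sketch of switching this mode at the session level and observing the effect (illustrative only):
-
-   ```
-   set session sql_mode = 'PIPES_AS_CONCAT';
-   select 'a'||'b';   -- returns 'ab' instead of the result of a logical OR
-   ```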
-
-## combine mode
-
-(Work in progress)
\ No newline at end of file
diff --git a/docs/en/administrator-guide/time-zone.md b/docs/en/administrator-guide/time-zone.md
deleted file mode 100644
index 25110b9630..0000000000
--- a/docs/en/administrator-guide/time-zone.md
+++ /dev/null
@@ -1,98 +0,0 @@
----
-{
- "title": "Time zone",
- "language": "en"
-}
----
-
-
-
-# Time zone
-
-Doris supports multiple time zone settings
-
-## Noun Interpretation
-
-* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access.
-* BE: Backend, Doris's back-end node. Responsible for query execution and data storage.
-
-## Basic concepts
-
-There are multiple time zone related parameters in Doris
-
-* `system_time_zone`:
-
-When the server starts, it will be set automatically according to the time zone set by the machine, which cannot be modified after setting.
-
-* `time_zone`:
-
-The server's current time zone. It can be set at the session level or global level.
-
-## Specific operations
-
-1. `SHOW VARIABLES LIKE '%time_zone%'`
-
- View the current time zone related configuration
-
-2. `SET time_zone = 'Asia/Shanghai'`
-
- This command can set the session level time zone, which will fail after disconnection.
-
-3. `SET global time_zone = 'Asia/Shanghai'`
-
- This command can set time zone parameters at the global level. The FE will persist the parameters and will not fail when the connection is disconnected.
-
-### Impact of time zone
-
-Time zone setting affects the display and storage of time zone sensitive values.
-
-It includes the values displayed by time functions such as `NOW()` or `CURTIME()`, as well as the time values in `SHOW LOAD` and `SHOW BACKENDS` statements.
-
-However, it does not affect the `LESS THAN VALUE` of the time-type partition column in the `CREATE TABLE` statement, nor does it affect the display of values stored as `DATE/DATETIME` type.
-
-Functions affected by time zone:
-
-* `FROM_UNIXTIME`: Given a UTC timestamp, returns the date and time in the specified time zone. For example, `FROM_UNIXTIME(0)` returns `1970-01-01 08:00:00` in the CST time zone.
-
-* `UNIX_TIMESTAMP`: Given a date and time in the specified time zone, returns the UTC timestamp. For example, `UNIX_TIMESTAMP('1970-01-01 08:00:00')` in the CST time zone returns `0`.
-
-* `CURTIME`: Returns the current time in the specified time zone.
-
-* `NOW`: Returns the current date and time in the specified time zone.
-
-* `CONVERT_TZ`: Converts a date and time from one specified time zone to another.
-
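-A hedged illustration of the functions above (the results follow the CST examples in this document and assume `time_zone` is set to "Asia/Shanghai"; CONVERT_TZ uses the MySQL-compatible argument order):
-
-```
-SET time_zone = 'Asia/Shanghai';
-SELECT FROM_UNIXTIME(0);                       -- 1970-01-01 08:00:00
-SELECT UNIX_TIMESTAMP('1970-01-01 08:00:00');  -- 0
-SELECT CONVERT_TZ('1970-01-01 08:00:00', 'Asia/Shanghai', 'UTC');
-```
-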
-## Restrictions
-
-Time zone values can be given in several formats, case-insensitive:
-
-* A string representing UTC offset, such as '+10:00' or '-6:00'.
-
-* Standard time zone formats, such as "Asia/Shanghai", "America/Los_Angeles"
-
-* Abbreviated time zone names such as MET and CTT are not supported, because abbreviated time zones are ambiguous in different scenarios, and their use is not recommended.
-
-* As an exception, CST is supported for compatibility and is internally converted to "Asia/Shanghai", which is China Standard Time.
-
-## Time zone format list
-
-[List of TZ database time zones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
-
\ No newline at end of file
diff --git a/docs/en/administrator-guide/update.md b/docs/en/administrator-guide/update.md
deleted file mode 100644
index b3efc45ece..0000000000
--- a/docs/en/administrator-guide/update.md
+++ /dev/null
@@ -1,126 +0,0 @@
----
-{
- "title": "update",
- "language": "en"
-}
----
-
-
-
-# Update
-
-If we need to modify or update the data in Doris, we can use the UPDATE command.
-
-## Applicable scenarios
-
-+ To modify the value of a row that meets certain conditions.
-+ Point updates, small updates, where the rows to be updated are preferably a very small part of the entire table.
-+ Can only be used on tables with the Unique data model.
-
-## Explanation of terms
-
-1. Unique model: a data model in Doris. When rows with the same Key are imported, the later Value overwrites the existing Value, similar in meaning to a unique key in MySQL.
-
-## Fundamentals
-
-The UPDATE command uses the query engine's own WHERE filtering logic to select the rows to be updated from the target table, then uses the Unique model's Value-column replacement logic to modify those rows and re-insert them into the table, thereby achieving row-level updates.
-
-### Example
-
-Suppose there is an order table in Doris, where order_id is the Key column, and order_amount and order_status are the Value columns. The data is as follows.
-
-| order_id | order_amount | order_status |
-|--|--|--|
-| 1 | 100 | Pending payment |
-
-When the user pays, the order whose order_id is 1 needs its order_status changed to 'Pending shipment'. This is done with the UPDATE command.
-
-```
-UPDATE order SET order_status = 'Pending shipment' WHERE order_id = 1;
-```
-
-After the user executes the UPDATE command, the system performs the following three steps.
-
-+ Step 1: Read the row that satisfies WHERE order_id = 1
-  (1, 100, 'Pending payment')
-+ Step 2: Change the order_status of that row from 'Pending payment' to 'Pending shipment'
-  (1, 100, 'Pending shipment')
-+ Step 3: Insert the updated row back into the table to achieve the update effect.
-  | order_id | order_amount | order_status |
-  | ---| ---| ---|
-  | 1 | 100 | Pending payment |
-  | 1 | 100 | Pending shipment |
-  Since the table order uses the UNIQUE model, for rows with the same Key the later row takes effect, so the final result is as follows.
-  | order_id | order_amount | order_status |
-  |--|--|--|
-  | 1 | 100 | Pending shipment |
-
-## Basic operations
-
-### UPDATE syntax
-
-```UPDATE table_name SET value=xxx WHERE condition;```
-
-+ ``table_name``: the table to be updated; it must be a table with the UNIQUE data model.
-
-+ value=xxx: the column to be updated. The left side of the assignment must be a Value column of the table. The right side can be a constant or an expression over columns of the table.
-  For example, if value = 1, the column to be updated is set to 1.
-  For example, if value = value + 1, the column to be updated is incremented by 1.
-
-+ condition: only rows that satisfy the condition will be updated. condition must be an expression that evaluates to a Boolean.
-  For example, if k1 = 1, only rows whose k1 column equals 1 will be updated.
-  For example, if k1 = k2, only rows whose k1 column equals their k2 column will be updated.
-  The condition cannot be omitted, i.e. full-table updates are not supported (a combined example follows this list).
-
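-Putting the pieces above together, a minimal sketch based on the order example earlier in this document, which increments a Value column for the row whose Key is 1:
-
-```
-UPDATE order SET order_amount = order_amount + 1 WHERE order_id = 1;
-```
-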
-### Synchronization
-
-The Update syntax is a synchronization syntax in Doris. If the Update statement succeeds, the update succeeds and the data is visible.
-
-### Performance
-
-The performance of the Update statement is closely related to the number of rows to be updated and the retrieval efficiency of the condition.
-
-+ Number of rows to be updated: The more rows to be updated, the slower the Update statement will be. This is consistent with the principle of importing.
- Doris updates are more suitable for occasional update scenarios, such as changing the values of individual rows.
- Doris is not suitable for large batches of data changes. Large modifications can make Update statements take a long time to run.
-
-+ Condition retrieval efficiency: Doris Update works by first reading the rows that satisfy the condition, so the more efficiently the condition can be retrieved, the faster the Update will be.
-  Ideally, the condition columns hit an index or allow bucket pruning, so that Doris does not need to scan the entire table and can quickly locate the rows that need to be updated, improving update efficiency.
-  It is strongly discouraged to include the Value columns of the UNIQUE model in the condition.
-
-### Concurrency Control
-
-By default, multiple concurrent Update operations on the same table are not allowed at the same time.
-
-The main reason is that Doris currently updates whole rows. This means that even if the user declares ``SET v2 = 1``, all other Value columns are in fact also overwritten (even though their values do not change).
-
-This presents a problem in that if two Update operations update the same row at the same time, the behavior may be indeterminate. That is, there may be dirty data.
-
-However, in practice, concurrent updates can be allowed if the user can guarantee that concurrent updates never operate on the same rows. This is done by setting the FE configuration ``enable_concurrent_update`` to true, after which concurrent updates are no longer restricted.
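-
-A sketch of the corresponding fe.conf entry, shown only to illustrate the switch; leave it unset unless your application guarantees that concurrent UPDATEs never touch the same rows:
-
-```
-# fe.conf (sketch): lift the restriction on concurrent UPDATE statements
-enable_concurrent_update = true
-```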
-
-## Risks of use
-
-Since Doris currently updates whole rows using a two-step read-then-write operation, the outcome of an UPDATE statement is indeterminate if it modifies the same rows as another concurrent import or DELETE statement.
-
-Therefore, when using Doris, you must be careful to control the concurrency of Update statements and other DML statements on the *user side itself*.
-
-## Version
-
-Doris Version 0.15.x +
diff --git a/docs/en/administrator-guide/variables.md b/docs/en/administrator-guide/variables.md
deleted file mode 100644
index e6d6e22d28..0000000000
--- a/docs/en/administrator-guide/variables.md
+++ /dev/null
@@ -1,499 +0,0 @@
----
-{
- "title": "Variable",
- "language": "en"
-}
----
-
-
-
-# Variable
-
-This document focuses on currently supported variables.
-
-Variables in Doris are modeled on variable settings in MySQL. However, some of the variables exist only for compatibility with the MySQL client protocol and do not have the effect they have in a MySQL database.
-
-## Variable setting and viewing
-
-### View
-
-All or specified variables can be viewed via `SHOW VARIABLES [LIKE 'xxx'];`. Such as:
-
-```
-SHOW VARIABLES;
-SHOW VARIABLES LIKE '%time_zone%';
-```
-
-### Settings
-
-Some variables can be set at global-level or session-only. For global-level, the set value will be used in subsequent new session connections. For session-only, the variable only works for the current session.
-
-For session-only, set by the `SET var_name=xxx;` statement. Such as:
-
-```
-SET exec_mem_limit = 137438953472;
-SET forward_to_master = true;
-SET time_zone = "Asia/Shanghai";
-```
-
-For global-level, set by `SET GLOBAL var_name=xxx;`. Such as:
-
-```
-SET GLOBAL exec_mem_limit = 137438953472
-```
-
-> Note 1: Only ADMIN users can set variable at global-level.
-> Note 2: Global-level variables do not affect variable values in the current session, only variables in new sessions.
-
-Variables that support both session-level and global-level setting include:
-
-* `time_zone`
-* `wait_timeout`
-* `sql_mode`
-* `enable_profile`
-* `query_timeout`
-* `exec_mem_limit`
-* `batch_size`
-* `parallel_fragment_exec_instance_num`
-* `parallel_exchange_instance_num`
-* `allow_partition_column_nullable`
-* `insert_visible_timeout_ms`
-* `enable_fold_constant_by_be`
-
-Variables that support only global-level setting include:
-
-* `default_rowset_type`
-
-At the same time, variable settings also support constant expressions. Such as:
-
-```
-SET exec_mem_limit = 10 * 1024 * 1024 * 1024;
-SET forward_to_master = concat('tr', 'u', 'e');
-```
-
-### Set variables in the query statement
-
-In some scenarios, we may need to set variables specifically for certain queries.
-The SET_VAR hint sets the session value of a system variable temporarily (for the duration of a single statement). Examples:
-
-```
-SELECT /*+ SET_VAR(exec_mem_limit = 8589934592) */ name FROM people ORDER BY name;
-SELECT /*+ SET_VAR(query_timeout = 1, enable_partition_cache=true) */ sleep(3);
-```
-
-Note that the hint comment must start with /*+ and can only be placed right after the SELECT keyword.
-
-## Supported variables
-
-* `SQL_AUTO_IS_NULL`
-
-  Used for compatibility with the C3P0 JDBC connection pool. No practical effect.
-
-* `auto_increment_increment`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `autocommit`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `batch_size`
-
- Used to specify the number of rows of a single packet transmitted by each node during query execution. By default, the number of rows of a packet is 1024 rows. That is, after the source node generates 1024 rows of data, it is packaged and sent to the destination node.
-
-  A larger number of rows will increase query throughput when scanning large data volumes, but may increase query latency in small-query scenarios, and it also increases the memory overhead of the query. The recommended setting range is 1024 to 4096.
-
-* `character_set_client`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `character_set_connection`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `character_set_results`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `character_set_server`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `codegen_level`
-
- Used to set the level of LLVM codegen. (Not currently in effect).
-
-* `collation_connection`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `collation_database`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `collation_server`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `delete_without_partition`
-
-  When set to true, a DELETE statement on a partitioned table no longer needs to specify a partition; the delete operation is automatically applied to all partitions.
-
-  Note, however, that applying the delete to all partitions may trigger a large number of subtasks and cause the delete command to take a long time. If it is not necessary, it is not recommended to turn this on.
-
-* `disable_colocate_join`
-
- Controls whether the [Colocation Join](./colocation-join.md) function is enabled. The default is false, which means that the feature is enabled. True means that the feature is disabled. When this feature is disabled, the query plan will not attempt to perform a Colocation Join.
-
-
-* `enable_bucket_shuffle_join`
-
-  Controls whether the [Bucket Shuffle Join](./bucket-shuffle-join.md) function is enabled. The default is true, which means that the feature is enabled. False means that the feature is disabled. When this feature is disabled, the query plan will not attempt to perform a Bucket Shuffle Join.
-
-* `disable_streaming_preaggregations`
-
-  Controls whether streaming pre-aggregation is disabled. The default is false, which means streaming pre-aggregation is enabled. Currently this is not configurable and is always enabled.
-
-* `enable_insert_strict`
-
- Used to set the `strict` mode when loading data via INSERT statement. The default is false, which means that the `strict` mode is not turned on. For an introduction to this mode, see [here](./load-data/insert-into-manual.md).
-
-* `enable_spilling`
-
- Used to set whether to enable external sorting. The default is false, which turns off the feature. This feature is enabled when the user does not specify a LIMIT condition for the ORDER BY clause and also sets `enable_spilling` to true. When this feature is enabled, the temporary data is stored in the `doris-scratch/` directory of the BE data directory and the temporary data is cleared after the query is completed.
-
- This feature is mainly used for sorting operations with large amounts of data using limited memory.
-
- Note that this feature is experimental and does not guarantee stability. Please turn it on carefully.
-
-* `exec_mem_limit`
-
-  Used to set the memory limit for a single query. The default is 2GB. The value can be given with the units B/K/KB/M/MB/G/GB/T/TB/P/PB; the default unit is B.
-
- This parameter is used to limit the memory that can be used by an instance of a single query fragment in a query plan. A query plan may have multiple instances, and a BE node may execute one or more instances. Therefore, this parameter does not accurately limit the memory usage of a query across the cluster, nor does it accurately limit the memory usage of a query on a single BE node. The specific needs need to be judged according to the generated query plan.
-
- Usually, only some blocking nodes (such as sorting node, aggregation node, and join node) consume more memory, while in other nodes (such as scan node), data is streamed and does not occupy much memory.
-
- When a `Memory Exceed Limit` error occurs, you can try to increase the parameter exponentially, such as 4G, 8G, 16G, and so on.
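-
-  For example, a hedged sketch using the constant-expression support described earlier in this document:
-
-  ```
-  SET exec_mem_limit = 8 * 1024 * 1024 * 1024;  -- 8GB for the current session
-  ```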
-
-* `forward_to_master`
-
-  Used to set whether some information-viewing commands are forwarded to the Master FE node for execution. The default is `true`, which means they are forwarded. There are multiple FE nodes in Doris, one of which is the Master node. Usually users can connect to any FE node for full-featured operations, but some detailed information can only be obtained from the Master FE node.
-
-  For example, for the `SHOW BACKENDS;` command, if it is not forwarded to the Master FE node, only basic information such as whether each node is alive can be seen; when forwarded to the Master FE, more detailed information can be obtained, including the node start time and the last heartbeat time.
-
- The commands currently affected by this parameter are as follows:
-
-    1. `SHOW FRONTENDS;`
-
- Forward to Master to view the last heartbeat information.
-
- 2. `SHOW BACKENDS;`
-
- Forward to Master to view startup time, last heartbeat information, and disk capacity information.
-
- 3. `SHOW BROKERS;`
-
- Forward to Master to view the start time and last heartbeat information.
-
- 4. `SHOW TABLET;`/`ADMIN SHOW REPLICA DISTRIBUTION;`/`ADMIN SHOW REPLICA STATUS;`
-
- Forward to Master to view the tablet information stored in the Master FE metadata. Under normal circumstances, the tablet information in different FE metadata should be consistent. When a problem occurs, this method can be used to compare the difference between the current FE and Master FE metadata.
-
- 5. `SHOW PROC;`
-
- Forward to Master to view information about the relevant PROC stored in the Master FE metadata. Mainly used for metadata comparison.
-
-* `init_connect`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `interactive_timeout`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `enable_profile`
-
- Used to set whether you need to view the profile of the query. The default is false, which means no profile is required.
-
- By default, the BE sends a profile to the FE for viewing errors only if an error occurs in the query. A successful query will not send a profile. Sending a profile will incur a certain amount of network overhead, which is detrimental to a high concurrent query scenario.
-
- When the user wants to analyze the profile of a query, the query can be sent after this variable is set to true. After the query is finished, you can view the profile on the web page of the currently connected FE:
-
-  `fe_host:fe_http_port/query`
-
-  It will display the 100 most recent queries for which `enable_profile` was set to true.
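-
-  A sketch of the typical workflow described above:
-
-  ```
-  SET enable_profile = true;
-  -- run the query you want to analyze, then open http://fe_host:fe_http_port/query in a browser
-  ```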
-
-* `language`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `license`
-
- Show Doris's license. No other effect.
-
-* `load_mem_limit`
-
- Used to specify the memory limit of the load operation. The default is 0, which means that this variable is not used, and `exec_mem_limit` is used as the memory limit for the load operation.
-
-  This variable is usually used for INSERT operations, because an INSERT operation has both a query part and a load part. If the user does not set this variable, the memory limit of both parts is `exec_mem_limit`. Otherwise, the query part of the INSERT is limited by `exec_mem_limit`, and the load part is limited by `load_mem_limit`.
-
- For other load methods, such as BROKER LOAD, STREAM LOAD, the memory limit still uses `exec_mem_limit`.
-
-* `lower_case_table_names`
-
- Used to control whether the user table name is case-sensitive.
-
- A value of 0 makes the table name case-sensitive. The default is 0.
-
- When the value is 1, the table name is case insensitive. Doris will convert the table name to lowercase when storing and querying.
- The advantage is that any case of table name can be used in one statement. The following SQL is correct:
- ```
- mysql> show tables;
- +------------------+
-    | Tables_in_testdb |
- +------------------+
- | cost |
- +------------------+
- mysql> select * from COST where COst.id < 100 order by cost.id;
- ```
-  The disadvantage is that the table name specified in the table creation statement cannot be recovered after the table is created; the table name shown by 'show tables' is the lowercase form of the specified name.
-
- When the value is 2, the table name is case insensitive. Doris stores the table name specified in the table creation statement and converts it to lowercase for comparison during query.
- The advantage is that the table name viewed by 'show tables' is the table name specified in the table creation statement;
- The disadvantage is that only one case of table name can be used in the same statement. For example, the table name 'cost' can be used to query the 'cost' table:
- ```
- mysql> select * from COST where COST.id < 100 order by COST.id;
- ```
-
- This variable is compatible with MySQL and must be configured at cluster initialization by specifying `lower_case_table_names=` in fe.conf. It cannot be modified by the `set` statement after cluster initialization is complete, nor can it be modified by restarting or upgrading the cluster.
-
- The system view table names in information_schema are case-insensitive and behave as 2 when the value of `lower_case_table_names` is 0.
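-
-  A sketch of the corresponding fe.conf entry; the value 1 here is only an example, and it must be chosen before the cluster is initialized for the first time, per the note above:
-
-  ```
-  # fe.conf (sketch): treat table names as case-insensitive and store them in lowercase
-  lower_case_table_names = 1
-  ```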
-
-
-* `max_allowed_packet`
-
-  Used for compatibility with the C3P0 JDBC connection pool. No practical effect.
-
-* `max_pushdown_conditions_per_column`
-
- For the specific meaning of this variable, please refer to the description of `max_pushdown_conditions_per_column` in [BE Configuration](./config/be_config.md). This variable is set to -1 by default, which means that the configuration value in `be.conf` is used. If the setting is greater than 0, the query in the current session will use the variable value, and ignore the configuration value in `be.conf`.
-
-* `max_scan_key_num`
-
- For the specific meaning of this variable, please refer to the description of `doris_max_scan_key_num` in [BE Configuration](./config/be_config.md). This variable is set to -1 by default, which means that the configuration value in `be.conf` is used. If the setting is greater than 0, the query in the current session will use the variable value, and ignore the configuration value in `be.conf`.
-
-* `net_buffer_length`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `net_read_timeout`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `net_write_timeout`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `parallel_exchange_instance_num`
-
- Used to set the number of exchange nodes used by an upper node to receive data from the lower node in the execution plan. The default is -1, which means that the number of exchange nodes is equal to the number of execution instances of the lower nodes (default behavior). When the setting is greater than 0 and less than the number of execution instances of the lower node, the number of exchange nodes is equal to the set value.
-
- In a distributed query execution plan, the upper node usually has one or more exchange nodes for receiving data from the execution instances of the lower nodes on different BEs. Usually the number of exchange nodes is equal to the number of execution instances of the lower nodes.
-
- In some aggregate query scenarios, if the amount of data to be scanned at the bottom is large, but the amount of data after aggregation is small, you can try to modify this variable to a smaller value, which can reduce the resource overhead of such queries. Such as the scenario of aggregation query on the DUPLICATE KEY data model.
-
-* `parallel_fragment_exec_instance_num`
-
- For the scan node, set its number of instances to execute on each BE node. The default is 1.
-
- A query plan typically produces a set of scan ranges, the range of data that needs to be scanned. These data are distributed across multiple BE nodes. A BE node will have one or more scan ranges. By default, a set of scan ranges for each BE node is processed by only one execution instance. When the machine resources are abundant, you can increase the variable and let more execution instances process a set of scan ranges at the same time, thus improving query efficiency.
-
- The number of scan instances determines the number of other execution nodes in the upper layer, such as aggregate nodes and join nodes. Therefore, it is equivalent to increasing the concurrency of the entire query plan execution. Modifying this parameter will help improve the efficiency of large queries, but larger values will consume more machine resources, such as CPU, memory, and disk IO.
-
-* `query_cache_size`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `query_cache_type`
-
-  Used for compatibility with the C3P0 JDBC connection pool. No practical effect.
-
-* `query_timeout`
-
- Used to set the query timeout. This variable applies to all query statements in the current connection, as well as INSERT statements. The default is 5 minutes, in seconds.
-
-* `resource_group`
-
- Not used.
-
-* `send_batch_parallelism`
-
-  Used to set the default parallelism for sending batches when executing an INSERT statement. If the parallelism exceeds `max_send_batch_parallelism_per_job` in the BE configuration, the coordinator BE will use the value of `max_send_batch_parallelism_per_job` instead.
-
-* `sql_mode`
-
- Used to specify SQL mode to accommodate certain SQL dialects. For the SQL mode, see [here](./sql-mode.md).
-
-* `sql_safe_updates`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `sql_select_limit`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `system_time_zone`
-
- Displays the current system time zone. Cannot be changed.
-
-* `time_zone`
-
- Used to set the time zone of the current session. The time zone has an effect on the results of certain time functions. For the time zone, see [here](./time-zone.md).
-
-* `tx_isolation`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `tx_read_only`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `transaction_read_only`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `transaction_isolation`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `version`
-
- Used for compatibility with MySQL clients. No practical effect.
-
-* `performance_schema`
-
- Used for compatibility with MySQL JDBC 8.0.16 or later version. No practical effect.
-
-* `version_comment`
-
- Used to display the version of Doris. Cannot be changed.
-
-* `wait_timeout`
-
-  Used to set the idle timeout of a connection. When an idle connection does not interact with Doris for this length of time, Doris actively closes the connection. The default is 8 hours, in seconds.
-
-* `default_rowset_type`
-
- Used for setting the default storage format of Backends storage engine. Valid options: alpha/beta
-
-* `use_v2_rollup`
-
-  Used to force queries to read data through the segment v2 rollup index. This variable is only used for verification when upgrading to the segment v2 feature; otherwise, it is not recommended.
-
-* `rewrite_count_distinct_to_bitmap_hll`
-
- Whether to rewrite count distinct queries of bitmap and HLL types as bitmap_union_count and hll_union_agg.
-
-* `prefer_join_method`
-
-  When choosing the join method (broadcast join or shuffle join), this variable determines which method is preferred if the broadcast join cost and the shuffle join cost are equal.
-
- Currently, the optional values for this variable are "broadcast" or "shuffle".
-
-* `allow_partition_column_nullable`
-
- Whether to allow the partition column to be NULL when creating the table. The default is true, which means NULL is allowed. false means the partition column must be defined as NOT NULL.
-
-* `insert_visible_timeout_ms`
-
-  When executing an INSERT statement, Doris waits for the transaction to be committed and visible after the import is completed.
-  This parameter controls the timeout for waiting for the transaction to become visible. The default value is 10000, and the minimum value is 1000.
-
-* `enable_exchange_node_parallel_merge`
-
-  In a sort query, when an upper-level node receives the ordered data of lower-level nodes, it merges the corresponding data on the exchange node to ensure that the final result is ordered. However, when a single thread merges multiple channels of data and the data volume is too large, the exchange node can become a single-point merge bottleneck.
-
-  Doris optimizes this case: if there are too many lower-level data nodes, the exchange node starts multiple threads for parallel merging to speed up the sorting process. This parameter is false by default, which means that the exchange node does not use parallel merge sort, in order to reduce the extra CPU and memory consumption.
-
-* `extract_wide_range_expr`
-
-  Used to control whether the 'Wide Common Factors' rewrite rule is turned on. It has two values, true and false, and it is on by default.
-
-* `enable_fold_constant_by_be`
-
- Used to control the calculation method of constant folding. The default is `false`, that is, calculation is performed in `FE`; if it is set to `true`, it will be calculated by `BE` through `RPC` request.
-
-* `cpu_resource_limit`
-
-  Used to limit the resource overhead of a query. This is an experimental feature. The current implementation limits the number of scan threads a query can use on a single node; with fewer scan threads, data is returned from the storage layer more slowly, which limits the overall computational resource overhead of the query. Assuming it is set to 2, a query can use at most 2 scan threads on a single node.
-
- This parameter will override the effect of `parallel_fragment_exec_instance_num`. That is, assuming that `parallel_fragment_exec_instance_num` is set to 4, and this parameter is set to 2. Then 4 execution instances on a single node will share up to 2 scanning threads.
-
- This parameter will be overridden by the `cpu_resource_limit` configuration in the user property.
-
- The default is -1, which means no limit.
-
-* `disable_join_reorder`
-
-  Used to turn off all automatic join reorder algorithms in the system. It has two values, true and false, and is off by default, which means the system's automatic join reorder algorithms are used. When set to true, the system disables all automatic reordering and executes the join using the original table order of the SQL statement.
-
-* `enable_infer_predicate`
-
-  Used to control whether predicate inference is performed. It has two values, true and false. It is off by default, which means the system does not infer new predicates and performs the related operations with the original predicates only. When set to true, predicate inference is performed.
-
-* `return_object_data_as_binary`
-  Used to control whether bitmap/hll results are returned in the SELECT result. In a SELECT INTO OUTFILE statement, if the export file format is CSV, the bitmap/hll data will be base64-encoded; if it is the Parquet file format, the data will be stored as a byte array.
-
-* `block_encryption_mode`
-  The block_encryption_mode variable controls the block encryption mode. The default setting is empty, which is equivalent to `AES_128_ECB` when AES is used and to `SM4_128_ECB` when SM4 is used.
-  Available values:
-```
- AES_128_ECB,
- AES_192_ECB,
- AES_256_ECB,
- AES_128_CBC,
- AES_192_CBC,
- AES_256_CBC,
- AES_128_CFB,
- AES_192_CFB,
- AES_256_CFB,
- AES_128_CFB1,
- AES_192_CFB1,
- AES_256_CFB1,
- AES_128_CFB8,
- AES_192_CFB8,
- AES_256_CFB8,
- AES_128_CFB128,
- AES_192_CFB128,
- AES_256_CFB128,
- AES_128_CTR,
- AES_192_CTR,
- AES_256_CTR,
- AES_128_OFB,
- AES_192_OFB,
- AES_256_OFB,
- SM4_128_ECB,
- SM4_128_CBC,
- SM4_128_CFB128,
- SM4_128_OFB,
- SM4_128_CTR,
-```
\ No newline at end of file
diff --git a/new-docs/en/advanced/alter-table/replace-table.md b/docs/en/advanced/alter-table/replace-table.md
similarity index 100%
rename from new-docs/en/advanced/alter-table/replace-table.md
rename to docs/en/advanced/alter-table/replace-table.md
diff --git a/new-docs/en/advanced/alter-table/schema-change.md b/docs/en/advanced/alter-table/schema-change.md
similarity index 100%
rename from new-docs/en/advanced/alter-table/schema-change.md
rename to docs/en/advanced/alter-table/schema-change.md
diff --git a/new-docs/en/advanced/best-practice/debug-log.md b/docs/en/advanced/best-practice/debug-log.md
similarity index 100%
rename from new-docs/en/advanced/best-practice/debug-log.md
rename to docs/en/advanced/best-practice/debug-log.md
diff --git a/new-docs/en/advanced/best-practice/import-analysis.md b/docs/en/advanced/best-practice/import-analysis.md
similarity index 100%
rename from new-docs/en/advanced/best-practice/import-analysis.md
rename to docs/en/advanced/best-practice/import-analysis.md
diff --git a/new-docs/en/advanced/best-practice/query-analysis.md b/docs/en/advanced/best-practice/query-analysis.md
similarity index 100%
rename from new-docs/en/advanced/best-practice/query-analysis.md
rename to docs/en/advanced/best-practice/query-analysis.md
diff --git a/new-docs/en/advanced/broker.md b/docs/en/advanced/broker.md
similarity index 100%
rename from new-docs/en/advanced/broker.md
rename to docs/en/advanced/broker.md
diff --git a/new-docs/en/advanced/cache/partition-cache.md b/docs/en/advanced/cache/partition-cache.md
similarity index 100%
rename from new-docs/en/advanced/cache/partition-cache.md
rename to docs/en/advanced/cache/partition-cache.md
diff --git a/new-docs/en/advanced/cache/query-cache.md b/docs/en/advanced/cache/query-cache.md
similarity index 100%
rename from new-docs/en/advanced/cache/query-cache.md
rename to docs/en/advanced/cache/query-cache.md
diff --git a/new-docs/en/advanced/join-optimization/bucket-shuffle-join.md b/docs/en/advanced/join-optimization/bucket-shuffle-join.md
similarity index 100%
rename from new-docs/en/advanced/join-optimization/bucket-shuffle-join.md
rename to docs/en/advanced/join-optimization/bucket-shuffle-join.md
diff --git a/new-docs/en/advanced/join-optimization/colocation-join.md b/docs/en/advanced/join-optimization/colocation-join.md
similarity index 100%
rename from new-docs/en/advanced/join-optimization/colocation-join.md
rename to docs/en/advanced/join-optimization/colocation-join.md
diff --git a/new-docs/en/advanced/join-optimization/runtime-filter.md b/docs/en/advanced/join-optimization/runtime-filter.md
similarity index 100%
rename from new-docs/en/advanced/join-optimization/runtime-filter.md
rename to docs/en/advanced/join-optimization/runtime-filter.md
diff --git a/new-docs/en/advanced/materialized-view.md b/docs/en/advanced/materialized-view.md
similarity index 100%
rename from new-docs/en/advanced/materialized-view.md
rename to docs/en/advanced/materialized-view.md
diff --git a/docs/en/administrator-guide/orthogonal-bitmap-manual.md b/docs/en/advanced/orthogonal-bitmap-manual.md
similarity index 100%
rename from docs/en/administrator-guide/orthogonal-bitmap-manual.md
rename to docs/en/advanced/orthogonal-bitmap-manual.md
diff --git a/new-docs/en/advanced/orthogonal-hll-manual.md b/docs/en/advanced/orthogonal-hll-manual.md
similarity index 100%
rename from new-docs/en/advanced/orthogonal-hll-manual.md
rename to docs/en/advanced/orthogonal-hll-manual.md
diff --git a/new-docs/en/advanced/partition/dynamic-partition.md b/docs/en/advanced/partition/dynamic-partition.md
similarity index 100%
rename from new-docs/en/advanced/partition/dynamic-partition.md
rename to docs/en/advanced/partition/dynamic-partition.md
diff --git a/new-docs/en/advanced/partition/table-temp-partition.md b/docs/en/advanced/partition/table-temp-partition.md
similarity index 100%
rename from new-docs/en/advanced/partition/table-temp-partition.md
rename to docs/en/advanced/partition/table-temp-partition.md
diff --git a/new-docs/en/advanced/resource.md b/docs/en/advanced/resource.md
similarity index 100%
rename from new-docs/en/advanced/resource.md
rename to docs/en/advanced/resource.md
diff --git a/new-docs/en/advanced/small-file-mgr.md b/docs/en/advanced/small-file-mgr.md
similarity index 100%
rename from new-docs/en/advanced/small-file-mgr.md
rename to docs/en/advanced/small-file-mgr.md
diff --git a/new-docs/en/advanced/time-zone.md b/docs/en/advanced/time-zone.md
similarity index 100%
rename from new-docs/en/advanced/time-zone.md
rename to docs/en/advanced/time-zone.md
diff --git a/new-docs/en/advanced/variables.md b/docs/en/advanced/variables.md
similarity index 100%
rename from new-docs/en/advanced/variables.md
rename to docs/en/advanced/variables.md
diff --git a/docs/en/administrator-guide/vectorized-execution-engine.md b/docs/en/advanced/vectorized-execution-engine.md
similarity index 100%
rename from docs/en/administrator-guide/vectorized-execution-engine.md
rename to docs/en/advanced/vectorized-execution-engine.md
diff --git a/docs/en/benchmark/samples.md b/docs/en/benchmark/samples.md
deleted file mode 100644
index 309808c806..0000000000
--- a/docs/en/benchmark/samples.md
+++ /dev/null
@@ -1,56 +0,0 @@
----
-{
- "title": "Samples",
- "language": "en"
-}
----
-
-
-
-# Samples
-
-Doris provides a wealth of usage samples, which can help users quickly get started and experience the features of Doris.
-
-## Description
-
-The sample codes are stored in the [`samples/`](https://github.com/apache/incubator-doris/tree/master/samples) directory of the Doris code base.
-
-```
-├── connect
-├── doris-demo
-├── insert
-└── mini_load
-```
-
-* `connect/`
-
-  This directory mainly contains code examples for connecting to Doris in various programming languages.
-
-* `doris-demo/`
-
-  This directory mainly contains code examples of various Doris features, organized as Maven projects, such as spark-connector and flink-connector usage examples, integration with the Spring framework, Stream Load examples, and so on.
-
-* `insert/`
-
-  This directory contains code examples of importing data by calling Doris's INSERT command from Python or shell scripts.
-
-* `mini_load/`
-
-  This directory contains a code example of importing data by calling mini load from Python. However, since the mini load function has been replaced by stream load, it is recommended to use stream load for data import.
\ No newline at end of file
diff --git a/new-docs/en/benchmark/ssb.md b/docs/en/benchmark/ssb.md
similarity index 100%
rename from new-docs/en/benchmark/ssb.md
rename to docs/en/benchmark/ssb.md
diff --git a/docs/en/benchmark/star-schema-benchmark.md b/docs/en/benchmark/star-schema-benchmark.md
deleted file mode 100644
index da93925528..0000000000
--- a/docs/en/benchmark/star-schema-benchmark.md
+++ /dev/null
@@ -1,181 +0,0 @@
----
-{
- "title": "Star-Schema-Benchmark",
- "language": "en"
-}
----
-
-
-
-# Star Schema Benchmark
-
-[Star Schema Benchmark(SSB)](https://www.cs.umb.edu/~poneil/StarSchemaB.PDF) is a lightweight data warehouse scenario performance test set. Based on [TPC-H](http://www.tpc.org/tpch/), SSB provides a simplified version of the star model data set, which is mainly used to test the performance of multi-table association queries under the star model.
-
-This document mainly introduces how to perform a preliminary performance test of Doris using the SSB workflow.
-
-> Note 1: Standard test sets such as SSB are usually far from actual business scenarios, and some tests tune parameters specifically for the test set. Therefore, the results of a standard test set only reflect the performance of the database in that specific scenario. It is recommended that users use actual business data for further testing.
->
-> Note 2: The operations involved in this document are all performed in the CentOS 7 environment.
-
-## Environmental preparation
-
-Please refer to the [official document](http://doris.incubator.apache.org/master/en/installing/install-deploy.html) to install and deploy Doris and obtain a normally running Doris cluster (containing at least 1 FE and 1 BE).
-
-The scripts involved in the following documents are all stored under `tools/ssb-tools/` in the Doris code base.
-
-## Data preparation
-
-### 1. Download and install the SSB data generation tool.
-
-Execute the following script to download and compile the [ssb-dbgen](https://github.com/electrum/ssb-dbgen.git) tool.
-
-```
-sh build-ssb-dbgen.sh
-```
-
-After the installation is successful, the `dbgen` binary file will be generated in the `ssb-dbgen/` directory.
-
-### 2. Generate SSB test set
-
-Execute the following script to generate the SSB data set:
-
-```
-sh gen-ssb-data.sh -s 100 -c 100
-```
-
-> Note 1: Run `sh gen-ssb-data.sh -h` for help.
->
-> Note 2: The data will be generated under the directory `ssb-data/` with a suffix of `.tbl`. The total file size is about 60GB. The generation time may vary from a few minutes to an hour.
->
-> Note 3: `-s 100` means that the test set size factor is 100, `-c 100` means that 100 threads concurrently generate data in the lineorder table. The `-c` parameter also determines the number of files in the final lineorder table. The larger the parameter, the more files and the smaller each file.
-
-Under the `-s 100` parameter, the generated data set size is:
-
-|Table |Rows |Size | File Number |
-|---|---|---|---|
-|lineorder| 600 million (600037902) | 60GB | 100|
-|customer|3 million (3000000) |277M |1|
-|part|1.4 million (1400000) | 116M|1|
-|supplier|200,000 (200,000) |17M |1|
-|date| 2556|228K |1|
-
-3. Create the tables
-
-    Copy the table creation statements in [create-tables.sql](https://github.com/apache/incubator-doris/tree/master/tools/ssb-tools/create-tables.sql) and execute them in Doris.
-
-4. Import data
-
-    0. Prepare the `doris-cluster.conf` file.
-
-       Before calling the load script, you need to write the FE's ip, http port, and other information in the `doris-cluster.conf` file.
-
-       Put `doris-cluster.conf` in the same directory as `load-dimension-data.sh`.
-
-       The contents of the file include the FE's ip, HTTP port, user name, password, and the name of the DB to load the data into:
-
- ````
- export FE_HOST="xxx"
- export FE_HTTP_PORT="8030"
- export USER="root"
- export PASSWORD='xxx'
- export DB="ssb"
- ````
-
- 1. Load 4 dimension table data (customer, part, supplier and date)
-
- Because the data volume of these 4 dimension tables is small, and the load is simpler, we use the following command to load the data of these 4 tables first:
-
- `sh load-dimension-data.sh`
-
- 2. Load the fact table lineorder.
-
- Load the lineorder table data with the following command:
-
- `sh load-fact-data.sh -c 5`
-
-    `-c 5` means starting 5 concurrent threads to import (the default is 3). With a single BE node, loading the lineorder data generated by `sh gen-ssb-data.sh -s 100 -c 100` using `sh load-fact-data.sh -c 3` takes about 10 minutes, with a memory overhead of about 5-6GB. Turning on more threads can speed up loading, but adds extra memory overhead.
-
-    > Note: To get a faster import speed, you can add `flush_thread_num_per_store=5` in be.conf and restart BE. This configuration indicates the number of disk write threads for each data directory; the default is 2. A larger value can increase write throughput, but may increase IO util. (Reference values: on 1 mechanical disk, IO util during import is about 12% at the default of 2 and about 26% when set to 5; on an SSD it is almost 0.)
-
-5. Check the loaded data
-
- ```
- select count(*) from part;
- select count(*) from customer;
- select count(*) from supplier;
- select count(*) from date;
- select count(*) from lineorder;
- ```
-
- The amount of data should be the same as the number of rows of generated data.
-
-## Query test
-
-There are 4 groups totaling 13 SQL queries in the SSB test set. The query statements are in the [queries/](https://github.com/apache/incubator-doris/tree/master/tools/ssb-tools/queries) directory.
-
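-The queries follow the standard SSB form; for example, Q1.1 typically looks like the sketch below (consult the queries/ directory for the authoritative statements):
-
-```
-SELECT SUM(lo_extendedprice * lo_discount) AS revenue
-FROM lineorder, date
-WHERE lo_orderdate = d_datekey
-  AND d_year = 1993
-  AND lo_discount BETWEEN 1 AND 3
-  AND lo_quantity < 25;
-```
-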
-## Test report
-
-The following test report is based on the Doris [branch-0.15](https://github.com/apache/incubator-doris/tree/branch-0.15) branch code and is for reference only. (Update time: October 25, 2021)
-
-1. Hardware environment
-
-    * 1 FE + 1-3 BE, mixed deployment
- * CPU: 96core, Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz
- * Memory: 384GB
- * Hard disk: 1 HDD
- * Network card: 10 Gigabit network card
-
-2. Data set
-
- |Table |Rows |Origin Size | Compacted Size(1 Replica) |
- |---|---|---|---|
- |lineorder| 600 million (600037902) | 60 GB | 14.846 GB |
- |customer|3 million (3000000) |277 MB | 414.741 MB |
-    |part|1.4 million (1400000) | 116 MB | 38.277 MB |
- |supplier|200,000 (200,000) |17 MB | 27.428 MB |
- |date| 2556|228 KB | 275.804 KB |
-
-3. Test results
-
- |Query |Time(ms) (1 BE) | Time(ms) (3 BE) | Parallelism | Runtime Filter Mode |
- |---|---|---|---|---|
- | q1.1 | 200 | 140 | 8 | IN |
- | q1.2 | 90 | 80 | 8 | IN |
- | q1.3 | 90 | 80 | 8 | IN |
- | q2.1 | 1100 | 400 | 8 | BLOOM_FILTER |
- | q2.2 | 900 | 330 | 8 | BLOOM_FILTER |
- | q2.3 | 790 | 320 | 8 | BLOOM_FILTER |
- | q3.1 | 3100 | 1280 | 8 | BLOOM_FILTER |
- | q3.2 | 700 | 270 | 8 | BLOOM_FILTER |
- | q3.3 | 540 | 270 | 8 | BLOOM_FILTER |
- | q3.4 | 560 | 240 | 8 | BLOOM_FILTER |
- | q4.1 | 2820 | 1150 | 8 | BLOOM_FILTER |
- | q4.2 | 1430 | 670 | 8 | BLOOM_FILTER |
-    | q4.3 | 1750 | 1030 | 8 | BLOOM_FILTER |
-
- > Note 1: "This test set is far from your production environment, please be skeptical!"
- >
- > Note 2: The test result is the average value of multiple executions (Page Cache will play a certain acceleration role). And the data has undergone sufficient compaction (if you test immediately after importing the data, the query delay may be higher than the test result)
- >
- > Note 3: Due to environmental constraints, the hardware specifications used in this test are relatively high, but so many hardware resources will not be consumed during the entire test. The memory consumption is within 10GB, and the CPU usage is within 10%.
- >
- > Note 4: Parallelism means query concurrency, which is set by `set parallel_fragment_exec_instance_num=8`.
- >
-    > Note 5: Runtime Filter Mode is the type of Runtime Filter, set by `set runtime_filter_type="BLOOM_FILTER"`. (The [Runtime Filter](http://doris.incubator.apache.org/master/en/administrator-guide/runtime-filter.html) function has a significant effect on the SSB test set, because in this test set the data from the right table of the join can filter the left table very well. You can try to turn off this function through `set runtime_filter_mode=off` to see the change in query latency.)
diff --git a/docs/en/benchmark/systemd.md b/docs/en/benchmark/systemd.md
deleted file mode 100644
index 5180091b97..0000000000
--- a/docs/en/benchmark/systemd.md
+++ /dev/null
@@ -1,31 +0,0 @@
----
-{
- "title": "Systemd",
-  "language": "en"
-}
----
-
-
-
-# Systemd
-
-The Systemd configuration file is provided in the Doris code base, which can help users control the start and stop of the Doris service in Linux.
-
-Please go to [Code Base](https://github.com/apache/incubator-doris/tree/master/tools/systemd) to view the configuration file.
diff --git a/new-docs/en/benchmark/tpc-h.md b/docs/en/benchmark/tpc-h.md
similarity index 100%
rename from new-docs/en/benchmark/tpc-h.md
rename to docs/en/benchmark/tpc-h.md
diff --git a/docs/en/community/how-to-contribute/commit-format-specification.md b/docs/en/community/how-to-contribute/commit-format-specification.md
index 3b9034f106..da4fb59203 100644
--- a/docs/en/community/how-to-contribute/commit-format-specification.md
+++ b/docs/en/community/how-to-contribute/commit-format-specification.md
@@ -53,7 +53,7 @@ Commit is divided into ‘ title ’ and ‘ content ’ , the title should be l
* deps: Modification of third-party dependency Library
* community: Such as modification of Github issue template.
- Some tips:
+ Some tips:
1. If there are multiple types in one commit, multiple types need to be added
2. If code refactoring brings performance improvement, [refactor][optimize] can be added at the same time
@@ -80,7 +80,7 @@ Commit is divided into ‘ title ’ and ‘ content ’ , the title should be l
* config
* docs
- Some tips:
+ Some tips:
1. Try to use options that already exist in the list. If you need to add, please update this document in time
@@ -93,7 +93,7 @@ Commit is divided into ‘ title ’ and ‘ content ’ , the title should be l
commit message should follow the following format:
```
- issue: #7777
+ issue:#7777
your message
```
diff --git a/docs/en/community/release-and-verify/release-complete.md b/docs/en/community/release-and-verify/release-complete.md
index 7abfec5727..e9db21ce84 100644
--- a/docs/en/community/release-and-verify/release-complete.md
+++ b/docs/en/community/release-and-verify/release-complete.md
@@ -44,10 +44,10 @@ https://dist.apache.org/repos/dist/release/incubator/doris/
For the first release, you need to copy the KEYS file as well. Then add it to the svn release.
```
-After add succeeds, you can see the files you published on the following website
+After the add succeeds, you can see the files you published at the following URL:
https://dist.apache.org/repos/dist/release/incubator/doris/0.xx.0-incubating/
-After a while, you can see on the official website of Apache:
+After a while, you can see them on the Apache official website:
http://www.apache.org/dist/incubator/doris/0.9.0-incubating/
```
@@ -150,7 +150,7 @@ Title:
[ANNOUNCE] Apache Doris (incubating) 0.9.0 Release
```
-To mail:
+To mail:
```
dev@doris.apache.org
diff --git a/new-docs/en/data-operate/export/export-manual.md b/docs/en/data-operate/export/export-manual.md
similarity index 100%
rename from new-docs/en/data-operate/export/export-manual.md
rename to docs/en/data-operate/export/export-manual.md
diff --git a/docs/en/administrator-guide/export_with_mysql_dump.md b/docs/en/data-operate/export/export_with_mysql_dump.md
similarity index 100%
rename from docs/en/administrator-guide/export_with_mysql_dump.md
rename to docs/en/data-operate/export/export_with_mysql_dump.md
diff --git a/new-docs/en/data-operate/export/outfile.md b/docs/en/data-operate/export/outfile.md
similarity index 100%
rename from new-docs/en/data-operate/export/outfile.md
rename to docs/en/data-operate/export/outfile.md
diff --git a/new-docs/en/data-operate/import/import-scenes/external-storage-load.md b/docs/en/data-operate/import/import-scenes/external-storage-load.md
similarity index 100%
rename from new-docs/en/data-operate/import/import-scenes/external-storage-load.md
rename to docs/en/data-operate/import/import-scenes/external-storage-load.md
diff --git a/new-docs/en/data-operate/import/import-scenes/external-table-load.md b/docs/en/data-operate/import/import-scenes/external-table-load.md
similarity index 100%
rename from new-docs/en/data-operate/import/import-scenes/external-table-load.md
rename to docs/en/data-operate/import/import-scenes/external-table-load.md
diff --git a/new-docs/en/data-operate/import/import-scenes/jdbc-load.md b/docs/en/data-operate/import/import-scenes/jdbc-load.md
similarity index 100%
rename from new-docs/en/data-operate/import/import-scenes/jdbc-load.md
rename to docs/en/data-operate/import/import-scenes/jdbc-load.md
diff --git a/new-docs/en/data-operate/import/import-scenes/kafka-load.md b/docs/en/data-operate/import/import-scenes/kafka-load.md
similarity index 100%
rename from new-docs/en/data-operate/import/import-scenes/kafka-load.md
rename to docs/en/data-operate/import/import-scenes/kafka-load.md
diff --git a/new-docs/en/data-operate/import/import-scenes/load-atomicity.md b/docs/en/data-operate/import/import-scenes/load-atomicity.md
similarity index 100%
rename from new-docs/en/data-operate/import/import-scenes/load-atomicity.md
rename to docs/en/data-operate/import/import-scenes/load-atomicity.md
diff --git a/new-docs/en/data-operate/import/import-scenes/load-data-convert.md b/docs/en/data-operate/import/import-scenes/load-data-convert.md
similarity index 100%
rename from new-docs/en/data-operate/import/import-scenes/load-data-convert.md
rename to docs/en/data-operate/import/import-scenes/load-data-convert.md
diff --git a/new-docs/en/data-operate/import/import-scenes/load-strict-mode.md b/docs/en/data-operate/import/import-scenes/load-strict-mode.md
similarity index 100%
rename from new-docs/en/data-operate/import/import-scenes/load-strict-mode.md
rename to docs/en/data-operate/import/import-scenes/load-strict-mode.md
diff --git a/new-docs/en/data-operate/import/import-scenes/local-file-load.md b/docs/en/data-operate/import/import-scenes/local-file-load.md
similarity index 100%
rename from new-docs/en/data-operate/import/import-scenes/local-file-load.md
rename to docs/en/data-operate/import/import-scenes/local-file-load.md
diff --git a/new-docs/en/data-operate/import/import-way/binlog-load-manual.md b/docs/en/data-operate/import/import-way/binlog-load-manual.md
similarity index 100%
rename from new-docs/en/data-operate/import/import-way/binlog-load-manual.md
rename to docs/en/data-operate/import/import-way/binlog-load-manual.md
diff --git a/new-docs/en/data-operate/import/import-way/broker-load-manual.md b/docs/en/data-operate/import/import-way/broker-load-manual.md
similarity index 100%
rename from new-docs/en/data-operate/import/import-way/broker-load-manual.md
rename to docs/en/data-operate/import/import-way/broker-load-manual.md
diff --git a/new-docs/en/data-operate/import/import-way/insert-into-manual.md b/docs/en/data-operate/import/import-way/insert-into-manual.md
similarity index 100%
rename from new-docs/en/data-operate/import/import-way/insert-into-manual.md
rename to docs/en/data-operate/import/import-way/insert-into-manual.md
diff --git a/new-docs/en/data-operate/import/import-way/load-json-format.md b/docs/en/data-operate/import/import-way/load-json-format.md
similarity index 100%
rename from new-docs/en/data-operate/import/import-way/load-json-format.md
rename to docs/en/data-operate/import/import-way/load-json-format.md
diff --git a/new-docs/en/data-operate/import/import-way/routine-load-manual.md b/docs/en/data-operate/import/import-way/routine-load-manual.md
similarity index 100%
rename from new-docs/en/data-operate/import/import-way/routine-load-manual.md
rename to docs/en/data-operate/import/import-way/routine-load-manual.md
diff --git a/new-docs/en/data-operate/import/import-way/s3-load-manual.md b/docs/en/data-operate/import/import-way/s3-load-manual.md
similarity index 100%
rename from new-docs/en/data-operate/import/import-way/s3-load-manual.md
rename to docs/en/data-operate/import/import-way/s3-load-manual.md
diff --git a/new-docs/en/data-operate/import/import-way/spark-load-manual.md b/docs/en/data-operate/import/import-way/spark-load-manual.md
similarity index 100%
rename from new-docs/en/data-operate/import/import-way/spark-load-manual.md
rename to docs/en/data-operate/import/import-way/spark-load-manual.md
diff --git a/new-docs/en/data-operate/import/import-way/stream-load-manual.md b/docs/en/data-operate/import/import-way/stream-load-manual.md
similarity index 100%
rename from new-docs/en/data-operate/import/import-way/stream-load-manual.md
rename to docs/en/data-operate/import/import-way/stream-load-manual.md
diff --git a/new-docs/en/data-operate/import/load-manual.md b/docs/en/data-operate/import/load-manual.md
similarity index 100%
rename from new-docs/en/data-operate/import/load-manual.md
rename to docs/en/data-operate/import/load-manual.md
diff --git a/new-docs/en/data-operate/update-delete/batch-delete-manual.md b/docs/en/data-operate/update-delete/batch-delete-manual.md
similarity index 100%
rename from new-docs/en/data-operate/update-delete/batch-delete-manual.md
rename to docs/en/data-operate/update-delete/batch-delete-manual.md
diff --git a/new-docs/en/data-operate/update-delete/delete-manual.md b/docs/en/data-operate/update-delete/delete-manual.md
similarity index 100%
rename from new-docs/en/data-operate/update-delete/delete-manual.md
rename to docs/en/data-operate/update-delete/delete-manual.md
diff --git a/new-docs/en/data-operate/update-delete/sequence-column-manual.md b/docs/en/data-operate/update-delete/sequence-column-manual.md
similarity index 100%
rename from new-docs/en/data-operate/update-delete/sequence-column-manual.md
rename to docs/en/data-operate/update-delete/sequence-column-manual.md
diff --git a/new-docs/en/data-operate/update-delete/update.md b/docs/en/data-operate/update-delete/update.md
similarity index 100%
rename from new-docs/en/data-operate/update-delete/update.md
rename to docs/en/data-operate/update-delete/update.md
diff --git a/new-docs/en/data-table/advance-usage.md b/docs/en/data-table/advance-usage.md
similarity index 100%
rename from new-docs/en/data-table/advance-usage.md
rename to docs/en/data-table/advance-usage.md
diff --git a/new-docs/en/data-table/basic-usage.md b/docs/en/data-table/basic-usage.md
similarity index 100%
rename from new-docs/en/data-table/basic-usage.md
rename to docs/en/data-table/basic-usage.md
diff --git a/new-docs/en/data-table/best-practice.md b/docs/en/data-table/best-practice.md
similarity index 100%
rename from new-docs/en/data-table/best-practice.md
rename to docs/en/data-table/best-practice.md
diff --git a/new-docs/en/data-table/data-model.md b/docs/en/data-table/data-model.md
similarity index 100%
rename from new-docs/en/data-table/data-model.md
rename to docs/en/data-table/data-model.md
diff --git a/new-docs/en/data-table/data-partition.md b/docs/en/data-table/data-partition.md
similarity index 100%
rename from new-docs/en/data-table/data-partition.md
rename to docs/en/data-table/data-partition.md
diff --git a/new-docs/en/data-table/hit-the-rollup.md b/docs/en/data-table/hit-the-rollup.md
similarity index 100%
rename from new-docs/en/data-table/hit-the-rollup.md
rename to docs/en/data-table/hit-the-rollup.md
diff --git a/new-docs/en/data-table/index/bitmap-index.md b/docs/en/data-table/index/bitmap-index.md
similarity index 100%
rename from new-docs/en/data-table/index/bitmap-index.md
rename to docs/en/data-table/index/bitmap-index.md
diff --git a/docs/en/administrator-guide/bloomfilter.md b/docs/en/data-table/index/bloomfilter.md
similarity index 100%
rename from docs/en/administrator-guide/bloomfilter.md
rename to docs/en/data-table/index/bloomfilter.md
diff --git a/new-docs/en/data-table/index/prefix-index.md b/docs/en/data-table/index/prefix-index.md
similarity index 100%
rename from new-docs/en/data-table/index/prefix-index.md
rename to docs/en/data-table/index/prefix-index.md
diff --git a/docs/en/developer-guide/be-vscode-dev.md b/docs/en/developer-guide/be-vscode-dev.md
index 86c3c7f452..612f6f8710 100644
--- a/docs/en/developer-guide/be-vscode-dev.md
+++ b/docs/en/developer-guide/be-vscode-dev.md
@@ -32,7 +32,7 @@ under the License.
1. Download the doris source code
- URL: [apache/incubator-doris: Apache Doris (Incubating) (github.com)](https://github.com/apache/incubator-doris)
+ URL:[apache/incubator-doris: Apache Doris (Incubating) (github.com)](https://github.com/apache/incubator-doris)
2. Install GCC 8.3.1+, Oracle JDK 1.8+, Python 2.7+, confirm that the gcc, java, python commands point to the correct version, and set the JAVA_HOME environment variable
@@ -132,7 +132,7 @@ Need to create this folder, this is where the be data is stored
mkdir -p /soft/be/storage
```
-3. Open vscode, and open the directory where the be source code is located. In this case, open the directory as **/home/workspace/incubator-doris/**,For details on how to vscode, refer to the online tutorial
+3. Open vscode, and open the directory where the be source code is located. In this case, open the directory as **/home/workspace/incubator-doris/**,For details on how to vscode, refer to the online tutorial
4. Install the vscode ms c++ debugging plug-in, the plug-in identified by the red box in the figure below
diff --git a/docs/en/developer-guide/benchmark-tool.md b/docs/en/developer-guide/benchmark-tool.md
index 74b1ce3da1..536881d7d4 100644
--- a/docs/en/developer-guide/benchmark-tool.md
+++ b/docs/en/developer-guide/benchmark-tool.md
@@ -33,7 +33,7 @@ It can be used to test the performance of some parts of the BE storage layer (fo
## Compilation
-1. To ensure that the environment has been able to successfully compile the Doris ontology, you can refer to [Installation and deployment] (https://doris.apache.org/master/en/installing/compilation.html).
+1. To ensure that the environment has been able to successfully compile the Doris ontology, you can refer to [Installation and deployment] (https://doris.apache.org/master/en/installing/compilation.html)。
2. Execute`run-be-ut.sh`
@@ -53,9 +53,9 @@ The data set is generated according to the following rules.
>int: Random in [1,1000000].
The data character set of string type is uppercase and lowercase English letters, and the length varies according to the type.
-> char: Length random in [1,8].
-> varchar: Length random in [1,128].
-> string: Length random in [1,100000].
+> char: Length random in [1,8]。
+> varchar: Length random in [1,128]。
+> string: Length random in [1,100000]。
`rows_number` indicates the number of rows of data, the default value is `10000`.
diff --git a/docs/en/developer-guide/cpp-diagnostic-code.md b/docs/en/developer-guide/cpp-diagnostic-code.md
index 642ce2595c..dd172d8206 100644
--- a/docs/en/developer-guide/cpp-diagnostic-code.md
+++ b/docs/en/developer-guide/cpp-diagnostic-code.md
@@ -26,7 +26,7 @@ under the License.
# C++ Code Diagnostic
-Doris support to use [Clangd](https://clangd.llvm.org/) and [Clang-Tidy](https://clang.llvm.org/extra/clang-tidy/) to diagnostic code. Clangd and Clang-Tidy already has in [LDB-toolchain](https://doris.apache.org/zh-CN/installing/compilation-with-ldb-toolchain),also can install by self.
+Doris support to use [Clangd](https://clangd.llvm.org/) and [Clang-Tidy](https://clang.llvm.org/extra/clang-tidy/) to diagnostic code. Clangd and Clang-Tidy already has in [LDB-toolchain](https://doris.apache.org/zh-CN/installing/compilation-with-ldb-toolchain),also can install by self.
### Clang-Tidy
Clang-Tidy can do some diagnostic cofig, config file `.clang-tidy` is in Doris root path. Compared with vscode-cpptools, clangd can provide more powerful and accurate code jumping for vscode, and integrates the analysis and quick-fix functions of clang-tidy.
diff --git a/docs/en/developer-guide/fe-idea-dev.md b/docs/en/developer-guide/fe-idea-dev.md
index afc90a0635..4146046a4b 100644
--- a/docs/en/developer-guide/fe-idea-dev.md
+++ b/docs/en/developer-guide/fe-idea-dev.md
@@ -46,16 +46,16 @@ under the License.
Doris build against `thrift` 0.13.0 ( note : `Doris` 0.15 and later version build against `thrift` 0.13.0 , the previous version is still `thrift` 0.9.3)
Windows:
- 1. Download: `http://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.exe`
- 2. Copy: copy the file to `./thirdparty/installed/bin`
+ 1. Download:`http://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.exe`
+ 2. Copy:copy the file to `./thirdparty/installed/bin`
MacOS:
- 1. Download: `brew install thrift@0.13.0`
- 2. Establish soft connection:
+ 1. Download:`brew install thrift@0.13.0`
+ 2. Establish soft connection:
`mkdir -p ./thirdparty/installed/bin`
`ln -s /opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift ./thirdparty/installed/bin/thrift`
- Note: The error that the version cannot be found may be reported when MacOS execute `brew install thrift@0.13.0`. The solution is execute at the terminal as follows:
+ Note:The error that the version cannot be found may be reported when MacOS execute `brew install thrift@0.13.0`. The solution is execute at the terminal as follows:
1. `brew tap-new $USER/local-tap`
2. `brew extract --version='0.13.0' thrift $USER/local-tap`
3. `brew install thrift@0.13.0`
diff --git a/docs/en/developer-guide/fe-vscode-dev.md b/docs/en/developer-guide/fe-vscode-dev.md
index e839449a7f..e90fc05269 100644
--- a/docs/en/developer-guide/fe-vscode-dev.md
+++ b/docs/en/developer-guide/fe-vscode-dev.md
@@ -47,7 +47,7 @@ Create `settings.json` in `.vscode/` , and set settings:
* `"java.configuration.runtimes"`
* `"java.jdt.ls.java.home"` -- must set it to the directory of JDK11+, used for vscode-java plugin
-* `"maven.executable.path"` -- maven path,for maven-language-server plugin
+* `"maven.executable.path"` -- maven path,for maven-language-server plugin
example:
diff --git a/docs/en/extending-doris/audit-plugin.md b/docs/en/ecosystem/audit-plugin.md
similarity index 100%
rename from docs/en/extending-doris/audit-plugin.md
rename to docs/en/ecosystem/audit-plugin.md
diff --git a/new-docs/en/ecosystem/datax.md b/docs/en/ecosystem/datax.md
similarity index 100%
rename from new-docs/en/ecosystem/datax.md
rename to docs/en/ecosystem/datax.md
diff --git a/docs/en/ecosystem/doris-manager/cluster-managenent.md b/docs/en/ecosystem/doris-manager/cluster-managenent.md
new file mode 100644
index 0000000000..5315294005
--- /dev/null
+++ b/docs/en/ecosystem/doris-manager/cluster-managenent.md
@@ -0,0 +1,69 @@
+---
+{
+ "title": "Cluster management",
+ "language": "en"
+}
+---
+
+
+
+# Cluster management
+
+The super administrator and space administrator can mainly perform the following operations under the cluster module:
+
+- View cluster overview
+- View node list
+- Edit parameter configuration
+
+## Cluster overview
+
+### View basic cluster information
+
+The Cluster module provides a cluster-level monitoring panel.
+
+On the home page, click "Cluster" in the navigation bar to enter the cluster function.
+
+
+
+The operation and maintenance monitoring panel provides various performance monitoring indicators of the cluster for users to gain insight into the cluster status. Users can control the start and stop operations of the cluster through buttons in the upper right corner.
+
+### View cluster resource usage
+
+Users can view disk usage through pie charts, and view the number of databases, etc.
+
+## Node list
+
+Displays information about FE nodes, BE nodes, and brokers in the cluster.
+Provides fields including Node ID, Node Type, Host IP, and Node Status.
+
+
+
+## Parameter configuration
+
+Parameter configuration provides the parameter name, parameter type, parameter value type, hot-effectiveness (whether a change takes effect without a restart), and operation fields.
+
+
+
+- **Operation**: Click the "Edit" button to modify the corresponding configuration value and choose how it takes effect; click the "View current value" button to view the current value for each host IP
+
+
+
+
+
diff --git a/docs/en/ecosystem/doris-manager/compiling-deploying.md b/docs/en/ecosystem/doris-manager/compiling-deploying.md
new file mode 100644
index 0000000000..1062cfd728
--- /dev/null
+++ b/docs/en/ecosystem/doris-manager/compiling-deploying.md
@@ -0,0 +1,112 @@
+---
+{
+ "title": "Compile and deploy",
+ "language": "en"
+}
+---
+
+
+
+# Compile and deploy
+
+## Compile
+
+Running the build.sh script in the manager directory generates the installation and runtime package in the output directory under the same path (see the sketch below), which includes:
+1. The Doris Manager runtime package doris-manager.jar
+2. The runtime configuration directory conf
+3. The start script start_manager.sh
+4. The stop script stop_manager.sh
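+
+A minimal sketch of this flow, assuming the repository layout described above (paths are illustrative):
+
+```shell
+# from the manager directory of the Doris Manager source tree
+sh build.sh
+
+# the generated output directory should contain the pieces listed above
+ls output/
+# doris-manager.jar  conf  start_manager.sh  stop_manager.sh
+```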
+
+## Run
+
+### 1 Configuration
+
+Go to the generated installation package, enter the conf directory, and open the configuration file manager.conf. The configuration items to focus on are as follows:
+
+````$xslt
+The service's startup http port
+STUDIO_PORT=8080
+
+The type of database where the backend data is stored, including mysql/h2/postgresql. The default is to support mysql
+MB_DB_TYPE=mysql
+
+Database connection information
+If the h2 database type is configured, this information does not need to be set and the data is stored locally as a file
+h2 data file storage path; stored in the current path by default
+H2_FILE_PATH=
+
+If mysql/postgresql is used, the following connection information needs to be configured
+Database address
+MB_DB_HOST=
+
+Database port
+MB_DB_PORT=3306
+
+Database username
+MB_DB_USER=
+
+Database access password
+MB_DB_PASS=
+
+Database name
+MB_DB_DBNAME=
+
+The path where the service logs are stored; by default they are written to the log folder under the current running path.
+LOG_PATH=
+
+The length of the waiting queue of the web container, the default is 100. The queue is also used as a buffer pool, but it cannot be infinitely long. It not only consumes memory, but also consumes CPU when entering the queue.
+WEB_ACCEPT_COUNT=100
+
+The maximum number of worker threads for the web container, 200 by default. (usually the number of CPU cores * 200)
+WEB_MAX_THREADS=200
+
+The minimum number of working idle threads for the web container, the default is 10. (Appropriately increase some to cope with the sudden increase in traffic)
+WEB_MIN_SPARE_THREADS=10
+
+The maximum number of connections for the web container, the default is 10000. (Appropriately increase some to cope with the sudden increase in traffic)
+WEB_MAX_CONNECTIONS=10000
+
+The maximum number of connections to access the database connection pool, the default is 10
+DB_MAX_POOL_SIZE=20
+
+The minimum number of idle connections to access the database connection pool, the default is 10
+DB_MIN_IDLE=10
+````
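+
+As a concrete illustration, a minimal configuration for a MySQL-backed deployment might look as follows (hosts, credentials and the database name are placeholder values, not defaults):
+
+```shell
+# hypothetical example values for manager.conf
+STUDIO_PORT=8080
+MB_DB_TYPE=mysql
+MB_DB_HOST=192.168.0.10
+MB_DB_PORT=3306
+MB_DB_USER=doris_manager
+MB_DB_PASS=your_password
+MB_DB_DBNAME=doris_manager
+LOG_PATH=/opt/doris-manager/logs
+```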
+
+### 2 Start
+
+After modifying the configuration, go back to the installation package directory and run the following command:
+
+````$xslt
+nohup sh ./start_manager.sh > start.log 2>&1 &
+````
+
+Check the log files under the logs directory to confirm that the service started successfully.
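+
+A quick way to confirm this, assuming the default log location and the STUDIO_PORT configured above, is:
+
+```shell
+# follow the service log (the exact file name depends on your LOG_PATH setting)
+tail -f logs/*.log
+
+# check that the HTTP port is answering (8080 is the default STUDIO_PORT)
+curl -I http://127.0.0.1:8080
+```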
+
+### 3 Use
+
+Doris Manager presets a super administrator user with the following information:
+
+````$xslt
+Username: Admin
+Password: Admin@123
+````
+
+To ensure safe use, please change your password after logging in!
diff --git a/docs/en/ecosystem/doris-manager/initializing.md b/docs/en/ecosystem/doris-manager/initializing.md
new file mode 100644
index 0000000000..3c867d81c6
--- /dev/null
+++ b/docs/en/ecosystem/doris-manager/initializing.md
@@ -0,0 +1,43 @@
+---
+{
+ "title": "Initialize",
+ "language": "en"
+}
+---
+
+
+
+# Initialize
+
+After the deployment is complete, the super administrator needs to complete the local initialization.
+
+## Manage users
+
+The first step of initialization is user management, which mainly covers selecting and configuring the authentication method. Currently Doris Manager supports local user authentication.
+
+
+
+### Local user authentication
+
+Local user authentication is the user system that comes with Doris Manager. User registration is completed by filling in a user name, email address and password. Adding users, modifying their information, deleting them and managing permissions are all handled locally.
+
+
+
+At this point, the local initialization process is complete. Super administrators can create spaces; space administrators can enter a space to manage it and add or invite users for data analysis.
\ No newline at end of file
diff --git a/docs/en/ecosystem/doris-manager/space-list.md b/docs/en/ecosystem/doris-manager/space-list.md
new file mode 100644
index 0000000000..543cb80973
--- /dev/null
+++ b/docs/en/ecosystem/doris-manager/space-list.md
@@ -0,0 +1,234 @@
+---
+{
+ "title": "Space list",
+ "language": "en"
+}
+---
+
+
+
+# Space list
+
+The super administrator can perform the following operations in the space list:
+
+- Create new clusters and take over (host) existing clusters
+
+- Restore or delete unfinished spaces
+
+- Delete completed spaces
+
+The space administrator can mainly perform the following operations in the space list:
+
+- View authorized space information
+
+## Completed space
+
+The super administrator can operate the completed space through the button to the right of the space name. Space administrators can click to enter the space to manage clusters or data in the space.
+
+
+
+## Unfinished space
+
+Doris Manager provides a draft-saving function for the space creation process to record creation flows that have not been finished. Super administrators can view the list of unfinished spaces by switching tabs, and restore or delete them.
+
+
+
+# New space
+
+There are two ways to create a new space: new cluster and cluster hosting.
+
+## New cluster
+
+### 1 Registration space
+
+Space information includes space name, space introduction, and selection of space administrators.
+
+The space name is required and the administrator is optional.
+
+
+
+### 2 Add host
+
+
+
+#### Configure passwordless SSH login
+
+Doris Manager needs to distribute the Agent installation package during installation, so passwordless SSH login must be configured on the server (agent01) where Doris is to be installed.
+
+```shell
+#1. Log in to the server; the account used by Doris Manager and the Agent must be the same
+su - xxx
+pwd
+#2. Generate a key pair on the machine where doris manager is deployed
+ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
+
+#3. Copy the public key to the machine agent01
+scp ~/.ssh/id_rsa.pub root@agent01:~
+
+#4. Log in to agent01 and append the public key to authorized_keys
+cat ~/id_rsa.pub >> .ssh/authorized_keys
+
+#5. After this, the doris manager machine can log in to agent01 without a password
+ssh agent01@xx.xxx.xx.xx
+```
+
+In addition, note that the .ssh directory must have permission 700, and the authorized_keys file and private keys under it must have permission 600; otherwise passwordless login will fail due to permission issues. The known_hosts file is generated after the first login. Also, Doris must be started with the account configured for passwordless login.
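+
+For example, on agent01 the permissions can be tightened as follows (a sketch; adjust the home directory to the deployment account actually used):
+
+```shell
+# run on agent01 as the passwordless-login account
+chmod 700 ~/.ssh
+chmod 600 ~/.ssh/authorized_keys
+# if a private key also exists on this host
+chmod 600 ~/.ssh/id_rsa
+```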
+
+When installing a cluster in Doris Manager, just use the private key of the doris manager machine, i.e. ~/.ssh/id_rsa
+
+For details, please refer to: https://blog.csdn.net/universe_hao/article/details/52296811
+
+#### Host list
+
+Enter host IPs to add new hosts, either one at a time or in batches.
+
+### 3 Installation options
+
+#### Get the installation package
+
+When deploying a cluster through Doris Manager, you need to provide the compiled Doris installation package. You can compile it yourself from the Doris source code, or use the official binary version.
+
+Doris Manager will pull the installation package through http. If you need to build your own http service, please refer to the bottom of the document - Self-built http service.
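+
+Before deploying, it can be worth confirming that the package URL is reachable from the Doris Manager host. A sketch, reusing the package name from the self-built http service section below (the URL is illustrative):
+
+```shell
+# verify that the installation package can be fetched over http
+curl -I http://host:port/download/PALO-0.15.1-rc03-binary.tar.gz
+```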
+
+#### Specify the installation path
+
+Doris and the Doris Manager Agent will be installed in this directory. Make sure this directory is dedicated to Doris and related components.
+
+### 4 Verify the host
+
+The system will automatically perform verification according to the host status. When the verification is completed, the Agent will start sending back the heartbeat, and you can click to proceed to the next step.
+
+
+
+### 5 Planning Nodes
+
+Click the Assign Node button to plan FE/BE/Broker nodes for the host.
+
+
+
+### 6 Configuration Parameters
+
+Configure parameters for the nodes planned in the previous step. You can use the default values or turn on the custom configuration switch to customize the configuration.
+
+### 7 Deploy the cluster
+
+The system will automatically perform verification according to the status of the host installation progress. When the verification is completed, it will start the node and return the heartbeat. You can click to proceed to the next step.
+
+
+
+### 8 Complete the creation
+
+Complete the above steps to complete the new cluster.
+
+
+
+## Cluster hosting
+
+### 1 Registration space
+
+Space information includes space name, space introduction, and selection of space administrators.
+
+The space name is required and the administrator is optional.
+
+### 2 Connect to the cluster
+
+Cluster information includes cluster address, HTTP port, JDBC port, cluster username, and cluster password. Users can fill in according to their own cluster information.
+
+Click the Link Test button to test it.
+
+### 3 Hosting Options
+
+
+
+#### Configure passwordless SSH login
+
+Doris Manager needs to distribute the Agent installation package during installation, so passwordless SSH login must be configured on the server (agent01) where Doris is to be installed.
+
+```shell
+#1. Log in to the server; the account used by Doris Manager and the Agent must be the same
+su - xxx
+pwd
+#2. Generate a key pair on the machine where doris manager is deployed
+ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
+
+#3. Copy the public key to the machine agent01
+scp ~/.ssh/id_rsa.pub root@agent01:~
+
+#4. Log in to agent01 and append the public key to authorized_keys
+cat ~/id_rsa.pub >> .ssh/authorized_keys
+
+#5. After this, the doris manager machine can log in to agent01 without a password
+ssh agent01@xx.xxx.xx.xx
+```
+
+In addition, note that the .ssh directory must have permission 700, and the authorized_keys file and private keys under it must have permission 600; otherwise passwordless login will fail due to permission issues. The known_hosts file is generated after the first login. Also, Doris must be started with the account configured for passwordless login.
+
+When installing a cluster in Doris Manager, just use the private key of the doris manager machine, i.e. ~/.ssh/id_rsa
+
+For details, please refer to: https://blog.csdn.net/universe_hao/article/details/52296811
+
+#### Specify the installation path
+
+Doris and the Doris Manager Agent will be installed in this directory. Make sure this directory is dedicated to Doris and related components.
+
+### 4 Verify the host
+
+The system will automatically perform verification according to the host status. When the verification is completed, the Agent will start sending back the heartbeat, and you can click to proceed to the next step.
+
+
+
+### 5 Verify the cluster
+
+Cluster verification is divided into instance installation verification, instance dependency verification, and instance startup verification. After all verifications succeed, click Next to complete the creation.
+
+
+
+### 6 Complete access
+
+Complete the above steps to complete cluster hosting.
+
+## Self-built http service
+
+### 1 yum source installation
+
+1. Install: `yum install -y nginx`
+2. Start: `systemctl start nginx` (a quick verification sketch follows below)
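+
+A quick check that nginx is running and serving requests, assuming a default installation:
+
+```shell
+systemctl status nginx
+curl -I http://localhost/
+```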
+
+### 2 Source installation
+
+Reference: https://www.runoob.com/linux/nginx-install-setup.html
+
+### 3 Configuration
+
+1. Put the Doris installation package in the nginx root directory: `mv PALO-0.15.1-rc03-binary.tar.gz /usr/share/nginx/html`
+
+2. Modify nginx.conf:
+
+````
+location /download {
+ alias /home/work/nginx/nginx/html/;
+}
+````
+
+After the modification, restart nginx. The installation package can then be accessed at:
+https://host:port/download/PALO-0.15.1-rc03-binary.tar.gz
\ No newline at end of file
diff --git a/docs/en/ecosystem/doris-manager/space-management.md b/docs/en/ecosystem/doris-manager/space-management.md
new file mode 100644
index 0000000000..c6df3098e4
--- /dev/null
+++ b/docs/en/ecosystem/doris-manager/space-management.md
@@ -0,0 +1,53 @@
+---
+{
+ "title": "Space management",
+ "language": "en"
+}
+---
+
+
+
+# Space management
+
+If you are a Doris Manager space administrator, you have permission to configure and manage the space, its members, roles, etc.
+
+## Space
+
+The space administrator can mainly perform the following operations under the space module:
+
+- Edit saved space information
+
+### Edit space information
+
+If the space information is complete, space administrators can view and edit space-related information here, including space name, space introduction, space administrator, etc.
+
+
+
+## Members
+
+In the secondary navigation bar of the "Space Management" interface, select "Members" to enter the member management page. On this page you can view all users in the current space and remove members.
+
+
+
+## Role
+
+Space administrators can view all roles and role members by clicking the "Roles" button in the navigation bar. New members by default belong to the "Space Member" role and the "Space Admin" role. The default roles are "Space Admin" and "Space Member" and cannot be changed by other administrators.
+
+
diff --git a/docs/en/ecosystem/doris-manager/system-settings.md b/docs/en/ecosystem/doris-manager/system-settings.md
new file mode 100644
index 0000000000..bae7c2a16f
--- /dev/null
+++ b/docs/en/ecosystem/doris-manager/system-settings.md
@@ -0,0 +1,91 @@
+---
+{
+ "title": "System settings",
+ "language": "en"
+}
+---
+
+
+
+# System settings
+
+The super administrator can mainly perform the following operations under the platform module:
+
+- Perform relevant operations on platform users
+- Have the highest level of authority on the platform
+
+User permission description
+
+## Users
+
+### User management under local authentication
+
+Click the Add User button to create a new user with username and email information.
+
+Doris Manager will assign a temporary password to the new user. The new user needs to log in with the set username/email and the temporary password. After logging in, a new password can be created in "Account Settings".
+
+
+
+
+
+
+### Edit User
+
+Super administrators can manage users, including editing user information, resetting user passwords, and deactivating users.
+
+#### Edit user information
+
+Click the user and select "Edit" to modify the user name and email address. If the email address is updated, the user needs to log in with the new email address; the password is not changed.
+
+
+
+#### Reset user password
+
+Click to select "Reset Password", and after confirming this operation, Doris Manger will reassign a temporary password for the user. The user needs to log in with the set email address and the new temporary password. After logging in, you can go to "Account Settings" Create a new password.
+
+
+#### Deactivate/Activate User
+
+Click "Deactivate User"; after confirming the operation, the user's status changes from active to inactive. Deactivated users will not be able to log in to Doris Manager.
+
+Click "Activate User" on the right side of the user to reactivate the user. The user's status changes back to active and the user will be able to log in to Doris Manager again.
+
+Note that super administrators cannot deactivate their own user accounts, and there must be at least one non-deactivated super administrator user in the system.
+
+
+
+
+## User permission description
+
+### Super administrator privileges
+
+| | Create | Edit | Delete | View |
+| :---- | :----- | :--- | :----- | :--- |
+| User | ✓ | ✓ | ✓ | ✓ |
+| Roles | ✓ | ✓ | ✓ | ✓ |
+| Space | ✓ | ✓ | ✓ | ✓ |
+
+### Space administrator permissions
+
+| | Create | Edit | Delete | View |
+| :---- | :----- | :--- | :----- | :--- |
+| User | X | X | X | X |
+| Roles | X | X | X | ✓ |
+| Space | X | ✓ | X | ✓ |
diff --git a/new-docs/en/ecosystem/external-table/doris-on-es.md b/docs/en/ecosystem/external-table/doris-on-es.md
similarity index 100%
rename from new-docs/en/ecosystem/external-table/doris-on-es.md
rename to docs/en/ecosystem/external-table/doris-on-es.md
diff --git a/new-docs/en/ecosystem/external-table/hive-of-doris.md b/docs/en/ecosystem/external-table/hive-of-doris.md
similarity index 100%
rename from new-docs/en/ecosystem/external-table/hive-of-doris.md
rename to docs/en/ecosystem/external-table/hive-of-doris.md
diff --git a/new-docs/en/ecosystem/external-table/iceberg-of-doris.md b/docs/en/ecosystem/external-table/iceberg-of-doris.md
similarity index 100%
rename from new-docs/en/ecosystem/external-table/iceberg-of-doris.md
rename to docs/en/ecosystem/external-table/iceberg-of-doris.md
diff --git a/new-docs/en/ecosystem/external-table/odbc-of-doris.md b/docs/en/ecosystem/external-table/odbc-of-doris.md
similarity index 100%
rename from new-docs/en/ecosystem/external-table/odbc-of-doris.md
rename to docs/en/ecosystem/external-table/odbc-of-doris.md
diff --git a/new-docs/en/ecosystem/flink-doris-connector.md b/docs/en/ecosystem/flink-doris-connector.md
similarity index 100%
rename from new-docs/en/ecosystem/flink-doris-connector.md
rename to docs/en/ecosystem/flink-doris-connector.md
diff --git a/new-docs/en/ecosystem/logstash.md b/docs/en/ecosystem/logstash.md
similarity index 100%
rename from new-docs/en/ecosystem/logstash.md
rename to docs/en/ecosystem/logstash.md
diff --git a/docs/en/extending-doris/plugin-development-manual.md b/docs/en/ecosystem/plugin-development-manual.md
similarity index 100%
rename from docs/en/extending-doris/plugin-development-manual.md
rename to docs/en/ecosystem/plugin-development-manual.md
diff --git a/new-docs/en/ecosystem/seatunnel/flink-sink.md b/docs/en/ecosystem/seatunnel/flink-sink.md
similarity index 100%
rename from new-docs/en/ecosystem/seatunnel/flink-sink.md
rename to docs/en/ecosystem/seatunnel/flink-sink.md
diff --git a/new-docs/en/ecosystem/seatunnel/spark-sink.md b/docs/en/ecosystem/seatunnel/spark-sink.md
similarity index 100%
rename from new-docs/en/ecosystem/seatunnel/spark-sink.md
rename to docs/en/ecosystem/seatunnel/spark-sink.md
diff --git a/new-docs/en/ecosystem/spark-doris-connector.md b/docs/en/ecosystem/spark-doris-connector.md
similarity index 100%
rename from new-docs/en/ecosystem/spark-doris-connector.md
rename to docs/en/ecosystem/spark-doris-connector.md
diff --git a/new-docs/en/ecosystem/udf/contribute-udf.md b/docs/en/ecosystem/udf/contribute-udf.md
similarity index 100%
rename from new-docs/en/ecosystem/udf/contribute-udf.md
rename to docs/en/ecosystem/udf/contribute-udf.md
diff --git a/new-docs/en/ecosystem/udf/native-user-defined-function.md b/docs/en/ecosystem/udf/native-user-defined-function.md
similarity index 100%
rename from new-docs/en/ecosystem/udf/native-user-defined-function.md
rename to docs/en/ecosystem/udf/native-user-defined-function.md
diff --git a/new-docs/en/ecosystem/udf/remote-user-defined-function.md b/docs/en/ecosystem/udf/remote-user-defined-function.md
similarity index 100%
rename from new-docs/en/ecosystem/udf/remote-user-defined-function.md
rename to docs/en/ecosystem/udf/remote-user-defined-function.md
diff --git a/docs/en/extending-doris/datax.md b/docs/en/extending-doris/datax.md
deleted file mode 100644
index c8762d68a4..0000000000
--- a/docs/en/extending-doris/datax.md
+++ /dev/null
@@ -1,104 +0,0 @@
----
-{
- "title": "DataX doriswriter",
- "language": "en"
-}
----
-
-
-
-# DataX doriswriter
-
-[DataX](https://github.com/alibaba/DataX) doriswriter plug-in, used to synchronize data from other data sources to Doris through DataX.
-
-The plug-in uses Doris' Stream Load function to synchronize and import data. It needs to be used with DataX service.
-
-## About DataX
-
-DataX is an open source version of Alibaba Cloud DataWorks data integration, an offline data synchronization tool/platform widely used in Alibaba Group. DataX implements efficient data synchronization functions between various heterogeneous data sources including MySQL, Oracle, SqlServer, Postgre, HDFS, Hive, ADS, HBase, TableStore (OTS), MaxCompute (ODPS), Hologres, DRDS, etc.
-
-More details can be found at: `https://github.com/alibaba/DataX/`
-
-## Usage
-
-The code of DataX doriswriter plug-in can be found [here](https://github.com/apache/incubator-doris/tree/master/extension/DataX).
-
-This directory is the doriswriter plug-in development environment of Alibaba DataX.
-
-Because the doriswriter plug-in depends on some modules in the DataX code base, and these module dependencies are not submitted to the official Maven repository, when we develop the doriswriter plug-in, we need to download the complete DataX code base to facilitate our development and compilation of the doriswriter plug-in.
-
-### Directory structure
-
-1. `doriswriter/`
-
- This directory is the code directory of doriswriter, and this part of the code should be in the Doris code base.
-
- The help doc can be found in `doriswriter/doc`
-
-2. `init-env.sh`
-
- The script mainly performs the following steps:
-
- 1. Git clone the DataX code base to the local
- 2. Softlink the `doriswriter/` directory to `DataX/doriswriter`.
- 3. Add `doriswriter` to the original `DataX/pom.xml`
- 4. Change httpclient version from 4.5 to 4.5.13 in DataX/core/pom.xml
-
- > httpclient v4.5 can not handle redirect 307 correctly.
-
- After that, developers can enter `DataX/` for development. And the changes in the `DataX/doriswriter` directory will be reflected in the `doriswriter/` directory, which is convenient for developers to submit code.
-
-### How to build
-
-1. Run `init-env.sh`
-2. Modify code of doriswriter in `DataX/doriswriter` if you need.
-3. Build doriswriter
-
- 1. Build doriswriter along:
-
- `mvn clean install -pl plugin-rdbms-util,doriswriter -DskipTests`
-
- 2. Build DataX:
-
- `mvn package assembly:assembly -Dmaven.test.skip=true`
-
- The output will be in `target/datax/datax/`.
-
- > hdfsreader, hdfswriter and oscarwriter needs some extra jar packages. If you don't need to use these components, you can comment out the corresponding module in DataX/pom.xml.
-
- 3. Compilation error
-
- If you encounter the following compilation errors:
-
- ```
- Could not find artifact com.alibaba.datax:datax-all:pom:0.0.1-SNAPSHOT ...
- ```
-
- You can try the following solutions:
-
- 1. Download [alibaba-datax-maven-m2-20210928.tar.gz](https://doris-thirdparty-repo.bj.bcebos.com/thirdparty/alibaba-datax-maven-m2-20210928.tar.gz)
- 2. After decompression, copy the resulting `alibaba/datax/` directory to `.m2/repository/com/alibaba/` corresponding to the maven used.
- 3. Try to compile again.
-
-4. Commit code of doriswriter in `doriswriter` if you need.
-
-### Example
-
-For instructions on using the doriswriter plug-in, please refer to [here](https://github.com/apache/incubator-doris/blob/master/extension/DataX/doriswriter/doc/doriswriter.md).
diff --git a/docs/en/extending-doris/doris-on-es.md b/docs/en/extending-doris/doris-on-es.md
deleted file mode 100644
index 79aa207109..0000000000
--- a/docs/en/extending-doris/doris-on-es.md
+++ /dev/null
@@ -1,589 +0,0 @@
----
-{
- "title": "Doris On ES",
- "language": "en"
-}
----
-
-
-
-# Doris On ES
-
-Doris-On-ES not only take advantage of Doris's distributed query planning capability but also ES (Elastic search)'s full-text search capability, provide a more complete OLAP scenario solution:
-
-1. Multi-index Distributed Join Query in ES
-2. Joint Query of Tables in Doris and ES, More Complex Full-Text Retrieval and Filtering
-
-This document mainly introduces the realization principle and usage of this function.
-
-## Glossary
-
-### Noun in Doris
-
-* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access.
-* BE: Backend, Doris's back-end node. Responsible for query execution and data storage.
-
-### Noun in ES
-
-* DataNode: The data storage and computing node of ES.
-* MasterNode: The Master node of ES, which manages metadata, nodes, data distribution, etc.
-* scroll: The built-in data set cursor feature of ES for streaming scanning and filtering of data.
-* _source: contains the original JSON document body that was passed at index time
-* doc_values: store the same values as the _source but in a column-oriented fashion
-* keyword: string datatype in ES, but the content not analyzed by analyzer
-* text: string datatype in ES, the content analyzed by analyzer
-
-
-## How To Use
-
-### Create ES Index
-
-```
-PUT test
-{
- "settings": {
- "index": {
- "number_of_shards": "1",
- "number_of_replicas": "0"
- }
- },
- "mappings": {
- "doc": { // There is no need to specify the type when creating indexes after ES7.x version, there is one and only type of `_doc`
- "properties": {
- "k1": {
- "type": "long"
- },
- "k2": {
- "type": "date"
- },
- "k3": {
- "type": "keyword"
- },
- "k4": {
- "type": "text",
- "analyzer": "standard"
- },
- "k5": {
- "type": "float"
- }
- }
- }
- }
-}
-```
-
-### Add JSON documents to ES index
-
-```
-POST /_bulk
-{"index":{"_index":"test","_type":"doc"}}
-{ "k1" : 100, "k2": "2020-01-01", "k3": "Trying out Elasticsearch", "k4": "Trying out Elasticsearch", "k5": 10.0}
-{"index":{"_index":"test","_type":"doc"}}
-{ "k1" : 100, "k2": "2020-01-01", "k3": "Trying out Doris", "k4": "Trying out Doris", "k5": 10.0}
-{"index":{"_index":"test","_type":"doc"}}
-{ "k1" : 100, "k2": "2020-01-01", "k3": "Doris On ES", "k4": "Doris On ES", "k5": 10.0}
-{"index":{"_index":"test","_type":"doc"}}
-{ "k1" : 100, "k2": "2020-01-01", "k3": "Doris", "k4": "Doris", "k5": 10.0}
-{"index":{"_index":"test","_type":"doc"}}
-{ "k1" : 100, "k2": "2020-01-01", "k3": "ES", "k4": "ES", "k5": 10.0}
-```
-
-### Create external ES table
-
-```
-CREATE EXTERNAL TABLE `test` (
- `k1` bigint(20) COMMENT "",
- `k2` datetime COMMENT "",
- `k3` varchar(20) COMMENT "",
- `k4` varchar(100) COMMENT "",
- `k5` float COMMENT ""
-) ENGINE=ELASTICSEARCH // ENGINE must be Elasticsearch
-PROPERTIES (
-"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200",
-"index" = "test",
-"type" = "doc",
-
-"user" = "root",
-"password" = "root"
-);
-```
-
-The following parameters are accepted by ES table:
-
-Parameter | Description
----|---
-**hosts** | ES Cluster Connection Address, maybe one or more node, load-balance is also accepted
-**index** | the related ES index name, alias is supported, and if you use doc_value, you need to use the real name
-**type** | the type for this index, If not specified, `_doc` will be used
-**user** | username for ES
-**password** | password for the user
-
-* For clusters before 7.x, please pay attention to choosing the correct type when building the table
-* The authentication method only supports Http Basic authentication, need to ensure that this user has access to: /\_cluster/state/, \_nodes/http and other paths and index read permissions;The cluster has not turned on security authentication, and the user name and password do not need to be set
-* The column names in the Doris table need to exactly match the field names in the ES, and the field types should be as consistent as possible
-* **ENGINE** must be: **Elasticsearch**
-
-##### Filter to push down
-
-An important ability of `Doris On ES` is the push-down of filter conditions: The filtering conditions are pushed to ES, so that only the data that really meets the conditions will be returned, which can significantly improve query performance and reduce CPU, memory, and IO utilization of Doris and ES
-
-The following operators (Operators) will be optimized to the following ES Query:
-
-| SQL syntax | ES 5.x+ syntax |
-|-------|:---:|
-| = | term query|
-| in | terms query |
-| > , < , >= , <= | range query |
-| and | bool.filter |
-| or | bool.should |
-| not | bool.must_not |
-| not in | bool.must_not + terms query |
-| is\_not\_null | exists query |
-| is\_null | bool.must_not + exists query |
-| esquery | QueryDSL in ES native json form |
-
-##### Data type mapping
-
-Doris\ES | byte | short | integer | long | float | double| keyword | text | date
-------------- | ------------- | ------ | ---- | ----- | ---- | ------ | ----| --- | --- |
-tinyint | √ | | | | | | | |
-smallint | √ | √ | | | | | | |
-int | √ | √ | √ | | | | | |
-bigint | √ | √ | √ | √ | | | | |
-float | | | | | √ | | | |
-double | | | | | | √ | | |
-char | | | | | | | √ | √ |
-varchar | | | | | | | √ | √ |
-date | | | | | | | | | √|
-datetime | | | | | | | | | √|
-
-
-### Enable column scan to optimize query speed(enable\_docvalue\_scan=true)
-
-```
-CREATE EXTERNAL TABLE `test` (
- `k1` bigint(20) COMMENT "",
- `k2` datetime COMMENT "",
- `k3` varchar(20) COMMENT "",
- `k4` varchar(100) COMMENT "",
- `k5` float COMMENT ""
-) ENGINE=ELASTICSEARCH
-PROPERTIES (
-"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200",
-"index" = "test",
-"type" = "doc",
-"user" = "root",
-"password" = "root",
-
-"enable_docvalue_scan" = "true"
-);
-```
-
-Parameter Description:
-
-Parameter | Description
----|---
-**enable\_docvalue\_scan** | whether to enable ES/Lucene column storage to get the value of the query field, the default is false
-
-Doris obtains data from ES following the following two principles:
-
-* **Best effort**: Automatically detect whether the column to be read has column storage enabled (doc_value: true).If all the fields obtained have column storage, Doris will obtain the values of all fields from the column storage(doc_values)
-* **Automatic downgrade**: If the field to be obtained has one or more field that is not have doc_value, the values of all fields will be parsed from the line store `_source`
-
-##### Advantage:
-
-By default, Doris On ES will get all the required columns from the row storage, which is `_source`, and the storage of `_source` is the origin json format document, Inferior to column storage in batch read performance, Especially obvious when only a few columns are needed, When only a few columns are obtained, the performance of docvalue is about ten times that of _source
-
-##### Tip
-1. Fields of type `text` are not column-stored in ES, so if the value of the field to be obtained has a field of type `text`, it will be automatically downgraded to get from `_source`
-2. In the case of too many fields obtained (`>= 25`), the performance of getting field values from `docvalue` will be basically the same as getting field values from `_source`
-
-
-### Detect keyword type field(enable\_keyword\_sniff=true)
-
-```
-CREATE EXTERNAL TABLE `test` (
- `k1` bigint(20) COMMENT "",
- `k2` datetime COMMENT "",
- `k3` varchar(20) COMMENT "",
- `k4` varchar(100) COMMENT "",
- `k5` float COMMENT ""
-) ENGINE=ELASTICSEARCH
-PROPERTIES (
-"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200",
-"index" = "test",
-"type" = "doc",
-"user" = "root",
-"password" = "root",
-
-"enable_keyword_sniff" = "true"
-);
-```
-
-Parameter Description:
-
-Parameter | Description
----|---
-**enable\_keyword\_sniff** | Whether to detect the string type (**text**) `fields` in ES to obtain additional not analyzed (**keyword**) field name(multi-fields mechanism)
-
-You can directly import data without creating an index. At this time, ES will automatically create a new index in ES, For a field of type string, a field of type `text` and field of type `keyword` will be created meantime, This is the multi-fields feature of ES, mapping is as follows:
-
-```
-"k4": {
- "type": "text",
- "fields": {
- "keyword": {
- "type": "keyword",
- "ignore_above": 256
- }
- }
-}
-```
-When performing conditional filtering on k4, for example =, Doris On ES will convert the query to ES's TermQuery
-
-SQL filter:
-
-```
-k4 = "Doris On ES"
-```
-
-The query DSL converted into ES is:
-
-```
-"term" : {
- "k4": "Doris On ES"
-
-}
-```
-
-Because the first field type of k4 is `text`, when data is imported, it will perform word segmentation processing according to the word segmentator set by k4 (if it is not set, it is the standard word segmenter) to get three Term of doris, on, and es, as follows ES analyze API analysis:
-
-```
-POST /_analyze
-{
- "analyzer": "standard",
- "text": "Doris On ES"
-}
-```
-The result of analyzed is:
-
-```
-{
- "tokens": [
- {
- "token": "doris",
- "start_offset": 0,
- "end_offset": 5,
- "type": "",
- "position": 0
- },
- {
- "token": "on",
- "start_offset": 6,
- "end_offset": 8,
- "type": "",
- "position": 1
- },
- {
- "token": "es",
- "start_offset": 9,
- "end_offset": 11,
- "type": "",
- "position": 2
- }
- ]
-}
-```
-The query uses:
-
-```
-"term" : {
- "k4": "Doris On ES"
-}
-```
-This term does not match any term in the dictionary, and will not return any results, enable `enable_keyword_sniff: true` will automatically convert `k4 = "Doris On ES"` into `k4.keyword = "Doris On ES"`to exactly match SQL semantics, The converted ES query DSL is:
-
-```
-"term" : {
- "k4.keyword": "Doris On ES"
-}
-```
-
-The type of `k4.keyword` is `keyword`, and writing data into ES is a complete term, so it can be matched
-
-### Enable node discovery mechanism, default is true(es\_nodes\_discovery=true)
-
-```
-CREATE EXTERNAL TABLE `test` (
- `k1` bigint(20) COMMENT "",
- `k2` datetime COMMENT "",
- `k3` varchar(20) COMMENT "",
- `k4` varchar(100) COMMENT "",
- `k5` float COMMENT ""
-) ENGINE=ELASTICSEARCH
-PROPERTIES (
-"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200",
-"index" = "test",
-"type" = "doc",
-"user" = "root",
-"password" = "root",
-
-"nodes_discovery" = "true"
-);
-```
-
-Parameter Description:
-
-Parameter | Description
----|---
-**es\_nodes\_discovery** | Whether or not to enable ES node discovery. the default is true
-
-Doris would find all available related data nodes (shards allocated on)from ES when this is true. Just set false if address of ES data nodes are not accessed by Doris BE, eg. the ES cluster is deployed in the intranet which isolated from your public Internet, and users access through a proxy
-
-### Whether ES cluster enables https access mode, if enabled should set value with`true`, default is false(http\_ssl\_enable=true)
-
-```
-CREATE EXTERNAL TABLE `test` (
- `k1` bigint(20) COMMENT "",
- `k2` datetime COMMENT "",
- `k3` varchar(20) COMMENT "",
- `k4` varchar(100) COMMENT "",
- `k5` float COMMENT ""
-) ENGINE=ELASTICSEARCH
-PROPERTIES (
-"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200",
-"index" = "test",
-"type" = "doc",
-"user" = "root",
-"password" = "root",
-
-"http_ssl_enabled" = "true"
-);
-```
-
-Parameter Description:
-
-Parameter | Description
----|---
-**http\_ssl\_enabled** | Whether ES cluster enables https access mode
-
-The current FE/BE implementation is to trust all, this is a temporary solution, and the real user configuration certificate will be used later
-
-### Query usage
-
-After create the ES external table in Doris, there is no difference except that the data model (rollup, pre-aggregation, materialized view, etc.) with other table in Doris
-
-#### Basic usage
-
-```
-select * from es_table where k1 > 1000 and k3 ='term' or k4 like 'fu*z_'
-```
-
-#### Extended esquery(field, QueryDSL)
-Through the `esquery(field, QueryDSL)` function, some queries that cannot be expressed in sql, such as match_phrase, geoshape, etc., are pushed down to the ES for filtering. The first column name parameter of `esquery` is used to associate the `index`, the second This parameter is the basic JSON expression of ES's `Query DSL`, which is contained in curly braces `{}`, and there can be only one root key of json, such as match_phrase, geo_shape, bool, etc.
-Match query:
-
-```
-select * from es_table where esquery(k4, '{
- "match": {
- "k4": "doris on es"
- }
- }');
-```
-Geo related queries:
-
-```
-select * from es_table where esquery(k4, '{
- "geo_shape": {
- "location": {
- "shape": {
- "type": "envelope",
- "coordinates": [
- [
- 13,
- 53
- ],
- [
- 14,
- 52
- ]
- ]
- },
- "relation": "within"
- }
- }
- }');
-```
-
-Bool query:
-
-```
-select * from es_table where esquery(k4, ' {
- "bool": {
- "must": [
- {
- "terms": {
- "k1": [
- 11,
- 12
- ]
- }
- },
- {
- "terms": {
- "k2": [
- 100
- ]
- }
- }
- ]
- }
- }');
-```
-
-
-
-## Principle
-
-```
-+----------------------------------------------+
-| |
-| Doris +------------------+ |
-| | FE +--------------+-------+
-| | | Request Shard Location
-| +--+-------------+-+ | |
-| ^ ^ | |
-| | | | |
-| +-------------------+ +------------------+ | |
-| | | | | | | | |
-| | +----------+----+ | | +--+-----------+ | | |
-| | | BE | | | | BE | | | |
-| | +---------------+ | | +--------------+ | | |
-+----------------------------------------------+ |
- | | | | | | |
- | | | | | | |
- | HTTP SCROLL | | HTTP SCROLL | |
-+-----------+---------------------+------------+ |
-| | v | | v | | |
-| | +------+--------+ | | +------+-------+ | | |
-| | | | | | | | | | |
-| | | DataNode | | | | DataNode +<-----------+
-| | | | | | | | | | |
-| | | +<--------------------------------+
-| | +---------------+ | | |--------------| | | |
-| +-------------------+ +------------------+ | |
-| Same Physical Node | |
-| | |
-| +-----------------------+ | |
-| | | | |
-| | MasterNode +<-----------------+
-| ES | | |
-| +-----------------------+ |
-+----------------------------------------------+
-
-
-```
-
-1. FE requests the hosts specified by the table to obtain node‘s HTTP port, shards location of the index. If the request fails, it will traverse the host list sequentially until it succeeds or fails completely.
-
-2. When querying, the query plan will be generated and sent to the corresponding BE node according to some node information obtained by FE and metadata information of index.
-
-3. The BE node requests locally deployed ES nodes in accordance with the `proximity principle`. The BE receives data concurrently from each fragment of ES index in the `HTTP Scroll` mode.
-
-4. After calculating the result, return it to client
-
-## Best Practices
-
-### Suggestions for using Date type fields
-
-The use of Datetype fields in ES is very flexible, but in Doris On ES, if the type of the Date type field is not set properly, it will cause the filter condition cannot be pushed down.
-
-When creating an index, do maximum format compatibility with the setting of the Date type format:
-
-```
- "dt": {
- "type": "date",
- "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
- }
-```
-
-When creating this field in Doris, it is recommended to set it to `date` or `datetime`, and it can also be set to `varchar` type. The following SQL statements can be used to directly push the filter condition down to ES
-
-
-```
-select * from doe where k2 > '2020-06-21';
-
-select * from doe where k2 < '2020-06-21 12:00:00';
-
-select * from doe where k2 < 1593497011;
-
-select * from doe where k2 < now();
-
-select * from doe where k2 < date_format(now(), '%Y-%m-%d');
-```
-
-`Notice`:
-
-* If you don’t set the format for the time type field In ES, the default format for Date-type field is
-
-```
-strict_date_optional_time||epoch_millis
-```
-* If the date field indexed into ES is unix timestamp, it needs to be converted to `ms`, and the internal timestamp of ES is processed according to `ms` unit, otherwise Doris On ES will display wrong column data
-
-### Fetch ES metadata field `_id`
-
-When indexing documents without specifying `_id`, ES will assign a globally unique `_id` field to each document. Users can also specify a `_id` with special represent some business meaning for the document when indexing; if needed, Doris On ES can get the value of this field by adding the `_id` field of type `varchar` when creating the ES external table
-
-```
-CREATE EXTERNAL TABLE `doe` (
- `_id` varchar COMMENT "",
- `city` varchar COMMENT ""
-) ENGINE=ELASTICSEARCH
-PROPERTIES (
-"hosts" = "http://127.0.0.1:8200",
-"user" = "root",
-"password" = "root",
-"index" = "doe",
-"type" = "doc"
-}
-```
-`Notice`:
-
-1. The filtering condition of the `_id` field only supports two types: `=` and `in`
-2. The `_id` field can only be of type `varchar`
-
-## Q&A
-
-1. ES Version Requirements
-
- The main version of ES is larger than 5. The scanning mode of ES data before 2. X and after 5. x is different. At present, the scanning mode of ES data after 5. x is supported.
-
-2. Does ES Cluster Support X-Pack Authentication
-
- Support all ES clusters using HTTP Basic authentication
-
-3. Some queries are much slower than requesting ES
-
- Yes, for example, query related to _count, etc., the ES internal will directly read the number of documents that meet the requirements of the relevant metadata, without the need to filter the real data.
-
-4. Whether the aggregation operation can be pushed down
-
- At present, Doris On ES does not support push-down operations such as sum, avg, min/max, etc., all documents satisfying the conditions are obtained from the ES in batch flow, and then calculated in Doris
diff --git a/docs/en/extending-doris/flink-doris-connector.md b/docs/en/extending-doris/flink-doris-connector.md
deleted file mode 100644
index acd7c016f8..0000000000
--- a/docs/en/extending-doris/flink-doris-connector.md
+++ /dev/null
@@ -1,496 +0,0 @@
----
-{
- "title": "Flink Doris Connector",
- "language": "en"
-}
----
-
-
-
-# Flink Doris Connector
-
-- The Flink Doris Connector can support operations (read, insert, modify, delete) data stored in Doris through Flink.
-
-Github: https://github.com/apache/incubator-doris-flink-connector
-
-* `Doris` table can be mapped to `DataStream` or `Table`.
-
->**Note:**
->
->1. Modification and deletion are only supported on the Unique Key model
->2. The current deletion is to support Flink CDC to access data to achieve automatic deletion. If it is to delete other data access methods, you need to implement it yourself. For the data deletion usage of Flink CDC, please refer to the last section of this document
-
-## Version Compatibility
-
-| Connector | Flink | Doris | Java | Scala |
-| --------- | ----- | ------ | ---- | ----- |
-| 1.11.6-2.12-xx | 1.11.x | 0.13+ | 8 | 2.12 |
-| 1.12.7-2.12-xx | 1.12.x | 0.13.+ | 8 | 2.12 |
-| 1.13.5-2.12-xx | 1.13.x | 0.13.+ | 8 | 2.12 |
-| 1.14.4-2.12-xx | 1.14.x | 0.13.+ | 8 | 2.12 |
-
-## Build and Install
-
-Ready to work
-
-1.Modify the `custom_env.sh.tpl` file and rename it to `custom_env.sh`
-
-2.Specify the thrift installation directory
-
-```bash
-##source file content
-#export THRIFT_BIN=
-#export MVN_BIN=
-#export JAVA_HOME=
-
-##amend as below,MacOS as an example
-export THRIFT_BIN=/opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift
-#export MVN_BIN=
-#export JAVA_HOME=
-
-Install `thrift` 0.13.0 (Note: `Doris` 0.15 and the latest builds are based on `thrift` 0.13.0, previous versions are still built with `thrift` 0.9.3)
-Windows:
- 1. Download: `http://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.exe`
- 2. Modify thrift-0.13.0.exe to thrift
-
-MacOS:
- 1. Download: `brew install thrift@0.13.0`
- 2. default address: /opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift
-
-Note: Executing `brew install thrift@0.13.0` on MacOS may report an error that the version cannot be found. The solution is as follows, execute it in the terminal:
- 1. `brew tap-new $USER/local-tap`
- 2. `brew extract --version='0.13.0' thrift $USER/local-tap`
- 3. `brew install thrift@0.13.0`
- Reference link: `https://gist.github.com/tonydeng/02e571f273d6cce4230dc8d5f394493c`
-
-Linux:
- 1.Download source package: `wget https://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.tar.gz`
- 2.Install dependencies: `yum install -y autoconf automake libtool cmake ncurses-devel openssl-devel lzo-devel zlib-devel gcc gcc-c++`
- 3.`tar zxvf thrift-0.13.0.tar.gz`
- 4.`cd thrift-0.13.0`
- 5.`./configure --without-tests`
- 6.`make`
- 7.`make install`
- Check the version after installation is complete: thrift --version
- Note: If you have compiled Doris, you do not need to install thrift, you can directly use $DORIS_HOME/thirdparty/installed/bin/thrift
-```
-
-Execute following command in source dir:
-
-```bash
-sh build.sh
-
- Usage:
- build.sh --flink version --scala version # specify flink and scala version
- build.sh --tag # this is a build from tag
- e.g.:
- build.sh --flink 1.14.3 --scala 2.12
- build.sh --tag
-
-Then, for example, execute the command to compile according to the version you need:
-sh build.sh --flink 1.14.3 --scala 2.12
-```
-
-> Note: If you check out the source code from tag, you can just run `sh build.sh --tag` without specifying the Flink and Scala versions. This is because the version in the tag source code is fixed. For example, `1.13.5_2.12-1.0.1` means Flink version 1.13.5, scala version 2.12, and connector version 1.0.1.
-
-After successful compilation, the file `flink-doris-connector-1.14_2.12-1.0.0-SNAPSHOT.jar` will be generated in the `output/` directory. Copy this file to `ClassPath` in `Flink` to use `flink-doris-connector`. For example, `Flink` running in `Local` mode, put this file in the `jars/` folder. `Flink` running in `Yarn` cluster mode, put this file in the pre-deployment package.
-
-**Remarks:**
-
-1. Doris FE should be configured to enable http v2 in the configuration
-2. Scala version currently supports 2.12 and 2.11
-
-conf/fe.conf
-
-```
-enable_http_server_v2 = true
-```
-## Using Maven
-
-Add flink-doris-connector and necessary Flink Maven dependencies
-
-Flink 1.13.* and earlier version
-
-```
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-java</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-streaming-java_${scala.version}</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-clients_${scala.version}</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<!-- flink table -->
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-table-common</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-table-api-java-bridge_${scala.version}</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-table-planner-blink_${scala.version}</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<!-- flink-doris-connector -->
-<dependency>
-  <groupId>org.apache.doris</groupId>
-  <artifactId>flink-doris-connector-1.13_2.12</artifactId>
-  <version>1.0.3</version>
-</dependency>
-```
-
-Flink 1.14.* version
-
-```
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-java</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-streaming-java_${scala.version}</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-clients_${scala.version}</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<!-- flink table -->
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-table-planner_${scala.version}</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<!-- flink-doris-connector -->
-<dependency>
-  <groupId>org.apache.doris</groupId>
-  <artifactId>flink-doris-connector-1.14_2.12</artifactId>
-  <version>1.0.3</version>
-</dependency>
-```
-
-**Notes**
-
-1.Please replace the corresponding Connector and Flink dependency versions according to different Flink and Scala versions.
-2.At present, only the scala2.12 version of the package is provided in maven. The 2.11 version of the package needs to be compiled by itself. Please refer to the compilation and installation section above.
-
-## How to use
-
-There are three ways to use Flink Doris Connector.
-
-* SQL
-* DataStream
-* DataSet
-
-### Parameters Configuration
-
-Flink Doris Connector Sink writes data to Doris by the `Stream Load`, and also supports the configurations of `Stream Load`
-
-* SQL configured by `sink.properties.` in the `WITH`
-* DataStream configured by `DorisExecutionOptions.builder().setStreamLoadProp(Properties)`
-
-
-### SQL
-
-* Source
-
-```sql
-CREATE TABLE flink_doris_source (
- name STRING,
- age INT,
- price DECIMAL(5,2),
- sale DOUBLE
- )
- WITH (
- 'connector' = 'doris',
- 'fenodes' = '$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT',
- 'table.identifier' = '$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME',
- 'username' = '$YOUR_DORIS_USERNAME',
- 'password' = '$YOUR_DORIS_PASSWORD'
-);
-```
-
-* Sink
-
-```sql
-CREATE TABLE flink_doris_sink (
- name STRING,
- age INT,
- price DECIMAL(5,2),
- sale DOUBLE
- )
- WITH (
- 'connector' = 'doris',
- 'fenodes' = '$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT',
- 'table.identifier' = '$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME',
- 'username' = '$YOUR_DORIS_USERNAME',
- 'password' = '$YOUR_DORIS_PASSWORD'
-);
-```
-
-* Insert
-
-```sql
-INSERT INTO flink_doris_sink select name,age,price,sale from flink_doris_source
-```
-
-### DataStream
-
-* Source
-
-```java
- Properties properties = new Properties();
- properties.put("fenodes","FE_IP:8030");
- properties.put("username","root");
- properties.put("password","");
- properties.put("table.identifier","db.table");
- env.addSource(new DorisSourceFunction(
- new DorisStreamOptions(properties),
- new SimpleListDeserializationSchema()
- )
- ).print();
-```
-
-* Sink
-
-Json Stream
-
-```java
-Properties pro = new Properties();
-pro.setProperty("format", "json");
-pro.setProperty("strip_outer_array", "true");
-env.fromElements(
- "{\"longitude\": \"116.405419\", \"city\": \"北京\", \"latitude\": \"39.916927\"}"
- )
- .addSink(
- DorisSink.sink(
- DorisReadOptions.builder().build(),
- DorisExecutionOptions.builder()
- .setBatchSize(3)
- .setBatchIntervalMs(0l)
- .setMaxRetries(3)
- .setStreamLoadProp(pro).build(),
- DorisOptions.builder()
- .setFenodes("FE_IP:8030")
- .setTableIdentifier("db.table")
- .setUsername("root")
- .setPassword("").build()
- ));
-```
-
-Json Stream
-
-```java
-env.fromElements(
- "{\"longitude\": \"116.405419\", \"city\": \"北京\", \"latitude\": \"39.916927\"}"
- )
- .addSink(
- DorisSink.sink(
- DorisOptions.builder()
- .setFenodes("FE_IP:8030")
- .setTableIdentifier("db.table")
- .setUsername("root")
- .setPassword("").build()
- ));
-```
-
-RowData Stream
-
-```java
-DataStream<RowData> source = env.fromElements("")
-    .map(new MapFunction<String, RowData>() {
- @Override
- public RowData map(String value) throws Exception {
- GenericRowData genericRowData = new GenericRowData(3);
- genericRowData.setField(0, StringData.fromString("北京"));
- genericRowData.setField(1, 116.405419);
- genericRowData.setField(2, 39.916927);
- return genericRowData;
- }
- });
-
-String[] fields = {"city", "longitude", "latitude"};
-LogicalType[] types = {new VarCharType(), new DoubleType(), new DoubleType()};
-
-source.addSink(
- DorisSink.sink(
- fields,
- types,
- DorisReadOptions.builder().build(),
- DorisExecutionOptions.builder()
- .setBatchSize(3)
- .setBatchIntervalMs(0L)
- .setMaxRetries(3)
- .build(),
- DorisOptions.builder()
- .setFenodes("FE_IP:8030")
- .setTableIdentifier("db.table")
- .setUsername("root")
- .setPassword("").build()
- ));
-```
-
-### DataSet
-
-* Sink
-
-```java
-MapOperator<String, RowData> data = env.fromElements("")
-    .map(new MapFunction<String, RowData>() {
- @Override
- public RowData map(String value) throws Exception {
- GenericRowData genericRowData = new GenericRowData(3);
- genericRowData.setField(0, StringData.fromString("北京"));
- genericRowData.setField(1, 116.405419);
- genericRowData.setField(2, 39.916927);
- return genericRowData;
- }
- });
-
-DorisOptions dorisOptions = DorisOptions.builder()
- .setFenodes("FE_IP:8030")
- .setTableIdentifier("db.table")
- .setUsername("root")
- .setPassword("").build();
-DorisReadOptions readOptions = DorisReadOptions.defaults();
-DorisExecutionOptions executionOptions = DorisExecutionOptions.defaults();
-
-LogicalType[] types = {new VarCharType(), new DoubleType(), new DoubleType()};
-String[] fields = {"city", "longitude", "latitude"};
-
-DorisDynamicOutputFormat outputFormat = new DorisDynamicOutputFormat(
- dorisOptions, readOptions, executionOptions, types, fields
- );
-
-outputFormat.open(0, 1);
-data.output(outputFormat);
-outputFormat.close();
-```
-
-
-
-## Configuration
-
-### General
-
-| Key | Default Value | Comment |
-| -------------------------------- | ----------------- | ------------------------------------------------------------ |
-| fenodes | -- | Doris FE http address, support multiple addresses, separated by commas |
-| table.identifier | -- | Doris table identifier, eg, db1.tbl1 |
-| username | -- | Doris username |
-| password | -- | Doris password |
-| doris.request.retries | 3 | Number of retries to send requests to Doris |
-| doris.request.connect.timeout.ms | 30000 | Connection timeout for sending requests to Doris |
-| doris.request.read.timeout.ms | 30000 | Read timeout for sending request to Doris |
-| doris.request.query.timeout.s    | 3600              | Query timeout for Doris, the default is 1 hour; -1 means no timeout limit |
-| doris.request.tablet.size        | Integer.MAX_VALUE | The number of Doris Tablets corresponding to a Partition. The smaller this value is set, the more partitions will be generated. This will increase the parallelism on the Flink side, but at the same time will cause greater pressure on Doris. |
-| doris.batch.size | 1024 | The maximum number of rows to read data from BE at one time. Increasing this value can reduce the number of connections between Flink and Doris. Thereby reducing the extra time overhead caused by network delay. |
-| doris.exec.mem.limit | 2147483648 | Memory limit for a single query. The default is 2GB, in bytes. |
-| doris.deserialize.arrow.async | false | Whether to support asynchronous conversion of Arrow format to RowBatch required for flink-doris-connector iteration |
-| doris.deserialize.queue.size | 64 | Asynchronous conversion of the internal processing queue in Arrow format takes effect when doris.deserialize.arrow.async is true |
-| doris.read.field | -- | List of column names in the Doris table, separated by commas |
-| doris.filter.query | -- | Filter expression of the query, which is transparently transmitted to Doris. Doris uses this expression to complete source-side data filtering. |
-| sink.batch.size                  | 10000             | Maximum number of rows in a single write to BE |
-| sink.max-retries                 | 1                 | Number of retries after a failed write to BE |
-| sink.batch.interval              | 10s               | The flush interval, after which the asynchronous thread will write the data in the cache to BE. The default value is 10 seconds, and the supported time units are ms, s, min, h, and d. Set to 0 to turn off periodic writing. |
-| sink.properties.*                | --                | The Stream Load parameters, e.g. 'sink.properties.column_separator' = ','. Set 'sink.properties.escape_delimiters' = 'true' if you want to use a control character as a separator, so that e.g. '\\x01' will be translated to binary 0x01. To import in JSON format, enable both 'sink.properties.format' = 'json' and 'sink.properties.strip_outer_array' = 'true'. |
-| sink.enable-delete               | true              | Whether to enable deletion. This option requires the Doris table to have the batch delete feature enabled (enabled by default in version 0.15+), and only supports the Unique model. |
-| sink.batch.bytes                 | 10485760          | Maximum number of bytes in a single write to BE. When the data size in the batch exceeds this threshold, the cached data is written to BE. The default value is 10MB. |
-
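-For example, combining the options above, a sink table that writes to Doris in JSON format could be declared as follows (a sketch; the connection values are placeholders and the column list is illustrative):
-
-```sql
--- JSON-format sink combining the Stream Load properties listed above
-CREATE TABLE flink_doris_json_sink (
-    name STRING,
-    age INT
-    )
-    WITH (
-      'connector' = 'doris',
-      'fenodes' = '$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT',
-      'table.identifier' = '$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME',
-      'username' = '$YOUR_DORIS_USERNAME',
-      'password' = '$YOUR_DORIS_PASSWORD',
-      'sink.batch.size' = '10000',
-      'sink.max-retries' = '3',
-      'sink.properties.format' = 'json',
-      'sink.properties.strip_outer_array' = 'true'
-);
-```
-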
-## Doris & Flink Column Type Mapping
-
-| Doris Type | Flink Type |
-| ---------- | -------------------------------- |
-| NULL_TYPE | NULL |
-| BOOLEAN | BOOLEAN |
-| TINYINT | TINYINT |
-| SMALLINT | SMALLINT |
-| INT | INT |
-| BIGINT | BIGINT |
-| FLOAT | FLOAT |
-| DOUBLE | DOUBLE |
-| DATE | STRING |
-| DATETIME | STRING |
-| DECIMAL | DECIMAL |
-| CHAR | STRING |
-| LARGEINT | STRING |
-| VARCHAR | STRING |
-| DECIMALV2 | DECIMAL |
-| TIME | DOUBLE |
-| HLL | Unsupported datatype |
-
-## An example of using Flink CDC to access Doris (supports Insert / Update / Delete events)
-```sql
-CREATE TABLE cdc_mysql_source (
- id int
- ,name VARCHAR
- ,PRIMARY KEY (id) NOT ENFORCED
-) WITH (
- 'connector' = 'mysql-cdc',
- 'hostname' = '127.0.0.1',
- 'port' = '3306',
- 'username' = 'root',
- 'password' = 'password',
- 'database-name' = 'database',
- 'table-name' = 'table'
-);
-
--- Support delete event synchronization (sink.enable-delete='true'), requires Doris table to enable batch delete function
-CREATE TABLE doris_sink (
-id INT,
-name STRING
-)
-WITH (
- 'connector' = 'doris',
- 'fenodes' = '127.0.0.1:8030',
- 'table.identifier' = 'database.table',
- 'username' = 'root',
- 'password' = '',
- 'sink.properties.format' = 'json',
- 'sink.properties.strip_outer_array' = 'true',
- 'sink.enable-delete' = 'true'
-);
-
-insert into doris_sink select id,name from cdc_mysql_source;
-```
diff --git a/docs/en/extending-doris/hive-bitmap-udf.md b/docs/en/extending-doris/hive-bitmap-udf.md
deleted file mode 100644
index 40cb13cf3f..0000000000
--- a/docs/en/extending-doris/hive-bitmap-udf.md
+++ /dev/null
@@ -1,97 +0,0 @@
----
-{
- "title": "Hive Bitmap UDF",
- "language": "en"
-}
----
-
-
-
-# Hive UDF
-
- Hive Bitmap UDF provides UDFs for generating bitmaps and performing bitmap operations in Hive tables. The bitmap in Hive is exactly the same as the Doris bitmap, and bitmaps in Hive can be imported into Doris through Spark bitmap load.
-
- The main purposes are:
- 1. Reduce the time of importing data into Doris by removing processes such as dictionary building and bitmap pre-aggregation;
- 2. Save Hive storage by using bitmaps to compress data, reducing storage cost;
- 3. Provide flexible bitmap operations in Hive, such as intersection, union, and difference; the calculated bitmaps can also be imported into Doris directly.
-
-## How To Use
-
-### Create Bitmap type table in Hive
-
-```sql
-
--- Example: Create Hive Bitmap Table
-CREATE TABLE IF NOT EXISTS `hive_bitmap_table`(
- `k1` int COMMENT '',
- `k2` String COMMENT '',
- `k3` String COMMENT '',
- `uuid` binary COMMENT 'bitmap'
-) comment 'comment'
-
-```
-
-### Hive Bitmap UDF Usage:
-
- Hive Bitmap UDFs are used in Hive/Spark as follows:
-
-```sql
-
--- Load the Hive Bitmap Udf jar package (Upload the compiled hive-udf jar package to HDFS)
-add jar hdfs://node:9001/hive-udf-jar-with-dependencies.jar;
-
--- Create Hive Bitmap UDAF function
-create temporary function to_bitmap as 'org.apache.doris.udf.ToBitmapUDAF';
-create temporary function bitmap_union as 'org.apache.doris.udf.BitmapUnionUDAF';
-
--- Create Hive Bitmap UDF function
-create temporary function bitmap_count as 'org.apache.doris.udf.BitmapCountUDF';
-create temporary function bitmap_and as 'org.apache.doris.udf.BitmapAndUDF';
-create temporary function bitmap_or as 'org.apache.doris.udf.BitmapOrUDF';
-create temporary function bitmap_xor as 'org.apache.doris.udf.BitmapXorUDF';
-
--- Example: Generate bitmap by to_bitmap function and write to Hive Bitmap table
-insert into hive_bitmap_table
-select
- k1,
- k2,
- k3,
- to_bitmap(uuid) as uuid
-from
- hive_table
-group by
- k1,
- k2,
- k3
-
--- Example: The bitmap_count function calculate the number of elements in the bitmap
-select k1,k2,k3,bitmap_count(uuid) from hive_bitmap_table
-
--- Example: The bitmap_union function calculate the grouped bitmap union
-select k1,bitmap_union(uuid) from hive_bitmap_table group by k1
-
-```
-
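-The binary UDFs created above (`bitmap_and`, `bitmap_or`, `bitmap_xor`) can be combined with `bitmap_count` in the same way. A rough sketch (the `k2` filter values are illustrative):
-
-```sql
--- Example: count the intersection of the bitmaps of two groups
-select
-    a.k1,
-    bitmap_count(bitmap_and(a.uuid, b.uuid)) as cnt
-from hive_bitmap_table a
-join hive_bitmap_table b
-  on a.k1 = b.k1 and a.k3 = b.k3
-where a.k2 = 'group_a' and b.k2 = 'group_b';
-```
-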
-### Hive Bitmap UDF Description
-
-## Hive Bitmap import into Doris
-
- See details: Load Data -> Spark Load -> Basic operation -> Create load (Example 3: when the upstream data source is a Hive binary type table)
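-
-On the Doris side, the target table typically defines the bitmap column with the `BITMAP` type and `BITMAP_UNION` aggregation. A minimal sketch (the table name, bucket and replication settings are assumptions):
-
-```sql
--- illustrative Doris target table for the Hive bitmap data
-CREATE TABLE IF NOT EXISTS `hive_bitmap_table_doris` (
-    `k1`   int         COMMENT '',
-    `k2`   varchar(10) COMMENT '',
-    `k3`   varchar(10) COMMENT '',
-    `uuid` BITMAP BITMAP_UNION NOT NULL COMMENT 'bitmap'
-)
-AGGREGATE KEY(`k1`, `k2`, `k3`)
-DISTRIBUTED BY HASH(`k1`) BUCKETS 3
-PROPERTIES ("replication_num" = "1");
-```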
diff --git a/docs/en/extending-doris/hive-of-doris.md b/docs/en/extending-doris/hive-of-doris.md
deleted file mode 100644
index cfa98ab0f4..0000000000
--- a/docs/en/extending-doris/hive-of-doris.md
+++ /dev/null
@@ -1,117 +0,0 @@
----
-{
- "title": "Doris On Hive",
- "language": "en"
-}
----
-
-
-
-# Hive External Table of Doris
-
-Hive External Table of Doris provides Doris with direct access to Hive external tables, which eliminates the need for cumbersome data import and solves the problem of analyzing Hive tables with the help of Doris' OLAP capabilities:
-
- 1. support for Hive data sources to access Doris
- 2. Support joint queries between Doris and Hive data sources to perform more complex analysis operations
-
-This document introduces how to use this feature and the considerations.
-
-## Glossary
-
-### Noun in Doris
-
-* FE: Frontend, the front-end node of Doris, responsible for metadata management and request access.
-* BE: Backend, the backend node of Doris, responsible for query execution and data storage
-
-## How To Use
-
-### Create Hive External Table
-
-```sql
--- Syntax
-CREATE [EXTERNAL] TABLE table_name (
- col_name col_type [NULL | NOT NULL] [COMMENT "comment"]
-) ENGINE=HIVE
-[COMMENT "comment"]
-PROPERTIES (
- 'property_name'='property_value',
- ...
-);
-
--- Example: Create the hive_table table under hive_db in a Hive cluster
-CREATE TABLE `t_hive` (
- `k1` int NOT NULL COMMENT "",
- `k2` char(10) NOT NULL COMMENT "",
- `k3` datetime NOT NULL COMMENT "",
- `k5` varchar(20) NOT NULL COMMENT "",
- `k6` double NOT NULL COMMENT ""
-) ENGINE=HIVE
-COMMENT "HIVE"
-PROPERTIES (
-'hive.metastore.uris' = 'thrift://192.168.0.1:9083',
-'database' = 'hive_db',
-'table' = 'hive_table'
-);
-```
-
-#### Parameter Description
-
-- External Table Columns
- - Column names should correspond to the Hive table
- - The order of the columns should be the same as the Hive table
- - Must contain all the columns in the Hive table
- - Hive table partition columns do not need to be specified, they can be defined as normal columns.
-- ENGINE should be specified as HIVE
-- PROPERTIES attribute.
- - `hive.metastore.uris`: Hive Metastore service address
- - `database`: the name of the database to which Hive is mounted
- - `table`: the name of the table to which Hive is mounted
-
-## Data Type Matching
-
-The supported Hive column types correspond to Doris in the following table.
-
-| Hive | Doris | Description |
-| :------: | :----: | :-------------------------------: |
-| BOOLEAN | BOOLEAN | |
-| CHAR | CHAR | Only UTF8 encoding is supported |
-| VARCHAR | VARCHAR | Only UTF8 encoding is supported |
-| TINYINT | TINYINT | |
-| SMALLINT | SMALLINT | |
-| INT | INT | |
-| BIGINT | BIGINT | |
-| FLOAT | FLOAT | |
-| DOUBLE | DOUBLE | |
-| DECIMAL | DECIMAL | |
-| DATE | DATE | |
-| TIMESTAMP | DATETIME | Timestamp to Datetime will lose precision |
-
-**Note:**
-- Hive table Schema changes **are not automatically synchronized** and require rebuilding the Hive external table in Doris.
-- The current Hive storage format only supports Text, Parquet and ORC types
-- The Hive versions currently supported by default are `2.3.7` and `3.1.2`; other versions have not been tested. More versions will be supported in the future.
-
-### Query Usage
-
-After you finish building the Hive external table in Doris, it is no different from a normal Doris OLAP table except that you cannot use the data model in Doris (rollup, preaggregation, materialized view, etc.)
-
-```sql
-select * from t_hive where k1 > 1000 and k3 = 'term' or k4 like '%doris';
-```
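-
-Since the external table behaves like a regular table, it can also be joined with Doris internal tables. A rough sketch (`doris_olap_table` is a hypothetical internal table with a matching `k1` column):
-
-```sql
--- joint query between the Hive external table and an internal table
-select h.k1, h.k2, o.total
-from t_hive h
-join doris_olap_table o on h.k1 = o.k1
-where h.k6 > 10;
-```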
diff --git a/docs/en/extending-doris/iceberg-of-doris.md b/docs/en/extending-doris/iceberg-of-doris.md
deleted file mode 100644
index aa83042feb..0000000000
--- a/docs/en/extending-doris/iceberg-of-doris.md
+++ /dev/null
@@ -1,211 +0,0 @@
----
-{
- "title": "Doris On Iceberg",
- "language": "en"
-}
----
-
-
-
-# Iceberg External Table of Doris
-
-Iceberg External Table of Doris provides Doris with the ability to access Iceberg external tables directly, eliminating the need for cumbersome data import and leveraging Doris' own OLAP capabilities to solve Iceberg table data analysis problems.
-
- 1. support Iceberg data sources to access Doris
- 2. Support joint query between Doris and Iceberg data source tables to perform more complex analysis operations
-
-This document introduces how to use this feature and the considerations.
-
-## Glossary
-
-### Noun in Doris
-
-* FE: Frontend, the front-end node of Doris, responsible for metadata management and request access
-* BE: Backend, the backend node of Doris, responsible for query execution and data storage
-
-## How to use
-
-### Create Iceberg External Table
-
-Iceberg tables can be created in Doris in two ways. You do not need to declare the column definitions of the table when creating an external table, Doris can automatically convert them based on the column definitions of the table in Iceberg.
-
-1. Create a separate external table to mount the Iceberg table.
- The syntax can be viewed in `HELP CREATE TABLE`.
-
- ```sql
- -- Syntax
- CREATE [EXTERNAL] TABLE table_name
- ENGINE = ICEBERG
- [COMMENT "comment"]
- PROPERTIES (
- "iceberg.database" = "iceberg_db_name",
- "iceberg.table" = "icberg_table_name",
- "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083",
- "iceberg.catalog.type" = "HIVE_CATALOG"
- );
-
-
- -- Example: Mount iceberg_table under iceberg_db in Iceberg
- CREATE TABLE `t_iceberg`
- ENGINE = ICEBERG
- PROPERTIES (
- "iceberg.database" = "iceberg_db",
- "iceberg.table" = "iceberg_table",
- "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083",
- "iceberg.catalog.type" = "HIVE_CATALOG"
- );
- ```
-
-2. Create an Iceberg database to mount the corresponding Iceberg database on the remote side, and mount all the tables under the database.
- You can check the syntax with `HELP CREATE DATABASE`.
-
- ```sql
- -- Syntax
- CREATE DATABASE db_name
- [COMMENT "comment"]
- PROPERTIES (
- "iceberg.database" = "iceberg_db_name",
- "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083",
- "iceberg.catalog.type" = "HIVE_CATALOG"
- );
-
- -- Example: mount the iceberg_db in Iceberg and mount all tables under that db
- CREATE DATABASE `iceberg_test_db`
- PROPERTIES (
- "iceberg.database" = "iceberg_db",
- "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083",
- "iceberg.catalog.type" = "HIVE_CATALOG"
- );
- ```
-
- The progress of the table build in `iceberg_test_db` can be viewed by `HELP SHOW TABLE CREATION`.
-
-
-You can also create an Iceberg table by explicitly specifying the column definitions according to your needs.
-
-1. Create an Iceberg table
-
- ```sql
- -- Syntax
- CREATE [EXTERNAL] TABLE table_name (
- col_name col_type [NULL | NOT NULL] [COMMENT "comment"]
- ) ENGINE = ICEBERG
- [COMMENT "comment"] )
- PROPERTIES (
- "iceberg.database" = "iceberg_db_name",
- "iceberg.table" = "icberg_table_name",
- "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083",
- "iceberg.catalog.type" = "HIVE_CATALOG"
- );
-
- -- Example: Mount iceberg_table under iceberg_db in Iceberg
- CREATE TABLE `t_iceberg` (
- `id` int NOT NULL COMMENT "id number",
- `name` varchar(10) NOT NULL COMMENT "user name"
- ) ENGINE = ICEBERG
- PROPERTIES (
- "iceberg.database" = "iceberg_db",
- "iceberg.table" = "iceberg_table",
- "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083",
- "iceberg.catalog.type" = "HIVE_CATALOG"
- );
- ```
-
-#### Parameter Description
-
-- External Table Columns
- - Column names should correspond to the Iceberg table
- - The order of the columns needs to be consistent with the Iceberg table
-- ENGINE needs to be specified as ICEBERG
-- PROPERTIES property.
- - `iceberg.hive.metastore.uris`: Hive Metastore service address
- - `iceberg.database`: the name of the database to which Iceberg is mounted
- - `iceberg.table`: the name of the table to which Iceberg is mounted, not required when mounting Iceberg database.
- - `iceberg.catalog.type`: the catalog method used in Iceberg, the default is `HIVE_CATALOG`, currently only this method is supported, more Iceberg catalog access methods will be supported in the future.
-
-### Show table structure
-
-The table structure can be viewed with `SHOW CREATE TABLE`; see `HELP SHOW CREATE TABLE` for the syntax.
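-
-For example (using the `t_iceberg` table mounted above):
-
-```sql
-SHOW CREATE TABLE t_iceberg;
-```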
-
-### Synchronized mounts
-
-When the Iceberg table Schema changes, you can manually synchronize it with the `REFRESH` command, which will remove and rebuild the Iceberg external table in Doris, as seen in the `HELP REFRESH` help.
-
-```sql
--- Synchronize the Iceberg table
-REFRESH TABLE t_iceberg;
-
--- Synchronize the Iceberg database
-REFRESH DATABASE iceberg_test_db;
-```
-
-## Data Type Matching
-
-The supported Iceberg column types correspond to Doris in the following table.
-
-| Iceberg | Doris | Description |
-| :------: | :----: | :-------------------------------: |
-| BOOLEAN | BOOLEAN | |
-| INTEGER | INT | |
-| LONG | BIGINT | |
-| FLOAT | FLOAT | |
-| DOUBLE | DOUBLE | |
-| DATE | DATE | |
-| TIMESTAMP | DATETIME | Timestamp to Datetime with loss of precision |
-| STRING | STRING | |
-| UUID | VARCHAR | Use VARCHAR instead |
-| DECIMAL | DECIMAL | |
-| TIME | - | not supported |
-| FIXED | - | not supported |
-| BINARY | - | not supported |
-| STRUCT | - | not supported |
-| LIST | - | not supported |
-| MAP | - | not supported |
-
-**Note:**
-- Iceberg table Schema changes **are not automatically synchronized** and require synchronization of Iceberg external tables or databases in Doris via the `REFRESH` command.
-- The current default supported version of Iceberg is 0.12.0 and has not been tested in other versions. More versions will be supported in the future.
-
-### Query Usage
-
-Once you have finished building the Iceberg external table in Doris, it is no different from a normal Doris OLAP table except that you cannot use the data models in Doris (rollup, preaggregation, materialized views, etc.)
-
-```sql
-select * from t_iceberg where k1 > 1000 and k3 = 'term' or k4 like '%doris';
-```
-
-## Related system configurations
-
-### FE Configuration
-
-The following configurations are at the Iceberg external table system level and can be configured by modifying `fe.conf` or by `ADMIN SET CONFIG`.
-
-- `iceberg_table_creation_strict_mode`
-
- Iceberg tables are created with strict mode enabled by default.
- strict mode means that the column types of the Iceberg table are strictly filtered, and if there are data types that Doris does not currently support, the creation of the table will fail.
-
-- `iceberg_table_creation_interval_second`
-
- The background task execution interval for automatic creation of Iceberg tables, default is 10s.
-
-- `max_iceberg_table_creation_record_size`
-
- The maximum value reserved for Iceberg table creation records, default is 2000. Only for creating Iceberg database records.
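-
-For example, strict mode could be turned off at runtime with the `ADMIN SET CONFIG` statement mentioned above (a sketch, assuming the config item is runtime-mutable in your version):
-
-```sql
--- disable strict mode for Iceberg table creation at runtime
-ADMIN SET FRONTEND CONFIG ("iceberg_table_creation_strict_mode" = "false");
-```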
diff --git a/docs/en/extending-doris/logstash.md b/docs/en/extending-doris/logstash.md
deleted file mode 100644
index 96dbfdb73d..0000000000
--- a/docs/en/extending-doris/logstash.md
+++ /dev/null
@@ -1,198 +0,0 @@
----
-{
- "title": "Logstash Doris Output Plugin",
- "language": "en"
-}
----
-
-
-
-# Doris output plugin
-
-This plugin is used to output data from Logstash to Doris. It uses the HTTP protocol to interact with the Doris FE HTTP interface and imports data through Doris's Stream Load.
-
-[Learn more about Doris Stream Load ](http://doris.apache.org/administrator-guide/load-data/stream-load-manual.html)
-
-[Learn more about Doris](http://doris.apache.org)
-
-
-## Install and compile
-### 1. Download source code
-
-### 2. Compile
-Execute under the extension/logstash/ directory:
-
-`gem build logstash-output-doris.gemspec`
-
-You will get the logstash-output-doris-{version}.gem file in the same directory
-
-### 3. Plug-in installation
-Copy logstash-output-doris-{version}.gem to the Logstash installation directory
-
-Execute the command
-
-`./bin/logstash-plugin install logstash-output-doris-{version}.gem`
-
-to install the logstash-output-doris plugin
-
-## Configuration
-### Example:
-
-Create a new configuration file in the config directory and name it logstash-doris.conf
-
-The specific configuration is as follows:
-
- output {
- doris {
- http_hosts => [ "http://fehost:8030" ]
- user => user_name
- password => password
- db => "db_name"
- table => "table_name"
- label_prefix => "label_prefix"
- column_separator => ","
- }
- }
-
-Configuration instructions:
-
-Connection configuration:
-
-Configuration | Explanation
---- | ---
-`http_hosts` | FE's HTTP interaction address, eg: ["http://fe1:8030", "http://fe2:8030"]
-`user` | User name, the user needs to have import permission for the doris table
-`password` | Password
-`db` | Database name
-`table` | Table name
-`label_prefix` | Import the identification prefix, the final generated ID is *{label\_prefix}\_{db}\_{table}\_{time_stamp}*
-
-
-Load configuration:([Reference documents](http://doris.apache.org/master/zh-CN/administrator-guide/load-data/stream-load-manual.html))
-
-Configuration | Explanation
---- | ---
-`column_separator` | Column separator, the default is \t
-`columns` | Used to specify the correspondence between the columns in the import file and the columns in the table
-`where` | The filter conditions specified by the import task
-`max_filter_ratio` | The maximum tolerance rate of the import task, the default is zero tolerance
-`partition` | Partition information of the table to be imported
-`timeout` | timeout, the default is 600s
-`strict_mode` | Strict mode, the default is false
-`timezone` | Specify the time zone used for this import, the default is UTC+8 (East Eight zone)
-`exec_mem_limit` | Import memory limit, default is 2GB, unit is byte
-
-Other configuration:
-
-Configuration | Explanation
---- | ---
-`save_on_failure` | Whether to save the data locally if the import fails, the default is true
-`save_dir` | Local save directory, default is /tmp
-`automatic_retries` | The maximum number of retries on failure, the default is 3
-`batch_size` | The maximum number of events processed per batch, the default is 100000
-`idle_flush_time` | Maximum interval, the default is 20 (seconds)
-
-
-## Start Up
-Run the command to start the doris output plugin:
-
-`{logstash-home}/bin/logstash -f {logstash-home}/config/logstash-doris.conf --config.reload.automatic`
-
-
-
-
-## Complete usage example
-### 1. Compile doris-output-plugin
-1> Download the Ruby package from the [Ruby official website](https://www.ruby-lang.org/en/downloads/). Version 2.7.1 is used here
-
-2> Compile and install, configure ruby environment variables
-
-3> Go to the doris source extension/logstash/ directory and execute
-
-`gem build logstash-output-doris.gemspec`
-
-Get the file logstash-output-doris-0.1.0.gem, and the compilation is complete
-
-### 2. Install and configure filebeat (here use filebeat as input)
-
-1> Download the filebeat tar package from the [Elastic official website](https://www.elastic.co/) and decompress it
-
-2> Enter the filebeat directory and modify the configuration file filebeat.yml as follows:
-
- filebeat.inputs:
- - type: log
- paths:
- - /tmp/doris.data
- output.logstash:
- hosts: ["localhost:5044"]
-
-/tmp/doris.data is the doris data path
-
-3> Start filebeat:
-
-`./filebeat -e -c filebeat.yml -d "publish"`
-
-
-### 3.Install logstash and doris-out-plugin
-1> Download the logstash tar package from the [Elastic official website](https://www.elastic.co/) and decompress it
-
-2> Copy the logstash-output-doris-0.1.0.gem obtained in step 1 to the logstash installation directory
-
-3> execute
-
-`./bin/logstash-plugin install logstash-output-doris-0.1.0.gem`
-
-Install the plugin
-
-4> Create a new configuration file logstash-doris.conf in the config directory as follows:
-
- input {
- beats {
- port => "5044"
- }
- }
-
- output {
- doris {
- http_hosts => [ "http://127.0.0.1:8030" ]
- user => doris
- password => doris
- db => "logstash_output_test"
- table => "output"
- label_prefix => "doris"
- column_separator => ","
- columns => "a,b,c,d,e"
- }
- }
-
-Adjust the configuration here according to the configuration instructions above
-
-5> Start logstash:
-
-./bin/logstash -f ./config/logstash-doris.conf --config.reload.automatic
-
-### 4. Test Load
-
-Write data to /tmp/doris.data:
-
-`echo a,b,c,d,e >> /tmp/doris.data`
-
-Observe the logstash log. If the status of the returned response is Success, the import was successful. At this time, you can view the imported data in the logstash_output_test.output table
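-
-Note that the target table must exist in Doris before the import. A minimal sketch of a table matching the `columns => "a,b,c,d,e"` configuration above (the column types, key, bucket and replication settings are assumptions):
-
-```sql
--- illustrative target table for the Logstash output test
-CREATE TABLE `logstash_output_test`.`output` (
-    a VARCHAR(50),
-    b VARCHAR(50),
-    c VARCHAR(50),
-    d VARCHAR(50),
-    e VARCHAR(50)
-)
-DUPLICATE KEY(a)
-DISTRIBUTED BY HASH(a) BUCKETS 1
-PROPERTIES ("replication_num" = "1");
-```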
-
diff --git a/docs/en/extending-doris/odbc-of-doris.md b/docs/en/extending-doris/odbc-of-doris.md
deleted file mode 100644
index fe8915c47d..0000000000
--- a/docs/en/extending-doris/odbc-of-doris.md
+++ /dev/null
@@ -1,374 +0,0 @@
----
-{
- "title": "Doris On ODBC",
- "language": "en"
-}
----
-
-
-
-
-# ODBC External Table Of Doris
-
-ODBC external table of Doris provides Doris access to external tables through the standard interface for database access (ODBC). The external table eliminates the tedious data import work and enables Doris to have the ability to access all kinds of databases. It solves the data analysis problem of external tables with Doris' OLAP capability.
-
-1. Support various data sources to access Doris
-2. Support Doris query with tables in various data sources to perform more complex analysis operations
-3. Use insert into to write the query results executed by Doris to the external data source
-
-
-This document mainly introduces the implementation principle and usage of this ODBC external table.
-
-## Glossary
-
-### Noun in Doris
-
-* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access.
-* BE: Backend, Doris's back-end node. Responsible for query execution and data storage.
-
-## How To Use
-
-### Create ODBC External Table
-
-#### 1. Creating ODBC external table without resource
-
-```
-CREATE EXTERNAL TABLE `baseall_oracle` (
- `k1` decimal(9, 3) NOT NULL COMMENT "",
- `k2` char(10) NOT NULL COMMENT "",
- `k3` datetime NOT NULL COMMENT "",
- `k5` varchar(20) NOT NULL COMMENT "",
- `k6` double NOT NULL COMMENT ""
-) ENGINE=ODBC
-COMMENT "ODBC"
-PROPERTIES (
-"host" = "192.168.0.1",
-"port" = "8086",
-"user" = "test",
-"password" = "test",
-"database" = "test",
-"table" = "baseall",
-"driver" = "Oracle 19 ODBC driver",
-"type" = "oracle"
-);
-```
-
-#### 2. Creating ODBC external table by resource (recommended)
-```
-CREATE EXTERNAL RESOURCE `oracle_odbc`
-PROPERTIES (
-"type" = "odbc_catalog",
-"host" = "192.168.0.1",
-"port" = "8086",
-"user" = "test",
-"password" = "test",
-"database" = "test",
-"odbc_type" = "oracle",
-"driver" = "Oracle 19 ODBC driver"
-);
-
-CREATE EXTERNAL TABLE `baseall_oracle` (
- `k1` decimal(9, 3) NOT NULL COMMENT "",
- `k2` char(10) NOT NULL COMMENT "",
- `k3` datetime NOT NULL COMMENT "",
- `k5` varchar(20) NOT NULL COMMENT "",
- `k6` double NOT NULL COMMENT ""
-) ENGINE=ODBC
-COMMENT "ODBC"
-PROPERTIES (
-"odbc_catalog_resource" = "oracle_odbc",
-"database" = "test",
-"table" = "baseall"
-);
-```
-
-The following parameters are accepted by ODBC external table:
-
-Parameter | Description
----|---
-**host** | IP address of the external database
-**driver** | The driver name of the ODBC driver, which must be consistent with the driver name configured in `be/conf/odbcinst.ini`.
-**type** | The type of the external database, currently supports Oracle, MySQL and PostgreSQL
-**user** | The user name of the database
-**password** | Password for the user
-
-
-##### Installation and configuration of ODBC driver
-
-
-Each database provides an ODBC access driver. Users can install the corresponding ODBC driver lib library according to the official recommendation of each database.
-
-After installing the ODBC driver, find the path of the driver lib library of the corresponding database, then modify the `be/conf/odbcinst.ini` configuration, for example:
-
-```
-[MySQL Driver]
-Description = ODBC for MySQL
-Driver = /usr/lib64/libmyodbc8w.so
-FileUsage = 1
-```
-* `[]`: The name in brackets is the driver name. When creating an external table, the driver name of the external table must be consistent with the one in this configuration file.
-* `Driver=`: This should be set according to the actual installation path of the driver. It is essentially the path of a dynamic library; make sure the prerequisite dependencies of this dynamic library are met.
-
-**Remember, all BE nodes are required to have the same driver installed, the same installation path and the same be/conf/odbcinst.ini config.**
-
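-
-For example, a MySQL external table could be mounted in the same resource-based way (a sketch; host, port and account are placeholders, and the driver name must match the one configured in `be/conf/odbcinst.ini` above):
-
-```sql
--- the driver name must match the [MySQL Driver] section in be/conf/odbcinst.ini
-CREATE EXTERNAL RESOURCE `mysql_odbc`
-PROPERTIES (
-"type" = "odbc_catalog",
-"host" = "192.168.0.2",
-"port" = "3306",
-"user" = "test",
-"password" = "test",
-"database" = "test",
-"odbc_type" = "mysql",
-"driver" = "MySQL Driver"
-);
-
-CREATE EXTERNAL TABLE `baseall_mysql` (
-  `k1` int NOT NULL COMMENT "",
-  `k2` varchar(20) NOT NULL COMMENT ""
-) ENGINE=ODBC
-COMMENT "ODBC"
-PROPERTIES (
-"odbc_catalog_resource" = "mysql_odbc",
-"database" = "test",
-"table" = "baseall"
-);
-```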
-
-### Query usage
-
-After the ODBC external table is created in Doris, it is no different from ordinary Doris tables except that the data model (rollup, pre-aggregation, materialized view, etc.) in Doris cannot be used.
-
-```
-select * from oracle_table where k1 > 1000 and k3 ='term' or k4 like '%doris'
-```
-
-### Data write
-
-After the ODBC external table is created in Doris, data can be written directly with the `insert into` statement: the query results of Doris can be written to the ODBC external table, or data can be imported from one ODBC external table to another.
-
-```
-insert into oracle_table values(1, "doris");
-insert into oracle_table select * from postgre_table;
-```
-#### Transaction
-
-
-Doris writes data to the external table in batches. If the import is interrupted, the data written before may need to be rolled back. Therefore, the ODBC external table supports transactions when writing data. Transaction support needs to be enabled via the session variable `enable_odbc_transcation`.
-
-```
-set enable_odbc_transcation = true;
-```
-
-Transactions ensure the atomicity of ODBC external table writes, but they reduce write performance, so consider enabling them only when needed.
-
-## Database ODBC version correspondence
-
-### Centos Operating System
-
-The versions used are: unixODBC 2.3.1, Doris 0.15, CentOS 7.9, all installed using yum.
-
-#### 1. MySQL
-
-| Mysql version | Mysql ODBC version |
-| ------------- | ------------------ |
-| 8.0.27 | 8.0.27, 8.026 |
-| 5.7.36 | 5.3.11, 5.3.13 |
-| 5.6.51 | 5.3.11, 5.3.13 |
-| 5.5.62 | 5.3.11, 5.3.13 |
-
-#### 2. PostgreSQL
-
-PostgreSQL's yum source rpm package address:
-
-````
-https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpm
-````
-
-This contains all versions of PostgreSQL from 9.x to 14.x, including the corresponding ODBC version, which can be installed as needed.
-
-| PostgreSQL Version | PostgreSQL ODBC Version |
-| ------------------ | ---------------------------- |
-| 12.9 | postgresql12-odbc-13.02.0000 |
-| 13.5 | postgresql13-odbc-13.02.0000 |
-| 14.1 | postgresql14-odbc-13.02.0000 |
-| 9.6.24 | postgresql96-odbc-13.02.0000 |
-| 10.6 | postgresql10-odbc-13.02.0000 |
-| 11.6 | postgresql11-odbc-13.02.0000 |
-
-#### 3. Oracle
-
-
-| Oracle Version | Oracle ODBC Version |
-| ------------------------------------------------------------ | ------------------------------------------ |
-| Oracle Database 11g Enterprise Edition Release 11.2.0.1.0 - 64bit Production | oracle-instantclient19.13-odbc-19.13.0.0.0 |
-| Oracle Database 12c Standard Edition Release 12.2.0.1.0 - 64bit Production | oracle-instantclient19.13-odbc-19.13.0.0.0 |
-| Oracle Database 18c Enterprise Edition Release 18.0.0.0.0 - Production | oracle-instantclient19.13-odbc-19.13.0.0.0 |
-| Oracle Database 19c Enterprise Edition Release 19.0.0.0.0 - Production | oracle-instantclient19.13-odbc-19.13.0.0.0 |
-| Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production | oracle-instantclient19.13-odbc-19.13.0.0.0 |
-
-Oracle ODBC driver version download address:
-
-```
-https://download.oracle.com/otn_software/linux/instantclient/1913000/oracle-instantclient19.13-sqlplus-19.13.0.0.0-2.x86_64.rpm
-https://download.oracle.com/otn_software/linux/instantclient/1913000/oracle-instantclient19.13-devel-19.13.0.0.0-2.x86_64.rpm
-https://download.oracle.com/otn_software/linux/instantclient/1913000/oracle-instantclient19.13-odbc-19.13.0.0.0-2.x86_64.rpm
-https://download.oracle.com/otn_software/linux/instantclient/1913000/oracle-instantclient19.13-basic-19.13.0.0.0-2.x86_64.rpm
-```
-
-### Ubuntu Operating System
-
-The versions used are: unixODBC 2.3.4, Doris 0.15, Ubuntu 20.04.
-
-#### 1. MySQL
-
-| Mysql version | Mysql ODBC version |
-| ------------- | ------------------ |
-| 8.0.27 | 8.0.11, 5.3.13 |
-
-Currently only this version has been tested; other versions will be added after testing
-
-#### 2. PostgreSQL
-
-| PostgreSQL Version | PostgreSQL ODBC Version |
-| ------------------ | ----------------------- |
-| 12.9 | psqlodbc-12.02.0000 |
-
-For other versions, as long as you download the ODBC driver version that matches the major version of the database, there should be no problem. Test results for other versions under Ubuntu will continue to be supplemented.
-
-#### 3. Oracle
-
-The Oracle database and ODBC driver correspondence is the same as on CentOS, and the following method is used to install the rpm packages under Ubuntu.
-
-To install rpm packages under Ubuntu, we also need to install alien, a tool that converts rpm packages into deb installation packages.
-
-````
-sudo apt-get install alien
-````
-
-Then execute the installation of the above four packages
-
-````
-sudo alien -i oracle-instantclient19.13-basic-19.13.0.0.0-2.x86_64.rpm
-sudo alien -i oracle-instantclient19.13-devel-19.13.0.0.0-2.x86_64.rpm
-sudo alien -i oracle-instantclient19.13-odbc-19.13.0.0.0-2.x86_64.rpm
-sudo alien -i oracle-instantclient19.13-sqlplus-19.13.0.0.0-2.x86_64.rpm
-````
-
-## Data type mapping
-
-There are different data types among different databases. Here, the types in each database and the data type matching in Doris are listed.
-
-### MySQL
-
-| MySQL | Doris | Alternation rules |
-| :------: | :----: | :-------------------------------: |
-| BOOLEAN | BOOLEAN | |
-| CHAR | CHAR | Only UTF8 encoding is supported |
-| VARCHAR | VARCHAR | Only UTF8 encoding is supported |
-| DATE | DATE | |
-| FLOAT | FLOAT | |
-| TINYINT | TINYINT | |
-| SMALLINT | SMALLINT | |
-| INT | INT | |
-| BIGINT | BIGINT | |
-| DOUBLE | DOUBLE | |
-| DATETIME | DATETIME | |
-| DECIMAL | DECIMAL | |
-
-### PostgreSQL
-
-| PostgreSQL | Doris | Alternation rules |
-| :------: | :----: | :-------------------------------: |
-| BOOLEAN | BOOLEAN | |
-| CHAR | CHAR | Only UTF8 encoding is supported |
-| VARCHAR | VARCHAR | Only UTF8 encoding is supported
-| DATE | DATE | |
-| REAL | FLOAT | |
-| SMALLINT | SMALLINT | |
-| INT | INT | |
-| BIGINT | BIGINT | |
-| DOUBLE | DOUBLE | |
-| TIMESTAMP | DATETIME | |
-| DECIMAL | DECIMAL | |
-
-### Oracle
-
-| Oracle | Doris | Alternation rules |
-| :------: | :----: | :-------------------------------: |
-| not support | BOOLEAN | Oracle can replace Boolean with number (1) |
-| CHAR | CHAR | |
-| VARCHAR | VARCHAR | |
-| DATE | DATE | |
-| FLOAT | FLOAT | |
-| not support | TINYINT | Oracle can be replaced by NUMBER |
-| SMALLINT | SMALLINT | |
-| INT | INT | |
-| not support | BIGINT | Oracle can be replaced by NUMBER |
-| not support | DOUBLE | Oracle can be replaced by NUMBER |
-| DATE | DATE | |
-| DATETIME | DATETIME | |
-| NUMBER | DECIMAL | |
-
-### SQLServer
-
-| SQLServer | Doris | Alternation rules |
-| :------: | :----: | :-------------------------------: |
-| BOOLEAN | BOOLEAN | |
-| CHAR | CHAR | Only UTF8 encoding is supported |
-| VARCHAR | VARCHAR | Only UTF8 encoding is supported |
-| DATE | DATE | |
-| REAL | FLOAT | |
-| TINYINT | TINYINT | |
-| SMALLINT | SMALLINT | |
-| INT | INT | |
-| BIGINT | BIGINT | |
-| FLOAT | DOUBLE | |
-| DATETIME/DATETIME2 | DATETIME | |
-| DECIMAL/NUMERIC | DECIMAL | |
-
-## Q&A
-
-1. Relationship with the original external table of MySQL?
-
-After accessing the ODBC external table, the original way to access the MySQL external table will be gradually abandoned. If you have not used the MySQL external table before, it is recommended that the newly accessed MySQL tables use ODBC external table directly.
-
-2. Besides MySQL, Oracle, SQLServer, PostgreSQL, can doris support more databases?
-
-Currently, Doris only adapts to MySQL, Oracle, SQLServer, PostgreSQL. The adaptation of other databases is under planning. In principle, any database that supports ODBC access can be accessed through the ODBC external table. If you need to access other databases, you are welcome to modify the code and contribute to Doris.
-
-3. When is it appropriate to use ODBC external tables?
-
- Generally, when the amount of external data is small and less than 100W, it can be accessed through ODBC external table. Since external table the cannot play the role of Doris in the storage engine and will bring additional network overhead, it is recommended to determine whether to access through external tables or import data into Doris according to the actual access delay requirements for queries.
-
-4. Garbled code in Oracle access?
-
-   Add the following parameter to the BE startup script: `export NLS_LANG=AMERICAN_AMERICA.AL32UTF8`, then restart all BE nodes
-
-5. ANSI Driver or Unicode Driver?
-
- Currently, ODBC supports both ANSI and Unicode driver forms, while Doris only supports Unicode driver. If you force the use of ANSI driver, the query results may be wrong.
-
-6. Report Errors: `driver connect Err: 01000 [unixODBC][Driver Manager]Can't open lib 'Xxx' : file not found (0)`
-
-   The driver for the corresponding database is not installed on every BE, or the correct driver path is not configured in `be/conf/odbcinst.ini`, or the driver name used when creating the table differs from the one in `be/conf/odbcinst.ini`
-
-7. Report Errors: `Fail to convert odbc value 'PALO ' TO INT on column:'A'`
-
-   Type conversion error: the mapped type of column `A` differs from the actual column type and needs to be modified
-
-8. BE crash occurs when using old MySQL table and ODBC external driver at the same time
-
-
-This is a compatibility problem between the MySQL ODBC driver and the MySQL lib that Doris already depends on. The recommended solutions are as follows:
-
-* Method 1: replace the old MySQL external table with the ODBC external table and recompile BE with the **WITH_MySQL** option turned off
-
-* Method 2: do not use the latest 8.x MySQL ODBC driver; use the 5.x MySQL ODBC driver instead
-
-9. Push down the filtering condition
-
-   The current ODBC external table supports pushdown of filter conditions. MySQL external tables support pushdown of all conditions. The functions of other databases differ from Doris, which can cause the pushdown to fail; currently, except for MySQL, other databases do not support pushdown of function calls. Whether Doris pushes down the required filter conditions can be confirmed with the `explain` statement, as shown in the sketch after this list.
-
-10. Report Errors: `driver connect Err: xxx`
-
-   Connection to the database fails. The `Err:` part shows the specific database connection error. This is usually a configuration problem; check whether the IP address, port, or account credentials are correct.
-
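-For example, to check whether a filter is pushed down to the ODBC data source (a sketch using the `oracle_table` from the query example above):
-
-```sql
-explain select * from oracle_table where k1 > 1000;
-```
-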
-
-
diff --git a/docs/en/extending-doris/seatunnel/flink-sink.md b/docs/en/extending-doris/seatunnel/flink-sink.md
deleted file mode 100644
index e8d09329d9..0000000000
--- a/docs/en/extending-doris/seatunnel/flink-sink.md
+++ /dev/null
@@ -1,116 +0,0 @@
----
-{
- "title": "Seatunnel Connector Flink Doris",
- "language": "en"
-}
----
-
-
-
-# Seatunnel
-The newest [Apache SeaTunnel (formerly Waterdrop)](https://seatunnel.apache.org) already supports the Doris connector. SeaTunnel can use the Spark engine and the Flink engine to synchronize data to Doris.
-## Flink Sink Doris(2.x)
-Seatunnel Flink Sink Doris [plugin code](https://github.com/apache/incubator-seatunnel/tree/dev/seatunnel-connectors/seatunnel-connector-flink-doris)
-
-### Options
-| name | type | required | default value | engine |
-| --- | --- | --- | --- | --- |
-| fenodes | string | yes | - | Flink |
-| database | string | yes | - | Flink |
-| table | string | yes | - | Flink |
-| user | string | yes | - | Flink |
-| password | string | yes | - | Flink |
-| batch_size | int | no | 100 | Flink |
-| interval | int | no |1000 | Flink |
-| max_retries | int | no | 1 | Flink|
-| doris.* | - | no | - | Flink |
-
-`fenodes [string]`
-
-Doris Fe http url, eg: 127.0.0.1:8030
-
-`database [string]`
-
-Doris database
-
-`table [string]`
-
-Doris table
-
-`user [string]`
-
-Doris user
-
-`password [string]`
-
-Doris password
-
-`batch_size [int]`
-
-The maximum number of lines to write to Doris at a time, the default value is 100
-
-`interval [int]`
-
-The flush interval (in milliseconds), after which the asynchronous thread writes the data in the cache to Doris. Set to 0 to turn off periodic writes.
-
-`max_retries [int]`
-
-Number of retries after writing to Doris fails
-
-`doris.* [string]`
-
-Import parameters for Stream load. For example: 'doris.column_separator' = ', ' etc.
-
-[More Stream Load parameter configuration](https://doris.apache.org/administrator-guide/load-data/stream-load-manual.html)
-
-### Examples
-Socket To Doris
-```
-env {
- execution.parallelism = 1
-}
-source {
- SocketStream {
- host = 127.0.0.1
- port = 9999
- result_table_name = "socket"
- field_name = "info"
- }
-}
-transform {
-}
-sink {
- DorisSink {
- fenodes = "127.0.0.1:8030"
- user = root
- password = 123456
- database = test
- table = test_tbl
- batch_size = 5
- max_retries = 1
- interval = 5000
- }
-}
-
-```
-### Start command
-```
-sh bin/start-seatunnel-flink.sh --config config/flink.streaming.conf
-```
\ No newline at end of file
diff --git a/docs/en/extending-doris/seatunnel/spark-sink.md b/docs/en/extending-doris/seatunnel/spark-sink.md
deleted file mode 100644
index 5ef316e1dd..0000000000
--- a/docs/en/extending-doris/seatunnel/spark-sink.md
+++ /dev/null
@@ -1,123 +0,0 @@
----
-{
- "title": "Seatunnel Connector Spark Doris",
- "language": "en"
-}
----
-
-
-
-# Seatunnel
-
-The newest [Apache SeaTunnel (formerly Waterdrop)](https://seatunnel.apache.org) already supports the Doris connector;
-SeaTunnel can load data with the Spark engine or the Flink engine.
-
-In fact, SeaTunnel loads data through the Stream Load function. Everyone is welcome to use it.
-
-# Install Seatunnel
-[Seatunnel install](https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v2/flink/installation)
-
-## Spark Sink Doris
-### Options
-| name | type | required | default value | engine |
-| --- | --- | --- | --- | --- |
-| fenodes | string | yes | - | Spark |
-| database | string | yes | - | Spark |
-| table | string | yes | - | Spark |
-| user | string | yes | - | Spark |
-| password | string | yes | - | Spark |
-| batch_size | int | yes | 100 | Spark |
-| doris.* | string | no | - | Spark |
-
-`fenodes [string]`
-
-Doris FE http address, eg: 127.0.0.1:8030
-
-`database [string]`
-
-Doris target database name
-
-`table [string]`
-
-Doris target table name
-
-`user [string]`
-
-Doris user name
-
-`password [string]`
-
-Doris user's password
-
-`batch_size [int]`
-
-The number of rows submitted to Doris per batch
-
-`doris.* [string]`
-Doris Stream Load properties; you can use the 'doris.' prefix + a Stream Load property
-
-[More Doris stream_load Configurations](https://doris.apache.org/master/zh-CN/administrator-guide/load-data/stream-load-manual.html)
-
-### Examples
-Hive to Doris
-
-Config properties
-```
-env{
- spark.app.name = "hive2doris-template"
-}
-
-spark {
- spark.sql.catalogImplementation = "hive"
-}
-
-source {
- hive {
- preSql = "select * from tmp.test"
- result_table_name = "test"
- }
-}
-
-transform {
-}
-
-
-sink {
-
-Console {
-
- }
-
-Doris {
- fenodes="xxxx:8030"
- database="gl_mint_dim"
- table="dim_date"
- user="root"
- password="root"
- batch_size=1000
- doris.column_separator="\t"
- doris.columns="date_key,date_value,day_in_year,day_in_month"
- }
-}
-```
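-
-The Doris table referenced above could look like the following (a rough sketch; the column types, key and distribution settings are assumptions based on the column names in the config):
-
-```sql
--- illustrative target table matching the doris.columns list in the config
-CREATE TABLE `gl_mint_dim`.`dim_date` (
-    date_key      DATE,
-    date_value    VARCHAR(20),
-    day_in_year   INT,
-    day_in_month  INT
-)
-DUPLICATE KEY(date_key)
-DISTRIBUTED BY HASH(date_key) BUCKETS 1
-PROPERTIES ("replication_num" = "1");
-```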
-Start command
-```
-sh bin/start-waterdrop-spark.sh --master local[4] --deploy-mode client --config ./config/spark.conf
-```
\ No newline at end of file
diff --git a/docs/en/extending-doris/spark-doris-connector.md b/docs/en/extending-doris/spark-doris-connector.md
deleted file mode 100644
index b7145654c0..0000000000
--- a/docs/en/extending-doris/spark-doris-connector.md
+++ /dev/null
@@ -1,286 +0,0 @@
----
-{
- "title": "Spark Doris Connector",
- "language": "en"
-}
----
-
-
-
-# Spark Doris Connector
-
-Spark Doris Connector can support reading data stored in Doris and writing data to Doris through Spark.
-
-Github: https://github.com/apache/incubator-doris-spark-connector
-
-- Support reading data from `Doris`.
-- Support `Spark DataFrame` batch/stream writing data to `Doris`
-- You can map the `Doris` table to` DataFrame` or `RDD`, it is recommended to use` DataFrame`.
-- Support the completion of data filtering on the `Doris` side to reduce the amount of data transmission.
-
-## Version Compatibility
-
-| Connector | Spark | Doris | Java | Scala |
-|---------------| ----- | ------ | ---- | ----- |
-| 2.3.4-2.11.xx | 2.x | 0.12+ | 8 | 2.11 |
-| 3.1.2-2.12.xx | 3.x | 0.12.+ | 8 | 2.12 |
-
-## Build and Install
-
-Ready to work
-
-1. Modify the `custom_env.sh.tpl` file and rename it to `custom_env.sh`
-
-2. Specify the thrift installation directory
-
-```bash
-##source file content
-#export THRIFT_BIN=
-#export MVN_BIN=
-#export JAVA_HOME=
-
-##amend as below,MacOS as an example
-export THRIFT_BIN=/opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift
-#export MVN_BIN=
-#export JAVA_HOME=
-
-Install `thrift` 0.13.0 (Note: `Doris` 0.15 and the latest builds are based on `thrift` 0.13.0, previous versions are still built with `thrift` 0.9.3)
-Windows:
- 1. Download: `http://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.exe`
- 2. Modify thrift-0.13.0.exe to thrift
-
-MacOS:
- 1. Download: `brew install thrift@0.13.0`
- 2. default address: /opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift
-
-Note: Executing `brew install thrift@0.13.0` on MacOS may report an error that the version cannot be found. The solution is as follows, execute it in the terminal:
- 1. `brew tap-new $USER/local-tap`
- 2. `brew extract --version='0.13.0' thrift $USER/local-tap`
- 3. `brew install thrift@0.13.0`
- Reference link: `https://gist.github.com/tonydeng/02e571f273d6cce4230dc8d5f394493c`
-
-Linux:
- 1.Download source package: `wget https://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.tar.gz`
- 2.Install dependencies: `yum install -y autoconf automake libtool cmake ncurses-devel openssl-devel lzo-devel zlib-devel gcc gcc-c++`
- 3.`tar zxvf thrift-0.13.0.tar.gz`
- 4.`cd thrift-0.13.0`
- 5.`./configure --without-tests`
- 6.`make`
- 7.`make install`
- Check the version after installation is complete: thrift --version
- Note: If you have compiled Doris, you do not need to install thrift, you can directly use $DORIS_HOME/thirdparty/installed/bin/thrift
-```
-
-Execute the following command in the source directory:
-
-```bash
-sh build.sh 2.3.4 2.11 ## spark 2.3.4 version, and scala 2.11
-sh build.sh 3.1.2 2.12 ## spark 3.1.2 version, and scala 2.12
-```
-> Note: If you check out the source code from tag, you can just run sh build.sh --tag without specifying the spark and scala versions. This is because the version in the tag source code is fixed.
-
-After successful compilation, the file `doris-spark-2.3.4-2.11-1.0.0-SNAPSHOT.jar` will be generated in the `output/` directory. Copy this file to `ClassPath` in `Spark` to use `Spark-Doris-Connector`. For example, `Spark` running in `Local` mode, put this file in the `jars/` folder. `Spark` running in `Yarn` cluster mode, put this file in the pre-deployment package.
-
-## Using Maven
-
-```xml
-<dependency>
-  <groupId>org.apache.doris</groupId>
-  <artifactId>spark-doris-connector-3.1_2.12</artifactId>
-  <version>1.0.1</version>
-</dependency>
-```
-
-**Notes**
-
-Please replace the Connector version according to the different Spark and Scala versions.
-
-## Example
-### Read
-
-#### SQL
-
-```sql
-CREATE TEMPORARY VIEW spark_doris
-USING doris
-OPTIONS(
- "table.identifier"="$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME",
- "fenodes"="$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT",
- "user"="$YOUR_DORIS_USERNAME",
- "password"="$YOUR_DORIS_PASSWORD"
-);
-
-SELECT * FROM spark_doris;
-```
-
-#### DataFrame
-
-```scala
-val dorisSparkDF = spark.read.format("doris")
- .option("doris.table.identifier", "$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME")
- .option("doris.fenodes", "$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT")
- .option("user", "$YOUR_DORIS_USERNAME")
- .option("password", "$YOUR_DORIS_PASSWORD")
- .load()
-
-dorisSparkDF.show(5)
-```
-
-#### RDD
-
-```scala
-import org.apache.doris.spark._
-val dorisSparkRDD = sc.dorisRDD(
- tableIdentifier = Some("$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME"),
- cfg = Some(Map(
- "doris.fenodes" -> "$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT",
- "doris.request.auth.user" -> "$YOUR_DORIS_USERNAME",
- "doris.request.auth.password" -> "$YOUR_DORIS_PASSWORD"
- ))
-)
-
-dorisSparkRDD.collect()
-```
-### Write
-
-#### SQL
-
-```sql
-CREATE TEMPORARY VIEW spark_doris
-USING doris
-OPTIONS(
- "table.identifier"="$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME",
- "fenodes"="$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT",
- "user"="$YOUR_DORIS_USERNAME",
- "password"="$YOUR_DORIS_PASSWORD"
-);
-
-INSERT INTO spark_doris VALUES ("VALUE1","VALUE2",...);
--- or
-INSERT INTO spark_doris SELECT * FROM YOUR_TABLE
-```
-
-#### DataFrame(batch/stream)
-```scala
-// batch sink
-val mockDataDF = List(
- (3, "440403001005", "21.cn"),
- (1, "4404030013005", "22.cn"),
- (33, null, "23.cn")
-).toDF("id", "mi_code", "mi_name")
-mockDataDF.show(5)
-
-mockDataDF.write.format("doris")
- .option("doris.table.identifier", "$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME")
- .option("doris.fenodes", "$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT")
- .option("user", "$YOUR_DORIS_USERNAME")
- .option("password", "$YOUR_DORIS_PASSWORD")
- //other options
- //specify the fields to write
- .option("doris.write.fields","$YOUR_FIELDS_TO_WRITE")
- .save()
-
-// stream sink (StructuredStreaming)
-val kafkaSource = spark.readStream
- .option("kafka.bootstrap.servers", "$YOUR_KAFKA_SERVERS")
- .option("startingOffsets", "latest")
- .option("subscribe", "$YOUR_KAFKA_TOPICS")
- .format("kafka")
- .load()
-kafkaSource.selectExpr("CAST(key AS STRING)", "CAST(value as STRING)")
- .writeStream
- .format("doris")
- .option("checkpointLocation", "$YOUR_CHECKPOINT_LOCATION")
- .option("doris.table.identifier", "$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME")
- .option("doris.fenodes", "$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT")
- .option("user", "$YOUR_DORIS_USERNAME")
- .option("password", "$YOUR_DORIS_PASSWORD")
- //other options
- //specify the fields to write
- .option("doris.write.fields","$YOUR_FIELDS_TO_WRITE")
- .start()
- .awaitTermination()
-```
-
-## Configuration
-
-### General
-
-| Key | Default Value | Comment |
-| -------------------------------- | ----------------- | ------------------------------------------------------------ |
-| doris.fenodes | -- | Doris FE http address, support multiple addresses, separated by commas |
-| doris.table.identifier | -- | Doris table identifier, eg, db1.tbl1 |
-| doris.request.retries | 3 | Number of retries to send requests to Doris |
-| doris.request.connect.timeout.ms | 30000 | Connection timeout for sending requests to Doris |
-| doris.request.read.timeout.ms | 30000 | Read timeout for sending request to Doris |
-| doris.request.query.timeout.s | 3600 | Query the timeout time of doris, the default is 1 hour, -1 means no timeout limit |
-| doris.request.tablet.size | Integer.MAX_VALUE | The number of Doris Tablets corresponding to an RDD Partition. The smaller this value is set, the more partitions will be generated. This will increase the parallelism on the Spark side, but at the same time will cause greater pressure on Doris. |
-| doris.batch.size | 1024 | The maximum number of rows to read data from BE at one time. Increasing this value can reduce the number of connections between Spark and Doris. Thereby reducing the extra time overhead caused by network delay. |
-| doris.exec.mem.limit | 2147483648 | Memory limit for a single query. The default is 2GB, in bytes. |
-| doris.deserialize.arrow.async | false | Whether to support asynchronous conversion of Arrow format to RowBatch required for spark-doris-connector iteration |
-| doris.deserialize.queue.size | 64 | Asynchronous conversion of the internal processing queue in Arrow format takes effect when doris.deserialize.arrow.async is true |
-| doris.write.fields | -- | Specifies the fields (or the order of the fields) to write to the Doris table, fields separated by commas. By default, all fields are written in the order of the Doris table fields. |
-| sink.batch.size | 10000 | Maximum number of rows in a single write to BE |
-| sink.max-retries | 1 | Number of retries after a failed write to BE |
-
-### SQL & Dataframe Configuration
-
-| Key | Default Value | Comment |
-| ------------------------------- | ------------- | ------------------------------------------------------------ |
-| user | -- | Doris username |
-| password | -- | Doris password |
-| doris.filter.query.in.max.count | 100 | In the predicate pushdown, the maximum number of elements in the in expression value list. If this number is exceeded, the in-expression conditional filtering is processed on the Spark side. |
-
-### RDD Configuration
-
-| Key | Default Value | Comment |
-| --------------------------- | ------------- | ------------------------------------------------------------ |
-| doris.request.auth.user | -- | Doris username |
-| doris.request.auth.password | -- | Doris password |
-| doris.read.field | -- | List of column names in the Doris table, separated by commas |
-| doris.filter.query | -- | Filter expression of the query, which is transparently transmitted to Doris. Doris uses this expression to complete source-side data filtering. |
-
-
-
-## Doris & Spark Column Type Mapping
-
-| Doris Type | Spark Type |
-| ---------- | -------------------------------- |
-| NULL_TYPE | DataTypes.NullType |
-| BOOLEAN | DataTypes.BooleanType |
-| TINYINT | DataTypes.ByteType |
-| SMALLINT | DataTypes.ShortType |
-| INT | DataTypes.IntegerType |
-| BIGINT | DataTypes.LongType |
-| FLOAT | DataTypes.FloatType |
-| DOUBLE | DataTypes.DoubleType |
-| DATE       | DataTypes.StringType<sup>1</sup> |
-| DATETIME   | DataTypes.StringType<sup>1</sup> |
-| BINARY | DataTypes.BinaryType |
-| DECIMAL | DecimalType |
-| CHAR | DataTypes.StringType |
-| LARGEINT | DataTypes.StringType |
-| VARCHAR | DataTypes.StringType |
-| DECIMALV2 | DecimalType |
-| TIME | DataTypes.DoubleType |
-| HLL | Unsupported datatype |
-
-* Note: In the Connector, `DATE` and `DATETIME` are mapped to `String`. Due to the processing logic of Doris' underlying storage engine, directly using these time types would not cover the required time range, so the `String` type is used to return the corresponding human-readable time text.
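-
-Since `DATE` and `DATETIME` therefore arrive on the Spark side as strings, a common follow-up step is to parse them back into Spark date/time types. Below is a minimal Java sketch; `dt` and `created_at` are hypothetical column names and the formats are the usual Doris text representations.
-
-```java
-import static org.apache.spark.sql.functions.col;
-import static org.apache.spark.sql.functions.to_date;
-import static org.apache.spark.sql.functions.to_timestamp;
-
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Row;
-
-public class DorisTimeColumns {
-    // df is a DataFrame read from Doris; "dt" and "created_at" are hypothetical column names.
-    public static Dataset<Row> parseTimeColumns(Dataset<Row> df) {
-        return df
-                .withColumn("dt", to_date(col("dt"), "yyyy-MM-dd"))
-                .withColumn("created_at", to_timestamp(col("created_at"), "yyyy-MM-dd HH:mm:ss"));
-    }
-}
-```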
diff --git a/docs/en/extending-doris/udf/contribute-udf.md b/docs/en/extending-doris/udf/contribute-udf.md
deleted file mode 100644
index 16356a7f65..0000000000
--- a/docs/en/extending-doris/udf/contribute-udf.md
+++ /dev/null
@@ -1,124 +0,0 @@
----
-{
- "title": "Contribute UDF",
- "language": "en"
-}
----
-
-
-
-# Contribute UDF
-
-This manual mainly introduces how external users can contribute their own UDF functions to the Doris community.
-
-## Prerequisites
-
-1. The UDF is general-purpose
-
- Generality here mainly means that the UDF is widely applicable to certain business scenarios. Such UDFs are valuable and can be used directly by other users in the community.
-
- If you are not sure whether the UDF you wrote is general-purpose, you can send an email to `dev@doris.apache.org` or directly open an issue to start a discussion.
-
-2. The UDF has been fully tested and runs normally in the user's production environment
-
-## Preparation
-
-1. The UDF source code
-2. The user manual of the UDF
-
-### Source code
-
-Create a folder for your UDF under `contrib/udf/src/` and store the source code and CMake files there. The source code to be contributed should include `.h` and `.cpp` files and a `CMakeLists.txt`. Taking udf_samples as an example, first create a new folder under `contrib/udf/src/` and store the source code in it.
-
-```
- ├──contrib
- │ └── udf
- │ ├── CMakeLists.txt
- │ └── src
- │ └── udf_samples
- │ ├── CMakeLists.txt
- │ ├── uda_sample.cpp
- │ ├── uda_sample.h
- │ ├── udf_sample.cpp
- │ └── udf_sample.h
-
-```
-
-1. CMakeLists.txt
-
- After the user's `CMakeLists.txt` is placed here, a few minor changes are required: just remove the `include udf` and `udf lib` declarations, because they are already declared in the CMake file at the `contrib/udf` level.
-
-### Manual
-
-The user manual needs to include: UDF function definition description, applicable scenarios, function syntax, how to compile UDF, how to use UDF in Doris, and use examples.
-
-1. The user manual must contain both Chinese and English versions, stored under `docs/zh-CN/extending-doris/udf/contrib` and `docs/en/extending-doris/udf/contrib` respectively.
-
- ```
- ├── docs
- │ └── zh-CN
- │ └──extending-doris
- │ └──udf
- │ └──contrib
- │ ├── udf-simple-manual.md
-
- ```
-
- ```
- ├── docs
- │ └── en
- │ └──extending-doris
- │ └──udf
- │ └──contrib
- │ ├── udf-simple-manual.md
- ```
-
-2. Add the two manual files to the sidebar in Chinese and English.
-
- ```
- vi docs/.vuepress/sidebar/zh-CN.js
- {
- title: "用户贡献的 UDF",
- directoryPath: "contrib/",
- children:
- [
- "udf-simple-manual",
- ],
- },
- ```
-
- ```
- vi docs/.vuepress/sidebar/en.js
- {
- title: "Users contribute UDF",
- directoryPath: "contrib/",
- children:
- [
- "udf-simple-manual",
- ],
- },
-
- ```
-
-## Contribute UDF to the community
-
-Once you meet the prerequisites and have prepared the code and documentation, you can contribute the UDF to the Doris community by submitting a pull request (PR) on [Github](https://github.com/apache/incubator-doris). See [Pull Request (PR)](https://help.github.com/articles/about-pull-requests/) for how to submit one.
-
-Finally, once the PR is reviewed and merged: congratulations, your UDF becomes a third-party UDF supported by Doris. You can find it in the extended functions section of the [Doris official website](http://doris.apache.org/master/zh-CN/).
diff --git a/docs/en/extending-doris/udf/java-user-defined-function.md b/docs/en/extending-doris/udf/java-user-defined-function.md
deleted file mode 100644
index efbd293f78..0000000000
--- a/docs/en/extending-doris/udf/java-user-defined-function.md
+++ /dev/null
@@ -1,89 +0,0 @@
----
-{
- "title": "[Experimental] Java UDF",
- "language": "en"
-}
----
-
-
-
-# Java UDF
-
-Java UDF provides users with a Java interface for writing UDFs, making it convenient to execute user-defined functions in Java. Compared with the native UDF implementation, Java UDF has the following advantages and limitations:
-1. Advantages
- * Compatibility: Java UDFs are compatible across Doris versions, so no extra migration is needed when upgrading Doris. Java UDFs also follow the same programming conventions as Hive, Spark and other engines, so users can move existing Hive/Spark UDF jars to Doris directly.
- * Security: a failure or crash during Java UDF execution only causes the JVM to report an error, not the Doris process to crash.
- * Flexibility: users can package third-party dependencies together in the UDF jar.
-
-2. Restrictions
- * Performance: compared with native UDFs, Java UDFs introduce additional JNI overhead, but through batch execution we have minimized this overhead as much as possible.
- * Vectorized engine: Java UDF is currently only supported on the vectorized engine.
-
-## Write UDF functions
-
-This section mainly introduces how to develop a Java UDF. Samples for the Java version are provided under `samples/doris-demo/java-udf-demo/` for your reference.
-
-To use Java UDF, the main entry point of the UDF must be the `evaluate` function. This is consistent with other engines such as Hive. In the `AddOne` example, the UDF performs an add-one operation on an integer.
-
-It is worth mentioning that this example is not only a Java UDF supported by Doris but also a UDF supported by Hive; in other words, Hive UDFs can be migrated to Doris directly.
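-
-A minimal sketch of such a UDF is shown below, based on the description above. The package and class name are assumed to match the `CREATE FUNCTION` sample in the next section; depending on the Doris version, the sample project may additionally extend a Hive `UDF` base class, so treat this only as an illustration and refer to `samples/doris-demo/java-udf-demo/` for the authoritative version.
-
-```java
-package org.apache.doris.udf;
-
-public class AddOne {
-    // The UDF entry point must be named `evaluate`, consistent with engines such as Hive.
-    public Integer evaluate(Integer value) {
-        // Return null for null input to keep the function NULL-safe.
-        if (value == null) {
-            return null;
-        }
-        return value + 1;
-    }
-}
-```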
-
-## Create UDF
-
-Currently, UDAF and UDTF are not supported.
-
-```sql
-CREATE FUNCTION
-name ([,...])
-[RETURNS] rettype
-PROPERTIES (["key"="value"][,...])
-```
-Instructions:
-
-1. `symbol` in PROPERTIES is the fully qualified name of the class containing the UDF. This parameter must be set.
-2. `file` in PROPERTIES is the jar package containing the UDF. This parameter must be set.
-3. `type` in PROPERTIES indicates the UDF call type, which is NATIVE by default. Set it to `JAVA_UDF` when using a Java UDF.
-4. `name`: a function belongs to a database, and the name is of the form `dbName`.`funcName`. When `dbName` is not explicitly specified, the database of the current session is used as `dbName`.
-
-Sample:
-```sql
-CREATE FUNCTION java_udf_add_one(int) RETURNS int PROPERTIES (
- "file"="file:///path/to/java-udf-demo-jar-with-dependencies.jar",
- "symbol"="org.apache.doris.udf.AddOne",
- "type"="JAVA_UDF"
-);
-```
-
-## Use UDF
-
-Users must have the `SELECT` permission of the corresponding database to use UDF/UDAF.
-
-UDFs are used in the same way as ordinary functions. The only difference is that the scope of built-in functions is global, while the scope of a UDF is within its database. When the session is connected to that database, using the UDF name directly will find the UDF in the current database; otherwise, the user needs to explicitly specify the UDF's database name, such as `dbName`.`funcName`.
-
-## Delete UDF
-
-When you no longer need a UDF, you can delete it with the `DROP FUNCTION` command; refer to `DROP FUNCTION` for details.
-
-## Example
-Examples of Java UDFs are provided in the `samples/doris-demo/java-udf-demo/` directory. See the `README.md` there for details on how to use them.
-
-## Unsupported Use Case
-At present, Java UDF is still in the process of continuous development, so some features are **not completed**.
-1. Complex data types (date, HLL, bitmap) are not supported.
-2. Memory management and statistics of JVM and Doris have not been unified.
diff --git a/docs/en/extending-doris/udf/native-user-defined-function.md b/docs/en/extending-doris/udf/native-user-defined-function.md
deleted file mode 100644
index c32f17549c..0000000000
--- a/docs/en/extending-doris/udf/native-user-defined-function.md
+++ /dev/null
@@ -1,264 +0,0 @@
----
-{
- "title": "Native User Defined Function",
- "language": "en"
-}
----
-
-
-
-# Native User Defined Function
-UDFs are mainly suitable for scenarios where Doris does not provide the analytical capabilities that users need. Users can implement custom functions according to their own needs and register them with Doris through the UDF framework to extend Doris' capabilities and meet their analysis needs.
-
-There are two types of analysis requirements that UDF can meet: UDF and UDAF. UDF in this article refers to both.
-
-1. UDF: User-defined function. This function operates on a single row and outputs a single-row result. When users use UDFs in queries, each row of data will eventually appear in the result set. A typical UDF is a string operation such as concat().
-2. UDAF: User-defined aggregation function. This function operates on multiple rows and outputs a single row of results. When the user uses a UDAF in a query, each group of data after grouping ultimately computes one value that is appended to the result set. A typical UDAF is the aggregation sum(). Generally speaking, UDAF is used together with GROUP BY.
-
-This document mainly describes how to write a custom UDF function and how to use it in Doris.
-
-## Writing UDF functions
-
-Before using UDF, users need to write their own UDF functions under Doris' UDF framework. In the `contrib/udf/src/udf_samples/udf_sample.h|cpp` file is a simple UDF Demo.
-
-Writing a UDF function requires the following steps.
-
-### Writing functions
-
-Create the corresponding header file and CPP file, and implement the logic you need in the CPP file. The format of the implementation function in the CPP file must correspond to the UDF declaration.
-
-Users can put their own source code in a folder. Taking udf_sample as an example, the directory structure is as follows:
-
-```
-└── udf_samples
- ├── uda_sample.cpp
- ├── uda_sample.h
- ├── udf_sample.cpp
- └── udf_sample.h
-```
-
-#### Non-variable parameters
-
-For UDFs with non-variable parameters, the correspondence between the two is straightforward.
-For example, the UDF of `INT MyADD(INT, INT)` will correspond to `IntVal AddUdf(FunctionContext* context, const IntVal& arg1, const IntVal& arg2)`.
-
-1. `AddUdf` can be any name, as long as it is specified when creating the UDF.
-2. The first parameter in the implementation function is always `FunctionContext*`. The implementer can obtain some query-related context through this structure and allocate memory for use. Refer to the definitions in `udf/udf.h` for the specific interfaces.
-3. In the implementation function, the parameters from the second one onward must correspond to the UDF parameters one by one; for example, `IntVal` corresponds to the `INT` type. All types in this part must be passed as `const` references.
-4. The return type must correspond to the UDF's return type.
-
-#### Variable parameters
-
-For variable parameters, refer to the following example, corresponding to the UDF `String md5sum(String, ...)`.
-The implementation function is `StringVal md5sumUdf(FunctionContext* ctx, int num_args, const StringVal* args)`.
-
-1. `md5sumUdf` can also be named arbitrarily, as long as it is specified when creating the UDF.
-2. The first parameter is the same as in the non-variable-parameter function: a `FunctionContext*` is passed in.
-3. The variable-parameter part consists of two pieces: first an integer is passed in, indicating how many parameters follow; then an array of the variable parameters is passed in.
-
-#### Type correspondence
-
-|UDF Type|Argument Type|
-|----|---------|
-|TinyInt|TinyIntVal|
-|SmallInt|SmallIntVal|
-|Int|IntVal|
-|BigInt|BigIntVal|
-|LargeInt|LargeIntVal|
-|Float|FloatVal|
-|Double|DoubleVal|
-|Date|DateTimeVal|
-|Datetime|DateTimeVal|
-|Char|StringVal|
-|Varchar|StringVal|
-|Decimal|DecimalVal|
-
-
-## Compile UDF function
-
-Since the UDF implementation relies on Doris' UDF framework, the first step in compiling UDF functions is to compile Doris, that is, the UDF framework.
-
-After the compilation is completed, the static library file of the UDF framework will be generated. Then introduce the UDF framework dependency and compile the UDF.
-
-### Compile Doris
-
-Running `sh build.sh` in the root directory of Doris will generate the UDF framework's headers and static library (`headers|libs`) under `output/udf/`:
-
-```
-├── output
-│ └── udf
-│ ├── include
-│ │ ├── uda_test_harness.h
-│ │ └── udf.h
-│ └── lib
-│ └── libDorisUdf.a
-
-```
-
-### Writing UDF compilation files
-
-1. Prepare thirdparty
-
- The thirdparty folder is mainly used to store the third-party libraries that the user's UDF depends on, including header files and static libraries. It must contain the two files `udf.h` and `libDorisUdf.a` from the Doris UDF framework dependency.
-
- Taking udf_sample as an example here, the source code is stored in the user's own `udf_samples` directory. Create a thirdparty folder in the same directory to store the static library. The directory structure is as follows:
-
- ```
- ├── thirdparty
- │ │── include
- │ │ └── udf.h
- │ └── lib
- │ └── libDorisUdf.a
- └── udf_samples
-
- ```
-
- `udf.h` is the UDF framework header file. Its path is `doris/output/udf/include/udf.h`. Users need to copy this header file from the Doris compilation output to the include folder of their `thirdparty` directory.
-
- `libDorisUdf.a` is the static library of the UDF framework. After Doris is compiled, the file is located at `doris/output/udf/lib/libDorisUdf.a`. Users need to copy this file to the lib folder of their `thirdparty` directory.
-
- *Note: the static library of the UDF framework is generated only after Doris is compiled.*
-
-2. Prepare the UDF's CMakeLists.txt
-
- CMakeLists.txt declares how the UDF is compiled. It is stored in the source code folder, at the same level as the user code. Here, taking udf_samples as an example, the directory structure is as follows:
-
- ```
- ├── thirdparty
- └── udf_samples
- ├── CMakeLists.txt
- ├── uda_sample.cpp
- ├── uda_sample.h
- ├── udf_sample.cpp
- └── udf_sample.h
- ```
-
- + You need to explicitly declare a reference to `libDorisUdf.a`
- + Declare the location of the `udf.h` header file
-
-
- Take udf_sample as an example
-
- ```
- # Include udf
- include_directories(thirdparty/include)
-
- # Set all libraries
- add_library(udf STATIC IMPORTED)
- set_target_properties(udf PROPERTIES IMPORTED_LOCATION thirdparty/lib/libDorisUdf.a)
-
- # where to put generated libraries
- set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/udf_samples")
-
- # where to put generated binaries
- set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/udf_samples")
-
- add_library(udfsample SHARED udf_sample.cpp)
- target_link_libraries(udfsample
- udf
- -static-libstdc++
- -static-libgcc
- )
-
- add_library(udasample SHARED uda_sample.cpp)
- target_link_libraries(udasample
- udf
- -static-libstdc++
- -static-libgcc
- )
- ```
-
- If the user's UDF also depends on other third-party libraries, you need to declare their include and lib paths and add the dependencies in `add_library`.
-
-The complete directory structure after all files are prepared is as follows:
-
-```
- ├── thirdparty
- │ │── include
- │ │ └── udf.h
- │ └── lib
- │ └── libDorisUdf.a
- └── udf_samples
- ├── CMakeLists.txt
- ├── uda_sample.cpp
- ├── uda_sample.h
- ├── udf_sample.cpp
- └── udf_sample.h
-```
-
-Prepare the above files and you can compile UDF directly
-
-### Execute compilation
-
-Create a build folder under the udf_samples folder to store the compilation output.
-
-Run the command `cmake ../` in the build folder to generate a Makefile, and execute make to generate the corresponding dynamic library.
-
-```
-├── thirdparty
-├── udf_samples
- └── build
-```
-
-### Compilation result
-
-After the compilation is completed, the UDF dynamic link library is successfully generated. Under `build/src/`, taking udf_samples as an example, the directory structure is as follows:
-
-```
-├── thirdparty
-├── udf_samples
- └── build
- └── src
- └── udf_samples
- ├── libudasample.so
- └── libudfsample.so
-
-```
-
-## Create UDF function
-
-After following the above steps, you can get the UDF dynamic library (that is, the `.so` file in the compilation result). You need to put this dynamic library in a location that can be accessed through the HTTP protocol.
-
-Then log in to the Doris system and create a UDF in the mysql-client using the `CREATE FUNCTION` syntax. You need ADMIN privileges to complete this operation. After that, the UDF exists in the Doris system.
-
-```
-CREATE [AGGREGATE] FUNCTION
-name ([argtype][,...])
-[RETURNS] rettype
-PROPERTIES (["key"="value"][,...])
-```
-Description:
-
-1. `symbol` in PROPERTIES is the symbol of the entry function to be executed. This parameter must be set. You can get the symbol with the `nm` command; for example, `_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_` obtained by `nm libudfsample.so | grep AddUdf` is the corresponding symbol.
-2. `object_file` in PROPERTIES is the URL from which the corresponding dynamic library can be downloaded. This parameter must be set.
-3. `name`: a function belongs to a database, and the name is of the form `dbName`.`funcName`. When `dbName` is not explicitly specified, the database of the current session is used as `dbName`.
-
-For specific use, please refer to `CREATE FUNCTION` for more detailed information.
-
-## Use UDF
-
-Users must have the `SELECT` permission of the corresponding database to use UDF/UDAF.
-
-UDFs are used in the same way as ordinary functions. The only difference is that the scope of built-in functions is global, while the scope of a UDF is within its database. When the session is connected to that database, using the UDF name directly will find the UDF in the current database; otherwise, the user needs to explicitly specify the UDF's database name, such as `dbName`.`funcName`.
-
-
-## Delete UDF
-
-When you no longer need a UDF, you can delete it with the `DROP FUNCTION` command; refer to `DROP FUNCTION` for details.
diff --git a/docs/en/extending-doris/udf/remote-user-defined-function.md b/docs/en/extending-doris/udf/remote-user-defined-function.md
deleted file mode 100644
index aa8cc3a3c9..0000000000
--- a/docs/en/extending-doris/udf/remote-user-defined-function.md
+++ /dev/null
@@ -1,109 +0,0 @@
----
-{
- "title": "Remote User Defined Function Service",
- "language": "en"
-}
----
-
-
-
-# User Defined Function Rpc
-
-The Remote UDF Service can be accessed through RPC to execute user-defined functions. Compared with native UDF implementations, the Remote UDF Service has the following advantages and limitations:
-1. The advantage
- * Cross-language: UDF services can be written in all languages supported by Protobuf.
- * Security: UDF execution failure or crash only affects the UDF Service and does not cause the Doris process to crash.
- * Flexibility: Any other Service or library class can be invoked within a UDF Service to meet a wider variety of business requirements.
-
-2. Restrictions on use
- * Performance: Compared to Native UDFs, UDF services incur extra network overhead and thus have much lower performance than Native UDFs. At the same time, the implementation of the UDF Service also affects the execution efficiency of the function. Users need to deal with problems such as high concurrency and thread safety by themselves.
- * Single-row mode and batch mode: Doris's original row-based query execution framework issues one UDF RPC call per row of data, so execution efficiency is poor. Under the new vectorized execution framework, one UDF RPC call is issued per batch of data (2048 rows by default), so performance improves significantly. In actual tests, the performance of a vectorized, batched Remote UDF is similar to that of a row-based native UDF, which can be used as a reference.
-
-## Write UDF functions
-
-This section describes how to develop a Remote RPC Service. Samples for the Java version are provided under `samples/doris-demo/udf-demo/` for your reference.
-
-### Copy the proto file
-
-Copy `gensrc/proto/function_service.proto` and `gensrc/proto/types.proto` into the RPC service project.
-
-- function_service.proto
-  - PFunctionCallRequest
-    - function_name: the function name, corresponding to the symbol specified when the function was created
-    - args: the parameters passed by the method
-    - context: query context information
-  - PFunctionCallResponse
-    - result: the returned result
-    - status: the returned status, 0 indicates normal
-  - PCheckFunctionRequest
-    - function: function-related information
-    - match_type: matching type
-  - PCheckFunctionResponse
-    - status: the returned status, 0 indicates normal
-
-### Generate the interface
-
-Use `protoc` to generate the code; run `protoc -h` to see the specific parameters.
-
-### Implementing an interface
-
-The following three methods need to be implemented (a simplified sketch follows the list):
-- fnCall: used to implement the computation logic
-- checkFn: used to verify the function name, parameters and return value when creating a UDF
-- handShake: used for interface probing
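-
-The sketch below illustrates only the `fnCall` flow for the `add_int` function used in the sample at the end of this page. All class and accessor names (`PFunctionServiceGrpc`, `Types.PValues`, `getInt32Value`, and so on) are assumptions about what protoc and the gRPC Java plugin generate from `function_service.proto` under your chosen java options; regenerate the stubs yourself and adapt the names, and see `samples/doris-demo/` for a complete implementation including `checkFn` and `handShake`.
-
-```java
-import io.grpc.stub.StreamObserver;
-
-// Hypothetical generated classes -- the real names depend on your protoc options.
-import org.apache.doris.proto.FunctionService.PFunctionCallRequest;
-import org.apache.doris.proto.FunctionService.PFunctionCallResponse;
-import org.apache.doris.proto.PFunctionServiceGrpc;
-import org.apache.doris.proto.Types;
-
-public class FunctionServiceImpl extends PFunctionServiceGrpc.PFunctionServiceImplBase {
-
-    @Override
-    public void fnCall(PFunctionCallRequest request,
-                       StreamObserver<PFunctionCallResponse> responseObserver) {
-        PFunctionCallResponse.Builder response = PFunctionCallResponse.newBuilder()
-                .setStatus(Types.PStatus.newBuilder().setStatusCode(0).build());
-
-        // "add_int" is the symbol declared in the CREATE FUNCTION sample below.
-        if ("add_int".equals(request.getFunctionName())) {
-            // One PValues per argument column; under the vectorized engine each
-            // call carries a whole batch of rows, so add the columns element-wise.
-            Types.PValues left = request.getArgs(0);
-            Types.PValues right = request.getArgs(1);
-            Types.PValues.Builder result = Types.PValues.newBuilder();
-            for (int i = 0; i < left.getInt32ValueCount(); i++) {
-                result.addInt32Value(left.getInt32Value(i) + right.getInt32Value(i));
-            }
-            response.setResult(result.build());
-        }
-        responseObserver.onNext(response.build());
-        responseObserver.onCompleted();
-    }
-}
-```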
-
-## Create UDF
-
-Currently, UDAF and UDTF are not supported
-
-```sql
-CREATE FUNCTION
-name ([,...])
-[RETURNS] rettype
-PROPERTIES (["key"="value"][,...])
-```
-Instructions:
-
-1. `symbol` in PROPERTIES represents the method name passed in the RPC call. This parameter must be set.
-2. `object_file` in PROPERTIES represents the RPC service address. Currently a single address and a cluster address in BRPC-compatible format are supported; for the cluster connection mode, refer to the [Format specification](https://github.com/apache/incubator-brpc/blob/master/docs/cn/client.md#%E8%BF%9E%E6%8E%A5%E6%9C%8D%E5%8A%A1%E9%9B%86%E7%BE%A4).
-3. `type` in PROPERTIES indicates the UDF call type, which is Native by default. Use `RPC` for an RPC UDF.
-4. `name`: a function belongs to a database, and the name is of the form `dbName`.`funcName`. When `dbName` is not explicitly specified, the database of the current session is used as `dbName`.
-
-Sample:
-```sql
-CREATE FUNCTION rpc_add(INT, INT) RETURNS INT PROPERTIES (
- "SYMBOL"="add_int",
- "OBJECT_FILE"="127.0.0.1:9090",
- "TYPE"="RPC"
-);
-```
-
-## Use UDF
-
-Users must have the `SELECT` permission of the corresponding database to use UDF/UDAF.
-
-UDFs are used in the same way as ordinary functions. The only difference is that the scope of built-in functions is global, while the scope of a UDF is within its database. When the session is connected to that database, using the UDF name directly will find the UDF in the current database; otherwise, the user needs to explicitly specify the UDF's database name, such as `dbName`.`funcName`.
-
-## Delete UDF
-
-When you no longer need a UDF, you can delete it with the `DROP FUNCTION` command; refer to `DROP FUNCTION` for details.
-
-## Example
-Examples of RPC server implementations in CPP/Java/Python are provided in the `samples/doris-demo/` directory. See the `README.md` in each directory for details on how to use them.
\ No newline at end of file
diff --git a/new-docs/en/faq/data-faq.md b/docs/en/faq/data-faq.md
similarity index 100%
rename from new-docs/en/faq/data-faq.md
rename to docs/en/faq/data-faq.md
diff --git a/docs/en/faq/error.md b/docs/en/faq/error.md
deleted file mode 100644
index df0905dcd2..0000000000
--- a/docs/en/faq/error.md
+++ /dev/null
@@ -1,153 +0,0 @@
----
-{
- "title": "Common Error",
- "language": "en"
-}
----
-
-
-
-# Common Error
-
-This document is mainly used to record errors reported during the use of Doris. If you encounter other errors, you are welcome to contribute updates to us.
-
-
-### E1. Query error: Failed to get scan range, no queryable replica found in tablet: xxxx
-
-This occurs because no queryable replica can be found for the corresponding tablet, usually because a BE is down, a replica is missing, and so on. You can first use the `show tablet tablet_id` statement and then execute the `show proc` statement given in its result to view the replica information of this tablet and check whether the replicas are complete. You can also use `show proc "/cluster_balance"` to check the progress of replica scheduling and repair in the cluster.
-
-For commands related to data replica management, please refer to [Data Replica Management](../administrator-guide/operation/tablet-repair-and-balance.md).
-
-### E2. FE failed to start, fe.log keeps scrolling "wait catalog to be ready. FE type UNKNOWN"
-
-There are usually two reasons for this problem:
-
-1. The local IP obtained when the FE starts this time is inconsistent with the last time, usually because `priority_network` is not set correctly and the wrong IP address is matched when the FE starts. You need to modify `priority_network` and restart the FE.
-
-2. Most Follower FE nodes in the cluster are not started. For example, there are 3 Followers and only one is started. In this case, at least one more FE needs to be started so that the FE electable group can elect a Master to provide services.
-
-If neither of the above resolves the problem, you can recover by following the [Metadata Operation and Maintenance Document](../administrator-guide/operation/metadata-operation.md) on the Doris official website.
-
-### E3. tablet writer write failed, tablet_id=27306172, txn_id=28573520, err=-235 or -215 or -238
-
-This error usually occurs during data import operations. The error code of the new version is -235, and the error code of the old version may be -215. The meaning of this error is that the data version of the corresponding tablet exceeds the maximum limit (default 500, controlled by the BE parameter `max_tablet_version_num`), and subsequent writes will be rejected. For example, the error in the question means that the data version of the tablet 27306172 exceeds the limit.
-
-This error is usually because the import frequency is too high, greater than the compaction speed of the background data, causing versions to accumulate and eventually exceed the limit. At this point, you can first use the `show tablet 27306172` statement, and then execute the `show proc` statement in the result to view the status of each replica of the tablet. The versionCount in the result represents the number of versions. If you find that a replica has too many versions, you need to reduce the import frequency or stop importing, and observe whether the number of versions drops. If the version count still does not decrease after importing is stopped, you need to go to the corresponding BE node, check the be.INFO log, search for the tablet id and the compaction keyword, and check whether compaction is running normally. For compaction tuning, you can refer to the ApacheDoris public account article: Doris Best Practice - Compaction Tuning (3).
-
-The -238 error usually occurs when the amount of imported data in the same batch is too large, which leads to too many Segment files for a certain tablet (the default is 200, which is controlled by the BE parameter `max_segment_num_per_rowset`). At this time, it is recommended to reduce the amount of data imported in one batch, or to appropriately increase the value of the BE configuration parameter to solve the problem.
-
-### E4. tablet 110309738 has few replicas: 1, alive backends: [10003]
-
-This error may occur during query or import operations. It usually means that the replicas of the tablet are abnormal.
-
-At this point, you can first check whether a BE node is down using the `show backends` command, for example the isAlive field is false, or LastStartTime is a recent time (indicating that it was restarted recently). If the BE is down, you need to go to the node corresponding to the BE and check the be.out log. If the BE went down due to an exception, the exception stack is usually printed in be.out to help troubleshoot the problem. If there is no error stack in be.out, you can use the Linux command `dmesg -T` to check whether the process was killed by the system because of OOM.
-
-If no BE node is down, you need to use the `show tablet 110309738` statement, and then execute the `show proc` statement in the result to check the status of each replica of the tablet for further investigation.
-
-### E5. disk xxxxx on backend xxx exceed limit usage
-
-This usually appears in operations such as import and Alter. The error means that the disk usage of the corresponding BE disk exceeds the threshold (95% by default). At this point, you can first use the `show backends` command, where MaxDiskUsedPct shows the usage of the most-used disk on the corresponding BE. If it exceeds 95%, this error will be reported.
-
-In this case, you need to go to the corresponding BE node and check the usage of the data directories. The trash and snapshot directories can be cleaned up manually to free space. If the data directory takes up a lot of space, you need to consider deleting some data to free space. For details, please refer to [Disk Space Management](../administrator-guide/operation/disk-capacity.md).
-
-### E6. invalid cluster id: xxxx
-
-This error may appear in the results of the show backends or show frontends commands. It usually appears in the error message column of a certain FE or BE node. The meaning of this error is that after Master FE sends heartbeat information to this node, the node finds that the cluster id carried in the heartbeat information is different from the cluster id stored locally, so it refuses to respond to the heartbeat.
-
-Doris' Master FE node will actively send a heartbeat to each FE or BE node, and will carry a cluster_id in the heartbeat information. The cluster_id is the unique cluster ID generated by the Master FE when a cluster is initialized. When the FE or BE receives the heartbeat information for the first time, it will save the cluster_id locally in the form of a file. The FE file is in the image/ directory of the metadata directory, and BE has a cluster_id file in all data directories. After that, every time a node receives a heartbeat, it will compare the content of the local cluster_id with the content in the heartbeat. If it is inconsistent, it will refuse to respond to the heartbeat.
-
-This mechanism is a node authentication mechanism to prevent receiving wrong heartbeat information from nodes outside the cluster.
-
-If you need to recover from this error, first confirm that all nodes are correct nodes of the cluster. After that, for an FE node, you can try to modify the cluster_id value in the image/VERSION file in the metadata directory and restart the FE. For a BE node, you can delete the cluster_id files in all data directories and restart the BE.
-
-### E7. Import data by calling stream load through a Java program. When a batch of data is large, a Broken Pipe error may be reported
-
-In addition to Broken Pipe, there may be other strange errors.
-
-This situation usually occurs after httpv2 is enabled. httpv2 is an HTTP service implemented with Spring Boot and uses Tomcat as the default embedded container, but Tomcat's handling of 307 redirects seems to have some problems, so the embedded container will later be switched to Jetty. In addition, the Apache HttpClient version used in the Java program needs to be 4.5.13 or later; earlier versions also had problems handling redirects.
-
-So this problem can be solved in two ways:
-
-1. Turn off httpv2
-
- Add enable_http_server_v2=false in fe.conf and restart FE. However, the new UI interface can no longer be used in this way, and some new interfaces based on httpv2 cannot be used later. (Normal import queries are not affected).
-
-2. Upgrade
-
- You can upgrade to Doris 0.15 and later versions, this problem has been fixed.
-
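-For reference, the following is a minimal Java sketch of calling Stream Load with Apache HttpClient 4.5.13+, with redirect handling enabled so the PUT request can follow the 307 forwarding described above. The FE address, database, table, label, credentials and file path are placeholders.
-
-```java
-import java.io.File;
-import java.nio.charset.StandardCharsets;
-import java.util.Base64;
-
-import org.apache.http.client.methods.CloseableHttpResponse;
-import org.apache.http.client.methods.HttpPut;
-import org.apache.http.entity.ContentType;
-import org.apache.http.entity.FileEntity;
-import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.http.impl.client.DefaultRedirectStrategy;
-import org.apache.http.impl.client.HttpClients;
-import org.apache.http.util.EntityUtils;
-
-public class StreamLoadDemo {
-    public static void main(String[] args) throws Exception {
-        // Allow the PUT request to follow the FE -> BE 307 redirect.
-        CloseableHttpClient client = HttpClients.custom()
-                .setRedirectStrategy(new DefaultRedirectStrategy() {
-                    @Override
-                    protected boolean isRedirectable(String method) {
-                        return true;
-                    }
-                })
-                .build();
-
-        // Placeholder FE address, database, table, label and credentials.
-        HttpPut put = new HttpPut("http://your_fe_host:8030/api/example_db/example_tbl/_stream_load");
-        String auth = Base64.getEncoder()
-                .encodeToString("root:".getBytes(StandardCharsets.UTF_8));
-        put.setHeader("Authorization", "Basic " + auth);
-        put.setHeader("Expect", "100-continue");
-        put.setHeader("label", "example_label_1");
-        put.setHeader("column_separator", ",");
-        put.setEntity(new FileEntity(new File("/path/to/data.csv"), ContentType.TEXT_PLAIN));
-
-        try (CloseableHttpResponse response = client.execute(put)) {
-            System.out.println(EntityUtils.toString(response.getEntity()));
-        }
-    }
-}
-```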
-
-### E8. `Lost connection to MySQL server at'reading initial communication packet', system error: 0`
-
-If this problem occurs when using the MySQL client to connect to Doris, it is usually caused by a mismatch between the JDK version used to compile FE and the JDK version used to run FE.
-Note that when the docker image is used to compile, the default JDK version is openjdk 11; you can switch to openjdk 8 by command (see the compilation document for details).
-
-### E9. -214 error
-
-When performing operations such as load and query, you may encounter the following errors:
-
-```
-failed to initialize storage reader. tablet=63416.1050661139.aa4d304e7a7aff9c-f0fa7579928c85a0, res=-214, backend=192.168.100.10
-```
-
-A -214 error means that the data version of the corresponding tablet is missing. For example, the above error indicates that the data version of the replica of tablet 63416 on the BE of 192.168.100.10 is missing. (There may be other similar error codes, which can be checked and repaired in the following ways).
-
-Normally, if your data has multiple replicas, the system will automatically repair these problematic replicas. You can troubleshoot through the following steps:
-
-First, use the `show tablet 63416` statement and execute the `show proc xxx` statement in the result to view the status of each replica of the corresponding tablet. Usually we need to care about the data in the `Version` column.
-
-Under normal circumstances, the Version of multiple replicas of a tablet should be the same. And it is the same as the VisibleVersion of the corresponding partition.
-
-You can use `show partitions from tblx` to view the corresponding partition version (the partition corresponding to the tablet can be obtained in the `show tablet` statement.)
-
-At the same time, you can also visit the URL in the CompactionStatus column of the `show proc` statement (just open it in the browser) to view more specific version information, to check which version is missing.
-
-If there is no automatic repair for a long time, you need to use the `show proc "/cluster_balance"` statement to view the tablet repair and scheduling tasks currently being performed by the system. It may be because there are a large number of tablets waiting to be scheduled, which leads to a long repair time. You can follow the records in `pending_tablets` and `running_tablets`.
-
-Furthermore, you can use the `admin repair` statement to repair a table or partition with higher priority. For details, please refer to `help admin repair`.
-
-If it still cannot be repaired, then in the case of multiple replicas, we use the `admin set replica status` command to force a replica offline. For details, refer to the example in `help admin set replica status` for setting the replica status to bad. (After being set to bad, the replica will not be accessed again and will be automatically repaired later. Before this operation, make sure the other replicas are normal.)
-
-### E10. Not connected to 192.168.100.1:8060 yet, server_id=384
-
-We may encounter this error when loading or querying. If you go to the corresponding BE log to check, you may also find similar errors.
-
-This is an RPC error, and there are usually two possibilities: 1. The corresponding BE node is down. 2. rpc congestion or other errors.
-
-If the BE node is down, you need to check the specific reason for the downtime. Only the problem of rpc congestion is discussed here.
-
-One situation is OVERCROWDED, which means that a large amount of unsent data at the rpc client exceeds the threshold. BE has two parameters related to it:
-
-1. `brpc_socket_max_unwritten_bytes`: The default is 1GB. If the unwritten data exceeds this value, an error will be reported. You can modify this value appropriately to avoid OVERCROWDED errors. (But this cures the symptoms rather than the root cause, essentially congestion still occurs).
-2. `tablet_writer_ignore_eovercrowded`: The default is false. If set to true, Doris will ignore OVERCROWDED errors during the load process. This parameter is mainly used to avoid load failure and improve the stability of load.
-
-The second is that the packet size of rpc exceeds `max_body_size`. This problem may occur if the query contains a very large String type or a Bitmap type. It can be circumvented by modifying the following BE parameters:
-
-1. `brpc_max_body_size`: The default is 3GB.
-
-### E11. `recoveryTracker should overlap or follow on disk last VLSN of 4,422,880 recoveryFirst= 4,422,882 UNEXPECTED_STATE_FATAL`
-
-Sometimes when restarting the FE, the above error occurs (usually only in the case of multiple Followers), and the difference between the two values in the error is 2. As a result, the FE fails to start.
-
-This is an unresolved bug in bdbje. In this case, metadata can only be recovered through the fault recovery procedure in the [metadata operation and maintenance manual](../administrator-guide/operation/metadata-operation.md).
-
-### E12. Doris compilation and installation JDK version incompatibility
-
-When you compile Doris with Docker yourself and start FE after compiling and installing, you may see the exception `java.lang.NoSuchMethodError: java.nio.ByteBuffer.limit(I)Ljava/nio/ByteBuffer;`. This is because the default JDK in Docker is JDK 11. If your installation environment uses JDK 8, you need to switch the JDK environment in Docker to JDK 8. For the specific switching method, refer to [Compilation](https://doris.apache.org/installing/compilation.html).
diff --git a/docs/en/faq/faq.md b/docs/en/faq/faq.md
deleted file mode 100644
index 27d14bc910..0000000000
--- a/docs/en/faq/faq.md
+++ /dev/null
@@ -1,297 +0,0 @@
----
-{
- "title": "FAQ",
- "language": "en"
-}
----
-
-
-
-# FAQ
-
-This document is mainly used to record common problems in the use of Doris. It will be updated from time to time.
-
-### Q1. Use Stream Load to access the public network address of FE to import data, and it is redirected to the internal network IP?
-
-When the connection target of stream load is the http port of FE, FE will only randomly select a BE node for http 307 redirect operation, so the user's request is actually sent to a BE designated by FE. The redirect returns the ip of BE, which is the intranet IP. So if you send the request through the public IP of FE, it is very likely that you will not be able to connect because you are redirected to the intranet address.
-
-The usual approach is to ensure that the intranet IP addresses are reachable, or to set up a load balancer in front of all BEs and send the stream load request directly to the load balancer, which transparently forwards the request to a BE node.
-
-### Q2. When the BE node is offline through DECOMMISSION, why is there always some tablet remaining?
-
-During the offline process, check the tabletNum of the offline node through show backends, and you will observe that tabletNum is decreasing, indicating that data fragments are migrating off this node. When the number drops to 0, the system automatically deletes the node. But in some cases, tabletNum stops changing after dropping to a certain value. There are usually two possible reasons:
-
-1. These tablets belong to the table, partition, or materialized view that has just been deleted. The objects that have just been deleted will remain in the recycle bin. The offline logic will not process these fragments. You can modify the resident time of the object in the recycle bin by modifying the configuration parameter catalog_trash_expire_second of FE. When the object is deleted from the recycle bin, these tablets will be processed.
-
-2. There is a problem with the migration task of these tablets. At this time, you need to check the error of the specific task through show proc "/cluster_balance".
-
-For the above situations, you can first check whether the cluster still has unhealthy tablets through show proc "/statistic". If it is 0, you can delete the BE directly through the drop backend statement. Otherwise, you need to check the replica status of the unhealthy tablets.
-
-
-### Q3. How should priority_network be set?
-
-priority_network is a configuration parameter of both FE and BE. It is mainly used to help the system choose the correct network interface IP as its own IP. It is recommended to set this parameter explicitly in all cases, to prevent incorrect IP selection if a new network interface is added to the machine later.
-
-The value of priority_network is expressed in CIDR format. It consists of two parts: the first part is a dotted-decimal IP address, and the second part is a prefix length. For example, 10.168.1.0/8 will match all 10.xx.xx.xx IP addresses, and 10.168.1.0/16 will match all 10.168.xx.xx IP addresses.
-
-The reason for using the CIDR format instead of directly specifying a specific IP is to ensure that all nodes can use a uniform configuration value. For example, if there are two nodes, 10.168.10.1 and 10.168.10.2, we can use 10.168.10.0/24 as the value of priority_network.
-
-### Q4. What are FE's Master, Follower and Observer?
-
-First of all, be clear that FE has only two roles: Follower and Observer. The Master is just an FE elected from a group of Follower nodes; Master can be regarded as a special kind of Follower. So when asked how many FEs a cluster has and what roles they have, the correct answer is the number of all FE nodes, the number of Follower roles, and the number of Observer roles.
-
-All FE nodes in the Follower role form an electable group, similar to the group concept in the Paxos consensus protocol. A Follower is elected as the Master within the group. When the Master goes down, a new Master is automatically elected from the remaining Followers. An Observer does not participate in the election, so an Observer will never become Master.
-
-A metadata log write is considered successful only after it has been written to a majority of Follower nodes; for example, with 3 FEs, a write must succeed on 2 of them. This is why the number of Follower roles needs to be odd.
-
-The Observer role matches the meaning of the word: it only acts as an observer that synchronizes metadata logs which have already been successfully written, and provides metadata read services. It does not participate in the majority-write logic.
-
-Normally, you can deploy 1 Follower + 2 Observers or 3 Followers + N Observers. The former is simple to operate and maintain, and there are almost no complicated error situations caused by the consensus protocol between Followers (most of Baidu's internal clusters use this approach). The latter ensures high availability of metadata writes. For high-concurrency query scenarios, you can appropriately add Observers.
-
-### Q5. Does Doris support modifying column names?
-
-No, modifying column names is not supported.
-
-Doris supports modifying database names, table names, partition names, materialized view (Rollup) names, as well as column types, comments, default values, and so on. Unfortunately, modifying column names is currently not supported.
-
-For some historical reasons, column names are currently written directly into the data files. When Doris queries, it also finds the corresponding column by the column name. Therefore, modifying a column name is not just a simple metadata modification; it also involves rewriting data, which is a very heavy operation.
-
-We do not rule out later supporting lightweight column-name changes through some compatibility mechanism.
-
-### Q6. Does the table of the Unique Key model support the creation of materialized views?
-
-No, it is not supported.
-
-Tables of the Unique Key model are business-friendly. Because of their unique de-duplication by primary key, they can easily synchronize business databases with frequent data changes. Therefore, many users first consider the Unique Key model when loading data into Doris.
-
-Unfortunately, tables of the Unique Key model cannot have materialized views. The reason is that the nature of a materialized view is to "pre-compute" the data, so that computed data is returned directly during a query to speed it up. In a materialized view, the "pre-computed" data is usually aggregated metrics, such as sums and counts. When data changes, such as an update or delete, the pre-computed data has lost its detailed information and cannot be updated synchronously. For example, a sum of 5 may be 1+4 or 2+3; because the detail is lost, we cannot tell how the sum was computed and therefore cannot meet the update requirement.
-
-### Q7. The information viewed by show backends/frontends is incomplete
-
-After executing statements such as `show backends/frontends`, some columns in the results may be incomplete. For example, the disk capacity information cannot be seen in the show backends results.
-
-This problem usually occurs when there are multiple FEs in the cluster. If users connect to a non-Master FE node to execute these statements, they see incomplete information, because part of the information, such as the BEs' disk usage, only exists on the Master FE node. Therefore, the complete information can only be obtained by connecting directly to the Master FE.
-
-Of course, users can also execute `set forward_to_master=true;` before executing these statements. After this session variable is set to true, some information-viewing statements executed afterwards are automatically forwarded to the Master FE to obtain the results. In this way, no matter which FE the user connects to, complete results can be obtained.
-
-### Q8. A new disk is added to the node. Why is the data not balanced on the new disk?
-
-The current balance strategy of Doris is node-based. In other words, the cluster load is judged according to the overall load metrics of each node (the number of shards and total disk utilization), and data fragments are migrated from high-load nodes to low-load nodes. If every node adds a disk, the load of each node as a whole does not change, so the balancing logic is not triggered.
-
-In addition, Doris currently does not support balanced operations within a single node and between various disks. Therefore, after adding a new disk, the data will not be balanced to the new disk.
-
-However, when data is migrated between nodes, Doris will consider the disk factor. For example, if a slice is migrated from node A to node B, the disk with lower disk space utilization among node B will be selected first.
-
-Here we provide 3 ways to solve this problem:
-
-1. Rebuild the new table
-
- Create a new table through the create table like statement, and then use insert into select to synchronize the data from the old table to the new table. Because when a new table is created, the data fragments of the new table will be distributed on the new disk, and the data will also be written to the new disk. This method is suitable for situations where the amount of data is small (within tens of GB).
-
-2. Through the Decommission command
-
- The decommission command is used to safely decommission a BE node. This command will first migrate the data fragments on the node to other nodes, and then delete the node. As mentioned earlier, when data is migrated, disks with low disk utilization will be given priority, so this method can "force" the data to be migrated to the disks of other nodes. When the data migration is completed, we cancel the decommission operation, so that the data will be rebalanced back to this node. When we perform the above steps for all BE nodes, the data will be evenly distributed on all disks of all nodes.
-
- Note that before executing the decommission command, execute the following command first to avoid the node being deleted after it is offline.
-
- `admin set frontend config("drop_backend_after_decommission" = "false");`
-
-3. Manually migrate data using API
-
- Doris provides [HTTP API](../administrator-guide/http-actions/tablet-migration-action.md), which allows you to manually specify data fragments on one disk to migrate to another disk.
-
-### Q9. How to read FE/BE log correctly?
-
-In many cases, we need to troubleshoot problems through logs. Here is an explanation of the format and viewing method of the FE/BE log.
-
-1. FE
-
- FE logs mainly include:
-
- * fe.log: main log. Including everything except fe.out.
- * fe.warn.log: A subset of the main log, which only records WARN and ERROR level logs.
- * fe.out: Standard/error output log (stdout and stderr).
- * fe.audit.log: Audit log, which records all SQL requests received by this FE.
-
- A typical FE log is as follows:
-
- ```
- 2021-09-16 23:13:22,502 INFO (tablet scheduler|43) [BeLoadRebalancer.selectAlternativeTabletsForCluster():85] cluster is balance: default_cluster with medium: HDD. skip
- ```
-
- * `2021-09-16 23:13:22,502`: log time.
- * `INFO`: log level; the default is INFO.
- * `(tablet scheduler|43)`: thread name and thread id. Through the thread id, you can view the thread context information and troubleshoot what happened in this thread.
- * `BeLoadRebalancer.selectAlternativeTabletsForCluster():85`: class name, method name and code line number.
- * `cluster is balance xxx`: log content.
-
- Normally, we mainly check the fe.log log. Under special circumstances, some logs may be output to fe.out.
-
-2. BE
-
- The BE logs mainly include:
-
- * be.INFO: Main log. This is actually a symbolic link to the latest be.INFO.xxxx file.
- * be.WARNING: A subset of the main log; only WARN and FATAL level logs are recorded. This is actually a symbolic link to the latest be.WARN.xxxx file.
- * be.out: standard/error output log (stdout and stderr).
-
- A typical BE log is as follows:
-
- ```
- I0916 23:21:22.038795 28087 task_worker_pool.cpp:1594] finish report TASK. master host: 10.10.10.10, port: 9222
- ```
-
- * `I0916 23:21:22.038795`: Log level and date and time. The capital letter I means INFO, W means WARN, and F means FATAL.
- * `28087`: thread id. Through the thread id, you can view the thread context information and troubleshoot what happened in this thread.
- * `task_worker_pool.cpp:1594`: code file and line number.
- * `finish report TASK xxx`: log content.
-
- Normally, we mainly check the be.INFO log. Under special circumstances, such as BE downtime, you need to check be.out.
-
-### Q10. How to troubleshoot the cause of FE/BE node down?
-
-1. BE
-
- The BE process is a C/C++ process, and the process may hang due to some program bugs (memory out of bounds, illegal address access, etc.) or Out Of Memory (OOM). At this point, we can check the cause of the error through the following steps:
-
- 1. View be.out
-
- The BE process is designed so that when the program exits due to an exception, it prints the current error stack to be.out (note: be.out, not be.INFO or be.WARNING). From the error stack, you can usually get a rough idea of where the program went wrong.
-
- Note that if an error stack appears in be.out, it is usually due to a program bug, and ordinary users may not be able to solve it by themselves. You are welcome to ask for help in the WeChat group, GitHub discussions, or the dev mailing list, and post the corresponding error stack so the problem can be troubleshot quickly.
-
- 2. dmesg
-
- If be.out has no stack information, it is likely that OOM was forcibly killed by the system. At this point, you can use the dmesg -T command to view the Linux system log. If a log similar to Memory cgroup out of memory: Kill process 7187 (palo_be) score 1007 or sacrifice child appears at the end, it means that it is caused by OOM.
-
- There may be many reasons for memory problems, such as large queries, imports, compactions, etc. Doris is also constantly optimizing memory usage. Welcome to the WeChat group, github discussion or dev mailing group for help.
-
- 3. Check whether there are logs starting with F in be.INFO.
-
- The log at the beginning of F is the Fatal log. For example, F0916 means the Fatal log on September 16. Fatal logs usually indicate program assertion errors, and assertion errors will directly cause the process to exit (indicating that the program has a bug). Welcome to the WeChat group, github discussion or dev mailing group for help.
-
- 4. Minidump
-
- Minidump is a feature added after Doris 0.15. For details, please refer to the [document](../developer-guide/minidump.md).
-
-2. FE
-
- FE is a Java process, and its robustness is better than that of C/C++ programs. Usually, the cause of an FE crash may be OOM (Out-of-Memory) or a metadata write failure. These errors usually leave an error stack in fe.log or fe.out, which you can use to investigate further.
-
-### Q11. About the configuration of the data directory SSD and HDD.
-
-Doris supports a BE node to configure multiple storage paths. Normally, it is sufficient to configure one storage path for each disk. At the same time, Doris supports storage media attributes of specified paths, such as SSD or HDD. SSD stands for high-speed storage devices, and HDD stands for low-speed storage devices.
-
-By specifying the storage medium properties of the path, we can use Doris's hot and cold data partition storage function to store hot data in the SSD at the partition level, and the cold data will be automatically transferred to the HDD.
-
-It should be noted that Doris does not automatically perceive the actual storage medium type of the disk where the storage path is located. This type needs to be explicitly indicated by the user in the path configuration. For example, the path "/path/to/data1.SSD" means that this path is an SSD storage medium. And "data1.SSD" is the actual directory name. Doris determines the storage medium type based on the ".SSD" suffix behind the directory name, not the actual storage medium type. In other words, the user can specify any path as the SSD storage medium, and Doris only recognizes the directory suffix and will not judge whether the storage medium matches. If you do not write the suffix, the default is HDD.
-
-In other words, ".HDD" and ".SSD" are only used to identify the "relative" "low speed" and "high speed" of the storage directory, not the actual storage medium type. Therefore, if the storage path on the BE node has no difference in media, there is no need to fill in the suffix.
-
-### Q12. The query results of unique key model are inconsistent
-
-In some cases, when users run the same SQL query against a Unique Key model table, the results may be inconsistent, alternating among two or three variants.
-
-This may be because the same batch of imported data contains rows with the same key but different values, which leads to inconsistent results across replicas due to the undetermined replace order of the data.
-
-For example, tables are defined as k1 and v1. A batch of imported data is as follows:
-
-```
-1, "abc"
-1, "def"
-```
-
-Then the result on replica 1 may be `1, "abc"`, while the result on replica 2 may be `1, "def"`. This leads to inconsistent query results.
-
-To ensure a unique data order across replicas, refer to the [Sequence Column](../administrator-guide/load-data/sequence-column-manual.md) feature.
-
-### Q13. Multiple FEs cannot log in when using Nginx to implement web UI load balancing
-
-Doris can deploy multiple FEs. When accessing the Web UI with Nginx used for load balancing, you will be prompted to log in again because of session problems. This is essentially a session-sharing problem. Nginx provides solutions for session persistence; here we use Nginx's ip_hash technique. ip_hash directs requests from a given IP to the same backend, so a client and a backend can establish a stable session. ip_hash is defined in the upstream configuration:
-
-```
-upstream doris.com {
- server 172.22.197.238:8030 weight=3;
- server 172.22.197.239:8030 weight=4;
- server 172.22.197.240:8030 weight=4;
- ip_hash;
-}
-```
-The complete Nginx example configuration is as follows:
-
-```
-user nginx;
-worker_processes auto;
-error_log /var/log/nginx/error.log;
-pid /run/nginx.pid;
-
-# Load dynamic modules. See /usr/share/doc/nginx/README.dynamic.
-include /usr/share/nginx/modules/*.conf;
-
-events {
- worker_connections 1024;
-}
-
-http {
- log_format main '$remote_addr - $remote_user [$time_local] "$request" '
- '$status $body_bytes_sent "$http_referer" '
- '"$http_user_agent" "$http_x_forwarded_for"';
-
- access_log /var/log/nginx/access.log main;
-
- sendfile on;
- tcp_nopush on;
- tcp_nodelay on;
- keepalive_timeout 65;
- types_hash_max_size 2048;
-
- include /etc/nginx/mime.types;
- default_type application/octet-stream;
-
- # Load modular configuration files from the /etc/nginx/conf.d directory.
- # See http://nginx.org/en/docs/ngx_core_module.html#include
- # for more information.
- include /etc/nginx/conf.d/*.conf;
- #include /etc/nginx/custom/*.conf;
- upstream doris.com {
- server 172.22.197.238:8030 weight=3;
- server 172.22.197.239:8030 weight=4;
- server 172.22.197.240:8030 weight=4;
- ip_hash;
- }
-
- server {
- listen 80;
- server_name gaia-pro-bigdata-fe02;
- if ($request_uri ~ _load) {
- return 307 http://$host$request_uri ;
- }
-
- location / {
- proxy_pass http://doris.com;
- proxy_redirect default;
- }
- error_page 500 502 503 504 /50x.html;
- location = /50x.html {
- root html;
- }
- }
- }
-```
diff --git a/new-docs/en/faq/install-faq.md b/docs/en/faq/install-faq.md
similarity index 100%
rename from new-docs/en/faq/install-faq.md
rename to docs/en/faq/install-faq.md
diff --git a/new-docs/en/faq/sql-faq.md b/docs/en/faq/sql-faq.md
similarity index 100%
rename from new-docs/en/faq/sql-faq.md
rename to docs/en/faq/sql-faq.md
diff --git a/new-docs/en/get-starting/get-starting.md b/docs/en/get-starting/get-starting.md
similarity index 99%
rename from new-docs/en/get-starting/get-starting.md
rename to docs/en/get-starting/get-starting.md
index a557f962d8..62e35871bb 100644
--- a/new-docs/en/get-starting/get-starting.md
+++ b/docs/en/get-starting/get-starting.md
@@ -1,6 +1,6 @@
---
{
- "title": "Get-Starting",
+ "title": "Getting-Started",
"language": "en"
}
@@ -25,7 +25,7 @@ specific language governing permissions and limitations
under the License.
-->
-# Apache Doris Get-Starting
+# Getting Started
## Environmental preparation
diff --git a/docs/en/getting-started/advance-usage.md b/docs/en/getting-started/advance-usage.md
deleted file mode 100644
index 3a47a2f7db..0000000000
--- a/docs/en/getting-started/advance-usage.md
+++ /dev/null
@@ -1,280 +0,0 @@
----
-{
- "title": "Advanced Use Guide",
- "language": "en"
-}
----
-
-
-
-# Advanced Use Guide
-
-Here we introduce some of Doris's advanced features.
-
-## 1 Table Structure Change
-
-The schema of a table can be modified with the ALTER TABLE command, including the following changes:
-
-* Add columns
-* Delete columns
-* Modify column types
-* Change column order
-
-Examples are given below.
-
-The schema of table1 is as follows:
-
-```
-+----------+-------------+------+-------+---------+-------+
-| Field | Type | Null | Key | Default | Extra |
-+----------+-------------+------+-------+---------+-------+
-| siteid | int(11) | No | true | 10 | |
-| citycode | smallint(6) | No | true | N/A | |
-| username | varchar(32) | No | true | | |
-| pv | bigint(20) | No | false | 0 | SUM |
-+----------+-------------+------+-------+---------+-------+
-```
-
-We add a new column uv, with type BIGINT, aggregation type SUM, and default value 0:
-
-`ALTER TABLE table1 ADD COLUMN uv BIGINT SUM DEFAULT '0' after pv;`
-
-After the job is submitted successfully, you can check its progress with the following command:
-
-`SHOW ALTER TABLE COLUMN;`
-
-When the job state is FINISHED, the job is complete and the new schema has taken effect.
-
-After ALTER TABLE is completed, you can view the latest Schema through `DESC TABLE`.
-
-```
-mysql> DESC table1;
-+----------+-------------+------+-------+---------+-------+
-| Field | Type | Null | Key | Default | Extra |
-+----------+-------------+------+-------+---------+-------+
-| siteid | int(11) | No | true | 10 | |
-| citycode | smallint(6) | No | true | N/A | |
-| username | varchar(32) | No | true | | |
-| pv | bigint(20) | No | false | 0 | SUM |
-| uv | bigint(20) | No | false | 0 | SUM |
-+----------+-------------+------+-------+---------+-------+
-5 rows in set (0.00 sec)
-```
-
-The following command can be used to cancel the job currently being executed:
-
-`CANCEL ALTER TABLE COLUMN FROM table1`
-
-For more help, see `HELP ALTER TABLE`.
-
-## 2 Rollup
-
-Rollup can be understood as a materialized index structure of a table: **materialized** because its data is stored independently as a concrete ("materialized") table, and **index** because a Rollup can reorder columns to increase the hit rate of the prefix index, or reduce the key columns to increase data aggregation.
-
-Examples are given below.
-
-After the change above, the schema of table1 is as follows:
-
-```
-+----------+-------------+------+-------+---------+-------+
-| Field | Type | Null | Key | Default | Extra |
-+----------+-------------+------+-------+---------+-------+
-| siteid | int(11) | No | true | 10 | |
-| citycode | smallint(6) | No | true | N/A | |
-| username | varchar(32) | No | true | | |
-| pv | bigint(20) | No | false | 0 | SUM |
-| uv | bigint(20) | No | false | 0 | SUM |
-+----------+-------------+------+-------+---------+-------+
-```
-
-In table1, the detail data is keyed by siteid, citycode and username, and the pv column is aggregated on that key. If the business side often needs to see the total pv per city, a rollup containing only citycode and pv can be built:
-
-`ALTER TABLE table1 ADD ROLLUP rollup_city(citycode, pv);`
-
-After the job is submitted successfully, you can check its progress with the following command:
-
-`SHOW ALTER TABLE ROLLUP;`
-
-When the job state is FINISHED, the job is completed.
-
-When Rollup is established, you can use `DESC table1 ALL` to view the Rollup information of the table.
-
-```
-mysql> desc table1 all;
-+-------------+----------+-------------+------+-------+---------+-------+
-| IndexName | Field | Type | Null | Key | Default | Extra |
-+-------------+----------+-------------+------+-------+---------+-------+
-| table1 | siteid | int(11) | No | true | 10 | |
-| | citycode | smallint(6) | No | true | N/A | |
-| | username | varchar(32) | No | true | | |
-| | pv | bigint(20) | No | false | 0 | SUM |
-| | uv | bigint(20) | No | false | 0 | SUM |
-| | | | | | | |
-| rollup_city | citycode | smallint(6) | No | true | N/A | |
-| | pv | bigint(20) | No | false | 0 | SUM |
-+-------------+----------+-------------+------+-------+---------+-------+
-8 rows in set (0.01 sec)
-```
-
-The following command can be used to cancel the job currently being executed:
-
-`CANCEL ALTER TABLE ROLLUP FROM table1;`
-
-After the Rollup is created, queries do not need to (and cannot) explicitly specify the Rollup; you still query the original table, and Doris automatically decides whether the Rollup should be used. Whether a Rollup is hit can be checked with `EXPLAIN your_sql;`.
-
-For more help, see `HELP ALTER TABLE`.
-
-## 3 Data Table Queries
-
-### 3.1 Memory Limitation
-
-To prevent a single user's query from consuming too much memory, query memory usage is limited: by default, a query task can use no more than 2GB of memory on a single BE node.
-
-If a query reports a `Memory limit exceeded` error, it has usually exceeded this limit.
-
-When hitting the memory limit, users should first try to optimize their SQL statements.
-
-If 2GB is still not enough, the memory parameter can be adjusted manually.
-
-Display query memory limits:
-
-```
-mysql> SHOW VARIABLES LIKE "%mem_limit%";
-+---------------+------------+
-| Variable_name | Value |
-+---------------+------------+
-| exec_mem_limit| 2147483648 |
-+---------------+------------+
-1 row in set (0.00 sec)
-```
-
-The unit of `exec_mem_limit` is bytes, and its value can be changed with the `SET` command. For example, to change it to 8GB:
-
-`SET exec_mem_limit = 8589934592;`
-
-```
-mysql> SHOW VARIABLES LIKE "%mem_limit%";
-+---------------+------------+
-| Variable_name | Value |
-+---------------+------------+
-| exec_mem_limit| 8589934592 |
-+---------------+------------+
-1 row in set (0.00 sec)
-```
-
-> * The above modification is session level and is only valid within the current connection session. Disconnecting and reconnecting will change back to the default value.
-> * If you need to modify the global variable, you can set it as follows: `SET GLOBAL exec_mem_limit = 8589934592;` When the setup is complete, disconnect the session and log in again, and the parameters will take effect permanently.
-
-### 3.2 Query Timeout
-
-The default query timeout is 300 seconds. If a query does not finish within 300 seconds, it is cancelled by Doris. Users can customize this timeout for their applications to achieve a blocking behavior similar to wait(timeout).
-
-View the current timeout settings:
-
-```
-mysql> SHOW VARIABLES LIKE "%query_timeout%";
-+---------------+-------+
-| Variable_name | Value |
-+---------------+-------+
-| QUERY_TIMEOUT | 300 |
-+---------------+-------+
-1 row in set (0.00 sec)
-```
-
-Modify the timeout to 1 minute:
-
-`SET query_timeout = 60;`
-
-> * The current timeout check interval is 5 seconds, so timeouts less than 5 seconds are not very accurate.
-> * The above modifications are also session level. Global validity can be modified by `SET GLOBAL`.
-
-### 3.3 Broadcast/Shuffle Join
-
-The system implements the Join operator in two ways:
-
-Broadcast join: the right-hand table is filtered by its conditions and broadcast to each node hosting the left (large) table, an in-memory hash table is built there, and the data of the large table is then streamed through for the hash join.
-
-Shuffle join: both tables are hashed by the join key and redistributed, so the join (and its memory consumption) is spread across all compute nodes in the cluster.
-
-Broadcast join performs better when the right-hand table is small, and vice versa.
-
-Doris tries Broadcast Join first, and you can also explicitly specify how each join operator is implemented. The system provides the configurable parameter `auto_broadcast_join_threshold`, which sets the maximum fraction of the execution memory that may be used to build the hash table for a broadcast join. Meaningful values range from `0` to `1`, and the default is `0.8`. The system falls back to shuffle join when a broadcast join would use more memory than this allows.
-
-You can turn off broadcast join by setting `auto_broadcast_join_threshold` to `0` or a negative value.
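-
-For example, the threshold can be tuned or broadcast join disabled at the session level (a sketch, assuming the parameter is a session variable that can be changed with SET):
-
-```
-SET auto_broadcast_join_threshold = 0.1;  -- lower the threshold to 10% of the execution memory
-SET auto_broadcast_join_threshold = 0;    -- disable broadcast join entirely
-```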
-
-Choose the join implementation automatically (default):
-
-```
-mysql> select sum(table1.pv) from table1 join table2 where table1.siteid = 2;
-+--------------------+
-| sum(`table1`.`pv`) |
-+--------------------+
-| 10 |
-+--------------------+
-1 row in set (0.20 sec)
-```
-
-Use Broadcast Join (explicitly specified):
-
-```
-mysql> select sum(table1.pv) from table1 join [broadcast] table2 where table1.siteid = 2;
-+--------------------+
-| sum(`table1`.`pv`) |
-+--------------------+
-| 10 |
-+--------------------+
-1 row in set (0.20 sec)
-```
-
-Shuffle Join:
-
-```
-mysql> select sum(table1.pv) from table1 join [shuffle] table2 where table1.siteid = 2;
-+--------------------+
-| sum(`table1`.`pv`) |
-+--------------------+
-| 10 |
-+--------------------+
-1 row in set (0.15 sec)
-```
-
-### 3.4 Query Retry and High Availability
-
-When multiple FE nodes are deployed, users can deploy load balancing layers on top of multiple FEs to achieve high availability of Doris.
-
-Here are some highly available solutions:
-
-**The first**
-
-Implement retry and load balancing in the application-layer code: if a connection is found to be dead, automatically retry on another connection. This requires the application to be configured with the addresses of multiple Doris FE nodes.
-
-**The second**
-
-If you use the MySQL JDBC connector to connect to Doris, you can use JDBC's automatic retry mechanism:
-
-```
-jdbc:mysql://[host1][:port1],[host2][:port2][,[host3][:port3]]...[/[database]][?propertyName1=propertyValue1[&propertyName2=propertyValue2]...]
-```
-
-**The third**
-
-Applications can connect to a MySQL Proxy deployed on the same machine and use MySQL Proxy's Failover and Load Balance functions.
-
-`http://dev.mysql.com/doc/refman/5.6/en/mysql-proxy-using.html`
\ No newline at end of file
diff --git a/docs/en/getting-started/basic-usage.md b/docs/en/getting-started/basic-usage.md
deleted file mode 100644
index fedd7cd009..0000000000
--- a/docs/en/getting-started/basic-usage.md
+++ /dev/null
@@ -1,382 +0,0 @@
----
-{
- "title": "Guidelines for Basic Use",
- "language": "en"
-}
----
-
-
-
-
-# Guidelines for Basic Use
-
-Doris uses the MySQL protocol to communicate. Users can connect to a Doris cluster through a MySQL client or MySQL JDBC. When choosing a MySQL client version, a version later than 5.1 is recommended, because user names longer than 16 characters are not supported before 5.1. This document uses the MySQL client as an example to walk through the basic usage of Doris in a complete workflow.
-
-## 1 Create Users
-
-### 1.1 Root User Logon and Password Modification
-
-Doris has built-in root and admin users, and the password is empty by default. After starting the Doris program, you can connect to the Doris cluster through root or admin users.
-Use the following command to log in to Doris:
-
-```
-mysql -h FE_HOST -P9030 -uroot
-```
-
-> `FE_HOST` is the IP address of any FE node, and `9030` is the `query_port` configured in fe.conf.
-
-After logging in, you can change the root password with the following command:
-
-```
-SET PASSWORD FOR 'root' = PASSWORD('your_password');
-```
-
-### 1.2 Creating New Users
-
-Create an ordinary user with the following command.
-
-```
-CREATE USER 'test' IDENTIFIED BY 'test_passwd';
-```
-
-Subsequent logins can be done with the following connection command:
-
-```
-mysql -h FE_HOST -P9030 -utest -ptest_passwd
-```
-
-> By default, a newly created user does not have any permissions. See the account authorization step later in this document.
-
-## 2 Data Table Creation and Data Import
-
-### 2.1 Create a database
-
-Initially, a database can be created through root or admin users:
-
-`CREATE DATABASE example_db;`
-
-> All commands can use `HELP command` to see detailed syntax help. For example: `HELP CREATE DATABASE;`
-
-> If you don't know the full name of a command, you can use "HELP" plus a keyword of the command for fuzzy matching. For example, typing `HELP CREATE` matches commands such as `CREATE DATABASE`, `CREATE TABLE`, `CREATE USER`, etc.
-
-After the database is created, you can view its information through `SHOW DATABASES;`.
-
-```
-MySQL> SHOW DATABASES;
-+--------------------+
-| Database |
-+--------------------+
-| example_db |
-| information_schema |
-+--------------------+
-2 rows in set (0.00 sec)
-```
-
-information_schema exists for compatibility with the MySQL protocol, and in practice its contents may not be fully accurate. It is therefore recommended to obtain information about a specific database by querying that database directly.
-
-### 2.2 Account Authorization
-
-After the example_db is created, the read and write permissions of example_db can be authorized to ordinary accounts, such as test, through the root/admin account. After authorization, the example_db database can be operated by logging in with the test account.
-
-`GRANT ALL ON example_db TO test;`
-
-### 2.3 Create a Table
-
-Create a table using the `CREATE TABLE` command. More detailed parameters can be seen:
-
-`HELP CREATE TABLE;`
-
-First switch the database:
-
-`USE example_db;`
-
-Doris supports single partition and composite partition.
-
-In the composite partition:
-
-* The first level is called Partition, i.e. partitioning. Users can specify a dimension column as the partition column (currently only integer and time type columns are supported) and specify the value range of each partition.
-
-* The second level is called Distribution, i.e. bucketing. Users can specify one or more dimension columns and a bucket count for HASH distribution of the data.
-
-Composite partitioning is recommended for the following scenarios:
-
-* There is a time dimension or a similar dimension with ordered values, which can be used as the partition column. The partition granularity can be chosen according to the import frequency and the amount of data per partition.
-* Historical data needs to be deleted (for example, only the last N days of data are retained). With composite partitions, this can be done by dropping historical partitions. Data can also be deleted with a DELETE statement against a specified partition (see the sketch after this list).
-* Data skew needs to be addressed: each partition can specify its own number of buckets. For example, when partitioning by day and the daily data volume varies greatly, a suitable bucket count can be chosen per partition. Columns with high cardinality are recommended as bucket columns.
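-
-A rough sketch of both deletion approaches, assuming a range-partitioned table like the table2 example later in this section (partition names and the filter are illustrative):
-
-```
-ALTER TABLE table2 DROP PARTITION p201706;               -- drop a whole historical partition
-DELETE FROM table2 PARTITION p201707 WHERE siteid = 3;   -- delete rows within one specified partition
-```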
-
-Users can also choose not to use composite partitioning, i.e. use a single partition. In that case the data is only distributed by HASH.
-
-Taking the aggregate model as an example, the two partitioning methods are illustrated below.
-
-#### Single partition
-
-Create a logical table named table1, with 10 buckets.
-
-The schema of this table is as follows:
-
-* siteid: the type is INT (4 bytes), and the default value is 10.
-* citycode: the type is SMALLINT (2 bytes).
-* username: the type is VARCHAR, the maximum length is 32 bytes, and the default value is an empty string.
-* pv: the type is BIGINT (8 bytes), and the default value is 0; this is a metric column, which Doris aggregates internally, and the aggregation method of this column is SUM.
-
-The CREATE TABLE statement is as follows:
-```
-CREATE TABLE table1
-(
- siteid INT DEFAULT '10',
- citycode SMALLINT,
- username VARCHAR(32) DEFAULT '',
- pv BIGINT SUM DEFAULT '0'
-)
-AGGREGATE KEY(siteid, citycode, username)
-DISTRIBUTED BY HASH(siteid) BUCKETS 10
-PROPERTIES("replication_num" = "1");
-```
-
-#### Composite partition
-
-Create a logical table named table2.
-
-The schema of this table is as follows:
-
-* event_day: the type is DATE, with no default value.
-* siteid: the type is INT (4 bytes), and the default value is 10.
-* citycode: the type is SMALLINT (2 bytes).
-* username: the type is VARCHAR, the maximum length is 32 bytes, and the default value is an empty string.
-* pv: the type is BIGINT (8 bytes), and the default value is 0; this is a metric column, which Doris aggregates internally, and the aggregation method of this column is SUM.
-
-We use the event_day column as the partition column to create three partitions: p201706, p201707, and p201708.
-
-* p201706: Range [Minimum, 2017-07-01)
-* p201707: Range [2017-07-01, 2017-08-01)
-* p201708: Range [2017-08-01, 2017-09-01)
-
-> Note that the interval is left closed and right open.
-
-Each partition uses siteid for hash bucketing, with 10 buckets.
-
-The CREATE TABLE statement is as follows:
-```
-CREATE TABLE table2
-(
- event_day DATE,
- siteid INT DEFAULT '10',
- citycode SMALLINT,
- username VARCHAR(32) DEFAULT '',
- pv BIGINT SUM DEFAULT '0'
-)
-AGGREGATE KEY(event_day, siteid, citycode, username)
-PARTITION BY RANGE(event_day)
-(
- PARTITION p201706 VALUES LESS THAN ('2017-07-01'),
- PARTITION p201707 VALUES LESS THAN ('2017-08-01'),
- PARTITION p201708 VALUES LESS THAN ('2017-09-01')
-)
-DISTRIBUTED BY HASH(siteid) BUCKETS 10
-PROPERTIES("replication_num" = "1");
-```
-
-After the table is built, you can view the information of the table in example_db:
-
-```
-MySQL> SHOW TABLES;
-+----------------------+
-| Tables_in_example_db |
-+----------------------+
-| table1 |
-| table2 |
-+----------------------+
-2 rows in set (0.01 sec)
-
-MySQL> DESC table1;
-+----------+-------------+------+-------+---------+-------+
-| Field | Type | Null | Key | Default | Extra |
-+----------+-------------+------+-------+---------+-------+
-| siteid | int(11) | Yes | true | 10 | |
-| citycode | smallint(6) | Yes | true | N/A | |
-| username | varchar(32) | Yes | true | | |
-| pv | bigint(20) | Yes | false | 0 | SUM |
-+----------+-------------+------+-------+---------+-------+
-4 rows in set (0.00 sec)
-
-MySQL> DESC table2;
-+-----------+-------------+------+-------+---------+-------+
-| Field | Type | Null | Key | Default | Extra |
-+-----------+-------------+------+-------+---------+-------+
-| event_day | date | Yes | true | N/A | |
-| siteid | int(11) | Yes | true | 10 | |
-| citycode | smallint(6) | Yes | true | N/A | |
-| username | varchar(32) | Yes | true | | |
-| pv | bigint(20) | Yes | false | 0 | SUM |
-+-----------+-------------+------+-------+---------+-------+
-5 rows in set (0.00 sec)
-```
-
-> Notes:
->
-> 1. By setting replication_num, the tables above are all single-replica tables. Doris recommends keeping the default 3-replica setting to ensure high availability.
-> 2. Partitions can be added to or dropped from a composite-partition table dynamically (see the sketch after these notes). Refer to the Partition section in `HELP ALTER TABLE`.
-> 3. Data import can target a specified Partition. See `HELP LOAD`.
-> 4. The schema of a table can be modified dynamically.
-> 5. Rollups can be added to a table to improve query performance; see the Rollup section of the Advanced Usage Guide.
-> 6. The default value of Null property for column is true, which may result in poor scan performance.
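-
-A rough sketch of dynamic partition management against the table2 example above (the new partition name and range are illustrative):
-
-```
-ALTER TABLE table2 ADD PARTITION p201709 VALUES LESS THAN ('2017-10-01');  -- add a new month as a partition
-SHOW PARTITIONS FROM table2;                                               -- inspect the table's partitions
-```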
-
-### 2.4 Import data
-
-Doris supports a variety of data import methods. Specifically, you can refer to the data import document. Here we use streaming import and Broker import as examples.
-
-#### Stream Load
-
-Stream Load transfers data to Doris via the HTTP protocol. It can import local data directly without relying on other systems or components. For detailed syntax help, see `HELP STREAM LOAD;`.
-
-Example 1: With "table1_20170707" as the Label, import the local file table1_data into table table1.
-
-```
-curl --location-trusted -u test:test_passwd -H "label:table1_20170707" -H "column_separator:," -T table1_data http://FE_HOST:8030/api/example_db/table1/_stream_load
-```
-
-> 1. FE_HOST is the IP of any FE node, and 8030 is the http_port in fe.conf.
-> 2. You can also use the IP of any BE node and the webserver_port in be.conf as the import target. For example: `BE_HOST:8040`
-
-The local file `table1_data` uses `,` as the field separator; its contents are as follows:
-
-```
-1,1,Jim,2
-2,1,grace,2
-3,2,tom,2
-4,3,bush,3
-5,3,helen,3
-```
-
-Example 2: With "table2_20170707" as the Label, import the local file table2_data into table table2.
-
-```
-curl --location-trusted -u test:test -H "label:table2_20170707" -H "column_separator:|" -T table2_data http://127.0.0.1:8030/api/example_db/table2/_stream_load
-```
-
-The local file `table2_data` uses `|` as the field separator; its contents are as follows:
-
-```
-2017-07-03|1|1|jim|2
-2017-07-05|2|1|grace|2
-2017-07-12|3|2|tom|2
-2017-07-15|4|3|bush|3
-2017-07-12|5|3|helen|3
-```
-
-> Notes:
->
-> 1. It is recommended to limit the file size of a Stream Load to 10GB; an oversized file makes a failed retry more costly.
-> 2. Each batch of imported data needs a Label, preferably a string related to that batch of data for easy reading and management. Based on the Label, Doris guarantees that the same batch of data can be imported into a database at most once. Labels of failed tasks can be reused.
-> 3. Stream Load is a synchronous command. A successful return means the data has been imported; a failed return means the batch of data has not been imported.
-
-#### Broker Load
-
-Broker Load imports data from external storage through deployed Broker processes. For more help, see `HELP BROKER LOAD;`.
-
-Example: Import files on HDFS into table table1 with "table1_20170708" as the Label.
-
-```
-LOAD LABEL table1_20170708
-(
- DATA INFILE("hdfs://your.namenode.host:port/dir/table1_data")
- INTO TABLE table1
-)
-WITH BROKER hdfs
-(
- "username"="hdfs_user",
- "password"="hdfs_password"
-)
-PROPERTIES
-(
- "timeout"="3600",
- "max_filter_ratio"="0.1"
-);
-```
-
-Broker Load is an asynchronous command. Successful execution of the statement above only means the task was submitted successfully. Whether the import succeeded must be checked through `SHOW LOAD;`. For example:
-
-`SHOW LOAD WHERE LABEL = "table1_20170708";`
-
-In the return result, FINISHED in the `State` field indicates that the import was successful.
-
-For more instructions on `SHOW LOAD`, see `HELP SHOW LOAD;`.
-
-An asynchronous import task can be cancelled before it finishes:
-
-`CANCEL LOAD WHERE LABEL = "table1_20170708";`
-
-## 3 Data query
-
-### 3.1 Simple Query
-
-Examples:
-
-```
-MySQL> SELECT * FROM table1 LIMIT 3;
-+--------+----------+----------+------+
-| siteid | citycode | username | pv |
-+--------+----------+----------+------+
-| 2 | 1 | 'grace' | 2 |
-| 5 | 3 | 'helen' | 3 |
-| 3 | 2 | 'tom' | 2 |
-+--------+----------+----------+------+
-3 rows in set (0.01 sec)
-
-MySQL> SELECT * FROM table1 ORDER BY citycode;
-+--------+----------+----------+------+
-| siteid | citycode | username | pv |
-+--------+----------+----------+------+
-| 2 | 1 | 'grace' | 2 |
-| 1 | 1 | 'jim' | 2 |
-| 3 | 2 | 'tom' | 2 |
-| 4 | 3 | 'bush' | 3 |
-| 5 | 3 | 'helen' | 3 |
-+--------+----------+----------+------+
-5 rows in set (0.01 sec)
-```
-
-### 3.2 Join Query
-
-Examples:
-
-```
-MySQL> SELECT SUM(table1.pv) FROM table1 JOIN table2 WHERE table1.siteid = table2.siteid;
-+--------------------+
-| sum(`table1`.`pv`) |
-+--------------------+
-| 12 |
-+--------------------+
-1 row in set (0.20 sec)
-```
-
-### 3.3 Subquery
-
-Examples:
-
-```
-MySQL> SELECT SUM(pv) FROM table2 WHERE siteid IN (SELECT siteid FROM table1 WHERE siteid > 2);
-+-----------+
-| sum(`pv`) |
-+-----------+
-| 8 |
-+-----------+
-1 row in set (0.13 sec)
-```
diff --git a/docs/en/getting-started/best-practice.md b/docs/en/getting-started/best-practice.md
deleted file mode 100644
index 930bdb3a86..0000000000
--- a/docs/en/getting-started/best-practice.md
+++ /dev/null
@@ -1,198 +0,0 @@
----
-{
- "title": "Best Practices",
- "language": "en"
-}
----
-
-
-
-
-# Best Practices
-
-## 1 Table Creation
-
-### 1.1 Data Model Selection
-
-Doris data model is currently divided into three categories: AGGREGATE KEY, UNIQUE KEY, DUPLICATE KEY. Data in all three models are sorted by KEY.
-
-1.1.1. AGGREGATE KEY
-
-When AGGREGATE KEY is the same, old and new records are aggregated. The aggregation functions currently supported are SUM, MIN, MAX, REPLACE.
-
-AGGREGATE KEY model can aggregate data in advance and is suitable for reporting and multi-dimensional analysis business.
-
-```
-CREATE TABLE site_visit
-(
-siteid INT,
-city SMALLINT,
-username VARCHAR (32),
-pv BIGINT SUM DEFAULT '0'
-)
-AGGREGATE KEY(siteid, city, username)
-DISTRIBUTED BY HASH(siteid) BUCKETS 10;
-```
-
-1.1.2. UNIQUE KEY
-
-When the UNIQUE KEY is the same, a new record overwrites the old record. Currently UNIQUE KEY is implemented with the same REPLACE aggregation as AGGREGATE KEY; the two are essentially the same. It is suitable for analytical business with update requirements.
-
-```
-CREATE TABLE sales_order
-(
-orderid BIGINT,
-status TINYINT,
-username VARCHAR (32),
-amount BIGINT DEFAULT '0'
-)
-UNIQUE KEY(orderid)
-DISTRIBUTED BY HASH(orderid) BUCKETS 10;
-```
-
-1.1.3. DUPLICATE KEY
-
-Only sort columns are specified, and the same rows are not merged. It is suitable for the analysis business where data need not be aggregated in advance.
-
-```
-CREATE TABLE session_data
-(
-visitorid SMALLINT,
-sessionid BIGINT,
-visittime DATETIME,
-city CHAR(20),
-province CHAR(20),
-ip VARCHAR(32),
-browser CHAR(20),
-url VARCHAR(1024)
-)
-DUPLICATE KEY(visitorid, sessionid)
-DISTRIBUTED BY HASH(sessionid, visitorid) BUCKETS 10;
-```
-
-### 1.2 Wide Table vs. Star Schema
-
-In order to adapt to front-end business, the business side often does not distinguish dimension information from metric information, and instead defines the schema as one wide table. For Doris, the performance of such wide tables is often unsatisfactory:
-
-* The schema has many fields, and the aggregation model may contain more key columns, so the number of columns that must be sorted during import increases.
-* Updates to dimension information are reflected in the whole table, and the update frequency directly affects query efficiency.
-
-Users are advised to use a Star Schema and distinguish dimension tables from fact (metric) tables as much as possible. Frequently updated dimension tables can be placed in MySQL external tables; if updates are rare, they can be placed directly in Doris. When storing a dimension table in Doris, more replicas can be configured for it to improve Join performance, as sketched below.
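-
-A rough sketch of a small dimension table created with more replicas (the table, columns and replica count are illustrative, and the replica count must not exceed the number of BE nodes):
-
-```
-CREATE TABLE dim_city
-(
-    citycode SMALLINT,
-    cityname VARCHAR(64)
-)
-UNIQUE KEY(citycode)
-DISTRIBUTED BY HASH(citycode) BUCKETS 1
-PROPERTIES("replication_num" = "5");  -- more replicas than the default 3 so joins hit a local copy more often
-```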
-
-### 1.3 Partitioning and Bucketing
-
-Doris supports two-level partitioned storage. The first level is partition, which currently supports both RANGE and LIST partition types, and the second layer is HASH bucket.
-
-1.3.1. Partitioning
-
-Partition is used to divide data into different intervals, which can be logically understood as dividing the original table into multiple sub-tables. Data can be easily managed by partition, for example, to delete data more quickly.
-
-1.3.1.1. Range Partitioning
-
-In business, most users will choose to partition on time, which has the following advantages:
-
-* Hot and cold data can be distinguished
-* Doris tiered storage (SSD + SATA) can be used
-
-1.3.1.2. List Partitioning
-
-In business, users can select cities or other enumeration values as partition values, as sketched below.
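-
-A rough sketch of list partitioning by city (table, partition names and values are illustrative):
-
-```
-CREATE TABLE user_city_visit
-(
-    city VARCHAR(20) NOT NULL,
-    siteid INT,
-    pv BIGINT SUM DEFAULT '0'
-)
-AGGREGATE KEY(city, siteid)
-PARTITION BY LIST(city)
-(
-    -- one partition per enumerated group of cities
-    PARTITION p_beijing VALUES IN ("Beijing"),
-    PARTITION p_east VALUES IN ("Shanghai", "Hangzhou")
-)
-DISTRIBUTED BY HASH(siteid) BUCKETS 10;
-```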
-
-1.3.2. Hash Bucketing
-
-The data is divided into different buckets according to the hash value.
-
-* It is suggested to use columns with high cardinality as bucket columns to avoid data skew.
-* To facilitate data recovery, it is suggested to keep a single bucket reasonably small, within 10GB. The number of buckets should therefore be considered when creating tables or adding partitions; different partitions can specify different bucket counts.
-
-### 1.4 Sparse Index and Bloom Filter
-
-Doris stores data in an ordered way and builds a sparse index on top of the ordered data, with an index granularity of one block (1024 rows).
-
-The sparse index takes a fixed-length prefix of the schema as the index content; Doris currently uses a 36-byte prefix as the index.
-
-* When creating a table, it is suggested to place the common filter fields of queries at the front of the schema: the more distinguishing and the more frequently queried a field is, the further forward it should be placed.
-* One special case is VARCHAR-type fields. A VARCHAR field can only be the last field of the sparse index: the index is truncated at the VARCHAR column, so if a VARCHAR appears near the front the index may be shorter than 36 bytes. See [Data Model, ROLLUP and Prefix Index](./data-model-rollup.md) for details.
-* Besides the sparse index, Doris also provides a bloomfilter index, which filters effectively on columns with high cardinality. If a VARCHAR column cannot be placed in the sparse index, a bloomfilter index can be created on it, as sketched after this list.
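-
-A rough sketch (table and column names are illustrative) of declaring a bloomfilter index through the `bloom_filter_columns` table property:
-
-```
-CREATE TABLE user_event
-(
-    dt DATE,
-    userid BIGINT,
-    event_detail VARCHAR(1024)
-)
-DUPLICATE KEY(dt, userid)
-DISTRIBUTED BY HASH(userid) BUCKETS 10
-PROPERTIES("bloom_filter_columns" = "event_detail");  -- bloomfilter index on the high-cardinality VARCHAR column
-```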
-
-### 1.5 Materialized View (Rollup)
-
-A Rollup can essentially be understood as a materialized index of the original table. When creating a Rollup, only some of the columns of the Base table can be selected as its schema, and the field order of that schema can differ from the Base table.
-
-Rollup can be considered in the following cases:
-
-1.5.1. Low ratio of data aggregation in the Base Table
-
-This is usually because the Base table has fields with high cardinality. In this case, you can consider selecting some of the columns to build a Rollup.
-
-For the `site_visit` table:
-
-```
-site_visit(siteid, city, username, pv)
-```
-
-The siteid column may lead to a low data aggregation ratio. If the business side often needs pv statistics by city, a Rollup containing only city and pv can be built:
-
-```
-ALTER TABLE site_visit ADD ROLLUP rollup_city(city, pv);
-```
-
-1.5.2. The prefix index in Base Table cannot be hit
-
-Generally, the way the Base table is built cannot cover all query patterns. In this case, you can consider adjusting the column order and building a Rollup.
-
-For the session data table:
-
-```
-session_data(visitorid, sessionid, visittime, city, province, ip, browser, url)
-```
-
-Besides analyses by visitorid, there are also cases that analyze by browser and province, so a separate Rollup can be built:
-
-```
-ALTER TABLE session_data ADD ROLLUP rollup_browser(browser,province,ip,url) DUPLICATE KEY(browser,province);
-```
-
-## 2 Schema Change
-
-There are three types of Schema Change in Doris: Sorted Schema Change, Direct Schema Change, Linked Schema Change.
-
-2.1. Sorted Schema Change
-
-The ordering of the columns is changed and the data needs to be re-sorted, for example when a column is deleted from the sort key and the remaining fields must be re-sorted.
-
-```
-ALTER TABLE site_visit DROP COLUMN city;
-```
-
-2.2. Direct Schema Change: the data does not need to be re-sorted, but it does need to be converted, for example when modifying the type of a column or adding a column to the sparse index.
-
-```
-ALTER TABLE site_visit MODIFY COLUMN username varchar(64);
-```
-
-2.3. Linked Schema Change: the data does not need to be transformed at all, for example when adding a column.
-
-```
-ALTER TABLE site_visit ADD COLUMN click bigint SUM default '0';
-```
-
-It is recommended to take possible schema changes into account when creating a table, so that subsequent schema changes can be performed more quickly.
diff --git a/docs/en/getting-started/data-model-rollup.md b/docs/en/getting-started/data-model-rollup.md
deleted file mode 100644
index d70e064eb4..0000000000
--- a/docs/en/getting-started/data-model-rollup.md
+++ /dev/null
@@ -1,636 +0,0 @@
----
-{
- "title": "Data Model, ROLLUP and Prefix Index",
- "language": "en"
-}
----
-
-
-
-# Data Model, ROLLUP and Prefix Index
-
-This document describes Doris's data model, ROLLUP and prefix index concepts at the logical level to help users better use Doris to cope with different business scenarios.
-
-## Basic concepts
-
-In Doris, data is logically described in the form of tables.
-A table consists of rows and columns. Row is a row of user data. Column is used to describe different fields in a row of data.
-
-Columns can be divided into two categories: Key and Value. From a business perspective, Key and Value can correspond to dimension columns and indicator columns, respectively.
-
-Doris's data model is divided into three main categories:
-
-* Aggregate
-* Unique
-* Duplicate
-
-Let's introduce them separately.
-
-## Aggregate Model
-
-We illustrate what aggregation model is and how to use it correctly with practical examples.
-
-### Example 1: Importing data aggregation
-
-Assume that the business has the following data table schema:
-
-|ColumnName|Type|AggregationType|Comment|
-|---|---|---|---|
-| user_id | LARGEINT | | user id|
-| date | DATE | | data import date|
-| city | VARCHAR (20) | | user city|
-| age | SMALLINT | | user age|
-| sex | TINYINT | | user gender|
-| last_visit_date | DATETIME | REPLACE | last visit time of the user|
-| cost | BIGINT | SUM | total consumption of the user|
-| max_dwell_time | INT | MAX | maximum dwell time of the user|
-| min_dwell_time | INT | MIN | minimum dwell time of the user|
-
-Expressed as a CREATE TABLE statement, it is as follows (the Partition and Distribution information is omitted):
-
-```
-CREATE TABLE IF NOT EXISTS example_db.example_tbl
-(
- `user_id` LARGEINT NOT NULL COMMENT "user id",
- `date` DATE NOT NULL COMMENT "data import time",
- `city` VARCHAR(20) COMMENT "city",
- `age` SMALLINT COMMENT "age",
- `sex` TINYINT COMMENT "gender",
- `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "last visit date time",
- `cost` BIGINT SUM DEFAULT "0" COMMENT "user total cost",
- `max_dwell_time` INT MAX DEFAULT "0" COMMENT "user max dwell time",
- `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "user min dwell time"
-)
-AGGREGATE KEY(`user_id`, `date`, `city`, `age`, `sex`)
-... /* ignore Partition and Distribution */
-;
-```
-
-As you can see, this is a typical fact table of user information and access behavior.
-In a typical star schema, user information and access behavior are usually stored in a dimension table and a fact table respectively. Here, to explain Doris's data model more conveniently, we store both kinds of information in a single table.
-
-The columns in the table are divided into Key (dimension) columns and Value (metric) columns according to whether `AggregationType` is set. Columns without an `AggregationType`, such as `user_id`, `date`, `age`, etc., are **Key** columns, while columns with an `AggregationType` are **Value** columns.
-
-When data is imported, rows with identical Key columns are aggregated into one row, and the Value columns are aggregated according to their `AggregationType`. `AggregationType` currently supports the following four aggregation methods:
-
-1. SUM: Sum, multi-line Value accumulation.
-2. REPLACE: Replace. The Value in the newly imported batch replaces the Value in previously imported rows.
-3. MAX: Keep the maximum.
-4. MIN: Keep the minimum.
-
-Suppose we have the following imported data (raw data):
-
-|user\_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time|
-|---|---|---|---|---|---|---|---|---|
-| 10000 | 2017-10-01 | Beijing | 20 | 0 | 2017-10-01 06:00 | 20 | 10 | 10|
-| 10000 | 2017-10-01 | Beijing | 20 | 0 | 2017-10-01 07:00 | 15 | 2 | 2|
-| 10001 | 2017-10-01 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22|
-| 10002 | 2017-10-02 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5|
-| 10003 | 2017-10-02 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11|
-| 10004 | 2017-10-01 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3|
-| 10004 | 2017-10-03 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6|
-
-Let's assume that this is a table that records the user's behavior in accessing a commodity page. Let's take the first row of data as an example and explain it as follows:
-
-| Data | Description|
-|---|---|
-| 10000 | User id, each user uniquely identifies id|
-| 2017-10-01 | Data storage time, accurate to date|
-| Beijing | User City|
-| 20 | User Age|
-| 0 | Gender male (1 for female)|
-| 2017-10-01 06:00 | User's time to visit this page, accurate to seconds|
-| 20 | Consumption generated by the user's current visit|
-| 10 | User's visit, time to stay on the page|
-| 10 | User's current visit, time spent on the page (redundancy)|
-
-When this batch of data is imported into Doris correctly, the final storage in Doris is as follows:
-
-|user\_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time|
-|---|---|---|---|---|---|---|---|---|
-| 10000 | 2017-10-01 | Beijing | 20 | 0 | 2017-10-01 07:00 | 35 | 10 | 2|
-| 10001 | 2017-10-01 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22|
-| 10002 | 2017-10-02 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5|
-| 10003 | 2017-10-02 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11|
-| 10004 | 2017-10-01 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3|
-| 10004 | 2017-10-03 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6|
-
-As you can see, only one aggregated row is left for user 10000, while the data of the other users remains the same as the original data. The aggregated data of user 10000 is explained first:
-
-The first five columns remain unchanged; the changes start from column 6, `last_visit_date`:
-
-* `2017-10-01 07:00`: Because the `last_visit_date` column is aggregated by REPLACE, `2017-10-01 06:00` has been replaced by `2017-10-01 07:00`.
-> Note: For data in the same import batch, the replacement order is not guaranteed for REPLACE aggregation, so in this case the result could also have been `2017-10-01 06:00`. For data from different import batches, the data of the later batch is guaranteed to replace that of the earlier batch.
-
-* `35`: Because the aggregation type of the `cost` column is SUM, 35 is accumulated from 20 + 15.
-* `10`: Because the aggregation type of the `max_dwell_time` column is MAX, the maximum of 10 and 2 is kept, which is 10.
-* `2`: Because the aggregation type of the `min_dwell_time` column is MIN, the minimum of 10 and 2 is kept, which is 2.
-
-After aggregation, Doris ultimately only stores aggregated data. In other words, detailed data will be lost and users can no longer query the detailed data before aggregation.
-
-### Example 2: Keep detailed data
-
-Following example 1, we modify the table structure as follows:
-
-|ColumnName|Type|AggregationType|Comment|
-|---|---|---|---|
-| user_id | LARGEINT | | user id|
-| date | DATE | | data import date|
-| timestamp | DATETIME | | data import time, accurate to the second|
-| city | VARCHAR (20) | | user city|
-| age | SMALLINT | | user age|
-| sex | TINYINT | | user gender|
-| last_visit_date | DATETIME | REPLACE | last visit time of the user|
-| cost | BIGINT | SUM | total consumption of the user|
-| max_dwell_time | INT | MAX | maximum dwell time of the user|
-| min_dwell_time | INT | MIN | minimum dwell time of the user|
-
-That is to say, a `timestamp` column has been added to record the data import time, accurate to the second.
-
-The imported data are as follows:
-
-|user_id|date|timestamp|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time|
-|---|---|---|---|---|---|---|---|---|---|
-| 10000 | 2017-10-01 | 2017-10-01 08:00:05 | Beijing | 20 | 0 | 2017-10-01 06:00 | 20 | 10 | 10|
-| 10000 | 2017-10-01 | 2017-10-01 09:00:05 | Beijing | 20 | 0 | 2017-10-01 07:00 | 15 | 2 | 2|
-| 10001 | 2017-10-01 | 2017-10-01 18:12:10 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22|
-| 10002 | 2017-10-02 | 2017-10-02 13:10:00 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5|
-| 10003 | 2017-10-02 | 2017-10-02 13:15:00 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11|
-| 10004 | 2017-10-01 | 2017-10-01 12:12:48 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3|
-| 10004 | 2017-10-03 | 2017-10-03 12:38:20 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6|
-
-When this batch of data is imported into Doris correctly, the final storage in Doris is as follows:
-
-|user_id|date|timestamp|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time|
-|---|---|---|---|---|---|---|---|---|---|
-| 10000 | 2017-10-01 | 2017-10-01 08:00:05 | Beijing | 20 | 0 | 2017-10-01 06:00 | 20 | 10 | 10|
-| 10000 | 2017-10-01 | 2017-10-01 09:00:05 | Beijing | 20 | 0 | 2017-10-01 07:00 | 15 | 2 | 2|
-| 10001 | 2017-10-01 | 2017-10-01 18:12:10 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22|
-| 10002 | 2017-10-02 | 2017-10-02 13:10:00 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5|
-| 10003 | 2017-10-02 | 2017-10-02 13:15:00 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11|
-| 10004 | 2017-10-01 | 2017-10-01 12:12:48 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3|
-| 10004 | 2017-10-03 | 2017-10-03 12:38:20 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6|
-
-We can see that the stored data is exactly the same as the imported data, without any aggregation. This is because the added `timestamp` column makes the Keys of the rows in this batch **not exactly the same**. That is, as long as the Keys of the rows in the imported data are not identical, Doris keeps the complete detail data even under the aggregation model.
-
-### Example 3: Importing data and aggregating existing data
-
-Take Example 1. Suppose that the data in the table are as follows:
-
-|user_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time|
-|---|---|---|---|---|---|---|---|---|
-| 10000 | 2017-10-01 | Beijing | 20 | 0 | 2017-10-01 07:00 | 35 | 10 | 2|
-| 10001 | 2017-10-01 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22|
-| 10002 | 2017-10-02 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5|
-| 10003 | 2017-10-02 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11|
-| 10004 | 2017-10-01 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3|
-| 10004 | 2017-10-03 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6|
-
-We imported a new batch of data:
-
-|user_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time|
-|---|---|---|---|---|---|---|---|---|
-| 10004 | 2017-10-03 | Shenzhen | 35 | 0 | 2017-10-03 11:22:00 | 44 | 19 | 19|
-| 10005 | 2017-10-03 | Changsha | 29 | 1 | 2017-10-03 18:11:02 | 3 | 1 | 1|
-
-When this batch of data is imported into Doris correctly, the final storage in Doris is as follows:
-
-|user_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time|
-|---|---|---|---|---|---|---|---|---|
-| 10000 | 2017-10-01 | Beijing | 20 | 0 | 2017-10-01 07:00 | 35 | 10 | 2|
-| 10001 | 2017-10-01 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22|
-| 10002 | 2017-10-02 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5|
-| 10003 | 2017-10-02 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11|
-| 10004 | 2017-10-01 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3|
-| 10004 | 2017-10-03 | Shenzhen | 35 | 0 | 2017-10-03 11:22:00 | 55 | 19 | 6|
-| 10005 | 2017-10-03 | Changsha | 29 | 1 | 2017-10-03 18:11:02 | 3 | 1 | 1|
-
-As you can see, the existing data and the newly imported data of user 10004 have been aggregated, and the data of the new user 10005 has been added.
-
-Data aggregation occurs in Doris in the following three stages:
-
-1. The ETL stage of data import for each batch. This phase aggregates data within each batch of imported data.
-2. The stage in which the underlying BE performs data Compaction. At this stage, BE aggregates data from different batches that have been imported.
-3. Data query stage. In data query, the data involved in the query will be aggregated accordingly.
-
-Data may be aggregated to varying degrees at different times. For example, when a batch of data is just imported, it may not be aggregated with the existing data. But for users, user **can only query aggregated data**. That is, different degrees of aggregation are transparent to user queries. Users should always assume that data exists in terms of the degree of aggregation that **ultimately completes**, and **should not assume that some aggregation has not yet occurred**. (See the section **Limitations of the aggregation model** for more details.)
-
-## Unique Model
-
-In some multi-dimensional analysis scenarios, users care more about how to ensure the uniqueness of the Key, that is, how to obtain a primary-key uniqueness constraint. Therefore, we introduce the Unique data model. This model is essentially a special case of the aggregate model and a simplified way of describing the table structure. Let's give an example.
-
-|ColumnName|Type|IsKey|Comment|
-|---|---|---|---|
-| user_id | BIGINT | Yes | user id|
-| username | VARCHAR (50) | Yes | User nickname|
-| city | VARCHAR (20) | No | User City|
-| age | SMALLINT | No | User Age|
-| sex | TINYINT | No | User Gender|
-| phone | LARGEINT | No | User Phone|
-| address | VARCHAR (500) | No | User Address|
-| register_time | DATETIME | No | user registration time|
-
-This is a typical user base information table. This type of data has no aggregation requirement; only the uniqueness of the primary key (here user_id + username) needs to be guaranteed. The CREATE TABLE statement is as follows:
-
-```
-CREATE TABLE IF NOT EXISTS example_db.example_tbl
-(
-`user_id` LARGEINT NOT NULL COMMENT "user id",
-`username` VARCHAR(50) NOT NULL COMMENT "user nickname",
-`city` VARCHAR(20) COMMENT "user city",
-`age` SMALLINT COMMENT "user age",
-`sex` TINYINT COMMENT "user gender",
-`phone` LARGEINT COMMENT "user phone",
-`address` VARCHAR(500) COMMENT "user address",
-`register_time` DATETIME COMMENT "user register time"
-)
-UNIQUE KEY(`user_id`, `username`)
-... /* ignore Partition and Distribution */
-;
-```
-
-This table structure is exactly the same as the following table structure described by the aggregation model:
-
-|ColumnName|Type|AggregationType|Comment|
-|---|---|---|---|
-| user_id | BIGINT | | user id|
-| username | VARCHAR (50) | | User nickname|
-| city | VARCHAR (20) | REPLACE | User City|
-| age | SMALLINT | REPLACE | User Age|
-| sex | TINYINT | REPLACE | User Gender|
-| phone | LARGEINT | REPLACE | User Phone|
-| address | VARCHAR (500) | REPLACE | User Address|
-| register_time | DATETIME | REPLACE | User registration time|
-
-And table-building statements:
-
-```
-CREATE TABLE IF NOT EXISTS example_db.example_tbl
-(
-`user_id` LARGEINT NOT NULL COMMENT "user id",
-`username` VARCHAR(50) NOT NULL COMMENT "user nickname",
-`city` VARCHAR(20) REPLACE COMMENT "user city",
-`age` SMALLINT REPLACE COMMENT "user age",
-`sex` TINYINT REPLACE COMMENT "user gender",
-`phone` LARGEINT REPLACE COMMENT "user phone",
-`address` VARCHAR(500) REPLACE COMMENT "user address",
-`register_time` DATETIME REPLACE COMMENT "user register time"
-)
-AGGREGATE KEY(`user_id`, `username`)
-... /* ignore Partition and Distribution */
-;
-```
-
-That is to say, the Unique model can be completely replaced by the REPLACE aggregation in the aggregate model. Its internal implementation and data storage are exactly the same, so no further examples are given here.
-
-## Duplicate Model
-
-In some multidimensional analysis scenarios, data has neither primary keys nor aggregation requirements. Therefore, we introduce Duplicate data model to meet this kind of demand. Examples are given.
-
-|ColumnName|Type|SortKey|Comment|
-|---|---|---|---|
-| timestamp | DATETIME | Yes | log time|
-| type | INT | Yes | log type|
-|error_code|INT|Yes|error code|
-| error_msg | VARCHAR (1024) | No | error detail message|
-|op_id|BIGINT|No|operator id|
-|op_time|DATETIME|No|operation time|
-
-The CREATE TABLE statement is as follows:
-```
-CREATE TABLE IF NOT EXISTS example_db.example_tbl
-(
-`timestamp` DATETIME NOT NULL COMMENT "log time",
-`type` INT NOT NULL COMMENT "log type",
-`error_code` INT COMMENT "error code",
-`error_msg` VARCHAR(1024) COMMENT "error detail message",
-`op_id` BIGINT COMMENT "operator id",
-`op_time` DATETIME COMMENT "operation time"
-)
-DUPLICATE KEY(`timestamp`, `type`)
-... /* ignore Partition and Distribution */
-;
-```
-
-This data model is different from Aggregate and Unique models. Data is stored entirely in accordance with the data in the imported file, without any aggregation. Even if the two rows of data are identical, they will be retained.
-The DUPLICATE KEY specified in the CREATE TABLE statement is only used to indicate which columns the underlying data is sorted by. (A more appropriate name would be "Sorted Column"; the name "DUPLICATE KEY" is simply used to indicate the data model in use. For more on "Sorted Column", see the [Prefix Index](https://doris.apache.org/getting-started/data-model-rollup.html#prefix-index) section.) For the choice of DUPLICATE KEY, we recommend selecting the first 2-4 columns as appropriate.
-
-This data model is suitable for storing raw data without aggregation requirements and primary key uniqueness constraints. For more usage scenarios, see the [Limitations of the Aggregation Model](https://doris.apache.org/getting-started/data-model-rollup.html#limitations-of-aggregation-model) section.
-
-## ROLLUP
-
-ROLLUP in multidimensional analysis means "scroll up", which means that data is aggregated further at a specified granularity.
-
-### Basic concepts
-
-In Doris, we call the table created by the user through a CREATE TABLE statement the Base table. The Base table stores the basic data in the way specified by the user's CREATE TABLE statement.
-
-On top of the Base table, we can create any number of ROLLUP tables. These ROLLUP data are generated based on the Base table and physically **stored independently**.
-
-The basic function of ROLLUP tables is to obtain coarser aggregated data on the basis of Base tables.
-
-Let's illustrate the ROLLUP tables and their roles in different data models with examples.
-
-#### ROLLUP in Aggregate Model and Unique Model
-
-Because Unique is only a special case of the Aggregate model, we do not distinguish it here.
-
-Example 1: Get the total consumption per user
-
-Following **Example 2** in the **Aggregate Model** section, the Base table structure is as follows:
-
-|ColumnName|Type|AggregationType|Comment|
-|---|---|---|---|
-| user_id | LARGEINT | | user id|
-| date | DATE | | data import date|
-| timestamp | DATETIME | | data import time, accurate to the second|
-| city | VARCHAR (20) | | user city|
-| age | SMALLINT | | user age|
-| sex | TINYINT | | user gender|
-| last_visit_date | DATETIME | REPLACE | last visit time of the user|
-| cost | BIGINT | SUM | total consumption of the user|
-| max_dwell_time | INT | MAX | maximum dwell time of the user|
-| min_dwell_time | INT | MIN | minimum dwell time of the user|
-
-The data stored are as follows:
-
-|user_id|date|timestamp|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time|
-|---|---|---|---|---|---|---|---|---|---|
-| 10000 | 2017-10-01 | 2017-10-01 08:00:05 | Beijing | 20 | 0 | 2017-10-01 06:00 | 20 | 10 | 10|
-| 10000 | 2017-10-01 | 2017-10-01 09:00:05 | Beijing | 20 | 0 | 2017-10-01 07:00 | 15 | 2 | 2|
-| 10001 | 2017-10-01 | 2017-10-01 18:12:10 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22|
-| 10002 | 2017-10-02 | 2017-10-02 13:10:00 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5|
-| 10003 | 2017-10-02 | 2017-10-02 13:15:00 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11|
-| 10004 | 2017-10-01 | 2017-10-01 12:12:48 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3|
-| 10004 | 2017-10-03 | 2017-10-03 12:38:20 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6|
-
-On this basis, we create a ROLLUP:
-
-|ColumnName|
-|---|
-|user_id|
-|cost|
-
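-Such a ROLLUP could be created with ALTER TABLE; a rough sketch (the rollup name is illustrative):
-
-```
-ALTER TABLE example_tbl ADD ROLLUP rollup_cost_userid(user_id, cost);
-```
-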
-The ROLLUP contains only two columns: user_id and cost. After the creation, the data stored in the ROLLUP is as follows:
-
-|user\_id|cost|
-|---|---|
-|10000|35|
-|10001|2|
-|10002|200|
-|10003|30|
-|10004|111|
-
-As you can see, ROLLUP retains only the results of SUM on the cost column for each user_id. So when we do the following query:
-
-`SELECT user_id, sum(cost) FROM table GROUP BY user_id;`
-
-Doris automatically hits the ROLLUP table, thus completing the aggregated query by scanning only a very small amount of data.
-
-Example 2: Get the total consumption, the longest and the shortest page dwell time of users of different ages in different cities
-
-Follow example 1. Based on the Base table, we create a ROLLUP:
-
-|ColumnName|Type|AggregationType|Comment|
-|---|---|---|---|
-| city | VARCHAR (20) | | user city|
-| age | SMALLINT | | user age|
-| cost | BIGINT | SUM | total consumption of the user|
-| max_dwell_time | INT | MAX | maximum dwell time of the user|
-| min_dwell_time | INT | MIN | minimum dwell time of the user|
-
-After the creation, the data stored in the ROLLUP is as follows:
-
-|city|age|cost|max\_dwell\_time|min\_dwell\_time|
-|---|---|---|---|---|
-| Beijing | 20 | 35 | 10 | 2|
-| Beijing | 30 | 2 | 22 | 22|
-| Shanghai | 20 | 200 | 5 | 5|
-| Guangzhou | 32 | 30 | 11 | 11|
-| Shenzhen | 35 | 111 | 6 | 3|
-
-When we do the following queries:
-
-* `SELECT city, age, sum(cost), max(max_dwell_time), min(min_dwell_time) FROM table GROUP BY city, age;`
-* `SELECT city, sum(cost), max(max_dwell_time), min(min_dwell_time) FROM table GROUP BY city;`
-* `SELECT city, age, sum(cost), min(min_dwell_time) FROM table GROUP BY city, age;`
-
-Doris automatically hits the ROLLUP table.
-
-#### ROLLUP in Duplicate Model
-
-Because the Duplicate model has no aggregation semantics, the ROLLUP in this model loses the meaning of "rolling up"; it is only used to adjust the column order to hit the prefix index. In the next section, we introduce the prefix index in detail and how to use ROLLUP to change the prefix index for better query efficiency.
-
-### Prefix Index and ROLLUP
-
-#### prefix index
-
-Unlike traditional database designs, Doris does not support creating indexes on arbitrary columns. An MPP-based OLAP database such as Doris typically handles large amounts of data by increasing concurrency.
-In essence, Doris stores data in a structure similar to an SSTable (Sorted String Table): an ordered structure in which data is stored sorted by the specified columns. Searching by the sort columns in this structure is very efficient.
-
-In the Aggregate, Unique and Duplicate data models, the underlying data is stored sorted by the columns specified in AGGREGATE KEY, UNIQUE KEY and DUPLICATE KEY in the respective CREATE TABLE statements.
-
-The prefix index builds on this sorting and provides a way to quickly locate data based on a given prefix of the sort columns.
-
-We use the first **36 bytes** of a row's sort columns as the prefix index of that row. When a VARCHAR type is encountered, the prefix index is truncated there. Some examples:
-
-1. The prefix index of the following table structure is user_id (8 Bytes) + age (4 Bytes) + message (prefix 20 Bytes).
-
-|ColumnName|Type|
-|---|---|
-|user_id|BIGINT|
-|age|INT|
-|message|VARCHAR(100)|
-|max\_dwell\_time|DATETIME|
-|min\_dwell\_time|DATETIME|
-
-2. The prefix index of the following table structure is user_name (20 Bytes). Even though 36 bytes have not been reached, the prefix index stops at the VARCHAR column and does not continue.
-
-|ColumnName|Type|
-|---|---|
-|user_name|VARCHAR(20)|
-|age|INT|
-|message|VARCHAR(100)|
-|max\_dwell\_time|DATETIME|
-|min\_dwell\_time|DATETIME|
-
-When the query condition is a prefix of the **prefix index**, the query can be greatly accelerated. For example, for the first table above, we execute the following query:
-
-`SELECT * FROM table WHERE user_id=1829239 and age=20;`
-
-The efficiency of this query is **much higher** than that of the following query:
-
-`SELECT * FROM table WHERE age=20;`
-
-Therefore, when constructing tables, **correctly choosing column order can greatly improve query efficiency**.
-
-#### ROLLUP adjusts prefix index
-
-Because the column order is fixed when a table is created, a table has only one prefix index. This may be inefficient for queries whose conditions are on columns that cannot hit the prefix index. Therefore, we can adjust the column order by creating a ROLLUP. An example is given below.
-
-The structure of the Base table is as follows:
-
-|ColumnName|Type|
-|---|---|
-|user\_id|BIGINT|
-|age|INT|
-|message|VARCHAR(100)|
-|max\_dwell\_time|DATETIME|
-|min\_dwell\_time|DATETIME|
-
-On this basis, we can create a ROLLUP table:
-
-|ColumnName|Type|
-|---|---|
-|age|INT|
-|user\_id|BIGINT|
-|message|VARCHAR(100)|
-|max\_dwell\_time|DATETIME|
-|min\_dwell\_time|DATETIME|
-
-As you can see, the columns of the ROLLUP and the Base table are exactly the same; only the order of user_id and age is changed. So when we run the following query:
-
-`SELECT * FROM table WHERE age=20 AND message LIKE "%error%";`
-
-The ROLLUP table is preferred because the prefix index of ROLLUP matches better.
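-
-A minimal sketch of creating this reordered ROLLUP, again with a hypothetical table name:
-
-```
--- age is placed first so that queries filtering mainly on age hit the prefix index
-ALTER TABLE example_db.example_tbl
-ADD ROLLUP rollup_age_first(age, user_id, message, max_dwell_time, min_dwell_time);
-```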
-
-### Some Explanations of ROLLUP
-
-* The fundamental role of ROLLUP is to improve the query efficiency of some queries (whether by aggregating to reduce the amount of data or by modifying column order to match prefix indexes). Therefore, the meaning of ROLLUP has gone beyond the scope of "roll-up". That's why we named it Materialized Index in the source code.
-* ROLLUP is attached to the Base table and can be seen as an auxiliary data structure of the Base table. Users can create or delete a ROLLUP based on the Base table, but cannot explicitly specify in a query that a particular ROLLUP should be used. Whether a ROLLUP is hit is entirely decided by the Doris system.
-* ROLLUP data is stored in separate physical storage. Therefore, the more ROLLUP you create, the more disk space you occupy. It also has an impact on the speed of import (the ETL phase of import automatically generates all ROLLUP data), but it does not reduce query efficiency (only better).
-* Data updates to a ROLLUP are fully synchronized with the Base table. Users do not need to worry about this.
-* Columns in a ROLLUP are aggregated in exactly the same way as in the Base table; the aggregation type cannot be, and does not need to be, specified separately when creating a ROLLUP.
-* A necessary (but not sufficient) condition for a query to hit a ROLLUP is that **all columns** involved in the query (including the columns in the select list and the where conditions) exist in the ROLLUP's columns. Otherwise, the query can only hit the Base table.
-* Certain types of queries (such as count(*)) cannot hit any ROLLUP. See the next section, **Limitations of the aggregation model**.
-* The query execution plan can be obtained with the `EXPLAIN your_sql;` command; the plan shows whether a ROLLUP has been hit (a usage sketch follows this list).
-* The Base table and all of its ROLLUPs can be displayed with the `DESC tbl_name ALL;` statement.
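-
-For example, a quick sketch with a hypothetical table name (the exact plan output depends on the table and the Doris version):
-
-```
--- show the Base table and all of its ROLLUPs
-DESC example_db.example_tbl ALL;
-
--- check the "rollup" and "PREAGGREGATION" fields of the OlapScanNode in the plan
-EXPLAIN SELECT user_id, SUM(cost) FROM example_db.example_tbl GROUP BY user_id;
-```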
-
-See [Query how to hit Rollup](hit-the-rollup) for more details on how queries hit a ROLLUP.
-
-## Limitations of aggregation model
-
-Here we introduce the limitations of Aggregate model (including Unique model).
-
-The aggregation model presents only aggregated data. That is, any data that has not yet been physically aggregated (for example, data from two different import batches) must still be presented to users in a way that guarantees consistency. Here is an example.
-
-Assume the table is structured as follows:
-
-|ColumnName|Type|AggregationType|Comment|
-|---|---|---|---|
-| user\_id | LARGEINT | | User id|
-| date | DATE | | Date of data import|
-| cost | BIGINT | SUM | Total user consumption|
-
-Assume that there are two batches of data that have been imported into the storage engine as follows:
-
-**batch 1**
-
-|user\_id|date|cost|
-|---|---|---|
-|10001|2017-11-20|50|
-|10002|2017-11-21|39|
-
-**batch 2**
-
-|user\_id|date|cost|
-|---|---|---|
-|10001|2017-11-20|1|
-|10001|2017-11-21|5|
-|10003|2017-11-22|22|
-
-As you can see, the data belonging to user 10001 in the two import batches has not yet been aggregated. However, users must only be able to query the aggregated data, which is as follows:
-
-|user\_id|date|cost|
-|---|---|---|
-|10001|2017-11-20|51|
-|10001|2017-11-21|5|
-|10002|2017-11-21|39|
-|10003|2017-11-22|22|
-
-To ensure this consistency, Doris adds an aggregation operator to the query engine.
-
-In addition, when executing an aggregate query on a Value column with an aggregate function that differs from the column's aggregation type, pay attention to the semantics. For example, on the table above we execute the following query:
-
-`SELECT MIN(cost) FROM table;`
-
-The result is 5, not 1, because the raw value 1 has already been aggregated into 51.
-
-At the same time, this consistency guarantee will greatly reduce the query efficiency in some queries.
-
-Let's take the most basic count(*) query as an example:
-
-`SELECT COUNT(*) FROM table;`
-
-In other databases, such queries return results quickly, because in the implementation the result can be obtained by counting rows at import time and saving the count statistics, or by scanning only one column at query time, with very little overhead. But in Doris's aggregation model, the overhead of this query is **very large**.
-
-Let's take the data as an example.
-
-**batch 1**
-
-|user\_id|date|cost|
-|---|---|---|
-|10001|2017-11-20|50|
-|10002|2017-11-21|39|
-
-**batch 2**
-
-|user\_id|date|cost|
-|---|---|---|
-|10001|2017-11-20|1|
-|10001|2017-11-21|5|
-|10003|2017-11-22|22|
-
-Because the final aggregation result is:
-
-|user\_id|date|cost|
-|---|---|---|
-|10001|2017-11-20|51|
-|10001|2017-11-21|5|
-|10002|2017-11-21|39|
-|10003|2017-11-22|22|
-
-So the correct result of `select count(*) from table;` should be **4**. But if we only scan the `user_id` column and aggregate at query time, the result is **3** (10001, 10002, 10003); if we scan without aggregating, the result is **5** (a total of five rows in the two batches). Both results are wrong.
-
-To get the correct result, we must read both the `user_id` and `date` columns **and aggregate them at query time** to return the correct result of **4**. That is, for a `count(*)` query, Doris must scan all AGGREGATE KEY columns (here `user_id` and `date`) and aggregate them to obtain the semantically correct result. When there are many aggregate key columns, a `count(*)` query needs to scan a large amount of data.
-
-Therefore, when the business has frequent `count(*)` queries, we recommend simulating `count(*)` by adding a column whose value is always 1 and whose aggregation type is SUM. Taking the table from the previous example, we modify it as follows:
-
-|ColumnName|Type|AggregationType|Comment|
-|---|---|---|---|
-| user\_id | BIGINT | | User id|
-| date | DATE | | Date of data import|
-| cost | BIGINT | SUM | Total user consumption|
-| count | BIGINT | SUM | For counting|
-
-Add a count column whose value is always **1** in the imported data. Then the result of `select count(*) from table;` is equivalent to `select sum(count) from table;`, and the latter is far more efficient than the former. However, this method has a limitation: users must guarantee that rows with the same AGGREGATE KEY columns are never imported more than once. Otherwise, `select sum(count) from table;` only expresses the number of originally imported rows, not the semantics of `select count(*) from table;`.
-
-Another way is to **change the aggregation type of the count column above to REPLACE while keeping its value as 1**. Then the results of `select sum(count) from table;` and `select count(*) from table;` will be consistent, and there is no restriction on importing duplicate rows.
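-
-A minimal sketch of both approaches, using a hypothetical table name (the columns follow the table above):
-
-```
--- Approach 1: a SUM "count" column whose value is always 1 in the imported data
-CREATE TABLE example_db.user_cost_sum
-(
-    `user_id` BIGINT,
-    `date`    DATE,
-    `cost`    BIGINT SUM,
-    `count`   BIGINT SUM DEFAULT "1"
-)
-AGGREGATE KEY(`user_id`, `date`)
-DISTRIBUTED BY HASH(`user_id`) BUCKETS 10;
-
--- count(*) can then be rewritten as:
-SELECT SUM(`count`) FROM example_db.user_cost_sum;
-
--- Approach 2: declare the column as `count` BIGINT REPLACE DEFAULT "1" instead,
--- so that re-imported rows with the same key do not inflate the count.
-```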
-
-### Duplicate Model
-
-The Duplicate model does not have this limitation of the aggregation model. Because the model has no aggregation semantics, a count(*) query can obtain the semantically correct result by scanning any one column.
-
-## Suggestions for Choosing Data Model
-
-Because the data model is fixed when the table is created and **cannot be modified afterwards**, it is **very important** to select an appropriate data model.
-
-1. Aggregate model can greatly reduce the amount of data scanned and the amount of query computation by pre-aggregation. It is very suitable for report query scenarios with fixed patterns. But this model is not very friendly for count(*) queries. At the same time, because the aggregation method on the Value column is fixed, semantic correctness should be considered in other types of aggregation queries.
-2. Unique model guarantees the uniqueness of primary key for scenarios requiring unique primary key constraints. However, the query advantage brought by pre-aggregation such as ROLLUP cannot be exploited (because the essence is REPLACE, there is no such aggregation as SUM).
-3. Duplicate is suitable for ad-hoc queries of any dimension. Although it cannot take advantage of pre-aggregation either, it is not constrained by the aggregation model and can benefit from the column-store format (reading only the relevant columns rather than all Key columns).
diff --git a/docs/en/getting-started/data-partition.md b/docs/en/getting-started/data-partition.md
deleted file mode 100644
index 55c3fe2b6c..0000000000
--- a/docs/en/getting-started/data-partition.md
+++ /dev/null
@@ -1,398 +0,0 @@
----
-{
- "title": "Data Partition",
- "language": "en"
-}
----
-
-
-
-# Data Partition
-
-This document mainly introduces Doris's table creation and data partitioning, as well as problems that may be encountered during table creation and their solutions.
-
-## Basic Concepts
-
-In Doris, data is logically described in the form of a table.
-
-### Row & Column
-
-A table consists of rows and columns. A row is a single record of user data; a column describes a field within a row.
-
-Column can be divided into two broad categories: Key and Value. From a business perspective, Key and Value can correspond to dimension columns and metric columns, respectively. From the perspective of the aggregation model, the same row of Key columns will be aggregated into one row. The way the Value column is aggregated is specified by the user when the table is built. For an introduction to more aggregation models, see the [Doris Data Model](./data-model-rollup.md).
-
-### Tablet & Partition
-
-In Doris's storage engine, user data is horizontally divided into data tablets (also known as data buckets). Each tablet contains several rows of data. The data in different tablets does not overlap and is stored physically independently.
-
-Multiple tablets are logically attributed to different partitions. A tablet belongs to only one Partition. And a Partition contains several Tablets. Because the tablet is physically stored independently, it can be considered that the Partition is physically independent. Tablet is the smallest physical storage unit for data movement, replication, and so on.
-
-Several Partitions form a Table. A Partition can be thought of as the smallest logical unit of management. Data import and deletion can be performed against a single Partition, and some operations can only target a single Partition.
-
-## Data division
-
-We use a table-building operation to illustrate Doris' data partitioning.
-
-Doris's CREATE TABLE is a synchronous command. If the command returns successfully, the table has been created successfully.
-
-See more help with `HELP CREATE TABLE;`.
-
-This section introduces Doris's approach to building tables with an example.
-
-```
--- Range Partition
-
-CREATE TABLE IF NOT EXISTS example_db.example_range_tbl
-(
- `user_id` LARGEINT NOT NULL COMMENT "User id",
- `date` DATE NOT NULL COMMENT "Data fill in date time",
-    `timestamp` DATETIME NOT NULL COMMENT "Timestamp when the data was imported",
- `city` VARCHAR(20) COMMENT "The city where the user is located",
- `age` SMALLINT COMMENT "User age",
- `sex` TINYINT COMMENT "User gender",
- `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "User last visit time",
- `cost` BIGINT SUM DEFAULT "0" COMMENT "Total user consumption",
- `max_dwell_time` INT MAX DEFAULT "0" COMMENT "User maximum dwell time",
- `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "User minimum dwell time"
-)
-ENGINE=olap
-AGGREGATE KEY(`user_id`, `date`, `timestamp`, `city`, `age`, `sex`)
-PARTITION BY RANGE(`date`)
-(
- PARTITION `p201701` VALUES LESS THAN ("2017-02-01"),
- PARTITION `p201702` VALUES LESS THAN ("2017-03-01"),
- PARTITION `p201703` VALUES LESS THAN ("2017-04-01")
-)
-DISTRIBUTED BY HASH(`user_id`) BUCKETS 16
-PROPERTIES
-(
- "replication_num" = "3",
- "storage_medium" = "SSD",
- "storage_cooldown_time" = "2018-01-01 12:00:00"
-);
-
-
--- List Partition
-
-CREATE TABLE IF NOT EXISTS example_db.example_list_tbl
-(
- `user_id` LARGEINT NOT NULL COMMENT "User id",
- `date` DATE NOT NULL COMMENT "Data fill in date time",
-    `timestamp` DATETIME NOT NULL COMMENT "Timestamp when the data was imported",
- `city` VARCHAR(20) COMMENT "The city where the user is located",
- `age` SMALLINT COMMENT "User Age",
- `sex` TINYINT COMMENT "User gender",
- `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "User last visit time",
- `cost` BIGINT SUM DEFAULT "0" COMMENT "Total user consumption",
- `max_dwell_time` INT MAX DEFAULT "0" COMMENT "User maximum dwell time",
- `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "User minimum dwell time"
-)
-ENGINE=olap
-AGGREGATE KEY(`user_id`, `date`, `timestamp`, `city`, `age`, `sex`)
-PARTITION BY LIST(`city`)
-(
- PARTITION `p_cn` VALUES IN ("Beijing", "Shanghai", "Hong Kong"),
- PARTITION `p_usa` VALUES IN ("New York", "San Francisco"),
- PARTITION `p_jp` VALUES IN ("Tokyo")
-)
-DISTRIBUTED BY HASH(`user_id`) BUCKETS 16
-PROPERTIES
-(
- "replication_num" = "3",
- "storage_medium" = "SSD",
- "storage_cooldown_time" = "2018-01-01 12:00:00"
-);
-
-```
-
-### Column Definition
-
-Here we only use the AGGREGATE KEY data model as an example. See the [Doris Data Model](./data-model-rollup.md) for more data models.
-
-The basic type of column can be viewed by executing `HELP CREATE TABLE;` in mysql-client.
-
-In the AGGREGATE KEY data model, all columns that do not specify an aggregation mode (SUM, REPLACE, MAX, MIN) are treated as Key columns. The rest is the Value column.
-
-When defining columns, you can refer to the following suggestions:
-
-1. The Key column must precede all Value columns.
-2. Try to choose the type of integer. Because integer type calculations and lookups are much more efficient than strings.
-3. For integer types of different lengths, follow the principle of **sufficient is enough**.
-4. For the lengths of the VARCHAR and STRING types, also follow the principle of **sufficient is enough**.
-5. The total byte length of all columns (including Key and Value) cannot exceed 100KB.
-
-### Partitioning and binning
-
-Doris supports two levels of data partitioning. The first level is Partition, which supports Range and List partitioning. The second level is the Bucket (Tablet), which only supports Hash-based division.
-
-It is also possible to use only one level of partitioning. When using only one level, only Bucket division is supported.
-
-1. Partition
-
-    * The Partition columns can be one or more columns, and they must be KEY columns. The use of multi-column partitions is described later in the **Multi-column partitioning** section.
- * Regardless of the type of partition column, double quotes are required when writing partition values.
- * There is no theoretical limit on the number of partitions.
- * When you do not use Partition to build a table, the system will automatically generate a Partition with the same name as the table name. This Partition is not visible to the user and cannot be modified.
-
- #### Range Partition
-
- * Partition columns are usually time columns for easy management of old and new data.
-    * A partition supports specifying only the upper bound via `VALUES LESS THAN (...)`; the system uses the upper bound of the previous partition as the lower bound of the current partition, generating a left-closed, right-open interval. It also supports specifying both the upper and lower bounds via `VALUES [...)`, which likewise generates a left-closed, right-open interval.
-    * Partitions specified via `VALUES [...)` are easier to understand. Here is an example of how partition ranges change when partitions are added or deleted with the `VALUES LESS THAN (...)` statement:
- * As in the `example_range_tbl` example above, when the table is built, the following 3 partitions are automatically generated:
- ```
- P201701: [MIN_VALUE, 2017-02-01)
- P201702: [2017-02-01, 2017-03-01)
- P201703: [2017-03-01, 2017-04-01)
- ```
- * When we add a partition p201705 VALUES LESS THAN ("2017-06-01"), the partition results are as follows:
-
- ```
- P201701: [MIN_VALUE, 2017-02-01)
- P201702: [2017-02-01, 2017-03-01)
- P201703: [2017-03-01, 2017-04-01)
- P201705: [2017-04-01, 2017-06-01)
- ```
-
- * At this point we delete the partition p201703, the partition results are as follows:
-
- ```
- p201701: [MIN_VALUE, 2017-02-01)
- p201702: [2017-02-01, 2017-03-01)
- p201705: [2017-04-01, 2017-06-01)
- ```
-
- > Note that the partition range of p201702 and p201705 has not changed, and there is a hole between the two partitions: [2017-03-01, 2017-04-01). That is, if the imported data range is within this hole, it cannot be imported.
-
- * Continue to delete partition p201702, the partition results are as follows:
-
- ```
- p201701: [MIN_VALUE, 2017-02-01)
- p201705: [2017-04-01, 2017-06-01)
- The void range becomes: [2017-02-01, 2017-04-01)
- ```
-
- * Now add a partition p201702new VALUES LESS THAN ("2017-03-01"), the partition results are as follows:
-
- ```
- p201701: [MIN_VALUE, 2017-02-01)
- p201702new: [2017-02-01, 2017-03-01)
- p201705: [2017-04-01, 2017-06-01)
- ```
-
- > You can see that the hole size is reduced to: [2017-03-01, 2017-04-01)
-
- * Now delete partition p201701 and add partition p201612 VALUES LESS THAN ("2017-01-01"), the partition result is as follows:
-
- ```
- p201612: [MIN_VALUE, 2017-01-01)
- p201702new: [2017-02-01, 2017-03-01)
- p201705: [2017-04-01, 2017-06-01)
- ```
-
- > A new void appeared: [2017-01-01, 2017-02-01)
-
-    In summary, deleting a partition does not change the range of the remaining partitions, but it may leave holes. When a partition is added with the `VALUES LESS THAN` statement, its lower bound immediately follows the upper bound of the previous partition.
-
-    You cannot add partitions with overlapping ranges. The walkthrough above corresponds to the sketch below.
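-
-    The walkthrough above corresponds to statements like the following sketch (using the `example_range_tbl` table from the earlier example; the database name is assumed):
-
-    ```
-    ALTER TABLE example_db.example_range_tbl ADD PARTITION p201705 VALUES LESS THAN ("2017-06-01");
-    ALTER TABLE example_db.example_range_tbl DROP PARTITION p201703;
-    ALTER TABLE example_db.example_range_tbl ADD PARTITION p201702new VALUES LESS THAN ("2017-03-01");
-    ```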
-
- #### List Partition
-
-    * The partition columns support the `BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, LARGEINT, DATE, DATETIME, CHAR, VARCHAR` data types, and the partition values are enumerated values. A partition is hit only if the data matches one of the partition's enumerated values.
-    * A partition specifies the enumerated values it contains via `VALUES IN (...)`.
-    * The following example illustrates how partitions change when partitions are added or deleted (see the sketch at the end of this subsection).
-
- * As in the `example_list_tbl` example above, when the table is built, the following three partitions are automatically created.
-
- ```
- p_cn: ("Beijing", "Shanghai", "Hong Kong")
- p_usa: ("New York", "San Francisco")
- p_jp: ("Tokyo")
- ```
-
- * When we add a partition p_uk VALUES IN ("London"), the result of the partition is as follows
-
- ```
- p_cn: ("Beijing", "Shanghai", "Hong Kong")
- p_usa: ("New York", "San Francisco")
- p_jp: ("Tokyo")
- p_uk: ("London")
- ```
-
- * When we delete the partition p_jp, the result of the partition is as follows.
-
- ```
- p_cn: ("Beijing", "Shanghai", "Hong Kong")
- p_usa: ("New York", "San Francisco")
- p_uk: ("London")
- ```
-
-    You cannot add partitions whose enumerated values overlap with existing partitions.
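-
-    The walkthrough above corresponds to statements like the following sketch (using the `example_list_tbl` table from the earlier example; the database name is assumed):
-
-    ```
-    ALTER TABLE example_db.example_list_tbl ADD PARTITION p_uk VALUES IN ("London");
-    ALTER TABLE example_db.example_list_tbl DROP PARTITION p_jp;
-    ```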
-
-2. Bucket
-
- * If a Partition is used, the `DISTRIBUTED ...` statement describes the division rules for the data in each partition. If you do not use Partition, it describes the rules for dividing the data of the entire table.
-    * Bucket columns can be one or more columns, but they must be Key columns. Bucket columns can be the same as or different from the Partition columns.
- * The choice of bucket column is a trade-off between **query throughput** and **query concurrency**:
-
- 1. If you select multiple bucket columns, the data is more evenly distributed. However, if the query condition does not include the equivalent condition for all bucket columns, a query will scan all buckets. The throughput of such queries will increase, and the latency of a single query will decrease. This method is suitable for large throughput and low concurrent query scenarios.
- 2. If you select only one or a few bucket columns, the point query can query only one bucket. This approach is suitable for high-concurrency point query scenarios.
-
- * There is no theoretical limit on the number of buckets.
-
-3. Recommendations on the number and amount of data for Partitions and Buckets.
-
- * The total number of tablets in a table is equal to (Partition num * Bucket num).
-    * Without considering expansion, the recommended number of tablets in a table is slightly more than the number of disks in the entire cluster.
-    * There is theoretically no upper or lower bound on the data volume of a single tablet, but 1GB - 10GB is recommended. If a tablet holds too little data, data aggregation works poorly and metadata management pressure increases. If it holds too much data, replica migration and repair are hindered, and the cost of retrying failed Schema Change or Rollup operations increases (the retry granularity of these operations is the Tablet).
- * When the tablet's data volume principle and quantity principle conflict, it is recommended to prioritize the data volume principle.
-    * When creating a table, the number of Buckets is specified uniformly for every partition. However, when adding a partition dynamically (`ADD PARTITION`), the number of Buckets for the new partition can be specified separately. This can be used to conveniently handle data shrinking or growth (see the sketch after this list).
- * Once the number of Buckets for a Partition is specified, it cannot be changed. Therefore, when determining the number of Buckets, you need to consider the expansion of the cluster in advance. For example, there are currently only 3 hosts, and each host has 1 disk. If the number of Buckets is only set to 3 or less, then even if you add more machines later, you can't increase the concurrency.
-    * Some examples: suppose there are 10 BEs, each with one disk. For a table with a total size of 500MB, 4-8 tablets are reasonable; for 5GB, 8-16 tablets; for 50GB, 32 tablets; for 500GB, partition the table, with each partition around 50GB and 16-32 tablets per partition; for 5TB, partition the table, with each partition around 50GB and 16-32 tablets per partition.
-
-    > Note: The data volume of a table can be viewed with the `SHOW DATA` command. Divide the result by the number of replicas to get the actual data volume of the table.
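-
-    As noted above, the bucket count of a new partition can be set individually, and `SHOW DATA` reports table sizes. A sketch with a hypothetical new partition, reusing `example_range_tbl` from the earlier example:
-
-    ```
-    -- Add a new partition with its own bucket count
-    ALTER TABLE example_db.example_range_tbl
-    ADD PARTITION p201706 VALUES LESS THAN ("2017-07-01")
-    DISTRIBUTED BY HASH(`user_id`) BUCKETS 32;
-
-    -- Check the table's data volume (divide by the replica count)
-    SHOW DATA FROM example_db.example_range_tbl;
-    ```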
-
-#### Multi-column partition
-
-Doris supports specifying multiple columns as partition columns, examples are as follows:
-
-##### Range Partition
-
-```
- PARTITION BY RANGE(`date`, `id`)
- (
- PARTITION `p201701_1000` VALUES LESS THAN ("2017-02-01", "1000"),
- PARTITION `p201702_2000` VALUES LESS THAN ("2017-03-01", "2000"),
- PARTITION `p201703_all` VALUES LESS THAN ("2017-04-01")
- )
-```
-
- In the above example, we specify `date`(DATE type) and `id`(INT type) as partition columns. The resulting partitions in the above example are as follows:
-
-```
-p201701_1000: [(MIN_VALUE, MIN_VALUE), ("2017-02-01", "1000") )
-p201702_2000: [("2017-02-01", "1000"), ("2017-03-01", "2000") )
-p201703_all: [("2017-03-01", "2000"), ("2017-04-01", MIN_VALUE))
-```
-
-Note that in the last partition, the user specified only the partition value of the `date` column, so the partition value of the `id` column is filled with `MIN_VALUE` by default. When data is inserted, the partition column values are compared in order to determine the corresponding partition. Examples:
-
-```
- Data --> Partition
- 2017-01-01, 200 --> p201701_1000
- 2017-01-01, 2000 --> p201701_1000
- 2017-02-01, 100 --> p201701_1000
- 2017-02-01, 2000 --> p201702_2000
- 2017-02-15, 5000 --> p201702_2000
- 2017-03-01, 2000 --> p201703_all
- 2017-03-10, 1 --> p201703_all
- 2017-04-01, 1000 --> Unable to import
- 2017-05-01, 1000 --> Unable to import
-```
-
-##### List Partition
-
-```
- PARTITION BY LIST(`id`, `city`)
- (
- PARTITION `p1_city` VALUES IN (("1", "Beijing"), ("1", "Shanghai")),
- PARTITION `p2_city` VALUES IN (("2", "Beijing"), ("2", "Shanghai")),
- PARTITION `p3_city` VALUES IN (("3", "Beijing"), ("3", "Shanghai"))
- )
-```
-
-In the above example, we specify `id`(INT type) and `city`(VARCHAR type) as partition columns. The above example ends up with the following partitions.
-
-```
- p1_city: [("1", "Beijing"), ("1", "Shanghai")]
- p2_city: [("2", "Beijing"), ("2", "Shanghai")]
- p3_city: [("3", "Beijing"), ("3", "Shanghai")]
-```
-
-When the user inserts data, the partition column values will be compared sequentially in order to finally get the corresponding partition. An example is as follows.
-
-```
-Data ---> Partition
-1, Beijing ---> p1_city
-1, Shanghai ---> p1_city
-2, Shanghai ---> p2_city
-3, Beijing ---> p3_city
-1, Tianjin ---> Unable to import
-4, Beijing ---> Unable to import
-```
-
-### PROPERTIES
-
-In the PROPERTIES section at the end of the CREATE TABLE statement, the following two parameters can be specified:
-
-1. replication_num
-
- * The number of copies per tablet. The default is 3, it is recommended to keep the default. In the build statement, the number of Tablet copies in all Partitions is uniformly specified. When you add a new partition, you can individually specify the number of copies of the tablet in the new partition.
-    * The number of replicas can be modified at runtime (see the sketch after this list). It is strongly recommended to keep it an odd number.
-    * The maximum number of replicas depends on the number of independent IPs in the cluster (note: not the number of BEs). The replica distribution principle in Doris is that replicas of the same Tablet are not allowed on the same physical machine, and physical machines are identified by IP. Therefore, even if 3 or more BE instances are deployed on the same physical machine, if those BEs share the same IP, the replica count can only be set to 1.
- * For some small, and infrequently updated dimension tables, consider setting more copies. In this way, when joining queries, there is a greater probability of local data join.
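-
-    A hedged sketch of adjusting the replica count at runtime (exact syntax may vary across Doris versions; the partition and table names are taken from the earlier example):
-
-    ```
-    ALTER TABLE example_db.example_range_tbl
-    MODIFY PARTITION p201701 SET ("replication_num" = "3");
-    ```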
-
-2. storage_medium & storage\_cooldown\_time
-
-    * The BE data storage directories can be explicitly specified as SSD or HDD (differentiated by the .SSD or .HDD suffix). When creating a table, the storage medium of all Partitions can be specified uniformly. Note that the suffix only declares the disk medium explicitly; Doris does not check whether it matches the actual medium type.
- * The default initial storage media can be specified by `default_storage_medium= XXX` in the fe configuration file `fe.conf`, or, if not, by default, HDD. If specified as an SSD, the data is initially stored on the SSD.
- * If storage\_cooldown\_time is not specified, the data is automatically migrated from the SSD to the HDD after 30 days by default. If storage\_cooldown\_time is specified, the data will not migrate until the storage_cooldown_time time is reached.
-    * Note that when storage_medium is specified, if the FE parameter `enable_strict_storage_medium_check` is `False`, this parameter is only a "best effort" setting. Even if no SSD storage medium is configured in the cluster, no error is reported, and the data is automatically stored in an available data directory.
-    Similarly, if the SSD medium is inaccessible or out of space, the data may initially be stored directly on other available media. When the data is due to be migrated to HDD, the migration may also fail (but will be retried continuously) if the HDD medium is inaccessible or out of space.
-    If the FE parameter `enable_strict_storage_medium_check` is `True`, then when no SSD storage medium is configured in the cluster, the error `Failed to find enough host in all Backends with storage medium is SSD` is reported.
-
-### ENGINE
-
-In this example, the type of ENGINE is olap, the default ENGINE type. In Doris, only this ENGINE type is managed and stored by Doris. Other ENGINE types, such as mysql, broker, es, etc., are essentially mappings to tables in other external databases or systems to ensure that Doris can read the data. And Doris itself does not create, manage, and store any tables and data of a non-olap ENGINE type.
-
-### Other
-
-`IF NOT EXISTS` indicates that if the table has not been created, it is created. Note that only the table name is judged here, and it is not determined whether the new table structure is the same as the existing table structure. So if there is a table with the same name but different structure, the command will also return success, but it does not mean that a new table and a new structure have been created.
-
-## Common Problems
-
-### Build Table Operations FAQ
-
-1. If a syntax error occurs in a long CREATE TABLE statement, the reported syntax error message may be incomplete. Here is a list of possible syntax issues for manual troubleshooting:
-
- * The syntax is incorrect. Please read `HELP CREATE TABLE;` carefully to check the relevant syntax structure.
-    * Reserved words. When a user-defined name collides with a reserved word, it must be enclosed in backquotes ``. It is recommended to use this symbol for all custom names.
- * Chinese characters or full-width characters. Non-utf8 encoded Chinese characters, or hidden full-width characters (spaces, punctuation, etc.) can cause syntax errors. It is recommended to check with a text editor with invisible characters.
-
-2. `Failed to create partition [xxx] . Timeout`
-
-    Doris creates tables sequentially at Partition granularity. This error may be reported when a Partition fails to be created. Even if you don't use Partition, `Failed to create partition` may still be reported when table creation has a problem, because, as mentioned earlier, Doris creates an unmodifiable default Partition for tables without a specified Partition.
-
-    When this error is encountered, it is usually because the BE ran into problems while creating the data tablets. You can troubleshoot with the following steps:
-
-    1. In fe.log, find the `Failed to create partition` log at the corresponding point in time. In that log line, a series of number pairs like `{10001-10010}` appears: the first number of a pair is the Backend ID and the second is the Tablet ID. The pair above means that creating the tablet with ID 10010 failed on the Backend with ID 10001.
- 2. Go to the be.INFO log corresponding to Backend and find the log related to the tablet id in the corresponding time period. You can find the error message.
- 3. Listed below are some common tablet creation failure errors, including but not limited to:
-        * The BE did not receive the task, and no log related to the tablet ID can be found in be.INFO; or the tablet was created successfully on the BE but the report to the FE failed. For these issues, see the [Deployment and Upgrade Documentation] to check the connectivity between FE and BE.
-        * Pre-allocating memory failed. It may be that the byte length of a row in the table exceeds 100KB.
-        * `Too many open files`. The number of open file handles exceeds the Linux system limit. The handle limit of the Linux system needs to be modified.
-
- You can also extend the timeout by setting `tablet_create_timeout_second=xxx` in fe.conf. The default is 2 seconds.
-
-3. The build table command does not return results for a long time.
-
-    Doris's table creation command is a synchronous command. Its timeout is currently calculated in a simple way, namely (tablet num * replication num) seconds. If many data tablets are created and some of them fail to be created, the command may wait until the timeout before returning an error.
-
- Under normal circumstances, the statement will return in a few seconds or ten seconds. If it is more than one minute, it is recommended to cancel this operation directly and go to the FE or BE log to view the related errors.
diff --git a/docs/en/getting-started/hit-the-rollup.md b/docs/en/getting-started/hit-the-rollup.md
deleted file mode 100644
index 4cc8d7ed42..0000000000
--- a/docs/en/getting-started/hit-the-rollup.md
+++ /dev/null
@@ -1,298 +0,0 @@
----
-{
- "title": "Rollup and query",
- "language": "en"
-}
----
-
-
-
-# Rollup and query
-
-As an aggregate materialized view in Doris, Rollup can play two roles in queries:
-
-* Index
-* Aggregate data (only for aggregate models, aggregate key)
-
-However, certain conditions need to be met to hit a Rollup. The PREAGGREGATION value of the ScanNode in the execution plan indicates whether pre-aggregation is enabled, and the rollup field indicates which Rollup table is hit.
-
-## Noun Interpretation
-
-Base: Base table.
-
-Rollup: Generally, it refers to the Rollup tables created based on Base tables, but in some scenarios, it includes Base and Rollup tables.
-
-## Index
-
-Doris's prefix index has been introduced in the previous section: in the underlying storage engine, Doris generates for each Base/Rollup table a sorted sparse index over the first 36 bytes of the sort columns (for VARCHAR columns the prefix index may be shorter than 36 bytes, because a VARCHAR column truncates the prefix index and contributes at most 20 bytes). The data itself is also stored sorted; the index locates a position and the data is then found by binary search. When planning a query, Doris matches the query conditions against the prefix index of each Base/Rollup and selects the Base/Rollup with the longest matching prefix.
-
-```
- ---> matching from left to right
-+----+----+----+----+----+----+
-| c1 | c2 | c3 | c4 | c5 |... |
-```
-
-As shown in the figure above, the where and on conditions in the query are pushed down to the ScanNode and matched against the prefix index starting from its first column: Doris checks whether each column appears in the conditions and accumulates the matched length until a column cannot be matched or the 36-byte limit is reached (a VARCHAR column can match at most 20 bytes and terminates the prefix index before 36 bytes). The Base/Rollup with the longest matched length is then chosen. The following example shows a Base table and four Rollups:
-
-```
-+---------------+-------+--------------+------+-------+---------+-------+
-| IndexName | Field | Type | Null | Key | Default | Extra |
-+---------------+-------+--------------+------+-------+---------+-------+
-| test | k1 | TINYINT | Yes | true | N/A | |
-| | k2 | SMALLINT | Yes | true | N/A | |
-| | k3 | INT | Yes | true | N/A | |
-| | k4 | BIGINT | Yes | true | N/A | |
-| | k5 | DECIMAL(9,3) | Yes | true | N/A | |
-| | k6 | CHAR(5) | Yes | true | N/A | |
-| | k7 | DATE | Yes | true | N/A | |
-| | k8 | DATETIME | Yes | true | N/A | |
-| | k9 | VARCHAR(20) | Yes | true | N/A | |
-| | k10 | DOUBLE | Yes | false | N/A | MAX |
-| | k11 | FLOAT | Yes | false | N/A | SUM |
-| | | | | | | |
-| rollup_index1 | k9 | VARCHAR(20) | Yes | true | N/A | |
-| | k1 | TINYINT | Yes | true | N/A | |
-| | k2 | SMALLINT | Yes | true | N/A | |
-| | k3 | INT | Yes | true | N/A | |
-| | k4 | BIGINT | Yes | true | N/A | |
-| | k5 | DECIMAL(9,3) | Yes | true | N/A | |
-| | k6 | CHAR(5) | Yes | true | N/A | |
-| | k7 | DATE | Yes | true | N/A | |
-| | k8 | DATETIME | Yes | true | N/A | |
-| | k10 | DOUBLE | Yes | false | N/A | MAX |
-| | k11 | FLOAT | Yes | false | N/A | SUM |
-| | | | | | | |
-| rollup_index2 | k9 | VARCHAR(20) | Yes | true | N/A | |
-| | k2 | SMALLINT | Yes | true | N/A | |
-| | k1 | TINYINT | Yes | true | N/A | |
-| | k3 | INT | Yes | true | N/A | |
-| | k4 | BIGINT | Yes | true | N/A | |
-| | k5 | DECIMAL(9,3) | Yes | true | N/A | |
-| | k6 | CHAR(5) | Yes | true | N/A | |
-| | k7 | DATE | Yes | true | N/A | |
-| | k8 | DATETIME | Yes | true | N/A | |
-| | k10 | DOUBLE | Yes | false | N/A | MAX |
-| | k11 | FLOAT | Yes | false | N/A | SUM |
-| | | | | | | |
-| rollup_index3 | k4 | BIGINT | Yes | true | N/A | |
-| | k5 | DECIMAL(9,3) | Yes | true | N/A | |
-| | k6 | CHAR(5) | Yes | true | N/A | |
-| | k1 | TINYINT | Yes | true | N/A | |
-| | k2 | SMALLINT | Yes | true | N/A | |
-| | k3 | INT | Yes | true | N/A | |
-| | k7 | DATE | Yes | true | N/A | |
-| | k8 | DATETIME | Yes | true | N/A | |
-| | k9 | VARCHAR(20) | Yes | true | N/A | |
-| | k10 | DOUBLE | Yes | false | N/A | MAX |
-| | k11 | FLOAT | Yes | false | N/A | SUM |
-| | | | | | | |
-| rollup_index4 | k4 | BIGINT | Yes | true | N/A | |
-| | k6 | CHAR(5) | Yes | true | N/A | |
-| | k5 | DECIMAL(9,3) | Yes | true | N/A | |
-| | k1 | TINYINT | Yes | true | N/A | |
-| | k2 | SMALLINT | Yes | true | N/A | |
-| | k3 | INT | Yes | true | N/A | |
-| | k7 | DATE | Yes | true | N/A | |
-| | k8 | DATETIME | Yes | true | N/A | |
-| | k9 | VARCHAR(20) | Yes | true | N/A | |
-| | k10 | DOUBLE | Yes | false | N/A | MAX |
-| | k11 | FLOAT | Yes | false | N/A | SUM |
-+---------------+-------+--------------+------+-------+---------+-------+
-```
-
-The prefix indexes of the five tables are
-
-```
-Base(k1 ,k2, k3, k4, k5, k6, k7)
-
-rollup_index1(k9)
-
-rollup_index2(k9)
-
-rollup_index3(k4, k5, k6, k1, k2, k3, k7)
-
-rollup_index4(k4, k6, k5, k1, k2, k3, k7)
-```
-
-Conditions on prefix-index columns can use the prefix index only if they are `=`, `<`, `>`, `<=`, `>=`, `in` or `between`, and these conditions must be combined with `and`; conditions using `or`, `!=` and so on cannot hit the prefix index. Then look at the following query:
-
-```
-SELECT * FROM test WHERE k1 = 1 AND k2 > 3;
-```
-
-The query has conditions on k1 and k2. Only the Base table has k1 as the first column of its prefix index, so the Base table (test) matches the longest prefix index. EXPLAIN shows:
-
-```
-| 0:OlapScanNode
-| TABLE: test
-| PREAGGREGATION: OFF. Reason: No AggregateInfo
-| PREDICATES: `k1` = 1, `k2` > 3
-| partitions=1/1
-| rollup: test
-| buckets=1/10
-| cardinality=-1
-| avgRowSize=0.0
-| numNodes=0
-| tuple ids: 0
-```
-
-Look again at the following queries:
-
-`SELECT * FROM test WHERE k4 =1 AND k5 > 3;`
-
-The query has conditions on k4 and k5. The first column of both rollup_index3 and rollup_index4 is k4, but the second column of rollup_index3 is k5, so rollup_index3 matches the longest prefix index:
-
-```
-| 0:OlapScanNode
-| TABLE: test
-| PREAGGREGATION: OFF. Reason: No AggregateInfo
-| PREDICATES: `k4` = 1, `k5` > 3
-| partitions=1/1
-| rollup: rollup_index3
-| buckets=10/10
-| cardinality=-1
-| avgRowSize=0.0
-| numNodes=0
-| tuple ids: 0
-```
-
-Now we try to match the conditions on the column containing varchar, as follows:
-
-`SELECT * FROM test WHERE k9 IN ("xxx", "yyyy") AND k1 = 10;`
-
-The query has conditions on k9 and k1. The first column of both rollup_index1 and rollup_index2 is k9, so either could reasonably be chosen and one would be picked at random (the VARCHAR column contributes exactly 20 bytes, and the prefix index is truncated before reaching 36 bytes). However, the current strategy continues to match k1: since the second column of rollup_index1 is k1, rollup_index1 is chosen. In fact, the extra k1 condition does not provide any acceleration. (If conditions outside the prefix index need to accelerate a query, a Bloom Filter index can be created. This is typically done for string columns, because Doris already has block-level Min/Max indexes for integer and date columns.) The EXPLAIN result is as follows:
-
-```
-| 0:OlapScanNode
-| TABLE: test
-| PREAGGREGATION: OFF. Reason: No AggregateInfo
-| PREDICATES: `k9` IN ('xxx', 'yyyy'), `k1` = 10
-| partitions=1/1
-| rollup: rollup_index1
-| buckets=1/10
-| cardinality=-1
-| avgRowSize=0.0
-| numNodes=0
-| tuple ids: 0
-```
-
-Finally, look at a query that can be hit by more than one Rollup:
-
-`SELECT * FROM test WHERE k4 < 1000 AND k5 = 80 AND k6 >= 10000;`
-
-The query has conditions on k4, k5 and k6. The first three columns of both rollup_index3 and rollup_index4 contain these three columns, so the matched prefix index lengths are the same and either could be selected. The current default strategy is to select the Rollup that was created earlier, which here is rollup_index3.
-
-```
-| 0:OlapScanNode
-| TABLE: test
-| PREAGGREGATION: OFF. Reason: No AggregateInfo
-| PREDICATES: `k4` < 1000, `k5` = 80, `k6` >= 10000.0
-| partitions=1/1
-| rollup: rollup_index3
-| buckets=10/10
-| cardinality=-1
-| avgRowSize=0.0
-| numNodes=0
-| tuple ids: 0
-```
-
-If you modify the above query slightly as follows:
-
-`SELECT * FROM test WHERE k4 < 1000 AND k5 = 80 OR k6 >= 10000;`
-
-This query cannot hit the prefix index (nor do the Min/Max and BloomFilter indexes in the Doris storage engine take effect).
-
-## Aggregate data
-
-Of course, aggregating data is the essential function of aggregate materialized views. Such materialized views are very helpful for aggregation and report queries. To hit an aggregate materialized view, the following prerequisites must be met:
-
-1. There is a Rollup that contains all the columns involved in the query or subquery.
-2. If there is Join in a query or sub-query, the type of Join needs to be Inner join.
-
-The following are some types of aggregated queries that can hit Rollup.
-
-| Column type \ Query type | Sum | Distinct/Count Distinct | Min | Max | APPROX_COUNT_DISTINCT |
-|--------------|-------|-------------------------|-------|-------|-------|
-| Key | false | true | true | true | true |
-| Value(Sum) | true | false | false | false | false |
-|Value(Replace)| false | false | false | false | false |
-| Value(Min) | false | false | true | false | false |
-| Value(Max) | false | false | false | true | false |
-
-
-If the above conditions are met, Rollup selection for the aggregation model is judged in two stages:
-
-1. First, match the Rollup table with the longest prefix index hit by the query conditions, following the index strategy above.
-2. Then compare the row counts of the candidate Rollups and select the Rollup with the fewest rows.
-
-The following Base table and Rollup:
-
-```
-+-------------+-------+--------------+------+-------+---------+-------+
-| IndexName | Field | Type | Null | Key | Default | Extra |
-+-------------+-------+--------------+------+-------+---------+-------+
-| test_rollup | k1 | TINYINT | Yes | true | N/A | |
-| | k2 | SMALLINT | Yes | true | N/A | |
-| | k3 | INT | Yes | true | N/A | |
-| | k4 | BIGINT | Yes | true | N/A | |
-| | k5 | DECIMAL(9,3) | Yes | true | N/A | |
-| | k6 | CHAR(5) | Yes | true | N/A | |
-| | k7 | DATE | Yes | true | N/A | |
-| | k8 | DATETIME | Yes | true | N/A | |
-| | k9 | VARCHAR(20) | Yes | true | N/A | |
-| | k10 | DOUBLE | Yes | false | N/A | MAX |
-| | k11 | FLOAT | Yes | false | N/A | SUM |
-| | | | | | | |
-| rollup2 | k1 | TINYINT | Yes | true | N/A | |
-| | k2 | SMALLINT | Yes | true | N/A | |
-| | k3 | INT | Yes | true | N/A | |
-| | k10 | DOUBLE | Yes | false | N/A | MAX |
-| | k11 | FLOAT | Yes | false | N/A | SUM |
-| | | | | | | |
-| rollup1 | k1 | TINYINT | Yes | true | N/A | |
-| | k2 | SMALLINT | Yes | true | N/A | |
-| | k3 | INT | Yes | true | N/A | |
-| | k4 | BIGINT | Yes | true | N/A | |
-| | k5 | DECIMAL(9,3) | Yes | true | N/A | |
-| | k10 | DOUBLE | Yes | false | N/A | MAX |
-| | k11 | FLOAT | Yes | false | N/A | SUM |
-+-------------+-------+--------------+------+-------+---------+-------+
-```
-
-
-See the following queries:
-
-`SELECT SUM(k11) FROM test_rollup WHERE k1 = 10 AND k2 > 200 AND k3 in (1,2,3);`
-
-First, it is determined whether the query can hit an aggregated Rollup table; checking the table above, it can. The query has conditions on k1, k2 and k3, and the first three columns of test_rollup, rollup1 and rollup2 all contain these columns, so the prefix index lengths are the same. Comparing row counts, rollup2 obviously has the highest degree of aggregation and the fewest rows, so rollup2 is selected.
-
-```
-| 0:OlapScanNode |
-| TABLE: test_rollup |
-| PREAGGREGATION: ON |
-| PREDICATES: `k1` = 10, `k2` > 200, `k3` IN (1, 2, 3) |
-| partitions=1/1 |
-| rollup: rollup2 |
-| buckets=1/10 |
-| cardinality=-1 |
-| avgRowSize=0.0 |
-| numNodes=0 |
-| tuple ids: 0 |
-```
diff --git a/new-docs/en/install/install-deploy.md b/docs/en/install/install-deploy.md
similarity index 100%
rename from new-docs/en/install/install-deploy.md
rename to docs/en/install/install-deploy.md
diff --git a/new-docs/en/install/source-install/compilation-arm.md b/docs/en/install/source-install/compilation-arm.md
similarity index 100%
rename from new-docs/en/install/source-install/compilation-arm.md
rename to docs/en/install/source-install/compilation-arm.md
diff --git a/docs/en/installing/compilation-with-ldb-toolchain.md b/docs/en/install/source-install/compilation-with-ldb-toolchain.md
similarity index 100%
rename from docs/en/installing/compilation-with-ldb-toolchain.md
rename to docs/en/install/source-install/compilation-with-ldb-toolchain.md
diff --git a/new-docs/en/install/source-install/compilation.md b/docs/en/install/source-install/compilation.md
similarity index 100%
rename from new-docs/en/install/source-install/compilation.md
rename to docs/en/install/source-install/compilation.md
diff --git a/docs/en/installing/compilation-arm.md b/docs/en/installing/compilation-arm.md
deleted file mode 100644
index 305ee35aac..0000000000
--- a/docs/en/installing/compilation-arm.md
+++ /dev/null
@@ -1,258 +0,0 @@
----
-{
- "title": "Compile on ARM platform",
- "language": "en"
-}
----
-
-
-
-
-# Compile and Run Doris on ARM64 + KylinOS.
-
-This document describes how to compile Doris on the ARM64 platform.
-
-Note that this document is only a guide document. Other errors may occur when compiling in different environments.
-
-## Software and hardware environment
-
-1. KylinOS version:
-
- ```
- $> cat /etc/.kyinfo
- name=Kylin-Server
- milestone=10-SP1-Release-Build04-20200711
- arch=arm64
- beta=False
- time=2020-07-11 17:16:54
- dist_id=Kylin-Server-10-SP1-Release-Build04-20200711-arm64-2020-07-11 17:16:54
- ```
-
-2. CPU model
-
- ```
- $> cat /proc/cpuinfo
- model name: Phytium,FT-2000+/64
- ```
-
-## Compile using ldb-toolchain
-
-This method works with Doris versions after [commit 7f3564](https://github.com/apache/incubator-doris/commit/7f3564cca62de49c9f2ea67fcf735921dbebb4d1)
-
-Download [ldb\_toolchain\_gen.aarch64.sh](https://github.com/amosbird/ldb_toolchain_gen/releases/download/v0.9.1/ldb_toolchain_gen.aarch64.sh)
-
-For subsequent compilation, see [Compiling with LDB toolchain](./compilation-with-ldb-toolchain.md)
-
-Note that both jdk and nodejs need to be downloaded with the corresponding aarch64 versions:
-
-1. [Java8-aarch64](https://doris-thirdparty-repo.bj.bcebos.com/thirdparty/jdk-8u291-linux-aarch64.tar.gz)
-2. [Node v16.3.0-aarch64](https://doris-thirdparty-repo.bj.bcebos.com/thirdparty/node-v16.3.0-linux-arm64.tar.xz)
-
-## ~~Compile with GCC 10 (deprecated)~~
-
-This method only works with Doris source code before [commit 68bab73](https://github.com/apache/incubator-doris/commit/68bab73c359e40bf485a663e9a6e6ee76d81d382).
-
-### Compilation tool installation (no network)
-
-In the example, all tools are installed in the `/home/doris/tools/installed/` directory.
-
-Please obtain the required installation packages in advance in an environment with network access.
-
-#### 1. Install gcc10
-
-Download gcc-10.1.0
-
-```
-wget https://mirrors.tuna.tsinghua.edu.cn/gnu/gcc/gcc-10.1.0/gcc-10.1.0.tar.gz
-```
-
-After unzipping, check the dependencies in `contrib/download_prerequisites` and download:
-
-```
-http://gcc.gnu.org/pub/gcc/infrastructure/gmp-6.1.0.tar.bz2
-http://gcc.gnu.org/pub/gcc/infrastructure/mpfr-3.1.4.tar.bz2
-http://gcc.gnu.org/pub/gcc/infrastructure/mpc-1.0.3.tar.gz
-http://gcc.gnu.org/pub/gcc/infrastructure/isl-0.18.tar.bz2
-```
-
-Unzip these four dependencies, then move to the gcc-10.1.0 source directory and rename them to gmp, isl, mpc, mpfr.
-
-Download and install automake-1.15 (because gcc10 will find automake 1.15 version during compilation)
-
-```
-https://ftp.gnu.org/gnu/automake/automake-1.15.tar.gz
-tar xzf automake-1.15.tar.gz
-./configure --prefix=/home/doris/tools/installed
-make && make install
-export PATH=/home/doris/tools/installed/bin:$PATH
-```
-
-Compile GCC10:
-
-```
-cd gcc-10.1.0
-./configure --prefix=/home/doris/tools/installed
-make -j && make install
-```
-
-The compilation takes a long time.
-
-#### 2. Install other compilation components
-
-1. jdk-8u291-linux-aarch64.tar.gz
-
- `https://www.oracle.com/java/technologies/javase/javase-jdk8-downloads.html`
-
- No need to compile, just use it out of the box.
-
-2. cmake-3.19.8-Linux-aarch64.tar.gz
-
- `https://cmake.org/download/`
-
- No need to compile, just use it out of the box
-
-3. apache-maven-3.8.1-bin.tar.gz
-
- `https://maven.apache.org/download.cgi`
-
- No need to compile, just use it out of the box
-
-4. nodejs 16.3.0
-
- `https://nodejs.org/dist/v16.3.0/node-v16.3.0-linux-arm64.tar.xz`
-
- No need to compile, just use it out of the box
-
-5. libtool-2.4.6.tar.gz
-
-    Used for compiling third-party components. Although the system may come with libtool, libtool needs to match automake, otherwise problems can easily occur.
-
- ```
- https://ftp.gnu.org/gnu/libtool/libtool-2.4.6.tar.gz
- cd libtool-2.4.6/
- ./configure --prefix=/home/doris/tools/installed
- make -j && make install
- ```
-
-6. binutils-2.36.tar.xz (to obtain bfd.h)
-
- ```
- https://ftp.gnu.org/gnu/binutils/binutils-2.36.tar.bz2
- ./configure --prefix=/home/doris/tools/installed
- make -j && make install
- ```
-
-7. Libiberty (for compiling BE)
-
- The source code of this library is under the source code package of gcc-10.1.0
- ```
- cd gcc-10.1.0/libiberty/
- ./configure --prefix=/home/doris/tools/installed
- make
- ```
-
- After compilation, libiberty.a will be generated, which can be moved to the lib64 directory of Doris' thirdparty.
-
-#### 3. Compile third-party libraries
-
-Suppose Doris source code is under `/home/doris/doris-src/`.
-
-1. Manually download all third-party libraries and place them in the thirdparty/src directory.
-2. Add `custom_env.sh` in the Doris source directory and add the following content
-
- ```
- export DORIS_THIRDPARTY=/home/doris/doris-src/thirdparty/
- export JAVA_HOME=/home/doris/tools/jdk1.8.0_291/
- export DORIS_GCC_HOME=/home/doris/tools/installed/
- export PATCH_COMPILER_RT=true
- ```
-
- Pay attention to replace the corresponding directory
-
-3. Modify part of the content in build-thirdparty.sh
-
- 1. Close `build_mysql` and `build_libhdfs3`
-
- mysql is no longer needed. However, libhdfs3 does not support arm architecture for the time being, so running Doris in arm does not support direct access to hdfs through libhdfs3, and requires a broker.
-
- 2. Add the configure parameter in `build_curl`: `--without-libpsl`. If it is not added, an error may be reported during the linking phase of the final compilation of Doris BE: `undefined reference to ‘psl_is_cookie_domain_acceptable'`
-
-4. Execute build-thirdparty.sh. Here are only possible errors
-
- * `error: narrowing conversion of'-1' from'int' to'char' [-Wnarrowing]`
-
- There will be an error when compiling brpc 0.9.7. The solution is to add `-Wno-narrowing` in `CMAKE_CXX_FLAGS` of CMakeLists.txt of brpc. This problem has been fixed in the brpc master code:
-
- `https://github.com/apache/incubator-brpc/issues/1091`
-
- * `libz.a(deflate.o): relocation R_AARCH64_ADR_PREL_PG_HI21 against symbol `z_errmsg' which may bind externally can not be used when making a shared object; recompile with -fPIC`
-
- There will be errors when compiling brpc 0.9.7, and libcrypto will also report similar errors. The reason is unknown. It seems that under aarch64, brpc needs to link the dynamic zlib and crypto libraries. But when we compile these two third-party libraries, we only compiled .a static files. Solution: Recompile zlib and openssl to generate .so dynamic library:
-
- Open `build-thirdparty.sh`, find the `build_zlib` function, and change:
-
- ```
-    # change this line:
-    ./configure --prefix=$TP_INSTALL_DIR --static
-    # to:
-    ./configure --prefix=$TP_INSTALL_DIR
- ```
-
- Find `build_openssl` and comment out the following parts:
-
- ```
-    #if [ -f $TP_INSTALL_DIR/lib64/libcrypto.so ]; then
-    #    rm -rf $TP_INSTALL_DIR/lib64/libcrypto.so*
-    #fi
-    #if [ -f $TP_INSTALL_DIR/lib64/libssl.so ]; then
-    #    rm -rf $TP_INSTALL_DIR/lib64/libssl.so*
-    #fi
- ```
-
- Then go to `build-thirdparty.sh`, comment out other `build_xxx`, open only `build_zlib` and `build_openssl`, and `build_brpc` and later `build_xxx`. Then re-execute `build-thirdparty.sh`.
-
- * The compilation is stuck at a certain stage.
-
- Not sure why. Solution: Rerun `build-thirdparty.sh`. `build-thirdparty.sh` can be executed repeatedly.
-
-#### 4. Compile Doris source code
-
-First run the following command to check whether the compilation machine supports the avx2 instruction set
-
-```
-$ cat /proc/cpuinfo | grep avx2
-```
-
-If it is not supported, use the following command to compile
-
-```
-$ USE_AVX2=0 sh build.sh
-```
-
-If supported, you can directly execute `sh build.sh` without adding USE_AVX2=0.
-
-
-#### 5. FAQ
-
-1. `undefined reference to psl_free` appears when compiling Doris
-
- libcurl will call libpsl functions, but libpsl is not linked for an unknown reason. Solutions (choose one of the two):
-
- 1. Add `--without-libpsl` to the `build_curl` method in `thirdparty/build-thirdparty.sh`, recompile libcurl, and then recompile Doris.
- 2. About line 603 in `be/CMakeLists.txt`, add `-lpsl` after `-pthread`, and then recompile Doris.
diff --git a/docs/en/installing/compilation.md b/docs/en/installing/compilation.md
deleted file mode 100644
index 3f25b0f8cb..0000000000
--- a/docs/en/installing/compilation.md
+++ /dev/null
@@ -1,263 +0,0 @@
----
-{
- "title": "Compilation",
- "language": "en"
-}
----
-
-
-
-
-# Compilation
-
-This document focuses on how to compile Doris from source code.
-
-## Compile with the Docker development image (recommended)
-
-### Use the off-the-shelf image
-
-1. Download the Docker image
-
- `$ docker pull apache/incubator-doris:build-env-ldb-toolchain-latest`
-
-    Check that the image has been downloaded successfully:
-
- ```
- $ docker images
- REPOSITORY TAG IMAGE ID CREATED SIZE
- apache/incubator-doris build-env-ldb-toolchain-latest 49f68cecbc1a 4 days ago 3.76GB
- ```
-
-> Note 1: For different versions of Doris, you need to download the corresponding image version. Starting from Apache Doris 0.15, the docker image keeps the same version number as Doris. For example, you can use `apache/incubator-doris:build-env-for-0.15.0` to compile Apache Doris 0.15.0.
->
-> Note 2: `apache/incubator-doris:build-env-ldb-toolchain-latest` is used to compile trunk code and will be updated along with trunk code. See the update time in `docker/README.md`.
-
-| image version | commit id | release version |
-|---|---|---|
-| apache/incubator-doris:build-env | before [ff0dd0d](https://github.com/apache/incubator-doris/commit/ff0dd0d2daa588f18b6db56f947e813a56d8ec81) | 0.8.x, 0.9.x |
-| apache/incubator-doris:build-env-1.1 | [ff0dd0d](https://github.com/apache/incubator-doris/commit/ff0dd0d2daa588f18b6db56f947e813a56d8ec81) or later | 0.10.x or later |
-| apache/incubator-doris:build-env-1.2 | [4ef5a8c](https://github.com/apache/incubator-doris/commit/4ef5a8c8560351d7fff7ff8fd51c4c7a75e006a8) or later | 0.12.x - 0.14.0 |
-| apache/incubator-doris:build-env-1.3.1 | [ad67dd3](https://github.com/apache/incubator-doris/commit/ad67dd34a04c1ca960cff38e5b335b30fc7d559f) or later | 0.14.x |
-| apache/incubator-doris:build-env-for-0.15.0 | [a81f4da](https://github.com/apache/incubator-doris/commit/a81f4da4e461a54782a96433b746d07be89e6b54) or later | 0.15.0 |
-| apache/incubator-doris:build-env-latest | before [0efef1b](https://github.com/apache/incubator-doris/commit/0efef1b332300887ee0473f9df9bdd9d7297d824) | |
-| apache/incubator-doris:build-env-ldb-toolchain-latest | trunk | trunk |
-
-**note**:
-
-> 1. Dev docker image [ChangeLog](https://github.com/apache/incubator-doris/blob/master/thirdparty/CHANGELOG.md)
-
-> 2. Doris version 0.14.0 still uses apache/incubator-doris:build-env-1.2 to compile, and the 0.14.x code will use apache/incubator-doris:build-env-1.3.1.
-
-> 3. From docker image of build-env-1.3.1, both OpenJDK 8 and OpenJDK 11 are included, and OpenJDK 11 is used for compilation by default. Please make sure that the JDK version used for compiling is the same as the JDK version used at runtime, otherwise it may cause unexpected operation errors. You can use the following command to switch the default JDK version in container:
->
-> Switch to JDK 8:
->
-> ```
-> $ alternatives --set java java-1.8.0-openjdk.x86_64
-> $ alternatives --set javac java-1.8.0-openjdk.x86_64
-> $ export JAVA_HOME=/usr/lib/jvm/java-1.8.0
-> ```
->
-> Switch to JDK 11:
->
-> ```
-> $ alternatives --set java java-11-openjdk.x86_64
-> $ alternatives --set javac java-11-openjdk.x86_64
-> $ export JAVA_HOME=/usr/lib/jvm/java-11
-> ```
-
-2. Run the image
-
- `$ docker run -it apache/incubator-doris:build-env-ldb-toolchain-latest`
-
-   It is recommended to run the container with the local Doris source directory mounted, so that the compiled binaries are stored on the host machine and are not lost when the container exits.
-
-   It is also recommended to mount the maven `.m2` directory in the image to a host directory, to avoid repeatedly downloading maven's dependencies each time compilation is started.
-
- ```
- $ docker run -it -v /your/local/.m2:/root/.m2 -v /your/local/incubator-doris-DORIS-x.x.x-release/:/root/incubator-doris-DORIS-x.x.x-release/ apache/incubator-doris:build-env-ldb-toolchain-latest
- ```
-
-3. Download source code
-
-   After starting the image, you should be inside the container. The Doris source code can be downloaded with the following commands (not needed if you mounted a local source directory):
-
- ```
- $ wget https://dist.apache.org/repos/dist/dev/incubator/doris/xxx.tar.gz
- or
- $ git clone https://github.com/apache/incubator-doris.git
- ```
-
-4. Compile Doris
-
- First run the following command to check whether the compilation machine supports the avx2 instruction set
-
- ```
- $ cat /proc/cpuinfo | grep avx2
- ```
-
- If it is not supported, use the following command to compile
-
- ```
- $ USE_AVX2=0 sh build.sh
- ```
-
- If supported, compile directly without adding USE_AVX2=0
-
- ```
- $ sh build.sh
- ```
-
- > **Note:**
- >
- > If you are using `build-env-for-0.15.0` or later version for the first time, use the following command when compiling:
- >
- > `sh build.sh --clean --be --fe --ui`
- >
- > This is because from build-env-for-0.15.0, we upgraded thrift (0.9 -> 0.13), you need to use the --clean command to force the use of the new version of thrift to generate code files, otherwise incompatible code will appear.
-
- After compilation, the output file is in the `output/` directory.
-
-### Build the development environment image yourself
-
-You can also build the Doris development environment image yourself; see the `docker/README.md` file for details.
-
-
-## Direct Compilation (CentOS/Ubuntu)
-
-You can try to compile Doris directly in your own Linux environment.
-
-1. System Dependencies
-   * Before commit [ad67dd3](https://github.com/apache/incubator-doris/commit/ad67dd34a04c1ca960cff38e5b335b30fc7d559f), the dependencies are as follows:
-
- `GCC 7.3+, Oracle JDK 1.8+, Python 2.7+, Apache Maven 3.5+, CMake 3.11+ Bison 3.0+`
-
- If you are using Ubuntu 16.04 or newer, you can use the following command to install the dependencies
-
- `sudo apt-get install build-essential openjdk-8-jdk maven cmake byacc flex automake libtool-bin bison binutils-dev libiberty-dev zip unzip libncurses5-dev curl git ninja-build python autopoint pkg-config`
-
- If you are using CentOS you can use the following command to install the dependencies
-
- `sudo yum groupinstall 'Development Tools' && sudo yum install maven cmake byacc flex automake libtool bison binutils-devel zip unzip ncurses-devel curl git wget python2 glibc-static libstdc++-static java-1.8.0-openjdk`
-
-   * After commit [ad67dd3](https://github.com/apache/incubator-doris/commit/ad67dd34a04c1ca960cff38e5b335b30fc7d559f), the dependencies are as follows:
-
- `GCC 10+, Oracle JDK 1.8+, Python 2.7+, Apache Maven 3.5+, CMake 3.19.2+ Bison 3.0+`
-
- If you are using Ubuntu 16.04 or newer, you can use the following command to install the dependencies
-
- ```
- sudo apt install build-essential openjdk-8-jdk maven cmake byacc flex automake libtool-bin bison binutils-dev libiberty-dev zip unzip libncurses5-dev curl git ninja-build python
- sudo add-apt-repository ppa:ubuntu-toolchain-r/ppa
- sudo apt update
- sudo apt install gcc-10 g++-10
- sudo apt-get install autoconf automake libtool autopoint
- ```
- If you are using CentOS you can use the following command to install the dependencies
-
- ```
- sudo yum groupinstall 'Development Tools' && sudo yum install maven cmake byacc flex automake libtool bison binutils-devel zip unzip ncurses-devel curl git wget python2 glibc-static libstdc++-static java-1.8.0-openjdk
- sudo yum install centos-release-scl
- sudo yum install devtoolset-10
- scl enable devtoolset-10 bash
- ```
-    If devtoolset-10 is not found in the current repositories: Oracle has rebuilt the devtoolset-10 packages, and you can use this repo file:
- ```
- [ol7_software_collections]
- name=Software Collection packages for Oracle Linux 7 ($basearch)
- baseurl=http://yum.oracle.com/repo/OracleLinux/OL7/SoftwareCollections/$basearch/
- gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-oracle
- gpgcheck=1
- enabled=1
- ```
-    After installation, set environment variables such as `PATH` and `JAVA_HOME`.
-    > Tip: you can find the JDK install directory with the command `alternatives --list` (see the sketch below).
-
-    Doris 0.14.0 is compiled with the GCC 7 environment.
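-
-    For example, a minimal sketch (the JDK path below is illustrative; confirm it with `alternatives --list`):
-
-    ```
-    export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
-    export PATH=$JAVA_HOME/bin:$PATH
-    ```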
-
-2. Compile Doris
-
- As with Docker development image compilation, check whether the avx2 instruction is supported before compiling
-
- ```
- $ cat /proc/cpuinfo | grep avx2
- ```
-
- If supported, use the following command to compile
-
- ```
- $ sh build.sh
- ```
-
- If it is not supported, you need to add USE_AVX2=0
-
- ```
- $ USE_AVX2=0 sh build.sh
- ```
-
- After compilation, the output files are in the `output/` directory.
-
-## FAQ
-
-1. `Could not transfer artifact net.sourceforge.czt.dev:cup-maven-plugin:pom:1.6-cdh from/to xxx`
-
- If you encounter the above error, please refer to [PR #4769](https://github.com/apache/incubator-doris/pull/4769/files) to modify the cloudera-related repo configuration in `fe/pom.xml`.
-
-2. Errors or failures when downloading third-party dependencies
-
-   The download links of the third-party libraries that Doris depends on are all in the `thirdparty/vars.sh` file. Over time, some download links may become invalid. If you encounter this situation, it can be solved in one of the following two ways:
-
- 1. Manually modify the `thirdparty/vars.sh` file
-
-      Manually modify the problematic download link and the corresponding MD5 value.
-
-   2. Use the third-party download mirror repository:
-
- ```
- export REPOSITORY_URL=https://doris-thirdparty-repo.bj.bcebos.com/thirdparty
- sh build-thirdparty.sh
- ```
-
- REPOSITORY_URL contains all third-party library source code packages and their historical versions.
-
-3. `fatal error: Killed signal terminated program ...`
-
-   If you encounter the above error when compiling with the Docker image, it may be that the memory allocated to the container is insufficient (the default memory allocated by Docker is 2GB, while peak memory usage during compilation exceeds 2GB).
-
-   Try increasing the memory allocated to the container; 4GB ~ 8GB is recommended.
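-
-   For example, on a Linux host you can raise the container's memory limit explicitly when starting it (a sketch; on Docker Desktop, adjust the memory in its resource settings instead):
-
-   ```
-   $ docker run -it --memory=8g apache/incubator-doris:build-env-ldb-toolchain-latest
-   ```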
-
-## Special statement
-
-Starting from version 0.13, the dependency on the two third-party libraries [1] and [2] will be removed in the default compiled output. These two third-party libraries are under [GNU General Public License V3](https://www.gnu.org/licenses/gpl-3.0.en.html). This license is incompatible with [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), so it should not appear in the Apache release by default.
-
-Removing library [1] makes it impossible to access MySQL external tables. The ability to access MySQL external tables will be implemented through `UnixODBC` in a future release.
-
-Removing library [2] makes some data written by earlier versions (before 0.8) unreadable, because the data in those versions was compressed with the LZO algorithm, while later versions switched to the LZ4 compression algorithm. Tools to detect and convert this data will be provided in the future.
-
-If required, users can continue to use these two dependent libraries. If you want to use it, you need to add the following options when compiling:
-
-```
-WITH_MYSQL=1 WITH_LZO=1 sh build.sh
-```
-
-Note that when these two third-party libraries are included, the Doris build is no longer distributed purely under the Apache License 2.0 by default. Please pay attention to the GPL-related license terms.
-
-* [1] mysql-5.7.18
-* [2] lzo-2.10
diff --git a/docs/en/installing/install-deploy.md b/docs/en/installing/install-deploy.md
deleted file mode 100644
index bf572044dd..0000000000
--- a/docs/en/installing/install-deploy.md
+++ /dev/null
@@ -1,490 +0,0 @@
----
-{
- "title": "Installation and deployment",
- "language": "en"
-}
----
-
-
-
-
-# Installation and deployment
-
-This document mainly introduces the hardware and software environment needed to deploy Doris, the proposed deployment mode, cluster expansion and scaling, and common problems in the process of cluster building and running.
-Before reading this document, compile Doris according to the compilation documentation.
-
-## Software and hardware requirements
-
-### Overview
-
-Doris, as an open source MPP architecture OLAP database, can run on most mainstream commercial servers. To make full use of the concurrency advantages of the MPP architecture and the high-availability features of Doris, we recommend that the deployment of Doris follow the requirements below:
-
-#### Linux Operating System Version Requirements
-
-| Linux System | Version|
-|---|---|
-| CentOS | 7.1 and above |
-| Ubuntu | 16.04 and above |
-
-#### Software requirements
-
-| Soft | Version |
-|---|---|
-| Java | 1.8 and above |
-| GCC | 4.8.2 and above |
-
-#### OS Installation Requirements
-
-##### Set the maximum number of open file handles in the system
-
-````
-vi /etc/security/limits.conf
-* soft nofile 65536
-* hard nofile 65536
-````
-
-##### Clock synchronization
-
-Doris metadata requires clock skew of less than 5000 ms between nodes, so all machines in the cluster must synchronize their clocks to avoid service exceptions caused by metadata inconsistencies due to clock problems.
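-
-For example, a minimal sketch using chrony on CentOS (use whichever time-synchronization service your environment standardizes on):
-
-```
-yum install -y chrony
-systemctl enable --now chronyd
-# Verify that the clock is being tracked against an NTP source
-chronyc tracking
-```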
-
-##### Close the swap partition (swap)
-
-The Linux swap partition can cause serious performance problems for Doris; you need to disable swap before installation.
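-
-For example, a minimal sketch (adjust for your distribution):
-
-```
-# Turn off swap immediately
-swapoff -a
-# Comment out swap entries in /etc/fstab so swap stays off after reboot
-sed -i.bak '/swap/ s/^/#/' /etc/fstab
-```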
-
-##### Linux file system
-
-Here we recommend using the ext4 file system. When installing the operating system, please select the ext4 file system.
-
-#### Development Test Environment
-
-| Module | CPU | Memory | Disk | Network | Instance Number|
-|---|---|---|---|---|---|
-| Frontend | 8 core + | 8GB + | SSD or SATA, 10GB + * | Gigabit Network Card | 1|
-| Backend | 8-core + | 16GB + | SSD or SATA, 50GB + * | Gigabit Network Card | 1-3*|
-
-#### Production environment
-
-| Module | CPU | Memory | Disk | Network | Number of Instances (Minimum Requirements)|
-|---|---|---|---|---|---|
-| Frontend | 16 core + | 64GB + | SSD or RAID card, 100GB + * | 10 Gigabit network card | 1-5* |
-| Backend | 16 core + | 64GB + | SSD or SATA, 100GB + * | 10 Gigabit network card | 10-100* |
-
-> Note 1:
->
-> 1. The disk space of FE is mainly used to store metadata, including logs and images. Usually it ranges from several hundred MB to several GB.
-> 2. BE's disk space is mainly used to store user data. The total disk space is calculated according to the user's total data * 3 (3 copies). Then an additional 40% of the space is reserved for background compaction and some intermediate data storage.
-> 3. Multiple BE instances can be deployed on a single machine, but **only one FE can be deployed per machine**. If you need three replicas of the data, you need at least three machines, each deploying one BE instance (rather than one machine deploying three BE instances). **The clocks of the servers hosting FEs must be consistent (a maximum clock deviation of 5 seconds is allowed)**
-> 4. The test environment can also be tested with only one BE. In the actual production environment, the number of BE instances directly determines the overall query latency.
-> 5. All deployment nodes close Swap.
-
-> Note 2: Number of FE nodes
->
-> 1. FE roles are divided into Follower and Observer. (The Leader is a role elected from within the Follower group; both are referred to as Followers below. For details, see the [Metadata Design Document](./internal/metadata-design).)
-> 2. At least 1 FE node (1 Follower) is required. When one Follower and one Observer are deployed, high read availability can be achieved. When three Followers are deployed, read-write high availability (HA) can be achieved.
-> 3. The number of Followers **must be** odd, and the number of Observers is arbitrary.
-> 4. According to past experience, when cluster availability requirements are high (e.g. providing online services), three Followers and one to three Observers can be deployed. For offline business, it is recommended to deploy 1 Follower and 1-3 Observers.
-
-* **Usually we recommend about 10 to 100 machines to give full play to Doris's performance (3 of them deploy FE (HA) and the rest deploy BE)**
-* **Of course, Doris performance is positively correlated with the number and configuration of nodes. With a minimum of four machines (one FE, three BEs, one BE mixed with one Observer FE to provide metadata backup) and a lower configuration, Doris can still run smoothly.**
-* **If FE and BE are co-located, pay attention to resource contention and make sure the metadata directory and data directory are on different disks.**
-
-#### Broker deployment
-
-Broker is a process for accessing external data sources, such as hdfs. Usually, a broker instance is deployed on each machine.
-
-#### Network Requirements
-
-Doris instances communicate directly over the network. The following table shows all required ports
-
-| Instance Name | Port Name | Default Port | Communication Direction | Description|
-| ---|---|---|---|---|
-| BE | be_port | 9060 | FE --> BE | thrift server port on BE, used to receive requests from FE|
-| BE | webserver\_port | 8040 | BE <--> BE | http server port on BE|
-| BE | heartbeat\_service_port | 9050 | FE --> BE | heartbeat service port (thrift) on BE, used to receive heartbeats from FE|
-| BE | brpc\_port | 8060 | FE <--> BE, BE <--> BE | brpc port on BE, used for communication between BEs|
-| FE | http_port | 8030 | FE <--> FE, user <--> FE | http server port on FE |
-| FE | rpc_port | 9020 | BE --> FE, FE <--> FE | thrift server port on FE; the configuration of each FE needs to be consistent|
-| FE | query_port | 9030 | user <--> FE | MySQL server port on FE|
-| FE | edit\_log_port | 9010 | FE <--> FE | port used for bdbje communication between FEs|
-| Broker | broker ipc_port | 8000 | FE --> Broker, BE --> Broker | thrift server port on Broker, used to receive requests|
-
-> Note:
->
-> 1. When deploying multiple FE instances, make sure that the http port configuration of FE is the same.
-> 2. Make sure that each port has access in its proper direction before deployment.
-
-#### IP binding
-
-Because of multiple network cards, or virtual network cards created by installing Docker and similar environments, the same host may have multiple different IPs. Currently Doris does not automatically identify which IP to use, so when the deployment host has multiple IPs, you must explicitly specify the correct IP through the priority\_networks configuration item.
-
-Priority\_networks is a configuration that both FE and BE have, and the configuration items need to be written in fe.conf and be.conf. This configuration item is used to tell the process which IP should be bound when FE or BE starts. Examples are as follows:
-
-`priority_networks=10.1.3.0/24`
-
-This is a representation of [CIDR](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing). FE or BE will find the matching IP based on this configuration item as their own local IP.
-
-**Note**: Configuring priority\_networks only ensures that FE or BE binds the correct IP when it starts. You also need to specify an IP matching the priority\_networks configuration in ADD BACKEND or ADD FRONTEND statements; otherwise the cluster cannot be established. For example:
-
-BE is configured as `priority_networks=10.1.3.0/24`.
-
-If you then use `ALTER SYSTEM ADD BACKEND "192.168.0.1:9050";` when adding the BE,
-
-FE and BE will not be able to communicate properly.
-
-At this point, you must DROP the incorrectly added BE and re-run ADD BACKEND with the correct IP.
-
-The same applies to FE.
-
-Broker currently does not have, nor does it need, a priority\_networks option. Broker services are bound to 0.0.0.0 by default. Simply specify a reachable Broker IP when executing ADD BROKER.
-
-#### Table Name Case Sensitivity Setting
-
-By default, Doris table names are case-sensitive. If you need case-insensitive table names, you must set this before cluster initialization; the table name case sensitivity cannot be changed after cluster initialization is completed.
-
-See the section on `lower_case_table_names` variables in [Variables](../administrator-guide/variables.md) for details.
-
-## Cluster deployment
-
-### Manual deployment
-
-#### Deploy FE
-
-* Copy the FE deployment file to the specified node
-
-    Copy the fe folder from the output directory generated by source compilation to the FE node's specified deployment path and enter this directory.
-
-* Configure FE
-
- 1. The configuration file is conf/fe.conf. Note: `meta_dir` indicates the Metadata storage location. The default value is `${DORIS_HOME}/doris-meta`. The directory needs to be **created manually**.
-    2. JAVA_OPTS in fe.conf defaults to a maximum Java heap of 4GB; for production it is recommended to adjust it to 8GB or more.
-
-* Start FE
-
- `bin/start_fe.sh --daemon`
-
- The FE process starts and enters the background execution. Logs are stored in the log/ directory by default. If startup fails, you can view error messages by looking at log/fe.log or log/fe.out.
-
-* For deployment of multiple FEs, see the "FE Expansion and Compression" section.
-
-#### Deploy BE
-
-* Copy BE deployment files to all nodes to deploy BE
-
- Copy the be folder under output generated by source code compilation to the specified deployment path of the BE node.
-
-    > Note: The `output/be/lib/debug_info/` directory contains debug information files. These files are large, but they are not needed at runtime and can be left out of the deployment.
-
-* Modify all BE configurations
-
-    Modify be/conf/be.conf, mainly to configure `storage_root_path`: the data storage directory. The default is be/storage; this directory needs to be **created manually**. Multiple directories are separated with `;` (do not add `;` after the last directory).
-
- eg.1:
-
-    Note: for SSD disks, append `.SSD` to the directory path; for HDD disks, append `.HDD`.
-
- `storage_root_path=/home/disk1/doris.HDD,50;/home/disk2/doris.SSD,1;/home/disk2/doris`
-
- **instructions**
-
-    * `/home/disk1/doris.HDD,50` indicates a capacity limit of 50GB, HDD;
-    * `/home/disk2/doris.SSD,1` indicates a capacity limit of 1GB, SSD;
-    * `/home/disk2/doris` indicates the capacity limit is the disk capacity, HDD (default)
-
- eg.2:
-
- Note: you do not need to add the suffix to either HDD or SSD disk directories. You only need to set the medium parameter
-
- `storage_root_path=/home/disk1/doris,medium:hdd,capacity:50;/home/disk2/doris,medium:ssd,capacity:50`
-
- **instructions**
-
-    * `/home/disk1/doris,medium:hdd,capacity:50` indicates a capacity limit of 50GB, HDD;
-    * `/home/disk2/doris,medium:ssd,capacity:50` indicates a capacity limit of 50GB, SSD;
-
-* BE webserver_port configuration
-
-    If the BE component is deployed on the nodes of a Hadoop cluster, you need to change `webserver_port=8040` to another port to avoid a port conflict.
-
-* Add all BE nodes to FE
-
- BE nodes need to be added in FE before they can join the cluster. You can use mysql-client([Download MySQL 5.7](https://dev.mysql.com/downloads/mysql/5.7.html)) to connect to FE:
-
- `./mysql-client -h fe_host -P query_port -uroot`
-
-    The fe_host is the node IP where FE is located; query_port is in fe/conf/fe.conf; by default, log in with the root account and no password.
-
- After login, execute the following commands to add each BE:
-
- `ALTER SYSTEM ADD BACKEND "be_host:heartbeat_service_port";`
-
- The be_host is the node IP where BE is located; the heartbeat_service_port in be/conf/be.conf.
-
-* Start BE
-
- `bin/start_be.sh --daemon`
-
- The BE process will start and go into the background for execution. Logs are stored in be/log/directory by default. If startup fails, you can view error messages by looking at be/log/be.log or be/log/be.out.
-
-* View BE status
-
-    Connect to FE using mysql-client and execute `SHOW PROC '/backends';` to check the BE status. If everything is normal, the `Alive` column should be `true`.
-
-#### (Optional) FS_Broker deployment
-
-Broker is deployed as a plug-in, independent of Doris. If you need to import data from a third-party storage system, you need to deploy the corresponding Broker. By default, fs_broker is provided for reading HDFS, Baidu Cloud BOS, and Amazon S3. fs_broker is stateless; it is recommended to deploy a Broker on every FE and BE node.
-
-* Copy the fs_broker directory from the source compilation output to all nodes where it needs to be deployed. It is recommended to keep it at the same directory level as BE or FE.
-
-* Modify the corresponding Broker configuration
-
-    Modify the configuration file in the corresponding broker/conf/ directory as needed.
-
-* Start Broker
-
- `bin/start_broker.sh --daemon`
-
-* Add Broker
-
- To let Doris FE and BE know which nodes Broker is on, add a list of Broker nodes by SQL command.
-
- Use mysql-client to connect the FE started, and execute the following commands:
-
- `ALTER SYSTEM ADD BROKER broker_name "broker_host1:broker_ipc_port1","broker_host2:broker_ipc_port2",...;`
-
- The broker\_host is Broker's node ip; the broker_ipc_port is in the Broker configuration file.
-
-* View Broker status
-
- Connect any booted FE using mysql-client and execute the following command to view Broker status: `SHOW PROC '/brokers';`
-
-**Note: In production environments, daemons should be used to start all instances to ensure that processes are automatically pulled up after they exit, such as [Supervisor](http://supervisord.org/). For daemon startup, in 0.9.0 and previous versions, you need to modify the start_xx.sh scripts to remove the last & symbol**. Starting with version 0.10.0, call `sh start_xx.sh` directly to start. Also refer to [here](https://www.cnblogs.com/lenmom/p/9973401.html)
-
-## Expansion and contraction
-
-Doris can easily expand and shrink FE, BE, Broker instances.
-
-### FE Expansion and Compression
-
-High availability of FE can be achieved by expanding FE to three or more nodes.
-
-Users can log in to the Master FE through the MySQL client and execute:
-
-`SHOW PROC '/frontends';`
-
-to view the current FE node status.
-
-You can also view the FE node through the front-end page connection: ``http://fe_hostname:fe_http_port/frontend`` or ```http://fe_hostname:fe_http_port/system?Path=//frontends```.
-
-All of the above methods require Doris's root user rights.
-
-The process of FE node expansion and contraction does not affect the current system operation.
-
-#### Adding FE nodes
-
-FE is divided into three roles: Leader, Follower and Observer. By default, a cluster can have only one Leader and multiple Followers and Observers. The Leader and Followers form a Paxos election group: if the Leader goes down, the remaining Followers automatically elect a new Leader, ensuring high write availability. The Observer synchronizes data from the Leader but does not participate in elections. If only one FE is deployed, that FE is the Leader by default.
-
-The first FE to start automatically becomes Leader. On this basis, several Followers and Observers can be added.
-
-Add Follower or Observer. Connect to the started FE using mysql-client and execute:
-
-`ALTER SYSTEM ADD FOLLOWER "follower_host:edit_log_port";`
-
-or
-
-`ALTER SYSTEM ADD OBSERVER "observer_host:edit_log_port";`
-
-The follower\_host and observer\_host are the node IPs of the Follower or Observer, and edit\_log\_port is in its configuration file fe.conf.
-
-Configure and start the Follower or Observer. The Follower and Observer are configured in the same way as the Leader. The following command needs to be executed at the first startup:
-
-`bin/start_fe.sh --helper host:edit_log_port --daemon`
-
-The host is the node IP of the Leader, and edit\_log\_port is in the Leader's configuration file fe.conf. The --helper option is only required when the follower/observer is started for the first time.
-
-View the status of Follower or Observer. Connect to any booted FE using mysql-client and execute:
-
-```SHOW PROC '/frontends';```
-
-You can view the FE currently joined the cluster and its corresponding roles.
-
-> Notes for FE expansion:
->
-> 1. The number of Follower FEs (including the Leader) must be odd. It is recommended to deploy at most 3 of them to form high availability (HA) mode.
-> 2. When FE is in a highly available deployment (1 Leader, 2 Follower), we recommend that the reading service capability of FE be extended by adding Observer FE. Of course, you can continue to add Follower FE, but it's almost unnecessary.
-> 3. Usually a FE node can handle 10-20 BE nodes. It is suggested that the total number of FE nodes should be less than 10. Usually three can meet most of the needs.
-> 4. The helper cannot point to the FE itself; it must point to one or more existing, running Master/Follower FEs.
-
-#### Delete FE nodes
-
-Delete the corresponding FE node using the following command:
-
-```ALTER SYSTEM DROP FOLLOWER[OBSERVER] "fe_host:edit_log_port";```
-
-> Notes for FE contraction:
->
-> 1. When deleting Follower FE, make sure that the remaining Follower (including Leader) nodes are odd.
-
-### BE Expansion and Compression
-
-Users can log in to the Leader FE through mysql-client and execute:
-
-```SHOW PROC '/backends';```
-
-to see the current BE node status.
-
-You can also view the BE node through the front-end page connection: ``http://fe_hostname:fe_http_port/backend`` or ``http://fe_hostname:fe_http_port/system?Path=//backends``.
-
-All of the above methods require Doris's root user rights.
-
-The expansion and scaling process of BE nodes does not affect the current system operation and the tasks being performed, and does not affect the performance of the current system. Data balancing is done automatically. Depending on the amount of data available in the cluster, the cluster will be restored to load balancing in a few hours to a day. For cluster load, see the [Tablet Load Balancing Document](../administrator-guide/operation/tablet-repair-and-balance.md).
-
-#### Add BE nodes
-
-The BE node is added in the same way as in the **BE deployment** section. The BE node is added by the `ALTER SYSTEM ADD BACKEND` command.
-
-> Notes for BE expansion:
->
-> 1. After BE expansion, Doris will automatically balance the data according to the load, without affecting the use during the period.
-
-#### Delete BE nodes
-
-There are two ways to delete BE nodes: DROP and DECOMMISSION
-
-The DROP statement is as follows:
-
-```ALTER SYSTEM DROP BACKEND "be_host:be_heartbeat_service_port";```
-
-**Note: DROP BACKEND deletes the BE directly, and the data on it cannot be recovered!!! Therefore we strongly recommend against using DROP BACKEND to delete BE nodes. When you use this statement, there will be corresponding error-proofing prompts.**
-
-The DECOMMISSION statement is as follows:
-
-```ALTER SYSTEM DECOMMISSION BACKEND "be_host:be_heartbeat_service_port";```
-
-> DECOMMISSION notes:
->
-> 1. This command is used to safely delete BE nodes. After the command is issued, Doris attempts to migrate the data on the BE to other BE nodes, and when all data is migrated, Doris automatically deletes the node.
-> 2. This command is an asynchronous operation. After execution, you can see via `SHOW PROC '/backends';` that the BE node's isDecommission status is true, which indicates that the node is being taken offline.
-> 3. The command **does not always complete successfully**. For example, when the remaining BE storage space is insufficient to accommodate the data on the decommissioned BE, or when the number of remaining machines does not meet the minimum number of replicas, the command cannot be completed, and the BE will remain in the isDecommission = true state.
-> 4. The progress of DECOMMISSION can be viewed via the TabletNum column of `SHOW PROC '/backends';`; while the operation is in progress, TabletNum will keep decreasing.
-> 5. The operation can be cancelled by:
-> ```CANCEL ALTER SYSTEM DECOMMISSION BACKEND "be_host:be_heartbeat_service_port";```
-> After cancellation, the data on the BE will remain at its current remaining amount, and Doris will re-balance the load afterwards.
-
-**For expansion and scaling of BE nodes in multi-tenant deployment environments, please refer to the [Multi-tenant Design Document](./administrator-guide/operation/multi-tenant.md).**
-
-### Broker Expansion and Shrinkage
-
-There is no rigid requirement for the number of Broker instances; usually one is deployed per physical machine. Brokers can be added and removed with the following commands:
-
-```ALTER SYSTEM ADD BROKER broker_name "broker_host:broker_ipc_port";```
-```ALTER SYSTEM DROP BROKER broker_name "broker_host:broker_ipc_port";```
-```ALTER SYSTEM DROP ALL BROKER broker_name;```
-
-Broker is a stateless process that can be started or stopped at will. Of course, when it is stopped, jobs running on it will fail; just retry them.
-
-## Common Questions
-
-### Process correlation
-
-1. How to determine the success of FE process startup
-
-    After the FE process starts, metadata is loaded first. Depending on the FE's role, you will see ```transfer from UNKNOWN to MASTER/FOLLOWER/OBSERVER``` in the log. Eventually you will see the ``thrift server started`` log and will be able to connect to FE through the MySQL client, which indicates that FE started successfully.
-
- You can also check whether the startup was successful by connecting as follows:
-
- `http://fe_host:fe_http_port/api/bootstrap`
-
- If returned:
-
- `{"status":"OK","msg":"Success"}`
-
-    then the startup was successful; otherwise, there may be a problem.
-
-    > Note: If you can't find the failure information in fe.log, you may find it in fe.out.
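-
-    For example, this check can be scripted (a sketch; substitute your FE host and http_port):
-
-    ```
-    curl http://fe_host:8030/api/bootstrap
-    # Expected output: {"status":"OK","msg":"Success"}
-    ```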
-
-2. How to determine the success of BE process startup
-
-    After the BE process starts, if there is existing data, it may take several minutes to load the data indexes.
-
-    If BE is started for the first time or has not yet joined any cluster, the BE log will periodically print `waiting to receive first heartbeat from frontend`: BE has not received the Master's address through FE's heartbeat and is waiting passively. This log line disappears after ADD BACKEND is executed in FE and the heartbeat is sent. If the words ``master client, get client from cache failed. host:, port: 0, code: 7`` appear after a heartbeat has been received, it indicates that FE has successfully connected to BE, but BE cannot actively connect back to FE; you may need to check the connectivity from BE to FE's rpc_port.
-
- If BE has been added to the cluster, the heartbeat log from FE should be scrolled every five seconds: ```get heartbeat, host:xx. xx.xx.xx, port:9020, cluster id:xxxxxxx```, indicating that the heartbeat is normal.
-
- Secondly, the word `finish report task success. return code: 0` should be scrolled every 10 seconds in the log to indicate that BE's communication to FE is normal.
-
- At the same time, if there is a data query, you should see the rolling logs, and have `execute time is xxx` logs, indicating that BE started successfully, and the query is normal.
-
- You can also check whether the startup was successful by connecting as follows:
-
- `http://be_host:be_http_port/api/health`
-
- If returned:
-
- `{"status": "OK","msg": "To Be Added"}`
-
-    then the startup was successful; otherwise, there may be a problem.
-
-    > Note: If you can't find the failure information in be.INFO, you may find it in be.out.
-
-3. How to determine the normal connectivity of FE and BE after building the system
-
- Firstly, confirm that FE and BE processes have been started separately and normally, and confirm that all nodes have been added through `ADD BACKEND` or `ADD FOLLOWER/OBSERVER` statements.
-
-    If the heartbeat is normal, the BE log will show ``get heartbeat, host:xx.xx.xx.xx, port:9020, cluster id:xxxxx``. If the heartbeat fails, the words ```backend [10001] get Exception: org.apache.thrift.transport.TTransportException``` (or other thrift communication error logs) will appear in FE's log, indicating that the heartbeat from FE to the BE with id 10001 failed. In that case, check the connectivity from FE to the BE host's heartbeat port.
-
-    If BE's communication to FE is normal, the BE log will show the words `finish report task success. return code: 0`. Otherwise, the words `master client, get client from cache failed` will appear; in this case, check the connectivity from BE to the rpc_port of FE.
-
-4. Doris Node Authentication Mechanism
-
- In addition to Master FE, the other role nodes (Follower FE, Observer FE, Backend) need to register to the cluster through the `ALTER SYSTEM ADD` statement before joining the cluster.
-
- When Master FE is first started, a cluster_id is generated in the doris-meta/image/VERSION file.
-
- When FE first joins the cluster, it first retrieves the file from Master FE. Each subsequent reconnection between FEs (FE reboot) checks whether its cluster ID is the same as that of other existing FEs. If different, the FE will exit automatically.
-
- When BE first receives the heartbeat of Master FE, it gets the cluster ID from the heartbeat and records it in the `cluster_id` file of the data directory. Each heartbeat after that compares to the cluster ID sent by FE. If cluster IDs are not equal, BE will refuse to respond to FE's heartbeat.
-
-    The heartbeat also contains the Master FE's IP. When the Master FE switches, the new Master FE will carry its own IP in the heartbeat sent to BE, and BE will update the Master FE IP it has saved.
-
- > **priority\_network**
- >
-    > priority\_networks is a configuration that both FE and BE have. Its main purpose is to help FE or BE identify their own IP address when there are multiple network cards. priority\_networks is expressed in CIDR notation: [RFC 4632](https://tools.ietf.org/html/rfc4632)
- >
-    > If, after confirming that the connectivity between FE and BE is normal, creating tables still times out and the FE log contains an error message with the words `backend does not find. host:xxxx.xxx.XXXX`, it means there is a problem with the IP address that Doris automatically identified, and the priority\_networks parameter needs to be set manually.
- >
- > The main reason for this problem is that when the user adds BE through the `ADD BACKEND` statement, FE recognizes whether the statement specifies hostname or IP. If it is hostname, FE automatically converts it to an IP address and stores it in metadata. When BE reports on the completion of the task, it carries its own IP address. If FE finds that BE reports inconsistent IP addresses and metadata, it will make the above error.
- >
- > Solutions to this error: 1) Set **priority\_network** parameters in FE and BE respectively. Usually FE and BE are in a network segment, so this parameter can be set to the same. 2) Fill in the `ADD BACKEND` statement directly with the correct IP address of BE instead of hostname to avoid FE getting the wrong IP address.
-
-5. File descriptor number of BE process
-
-    The number of file descriptors of the BE process is controlled by the two parameters min_file_descriptor_number and max_file_descriptor_number.
-
-    If it is not within the [min_file_descriptor_number, max_file_descriptor_number] interval, an error will occur when starting the BE process.
-
-    Please use the ulimit command to set the file descriptor limit in this case.
-
- The default value of min_file_descriptor_number is 65536.
-
- The default value of max_file_descriptor_number is 131072.
-
-    For example, `ulimit -n 65536` sets the file descriptor limit to 65536.
-
- After starting BE process, you can use **cat /proc/$pid/limits** to see the actual limit of process.
diff --git a/docs/en/installing/upgrade.md b/docs/en/installing/upgrade.md
deleted file mode 100644
index f5d6252571..0000000000
--- a/docs/en/installing/upgrade.md
+++ /dev/null
@@ -1,86 +0,0 @@
----
-{
- "title": "Cluster upgrade",
- "language": "en"
-}
----
-
-
-
-
-# Cluster upgrade
-
-Doris can be upgraded smoothly through rolling upgrades. The following steps are recommended for a safe upgrade.
-
-> **Note:**
-> 1. Doris does not support upgrading across two-digit version numbers. For example, you cannot upgrade directly from 0.13 to 0.15; you can only go through 0.13.x -> 0.14.x -> 0.15.x. Three-digit version numbers can be skipped: for example, you can upgrade directly from 0.13.15 to 0.14.13.1 without upgrading to 0.14.7 or 0.14.12.1 first.
-> 1. The following approach assumes a highly available deployment, that is, data with 3 replicas and FE in high-availability mode.
-
-## Preparation
-
-1. Turn off the replica repair and balance operation.
-
-    Nodes will be restarted during the upgrade process, so unnecessary cluster balancing and replica repair logic may be triggered. You can disable it first with the following commands:
-
- ```
-    # Turn off the replica balance logic. After it is turned off, the balancing of ordinary table replicas will no longer be triggered.
- $ mysql-client> admin set frontend config("disable_balance" = "true");
-
- # Turn off the replica balance logic of the colocation table. After it is closed, the replica redistribution operation of the colocation table will no longer be triggered.
- $ mysql-client> admin set frontend config("disable_colocate_balance" = "true");
-
- # Turn off the replica scheduling logic. After shutting down, all generated replica repair and balancing tasks will no longer be scheduled.
- $ mysql-client> admin set frontend config("disable_tablet_scheduler" = "true");
- ```
-
-    After the cluster upgrade is complete, use the same commands to set these configurations back to their original values.
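-
-    For example (assuming these configurations were at their default value of "false" before the upgrade):
-
-    ```
-    $ mysql-client> admin set frontend config("disable_balance" = "false");
-    $ mysql-client> admin set frontend config("disable_colocate_balance" = "false");
-    $ mysql-client> admin set frontend config("disable_tablet_scheduler" = "false");
-    ```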
-
-2. **Important!! Metadata must be backed up before upgrading (the entire metadata directory needs to be backed up)!!**
-
-## Test the correctness of BE upgrade
-
-1. Arbitrarily select a BE node and deploy the latest palo_be binary file.
-2. Restart the BE node and check the BE log be.INFO to see whether it started successfully (see the sketch after this list).
-3. If the startup fails, you can investigate the reason first. If the error is not recoverable, you can delete the BE directly through DROP BACKEND, clean up its data, and restart the BE using the previous version of palo_be, then re-ADD BACKEND. (**This method will result in the loss of one data replica; make sure three complete replicas exist before performing this operation!!!**)
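-
-A minimal sketch of steps 1-2 above (paths are illustrative; the binary name may differ between versions):
-
-```
-# On the selected BE node: replace the binary, restart, and watch the log
-cp output/be/lib/palo_be /path/to/be/lib/palo_be
-sh /path/to/be/bin/stop_be.sh
-sh /path/to/be/bin/start_be.sh --daemon
-tail -f /path/to/be/log/be.INFO
-```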
-
-## Testing FE Metadata Compatibility
-
-0. **Important! Metadata incompatibility is likely to make the data unrecoverable!!**
-1. Deploy a test FE process (for example, on your own development machine) using the new version separately.
-2. Modify the FE configuration file fe.conf for testing and set all ports to **different from online**.
-3. Add configuration in fe.conf: cluster_id=123456
-4. Add the configuration in fe.conf: metadata_failure_recovery=true
-5. Copy the metadata directory doris-meta of the online environment Master FE to the test environment
-6. Modify the cluster_id in the doris-meta/image/VERSION file copied into the test environment to 123456 (that is, the same as in Step 3)
-7. Run `sh bin/start_fe.sh` in the test environment (see the sketch after this list).
-8. Observe whether the start-up is successful through FE log fe.log.
-9. If the startup is successful, run sh bin/stop_fe.sh to stop the FE process of the test environment.
-10. **The purpose of the above 2-6 steps is to prevent the FE of the test environment from being misconnected to the online environment after it starts.**
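-
-A minimal sketch of steps 2-7 above (ports, paths, and the online Master FE host are illustrative):
-
-```
-# In the test FE directory: use ports that do not conflict with the online cluster (step 2),
-# then add the test-only configuration from steps 3 and 4
-echo "cluster_id=123456" >> conf/fe.conf
-echo "metadata_failure_recovery=true" >> conf/fe.conf
-# Step 5: copy the online Master FE's metadata directory into the test FE
-scp -r online_master_fe:/path/to/doris-meta ./doris-meta
-# Step 6: edit doris-meta/image/VERSION and set its cluster id to 123456
-# Step 7: start the test FE and watch its log
-sh bin/start_fe.sh --daemon
-tail -f log/fe.log
-```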
-
-## Upgrade preparation
-
-1. After verifying data correctness, distribute the new versions of the BE and FE binary files to their respective directories.
-2. For a minor version upgrade, usually only palo_be needs to be replaced for BE and only palo-fe.jar for FE. For a major version upgrade, other files (including but not limited to bin/, lib/, etc.) may also need to be replaced. If you are not sure whether other files need replacing, it is recommended to replace all of them.
-
-## Rolling upgrade
-
-1. Confirm that the new version of the file is deployed. Restart FE and BE instances one by one.
-2. It is suggested to restart the BEs one by one first, and then restart the FEs one by one, because Doris usually guarantees backward compatibility from FE to BE: an old version of FE can access a new version of BE, but an old version of BE may not be able to work with a new version of FE.
-3. It is recommended to restart the next instance after confirming the previous instance started successfully. Refer to the Installation Deployment Document for the identification of successful instance startup.
diff --git a/docs/en/internal/Flink doris connector Design.md b/docs/en/internal/Flink doris connector Design.md
deleted file mode 100644
index 05481c67bf..0000000000
--- a/docs/en/internal/Flink doris connector Design.md
+++ /dev/null
@@ -1,259 +0,0 @@
----
-{
- "title": "Flink doris connector Design",
- "language": "en"
-}
-
-
----
-
-
-
-
-# Doris Storage File Format Optimization #
-
-## File format ##
-
-
-
-1. doris segment
-
-The file includes:
-- The file starts with an 8-byte magic code to identify the file format and version
-- Data Region: Used to store data information for each column, where the data is loaded on demand by pages.
-- Index Region: Doris stores the index data of each column in the Index Region. The data here is loaded at column granularity, so it is stored separately from the column data information.
-- Footer
-    - FileFooterPB: defines the metadata information of the file
-    - A 4-byte checksum of the FileFooterPB content
-    - A 4-byte length of the FileFooterPB message, used to read the FileFooterPB
-    - An 8-byte MAGIC CODE stored at the very end, so that the file type can be identified in different scenarios
-
-The data in the file is organized in pages, which are the basic unit of encoding and compression. Current page types include the following:
-
-### DataPage ###
-
-Data Page is divided into two types: nullable and non-nullable data pages.
-
-A nullable data page includes:
-```
-
- +----------------+
- | value count |
- |----------------|
- | first row id |
- |----------------|
- | bitmap length |
- |----------------|
- | null bitmap |
- |----------------|
- | data |
- |----------------|
- | checksum |
- +----------------+
-```
-
-A non-nullable data page is structured as follows:
-
-```
- |----------------|
- | value count |
- |----------------|
- | first row id |
- |----------------|
- | data |
- |----------------|
- | checksum |
- +----------------+
-```
-
-The meanings of each field are as follows:
-
-- value count
- - Represents the number of rows in a page
-- first row id
-    - Row number of the first row in the page
-- bitmap length
- - Represents the number of bytes in the next bitmap
-- null bitmap
- - bitmap representing null information
-- data
-    - Stores the encoded and compressed data
-    - The header of the data needs to record whether it is compressed (is_compressed)
-    - Data encoded with different encodings needs to record some fields in the header information so that the data can be parsed
-    - TODO: add the header information for each encoding
-- Checksum
- - Store page granularity checksum, including page header and subsequent actual data
-
-
-### Bloom Filter Pages ###
-
-For each column with a bloom filter, a bloom filter page is generated per data page and saved in the bloom filter pages area.
-
-### Ordinal Index Page ###
-
-For each column, a sparse index of row numbers is built at page granularity. Each entry maps the row number of the first row of a page to a pointer to that page (including offset and length).
-
-### Short Key Index page ###
-
-A sparse short key index entry is generated every N rows (configurable); its content is a mapping from short key to row number (ordinal).
-
-### Column's other indexes ###
-
-The format design supports the subsequent addition of other index information, such as bitmap indexes, spatial indexes, etc. The required index data only needs to be written after the existing column data, and the corresponding metadata fields added to FileFooterPB.
-
-### Metadata Definition ###
-SegmentFooterPB is defined as:
-
-```
-message ColumnPB {
- required int32 unique_id = 1; // The column id is used here, and the column name is not used
- optional string name = 2; // Column name, when name equals __DORIS_DELETE_SIGN__, this column is a hidden delete column
- required string type = 3; // Column type
- optional bool is_key = 4; // Whether column is a primary key column
- optional string aggregation = 5; // Aggregate type
-    optional bool is_nullable = 6; // Whether column is allowed to assign null
-    optional bytes default_value = 7; // Default value
- optional int32 precision = 8; // Precision of column
- optional int32 frac = 9;
- optional int32 length = 10; // Length of column
- optional int32 index_length = 11; // Length of column index
- optional bool is_bf_column = 12; // Whether column has bloom filter index
- optional bool has_bitmap_index = 15 [default=false]; // Whether column has bitmap index
-}
-
-// page offset
-message PagePointerPB {
-    required uint64 offset = 1; // offset of page in segment file
-    required uint32 length = 2; // length of page
-}
-
-message MetadataPairPB {
- optional string key = 1;
- optional bytes value = 2;
-}
-
-message ColumnMetaPB {
-    optional ColumnMessage encoding = 1; // Encoding of column
-
-    optional PagePointerPB dict_page = 2; // Dictionary page
-    repeated PagePointerPB bloom_filter_pages = 3; // Bloom filter pages
-    optional PagePointerPB ordinal_index_page = 4; // Ordinal index page
-    optional PagePointerPB page_zone_map_page = 5; // Page-level statistics index data
-
-    optional PagePointerPB bitmap_index_page = 6; // Bitmap index page
-
-    optional uint64 data_footprint = 7; // The size of the data in the column
-    optional uint64 index_footprint = 8; // The size of the index in the column
-    optional uint64 raw_data_footprint = 9; // Original column data size
-
-    optional CompressKind compress_kind = 10; // Column compression type
-
-    optional ZoneMapPB column_zone_map = 11; // Segment-level statistics index data
-    repeated MetadataPairPB column_meta_datas = 12;
-}
-
-message SegmentFooterPB {
- optional uint32 version = 2 [default = 1]; // For version compatibility and upgrade use
- repeated ColumnPB schema = 5; // Schema of columns
- optional uint64 num_values = 4; // Number of lines saved in the file
- optional uint64 index_footprint = 7; // Index size
- optional uint64 data_footprint = 8; // Data size
-    optional uint64 raw_data_footprint = 12; // Original data size
-
- optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // Compression type
- repeated ColumnMetaPB column_metas = 10; // Column metadata
- optional PagePointerPB key_index_page = 11; // short key index page
-}
-
-```
-
-## Read-write logic ##
-
-### Write ###
-
-The general writing process is as follows:
-1. Write magic
-2. Generate corresponding Column Writer according to schema information. Each Column Writer obtains corresponding encoding information (configurable) according to different types, and generates corresponding encoder according to encoding.
-3. Call encoder - > add (value) for data writing. Each K line generates a short key index entry, and if the current page satisfies certain conditions (the size exceeds 1M or the number of rows is K), a new page is generated and cached in memory.
-4. Repeat step 3 until data writing is complete, then flush each column's data to the file in sequence.
-5. Generate FileFooterPB information and write it to the file.
-
-Relevant issues:
-
-- How is the short key index generated?
-    - Currently a sparse short key index is still generated per fixed number of rows, keeping one index entry every 1024 rows. The content is the mapping: short key -> ordinal
-
-- What should be stored in the ordinal index?
- - Store the first ordinal to page pointer mapping information for pages
-- What are stored in pages of different encoding types?
- - Dictionary Compression
- - plain
- - rle
- - bshuf
-
-### Read ###
-
-1. Read the magic of the file and judge the type and version of the file.
-2. Read FileFooterPB and check sum
-3. Read short key index and data ordinal index information of corresponding columns according to required columns
-4. Use start key and end key, locate the row number to be read through short key index, then determine the row ranges to be read through ordinal index, and filter the row ranges to be read through statistics, bitmap index and so on.
-5. Then read row data through ordinal index according to row ranges
-
-Relevant issues:
-1. How to quickly locate a row within the page?
-
-    The data inside a page is encoded, so a specific row cannot be located directly. Different encodings have different schemes for fast row positioning within a page, and they need to be considered case by case:
-    - For RLE encoding, skip forward by parsing the RLE headers until the RLE block containing the target row is reached, then decode within that block.
-    - For binary plain encoding, per-row offset information is stored in the page and its location is recorded in the page header. When reading, the offsets are first parsed into an array, so a row's data within the block can be located quickly via its offset.
-2. How to read blocks efficiently? Should adjacent blocks be merged while being read and read in one pass?
-This requires checking at read time whether the blocks are contiguous; if they are, they are read in a single pass.
-
-## Coding ##
-
-In the existing Doris storage, plain encoding is adopted for string type encoding, which is inefficient. After comparison, it is found that in Baidu statistics scenario, data will expand more than twice because of string type coding. Therefore, it is planned to introduce dictionary-based coding compression.
-
-## Compression ##
-
-A scalable compression framework is implemented, supporting a variety of compression algorithms and making it easy to add new ones later; introducing zstd compression is planned.
-
-## TODO ##
-1. How to implement nested types? How to locate line numbers in nested types?
-2. How to optimize the downstream bitmap and column statistics caused by ScanRange splitting?
diff --git a/docs/en/internal/grouping_sets_design.md b/docs/en/internal/grouping_sets_design.md
deleted file mode 100644
index 16acc33997..0000000000
--- a/docs/en/internal/grouping_sets_design.md
+++ /dev/null
@@ -1,501 +0,0 @@
----
-{
- "title": "GROUPING SETS DESIGN",
- "language": "en"
-}
----
-
-
-# GROUPING SETS DESIGN
-
-## 1. GROUPING SETS Background
-
-The `CUBE`, `ROLLUP`, and `GROUPING` `SETS` extensions to SQL make querying and reporting easier and faster. `CUBE`, `ROLLUP`, and grouping sets produce a single result set that is equivalent to a `UNION` `ALL` of differently grouped rows. `ROLLUP` calculates aggregations such as `SUM`, `COUNT`, `MAX`, `MIN`, and `AVG` at increasing levels of aggregation, from the most detailed up to a grand total. `CUBE` is an extension similar to `ROLLUP`, enabling a single statement to calculate all possible combinations of aggregations. The `CUBE`, `ROLLUP`, and the `GROUPING` `SETS` extension lets you specify just the groupings needed in the `GROUP` `BY` clause. This allows efficient analysis across multiple dimensions without performing a `CUBE` operation. Computing a `CUBE` creates a heavy processing load, so replacing cubes with grouping sets can significantly increase performance.
-To enhance performance, `CUBE`, `ROLLUP`, and `GROUPING SETS` can be parallelized: multiple processes can simultaneously execute all of these statements. These capabilities make aggregate calculations more efficient, thereby enhancing database performance and scalability.
-
-The three `GROUPING` functions help you identify the group each row belongs to and enable sorting subtotal rows and filtering results.
-
-### 1.1 GROUPING SETS Syntax
-
-`GROUPING SETS` syntax lets you define multiple groupings in the same query. `GROUP BY` computes all the groupings specified and combines them with `UNION ALL`. For example, consider the following statement:
-
-```
-SELECT k1, k2, SUM( k3 ) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k1), (k2), ( ) );
-```
-
-
-This statement is equivalent to:
-
-```
-SELECT k1, k2, SUM( k3 ) FROM t GROUP BY k1, k2
-UNION
-SELECT k1, null, SUM( k3 ) FROM t GROUP BY k1
-UNION
-SELECT null, k2, SUM( k3 ) FROM t GROUP BY k2
-UNION
-SELECT null, null, SUM( k3 ) FROM t
-```
-
-This is an example of real query:
-
-```
-mysql> SELECT * FROM t;
-+------+------+------+
-| k1 | k2 | k3 |
-+------+------+------+
-| a | A | 1 |
-| a | A | 2 |
-| a | B | 1 |
-| a | B | 3 |
-| b | A | 1 |
-| b | A | 4 |
-| b | B | 1 |
-| b | B | 5 |
-+------+------+------+
-8 rows in set (0.01 sec)
-
-mysql> SELECT k1, k2, SUM(k3) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k2), (k1), ( ) );
-+------+------+-----------+
-| k1 | k2 | sum(`k3`) |
-+------+------+-----------+
-| b | B | 6 |
-| a | B | 4 |
-| a | A | 3 |
-| b | A | 5 |
-| NULL | B | 10 |
-| NULL | A | 8 |
-| a | NULL | 7 |
-| b | NULL | 11 |
-| NULL | NULL | 18 |
-+------+------+-----------+
-9 rows in set (0.06 sec)
-```
-
-### 1.2 ROLLUP Syntax
-
-`ROLLUP` enables a `SELECT` statement to calculate multiple levels of subtotals across a specified group of dimensions. It also calculates a grand total. `ROLLUP` is a simple extension to the `GROUP` `BY` clause, so its syntax is extremely easy to use. The `ROLLUP` extension is highly efficient, adding minimal overhead to a query.
-
-`ROLLUP` appears in the `GROUP` `BY` clause in a `SELECT` statement. Its form is:
-
-```
-SELECT a, b,c, SUM( d ) FROM tab1 GROUP BY ROLLUP(a,b,c)
-```
-
-This statement is equivalent to the following GROUPING SETS:
-
-```
-GROUPING SETS (
-(a,b,c),
-( a, b ),
-( a),
-( )
-)
-```
-
-### 1.3 CUBE Syntax
-
-Like `ROLLUP`, `CUBE` generates all the subtotals that could be calculated for a data cube with the specified dimensions.
-
-```
-SELECT a, b,c, SUM( d ) FROM tab1 GROUP BY CUBE(a,b,c)
-```
-
-For example, `CUBE ( a, b, c )` is equivalent to the following GROUPING SETS:
-
-```
-GROUPING SETS (
-( a, b, c ),
-( a, b ),
-( a, c ),
-( a ),
-( b, c ),
-( b ),
-( c ),
-( )
-)
-```
-
-### 1.4 GROUPING and GROUPING_ID Function
-
-`GROUPING` indicates whether a specified column expression in a `GROUP BY` list is aggregated or not. `GROUPING` returns 1 for aggregated or 0 for not aggregated in the result set. `GROUPING` can be used only in the `SELECT` list, `HAVING`, and `ORDER BY` clauses when `GROUP BY` is specified.
-
-`GROUPING_ID` describes which of a list of expressions are grouped in a row produced by a `GROUP BY` query. The `GROUPING_ID` function returns the decimal equivalent of the binary value formed by concatenating the values returned by the `GROUPING` functions.
-
-Each `GROUPING_ID` argument must be an element of the `GROUP BY` list. `GROUPING_ID()` returns an **integer** bitmap whose lowest N bits may be lit. A lit **bit** indicates that the corresponding argument is not a grouping column for the given output row. The lowest-order **bit** corresponds to argument N, and the (N-1)th lowest-order **bit** corresponds to argument 1. If the column is a grouping column, the bit is 0; otherwise, it is 1.
-
-For example:
-
-```
-mysql> select * from t;
-+------+------+------+
-| k1 | k2 | k3 |
-+------+------+------+
-| a | A | 1 |
-| a | A | 2 |
-| a | B | 1 |
-| a | B | 3 |
-| b | A | 1 |
-| b | A | 4 |
-| b | B | 1 |
-| b | B | 5 |
-+------+------+------+
-```
-
-GROUPING SETS results:
-
-```
-mysql> SELECT k1, k2, GROUPING(k1), GROUPING(k2), SUM(k3) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k2), (k1), ( ) );
-+------+------+----------------+----------------+-----------+
-| k1 | k2 | grouping(`k1`) | grouping(`k2`) | sum(`k3`) |
-+------+------+----------------+----------------+-----------+
-| a | A | 0 | 0 | 3 |
-| a | B | 0 | 0 | 4 |
-| a | NULL | 0 | 1 | 7 |
-| b | A | 0 | 0 | 5 |
-| b | B | 0 | 0 | 6 |
-| b | NULL | 0 | 1 | 11 |
-| NULL | A | 1 | 0 | 8 |
-| NULL | B | 1 | 0 | 10 |
-| NULL | NULL | 1 | 1 | 18 |
-+------+------+----------------+----------------+-----------+
-9 rows in set (0.02 sec)
-
-mysql> SELECT k1, k2, GROUPING_ID(k1,k2), SUM(k3) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k2), (k1), ( ) );
-+------+------+-------------------------+-----------+
-| k1 | k2 | grouping_id(`k1`, `k2`) | sum(`k3`) |
-+------+------+-------------------------+-----------+
-| a | A | 0 | 3 |
-| a | B | 0 | 4 |
-| a | NULL | 1 | 7 |
-| b | A | 0 | 5 |
-| b | B | 0 | 6 |
-| b | NULL | 1 | 11 |
-| NULL | A | 2 | 8 |
-| NULL | B | 2 | 10 |
-| NULL | NULL | 3 | 18 |
-+------+------+-------------------------+-----------+
-9 rows in set (0.02 sec)
-
-mysql> SELECT k1, k2, grouping(k1), grouping(k2), GROUPING_ID(k1,k2), SUM(k3) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k2), (k1), ( ) ) order by k1, k2;
-+------+------+----------------+----------------+-------------------------+-----------+
-| k1 | k2 | grouping(`k1`) | grouping(`k2`) | grouping_id(`k1`, `k2`) | sum(`k3`) |
-+------+------+----------------+----------------+-------------------------+-----------+
-| a | A | 0 | 0 | 0 | 3 |
-| a | B | 0 | 0 | 0 | 4 |
-| a | NULL | 0 | 1 | 1 | 7 |
-| b | A | 0 | 0 | 0 | 5 |
-| b | B | 0 | 0 | 0 | 6 |
-| b | NULL | 0 | 1 | 1 | 11 |
-| NULL | A | 1 | 0 | 2 | 8 |
-| NULL | B | 1 | 0 | 2 | 10 |
-| NULL | NULL | 1 | 1 | 3 | 18 |
-+------+------+----------------+----------------+-------------------------+-----------+
-9 rows in set (0.02 sec)
-
-```
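-
-As noted above, `GROUPING` can also be used in the `HAVING` and `ORDER BY` clauses to filter and sort subtotal rows. As a small sketch against the same table `t`, the following query keeps only the rows in which `k2` has been aggregated away (the per-`k1` subtotals and the grand total) and sorts the grand total last:
-
-```
-SELECT k1, k2, SUM(k3)
-FROM t
-GROUP BY GROUPING SETS ( (k1, k2), (k1), ( ) )
-HAVING GROUPING(k2) = 1      -- keep only rows where k2 is aggregated
-ORDER BY GROUPING(k1), k1;   -- the grand total (GROUPING(k1) = 1) sorts last
-```
-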
-### 1.5 Composition and nesting of GROUPING SETS
-
-First of all, a GROUP BY clause is essentially a special case of GROUPING SETS, for example:
-
-```
- GROUP BY a
-is equivalent to:
- GROUP BY GROUPING SETS((a))
-also,
- GROUP BY a,b,c
-is equivalent to:
- GROUP BY GROUPING SETS((a,b,c))
-```
-
-Similarly, CUBE and ROLLUP can be expanded into GROUPING SETS, so the various combinations and nestings of GROUP BY, CUBE, ROLLUP, and GROUPING SETS are essentially combinations and nestings of GROUPING SETS.
-
-Nesting GROUPING SETS is semantically equivalent to writing the nested elements directly in the outer clause. As one database reference puts it:
-
-```
-The CUBE and ROLLUP constructs can be used either directly in the GROUP BY clause, or nested inside a GROUPING SETS clause. If one GROUPING SETS clause is nested inside another, the effect is the same as if all the elements of the inner clause had been written directly in the outer clause.
-```
-
-When multiple grouping items are combined in a single GROUP BY list, many databases treat the combination as a cross product.
-
-For example:
-
-```
-GROUP BY a, CUBE (b, c), GROUPING SETS ((d), (e))
-
-is equivalent to:
-
-GROUP BY GROUPING SETS (
-(a, b, c, d), (a, b, c, e),
-(a, b, d), (a, b, e),
-(a, c, d), (a, c, e),
-(a, d), (a, e)
-)
-```
-
-Support for combining and nesting GROUPING SETS differs between databases. For example, Snowflake supports neither combination nor nesting.
-
-Oracle supports both combination and nesting.
-
-Presto supports combination, but not nesting.
-
-## 2. Goals
-
-Support the `GROUPING SETS`, `ROLLUP`, and `CUBE` syntax described in sections 1.1 through 1.5. The current version does not support combining or nesting GROUPING SETS.
-
-### 2.1 GROUPING SETS Syntax
-
-```
-SELECT ...
-FROM ...
-[ ... ]
-GROUP BY GROUPING SETS ( groupSet [ , groupSet [ , ... ] ] )
-[ ... ]
-
-groupSet ::= { ( expr [ , expr [ , ... ] ] )}
-
-
-expr: an expression or a column name.
-```
-
-### 2.2 ROLLUP Syntax
-
-```
-SELECT ...
-FROM ...
-[ ... ]
-GROUP BY ROLLUP ( expr [ , expr [ , ... ] ] )
-[ ... ]
-
-
-expr: an expression or a column name.
-```
-
-### 2.3 CUBE Syntax
-
-```
-SELECT ...
-FROM ...
-[ ... ]
-GROUP BY CUBE ( expr [ , expr [ , ... ] ] )
-[ ... ]
-
-
-expr: an expression or a column name.
-```
-
-## 3. Implementation
-
-### 3.1 Overall Design Approaches
-
-A `GROUPING SETS` query is equivalent to the `UNION ALL` of multiple `GROUP BY` queries, so we can expand the input rows and then run a single `GROUP BY` over the expanded rows.
-
-For example:
-
-```
-SELECT a, b FROM src GROUP BY a, b GROUPING SETS ((a, b), (a), (b), ());
-```
-
-Data in table src:
-
-```
-1, 2
-3, 4
-```
-
-Based on the GROUPING SETS, we can expand the input to:
-
-```
-1, 2 (GROUPING_ID: a, b -> 00 -> 0)
-1, null (GROUPING_ID: a, null -> 01 -> 1)
-null, 2 (GROUPING_ID: null, b -> 10 -> 2)
-null, null (GROUPING_ID: null, null -> 11 -> 3)
-
-3, 4 (GROUPING_ID: a, b -> 00 -> 0)
-3, null (GROUPING_ID: a, null -> 01 -> 1)
-null, 4 (GROUPING_ID: null, b -> 10 -> 2)
-null, null (GROUPING_ID: null, null -> 11 -> 3)
-```
-
-Then, using those expanded rows as input, run GROUP BY a, b, GROUPING_ID.
-
-### 3.2 Example
-
-Table t:
-
-```
-mysql> select * from t;
-+------+------+------+
-| k1 | k2 | k3 |
-+------+------+------+
-| a | A | 1 |
-| a | A | 2 |
-| a | B | 1 |
-| a | B | 3 |
-| b | A | 1 |
-| b | A | 4 |
-| b | B | 1 |
-| b | B | 5 |
-+------+------+------+
-8 rows in set (0.01 sec)
-```
-
-For the query:
-
-```
-SELECT k1, k2, GROUPING_ID(k1,k2), SUM(k3) FROM t GROUP BY GROUPING SETS ((k1, k2), (k1), (k2), ());
-```
-
-First, expand the input: every row expands into 4 rows (one per grouping set), and a GROUPING_ID column is inserted.
-
-For example, the row (a, A, 1) expands to:
-
-```
-+------+------+------+-------------------------+
-| k1 | k2 | k3 | GROUPING_ID(`k1`, `k2`) |
-+------+------+------+-------------------------+
-| a | A | 1 | 0 |
-| a | NULL | 1 | 1 |
-| NULL | A | 1 | 2 |
-| NULL | NULL | 1 | 3 |
-+------+------+------+-------------------------+
-```
-
-Finally, all rows are expanded as follows (32 rows):
-
-```
-+------+------+------+-------------------------+
-| k1 | k2 | k3 | GROUPING_ID(`k1`, `k2`) |
-+------+------+------+-------------------------+
-| a | A | 1 | 0 |
-| a | A | 2 | 0 |
-| a | B | 1 | 0 |
-| a | B | 3 | 0 |
-| b | A | 1 | 0 |
-| b | A | 4 | 0 |
-| b | B | 1 | 0 |
-| b | B | 5 | 0 |
-| a | NULL | 1 | 1 |
-| a | NULL | 1 | 1 |
-| a | NULL | 2 | 1 |
-| a | NULL | 3 | 1 |
-| b | NULL | 1 | 1 |
-| b | NULL | 1 | 1 |
-| b | NULL | 4 | 1 |
-| b | NULL | 5 | 1 |
-| NULL | A | 1 | 2 |
-| NULL | A | 1 | 2 |
-| NULL | A | 2 | 2 |
-| NULL | A | 4 | 2 |
-| NULL | B | 1 | 2 |
-| NULL | B | 1 | 2 |
-| NULL | B | 3 | 2 |
-| NULL | B | 5 | 2 |
-| NULL | NULL | 1 | 3 |
-| NULL | NULL | 1 | 3 |
-| NULL | NULL | 1 | 3 |
-| NULL | NULL | 1 | 3 |
-| NULL | NULL | 2 | 3 |
-| NULL | NULL | 3 | 3 |
-| NULL | NULL | 4 | 3 |
-| NULL | NULL | 5 | 3 |
-+------+------+------+-------------------------+
-32 rows in set.
-```
-
-Now GROUP BY k1, k2, GROUPING_ID(k1, k2):
-
-```
-+------+------+-------------------------+-----------+
-| k1 | k2 | grouping_id(`k1`, `k2`) | sum(`k3`) |
-+------+------+-------------------------+-----------+
-| a | A | 0 | 3 |
-| a | B | 0 | 4 |
-| a | NULL | 1 | 7 |
-| b | A | 0 | 5 |
-| b | B | 0 | 6 |
-| b | NULL | 1 | 11 |
-| NULL | A | 2 | 8 |
-| NULL | B | 2 | 10 |
-| NULL | NULL | 3 | 18 |
-+------+------+-------------------------+-----------+
-9 rows in set (0.02 sec)
-```
-
-The result is equivalent to the following UNION ALL:
-
-```
-select k1, k2, sum(k3) from t group by k1, k2
-UNION ALL
-select NULL, k2, sum(k3) from t group by k2
-UNION ALL
-select k1, NULL, sum(k3) from t group by k1
-UNION ALL
-select NULL, NULL, sum(k3) from t;
-
-+------+------+-----------+
-| k1 | k2 | sum(`k3`) |
-+------+------+-----------+
-| b | B | 6 |
-| b | A | 5 |
-| a | A | 3 |
-| a | B | 4 |
-| a | NULL | 7 |
-| b | NULL | 11 |
-| NULL | B | 10 |
-| NULL | A | 8 |
-| NULL | NULL | 18 |
-+------+------+-----------+
-9 rows in set (0.06 sec)
-```
-
-### 3.3 FE
-
-#### 3.3.1 Tasks
-
-1. Add GroupByClause to replace groupingExprs.
-2. Add GROUPING SETS, CUBE, and ROLLUP syntax.
-3. Add GroupByClause to SelectStmt.
-4. Add GroupingFunctionCallExpr to implement the grouping and grouping_id function calls.
-5. Add VirtualSlot and generate the mapping between virtual slots and real slots.
-6. Add the virtual column GROUPING_ID and other virtual columns generated by grouping and grouping_id, and insert them into groupingExprs.
-7. Add a new PlanNode named RepeatNode. For GROUPING SETS aggregation, insert a RepeatNode into the plan (sketched below).
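-
-As a conceptual sketch only (not actual `EXPLAIN` output; node names are illustrative), the resulting plan for a GROUPING SETS aggregation roughly looks like:
-
-```
-AGGREGATE     (group by: k1, k2, GROUPING_ID)
-  REPEAT_NODE (expands each input row once per grouping set and fills in GROUPING_ID)
-    OLAP_SCAN (table: t)
-```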
-
-#### 3.3.2 Tuple
-
-In order to add GROUPING_ID to groupingExprs in GroupByClause, a virtual SlotRef needs to be created, along with a tuple for this slot, named the GROUPING\_\_ID tuple.
-
-For the plan node RepeatNode, its input is all the tuples of its children, and its output tuple holds the repeated (expanded) data plus GROUPING_ID.
-
-
-#### 3.3.3 Expression and Function Substitution
-
-The FE applies the following substitutions (illustrated in the sketch below):
-
-* For each expr in the grouping extension clause: `expr -> if(bitand(pos, grouping_id) = 0, expr, null)`
-* For the grouping_id() function call: `grouping_id() -> grouping_id(grouping_id)`
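-
-As an illustration only (a rough sketch, not the planner's exact output), the query from section 3.2 can be thought of as being rewritten as below after RepeatNode expansion, where `pos` is each column's bit mask in GROUPING_ID (2 for `k1`, 1 for `k2`, following the bit layout in section 1.4) and `expanded_t` is a hypothetical name for the RepeatNode output:
-
-```
-SELECT
-    if(bitand(2, GROUPING_ID) = 0, k1, NULL) AS k1,
-    if(bitand(1, GROUPING_ID) = 0, k2, NULL) AS k2,
-    GROUPING_ID,
-    SUM(k3)
-FROM expanded_t
-GROUP BY
-    if(bitand(2, GROUPING_ID) = 0, k1, NULL),
-    if(bitand(1, GROUPING_ID) = 0, k2, NULL),
-    GROUPING_ID;
-```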
-
-### 3.4 BE
-
-#### 3.4.1 Tasks
-
-1. Add the RepeatNode executor, which expands the input data and appends GROUPING_ID to every row.
-2. Implement the grouping_id() and grouping() functions.
diff --git a/docs/en/internal/metadata-design.md b/docs/en/internal/metadata-design.md
deleted file mode 100644
index 43a2fa89b1..0000000000
--- a/docs/en/internal/metadata-design.md
+++ /dev/null
@@ -1,127 +0,0 @@
----
-{
- "title": "Metadata Design Document",
- "language": "en"
-}
----
-
-
-
-
-# Metadata Design Document
-
-## Glossary
-
-* FE: Frontend, the front-end node of Doris. Mainly responsible for receiving and returning client requests, metadata and cluster management, query plan generation, and so on.
-* BE: Backend, the back-end node of Doris. Mainly responsible for data storage and management, query plan execution and other work.
-* bdbje: [Oracle Berkeley DB Java Edition](http://www.oracle.com/technetwork/database/berkeleydb/overview/index-093405.html). In Doris, we use bdbje to persist metadata operation logs and to provide high availability of the FE.
-
-## Overall architecture
-
-
-As shown above, Doris's overall architecture is divided into two layers. Multiple FEs form the first layer, providing horizontal scalability and high availability of the FE. Multiple BEs form the second layer, responsible for data storage and management. This document mainly introduces the design and implementation of metadata in the FE layer.
-
-1. There are two kinds of FE nodes: follower and observer. Leader election and data synchronization among FE nodes are handled by bdbje ([BerkeleyDB Java Edition](http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/overview/index-093405.html)).
-
-2. One of the follower nodes is elected as the leader, which is responsible for writing metadata. When the leader node goes down, the other follower nodes re-elect a new leader to ensure high availability of the service.
-
-3. The observer node only synchronizes metadata from the leader node and does not participate in elections. It can be added horizontally to scale out metadata read services.
-
-> Note: The bdbje concepts corresponding to follower and observer are replica and observer. Both names may be used below.
-
-## Metadata structure
-
-Doris's metadata is held entirely in memory: a complete metadata image is maintained in the memory of each FE. Within Baidu, a cluster with 2,500 tables and 1 million tablets (3 million replicas) occupies only about 2 GB of metadata memory. (Of course, the memory overhead of intermediate query objects and various job information needs to be estimated according to the actual situation, but it still remains a low memory overhead.)
-
-Metadata is stored in memory as a whole, in a tree-like hierarchical structure. By adding auxiliary structures, metadata at each level can be accessed quickly.
-
-The following figure shows the contents stored in Doris metadata.
-
-
-
-As shown above, Doris's metadata mainly stores four types of data:
-
-1. User data information, including databases, table schemas, tablet (shard) information, etc.
-2. All kinds of job information, such as import jobs, Clone jobs, SchemaChange jobs, etc.
-3. User and permission information.
-4. Cluster and node information.
-
-## Data flow
-
-
-
-The data flow of metadata is as follows:
-
-1. Only the leader FE can write metadata. After modifying the leader's in-memory state, the write operation is serialized into a log and written to bdbje as a key-value pair. The key is a monotonically increasing integer used as the log id; the value is the serialized operation log.
-
-2. After the log is written to bdbje, bdbje replicates the log to the other non-leader FE nodes according to the configured policy (write majority / write all). The non-leader FE nodes modify their in-memory metadata images by replaying the log, completing synchronization with the leader's metadata.
-
-3. When the number of log entries on the leader node reaches a threshold (100,000 by default), the checkpoint thread starts. The checkpoint reads the existing image file and the subsequent logs, and replays them into a new copy of the metadata image in memory. The copy is then written to disk to form a new image. The reason for regenerating a new copy instead of dumping the existing in-memory image directly is that writing the image requires holding a read lock, which would block write operations for the duration of the dump. As a result, each checkpoint temporarily uses twice the memory.
-
-4. After the image file is generated, the leader node notifies the other non-leader nodes that a new image has been generated. The non-leader nodes then actively pull the latest image file over HTTP to replace their old local files.
-
-5. The logs in bdbje will be deleted regularly after the image is completed.
-
-## Implementation details
-
-### Metadata directory
-
-1. The metadata directory is specified by the FE configuration item `meta_dir` (an example layout is sketched after this list).
-
-2. The data storage directory for bdbje is the `bdb/` directory.
-
-3. Image files are stored under the `image/` directory.
-
-* `image.[logid]` is the latest image file. The suffix `logid` indicates the id of the last log entry contained in the image.
-* `image.ckpt` is the image file currently being written. If it is written successfully, it is renamed to `image.[logid]` and replaces the old image file.
-* The `cluster_id` is recorded in the `VERSION` file. `cluster_id` uniquely identifies a Doris cluster. It is a 32-bit integer randomly generated at the first startup of the leader. A cluster id can also be specified through the FE configuration item `cluster_id`.
-* The FE's own role is recorded in the `ROLE` file. There are only two values: `FOLLOWER` and `OBSERVER`, where `FOLLOWER` denotes an electable node. (Note: even the leader node has the role `FOLLOWER`.)
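-
-For illustration, a `meta_dir` laid out as described above might look roughly like this (the logid value is an example only):
-
-```
-meta_dir/
-├── bdb/                # bdbje data files
-└── image/
-    ├── image.12345     # latest image; 12345 is the id of the last log it contains
-    ├── image.ckpt      # image currently being written (renamed on success)
-    ├── VERSION         # records cluster_id
-    └── ROLE            # records the FE's own role (FOLLOWER or OBSERVER)
-```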
-
-### Start-up process
-
-1. The FE starts for the first time and the startup script has no extra parameters: it tries to start as the leader. You will eventually see `transfer from UNKNOWN to MASTER` in the FE startup log.
-
-2. The FE starts for the first time and the `-helper` parameter in the startup script points to the correct leader FE node: the FE first asks the leader node for its role (ROLE) and cluster_id over HTTP, then pulls the latest image file. After reading the image file and building the metadata image, it starts bdbje and begins bdbje log synchronization. Once synchronization completes, the logs in bdbje that follow the image are replayed to produce the final, up-to-date metadata image.
-
- > Note 1: When starting with the `-helper` parameter, the new FE must first be added through the leader via a MySQL command (see the example at the end of this list); otherwise startup will report an error.
-
- > Note 2: `-helper` can point to any follower node, even if it is not the leader.
-
- > Note 3: While logs are being synchronized, the FE log will show `xxx detached`. This means the log pull is in progress, which is normal.
-
-3. The FE is not starting for the first time and the startup script has no extra parameters: it determines its identity according to the ROLE information stored locally, and obtains the leader information from the cluster information stored in the local bdbje. It then reads the local image file and the logs in bdbje to complete the metadata image generation. (If the role recorded in the local ROLE file is inconsistent with the one recorded in bdbje, an error is reported.)
-
-4. The FE is not starting for the first time and the `-helper` parameter is specified in the startup script: just like the first-start process, it first asks for the leader's role, but compares it against its locally stored ROLE; if they are inconsistent, an error is reported.
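-
-As referenced in Note 1 above, a sketch of adding the new FE through the leader before starting it with `-helper` (host and port are placeholders):
-
-```
-ALTER SYSTEM ADD FOLLOWER "new_fe_host:edit_log_port";
--- or, for an observer node:
-ALTER SYSTEM ADD OBSERVER "new_fe_host:edit_log_port";
-```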
-
-### Metadata Read-Write and Synchronization
-
-1. Users can connect to any FE node over the MySQL protocol to read and write metadata. If the connection is to a non-leader node, that node forwards write operations to the leader. After the leader writes successfully, it returns its current, up-to-date log id. The non-leader node then waits until the log id it has replayed catches up with the returned log id before reporting success to the client. This approach guarantees Read-Your-Writes semantics on any FE node.
-
- > Note: Some non-write operations are also forwarded to the leader for execution, for example `SHOW LOAD`. Such commands usually need to read the intermediate states of certain jobs, and these states are not written to bdbje, so the memory of non-leader nodes does not contain them. (Metadata synchronization between FEs depends entirely on bdbje log replay; if a metadata modification does not write a bdbje log, its result will not be visible on the other non-leader nodes.)
-
-2. The leader node starts a TimePrinter thread, which periodically (every half of `meta_delay_toleration_second`) writes a key-value entry containing the current time to bdbje. Non-leader nodes read the recorded time from the replayed log and compare it with their local time. If the lag is greater than the threshold specified by the configuration item `meta_delay_toleration_second`, the node enters the **unreadable** state. This mechanism prevents a non-leader node that has been disconnected from the leader for a long time from continuing to serve outdated metadata.
-
-3. The metadata of each FE only guarantees eventual consistency. Normally, the inconsistency window is only a few milliseconds. We guarantee monotonic consistency of metadata access within the same session, but if the same client connects to different FEs, metadata regression may occur. (For a batch-update system, this has little impact.)
-
-### Downtime recovery
-
-1. When the leader node goes down, the remaining followers immediately elect a new leader node to provide service.
-2. Metadata cannot be written when a majority of the follower nodes are down. While metadata is unwritable, an incoming write request currently causes the **FE process to exit**. This logic will be optimized in the future so that read services are still provided in the non-writable state.
-3. The downtime of an observer node does not affect the state of any other node, nor does it affect metadata reads and writes on other nodes.
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/approx_count_distinct.md b/docs/en/sql-manual/sql-functions/aggregate-functions/approx_count_distinct.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/approx_count_distinct.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/approx_count_distinct.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/avg.md b/docs/en/sql-manual/sql-functions/aggregate-functions/avg.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/avg.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/avg.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/bitmap_union.md b/docs/en/sql-manual/sql-functions/aggregate-functions/bitmap_union.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/bitmap_union.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/bitmap_union.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/count.md b/docs/en/sql-manual/sql-functions/aggregate-functions/count.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/count.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/count.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/group_concat.md b/docs/en/sql-manual/sql-functions/aggregate-functions/group_concat.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/group_concat.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/group_concat.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md b/docs/en/sql-manual/sql-functions/aggregate-functions/hll_union_agg.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/hll_union_agg.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/max.md b/docs/en/sql-manual/sql-functions/aggregate-functions/max.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/max.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/max.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/max_by.md b/docs/en/sql-manual/sql-functions/aggregate-functions/max_by.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/max_by.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/max_by.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/min.md b/docs/en/sql-manual/sql-functions/aggregate-functions/min.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/min.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/min.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/min_by.md b/docs/en/sql-manual/sql-functions/aggregate-functions/min_by.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/min_by.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/min_by.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/percentile.md b/docs/en/sql-manual/sql-functions/aggregate-functions/percentile.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/percentile.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/percentile.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/percentile_approx.md b/docs/en/sql-manual/sql-functions/aggregate-functions/percentile_approx.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/percentile_approx.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/percentile_approx.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/stddev.md b/docs/en/sql-manual/sql-functions/aggregate-functions/stddev.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/stddev.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/stddev.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/stddev_samp.md b/docs/en/sql-manual/sql-functions/aggregate-functions/stddev_samp.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/stddev_samp.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/stddev_samp.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/sum.md b/docs/en/sql-manual/sql-functions/aggregate-functions/sum.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/sum.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/sum.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/topn.md b/docs/en/sql-manual/sql-functions/aggregate-functions/topn.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/topn.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/topn.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/var_samp.md b/docs/en/sql-manual/sql-functions/aggregate-functions/var_samp.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/var_samp.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/var_samp.md
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/variance.md b/docs/en/sql-manual/sql-functions/aggregate-functions/variance.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/aggregate-functions/variance.md
rename to docs/en/sql-manual/sql-functions/aggregate-functions/variance.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_and.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_and.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_and_count.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and_count.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_and_count.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and_count.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_and_not.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and_not.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_and_not.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and_not.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_and_not_count.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and_not_count.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_and_not_count.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and_not_count.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_contains.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_contains.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_contains.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_contains.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_empty.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_empty.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_empty.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_empty.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_from_string.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_from_string.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_from_string.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_from_string.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_has_all.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_has_all.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_has_all.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_has_all.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_has_any.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_has_any.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_has_any.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_has_any.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_hash.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_hash.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_hash.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_hash.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_intersect.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_intersect.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_max.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_max.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_max.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_max.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_min.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_min.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_min.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_min.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_not.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_not.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_not.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_not.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_or.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_or.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_or.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_or.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_or_count.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_or_count.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_or_count.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_or_count.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_in_range.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_subset_in_range.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_in_range.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_subset_in_range.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_subset_limit.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_subset_limit.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_to_string.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_to_string.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_to_string.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_to_string.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_union.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_union.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_union.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_union.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_xor.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_xor.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_xor.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_xor.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_xor_count.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_xor_count.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_xor_count.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_xor_count.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_intersect.md b/docs/en/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_intersect.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count.md b/docs/en/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_union_count.md b/docs/en/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_union_count.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_union_count.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_union_count.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/sub_bitmap.md b/docs/en/sql-manual/sql-functions/bitmap-functions/sub_bitmap.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/sub_bitmap.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/sub_bitmap.md
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/to_bitmap.md b/docs/en/sql-manual/sql-functions/bitmap-functions/to_bitmap.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitmap-functions/to_bitmap.md
rename to docs/en/sql-manual/sql-functions/bitmap-functions/to_bitmap.md
diff --git a/docs/en/sql-reference/sql-functions/bitwise-functions/bitand.md b/docs/en/sql-manual/sql-functions/bitwise-functions/bitand.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitwise-functions/bitand.md
rename to docs/en/sql-manual/sql-functions/bitwise-functions/bitand.md
diff --git a/docs/en/sql-reference/sql-functions/bitwise-functions/bitnot.md b/docs/en/sql-manual/sql-functions/bitwise-functions/bitnot.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitwise-functions/bitnot.md
rename to docs/en/sql-manual/sql-functions/bitwise-functions/bitnot.md
diff --git a/docs/en/sql-reference/sql-functions/bitwise-functions/bitor.md b/docs/en/sql-manual/sql-functions/bitwise-functions/bitor.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitwise-functions/bitor.md
rename to docs/en/sql-manual/sql-functions/bitwise-functions/bitor.md
diff --git a/docs/en/sql-reference/sql-functions/bitwise-functions/bitxor.md b/docs/en/sql-manual/sql-functions/bitwise-functions/bitxor.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/bitwise-functions/bitxor.md
rename to docs/en/sql-manual/sql-functions/bitwise-functions/bitxor.md
diff --git a/docs/en/sql-reference/sql-functions/cast.md b/docs/en/sql-manual/sql-functions/cast.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/cast.md
rename to docs/en/sql-manual/sql-functions/cast.md
diff --git a/docs/en/sql-reference/sql-functions/conditional-functions/case.md b/docs/en/sql-manual/sql-functions/conditional-functions/case.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/conditional-functions/case.md
rename to docs/en/sql-manual/sql-functions/conditional-functions/case.md
diff --git a/docs/en/sql-reference/sql-functions/conditional-functions/coalesce.md b/docs/en/sql-manual/sql-functions/conditional-functions/coalesce.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/conditional-functions/coalesce.md
rename to docs/en/sql-manual/sql-functions/conditional-functions/coalesce.md
diff --git a/docs/en/sql-reference/sql-functions/conditional-functions/if.md b/docs/en/sql-manual/sql-functions/conditional-functions/if.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/conditional-functions/if.md
rename to docs/en/sql-manual/sql-functions/conditional-functions/if.md
diff --git a/docs/en/sql-reference/sql-functions/conditional-functions/ifnull.md b/docs/en/sql-manual/sql-functions/conditional-functions/ifnull.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/conditional-functions/ifnull.md
rename to docs/en/sql-manual/sql-functions/conditional-functions/ifnull.md
diff --git a/docs/en/sql-reference/sql-functions/conditional-functions/nullif.md b/docs/en/sql-manual/sql-functions/conditional-functions/nullif.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/conditional-functions/nullif.md
rename to docs/en/sql-manual/sql-functions/conditional-functions/nullif.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/convert_tz.md b/docs/en/sql-manual/sql-functions/date-time-functions/convert_tz.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/convert_tz.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/convert_tz.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/curdate.md b/docs/en/sql-manual/sql-functions/date-time-functions/curdate.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/curdate.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/curdate.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/current_timestamp.md b/docs/en/sql-manual/sql-functions/date-time-functions/current_timestamp.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/current_timestamp.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/current_timestamp.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/curtime.md b/docs/en/sql-manual/sql-functions/date-time-functions/curtime.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/curtime.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/curtime.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/date_add.md b/docs/en/sql-manual/sql-functions/date-time-functions/date_add.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/date_add.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/date_add.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/date_format.md b/docs/en/sql-manual/sql-functions/date-time-functions/date_format.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/date_format.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/date_format.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/date_sub.md b/docs/en/sql-manual/sql-functions/date-time-functions/date_sub.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/date_sub.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/date_sub.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/datediff.md b/docs/en/sql-manual/sql-functions/date-time-functions/datediff.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/datediff.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/datediff.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/day.md b/docs/en/sql-manual/sql-functions/date-time-functions/day.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/day.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/day.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/dayname.md b/docs/en/sql-manual/sql-functions/date-time-functions/dayname.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/dayname.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/dayname.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/dayofmonth.md b/docs/en/sql-manual/sql-functions/date-time-functions/dayofmonth.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/dayofmonth.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/dayofmonth.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/dayofweek.md b/docs/en/sql-manual/sql-functions/date-time-functions/dayofweek.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/dayofweek.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/dayofweek.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/dayofyear.md b/docs/en/sql-manual/sql-functions/date-time-functions/dayofyear.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/dayofyear.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/dayofyear.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/from_days.md b/docs/en/sql-manual/sql-functions/date-time-functions/from_days.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/from_days.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/from_days.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/from_unixtime.md b/docs/en/sql-manual/sql-functions/date-time-functions/from_unixtime.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/from_unixtime.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/from_unixtime.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/hour.md b/docs/en/sql-manual/sql-functions/date-time-functions/hour.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/hour.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/hour.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/makedate.md b/docs/en/sql-manual/sql-functions/date-time-functions/makedate.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/makedate.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/makedate.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/minute.md b/docs/en/sql-manual/sql-functions/date-time-functions/minute.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/minute.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/minute.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/month.md b/docs/en/sql-manual/sql-functions/date-time-functions/month.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/month.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/month.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/monthname.md b/docs/en/sql-manual/sql-functions/date-time-functions/monthname.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/monthname.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/monthname.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/now.md b/docs/en/sql-manual/sql-functions/date-time-functions/now.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/now.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/now.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/second.md b/docs/en/sql-manual/sql-functions/date-time-functions/second.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/second.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/second.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/str_to_date.md b/docs/en/sql-manual/sql-functions/date-time-functions/str_to_date.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/str_to_date.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/str_to_date.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/time_round.md b/docs/en/sql-manual/sql-functions/date-time-functions/time_round.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/time_round.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/time_round.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/timediff.md b/docs/en/sql-manual/sql-functions/date-time-functions/timediff.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/timediff.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/timediff.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/timestampadd.md b/docs/en/sql-manual/sql-functions/date-time-functions/timestampadd.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/timestampadd.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/timestampadd.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/timestampdiff.md b/docs/en/sql-manual/sql-functions/date-time-functions/timestampdiff.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/timestampdiff.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/timestampdiff.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/to_date.md b/docs/en/sql-manual/sql-functions/date-time-functions/to_date.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/to_date.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/to_date.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/to_days.md b/docs/en/sql-manual/sql-functions/date-time-functions/to_days.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/to_days.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/to_days.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/unix_timestamp.md b/docs/en/sql-manual/sql-functions/date-time-functions/unix_timestamp.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/unix_timestamp.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/unix_timestamp.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/utc_timestamp.md b/docs/en/sql-manual/sql-functions/date-time-functions/utc_timestamp.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/utc_timestamp.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/utc_timestamp.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/week.md b/docs/en/sql-manual/sql-functions/date-time-functions/week.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/week.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/week.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/weekday.md b/docs/en/sql-manual/sql-functions/date-time-functions/weekday.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/weekday.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/weekday.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/weekofyear.md b/docs/en/sql-manual/sql-functions/date-time-functions/weekofyear.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/weekofyear.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/weekofyear.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/year.md b/docs/en/sql-manual/sql-functions/date-time-functions/year.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/year.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/year.md
diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/yearweek.md b/docs/en/sql-manual/sql-functions/date-time-functions/yearweek.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/date-time-functions/yearweek.md
rename to docs/en/sql-manual/sql-functions/date-time-functions/yearweek.md
diff --git a/docs/en/sql-reference/sql-functions/digital-masking.md b/docs/en/sql-manual/sql-functions/digital-masking.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/digital-masking.md
rename to docs/en/sql-manual/sql-functions/digital-masking.md
diff --git a/docs/en/sql-reference/sql-functions/encrypt-digest-functions/aes.md b/docs/en/sql-manual/sql-functions/encrypt-digest-functions/aes.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/encrypt-digest-functions/aes.md
rename to docs/en/sql-manual/sql-functions/encrypt-digest-functions/aes.md
diff --git a/docs/en/sql-reference/sql-functions/encrypt-digest-functions/md5.md b/docs/en/sql-manual/sql-functions/encrypt-digest-functions/md5.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/encrypt-digest-functions/md5.md
rename to docs/en/sql-manual/sql-functions/encrypt-digest-functions/md5.md
diff --git a/docs/en/sql-reference/sql-functions/encrypt-digest-functions/md5sum.md b/docs/en/sql-manual/sql-functions/encrypt-digest-functions/md5sum.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/encrypt-digest-functions/md5sum.md
rename to docs/en/sql-manual/sql-functions/encrypt-digest-functions/md5sum.md
diff --git a/docs/en/sql-reference/sql-functions/encrypt-digest-functions/sm3.md b/docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm3.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/encrypt-digest-functions/sm3.md
rename to docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm3.md
diff --git a/docs/en/sql-reference/sql-functions/encrypt-digest-functions/sm3sum.md b/docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm3sum.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/encrypt-digest-functions/sm3sum.md
rename to docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm3sum.md
diff --git a/docs/en/sql-reference/sql-functions/encrypt-digest-functions/sm4.md b/docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm4.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/encrypt-digest-functions/sm4.md
rename to docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm4.md
diff --git a/new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/aes.md b/docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/aes.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/aes.md
rename to docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/aes.md
diff --git a/new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/md5.md b/docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/md5.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/md5.md
rename to docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/md5.md
diff --git a/new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/md5sum.md b/docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/md5sum.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/md5sum.md
rename to docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/md5sum.md
diff --git a/new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm3.md b/docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/sm3.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm3.md
rename to docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/sm3.md
diff --git a/new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm3sum.md b/docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/sm3sum.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm3sum.md
rename to docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/sm3sum.md
diff --git a/new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm4.md b/docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/sm4.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm4.md
rename to docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/sm4.md
diff --git a/docs/en/sql-reference/sql-functions/hash-functions/murmur_hash3_32.md b/docs/en/sql-manual/sql-functions/hash-functions/murmur_hash3_32.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/hash-functions/murmur_hash3_32.md
rename to docs/en/sql-manual/sql-functions/hash-functions/murmur_hash3_32.md
diff --git a/docs/en/sql-reference/sql-functions/json-functions/get_json_double.md b/docs/en/sql-manual/sql-functions/json-functions/get_json_double.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/json-functions/get_json_double.md
rename to docs/en/sql-manual/sql-functions/json-functions/get_json_double.md
diff --git a/docs/en/sql-reference/sql-functions/json-functions/get_json_int.md b/docs/en/sql-manual/sql-functions/json-functions/get_json_int.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/json-functions/get_json_int.md
rename to docs/en/sql-manual/sql-functions/json-functions/get_json_int.md
diff --git a/docs/en/sql-reference/sql-functions/json-functions/get_json_string.md b/docs/en/sql-manual/sql-functions/json-functions/get_json_string.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/json-functions/get_json_string.md
rename to docs/en/sql-manual/sql-functions/json-functions/get_json_string.md
diff --git a/docs/en/sql-reference/sql-functions/json-functions/json_array.md b/docs/en/sql-manual/sql-functions/json-functions/json_array.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/json-functions/json_array.md
rename to docs/en/sql-manual/sql-functions/json-functions/json_array.md
diff --git a/docs/en/sql-reference/sql-functions/json-functions/json_object.md b/docs/en/sql-manual/sql-functions/json-functions/json_object.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/json-functions/json_object.md
rename to docs/en/sql-manual/sql-functions/json-functions/json_object.md
diff --git a/docs/en/sql-reference/sql-functions/json-functions/json_quote.md b/docs/en/sql-manual/sql-functions/json-functions/json_quote.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/json-functions/json_quote.md
rename to docs/en/sql-manual/sql-functions/json-functions/json_quote.md
diff --git a/docs/en/sql-reference/sql-functions/math-functions/conv.md b/docs/en/sql-manual/sql-functions/math-functions/conv.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/math-functions/conv.md
rename to docs/en/sql-manual/sql-functions/math-functions/conv.md
diff --git a/docs/en/sql-reference/sql-functions/math-functions/pmod.md b/docs/en/sql-manual/sql-functions/math-functions/pmod.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/math-functions/pmod.md
rename to docs/en/sql-manual/sql-functions/math-functions/pmod.md
diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_astext.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_astext.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/spatial-functions/st_astext.md
rename to docs/en/sql-manual/sql-functions/spatial-functions/st_astext.md
diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_circle.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_circle.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/spatial-functions/st_circle.md
rename to docs/en/sql-manual/sql-functions/spatial-functions/st_circle.md
diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_contains.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_contains.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/spatial-functions/st_contains.md
rename to docs/en/sql-manual/sql-functions/spatial-functions/st_contains.md
diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_distance_sphere.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md
rename to docs/en/sql-manual/sql-functions/spatial-functions/st_distance_sphere.md
diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_geometryfromtext.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md
rename to docs/en/sql-manual/sql-functions/spatial-functions/st_geometryfromtext.md
diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_linefromtext.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_linefromtext.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/spatial-functions/st_linefromtext.md
rename to docs/en/sql-manual/sql-functions/spatial-functions/st_linefromtext.md
diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_point.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_point.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/spatial-functions/st_point.md
rename to docs/en/sql-manual/sql-functions/spatial-functions/st_point.md
diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_polygon.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_polygon.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/spatial-functions/st_polygon.md
rename to docs/en/sql-manual/sql-functions/spatial-functions/st_polygon.md
diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_x.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_x.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/spatial-functions/st_x.md
rename to docs/en/sql-manual/sql-functions/spatial-functions/st_x.md
diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_y.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_y.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/spatial-functions/st_y.md
rename to docs/en/sql-manual/sql-functions/spatial-functions/st_y.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/append_trailing_char_if_absent.md b/docs/en/sql-manual/sql-functions/string-functions/append_trailing_char_if_absent.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/append_trailing_char_if_absent.md
rename to docs/en/sql-manual/sql-functions/string-functions/append_trailing_char_if_absent.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/ascii.md b/docs/en/sql-manual/sql-functions/string-functions/ascii.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/ascii.md
rename to docs/en/sql-manual/sql-functions/string-functions/ascii.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/bit_length.md b/docs/en/sql-manual/sql-functions/string-functions/bit_length.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/bit_length.md
rename to docs/en/sql-manual/sql-functions/string-functions/bit_length.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/char_length.md b/docs/en/sql-manual/sql-functions/string-functions/char_length.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/char_length.md
rename to docs/en/sql-manual/sql-functions/string-functions/char_length.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/concat.md b/docs/en/sql-manual/sql-functions/string-functions/concat.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/concat.md
rename to docs/en/sql-manual/sql-functions/string-functions/concat.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/concat_ws.md b/docs/en/sql-manual/sql-functions/string-functions/concat_ws.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/concat_ws.md
rename to docs/en/sql-manual/sql-functions/string-functions/concat_ws.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/ends_with.md b/docs/en/sql-manual/sql-functions/string-functions/ends_with.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/ends_with.md
rename to docs/en/sql-manual/sql-functions/string-functions/ends_with.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/find_in_set.md b/docs/en/sql-manual/sql-functions/string-functions/find_in_set.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/find_in_set.md
rename to docs/en/sql-manual/sql-functions/string-functions/find_in_set.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/hex.md b/docs/en/sql-manual/sql-functions/string-functions/hex.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/hex.md
rename to docs/en/sql-manual/sql-functions/string-functions/hex.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/instr.md b/docs/en/sql-manual/sql-functions/string-functions/instr.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/instr.md
rename to docs/en/sql-manual/sql-functions/string-functions/instr.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/lcase.md b/docs/en/sql-manual/sql-functions/string-functions/lcase.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/lcase.md
rename to docs/en/sql-manual/sql-functions/string-functions/lcase.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/left.md b/docs/en/sql-manual/sql-functions/string-functions/left.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/left.md
rename to docs/en/sql-manual/sql-functions/string-functions/left.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/length.md b/docs/en/sql-manual/sql-functions/string-functions/length.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/length.md
rename to docs/en/sql-manual/sql-functions/string-functions/length.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/like/like.md b/docs/en/sql-manual/sql-functions/string-functions/like/like.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/like/like.md
rename to docs/en/sql-manual/sql-functions/string-functions/like/like.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/like/not_like.md b/docs/en/sql-manual/sql-functions/string-functions/like/not_like.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/like/not_like.md
rename to docs/en/sql-manual/sql-functions/string-functions/like/not_like.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/locate.md b/docs/en/sql-manual/sql-functions/string-functions/locate.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/locate.md
rename to docs/en/sql-manual/sql-functions/string-functions/locate.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/lower.md b/docs/en/sql-manual/sql-functions/string-functions/lower.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/lower.md
rename to docs/en/sql-manual/sql-functions/string-functions/lower.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/lpad.md b/docs/en/sql-manual/sql-functions/string-functions/lpad.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/lpad.md
rename to docs/en/sql-manual/sql-functions/string-functions/lpad.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/ltrim.md b/docs/en/sql-manual/sql-functions/string-functions/ltrim.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/ltrim.md
rename to docs/en/sql-manual/sql-functions/string-functions/ltrim.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/money_format.md b/docs/en/sql-manual/sql-functions/string-functions/money_format.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/money_format.md
rename to docs/en/sql-manual/sql-functions/string-functions/money_format.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/null_or_empty.md b/docs/en/sql-manual/sql-functions/string-functions/null_or_empty.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/null_or_empty.md
rename to docs/en/sql-manual/sql-functions/string-functions/null_or_empty.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/regexp/not_regexp.md b/docs/en/sql-manual/sql-functions/string-functions/regexp/not_regexp.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/regexp/not_regexp.md
rename to docs/en/sql-manual/sql-functions/string-functions/regexp/not_regexp.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/regexp/regexp.md b/docs/en/sql-manual/sql-functions/string-functions/regexp/regexp.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/regexp/regexp.md
rename to docs/en/sql-manual/sql-functions/string-functions/regexp/regexp.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/regexp/regexp_extract.md b/docs/en/sql-manual/sql-functions/string-functions/regexp/regexp_extract.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/regexp/regexp_extract.md
rename to docs/en/sql-manual/sql-functions/string-functions/regexp/regexp_extract.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/regexp/regexp_replace.md b/docs/en/sql-manual/sql-functions/string-functions/regexp/regexp_replace.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/regexp/regexp_replace.md
rename to docs/en/sql-manual/sql-functions/string-functions/regexp/regexp_replace.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/repeat.md b/docs/en/sql-manual/sql-functions/string-functions/repeat.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/repeat.md
rename to docs/en/sql-manual/sql-functions/string-functions/repeat.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/replace.md b/docs/en/sql-manual/sql-functions/string-functions/replace.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/replace.md
rename to docs/en/sql-manual/sql-functions/string-functions/replace.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/reverse.md b/docs/en/sql-manual/sql-functions/string-functions/reverse.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/reverse.md
rename to docs/en/sql-manual/sql-functions/string-functions/reverse.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/right.md b/docs/en/sql-manual/sql-functions/string-functions/right.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/right.md
rename to docs/en/sql-manual/sql-functions/string-functions/right.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/rpad.md b/docs/en/sql-manual/sql-functions/string-functions/rpad.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/rpad.md
rename to docs/en/sql-manual/sql-functions/string-functions/rpad.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/split_part.md b/docs/en/sql-manual/sql-functions/string-functions/split_part.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/split_part.md
rename to docs/en/sql-manual/sql-functions/string-functions/split_part.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/starts_with.md b/docs/en/sql-manual/sql-functions/string-functions/starts_with.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/starts_with.md
rename to docs/en/sql-manual/sql-functions/string-functions/starts_with.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/strleft.md b/docs/en/sql-manual/sql-functions/string-functions/strleft.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/strleft.md
rename to docs/en/sql-manual/sql-functions/string-functions/strleft.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/strright.md b/docs/en/sql-manual/sql-functions/string-functions/strright.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/strright.md
rename to docs/en/sql-manual/sql-functions/string-functions/strright.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/substring.md b/docs/en/sql-manual/sql-functions/string-functions/substring.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/substring.md
rename to docs/en/sql-manual/sql-functions/string-functions/substring.md
diff --git a/docs/en/sql-reference/sql-functions/string-functions/unhex.md b/docs/en/sql-manual/sql-functions/string-functions/unhex.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/string-functions/unhex.md
rename to docs/en/sql-manual/sql-functions/string-functions/unhex.md
diff --git a/docs/en/sql-reference/sql-functions/table-functions/explode-bitmap.md b/docs/en/sql-manual/sql-functions/table-functions/explode-bitmap.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/table-functions/explode-bitmap.md
rename to docs/en/sql-manual/sql-functions/table-functions/explode-bitmap.md
diff --git a/docs/en/sql-reference/sql-functions/table-functions/explode-json-array.md b/docs/en/sql-manual/sql-functions/table-functions/explode-json-array.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/table-functions/explode-json-array.md
rename to docs/en/sql-manual/sql-functions/table-functions/explode-json-array.md
diff --git a/docs/en/sql-reference/sql-functions/table-functions/explode-numbers.md b/docs/en/sql-manual/sql-functions/table-functions/explode-numbers.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/table-functions/explode-numbers.md
rename to docs/en/sql-manual/sql-functions/table-functions/explode-numbers.md
diff --git a/docs/en/sql-reference/sql-functions/table-functions/explode-split.md b/docs/en/sql-manual/sql-functions/table-functions/explode-split.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/table-functions/explode-split.md
rename to docs/en/sql-manual/sql-functions/table-functions/explode-split.md
diff --git a/docs/en/sql-reference/sql-functions/table-functions/outer-combinator.md b/docs/en/sql-manual/sql-functions/table-functions/outer-combinator.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/table-functions/outer-combinator.md
rename to docs/en/sql-manual/sql-functions/table-functions/outer-combinator.md
diff --git a/docs/en/sql-reference/sql-functions/window-function.md b/docs/en/sql-manual/sql-functions/window-function.md
similarity index 100%
rename from docs/en/sql-reference/sql-functions/window-function.md
rename to docs/en/sql-manual/sql-functions/window-function.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-ROLE.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-ROLE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-ROLE.md
rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-ROLE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-USER.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-USER.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-USER.md
rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-USER.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-ROLE.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-ROLE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-ROLE.md
rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-ROLE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-USER.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-USER.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-USER.md
rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-USER.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/GRANT.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/GRANT.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/GRANT.md
rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/GRANT.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/LDAP.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/LDAP.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/LDAP.md
rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/LDAP.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/REVOKE.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/REVOKE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/REVOKE.md
rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/REVOKE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PASSWORD.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PASSWORD.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PASSWORD.md
rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PASSWORD.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PROPERTY.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PROPERTY.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PROPERTY.md
rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PROPERTY.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md
rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BROKER.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BROKER.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BROKER.md
rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BROKER.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md
rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md
rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md
rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md
rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BROKER.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BROKER.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BROKER.md
rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BROKER.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md
rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md
rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BACKEND.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BACKEND.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BACKEND.md
rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BACKEND.md
diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER.md
similarity index 100%
rename from docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER.md
rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md
rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-DATABASE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-DATABASE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-DATABASE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-DATABASE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-SQL-BLOCK-RULE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-SQL-BLOCK-RULE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-SQL-BLOCK-RULE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-SQL-BLOCK-RULE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-BITMAP.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-BITMAP.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-BITMAP.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-BITMAP.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-VIEW.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-VIEW.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-VIEW.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-VIEW.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/BACKUP.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/BACKUP.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/BACKUP.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/BACKUP.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RECOVER.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RECOVER.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RECOVER.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RECOVER.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RESTORE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RESTORE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RESTORE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RESTORE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-DATABASE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-DATABASE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-DATABASE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-DATABASE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-EXTERNAL-TABLE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-EXTERNAL-TABLE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-EXTERNAL-TABLE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-EXTERNAL-TABLE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FILE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FILE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FILE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FILE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FUNCTION.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FUNCTION.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FUNCTION.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FUNCTION.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-INDEX.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-INDEX.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-INDEX.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-INDEX.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-RESOURCE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-RESOURCE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-RESOURCE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-RESOURCE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-SQL-BLOCK-RULE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-SQL-BLOCK-RULE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-SQL-BLOCK-RULE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-SQL-BLOCK-RULE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-VIEW.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-VIEW.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-VIEW.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-VIEW.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-DATABASE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-DATABASE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-DATABASE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-DATABASE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FILE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FILE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FILE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FILE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FUNCTION.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FUNCTION.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FUNCTION.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FUNCTION.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-INDEX.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-INDEX.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-INDEX.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-INDEX.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-RESOURCE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-RESOURCE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-RESOURCE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-RESOURCE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-SQL-BLOCK-RULE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-SQL-BLOCK-RULE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-SQL-BLOCK-RULE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-SQL-BLOCK-RULE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-TABLE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-TABLE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-TABLE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-TABLE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/BROKER-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/BROKER-LOAD.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/BROKER-LOAD.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/BROKER-LOAD.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CANCEL-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CANCEL-LOAD.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CANCEL-LOAD.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CANCEL-LOAD.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-SYNC-JOB.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-SYNC-JOB.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-SYNC-JOB.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-SYNC-JOB.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/MULTI-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/MULTI-LOAD.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/MULTI-LOAD.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/MULTI-LOAD.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-SYNC-JOB.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-SYNC-JOB.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-SYNC-JOB.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-SYNC-JOB.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-SYNC-JOB.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-SYNC-JOB.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-SYNC-JOB.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-SYNC-JOB.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-SYNC-JOB.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-SYNC-JOB.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-SYNC-JOB.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-SYNC-JOB.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STREAM-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STREAM-LOAD.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STREAM-LOAD.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STREAM-LOAD.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/DELETE.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/DELETE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/DELETE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/DELETE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/INSERT.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/INSERT.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/INSERT.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/INSERT.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/UPDATE.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/UPDATE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/UPDATE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/UPDATE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/OUTFILE.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/OUTFILE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/OUTFILE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/OUTFILE.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/BIGINT.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/BIGINT.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/BIGINT.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/BIGINT.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/BITMAP.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/BITMAP.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/BITMAP.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/BITMAP.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/BOOLEAN.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/BOOLEAN.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/BOOLEAN.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/BOOLEAN.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/CHAR.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/CHAR.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/CHAR.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/CHAR.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/DATE.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/DATE.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/DATE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/DATE.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/DATETIME.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/DATETIME.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/DATETIME.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/DATETIME.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/DECIMAL.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/DECIMAL.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/DECIMAL.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/DECIMAL.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/DOUBLE.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/DOUBLE.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/DOUBLE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/DOUBLE.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/FLOAT.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/FLOAT.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/FLOAT.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/FLOAT.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/HLL.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/HLL.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/HLL.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/HLL.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/INT.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/INT.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/INT.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/INT.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/QUANTILE_STATE.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/QUANTILE_STATE.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/QUANTILE_STATE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/QUANTILE_STATE.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/SMALLINT.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/SMALLINT.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/SMALLINT.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/SMALLINT.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/STRING.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/STRING.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/STRING.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/STRING.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/TINYINT.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/TINYINT.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/TINYINT.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/TINYINT.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/VARCHAR.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/VARCHAR.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/VARCHAR.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/VARCHAR.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CHECK-TABLET.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CHECK-TABLET.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CHECK-TABLET.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CHECK-TABLET.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CLEAN-TRASH.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CLEAN-TRASH.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CLEAN-TRASH.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CLEAN-TRASH.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-CONFIG.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-CONFIG.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-CONFIG.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-CONFIG.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md
diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW TABLET STORAGE FORMAT.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-TABLET-STORAGE-FORMAT.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW TABLET STORAGE FORMAT.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-TABLET-STORAGE-FORMAT.md
diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ENABLE-FEATURE.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ENABLE-FEATURE.md
similarity index 100%
rename from docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ENABLE-FEATURE.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ENABLE-FEATURE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/INSTALL-PLUGIN.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/INSTALL-PLUGIN.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/INSTALL-PLUGIN.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/INSTALL-PLUGIN.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/KILL.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/KILL.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/KILL.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/KILL.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/RECOVER.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/RECOVER.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/RECOVER.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/RECOVER.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/SET-VARIABLE.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/SET-VARIABLE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/SET-VARIABLE.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/SET-VARIABLE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/UNINSTALL-PLUGIN.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/UNINSTALL-PLUGIN.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/UNINSTALL-PLUGIN.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/UNINSTALL-PLUGIN.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER-TABLE-MATERIALIZED-VIEW.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER-TABLE-MATERIALIZED-VIEW.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER-TABLE-MATERIALIZED-VIEW.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER-TABLE-MATERIALIZED-VIEW.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKENDS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKENDS.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKENDS.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKENDS.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKUP.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKUP.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKUP.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKUP.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BROKER.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BROKER.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BROKER.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BROKER.md
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CHARSET.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CHARSET.md
similarity index 100%
rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CHARSET.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CHARSET.md
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLLATION.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLLATION.md
similarity index 100%
rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLLATION.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLLATION.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLUMNS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLUMNS.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLUMNS.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLUMNS.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-DATABASE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-DATABASE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-DATABASE.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-DATABASE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-FUNCTION.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-FUNCTION.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-FUNCTION.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-FUNCTION.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-TABLE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-TABLE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-TABLE.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-TABLE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATA.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATA.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATA.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATA.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASE-ID.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASE-ID.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASE-ID.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASE-ID.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASES.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASES.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASES.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASES.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DELETE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DELETE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DELETE.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DELETE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DYNAMIC-PARTITION.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DYNAMIC-PARTITION.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DYNAMIC-PARTITION.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DYNAMIC-PARTITION.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENCRYPT-KEY.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENCRYPT-KEY.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENCRYPT-KEY.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENCRYPT-KEY.md
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENGINES.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENGINES.md
similarity index 100%
rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENGINES.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENGINES.md
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-EVENTS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-EVENTS.md
similarity index 100%
rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-EVENTS.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-EVENTS.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-EXPORT.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-EXPORT.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-EXPORT.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-EXPORT.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FILE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FILE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FILE.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FILE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FRONTENDS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FRONTENDS.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FRONTENDS.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FRONTENDS.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FUNCTIONS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FUNCTIONS.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FUNCTIONS.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FUNCTIONS.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-GRANTS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-GRANTS.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-GRANTS.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-GRANTS.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-INDEX.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-INDEX.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-INDEX.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-INDEX.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LAST-INSERT.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LAST-INSERT.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LAST-INSERT.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LAST-INSERT.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-PROFILE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-PROFILE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-PROFILE.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-PROFILE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-WARNINGS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-WARNINGS.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-WARNINGS.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-WARNINGS.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-MIGRATIONS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-MIGRATIONS.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-MIGRATIONS.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-MIGRATIONS.md
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-OPEN-TABLES.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-OPEN-TABLES.md
similarity index 100%
rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-OPEN-TABLES.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-OPEN-TABLES.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITION-ID.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITION-ID.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITION-ID.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITION-ID.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITIONS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITIONS.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITIONS.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITIONS.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PLUGINS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PLUGINS.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PLUGINS.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PLUGINS.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROC.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROC.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROC.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROC.md
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCEDURE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCEDURE.md
similarity index 100%
rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCEDURE.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCEDURE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCESSLIST.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCESSLIST.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCESSLIST.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCESSLIST.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROPERTY.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROPERTY.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROPERTY.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROPERTY.md
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-QUERY-PROFILE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-QUERY-PROFILE.md
similarity index 100%
rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-QUERY-PROFILE.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-QUERY-PROFILE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-REPOSITORIES.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-REPOSITORIES.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-REPOSITORIES.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-REPOSITORIES.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESOURCES.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESOURCES.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESOURCES.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESOURCES.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESTORE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESTORE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESTORE.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESTORE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLES.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLES.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLES.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLES.md
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLLUP.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLLUP.md
similarity index 100%
rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLLUP.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLLUP.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SMALL-FILES.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SMALL-FILES.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SMALL-FILES.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SMALL-FILES.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SNAPSHOT.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SNAPSHOT.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SNAPSHOT.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SNAPSHOT.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SQL-BLOCK-RULE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SQL-BLOCK-RULE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SQL-BLOCK-RULE.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SQL-BLOCK-RULE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STATUS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STATUS.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STATUS.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STATUS.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STREAM-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STREAM-LOAD.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STREAM-LOAD.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STREAM-LOAD.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SYNC-JOB.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SYNC-JOB.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SYNC-JOB.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SYNC-JOB.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-ID.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-ID.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-ID.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-ID.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-STATUS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-STATUS.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-STATUS.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-STATUS.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLET.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLET.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLET.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLET.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRANSACTION.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRANSACTION.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRANSACTION.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRANSACTION.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRASH.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRASH.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRASH.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRASH.md
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRIGGERS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRIGGERS.md
similarity index 100%
rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRIGGERS.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRIGGERS.md
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-USER.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-USER.md
similarity index 100%
rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-USER.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-USER.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VARIABLES.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VARIABLES.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VARIABLES.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VARIABLES.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VIEW.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VIEW.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VIEW.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VIEW.md
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-WARNING.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-WARNING.md
similarity index 100%
rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-WARNING.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-WARNING.md
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-WHITE-LIST.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-WHITE-LIST.md
similarity index 100%
rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-WHITE-LIST.md
rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-WHITE-LIST.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Utility-Statements/DESCRIBE.md b/docs/en/sql-manual/sql-reference-v2/Utility-Statements/DESCRIBE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Utility-Statements/DESCRIBE.md
rename to docs/en/sql-manual/sql-reference-v2/Utility-Statements/DESCRIBE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Utility-Statements/HELP.md b/docs/en/sql-manual/sql-reference-v2/Utility-Statements/HELP.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Utility-Statements/HELP.md
rename to docs/en/sql-manual/sql-reference-v2/Utility-Statements/HELP.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Utility-Statements/USE.md b/docs/en/sql-manual/sql-reference-v2/Utility-Statements/USE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Utility-Statements/USE.md
rename to docs/en/sql-manual/sql-reference-v2/Utility-Statements/USE.md
diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/ALTER-USER.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/ALTER-USER.md
deleted file mode 100644
index e1690d81e9..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/ALTER-USER.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-USER",
- "language": "en"
-}
----
-
-
-
-## ALTER-USER
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, USER
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-ROLE.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-ROLE.md
deleted file mode 100644
index b6fdbc3ad1..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-ROLE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CREATE-ROLE",
- "language": "en"
-}
----
-
-
-
-## CREATE-ROLE
-
-### Description
-
-### Example
-
-### Keywords
-
- CREATE, ROLE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-USER.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-USER.md
deleted file mode 100644
index ade67b5f43..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-USER.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CREATE-USER",
- "language": "en"
-}
----
-
-
-
-## CREATE-USER
-
-### Description
-
-### Example
-
-### Keywords
-
- CREATE, USER
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-ROLE.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-ROLE.md
deleted file mode 100644
index 16fab0dcfa..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-ROLE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "DROP-ROLE",
- "language": "en"
-}
----
-
-
-
-## DROP-ROLE
-
-### Description
-
-### Example
-
-### Keywords
-
- DROP, ROLE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-USER.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-USER.md
deleted file mode 100644
index 9ae0d1e76c..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-USER.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "DROP-USER",
- "language": "en"
-}
----
-
-
-
-## DROP-USER
-
-### Description
-
-### Example
-
-### Keywords
-
- DROP, USER
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/GRANT.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/GRANT.md
deleted file mode 100644
index db5180221d..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/GRANT.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "GRANT",
- "language": "en"
-}
----
-
-
-
-## GRANT
-
-### Description
-
-### Example
-
-### Keywords
-
- GRANT
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/LDAP.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/LDAP.md
deleted file mode 100644
index 08066c9e53..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/LDAP.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "LDAP",
- "language": "en"
-}
----
-
-
-
-## LDAP
-
-### Description
-
-### Example
-
-### Keywords
-
- LDAP
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/REVOKE.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/REVOKE.md
deleted file mode 100644
index e29f278220..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/REVOKE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "REVOKE",
- "language": "en"
-}
----
-
-
-
-## REVOKE
-
-### Description
-
-### Example
-
-### Keywords
-
- REVOKE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PASSWORD.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PASSWORD.md
deleted file mode 100644
index ca52125b77..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PASSWORD.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SET-PASSWORD",
- "language": "en"
-}
----
-
-
-
-## SET-PASSWORD
-
-### Description
-
-### Example
-
-### Keywords
-
- SET, PASSWORD
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PROPERTY.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PROPERTY.md
deleted file mode 100644
index 47732877e6..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PROPERTY.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SET-PROPERTY",
- "language": "en"
-}
----
-
-
-
-## SET-PROPERTY
-
-### Description
-
-### Example
-
-### Keywords
-
- SET, PROPERTY
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md b/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md
deleted file mode 100644
index 8673f99d5b..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-SYSTEM-ADD-BACKEND",
- "language": "en"
-}
----
-
-
-
-## ALTER-SYSTEM-ADD-BACKEND
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, SYSTEM, ADD, BACKEND
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md b/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md
deleted file mode 100644
index d90642ce59..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-SYSTEM-ADD-FOLLOWER",
- "language": "en"
-}
----
-
-
-
-## ALTER-SYSTEM-ADD-FOLLOWER
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, SYSTEM, ADD, FOLLOWER
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md b/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md
deleted file mode 100644
index 439e3566a3..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-SYSTEM-ADD-OBSERVER",
- "language": "en"
-}
----
-
-
-
-## ALTER-SYSTEM-ADD-OBSERVER
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, SYSTEM, ADD, OBSERVER
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md b/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md
deleted file mode 100644
index 5ae550f0c4..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-SYSTEM-DECOMMISSION-BACKEND",
- "language": "en"
-}
----
-
-
-
-## ALTER-SYSTEM-DECOMMISSION-BACKEND
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, SYSTEM, DECOMMISSION, BACKEND
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md b/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md
deleted file mode 100644
index e2f2e0bc44..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-SYSTEM-DROP-BACKEND",
- "language": "en"
-}
----
-
-
-
-## ALTER-SYSTEM-DROP-BACKEND
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, SYSTEM, DROP, BACKEND
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md b/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md
deleted file mode 100644
index 9999fa40ad..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-SYSTEM-DROP-FOLLOWER",
- "language": "en"
-}
----
-
-
-
-## ALTER-SYSTEM-DROP-FOLLOWER
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, SYSTEM, DROP, FOLLOWER
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md b/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md
deleted file mode 100644
index 277f8096f8..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-SYSTEM-DROP-OBSERVER",
- "language": "en"
-}
----
-
-
-
-## ALTER-SYSTEM-DROP-OBSERVER
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, SYSTEM, DROP, OBSERVER
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md b/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md
deleted file mode 100644
index 1b66326d55..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CANCEL-ALTER-SYSTEM",
- "language": "en"
-}
----
-
-
-
-## CANCEL-ALTER-SYSTEM
-
-### Description
-
-### Example
-
-### Keywords
-
- CANCEL, ALTER, SYSTEM
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-DATABASE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-DATABASE.md
deleted file mode 100644
index 527f4ac1ee..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-DATABASE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-DATABASE",
- "language": "en"
-}
----
-
-
-
-## ALTER-DATABASE
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, DATABASE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md
deleted file mode 100644
index 48fdae2ee0..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-TABLE-COLUMN",
- "language": "en"
-}
----
-
-
-
-## ALTER-TABLE-COLUMN
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, TABLE, COLUMN
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md
deleted file mode 100644
index df6150a5d0..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-TABLE-PARTITION",
- "language": "en"
-}
----
-
-
-
-## ALTER-TABLE-PARTITION
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, TABLE, PARTITION
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md
deleted file mode 100644
index fe094ecab3..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-TABLE-PROPERTY",
- "language": "en"
-}
----
-
-
-
-## ALTER-TABLE-PROPERTY
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, TABLE, PROPERTY
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md
deleted file mode 100644
index dfaae33b28..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-TABLE-RENAME",
- "language": "en"
-}
----
-
-
-
-## ALTER-TABLE-RENAME
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, TABLE, RENAME
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md
deleted file mode 100644
index e383908221..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-TABLE-REPLACE",
- "language": "en"
-}
----
-
-
-
-## ALTER-TABLE-REPLACE
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, TABLE, REPLACE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md
deleted file mode 100644
index 99ab24c164..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-TABLE-ROLLUP",
- "language": "en"
-}
----
-
-
-
-## ALTER-TABLE-ROLLUP
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, TABLE, ROLLUP
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-VIEW.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-VIEW.md
deleted file mode 100644
index 23ab56ba07..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-VIEW.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-VIEW",
- "language": "en"
-}
----
-
-
-
-## ALTER-VIEW
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, VIEW
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md
deleted file mode 100644
index ca6777e2fb..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CANCEL-ALTER-TABLE",
- "language": "en"
-}
----
-
-
-
-## CANCEL-ALTER-TABLE
-
-### Description
-
-### Example
-
-### Keywords
-
- CANCEL, ALTER, TABLE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/BACKUP.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/BACKUP.md
deleted file mode 100644
index e4f00c442c..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/BACKUP.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "BACKUP",
- "language": "en"
-}
----
-
-
-
-## BACKUP
-
-### Description
-
-### Example
-
-### Keywords
-
- BACKUP
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md
deleted file mode 100644
index bb6d9448fc..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CANCEL-BACKUP",
- "language": "en"
-}
----
-
-
-
-## CANCEL-BACKUP
-
-### Description
-
-### Example
-
-### Keywords
-
- CANCEL, BACKUP
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md
deleted file mode 100644
index fa50eca878..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CANCEL-RESTORE",
- "language": "en"
-}
----
-
-
-
-## CANCEL-RESTORE
-
-### Description
-
-### Example
-
-### Keywords
-
- CANCEL, RESTORE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
deleted file mode 100644
index 611bfab2fc..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CREATE-REPOSITORY",
- "language": "en"
-}
----
-
-
-
-## CREATE-REPOSITORY
-
-### Description
-
-### Example
-
-### Keywords
-
- CREATE, REPOSITORY
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md
deleted file mode 100644
index adf837436a..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "DROP-REPOSITORY",
- "language": "en"
-}
----
-
-
-
-## DROP-REPOSITORY
-
-### Description
-
-### Example
-
-### Keywords
-
- DROP, REPOSITORY
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/RESTORE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/RESTORE.md
deleted file mode 100644
index 9ff29b3f98..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/RESTORE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "RESTORE",
- "language": "en"
-}
----
-
-
-
-## RESTORE
-
-### Description
-
-### Example
-
-### Keywords
-
- RESTORE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-DATABASE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-DATABASE.md
deleted file mode 100644
index b4b092baca..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-DATABASE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CREATE-DATABASE",
- "language": "en"
-}
----
-
-
-
-## CREATE-DATABASE
-
-### Description
-
-### Example
-
-### Keywords
-
- CREATE, DATABASE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md
deleted file mode 100644
index 5bbc792e74..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CREATE-ENCRYPT-KEY",
- "language": "en"
-}
----
-
-
-
-## CREATE-ENCRYPT-KEY
-
-### Description
-
-### Example
-
-### Keywords
-
- CREATE, ENCRYPT, KEY
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FILE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FILE.md
deleted file mode 100644
index 2003bab853..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FILE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CREATE-FILE",
- "language": "en"
-}
----
-
-
-
-## CREATE-FILE
-
-### Description
-
-### Example
-
-### Keywords
-
- CREATE, FILE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FUNCTION.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FUNCTION.md
deleted file mode 100644
index 315acc4c92..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FUNCTION.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CREATE-FUNCTION",
- "language": "en"
-}
----
-
-
-
-## CREATE-FUNCTION
-
-### Description
-
-### Example
-
-### Keywords
-
- CREATE, FUNCTION
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-INDEX.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-INDEX.md
deleted file mode 100644
index 292ecbc9e1..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-INDEX.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CREATE-INDEX",
- "language": "en"
-}
----
-
-
-
-## CREATE-INDEX
-
-### Description
-
-### Example
-
-### Keywords
-
- CREATE, INDEX
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md
deleted file mode 100644
index 03223ea223..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CREATE-MATERIALIZED-VIEW",
- "language": "en"
-}
----
-
-
-
-## CREATE-MATERIALIZED-VIEW
-
-### Description
-
-### Example
-
-### Keywords
-
- CREATE, MATERIALIZED, VIEW
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md
deleted file mode 100644
index 39593772a1..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CREATE-RESOURCE",
- "language": "en"
-}
----
-
-
-
-## CREATE-RESOURCE
-
-### Description
-
-### Example
-
-### Keywords
-
- CREATE, RESOURCE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md
deleted file mode 100644
index 156020f7de..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CREATE-TABLE-LIKE",
- "language": "en"
-}
----
-
-
-
-## CREATE-TABLE-LIKE
-
-### Description
-
-### Example
-
-### Keywords
-
- CREATE, TABLE, LIKE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE.md
deleted file mode 100644
index 72916ef337..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE.md
+++ /dev/null
@@ -1,568 +0,0 @@
----
-{
- "title": "CREATE-TABLE",
- "language": "en"
-}
----
-
-
-
-## CREATE-TABLE
-
-### Description
-
-This command is used to create a table. This document mainly describes the syntax for creating tables that Doris itself maintains. For external table syntax, please refer to the [CREATE-EXTERNAL-TABLE] document.
-
-```sql
-CREATE TABLE [IF NOT EXISTS] [database.]table
-(
- column_definition_list,
- [index_definition_list]
-)
-[engine_type]
-[keys_type]
-[table_comment]
-[partition_info]
-distribution_info
-[rollup_list]
-[properties]
-[extra_properties]
-```
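-
-For orientation, here is a minimal sketch that combines the clauses above. The database, table, and column names are illustrative only and are not taken from this manual:
-
-```sql
-CREATE TABLE IF NOT EXISTS example_db.example_tbl
-(
-    -- k1 and k2 are key columns; v1 is a value column with REPLACE aggregation
-    k1 DATE,
-    k2 INT,
-    v1 VARCHAR(32) REPLACE
-)
-ENGINE = olap
-AGGREGATE KEY(k1, k2)
-COMMENT "illustrative aggregate table"
-PARTITION BY RANGE(k1)
-(
-    PARTITION p202201 VALUES LESS THAN ("2022-02-01")
-)
-DISTRIBUTED BY HASH(k2) BUCKETS 8
-PROPERTIES ("replication_num" = "1");
-```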
-
-* `column_definition_list`
-
- Column definition list:
-
- `column_definition[, column_definition]`
-
- * `column_definition`
-
- Column definition:
-
- `column_name column_type [KEY] [aggr_type] [NULL] [default_value] [column_comment]`
-
- * `column_type`
-
- Column type, the following types are supported:
-
- ```
- TINYINT (1 byte)
- Range: -2^7 + 1 ~ 2^7-1
- SMALLINT (2 bytes)
- Range: -2^15 + 1 ~ 2^15-1
- INT (4 bytes)
- Range: -2^31 + 1 ~ 2^31-1
- BIGINT (8 bytes)
- Range: -2^63 + 1 ~ 2^63-1
- LARGEINT (16 bytes)
- Range: -2^127 + 1 ~ 2^127-1
- FLOAT (4 bytes)
- Support scientific notation
- DOUBLE (12 bytes)
- Support scientific notation
- DECIMAL[(precision, scale)] (16 bytes)
- The decimal type with guaranteed precision. The default is DECIMAL(10, 0)
- precision: 1 ~ 27
- scale: 0 ~ 9
- Where the integer part is 1 ~ 18
- Does not support scientific notation
- DATE (3 bytes)
- Range: 0000-01-01 ~ 9999-12-31
- DATETIME (8 bytes)
- Range: 0000-01-01 00:00:00 ~ 9999-12-31 23:59:59
- CHAR[(length)]
- Fixed-length character string. Length range: 1 ~ 255. Default is 1
- VARCHAR[(length)]
- Variable length character string. Length range: 1 ~ 65533. Default is 1
- HLL (1~16385 bytes)
- HyperLogLog column type. There is no need to specify a length or default value; the length is managed internally by the system based on the degree of data aggregation.
- Must be used with HLL_UNION aggregation type.
- BITMAP
- The bitmap column type does not need to specify the length and default value. Represents a collection of integers, and the maximum number of elements supported is 2^64-1.
- Must be used with BITMAP_UNION aggregation type.
- ```
-
- * `aggr_type`
-
- Aggregation type, the following aggregation types are supported:
-
- ```
- SUM: Sum. Applicable to numeric types.
- MIN: Find the minimum value. Applicable to numeric types.
- MAX: Find the maximum value. Applicable to numeric types.
- REPLACE: Replace. For rows with the same dimension columns, the value columns are replaced in import order: the value imported last replaces the ones imported earlier.
- REPLACE_IF_NOT_NULL: Non-null value replacement. Unlike REPLACE, NULL values do not trigger replacement. Note that the default value should be NULL rather than an empty string; an empty string is treated as a real value and will perform the replacement.
- HLL_UNION: The aggregation method for HLL type columns, aggregated with the HyperLogLog algorithm.
- BITMAP_UNION: The aggregation method for BITMAP type columns, which performs a union aggregation of bitmaps.
- ```
-
- Example:
-
- ```
- k1 TINYINT,
- k2 DECIMAL(10,2) DEFAULT "10.5",
- k4 BIGINT NULL DEFAULT "1000" COMMENT "This is column k4",
- v1 VARCHAR(10) REPLACE NOT NULL,
- v2 BITMAP BITMAP_UNION,
- v3 HLL HLL_UNION,
- v4 INT SUM NOT NULL DEFAULT "1" COMMENT "This is column v4"
- ```
-
-* `index_definition_list`
-
- Index list definition:
-
- `index_definition[, index_definition]`
-
- * `index_definition`
-
- Index definition:
-
- ```sql
- INDEX index_name (col_name) [USING BITMAP] COMMENT 'xxxxxx'
- ```
-
- Example:
-
- ```sql
- INDEX idx1 (k1) USING BITMAP COMMENT "This is a bitmap index1",
- INDEX idx2 (k2) USING BITMAP COMMENT "This is a bitmap index2",
- ...
- ```
-
-* `engine_type`
-
- Table engine type. All tables in this document use the OLAP engine. For other external table engine types, see the [CREATE EXTERNAL TABLE](DORIS/SQL Manual/Syntax Help/DDL/CREATE-EXTERNAL-TABLE.md) document. Example:
-
- `ENGINE=olap`
-
-* `keys_type`
-
- Data model.
-
- `keys_type(col1, col2, ...)`
-
- `keys_type` supports the following models:
-
- * DUPLICATE KEY (default): The columns specified after it are the sort columns.
- * AGGREGATE KEY: The columns specified after it are the dimension columns.
- * UNIQUE KEY: The columns specified after it are the primary key columns.
-
- Example:
-
- ```
- DUPLICATE KEY(col1, col2),
- AGGREGATE KEY(k1, k2, k3),
- UNIQUE KEY(k1, k2)
- ```
-
-* `table_comment`
-
- Table comment. Example:
-
- ```
- COMMENT "This is my first DORIS table"
- ```
-
-* `partition_desc`
-
- Partition information supports two writing methods:
-
- 1. LESS THAN: Only define the upper boundary of the partition. The lower bound is determined by the upper bound of the previous partition.
-
- ```
- PARTITION BY RANGE(col1[, col2, ...])
- (
- PARTITION partition_name1 VALUES LESS THAN MAXVALUE|("value1", "value2", ...),
- PARTITION partition_name2 VALUES LESS THAN MAXVALUE|("value1", "value2", ...)
- )
- ```
-
- 2. FIXED RANGE: Define a left-closed, right-open interval for the partition.
-
- ```
- PARTITION BY RANGE(col1[, col2, ...])
- (
- PARTITION partition_name1 VALUES [("k1-lower1", "k2-lower1", "k3-lower1",...), ("k1-upper1", "k2-upper1", "k3-upper1", ... )),
- PARTITION partition_name2 VALUES [("k1-lower1-2", "k2-lower1-2", ...), ("k1-upper1-2", MAXVALUE, ))
- )
- ```
-
-* `distribution_desc`
-
- Define the data bucketing method.
-
- `DISTRIBUTED BY HASH (k1[,k2 ...]) [BUCKETS num]`
-
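- For example (an illustrative sketch; the column name and bucket count are placeholders taken from the examples below):
-
- ```
- DISTRIBUTED BY HASH(k1) BUCKETS 32
- ```
-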
-* `rollup_list`
-
- Multiple materialized views (ROLLUP) can be created at the same time as the table is built.
-
- `ROLLUP (rollup_definition[, rollup_definition, ...])`
-
- * `rollup_definition`
-
- `rollup_name (col1[, col2, ...]) [DUPLICATE KEY(col1[, col2, ...])] [PROPERTIES("key" = "value")]`
-
- Example:
-
- ```
- ROLLUP (
- r1 (k1, k3, v1, v2),
- r2 (k1, v1)
- )
- ```
-
-* `properties`
-
- Set table properties. The following attributes are currently supported:
-
- * `replication_num`
-
- Number of replicas. The default is 3. If the number of BE nodes is fewer than 3, you need to specify a replica count that is less than or equal to the number of BE nodes.
-
- After version 0.15, this attribute is automatically converted to the `replication_allocation` attribute. For example:
-
- `"replication_num" = "3"` is automatically converted to `"replication_allocation" = "tag.location.default:3"`
-
- * `replication_allocation`
-
- Set the replica distribution according to Tag. This attribute can fully replace the `replication_num` attribute.
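-
- For example (the same form appears in Example 10 below):
-
- ```
- "replication_allocation" = "tag.location.group_a:1, tag.location.group_b:2"
- ```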
-
- * `storage_medium/storage_cooldown_time`
-
- Data storage medium. `storage_medium` is used to declare the initial storage medium of the table data, and `storage_cooldown_time` is used to set the expiration time. Example:
-
- ```
- "storage_medium" = "SSD",
- "storage_cooldown_time" = "2020-11-20 00:00:00"
- ```
-
- This example indicates that the data is initially stored on SSD and is automatically migrated to HDD storage after 2020-11-20 00:00:00.
-
- * `colocate_with`
-
- When you need to use the Colocation Join function, use this parameter to set the Colocation Group.
-
- `"colocate_with" = "group1"`
-
- * `bloom_filter_columns`
-
- Specifies the list of column names to build a Bloom Filter index on. The Bloom Filter index of each column is independent; it is not a composite index.
-
- `"bloom_filter_columns" = "k1, k2, k3"`
-
- * `in_memory`
-
- Use this property to set whether the table is a [Memory Table](DORIS/Operation Manual/Memory Table.md).
-
- `"in_memory" = "true"`
-
- * `function_column.sequence_type`
-
- When using the UNIQUE KEY model, you can specify a sequence column. When the KEY columns are the same, rows are replaced according to the sequence column: a row with a larger sequence value replaces one with a smaller value, but never the other way around.
-
- Here you only need to specify the type of the sequence column, which can be a time type or an integer type. Doris will create a hidden sequence column.
-
- `"function_column.sequence_type" = 'Date'`
-
- * Dynamic partition related
-
- The relevant parameters of dynamic partition are as follows (an illustrative PROPERTIES snippet follows this list):
-
- * `dynamic_partition.enable`: Used to specify whether the table-level dynamic partition function is enabled. The default is true.
- * `dynamic_partition.time_unit`: Used to specify the time unit for dynamically adding partitions. Options are DAY (day), WEEK (week), MONTH (month), and HOUR (hour).
- * `dynamic_partition.start`: Used to specify the starting offset of the dynamic partitions; partitions with a range earlier than this offset are deleted. The value must be less than 0. The default is Integer.MIN_VALUE.
- * `dynamic_partition.end`: Used to specify the number of partitions created in advance. The value must be greater than 0.
- * `dynamic_partition.prefix`: Used to specify the prefix of the partition names to be created. For example, if the prefix is p, partitions are automatically created with names like p20200108.
- * `dynamic_partition.buckets`: Used to specify the number of buckets for automatically created partitions.
- * `dynamic_partition.create_history_partition`: Whether to create historical partitions.
- * `dynamic_partition.history_partition_num`: Specify the number of historical partitions to be created.
- * `dynamic_partition.reserved_history_periods`: Used to specify the ranges of reserved history periods.
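-
- An illustrative PROPERTIES snippet combining these parameters (all values, including the `reserved_history_periods` range, are placeholder assumptions):
-
- ```
- "dynamic_partition.enable" = "true",
- "dynamic_partition.time_unit" = "DAY",
- "dynamic_partition.start" = "-7",
- "dynamic_partition.end" = "3",
- "dynamic_partition.prefix" = "p",
- "dynamic_partition.buckets" = "32",
- "dynamic_partition.create_history_partition" = "true",
- "dynamic_partition.history_partition_num" = "7",
- "dynamic_partition.reserved_history_periods" = "[2021-01-01,2021-03-01]"
- ```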
-
- * Data Sort Info
-
- The relevant parameters of data sort info are as follows (an illustrative PROPERTIES snippet follows this list):
-
- * `data_sort.sort_type`: the data sorting method; options are z-order and lexical, and the default is lexical
- * `data_sort.col_num`: the number of leading columns used for sorting; col_num must not exceed the total number of key columns
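-
- An illustrative PROPERTIES snippet (the values are placeholders):
-
- ```
- "data_sort.sort_type" = "z-order",
- "data_sort.col_num" = "2"
- ```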
-
-### Example
-
-1. Create a detailed model table
-
- ```sql
- CREATE TABLE example_db.table_hash
- (
- k1 TINYINT,
- k2 DECIMAL(10, 2) DEFAULT "10.5",
- k3 CHAR(10) COMMENT "string column",
- k4 INT NOT NULL DEFAULT "1" COMMENT "int column"
- )
- COMMENT "my first table"
- DISTRIBUTED BY HASH(k1) BUCKETS 32
- ```
-
-2. Create a detailed model table with partitions, specify the sort columns, and set the replica count to 1
-
- ```sql
- CREATE TABLE example_db.table_hash
- (
- k1 DATE,
- k2 DECIMAL(10, 2) DEFAULT "10.5",
- k3 CHAR(10) COMMENT "string column",
- k4 INT NOT NULL DEFAULT "1" COMMENT "int column"
- )
- DUPLICATE KEY(k1, k2)
- COMMENT "my first table"
- PARTITION BY RANGE(k1)
- (
- PARTITION p1 VALUES LESS THAN ("2020-02-01"),
- PARTITION p2 VALUES LESS THAN ("2020-03-01"),
- PARTITION p3 VALUES LESS THAN ("2020-04-01")
- )
- DISTRIBUTED BY HASH(k1) BUCKETS 32
- PROPERTIES (
- "replication_num" = "1"
- );
- ```
-
-3. Create a table with the unique (primary key) model, and set the initial storage medium and cooldown time
-
- ```sql
- CREATE TABLE example_db.table_hash
- (
- k1 BIGINT,
- k2 LARGEINT,
- v1 VARCHAR(2048),
- v2 SMALLINT DEFAULT "10"
- )
- UNIQUE KEY(k1, k2)
- DISTRIBUTED BY HASH (k1, k2) BUCKETS 32
- PROPERTIES(
- "storage_medium" = "SSD",
- "storage_cooldown_time" = "2015-06-04 00:00:00"
- );
- ```
-
-4. Create an aggregate model table, using a fixed range partition description
-
- ```sql
- CREATE TABLE table_range
- (
- k1 DATE,
- k2 INT,
- k3 SMALLINT,
- v1 VARCHAR(2048) REPLACE,
- v2 INT SUM DEFAULT "1"
- )
- AGGREGATE KEY(k1, k2, k3)
- PARTITION BY RANGE (k1, k2, k3)
- (
- PARTITION p1 VALUES [("2014-01-01", "10", "200"), ("2014-01-01", "20", "300")),
- PARTITION p2 VALUES [("2014-06-01", "100", "200"), ("2014-07-01", "100", "300"))
- )
- DISTRIBUTED BY HASH(k2) BUCKETS 32
- ```
-
-5. Create an aggregate model table with HLL and BITMAP column types
-
- ```sql
- CREATE TABLE example_db.example_table
- (
- k1 TINYINT,
- k2 DECIMAL(10, 2) DEFAULT "10.5",
- v1 HLL HLL_UNION,
- v2 BITMAP BITMAP_UNION
- )
- ENGINE=olap
- AGGREGATE KEY(k1, k2)
- DISTRIBUTED BY HASH(k1) BUCKETS 32
- ```
-
-6. Create two self-maintained tables of the same Colocation Group.
-
- ```sql
- CREATE TABLE t1 (
- id int(11) COMMENT "",
- value varchar(8) COMMENT ""
- )
- DUPLICATE KEY(id)
- DISTRIBUTED BY HASH(id) BUCKETS 10
- PROPERTIES (
- "colocate_with" = "group1"
- );
-
- CREATE TABLE t2 (
- id int(11) COMMENT "",
- value1 varchar(8) COMMENT "",
- value2 varchar(8) COMMENT ""
- )
- DUPLICATE KEY(`id`)
- DISTRIBUTED BY HASH(`id`) BUCKETS 10
- PROPERTIES (
- "colocate_with" = "group1"
- );
- ```
-
-7. Create a memory table with bitmap index and bloom filter index
-
- ```sql
- CREATE TABLE example_db.table_hash
- (
- k1 TINYINT,
- k2 DECIMAL(10, 2) DEFAULT "10.5",
- v1 CHAR(10) REPLACE,
- v2 INT SUM,
- INDEX k1_idx (k1) USING BITMAP COMMENT 'my first index'
- )
- AGGREGATE KEY(k1, k2)
- DISTRIBUTED BY HASH(k1) BUCKETS 32
- PROPERTIES (
- "bloom_filter_columns" = "k2",
- "in_memory" = "true"
- );
- ```
-
-8. Create a dynamic partition table.
-
- The table creates partitions 3 days ahead every day and deletes partitions older than 3 days. For example, if today is `2020-01-08`, partitions named `p20200108`, `p20200109`, `p20200110`, `p20200111` will be created. The partition ranges are:
-
- ```
- [types: [DATE]; keys: [2020-01-08]; ..types: [DATE]; keys: [2020-01-09];)
- [types: [DATE]; keys: [2020-01-09]; ..types: [DATE]; keys: [2020-01-10];)
- [types: [DATE]; keys: [2020-01-10]; ..types: [DATE]; keys: [2020-01-11];)
- [types: [DATE]; keys: [2020-01-11]; ..types: [DATE]; keys: [2020-01-12];)
- ```
-
- ```sql
- CREATE TABLE example_db.dynamic_partition
- (
- k1 DATE,
- k2 INT,
- k3 SMALLINT,
- v1 VARCHAR(2048),
- v2 DATETIME DEFAULT "2014-02-04 15:36:00"
- )
- DUPLICATE KEY(k1, k2, k3)
- PARTITION BY RANGE (k1) ()
- DISTRIBUTED BY HASH(k2) BUCKETS 32
- PROPERTIES(
- "dynamic_partition.time_unit" = "DAY",
- "dynamic_partition.start" = "-3",
- "dynamic_partition.end" = "3",
- "dynamic_partition.prefix" = "p",
- "dynamic_partition.buckets" = "32"
- );
- ```
-
-9. Create a table with a materialized view (ROLLUP).
-
- ```sql
- CREATE TABLE example_db.rollup_index_table
- (
- event_day DATE,
- siteid INT DEFAULT '10',
- citycode SMALLINT,
- username VARCHAR(32) DEFAULT '',
- pv BIGINT SUM DEFAULT '0'
- )
- AGGREGATE KEY(event_day, siteid, citycode, username)
- DISTRIBUTED BY HASH(siteid) BUCKETS 10
- ROLLUP (
- r1(event_day,siteid),
- r2(event_day,citycode),
- r3(event_day)
- )
- PROPERTIES("replication_num" = "3");
- ```
-
-10. Set the replica of the table through the `replication_allocation` property.
-
- ```sql
- CREATE TABLE example_db.table_hash
- (
- k1 TINYINT,
- k2 DECIMAL(10, 2) DEFAULT "10.5"
- )
- DISTRIBUTED BY HASH(k1) BUCKETS 32
- PROPERTIES (
- "replication_allocation"="tag.location.group_a:1, tag.location.group_b:2"
- );
-
- CREATE TABLE example_db.dynamic_partition
- (
- k1 DATE,
- k2 INT,
- k3 SMALLINT,
- v1 VARCHAR(2048),
- v2 DATETIME DEFAULT "2014-02-04 15:36:00"
- )
- PARTITION BY RANGE (k1) ()
- DISTRIBUTED BY HASH(k2) BUCKETS 32
- PROPERTIES(
- "dynamic_partition.time_unit" = "DAY",
- "dynamic_partition.start" = "-3",
- "dynamic_partition.end" = "3",
- "dynamic_partition.prefix" = "p",
- "dynamic_partition.buckets" = "32",
- "dynamic_partition.replication_allocation" = "tag.location.group_a:3"
- );
- ```
-
-### Keywords
-
- CREATE, TABLE
-
-### Best Practice
-
-#### Partitioning and bucketing
-
-A table must specify bucketing columns, but partitioning is optional. For a detailed introduction to partitioning and bucketing, please refer to the [Data Division](DORIS/Getting Started/Relational Model and Data Division.md) document.
-
-Tables in Doris can be divided into partitioned tables and non-partitioned tables. This attribute is determined when the table is created and cannot be changed afterwards: for partitioned tables, partitions can be added or deleted during subsequent use, while for non-partitioned tables, operations such as adding partitions are no longer possible.
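-
-For example, partitions of an existing partitioned table (such as `example_db.table_hash` from Example 2 above) can later be managed with statements like the following sketch, where the partition name and bound are assumptions:
-
-```sql
-ALTER TABLE example_db.table_hash ADD PARTITION p4 VALUES LESS THAN ("2020-05-01");
-ALTER TABLE example_db.table_hash DROP PARTITION p4;
-```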
-
-Likewise, the partitioning columns and bucketing columns cannot be changed after the table is created: neither their types nor the columns themselves can be added or removed.
-
-Therefore, it is recommended to confirm how the table will be used and design it accordingly before creating it.
-
-#### Dynamic Partition
-
-The dynamic partition function is mainly used to help users automatically manage partitions. By setting certain rules, the Doris system regularly adds new partitions or deletes historical partitions. Please refer to the [Dynamic Partition](DORIS/Operation Manual/Dynamic Partition.md) document for more help.
-
-#### Materialized View
-
-Users can create multiple materialized views (ROLLUP) as part of the table creation statement, which is a convenient way to create them all at once. Materialized views can also be added after the table is built.
-
-If materialized views are created together with the table, all subsequent data imports will also generate data for them, so the number of materialized views may affect import efficiency.
-
-If a materialized view is added later and the table already contains data, the time needed to build it depends on the current data volume.
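-
-As a sketch, a materialized view can also be added after the table is built, for example on the table from Example 9 above (the view name and column choice are assumptions):
-
-```sql
-CREATE MATERIALIZED VIEW mv_site_pv AS
-SELECT event_day, siteid, SUM(pv) FROM example_db.rollup_index_table GROUP BY event_day, siteid;
-```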
-
-For the introduction of materialized views, please refer to the document [materialized views](DORIS/Operation Manual/materialized views.md).
-
-#### Index
-
-Users can create indexes on multiple columns while building a table. Indexes can also be added after the table is built.
-
-If an index is added later and the table already contains data, all data needs to be rewritten, so the time needed to create the index depends on the current data volume.
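-
-As a sketch, a bitmap index can be added to an existing table with a statement like the following (the index name and column reuse the table from Example 9 and are assumptions):
-
-```sql
-CREATE INDEX idx_citycode ON example_db.rollup_index_table (citycode) USING BITMAP COMMENT 'bitmap index added after table creation';
-```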
-
-#### Memory table
-
-When the `"in_memory" = "true"` attribute is specified at table creation, Doris tries to cache the table's data blocks in the storage engine's PageCache, which reduces disk IO. However, this attribute does not guarantee that the data blocks stay resident in memory; it is only a best-effort hint.
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-VIEW.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-VIEW.md
deleted file mode 100644
index e87ca33a8c..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-VIEW.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CREATE-VIEW",
- "language": "en"
-}
----
-
-
-
-## CREATE-VIEW
-
-### Description
-
-### Example
-
-### Keywords
-
- CREATE, VIEW
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-DATABASE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-DATABASE.md
deleted file mode 100644
index fae4759016..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-DATABASE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "DROP-DATABASE",
- "language": "en"
-}
----
-
-
-
-## DROP-DATABASE
-
-### Description
-
-### Example
-
-### Keywords
-
- DROP, DATABASE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md
deleted file mode 100644
index 843c93c8f2..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "DROP-ENCRYPT-KEY",
- "language": "en"
-}
----
-
-
-
-## DROP-ENCRYPT-KEY
-
-### Description
-
-### Example
-
-### Keywords
-
- DROP, ENCRYPT, KEY
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FILE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FILE.md
deleted file mode 100644
index cdcfe5963d..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FILE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "DROP-FILE",
- "language": "en"
-}
----
-
-
-
-## DROP-FILE
-
-### Description
-
-### Example
-
-### Keywords
-
- DROP, FILE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FUNCTION.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FUNCTION.md
deleted file mode 100644
index e828d61c96..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FUNCTION.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "DROP-FUNCTION",
- "language": "en"
-}
----
-
-
-
-## DROP-FUNCTION
-
-### Description
-
-### Example
-
-### Keywords
-
- DROP, FUNCTION
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-INDEX.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-INDEX.md
deleted file mode 100644
index 9be0add1b4..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-INDEX.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "DROP-INDEX",
- "language": "en"
-}
----
-
-
-
-## DROP-INDEX
-
-### Description
-
-### Example
-
-### Keywords
-
- DROP, INDEX
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md
deleted file mode 100644
index be27fddead..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "DROP-MATERIALIZED-VIEW",
- "language": "en"
-}
----
-
-
-
-## DROP-MATERIALIZED-VIEW
-
-### Description
-
-### Example
-
-### Keywords
-
- DROP, MATERIALIZED, VIEW
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-RESOURCE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-RESOURCE.md
deleted file mode 100644
index 9547f92552..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-RESOURCE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "DROP-RESOURCE",
- "language": "en"
-}
----
-
-
-
-## DROP-RESOURCE
-
-### Description
-
-### Example
-
-### Keywords
-
- DROP, RESOURCE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-TABLE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-TABLE.md
deleted file mode 100644
index 72c116f750..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-TABLE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "DROP-TABLE",
- "language": "en"
-}
----
-
-
-
-## DROP-TABLE
-
-### Description
-
-### Example
-
-### Keywords
-
- DROP, TABLE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md
deleted file mode 100644
index 448ff667c0..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "TRUNCATE-TABLE",
- "language": "en"
-}
----
-
-
-
-## TRUNCATE-TABLE
-
-### Description
-
-### Example
-
-### Keywords
-
- TRUNCATE, TABLE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md
deleted file mode 100644
index 8092c7644b..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ALTER-ROUTINE-LOAD",
- "language": "en"
-}
----
-
-
-
-## ALTER-ROUTINE-LOAD
-
-### Description
-
-### Example
-
-### Keywords
-
- ALTER, ROUTINE, LOAD
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/BROKER-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/BROKER-LOAD.md
deleted file mode 100644
index bad06e60a8..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/BROKER-LOAD.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "BROKER-LOAD",
- "language": "en"
-}
----
-
-
-
-## BROKER-LOAD
-
-### Description
-
-### Example
-
-### Keywords
-
- BROKER, LOAD
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CANCEL-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CANCEL-LOAD.md
deleted file mode 100644
index dfa1b883eb..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CANCEL-LOAD.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CANCEL-LOAD",
- "language": "en"
-}
----
-
-
-
-## CANCEL-LOAD
-
-### Description
-
-### Example
-
-### Keywords
-
- CANCEL, LOAD
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md
deleted file mode 100644
index c4f1f52119..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "CREATE-ROUTINE-LOAD",
- "language": "en"
-}
----
-
-
-
-## CREATE-ROUTINE-LOAD
-
-### Description
-
-### Example
-
-### Keywords
-
- CREATE, ROUTINE, LOAD
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md
deleted file mode 100644
index 63055f687c..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "PAUSE-ROUTINE-LOAD",
- "language": "en"
-}
----
-
-
-
-## PAUSE-ROUTINE-LOAD
-
-### Description
-
-### Example
-
-### Keywords
-
- PAUSE, ROUTINE, LOAD
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md
deleted file mode 100644
index ee49c95db8..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "RESUME-ROUTINE-LOAD",
- "language": "en"
-}
----
-
-
-
-## RESUME-ROUTINE-LOAD
-
-### Description
-
-### Example
-
-### Keywords
-
- RESUME, ROUTINE, LOAD
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md
deleted file mode 100644
index e8cd90cdec..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "STOP-ROUTINE-LOAD",
- "language": "en"
-}
----
-
-
-
-## STOP-ROUTINE-LOAD
-
-### Description
-
-### Example
-
-### Keywords
-
- STOP, ROUTINE, LOAD
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STREAM-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STREAM-LOAD.md
deleted file mode 100644
index 6e7e24522b..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STREAM-LOAD.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "STREAM-LOAD",
- "language": "en"
-}
----
-
-
-
-## STREAM-LOAD
-
-### Description
-
-### Example
-
-### Keywords
-
- STREAM, LOAD
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/DELETE.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/DELETE.md
deleted file mode 100644
index e233d84024..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/DELETE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "DELETE",
- "language": "en"
-}
----
-
-
-
-## DELETE
-
-### Description
-
-### Example
-
-### Keywords
-
- DELETE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/INSERT.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/INSERT.md
deleted file mode 100644
index 9931392c45..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/INSERT.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "INSERT",
- "language": "en"
-}
----
-
-
-
-## INSERT
-
-### Description
-
-### Example
-
-### Keywords
-
- INSERT
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/UPDATE.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/UPDATE.md
deleted file mode 100644
index bf608a8849..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/UPDATE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "UPDATE",
- "language": "en"
-}
----
-
-
-
-## UPDATE
-
-### Description
-
-### Example
-
-### Keywords
-
- UPDATE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md
deleted file mode 100644
index 33eaed0cce..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ADMIN-CANCEL-REPAIR",
- "language": "en"
-}
----
-
-
-
-## ADMIN-CANCEL-REPAIR
-
-### Description
-
-### Example
-
-### Keywords
-
- ADMIN, CANCEL, REPAIR
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CHECK-TABLET.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CHECK-TABLET.md
deleted file mode 100644
index 97aaa56b51..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CHECK-TABLET.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ADMIN-CHECK-TABLET",
- "language": "en"
-}
----
-
-
-
-## ADMIN-CHECK-TABLET
-
-### Description
-
-### Example
-
-### Keywords
-
- ADMIN, CHECK, TABLET
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md
deleted file mode 100644
index 532befbc0e..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ADMIN-REPAIR-TABLE",
- "language": "en"
-}
----
-
-
-
-## ADMIN-REPAIR-TABLE
-
-### Description
-
-### Example
-
-### Keywords
-
- ADMIN, REPAIR, TABLE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-CONFIG.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-CONFIG.md
deleted file mode 100644
index abf9b768f7..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-CONFIG.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ADMIN-SET-CONFIG",
- "language": "en"
-}
----
-
-
-
-## ADMIN-SET-CONFIG
-
-### Description
-
-### Example
-
-### Keywords
-
- ADMIN, SET, CONFIG
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md
deleted file mode 100644
index eb6a7ad75a..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ADMIN-SET-REPLICA-STATUS",
- "language": "en"
-}
----
-
-
-
-## ADMIN-SET-REPLICA-STATUS
-
-### Description
-
-### Example
-
-### Keywords
-
- ADMIN, SET, REPLICA, STATUS
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md
deleted file mode 100644
index 56aac70c96..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ADMIN-SHOW-CONFIG",
- "language": "en"
-}
----
-
-
-
-## ADMIN-SHOW-CONFIG
-
-### Description
-
-### Example
-
-### Keywords
-
- ADMIN, SHOW, CONFIG
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md
deleted file mode 100644
index 0312d5dd01..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ADMIN-SHOW-REPLICA-DISTRIBUTION",
- "language": "en"
-}
----
-
-
-
-## ADMIN-SHOW-REPLICA-DISTRIBUTION
-
-### Description
-
-### Example
-
-### Keywords
-
- ADMIN, SHOW, REPLICA, DISTRIBUTION
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md
deleted file mode 100644
index 17366282da..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "ADMIN-SHOW-REPLICA-STATUS",
- "language": "en"
-}
----
-
-
-
-## ADMIN-SHOW-REPLICA-STATUS
-
-### Description
-
-### Example
-
-### Keywords
-
- ADMIN, SHOW, REPLICA, STATUS
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/INSTALL-PLUGIN.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/INSTALL-PLUGIN.md
deleted file mode 100644
index a7b5d49a91..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/INSTALL-PLUGIN.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "INSTALL-PLUGIN",
- "language": "en"
-}
----
-
-
-
-## INSTALL-PLUGIN
-
-### Description
-
-### Example
-
-### Keywords
-
- INSTALL, PLUGIN
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/KILL.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/KILL.md
deleted file mode 100644
index fe93c90be8..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/KILL.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "KILL",
- "language": "en"
-}
----
-
-
-
-## KILL
-
-### Description
-
-### Example
-
-### Keywords
-
- KILL
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/RECOVER.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/RECOVER.md
deleted file mode 100644
index ccfe365973..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/RECOVER.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "RECOVER",
- "language": "en"
-}
----
-
-
-
-## RECOVER
-
-### Description
-
-### Example
-
-### Keywords
-
- RECOVER
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/SET-VARIABLE.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/SET-VARIABLE.md
deleted file mode 100644
index 4c59725a02..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/SET-VARIABLE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SET-VARIABLE",
- "language": "en"
-}
----
-
-
-
-## SET-VARIABLE
-
-### Description
-
-### Example
-
-### Keywords
-
- SET, VARIABLE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/UNINSTALL-PLUGIN.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/UNINSTALL-PLUGIN.md
deleted file mode 100644
index ab482e4661..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/UNINSTALL-PLUGIN.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "UNINSTALL-PLUGIN",
- "language": "en"
-}
----
-
-
-
-## UNINSTALL-PLUGIN
-
-### Description
-
-### Example
-
-### Keywords
-
- UNINSTALL, PLUGIN
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ALTER.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ALTER.md
deleted file mode 100644
index 93ee8dd89c..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ALTER.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-ALTER",
- "language": "en"
-}
----
-
-
-
-## SHOW-ALTER
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, ALTER
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKENDS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKENDS.md
deleted file mode 100644
index 3c86aaaa7f..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKENDS.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-BACKENDS",
- "language": "en"
-}
----
-
-
-
-## SHOW-BACKENDS
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, BACKENDS
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKUP.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKUP.md
deleted file mode 100644
index b8583ee74b..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKUP.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-BACKUP",
- "language": "en"
-}
----
-
-
-
-## SHOW-BACKUP
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, BACKUP
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BROKER.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BROKER.md
deleted file mode 100644
index 4fe3a7f5c6..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BROKER.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-BROKER",
- "language": "en"
-}
----
-
-
-
-## SHOW-BROKER
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, BROKER
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLUMNS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLUMNS.md
deleted file mode 100644
index f9ad64cf52..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLUMNS.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-COLUMNS",
- "language": "en"
-}
----
-
-
-
-## SHOW-COLUMNS
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, COLUMNS
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-DATABASE.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-DATABASE.md
deleted file mode 100644
index 53e4ec81ad..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-DATABASE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-CREATE-DATABASE",
- "language": "en"
-}
----
-
-
-
-## SHOW-CREATE-DATABASE
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, CREATE, DATABASE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-FUNCTION.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-FUNCTION.md
deleted file mode 100644
index 3ebe2b8234..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-FUNCTION.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-CREATE-FUNCTION",
- "language": "en"
-}
----
-
-
-
-## SHOW-CREATE-FUNCTION
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, CREATE, FUNCTION
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md
deleted file mode 100644
index 8a0200b7c4..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-CREATE-ROUTINE-LOAD",
- "language": "en"
-}
----
-
-
-
-## SHOW-CREATE-ROUTINE-LOAD
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, CREATE, ROUTINE, LOAD
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-TABLE.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-TABLE.md
deleted file mode 100644
index 67587b102a..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-TABLE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-CREATE-TABLE",
- "language": "en"
-}
----
-
-
-
-## SHOW-CREATE-TABLE
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, CREATE, TABLE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATA.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATA.md
deleted file mode 100644
index cf03a8c76e..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATA.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-DATA",
- "language": "en"
-}
----
-
-
-
-## SHOW-DATA
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, DATA
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASE-ID.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASE-ID.md
deleted file mode 100644
index 2279b4de52..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASE-ID.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-DATABASE-ID",
- "language": "en"
-}
----
-
-
-
-## SHOW-DATABASE-ID
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, DATABASE, ID
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASES.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASES.md
deleted file mode 100644
index 1e8f6705da..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASES.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-DATABASES",
- "language": "en"
-}
----
-
-
-
-## SHOW-DATABASES
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, DATABASES
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DELETE.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DELETE.md
deleted file mode 100644
index cdae222606..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DELETE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-DELETE",
- "language": "en"
-}
----
-
-
-
-## SHOW-DELETE
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, DELETE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DYNAMIC-PARTITION.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DYNAMIC-PARTITION.md
deleted file mode 100644
index 2fccdba2d1..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DYNAMIC-PARTITION.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-DYNAMIC-PARTITION",
- "language": "en"
-}
----
-
-
-
-## SHOW-DYNAMIC-PARTITION
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, DYNAMIC, PARTITION
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENCRYPT-KEY.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENCRYPT-KEY.md
deleted file mode 100644
index f45ab611c3..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENCRYPT-KEY.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-ENCRYPT-KEY",
- "language": "en"
-}
----
-
-
-
-## SHOW-ENCRYPT-KEY
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, ENCRYPT, KEY
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-EXPORT.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-EXPORT.md
deleted file mode 100644
index fab358e3f7..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-EXPORT.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-EXPORT",
- "language": "en"
-}
----
-
-
-
-## SHOW-EXPORT
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, EXPORT
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-FRONTENDS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-FRONTENDS.md
deleted file mode 100644
index a946cd3230..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-FRONTENDS.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-FRONTENDS",
- "language": "en"
-}
----
-
-
-
-## SHOW-FRONTENDS
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, FRONTENDS
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-FUNCTIONS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-FUNCTIONS.md
deleted file mode 100644
index 82f2dd7f8d..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-FUNCTIONS.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-FUNCTIONS",
- "language": "en"
-}
----
-
-
-
-## SHOW-FUNCTIONS
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, FUNCTIONS
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-GRANTS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-GRANTS.md
deleted file mode 100644
index 3bee8512c1..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-GRANTS.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-GRANTS",
- "language": "en"
-}
----
-
-
-
-## SHOW-GRANTS
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, GRANTS
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-INDEX.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-INDEX.md
deleted file mode 100644
index 9b4f239fcf..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-INDEX.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-INDEX",
- "language": "en"
-}
----
-
-
-
-## SHOW-INDEX
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, INDEX
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-PROFILE.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-PROFILE.md
deleted file mode 100644
index 9bdcad5da3..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-PROFILE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-LOAD-PROFILE",
- "language": "en"
-}
----
-
-
-
-## SHOW-LOAD-PROFILE
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, LOAD, PROFILE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-WARNINGS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-WARNINGS.md
deleted file mode 100644
index 09fef2eb22..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-WARNINGS.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-LOAD-WARNINGS",
- "language": "en"
-}
----
-
-
-
-## SHOW-LOAD-WARNINGS
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, LOAD, WARNINGS
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD.md
deleted file mode 100644
index 06b5897243..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-LOAD",
- "language": "en"
-}
----
-
-
-
-## SHOW-LOAD
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, LOAD
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-MIGRATIONS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-MIGRATIONS.md
deleted file mode 100644
index 4d32fd4c0f..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-MIGRATIONS.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-MIGRATIONS",
- "language": "en"
-}
----
-
-
-
-## SHOW-MIGRATIONS
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, MIGRATIONS
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITION-ID.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITION-ID.md
deleted file mode 100644
index 3fd215f958..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITION-ID.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-PARTITION-ID",
- "language": "en"
-}
----
-
-
-
-## SHOW-PARTITION-ID
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, PARTITION, ID
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITIONS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITIONS.md
deleted file mode 100644
index d5b0695463..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITIONS.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-PARTITIONS",
- "language": "en"
-}
----
-
-
-
-## SHOW-PARTITIONS
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, PARTITIONS
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PLUGINS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PLUGINS.md
deleted file mode 100644
index d96ce805fd..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PLUGINS.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-PLUGINS",
- "language": "en"
-}
----
-
-
-
-## SHOW-PLUGINS
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, PLUGINS
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROC.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROC.md
deleted file mode 100644
index 1e85c7b3e0..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROC.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-PROC",
- "language": "en"
-}
----
-
-
-
-## SHOW-PROC
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, PROC
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCESSLIST.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCESSLIST.md
deleted file mode 100644
index c587c732c9..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCESSLIST.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-PROCESSLIST",
- "language": "en"
-}
----
-
-
-
-## SHOW-PROCESSLIST
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, PROCESSLIST
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROPERTY.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROPERTY.md
deleted file mode 100644
index a89000dedf..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROPERTY.md
+++ /dev/null
@@ -1,83 +0,0 @@
----
-{
- "title": "SHOW-PROPERTY",
- "language": "en"
-}
----
-
-
-
-## SHOW-PROPERTY
-
-### Description
-
-This statement is used to view the attributes of a user.
-
-```
-SHOW PROPERTY [FOR user] [LIKE key];
-```
-
-* `user`
-
- View the attributes of the specified user. If not specified, the attributes of the current user are shown.
-
-* `LIKE`
-
- Fuzzy matching can be done by attribute name.
-
-Return result description:
-
-```sql
-mysql> show property like '%connection%';
-+----------------------+-------+
-| Key | Value |
-+----------------------+-------+
-| max_user_connections | 100 |
-+----------------------+-------+
-1 row in set (0.01 sec)
-```
-
-* `Key`
-
- Property name.
-
-* `Value`
-
- Property value.
-
-### Example
-
-1. View the attributes of the jack user
-
- ```sql
- SHOW PROPERTY FOR 'jack';
- ```
-
-2. View the connection limit attribute of the jack user
-
- ```sql
- SHOW PROPERTY FOR 'jack' LIKE '%connection%';
- ```
-
-### Keywords
-
- SHOW, PROPERTY
-
-### Best Practice
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-REPOSITORIES.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-REPOSITORIES.md
deleted file mode 100644
index d037fe4800..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-REPOSITORIES.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-REPOSITORIES",
- "language": "en"
-}
----
-
-
-
-## SHOW-REPOSITORIES
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, REPOSITORIES
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESOURCES.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESOURCES.md
deleted file mode 100644
index 68d642ccc8..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESOURCES.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-RESOURCES",
- "language": "en"
-}
----
-
-
-
-## SHOW-RESOURCES
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, RESOURCES
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESTORE.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESTORE.md
deleted file mode 100644
index 5d5f631c3c..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESTORE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-RESTORE",
- "language": "en"
-}
----
-
-
-
-## SHOW-RESTORE
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, RESTORE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLES.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLES.md
deleted file mode 100644
index e982e89eb0..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLES.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-ROLES",
- "language": "en"
-}
----
-
-
-
-## SHOW-ROLES
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, ROLES
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md
deleted file mode 100644
index 481c8ecd63..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-ROUTINE-LOAD-TASK",
- "language": "en"
-}
----
-
-
-
-## SHOW-ROUTINE-LOAD-TASK
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, ROUTINE, LOAD, TASK
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD.md
deleted file mode 100644
index fe0ab3a1c5..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-ROUTINE-LOAD",
- "language": "en"
-}
----
-
-
-
-## SHOW-ROUTINE-LOAD
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, ROUTINE, LOAD
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-SMALL-FILES.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-SMALL-FILES.md
deleted file mode 100644
index 214c108aec..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-SMALL-FILES.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-SMALL-FILES",
- "language": "en"
-}
----
-
-
-
-## SHOW-SMALL-FILES
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, SMALL, FILES
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-SNAPSHOT.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-SNAPSHOT.md
deleted file mode 100644
index 048daac9be..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-SNAPSHOT.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-SNAPSHOT",
- "language": "en"
-}
----
-
-
-
-## SHOW-SNAPSHOT
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, SNAPSHOT
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-STATUS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-STATUS.md
deleted file mode 100644
index 9d26596d1f..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-STATUS.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-STATUS",
- "language": "en"
-}
----
-
-
-
-## SHOW-STATUS
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, STATUS
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-STREAM-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-STREAM-LOAD.md
deleted file mode 100644
index 840d3e00c5..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-STREAM-LOAD.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-STREAM-LOAD",
- "language": "en"
-}
----
-
-
-
-## SHOW-STREAM-LOAD
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, STREAM, LOAD
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-ID.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-ID.md
deleted file mode 100644
index 015bbfd325..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-ID.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-TABLE-ID",
- "language": "en"
-}
----
-
-
-
-## SHOW-TABLE-ID
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, TABLE, ID
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-STATUS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-STATUS.md
deleted file mode 100644
index 9bf2bf5806..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-STATUS.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-TABLE-STATUS",
- "language": "en"
-}
----
-
-
-
-## SHOW-TABLE-STATUS
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, TABLE, STATUS
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLET.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLET.md
deleted file mode 100644
index 8478411b5b..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLET.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-TABLET",
- "language": "en"
-}
----
-
-
-
-## SHOW-TABLET
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, TABLET
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRANSACTION.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRANSACTION.md
deleted file mode 100644
index b04e5dfdba..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRANSACTION.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-TRANSACTION",
- "language": "en"
-}
----
-
-
-
-## SHOW-TRANSACTION
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, TRANSACTION
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-VARIABLES.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-VARIABLES.md
deleted file mode 100644
index 6ab1c01086..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-VARIABLES.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-VARIABLES",
- "language": "en"
-}
----
-
-
-
-## SHOW-VARIABLES
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, VARIABLES
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-VIEW.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-VIEW.md
deleted file mode 100644
index e638427cf8..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-VIEW.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW-VIEW",
- "language": "en"
-}
----
-
-
-
-## SHOW-VIEW
-
-### Description
-
-### Example
-
-### Keywords
-
- SHOW, VIEW
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Utility-Statements/DESCRIBE.md b/docs/en/sql-reference-v2/sql-statements/Utility-Statements/DESCRIBE.md
deleted file mode 100644
index e854bdd6df..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Utility-Statements/DESCRIBE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "DESCRIBE",
- "language": "en"
-}
----
-
-
-
-## DESCRIBE
-
-### Description
-
-### Example
-
-### Keywords
-
- DESCRIBE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Utility-Statements/HELP.md b/docs/en/sql-reference-v2/sql-statements/Utility-Statements/HELP.md
deleted file mode 100644
index 29a43b975a..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Utility-Statements/HELP.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "HELP",
- "language": "en"
-}
----
-
-
-
-## HELP
-
-### Description
-
-### Example
-
-### Keywords
-
- HELP
-
-### Best Practice
-
diff --git a/docs/en/sql-reference-v2/sql-statements/Utility-Statements/USE.md b/docs/en/sql-reference-v2/sql-statements/Utility-Statements/USE.md
deleted file mode 100644
index 00ce4dc298..0000000000
--- a/docs/en/sql-reference-v2/sql-statements/Utility-Statements/USE.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "USE",
- "language": "en"
-}
----
-
-
-
-## USE
-
-### Description
-
-### Example
-
-### Keywords
-
- USE
-
-### Best Practice
-
diff --git a/docs/en/sql-reference/sql-statements/Account Management/CREATE ROLE.md b/docs/en/sql-reference/sql-statements/Account Management/CREATE ROLE.md
deleted file mode 100644
index 1ee11b440c..0000000000
--- a/docs/en/sql-reference/sql-statements/Account Management/CREATE ROLE.md
+++ /dev/null
@@ -1,45 +0,0 @@
----
-{
- "title": "CREATE ROLE",
- "language": "en"
-}
----
-
-
-
-# CREATE ROLE
-## Description
-This statement is used to create a role.
-
-Grammar:
-CREATE ROLE role1;
-
-This statement creates a role with no privileges, which can subsequently be granted permissions through the GRANT command.
-
-## example
-
-1. Create a role
-
-CREATE ROLE role1;
-
-## keyword
-CREATE, ROLE
-
-
diff --git a/docs/en/sql-reference/sql-statements/Account Management/CREATE USER.md b/docs/en/sql-reference/sql-statements/Account Management/CREATE USER.md
deleted file mode 100644
index 79c72de866..0000000000
--- a/docs/en/sql-reference/sql-statements/Account Management/CREATE USER.md
+++ /dev/null
@@ -1,74 +0,0 @@
----
-{
- "title": "CREATE USER",
- "language": "en"
-}
----
-
-
-
-# CREATE USER
-## Description
-
-Syntax:
-
-CREATE USER user_identity [IDENTIFIED BY 'password'] [DEFAULT ROLE 'role_name']
-
-user_identity:
-'user_name'@'host'
-
-The CREATE USER command is used to create a Doris user. In Doris, a user_identity uniquely identifies a user. A user_identity consists of two parts, user_name and host, where user_name is the user name and host identifies the host address from which the client connects. The host part can use % for fuzzy matching. If no host is specified, it defaults to '%', which means the user can connect to Doris from any host.
-
-The host part can also be specified as a domain, with the grammar: 'user_name'@['domain']. Even though it is surrounded by brackets, Doris treats it as a domain and tries to resolve its IP address. At present, only BNS resolution within Baidu is supported.
-
-If a role (ROLE) is specified, the permissions that the role has are automatically granted to the newly created user. If not specified, the user defaults to having no permissions. The specified ROLE must already exist.
-
-## example
-
-1. Create a passwordless user (without specifying a host, it is equivalent to jack@'%')
-
-CREATE USER 'jack';
-
-2. Create a user with a password who is allowed to log in from '172.10.1.10'
-
-CREATE USER jack@'172.10.1.10' IDENTIFIED BY '123456';
-
-3. To avoid passing the password in plaintext, use case 2 can also be created in the following way
-
-CREATE USER jack@'172.10.1.10' IDENTIFIED BY PASSWORD '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9';
-
-Later encrypted content can be obtained through PASSWORD (), for example:
-
-SELECT PASSWORD('123456');
-
-4. Create a user who is allowed to log in from the `192.168` subnet and specify its role as example_role
-
-CREATE USER 'jack'@'192.168.%' DEFAULT ROLE 'example_role';
-
-5. Create a user who is allowed to log in from the domain name 'example_domain'.
-
-CREATE USER 'jack'@['example_domain'] IDENTIFIED BY '12345';
-
-6. Create a user and specify a role
-
-CREATE USER 'jack'@'%' IDENTIFIED BY '12345' DEFAULT ROLE 'my_role';
-
-## keyword
-CREATE, USER
diff --git a/docs/en/sql-reference/sql-statements/Account Management/DROP ROLE.md b/docs/en/sql-reference/sql-statements/Account Management/DROP ROLE.md
deleted file mode 100644
index 6966f8241a..0000000000
--- a/docs/en/sql-reference/sql-statements/Account Management/DROP ROLE.md
+++ /dev/null
@@ -1,43 +0,0 @@
----
-{
- "title": "DROP ROLE",
- "language": "en"
-}
----
-
-
-
-# DROP ROLE
-## Description
-This statement is used to delete a role.
-
-Grammar:
-DROP ROLE role1;
-
-Deleting a role does not affect the permissions of users who previously belonged to that role. It is only equivalent to decoupling the role from the user. The permissions that the user has obtained from the role will not change.
-
-## example
-
-1. Delete a role
-
-DROP ROLE role1;
-
-## keyword
-DROP, ROLE
diff --git a/docs/en/sql-reference/sql-statements/Account Management/DROP USER.md b/docs/en/sql-reference/sql-statements/Account Management/DROP USER.md
deleted file mode 100644
index 8860140ddc..0000000000
--- a/docs/en/sql-reference/sql-statements/Account Management/DROP USER.md
+++ /dev/null
@@ -1,49 +0,0 @@
----
-{
- "title": "DROP USER",
- "language": "en"
-}
----
-
-
-
-# DROP USER
-## Description
-
-Syntax:
-
- DROP USER 'user_identity'
-
- `user_identity`:
-
- user@'host'
- user@['domain']
-
- Drop a specified user identity.
-
-## example
-
-1. Delete user jack@'192.%'
-
- DROP USER 'jack'@'192.%'
-
-## keyword
-
- DROP, USER
diff --git a/docs/en/sql-reference/sql-statements/Account Management/GRANT.md b/docs/en/sql-reference/sql-statements/Account Management/GRANT.md
deleted file mode 100644
index 0d59295d11..0000000000
--- a/docs/en/sql-reference/sql-statements/Account Management/GRANT.md
+++ /dev/null
@@ -1,81 +0,0 @@
----
-{
- "title": "Grant",
- "language": "en"
-}
----
-
-
-
-# Grant
-## Description
-
-The GRANT command is used to give the specified user or role the specified permissions.
-
-Syntax:
-
-GRANT privilege_list ON db_name[.tbl_name] TO user_identity [ROLE role_name]
-
-
-privilege_list is a list of permissions to be granted, separated by commas. Currently Doris supports the following permissions:
-
-NODE_PRIV: Operational privileges of cluster nodes, including bringing nodes online and offline. Only the root user has this privilege and it cannot be granted to other users.
-ADMIN_PRIV: All rights except NODE_PRIV.
-GRANT_PRIV: Permission to operate permissions. Including the creation and deletion of users, roles, authorization and revocation, password settings and so on.
-SELECT_PRIV: Read permissions for specified libraries or tables
-LOAD_PRIV: Import permissions for specified libraries or tables
-ALTER_PRIV: schema change permissions for specified libraries or tables
-CREATE_PRIV: Creation permissions for specified libraries or tables
-DROP_PRIV: Delete permissions for specified libraries or tables
-
-ALL and READ_WRITE in the legacy privileges will be converted to: SELECT_PRIV,LOAD_PRIV,ALTER_PRIV,CREATE_PRIV,DROP_PRIV;
-READ_ONLY is converted to SELECT_PRIV.
-
-db_name[.tbl_name] supports the following three forms:
-
-1. *.*: the permissions apply to all libraries and all tables in them
-2. db.*: the permissions apply to all tables under the specified library
-3. db.tbl: the permissions apply to the specified table under the specified library
-
-The libraries or tables specified here can be non-existent libraries and tables.
-
-user_identity:
-
-The user_identity syntax here is the same as CREATE USER. And you must create user_identity for the user using CREATE USER. The host in user_identity can be a domain name. If it is a domain name, the validity time of permissions may be delayed by about one minute.
-
-You can also grant permissions to the specified ROLE, which is automatically created if the specified ROLE does not exist.
-
-## example
-
-1. Grant permissions to all libraries and tables to users
-
-GRANT SELECT_PRIV ON *.* TO 'jack'@'%';
-
-2. Grant permissions to specified library tables to users
-
-GRANT SELECT_PRIV,ALTER_PRIV,LOAD_PRIV ON db1.tbl1 TO 'jack'@'192.8.%';
-
-3. Grant permissions to specified library tables to roles
-
-GRANT LOAD_PRIV ON db1.* TO ROLE 'my_role';
-
-## keyword
-GRANT
-
diff --git a/docs/en/sql-reference/sql-statements/Account Management/REVOKE.md b/docs/en/sql-reference/sql-statements/Account Management/REVOKE.md
deleted file mode 100644
index d619f20ffb..0000000000
--- a/docs/en/sql-reference/sql-statements/Account Management/REVOKE.md
+++ /dev/null
@@ -1,48 +0,0 @@
----
-{
- "title": "REVOKE",
- "language": "en"
-}
----
-
-
-
-# REVOKE
-## Description
-
-The REVOKE command is used to revoke the specified permissions from the specified user or role.
-Syntax:
-REVOKE privilege_list ON db_name[.tbl_name] FROM user_identity [ROLE role_name]
-
-user_identity:
-
-The user_identity syntax here is the same as CREATE USER. And you must create user_identity for the user using CREATE USER. The host in user_identity can be a domain name. If it is a domain name, the revocation time of permission may be delayed by about one minute.
-
-You can also revoke the permission of the specified ROLE, which must exist for execution.
-
-## example
-
-1. Revoke the privileges of user jack on database db1
-
-REVOKE SELECT_PRIV ON db1.* FROM 'jack'@'192.%';
-
-## keyword
-
-REVOKE
diff --git a/docs/en/sql-reference/sql-statements/Account Management/SET PASSWORD.md b/docs/en/sql-reference/sql-statements/Account Management/SET PASSWORD.md
deleted file mode 100644
index d796e853a9..0000000000
--- a/docs/en/sql-reference/sql-statements/Account Management/SET PASSWORD.md
+++ /dev/null
@@ -1,55 +0,0 @@
----
-{
- "title": "SET PASSWORD",
- "language": "en"
-}
----
-
-
-
-# SET PASSWORD
-## Description
-
-Syntax:
-
-SET PASSWORD [FOR user_identity] =
-[PASSWORD('plain password')]|['hashed password']
-
-The SET PASSWORD command is used to modify a user's login password. If the [FOR user_identity] field is omitted, the password of the current user is modified.
-
-Note that the user_identity here must match exactly the user_identity specified when creating a user using CREATE USER, otherwise the user will be reported as non-existent. If user_identity is not specified, the current user is 'username'@'ip', which may not match any user_identity. The current user can be viewed through SHOW GRANTS.
-
-PASSWORD() takes a plaintext password as input; if a string is used directly instead, it must be the already-encrypted password.
-If you change the password of other users, you need to have administrator privileges.
-
-## example
-
-1. Modify the password of the current user
-
-SET PASSWORD = PASSWORD('123456')
-SET PASSWORD = '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9'
-
-2. Modify the specified user password
-
-SET PASSWORD FOR 'jack'@'192.%' = PASSWORD('123456')
-SET PASSWORD FOR 'jack'@['domain'] = '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9'
-
-## keyword
-SET, PASSWORD
diff --git a/docs/en/sql-reference/sql-statements/Account Management/SET PROPERTY.md b/docs/en/sql-reference/sql-statements/Account Management/SET PROPERTY.md
deleted file mode 100644
index 3439e9b137..0000000000
--- a/docs/en/sql-reference/sql-statements/Account Management/SET PROPERTY.md
+++ /dev/null
@@ -1,108 +0,0 @@
----
-{
- "title": "SET PROPERTY",
- "language": "en"
-}
----
-
-
-
-# SET PROPERTY
-## Description
-
-Syntax:
-
-SET PROPERTY [FOR 'user'] 'key' = 'value' [, 'key' = 'value']
-
-Set user attributes, including resources allocated to users, the import cluster, etc. The user attributes set here apply to the user, not to a user_identity. That is to say, if two users 'jack'@'%' and 'jack'@'192%' are created through the CREATE USER statement, the SET PROPERTY statement can only be applied to the user jack, not to 'jack'@'%' or 'jack'@'192%'.
-
-Importing cluster is only applicable to Baidu internal users.
-
-key:
-
-Super user rights:
-max_user_connections: Maximum number of connections.
-max_query_instances: Maximum number of query instances the user can use when executing a query.
-sql_block_rules: Set SQL block rules. After setting, if a query executed by the user matches the rules, it will be rejected.
-cpu_resource_limit: limit the cpu resource usage of a query. See session variable `cpu_resource_limit`.
-exec_mem_limit: Limit the memory usage of the query. See the description of the session variable `exec_mem_limit` for details. -1 means not set.
-load_mem_limit: Limit memory usage for imports. See the introduction of the session variable `load_mem_limit` for details. -1 means not set.
-resource.cpu_share: CPU resource assignment. (Deprecated)
-load_cluster.{cluster_name}.priority: Assigns a priority to the specified cluster, which can be HIGH or NORMAL
-resource_tags: Specify the user's resource tag permissions.
-
-> Notice: The `cpu_resource_limit`, `exec_mem_limit`, and `load_mem_limit` properties default to the values in the session variables if they are not set.
-
-Ordinary user rights:
-quota.normal: Resource allocation at the normal level.
-quota.high: Resource allocation at the high level.
-quota.low: Resource allocation at the low level.
-
-load_cluster.{cluster_name}.hadoop_palo_path: The Hadoop directory used by Palo. It stores the ETL programs and the intermediate data generated by ETL for Palo imports. After an import is completed, the intermediate data is cleaned up automatically, and the ETL program is kept automatically for the next use.
-load_cluster.{cluster_name}.hadoop_configs: The Hadoop configuration, in which fs.default.name, mapred.job.tracker and hadoop.job.ugi must be filled in.
-load_cluster.{cluster_name}.hadoop_port: The HTTP port of the Hadoop HDFS name node.
-default_load_cluster: The default import cluster.
-
-## example
-
-1. Modify the maximum number of connections for user jack to 1000
-SET PROPERTY FOR 'jack' 'max_user_connections' = '1000';
-
-2. Modify the cpu_share of user jack to 1000
-SET PROPERTY FOR 'jack' 'resource.cpu_share' = '1000';
-
-3. Modify the weight of the normal group for user jack
-SET PROPERTY FOR 'jack' 'quota.normal' = '400';
-
-4. Add an import cluster for user jack
-SET PROPERTY FOR 'jack'
-'load_cluster.{cluster_name}.hadoop_palo_path' = '/user/palo/palo_path',
-'load_cluster.{cluster_name}.hadoop_configs' = 'fs.default.name=hdfs://dpp.cluster.com:port;mapred.job.tracker=dpp.cluster.com:port;hadoop.job.ugi=user,password;mapred.job.queue.name=job_queue_name_in_hadoop;mapred.job.priority=HIGH;';
-
-5. Delete the import cluster under user jack.
-SET PROPERTY FOR 'jack' 'load_cluster.{cluster_name}' = '';
-
-6. Modify user jack's default import cluster
-SET PROPERTY FOR 'jack' 'default_load_cluster' = '{cluster_name}';
-
-7. Modify the cluster priority of user Jack to HIGH
-SET PROPERTY FOR 'jack' 'load_cluster.{cluster_name}.priority' = 'HIGH';
-
-8. Modify the maximum number of query instance for jack to 3000
-SET PROPERTY FOR 'jack' 'max_query_instances' = '3000';
-
-9. Modify the sql block rule for jack
-SET PROPERTY FOR 'jack' 'sql_block_rules' = 'rule1, rule2';
-
-10. Modify the cpu resource usage limit for jack
-SET PROPERTY FOR 'jack' 'cpu_resource_limit' = '2';
-
-11. Modify user's resource tag permission
-SET PROPERTY FOR 'jack' 'resource_tags.location' = 'group_a, group_b';
-
-12. modify the user's query memory usage limit in bytes
-SET PROPERTY FOR 'jack' 'exec_mem_limit' = '2147483648';
-
-13. modify the user's import memory usage limit in bytes
-SET PROPERTY FOR 'jack' 'load_mem_limit' = '2147483648';
-
-## keyword
-SET, PROPERTY
-
diff --git a/docs/en/sql-reference/sql-statements/Account Management/SHOW GRANTS.md b/docs/en/sql-reference/sql-statements/Account Management/SHOW GRANTS.md
deleted file mode 100644
index a12ac9bf91..0000000000
--- a/docs/en/sql-reference/sql-statements/Account Management/SHOW GRANTS.md
+++ /dev/null
@@ -1,56 +0,0 @@
----
-{
- "title": "SHOW GRANTS",
- "language": "en"
-}
----
-
-
-
-# SHOW GRANTS
-## Description
-
-This statement is used to view user rights.
-
-Grammar:
-SHOW [ALL] GRANTS [FOR user_identity];
-
-Explain:
-1. SHOW ALL GRANTS can view the privileges of all users.
-2. If you specify user_identity, view the permissions of the specified user. The user_identity must have been created with the CREATE USER command.
-3. If you do not specify user_identity, view the permissions of the current user.
-
-
-## example
-
-1. View all user rights information
-
-SHOW ALL GRANTS;
-
-2. View the permissions of the specified user
-
-SHOW GRANTS FOR jack@'%';
-
-3. View the permissions of the current user
-
-SHOW GRANTS;
-
-## keyword
-SHOW, GRANTS
diff --git a/docs/en/sql-reference/sql-statements/Account Management/SHOW ROLES.md b/docs/en/sql-reference/sql-statements/Account Management/SHOW ROLES.md
deleted file mode 100644
index a27d30dbd1..0000000000
--- a/docs/en/sql-reference/sql-statements/Account Management/SHOW ROLES.md
+++ /dev/null
@@ -1,41 +0,0 @@
----
-{
- "title": "SHOW ROLES",
- "language": "en"
-}
----
-
-
-
-# SHOW ROLES
-## Description
-This statement is used to display all created roles, including the role name, the users included in the role, and its permissions.
-
-Grammar:
-SHOW ROLES;
-
-## example
-
-1. View the created roles:
-
-SHOW ROLES;
-
-## keyword
-SHOW,ROLES
diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REBALANCE DISK.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REBALANCE DISK.md
deleted file mode 100644
index 475e266306..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REBALANCE DISK.md
+++ /dev/null
@@ -1,51 +0,0 @@
----
-{
- "title": "ADMIN CANCEL REBALANCE DISK",
- "language": "en"
-}
----
-
-
-
-# ADMIN CANCEL REBALANCE DISK
-## Description
-
-This statement is used to cancel rebalancing disks of specified backends with high priority
-
-Grammar:
-
-ADMIN CANCEL REBALANCE DISK [ON ("BackendHost1:BackendHeartBeatPort1", "BackendHost2:BackendHeartBeatPort2", ...)];
-
-Explain:
-
-1. This statement only indicates that the system will no longer rebalance disks of the specified backends with high priority. The system will still rebalance disks through the default scheduling.
-
-## example
-
-1. Cancel high-priority disk rebalancing for all backends in the cluster
-
-ADMIN CANCEL REBALANCE DISK;
-
-2. Cancel high-priority disk rebalancing for the specified backends
-
-ADMIN CANCEL REBALANCE DISK ON ("192.168.1.1:1234", "192.168.1.2:1234");
-
-## keyword
-ADMIN,CANCEL,REBALANCE DISK
diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR.md
deleted file mode 100644
index db001c90b3..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR.md
+++ /dev/null
@@ -1,47 +0,0 @@
----
-{
- "title": "ADMIN CANCEL REPAIR",
- "language": "en"
-}
----
-
-
-
-# ADMIN CANCEL REPAIR
-## Description
-
-This statement is used to cancel repairing a specified table or partition with high priority
-
-Grammar:
-
-ADMIN CANCEL REPAIR TABLE table_name[ PARTITION (p1,...)];
-
-Explain:
-
-1. This statement only indicates that the system will no longer repair tablet replicas of the specified table or partition with high priority. The system will still repair the replicas through the default scheduling.
-
-## example
-
-1. Cancel High Priority Repair
-
-ADMIN CANCEL REPAIR TABLE tbl PARTITION(p1);
-
-## keyword
-ADMIN,CANCEL,REPAIR
diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN CHECK TABLET.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN CHECK TABLET.md
deleted file mode 100644
index 101d506289..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/ADMIN CHECK TABLET.md
+++ /dev/null
@@ -1,57 +0,0 @@
----
-{
- "title": "ADMIN CHECK TABLET",
- "language": "en"
-}
----
-
-
-
-# ADMIN CHECK TABLET
-## description
-
-This statement is used to perform a specified check operation on a list of tablets.
-
-Syntax:
-
-```
-ADMIN CHECK TABLET (tablet_id1, tablet_id2, ...)
-PROPERTIES("type" = "...");
-```
-
-Note:
-
-1. You must specify the list of tablet ids and the "type" property in PROPERTIES.
-2. Currently "type" only supports:
-
- * consistency: Check the consistency of the replicas of the tablet. This command is asynchronous. After sending it, Doris will start to perform the consistency check job of the corresponding tablet. The final result will be reflected in the "InconsistentTabletNum" column in the result of `SHOW PROC "/statistic";`
-
-## example
-
-1. Perform a replica consistency check on a specified set of tablets
-
- ```
- ADMIN CHECK TABLET (10000, 10001)
- PROPERTIES("type" = "consistency");
- ```
-
-## keyword
-
- ADMIN,CHECK,TABLET
diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN CLEAN TRASH.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN CLEAN TRASH.md
deleted file mode 100644
index 0511a3e47b..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/ADMIN CLEAN TRASH.md
+++ /dev/null
@@ -1,47 +0,0 @@
----
-{
- "title": "ADMIN CLEAN TRASH",
- "language": "en"
-}
----
-
-
-
-# ADMIN CLEAN TRASH
-## description
- This statement is used to clean up the trash data in the backend.
- Grammar:
- ADMIN CLEAN TRASH [ON ("BackendHost1:BackendHeartBeatPort1", "BackendHost2:BackendHeartBeatPort2", ...)];
-
- Explain:
- Use BackendHost:BackendHeartBeatPort to specify the backend to be cleaned up; if the ON clause is omitted, the trash data of all backends is cleaned up.
-
-## example
-
- 1. Clean up the trash data of all be nodes.
-
- ADMIN CLEAN TRASH;
-
- 2. Clean up the trash data of '192.168.0.1:9050' and '192.168.0.2:9050'.
-
- ADMIN CLEAN TRASH ON ("192.168.0.1:9050","192.168.0.2:9050");
-
-## keyword
- ADMIN, CLEAN, TRASH
diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN COMPACT.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN COMPACT.md
deleted file mode 100644
index f460d6e016..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/ADMIN COMPACT.md
+++ /dev/null
@@ -1,52 +0,0 @@
----
-{
- "title": "ADMIN COMPACT",
- "language": "en"
-}
----
-
-
-
-# ADMIN COMPACT
-## Description
-
- This statement is used to trigger compaction for all replicas of a specified partition
-
- Grammar:
-
- ADMIN COMPACT TABLE table_name PARTITION partition_name WHERE TYPE='BASE/CUMULATIVE'
-
- Explain:
-
- 1. This statement only means that the system attempts to submit a compaction task for each replica under the specified partition to the compaction thread pool, and it is not guaranteed to succeed.
- 2. This statement supports executing compaction task for a single partition of the table at a time.
-
-## example
-
- 1. Attempt to trigger cumulative compaction for all replicas under the specified partition
-
- ADMIN COMPACT TABLE tbl PARTITION par01 WHERE TYPE='CUMULATIVE';
-
- 2. Attempt to trigger base compaction for all replicas under the specified partition
-
- ADMIN COMPACT TABLE tbl PARTITION par01 WHERE TYPE='BASE';
-
-## keyword
- ADMIN,COMPACT
diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN REBALANCE DISK.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN REBALANCE DISK.md
deleted file mode 100644
index 6e1c1aaa34..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/ADMIN REBALANCE DISK.md
+++ /dev/null
@@ -1,52 +0,0 @@
----
-{
- "title": "ADMIN REBALANCE DISK",
- "language": "en"
-}
----
-
-
-
-# ADMIN REBALANCE DISK
-## Description
-
-This statement is used to try to rebalance disks of the specified backends with high priority, regardless of whether the cluster is balanced
-
-Grammar:
-
-ADMIN REBALANCE DISK [ON ("BackendHost1:BackendHeartBeatPort1", "BackendHost2:BackendHeartBeatPort2", ...)];
-
-Explain:
-
-1. This statement only means that the system attempts to rebalance disks of the specified backends with high priority, regardless of whether the cluster is balanced.
-2. The default timeout is 24 hours. After the timeout, the system will no longer rebalance disks of the specified backends with high priority, and the command needs to be issued again.
-
-## example
-
-1. Attempt to rebalance disks of all backends
-
-ADMIN REBALANCE DISK;
-
-2. Attempt to rebalance disks of the specified backends
-
-ADMIN REBALANCE DISK ON ("192.168.1.1:1234", "192.168.1.2:1234");
-
-## keyword
-ADMIN,REBALANCE,DISK
diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN REPAIR.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN REPAIR.md
deleted file mode 100644
index 8cb037be65..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/ADMIN REPAIR.md
+++ /dev/null
@@ -1,52 +0,0 @@
----
-{
- "title": "ADMIN REPAIR",
- "language": "en"
-}
----
-
-
-
-# ADMIN REPAIR
-## Description
-
-This statement is used to try to fix the specified table or partition first
-
-Grammar:
-
-ADMIN REPAIR TABLE table_name[ PARTITION (p1,...)]
-
-Explain:
-
-1. This statement only means that the system attempts to repair tablet replicas of the specified table or partition with high priority, and it is not guaranteed to succeed. Users can view the repair status through the ADMIN SHOW REPLICA STATUS command.
-2. The default timeout is 14400 seconds (4 hours). After the timeout, the system will no longer repair tablet replicas of the specified table or partition with high priority, and the command needs to be issued again.
-
-## example
-
-1. Attempt to fix the specified table
-
-ADMIN REPAIR TABLE tbl1;
-
-2. Attempt to fix the specified partition
-
-ADMIN REPAIR TABLE tbl1 PARTITION (p1, p2);
-
-## keyword
-ADMIN,REPAIR
diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN SET CONFIG.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN SET CONFIG.md
deleted file mode 100644
index ad87db2739..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/ADMIN SET CONFIG.md
+++ /dev/null
@@ -1,44 +0,0 @@
----
-{
- "title": "ADMIN SET CONFIG",
- "language": "en"
-}
----
-
-
-
-# ADMIN SET CONFIG
-## Description
-
-This statement is used to set the configuration items for the cluster (currently only the configuration items for setting FE are supported).
-Settable configuration items can be viewed through `ADMIN SHOW FRONTEND CONFIG;` commands.
-
-Grammar:
-
-ADMIN SET FRONTEND CONFIG ("key" = "value");
-
-## example
-
-1. "disable balance" true
-
-ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");
-
-## keyword
-ADMIN,SET,CONFIG
diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN SET REPLICA STATUS.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN SET REPLICA STATUS.md
deleted file mode 100644
index 03764b53d2..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/ADMIN SET REPLICA STATUS.md
+++ /dev/null
@@ -1,62 +0,0 @@
----
-{
- "title": "ADMIN SET REPLICA STATUS",
- "language": "en"
-}
----
-
-
-
-# ADMIN SET REPLICA STATUS
-## description
-
- This command is used to set the status of the specified replica.
- This command is currently only used to manually set the status of some replicas to BAD or OK, allowing the system to automatically repair these replicas.
-
- Syntax:
-
- ADMIN SET REPLICA STATUS
- PROPERTIES ("key" = "value", ...);
-
- The following attributes are currently supported:
- "tablet_id": required. Specify a Tablet Id.
- "backend_id": required. Specify a Backend Id.
- "status": required. Specify the status. Only "bad" and "ok" are currently supported.
-
- If the specified replica does not exist or the status is already bad or ok, it will be ignored.
-
- Notice:
-
- A replica set to Bad status may be dropped immediately, so proceed with caution.
-
-## example
-
- 1. Set the replica status of tablet 10003 on BE 10001 to bad.
-
- ADMIN SET REPLICA STATUS PROPERTIES("tablet_id" = "10003", "backend_id" = "10001", "status" = "bad");
-
- 2. Set the replica status of tablet 10003 on BE 10001 to ok.
-
- ADMIN SET REPLICA STATUS PROPERTIES("tablet_id" = "10003", "backend_id" = "10001", "status" = "ok");
-
-## keyword
-
- ADMIN,SET,REPLICA,STATUS
-
diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG.md
deleted file mode 100644
index 3be8ca88cf..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG.md
+++ /dev/null
@@ -1,63 +0,0 @@
----
-{
- "title": "ADMIN SHOW CONFIG",
- "language": "en"
-}
----
-
-
-
-# ADMIN SHOW CONFIG
-## Description
-
-This statement is used to show the configuration of the current cluster (currently only supporting the display of FE configuration items)
-
-Grammar:
-
-ADMIN SHOW FRONTEND CONFIG [LIKE "pattern"];
-
-Explain:
-
-The implications of the results are as follows:
-1. Key: Configuration item name
-2. Value: Configuration item value
-3. Type: Configuration item type
-4. IsMutable: Whether the item can be set via the ADMIN SET CONFIG command
-5. MasterOnly: Whether the item applies only to the Master FE
-6. Comment: Configuration Item Description
-
-## example
-
-1. View the configuration of the current FE node
-
-ADMIN SHOW FRONTEND CONFIG;
-
-2. Search the configuration of the current FE node with a LIKE predicate
-
-mysql> ADMIN SHOW FRONTEND CONFIG LIKE '%check_java_version%';
-+--------------------+-------+---------+-----------+------------+---------+
-| Key | Value | Type | IsMutable | MasterOnly | Comment |
-+--------------------+-------+---------+-----------+------------+---------+
-| check_java_version | true | boolean | false | false | |
-+--------------------+-------+---------+-----------+------------+---------+
-1 row in set (0.00 sec)
-
-## keyword
-ADMIN,SHOW,CONFIG
diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION.md
deleted file mode 100644
index b521ee0889..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION.md
+++ /dev/null
@@ -1,51 +0,0 @@
----
-{
- "title": "ADMIN SHOW REPLICA DISTRIBUTION",
- "language": "en"
-}
----
-
-
-
-# ADMIN SHOW REPLICA DISTRIBUTION
-## Description
-
-This statement is used to show the distribution status of a table or partition replica
-
-Grammar:
-
-ADMIN SHOW REPLICA DISTRIBUTION FROM [db_name.]tbl_name [PARTITION (p1, ...)];
-
-Explain:
-
-The Graph column in the result shows the distribution ratio of replicas graphically
-
-## example
-
-1. View the distribution of replicas of tables
-
-ADMIN SHOW REPLICA DISTRIBUTION FROM tbl1;
-
-2. View the replica distribution of the specified partitions of the table
-
-ADMIN SHOW REPLICA DISTRIBUTION FROM db1.tbl1 PARTITION(p1, p2);
-
-## keyword
-ADMIN,SHOW,REPLICA,DISTRIBUTION
diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS.md
deleted file mode 100644
index 26486e9f5c..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS.md
+++ /dev/null
@@ -1,64 +0,0 @@
----
-{
- "title": "ADMIN SHOW REPLICA STATUS",
- "language": "en"
-}
----
-
-
-
-# ADMIN SHOW REPLICA STATUS
-## Description
-
-This statement is used to display replica status information for a table or partition
-
-Grammar:
-
-ADMIN SHOW REPLICA STATUS FROM [db_name.]tbl_name [PARTITION (p1,...)]
-[where_clause];
-
-where_clause:
-WHERE STATUS [!]= "replica_status"
-
-Replica status:
-OK: The replica is in a healthy state
-DEAD: The Backend where the replica resides is not available
-VERSION_ERROR: The replica data version is missing
-SCHEMA_ERROR: The schema hash of the replica is incorrect
-MISSING: The replica does not exist
-
-## example
-
-1. View the status of all replicas of the table
-
-ADMIN SHOW REPLICA STATUS FROM db1.tbl1;
-
-2. View replicas of the specified partitions of the table whose status is VERSION_ERROR
-
-ADMIN SHOW REPLICA STATUS FROM tbl1 PARTITION (p1, p2)
-WHERE STATUS = "VERSION_ERROR";
-
-3. Check all unhealthy replicas of the table
-
-ADMIN SHOW REPLICA STATUS FROM tbl1
-WHERE STATUS != "OK";
-
-## keyword
-ADMIN,SHOW,REPLICA,STATUS
diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN-DIAGNOSE-TABLET.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN-DIAGNOSE-TABLET.md
deleted file mode 100644
index e41bf98c53..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/ADMIN-DIAGNOSE-TABLET.md
+++ /dev/null
@@ -1,59 +0,0 @@
----
-{
- "title": "ADMIN DIAGNOSE TABLET",
- "language": "en"
-}
----
-
-
-
-# ADMIN DIAGNOSE TABLET
-## description
-
- This statement is used to diagnose the specified tablet. The results will show information about the tablet and some potential problems.
-
- grammar:
-
- ADMIN DIAGNOSE TABLET tablet_id
-
- Explain:
-
- The lines of information in the result are as follows:
- 1. TabletExist: Whether the Tablet exists
- 2. TabletId: Tablet ID
- 3. Database: The DB to which the Tablet belongs and its ID
- 4. Table: The Table to which Tablet belongs and its ID
- 5. Partition: The Partition to which the Tablet belongs and its ID
- 6. MaterializedIndex: The materialized view to which the Tablet belongs and its ID
- 7. Replicas(ReplicaId -> BackendId): Tablet replicas and their BE.
- 8. ReplicasNum: Whether the number of replicas is correct.
- 9. ReplicaBackendStatus: Whether the BE node where the replica is located is normal.
- 10. ReplicaVersionStatus: Whether the version number of the replica is normal.
- 11. ReplicaStatus: Whether the replica status is normal.
- 12. ReplicaCompactionStatus: Whether the replica Compaction status is normal.
-
-## example
-
- 1. Diagnose tablet 10001
-
- ADMIN DIAGNOSE TABLET 10001;
-
-## keyword
- ADMIN,DIAGNOSE,TABLET
diff --git a/docs/en/sql-reference/sql-statements/Administration/ALTER CLUSTER.md b/docs/en/sql-reference/sql-statements/Administration/ALTER CLUSTER.md
deleted file mode 100644
index 12643abd7d..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/ALTER CLUSTER.md
+++ /dev/null
@@ -1,49 +0,0 @@
----
-{
- "title": "ALTER CLUSTER",
- "language": "en"
-}
----
-
-
-
-# ALTER CLUSTER
-## description
-
-This statement is used to update the logical cluster. Administrator privileges are required
-
-grammar
-
-ALTER CLUSTER cluster_name PROPERTIES ("key"="value", ...);
-
-1. Scale-out and scale-in (determined by comparing instance_num with the current number of BEs in the cluster: a larger value means scale-out, a smaller one means scale-in). Scale-out is a synchronous operation, while scale-in is asynchronous; whether the scale-in has completed can be seen from the backend status.
-
-## example
-
-1. Scale in the logical cluster test_cluster, which contains 3 BEs, to 2 BEs.
-
-ALTER CLUSTER test_cluster PROPERTIES ("instance_num"="2");
-
-2. Scale out the logical cluster test_cluster, which contains 3 BEs, to 4 BEs.
-
-ALTER CLUSTER test_cluster PROPERTIES ("instance_num"="4");
-
-## keyword
-ALTER,CLUSTER
diff --git a/docs/en/sql-reference/sql-statements/Administration/ALTER SYSTEM.md b/docs/en/sql-reference/sql-statements/Administration/ALTER SYSTEM.md
deleted file mode 100644
index 7c3d4902a4..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/ALTER SYSTEM.md
+++ /dev/null
@@ -1,141 +0,0 @@
----
-{
- "title": "ALTER SYSTEM",
- "language": "en"
-}
----
-
-
-
-# ALTER SYSTEM
-
-## Description
-
- This statement is used to operate on nodes in a system. (Administrator only!)
-
- Syntax:
- 1) Adding nodes (without multi-tenant functionality, add in this way)
- ALTER SYSTEM ADD BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...];
- 2) Adding idle nodes (that is, adding BACKEND that does not belong to any cluster)
- ALTER SYSTEM ADD FREE BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...];
- 3) Adding nodes to a cluster
- ALTER SYSTEM ADD BACKEND TO cluster_name "host:heartbeat_port"[,"host:heartbeat_port"...];
- 4) Delete nodes
- ALTER SYSTEM DROP BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...];
- 5) Node offline
- ALTER SYSTEM DECOMMISSION BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...];
- 6) Add Broker
- ALTER SYSTEM ADD BROKER broker_name "host:port"[,"host:port"...];
- 7) Drop Broker
- ALTER SYSTEM DROP BROKER broker_name "host:port"[,"host:port"...];
- 8) Delete all Brokers
- ALTER SYSTEM DROP ALL BROKER broker_name
- 9) Set up a Load error hub for centralized display of import error information
- ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES ("key" = "value"[, ...]);
- 10) Modify property of BE
- ALTER SYSTEM MODIFY BACKEND "host:heartbeat_port" SET ("key" = "value"[, ...]);
-
- Explain:
- 1) Host can be hostname or IP address
- 2) heartbeat_port is the heartbeat port of the node
- 3) Adding and deleting nodes are synchronous operations. These two operations do not take into account the existing data on the node; the node is removed from the metadata directly, so use them with caution.
- 4) Node offline operations are used to secure offline nodes. This operation is asynchronous. If successful, the node will eventually be removed from the metadata. If it fails, the offline will not be completed.
- 5) The offline operation of the node can be cancelled manually. See CANCEL DECOMMISSION for details
- 6) Load error hub:
- Currently, two types of Hub are supported: Mysql and Broker. You need to specify "type" = "mysql" or "type" = "broker" in PROPERTIES.
- If you need to delete the current load error hub, you can set type to null.
- 1) When using the Mysql type, the error information generated when importing will be inserted into the specified MySQL library table, and then the error information can be viewed directly through the show load warnings statement.
-
- Hub of Mysql type needs to specify the following parameters:
- host: mysql host
- port: mysql port
- user: mysql user
- password: mysql password
- database: mysql database
- table: mysql table
-
- 2) When the Broker type is used, the error information generated when importing will form a file and be written to the designated remote storage system through the broker. Make sure that the corresponding broker is deployed
- Hub of Broker type needs to specify the following parameters:
- Broker: Name of broker
- Path: Remote Storage Path
- Other properties: Other information necessary to access remote storage, such as authentication information.
-
- 7) Modify BE node attributes currently supports the following attributes:
- 1. tag.location: Resource tag
- 2. disable_query: Query disabled attribute
- 3. disable_load: Load disabled attribute
-
-## Example
-
- 1. Add a node
- ALTER SYSTEM ADD BACKEND "host:port";
-
- 2. Adding an idle node
- ALTER SYSTEM ADD FREE BACKEND "host:port";
-
- 3. Delete two nodes
- ALTER SYSTEM DROP BACKEND "host1:port", "host2:port";
-
- 4. offline two nodes
- ALTER SYSTEM DECOMMISSION BACKEND "host1:port", "host2:port";
-
- 5. Add two Hdfs Broker
- ALTER SYSTEM ADD BROKER hdfs "host1:port", "host2:port";
-
- 6. Add a load error hub of Mysql type
- ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES
- ("type"= "mysql",
- "host" = "192.168.1.17",
- "port" = "3306",
- "user" = "my_name",
- "password" = "my_passwd",
- "database" = "doris_load",
- "table" = "load_errors"
- );
-
- 7. Add a load error hub of Broker type
- ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES
- ("type"= "broker",
- "name" = "bos",
- "path" = "bos://backup-cmy/logs",
- "bos_endpoint" ="http://gz.bcebos.com",
- "bos_accesskey" = "069fc278xxxxxx24ddb522",
- "bos_secret_accesskey"="700adb0c6xxxxxx74d59eaa980a"
- );
-
- 8. Delete the current load error hub
- ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES
- ("type"= "null");
-
- 9. Modify BE resource tag
-
- ALTER SYSTEM MODIFY BACKEND "host1:9050" SET ("tag.location" = "group_a");
-
- 10. Modify the query disabled attribute of BE
-
- ALTER SYSTEM MODIFY BACKEND "host1:9050" SET ("disable_query" = "true");
-
- 11. Modify the load disabled attribute of BE
-
- ALTER SYSTEM MODIFY BACKEND "host1:9050" SET ("disable_load" = "true");
-
-## keyword
-
- ALTER, SYSTEM, BACKEND, BROKER, FREE
diff --git a/docs/en/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION.md b/docs/en/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION.md
deleted file mode 100644
index 6965e6b80d..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION.md
+++ /dev/null
@@ -1,40 +0,0 @@
----
-{
- "title": "CANCEL DECOMMISSION",
- "language": "en"
-}
----
-
-
-
-# CANCEL DECOMMISSION
-## Description
-
-This statement is used to undo a node's offline operation. (Administrator only!)
-Grammar:
-CANCEL DECOMMISSION BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...];
-
-## example
-
-1. Cancel the offline operation of two nodes:
-CANCEL DECOMMISSION BACKEND "host1:port", "host2:port";
-
-## keyword
-CANCEL,DECOMMISSION,BACKEND
diff --git a/docs/en/sql-reference/sql-statements/Administration/CREATE CLUSTER.md b/docs/en/sql-reference/sql-statements/Administration/CREATE CLUSTER.md
deleted file mode 100644
index 440939dce0..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/CREATE CLUSTER.md
+++ /dev/null
@@ -1,60 +0,0 @@
----
-{
- "title": "CREATE CLUSTER",
- "language": "en"
-}
----
-
-
-
-# CREATE CLUSTER
-## Description
-
-This statement is used to create a new logical cluster, requiring administrator privileges. If you don't use multiple tenants, create a cluster named default_cluster directly. Otherwise, create a cluster with a custom name.
-
-grammar
-
-CREATE CLUSTER [IF NOT EXISTS] cluster_name
-
-PROPERTIES ("key"="value", ...)
-
-IDENTIFIED BY 'password'
-
-1. PROPERTIES
-
-Specify attributes of logical clusters
-
-PROPERTIES ("instance_num" = "3")
-
-
-2. IDENTIFIED BY 'password': each logical cluster contains a superuser, whose password must be specified when the logical cluster is created
-
-## example
-
-1. Create a new test_cluster with three be nodes and specify its superuser password
-
-CREATE CLUSTER test_cluster PROPERTIES("instance_num"="3") IDENTIFIED BY 'test';
-
-2. Create a new default_cluster with three be nodes (no multi-tenant is used) and specify its superuser password
-
-CREATE CLUSTER default_cluster PROPERTIES("instance_num"="3") IDENTIFIED BY 'test';
-
-## keyword
-CREATE,CLUSTER
diff --git a/docs/en/sql-reference/sql-statements/Administration/CREATE FILE.md b/docs/en/sql-reference/sql-statements/Administration/CREATE FILE.md
deleted file mode 100644
index a7f13f30b9..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/CREATE FILE.md
+++ /dev/null
@@ -1,76 +0,0 @@
----
-{
- "title": "CREATE FILE",
- "language": "en"
-}
----
-
-
-
-# CREATE FILE
-## Description
-
-This statement is used to create and upload a file to the Doris cluster.
-This function is usually used to manage files that need to be used in some other commands, such as certificates, public key, private key, etc.
-
-This command can be executed by users with admin privileges only.
-A file belongs to a database. This file can be used by users who have access to database.
-
-The size of a single file is limited to 1MB.
-A Doris cluster can hold at most 100 uploaded files.
-
-Grammar:
-
-CREATE FILE "File name" [IN database]
-[properties]
-
-Explain:
-file_name: Custom file name.
-database: The db to which the file belongs; if not specified, the db of the current session is used.
-properties supports the following parameters:
-
-url: Required. Specifies the download path of the file. Currently only unauthenticated HTTP download paths are supported. Once the command succeeds, the file is saved in Doris and the URL is no longer needed.
-catalog: Required. The category name of the file, which can be customized. Some commands look up files under a specified catalog; for example, in routine load, when the data source is kafka, the files under the catalog named kafka are looked up.
-md5: Optional. MD5 of the file. If specified, it is checked after the file is downloaded.
-
-## example
-
-1. Create a file ca.pem, categorized as kafka
-
-CREATE FILE "ca.pem"
-PROPERTIES
-(
-"url" ="https://test.bj.bcebos.com /kafka -key /ca.pem",
-"catalog" = "kafka"
-);
-
-2. Create the file client. key, categorized as my_catalog
-
-CREATE FILE "client.key"
-IN my_database
-PROPERTIES
-(
-"url" ="https://test.bj.bcebos.com /kafka -key /client.key",
-"catalog" = "my_catalog",
-"md5"= "b5bb901bf1099205b39a46ac3557dd9"
-);
-
-## keyword
-CREATE,FILE
diff --git a/docs/en/sql-reference/sql-statements/Administration/DROP CLUSTER.md b/docs/en/sql-reference/sql-statements/Administration/DROP CLUSTER.md
deleted file mode 100644
index 4330caa93d..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/DROP CLUSTER.md
+++ /dev/null
@@ -1,43 +0,0 @@
----
-{
- "title": "DROP CLUSTER",
- "language": "en"
-}
----
-
-
-
-# DROP CLUSTER
-## Description
-
-This statement is used to delete a logical cluster. Before a logical cluster can be deleted, all databases in it must be deleted first, and administrator privileges are required.
-
-grammar
-
-DROP CLUSTER [IF EXISTS] cluster_name
-
-## example
-
-Delete logical cluster test_cluster
-
-DROP CLUSTER test_cluster;
-
-## keyword
-DROP,CLUSTER
diff --git a/docs/en/sql-reference/sql-statements/Administration/DROP FILE.md b/docs/en/sql-reference/sql-statements/Administration/DROP FILE.md
deleted file mode 100644
index 4d259fa980..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/DROP FILE.md
+++ /dev/null
@@ -1,51 +0,0 @@
----
-{
- "title": "DROP FILE",
- "language": "en"
-}
----
-
-
-
-# DROP FILE
-## Description
-
-This statement is used to delete an uploaded file.
-
-Grammar:
-
-DROP FILE "file_name" [FROM database]
-[properties]
-
-Explain:
-file_name: File name.
-database: The db to which the file belongs; if not specified, the db of the current session is used.
-properties supports the following parameters:
-
-catalog: Required. The category of the file.
-
-## example
-
-1. Delete the file ca.pem
-
-DROP FILE "ca.pem" properties("catalog" = "kafka");
-
-## keyword
-DROP,FILE
diff --git a/docs/en/sql-reference/sql-statements/Administration/ENTER.md b/docs/en/sql-reference/sql-statements/Administration/ENTER.md
deleted file mode 100644
index 22b3a10c1d..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/ENTER.md
+++ /dev/null
@@ -1,44 +0,0 @@
----
-{
- "title": "ENTER",
- "language": "en"
-}
----
-
-
-
-# ENTER
-## Description
-
-This statement is used to enter a logical cluster. All user and database creation must be executed within a logical cluster; once created, they belong to that logical cluster. Administrator privileges are required.
-
-Grammar:
-
-ENTER cluster_name
-
-## example
-
-1. Enter the logical cluster test_cluster
-
-ENTER test_cluster;
-
-## keyword
-ENTER
-
diff --git a/docs/en/sql-reference/sql-statements/Administration/INSTALL PLUGIN.md b/docs/en/sql-reference/sql-statements/Administration/INSTALL PLUGIN.md
deleted file mode 100644
index 71bc9f715e..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/INSTALL PLUGIN.md
+++ /dev/null
@@ -1,63 +0,0 @@
----
-{
- "title": "INSTALL PLUGIN",
- "language": "en"
-}
----
-
-
-
-# INSTALL PLUGIN
-## description
-
- To install a plugin
-
- Syntax
-
- INSTALL PLUGIN FROM [source]
-
- source supports 3 kinds:
-
- 1. Point to a zip file with absolute path.
- 2. Point to a plugin dir with absolute path.
- 3. Point to a http/https download link of zip file.
-
- PROPERTIES supports setting some configurations of the plugin, such as setting the md5sum value of the zip file.
-
-## example
-
- 1. Install a plugin with a local zip file:
-
- INSTALL PLUGIN FROM "/home/users/doris/auditdemo.zip";
-
- 2. Install a plugin with a local dir:
-
- INSTALL PLUGIN FROM "/home/users/doris/auditdemo/";
-
- 3. Download and install a plugin:
-
- INSTALL PLUGIN FROM "http://mywebsite.com/plugin.zip";
-
- 4. Download and install a plugin, and set the md5sum value of the zip file:
-
- INSTALL PLUGIN FROM "http://mywebsite.com/plugin.zip" PROPERTIES("md5sum" = "73877f6029216f4314d712086a146570");
-
-## keyword
- INSTALL,PLUGIN
diff --git a/docs/en/sql-reference/sql-statements/Administration/LINK DATABASE.md b/docs/en/sql-reference/sql-statements/Administration/LINK DATABASE.md
deleted file mode 100644
index cf5b87ddbd..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/LINK DATABASE.md
+++ /dev/null
@@ -1,49 +0,0 @@
----
-{
- "title": "LINK DATABASE",
- "language": "en"
-}
----
-
-
-
-# LINK DATABASE
-## Description
-
-This statement allows users to link a database of one logical cluster to another logical cluster. A database may only be linked once at a time.
-
-Deleting the linked database does not delete the data, and the database being linked cannot be deleted. Administrator privileges are required.
-
-grammar
-
-LINK DATABASE src_cluster_name.src_db_name dest_cluster_name.dest_db_name
-
-## example
-
-1. Link test_db in test_cluster A to test_cluster B and name it link_test_db
-
-LINK DATABASE test_clusterA.test_db test_clusterB.link_test_db;
-
-2. Delete linked database link_test_db
-
-DROP DATABASE link_test_db;
-
-## keyword
-LINK,DATABASE
diff --git a/docs/en/sql-reference/sql-statements/Administration/MIGRATE DATABASE.md b/docs/en/sql-reference/sql-statements/Administration/MIGRATE DATABASE.md
deleted file mode 100644
index bd6d1299a1..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/MIGRATE DATABASE.md
+++ /dev/null
@@ -1,45 +0,0 @@
----
-{
- "title": "MIGRATE DATABASE",
- "language": "en"
-}
----
-
-
-
-# MIGRATE DATABASE
-## Description
-
-This statement is used to migrate a database of one logical cluster to another logical cluster. Before performing this operation, the database must be in a linked state. Administrator privileges are required.
-
-grammar
-
-MIGRATE DATABASE src_cluster_name.src_db_name dest_cluster_name.dest_db_name
-
-## example
-
-1. Migrate test_db in test_clusterA to test_clusterB
-
-MIGRATE DATABASE test_clusterA.test_db test_clusterB.link_test_db;
-
-## keyword
-MIGRATE,DATABASE
diff --git a/docs/en/sql-reference/sql-statements/Administration/SET LDAP_ADMIN_PASSWORD.md b/docs/en/sql-reference/sql-statements/Administration/SET LDAP_ADMIN_PASSWORD.md
deleted file mode 100644
index 30ba218490..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/SET LDAP_ADMIN_PASSWORD.md
+++ /dev/null
@@ -1,45 +0,0 @@
----
-{
- "title": "SET LDAP_ADMIN_PASSWORD",
- "language": "en"
-}
----
-
-
-
-# SET LDAP_ADMIN_PASSWORD
-## description
-
-Syntax:
-
- SET LDAP_ADMIN_PASSWORD = 'plain password'
-
- The SET command is used to set the LDAP administrator password. When using LDAP authentication, Doris needs to use the administrator account and password to query the LDAP service for information about the user who is logging in.
-
-## example
-
-1. Set LDAP admin password:
-```
-SET LDAP_ADMIN_PASSWORD = '123456'
-```
-
-## keyword
- SET, LDAP, LDAP_ADMIN_PASSWORD
-
diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW BACKENDS.md b/docs/en/sql-reference/sql-statements/Administration/SHOW BACKENDS.md
deleted file mode 100644
index e89adc9404..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/SHOW BACKENDS.md
+++ /dev/null
@@ -1,48 +0,0 @@
----
-{
- "title": "SHOW BACKENDS",
- "language": "en"
-}
----
-
-
-
-# SHOW BACKENDS
-## Description
-This statement is used to view BE nodes in the cluster
-Grammar:
-SHOW BACKENDS;
-
-Explain:
-1. LastStartTime indicates the last BE start-up time.
-2. LastHeartbeat represents the latest heartbeat.
-3. Alive indicates whether the node survives.
-4. SystemDecommissioned being true indicates that the node is being safely decommissioned.
-5. ClusterDecommissioned being true indicates that the node is being decommissioned within the current cluster.
-6. TabletNum represents the number of tablets on the node.
-7. Data Used Capacity represents the space occupied by the actual user data.
-8. Avail Capacity represents the available space on the disk.
-9. Total Capacity represents total disk space. Total Capacity = AvailCapacity + DataUsedCapacity + the space taken up by other non-user data files.
-10. UsedPct represents the percentage of disk usage.
-11. ErrMsg is used to display error messages when a heartbeat fails.
-12. Status is used to display some status information about the BE in JSON format, including the last time the BE reported its tablets.
-
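-## example
-
-A minimal invocation sketch (the statement takes no arguments and simply lists the BE nodes registered in the cluster):
-
-SHOW BACKENDS;
-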
-## keyword
-SHOW, BACKENDS
diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW BROKER.md b/docs/en/sql-reference/sql-statements/Administration/SHOW BROKER.md
deleted file mode 100644
index 20d8238840..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/SHOW BROKER.md
+++ /dev/null
@@ -1,40 +0,0 @@
----
-{
- "title": "SHOW BROKER",
- "language": "en"
-}
----
-
-
-
-# SHOW BROKER
-## Description
-This statement is used to view the existing broker
-Grammar:
-SHOW BROKER;
-
-Explain:
-1. LastStartTime indicates the last start-up time of the broker.
-2. LastHeartbeat represents the latest heartbeat.
-3. Alive indicates whether the node survives.
-4. ErrMsg is used to display error messages when the heartbeat fails.
-
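-## example
-
-A minimal invocation sketch (no arguments are required; it lists the brokers currently registered in the cluster):
-
-SHOW BROKER;
-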
-## keyword
-SHOW, BROKER
diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW FILE.md b/docs/en/sql-reference/sql-statements/Administration/SHOW FILE.md
deleted file mode 100644
index 0fabe4e9ed..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/SHOW FILE.md
+++ /dev/null
@@ -1,52 +0,0 @@
----
-{
- "title": "SHOW FILE",
- "language": "en"
-}
----
-
-
-
-# SHOW FILE
-## Description
-
-This statement is used to show a file created in a database
-
-Grammar:
-
-SHOW FILE [FROM database];
-
-Explain:
-
-FileId: File ID, globally unique
-DbName: The name of the database to which it belongs
-Catalog: Custom Categories
-FileName: File name
-FileSize: File size, in bytes
-MD5: MD5 of the file
-
-## example
-
-1. View uploaded files in my_database
-
-SHOW FILE FROM my_database;
-
-## keyword
-SHOW,FILE
diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW FRONTENDS.md b/docs/en/sql-reference/sql-statements/Administration/SHOW FRONTENDS.md
deleted file mode 100644
index 097829522e..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/SHOW FRONTENDS.md
+++ /dev/null
@@ -1,43 +0,0 @@
----
-{
- "title": "SHOW FRONTENDS",
- "language": "en"
-}
----
-
-
-
-# SHOW FRONTENDS
-## Description
-This statement is used to view FE nodes
-Grammar:
-SHOW FRONTENDS;
-
-Explain:
-1. name denotes the name of the FE node in bdbje.
-2. Join being true indicates that the node has joined the cluster at some point. But it does not mean that it is still in the cluster (it may have lost contact).
-3. Alive indicates whether the node survives.
-4. Replayed Journal Id represents the maximum metadata log ID that the node has currently replayed.
-5. LastHeartbeat is the latest heartbeat.
-6. IsHelper indicates whether the node is a helper node in bdbje.
-7. ErrMsg is used to display error messages when the heartbeat fails.
-
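-## example
-
-A minimal invocation sketch (no arguments are required; it lists the FE nodes of the cluster):
-
-SHOW FRONTENDS;
-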
-## keyword
-SHOW, FRONTENDS
diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW FULL COLUMNS.md b/docs/en/sql-reference/sql-statements/Administration/SHOW FULL COLUMNS.md
deleted file mode 100644
index 0aae646571..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/SHOW FULL COLUMNS.md
+++ /dev/null
@@ -1,42 +0,0 @@
----
-{
- "title": "SHOW FULL COLUMNS",
- "language": "en"
-}
----
-
-
-
-# SHOW FULL COLUMNS
-## description
- This statement is used to view some information about columns of a table.
-
- Syntax:
- SHOW FULL COLUMNS FROM tbl;
-
-## example
-
- 1. View the column information of specified table
-
- SHOW FULL COLUMNS FROM tbl;
-
-## keyword
-
- SHOW,FULL,COLUMNS
diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW INDEX.md b/docs/en/sql-reference/sql-statements/Administration/SHOW INDEX.md
deleted file mode 100644
index 80a4b00a6b..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/SHOW INDEX.md
+++ /dev/null
@@ -1,46 +0,0 @@
----
-{
- "title": "SHOW INDEX",
- "language": "en"
-}
----
-
-
-
-# SHOW INDEX
-
-## description
-
- This statement is used to show all index(only bitmap index in current version) of a table
- Grammar:
- SHOW INDEX[ES] FROM [db_name.]table_name [FROM database];
-
- OR
-
- SHOW KEY[S] FROM [db_name.]table_name [FROM database];
-
-## example
-
- 1. display all indexes in table table_name
- SHOW INDEX FROM example_db.table_name;
-
-## keyword
-
- SHOW,INDEX
diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW MIGRATIONS.md b/docs/en/sql-reference/sql-statements/Administration/SHOW MIGRATIONS.md
deleted file mode 100644
index d844d423ce..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/SHOW MIGRATIONS.md
+++ /dev/null
@@ -1,37 +0,0 @@
----
-{
- "title": "SHOW MIGRATIONS",
- "language": "en"
-}
----
-
-
-
-# SHOW MIGRATIONS
-## Description
-
-This statement is used to view the progress of database migration
-
-Grammar:
-
-SHOW MIGRATIONS;
-
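-## example
-
-1. View the progress of database migration
-
-SHOW MIGRATIONS;
-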
-## keyword
-SHOW,MIGRATIONS
diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW PLUGINS.md b/docs/en/sql-reference/sql-statements/Administration/SHOW PLUGINS.md
deleted file mode 100644
index 02a4e13160..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/SHOW PLUGINS.md
+++ /dev/null
@@ -1,45 +0,0 @@
----
-{
- "title": "SHOW PLUGINS",
- "language": "en"
-}
----
-
-
-
-# SHOW PLUGINS
-## description
-
- To view the installed plugins.
-
- Syntax
-
- SHOW PLUGINS;
-
- This command will show all builtin and custom plugins.
-
-## example
-
- 1. To view the installed plugins:
-
- SHOW PLUGINS;
-
-## keyword
- SHOW, PLUGIN, PLUGINS
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW TABLE STATUS.md b/docs/en/sql-reference/sql-statements/Administration/SHOW TABLE STATUS.md
deleted file mode 100644
index a396e1b43f..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/SHOW TABLE STATUS.md
+++ /dev/null
@@ -1,55 +0,0 @@
----
-{
- "title": "SHOW TABLE STATUS",
- "language": "en"
-}
----
-
-
-
-# SHOW TABLE STATUS
-
-## description
-
-This statement is used to view some information about Table.
-
- Syntax:
-
- SHOW TABLE STATUS
- [FROM db] [LIKE "pattern"]
-
- Explain:
-
- 1. This statement is mainly used for compatibility with MySQL syntax. At present, only a small amount of information, such as Comment, is displayed.
-
-## Example
-
- 1. View the information of all tables under the current database
-
- SHOW TABLE STATUS;
-
-
- 2. View the information of the table whose name contains example in the specified database
-
- SHOW TABLE STATUS FROM DB LIKE "%example%";
-
-## Keyword
-
- SHOW,TABLE,STATUS
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW TRASH.md b/docs/en/sql-reference/sql-statements/Administration/SHOW TRASH.md
deleted file mode 100644
index b1081d524f..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/SHOW TRASH.md
+++ /dev/null
@@ -1,53 +0,0 @@
----
-{
- "title": "SHOW TRASH",
- "language": "en"
-}
----
-
-
-
-# SHOW TRASH
-## description
-
-This statement is used to view trash used capacity on some backends.
-
- Syntax:
-
- SHOW TRASH [ON "BackendHost:BackendHeartBeatPort"];
-
- Explain:
-
- 1. Backend: the node, in the format BackendHost:BackendHeartBeatPort.
- 2. TrashUsedCapacity: the space occupied by the trash data of the node.
-
-## example
-
- 1. View the space occupied by trash data of all be nodes.
-
- SHOW TRASH;
-
- 2. Check the space occupied by trash data of '192.168.0.1:9050'(The specific disk information will be displayed).
-
- SHOW TRASH ON "192.168.0.1:9050";
-
-## keyword
- SHOW, TRASH
-
diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW VIEW.md b/docs/en/sql-reference/sql-statements/Administration/SHOW VIEW.md
deleted file mode 100644
index 976dfc29f4..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/SHOW VIEW.md
+++ /dev/null
@@ -1,46 +0,0 @@
----
-{
- "title": "SHOW TABLE STATUS",
- "language": "en"
-}
----
-
-
-
-# SHOW VIEW
-
-## description
-
- This statement is used to show all views based on a given table
-
- Syntax:
-
- SHOW VIEW { FROM | IN } table [ FROM db ]
-
-## Example
-
- 1. Show all views based on the table testTbl
-
- show view from testTbl;
-
-
-## Keyword
-
- SHOW,VIEW
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Administration/UNINSTALL PLUGIN.md b/docs/en/sql-reference/sql-statements/Administration/UNINSTALL PLUGIN.md
deleted file mode 100644
index 8be8b97f43..0000000000
--- a/docs/en/sql-reference/sql-statements/Administration/UNINSTALL PLUGIN.md
+++ /dev/null
@@ -1,47 +0,0 @@
----
-{
- "title": "UNINSTALL PLUGIN",
- "language": "en"
-}
----
-
-
-
-# UNINSTALL PLUGIN
-## description
-
- To uninstall a plugin.
-
- Syntax
-
- UNINSTALL PLUGIN plugin_name;
-
- plugin_name can be found by `SHOW PLUGINS;`.
-
- Only non-builtin plugins can be uninstalled.
-
-## example
-
- 1. Uninstall a plugin:
-
- UNINSTALL PLUGIN auditdemo;
-
-## keyword
- UNINSTALL,PLUGIN
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/ALTER DATABASE.md b/docs/en/sql-reference/sql-statements/Data Definition/ALTER DATABASE.md
deleted file mode 100644
index 6d6c2dd28f..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/ALTER DATABASE.md
+++ /dev/null
@@ -1,56 +0,0 @@
----
-{
- "title": "ALTER DATABASE",
- "language": "en"
-}
----
-
-
-
-# ALTER DATABASE
-## description
-This statement is used to set the properties of the specified database. (Administrators only)
-Grammar:
-1) Set the database data quota, in units of B/K/KB/M/MB/G/GB/T/TB/P/PB
-ALTER DATABASE db_name SET DATA QUOTA quota;
-
-2) Rename the database
-ALTER DATABASE db_name RENAME new_db_name;
-
-Explain:
-After renaming the database, use REVOKE and GRANT commands to modify the corresponding user rights if necessary.
-The database's default data quota is 1024GB, and the default replica quota is 1073741824.
-
-## example
-1. Setting the specified database data quota
-ALTER DATABASE example_db SET DATA QUOTA 10995116277760;
-The above units are bytes, equivalent to
-ALTER DATABASE example_db SET DATA QUOTA 10T;
-
-ALTER DATABASE example_db SET DATA QUOTA 100G;
-
-ALTER DATABASE example_db SET DATA QUOTA 200M;
-
-2. Rename the database example_db to example_db2
-ALTER DATABASE example_db RENAME example_db2;
-
-## keyword
-ALTER,DATABASE,RENAME
-
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/ALTER RESOURCE.md b/docs/en/sql-reference/sql-statements/Data Definition/ALTER RESOURCE.md
deleted file mode 100644
index 1d1361cfa4..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/ALTER RESOURCE.md
+++ /dev/null
@@ -1,48 +0,0 @@
----
-{
-"title": "ALTER RESOURCE",
-"language": "en"
-}
----
-
-
-
-# ALTER RESOURCE
-
-## Description
-
- This statement is used to modify an existing resource. Only the root or admin user can modify resources.
- Syntax:
- ALTER RESOURCE 'resource_name'
- PROPERTIES ("key"="value", ...);
-
- Note: The resource type does not support modification.
-
-## Example
-
- 1. Modify the working directory of the Spark resource named spark0:
- ALTER RESOURCE 'spark0' PROPERTIES ("working_dir" = "hdfs://127.0.0.1:10000/tmp/doris_new");
-
- 2. Modify the maximum number of connections to the S3 resource named remote_s3:
- ALTER RESOURCE 'remote_s3' PROPERTIES ("s3_max_connections" = "100");
-
-## keyword
-
- ALTER, RESOURCE
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/ALTER TABLE.md b/docs/en/sql-reference/sql-statements/Data Definition/ALTER TABLE.md
deleted file mode 100644
index 0d4a1f6f04..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/ALTER TABLE.md
+++ /dev/null
@@ -1,445 +0,0 @@
----
-{
- "title": "ALTER TABLE",
- "language": "en"
-}
----
-
-
-
-# ALTER TABLE
-
-## description
-
- This statement is used to modify an existing table. If no rollup index is specified, the operation applies to the base index by default.
- The statement is divided into three types of operations: schema change, rollup and partition.
- These three types of operations cannot appear in the same ALTER TABLE statement.
- Schema change and rollup are asynchronous operations; the statement returns once the task is submitted successfully. You can then use the SHOW ALTER command to view the progress.
- Partition operations are synchronous; the return of the command indicates that execution is complete.
-
- grammar:
- ALTER TABLE [database.]table
- Alter_clause1[, alter_clause2, ...];
-
- The alter_clause is divided into partition, rollup, schema change, rename and bitmap index.
-
- Partition supports the following modifications:
- 1. Add a partition
- grammar:
- ADD PARTITION [IF NOT EXISTS] partition_name
- partition_desc ["key"="value"]
- [DISTRIBUTED BY HASH (k1[,k2 ...]) [BUCKETS num]]
- note:
- 1) partition_desc supports two ways of writing:
- * VALUES LESS THAN [MAXVALUE|("value1", ...)]
- * VALUES [("value1", ...), ("value2", ...))
- 2) The partition is a left-closed, right-open interval. If the user only specifies the right boundary, the system will automatically determine the left boundary.
- 3) If the bucketing mode is not specified, the bucketing mode used when the table was created is used automatically.
- 4) If the bucketing mode is specified, only the bucket number can be modified; the bucketing mode or bucketing columns cannot be modified.
- 5) The ["key"="value"] section can set some properties of the partition, see CREATE TABLE for details.
- 6) Adding partitions to a non-partitioned table is not supported.
-
- 2. Delete the partition
- grammar:
- DROP PARTITION [IF EXISTS] partition_name
- note:
- 1) A partitioned table must keep at least one partition.
- 2) For a period of time after executing DROP PARTITION, the dropped partition can be recovered by the RECOVER statement. See the RECOVER statement for details.
- 3) If DROP PARTITION FORCE is executed, the system will not check whether the partition has unfinished transactions; the partition will be deleted directly and cannot be recovered. This operation is generally not recommended.
-
- 3. Modify the partition properties
- grammar:
- MODIFY PARTITION p1|(p1[, p2, ...]) SET ("key" = "value", ...)
- Description:
- 1) The following attributes of the modified partition are currently supported.
- - storage_medium
- - storage_cooldown_time
- - remote_storage_cooldown_time
- - replication_num
- - in_memory
- 2) For single-partition tables, partition_name is the same as the table name.
-
- Rollup supports the following ways to create:
- 1. Create a rollup index
- grammar:
- ADD ROLLUP rollup_name (column_name1, column_name2, ...)
- [FROM from_index_name]
- [PROPERTIES ("key"="value", ...)]
-
- properties: Support setting timeout time, the default timeout time is 1 day.
- example:
- ADD ROLLUP r1(col1,col2) from r0
- 1.2 Batch create rollup index
- grammar:
- ADD ROLLUP [rollup_name (column_name1, column_name2, ...)
- [FROM from_index_name]
- [PROPERTIES ("key"="value", ...)],...]
- example:
- ADD ROLLUP r1(col1,col2) from r0, r2(col3,col4) from r0
- 1.3 note:
- 1) If from_index_name is not specified, it is created by default from base index
- 2) The columns in the rollup table must be existing columns in from_index
- 3) In properties, you can specify the storage format. See CREATE TABLE for details.
-
- 2. Delete the rollup index
- grammar:
- DROP ROLLUP rollup_name
- [PROPERTIES ("key"="value", ...)]
- example:
- DROP ROLLUP r1
- 2.1 Batch Delete rollup index
- grammar: DROP ROLLUP [rollup_name [PROPERTIES ("key"="value", ...)],...]
- example: DROP ROLLUP r1,r2
- 2.2 note:
- 1) Cannot delete base index
-
-
- Schema change supports the following modifications:
- 1. Add a column to the specified location of the specified index
- grammar:
- ADD COLUMN column_name column_type [KEY | agg_type] [DEFAULT "default_value"]
- [AFTER column_name|FIRST]
- [TO rollup_index_name]
- [PROPERTIES ("key"="value", ...)]
- note:
- 1) For the aggregate model, if you add a value column, you need to specify agg_type
- 2) For non-aggregate models (such as DUPLICATE KEY), if you add a key column, you need to specify the KEY keyword.
- 3) You cannot add a column that already exists in the base index to the rollup index
- (recreate a rollup index if needed)
-
- 2. Add multiple columns to the specified index
- grammar:
- ADD COLUMN (column_name1 column_type [KEY | agg_type] DEFAULT "default_value", ...)
- [TO rollup_index_name]
- [PROPERTIES ("key"="value", ...)]
- note:
- 1) For the aggregate model, if you add a value column, you need to specify agg_type
- 2) For non-aggregate models, if you add a key column, you need to specify the KEY keyword.
- 3) You cannot add a column that already exists in the base index to the rollup index
- (You can recreate a rollup index if needed)
-
- 3. Remove a column from the specified index
- grammar:
- DROP COLUMN column_name
- [FROM rollup_index_name]
- note:
- 1) Cannot delete partition column
- 2) If the column is removed from the base index, it will also be deleted if the column is included in the rollup index
-
- 4. Modify the column type and column position of the specified index
- grammar:
- MODIFY COLUMN column_name column_type [KEY | agg_type] [NULL | NOT NULL] [DEFAULT "default_value"]
- [AFTER column_name|FIRST]
- [FROM rollup_index_name]
- [PROPERTIES ("key"="value", ...)]
- note:
- 1) For the aggregate model, if you modify a value column, you need to specify agg_type
- 2) For non-aggregate models, if you modify a key column, you need to specify the KEY keyword.
- 3) Only the type of the column can be modified. The other attributes of the column remain as they are (i.e. the other attributes need to be explicitly written in the statement according to their original values, see example 8)
- 4) The partition column cannot be modified
- 5) The following types of conversions are currently supported (accuracy loss is guaranteed by the user)
- TINYINT/SMALLINT/INT/BIGINT/LARGEINT/FLOAT/DOUBLE convert to a wider range of numeric types
- TINYINT/SMALLINT/INT/BIGINT/LARGEINT/FLOAT/DOUBLE/DECIMAL is converted to VARCHAR
- VARCHAR supports modification of maximum length
- Convert VARCHAR/CHAR to TINYINT/SMALLINT/INT/BIGINT/LARGEINT/FLOAT/DOUBLE.
- Convert VARCHAR/CHAR to DATE (currently supports six formats: "%Y-%m-%d", "%y-%m-%d", "%Y%m%d", "%y%m%d", "%Y/%m/%d", "%y/%m/%d")
- Convert DATETIME to DATE(Only year-month-day information is retained, For example: `2019-12-09 21:47:05` <--> `2019-12-09`)
- Convert DATE to DATETIME(Set hour, minute, second to zero, For example: `2019-12-09` <--> `2019-12-09 00:00:00`)
- Convert FLOAT to DOUBLE
- Convert INT to DATE (If the INT data fails to convert, the original data remains the same)
- 6) Does not support changing from NULL to NOT NULL
-
- 5. Reorder the columns of the specified index
- grammar:
- ORDER BY (column_name1, column_name2, ...)
- [FROM rollup_index_name]
- [PROPERTIES ("key"="value", ...)]
- note:
- 1) All columns in the index must be written
- 2) value columns are listed after the key columns
-
- 6. Modify the properties of the table. Currently, modifying the bloom filter columns, the colocate_with attribute, the dynamic_partition attribute, replication_num and default.replication_num is supported.
- grammar:
- PROPERTIES ("key"="value")
- note:
- Can also be merged into the above schema change operation to modify, see the example below
-
- 7. Enable batch delete support
- grammar:
- ENABLE FEATURE "BATCH_DELETE"
- note:
- 1) Only support unique tables
- 2) This adds batch-delete support to existing tables; newly created tables already support it
-
- 8. Enable the ability to import in order by the value of the sequence column
- grammar:
- ENABLE FEATURE "SEQUENCE_LOAD" WITH PROPERTIES ("function_column.sequence_type" = "Date")
- note:
- 1) Only support unique tables
- 2) The sequence_type is used to specify the type of the sequence column, which can be integral and time type
- 3) Only the orderliness of newly imported data is supported. Historical data cannot be changed
-
- 9. Modify default buckets number of partition
- grammar:
- MODIFY DISTRIBUTION DISTRIBUTED BY HASH (k1[,k2 ...]) BUCKETS num
- note:
- 1) Only non-colocate tables with RANGE partition and HASH distribution are supported
-
- 10. Modify table comment
- grammar:
- MODIFY COMMENT "new table comment"
-
- 11. Modify column comment
- grammar:
- MODIFY COLUMN col1 COMMENT "new column comment"
-
- 12. Modify engine type
-
- Only the MySQL type can be changed to the ODBC type. The value of driver is the name of the driver in the odbcinst.ini configuration.
-
- grammar:
- MODIFY ENGINE TO odbc PROPERTIES("driver" = "MySQL");
-
- Rename supports modification of the following names:
- 1. Modify the table name
- grammar:
- RENAME new_table_name;
-
- 2. Modify the rollup index name
- grammar:
- RENAME ROLLUP old_rollup_name new_rollup_name;
-
- 3. Modify the partition name
- grammar:
- RENAME PARTITION old_partition_name new_partition_name;
-
- Replace supports swapping data between two tables:
- 1. swap data between two tables
- grammar:
- REPLACE WITH TABLE new_table [PROPERTIES('swap' = 'true')];
- note:
- 1. if swap is true, swap data between the two tables.
- 2. if swap is false, replace the old_table with the new_table, and delete the new_table.
-
- Bitmap index supports the following modifications:
- 1. create bitmap index
- grammar:
- ADD INDEX [IF NOT EXISTS] index_name (column [, ...]) [USING BITMAP] [COMMENT 'balabala'];
- note:
- 1. only bitmap indexes are supported in the current version
- 2. a BITMAP index can only be applied to a single column
- 2. drop index
- grammar:
- DROP INDEX [IF EXISTS] index_name;
-
-## example
-
- [table]
- 1. Modify the default number of replications of the table, which is used as default number of replications while creating new partition.
- ALTER TABLE example_db.my_table
- SET ("default.replication_num" = "2");
-
- 2. Modify the actual number of replications of an unpartitioned table (unpartitioned table only)
- ALTER TABLE example_db.my_table
- SET ("replication_num" = "3");
-
- [partition]
- 1. Add partition, existing partition [MIN, 2013-01-01), add partition [2013-01-01, 2014-01-01), use default bucket mode
- ALTER TABLE example_db.my_table
- ADD PARTITION p1 VALUES LESS THAN ("2014-01-01");
-
- 2. Increase the partition and use the new number of buckets
- ALTER TABLE example_db.my_table
- ADD PARTITION p1 VALUES LESS THAN ("2015-01-01")
- DISTRIBUTED BY HASH(k1) BUCKETS 20;
-
- 3. Increase the partition and use the new number of copies
- ALTER TABLE example_db.my_table
- ADD PARTITION p1 VALUES LESS THAN ("2015-01-01")
- ("replication_num"="1");
-
- 4. Modify the number of partition copies
- ALTER TABLE example_db.my_table
- MODIFY PARTITION p1 SET("replication_num"="1");
-
- 5. Batch modify the specified partitions
- ALTER TABLE example_db.my_table
- MODIFY PARTITION (p1, p2, p4) SET("in_memory"="true");
-
- 6. Batch modify all partitions
- ALTER TABLE example_db.my_table
- MODIFY PARTITION (*) SET("storage_medium"="HDD");
-
- 7. Delete the partition
- ALTER TABLE example_db.my_table
- DROP PARTITION p1;
-
- 8. Add a partition that specifies the upper and lower bounds
-
- ALTER TABLE example_db.my_table
- ADD PARTITION p1 VALUES [("2014-01-01"), ("2014-02-01"));
-
- [rollup]
- 1. Create index: example_rollup_index, based on base index(k1,k2,k3,v1,v2). Columnar storage.
- ALTER TABLE example_db.my_table
- ADD ROLLUP example_rollup_index(k1, k3, v1, v2);
-
- 2. Create index: example_rollup_index2, based on example_rollup_index(k1,k3,v1,v2)
- ALTER TABLE example_db.my_table
- ADD ROLLUP example_rollup_index2 (k1, v1)
- FROM example_rollup_index;
-
- 3. Create index: example_rollup_index3, based on base index (k1, k2, k3, v1), custom rollup timeout time is one hour.
-
- ALTER TABLE example_db.my_table
- ADD ROLLUP example_rollup_index3(k1, k3, v1)
- PROPERTIES("timeout" = "3600");
-
- 4. Delete index: example_rollup_index2
- ALTER TABLE example_db.my_table
- DROP ROLLUP example_rollup_index2;
-
- [schema change]
- 1. Add a key column new_col to the col1 of example_rollup_index (non-aggregate model)
- ALTER TABLE example_db.my_table
- ADD COLUMN new_col INT KEY DEFAULT "0" AFTER col1
- TO example_rollup_index;
-
- 2. Add a value column new_col to the col1 of example_rollup_index (non-aggregate model)
- ALTER TABLE example_db.my_table
- ADD COLUMN new_col INT DEFAULT "0" AFTER col1
- TO example_rollup_index;
-
- 3. Add a key column new_col (aggregation model) to col1 of example_rollup_index
- ALTER TABLE example_db.my_table
- ADD COLUMN new_col INT DEFAULT "0" AFTER col1
- TO example_rollup_index;
-
- 4. Add a value column to the col1 of example_rollup_index. new_col SUM aggregation type (aggregation model)
- ALTER TABLE example_db.my_table
- ADD COLUMN new_col INT SUM DEFAULT "0" AFTER col1
- TO example_rollup_index;
-
- 5. Add multiple columns to the example_rollup_index (aggregate model)
- ALTER TABLE example_db.my_table
- ADD COLUMN (col1 INT DEFAULT "1", col2 FLOAT SUM DEFAULT "2.3")
- TO example_rollup_index;
-
- 6. Remove a column from example_rollup_index
- ALTER TABLE example_db.my_table
- DROP COLUMN col2
- FROM example_rollup_index;
-
- 7. Modify the base index's col1 key column to be of type BIGINT and move to the col2 column
- (*Attention: Whether modifying a key column or a value column, the complete column information needs to be declared, i.e. MODIFY COLUMN xxx COLUMNTYPE [KEY|agg_type]*)
- ALTER TABLE example_db.my_table
- MODIFY COLUMN col1 BIGINT KEY DEFAULT "1" AFTER col2;
-
- 8. Modify the maximum length of the val1 column of the base index. The original val1 is (val1 VARCHAR(32) REPLACE DEFAULT "abc")
- ALTER TABLE example_db.my_table
- MODIFY COLUMN val1 VARCHAR(64) REPLACE DEFAULT "abc";
-
- 9. Reorder the columns in example_rollup_index (set the original column order: k1, k2, k3, v1, v2)
- ALTER TABLE example_db.my_table
- ORDER BY (k3, k1, k2, v2, v1)
- FROM example_rollup_index;
-
- 10. Perform both operations simultaneously
- ALTER TABLE example_db.my_table
- ADD COLUMN v2 INT MAX DEFAULT "0" AFTER k2 TO example_rollup_index,
- ORDER BY (k3,k1,k2,v2,v1) FROM example_rollup_index;
-
- 11. Modify the bloom filter column of the table
- ALTER TABLE example_db.my_table SET ("bloom_filter_columns"="k1,k2,k3");
-
- Can also be merged into the above schema change operation (note that the syntax of multiple clauses is slightly different)
- ALTER TABLE example_db.my_table
- DROP COLUMN col2
- PROPERTIES ("bloom_filter_columns"="k1,k2,k3");
-
- 12. Modify the Colocate property of the table
-
- ALTER TABLE example_db.my_table set ("colocate_with" = "t1");
-
- 13. Change the bucketing mode of the table from Hash Distribution to Random Distribution
-
- ALTER TABLE example_db.my_table set ("distribution_type" = "random");
-
- 14. Modify the dynamic partition properties of the table (support adding dynamic partition properties to tables without dynamic partition properties)
-
- ALTER TABLE example_db.my_table set ("dynamic_partition.enable" = "false");
-
- If you need to add dynamic partition attributes to a table without dynamic partition attributes, you need to specify all dynamic partition attributes.
- (Note: Adding dynamic partition attributes to a non-partitioned table is not supported)
-
- ALTER TABLE example_db.my_table set ("dynamic_partition.enable" = "true", "dynamic_partition.time_unit" = "DAY", "dynamic_partition.end" = "3", "dynamic_partition.prefix" = "p", "dynamic_partition.buckets" = "32");
-
-
- 15. Modify the in_memory property of the table
-
- ALTER TABLE example_db.my_table set ("in_memory" = "true");
- 16. Enable batch delete support
-
- ALTER TABLE example_db.my_table ENABLE FEATURE "BATCH_DELETE"
- 17. Enable the ability to import in order by the value of the Sequence column
-
- ALTER TABLE example_db.my_table ENABLE FEATURE "SEQUENCE_LOAD" WITH PROPERTIES ("function_column.sequence_type" = "Date")
-
- 18. Modify the default buckets number of example_db.my_table to 50
-
- ALTER TABLE example_db.my_table MODIFY DISTRIBUTION DISTRIBUTED BY HASH(k1) BUCKETS 50;
-
- 19. Modify table comment
-
- ALTER TABLE example_db.my_table MODIFY COMMENT "new comment";
-
- 20. Modify column comment
-
- ALTER TABLE example_db.my_table MODIFY COLUMN k1 COMMENT "k1", MODIFY COLUMN k2 COMMENT "k2";
-
- 21. Modify engine Type
-
- ALTER TABLE example_db.mysql_table MODIFY ENGINE TO odbc PROPERTIES("driver" = "MySQL");
-
- [rename]
- 1. Modify the table named table1 to table2
- ALTER TABLE table1 RENAME table2;
-
- 2. Modify the rollup index named rollup1 in the table example_table to rollup2
- ALTER TABLE example_table RENAME ROLLUP rollup1 rollup2;
-
- 3. Modify the partition named p1 in the table example_table to p2
- ALTER TABLE example_table RENAME PARTITION p1 p2;
-
- [replace]
- 1. swap data between two tables
- ALTER TABLE table1 REPLACE WITH TABLE table2;
- 2. replace the table1 with the table2, and delete the table2.
- ALTER TABLE table1 REPLACE WITH TABLE table2 PROPERTIES('swap' = 'false');
-
- [index]
- 1. create index on table1 column siteid using bitmap
- ALTER TABLE table1 ADD INDEX index_name (siteid) USING BITMAP COMMENT 'balabala';
- 2. drop bitmap index of table1
- ALTER TABLE table1 DROP INDEX index_name;
-
-## keyword
-
- ALTER, TABLE, ROLLUP, COLUMN, PARTITION, RENAME
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/ALTER VIEW.md b/docs/en/sql-reference/sql-statements/Data Definition/ALTER VIEW.md
deleted file mode 100644
index 03ff25f402..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/ALTER VIEW.md
+++ /dev/null
@@ -1,51 +0,0 @@
----
-{
- "title": "ALTER VIEW",
- "language": "en"
-}
----
-
-
-
-# ALTER VIEW
-## description
- This statement is used to modify the definition of a view
- Syntax:
- ALTER VIEW
- [db_name.]view_name
- (column1[ COMMENT "col comment"][, column2, ...])
- AS query_stmt
-
- Explain:
- 1. A view is logical and is not stored on any physical medium. When queried, the view is embedded as a subquery in the query statement. Therefore, modifying the definition of a view is equivalent to modifying its query_stmt.
- 2. query_stmt can be any supported SQL statement.
-
-## example
-
- 1. Modify example_view on the example_db
-
- ALTER VIEW example_db.example_view
- (
- c1 COMMENT "column 1",
- c2 COMMENT "column 2",
- c3 COMMENT "column 3"
- )
- AS SELECT k1, k2, SUM(v1) FROM example_table
- GROUP BY k1, k2
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/BACKUP.md b/docs/en/sql-reference/sql-statements/Data Definition/BACKUP.md
deleted file mode 100644
index 6c1c7c317e..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/BACKUP.md
+++ /dev/null
@@ -1,71 +0,0 @@
----
-{
- "title": "BACKUP",
- "language": "en"
-}
----
-
-
-
-# BACKUP
-## Description
-This statement is used to back up data under the specified database. This command is an asynchronous operation. After successful submission, you need to check progress through the SHOW BACKUP command. Only tables of OLAP type are backed up.
-Grammar:
-BACKUP SNAPSHOT [db_name].{snapshot_name}
-TO `repository_name`
-[ON|EXCLUDE] (
-`table_name` [PARTITION (`p1`, ...)],
-...
-)
-PROPERTIES ("key"="value", ...);
-
-Explain:
-1. Only one BACKUP or RESTORE task can be performed under the same database.
-2. The ON clause identifies the tables and partitions that need to be backed up. If no partition is specified, all partitions of the table are backed up by default.
-3. The EXCLUDE clause identifies the tables and partitions that need not to be backed up. All partitions of all tables in the database except the specified tables or partitions will be backed up.
-4. PROPERTIES currently supports the following attributes:
-"Type" = "full": means that this is a full update (default).
-"Timeout" = "3600": Task timeout, default to one day. Unit seconds.
-
-## example
-
-1. Back up the table example_tbl under example_db in full to the warehouse example_repo:
-BACKUP SNAPSHOT example_db.snapshot_label1
-TO example_repo
-ON (example_tbl)
-PROPERTIES ("type" = "full");
-
-2. Fully back up the p1 and p2 partitions of table example_tbl, and the table example_tbl2, under example_db to the warehouse example_repo:
-BACKUP SNAPSHOT example_db.snapshot_label2
-TO example_repo
-ON
-(
-example_tbl PARTITION (p1,p2),
-example_tbl2
-);
-
-3. Back up all tables in example_db except example_tbl to the warehouse example_repo:
-BACKUP SNAPSHOT example_db.snapshot_label3
-TO example_repo
-EXCLUDE (example_tbl);
-
-## keyword
-BACKUP
-
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CANCEL ALTER.md b/docs/en/sql-reference/sql-statements/Data Definition/CANCEL ALTER.md
deleted file mode 100644
index bb9339be38..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/CANCEL ALTER.md
+++ /dev/null
@@ -1,70 +0,0 @@
----
-{
- "title": "CANCEL ALTER",
- "language": "en"
-}
----
-
-
-
-# CANCEL ALTER
-## Description
-This statement is used to undo an ALTER operation.
-1. Cancel the ALTER TABLE COLUMN operation
-Grammar:
-CANCEL ALTER TABLE COLUMN
-FROM db_name.table_name
-
-2. Cancel the ALTER TABLE ROLLUP operation
-Grammar:
-CANCEL ALTER TABLE ROLLUP
-FROM db_name.table_name
-
-3. batch cancel rollup by job id
- Grammar:
- CANCEL ALTER TABLE ROLLUP
- FROM db_name.table_name (jobid,...)
- Note:
- Batch cancelling rollup jobs is an asynchronous operation; use `show alter table rollup` to check whether it executed successfully
-
-4. Cancel the ALTER CLUSTER operation
-Grammar:
-(To be implemented...)
-
-
-## example
-[CANCEL ALTER TABLE COLUMN]
-1. Cancel ALTER COLUMN operation for my_table.
-CANCEL ALTER TABLE COLUMN
-FROM example_db.my_table;
-
-[CANCEL ALTER TABLE ROLLUP]
-1. Cancel ADD ROLLUP operation for my_table.
-CANCEL ALTER TABLE ROLLUP
-FROM example_db.my_table;
-
-[CANCEL ALTER TABLE ROLLUP]
-1. cancel rollup alter job by job id
-CANCEL ALTER TABLE ROLLUP
-FROM example_db.my_table (12801,12802);
-
-## keyword
-CANCEL,ALTER,TABLE,COLUMN,ROLLUP
-
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CANCEL BACKUP.md b/docs/en/sql-reference/sql-statements/Data Definition/CANCEL BACKUP.md
deleted file mode 100644
index a0e379ad17..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/CANCEL BACKUP.md
+++ /dev/null
@@ -1,39 +0,0 @@
----
-{
- "title": "CANCEL BACKUP",
- "language": "en"
-}
----
-
-
-
-# CANCEL BACKUP
-## Description
-This statement is used to cancel an ongoing BACKUP task.
-Grammar:
-CANCEL BACKUP FROM db_name;
-
-## example
-1. Cancel the BACKUP task under example_db.
-CANCEL BACKUP FROM example_db;
-
-## keyword
-CANCEL, BACKUP
-
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CANCEL RESTORE.md b/docs/en/sql-reference/sql-statements/Data Definition/CANCEL RESTORE.md
deleted file mode 100644
index b4bf1bb569..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/CANCEL RESTORE.md
+++ /dev/null
@@ -1,42 +0,0 @@
----
-{
- "title": "CANCEL RESTORE",
- "language": "en"
-}
----
-
-
-
-# CANCEL RESTORE
-## Description
-This statement is used to cancel an ongoing RESTORE task.
-Grammar:
-CANCEL RESTORE FROM db_name;
-
-Be careful:
-If the restore job is cancelled during or after the COMMIT stage, the restored tables may become inaccessible. In this case, data can only be recovered by performing the restore operation again.
-
-## example
-1. Cancel the RESTORE task under example_db.
-CANCEL RESTORE FROM example_db;
-
-## keyword
-CANCEL, RESTORE
-
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE DATABASE.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE DATABASE.md
deleted file mode 100644
index 7dadd2ef6e..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE DATABASE.md
+++ /dev/null
@@ -1,69 +0,0 @@
----
-{
- "title": "CREATE DATABASE",
- "language": "en"
-}
----
-
-
-
-# CREATE DATABASE
-
-## Description
-
- This statement is used to create a new database
- Syntax:
- CREATE DATABASE [IF NOT EXISTS] db_name
- [PROPERTIES ("key"="value", ...)] ;
-
-1. PROPERTIES
- Additional properties of the database; optional.
- 1) For an Iceberg database, the following information needs to be provided in PROPERTIES.
- ```
- PROPERTIES (
- "iceberg.database" = "iceberg_db_name",
- "iceberg.hive.metastore.uris" = "thrift://127.0.0.1:9083",
- "iceberg.catalog.type" = "HIVE_CATALOG"
- )
-
- ```
- `iceberg.database` is the name of the database corresponding to Iceberg.
- `iceberg.hive.metastore.uris` is the address of the hive metastore service.
- `iceberg.catalog.type` defaults to `HIVE_CATALOG`. Currently, only `HIVE_CATALOG` is supported, more Iceberg catalog types will be supported later.
-
-## example
- 1. Create a new database db_test
- ```
- CREATE DATABASE db_test;
- ```
-
- 2. Create a new Iceberg database iceberg_test
- ```
- CREATE DATABASE `iceberg_test`
- PROPERTIES (
- "iceberg.database" = "doris",
- "iceberg.hive.metastore.uris" = "thrift://127.0.0.1:9083",
- "iceberg.catalog.type" = "HIVE_CATALOG"
- );
- ```
-
-## keyword
-CREATE,DATABASE
-
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE ENCRYPTKEY.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE ENCRYPTKEY.md
deleted file mode 100644
index 77928cb394..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE ENCRYPTKEY.md
+++ /dev/null
@@ -1,80 +0,0 @@
----
-{
- "title": "CREATE ENCRYPTKEY",
- "language": "en"
-}
----
-
-
-
-# CREATE ENCRYPTKEY
-
-## Description
-
-### Syntax
-
-```
-CREATE ENCRYPTKEY key_name
- AS "key_string"
-```
-
-### Parameters
-
-> `key_name`: The name of the key to be created, which can include the name of the database. For example: `db1.my_key`.
->
-> `key_string`: The string to create the key
-
-This statement creates a custom key. Executing this command requires the user to have the `ADMIN` privileges.
-
-If the database name is included in `key_name`, then this custom key will be created in the corresponding database, otherwise the key will be created in the database where the current session is located. The name of the new key cannot be the same as a key that already exists in the corresponding database, otherwise the creation will fail.
-
-## Example
-
-1. Create a custom key
-
- ```
- CREATE ENCRYPTKEY my_key as "ABCD123456789";
- ```
-
-2. Using a custom key
-
- To use a custom key, add the keyword `KEY`/`key` before the key name, separated from `key_name` by a space.
-
- ```
- mysql> SELECT HEX(AES_ENCRYPT("Doris is Great", KEY my_key));
- +------------------------------------------------+
- | hex(aes_encrypt('Doris is Great', key my_key)) |
- +------------------------------------------------+
- | D26DB38579D6A343350EDDC6F2AD47C6 |
- +------------------------------------------------+
- 1 row in set (0.02 sec)
-
- mysql> SELECT AES_DECRYPT(UNHEX('D26DB38579D6A343350EDDC6F2AD47C6'), KEY my_key);
- +--------------------------------------------------------------------+
- | aes_decrypt(unhex('D26DB38579D6A343350EDDC6F2AD47C6'), key my_key) |
- +--------------------------------------------------------------------+
- | Doris is Great |
- +--------------------------------------------------------------------+
- 1 row in set (0.01 sec)
- ```
-
-## Keyword
-
- CREATE,ENCRYPTKEY
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE INDEX.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE INDEX.md
deleted file mode 100644
index df128fda18..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE INDEX.md
+++ /dev/null
@@ -1,45 +0,0 @@
----
-{
- "title": "CREATE INDEX",
- "language": "en"
-}
----
-
-
-
-# CREATE INDEX
-
-## description
-
- This statement is used to create an index
- grammar:
- CREATE INDEX [IF NOT EXISTS] index_name ON table_name (column [, ...]) [USING BITMAP] [COMMENT 'balabala'];
- note:
- 1. only bitmap indexes are supported in the current version
- 2. a BITMAP index can only be applied to a single column
-
-## example
-
- 1. create index on table1 column siteid using bitmap
- CREATE INDEX index_name ON table1 (siteid) USING BITMAP COMMENT 'balabala';
-
-## keyword
-
- CREATE,INDEX
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE MATERIALIZED VIEW.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE MATERIALIZED VIEW.md
deleted file mode 100644
index 4c0afa2c65..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE MATERIALIZED VIEW.md
+++ /dev/null
@@ -1,238 +0,0 @@
----
-{
- "title": "CREATE MATERIALIZED VIEW",
- "language": "en"
-}
----
-
-
-
-# CREATE MATERIALIZED VIEW
-
-## description
-
-This statement is used to create a materialized view.
-
- This is an asynchronous operation. A successful return only indicates that the task to create the materialized view has been submitted successfully. The user needs to check the progress of the materialized view by using ```show alter table rollup```.
-
- After the progress is FINISHED, you can use the ```desc [table_name] all``` command to view the schema of the materialized view.
-
-syntax:
-
- ```
-
- CREATE MATERIALIZED VIEW [MV name] as [query]
- [PROPERTIES ("key" = "value")]
-
- ```
-
-1. MV name
-
- Name of the materialized view. Required.
-
- Materialized view names in the same table cannot be duplicated.
-
-2. query
-
- The query used to construct the materialized view. The result of the query is the data of the materialized view. The query format currently supported is:
-
- ```
- SELECT select_expr [, select_expr ...]
- FROM [Base view name]
- GROUP BY column_name [, column_name ...]
- ORDER BY column_name [, column_name ...]
-
- The syntax is the same as the query syntax.
- ```
-
- select_expr: All columns in the materialized view's schema.
-
- + Only single columns and aggregate columns without expression calculation are supported.
- + The aggregate function currently only supports SUM, MIN, MAX, and the parameters of the aggregate function can only be a single column without expression calculation.
- + Contains at least one single column.
- + All involved columns can only appear once.
-
- base view name: The original table name of the materialized view. Required.
-
- + Must be a single table and not a subquery
-
- group by: Grouped column of materialized view, optional.
-
- + If not filled, the data will not be grouped.
-
- order by: Sort order of materialized view, optional.
-
- + The order of the column sort must be the same as the column declaration order in select_expr.
- + If order by is not specified, sort columns are automatically supplemented according to the rules.
-
- + If the materialized view is an aggregate type, all grouping columns are automatically supplemented with sort columns.
- + If the materialized view is a non-aggregate type, the columns whose total size fits within the first 36 bytes are automatically added as sort columns. If fewer than three sort columns are selected this way, the first three columns are used.
- + If the query contains a grouping column, the sort order must be the same as the grouping column.
-
-3. properties
-
- Declare some configuration of materialized view, optional.
-
- ```
- PROPERTIES ("key" = "value", "key" = "value" ...)
-
- ```
-
- The following configurations can be declared here:
-
- + short_key: the number of sort (short key) columns.
- + timeout: timeout for materialized view construction.
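-
- For example, following the syntax above, a materialized view that only sets a custom build timeout could be declared as below (the view and base table names here are only illustrative):
-
- ```
- create materialized view mv_with_timeout as
- select k1, sum(k2) from tbl1 group by k1
- PROPERTIES ("timeout" = "3600");
- ```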
-
-## example
-
-Base table structure is
-
-```
-mysql> desc duplicate_table;
-+-------+--------+------+------+---------+-------+
-| Field | Type | Null | Key | Default | Extra |
-+-------+--------+------+------+---------+-------+
-| k1 | INT | Yes | true | N/A | |
-| k2 | INT | Yes | true | N/A | |
-| k3 | BIGINT | Yes | true | N/A | |
-| k4 | BIGINT | Yes | true | N/A | |
-+-------+--------+------+------+---------+-------+
-```
-
-1. Create a materialized view containing only the columns of the original table (k1, k2)
-
- ```
- create materialized view k1_k2 as
-select k1, k2 from duplicate_table;
- ```
-
- The materialized view's schema is shown below. The materialized view contains only two columns k1, k2 without any aggregation.
-
- ```
- +-----------------+-------+--------+------+------+---------+-------+
- | IndexName | Field | Type | Null | Key | Default | Extra |
- +-----------------+-------+--------+------+------+---------+-------+
- | k1_k2 | k1 | INT | Yes | true | N/A | |
- | | k2 | INT | Yes | true | N/A | |
- +-----------------+-------+--------+------+------+---------+-------+
- ```
-
-2. Create a materialized view sorted by k2
-
- ```
- create materialized view k2_order as
-select k2, k1 from duplicate_table order by k2;
- ```
-
- The materialized view's schema is shown below. The materialized view contains only two columns k2, k1, where column k2 is a sorted column without any aggregation.
-
- ```
- +-----------------+-------+--------+------+-------+---------+-------+
- | IndexName | Field | Type | Null | Key | Default | Extra |
- +-----------------+-------+--------+------+-------+---------+-------+
- | k2_order | k2 | INT | Yes | true | N/A | |
- | | k1 | INT | Yes | false | N/A | NONE |
- +-----------------+-------+--------+------+-------+---------+-------+
- ```
-
-3. Create a materialized view grouped by k1, k2 with k3 as the SUM aggregate
-
- ```
- create materialized view k1_k2_sumk3 as
-select k1, k2, sum (k3) from duplicate_table group by k1, k2;
- ```
-
- The materialized view's schema is shown below. The materialized view contains two columns k1, k2 and sum (k3), where k1, k2 are grouped columns, and sum (k3) is the sum of the k3 columns grouped according to k1, k2.
-
- Because the materialized view does not declare a sort column, and the materialized view has aggregate data, the system supplements the grouping columns k1 and k2 by default.
-
- ```
- +-----------------+-------+--------+------+-------+---------+-------+
- | IndexName | Field | Type | Null | Key | Default | Extra |
- +-----------------+-------+--------+------+-------+---------+-------+
- | k1_k2_sumk3 | k1 | INT | Yes | true | N/A | |
- | | k2 | INT | Yes | true | N/A | |
- | | k3 | BIGINT | Yes | false | N/A | SUM |
- +-----------------+-------+--------+------+-------+---------+-------+
- ```
-
-4. Create a materialized view to remove duplicate rows
-
- ```
- create materialized view deduplicate as
-select k1, k2, k3, k4 from duplicate_table group by k1, k2, k3, k4;
- ```
-
- The materialized view schema is shown below. The materialized view contains k1, k2, k3, and k4 columns, and there are no duplicate rows.
-
- ```
- +-----------------+-------+--------+------+-------+---------+-------+
- | IndexName | Field | Type | Null | Key | Default | Extra |
- +-----------------+-------+--------+------+-------+---------+-------+
- | deduplicate | k1 | INT | Yes | true | N/A | |
- | | k2 | INT | Yes | true | N/A | |
- | | k3 | BIGINT | Yes | true | N/A | |
- | | k4 | BIGINT | Yes | true | N/A | |
- +-----------------+-------+--------+------+-------+---------+-------+
- ```
-
-5. Create a non-aggregated materialized view that does not declare a sort column
-
- The schema of all_type_table is as follows:
-
- ```
- +-------+--------------+------+-------+---------+-------+
- | Field | Type | Null | Key | Default | Extra |
- +-------+--------------+------+-------+---------+-------+
- | k1 | TINYINT | Yes | true | N/A | |
- | k2 | SMALLINT | Yes | true | N/A | |
- | k3 | INT | Yes | true | N/A | |
- | k4 | BIGINT | Yes | true | N/A | |
- | k5 | DECIMAL(9,0) | Yes | true | N/A | |
- | k6 | DOUBLE | Yes | false | N/A | NONE |
- | k7 | VARCHAR(20) | Yes | false | N/A | NONE |
- +-------+--------------+------+-------+---------+-------+
- ```
-
- The materialized view contains k3, k4, k5, k6, k7 columns, and no sort column is declared. The creation statement is as follows:
-
- ```
- create materialized view mv_1 as
-select k3, k4, k5, k6, k7 from all_type_table;
- ```
-
- The system's default supplementary sort columns are k3, k4, and k5. The sum of the number of bytes for these three column types is 4 (INT) + 8 (BIGINT) + 16 (DECIMAL) = 28 <36. So these three columns are added as sort columns.
-
- The materialized view's schema is as follows. You can see that the key fields of the k3, k4, and k5 columns are true, which is the sort order. The key field of the k6, k7 columns is false, that is, non-sorted.
-
- ```
- +----------------+-------+--------------+------+-------+---------+-------+
- | IndexName | Field | Type | Null | Key | Default | Extra |
- +----------------+-------+--------------+------+-------+---------+-------+
- | mv_1 | k3 | INT | Yes | true | N/A | |
- | | k4 | BIGINT | Yes | true | N/A | |
- | | k5 | DECIMAL(9,0) | Yes | true | N/A | |
- | | k6 | DOUBLE | Yes | false | N/A | NONE |
- | | k7 | VARCHAR(20) | Yes | false | N/A | NONE |
- +----------------+-------+--------------+------+-------+---------+-------+
- ```
-
-## keyword
- CREATE, MATERIALIZED, VIEW
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md
deleted file mode 100644
index 1bc33a4b6e..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md
+++ /dev/null
@@ -1,87 +0,0 @@
----
-{
- "title": "CREATE REPOSITORY",
- "language": "en"
-}
----
-
-
-
-# CREATE REPOSITORY
-## Description
- This statement is used to create the warehouse. The warehouse is used for backup or recovery. Only root or superuser users can create warehouses.
- Grammar:
- CREATE [READ ONLY] REPOSITORY `repo_name`
- WITH [BROKER `broker_name`|S3]
- ON LOCATION `repo_location`
- PROPERTIES ("key"="value", ...);
-
- Explain:
- 1. The creation of a warehouse depends on an existing broker, or uses the AWS S3 protocol to connect to cloud storage directly.
- 2. A read-only warehouse can only be used for restore operations. Otherwise, both backup and restore operations are supported.
- 3. According to the different types of broker or S3, PROPERTIES is different, see the example.
-
-## example
- 1. Create a warehouse named bos_repo, which relies on BOS broker "bos_broker", and the data root directory is: bos://palo_backup.
- CREATE REPOSITORY `bos_repo`
- WITH BROKER `bos_broker`
- ON LOCATION "bos://palo_backup"
- PROPERTIES
- (
- "bos_endpoint" ="http://gz.bcebos.com",
- "bos_accesskey" = "bos_accesskey",
- "bos_secret_accesskey"="bos_accesskey"
- );
-
- 2. Create the same warehouse as in Example 1, but with read-only attributes:
- CREATE READ ONLY REPOSITORY `bos_repo`
- WITH BROKER `bos_broker`
- ON LOCATION "bos://palo_backup"
- PROPERTIES
- (
- "bos_endpoint" ="http://gz.bcebos.com",
- "bos_accesskey" = "bos_accesskey",
- "bos_secret_accesskey"="bos_secret_accesskey"
- );
-
- 3. Create a warehouse named hdfs_repo, which relies on the Baidu HDFS broker "hdfs_broker", and the data root directory is: hdfs://hadoop-name-node:54310/path/to/repo/
- CREATE REPOSITORY `hdfs_repo`
- WITH BROKER `hdfs_broker`
- ON LOCATION "hdfs://hadoop-name-node:54310/path/to/repo/"
- PROPERTIES
- (
- "Username" = "User"
- "password" = "password"
- );
-
- 4. Create a warehouse named s3_repo, which connects to cloud storage directly without going through a broker.
- CREATE REPOSITORY `s3_repo`
- WITH S3
- ON LOCATION "s3://s3-repo"
- PROPERTIES
- (
- "AWS_ENDPOINT" = "http://s3-REGION.amazonaws.com",
- "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY",
- "AWS_SECRET_KEY"="AWS_SECRET_KEY",
- "AWS_REGION" = "REGION"
- );
-
-## keyword
-CREATE, REPOSITORY
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE RESOURCE.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE RESOURCE.md
deleted file mode 100644
index 177454e70f..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE RESOURCE.md
+++ /dev/null
@@ -1,134 +0,0 @@
----
-{
- "title": "CREATE RESOURCE",
- "language": "en"
-}
----
-
-
-
-# CREATE RESOURCE
-
-## Description
-
- This statement is used to create a resource. Only the root or admin user can create resources. Currently supports Spark, ODBC, S3 external resources.
- In the future, other external resources may be added to Doris for use, such as Spark/GPU for query, HDFS/S3 for external storage, MapReduce for ETL, etc.
-
- Syntax:
- CREATE [EXTERNAL] RESOURCE "resource_name"
- PROPERTIES ("key"="value", ...);
-
- Explanation:
- 1. The type of resource needs to be specified in PROPERTIES "type" = "[spark|odbc_catalog|s3]", currently supports spark, odbc_catalog, s3.
- 2. The PROPERTIES varies according to the resource type, see the example for details.
-
-## Example
-
- 1. Create a Spark resource named spark0 in yarn cluster mode.
-
- ````
- CREATE EXTERNAL RESOURCE "spark0"
- PROPERTIES
- (
- "type" = "spark",
- "spark.master" = "yarn",
- "spark.submit.deployMode" = "cluster",
- "spark.jars" = "xxx.jar,yyy.jar",
- "spark.files" = "/tmp/aaa,/tmp/bbb",
- "spark.executor.memory" = "1g",
- "spark.yarn.queue" = "queue0",
- "spark.hadoop.yarn.resourcemanager.address" = "127.0.0.1:9999",
- "spark.hadoop.fs.defaultFS" = "hdfs://127.0.0.1:10000",
- "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris",
- "broker" = "broker0",
- "broker.username" = "user0",
- "broker.password" = "password0"
- );
- ````
-
- Spark related parameters are as follows:
- - spark.master: Required, currently supports yarn, spark://host:port.
- - spark.submit.deployMode: The deployment mode of the Spark program, required, supports both cluster and client.
- - spark.hadoop.yarn.resourcemanager.address: Required when master is yarn.
- - spark.hadoop.fs.defaultFS: Required when master is yarn.
- - Other parameters are optional, refer to http://spark.apache.org/docs/latest/configuration.html
-
- Working_dir and broker need to be specified when Spark is used for ETL. described as follows:
- working_dir: The directory used by the ETL. Required when spark is used as an ETL resource. For example: hdfs://host:port/tmp/doris.
- broker: broker name. Required when spark is used as an ETL resource. Configuration needs to be done in advance using the `ALTER SYSTEM ADD BROKER` command.
- broker.property_key: The authentication information that the broker needs to specify when reading the intermediate file generated by ETL.
-
- 2. Create an ODBC resource
-
- ````
- CREATE EXTERNAL RESOURCE `oracle_odbc`
- PROPERTIES (
- "type" = "odbc_catalog",
- "host" = "192.168.0.1",
- "port" = "8086",
- "user" = "test",
- "password" = "test",
- "database" = "test",
- "odbc_type" = "oracle",
- "driver" = "Oracle 19 ODBC driver"
- );
- ````
-
- The relevant parameters of ODBC are as follows:
- - host: IP address of the external database
- - driver: the driver name of the ODBC external table, which must be the same as the driver name in be/conf/odbcinst.ini.
- - odbc_type: the type of the external database, currently supports oracle, mysql, postgresql
- - user: username of the external database
- - password: the password of the corresponding user
-
- 3. Create S3 resource
-
- ````
- CREATE RESOURCE "remote_s3"
- PROPERTIES
- (
- "type" = "s3",
- "s3_endpoint" = "http://bj.s3.com",
- "s3_region" = "bj",
- "s3_root_path" = "/path/to/root",
- "s3_access_key" = "bbb",
- "s3_secret_key" = "aaaa",
- "s3_max_connections" = "50",
- "s3_request_timeout_ms" = "3000",
- "s3_connection_timeout_ms" = "1000"
- );
- ````
-
- S3 related parameters are as follows:
- - required
- - s3_endpoint: s3 endpoint
- - s3_region: s3 region
- - s3_root_path: s3 root directory
- - s3_access_key: s3 access key
- - s3_secret_key: s3 secret key
- - optional
- - s3_max_connections: the maximum number of s3 connections, the default is 50
- - s3_request_timeout_ms: s3 request timeout, in milliseconds, the default is 3000
- - s3_connection_timeout_ms: s3 connection timeout, in milliseconds, the default is 1000
-
-
-## keyword
-
- CREATE, RESOURCE
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE TABLE LIKE.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE TABLE LIKE.md
deleted file mode 100644
index b0cb703451..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE TABLE LIKE.md
+++ /dev/null
@@ -1,78 +0,0 @@
----
-{
- "title": "CREATE TABLE LIKE",
- "language": "en"
-}
----
-
-
-
-# CREATE TABLE LIKE
-
-## description
-
-Use CREATE TABLE ... LIKE to create an empty table based on the definition of another table, including any column attributes, table partitions and table properties defined in the original table:
-Syntax:
-
-```
- CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [database.]table_name LIKE [database.]table_name [WITH ROLLUP (r1,r2,r3,...)]
-```
-
-Explain:
- 1. The copied table structure includes column definitions, partitions, table properties, and so on.
- 2. The SELECT privilege is required on the original table.
- 3. Copying external tables such as MySQL tables is supported.
- 4. Copying the rollups of an OLAP table is supported.
-
-## Example
- 1. Under the test1 Database, create an empty table with the same table structure as table1, named table2
-
- CREATE TABLE test1.table2 LIKE test1.table1
-
- 2. Under the test2 Database, create an empty table with the same table structure as test1.table1, named table2
-
- CREATE TABLE test2.table2 LIKE test1.table1
-
- 3. Under the test1 Database, create an empty table with the same table structure as table1, named table2. copy r1 and r2 rollup of table1 simultaneously
-
- CREATE TABLE test1.table2 LIKE test1.table1 WITH ROLLUP (r1,r2)
-
- 4. Under the test1 Database, create an empty table with the same table structure as table1, named table2. copy all rollup of table1 simultaneously
-
- CREATE TABLE test1.table2 LIKE test1.table1 WITH ROLLUP
-
- 5. Under the test2 Database, create an empty table with the same table structure as table1, named table2. copy r1 and r2 rollup of table1 simultaneously
-
- CREATE TABLE test2.table2 LIKE test1.table1 WITH ROLLUP (r1,r2)
-
- 6. Under the test2 Database, create an empty table with the same table structure as table1, named table2. copy all rollup of table1 simultaneously
-
- CREATE TABLE test2.table2 LIKE test1.table1 WITH ROLLUP
-
- 7. Under the test1 Database, create an empty table with the same table structure as MySQL's external table1, called table2
-
- CREATE TABLE test1.table2 LIKE test1.table1
-
-## keyword
-
-```
- CREATE,TABLE,LIKE
-
-```
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE TABLE.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE TABLE.md
deleted file mode 100644
index 88b843ddfe..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE TABLE.md
+++ /dev/null
@@ -1,879 +0,0 @@
----
-{
- "title": "CREATE TABLE",
- "language": "en"
-}
----
-
-
-
-# CREATE TABLE
-
-## description
-
-This statement is used to create table
-Syntax:
-
-```
- CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [database.]table_name
- (column_definition1[, column_definition2, ...]
- [, index_definition1[, index_definition2, ...]])
- [ENGINE = [olap|mysql|broker|hive|iceberg]]
- [key_desc]
- [COMMENT "table comment"]
- [partition_desc]
- [distribution_desc]
- [rollup_index]
- [PROPERTIES ("key"="value", ...)]
- [BROKER PROPERTIES ("key"="value", ...)];
-```
-
-1. column_definition
- Syntax:
- `col_name col_type [agg_type] [NULL | NOT NULL] [DEFAULT "default_value"]`
- Explain:
- col_name: Name of column
- col_type: Type of column
- ```
- BOOLEAN(1 Byte)
- Range: {0,1}
- TINYINT(1 Byte)
- Range: -2^7 + 1 ~ 2^7 - 1
- SMALLINT(2 Bytes)
- Range: -2^15 + 1 ~ 2^15 - 1
- INT(4 Bytes)
- Range: -2^31 + 1 ~ 2^31 - 1
- BIGINT(8 Bytes)
- Range: -2^63 + 1 ~ 2^63 - 1
- LARGEINT(16 Bytes)
- Range: -2^127 + 1 ~ 2^127 - 1
- FLOAT(4 Bytes)
- Support scientific notation
- DOUBLE(8 Bytes)
- Support scientific notation
- DECIMAL[(precision, scale)] (16 Bytes)
- Default is DECIMAL(10, 0)
- precision: 1 ~ 27
- scale: 0 ~ 9
- integer part: 1 ~ 18
- fractional part: 0 ~ 9
- Not support scientific notation
- DATE(3 Bytes)
- Range: 0000-01-01 ~ 9999-12-31
- DATETIME(8 Bytes)
- Range: 0000-01-01 00:00:00 ~ 9999-12-31 23:59:59
- CHAR[(length)]
- Fixed length string. Range: 1 ~ 255. Default: 1
- VARCHAR[(length)]
- Variable length string. Range: 1 ~ 65533
- HLL (1~16385 Bytes)
- HLL type, no need to specify length.
- This type can only be queried by the hll_union_agg, hll_cardinality, hll_hash functions.
- BITMAP
- BITMAP type, no need to specify length. Represents a set of unsigned bigint numbers; the largest element can be 2^64 - 1.
- QUANTILE_STATE
- QUANTILE_STATE type, no need to specify length. Represents the quantile pre-aggregation result. Currently, only numerical raw data types are supported, such as `int`, `float`, `double`, etc.
- If the number of elements is less than 2048, the explicit data is stored.
- If the number of elements is greater than 2048, the intermediate result of the pre-aggregation of the TDigest algorithm is stored.
-
- ```
- agg_type: Aggregation type. If not specified, the column is key column. Otherwise, the column is value column.
-
- * SUM, MAX, MIN, REPLACE
- * HLL_UNION: Only for HLL type
- * REPLACE_IF_NOT_NULL: The meaning of this aggregation type is that substitution will occur if and only if the newly imported data is a non-null value. If the newly imported data is null, Doris will still retain the original value. Note: if NOT NULL is specified for a REPLACE_IF_NOT_NULL column when the user creates the table, Doris will convert it to NULL and will not report an error to the user. Users can leverage this aggregation type to import only some of the columns. **Note that the default value here should be NULL, not an empty string; if an empty string is given, it will be stored as an empty string.**
- * BITMAP_UNION: Only for BITMAP type
- * QUANTILE_UNION: Only for QUANTILE_STATE type
- Allow NULL: Default is NOT NULL. NULL value should be represented as `\N` in load source file.
-
- Notice:
-
- The origin value of BITMAP_UNION column should be TINYINT, SMALLINT, INT, BIGINT.
-
- The origin value of QUANTILE_UNION column should be a numeric type such as TINYINT, INT, FLOAT, DOUBLE, DECIMAL, etc.
-2. index_definition
- Syntax:
- `INDEX index_name (col_name[, col_name, ...]) [USING BITMAP] COMMENT 'xxxxxx'`
- Explain:
- index_name: index name
- col_name: column name
- Notice:
- Only support BITMAP index in current version, BITMAP can only apply to single column
-3. ENGINE type
- Default is olap. Options are: olap, mysql, broker, hive, iceberg
- 1) For mysql, properties should include:
-
- ```
- PROPERTIES (
- "host" = "mysql_server_host",
- "port" = "mysql_server_port",
- "user" = "your_user_name",
- "password" = "your_password",
- "database" = "database_name",
- "table" = "table_name"
- )
- ```
-
- Notice:
- "table_name" is the real table name in MySQL database.
- table_name in CREATE TABLE stmt is table is Doris. They can be different or same.
- MySQL table created in Doris is for accessing data in MySQL database.
- Doris does not maintain and store any data from MySQL table.
-
- 2) For broker, properties should include:
-
- ```
- PROPERTIES (
- "broker_name" = "broker_name",
- "path" = "file_path1[,file_path2]",
- "column_separator" = "value_separator"
- "line_delimiter" = "value_delimiter"
- )
- ```
-
- ```
- BROKER PROPERTIES(
- "username" = "name",
- "password" = "password"
- )
- ```
-
- The broker properties differ for different brokers.
- Notice:
- File names in "path" are separated by ",". If a file name contains ",", use "%2c" instead. If a file name contains "%", use "%25" instead.
- CSV and Parquet formats are supported. Compression formats GZ, BZ2, LZ4, and LZO (LZOP) are supported.
- 3) For hive, properties should include:
- ```
- PROPERTIES (
- "database" = "hive_db_name",
- "table" = "hive_table_name",
- "hive.metastore.uris" = "thrift://127.0.0.1:9083"
- )
- ```
- "database" is the name of the database corresponding to the hive table, "table" is the name of the hive table, and "hive.metastore.uris" is the hive metastore service address.
-
- 4) For iceberg, properties should include:
- ```
- PROPERTIES (
- "iceberg.database" = "iceberg_db_name",
- "iceberg.table" = "iceberg_table_name",
- "iceberg.hive.metastore.uris" = "thrift://127.0.0.1:9083",
- "iceberg.catalog.type" = "HIVE_CATALOG"
- )
-
- ```
- database is the name of the database corresponding to Iceberg.
- table is the name of the table corresponding to Iceberg.
- hive.metastore.uris is the address of the hive metastore service.
- catalog.type defaults to HIVE_CATALOG. Currently, only HIVE_CATALOG is supported, more Iceberg catalog types will be supported later.
-
-4. key_desc
- Syntax:
- key_type(k1[,k2 ...])
- Explain:
- Data is ordered by the specified key columns, and the behavior differs depending on the key type.
- AGGREGATE KEY:
- Value columns are aggregated when the key columns are the same.
- UNIQUE KEY:
- New incoming rows replace existing rows when the key columns are the same.
- DUPLICATE KEY:
- All incoming rows are saved.
- The default key_type is DUPLICATE KEY, and the key columns are taken from the first 36 bytes of the columns in definition order.
- If fewer than 3 columns fit within the first 36 bytes, the first 3 columns are used.
- NOTICE:
- Except for the AGGREGATE KEY model, there is no need to specify an aggregation type for value columns.
-5. partition_desc
- Currently, both RANGE and LIST partitioning methods are supported.
- 5.1 RANGE partition
- RANGE Partition has two ways to use:
- 1) LESS THAN
- Syntax:
-
- ```
- PARTITION BY RANGE (k1, k2, ...)
- (
- PARTITION partition_name1 VALUES LESS THAN MAXVALUE|("value1", "value2", ...),
- PARTITION partition_name2 VALUES LESS THAN MAXVALUE|("value1", "value2", ...)
- ...
- )
- ```
-
- Explain:
- Use the specified key columns and the specified value ranges for partitioning.
- 1) Partition names may only contain characters in [A-Za-z0-9_]
- 2) The partition key column type must be one of:
- TINYINT, SMALLINT, INT, BIGINT, LARGEINT, DATE, DATETIME
- 3) Ranges are [closed, open). The lower bound of the first partition is the MIN VALUE of the specified column type.
- 4) NULL values are stored in the partition that includes MIN VALUE.
- 5) Multiple partition columns are supported; the default partition value is MIN VALUE.
- 2) Fixed Range
- Syntax:
- ```
- PARTITION BY RANGE (k1, k2, k3, ...)
- (
- PARTITION partition_name1 VALUES [("k1-lower1", "k2-lower1", "k3-lower1",...), ("k1-upper1", "k2-upper1", "k3-upper1", ...)),
- PARTITION partition_name2 VALUES [("k1-lower1-2", "k2-lower1-2", ...), ("k1-upper1-2", MAXVALUE, "k3-upper1-2", ...))
- )
- ```
- Explain:
- 1) Fixed Range is more flexible than LESS THAN; the left and right bounds of each interval are fully determined by the user.
- 2) Other behaviors are consistent with LESS THAN.
-
- 5.2 LIST partition
- LIST partition is divided into single column partition and multi-column partition
- 1) Single column partition
- Syntax:
-
- ```
- PARTITION BY LIST(k1)
- (
- PARTITION partition_name1 VALUES IN ("value1", "value2", ...) ,
- PARTITION partition_name2 VALUES IN ("value1", "value2", ...)
- ...
- )
- ```
-
- Explain:
- Use the specified key column and the specified enumeration values for partitioning.
- 1) Partition names may only contain characters in [A-Za-z0-9_]
- 2) The partition key column type must be one of:
- BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, LARGEINT, DATE, DATETIME, CHAR, VARCHAR
- 3) Each partition is a collection of enumerated values; partition values cannot be duplicated across partitions
- 4) NULL values cannot be imported
- 5) Partition values cannot be omitted; at least one must be specified
-
- 2) Multi-column partition
- Syntax:
-
- ```
- PARTITION BY LIST(k1, k2)
- (
- PARTITION partition_name1 VALUES IN (("value1", "value2"), ("value1", "value2"), ...) ,
- PARTITION partition_name2 VALUES IN (("value1", "value2"), ("value1", "value2"), ...)
- ...
- )
- ```
-
- Explain:
- 1) A partition of a multi-column partitioned table is a collection of enumerated tuples
- 2) The number of values in each tuple must equal the number of partition columns
- 3) Other behaviors are consistent with single-column LIST partitioning
-
-6. distribution_desc
- 1) Hash
- Syntax:
- `DISTRIBUTED BY HASH (k1[,k2 ...]) [BUCKETS num]`
- Explain:
- Hash bucketing using the specified key column.
- 2) Random
- Syntax:
- `DISTRIBUTED BY RANDOM [BUCKETS num]`
- Explain:
- Use random numbers for bucketing.
- Suggestion: use random bucketing when there is no suitable key for hash bucketing, so that the table's data is distributed evenly.
-
-7. PROPERTIES
- 1) If ENGINE type is olap. User can specify storage medium, cooldown time and replication number:
-
- ```
- PROPERTIES (
- "storage_medium" = "[SSD|HDD]",
- ["storage_cooldown_time" = "yyyy-MM-dd HH:mm:ss"],
- ["remote_storage_resource" = "xxx"],
- ["remote_storage_cooldown_time" = "yyyy-MM-dd HH:mm:ss"],
- ["replication_num" = "3"],
- ["replication_allocation" = "xxx"]
- )
- ```
-
- storage_medium: SSD or HDD. The default initial storage medium can be specified with `default_storage_medium=XXX` in the FE configuration file `fe.conf`; if not set, it defaults to HDD.
- Note: when the FE configuration 'enable_strict_storage_medium_check' is 'True', table creation fails with 'Failed to find enough host in all backends with storage medium is SSD|HDD' if the corresponding storage medium is not available in the cluster.
- storage_cooldown_time: If storage_medium is SSD, data will be automatically moved to HDD when timeout.
- Default is 30 days.
- Format: "yyyy-MM-dd HH:mm:ss"
- remote_storage_resource: The remote storage resource name, which needs to be used in conjunction with the storage_cold_medium parameter.
- remote_storage_cooldown_time: Used in conjunction with remote_storage_resource. Indicates the expiration time of the partition stored locally.
- Does not expire by default. Must be later than storage_cooldown_time if used with it.
- The format is: "yyyy-MM-dd HH:mm:ss"
- replication_num: Replication number of a partition. Default is 3.
- replication_allocation: Specify the distribution of replicas according to the resource tag.
-
- If the table is not range-partitioned, this property applies at the table level; otherwise it applies at the partition level.
- Users can specify different properties for different partitions with the `ADD PARTITION` or `MODIFY PARTITION` statements.
- 2) If the engine type is olap, users can set a bloom filter index on columns.
- The bloom filter index is used when the query contains `IN` or equality predicates.
- The bloom filter index supports key columns of any type except TINYINT, FLOAT, and DOUBLE, and also supports value columns with the REPLACE aggregation type.
-
- ```
- PROPERTIES (
- "bloom_filter_columns"="k1,k2,k3"
- )
- ```
-
- 3) For Colocation Join:
-
- ```
- PROPERTIES (
- "colocate_with"="table1"
- )
- ```
-
- 4) If you want to use the dynamic partitioning feature, specify it in PROPERTIES. Note: dynamic partitioning only supports RANGE partitions.
-
- ```
- PROPERTIES (
- "dynamic_partition.enable" = "true|false",
- "dynamic_partition.time_unit" = "HOUR|DAY|WEEK|MONTH",
- "dynamic_partition.end" = "${integer_value}",
- "dynamic_partition.prefix" = "${string_value}",
- "dynamic_partition.buckets" = "${integer_value}
- )
- ```
-
- dynamic_partition.enable: specifies whether dynamic partitioning at the table level is enabled
- dynamic_partition.time_unit: used to specify the time unit for dynamically adding partitions, which can be selected as HOUR, DAY, WEEK, and MONTH.
- Attention: When the time unit is HOUR, the data type of partition column cannot be DATE.
- dynamic_partition.end: used to specify the number of partitions created in advance
- dynamic_partition.prefix: used to specify the prefix of the partition names to be created; for example, with prefix p, a partition named p20200108 is created automatically
- dynamic_partition.buckets: specifies the number of buckets for automatically created partitions
- dynamic_partition.create_history_partition: specifies whether to create history partitions; default is false
- dynamic_partition.history_partition_num: specifies the number of history partitions to create when create_history_partition is enabled
- dynamic_partition.reserved_history_periods: used to specify the range of reserved history periods
-
- 5) You can create multiple rollups in bulk when creating a table
- Syntax:
- ```
- ROLLUP (rollup_name (column_name1, column_name2, ...)
- [FROM from_index_name]
- [PROPERTIES ("key"="value", ...)],...)
- ```
-
- 6) If you want to use the in-memory table feature, specify it in PROPERTIES
-
- ```
- PROPERTIES (
- "in_memory"="true"
- )
- ```
-## example
-
-1. Create an olap table, distributed by hash, with aggregation type.
-
- ```
- CREATE TABLE example_db.table_hash
- (
- k1 BOOLEAN,
- k2 TINYINT,
- k3 DECIMAL(10, 2) DEFAULT "10.5",
- v1 CHAR(10) REPLACE,
- v2 INT SUM
- )
- ENGINE=olap
- AGGREGATE KEY(k1, k2, k3)
- COMMENT "my first doris table"
- DISTRIBUTED BY HASH(k1) BUCKETS 32;
- ```
-
-2. Create an olap table, distributed by hash, with aggregation type. Also set storage medium and cooldown time.
-
- ```
- CREATE TABLE example_db.table_hash
- (
- k1 BIGINT,
- k2 LARGEINT,
- v1 VARCHAR(2048) REPLACE,
- v2 SMALLINT SUM DEFAULT "10"
- )
- ENGINE=olap
- AGGREGATE KEY(k1, k2)
- DISTRIBUTED BY HASH (k1, k2) BUCKETS 32
- PROPERTIES(
- "storage_medium" = "SSD",
- "storage_cooldown_time" = "2015-06-04 00:00:00"
- );
- ```
-
-3. Create an olap table, distributed by hash, with aggregation type. Also set storage medium and cooldown time.
- Setting up remote storage resource and cold data storage media.
- ```
- CREATE TABLE example_db.table_hash
- (
- k1 BIGINT,
- k2 LARGEINT,
- v1 VARCHAR(2048) REPLACE,
- v2 SMALLINT SUM DEFAULT "10"
- )
- ENGINE=olap
- AGGREGATE KEY(k1, k2)
- DISTRIBUTED BY HASH (k1, k2) BUCKETS 32
- PROPERTIES(
- "storage_medium" = "SSD",
- "storage_cooldown_time" = "2015-06-04 00:00:00",
- "remote_storage_resource" = "remote_s3",
- "remote_storage_cooldown_time" = "2015-12-04 00:00:00"
- );
- ```
-
-4. Create an olap table with range partitioning, distributed by hash. Records with the same key coexist. Set the initial storage medium and cooldown time, and use the default columnar storage.
-
- 1) LESS THAN
-
- ```
- CREATE TABLE example_db.table_range
- (
- k1 DATE,
- k2 INT,
- k3 SMALLINT,
- v1 VARCHAR(2048),
- v2 DATETIME DEFAULT "2014-02-04 15:36:00"
- )
- ENGINE=olap
- DUPLICATE KEY(k1, k2, k3)
- PARTITION BY RANGE (k1)
- (
- PARTITION p1 VALUES LESS THAN ("2014-01-01"),
- PARTITION p2 VALUES LESS THAN ("2014-06-01"),
- PARTITION p3 VALUES LESS THAN ("2014-12-01")
- )
- DISTRIBUTED BY HASH(k2) BUCKETS 32
- PROPERTIES(
- "storage_medium" = "SSD", "storage_cooldown_time" = "2015-06-04 00:00:00"
- );
- ```
-
- Explain:
- This statement will create 3 partitions:
-
- ```
- ( { MIN }, {"2014-01-01"} )
- [ {"2014-01-01"}, {"2014-06-01"} )
- [ {"2014-06-01"}, {"2014-12-01"} )
- ```
-
- Data outside these ranges will not be loaded.
-
- 2) Fixed Range
- ```
- CREATE TABLE table_range
- (
- k1 DATE,
- k2 INT,
- k3 SMALLINT,
- v1 VARCHAR(2048),
- v2 DATETIME DEFAULT "2014-02-04 15:36:00"
- )
- ENGINE=olap
- DUPLICATE KEY(k1, k2, k3)
- PARTITION BY RANGE (k1, k2, k3)
- (
- PARTITION p1 VALUES [("2014-01-01", "10", "200"), ("2014-01-01", "20", "300")),
- PARTITION p2 VALUES [("2014-06-01", "100", "200"), ("2014-07-01", "100", "300"))
- )
- DISTRIBUTED BY HASH(k2) BUCKETS 32
- PROPERTIES(
- "storage_medium" = "SSD"
- );
- ```
-5. Create an olap table with list partitioning, distributed by hash. Records with the same key coexist. Set the initial storage medium and cooldown time, and use the default columnar storage.
-
- 1) Single column partition
-
- ```
- CREATE TABLE example_db.table_list
- (
- k1 INT,
- k2 VARCHAR(128),
- k3 SMALLINT,
- v1 VARCHAR(2048),
- v2 DATETIME DEFAULT "2014-02-04 15:36:00"
- )
- ENGINE=olap
- DUPLICATE KEY(k1, k2, k3)
- PARTITION BY LIST (k1)
- (
- PARTITION p1 VALUES IN ("1", "2", "3"),
- PARTITION p2 VALUES IN ("4", "5", "6"),
- PARTITION p3 VALUES IN ("7", "8", "9")
- )
- DISTRIBUTED BY HASH(k2) BUCKETS 32
- PROPERTIES(
- "storage_medium" = "SSD", "storage_cooldown_time" = "2022-06-04 00:00:00"
- );
- ```
-
- Explain:
- This statement will divide the data into 3 partitions as follows.
-
- ```
- ("1", "2", "3")
- ("4", "5", "6")
- ("7", "8", "9")
- ```
-
- Data that does not fall within these partition enumeration values will be filtered as illegal data
-
- 2) Multi-column partition
-
- ```
- CREATE TABLE example_db.table_list
- (
- k1 INT,
- k2 VARCHAR(128),
- k3 SMALLINT,
- v1 VARCHAR(2048),
- v2 DATETIME DEFAULT "2014-02-04 15:36:00"
- )
- ENGINE=olap
- DUPLICATE KEY(k1, k2, k3)
- PARTITION BY LIST (k1, k2)
- (
- PARTITION p1 VALUES IN (("1", "beijing"), ("1", "shanghai")),
- PARTITION p2 VALUES IN (("2", "beijing"), ("2", "shanghai")),
- PARTITION p3 VALUES IN (("3", "beijing"), ("3", "shanghai"))
- )
- DISTRIBUTED BY HASH(k2) BUCKETS 32
- PROPERTIES(
- "storage_medium" = "SSD", "storage_cooldown_time" = "2022-06-04 00:00:00"
- );
- ```
-
- Explain:
- This statement will divide the data into 3 partitions as follows.
-
- ```
- (("1", "beijing"), ("1", "shanghai"))
- (("2", "beijing"), ("2", "shanghai"))
- (("3", "beijing"), ("3", "shanghai"))
- ```
-
- Data that is not within these partition enumeration values will be filtered as illegal data
-
-6. Create a mysql table
- 6.1 Create MySQL table directly from external table information
- ```
- CREATE EXTERNAL TABLE example_db.table_mysql
- (
- k1 DATE,
- k2 INT,
- k3 SMALLINT,
- k4 VARCHAR(2048),
- k5 DATETIME
- )
- ENGINE=mysql
- PROPERTIES
- (
- "host" = "127.0.0.1",
- "port" = "8239",
- "user" = "mysql_user",
- "password" = "mysql_passwd",
- "database" = "mysql_db_test",
- "table" = "mysql_table_test"
- )
- ```
-
- 6.2 Create MySQL table with external ODBC catalog resource
- ```
- CREATE EXTERNAL RESOURCE "mysql_resource"
- PROPERTIES
- (
- "type" = "odbc_catalog",
- "user" = "mysql_user",
- "password" = "mysql_passwd",
- "host" = "127.0.0.1",
- "port" = "8239"
- );
-
- CREATE EXTERNAL TABLE example_db.table_mysql
- (
- k1 DATE,
- k2 INT,
- k3 SMALLINT,
- k4 VARCHAR(2048),
- k5 DATETIME
- )
- ENGINE=mysql
- PROPERTIES
- (
- "odbc_catalog_resource" = "mysql_resource",
- "database" = "mysql_db_test",
- "table" = "mysql_table_test"
- );
- ```
-
-7. Create a broker table with data files on HDFS, columns separated by "|" and lines delimited by "\n"
-
- ```
- CREATE EXTERNAL TABLE example_db.table_broker (
- k1 DATE,
- k2 INT,
- k3 SMALLINT,
- k4 VARCHAR(2048),
- k5 DATETIME
- )
- ENGINE=broker
- PROPERTIES (
- "broker_name" = "hdfs",
- "path" = "hdfs://hdfs_host:hdfs_port/data1,hdfs://hdfs_host:hdfs_port/data2,hdfs://hdfs_host:hdfs_port/data3%2c4",
- "column_separator" = "|",
- "line_delimiter" = "\n"
- )
- BROKER PROPERTIES (
- "username" = "hdfs_user",
- "password" = "hdfs_password"
- );
- ```
-
-8. Create a table with HLL columns
-
- ```
- CREATE TABLE example_db.example_table
- (
- k1 TINYINT,
- k2 DECIMAL(10, 2) DEFAULT "10.5",
- v1 HLL HLL_UNION,
- v2 HLL HLL_UNION
- )
- ENGINE=olap
- AGGREGATE KEY(k1, k2)
- DISTRIBUTED BY HASH(k1) BUCKETS 32;
- ```
-
-9. Create a table with BITMAP_UNION columns
-
- ```
- CREATE TABLE example_db.example_table
- (
- k1 TINYINT,
- k2 DECIMAL(10, 2) DEFAULT "10.5",
- v1 BITMAP BITMAP_UNION,
- v2 BITMAP BITMAP_UNION
- )
- ENGINE=olap
- AGGREGATE KEY(k1, k2)
- DISTRIBUTED BY HASH(k1) BUCKETS 32;
- ```
-10. Create a table with QUANTILE_UNION column (the origin value of **v1** and **v2** columns must be **numeric** types)
-
- ```
- CREATE TABLE example_db.example_table
- (
- k1 TINYINT,
- k2 DECIMAL(10, 2) DEFAULT "10.5",
- v1 QUANTILE_STATE QUANTILE_UNION,
- v2 QUANTILE_STATE QUANTILE_UNION
- )
- ENGINE=olap
- AGGREGATE KEY(k1, k2)
- DISTRIBUTED BY HASH(k1) BUCKETS 32;
- ```
-11. Create two colocate join tables.
-
- ```
- CREATE TABLE `t1` (
- `id` int(11) COMMENT "",
- `value` varchar(8) COMMENT ""
- ) ENGINE=OLAP
- DUPLICATE KEY(`id`)
- DISTRIBUTED BY HASH(`id`) BUCKETS 10
- PROPERTIES (
- "colocate_with" = "group1"
- );
- CREATE TABLE `t2` (
- `id` int(11) COMMENT "",
- `value` varchar(8) COMMENT ""
- ) ENGINE=OLAP
- DUPLICATE KEY(`id`)
- DISTRIBUTED BY HASH(`id`) BUCKETS 10
- PROPERTIES (
- "colocate_with" = "group1"
- );
- ```
-
-12. Create a broker table, with file on BOS.
-
- ```
- CREATE EXTERNAL TABLE example_db.table_broker (
- k1 DATE
- )
- ENGINE=broker
- PROPERTIES (
- "broker_name" = "bos",
- "path" = "bos://my_bucket/input/file",
- )
- BROKER PROPERTIES (
- "bos_endpoint" = "http://bj.bcebos.com",
- "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx",
- "bos_secret_accesskey"="yyyyyyyyyyyyyyyyyyyy"
- );
- ```
-
-13. Create a table with a bitmap index
-
- ```
- CREATE TABLE example_db.table_hash
- (
- k1 TINYINT,
- k2 DECIMAL(10, 2) DEFAULT "10.5",
- v1 CHAR(10) REPLACE,
- v2 INT SUM,
- INDEX k1_idx (k1) USING BITMAP COMMENT 'xxxxxx'
- )
- ENGINE=olap
- AGGREGATE KEY(k1, k2)
- COMMENT "my first doris table"
- DISTRIBUTED BY HASH(k1) BUCKETS 32;
- ```
-
-14. Create a dynamic partitioning table (dynamic partitioning needs to be enabled in the FE configuration), which creates partitions 3 days in advance every day. For example, if today is '2020-01-08', partitions named 'p20200108', 'p20200109', 'p20200110', and 'p20200111' will be created.
-
- ```
- [types: [DATE]; keys: [2020-01-08]; ‥types: [DATE]; keys: [2020-01-09]; )
- [types: [DATE]; keys: [2020-01-09]; ‥types: [DATE]; keys: [2020-01-10]; )
- [types: [DATE]; keys: [2020-01-10]; ‥types: [DATE]; keys: [2020-01-11]; )
- [types: [DATE]; keys: [2020-01-11]; ‥types: [DATE]; keys: [2020-01-12]; )
- ```
-
- ```
- CREATE TABLE example_db.dynamic_partition
- (
- k1 DATE,
- k2 INT,
- k3 SMALLINT,
- v1 VARCHAR(2048),
- v2 DATETIME DEFAULT "2014-02-04 15:36:00"
- )
- ENGINE=olap
- DUPLICATE KEY(k1, k2, k3)
- PARTITION BY RANGE (k1) ()
- DISTRIBUTED BY HASH(k2) BUCKETS 32
- PROPERTIES(
- "storage_medium" = "SSD",
- "dynamic_partition.time_unit" = "DAY",
- "dynamic_partition.end" = "3",
- "dynamic_partition.prefix" = "p",
- "dynamic_partition.buckets" = "32"
- );
- ```
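-
- The following is an illustrative variant of the table above that also pre-creates history partitions at creation time. This is a hedged sketch: it assumes the dynamic_partition.create_history_partition and dynamic_partition.history_partition_num properties described in the PROPERTIES section are available in your FE version; the table name is a placeholder.
-
- ```
- -- also pre-create 7 history partitions (days -7..-1) in addition to today's and the 3 future partitions
- CREATE TABLE example_db.dynamic_partition_with_history
- (
- k1 DATE,
- k2 INT,
- k3 SMALLINT,
- v1 VARCHAR(2048)
- )
- ENGINE=olap
- DUPLICATE KEY(k1, k2, k3)
- PARTITION BY RANGE (k1) ()
- DISTRIBUTED BY HASH(k2) BUCKETS 32
- PROPERTIES(
- "dynamic_partition.enable" = "true",
- "dynamic_partition.time_unit" = "DAY",
- "dynamic_partition.start" = "-7",
- "dynamic_partition.end" = "3",
- "dynamic_partition.prefix" = "p",
- "dynamic_partition.buckets" = "32",
- "dynamic_partition.create_history_partition" = "true",
- "dynamic_partition.history_partition_num" = "7"
- );
- ```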
-15. Create a table with rollup index
- ```
- CREATE TABLE example_db.rollup_index_table
- (
- event_day DATE,
- siteid INT DEFAULT '10',
- citycode SMALLINT,
- username VARCHAR(32) DEFAULT '',
- pv BIGINT SUM DEFAULT '0'
- )
- AGGREGATE KEY(event_day, siteid, citycode, username)
- DISTRIBUTED BY HASH(siteid) BUCKETS 10
- rollup (
- r1(event_day,siteid),
- r2(event_day,citycode),
- r3(event_day)
- )
- PROPERTIES("replication_num" = "3");
- ```
-
-16. Create an in-memory table:
-
- ```
- CREATE TABLE example_db.table_hash
- (
- k1 TINYINT,
- k2 DECIMAL(10, 2) DEFAULT "10.5",
- v1 CHAR(10) REPLACE,
- v2 INT SUM,
- INDEX k1_idx (k1) USING BITMAP COMMENT 'xxxxxx'
- )
- ENGINE=olap
- AGGREGATE KEY(k1, k2)
- COMMENT "my first doris table"
- DISTRIBUTED BY HASH(k1) BUCKETS 32
- PROPERTIES ("in_memory"="true");
- ```
-
-17. Create a hive external table
- ```
- CREATE TABLE example_db.table_hive
- (
- k1 TINYINT,
- k2 VARCHAR(50),
- v INT
- )
- ENGINE=hive
- PROPERTIES
- (
- "database" = "hive_db_name",
- "table" = "hive_table_name",
- "hive.metastore.uris" = "thrift://127.0.0.1:9083"
- );
- ```
-
-18. Specify the replica distribution of the table through replication_allocation
-
- ```
- CREATE TABLE example_db.table_hash
- (
- k1 TINYINT,
- k2 DECIMAL(10, 2) DEFAULT "10.5"
- )
- DISTRIBUTED BY HASH(k1) BUCKETS 32
- PROPERTIES (
- "replication_allocation"="tag.location.group_a:1, tag.location.group_b:2"
- );
-
- CREATE TABLE example_db.dynamic_partition
- (
- k1 DATE,
- k2 INT,
- k3 SMALLINT,
- v1 VARCHAR(2048),
- v2 DATETIME DEFAULT "2014-02-04 15:36:00"
- )
- PARTITION BY RANGE (k1) ()
- DISTRIBUTED BY HASH(k2) BUCKETS 32
- PROPERTIES(
- "dynamic_partition.time_unit" = "DAY",
- "dynamic_partition.start" = "-3",
- "dynamic_partition.end" = "3",
- "dynamic_partition.prefix" = "p",
- "dynamic_partition.buckets" = "32",
- "dynamic_partition."replication_allocation" = "tag.location.group_a:3"
- );
- ```
-
-19. Create an Iceberg external table
-
- ```
- CREATE TABLE example_db.t_iceberg
- ENGINE=ICEBERG
- PROPERTIES (
- "iceberg.database" = "iceberg_db",
- "iceberg.table" = "iceberg_table",
- "iceberg.hive.metastore.uris" = "thrift://127.0.0.1:9083",
- "iceberg.catalog.type" = "HIVE_CATALOG"
- );
- ```
-
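-20. Create a table with the UNIQUE KEY model. This is an illustrative sketch of the UNIQUE KEY behavior described in key_desc above (table and column names are placeholders): new rows whose key columns match an existing row replace that row.
-
- ```
- -- rows sharing (k1, k2) are deduplicated; the most recently loaded row wins
- CREATE TABLE example_db.table_unique
- (
- k1 BIGINT,
- k2 VARCHAR(32),
- v1 VARCHAR(2048),
- v2 INT
- )
- ENGINE=olap
- UNIQUE KEY(k1, k2)
- DISTRIBUTED BY HASH(k1) BUCKETS 32;
- ```
-
-21. Create a duplicate-key table with random bucketing. This is an illustrative sketch of the RANDOM distribution described in distribution_desc above; it assumes random bucketing is available for this table model in your Doris version, and the table name is a placeholder.
-
- ```
- -- rows are assigned to buckets randomly instead of by hashing a key column
- CREATE TABLE example_db.table_random
- (
- k1 DATE,
- k2 INT,
- v1 VARCHAR(2048)
- )
- ENGINE=olap
- DUPLICATE KEY(k1, k2)
- DISTRIBUTED BY RANDOM BUCKETS 32;
- ```
-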
-## keyword
-
- CREATE,TABLE
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE VIEW.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE VIEW.md
deleted file mode 100644
index fa6e245ed7..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE VIEW.md
+++ /dev/null
@@ -1,68 +0,0 @@
----
-{
- "title": "CREATE VIEW",
- "language": "en"
-}
----
-
-
-
-# CREATE VIEW
-## Description
- This statement is used to create a logical view
- Grammar:
-
- CREATE VIEW [IF NOT EXISTS]
- [db_name.]view_name
- (column1[ COMMENT "col comment"][, column2, ...])
- AS query_stmt
-
- Explain:
-
- 1. Views are logical views without physical storage. All queries on views are equivalent to sub-queries corresponding to views.
- 2. query_stmt can be any supported SQL statement.
-
-## example
-
- 1. Create view example_view on example_db
-
- CREATE VIEW example_db.example_view (k1, k2, k3, v1)
- AS
- SELECT c1 as k1, k2, k3, SUM(v1) FROM example_table
- WHERE k1 = 20160112 GROUP BY k1,k2,k3;
-
- 2. Create view with comment
-
- CREATE VIEW example_db.example_view
- (
- k1 COMMENT "first key",
- k2 COMMENT "second key",
- k3 COMMENT "third key",
- v1 COMMENT "first value"
- )
- COMMENT "my first view"
- AS
- SELECT c1 as k1, k2, k3, SUM(v1) FROM example_table
- WHERE k1 = 20160112 GROUP BY k1,k2,k3;
-
-## keyword
-
- CREATE,VIEW
-
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/Colocate Join.md b/docs/en/sql-reference/sql-statements/Data Definition/Colocate Join.md
deleted file mode 100644
index d54af225aa..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/Colocate Join.md
+++ /dev/null
@@ -1,98 +0,0 @@
----
-{
- "title": "Colocate Join",
- "language": "en"
-}
----
-
-
-
-# Colocate Join
-## Description
-Colocate/Local Join means that when a join involves multiple nodes, there is no data movement or network transmission; each node performs the join locally.
-The prerequisite for joining locally is that data with the same join key is imported to fixed nodes according to the same rules.
-
-1 How To Use:
-
-Simply add the property colocate_with when building a table. The value of colocate_with can be set to any one of the same set of colocate tables.
-However, you need to ensure that tables in the colocate_with attribute are created first.
-
-If you need to Colocate Join table t1 and t2, you can build tables according to the following statements:
-
-CREATE TABLE `t1` (
-`id` int(11) COMMENT "",
-`value` varchar(8) COMMENT ""
-) ENGINE=OLAP
-DUPLICATE KEY(`id`)
-DISTRIBUTED BY HASH(`id`) BUCKETS 10
-PROPERTIES (
-"colocate_with" = "t1"
-);
-
-CREATE TABLE `t2` (
-`id` int(11) COMMENT "",
-`value` varchar(8) COMMENT ""
-) ENGINE=OLAP
-DUPLICATE KEY(`id`)
-DISTRIBUTED BY HASH(`id`) BUCKETS 10
-PROPERTIES (
-"colocate_with" = "t1"
-);
-
-2 Current limitations of Colocate Join:
-
-1. The Colocate table must be an OLAP-type table
-2. Tables with the same colocate_with attribute must have the same number of BUCKETS
-3. Tables with the same colocate_with attribute must have the same replication number
-4. Tables with the same colocate_with attribute must have the same data types for their DISTRIBUTED columns
-
-3 Colocate Join's applicable scenario:
-
-Colocate Join is well suited for scenarios where tables are bucketed by the same field and are frequently joined on that field.
-
-4 FAQ:
-
-Q: Is Colocate Join supported across multiple tables?
-
-A: Yes, it is supported.
-
-Q: Do you support Colocate table and normal table Join?
-
-A: Yes, it is supported.
-
-Q: Does the Colocate table support Join with non-bucket Key?
-
-A: Supported. A join that does not meet the Colocate Join criteria will use Shuffle Join or Broadcast Join.
-
-Q: How do you determine that Join is executed according to Colocate Join?
-
-A: In the EXPLAIN result, the join is a Colocate Join if the child node of the Hash Join is an OlapScanNode directly, without an Exchange Node.
-
-Q: How to modify the colocate_with attribute?
-
-A: ALTER TABLE example_db.my_table set ("colocate_with"="target_table");
-
-Q: How to disable Colocate Join?
-
-A: set disable_colocate_join = true; this disables Colocate Join, and queries will then use Shuffle Join or Broadcast Join instead.
-
-## keyword
-
-COLOCATE, JOIN, CREATE TABLE
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/DROP DATABASE.md b/docs/en/sql-reference/sql-statements/Data Definition/DROP DATABASE.md
deleted file mode 100644
index 891b1eb230..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/DROP DATABASE.md
+++ /dev/null
@@ -1,43 +0,0 @@
----
-{
- "title": "DROP DATABASE",
- "language": "en"
-}
----
-
-
-
-# DROP DATABASE
-## Description
-This statement is used to delete the database
-Grammar:
-DROP DATABASE [IF EXISTS] db_name;
-
-Explain:
-1) For a period of time after DROP DATABASE is executed, the deleted database can be restored through the RECOVER statement. See the RECOVER statement for details.
-2) If DROP DATABASE FORCE is executed, the system will not check whether the database has unfinished transactions; the database is deleted directly and cannot be recovered. This operation is generally not recommended.
-
-## example
-1. Delete database db_test
-DROP DATABASE db_test;
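-
-2. Forcefully delete the database db_test (an illustrative sketch of the DROP DATABASE FORCE behavior described above; note that the database cannot be recovered afterwards)
-DROP DATABASE db_test FORCE;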
-
-## keyword
-DROP,DATABASE
-
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/DROP ENCRYPTKEY.md b/docs/en/sql-reference/sql-statements/Data Definition/DROP ENCRYPTKEY.md
deleted file mode 100644
index 352258f1bf..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/DROP ENCRYPTKEY.md
+++ /dev/null
@@ -1,55 +0,0 @@
----
-{
- "title": "DROP ENCRYPTKEY",
- "language": "en"
-}
----
-
-
-
-# DROP ENCRYPTKEY
-
-## Description
-
-### Syntax
-
-```
-DROP ENCRYPTKEY key_name
-```
-
-### Parameters
-
-> `key_name`: The name of the key to delete. It can include the database name, for example: `db1.my_key`.
-
-Delete a custom key. A key can be deleted only when its name matches exactly.
-
-Executing this command requires the user to have the `ADMIN` privileges.
-
-## example
-
-1. Delete a key.
-
-```
-DROP ENCRYPTKEY my_key;
-```
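-
-2. Delete a key in a specific database. This is an illustrative sketch; as the parameter description above notes, the key name may be qualified with a database name.
-
-```
-DROP ENCRYPTKEY example_db.my_key;
-```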
-
-## keyword
-
- DROP,ENCRYPTKEY
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/DROP INDEX.md b/docs/en/sql-reference/sql-statements/Data Definition/DROP INDEX.md
deleted file mode 100644
index ece2902003..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/DROP INDEX.md
+++ /dev/null
@@ -1,37 +0,0 @@
----
-{
- "title": "DROP INDEX",
- "language": "en"
-}
----
-
-
-
-# DROP INDEX
-
-## description
-
- This statement is used to delete an index from a table
- Grammar:
- DROP INDEX [IF EXISTS] index_name ON [db_name.]table_name;
-
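-## example
-
- 1. Delete the bitmap index k1_idx on table example_db.my_table, if it exists (an illustrative sketch; the index and table names are placeholders)
- DROP INDEX IF EXISTS k1_idx ON example_db.my_table;
-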
-## keyword
-
- DROP,INDEX
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/DROP MATERIALIZED VIEW.md b/docs/en/sql-reference/sql-statements/Data Definition/DROP MATERIALIZED VIEW.md
deleted file mode 100644
index cb1f3a8603..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/DROP MATERIALIZED VIEW.md
+++ /dev/null
@@ -1,110 +0,0 @@
----
-{
- "title": "DROP MATERIALIZED VIEW",
- "language": "en"
-}
----
-
-
-
-# DROP MATERIALIZED VIEW
-
-## description
- This statement is used to delete a materialized view. This is a synchronous operation.
-
-syntax:
-
- ```
- DROP MATERIALIZED VIEW [IF EXISTS] mv_name ON table_name
- ```
-
-1. IF EXISTS
- If the materialized view does not exist, Doris will not throw an error. If this keyword is not declared, an error is reported when the materialized view does not exist.
-
-
-2. mv_name
- The name of the materialized view to be deleted. Required.
-
-3. table_name
- Name of the table to which the materialized view to be deleted belongs. Required.
-
-## example
-
-Table structure is
-
-```
-mysql> desc all_type_table all;
-+----------------+-------+----------+------+-------+---------+-------+
-| IndexName | Field | Type | Null | Key | Default | Extra |
-+----------------+-------+----------+------+-------+---------+-------+
-| all_type_table | k1 | TINYINT | Yes | true | N/A | |
-| | k2 | SMALLINT | Yes | false | N/A | NONE |
-| | k3 | INT | Yes | false | N/A | NONE |
-| | k4 | BIGINT | Yes | false | N/A | NONE |
-| | k5 | LARGEINT | Yes | false | N/A | NONE |
-| | k6 | FLOAT | Yes | false | N/A | NONE |
-| | k7 | DOUBLE | Yes | false | N/A | NONE |
-| | | | | | | |
-| k1_sumk2 | k1 | TINYINT | Yes | true | N/A | |
-| | k2 | SMALLINT | Yes | false | N/A | SUM |
-+----------------+-------+----------+------+-------+---------+-------+
-```
-
-1. Drop the materialized view named k1_sumk2 of the table all_type_table
-
- ```
- drop materialized view k1_sumk2 on all_type_table;
- ```
- Table structure after materialized view is deleted as following:
-
- ```
-+----------------+-------+----------+------+-------+---------+-------+
-| IndexName | Field | Type | Null | Key | Default | Extra |
-+----------------+-------+----------+------+-------+---------+-------+
-| all_type_table | k1 | TINYINT | Yes | true | N/A | |
-| | k2 | SMALLINT | Yes | false | N/A | NONE |
-| | k3 | INT | Yes | false | N/A | NONE |
-| | k4 | BIGINT | Yes | false | N/A | NONE |
-| | k5 | LARGEINT | Yes | false | N/A | NONE |
-| | k6 | FLOAT | Yes | false | N/A | NONE |
-| | k7 | DOUBLE | Yes | false | N/A | NONE |
-+----------------+-------+----------+------+-------+---------+-------+
- ```
-
-2. Delete a non-existing materialized view in the table all_type_table
-
- ```
- drop materialized view k1_k2 on all_type_table;
-ERROR 1064 (HY000): errCode = 2, detailMessage = Materialized view [k1_k2] does not exist in table [all_type_table]
- ```
-
- The delete request directly reports an error
-
-3. Delete the materialized view k1_k2 in the table all_type_table. Materialized view does not exist and no error is reported.
-
- ```
- drop materialized view if exists k1_k2 on all_type_table;
-Query OK, 0 rows affected (0.00 sec)
- ```
-
- If it exists, it will be deleted; If it does not exist, no error will be reported.
-
-## keyword
- DROP, MATERIALIZED, VIEW
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/DROP REPOSITORY.md b/docs/en/sql-reference/sql-statements/Data Definition/DROP REPOSITORY.md
deleted file mode 100644
index cb24b10b01..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/DROP REPOSITORY.md
+++ /dev/null
@@ -1,41 +0,0 @@
----
-{
- "title": "DROP REPOSITORY",
- "language": "en"
-}
----
-
-
-
-# DROP REPOSITORY
-## Description
-This statement is used to delete a created repository. Only root or superuser users can delete repositories.
-Grammar:
-DROP REPOSITORY `repo_name`;
-
-Explain:
-1. Deleting a repository only removes the repository's mapping in Palo; the actual repository data is not deleted. After deletion, you can map to the repository again by specifying the same broker and LOCATION.
-
-## example
-1. Delete the repository named bos_repo:
-DROP REPOSITORY `bos_repo`;
-
-## keyword
-DROP, REPOSITORY
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/DROP RESOURCE.md b/docs/en/sql-reference/sql-statements/Data Definition/DROP RESOURCE.md
deleted file mode 100644
index 66342cada9..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/DROP RESOURCE.md
+++ /dev/null
@@ -1,46 +0,0 @@
----
-{
- "title": "DROP RESOURCE",
- "language": "en"
-}
----
-
-
-
-# DROP RESOURCE
-
-## Description
-
- This statement is used to delete an existing resource. Only the root or admin user can delete resources.
-
- Syntax:
- DROP RESOURCE 'resource_name'
-
- Note: ODBC/S3 resources that are in use cannot be deleted.
-
-## Example
-
- 1. Delete the Spark resource named spark0:
- DROP RESOURCE 'spark0';
-
-
-## keyword
-
- DROP, RESOURCE
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/DROP TABLE.md b/docs/en/sql-reference/sql-statements/Data Definition/DROP TABLE.md
deleted file mode 100644
index a5d3b6a0de..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/DROP TABLE.md
+++ /dev/null
@@ -1,46 +0,0 @@
----
-{
- "title": "DROP TABLE",
- "language": "en"
-}
----
-
-
-
-# DROP TABLE
-## Description
-This statement is used to delete the table.
-Grammar:
-DROP TABLE [IF EXISTS] [db_name.]table_name;
-
-Explain:
-1) For a period of time after DROP TABLE is executed, the deleted table can be restored through the RECOVER statement. See the RECOVER statement for details.
-2) If DROP TABLE FORCE is executed, the system will not check whether the table has unfinished transactions; the table is deleted directly and cannot be recovered. This operation is generally not recommended.
-
-## example
-1. Delete a table
-DROP TABLE my_table;
-
-2. Delete the table in the specified database, if it exists
-DROP TABLE IF EXISTS example_db.my_table;
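-
-3. Forcefully delete a table (an illustrative sketch of the DROP TABLE FORCE behavior described above; note that the table cannot be recovered afterwards)
-DROP TABLE my_table FORCE;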
-
-## keyword
-DROP,TABLE
-
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/DROP VIEW.md b/docs/en/sql-reference/sql-statements/Data Definition/DROP VIEW.md
deleted file mode 100644
index b34a402b4f..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/DROP VIEW.md
+++ /dev/null
@@ -1,40 +0,0 @@
----
-{
- "title": "DROP VIEW",
- "language": "en"
-}
----
-
-
-
-# DROP VIEW
-## Description
-This statement is used to delete a logical view VIEW
-Grammar:
-DROP VIEW [IF EXISTS]
-[db_name.]view_name;
-
-## example
-1. If it exists, delete view example_view on example_db
-DROP VIEW IF EXISTS example_db.example_view;
-
-## keyword
-DROP,VIEW
-
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/HLL.md b/docs/en/sql-reference/sql-statements/Data Definition/HLL.md
deleted file mode 100644
index a3f92863b2..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/HLL.md
+++ /dev/null
@@ -1,111 +0,0 @@
----
-{
- "title": "HLL",
- "language": "en"
-}
----
-
-
-
-# HLL
-## Description
-HLL is an engineering implementation based on the HyperLogLog algorithm. It is used to store the intermediate results of the HyperLogLog calculation process and can only be used as the value column type of a table.
-It continuously reduces the data volume through aggregation in order to speed up queries, producing an estimated result with an error of about 1%.
-The HLL column is generated from other columns or from fields in the imported data. During import, the hll_hash function specifies which column in the data is used to generate the HLL column.
-It is often used to replace COUNT DISTINCT and, combined with rollups, to quickly calculate UV in business scenarios.
-
-The correlation function:
-
-HLL_UNION_AGG(hll)
-This function is an aggregation function used to estimate the cardinality of all data satisfying the conditions. It can also be used as an analytic function; it only supports the default window and does not support a window clause.
-
-HLL_RAW_AGG(hll)
-This function is an aggregation function that aggregates HLL-type fields and returns an HLL type.
-
-HLL_CARDINALITY(hll)
-This function is used to estimate the cardinality of a single HLL sequence
-
-HLL_HASH(column_name)
-Generate HLL column types for insert or import, see the instructions for the use of imports
-
-EMPTY_HLL()
-Generate empty HLL column types for insert or import, see the instructions for the use of imports
-
-## example
-1. First create a table with HLL columns
-create table test(
-dt date,
-id int,
-name char(10),
-province char(10),
-os char(1),
-set1 hll hll_union,
-set2 hll hll_union)
-distributed by hash(id) buckets 32;
-
-2. Import data. See help curl for the way you import it.
-
- A. Generate HLL columns using columns in tables
-
- curl --location-trusted -uname:password -T data -H "label:load_1" -H "columns:dt, id, name, province, os, set1=hll_hash(id), set2=hll_hash(name)"
- http://host/api/test_db/test/_stream_load
-
- B. Generate HLL columns using a column in the data
-
- curl --location-trusted -uname:password -T data -H "label:load_1" -H "columns:dt, id, name, province, sex, cuid, os, set1=hll_hash(cuid), set2=hll_hash(os)"
- http://host/api/test_db/test/_stream_load
-
-3. There are three common ways to aggregate the data (querying the base table directly without aggregation may be about as slow as using APPROX_COUNT_DISTINCT directly):
-
-A. Create a rollup so that the HLL column is pre-aggregated.
-alter table test add rollup test_rollup(dt, set1);
-
-B. Create another table dedicated to computing UV, then insert data into it:
-
-create table test_uv(
-dt date,
-uv_set hll hll_union)
-distributed by hash(dt) buckets 32;
-
-insert into test_uv select dt, set1 from test;
-
-C. Create another table dedicated to computing uv, then insert and generate HLL columns from other non-hll columns of test through hll_hash
-
-create table test_uv(
-dt date,
-id_set hll hll_union)
-distributed by hash(dt) buckets 32;
-
-insert into test_uv select dt, hll_hash(id) from test;
-
-4. Query. The original value of an HLL column cannot be queried directly; it can only be queried through the matching functions.
-
-a. Calculate the total UV
-select HLL_UNION_AGG(uv_set) from test_uv;
-
-b. Calculate the UV of each day
-select dt, HLL_CARDINALITY(uv_set) from test_uv;
-
-c. Calculate the aggregate value of set1 in the test table
-select dt, HLL_CARDINALITY(uv) from (select dt, HLL_RAW_AGG(set1) as uv from test group by dt) tmp;
-select dt, HLL_UNION_AGG(set1) as uv from test group by dt;
-
-## keyword
-HLL
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/RECOVER.md b/docs/en/sql-reference/sql-statements/Data Definition/RECOVER.md
deleted file mode 100644
index d2f1f35019..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/RECOVER.md
+++ /dev/null
@@ -1,54 +0,0 @@
----
-{
- "title": "RECOVER",
- "language": "en"
-}
----
-
-
-
-# RECOVER
-## Description
-This statement is used to restore previously deleted databases, tables, or partitions
-Grammar:
-1) Restore database
-RECOVER DATABASE db_name;
-2) Restore table
-RECOVER TABLE [db_name.]table_name;
-3) Restore partition
-RECOVER PARTITION partition_name FROM [db_name.]table_name;
-
-Explain:
-1. This operation can only recover the meta-information deleted in the previous period of time. The default is 1 day.(You can configure it with the `catalog_trash_expire_second` parameter in fe.conf)
-2. If new meta-information of the same name and type is created after deleting meta-information, the previously deleted meta-information cannot be restored.
-
-## example
-1. Restore the database named example_db
-RECOVER DATABASE example_db;
-
-2. Restore table named example_tbl
-RECOVER TABLE example_db.example_tbl;
-
-3. Restore partition named P1 in example_tbl
-RECOVER PARTITION p1 FROM example_tbl;
-
-## keyword
-RECOVER
-
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/REFRESH DATABASE.md b/docs/en/sql-reference/sql-statements/Data Definition/REFRESH DATABASE.md
deleted file mode 100644
index 805b4a06d4..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/REFRESH DATABASE.md
+++ /dev/null
@@ -1,45 +0,0 @@
----
-{
- "title": "REFRESH DATABASE",
- "language": "en"
-}
----
-
-
-
-# REFRESH DATABASE
-
-## Description
-
- This statement is used to synchronize the remote Iceberg database and will delete and rebuild the Iceberg tables under the current Doris database, leaving the non-Iceberg tables unaffected.
- Syntax:
- REFRESH DATABASE db_name;
-
- Instructions.
- 1) Valid only for the Iceberg database mounted in Doris.
-
-## Example
-
- 1) Refresh the database iceberg_test_db
- REFRESH DATABASE iceberg_test_db;
-
-## keyword
-
- REFRESH,DATABASE
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/REFRESH TABLE.md b/docs/en/sql-reference/sql-statements/Data Definition/REFRESH TABLE.md
deleted file mode 100644
index 69455ca043..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/REFRESH TABLE.md
+++ /dev/null
@@ -1,45 +0,0 @@
----
-{
- "title": "REFRESH TABLE",
- "language": "en"
-}
----
-
-
-
-# REFRESH TABLE
-
-## Description
-
- This statement is used to synchronize a remote Iceberg table and will delete and rebuild Doris' current external table.
- Syntax.
- REFRESH TABLE tbl_name;
-
- Instructions.
- 1) Valid only for the Iceberg table mounted in Doris.
-
-## Example
-
- 1) Refresh the table iceberg_tbl
- REFRESH TABLE iceberg_tbl;
-
-## keyword
-
- REFRESH,TABLE
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/RESTORE.md b/docs/en/sql-reference/sql-statements/Data Definition/RESTORE.md
deleted file mode 100644
index 5eb9dee253..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/RESTORE.md
+++ /dev/null
@@ -1,87 +0,0 @@
----
-{
- "title": "RESTORE",
- "language": "en"
-}
----
-
-
-
-# RESTORE
-## Description
-1. RESTORE
-This statement is used to restore the data previously backed up by the BACKUP command to the specified database. This command is an asynchronous operation. After successful submission, you need to check progress through the SHOW RESTORE command. Only tables of the OLAP type can be restored.
-Grammar:
-RESTORE SNAPSHOT [db_name].{snapshot_name}
-FROM `repository_name`
-[ON|EXCLUDE] (
-`table_name` [PARTITION (`p1`, ...)] [AS `tbl_alias`],
-...
-)
-PROPERTIES ("key"="value", ...);
-
-Explain:
-1. Only one BACKUP or RESTORE task can be performed under the same database.
-2. The ON clause identifies the tables and partitions that need to be restored. If no partition is specified, all partitions of the table are restored by default. The specified tables and partitions must already exist in the warehouse backup.
-3. The EXCLUDE clause identifies the tables and partitions that do not need to be restored. All partitions of all tables in the warehouse except the specified tables or partitions will be restored.
-4. The backup tables in the warehouse can be restored to new tables through AS statements. But the new table name cannot already exist in the database. Partition name cannot be changed.
-5. The backup tables in the warehouse can be restored and replaced with the same-name tables in the database, but the table structure of the two tables must be completely consistent. Table structure includes: table name, column, partition, Rollup and so on.
-6. Partitions of the recovery table can be specified, and the system checks whether the partition Range or List matches.
-7. PROPERTIES currently supports the following attributes:
-"Backup_timestamp" = "2018-05-04-16-45-08": specifies which version of the time to restore the corresponding backup must be filled in. This information can be obtained through the `SHOW SNAPSHOT ON repo;'statement.
-"Replication_num" = "3": Specifies the number of replicas of the restored table or partition. The default is 3. If an existing table or partition is restored, the number of copies must be the same as the number of copies of an existing table or partition. At the same time, there must be enough hosts to accommodate multiple copies.
-"Timeout" = "3600": Task timeout, default to one day. Unit seconds.
-"Meta_version" = 40: Use the specified meta_version to read the previously backed up metadata. Note that as a temporary solution, this parameter is only used to restore the data backed up by the older version of Doris. The latest version of the backup data already contains meta version, no need to specify.
-
-## example
-1. Restore the backup table backup_tbl in snapshot_1 from example_repo to database example_db1, with the time version "2018-05-04-16-45-08". Restore a single replica:
-RESTORE SNAPSHOT example_db1.`snapshot_1`
-FROM `example_repo`
-ON ( `backup_tbl` )
-PROPERTIES
-(
-"backup_timestamp"="2018-05-04-16-45-08",
-"Replication\ num" = "1"
-);
-
-2. Restore partitions p1 and p2 of table backup_tbl in snapshot_2, and table backup_tbl2 renamed to new_tbl, from example_repo to database example_db1. The time version is "2018-05-04-17-11-01". By default, three replicas are restored:
-RESTORE SNAPSHOT example_db1.`snapshot_2`
-FROM `example_repo`
-ON
-(
-`backup_tbl` PARTITION (`p1`, `p2`),
-`backup_tbl2` AS `new_tbl`
-)
-PROPERTIES
-(
-"backup_timestamp"="2018-05-04-17-11-01"
-);
-
-3. Restore all partitions of all tables in snapshot_3 from example_repo to database example_db1, except backup_tbl, with the time version "2018-05-04-18-12-18".
-RESTORE SNAPSHOT example_db1.`snapshot_3`
-FROM `example_repo`
-EXCLUDE ( `backup_tbl` )
-PROPERTIES
-(
- "backup_timestamp"="2018-05-04-18-12-18"
-);
-## keyword
-RESTORE
-
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/SHOW ENCRYPTKEYS.md b/docs/en/sql-reference/sql-statements/Data Definition/SHOW ENCRYPTKEYS.md
deleted file mode 100644
index 2473f98971..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/SHOW ENCRYPTKEYS.md
+++ /dev/null
@@ -1,68 +0,0 @@
----
-{
- "title": "SHOW ENCRYPTKEYS",
- "language": "en"
-}
----
-
-
-
-# SHOW ENCRYPTKEYS
-
-## Description
-
-### Syntax
-
-```
-SHOW ENCRYPTKEYS [IN|FROM db] [LIKE 'key_pattern']
-```
-
-### Parameters
-
->`db`: the name of the database to query
->`key_pattern`: parameter used to filter key names
-
-View all custom keys under the database. If the user specifies a database, then view the corresponding database, otherwise query the current session's database directly.
-
-You need to have `ADMIN` privileges for this database.
-
-## Example
-
- ```
- mysql> SHOW ENCRYPTKEYS;
- +-------------------+-------------------+
- | EncryptKey Name | EncryptKey String |
- +-------------------+-------------------+
- | example_db.my_key | ABCD123456789 |
- +-------------------+-------------------+
- 1 row in set (0.00 sec)
-
- mysql> SHOW ENCRYPTKEYS FROM example_db LIKE "%my%";
- +-------------------+-------------------+
- | EncryptKey Name | EncryptKey String |
- +-------------------+-------------------+
- | example_db.my_key | ABCD123456789 |
- +-------------------+-------------------+
- 1 row in set (0.00 sec)
- ```
-
-## keyword
-
- SHOW,ENCRYPTKEYS
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/SHOW RESOURCES.md b/docs/en/sql-reference/sql-statements/Data Definition/SHOW RESOURCES.md
deleted file mode 100644
index 8ed9d60f55..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/SHOW RESOURCES.md
+++ /dev/null
@@ -1,67 +0,0 @@
----
-{
- "title": "SHOW RESOURCES",
- "language": "en"
-}
----
-
-
-
-# SHOW RESOURCES
-
-## Description
-
- This statement is used to display the resources that the user has permission to use.
- Ordinary users can only display the resources with permission, while root or admin users can display all the resources.
-
- Syntax:
-
- SHOW RESOURCES
- [
- WHERE
- [NAME [ = "your_resource_name" | LIKE "name_matcher"]]
- [RESOURCETYPE = ["[spark|odbc_catalog|s3]"]]
- ]
- [ORDER BY ...]
- [LIMIT limit][OFFSET offset];
-
- Explain:
- 1) If use NAME LIKE, the name of resource is matched to show.
- 2) If use NAME =, the specified name is exactly matched.
- 3) If RESOURCETYPE is specified, the corresponding resource type is matched.
- 4) Use ORDER BY to sort any combination of columns.
- 5) If LIMIT is specified, limit matching records are displayed. Otherwise, it is all displayed.
- 6) If OFFSET is specified, the query results are displayed starting with the offset offset. The offset is 0 by default.
-
-## Example
-
- 1. Display all resources that the current user has permissions on
- SHOW RESOURCES;
-
- 2. Show the specified resources whose names contain the string "20140102", displaying at most 10 results
- SHOW RESOURCES WHERE NAME LIKE "2014_01_02" LIMIT 10;
-
- 3. Display the specified resource, specify the name as "20140102" and sort in descending order by key
- SHOW RESOURCES WHERE NAME = "20140102" ORDER BY `KEY` DESC;
-
-
-## keyword
-
- SHOW RESOURCES, RESOURCES
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE.md b/docs/en/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE.md
deleted file mode 100644
index 247f129d15..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE.md
+++ /dev/null
@@ -1,52 +0,0 @@
----
-{
- "title": "TRUNCATE TABLES",
- "language": "en"
-}
----
-
-
-
-# TRUNCATE TABLE
-## Description
-This statement is used to empty the data of the specified table and partition
-Grammar:
-
-TRUNCATE TABLE [db.]tbl[ PARTITION(p1, p2, ...)];
-
-Explain:
-1. The statement empties the data, but retains the table or partition.
-2. Unlike DELETE, this statement can only empty the specified tables or partitions as a whole, without adding filtering conditions.
-3. Unlike DELETE, using this method to clear data will not affect query performance.
-4. The data deleted by this operation is not recoverable.
-5. When using this command, the table state should be NORMAL, i.e. SCHEMA CHANGE operations are not allowed.
-
-## example
-
-1. Truncate the table tbl under example_db
-
-TRUNCATE TABLE example_db.tbl;
-
-2. Truncate partitions p1 and p2 of table tbl
-
-TRUNCATE TABLE tbl PARTITION(p1, p2);
-
-## keyword
-TRUNCATE,TABLE
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/create-function.md b/docs/en/sql-reference/sql-statements/Data Definition/create-function.md
deleted file mode 100644
index 7e29591fe7..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/create-function.md
+++ /dev/null
@@ -1,152 +0,0 @@
----
-{
- "title": "CREATE FUNCTION",
- "language": "en"
-}
----
-
-
-
-# CREATE FUNCTION
-## Description
-### Syntax
-
-```
-CREATE [AGGREGATE] [ALIAS] FUNCTION function_name
- (arg_type [, ...])
- [RETURNS ret_type]
- [INTERMEDIATE inter_type]
- [WITH PARAMETER(param [,...]) AS origin_function]
- [PROPERTIES ("key" = "value" [, ...]) ]
-```
-
-### Parameters
-
-> `AGGREGATE`: If this is the case, it means that the created function is an aggregate function.
->
-> `ALIAS`: If this is the case, it means that the created function is an alias function.
->
-> If the above two items are not present, it means that the created function is a scalar function.
->
-> `function_name`: The name of the function to create. It can include the database name, for example: `db1.my_func`.
->
-> `arg_type`: The parameter type of the function is the same as the type defined at the time of table building. Variable-length parameters can be represented by `,...`. If it is a variable-length type, the type of the variable-length part of the parameters is the same as the last non-variable-length parameter type.
-> **NOTICE**: `ALIAS FUNCTION` variable-length parameters are not supported, and there is at least one parameter. In particular, the type `ALL` refers to any data type and can only be used for `ALIAS FUNCTION`.
->
-> `ret_type`: Required for creating a new function. This parameter is not required if you are aliasing an existing function.
->
-> `inter_type`: A data type used to represent the intermediate stage of an aggregate function.
->
-> `param`: The parameter used to represent the alias function, containing at least one.
->
-> `origin_function`: Used to represent the original function corresponding to the alias function.
->
-> `properties`: Used to set properties related to aggregate function and scalar function. Properties that can be set include
->
-> "Object_file": Custom function dynamic library URL path, currently only supports HTTP/HTTPS protocol, this path needs to remain valid throughout the life cycle of the function. This option is mandatory
->
-> "symbol": Function signature of scalar functions for finding function entries from dynamic libraries. This option is mandatory for scalar functions
->
-> "init_fn": Initialization function signature of aggregate function. Necessary for aggregation functions
->
-> "update_fn": Update function signature of aggregate function. Necessary for aggregation functions
->
-> "merge_fn": Merge function signature of aggregate function. Necessary for aggregation functions
->
-> "serialize_fn": Serialized function signature of aggregate function. For aggregation functions, it is optional, and if not specified, the default serialization function will be used
->
-> "finalize_fn": A function signature that aggregates functions to obtain the final result. For aggregation functions, it is optional. If not specified, the default fetch result function will be used.
->
-> "md5": The MD5 value of the function dynamic link library, which is used to verify that the downloaded content is correct. This option is optional
->
-> "prepare_fn": Function signature of the prepare function for finding the entry from the dynamic library. This option is optional for custom functions
->
-> "close_fn": Function signature of the close function for finding the entry from the dynamic library. This option is optional for custom functions
-> "type": Function type, RPC for remote udf, NATIVE for c++ native udf
-
-
-
-This statement creates a custom function. Executing this command requires that the user have `ADMIN` privileges.
-
-If the `function_name` contains the database name, the custom function will be created in the corresponding database, otherwise the function will be created in the database where the current session is located. The name and parameters of the new function cannot be the same as functions already existing in the current namespace, otherwise the creation will fail. But only with the same name and different parameters can the creation be successful.
-
-## example
-
-1. Create a custom scalar function
-
- ```
- CREATE FUNCTION my_add(INT, INT) RETURNS INT PROPERTIES (
- "symbol" = "_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_",
- "object_file" ="http://host:port/libmyadd.so"
- );
- ```
-2. Create a custom scalar function with prepare/close functions
-
- ```
- CREATE FUNCTION my_add(INT, INT) RETURNS INT PROPERTIES (
- "symbol" = "_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_",
- "prepare_fn" = "_ZN9doris_udf14AddUdf_prepareEPNS_15FunctionContextENS0_18FunctionStateScopeE",
- "close_fn" = "_ZN9doris_udf12AddUdf_closeEPNS_15FunctionContextENS0_18FunctionStateScopeE",
- "object_file" = "http://host:port/libmyadd.so"
- );
- ```
-
-3. Create a custom aggregation function
-
- ```
- CREATE AGGREGATE FUNCTION my_count (BIGINT) RETURNS BIGINT PROPERTIES (
- "init_fn"="_ZN9doris_udf9CountInitEPNS_15FunctionContextEPNS_9BigIntValE",
- "update_fn"="_ZN9doris_udf11CountUpdateEPNS_15FunctionContextERKNS_6IntValEPNS_9BigIntValE",
- "merge_fn"="_ZN9doris_udf10CountMergeEPNS_15FunctionContextERKNS_9BigIntValEPS2_",
- "finalize_fn"="_ZN9doris_udf13CountFinalizeEPNS_15FunctionContextERKNS_9BigIntValE",
- "object_file"="http://host:port/libudasample.so"
- );
- ```
-
-4. Create a scalar function with variable length parameters
-
- ```
- CREATE FUNCTION strconcat(varchar, ...) RETURNS varchar properties (
- "symbol" = "_ZN9doris_udf6StrConcatUdfEPNS_15FunctionContextERKNS_6IntValES4_",
- "object_file" = "http://host:port/libmyStrConcat.so"
- );
- ```
-
-5. Create a custom alias function
-
- ```
- -- create a custom functional alias function
- CREATE ALIAS FUNCTION id_masking(BIGINT) WITH PARAMETER(id)
- AS CONCAT(LEFT(id, 3), '****', RIGHT(id, 4));
-
- -- create a custom cast alias function
- CREATE ALIAS FUNCTION string(ALL, INT) WITH PARAMETER(col, length)
- AS CAST(col AS varchar(length));
- ```
-6. Create a remote UDF
- ```
- CREATE FUNCTION rpc_add(INT, INT) RETURNS INT PROPERTIES (
- "SYMBOL"="add_int",
- "OBJECT_FILE"="127.0.0.1:9999",
- "TYPE"="RPC"
- );
- ```
-## keyword
-CREATE,FUNCTION
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/drop-function.md b/docs/en/sql-reference/sql-statements/Data Definition/drop-function.md
deleted file mode 100644
index 0fbaa55131..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/drop-function.md
+++ /dev/null
@@ -1,54 +0,0 @@
----
-{
- "title": "DROP FUNCTION",
- "language": "en"
-}
----
-
-
-
-# DROP FUNCTION
-## Description
-### Syntax
-
-```
-DROP FUNCTION function_name
-(arg_type [...])
-```
-
-### Parameters
-
-> `function_name`: The name of the function to delete
->
-> `arg_type`: The parameter type list of the function to delete
->
-
-
-Delete a custom function. The function can only be deleted if both its name and its parameter types match exactly.
-
-## example
-
-1. Delete a function
-
-```
-DROP FUNCTION my_add(INT, INT)
-```
-## keyword
-DROP,FUNCTION
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/show-functions.md b/docs/en/sql-reference/sql-statements/Data Definition/show-functions.md
deleted file mode 100644
index 59b5bcb1fe..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Definition/show-functions.md
+++ /dev/null
@@ -1,83 +0,0 @@
----
-{
- "title": "SHOW FUNCTIONS",
- "language": "en"
-}
----
-
-
-
-# SHOW FUNCTIONS
-## Description
-### Syntax
-
-```
-SHOW [FULL] [BUILTIN] FUNCTIONS [IN|FROM db] [LIKE 'function_pattern']
-```
-
-### Parameters
-
-> `full`: Indicates that the details of the functions should be shown
-> `builtin`: Indicates that only the functions provided by Doris should be shown
-> `db`: The name of the database to query
-> `function_pattern`: The pattern used to filter function names
-
-Shows all custom (or builtin) functions under a database. If a database is specified, that database is queried; otherwise the database of the current session is used.
-
-You need `SHOW` privileges for this database
-
-## example
-
-```
-mysql> show full functions in testDb\G
-*************************** 1. row ***************************
- Signature: my_add(INT,INT)
- Return Type: INT
- Function Type: Scalar
-Intermediate Type: NULL
- Properties: {"symbol":"_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_","object_file":"http://host:port/libudfsample.so","md5":"cfe7a362d10f3aaf6c49974ee0f1f878"}
-*************************** 2. row ***************************
- Signature: my_count(BIGINT)
- Return Type: BIGINT
- Function Type: Aggregate
-Intermediate Type: NULL
- Properties: {"object_file":"http://host:port/libudasample.so","finalize_fn":"_ZN9doris_udf13CountFinalizeEPNS_15FunctionContextERKNS_9BigIntValE","init_fn":"_ZN9doris_udf9CountInitEPNS_15FunctionContextEPNS_9BigIntValE","merge_fn":"_ZN9doris_udf10CountMergeEPNS_15FunctionContextERKNS_9BigIntValEPS2_","md5":"37d185f80f95569e2676da3d5b5b9d2f","update_fn":"_ZN9doris_udf11CountUpdateEPNS_15FunctionContextERKNS_6IntValEPNS_9BigIntValE"}
-*************************** 3. row ***************************
- Signature: id_masking(BIGINT)
- Return Type: VARCHAR
- Function Type: Alias
-Intermediate Type: NULL
- Properties: {"parameter":"id","origin_function":"concat(left(`id`, 3), `****`, right(`id`, 4))"}
-
-3 rows in set (0.00 sec)
-mysql> show builtin functions in testDb like 'year%';
-+---------------+
-| Function Name |
-+---------------+
-| year |
-| years_add |
-| years_diff |
-| years_sub |
-+---------------+
-4 rows in set (0.00 sec)
-```
-
-## keyword
-SHOW,FUNCTIONS
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/BEGIN.md b/docs/en/sql-reference/sql-statements/Data Manipulation/BEGIN.md
deleted file mode 100644
index 069eeeb786..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/BEGIN.md
+++ /dev/null
@@ -1,92 +0,0 @@
----
-{
- "title": "BEGIN",
- "language": "en"
-}
----
-
-
-
-# BEGIN, COMMIT, ROLLBACK
-## Description
-### Syntax
-
-```
-BEGIN;
-INSERT INTO table_name ...
-COMMIT;
-```
-```
-BEGIN [ WITH LABEL label];
-INSERT INTO table_name ...
-ROLLBACK;
-```
-### Parameters
-
-> label: the label for this transaction; if you need one, set it to a string.
-
-### Note
-
-A transaction can only be used with INSERT, not UPDATE or DELETE. You can check the state of this transaction by `SHOW TRANSACTION WHERE LABEL = 'label'`
-
-## example
-
-1. Begin a transaction without a label, then commit it
-
-```
-BEGIN;
-INSERT INTO test VALUES (1, 2);
-INSERT INTO test (c1, c2) VALUES (1, 2);
-INSERT INTO test (c1, c2) VALUES (1, DEFAULT);
-INSERT INTO test (c1) VALUES (1);
-COMMIT;
-```
-
-All the data in the sql between `begin` and `commit` will be inserted into the table.
-
-2. Begin a transaction without a label, then abort it
-
-```
-BEGIN;
-INSERT INTO test VALUES (1, 2);
-INSERT INTO test (c1, c2) VALUES (1, 2);
-INSERT INTO test (c1, c2) VALUES (1, DEFAULT);
-INSERT INTO test (c1) VALUES (1);
-ROLLBACK;
-```
-
-All the data in the sql between `begin` and `rollback` will be aborted; nothing will be inserted into the table.
-
-3. Begin a transaction with a label, then commit it
-
-```
-BEGIN WITH LABEL test_label1;
-INSERT INTO test VALUES (1, 2);
-INSERT INTO test (c1, c2) VALUES (1, 2);
-INSERT INTO test (c1, c2) VALUES (1, DEFAULT);
-INSERT INTO test (c1) VALUES (1);
-COMMIT;
-```
-
-All the data in the sql between `begin` and `commit` will be inserted into the table.
-The label of `test_label1` will be set to mark this transaction. You can check this transaction by `SHOW TRANSACTION WHERE LABEL = 'test_label1'`.
-
-## keyword
-BEGIN, COMMIT, ROLLBACK
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md
deleted file mode 100644
index 312901e324..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md
+++ /dev/null
@@ -1,587 +0,0 @@
----
-{
- "title": "BROKER LOAD",
- "language": "en"
-}
----
-
-
-
-# BROKER LOAD
-## description
-
- Broker load will load data into Doris via Broker.
- Use `show broker;` to see the Broker deployed in cluster.
-
- Support following data sources:
-
- 1. Baidu HDFS: hdfs for Baidu. Only be used inside Baidu.
- 2. Baidu AFS: afs for Baidu. Only be used inside Baidu.
- 3. Baidu Object Storage(BOS): BOS on Baidu Cloud.
- 4. Apache HDFS.
- 5. Amazon S3: Amazon S3.
-
-### Syntax:
-
- LOAD LABEL load_label
- (
- data_desc1[, data_desc2, ...]
- )
- WITH [BROKER broker_name | S3]
- [load_properties]
- [opt_properties];
-
- 1. load_label
-
- Unique load label within a database.
- syntax:
- [database_name.]your_label
-
- 2. data_desc
-
- To describe the data source.
- syntax:
- [MERGE|APPEND|DELETE]
- DATA INFILE
- (
- "file_path1"[, file_path2, ...]
- )
- [NEGATIVE]
- INTO TABLE `table_name`
- [PARTITION (p1, p2)]
- [COLUMNS TERMINATED BY "column_separator"]
- [FORMAT AS "file_type"]
- [(column_list)]
- [SET (k1 = func(k2))]
- [PRECEDING FILTER predicate]
- [WHERE predicate]
- [DELETE ON label=true]
- [read_properties]
-
- Explain:
- file_path:
-
- File path. Support wildcard. Must match to file, not directory.
-
- PARTITION:
-
-            Data will only be loaded to specified partitions. Data out of the partition's range will be filtered. If not specified, all partitions will be loaded.
-
- NEGATIVE:
-
- If this parameter is specified, it is equivalent to importing a batch of "negative" data to offset the same batch of data loaded before.
-
- This parameter applies only to the case where there are value columns and the aggregation type of value columns is only SUM.
-
- column_separator:
-
- Used to specify the column separator in the import file. Default is `\t`.
- If the character is invisible, it needs to be prefixed with `\\x`, using hexadecimal to represent the separator.
-
-            For example, the separator `\x01` of the hive file is specified as `\\x01`
-
- file_type:
-
- Used to specify the type of imported file, such as parquet, orc, csv. Default values are determined by the file suffix name.
-
- column_list:
-
- Used to specify the correspondence between columns in the import file and columns in the table.
-
- When you need to skip a column in the import file, specify it as a column name that does not exist in the table.
-
- syntax:
- (col_name1, col_name2, ...)
-
- SET:
-
- If this parameter is specified, a column of the source file can be transformed according to a function, and then the transformed result can be loaded into the table. The grammar is `column_name = expression`. Some examples are given to help understand.
-
-            Example 1: The table has three columns "c1, c2, c3". The first two columns of the source file map to (c1, c2) in order, and the sum of the last two columns maps to c3. Then columns (c1, c2, tmp_c3, tmp_c4) SET (c3 = tmp_c3 + tmp_c4) should be specified.
-
- Example 2: There are three columns "year, month, day" in the table. There is only one time column in the source file, in the format of "2018-06-01:02:03". Then you can specify columns (tmp_time) set (year = year (tmp_time), month = month (tmp_time), day = day (tmp_time)) to complete the import.
-
- PRECEDING FILTER predicate:
-
- Used to filter original data. The original data is the data without column mapping and transformation. The user can filter the data before conversion, select the desired data, and then perform the conversion.
-
- WHERE:
-
- After filtering the transformed data, data that meets where predicates can be loaded. Only column names in tables can be referenced in WHERE statements.
-
- merge_type:
-
-            The data merge type. Three types are supported: APPEND, DELETE, and MERGE. APPEND is the default, meaning this batch of data is appended to the existing data. DELETE means all rows with the same keys as this batch of data are deleted. MERGE must be used together with a DELETE ON condition: rows that satisfy the DELETE ON condition are processed with DELETE semantics, and the rest with APPEND semantics.
-
- delete_on_predicates:
-
- Only used when merge type is MERGE
-
- read_properties:
-
- Used to specify some special parameters.
- Syntax:
- [PROPERTIES ("key"="value", ...)]
-
- You can specify the following parameters:
-
-            line_delimiter: Used to specify the line delimiter in the load file. The default is `\n`. You can use a combination of multiple characters as the line delimiter.
-
-            fuzzy_parse: Boolean type. true indicates that the json schema is parsed from the first line, which can make the import faster, but requires all keys to keep the same order as in the first line. The default value is false. Only used for the json format.
-
- jsonpaths: There are two ways to import json: simple mode and matched mode.
- simple mode: it is simple mode without setting the jsonpaths parameter. In this mode, the json data is required to be the object type. For example:
- {"k1": 1, "k2": 2, "k3": "hello"}, where k1, k2, k3 are column names.
-
-                matched mode: the json data is relatively complex, and the corresponding values need to be matched through the jsonpaths parameter (see the sketch after this parameter list).
-
- strip_outer_array: Boolean type, true to indicate that json data starts with an array object and flattens objects in the array object, default value is false. For example:
- [
- {"k1" : 1, "v1" : 2},
- {"k1" : 3, "v1" : 4}
- ]
-                if strip_outer_array is true, two rows of data are generated when imported into Doris.
-
- json_root: json_root is a valid JSONPATH string that specifies the root node of the JSON Document. The default value is "".
-
-            num_as_string: Boolean type. true means that numeric values in the json data are parsed as strings, so they can be imported without loss of precision.
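-
-            For example (an illustrative sketch, not taken from the original manual), if the source json looks like
-                {"id": 1, "content": {"city": "beijing"}}
-            the matched mode could declare the columns (id, city) in the column list and set
-                properties("jsonpaths" = "[\"$.id\", \"$.content.city\"]")
-            so that $.id maps to the first declared column and $.content.city to the second.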
-
- 3. broker_name
-
- The name of the Broker used can be viewed through the `show broker` command.
-
- 4. load_properties
-
-        Used to provide the information the Broker needs to access the data source. Different brokers and different access methods require different information.
-
- 4.1. Baidu HDFS/AFS
-
- Access to Baidu's internal hdfs/afs currently only supports simple authentication, which needs to be provided:
-
- username: hdfs username
- password: hdfs password
-
- 4.2. BOS
-
- bos_endpoint.
- bos_accesskey: cloud user's accesskey
- bos_secret_accesskey: cloud user's secret_accesskey
-
- 4.3. Apache HDFS
-
- Community version of HDFS supports simple authentication, Kerberos authentication, and HA configuration.
-
- Simple authentication:
- hadoop.security.authentication = simple (default)
- username: hdfs username
- password: hdfs password
-
- kerberos authentication:
- hadoop.security.authentication = kerberos
- kerberos_principal: kerberos's principal
-            kerberos_keytab: The path of the Kerberos keytab file. This file must be accessible to the Broker
-            kerberos_keytab_content: The base64-encoded content of the Kerberos keytab file. Only one of this option and kerberos_keytab needs to be configured.
-
- namenode HA:
- By configuring namenode HA, new namenode can be automatically identified when the namenode is switched
- dfs.nameservices: hdfs service name, customize, eg: "dfs.nameservices" = "my_ha"
-            dfs.ha.namenodes.xxx: Custom namenode names, separated by commas, where xxx is the custom name from dfs.nameservices, such as "dfs.ha.namenodes.my_ha" = "my_nn"
-            dfs.namenode.rpc-address.xxx.nn: The RPC address of a namenode, where nn is a namenode name configured in dfs.ha.namenodes.xxx, such as: "dfs.namenode.rpc-address.my_ha.my_nn" = "host:port"
-            dfs.client.failover.proxy.provider: The provider used by the client to connect to the namenode, by default: org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider.
- 4.4. Amazon S3
-
-        fs.s3a.access.key: the Amazon S3 access key
-        fs.s3a.secret.key: the Amazon S3 secret key
-        fs.s3a.endpoint: the Amazon S3 endpoint
- 4.5. If using the S3 protocol to directly connect to the remote storage, you need to specify the following attributes
-
- (
- "AWS_ENDPOINT" = "",
- "AWS_ACCESS_KEY" = "",
- "AWS_SECRET_KEY"="",
- "AWS_REGION" = ""
- )
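-
-        For example, a minimal sketch of a load over the S3 protocol (the bucket, endpoint, keys and region below are placeholders, not values from this manual):
-
-        LOAD LABEL example_db.label_s3
-        (
-            DATA INFILE("s3://your_bucket/input/file.csv")
-            INTO TABLE `my_table`
-            COLUMNS TERMINATED BY ","
-        )
-        WITH S3
-        (
-            "AWS_ENDPOINT" = "http://s3.example.com",
-            "AWS_ACCESS_KEY" = "your_ak",
-            "AWS_SECRET_KEY" = "your_sk",
-            "AWS_REGION" = "your_region"
-        )
-        PROPERTIES
-        (
-            "timeout" = "3600"
-        );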
- 4.6. if using load with hdfs, you need to specify the following attributes
- (
- "fs.defaultFS" = "",
- "hdfs_user"="",
- "dfs.nameservices"="my_ha",
- "dfs.ha.namenodes.xxx"="my_nn1,my_nn2",
- "dfs.namenode.rpc-address.xxx.my_nn1"="host1:port",
- "dfs.namenode.rpc-address.xxx.my_nn2"="host2:port",
- "dfs.client.failover.proxy.provider.xxx"="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
- )
- fs.defaultFS: defaultFS
- hdfs_user: hdfs user
- namenode HA:
- By configuring namenode HA, new namenode can be automatically identified when the namenode is switched
- dfs.nameservices: hdfs service name, customize, eg: "dfs.nameservices" = "my_ha"
-        dfs.ha.namenodes.xxx: Custom namenode names, separated by commas, where xxx is the custom name from dfs.nameservices, such as "dfs.ha.namenodes.my_ha" = "my_nn"
-        dfs.namenode.rpc-address.xxx.nn: The RPC address of a namenode, where nn is a namenode name configured in dfs.ha.namenodes.xxx, such as: "dfs.namenode.rpc-address.my_ha.my_nn" = "host:port"
-        dfs.client.failover.proxy.provider: The provider used by the client to connect to the namenode, by default: org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider.
-
- 5. opt_properties
-
- Used to specify some special parameters.
- Syntax:
- [PROPERTIES ("key"="value", ...)]
-
- You can specify the following parameters:
-
-        timeout: Specifies the timeout of the import operation, in seconds. The default timeout is 4 hours.
-
-        max_filter_ratio: The maximum ratio of data that can be filtered out (for reasons such as data irregularity). The default is zero tolerance.
-
-        exec_mem_limit: Memory limit. Default is 2GB. Unit is Bytes.
-
- strict_mode: Whether the data is strictly restricted. The default is false.
-
- timezone: Specify time zones for functions affected by time zones, such as strftime/alignment_timestamp/from_unixtime, etc. See the documentation for details. If not specified, use the "Asia/Shanghai" time zone.
-
- send_batch_parallelism: Used to set the default parallelism for sending batch, if the value for parallelism exceed `max_send_batch_parallelism_per_job` in BE config, then the coordinator BE will use the value of `max_send_batch_parallelism_per_job`.
-
- load_to_single_tablet: Boolean type, True means that one task can only load data to one tablet in the corresponding partition at a time. The default value is false. The number of tasks for the job depends on the overall concurrency. This parameter can only be set when loading data into the OLAP table with random partition.
-
- 6. Load data format sample
-
- Integer(TINYINT/SMALLINT/INT/BIGINT/LARGEINT): 1, 1000, 1234
- Float(FLOAT/DOUBLE/DECIMAL): 1.1, 0.23, .356
- Date(DATE/DATETIME): 2017-10-03, 2017-06-13 12:34:03.
- (Note: If it's in other date formats, you can use strftime or time_format functions to convert in the import command)
-
- String(CHAR/VARCHAR): "I am a student", "a"
- NULL: \N
-
-## example
-
- 1. Load a batch of data from HDFS, specify timeout and filtering ratio. Use the broker with the plaintext ugi my_hdfs_broker. Simple authentication.
-
- LOAD LABEL example_db.label1
- (
- DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
- INTO TABLE `my_table`
- )
- WITH BROKER my_hdfs_broker
- (
- "username" = "hdfs_user",
- "password" = "hdfs_passwd"
- )
- PROPERTIES
- (
- "timeout" = "3600",
- "max_filter_ratio" = "0.1"
- );
-
- Where hdfs_host is the host of the namenode and hdfs_port is the fs.defaultFS port (default 9000)
-
- 2. Load a batch of data from AFS contains multiple files. Import different tables, specify separators, and specify column correspondences.
-
- LOAD LABEL example_db.label2
- (
- DATA INFILE("afs://afs_host:hdfs_port/user/palo/data/input/file1")
- INTO TABLE `my_table_1`
- COLUMNS TERMINATED BY ","
- (k1, k3, k2, v1, v2),
- DATA INFILE("afs://afs_host:hdfs_port/user/palo/data/input/file2")
- INTO TABLE `my_table_2`
- COLUMNS TERMINATED BY "\t"
- (k1, k2, k3, v2, v1)
- )
- WITH BROKER my_afs_broker
- (
- "username" = "afs_user",
- "password" = "afs_passwd"
- )
- PROPERTIES
- (
- "timeout" = "3600",
- "max_filter_ratio" = "0.1"
- );
-
-
- 3. Load a batch of data from HDFS, specify hive's default delimiter \\x01, and use wildcard * to specify all files in the directory. Use simple authentication and configure namenode HA at the same time
-
- LOAD LABEL example_db.label3
- (
- DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/*")
- INTO TABLE `my_table`
- COLUMNS TERMINATED BY "\\x01"
- )
- WITH BROKER my_hdfs_broker
- (
- "username" = "hdfs_user",
- "password" = "hdfs_passwd",
- "dfs.nameservices" = "my_ha",
- "dfs.ha.namenodes.my_ha" = "my_namenode1, my_namenode2",
- "dfs.namenode.rpc-address.my_ha.my_namenode1" = "nn1_host:rpc_port",
- "dfs.namenode.rpc-address.my_ha.my_namenode2" = "nn2_host:rpc_port",
- "dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
- )
-
- 4. Load a batch of "negative" data from HDFS. Use Kerberos authentication to provide KeyTab file path.
-
- LOAD LABEL example_db.label4
- (
-        DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/old_file")
- NEGATIVE
- INTO TABLE `my_table`
- COLUMNS TERMINATED BY "\t"
- )
- WITH BROKER my_hdfs_broker
- (
- "hadoop.security.authentication" = "kerberos",
- "kerberos_principal"="doris@YOUR.COM",
- "kerberos_keytab"="/home/palo/palo.keytab"
- )
-
- 5. Load a batch of data from HDFS, specify partition. At the same time, use Kerberos authentication mode. Provide the KeyTab file content encoded by base64.
-
- LOAD LABEL example_db.label5
- (
- DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
- INTO TABLE `my_table`
- PARTITION (p1, p2)
- COLUMNS TERMINATED BY ","
- (k1, k3, k2, v1, v2)
- )
- WITH BROKER my_hdfs_broker
- (
- "hadoop.security.authentication"="kerberos",
- "kerberos_principal"="doris@YOUR.COM",
- "kerberos_keytab_content"="BQIAAABEAAEACUJBSURVLkNPTQAEcGFsbw"
- )
-
- 6. Load a batch of data from BOS, specify partitions, and make some transformations to the columns of the imported files, as follows:
-
- Table schema:
- k1 varchar(20)
- k2 int
-
- Assuming that the data file has only one row of data:
-
- Adele,1,1
-
- The columns in the data file correspond to the columns specified in the load statement:
-
- k1,tmp_k2,tmp_k3
-
- transform as:
-
- 1) k1: unchanged
- 2) k2: sum of tmp_k2 and tmp_k3
-
- LOAD LABEL example_db.label6
- (
- DATA INFILE("bos://my_bucket/input/file")
- INTO TABLE `my_table`
- PARTITION (p1, p2)
- COLUMNS TERMINATED BY ","
- (k1, tmp_k2, tmp_k3)
- SET (
- k2 = tmp_k2 + tmp_k3
- )
- )
- WITH BROKER my_bos_broker
- (
- "bos_endpoint" = "http://bj.bcebos.com",
- "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx",
- "bos_secret_accesskey"="yyyyyyyyyyyyyyyyyyyy"
- )
-
- 7. Load data into tables containing HLL columns, which can be columns in tables or columns in data
-
-        Suppose there are 4 columns in the table: (id, v1, v2, v3), where v1 and v2 are hll columns. The source file has 3 columns: the first column of the table = the first column of the source file, the second and third columns of the table are transformed from the second and third columns of the source file, and the fourth column of the table does not exist in the source file.
- Then (column_list) declares that the first column is id, and the second and third columns are temporarily named k1, k2.
-
-        In SET, the HLL columns in the table must be generated with hll_hash: the v1 column of the table equals hll_hash(k1) of the original data. The v3 column of the table has no corresponding value in the original data, so empty_hll() is used to fill in its default value.
-
- LOAD LABEL example_db.label7
- (
- DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
- INTO TABLE `my_table`
- PARTITION (p1, p2)
- COLUMNS TERMINATED BY ","
- (id, k1, k2)
- SET (
- v1 = hll_hash(k1),
- v2 = hll_hash(k2),
- v3 = empty_hll()
- )
- )
- WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password");
-
- LOAD LABEL example_db.label8
- (
- DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
- INTO TABLE `my_table`
- PARTITION (p1, p2)
- COLUMNS TERMINATED BY ","
- (k1, k2, tmp_k3, tmp_k4, v1, v2)
- SET (
- v1 = hll_hash(tmp_k3),
- v2 = hll_hash(tmp_k4)
- )
- )
- WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password");
-
-    8. Load data from a Parquet file, specifying FORMAT as parquet. By default it is judged by the file suffix.
-
- LOAD LABEL example_db.label9
- (
- DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
- INTO TABLE `my_table`
- FORMAT AS "parquet"
- (k1, k2, k3)
- )
- WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password");
-
- 9. Extract partition fields in file paths
-
- If necessary, partitioned fields in the file path are resolved based on the field type defined in the table, similar to the Partition Discovery function in Spark.
-
- LOAD LABEL example_db.label10
- (
- DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/dir/city=beijing/*/*")
- INTO TABLE `my_table`
- FORMAT AS "csv"
- (k1, k2, k3)
- COLUMNS FROM PATH AS (city, utc_date)
- SET (uniq_id = md5sum(k1, city))
- )
- WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password");
-
- Directory `hdfs://hdfs_host:hdfs_port/user/palo/data/input/dir/city=beijing` contains following files:
-
- [hdfs://hdfs_host:hdfs_port/user/palo/data/input/dir/city=beijing/utc_date=2019-06-26/0000.csv, hdfs://hdfs_host:hdfs_port/user/palo/data/input/dir/city=beijing/utc_date=2019-06-26/0001.csv, ...]
-
- Extract city and utc_date fields in the file path
-
-    10. Filter the data to be loaded: only rows whose k1 value is greater than the k2 value are imported.
-
- LOAD LABEL example_db.label10
- (
- DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
- INTO TABLE `my_table`
- where k1 > k2
- );
-
- 11. Extract date partition fields in file paths, and date time include %3A (in hdfs path, all ':' will be replaced by '%3A')
-
- Assume we have files:
-
- /user/data/data_time=2020-02-17 00%3A00%3A00/test.txt
- /user/data/data_time=2020-02-18 00%3A00%3A00/test.txt
-
- Table schema is:
- data_time DATETIME,
- k2 INT,
- k3 INT
-
- LOAD LABEL example_db.label12
- (
- DATA INFILE("hdfs://host:port/user/data/*/test.txt")
- INTO TABLE `tbl12`
- COLUMNS TERMINATED BY ","
- (k2,k3)
- COLUMNS FROM PATH AS (data_time)
- SET (data_time=str_to_date(data_time, '%Y-%m-%d %H%%3A%i%%3A%s'))
- )
- WITH BROKER "hdfs" ("username"="user", "password"="pass");
-
-    12. Load a batch of data from HDFS, specify timeout and filtering ratio. Use the broker with the plaintext ugi my_hdfs_broker. Simple authentication. Delete the rows where v2 > 100, append the rest.
-
- LOAD LABEL example_db.label1
- (
- MERGE DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
- INTO TABLE `my_table`
- COLUMNS TERMINATED BY "\t"
- (k1, k2, k3, v2, v1)
- )
- DELETE ON v2 >100
- WITH BROKER my_hdfs_broker
- (
- "username" = "hdfs_user",
- "password" = "hdfs_passwd"
- )
- PROPERTIES
- (
- "timeout" = "3600",
- "max_filter_ratio" = "0.1"
- );
-
- 13. Filter the original data first, and perform column mapping, conversion and filtering operations
-
- LOAD LABEL example_db.label_filter
- (
- DATA INFILE("hdfs://host:port/user/data/*/test.txt")
- INTO TABLE `tbl1`
- COLUMNS TERMINATED BY ","
- (k1,k2,v1,v2)
- SET (k1 = k1 +1)
- PRECEDING FILTER k1 > 2
- WHERE k1 > 3
- )
- with BROKER "hdfs" ("username"="user", "password"="pass");
-
-    14. Import data from a json file, specifying the format as json (by default it is judged by the file suffix), and set parameters for reading the data
-
- LOAD LABEL example_db.label9
- (
- DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
- INTO TABLE `my_table`
- FORMAT AS "json"
- (k1, k2, k3)
- properties("fuzzy_parse"="true", "strip_outer_array"="true")
- )
- WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password");
-
- 15. LOAD WITH HDFS, normal HDFS cluster
- LOAD LABEL example_db.label_filter
- (
- DATA INFILE("hdfs://host:port/user/data/*/test.txt")
- INTO TABLE `tbl1`
- COLUMNS TERMINATED BY ","
- (k1,k2,v1,v2)
- )
- with HDFS (
- "fs.defaultFS"="hdfs://testFs",
- "hdfs_user"="user"
- );
- 16. LOAD WITH HDFS, hdfs ha
- LOAD LABEL example_db.label_filter
- (
- DATA INFILE("hdfs://host:port/user/data/*/test.txt")
- INTO TABLE `tbl1`
- COLUMNS TERMINATED BY ","
- (k1,k2,v1,v2)
- )
- with HDFS (
- "fs.defaultFS"="hdfs://testFs",
- "hdfs_user"="user",
- "dfs.nameservices"="my_ha",
- "dfs.ha.namenodes.xxx"="my_nn1,my_nn2",
- "dfs.namenode.rpc-address.xxx.my_nn1"="host1:port",
- "dfs.namenode.rpc-address.xxx.my_nn2"="host2:port",
- "dfs.client.failover.proxy.provider.xxx"="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
- );
-
-## keyword
-
- BROKER,LOAD
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL DELETE.md b/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL DELETE.md
deleted file mode 100644
index 0ab4c63888..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL DELETE.md
+++ /dev/null
@@ -1,36 +0,0 @@
----
-{
- "title": "CANCEL DELETE",
- "language": "en"
-}
----
-
-
-
-# CANCEL DELETE
-Description
-
-This statement is used to undo a DELETE operation. (Administrator only!) (To be implemented)
-
-## example
-
-## keyword
-CANCEL,DELETE
-
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL LABEL.md b/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL LABEL.md
deleted file mode 100644
index 990f3b4331..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL LABEL.md
+++ /dev/null
@@ -1,53 +0,0 @@
----
-{
- "title": "Cancel Label",
- "language": "en"
-}
----
-
-
-
-# Cancel Label
-Description
-NAME:
-cancel_label: cancel a transaction with label
-
-SYNOPSIS
-curl -u user:passwd -XPOST http://host:port/api/{db}/{label}/_cancel
-
-DESCRIPTION
-This command is used to cancel a transaction corresponding to the specified label; a transaction in the PREPARE phase can be successfully cancelled.
-
-RETURN VALUES
-When the execution is complete, the relevant content of this import will be returned in Json format. Currently includes the following fields
-Status: Whether the cancel succeeded
-Success: The transaction was cancelled successfully
-Others: The cancel failed
-Message: Specific failure information
-
-ERRORS
-
-## example
-
-1. Cancel the transaction with label testLabel in testDb
-curl -u root -XPOST http://host:port/api/testDb/testLabel/_cancel
-
-## keyword
-Cancel, Label
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD.md
deleted file mode 100644
index 3a122d4bcb..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD.md
+++ /dev/null
@@ -1,45 +0,0 @@
----
-{
- "title": "CANCEL LOAD",
- "language": "en"
-}
----
-
-
-
-# CANCEL LOAD
-Description
-
-This statement is used to cancel the import job with the specified load label.
-This is an asynchronous operation, which returns once the task is submitted successfully. After execution, you can use the SHOW LOAD command to view progress.
-Grammar:
-CANCEL LOAD
-[FROM db_name]
-WHERE LABEL = "load_label";
-
-## example
-
-1. Cancel the import job with label example_db_test_load_label on the database example_db
-CANCEL LOAD
-FROM example_db
-WHERE LABEL = "example_db_test_load_label";
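-
-After the cancel request is submitted, you can check the job state with SHOW LOAD (a sketch; the exact output columns depend on the version):
-SHOW LOAD FROM example_db WHERE LABEL = "example_db_test_load_label";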
-
-## keyword
-CANCEL,LOAD
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md b/docs/en/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md
deleted file mode 100644
index 9a46486c87..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md
+++ /dev/null
@@ -1,165 +0,0 @@
----
-{
- "title": "CREATE SYNC JOB",
- "language": "en"
-}
----
-
-
-
-# CREATE SYNC JOB
-
-## description
-
-The sync job feature lets you submit a resident SyncJob which, by reading the binlog from the specified remote address, captures the user's update operations in a MySQL database (CDC, change data capture).
-
-At present, data synchronization only supports Canal: the parsed binlog is obtained from the Canal server and loaded into Doris.
-
-You can view the SyncJob's status by command 'SHOW SYNC JOB'.
-
-Syntax:
-
-```
-CREATE SYNC [db.]job_name
- (
- channel_desc,
- channel_desc
- ...
- )
-binlog_desc
-```
-
-1. `job_name`
-
-    job_name is the unique identifier of the SyncJob in the current database. Only one SyncJob with a given job name can be running at the same time.
-
-2. `channel_desc`
-
- The data channel under the job is used to describe the mapping relationship between the MySQL source table and the Doris target table.
-
- Syntax:
-
- ```
- FROM mysql_db.src_tbl INTO des_tbl
- [partitions]
- [columns_mapping]
- ```
-
- 1. `mysql_db.src_tbl`
-
- Specify the database and source table on the MySQL side.
-
- 2. `des_tbl`
-
-        Specify the target table on the Doris side. Only unique tables are supported, and the batch delete feature of the table must be enabled (see the sketch after the examples below).
-
- 3. `partitions`
-
-        Specify which partitions of the target table to load into. If not specified, data is automatically loaded into the corresponding partitions.
-
- Example:
-
- ```
- PARTITION(p1, p2, p3)
- ```
-
- 4. `column_mapping`
-
-        Specify the mapping relationship between the columns of the MySQL source table and the Doris target table. If not specified, FE assumes that the columns of the source table and the target table correspond one by one in order.
-
-        Columns in the form `col_name = expr` are not supported.
-
- Example:
-
- ```
- Suppose the columns of target table are (K1, K2, V1),
-
- Change the order of columns K1 and K2
- COLUMNS(k2, k1, v1)
-
- Ignore the fourth column of the source data
- COLUMNS(k2, k1, v1, dummy_column)
- ```
-
-3. `binlog_desc`
-
- It is used to describe remote data sources. Currently, only canal is supported.
-
- Syntax:
-
- ```
- FROM BINLOG
- (
- "key1" = "value1",
- "key2" = "value2"
- )
- ```
-
- 1. The attribute related to the canal is prefixed with `canal.`
-
- 1. canal.server.ip: the address of the canal server
- 2. canal.server.port: the port of canal server
- 3. canal.destination: Identifier of instance
- 4. canal.batchSize: the maximum batch size. The default is 8192
- 5. canal.username: the username of instance
- 6. canal.password: password of instance
- 7. canal.debug: optional. When set to true, the details of each batch and each row will be printed.
-
-## example
-
-1. create a sync job named `job1` for target table `test_tbl` in `test_db`, connects to the local canal server, and corresponds to the MySQL source table `mysql_db1.tbl1`
-
- CREATE SYNC `test_db`.`job1`
- (
- FROM `mysql_db1`.`tbl1` INTO `test_tbl `
- )
- FROM BINLOG
- (
- "type" = "canal",
- "canal.server.ip" = "127.0.0.1",
- "canal.server.port" = "11111",
- "canal.destination" = "example",
- "canal.username" = "",
- "canal.password" = ""
- );
-
-2. create a sync job named `job1` for multiple target tables in `test_db`, correspond to multiple MySQL source tables one by one, and explicitly specify column mapping.
-
- CREATE SYNC `test_db`.`job1`
- (
- FROM `mysql_db`.`t1` INTO `test1` COLUMNS(k1, k2, v1) PARTITIONS (p1, p2),
- FROM `mysql_db`.`t2` INTO `test2` COLUMNS(k3, k4, v2) PARTITION p1
- )
- FROM BINLOG
- (
- "type" = "canal",
- "canal.server.ip" = "xx.xxx.xxx.xx",
- "canal.server.port" = "12111",
- "canal.destination" = "example",
- "canal.username" = "username",
- "canal.password" = "password"
- );
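-
-As noted in the description above, the Doris target table must be a unique table with the batch delete feature enabled. A sketch of enabling it on an existing table (the table name is reused from example 1; treat this as illustrative):
-
-    ALTER TABLE `test_db`.`test_tbl` ENABLE FEATURE "BATCH_DELETE";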
-
-## keyword
-
- CREATE,SYNC,JOB,BINLOG
-
-
-
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/DELETE.md b/docs/en/sql-reference/sql-statements/Data Manipulation/DELETE.md
deleted file mode 100644
index be4c606cb3..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/DELETE.md
+++ /dev/null
@@ -1,66 +0,0 @@
----
-{
- "title": "DELETE",
- "language": "en"
-}
----
-
-
-
-# DELETE
-## Description
-
- This statement is used to conditionally delete data in the specified table (base index) partition.
- This action deletes the rollup index data associated with this base index at the same time.
- Grammar:
- DELETE FROM table_name [PARTITION partition_name]
- WHERE
- column_name1 op { value | value_list } [ AND column_name2 op { value | value_list } ...];
-
- Explain:
-        1) The supported types of op include: =, >, <, >=, <=, !=, in, not in
-        2) Only conditions on key columns can be specified.
-        3) When a selected key column does not exist in some rollup, the delete cannot be performed.
-        4) The relationship between conditions can only be "and".
-           If you want an "or" relationship, you need to split the conditions into two DELETE statements.
-        5) If it is a partitioned table, you can specify the partition. If not specified, and the session variable delete_without_partition is true, the delete will be applied to all partitions (see example 4 below). If it is a single-partition table, you do not need to specify it.
-
- Notice:
- This statement may reduce query efficiency for a period of time after execution.
- The degree of impact depends on the number of deletion conditions specified in the statement.
- The more conditions specified, the greater the impact.
-
-## example
-
-    1. Delete rows whose k1 column value is 3 in my_table partition p1
- DELETE FROM my_table PARTITION p1
- WHERE k1 = 3;
-
-    2. Delete rows whose k1 column value is greater than or equal to 3 and whose k2 column value is "abc" in my_table partition p1
- DELETE FROM my_table PARTITION p1
- WHERE k1 >= 3 AND k2 = "abc";
-
-    3. Delete rows whose k1 column value is greater than or equal to 3 and whose k2 column value is "abc" in my_table partitions p1 and p2
- DELETE FROM my_table PARTITIONS (p1, p2)
- WHERE k1 >= 3 AND k2 = "abc";
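-
-    4. With the session variable delete_without_partition set to true (a sketch based on the note above), delete rows whose k1 column value is 3 from all partitions of my_table without specifying a partition
-    SET delete_without_partition = true;
-    DELETE FROM my_table WHERE k1 = 3;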
-
-## keyword
- DELETE
-
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/EXPORT.md b/docs/en/sql-reference/sql-statements/Data Manipulation/EXPORT.md
deleted file mode 100644
index b1646c7972..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/EXPORT.md
+++ /dev/null
@@ -1,125 +0,0 @@
----
-{
- "title": "EXPORT",
- "language": "en"
-}
----
-
-
-
-# EXPORT
-## Description
-
- This statement is used to export data from a specified table to a specified location.
- This function is implemented by broker process. For different purpose storage systems, different brokers need to be deployed. Deployed brokers can be viewed through SHOW BROKER.
- This is an asynchronous operation, which returns if the task is submitted successfully. After execution, you can use the SHOW EXPORT command to view progress.
-
- Grammar:
- EXPORT TABLE table_name
-    [PARTITION (p1[, p2])]
- [WHERE [expr]]
- TO export_path
- [opt_properties]
- [broker|S3];
-
- 1. table_name
-      The name of the table to be exported. Currently, tables with engine OLAP or mysql can be exported.
-
- 2. partition
- You can export only certain specified partitions of the specified table
-
- 3. expr
- Export rows that meet the where condition, optional. If you leave it blank, all rows are exported by default.
-
- 4. export_path
- The exported path needs to be a directory. At present, it can't be exported to local, so it needs to be exported to broker.
-
- 5. opt_properties
- Used to specify some special parameters.
- Grammar:
- [PROPERTIES ("key"="value", ...)]
-
- The following parameters can be specified:
- label: The identifier of this export job. You can use this identifier to view the job status later.
-        column_separator: Specifies the exported column separator, defaulting to \t. Supports invisible characters, such as '\x07'.
-        columns: Specifies the columns to be exported, separated by commas. If this parameter is not set, all columns of the table are exported by default.
-        line_delimiter: Specifies the exported line separator, defaulting to \n. Supports invisible characters, such as '\x07'.
-        exec_mem_limit: The upper limit of memory usage on a single BE node for the export, in bytes, defaulting to 2GB.
-        timeout: The timeout of the export job, in seconds, defaulting to 1 day.
- tablet_num_per_task: The maximum number of tablets that each subtask can allocate.
-
- 6. broker|S3
- Specify to use broker export or export through S3 protocol
- Grammar:
- WITH [BROKER broker_name| S3] ("key"="value"[,...])
-        Here you need to specify the broker name and the required broker attributes. If you use the S3 protocol, you do not need to specify the broker name.
-
-        For brokers corresponding to different storage systems, the input parameters are different. For the specific parameters, refer to `help broker load`, the broker required properties.
- When exporting to local, you do not need to fill in this part.
-
- 7. hdfs
- Specify to use libhdfs export to hdfs
- Grammar:
- WITH HDFS ("key"="value"[,...])
-
- The following parameters can be specified:
-        fs.defaultFS: Set the fs, such as: hdfs://ip:port
-        hdfs_user: Specify the hdfs user name
-
-## example
-
- 1. Export all data from the testTbl table to HDFS
- EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" WITH BROKER "broker_name" ("username"="xxx", "password"="yyy");
-
- 2. Export partitions P1 and P2 from the testTbl table to HDFS
- EXPORT TABLE testTbl PARTITION (p1,p2) TO "hdfs://hdfs_host:port/a/b/c" WITH BROKER "broker_name" ("username"="xxx", "password"="yyy");
-
- 3. Export all data in the testTbl table to hdfs, using "," as column separator, and specify label
- EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("label" = "mylabel", "column_separator"=",") WITH BROKER "broker_name" ("username"="xxx", "password"="yyy");
-
- 4. Export the row meet condition k1 = 1 in the testTbl table to hdfs.
- EXPORT TABLE testTbl WHERE k1=1 TO "hdfs://hdfs_host:port/a/b/c" WITH BROKER "broker_name" ("username"="xxx", "password"="yyy");
-
- 5. Export all data in the testTbl table to the local.
- EXPORT TABLE testTbl TO "file:///home/data/a";
-
- 6. Export all data in the testTbl table to hdfs, using the invisible character "\x07" as the column and row separator.
- EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("column_separator"="\\x07", "line_delimiter" = "\\x07") WITH BROKER "broker_name" ("username"="xxx", "password"="yyy")
-
- 7. Export column k1, v1 from the testTbl to the local.
- EXPORT TABLE testTbl TO "file:///home/data/a" PROPERTIES ("columns" = "k1,v1");
-
- 8. Export all data in the testTbl table to hdfs, using the invisible character "\x07" as the column and row separator.
- EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("column_separator"="\\x07", "line_delimiter" = "\\x07") WITH HDFS ("fs.defaultFS"="hdfs://hdfs_host:port", "hdfs_user"="yyy")
-
- 9. Export all data in the testTbl table to the local , the first line represents the field name
- EXPORT TABLE testTbl TO "file:///home/data/a" PROPERTIES ("label" = "mylabel", "format"="csv_with_names");
-
- 10. Export all data in the testTbl table to the local, the first two lines represent the field name and type
- EXPORT TABLE testTbl TO "file:///home/data/a" PROPERTIES ("label" = "mylabel", "format"="csv_with_names_and_types");
-
- 11. Export all data in the testTbl table to the hdfs , the first line represents the field name
- EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("label" = "mylabel", "format"="csv_with_names") WITH BROKER "broker_name" ("username"="myname", "password"="mypassword");
-
- 12. Export all data in the testTbl table to the hdfs, the first two lines represent the field name and type
- EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("label" = "mylabel", "format"="csv_with_names_and_types") WITH BROKER "broker_name" ("username"="myname", "password"="mypassword");
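-
-    After submitting an export job, you can check its progress with SHOW EXPORT (a sketch; depending on the version you can usually also filter the output, e.g. by label or state):
-    SHOW EXPORT;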
-
-## keyword
- EXPORT
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/GET LABEL STATE.md b/docs/en/sql-reference/sql-statements/Data Manipulation/GET LABEL STATE.md
deleted file mode 100644
index 21b46c73b1..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/GET LABEL STATE.md
+++ /dev/null
@@ -1,58 +0,0 @@
----
-{
- "title": "GET LABEL STATE",
- "language": "en"
-}
----
-
-
-
-# GET LABEL STATE
-## Description
-NAME:
-get_label_state: get label's state
-
-SYNOPSIS
-curl -u user:passwd http://host:port/api/{db}/{label}/_state
-
-DESCRIPTION
-This command is used to view the transaction status of a Label
-
-RETURN VALUES
-After execution, the relevant content of this import will be returned in Json format. Currently includes the following fields
-Label: The imported label, if not specified, is a uuid.
-Status: Whether this command was successfully executed or not, Success indicates successful execution
-Message: Specific execution information
-State: It only makes sense if Status is Success
-UNKNOWN: No corresponding Label was found
-PREPARE: The corresponding transaction has been prepared, but not yet committed
-COMMITTED: The transaction has been committed and cannot be canceled
-VISIBLE: The transaction has been committed and the data is visible; it cannot be canceled
-ABORTED: The transaction has been rolled back and the import has failed.
-
-ERRORS
-
-## example
-
-1. Obtain the state of testDb, testLabel
-curl -u root http://host:port/api/testDb/testLabel/_state
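-
-A hypothetical response (the field values are illustrative only, not taken from a real cluster):
-{"Label":"testLabel","Status":"Success","Message":"","State":"VISIBLE"}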
-
-## keyword
-GET, LABEL, STATE
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/GROUP BY.md b/docs/en/sql-reference/sql-statements/Data Manipulation/GROUP BY.md
deleted file mode 100644
index 8fcb20e568..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/GROUP BY.md
+++ /dev/null
@@ -1,168 +0,0 @@
----
-{
- "title": "GROUP BY",
- "language": "en"
-}
----
-
-
-
-# GROUP BY
-
-## description
-
-    GROUP BY `GROUPING SETS` | `CUBE` | `ROLLUP` is an extension to the GROUP BY clause. This syntax lets you define multiple groupings in the same query. GROUPING SETS produces a single result set that is equivalent to a UNION ALL of differently grouped rows.
- For example GROUPING SETS clause:
-
- ```
- SELECT a, b, SUM( c ) FROM tab1 GROUP BY GROUPING SETS ( (a, b), (a), (b), ( ) );
- ```
-
- This statement is equivalent to:
-
- ```
- SELECT a, b, SUM( c ) FROM tab1 GROUP BY a, b
- UNION
- SELECT a, null, SUM( c ) FROM tab1 GROUP BY a
- UNION
- SELECT null, b, SUM( c ) FROM tab1 GROUP BY b
- UNION
- SELECT null, null, SUM( c ) FROM tab1
- ```
-
- `GROUPING(expr)` indicates whether a specified column expression in a GROUP BY list is aggregated or not. GROUPING returns 1 for aggregated or 0 for not aggregated in the result set.
-
- `GROUPING_ID(expr [ , expr [ , ... ] ])` describes which of a list of expressions are grouped in a row produced by a GROUP BY query. The GROUPING_ID function simply returns the decimal equivalent of the binary value formed as a result of the concatenation of the values returned by the GROUPING functions.
-
-### Syntax
-
- ```
- SELECT ...
- FROM ...
- [ ... ]
- GROUP BY [
- , ... |
- GROUPING SETS [, ...] ( groupSet [ , groupSet [ , ... ] ] ) |
- ROLLUP(expr [ , expr [ , ... ] ]) |
- expr [ , expr [ , ... ] ] WITH ROLLUP |
- CUBE(expr [ , expr [ , ... ] ]) |
- expr [ , expr [ , ... ] ] WITH CUBE
- ]
- [ ... ]
- ```
-
-### Parameters
-
-    `groupSet` is a set of expressions, columns or their aliases appearing in the query block's SELECT list. `groupSet ::= { ( expr [ , expr [ , ... ] ] )}`
-
-    `expr` is an expression, a column or its alias appearing in the query block's SELECT list.
-
-### Note
-
-    Doris supports PostgreSQL-like syntax, for example:
-
- ```
- SELECT a, b, SUM( c ) FROM tab1 GROUP BY GROUPING SETS ( (a, b), (a), (b), ( ) );
- SELECT a, b,c, SUM( d ) FROM tab1 GROUP BY ROLLUP(a,b,c)
- SELECT a, b,c, SUM( d ) FROM tab1 GROUP BY CUBE(a,b,c)
- ```
-
- `ROLLUP(a,b,c)` is equivalent to `GROUPING SETS` as follows:
-
- ```
- GROUPING SETS (
- (a,b,c),
- ( a, b ),
- ( a),
- ( )
- )
- ```
-
- `CUBE ( a, b, c )` is equivalent to `GROUPING SETS` as follows:
-
- ```
- GROUPING SETS (
- ( a, b, c ),
- ( a, b ),
- ( a, c ),
- ( a ),
- ( b, c ),
- ( b ),
- ( c ),
- ( )
- )
- ```
-
-## example
-
- This is a simple example
-
- ```
- > SELECT * FROM t;
- +------+------+------+
- | k1 | k2 | k3 |
- +------+------+------+
- | a | A | 1 |
- | a | A | 2 |
- | a | B | 1 |
- | a | B | 3 |
- | b | A | 1 |
- | b | A | 4 |
- | b | B | 1 |
- | b | B | 5 |
- +------+------+------+
- 8 rows in set (0.01 sec)
-
- > SELECT k1, k2, SUM(k3) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k2), (k1), ( ) );
- +------+------+-----------+
- | k1 | k2 | sum(`k3`) |
- +------+------+-----------+
- | b | B | 6 |
- | a | B | 4 |
- | a | A | 3 |
- | b | A | 5 |
- | NULL | B | 10 |
- | NULL | A | 8 |
- | a | NULL | 7 |
- | b | NULL | 11 |
- | NULL | NULL | 18 |
- +------+------+-----------+
- 9 rows in set (0.06 sec)
-
- > SELECT k1, k2, GROUPING_ID(k1,k2), SUM(k3) FROM t GROUP BY GROUPING SETS ((k1, k2), (k1), (k2), ());
- +------+------+---------------+----------------+
- | k1 | k2 | grouping_id(k1,k2) | sum(`k3`) |
- +------+------+---------------+----------------+
- | a | A | 0 | 3 |
- | a | B | 0 | 4 |
- | a | NULL | 1 | 7 |
- | b | A | 0 | 5 |
- | b | B | 0 | 6 |
- | b | NULL | 1 | 11 |
- | NULL | A | 2 | 8 |
- | NULL | B | 2 | 10 |
- | NULL | NULL | 3 | 18 |
- +------+------+---------------+----------------+
- 9 rows in set (0.02 sec)
- ```
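-
-    The `WITH ROLLUP` form from the syntax above can be used in the same way; for example (a sketch, equivalent to GROUPING SETS ( (k1, k2), (k1), ( ) )):
-
-    ```
-    SELECT k1, k2, SUM(k3) FROM t GROUP BY k1, k2 WITH ROLLUP;
-    ```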
-
-## keyword
-
- GROUP, GROUPING, GROUPING_ID, GROUPING_SETS, GROUPING SETS, CUBE, ROLLUP
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/LOAD.md
deleted file mode 100644
index 114af8331e..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/LOAD.md
+++ /dev/null
@@ -1,291 +0,0 @@
----
-{
- "title": "LOAD",
- "language": "en"
-}
----
-
-
-
-# LOAD
-## Description
-
-Palo currently supports the following import methods:
-
-1. Hadoop Load: Importing ETL based on MR.
-2. Broker Load: Use broker to import data.
-3. Mini Load: Upload files through HTTP protocol for batch data import.
-4. Stream Load: Stream data import through HTTP protocol.
-5. S3 Load: Directly access the storage system supporting the S3 protocol for data import through the S3 protocol. The import syntax is basically the same as that of Broker Load.
-
-This help mainly describes the first import method, Hadoop Load. For the other import methods, use the following commands to view their help:
-
-Note: this import method may no longer be supported in a subsequent version. It is recommended to use other import methods for data import.
-
-1. help broker load;
-2. help mini load;
-3. help stream load;
-
-Hadoop Load is only applicable to Baidu's internal environment. Public, private and open source environments cannot use this import approach.
-This import method requires a Hadoop compute queue to be set up for ETL; the queue can be viewed through the help set property command.
-
-Grammar:
-
-LOAD LABEL load_label
-(
-data_desc1[, data_desc2, ...]
-)
-[opt_properties];
-
-1. load label
-
-The label of the current imported batch. Unique in a database.
-Grammar:
-[database_name.]your_label
-
-2. data_desc
-
-Used to describe a batch of imported data.
-Grammar:
-DATA INFILE
-(
-"file_path1"[, file_path2, ...]
-)
-[NEGATIVE]
-INTO TABLE `table_name`
-[PARTITION (p1, p2)]
-[COLUMNS TERMINATED BY "column_separator"]
-[FORMAT AS "file_type"]
-[(column_list)]
-[SET (k1 = func(k2))]
-
-Explain:
-file_path:
-
-File paths can be specified to a file, or * wildcards can be used to specify all files in a directory. Wildcards must match to files, not directories.
-
-PARTITION:
-
-If this parameter is specified, only the specified partition will be imported, and data outside the imported partition will be filtered out.
-If not specified, all partitions of the table are imported by default.
-
-NEGATIVE:
-If this parameter is specified, it is equivalent to importing a batch of "negative" data. Used to offset the same batch of data imported before.
-This parameter applies only to the case where there are value columns and the aggregation type of value columns is SUM only.
-
-column_separator:
-
-Used to specify the column separator in the import file. The default is \t.
-If the character is invisible, it needs to be prefixed with \\x, using hexadecimal to represent the separator.
-For example, the separator \x01 of the hive file is specified as "\\x01"
-
-file_type:
-
-Used to specify the type of imported file, such as parquet, orc, csv. The default value is determined by the file suffix name.
-
-column_list:
-
-Used to specify the correspondence between columns in the import file and columns in the table.
-When you need to skip a column in the import file, specify it as a column name that does not exist in the table.
-Grammar:
-(col_name1, col_name2, ...)
-
-SET:
-
-If this parameter is specified, a column of the source file can be transformed according to a function, and then the transformed result can be imported into the table.
-The functions currently supported are:
-
-strftime(fmt, column): date conversion function
-fmt: the date format, such as %Y%m%d%H%M%S (year, month, day, hour, minute, second)
-column: a column in column_list, i.e. a column in the input file. The stored content should be a numeric timestamp.
-If there is no column_list, the columns of the input file are taken in the column order of the Palo table by default.
-
-time_format(output_fmt, input_fmt, column): date format conversion
-output_fmt: the date format after conversion, such as %Y%m%d%H%M%S (year, month, day, hour, minute, second)
-input_fmt: the date format of the column before conversion, such as %Y%m%d%H%M%S (year, month, day, hour, minute, second)
-column: a column in column_list, i.e. a column in the input file. The stored content should be a date string in the input_fmt format.
-If there is no column_list, the columns of the input file are taken in the column order of the Palo table by default.
-
-alignment_timestamp(precision, column): align the timestamp to the specified precision
-precision: year|month|day|hour
-column: a column in column_list, i.e. a column in the input file. The stored content should be a numeric timestamp.
-If there is no column_list, the columns of the input file are taken in the column order of the Palo table by default.
-Note: When the alignment precision is year or month, only timestamps in the range 20050101-20191231 are supported.
-
-default_value(value): sets the default value for an imported column
-If not specified, the default value of the column defined when the table was created is used
-
-md5sum(column1, column2, ...): computes the md5sum of the values of the specified imported columns, returning a 32-character hexadecimal string
-
-replace_value(old_value[, new_value]): replaces old_value in the import file with new_value
-If new_value is not specified, the default value of the column defined when the table was created is used
-
-hll_hash(column): used to transform a column in the table or in the data into the data structure of an HLL column
-
-3. opt_properties
-
-Used to specify some special parameters.
-Grammar:
-[PROPERTIES ("key"="value", ...)]
-
-The following parameters can be specified:
-cluster: The Hadoop compute queue used for the import.
-timeout: Specifies the timeout of the import operation. The default timeout is 3 days. Unit: seconds.
-max_filter_ratio: The maximum ratio of data that can be filtered out (for reasons such as data irregularity). The default is zero tolerance.
-load_delete_flag: Specifies whether this import deletes data by the key columns, which applies only to UNIQUE KEY tables.
-Value columns are not specified during the import. The default is false.
-
-5. Import data format sample
-
-Integer (TINYINT/SMALLINT/INT/BIGINT/LARGEINT): 1, 1000, 1234
-Floating point (FLOAT/DOUBLE/DECIMAL): 1.1, 0.23, .356
-Date (DATE/DATETIME): 2017-10-03, 2017-06-13 12:34:03.
-(Note: For other date formats, you can use strftime or time_format functions to convert in the import command)
-String (CHAR/VARCHAR): "I am a student", "a"
-NULL value: \N
-
-6. S3 Storage
- fs.s3a.access.key user AK,required
- fs.s3a.secret.key user SK,required
- fs.s3a.endpoint user endpoint,required
- fs.s3a.impl.disable.cache whether disable cache,default true,optional
-
-## example
-
-1. Import a batch of data, specify timeout time and filtering ratio. Specify the import queue as my_cluster.
-
-LOAD LABEL example_db.label1
-(
-DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
-INTO TABLE `my_table`
-)
-PROPERTIES
-(
-"cluster" ="my" cluster,
-Timeout ="3600",
-"max_filter_ratio" = "0.1"
-);
-
-Where hdfs_host is the host of the namenode and hdfs_port is the fs.defaultFS port (default 9000)
-
-2. Import a batch of data, including multiple files. Import different tables, specify separators, and specify column correspondences
-
-LOAD LABEL example_db.label2
-(
-DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file1")
-INTO TABLE `my_table_1`
-COLUMNS TERMINATED BY ","
-(k1, k3, k2, v1, v2),
-DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file2")
-INTO TABLE `my_table_2`
-COLUMNS TERMINATED BY "\t"
-(k1, k2, k3, v2, v1)
-);
-
-3. Import a batch of data, specify hive's default delimiter x01, and use wildcard * to specify all files in the directory
-
-LOAD LABEL example_db.label3
-(
-DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/*")
-NEGATIVE
-INTO TABLE `my_table`
-COLUMNS TERMINATED BY "\\x01"
-);
-
-4. Import a batch of "negative" data
-
-LOAD LABEL example_db.label4
-(
-DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/old_file")
-NEGATIVE
-INTO TABLE `my_table`
-COLUMNS TERMINATED BY "\t"
-);
-
-5. Import a batch of data and specify partitions
-
-LOAD LABEL example_db.label5
-(
-DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
-INTO TABLE `my_table`
-PARTITION (p1, P2)
-COLUMNS TERMINATED BY ","
-(k1, k3, k2, v1, v2)
-);
-
-6. Import a batch of data, specify partitions, and make some transformations to the columns of the imported files, as follows:
-The table structure is as follows:
-k1 datetime
-k2 date
-k3 bigint
-k4 varchar(20)
-k5 varchar(64)
-k6 int
-
-Assume that the data file has only one row of data, five columns, and comma-separated:
-
-1537002087,2018-08-09 11:12:13,1537002087,-,1
-
-The columns in the data file correspond to the columns specified in the import statement:
-tmp_k1, tmp_k2, tmp_k3, k6, v1
-
-The conversion is as follows:
-
-1) k1: Transform tmp_k1 timestamp column into datetime type data
-2) k2: Converting tmp_k2 datetime-type data into date data
-3) k3: Transform tmp_k3 timestamp column into day-level timestamp
-4) k4: Specify import default value of 1
-5) k5: Calculate MD5 values from tmp_k1, tmp_k2, tmp_k3 columns
-6) k6: Replace the - value in the imported file with 10
-
-LOAD LABEL example db.label6
-(
-DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
-INTO TABLE `my_table`
-PARTITION (p1, p2)
-COLUMNS TERMINATED BY ","
-(tmp_k1, tmp_k2, tmp_k3, k6, v1)
-SET (
-k1 = strftime("%Y-%m-%d %H:%M:%S", tmp_k1),
-k2 = time_format("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", tmp_k2),
-k3 = alignment_timestamp("day", tmp_k3),
-k4 = default_value("1"),
-k5 = md5sum(tmp_k1, tmp_k2, tmp_k3),
-k6 = replace_value("-", "10")
-)
-);
-
-7. Import data into tables containing HLL columns, which can be columns in tables or columns in data
-
-LOAD LABEL example db.label7
-(
-DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
-INTO TABLE `my_table`
-PARTITION (p1, p2)
-COLUMNS TERMINATED BY ","
-SET (
-v1 = hll_hash(k1),
-v2 = hll_hash(k2)
-)
-);
-
-## keyword
-LOAD
-
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/MINI LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/MINI LOAD.md
deleted file mode 100644
index 0547e95b07..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/MINI LOAD.md
+++ /dev/null
@@ -1,132 +0,0 @@
----
-{
- "title": "MINI LOAD",
- "language": "en"
-}
----
-
-
-
-# MINI LOAD
-## Description
-
-MINI LOAD and STREAM LOAD are implemented in exactly the same way. MINI LOAD is a subset of STREAM LOAD in terms of import support.
-New import features will only be added to STREAM LOAD; MINI LOAD will not gain new features. It is suggested to use STREAM LOAD instead. Please see HELP STREAM LOAD.
-
-MINI LOAD imports through the HTTP protocol. Users can import without relying on Hadoop or a MySQL client.
-The user describes the import through the HTTP protocol, and the data is streamed into Doris while the HTTP request is being received. After the import job is completed, the import result is returned to the user.
-
-* Note: In order to be compatible with the old version of mini load usage habits, users can still view the import results through the 'SHOW LOAD' command.
-
-Grammar:
-Import:
-
-curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table}/_load?label=xxx
-
-View import information
-
-curl -u user:passwd http://host:port/api/{db}/_load_info?label=xxx
-
-HTTP Protocol Specification
-
-Privilege authentication: Doris currently uses HTTP Basic authentication, so a username and password must be specified when importing.
-The password is passed in plaintext; encrypted transmission is not supported for the time being.
-
-Expect: the HTTP request sent to Doris needs to carry the 'Expect' header with the value '100-continue'.
-Why? Because the request may be redirected, the data body should not be transferred before the redirect.
-This avoids transmitting the data multiple times and thereby improves efficiency.
-
-Content-Length: the request sent to Doris needs to carry the 'Content-Length' header. If less content is sent than
-'Content-Length' declares, Doris considers it a transmission problem and the submission of the task fails.
-NOTE: If more data is sent than 'Content-Length', Doris reads only the first 'Content-Length'
-bytes of content and imports them.
-
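-As a hedged illustration of the two header requirements above: curl normally sets both 'Expect: 100-continue' and 'Content-Length' automatically for '-T' uploads, so listing the Expect header explicitly as below is optional. Host, port, database, table and label are placeholders.
-
-curl --location-trusted -u user:passwd -H "Expect: 100-continue" -T data.file http://host:port/api/testDb/testTbl/_load?label=xxx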
-
-Description of parameters:
-
-User: User is user_name if the user is in default_cluster. Otherwise, it is user_name@cluster_name.
-
-Label: The label used to specify this batch of imports for later job queries, etc.
-This parameter must be passed in.
-
-Columns: Used to describe the corresponding column names in the import file.
-If not passed in, the column order in the file is considered to be the same as the order in which the table was created.
-The columns are specified comma-separated, such as: columns=k1,k2,k3,k4
-
-Column_separator: Used to specify the separator between columns, default is '\t'
-NOTE: URL encoding is required, for example:
-If you need to specify '\t' as the separator, you should pass in 'column_separator=%09'
-If you need to specify '\x01' as the separator, you should pass in 'column_separator=%01'
-If you need to specify ',' as the separator, you should pass in 'column_separator=%2c'
-A full curl example using an encoded separator is shown below.
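-
-For instance, a minimal sketch of passing a tab separator in URL-encoded form (host, port, database, table, label and file name are placeholders):
-
-curl --location-trusted -u root -T testData "http://host:port/api/testDb/testTbl/_load?label=sep_demo&column_separator=%09"
-
-Quoting the whole URL also avoids having to escape '&' on the command line; the unquoted form would use '\&' as in the examples below.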
-
-
-Max_filter_ratio: Used to specify the maximum ratio of irregular data that is allowed to be filtered out. The default is 0, meaning no filtering is allowed.
-A custom value is specified as: 'max_filter_ratio=0.2', meaning that a 20% error rate is allowed.
-
-Timeout: Specifies the timeout time of the load job in seconds. When the load execution time exceeds this threshold, it is automatically cancelled. The default timeout time is 86400 seconds.
-It is recommended to specify a timeout time of less than 86400 seconds.
-
-Hll: Used to specify the correspondence between HLL columns in the table and columns in the data.
-(If 'columns' is not specified, the data columns referenced here can also be other non-HLL columns in the table.)
-Multiple HLL columns are separated by ':', and within each pair the table's HLL column and the data column are separated by ',', for example: 'hll1,cuid:hll2,device'
-
-NOTE:
-1. This method of importing is currently completed on a single machine, so it is not suitable to import a large amount of data.
-It is recommended that the amount of data imported should not exceed 1 GB.
-
-2. Currently, it is not possible to submit multiple files in the form `curl -T "{file1, file2}"`, because curl splits them into multiple
-requests. Multiple requests cannot share one label, so this cannot be used.
-
-3. Mini load imports in exactly the same synchronous way as stream load: the result is returned to the user after the streaming import completes.
-Although information about a mini load can still be found by subsequent queries, it cannot be operated on; such queries are only kept for compatibility with the old usage.
-
-4. When importing from the curl command line, you need to add escape before & or the parameter information will be lost.
-
-## example
-
-1. Import the data from the local file 'testData' into the table of 'testTbl' in the database 'testDb'(the user is in default_cluster)
-curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123
-
-2. Import the data from the local file 'testData' into the table 'testTbl' in the database 'testDb' (the user is in test_cluster). The timeout is 3600 seconds.
-curl --location-trusted -u root@test_cluster:root -T testData http://fe.host:port/api/testDb/testTbl/_load?label=123\&timeout=3600
-
-3. Import data from the local file 'testData' into the 'testTbl' table in the database 'testDb', allowing a 20% error rate (the user is in default_cluster)
-curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2
-
-4. Import the data from the local file 'testData' into the table 'testTbl' in the database 'testDb', allowing a 20% error rate, and specify the column name of the file (the user is in default_cluster)
-curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2\&columns=k1,k2,k3
-
-5. Import in streaming mode (user is in default_cluster)
-seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_load?label=123
-
-6. Import into a table containing HLL columns; the HLL columns can be generated from columns in the table or from columns in the data (the user is in default_cluster)
-
- curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2\&hll=hll_column1,k1:hll_column2,k2
- \&columns=k1,k2,k3
-
- curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2
- \&hll=hll_column1,tmp_k4:hll_column2,tmp_k5\&columns=k1,k2,k3,tmp_k4,tmp_k5
-
-7. View imports after submission
-
-curl -u root http://host:port/api/testDb/_load_info?label=123
-
-## keyword
-MINI, LOAD
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/MULTI LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/MULTI LOAD.md
deleted file mode 100644
index 78ff3edc20..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/MULTI LOAD.md
+++ /dev/null
@@ -1,107 +0,0 @@
----
-{
- "title": "MULTI LOAD",
- "language": "en"
-}
----
-
-
-
-# MULTI LOAD
-## Description
-
-Syntax:
-curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_start?label=xxx
-curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table1}/_load?label=xxx\&sub_label=yyy
-curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table2}/_load?label=xxx\&sub_label=zzz
-curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_commit?label=xxx
-curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_desc?label=xxx
-
-'MULTI LOAD' allows users to import into multiple tables at the same time, on the basis of 'MINI LOAD'. The specific commands are shown above.
-'/api/{db}/_multi_start' starts a multi-table import task
-'/api/{db}/{table}/_load' adds a table to be imported to an import task. The main difference from 'MINI LOAD' is that the 'sub_label' parameter needs to be passed in.
-'/api/{db}/_multi_commit' submits the entire multi-table import task and the background begins processing
-'/api/{db}/_multi_abort' Abandons a multi-table import task
-'/api/{db}/_multi_desc' shows the number of jobs submitted by a multi-table import task
-
-HTTP Protocol Specification
-Privilege authentication: Doris currently uses HTTP Basic authentication, so a username and password must be specified when importing.
-The password is passed in plaintext; this is acceptable for now since deployments are assumed to be in an intranet environment.
-
-Expect: the HTTP request sent to Doris needs to carry the 'Expect' header with the value '100-continue'.
-Why? Because the request may be redirected, the data body should not be transferred before the redirect.
-This avoids transmitting the data multiple times and thereby improves efficiency.
-
-Content-Length: the request sent to Doris needs to carry the 'Content-Length' header. If less content is sent than
-'Content-Length' declares, Doris considers it a transmission problem and the submission of the task fails.
-NOTE: If more data is sent than 'Content-Length', Doris reads only the first 'Content-Length'
-bytes of content and imports them.
-
-Description of parameters:
-User: User is user_name if the user is in default_cluster. Otherwise, it is user_name@cluster_name.
-
-Label: Used to specify the label number imported in this batch for later job status queries, etc.
-This parameter must be passed in.
-
-Sub_label: Used to specify a sub-label within a multi-table import task. For multi-table imports, this parameter must be passed in.
-
-Columns: Used to describe the corresponding column names in the import file.
-If not passed in, the column order in the file is considered to be the same as the order in which the table was created.
-The columns are specified comma-separated, such as: columns=k1,k2,k3,k4
-
-Column_separator: Used to specify the separator between columns, default is '\t'
-NOTE: URL encoding is required. For example, to specify '\t' as the separator,
-you should pass in 'column_separator=%09'
-
-Max_filter_ratio: Used to specify the maximum ratio of irregular data that is allowed to be filtered out. The default is 0, meaning no filtering is allowed.
-A custom value is specified as: 'max_filter_ratio=0.2', meaning that a 20% error rate is allowed.
-This parameter takes effect when passed in on the '_multi_start' call; see the example right after this list.
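-
-As a hedged sketch (host, port, database and label are placeholders), the filter ratio is passed on the '_multi_start' request, where it takes effect:
-
-curl --location-trusted -u root -XPOST "http://host:port/api/testDb/_multi_start?label=123&max_filter_ratio=0.2"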
-
-NOTE:
-1. This method of importing is currently completed on a single machine, so it is not suitable to import a large amount of data.
-It is recommended that the amount of data imported should not exceed 1GB
-
-2. Currently, it is not possible to submit multiple files in the form `curl -T "{file1, file2}"`, because curl splits them into multiple
-requests. Multiple requests cannot share one label, so this cannot be used.
-
-3. Streaming-style use of curl to import data into Doris is supported, but Doris will only start the real import
-after the streaming ends, and the amount of data imported this way should not be too large either.
-
-## example
-
-1. Import the data from the local file 'testData1' into the table 'testTbl1' in the database 'testDb', and
-import the data from 'testData2' into the table 'testTbl2' in 'testDb' (the user is in default_cluster)
-curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123
-curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1
-curl --location-trusted -u root -T testData2 http://host:port/api/testDb/testTbl2/_load?label=123\&sub_label=2
-curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_commit?label=123
-
-2. Abort a multi-table import midway (the user is in default_cluster)
-curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123
-curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1
-curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_abort?label=123
-
-3. Check how many jobs have been submitted in a multi-table import (the user is in default_cluster)
-curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123
-curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1
-curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_desc?label=123
-
-## keyword
-MULTI, MINI, LOAD
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/OUTFILE.md b/docs/en/sql-reference/sql-statements/Data Manipulation/OUTFILE.md
deleted file mode 100644
index 9a97dffbf6..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/OUTFILE.md
+++ /dev/null
@@ -1,207 +0,0 @@
----
-{
- "title": "OUTFILE",
-    "language": "en"
-}
----
-
-
-
-# OUTFILE
-## description
-
-    The `SELECT INTO OUTFILE` statement can export query results to a file. Currently it supports exporting to remote storage such as HDFS, S3, BOS and COS (Tencent Cloud) through the Broker process, or directly through the S3 or HDFS protocol. The syntax is as follows:
-
- Grammar:
- query_stmt
- INTO OUTFILE "file_path"
- [format_as]
- [properties]
-
- 1. file_path
- `file_path` specify the file path and file name prefix. Like: `hdfs://path/to/my_file_`.
-        The final file name will be assembled as `my_file_`, the file seq no and the format suffix. The file seq no starts from 0 and is determined by the number of splits.
-            my_file_abcdefg_0.csv
-            my_file_abcdefg_1.csv
-            my_file_abcdefg_2.csv
-
- 2. format_as
- FORMAT AS CSV
- Specify the export format. The default is CSV.
-
-
- 3. properties
- Specify the relevant attributes. Currently it supports exporting through the Broker process, or through the S3, HDFS protocol.
-
- Grammar:
- [PROPERTIES ("key"="value", ...)]
- The following parameters can be specified:
-        column_separator: Specifies the exported column separator, defaulting to \t. Supports invisible characters, such as '\x07'.
-        line_delimiter: Specifies the exported line delimiter, defaulting to \n. Supports invisible characters, such as '\x07'.
- max_file_size: max size for each file
-
- Broker related attributes need to be prefixed with `broker.`:
- broker.name: broker name
- broker.hadoop.security.authentication: Specify authentication as kerberos
- broker.kerberos_principal: Specify the principal of kerberos
- broker.kerberos_keytab: Specify the keytab path of kerberos, this file is the path on the broker.
-
-        When exporting directly via the HDFS protocol, the HDFS configuration can be specified directly:
- hdfs.fs.defaultFS: namenode ip and port
- hdfs.hdfs_user: hdfs user name
-
-        When exporting directly via the S3 protocol, the S3 configuration can be specified directly:
- AWS_ENDPOINT
- AWS_ACCESS_KEY
- AWS_SECRET_KEY
- AWS_REGION
-
-## example
-
- 1. Export simple query results to the file `hdfs://path/to/result.txt`. Specify the export format as CSV. Use `my_broker` and set kerberos authentication information. Specify the column separator as `,` and the line delimiter as `\n`.
- SELECT * FROM tbl
- INTO OUTFILE "hdfs://path/to/result_"
- FORMAT AS CSV
- PROPERTIES
- (
- "broker.name" = "my_broker",
- "broker.hadoop.security.authentication" = "kerberos",
- "broker.kerberos_principal" = "doris@YOUR.COM",
- "broker.kerberos_keytab" = "/home/doris/my.keytab",
- "column_separator" = ",",
- "line_delimiter" = "\n",
- "max_file_size" = "100MB"
- );
- If the result is less than 100MB, file will be: `result_0.csv`.
- If larger than 100MB, may be: `result_0.csv, result_1.csv, ...`.
-
- 2. Export simple query results to the file `hdfs://path/to/result.parquet`. Specify the export format as PARQUET. Use `my_broker` and set kerberos authentication information.
- SELECT c1, c2, c3 FROM tbl
- INTO OUTFILE "hdfs://path/to/result_"
- FORMAT AS PARQUET
- PROPERTIES
- (
- "broker.name" = "my_broker",
- "broker.hadoop.security.authentication" = "kerberos",
- "broker.kerberos_principal" = "doris@YOUR.COM",
- "broker.kerberos_keytab" = "/home/doris/my.keytab",
-    "schema"="required,int32,c1;required,byte_array,c2;required,byte_array,c3"
- );
- If the exported file format is PARQUET, `schema` must be specified.
-
- 3. Export the query result of the CTE statement to the file `hdfs://path/to/result.txt`. The default export format is CSV. Use `my_broker` and set hdfs high availability information. Use the default column separators and line delimiter.
- WITH
- x1 AS
- (SELECT k1, k2 FROM tbl1),
- x2 AS
- (SELECT k3 FROM tbl2)
-    SELECT k1 FROM x1 UNION SELECT k3 FROM x2
- INTO OUTFILE "hdfs://path/to/result_"
- PROPERTIES
- (
- "broker.name" = "my_broker",
- "broker.username"="user",
- "broker.password"="passwd",
- "broker.dfs.nameservices" = "my_ha",
- "broker.dfs.ha.namenodes.my_ha" = "my_namenode1, my_namenode2",
- "broker.dfs.namenode.rpc-address.my_ha.my_namenode1" = "nn1_host:rpc_port",
- "broker.dfs.namenode.rpc-address.my_ha.my_namenode2" = "nn2_host:rpc_port",
- "broker.dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
- );
- If the result is less than 1GB, file will be: `result_0.csv`.
- If larger than 1GB, may be: `result_0.csv, result_1.csv, ...`.
-
- 4. Export the query results of the UNION statement to the file `bos://bucket/result.parquet`. Specify the export format as PARQUET. Use `my_broker` and set hdfs high availability information. PARQUET format does not need to specify the column separator and line delimiter.
- SELECT k1 FROM tbl1 UNION SELECT k2 FROM tbl1
- INTO OUTFILE "bos://bucket/result_"
- FORMAT AS PARQUET
- PROPERTIES
- (
- "broker.name" = "my_broker",
- "broker.bos_endpoint" = "http://bj.bcebos.com",
- "broker.bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx",
- "broker.bos_secret_accesskey" = "yyyyyyyyyyyyyyyyyyyyyyyyyy",
- "schema"="required,int32,k1;required,byte_array,k2"
- );
-
- 5. Export simple query results to the file `cos://${bucket_name}/path/result.txt`. Specify the export format as CSV.
- And create a mark file after export finished.
- select k1,k2,v1 from tbl1 limit 100000
- into outfile "s3a://my_bucket/export/my_file_"
- FORMAT AS CSV
- PROPERTIES
- (
- "broker.name" = "hdfs_broker",
- "broker.fs.s3a.access.key" = "xxx",
- "broker.fs.s3a.secret.key" = "xxxx",
- "broker.fs.s3a.endpoint" = "https://cos.xxxxxx.myqcloud.com/",
- "column_separator" = ",",
- "line_delimiter" = "\n",
- "max_file_size" = "1024MB",
- "success_file_name" = "SUCCESS"
- )
- Please Note:
- 1. Paths that do not exist are automatically created.
-        2. These parameters (access.key/secret.key/endpoint) need to be confirmed with `Tencent Cloud COS`. In particular, the value of endpoint does not need to include the bucket_name.
-
- 6. Use the s3 protocol to export to bos, and concurrent export is enabled.
- set enable_parallel_outfile = true;
- select k1 from tb1 limit 1000
- into outfile "s3://my_bucket/export/my_file_"
- format as csv
- properties
- (
- "AWS_ENDPOINT" = "http://s3.bd.bcebos.com",
- "AWS_ACCESS_KEY" = "xxxx",
- "AWS_SECRET_KEY" = "xxx",
- "AWS_REGION" = "bd"
- )
- The final generated file prefix is `my_file_{fragment_instance_id}_`.
-
-    7. Use the s3 protocol to export to bos, with the concurrent-export session variable enabled.
- set enable_parallel_outfile = true;
- select k1 from tb1 order by k1 limit 1000
- into outfile "s3://my_bucket/export/my_file_"
- format as csv
- properties
- (
- "AWS_ENDPOINT" = "http://s3.bd.bcebos.com",
- "AWS_ACCESS_KEY" = "xxxx",
- "AWS_SECRET_KEY" = "xxx",
- "AWS_REGION" = "bd"
- )
-    But because the query statement has a top-level sorting node, this query cannot be exported concurrently even though the concurrent-export session variable is enabled.
-
- 8. Use libhdfs to export to hdfs cluster. Export the query results of the UNION statement to the file `hdfs://path/to/result.txt`
- Specify the export format as CSV. Use the user name as 'work', the column separators as ',' and line delimiter as '\n'.
- SELECT * FROM tbl
- INTO OUTFILE "hdfs://path/to/result_"
- FORMAT AS CSV
- PROPERTIES
- (
- "hdfs.fs.defaultFS" = "hdfs://ip:port",
- "hdfs.hdfs_user" = "work"
- );
-    If the result is less than 1GB, the file will be: `result_0.csv`.
-    If larger than 1GB, it may be: `result_0.csv, result_1.csv, ...`.
-
-## keyword
- OUTFILE
-
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD.md
deleted file mode 100644
index 92c157a434..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD.md
+++ /dev/null
@@ -1,40 +0,0 @@
----
-{
- "title": "PAUSE ROUTINE LOAD",
- "language": "en"
-}
----
-
-
-
-# PAUSE ROUTINE LOAD
-## example
-
-1. Pause routine load named test1;
-
- PAUSE ROUTINE LOAD FOR test1;
-
-2. Pause all running routine load;
-
- PAUSE ALL ROUTINE LOAD;
-
-## keyword
-
- PAUSE,ALL,ROUTINE,LOAD
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md b/docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md
deleted file mode 100644
index b685270649..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md
+++ /dev/null
@@ -1,48 +0,0 @@
----
-{
- "title": "PAUSE SYNC JOB",
- "language": "en"
-}
----
-
-
-
-# PAUSE SYNC JOB
-
-## description
-
-Pauses a running SyncJob in the database.
-
-The paused job will stop synchronizing and keep the latest consumption location until it is resumed by the user.
-
-Syntax:
-
- PAUSE SYNC JOB [db.]job_name
-
-## example
-
-1. Pause the SyncJob named `job_name`.
-
- PAUSE SYNC JOB `job_name`;
-
-## keyword
- PAUSE,SYNC,JOB,BINLOG
-
-
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/RESTORE TABLET.md b/docs/en/sql-reference/sql-statements/Data Manipulation/RESTORE TABLET.md
deleted file mode 100644
index e49f931864..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/RESTORE TABLET.md
+++ /dev/null
@@ -1,41 +0,0 @@
----
-{
- "title": "RESTORE TABLET",
- "language": "en"
-}
----
-
-
-
-# RESTORE TABLET
-## Description
-
-This function is used to recover the tablet data that was deleted by mistake in the trash directory.
-
-Note: For the time being, this function only provides an HTTP interface in the BE service. To use it,
-a restore tablet API request needs to be sent to the HTTP port of the BE machine. The API format is as follows:
-Method: POST
-URI: http://be_host:be_http_port/api/restore_tablet?tablet_id=xxx&schema_hash=xxx
-
-## example
-
-curl -X POST "http://hostname:8088/api/restore_tablet?tablet_id=123456&schema_hash=1111111"
-## keyword
-RESTORE,TABLET
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD.md
deleted file mode 100644
index 26a499b1dc..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD.md
+++ /dev/null
@@ -1,40 +0,0 @@
----
-{
- "title": "RESUME ROUTINE LOAD",
- "language": "en"
-}
----
-
-
-
-# RESUME ROUTINE LOAD
-## example
-
-1. Resume routine load job named test1.
-
- RESUME ROUTINE LOAD FOR test1;
-
-2. Resume all paused routine load job.
-
- RESUME ALL ROUTINE LOAD;
-
-## keyword
-
- RESUME,ALL,ROUTINE,LOAD
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/RESUME SYNC JOB.md b/docs/en/sql-reference/sql-statements/Data Manipulation/RESUME SYNC JOB.md
deleted file mode 100644
index ef5f7f851a..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/RESUME SYNC JOB.md
+++ /dev/null
@@ -1,46 +0,0 @@
----
-{
- "title": "RESUME SYNC JOB",
- "language": "en"
-}
----
-
-
-
-# RESUME SYNC JOB
-
-## description
-
-Resumes a paused SyncJob in the database.
-
-The job will continue to synchronize data from the latest location before the last pause.
-
-Syntax:
-
- RESUME SYNC JOB [db.]job_name
-
-## example
-
-1. Resume the SyncJob named `job_name`
-
- RESUME SYNC JOB `job_name`;
-
-## keyword
- RESUME,SYNC,JOB,BINLOG
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md
deleted file mode 100644
index c695f2a4a1..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md
+++ /dev/null
@@ -1,588 +0,0 @@
----
-{
- "title": "ROUTINE LOAD",
- "language": "en"
-}
----
-
-
-
-# ROUTINE LOAD
-## description
-
-The Routine Load function allows users to submit a resident load task that continuously loads data into Doris by reading from a specified data source. Currently, only CSV or JSON text data can be loaded from Kafka, either without authentication or via SSL authentication.
-
-Syntax:
-
-```
-CREATE ROUTINE LOAD [db.]job_name ON tbl_name
-[merge_type]
-[load_properties]
-[job_properties]
-FROM data_source
-[data_source_properties]
-```
-
-1. [db.]job_name
-
- The name of the load job, in the same database, only one job can run with the same name.
-
-2. tbl_name
-
- Specifies the name of the table that needs to be loaded.
-
-3. merge_type
-
-    The type of data merging supports three types: APPEND, DELETE, and MERGE. APPEND is the default value, which means that all of this batch of data needs to be appended to the existing data. DELETE means to delete all rows with the same keys as this batch of data. MERGE semantics need to be used in conjunction with a delete condition: data that meets the DELETE ON condition is processed according to DELETE semantics, and the rest is processed according to APPEND semantics.
-
-4. load_properties
-
- Used to describe the load data. grammar:
-
- ```
- [column_separator],
- [columns_mapping],
- [where_predicates],
- [delete_on_predicates]
- [partitions],
- [preceding_predicates]
- ```
-
- 1. column_separator:
-
- Specify column separators, such as:
-
- `COLUMNS TERMINATED BY ","`
-
- The default is: `\t`
-
- 2. columns_mapping:
-
- Specifies the mapping of columns in the source data and defines how the derived columns are generated.
-
- 1. Map column:
-
- Specify in order, which columns in the source data correspond to which columns in the destination table. For columns that you want to skip, you can specify a column name that does not exist.
-
- Suppose the destination table has three columns k1, k2, v1. The source data has 4 columns, of which columns 1, 2, and 4 correspond to k2, k1, and v1, respectively. Write as follows:
-
- `COLUMNS (k2, k1, xxx, v1)`
-
- Where xxx is a column that does not exist and is used to skip the third column in the source data.
-
- 2. Derived columns:
-
- A column represented in the form of col_name = expr, which we call a derived column. That is, the value of the corresponding column in the destination table is calculated by expr.
-
- Derived columns are usually arranged after the mapped column. Although this is not mandatory, Doris always parses the mapped columns first and then parses the derived columns.
-
- Following an example, assume that the destination table also has column 4, v2, which is generated by the sum of k1 and k2. You can write as follows:
-
- `COLUMNS (k2, k1, xxx, v1, v2 = k1 + k2);`
-
- 3. where_predicates
-
- Used to specify filter criteria to filter out unwanted columns. Filter columns can be either mapped columns or derived columns.
-
- For example, if we only want to load a column with k1 greater than 100 and k2 equal to 1000, we would write as follows:
-
- `WHERE k1 > 100 and k2 = 1000`
-
- 4. partitions
-
- Specifies which partitions of the load destination table. If not specified, it will be automatically loaded into the corresponding partition.
-
- Example:
-
- `PARTITION(p1, p2, p3)`
-
- 5. delete_on_predicates:
-
- Only used when merge type is MERGE
-
- 6. preceding_predicates
-
- Used to filter original data. The original data is the data without column mapping and transformation. The user can filter the data before conversion, select the desired data, and then perform the conversion.
-
-5. job_properties
-
- A generic parameter that specifies a routine load job.
-
- syntax:
-
- ```
- PROPERTIES (
- "key1" = "val1",
- "key2" = "val2"
- )
- ```
-
- Currently we support the following parameters:
-
- 1. `desired_concurrent_number`
-
- The degree of concurrency desired. A routine load job is split into multiple subtasks. This parameter specifies how many tasks can be executed simultaneously in a job. Must be greater than 0. The default is 3.
-
- This concurrency is not the actual concurrency. The actual concurrency will be considered by the number of nodes in the cluster, the load, and the data source.
-
- example:
-
- `"desired_concurrent_number" = "3"`
-
- 2. `max_batch_interval/max_batch_rows/max_batch_size`
-
- These three parameters represent:
-
- 1) The maximum execution time of each subtask, in seconds. The range is 5 to 60. The default is 10.
-
- 2) The maximum number of rows read per subtask. Must be greater than or equal to 200,000. The default is 200000.
-
- 3) The maximum number of bytes read per subtask. The unit is byte and the range is 100MB to 1GB. The default is 100MB.
-
- These three parameters are used to control the execution time and throughput of a subtask. When either one reaches the threshold, the task ends.
-
- example:
-
- ```
- "max_batch_interval" = "20",
- "max_batch_rows" = "300000",
- "max_batch_size" = "209715200"
- ```
-
- 3. `max_error_number`
-
- The maximum number of error lines allowed in the sampling window. Must be greater than or equal to 0. The default is 0, which means that no error lines are allowed.
-
- The sampling window is max_batch_rows * 10. That is, if the number of error lines is greater than max_error_number in the sampling window, the routine job will be suspended, and manual intervention is required to check the data quality problem.
-
- Lines that are filtered by the where condition are not counted as error lines.
-
- 4. `strict_mode`
-
- Whether to enable strict mode, the default is disabled. If turned on, the column type transformation of non-null raw data is filtered if the result is NULL. Specified as "strict_mode" = "true"
-
- 5. `timezone`
-
- Specifies the time zone in which the job will be loaded. The default by using session variable's timezone. This parameter affects all function results related to the time zone involved in the load.
-
- 6. `format`
-
- Specifies the format of the imported data. Support csv and json, the default is csv.
-
- 7. `jsonpaths`
-
- There are two ways to import json: simple mode and matched mode. If jsonpath is set, it will be the matched mode import, otherwise it will be the simple mode import, please refer to the example for details.
-
- 8. `strip_outer_array`
- Boolean type, true to indicate that json data starts with an array object and flattens objects in the array object, default value is false.
-
- 9. `json_root`
- json_root is a valid JSONPATH string that specifies the root node of the JSON Document. The default value is "".
-
- 10. `send_batch_parallelism`
- Integer, Used to set the default parallelism for sending batch, if the value for parallelism exceed `max_send_batch_parallelism_per_job` in BE config, then the coordinator BE will use the value of `max_send_batch_parallelism_per_job`.
-
- 11. `load_to_single_tablet`
-        Boolean type. True means that one task can only load data to one tablet of the corresponding partition at a time. The default value is false. This parameter can only be set when loading data into an OLAP table with random bucketing (see the sketch below).
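-
-        A minimal sketch of how several of these job properties can be combined in a single PROPERTIES clause; the values shown are illustrative assumptions, not recommendations:
-
-        ```
-        PROPERTIES (
-            "desired_concurrent_number" = "3",
-            "send_batch_parallelism" = "2",
-            "load_to_single_tablet" = "false"
-        )
-        ```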
-
-6. data_source
-
- The type of data source. Current support:
-
- KAFKA
-
-7. `data_source_properties`
-
- Specify information about the data source.
-
- syntax:
-
- ```
- (
- "key1" = "val1",
- "key2" = "val2"
- )
- ```
-
- 1. KAFKA data source
-
-        `kafka_broker_list`
-
-        Kafka's broker connection information. The format is ip:port. Multiple brokers are separated by commas.
-
- Example:
-
- `"kafka_broker_list" = "broker1:9092,broker2:9092"`
-
- 2. `kafka_topic`
-
- Specify the topic of Kafka to subscribe to.
-
- Example:
-
- `"kafka_topic" = "my_topic"`
-
- 3. `kafka_partitions/kafka_offsets`
-
-        Specify the kafka partitions to be subscribed to, and the corresponding starting offset for each partition.
-
- Offset can specify a specific offset from 0 or greater, or:
-
- 1) OFFSET_BEGINNING: Subscribe from the location where the data is available.
-
- 2) OFFSET_END: Subscribe from the end.
-
- 3) Timestamp, the format must be like: "2021-05-11 10:00:00", the system will automatically locate the offset of the first message greater than or equal to the timestamp.
- Note that the offset of the timestamp format cannot be mixed with the number type, only one of them can be selected.
-
-        If not specified, all partitions under the topic are subscribed by default from OFFSET_END.
-
- Example:
-
- ```
- "kafka_partitions" = "0,1,2,3",
- "kafka_offsets" = "101,0,OFFSET_BEGINNING,OFFSET_END"
-
- "kafka_partitions" = "0,1",
- "kafka_offsets" = "2021-05-11 10:00:00, 2021-05-11 11:00:00"
- ```
-
- 4. property
-
- Specify custom kafka parameters.
-
-        The function is equivalent to the "--property" parameter in the kafka shell.
-
- When the value of the parameter is a file, you need to add the keyword: "FILE" before the value.
-
- For information on how to create a file, see "HELP CREATE FILE;"
-
-        For more supported custom parameters, see the client-side configuration items in the official CONFIGURATION documentation for librdkafka.
-
- Example:
-
- ```
- "property.client.id" = "12345",
- "property.ssl.ca.location" = "FILE:ca.pem"
- ```
-
- 1. When connecting to Kafka using SSL, you need to specify the following parameters:
-
- ```
- "property.security.protocol" = "ssl",
- "property.ssl.ca.location" = "FILE:ca.pem",
- "property.ssl.certificate.location" = "FILE:client.pem",
- "property.ssl.key.location" = "FILE:client.key",
- "property.ssl.key.password" = "abcdefg"
- ```
-
- among them:
-
- "property.security.protocol" and "property.ssl.ca.location" are required to indicate the connection method is SSL and the location of the CA certificate.
-
- If the client authentication is enabled on the Kafka server, you also need to set:
-
- ```
- "property.ssl.certificate.location"
- "property.ssl.key.location"
- "property.ssl.key.password"
- ```
-
-            Used to specify the client's public key, private key, and the password of the private key.
-
- 2. Specify the default starting offset for kafka partition
-
-            If kafka_partitions/kafka_offsets is not specified, all partitions are subscribed by default, and you can specify kafka_default_offsets to set the starting offset. The default is OFFSET_END, which starts the subscription from the end.
-
- Values:
-
- 1) OFFSET_BEGINNING: Subscribe from the location where the data is available.
-
- 2) OFFSET_END: Subscribe from the end.
-
- 3) Timestamp, the format is the same as kafka_offsets
-
- Example:
-
- `"property.kafka_default_offsets" = "OFFSET_BEGINNING"`
- `"property.kafka_default_offsets" = "2021-05-11 10:00:00"`
-
-8. load data format sample
-
- Integer class (TINYINT/SMALLINT/INT/BIGINT/LARGEINT): 1, 1000, 1234
-
- Floating point class (FLOAT/DOUBLE/DECIMAL): 1.1, 0.23, .356
-
- Date class (DATE/DATETIME): 2017-10-03, 2017-06-13 12:34:03.
-
- String class (CHAR/VARCHAR) (without quotes): I am a student, a
-
- NULL value: \N
-
-## example
-
-1. Create a Kafka routine load task named test1 for the example_tbl of example_db. Specify group.id and client.id, and automatically consume all partitions by default, with subscriptions starting at the end (OFFSET_END)
- ```
- CREATE ROUTINE LOAD example_db.test1 ON example_tbl
- COLUMNS(k1, k2, k3, v1, v2, v3 = k1 * 100)
- PROPERTIES
- (
- "desired_concurrent_number"="3",
- "max_batch_interval" = "20",
- "max_batch_rows" = "300000",
- "max_batch_size" = "209715200",
- "strict_mode" = "false"
- )
- FROM KAFKA
- (
- "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092",
- "kafka_topic" = "my_topic",
- "property.group.id" = "xxx",
- "property.client.id" = "xxx"
- );
- ```
-
-2. Create a Kafka routine load task named test1 for the example_tbl of example_db. The load task is in strict mode.
-
- ```
- CREATE ROUTINE LOAD example_db.test1 ON example_tbl
- COLUMNS(k1, k2, k3, v1, v2, v3 = k1 * 100),
- WHERE k1 > 100 and k2 like "%doris%"
- PROPERTIES
- (
- "desired_concurrent_number"="3",
- "max_batch_interval" = "20",
- "max_batch_rows" = "300000",
- "max_batch_size" = "209715200",
- "strict_mode" = "false"
- )
- FROM KAFKA
- (
- "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092",
- "kafka_topic" = "my_topic",
- "kafka_partitions" = "0,1,2,3",
- "kafka_offsets" = "101,0,0,200"
- );
- ```
-
-3. load data from Kafka clusters via SSL authentication. Also set the client.id parameter. The load task is in non-strict mode and the time zone is Africa/Abidjan
-
- ```
- CREATE ROUTINE LOAD example_db.test1 ON example_tbl
- COLUMNS(k1, k2, k3, v1, v2, v3 = k1 * 100),
- WHERE k1 > 100 and k2 like "%doris%"
- PROPERTIES
- (
- "desired_concurrent_number"="3",
- "max_batch_interval" = "20",
- "max_batch_rows" = "300000",
- "max_batch_size" = "209715200",
- "strict_mode" = "false",
- "timezone" = "Africa/Abidjan"
- )
- FROM KAFKA
- (
- "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092",
- "kafka_topic" = "my_topic",
- "property.security.protocol" = "ssl",
- "property.ssl.ca.location" = "FILE:ca.pem",
- "property.ssl.certificate.location" = "FILE:client.pem",
- "property.ssl.key.location" = "FILE:client.key",
- "property.ssl.key.password" = "abcdefg",
- "property.client.id" = "my_client_id"
- );
- ```
-
-4. Create a Kafka routine load task named test1 for the example_tbl of example_db. The load data is a simple json.
-
- ```
- CREATE ROUTINE LOAD example_db.test_json_label_1 ON table1
- COLUMNS(category,price,author)
- PROPERTIES
- (
- "desired_concurrent_number"="3",
- "max_batch_interval" = "20",
- "max_batch_rows" = "300000",
- "max_batch_size" = "209715200",
- "strict_mode" = "false",
- "format" = "json"
- )
- FROM KAFKA
- (
- "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092",
- "kafka_topic" = "my_topic",
- "kafka_partitions" = "0,1,2",
- "kafka_offsets" = "0,0,0"
- );
- ```
-    It supports two kinds of data styles:
- 1){"category":"a9jadhx","author":"test","price":895}
- 2)[
- {"category":"a9jadhx","author":"test","price":895},
- {"category":"axdfa1","author":"EvelynWaugh","price":1299}
- ]
-
-5. Matched load json by jsonpaths.
-
- ```
- CREATE TABLE `example_tbl` (
- `category` varchar(24) NULL COMMENT "",
- `author` varchar(24) NULL COMMENT "",
- `timestamp` bigint(20) NULL COMMENT "",
- `dt` int(11) NULL COMMENT "",
- `price` double REPLACE
- ) ENGINE=OLAP
- AGGREGATE KEY(`category`,`author`,`timestamp`,`dt`)
- COMMENT "OLAP"
- PARTITION BY RANGE(`dt`)
- (PARTITION p0 VALUES [("-2147483648"), ("20200509")),
- PARTITION p20200509 VALUES [("20200509"), ("20200510")),
- PARTITION p20200510 VALUES [("20200510"), ("20200511")),
- PARTITION p20200511 VALUES [("20200511"), ("20200512")))
- DISTRIBUTED BY HASH(`category`,`author`,`timestamp`) BUCKETS 4
- PROPERTIES (
- "replication_num" = "1"
- );
-
- CREATE ROUTINE LOAD example_db.test1 ON example_tbl
- COLUMNS(category, author, price, timestamp, dt=from_unixtime(timestamp, '%Y%m%d'))
- PROPERTIES
- (
- "desired_concurrent_number"="3",
- "max_batch_interval" = "20",
- "max_batch_rows" = "300000",
- "max_batch_size" = "209715200",
- "strict_mode" = "false",
- "format" = "json",
- "jsonpaths" = "[\"$.category\",\"$.author\",\"$.price\",\"$.timestamp\"]",
- "strip_outer_array" = "true"
- )
- FROM KAFKA
- (
- "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092",
- "kafka_topic" = "my_topic",
- "kafka_partitions" = "0,1,2",
- "kafka_offsets" = "0,0,0"
- );
- ```
- For example json data:
- [
- {"category":"11","title":"SayingsoftheCentury","price":895,"timestamp":1589191587},
- {"category":"22","author":"2avc","price":895,"timestamp":1589191487},
- {"category":"33","author":"3avc","title":"SayingsoftheCentury","timestamp":1589191387}
- ]
-
- Tips:
- 1)If the json data starts as an array and each object in the array is a record, you need to set the strip_outer_array to true to represent the flat array.
- 2)If the json data starts with an array, and each object in the array is a record, our ROOT node is actually an object in the array when we set jsonpath.
-
-6. User specifies the json_root node
- CREATE ROUTINE LOAD example_db.test1 ON example_tbl
- COLUMNS(category, author, price, timestamp, dt=from_unixtime(timestamp, '%Y%m%d'))
- PROPERTIES
- (
- "desired_concurrent_number"="3",
- "max_batch_interval" = "20",
- "max_batch_rows" = "300000",
- "max_batch_size" = "209715200",
- "strict_mode" = "false",
- "format" = "json",
- "jsonpaths" = "[\"$.category\",\"$.author\",\"$.price\",\"$.timestamp\"]",
- "strip_outer_array" = "true",
- "json_root" = "$.RECORDS"
- )
- FROM KAFKA
- (
- "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092",
- "kafka_topic" = "my_topic",
- "kafka_partitions" = "0,1,2",
- "kafka_offsets" = "0,0,0"
- );
- For example json data:
- {
- "RECORDS":[
- {"category":"11","title":"SayingsoftheCentury","price":895,"timestamp":1589191587},
- {"category":"22","author":"2avc","price":895,"timestamp":1589191487},
- {"category":"33","author":"3avc","title":"SayingsoftheCentury","timestamp":1589191387}
- ]
- }
-
- 7. Create a Kafka routine load task named test1 for the example_tbl of example_db, using MERGE: imported rows with v3 > 100 delete the existing rows with matching key columns, and the remaining rows are appended.
-
- CREATE ROUTINE LOAD example_db.test1 ON example_tbl
- WITH MERGE
- COLUMNS(k1, k2, k3, v1, v2, v3),
- WHERE k1 > 100 and k2 like "%doris%",
- DELETE ON v3 >100
- PROPERTIES
- (
- "desired_concurrent_number"="3",
- "max_batch_interval" = "20",
- "max_batch_rows" = "300000",
- "max_batch_size" = "209715200",
- "strict_mode" = "false"
- )
- FROM KAFKA
- (
- "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092",
- "kafka_topic" = "my_topic",
- "kafka_partitions" = "0,1,2,3",
- "kafka_offsets" = "101,0,0,200"
- );
-
- 8. Filter original data
-
- CREATE ROUTINE LOAD example_db.test_job ON example_tbl
- COLUMNS TERMINATED BY ",",
- COLUMNS(k1,k2,source_sequence,v1,v2),
- PRECEDING FILTER k1 > 2
- PROPERTIES
- (
- "desired_concurrent_number"="3",
- "max_batch_interval" = "30",
- "max_batch_rows" = "300000",
- "max_batch_size" = "209715200"
- ) FROM KAFKA
- (
- "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092",
- "kafka_topic" = "my_topic",
- "kafka_partitions" = "0,1,2,3",
- "kafka_offsets" = "101,0,0,200"
- );
-
- 9. Start consumption from the specified point in time
-
- CREATE ROUTINE LOAD example_db.test_job ON example_tbl
- PROPERTIES
- (
- "desired_concurrent_number"="3",
- "max_batch_interval" = "30",
- "max_batch_rows" = "300000",
- "max_batch_size" = "209715200"
- ) FROM KAFKA
- (
- "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092",
- "kafka_topic" = "my_topic",
- "property.kafka_default_offsets" = "2021-10-10 11:00:00"
- );
-
-## keyword
-
- CREATE, ROUTINE, LOAD
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ALTER.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ALTER.md
deleted file mode 100644
index 37fb8f3192..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ALTER.md
+++ /dev/null
@@ -1,55 +0,0 @@
----
-{
- "title": "SHOW ALTER",
- "language": "en"
-}
----
-
-
-
-# SHOW ALTER
-## Description
-This statement is used to show the execution of various modification tasks currently under way.
-Grammar:
-SHOW ALTER [CLUSTER | TABLE [COLUMN | ROLLUP] [FROM db_name]];
-
-Explain:
-TABLE COLUMN: Shows the task of alter table column.
- Support grammar [WHERE TableName|CreateTime|FinishTime|State] [ORDER BY] [LIMIT]
-TABLE ROLLUP: Shows the task of creating or deleting ROLLUP index
-If db_name is not specified, use the current default DB
-CLUSTER: Shows tasks related to cluster operations (for administrators only; not yet implemented)
-
-## example
-1. Show the task execution of all modified columns of default DB
-SHOW ALTER TABLE COLUMN;
-
-2. Show the last task execution of modified columns of some table
-SHOW ALTER TABLE COLUMN WHERE TableName = "table1" ORDER BY CreateTime LIMIT 1;
-
-3. Show the execution of tasks to create or delete ROLLUP index for specified DB
-SHOW ALTER TABLE ROLLUP FROM example_db;
-
-4. Show tasks related to cluster operations (for administrators only; not yet implemented)
-SHOW ALTER CLUSTER;
-
-## keyword
-SHOW,ALTER
-
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP.md
deleted file mode 100644
index be076aa55e..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP.md
+++ /dev/null
@@ -1,62 +0,0 @@
----
-{
- "title": "SHOW BACKUP",
- "language": "en"
-}
----
-
-
-
-# SHOW BACKUP
-## Description
-This statement is used to view BACKUP tasks
-Grammar:
-SHOW BACKUP [FROM db_name]
-
-Explain:
-1. Only the last BACKUP task is saved in Palo.
-2. Each column has the following meanings:
-JobId: Unique job ID
-SnapshotName: The name of the backup
-DbName: Subordinate database
-State: Current phase
-PENDING: The initial state after submitting a job
-SNAPSHOTTING: In the execution snapshot
-UPLOAD_SNAPSHOT: Snapshot completed, ready for upload
-UPLOADING: Snapshot uploading
-SAVE_META: Save job meta-information as a local file
-UPLOAD_INFO: Upload job meta-information
-FINISHED: Operation Successful
-CANCELLED: Job Failure
-Backup Objs: Backup tables and partitions
-CreateTime: Task submission time
-Snapshot Finished Time: Snapshot completion time
-Upload Finished Time: Snapshot Upload Completion Time
-FinishedTime: Job End Time
-Unfinished Tasks: The unfinished sub-task ID is displayed in the SNAPSHOTTING and UPLOADING phases
-Status: Display failure information if the job fails
-Timeout: Job timeout, in seconds
-
-## example
-1. See the last BACKUP task under example_db.
-SHOW BACKUP FROM example_db;
-
-## keyword
-SHOW, BACKUP
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW CREATE FUNCTION.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW CREATE FUNCTION.md
deleted file mode 100644
index 7cbdf0ce51..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW CREATE FUNCTION.md
+++ /dev/null
@@ -1,43 +0,0 @@
----
-{
- "title": "SHOW CREATE FUNCTION",
- "language": "en"
-}
----
-
-
-
-# SHOW CREATE FUNCTION
-## description
- The statement is used to show the creation statement of user-defined function
- grammar:
-        SHOW CREATE FUNCTION function_name(arg_type [, ...]) [FROM db_name];
-
- Description:
- `function_name`: the name of the function to be displayed
- `arg_type`: the parameter list of the function to be displayed
- If you do not specify db_name, use the current default db
-
-## example
- 1. Show the creation statement of the specified function under the default db
- SHOW CREATE FUNCTION my_add(INT, INT)
-
-## keyword
- SHOW,CREATE,FUNCTION
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW CREATE ROUTINE LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW CREATE ROUTINE LOAD.md
deleted file mode 100644
index 80df442139..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW CREATE ROUTINE LOAD.md
+++ /dev/null
@@ -1,45 +0,0 @@
----
-{
-"title": "SHOW CREATE ROUTINE LOAD",
-"language": "en"
-}
----
-
-
-
-# SHOW CREATE ROUTINE LOAD
-## description
- The statement is used to show the routine load job creation statement of user-defined.
-
- The kafka partition and offset in the result show the currently consumed partition and the corresponding offset to be consumed.
-
- grammar:
- SHOW [ALL] CREATE ROUTINE LOAD for load_name;
-
- Description:
-        `ALL`: optional. Gets all jobs, including historical jobs
- `load_name`: routine load name
-
-## example
- 1. Show the creation statement of the specified routine load under the default db
- SHOW CREATE ROUTINE LOAD for test_load
-
-## keyword
- SHOW,CREATE,ROUTINE,LOAD
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA SKEW.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA SKEW.md
deleted file mode 100644
index c7511f338e..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA SKEW.md
+++ /dev/null
@@ -1,50 +0,0 @@
----
-{
- "title": "SHOW DATA SKEW",
- "language": "en"
-}
----
-
-
-
-# SHOW DATA SKEW
-## description
-
- This statement is used to view the data skew of a table or a partition.
-
- grammar:
-
- SHOW DATA SKEW FROM [db_name.]tbl_name [PARTITION (p1)];
-
- Description:
-
-        1. Exactly one partition must be specified. For non-partitioned tables, the partition name is the same as the table name.
- 2. The result will show the data volume of each bucket under the specified partition, and the proportion of the data volume of each bucket in the total data volume.
-
-## example
-
- 1. View the data skew of the table
-
- SHOW DATA SKEW FROM db1.test PARTITION(p1);
-
-## keyword
-
- SHOW, DATA, SKEW
-
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA.md
deleted file mode 100644
index 71d7e8c920..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA.md
+++ /dev/null
@@ -1,110 +0,0 @@
----
-{
- "title": "SHOW DATA",
- "language": "en"
-}
----
-
-
-
-# SHOW DATA
-
-## Description
-
-This statement is used to show the data volume, replica count, and row count.
-
-Syntax:
-
-```
-SHOW DATA [FROM db_name[.table_name]] [ORDER BY ...];
-```
-
-Explain:
-
-1. If the FROM clause is not specified, the data volume and replica count of each table under the current db are displayed. The data volume is the total data volume of all replicas. The replica count covers all partitions of the table and all of its materialized views.
-
-2. If the FROM clause is specified, the data volume, replica count, and row count of each materialized view under the table are displayed. The data volume is the total data volume of all replicas. The replica count covers all partitions of the materialized view, and so does the row count.
-
-3. When counting the number of rows, the replica with the largest number of rows among multiple replicas shall prevail.
-
-4. The `Total` row in the result set represents the summary row. The `Quota` row indicates the current quota of the database. The `Left` line indicates the remaining quota.
-
-5. If you want to check the size of each Partition, please refer to `help show partitions`.
-
-6. Arbitrary column combinations can be sorted using ORDER BY.
-
-## example
-
-1. Display the data volume, replica size, aggregate data volume and aggregate replica count of each table of default DB.
-
- ```
- SHOW DATA;
- ```
-
- ```
- +-----------+-------------+--------------+
- | TableName | Size | ReplicaCount |
- +-----------+-------------+--------------+
- | tbl1 | 900.000 B | 6 |
- | tbl2 | 500.000 B | 3 |
- | Total | 1.400 KB | 9 |
- | Quota | 1024.000 GB | 1073741824 |
- | Left | 1021.921 GB | 1073741815 |
- +-----------+-------------+--------------+
- ```
-
-2. Display the subdivision data volume, replica count and number of rows of the specified table below the specified DB.
-
- ```
- SHOW DATA FROM example_db.test;
- ```
- ```
- +-----------+-----------+-----------+--------------+----------+
- | TableName | IndexName | Size | ReplicaCount | RowCount |
- +-----------+-----------+-----------+--------------+----------+
- | test | r1 | 10.000MB | 30 | 10000 |
- | | r2 | 20.000MB | 30 | 20000 |
- | | test2 | 50.000MB | 30 | 50000 |
- | | Total | 80.000 | 90 | |
- +-----------+-----------+-----------+--------------+----------+
- ```
-3. The output can be sorted by any combination of columns, such as data volume, replica count, and row count.
-
- ```
- SHOW DATA ORDER BY ReplicaCount desc,Size asc;
- ```
-
- ```
- +-----------+-------------+--------------+
- | TableName | Size | ReplicaCount |
- +-----------+-------------+--------------+
- | table_c | 3.102 KB | 40 |
- | table_d | .000 | 20 |
- | table_b | 324.000 B | 20 |
- | table_a | 1.266 KB | 10 |
- | Total | 4.684 KB | 90 |
- | Quota | 1024.000 GB | 1073741824 |
- | Left | 1024.000 GB | 1073741734 |
- +-----------+-------------+--------------+
- ```
-
-## keyword
-
- SHOW,DATA
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASE ID.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASE ID.md
deleted file mode 100644
index 87989ba692..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASE ID.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW DATABASE",
- "language": "en"
-}
----
-
-
-
-# SHOW DATABASE ID
-## Description
-This statement is used to display database name according to database id (for administrators only)
-Grammar:
-SHOW DATABASE [database_id]
-
-## example
-1. Display database name according to database id
-SHOW DATABASE 1001;
-
-## keyword
-SHOW,DATABASE
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES.md
deleted file mode 100644
index b9ebd36273..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES.md
+++ /dev/null
@@ -1,35 +0,0 @@
----
-{
- "title": "SHOW DATABASES",
- "language": "en"
-}
----
-
-
-
-# SHOW DATABASES
-## Description
-This statement is used to show the currently visible DB
-Grammar:
-SHOW DATABASES;
-
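-## example
-1. Show the databases visible to the current user (a minimal usage example; the result depends on the user's privileges)
-SHOW DATABASES;
-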
-## keyword
-SHOW,DATABASES
-
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DELETE.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DELETE.md
deleted file mode 100644
index 2e397d5223..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DELETE.md
+++ /dev/null
@@ -1,39 +0,0 @@
----
-{
- "title": "SHOW DELETE",
- "language": "en"
-}
----
-
-
-
-# SHOW DELETE
-## Description
-This statement is used to show successful historical delete tasks performed
-Grammar:
-SHOW DELETE [FROM db_name]
-
-## example
-1. Show all historical delete tasks for database
-SHOW DELETE FROM database;
-
-## keyword
-SHOW,DELETE
-
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DYNAMIC PARTITION TABLES.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DYNAMIC PARTITION TABLES.md
deleted file mode 100644
index ef1900db3c..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DYNAMIC PARTITION TABLES.md
+++ /dev/null
@@ -1,36 +0,0 @@
----
-{
- "title": "SHOW DYNAMIC PARTITION TABLES",
- "language": "en"
-}
----
-
-
-
-# SHOW DYNAMIC PARTITION TABLES
-## description
-This statement is used to display the status of all dynamic partition tables under the current db
-Grammar:
-SHOW DYNAMIC PARTITION TABLES [FROM db_name];
-
-## example
-1. Display the status of all dynamic partition tables for the database
-SHOW DYNAMIC PARTITION TABLES FROM database;
-
-## keyword
-SHOW,DYNAMIC,PARTITION,TABLES
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT.md
deleted file mode 100644
index aba5fbd983..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT.md
+++ /dev/null
@@ -1,70 +0,0 @@
----
-{
- "title": "SHOW EXPORT",
- "language": "en"
-}
----
-
-
-
-# SHOW EXPORT
-## Description
-This statement is used to show the execution of the specified export task
-Grammar:
- SHOW EXPORT
- [FROM db_name]
- [
- WHERE
- [ID = your_job_id]
- [STATE = ["PENDING"|"EXPORTING"|"FINISHED"|"CANCELLED"]]
- [LABEL [ = "your_label" | LIKE "label_matcher"]]
- ]
- [ORDER BY ...]
- [LIMIT limit];
-
-Explain:
-1) If db_name is not specified, use the current default DB
-2) If STATE is specified, the EXPORT state is matched
-3) Any column combination can be sorted using ORDER BY
-4) If LIMIT is specified, only the first `limit` matching records are displayed. Otherwise, all of them are displayed.
-
-## example
-1. Show all export tasks of default DB
- SHOW EXPORT;
-
-2. Show the export tasks of the specified db, sorted in descending order by StartTime
- SHOW EXPORT FROM example_db ORDER BY StartTime DESC;
-
-3. Show the export task of the specified db, state is "exporting" and sorted in descending order by StartTime
- SHOW EXPORT FROM example_db WHERE STATE = "exporting" ORDER BY StartTime DESC;
-
-4. Show the export task of the specified db with the given job_id
- SHOW EXPORT FROM example_db WHERE ID = job_id;
-
-5. Show the export task of the specified db with the given label
- SHOW EXPORT FROM example_db WHERE LABEL = "mylabel";
-
-6. Show the export tasks of the specified db whose label prefix is "labelprefix"
- SHOW EXPORT FROM example_db WHERE LABEL LIKE "labelprefix%";
-
-## keyword
-
- SHOW,EXPORT
-
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW LOAD.md
deleted file mode 100644
index e5ef14acea..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW LOAD.md
+++ /dev/null
@@ -1,74 +0,0 @@
----
-{
- "title": "SHOW LOAD",
- "language": "en"
-}
----
-
-
-
-# SHOW LOAD
-## Description
-This statement is used to show the execution of the specified import task
-Grammar:
-SHOW LOAD
-[FROM db_name]
-[
-WHERE
-[LABEL [ = "your_label" | LIKE "label_matcher"]]
-[STATE = ["PENDING"|"ETL"|"LOADING"|"FINISHED"|"CANCELLED"]]
-]
-[ORDER BY ...]
-[LIMIT limit][OFFSET offset];
-
-Explain:
-1) If db_name is not specified, use the current default DB
-2) If LABEL LIKE is used, it matches import tasks whose label contains label_matcher
-3) If LABEL = is used, the specified label is matched accurately.
-4) If STATE is specified, the LOAD state is matched
-5) Arbitrary column combinations can be sorted using ORDER BY
-6) If LIMIT is specified, only the first `limit` matching records are displayed. Otherwise, all of them are displayed.
-7) If OFFSET is specified, the query results are displayed from offset. By default, the offset is 0.
-8) If broker/mini load is used, the connection in the URL column can be viewed using the following command:
-
-SHOW LOAD WARNINGS ON 'url'
-
-## example
-1. Show all import tasks of default DB
-SHOW LOAD;
-
-2. Show the import task of the specified db. The label contains the string "2014_01_02", showing the oldest 10
-SHOW LOAD FROM example_db WHERE LABEL LIKE "2014_01_02" LIMIT 10;
-
-3. Show the import task of the specified db, specify label as "load_example_db_20140102" and sort it in descending order by LoadStartTime
-SHOW LOAD FROM example_db WHERE LABEL = "load_example_db_20140102" ORDER BY LoadStartTime DESC;
-
-4. Show the import task of the specified db, specify label as "load_example_db_20140102" and state as "load", and sort it in descending order by LoadStartTime
-SHOW LOAD FROM example_db WHERE LABEL = "load_example_db_20140102" AND STATE = "loading" ORDER BY LoadStartTime DESC;
-
-5. Show the import tasks of the specified db, sort them in descending order by LoadStartTime, and display 10 query results starting from offset 5
-SHOW LOAD FROM example_db ORDER BY LoadStartTime DESC limit 5,10;
-SHOW LOAD FROM example_db ORDER BY LoadStartTime DESC limit 10 offset 5;
-
-6. For small batch (mini) load, use the following command to view the import status:
-curl --location-trusted -u {user}:{passwd} http://{hostname}:{port}/api/{database}/_load_info?label={labelname}
-
-## keyword
-SHOW,LOAD
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITION ID.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITION ID.md
deleted file mode 100644
index e443da46a3..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITION ID.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW PARTITION",
- "language": "en"
-}
----
-
-
-
-# SHOW PARTITION ID
-## Description
-This statement is used to display the database name, table name and partition name corresponding to a partition id (for administrators only)
-Grammar:
-SHOW PARTITION [partition_id]
-
-## example
-1. Display database name, table name, partition name according to partition id
-SHOW PARTITION 10002;
-
-## keyword
-SHOW,PARTITION
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS.md
deleted file mode 100644
index 0a75f56116..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS.md
+++ /dev/null
@@ -1,48 +0,0 @@
----
-{
- "title": "SHOW PARTITIONS",
- "language": "en"
-}
----
-
-
-
-# SHOW PARTITIONS
-## Description
-This statement is used to display partition information
-Grammar:
-SHOW PARTITIONS FROM [db_name.]table_name [WHERE] [ORDER BY] [LIMIT];
-Explain:
-Filtering on the following columns is supported: PartitionId, PartitionName, State, Buckets, ReplicationNum,
-LastConsistencyCheckTime
-
-## example
-1. Display partition information for the specified table below the specified DB
-SHOW PARTITIONS FROM example_db.table_name;
-
-2. Display information about the specified partition of the specified table below the specified DB
-SHOW PARTITIONS FROM example_db.table_name WHERE PartitionName = "p1";
-
-3. Display information about the newest partition of the specified table below the specified DB
-SHOW PARTITIONS FROM example_db.table_name ORDER BY PartitionId DESC LIMIT 1;
-
-## keyword
-SHOW,PARTITIONS
-
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY.md
deleted file mode 100644
index 3c55e9f6c2..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY.md
+++ /dev/null
@@ -1,42 +0,0 @@
----
-{
- "title": "SHOW PROPERTY",
- "language": "en"
-}
----
-
-
-
-# SHOW PROPERTY
-## Description
-This statement is used to view user properties
-Grammar:
-SHOW PROPERTY [FOR user] [LIKE key]
-
-## example
-1. View the properties of the user 'jack'
-SHOW PROPERTY FOR 'jack'
-
-2. View the import cluster related properties of the user 'jack'
-SHOW PROPERTY FOR 'jack' LIKE '%load_cluster%'
-
-## keyword
-SHOW, PROPERTY
-
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES.md
deleted file mode 100644
index 24c1197ea6..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES.md
+++ /dev/null
@@ -1,49 +0,0 @@
----
-{
- "title": "SHOW REPOSITORIES",
- "language": "en"
-}
----
-
-
-
-# SHOW REPOSITORIES
-## Description
-This statement is used to view the currently created repositories.
-Grammar:
-SHOW REPOSITORIES;
-
-Explain:
-1. Each column has the following meanings:
-RepoId: Unique repository ID
-RepoName: Repository name
-CreateTime: The time when the repository was first created
-IsReadOnly: Whether the repository is read-only
-Location: The root directory in the repository for backing up data
-Broker: The Broker that the repository depends on
-ErrMsg: Palo regularly checks the connectivity of the repository; if a problem occurs, the error message is displayed here.
-
-## example
-1. View the repositories that have been created:
-SHOW REPOSITORIES;
-
-## keyword
-SHOW, REPOSITORY, REPOSITORIES
-
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE.md
deleted file mode 100644
index 13c248a976..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE.md
+++ /dev/null
@@ -1,67 +0,0 @@
----
-{
- "title": "SHOW RESTORE",
- "language": "en"
-}
----
-
-
-
-# SHOW RESTORE
-## Description
-This statement is used to view RESTORE tasks
-Grammar:
-SHOW RESTORE [FROM db_name]
-
-Explain:
-1. Palo only keeps the most recent RESTORE task.
-2. Each column has the following meanings:
-JobId: Unique job ID
-Label: The name of the backup to be restored
-Timestamp: Time version of backup to be restored
-DbName: Subordinate database
-State: Current phase
-PENDING: The initial state after submitting a job
-SNAPSHOTTING: Taking the snapshot
-DOWNLOAD: The snapshot is complete, ready to download the snapshot from the repository
-DOWNLOADING: Downloading the snapshot
-COMMIT: The snapshot download is complete, ready to take effect
-COMMITTING: Taking effect
-FINISHED: The job finished successfully
-CANCELLED: The job failed
-AllowLoad: Whether import is allowed during the restore (currently not supported)
-ReplicationNum: Specifies the number of replicas recovered
-Restore Jobs: Tables and partitions to be restored
-CreateTime: Task submission time
-MetaPreparedTime: Metadata Readiness Completion Time
-Snapshot Finished Time: Snapshot completion time
-Download Finished Time: Snapshot download completion time
-FinishedTime: Job End Time
-Unfinished Tasks: The unfinished sub-task ID is displayed in the SNAPSHOTTING, DOWNLOADING, and COMMITTING phases
-Status: Display failure information if the job fails
-Timeout: Job timeout, in seconds
-
-## example
-1. Check the last RESTORE task under example_db.
-SHOW RESTORE FROM example_db;
-
-## keyword
-SHOW, RESTORE
-
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK.md
deleted file mode 100644
index e44d3c9246..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK.md
+++ /dev/null
@@ -1,35 +0,0 @@
----
-{
- "title": "SHOW ROUTINE LOAD TASK",
- "language": "en"
-}
----
-
-
-
-# SHOW ROUTINE LOAD TASK
-## example
-
-1. Show sub-task information for a routine import task called test1.
-
-SHOW ROUTINE LOAD TASK WHERE JobName = "test1";
-
-## keyword
-SHOW,ROUTINE,LOAD,TASK
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD.md
deleted file mode 100644
index 66ffbd66b7..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD.md
+++ /dev/null
@@ -1,107 +0,0 @@
----
-{
- "title": "SHOW ROUTINE LOAD",
- "language": "en"
-}
----
-
-
-
-# SHOW ROUTINE LOAD
-## description
- This statement is used to show the running status of the Routine Load job
- grammar:
- SHOW [ALL] ROUTINE LOAD [FOR jobName] [LIKE pattern];
-
- Result description:
-
- Id: Job ID
- Name: job name
- CreateTime: Job creation time
- PauseTime: Last job pause time
- EndTime: The end time of the job
- DbName: corresponding database name
- TableName: Corresponding table name
- State: job running status
- DataSourceType: Data source type: KAFKA
- CurrentTaskNum: current number of subtasks
- JobProperties: Job configuration details
-DataSourceProperties: Data source configuration details
- CustomProperties: custom configuration
- Statistic: job running status statistics
- Progress: Job running progress
- Lag: job delay status
-ReasonOfStateChanged: Reason of job status change
- ErrorLogUrls: The viewing address of the filtered data with unqualified quality
- OtherMsg: Other error messages
-
- * State
-
-        There are the following 5 states:
-
- * NEED_SCHEDULE: The job is waiting to be scheduled
- * RUNNING: The job is running
- * PAUSED: The job is suspended
- * STOPPED: The job has ended
- * CANCELLED: The job has been cancelled
-
- * Progress
-
- For Kafka data sources, the offset currently consumed by each partition is displayed. For example, {"0":"2"} means that the consumption progress of Kafka partition 0 is 2.
-
- * Lag
-
- For Kafka data sources, the consumption delay of each partition is displayed. For example, {"0":10} means that the consumption delay of Kafka partition 0 is 10.
-
-## example
-
-1. Show all routine import jobs named test1 (including stopped or cancelled jobs). The result is one or more lines.
-
-SHOW ALL ROUTINE LOAD FOR test1;
-
-2. Show the current running routine load job named test1
-
-SHOW ROUTINE LOAD FOR test1;
-
-3. Display all routine import jobs (including stopped or cancelled jobs) under example_db. The result is one or more lines.
-
-use example_db;
-SHOW ALL ROUTINE LOAD;
-
-4. Display all running routine import jobs under example_db
-
-use example_db;
-SHOW ROUTINE LOAD;
-
-5. Display the current running routine import job named test1 under example_db
-
-SHOW ROUTINE LOAD FOR example_db.test1;
-
-6. Display all routine import jobs named test1 (including stopped or cancelled jobs) under example_db. The result is one or more lines.
-
-SHOW ALL ROUTINE LOAD FOR example_db.test1;
-
-7. Show the current running routine load jobs under example_db with name match test1
-
-use example_db;
-SHOW ROUTINE LOAD LIKE "%test1%";
-
-## keyword
-SHOW,ROUTINE,LOAD
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT.md
deleted file mode 100644
index 84ae5373dc..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT.md
+++ /dev/null
@@ -1,56 +0,0 @@
----
-{
- "title": "SHOW SNAPSHOT",
- "language": "en"
-}
----
-
-
-
-# SHOW SNAPSHOT
-## Description
-This statement is used to view the existing backups in the repository.
-Grammar:
-SHOW SNAPSHOT ON `repo_name`
-[WHERE SNAPSHOT = "snapshot" [AND TIMESTAMP = "backup_timestamp"]];
-
-Explain:
-1. Each column has the following meanings:
-Snapshot: The name of the backup
-Timestamp: Time version for backup
-Status: If the backup is normal, OK is displayed; otherwise, an error message is displayed.
-
-2. If TIMESTAMP is specified, the following additional information will be displayed:
-Database: The name of the database where the backup data belongs
-Details: Shows the entire backup data directory and file structure in the form of Json
-
-## example
-1. Check the existing backups in the repository example_repo:
-SHOW SNAPSHOT ON example_repo;
-
-2. View only the backup named backup1 in the repository example_repo:
-SHOW SNAPSHOT ON example_repo WHERE SNAPSHOT = "backup1";
-
-3. Check the backup named backup1 in the repository example_repo for details of the time version "2018-05-05-15-34-26":
-SHOW SNAPSHOT ON example_repo
-WHERE SNAPSHOT = "backup1" AND TIMESTAMP = "2018-05-05-15-34-26";
-
-## keyword
-SHOW, SNAPSHOT
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW STREAM LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW STREAM LOAD.md
deleted file mode 100644
index b2704e3ad7..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW STREAM LOAD.md
+++ /dev/null
@@ -1,68 +0,0 @@
----
-{
- "title": "SHOW STREAM LOAD",
- "language": "en"
-}
----
-
-
-
-# SHOW STREAM LOAD
-## Description
-This statement is used to show the execution of the specified import task
-Grammar:
-SHOW STREAM LOAD
-[FROM db_name]
-[
-WHERE
-[LABEL [ = "your_label" | LIKE "label_matcher"]]
-[STATUS = ["SUCCESS"|"FAIL"]]
-]
-[ORDER BY ...]
-[LIMIT limit][OFFSET offset];
-
-Explain:
-1) If db_name is not specified, use the current default DB
-2) If LABEL LIKE is used, it matches STREAM LOAD tasks whose label contains label_matcher
-3) If LABEL = is used, the specified label is matched accurately.
-4) If STATUS is specified, the STREAM LOAD status is matched
-5) Arbitrary column combinations can be sorted using ORDER BY
-6) If LIMIT is specified, only the first `limit` matching records are displayed. Otherwise, all of them are displayed.
-7) If OFFSET is specified, the query results are displayed from offset. By default, the offset is 0.
-
-## example
-1. Show all STREAM LOAD tasks of default DB
-SHOW STREAM LOAD;
-
-2. Show the STREAM LOAD task of the specified db. The label contains the string "2014_01_02", showing the oldest 10
-SHOW STREAM LOAD FROM example_db WHERE LABEL LIKE "2014_01_02" LIMIT 10;
-
-3. Show the STREAM LOAD task of the specified db, specify label as "load_example_db_20140102"
-SHOW STREAM LOAD FROM example_db WHERE LABEL = "load_example_db_20140102";
-
-4. Show the STREAM LOAD task of the specified db, specify status as "success", and sort it in descending order by StartTime
-SHOW STREAM LOAD FROM example_db WHERE STATUS = "success" ORDER BY StartTime DESC;
-
-5. Show the STREAM LOAD tasks of the specified db, sort them in descending order by StartTime, and display 10 query results starting from offset 5
-SHOW STREAM LOAD FROM example_db ORDER BY StartTime DESC limit 5,10;
-SHOW STREAM LOAD FROM example_db ORDER BY StartTime DESC limit 10 offset 5;
-
-## keyword
-SHOW,STREAM LOAD
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md
deleted file mode 100644
index f496daffa1..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md
+++ /dev/null
@@ -1,49 +0,0 @@
----
-{
- "title": "SHOW SYNC JOB",
- "language": "en"
-}
----
-
-
-
-# SHOW SYNC JOB
-
-## description
-
-This command is used to display the resident data synchronization job status in all databases.
-
-Syntax:
-
- SHOW SYNC JOB [FROM db_name]
-
-## example
-
-1. Show the status of all SyncJob in the current database.
-
- SHOW SYNC JOB;
-
-2. Show the status of all SyncJobs under the database `test_db`.
-
- SHOW SYNC JOB FROM `test_db`;
-
-## keyword
-
- SHOW,SYNC,JOB,BINLOG
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLE CREATION.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLE CREATION.md
deleted file mode 100644
index 08a730bc7a..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLE CREATION.md
+++ /dev/null
@@ -1,82 +0,0 @@
----
-{
- "title": "SHOW TABLE CREATION",
- "language": "en"
-}
----
-
-
-
-# SHOW TABLE CREATION
-
-## Description
-
- This statement is used to show the execution of the specified Iceberg Database table creation task
- Syntax.
- SHOW TABLE CREATION [FROM db_name] [LIKE table_name_wild];
-
- Description.
- 1. Usage Notes
- 1) If db_name is not specified, the current default db is used.
- 2) If you use LIKE, it will match the table creation task with table_name_wild in the table name
- 2. The meaning of each column
- 1) Database: the name of the database
- 2) Table: the name of the table to be created
- 3) Status: the creation status of the table, `success`/`fail`.
- 4) CreateTime: the time to perform the task of creating the table
- 5) Error Msg: Error message of the failed table creation, or empty if it succeeds.
-
-## example
-
- 1. Show all the table creation tasks in the default Iceberg db
- SHOW TABLE CREATION;
-
- mysql> show table creation;
- +----------------------------+--------+---------+---------------------+----------------------------------------------------------+
- | Database | Table | Status | Create Time | Error Msg |
- +----------------------------+--------+---------+---------------------+----------------------------------------------------------+
- | default_cluster:iceberg_db | logs_1 | success | 2022-01-24 19:42:45 | |
- | default_cluster:iceberg_db | logs | fail | 2022-01-24 19:42:45 | Cannot convert Iceberg type[list] to Doris type. |
- +----------------------------+--------+---------+---------------------+----------------------------------------------------------+
-
- 2. Show the table creation tasks in the specified Iceberg db
- SHOW TABLE CREATION FROM example_db;
-
- mysql> show table creation from iceberg_db;
- +----------------------------+--------+---------+---------------------+----------------------------------------------------------+
- | Database | Table | Status | Create Time | Error Msg |
- +----------------------------+--------+---------+---------------------+----------------------------------------------------------+
- | default_cluster:iceberg_db | logs_1 | success | 2022-01-24 19:42:45 | |
- | default_cluster:iceberg_db | logs | fail | 2022-01-24 19:42:45 | Cannot convert Iceberg type[list] to Doris type. |
- +----------------------------+--------+---------+---------------------+----------------------------------------------------------+
-
- 3. Show table creation tasks for the specified Iceberg db with the string "log" in the table name
- SHOW TABLE CREATION FROM example_db LIKE '%log%';
-
- mysql> show table creation from iceberg_db like "%1";
- +----------------------------+--------+---------+---------------------+-----------+
- | Database | Table | Status | Create Time | Error Msg |
- +----------------------------+--------+---------+---------------------+-----------+
- | default_cluster:iceberg_db | logs_1 | success | 2022-01-24 19:42:45 | |
- +----------------------------+--------+---------+---------------------+-----------+
-
-## keyword
-
- SHOW,TABLE CREATION
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLE ID.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLE ID.md
deleted file mode 100644
index f5268730fa..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLE ID.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-{
- "title": "SHOW TABLE",
- "language": "en"
-}
----
-
-
-
-# SHOW TABLE ID
-## Description
-This statement is used to display the database name and table name corresponding to a table id (for administrators only)
-Grammar:
-SHOW TABLE [table_id]
-
-## example
-1. Display database name, table name according to table id
-SHOW TABLE 10001;
-
-## keyword
-SHOW,TABLE
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLES.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLES.md
deleted file mode 100644
index e1a5d3a627..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLES.md
+++ /dev/null
@@ -1,34 +0,0 @@
----
-{
- "title": "SHOW TABLES",
- "language": "en"
-}
----
-
-
-
-# SHOW TABLES
-## Description
-This statement is used to show all tables under the current DB
-Grammar:
-SHOW TABLES;
-
-## keyword
-SHOW,TABLES
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLET.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLET.md
deleted file mode 100644
index 21ccbccd0a..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLET.md
+++ /dev/null
@@ -1,39 +0,0 @@
----
-{
- "title": "SHOW TABLET",
- "language": "en"
-}
----
-
-
-
-# SHOW TABLET
-## Description
- This statement is used to display information of the specified tablet (for administrators only)
- Grammar:
- SHOW TABLET tablet_id
-
-## example
-
- // Display parent level ID information of tablet with specified tablet id of 10000
- SHOW TABLET 10000;
-
-## keyword
-SHOW,TABLET
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLETS.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLETS.md
deleted file mode 100644
index 0ed338d5af..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLETS.md
+++ /dev/null
@@ -1,56 +0,0 @@
----
-{
- "title": "SHOW TABLETS",
- "language": "en"
-}
----
-
-
-
-# SHOW TABLETS
-## Description
- This statement is used to display tablet-related information (for administrators only)
- Grammar:
- SHOW TABLETS
-    [FROM [db_name.]table_name] [partition(partition_name_1, partition_name_2)]
- [where [version=1] [and backendid=10000] [and state="NORMAL|ROLLUP|CLONE|DECOMMISSION"]]
- [order by order_column]
- [limit [offset,]size]
-
-## example
- // Display all tablets information in the specified table below the specified DB
- SHOW TABLETS FROM example_db.table_name;
-
- SHOW TABLETS FROM example_db.table_name partition(p1, p2);
-
- // display 10 tablets information in the table
- SHOW TABLETS FROM example_db.table_name limit 10;
-
- SHOW TABLETS FROM example_db.table_name limit 5,10;
-
- // display the tablets that fulfill some conditions
- SHOW TABLETS FROM example_db.table_name where backendid=10000 and version=1 and state="NORMAL";
-
- SHOW TABLETS FROM example_db.table_name where backendid=10000 order by version;
-
- SHOW TABLETS FROM example_db.table_name where indexname="t1_rollup";
-
-## keyword
- SHOW,TABLETS,LIMIT
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TRANSACTION.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TRANSACTION.md
deleted file mode 100644
index b88322dd39..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TRANSACTION.md
+++ /dev/null
@@ -1,100 +0,0 @@
----
-{
- "title": "SHOW TRANSACTION",
- "language": "en"
-}
----
-
-
-
-# SHOW TRANSACTION
-## description
-
-This syntax is used to view transaction details for the specified transaction id, label name or transaction status.
-
-grammar:
-
-```
-SHOW TRANSACTION
-[FROM db_name]
-WHERE
-[id = transaction_id]
-[label = label_name]
-[status = transaction_status];
-```
-
-Example return result:
-
-```
- TransactionId: 4005
- Label: insert_8d807d5d-bcdd-46eb-be6d-3fa87aa4952d
- Coordinator: FE: 10.74.167.16
- TransactionStatus: VISIBLE
- LoadJobSourceType: INSERT_STREAMING
- PrepareTime: 2020-01-09 14:59:07
- PreCommitTime: 2020-01-09 14:59:07
- CommitTime: 2020-01-09 14:59:09
- FinishTime: 2020-01-09 14:59:09
- Reason:
-ErrorReplicasCount: 0
- ListenerId: -1
- TimeoutMs: 300000
-```
-
-* TransactionId: transaction id
-* Label: the label of the corresponding load job
-* Coordinator: the node responsible for transaction coordination
-* TransactionStatus: transaction status
- * PREPARE: preparation stage
- * PRECOMMITTED: The transaction was precommitted
- * COMMITTED: The transaction was successful, but the data is not visible
- * VISIBLE: The transaction was successful and the data is visible
- * ABORTED: transaction failed
-* LoadJobSourceType: The type of the load job.
-* PrepareTime: transaction start time
-* PreCommitTime: the time when the transaction was precommitted
-* CommitTime: the time when the transaction was successfully committed
-* FinishTime: The time when the data is visible
-* Reason: error message
-* ErrorReplicasCount: Number of replicas with errors
-* ListenerId: the id of the related load job
-* TimeoutMs: transaction timeout time in milliseconds
-
-## example
-
-1. View the transaction with id 4005:
-
- SHOW TRANSACTION WHERE ID = 4005;
-
-2. Specify the db and view the transaction with id 4005:
-
- SHOW TRANSACTION FROM db WHERE ID = 4005;
-
-3. View the transaction with label `label_name`:
-
- SHOW TRANSACTION WHERE LABEL = 'label_name';
-
-4. View the transactions with status `visible`:
-
- SHOW TRANSACTION WHERE STATUS = 'visible';
-
-## keyword
-
- SHOW, TRANSACTION
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW-LAST-INSERT.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW-LAST-INSERT.md
deleted file mode 100644
index 5ea8da2db1..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW-LAST-INSERT.md
+++ /dev/null
@@ -1,67 +0,0 @@
----
-{
- "title": "SHOW LAST INSERT",
- "language": "en"
-}
----
-
-
-
-# SHOW LAST INSERT
-## description
-
-This syntax is used to view the result of the last insert operation in the current session connection.
-
-Syntax:
-
-```
-SHOW LAST INSERT
-```
-
-Example of return result.
-
-```
- TransactionId: 64067
- Label: insert_ba8f33aea9544866-8ed77e2844d0cc9b
- Database: default_cluster:db1
- Table: t1
-TransactionStatus: VISIBLE
- LoadedRows: 2
- FilteredRows: 0
-```
-
-* TransactionId: transaction id
-* Label: label corresponding to the insert task
-* Database: the database corresponding to the insert
-* Table: the table corresponding to the insert
-* TransactionStatus: the status of the transaction
- * PREPARE: preparation phase
- * PRECOMMITTED: Pre-committed stage
- * COMMITTED: The transaction is successful, but the data is not visible
- * VISIBLE: The transaction is successful and the data is visible
- * ABORTED: The transaction failed.
-* LoadedRows: Number of rows imported
-* FilteredRows: number of rows filtered
-
-## example
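-
-1. View the result of the last insert in the current session (a minimal illustrative usage, assuming an INSERT was executed earlier in this session):
-
-    SHOW LAST INSERT;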
-
-## keyword
-
- SHOW, LAST, INSERT
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD.md
deleted file mode 100644
index 487aaaed4c..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD.md
+++ /dev/null
@@ -1,35 +0,0 @@
----
-{
- "title": "STOP ROUTINE LOAD",
- "language": "en"
-}
----
-
-
-
-# STOP ROUTINE LOAD
-## example
-
-1. Stop the routine import job named test1.
-
-STOP ROUTINE LOAD FOR test1;
-
-## keyword
-STOP,ROUTINE,LOAD
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md b/docs/en/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md
deleted file mode 100644
index b9fc262d0c..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md
+++ /dev/null
@@ -1,44 +0,0 @@
----
-{
- "title": "STOP SYNC JOB",
- "language": "en"
-}
----
-
-
-
-# STOP SYNC JOB
-
-## description
-
-Stops an uncancelled SyncJob in the database.
-
-Syntax:
-
- STOP SYNC JOB [db.]job_name
-
-## example
-
-1. Stop the SyncJob named `job_name`
-
- STOP SYNC JOB `job_name`;
-
-## keyword
- STOP,SYNC,JOB,BINLOG
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md
deleted file mode 100644
index f2c90473ad..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md
+++ /dev/null
@@ -1,286 +0,0 @@
----
-{
- "title": "STREAM LOAD",
- "language": "en"
-}
----
-
-
-
-# STREAM LOAD
-## description
-
-NAME
-
-load data to table in streaming
-
-SYNOPSIS
-
-curl --location-trusted -u user:passwd [-H ""...] -T data.file -XPUT http://fe_host:http_port/api/{db}/{table}/_stream_load
-
-DESCRIPTION
-
-This statement is used to load data to the specified table. The difference from normal load is that this load method is synchronous load.
-
-This type of load still guarantees the atomicity of a batch of load tasks, either all data is loaded successfully or all fails.
-
-This operation also updates the data for the rollup table associated with this base table.
-
-This is a synchronous operation that returns the results to the user after the entire data load is completed.
-
-Currently, HTTP chunked and non-chunked uploads are supported. For non-chunked mode, Content-Length must be used to indicate the length of the uploaded content, which ensures data integrity.
-
-In addition, it is recommended that the user set the Expect header field to 100-continue, which avoids unnecessary data transmission in certain error scenarios.
-
-OPTIONS
-
-Users can pass in the load parameters through the Header part of HTTP.
-
-`label`
-
-The label of this load. Data with the same label cannot be loaded more than once. Users can avoid the problem of duplicate loads by specifying a label.
-
-Currently Palo internally retains the most recent successful label within 30 minutes.
-
-`column_separator`
-
-Used to specify the column separator in the load file. The default is `\t`. If it is an invisible character, you need to add the `\x` prefix and use hexadecimal to represent the separator.
-
-For example, the separator `\x01` of the hive file needs to be specified as `-H "column_separator:\x01"`.
-
-You can use a combination of multiple characters as the column separator.
-
-`line_delimiter`
-
-Used to specify the line delimiter in the load file. The default is `\n`.
-
-You can use a combination of multiple characters as the line delimiter.
-
-`columns`
-
-Used to specify the correspondence between the columns in the load file and the columns in the table. If the columns in the source file correspond exactly to the table schema, this field does not need to be specified. If the source file does not match the table schema, this field is required to perform the data conversion. There are two forms of columns. One corresponds directly to a field in the load file and is expressed simply by the field name.
-
-The other is a derived column, with the syntax `column_name = expression`. A few examples to help understand:
-
-Example 1: There are three columns "c1, c2, c3" in the table. The three columns in the source file correspond to "c3, c2, c1" in turn; then you need to specify `-H "columns: c3, c2, c1"`
-
-Example 2: There are three columns in the table, "c1, c2, c3". The first three columns in the source file correspond in turn, but the file has one extra column; then you need to specify `-H "columns: c1, c2, c3, xxx"`
-
-The last column can be given an arbitrary name as a placeholder.
-
-Example 3: There are three columns in the table, "year, month, day". There is only one time column in the source file, which is "2018-06-01 01:02:03" format. Then you can specify `-H "columns: col, year = year(col), month=month(col), day=day(col)"` to complete the load.
-
-`where`
-
-Used to extract part of the data. If the user needs to filter out unwanted data, it can be achieved by setting this option.
-
-Example 1: to load only the rows where the k1 column equals 20180601, specify -H "where: k1 = 20180601" when loading
-
-`max_filter_ratio`
-
-The maximum proportion of data that can be filtered (for reasons such as data irregularity). The default is zero tolerance. Irregular data does not include rows that are filtered out by the where condition.
-
-`partitions`
-
-Used to specify the partitions involved in this load. If the user can determine the partitions that the data belongs to, it is recommended to specify them. Data that does not belong to these partitions will be filtered out.
-
-For example, to load into the p1 and p2 partitions: `-H "partitions: p1, p2"`
-
-`timeout`
-
-Specifies the timeout for the load. Unit seconds. The default is 600 seconds. The range is from 1 second to 259200 seconds.
-
-`strict_mode`
-
-The user specifies whether strict load mode is enabled for this load. The default is disabled. Enable it with `-H "strict_mode: true"`.
-
-`timezone`
-
-Specifies the time zone used for this load. The default is GMT+8. This parameter affects the results of all time-zone-related functions involved in the load.
-
-`exec_mem_limit`
-
-Memory limit. Default is 2GB. Unit is Bytes.
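-
-For example, to raise the limit to 4 GB for a single load (illustrative value): `-H "exec_mem_limit: 4294967296"`.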
-
-`format`
-Specifies the format of the imported data. Supports csv, csv_with_names, csv_with_names_and_types and json; the default is csv.
-
-`jsonpaths`
-There are two ways to import JSON: simple mode and matched mode. If jsonpaths is set, the matched mode is used; otherwise the simple mode is used. Please refer to the examples for details.
-
-`strip_outer_array`
-Boolean type, true indicates that the JSON data starts with an array and the objects in the array are flattened. The default value is false.
-
-`json_root`
-json_root is a valid JSONPATH string that specifies the root node of the JSON Document. The default value is "".
-
-`merge_type`
-
-The data merge type supports three values: APPEND, DELETE and MERGE. APPEND is the default, meaning this batch of data is appended to the existing data. DELETE means deleting all rows with the same key as this batch of data. MERGE semantics must be used in conjunction with a delete condition: data that meets the delete condition is processed according to DELETE semantics, and the rest according to APPEND semantics.
-
-`fuzzy_parse` Boolean type, true indicates that the JSON schema is parsed from the first row, which can make the import faster, but requires the keys of all rows to keep the same order as the first row. The default value is false. Only used for the json format.
-
-
-`num_as_string` Boolean type, true means that when parsing JSON data, numeric values are converted to strings so that they can be imported without loss of precision.
-
-`read_json_by_line`: Boolean type, true means that one json object can be read per line, and the default value is false.
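-
-For example (illustrative), when each line of the file is a separate JSON object, this option can be combined with the json format header: `-H "format: json" -H "read_json_by_line: true"`.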
-
-`send_batch_parallelism`: Integer type, used to set the default parallelism for sending batches. If the value exceeds `max_send_batch_parallelism_per_job` in the BE config, the coordinator BE will use the value of `max_send_batch_parallelism_per_job`.
-
-`load_to_single_tablet`: Boolean type, True means that one task can only load data to one tablet in the corresponding partition at a time. The default value is false. This parameter can only be set when loading data into the OLAP table with random partition.
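-
-Like the options above, these last two parameters are passed as HTTP headers; for example (illustrative): `-H "send_batch_parallelism: 2"` or `-H "load_to_single_tablet: true"`.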
-
-RETURN VALUES
-
-After the load is completed, the related content of this load will be returned in JSON format. The currently included fields are:
-
-* `Status`: load status.
-
- * Success: indicates that the load is successful and the data is visible.
-
-    * Publish Timeout: Indicates that the load job has been successfully committed, but for some reason the data is not immediately visible. The load can be considered successful and the user does not have to retry.
-
- * Label Already Exists: Indicates that the Label is already occupied by another job, either the load was successful or it is being loaded. The user needs to use the get label state command to determine the subsequent operations.
-
- * Other: The load failed, the user can specify Label to retry the job.
-
-* Message: A detailed description of the load status. When it fails, it will return the specific reason for failure.
-
-* NumberTotalRows: The total number of rows read from the data stream
-
-* NumberLoadedRows: The number of data rows loaded this time, only valid when Success
-
-* NumberFilteredRows: The number of rows filtered by this load, that is, the number of rows with unqualified data quality.
-
-* NumberUnselectedRows: Number of rows that were filtered by the where condition for this load
-
-* LoadBytes: The amount of source file data loaded this time
-
-* LoadTimeMs: Time spent on this load
-
-* BeginTxnTimeMs: The time cost for RPC to Fe to begin a transaction, Unit milliseconds.
-
-* StreamLoadPutTimeMs: The time cost for RPC to Fe to get a stream load plan, Unit milliseconds.
-
-* ReadDataTimeMs: Read data time, Unit milliseconds.
-
-* WriteDataTimeMs: Write data time, Unit milliseconds.
-
-* CommitAndPublishTimeMs: The time cost for RPC to Fe to commit and publish a transaction, Unit milliseconds.
-
-* ErrorURL: The specific content of the filtered data, only the first 1000 items are retained
-
-ERRORS
-
-You can view the load error details by the following statement:
-
- ```SHOW LOAD WARNINGS ON 'url'```
-
-Where url is the url given by ErrorURL.
-
-## example
-
-1. load the data from the local file 'testData' into the table 'testTbl' in the database 'testDb' and use Label for deduplication. Specify a timeout of 100 seconds
-
-    ```curl --location-trusted -u root -H "label:123" -H "timeout:100" -T testData http://host:port/api/testDb/testTbl/_stream_load```
-
-2. load the data in the local file 'testData' into the table of 'testTbl' in the database 'testDb', use Label for deduplication, and load only data with k1 equal to 20180601
-
-    ```curl --location-trusted -u root -H "label:123" -H "where: k1=20180601" -T testData http://host:port/api/testDb/testTbl/_stream_load```
-
-3. load the data from the local file 'testData' into the 'testTbl' table in the database 'testDb', allowing a 20% error rate (user is in default_cluster)
-
-    ```curl --location-trusted -u root -H "label:123" -H "max_filter_ratio:0.2" -T testData http://host:port/api/testDb/testTbl/_stream_load```
-
-4. load the data from the local file 'testData' into the 'testTbl' table in the database 'testDb', allow a 20% error rate, and specify the column name of the file (user is in default_cluster)
-
-    ```curl --location-trusted -u root -H "label:123" -H "max_filter_ratio:0.2" -H "columns: k2, k1, v1" -T testData http://host:port/api/testDb/testTbl/_stream_load```
-
-5. load the data from the local file 'testData' into the p1, p2 partition in the 'testTbl' table in the database 'testDb', allowing a 20% error rate.
-
-    ```curl --location-trusted -u root -H "label:123" -H "max_filter_ratio:0.2" -H "partitions: p1, p2" -T testData http://host:port/api/testDb/testTbl/_stream_load```
-
-6. load using streaming mode (user is in default_cluster)
-
-    ```seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_stream_load```
-
-7. load a table with HLL columns, which can be columns in the table or columns in the data used to generate HLL columns,you can also use hll_empty to supplement columns that are not in the data
-
-    ```curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=hll_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
-
-8. load data for strict mode filtering and set the time zone to Africa/Abidjan
-
-    ```curl --location-trusted -u root -H "strict_mode: true" -H "timezone: Africa/Abidjan" -T testData http://host:port/api/testDb/testTbl/_stream_load```
-
-9. load a table with BITMAP columns, which can be columns in the table or a column in the data used to generate BITMAP columns, you can also use bitmap_empty to supplement columns that are not in the data
-
-    ```curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1), v2=bitmap_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
-
-10. load a table with QUANTILE_STATE columns, which can be columns in the table or a column in the data used to generate QUANTILE_STATE columns. You can also use TO_QUANTILE_STATE to convert numerical data to QUANTILE_STATE. 2048 is an optional parameter representing the precision of the TDigest algorithm; the valid range is [2048, 10000], and the larger the value, the higher the precision. The default is 2048.
-
-    ```curl --location-trusted -u root -H "columns: k1, k2, v1, v2, v1=to_quantile_state(v1, 2048)" -T testData http://host:port/api/testDb/testTbl/_stream_load```
-
-11. a simple load json
- table schema:
- `category` varchar(512) NULL COMMENT "",
- `author` varchar(512) NULL COMMENT "",
- `title` varchar(512) NULL COMMENT "",
- `price` double NULL COMMENT ""
- json data:
- {"category":"C++","author":"avc","title":"C++ primer","price":895}
- load command by curl:
- curl --location-trusted -u root -H "label:123" -H "format: json" -T testData http://host:port/api/testDb/testTbl/_stream_load
-    In order to improve throughput, it supports importing multiple pieces of json data at one time. Each row is a json object. The default value for the line delimiter is `\n`. Need to set read_json_by_line to true. The json data format is as follows:
- {"category":"C++","author":"avc","title":"C++ primer","price":89.5}
- {"category":"Java","author":"avc","title":"Effective Java","price":95}
- {"category":"Linux","author":"avc","title":"Linux kernel","price":195}
-
-12. Matched load json by jsonpaths
- For example json data:
- [
- {"category":"xuxb111","author":"1avc","title":"SayingsoftheCentury","price":895},
- {"category":"xuxb222","author":"2avc","title":"SayingsoftheCentury","price":895},
- {"category":"xuxb333","author":"3avc","title":"SayingsoftheCentury","price":895}
- ]
- Matched imports are made by specifying jsonpath parameter, such as `category`, `author`, and `price`, for example:
- curl --location-trusted -u root -H "columns: category, price, author" -H "label:123" -H "format: json" -H "jsonpaths: [\"$.category\",\"$.price\",\"$.author\"]" -H "strip_outer_array: true" -T testData http://host:port/api/testDb/testTbl/_stream_load
- Tips:
- 1)If the json data starts as an array and each object in the array is a record, you need to set the strip_outer_array to true to represent the flat array.
- 2)If the json data starts with an array, and each object in the array is a record, our ROOT node is actually an object in the array when we set jsonpath.
-
-13. User specifies the json_root node
- For example json data:
- {
- "RECORDS":[
- {"category":"11","title":"SayingsoftheCentury","price":895,"timestamp":1589191587},
- {"category":"22","author":"2avc","price":895,"timestamp":1589191487},
- {"category":"33","author":"3avc","title":"SayingsoftheCentury","timestamp":1589191387}
- ]
- }
- Matched imports are made by specifying jsonpath parameter, such as `category`, `author`, and `price`, for example:
- curl --location-trusted -u root -H "columns: category, price, author" -H "label:123" -H "format: json" -H "jsonpaths: [\"$.category\",\"$.price\",\"$.author\"]" -H "strip_outer_array: true" -H "json_root: $.RECORDS" -T testData http://host:port/api/testDb/testTbl/_stream_load
-
-14. delete all data which key columns match the load data
- curl --location-trusted -u root -H "merge_type: DELETE" -T testData http://host:port/api/testDb/testTbl/_stream_load
-15. delete all data which key columns match the load data where flag is true, others append
- curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv, flag" -H "merge_type: MERGE" -H "delete: flag=1" -T testData http://host:port/api/testDb/testTbl/_stream_load
-
-## keyword
-
- STREAM, LOAD
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/UPDATE.md b/docs/en/sql-reference/sql-statements/Data Manipulation/UPDATE.md
deleted file mode 100644
index d087659e08..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/UPDATE.md
+++ /dev/null
@@ -1,75 +0,0 @@
----
-{
- "title": "UPDATE",
- "language": "en"
-}
----
-
-
-
-# UPDATE
-## description
-### Syntax
-
-```
-UPDATE table_name
- SET assignment_list
- WHERE expression
-
-value:
- {expr | DEFAULT}
-
-assignment:
- col_name = value
-
-assignment_list:
- assignment [, assignment] ...
-```
-
-### Parameters
-
-+ table_name: The target table of the data to be updated. Can be in the form of 'db_name.table_name'
-+ assignment_list: The target column to be updated. Can be in the form of 'col_name = value, col_name = value'
-+ where expression: The condition to be updated is an expression that returns true or false
-
-### Note
-
-The current UPDATE statement only supports row updates on the Unique model, and there may be data conflicts caused by concurrent updates.
-Currently Doris does not deal with such problems, and users are required to avoid such problems from the business side.
-
-## example
-
-The `test` table is a unique model table, which contains four columns: k1, k2, v1, v2. Among them, k1, k2 are keys, v1, v2 are values, and the aggregation method is Replace.
-
-1. Update the v1 column of the rows that satisfy the conditions k1 = 1 and k2 = 2 in the 'test' table to 1
-
-```
-UPDATE test SET v1 = 1 WHERE k1=1 and k2=2;
-```
-
-2. Increment the v1 column of the rows with k1 = 1 in the 'test' table by 1
-
-```
-UPDATE test SET v1 = v1+1 WHERE k1=1;
-```
-
-## keyword
-
- UPDATE
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/alter-routine-load.md b/docs/en/sql-reference/sql-statements/Data Manipulation/alter-routine-load.md
deleted file mode 100644
index bc226975eb..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/alter-routine-load.md
+++ /dev/null
@@ -1,115 +0,0 @@
----
-{
- "title": "ALTER ROUTINE LOAD",
- "language": "en"
-}
----
-
-
-
-# ALTER ROUTINE LOAD
-## description
-
-This syntax is used to modify a routine import job that has been created.
-
-Only jobs in the PAUSED state can be modified.
-
-Syntax:
-
- ALTER ROUTINE LOAD FOR [db.]job_name
- [job_properties]
- FROM data_source
- [data_source_properties]
-
-1. `[db.]job_name`
-
- Specify the name of the job to be modified.
-
-2. `job_properties`
-
- Specify the job parameters that need to be modified. Currently only supports the modification of the following parameters:
-
- 1. `desired_concurrent_number`
- 2. `max_error_number`
- 3. `max_batch_interval`
- 4. `max_batch_rows`
- 5. `max_batch_size`
- 6. `jsonpaths`
- 7. `json_root`
- 8. `strip_outer_array`
- 9. `strict_mode`
- 10. `timezone`
- 11. `num_as_string`
- 12. `fuzzy_parse`
-
-
-3. `data_source`
-
- The type of data source. Currently supported:
-
- KAFKA
-
-4. `data_source_properties`
-
- The relevant attributes of the data source. Currently only supports:
-
- 1. `kafka_partitions`
- 2. `kafka_offsets`
- 3. `kafka_broker_list`
- 4. `kafka_topic`
- 5. Custom property, such as `property.group.id`
-
- Notice:
-
- 1. `kafka_partitions` and `kafka_offsets` are used to modify the offset of the kafka partition to be consumed, and can only modify the currently consumed partition. Cannot add partition.
-
-## example
-
-1. Modify `desired_concurrent_number` to 1
-
- ```
- ALTER ROUTINE LOAD FOR db1.label1
- PROPERTIES
- (
- "desired_concurrent_number" = "1"
- );
- ```
-
-2. Modify `desired_concurrent_number` to 10, modify partition offset, and modify group id.
-
- ```
- ALTER ROUTINE LOAD FOR db1.label1
- PROPERTIES
- (
- "desired_concurrent_number" = "10"
- )
- FROM kafka
- (
- "kafka_partitions" = "0, 1, 2",
- "kafka_offsets" = "100, 200, 100",
- "property.group.id" = "new_group"
- );
- ```
-
-
-## keyword
-
- ALTER,ROUTINE,LOAD
-
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/insert.md b/docs/en/sql-reference/sql-statements/Data Manipulation/insert.md
deleted file mode 100644
index 2f4d06768d..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/insert.md
+++ /dev/null
@@ -1,110 +0,0 @@
----
-{
- "title": "INSERT",
- "language": "en"
-}
----
-
-
-
-# INSERT
-## Description
-### Syntax
-
-```
-INSERT INTO table_name
-[ PARTITION (p1, ...)]
-[ WITH LABEL label]
-[ (column [, ...]) ]
-[ [ hint [, ...] ] ]
-{ VALUES ( { expression | DEFAULT } [, ...] ) [, ...] | query }
-```
-
-### Parameters
-
-> table_name: Target table for loading data. It can be in the form of `db_name.table_name`.
->
-> partitions: Specifies the partitions to be loaded, with multiple partition names separated by commas. The partitions must exist in `table_name`,
->
-> label: Specifies a label for Insert job.
->
-> column_name: The specified destination columns must be columns that exists in `table_name`.
->
-> expression: The corresponding expression that needs to be assigned to a column.
->
-> DEFAULT: Let the corresponding columns use default values
->
-> query: A common query whose results are written to the target
->
-> hint: Indicators used to indicate `INSERT` execution behavior. Both `streaming` and the default non-`streaming` mode execute the `INSERT` statement synchronously.
-> The non-`streaming` mode returns a label after execution, so that users can query the status of the load job with `SHOW LOAD`.
-
-### Note
-
-When the `INSERT` statement is executed, the default behavior is to filter out data that does not conform to the target table, such as strings that are too long. However, for business scenarios that require the data not to be filtered, the session variable `enable_insert_strict` can be set to `true` to ensure that `INSERT` will not succeed when any data is filtered out.
-
-## example
-
-The `test` table contains two columns `c1`, `c2`.
-
-1. Import a row of data into the `test` table
-
-```
-INSERT INTO test VALUES (1, 2);
-INSERT INTO test (c1, c2) VALUES (1, 2);
-INSERT INTO test (c1, c2) VALUES (1, DEFAULT);
-INSERT INTO test (c1) VALUES (1);
-```
-
-The first and second statements have the same effect. When the target columns are not specified, the column order in the table is used as the default target columns.
-The third and fourth statements express the same meaning, using the default value of the `c2` column to complete the data import.
-
-2. Import multiline data into the `test` table at one time
-
-```
-INSERT INTO test VALUES (1, 2), (3, 2 + 2)
-INSERT INTO test (c1, c2) VALUES (1, 2), (3, 2 * 2)
-INSERT INTO test (c1) VALUES (1), (3)
-INSERT INTO test (c1, c2) VALUES (1, DEFAULT), (3, DEFAULT)
-```
-
-The effect of the first and second statements is the same: two rows are imported into the `test` table at one time.
-The effect of the third and fourth statements is the same, using the default value of the `c2` column to import two rows into the `test` table.
-
-
-3. Insert into table `test` with a query stmt.
-
-```
-INSERT INTO test SELECT * FROM test2
-INSERT INTO test (c1, c2) SELECT * from test2
-```
-
-4. Insert into table `test` with specified partition and label
-
-```
-INSERT INTO test PARTITION(p1, p2) WITH LABEL `label1` SELECT * FROM test2;
-INSERT INTO test WITH LABEL `label1` (c1, c2) SELECT * from test2;
-```
-
-An asynchronous import is, in fact, a synchronous import wrapped as asynchronous. Specifying `streaming` is as efficient as not specifying it.
-
-Since Doris used to import data asynchronously, in order to be compatible with old usage habits, the `INSERT` statement without `streaming` still returns a label. Users need to view the status of the label's import job with the `SHOW LOAD` command.
-## keyword
-INSERT
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/lateral-view.md b/docs/en/sql-reference/sql-statements/Data Manipulation/lateral-view.md
deleted file mode 100644
index 33cb1d513b..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/lateral-view.md
+++ /dev/null
@@ -1,94 +0,0 @@
----
-{
- "title": "Lateral View",
- "language": "en"
-}
----
-
-
-
-# Lateral View
-
-## description
-
-Lateral view syntax can be used with Table Function to fulfill the requirement of expanding one row of data into multiple rows (column to rows).
-
-grammar:
-
-```
-...
-FROM table_name
-lateral_view_ref[ lateral_view_ref ...]
-
-lateral_view_ref:
-
-LATERAL VIEW table_function(...) view_alias as col_name
-```
-
-The Lateral view clause must follow the table name or subquery. Can contain multiple Lateral view clauses. `view_alias` is the name of the corresponding Lateral View. `col_name` is the name of the column produced by the table function `table_function`.
-
-Table functions currently supported:
-
-1. `explode_split`
-2. `explode_bitmap`
-3. `explode_json_array`
-
-For specific function descriptions, please refer to the corresponding syntax help documentation.
-
-The data in the table will be Cartesian product with the result set produced by each Lateral View and then return to the upper level.
-
-## example
-
-Here, only the syntax example of Lateral View is given. For the specific meaning and output result description, please refer to the help document of the corresponding table function.
-
-1.
-
-```
-select k1, e1 from tbl1
-lateral view explode_split(v1,',') tmp1 as e1 where e1 = "abc";
-```
-
-2.
-
-```
-select k1, e1, e2 from tbl2
-lateral view explode_split(v1,',') tmp1 as e1
-lateral view explode_bitmap(bitmap1) tmp2 as e2
-where e2> 3;
-```
-
-3.
-
-```
-select k1, e1, e2 from tbl3
-lateral view explode_json_array_int("[1,2,3]") tmp1 as e1
-lateral view explode_bitmap(bitmap_from_string("4,5,6")) tmp2 as e2;
-```
-
-4.
-
-```
-select k1, e1 from (select k1, bitmap_union(members) as x from tbl1 where k1=10000 group by k1)tmp1
-lateral view explode_bitmap(x) tmp2 as e1;
-```
-
-## keyword
-
- LATERAL, VIEW
diff --git a/docs/en/sql-reference/sql-statements/Utility/util_stmt.md b/docs/en/sql-reference/sql-statements/Utility/util_stmt.md
deleted file mode 100644
index 955b2dd9c1..0000000000
--- a/docs/en/sql-reference/sql-statements/Utility/util_stmt.md
+++ /dev/null
@@ -1,39 +0,0 @@
----
-{
- "title": "Describe",
- "language": "en"
-}
----
-
-
-
-# Describe
-## Description
-This statement is used to display the schema information of the specified table.
-Grammar:
-DESC[RIBE] [db_name.]table_name [ALL];
-
-Explain:
-If ALL is specified, the schemas of all indexes of the table are displayed.
-
-## example
-
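-The following are minimal illustrative examples; the table name `table1` and database name `example_db` are placeholders:
-
-1. Display the schema of `table1`:
-
-```
-DESC table1;
-```
-
-2. Display the schemas of all indexes of `table1` in `example_db`:
-
-```
-DESC example_db.table1 ALL;
-```
-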
-## keyword
-DESCRIBE,DESC
diff --git a/new-docs/en/summary/basic-summary.md b/docs/en/summary/basic-summary.md
similarity index 100%
rename from new-docs/en/summary/basic-summary.md
rename to docs/en/summary/basic-summary.md
diff --git a/new-docs/en/summary/system-architecture.md b/docs/en/summary/system-architecture.md
similarity index 95%
rename from new-docs/en/summary/system-architecture.md
rename to docs/en/summary/system-architecture.md
index 2e9b3bb7b9..f7178ee8de 100644
--- a/new-docs/en/summary/system-architecture.md
+++ b/docs/en/summary/system-architecture.md
@@ -24,4 +24,6 @@ specific language governing permissions and limitations
under the License.
-->
-# Doris system architecture
\ No newline at end of file
+# Doris system architecture
+
+(TODO)
\ No newline at end of file
diff --git a/docs/zh-CN/README.md b/docs/zh-CN/README.md
index cf45c79ee8..d9b222a88a 100644
--- a/docs/zh-CN/README.md
+++ b/docs/zh-CN/README.md
@@ -89,7 +89,7 @@ cases:
- logo: /images/home/logo-youdao.png
alt: 有道
actionText: 快速上手 →
-actionLink: /zh-CN/installing/compilation
+actionLink: /zh-CN/get-starting/get-starting
articleText: 最新动态
articleLink: /zh-CN/article/article-list
---
diff --git a/new-docs/zh-CN/admin-manual/cluster-management/elastic-expansion.md b/docs/zh-CN/admin-manual/cluster-management/elastic-expansion.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/cluster-management/elastic-expansion.md
rename to docs/zh-CN/admin-manual/cluster-management/elastic-expansion.md
diff --git a/new-docs/zh-CN/admin-manual/cluster-management/load-balancing.md b/docs/zh-CN/admin-manual/cluster-management/load-balancing.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/cluster-management/load-balancing.md
rename to docs/zh-CN/admin-manual/cluster-management/load-balancing.md
diff --git a/new-docs/zh-CN/admin-manual/cluster-management/upgrade.md b/docs/zh-CN/admin-manual/cluster-management/upgrade.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/cluster-management/upgrade.md
rename to docs/zh-CN/admin-manual/cluster-management/upgrade.md
diff --git a/new-docs/zh-CN/admin-manual/config/be-config.md b/docs/zh-CN/admin-manual/config/be-config.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/config/be-config.md
rename to docs/zh-CN/admin-manual/config/be-config.md
diff --git a/new-docs/zh-CN/admin-manual/config/fe-config.md b/docs/zh-CN/admin-manual/config/fe-config.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/config/fe-config.md
rename to docs/zh-CN/admin-manual/config/fe-config.md
diff --git a/new-docs/zh-CN/admin-manual/config/user-property.md b/docs/zh-CN/admin-manual/config/user-property.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/config/user-property.md
rename to docs/zh-CN/admin-manual/config/user-property.md
diff --git a/new-docs/zh-CN/admin-manual/data-admin/backup.md b/docs/zh-CN/admin-manual/data-admin/backup.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/data-admin/backup.md
rename to docs/zh-CN/admin-manual/data-admin/backup.md
diff --git a/new-docs/zh-CN/admin-manual/data-admin/delete-recover.md b/docs/zh-CN/admin-manual/data-admin/delete-recover.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/data-admin/delete-recover.md
rename to docs/zh-CN/admin-manual/data-admin/delete-recover.md
diff --git a/new-docs/zh-CN/admin-manual/data-admin/restore.md b/docs/zh-CN/admin-manual/data-admin/restore.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/data-admin/restore.md
rename to docs/zh-CN/admin-manual/data-admin/restore.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/cancel-label.md b/docs/zh-CN/admin-manual/http-actions/cancel-label.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/cancel-label.md
rename to docs/zh-CN/admin-manual/http-actions/cancel-label.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/check-reset-rpc-cache.md b/docs/zh-CN/admin-manual/http-actions/check-reset-rpc-cache.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/check-reset-rpc-cache.md
rename to docs/zh-CN/admin-manual/http-actions/check-reset-rpc-cache.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/compaction-action.md b/docs/zh-CN/admin-manual/http-actions/compaction-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/compaction-action.md
rename to docs/zh-CN/admin-manual/http-actions/compaction-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/connection-action.md b/docs/zh-CN/admin-manual/http-actions/connection-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/connection-action.md
rename to docs/zh-CN/admin-manual/http-actions/connection-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe-get-log-file.md b/docs/zh-CN/admin-manual/http-actions/fe-get-log-file.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe-get-log-file.md
rename to docs/zh-CN/admin-manual/http-actions/fe-get-log-file.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/backends-action.md b/docs/zh-CN/admin-manual/http-actions/fe/backends-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/backends-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/backends-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/bootstrap-action.md b/docs/zh-CN/admin-manual/http-actions/fe/bootstrap-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/bootstrap-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/bootstrap-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/cancel-load-action.md b/docs/zh-CN/admin-manual/http-actions/fe/cancel-load-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/cancel-load-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/cancel-load-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/check-decommission-action.md b/docs/zh-CN/admin-manual/http-actions/fe/check-decommission-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/check-decommission-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/check-decommission-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/check-storage-type-action.md b/docs/zh-CN/admin-manual/http-actions/fe/check-storage-type-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/check-storage-type-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/check-storage-type-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/config-action.md b/docs/zh-CN/admin-manual/http-actions/fe/config-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/config-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/config-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/connection-action.md b/docs/zh-CN/admin-manual/http-actions/fe/connection-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/connection-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/connection-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/get-ddl-stmt-action.md b/docs/zh-CN/admin-manual/http-actions/fe/get-ddl-stmt-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/get-ddl-stmt-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/get-ddl-stmt-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/get-load-info-action.md b/docs/zh-CN/admin-manual/http-actions/fe/get-load-info-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/get-load-info-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/get-load-info-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/get-load-state.md b/docs/zh-CN/admin-manual/http-actions/fe/get-load-state.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/get-load-state.md
rename to docs/zh-CN/admin-manual/http-actions/fe/get-load-state.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/get-log-file-action.md b/docs/zh-CN/admin-manual/http-actions/fe/get-log-file-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/get-log-file-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/get-log-file-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/get-small-file.md b/docs/zh-CN/admin-manual/http-actions/fe/get-small-file.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/get-small-file.md
rename to docs/zh-CN/admin-manual/http-actions/fe/get-small-file.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/ha-action.md b/docs/zh-CN/admin-manual/http-actions/fe/ha-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/ha-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/ha-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/hardware-info-action.md b/docs/zh-CN/admin-manual/http-actions/fe/hardware-info-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/hardware-info-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/hardware-info-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/health-action.md b/docs/zh-CN/admin-manual/http-actions/fe/health-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/health-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/health-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/log-action.md b/docs/zh-CN/admin-manual/http-actions/fe/log-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/log-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/log-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/logout-action.md b/docs/zh-CN/admin-manual/http-actions/fe/logout-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/logout-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/logout-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/manager/cluster-action.md b/docs/zh-CN/admin-manual/http-actions/fe/manager/cluster-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/manager/cluster-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/manager/cluster-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/manager/node-action.md b/docs/zh-CN/admin-manual/http-actions/fe/manager/node-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/manager/node-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/manager/node-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/manager/query-profile-action.md b/docs/zh-CN/admin-manual/http-actions/fe/manager/query-profile-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/manager/query-profile-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/manager/query-profile-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/meta-action.md b/docs/zh-CN/admin-manual/http-actions/fe/meta-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/meta-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/meta-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/meta-info-action.md b/docs/zh-CN/admin-manual/http-actions/fe/meta-info-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/meta-info-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/meta-info-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/meta-replay-state-action.md b/docs/zh-CN/admin-manual/http-actions/fe/meta-replay-state-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/meta-replay-state-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/meta-replay-state-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/profile-action.md b/docs/zh-CN/admin-manual/http-actions/fe/profile-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/profile-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/profile-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/query-detail-action.md b/docs/zh-CN/admin-manual/http-actions/fe/query-detail-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/query-detail-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/query-detail-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/query-profile-action.md b/docs/zh-CN/admin-manual/http-actions/fe/query-profile-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/query-profile-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/query-profile-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/row-count-action.md b/docs/zh-CN/admin-manual/http-actions/fe/row-count-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/row-count-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/row-count-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/session-action.md b/docs/zh-CN/admin-manual/http-actions/fe/session-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/session-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/session-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/set-config-action.md b/docs/zh-CN/admin-manual/http-actions/fe/set-config-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/set-config-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/set-config-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/show-data-action.md b/docs/zh-CN/admin-manual/http-actions/fe/show-data-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/show-data-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/show-data-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/show-meta-info-action.md b/docs/zh-CN/admin-manual/http-actions/fe/show-meta-info-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/show-meta-info-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/show-meta-info-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/show-proc-action.md b/docs/zh-CN/admin-manual/http-actions/fe/show-proc-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/show-proc-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/show-proc-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/show-runtime-info-action.md b/docs/zh-CN/admin-manual/http-actions/fe/show-runtime-info-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/show-runtime-info-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/show-runtime-info-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/statement-execution-action.md b/docs/zh-CN/admin-manual/http-actions/fe/statement-execution-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/statement-execution-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/statement-execution-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/system-action.md b/docs/zh-CN/admin-manual/http-actions/fe/system-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/system-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/system-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/table-query-plan-action.md b/docs/zh-CN/admin-manual/http-actions/fe/table-query-plan-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/table-query-plan-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/table-query-plan-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/table-row-count-action.md b/docs/zh-CN/admin-manual/http-actions/fe/table-row-count-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/table-row-count-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/table-row-count-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/table-schema-action.md b/docs/zh-CN/admin-manual/http-actions/fe/table-schema-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/table-schema-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/table-schema-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/upload-action.md b/docs/zh-CN/admin-manual/http-actions/fe/upload-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/fe/upload-action.md
rename to docs/zh-CN/admin-manual/http-actions/fe/upload-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/get-load-state.md b/docs/zh-CN/admin-manual/http-actions/get-load-state.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/get-load-state.md
rename to docs/zh-CN/admin-manual/http-actions/get-load-state.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/get-tablets.md b/docs/zh-CN/admin-manual/http-actions/get-tablets.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/get-tablets.md
rename to docs/zh-CN/admin-manual/http-actions/get-tablets.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/profile-action.md b/docs/zh-CN/admin-manual/http-actions/profile-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/profile-action.md
rename to docs/zh-CN/admin-manual/http-actions/profile-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/query-detail-action.md b/docs/zh-CN/admin-manual/http-actions/query-detail-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/query-detail-action.md
rename to docs/zh-CN/admin-manual/http-actions/query-detail-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/restore-tablet.md b/docs/zh-CN/admin-manual/http-actions/restore-tablet.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/restore-tablet.md
rename to docs/zh-CN/admin-manual/http-actions/restore-tablet.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/show-data-action.md b/docs/zh-CN/admin-manual/http-actions/show-data-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/show-data-action.md
rename to docs/zh-CN/admin-manual/http-actions/show-data-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/tablet-migration-action.md b/docs/zh-CN/admin-manual/http-actions/tablet-migration-action.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/tablet-migration-action.md
rename to docs/zh-CN/admin-manual/http-actions/tablet-migration-action.md
diff --git a/docs/zh-CN/administrator-guide/http-actions/tablets_distribution.md b/docs/zh-CN/admin-manual/http-actions/tablets_distribution.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/http-actions/tablets_distribution.md
rename to docs/zh-CN/admin-manual/http-actions/tablets_distribution.md
diff --git a/new-docs/zh-CN/admin-manual/maint-monitor/be-olap-error-code.md b/docs/zh-CN/admin-manual/maint-monitor/be-olap-error-code.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/maint-monitor/be-olap-error-code.md
rename to docs/zh-CN/admin-manual/maint-monitor/be-olap-error-code.md
diff --git a/new-docs/zh-CN/admin-manual/maint-monitor/disk-capacity.md b/docs/zh-CN/admin-manual/maint-monitor/disk-capacity.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/maint-monitor/disk-capacity.md
rename to docs/zh-CN/admin-manual/maint-monitor/disk-capacity.md
diff --git a/new-docs/zh-CN/admin-manual/maint-monitor/doris-error-code.md b/docs/zh-CN/admin-manual/maint-monitor/doris-error-code.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/maint-monitor/doris-error-code.md
rename to docs/zh-CN/admin-manual/maint-monitor/doris-error-code.md
diff --git a/new-docs/zh-CN/admin-manual/maint-monitor/metadata-operation.md b/docs/zh-CN/admin-manual/maint-monitor/metadata-operation.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/maint-monitor/metadata-operation.md
rename to docs/zh-CN/admin-manual/maint-monitor/metadata-operation.md
diff --git a/docs/zh-CN/administrator-guide/operation/monitor-alert.md b/docs/zh-CN/admin-manual/maint-monitor/monitor-alert.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/operation/monitor-alert.md
rename to docs/zh-CN/admin-manual/maint-monitor/monitor-alert.md
diff --git a/docs/zh-CN/administrator-guide/operation/monitor-metrics/be-metrics.md b/docs/zh-CN/admin-manual/maint-monitor/monitor-metrics/be-metrics.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/operation/monitor-metrics/be-metrics.md
rename to docs/zh-CN/admin-manual/maint-monitor/monitor-metrics/be-metrics.md
diff --git a/docs/zh-CN/administrator-guide/operation/monitor-metrics/fe-metrics.md b/docs/zh-CN/admin-manual/maint-monitor/monitor-metrics/fe-metrics.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/operation/monitor-metrics/fe-metrics.md
rename to docs/zh-CN/admin-manual/maint-monitor/monitor-metrics/fe-metrics.md
diff --git a/docs/zh-CN/administrator-guide/operation/tablet-meta-tool.md b/docs/zh-CN/admin-manual/maint-monitor/tablet-meta-tool.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/operation/tablet-meta-tool.md
rename to docs/zh-CN/admin-manual/maint-monitor/tablet-meta-tool.md
diff --git a/new-docs/zh-CN/admin-manual/maint-monitor/tablet-repair-and-balance.md b/docs/zh-CN/admin-manual/maint-monitor/tablet-repair-and-balance.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/maint-monitor/tablet-repair-and-balance.md
rename to docs/zh-CN/admin-manual/maint-monitor/tablet-repair-and-balance.md
diff --git a/docs/zh-CN/administrator-guide/operation/tablet-restore-tool.md b/docs/zh-CN/admin-manual/maint-monitor/tablet-restore-tool.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/operation/tablet-restore-tool.md
rename to docs/zh-CN/admin-manual/maint-monitor/tablet-restore-tool.md
diff --git a/new-docs/zh-CN/admin-manual/multi-tenant.md b/docs/zh-CN/admin-manual/multi-tenant.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/multi-tenant.md
rename to docs/zh-CN/admin-manual/multi-tenant.md
diff --git a/new-docs/zh-CN/admin-manual/optimization.md b/docs/zh-CN/admin-manual/optimization.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/optimization.md
rename to docs/zh-CN/admin-manual/optimization.md
diff --git a/new-docs/zh-CN/admin-manual/privilege-ldap/ldap.md b/docs/zh-CN/admin-manual/privilege-ldap/ldap.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/privilege-ldap/ldap.md
rename to docs/zh-CN/admin-manual/privilege-ldap/ldap.md
diff --git a/new-docs/zh-CN/admin-manual/privilege-ldap/user-privilege.md b/docs/zh-CN/admin-manual/privilege-ldap/user-privilege.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/privilege-ldap/user-privilege.md
rename to docs/zh-CN/admin-manual/privilege-ldap/user-privilege.md
diff --git a/docs/zh-CN/administrator-guide/running-profile.md b/docs/zh-CN/admin-manual/query-profile.md
similarity index 99%
rename from docs/zh-CN/administrator-guide/running-profile.md
rename to docs/zh-CN/admin-manual/query-profile.md
index 9277ab4ae2..a2d765a4e5 100644
--- a/docs/zh-CN/administrator-guide/running-profile.md
+++ b/docs/zh-CN/admin-manual/query-profile.md
@@ -1,6 +1,6 @@
---
{
- "title": "查询执行的统计",
+ "title": "查询分析",
"language": "zh-CN"
}
---
@@ -26,6 +26,8 @@ under the License.
# 查询执行的统计
+# 查询执行的统计
+
本文档主要介绍Doris在查询执行的统计结果。利用这些统计的信息,可以更好的帮助我们了解Doris的执行情况,并有针对性的进行相应**Debug与调优工作**。
@@ -271,5 +273,4 @@ OLAP_SCAN_NODE (id=0):(Active: 1.2ms, % non-child: 0.00%)
- PeakReservation: Reservation的峰值
- PeakUnpinnedBytes: unpin的内存数据量
- PeakUsedReservation: Reservation的内存使用量
- - ReservationLimit: BufferPool的Reservation的限制量
-
+ - ReservationLimit: BufferPool的Reservation的限制量
\ No newline at end of file
diff --git a/new-docs/zh-CN/admin-manual/sql-interception.md b/docs/zh-CN/admin-manual/sql-interception.md
similarity index 100%
rename from new-docs/zh-CN/admin-manual/sql-interception.md
rename to docs/zh-CN/admin-manual/sql-interception.md
diff --git a/docs/zh-CN/administrator-guide/alter-table/alter-table-bitmap-index.md b/docs/zh-CN/administrator-guide/alter-table/alter-table-bitmap-index.md
deleted file mode 100644
index 5e7853c0df..0000000000
--- a/docs/zh-CN/administrator-guide/alter-table/alter-table-bitmap-index.md
+++ /dev/null
@@ -1,82 +0,0 @@
----
-{
- "title": "Bitmap 索引",
- "language": "zh-CN"
-}
----
-
-
-
-# Bitmap 索引
-用户可以通过创建bitmap index 加速查询
-本文档主要介绍如何创建 index 作业,以及创建 index 的一些注意事项和常见问题。
-
-## 名词解释
-* bitmap index:位图索引,是一种快速数据结构,能够加快查询速度
-
-## 原理介绍
-创建和删除本质上是一个 schema change 的作业,具体细节可以参照 [Schema Change](alter-table-schema-change)。
-
-## 语法
-index 创建和修改相关语法有两种形式,一种集成于 alter table 语句中,另一种是使用单独的
-create/drop index 语法
-1. 创建索引
-
- 创建索引的语法可以参见 [CREATE INDEX](../../sql-reference/sql-statements/Data%20Definition/CREATE%20INDEX.html)
- 或 [ALTER TABLE](../../sql-reference/sql-statements/Data%20Definition/ALTER%20TABLE.html) 中bitmap 索引相关的操作,
- 也可以通过在创建表时指定bitmap 索引,参见[CREATE TABLE](../../sql-reference/sql-statements/Data%20Definition/CREATE%20TABLE.html)
-
-2. 查看索引
-
- 参照[SHOW INDEX](../../sql-reference/sql-statements/Administration/SHOW%20INDEX.html)
-
-3. 删除索引
-
- 参照[DROP INDEX](../../sql-reference/sql-statements/Data%20Definition/DROP%20INDEX.html)
- 或者 [ALTER TABLE](../../sql-reference/sql-statements/Data%20Definition/ALTER%20TABLE.html) 中bitmap 索引相关的操作
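-
-下面给出一个简单的示意(其中表名 `tbl1`、列名 `siteid`、索引名 `idx_siteid` 均为假设的示例名称,实际使用时请替换):
-
-```
--- 在 tbl1 的 siteid 列上创建 bitmap 索引
-CREATE INDEX idx_siteid ON tbl1 (siteid) USING BITMAP COMMENT 'siteid 的 bitmap 索引';
-
--- 查看表上的索引
-SHOW INDEX FROM tbl1;
-
--- 删除索引
-DROP INDEX idx_siteid ON tbl1;
-```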
-
-## 创建作业
-参照 schema change 文档 [Schema Change](alter-table-schema-change.html)
-
-## 查看作业
-参照 schema change 文档 [Schema Change](alter-table-schema-change.html)
-
-## 取消作业
-参照 schema change 文档 [Schema Change](alter-table-schema-change.html)
-
-## 注意事项
-* 目前索引仅支持 bitmap 类型的索引。
-* bitmap 索引仅在单列上创建。
-* bitmap 索引能够应用在 `Duplicate` 数据模型的所有列和 `Aggregate`, `Uniq` 模型的key列上。
-* bitmap 索引支持的数据类型如下:
- * `TINYINT`
- * `SMALLINT`
- * `INT`
- * `UNSIGNEDINT`
- * `BIGINT`
- * `CHAR`
- * `VARCHAR`
- * `DATE`
- * `DATETIME`
- * `LARGEINT`
- * `DECIMAL`
- * `BOOL`
-
-* bitmap索引仅在 Segment V2 下生效。当创建 index 时,表的存储格式将默认转换为 V2 格式。
diff --git a/docs/zh-CN/administrator-guide/alter-table/alter-table-replace-table.md b/docs/zh-CN/administrator-guide/alter-table/alter-table-replace-table.md
deleted file mode 100644
index ce47723918..0000000000
--- a/docs/zh-CN/administrator-guide/alter-table/alter-table-replace-table.md
+++ /dev/null
@@ -1,73 +0,0 @@
----
-{
- "title": "替换表",
- "language": "zh-CN"
-}
----
-
-
-
-# 替换表
-
-在 0.14 版本中,Doris 支持对两个表进行原子的替换操作。
-该操作仅适用于 OLAP 表。
-
-分区级别的替换操作,请参阅 [临时分区文档](./alter-table-temp-partition.md)
-
-## 语法说明
-
-```
-ALTER TABLE [db.]tbl1 REPLACE WITH TABLE tbl2
-[PROPERTIES('swap' = 'true')];
-```
-
-将表 tbl1 替换为表 tbl2。
-
-如果 `swap` 参数为 `true`,则替换后,名称为 `tbl1` 表中的数据为原 `tbl2` 表中的数据。而名称为 `tbl2` 表中的数据为原 `tbl1` 表中的数据。即两张表数据发生了互换。
-
-如果 `swap` 参数为 `false`,则替换后,名称为 `tbl1` 表中的数据为原 `tbl2` 表中的数据。而名称为 `tbl2` 表被删除。
-
-
-## 原理
-
-替换表功能,实际上是将以下操作集合变成一个原子操作。
-
-假设要将表 A 替换为表 B,且 `swap` 为 `true`,则操作如下:
-
-1. 将表 B 重命名为表 A。
-2. 将表 A 重命名为表 B。
-
-如果 `swap` 为 `false`,则操作如下:
-
-1. 删除表 A。
-2. 将表 B 重命名为表 A。
-
-## 注意事项
-
-1. `swap` 参数默认为 `true`。即替换表操作相当于将两张表数据进行交换。
-2. 如果设置 `swap` 参数为 `false`,则被替换的表(表A)将被删除,且无法恢复。
-3. 替换操作仅能发生在两张 OLAP 表之间,且不会检查两张表的表结构是否一致。
-4. 替换操作不会改变原有的权限设置。因为权限检查以表名称为准。
-
-## 最佳实践
-
-1. 原子的覆盖写操作
-
- 某些情况下,用户希望能够重写某张表的数据,但如果采用先删除再导入的方式进行,在中间会有一段时间无法查看数据。这时,用户可以先使用 `CREATE TABLE LIKE` 语句创建一个相同结构的新表,将新的数据导入到新表后,通过替换操作,原子的替换旧表,以达到目的。分区级别的原子覆盖写操作,请参阅 [临时分区文档](./alter-table-temp-partition.md)
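-
-上述流程的一个简单 SQL 示意如下(其中 `tbl1_new`、`source_tbl` 均为假设的示例名称):
-
-```
--- 1. 创建一个与 tbl1 结构相同的新表
-CREATE TABLE tbl1_new LIKE tbl1;
-
--- 2. 将新数据导入 tbl1_new(这里以 INSERT INTO 为例,实际可使用任意导入方式)
-INSERT INTO tbl1_new SELECT * FROM source_tbl;
-
--- 3. 原子替换:替换完成后 tbl1 中为新数据,原来的 tbl1 数据被删除
-ALTER TABLE tbl1 REPLACE WITH TABLE tbl1_new PROPERTIES('swap' = 'false');
-```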
diff --git a/docs/zh-CN/administrator-guide/alter-table/alter-table-rollup.md b/docs/zh-CN/administrator-guide/alter-table/alter-table-rollup.md
deleted file mode 100644
index 7d571a2d8a..0000000000
--- a/docs/zh-CN/administrator-guide/alter-table/alter-table-rollup.md
+++ /dev/null
@@ -1,194 +0,0 @@
----
-{
- "title": "Rollup",
- "language": "zh-CN"
-}
----
-
-
-
-# Rollup
-
-用户可以通过创建上卷表(Rollup)加速查询。关于 Rollup 的概念和使用方式可以参阅 [数据模型、ROLLUP 及前缀索引](../../getting-started/data-model-rollup.md) 和 [Rollup 与查询](../../getting-started/hit-the-rollup.md) 两篇文档。
-
-本文档主要介绍如何创建 Rollup 作业,以及创建 Rollup 的一些注意事项和常见问题。
-
-## 名词解释
-
-* Base Table:基表。每一个表被创建时,都对应一个基表。基表存储了这个表的完整的数据。Rollup 通常基于基表中的数据创建(也可以通过其他 Rollup 创建)。
-* Index:物化索引。Rollup 或 Base Table 都被称为物化索引。
-* Transaction:事务。每一个导入任务都是一个事务,每个事务有一个唯一递增的 Transaction ID。
-
-## 原理介绍
-
-创建 Rollup 的基本过程,是通过 Base 表的数据,生成一份新的包含指定列的 Rollup 的数据。其中主要需要进行两部分数据转换,一是已存在的历史数据的转换,二是在 Rollup 执行过程中,新到达的导入数据的转换。
-
-```
-+----------+
-| Load Job |
-+----+-----+
- |
- | Load job generates both base and rollup index data
- |
- | +------------------+ +---------------+
- | | Base Index | | Base Index |
- +------> New Incoming Data| | History Data |
- | +------------------+ +------+--------+
- | |
- | | Convert history data
- | |
- | +------------------+ +------v--------+
- | | Rollup Index | | Rollup Index |
- +------> New Incoming Data| | History Data |
- +------------------+ +---------------+
-```
-
-在开始转换历史数据之前,Doris 会获取一个最新的 Transaction ID。并等待这个 Transaction ID 之前的所有导入事务完成。这个 Transaction ID 成为分水岭。意思是,Doris 保证在分水岭之后的所有导入任务,都会同时为 Rollup Index 生成数据。这样当历史数据转换完成后,可以保证 Rollup 和 Base 表的数据是齐平的。
-
-## 创建作业
-
-创建 Rollup 的具体语法可以查看帮助 `HELP ALTER TABLE` 中 Rollup 部分的说明。
-
-Rollup 的创建是一个异步过程,作业提交成功后,用户需要通过 `SHOW ALTER TABLE ROLLUP` 命令来查看作业进度。
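-
-例如,基于表 tbl1 创建一个名为 r1 的 Rollup 的简单示意如下(列名 k1、k2 仅为示例,需替换为 Base 表中实际存在的列):
-
-```
--- 创建 Rollup,随后可通过 SHOW ALTER TABLE ROLLUP 查看进度
-ALTER TABLE tbl1 ADD ROLLUP r1(k1, k2);
-```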
-
-## 查看作业
-
-`SHOW ALTER TABLE ROLLUP` 可以查看当前正在执行或已经完成的 Rollup 作业。举例如下:
-
-```
- JobId: 20037
- TableName: tbl1
- CreateTime: 2019-08-06 15:38:49
- FinishedTime: N/A
- BaseIndexName: tbl1
-RollupIndexName: r1
- RollupId: 20038
- TransactionId: 10034
- State: PENDING
- Msg:
- Progress: N/A
- Timeout: 86400
-```
-
-* JobId:每个 Rollup 作业的唯一 ID。
-* TableName:Rollup 对应的基表的表名。
-* CreateTime:作业创建时间。
-* FinishedTime:作业结束时间。如未结束,则显示 "N/A"。
-* BaseIndexName:Rollup 对应的源 Index 的名称。
-* RollupIndexName:Rollup 的名称。
-* RollupId:Rollup 的唯一 ID。
-* TransactionId:转换历史数据的分水岭 transaction ID。
-* State:作业所在阶段。
- * PENDING:作业在队列中等待被调度。
- * WAITING_TXN:等待分水岭 transaction ID 之前的导入任务完成。
- * RUNNING:历史数据转换中。
- * FINISHED:作业成功。
- * CANCELLED:作业失败。
-* Msg:如果作业失败,这里会显示失败信息。
-* Progress:作业进度。只有在 RUNNING 状态才会显示进度。进度是以 M/N 的形式显示。其中 N 为 Rollup 的总副本数。M 为已完成历史数据转换的副本数。
-* Timeout:作业超时时间。单位秒。
-
-## 取消作业
-
-在作业状态不为 FINISHED 或 CANCELLED 的情况下,可以通过以下命令取消 Rollup 作业:
-
-`CANCEL ALTER TABLE ROLLUP FROM tbl_name;`
-
-## 注意事项
-
-* 一张表在同一时间只能有一个 Rollup 作业在运行。且一个作业中只能创建一个 Rollup。
-
-* Rollup 操作不阻塞导入和查询操作。
-
-* 如果 DELETE 操作,where 条件中的某个 Key 列在某个 Rollup 中不存在,则不允许该 DELETE。
-
- 如果某个 Key 列在某一 Rollup 中不存在,则 DELETE 操作无法对该 Rollup 进行数据删除,从而无法保证 Rollup 表和 Base 表的数据一致性。
-
-* Rollup 的列必须存在于 Base 表中。
-
- Rollup 的列永远是 Base 表列的子集。不能出现 Base 表中不存在的列。
-
-* 如果 Rollup 中包含 REPLACE 聚合类型的列,则该 Rollup 必须包含所有 Key 列。
-
- 假设 Base 表结构如下:
-
- ```(k1 INT, k2 INT, v1 INT REPLACE, v2 INT SUM)```
-
- 如果需要创建的 Rollup 包含 `v1` 列,则必须包含 `k1`, `k2` 列。否则系统无法决定 `v1` 列在 Rollup 中的取值。
-
- 注意,Unique 数据模型表中的所有 Value 列都是 REPLACE 聚合类型。
-
-* DUPLICATE 数据模型表的 Rollup,可以指定 Rollup 的 DUPLICATE KEY。
-
- DUPLICATE 数据模型表中的 DUPLICATE KEY 其实就是排序列。Rollup 可以指定自己的排序列,但排序列必须是 Rollup 列顺序的前缀。如果不指定,则系统会检查 Rollup 是否包含了 Base 表的所有排序列,如果没有包含,则会报错。举例:
-
- Base 表结构:`(k1 INT, k2 INT, k3 INT) DUPLICATE KEY(k1, k2)`
-
- 则 Rollup 可以为:`(k2 INT, k1 INT) DUPLICATE KEY(k2)`
-
-* Rollup 不需要包含 Base 表的分区列或分桶列。
-
-## 常见问题
-
-* 一个表可以创建多少 Rollup
-
- 一个表能够创建的 Rollup 个数理论上没有限制,但是过多的 Rollup 会影响导入性能。因为导入时,会同时给所有 Rollup 产生数据。同时 Rollup 会占用物理存储空间。通常一个表的 Rollup 数量在 10 个以内比较合适。
-
-* Rollup 创建的速度
-
- 目前 Rollup 创建速度按照最差效率估计约为 10MB/s。保守起见,用户可以根据这个速率来设置作业的超时时间。
-
-* 提交作业报错 `Table xxx is not stable. ...`
-
- Rollup 只有在表数据完整且非均衡状态下才可以开始。如果表的某些数据分片副本不完整,或者某些副本正在进行均衡操作,则提交会被拒绝。
-
- 数据分片副本是否完整,可以通过以下命令查看:
-
- ```ADMIN SHOW REPLICA STATUS FROM tbl WHERE STATUS != "OK";```
-
- 如果有返回结果,则说明有副本有问题。通常系统会自动修复这些问题,用户也可以通过以下命令优先修复这个表:
-
- ```ADMIN REPAIR TABLE tbl1;```
-
- 用户可以通过以下命令查看是否有正在运行的均衡任务:
-
- ```SHOW PROC "/cluster_balance/pending_tablets";```
-
- 可以等待均衡任务完成,或者通过以下命令临时禁止均衡操作:
-
- ```ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");```
-
-## 相关配置
-
-### FE 配置
-
-* `alter_table_timeout_second`:作业默认超时时间,86400 秒。
-
-### BE 配置
-
-* `alter_tablet_worker_count`:在 BE 端用于执行历史数据转换的线程数。默认为 3。如果希望加快 Rollup 作业的速度,可以适当调大这个参数后重启 BE。但过多的转换线程可能会导致 IO 压力增加,影响其他操作。该线程和 Schema Change 作业共用。
-
-
-
-
-
-
-
-
diff --git a/docs/zh-CN/administrator-guide/alter-table/alter-table-schema-change.md b/docs/zh-CN/administrator-guide/alter-table/alter-table-schema-change.md
deleted file mode 100644
index 6ab14cc764..0000000000
--- a/docs/zh-CN/administrator-guide/alter-table/alter-table-schema-change.md
+++ /dev/null
@@ -1,249 +0,0 @@
----
-{
- "title": "Schema Change",
- "language": "zh-CN"
-}
----
-
-
-
-# Schema Change
-
-用户可以通过 Schema Change 操作来修改已存在表的 Schema。目前 Doris 支持以下几种修改:
-
-* 增加、删除列
-* 修改列类型
-* 调整列顺序
-* 增加、修改 Bloom Filter
-* 增加、删除 bitmap index
-
-本文档主要介绍如何创建 Schema Change 作业,以及进行 Schema Change 的一些注意事项和常见问题。
-
-## 名词解释
-
-* Base Table:基表。每一个表被创建时,都对应一个基表。
-* Rollup:基于基表或者其他 Rollup 创建出来的上卷表。
-* Index:物化索引。Rollup 或 Base Table 都被称为物化索引。
-* Transaction:事务。每一个导入任务都是一个事务,每个事务有一个唯一递增的 Transaction ID。
-
-## 原理介绍
-
-执行 Schema Change 的基本过程,是通过原 Index 的数据,生成一份新 Schema 的 Index 的数据。其中主要需要进行两部分数据转换,一是已存在的历史数据的转换,二是在 Schema Change 执行过程中,新到达的导入数据的转换。
-
-```
-+----------+
-| Load Job |
-+----+-----+
- |
- | Load job generates both origin and new index data
- |
- | +------------------+ +---------------+
- | | Origin Index | | Origin Index |
- +------> New Incoming Data| | History Data |
- | +------------------+ +------+--------+
- | |
- | | Convert history data
- | |
- | +------------------+ +------v--------+
- | | New Index | | New Index |
- +------> New Incoming Data| | History Data |
- +------------------+ +---------------+
-```
-
-在开始转换历史数据之前,Doris 会获取一个最新的 Transaction ID。并等待这个 Transaction ID 之前的所有导入事务完成。这个 Transaction ID 成为分水岭。意思是,Doris 保证在分水岭之后的所有导入任务,都会同时为原 Index 和新 Index 生成数据。这样当历史数据转换完成后,可以保证新的 Index 中的数据是完整的。
-
-## 创建作业
-
-创建 Schema Change 的具体语法可以查看帮助 `HELP ALTER TABLE` 中 Schema Change 部分的说明。
-
-Schema Change 的创建是一个异步过程,作业提交成功后,用户需要通过 `SHOW ALTER TABLE COLUMN` 命令来查看作业进度。
-
-## 查看作业
-
-`SHOW ALTER TABLE COLUMN` 可以查看当前正在执行或已经完成的 Schema Change 作业。当一次 Schema Change 作业涉及到多个 Index 时,该命令会显示多行,每行对应一个 Index。举例如下:
-
-```
- JobId: 20021
- TableName: tbl1
- CreateTime: 2019-08-05 23:03:13
- FinishTime: 2019-08-05 23:03:42
- IndexName: tbl1
- IndexId: 20022
-OriginIndexId: 20017
-SchemaVersion: 2:792557838
-TransactionId: 10023
- State: FINISHED
- Msg:
- Progress: N/A
- Timeout: 86400
-```
-
-* JobId:每个 Schema Change 作业的唯一 ID。
-* TableName:Schema Change 对应的基表的表名。
-* CreateTime:作业创建时间。
-* FinishedTime:作业结束时间。如未结束,则显示 "N/A"。
-* IndexName: 本次修改所涉及的某一个 Index 的名称。
-* IndexId:新的 Index 的唯一 ID。
-* OriginIndexId:旧的 Index 的唯一 ID。
-* SchemaVersion:以 M:N 的格式展示。其中 M 表示本次 Schema Change 变更的版本,N 表示对应的 Hash 值。每次 Schema Change,版本都会递增。
-* TransactionId:转换历史数据的分水岭 transaction ID。
-* State:作业所在阶段。
- * PENDING:作业在队列中等待被调度。
- * WAITING_TXN:等待分水岭 transaction ID 之前的导入任务完成。
- * RUNNING:历史数据转换中。
- * FINISHED:作业成功。
- * CANCELLED:作业失败。
-* Msg:如果作业失败,这里会显示失败信息。
-* Progress:作业进度。只有在 RUNNING 状态才会显示进度。进度是以 M/N 的形式显示。其中 N 为 Schema Change 涉及的总副本数。M 为已完成历史数据转换的副本数。
-* Timeout:作业超时时间。单位秒。
-
-## 取消作业
-
-在作业状态不为 FINISHED 或 CANCELLED 的情况下,可以通过以下命令取消 Schema Change 作业:
-
-`CANCEL ALTER TABLE COLUMN FROM tbl_name;`
-
-## 最佳实践
-
-Schema Change 可以在一个作业中,对多个 Index 进行不同的修改。举例如下:
-
-源 Schema:
-
-```
-+-----------+-------+------+------+------+---------+-------+
-| IndexName | Field | Type | Null | Key | Default | Extra |
-+-----------+-------+------+------+------+---------+-------+
-| tbl1 | k1 | INT | No | true | N/A | |
-| | k2 | INT | No | true | N/A | |
-| | k3 | INT | No | true | N/A | |
-| | | | | | | |
-| rollup2 | k2 | INT | No | true | N/A | |
-| | | | | | | |
-| rollup1 | k1 | INT | No | true | N/A | |
-| | k2 | INT | No | true | N/A | |
-+-----------+-------+------+------+------+---------+-------+
-```
-
-可以通过以下命令给 rollup1 和 rollup2 都加入一列 k4,并且再给 rollup2 加入一列 k5:
-
-```
-ALTER TABLE tbl1
-ADD COLUMN k4 INT default "1" to rollup1,
-ADD COLUMN k4 INT default "1" to rollup2,
-ADD COLUMN k5 INT default "1" to rollup2;
-```
-
-完成后,Schema 变为:
-
-```
-+-----------+-------+------+------+------+---------+-------+
-| IndexName | Field | Type | Null | Key | Default | Extra |
-+-----------+-------+------+------+------+---------+-------+
-| tbl1 | k1 | INT | No | true | N/A | |
-| | k2 | INT | No | true | N/A | |
-| | k3 | INT | No | true | N/A | |
-| | k4 | INT | No | true | 1 | |
-| | k5 | INT | No | true | 1 | |
-| | | | | | | |
-| rollup2 | k2 | INT | No | true | N/A | |
-| | k4 | INT | No | true | 1 | |
-| | k5 | INT | No | true | 1 | |
-| | | | | | | |
-| rollup1 | k1 | INT | No | true | N/A | |
-| | k2 | INT | No | true | N/A | |
-| | k4 | INT | No | true | 1 | |
-+-----------+-------+------+------+------+---------+-------+
-```
-
-可以看到,Base 表 tbl1 也自动加入了 k4, k5 列。即给任意 rollup 增加的列,都会自动加入到 Base 表中。
-
-同时,不允许向 Rollup 中加入 Base 表已经存在的列。如果用户需要这样做,可以重新建立一个包含新增列的 Rollup,之后再删除原 Rollup。
-
-## 注意事项
-
-* 一张表在同一时间只能有一个 Schema Change 作业在运行。
-
-* Schema Change 操作不阻塞导入和查询操作。
-
-* 分区列和分桶列不能修改。
-
-* 如果 Schema 中有 REPLACE 方式聚合的 value 列,则不允许删除 Key 列。
-
- 如果删除 Key 列,Doris 无法决定 REPLACE 列的取值。
-
- Unique 数据模型表的所有非 Key 列都是 REPLACE 聚合方式。
-
-* 在新增聚合类型为 SUM 或者 REPLACE 的 value 列时,该列的默认值对历史数据没有含义。
-
- 因为历史数据已经失去明细信息,所以默认值的取值并不能实际反映聚合后的取值。
-
-* 当修改列类型时,除 Type 以外的字段都需要按原列上的信息补全。
-
- 如修改列 `k1 INT SUM NULL DEFAULT "1"` 类型为 BIGINT,则需执行命令如下:
-
- ```ALTER TABLE tbl1 MODIFY COLUMN `k1` BIGINT SUM NULL DEFAULT "1";```
-
- 注意,除新的列类型外,如聚合方式,Nullable 属性,以及默认值都要按照原信息补全。
-
-* 不支持修改列名称、聚合类型、Nullable 属性、默认值以及列注释。
-
-## 常见问题
-
-* Schema Change 的执行速度
-
- 目前 Schema Change 执行速度按照最差效率估计约为 10MB/s。保守起见,用户可以根据这个速率来设置作业的超时时间。
-
-* 提交作业报错 `Table xxx is not stable. ...`
-
- Schema Change 只有在表数据完整且非均衡状态下才可以开始。如果表的某些数据分片副本不完整,或者某些副本正在进行均衡操作,则提交会被拒绝。
-
- 数据分片副本是否完整,可以通过以下命令查看:
-
- ```ADMIN SHOW REPLICA STATUS FROM tbl WHERE STATUS != "OK";```
-
- 如果有返回结果,则说明有副本有问题。通常系统会自动修复这些问题,用户也可以通过以下命令优先修复这个表:
-
- ```ADMIN REPAIR TABLE tbl1;```
-
- 用户可以通过以下命令查看是否有正在运行的均衡任务:
-
- ```SHOW PROC "/cluster_balance/pending_tablets";```
-
- 可以等待均衡任务完成,或者通过以下命令临时禁止均衡操作:
-
- ```ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");```
-
-## 相关配置
-
-### FE 配置
-
-* `alter_table_timeout_second`:作业默认超时时间,86400 秒。
-
-### BE 配置
-
-* `alter_tablet_worker_count`:在 BE 端用于执行历史数据转换的线程数。默认为 3。如果希望加快 Schema Change 作业的速度,可以适当调大这个参数后重启 BE。但过多的转换线程可能会导致 IO 压力增加,影响其他操作。该线程和 Rollup 作业共用。
-
-
-
-
-
-
-
-
diff --git a/docs/zh-CN/administrator-guide/alter-table/alter-table-temp-partition.md b/docs/zh-CN/administrator-guide/alter-table/alter-table-temp-partition.md
deleted file mode 100644
index b46759642f..0000000000
--- a/docs/zh-CN/administrator-guide/alter-table/alter-table-temp-partition.md
+++ /dev/null
@@ -1,298 +0,0 @@
----
-{
- "title": "临时分区",
- "language": "zh-CN"
-}
----
-
-
-
-# 临时分区
-
-在 0.12 版本中,Doris 支持了临时分区功能。
-
-临时分区是归属于某一分区表的。只有分区表可以创建临时分区。
-
-## 规则
-
-* 临时分区的分区列和正式分区相同,且不可修改。
-* 一张表所有临时分区之间的分区范围不可重叠,但临时分区的范围和正式分区范围可以重叠。
-* 临时分区的分区名称不能和正式分区以及其他临时分区重复。
-
-## 支持的操作
-
-临时分区支持添加、删除、替换操作。
-
-### 添加临时分区
-
-可以通过 `ALTER TABLE ADD TEMPORARY PARTITION` 语句对一个表添加临时分区:
-
-```
-ALTER TABLE tbl1 ADD TEMPORARY PARTITION tp1 VALUES LESS THAN("2020-02-01");
-
-ALTER TABLE tbl2 ADD TEMPORARY PARTITION tp1 VALUES [("2020-01-01"), ("2020-02-01"));
-
-ALTER TABLE tbl1 ADD TEMPORARY PARTITION tp1 VALUES LESS THAN("2020-02-01")
-("in_memory" = "true", "replication_num" = "1")
-DISTRIBUTED BY HASH(k1) BUCKETS 5;
-
-ALTER TABLE tbl3 ADD TEMPORARY PARTITION tp1 VALUES IN ("Beijing", "Shanghai");
-
-ALTER TABLE tbl4 ADD TEMPORARY PARTITION tp1 VALUES IN ((1, "Beijing"), (1, "Shanghai"));
-
-ALTER TABLE tbl3 ADD TEMPORARY PARTITION tp1 VALUES IN ("Beijing", "Shanghai")
-("in_memory" = "true", "replication_num" = "1")
-DISTRIBUTED BY HASH(k1) BUCKETS 5;
-
-```
-
-通过 `HELP ALTER TABLE;` 查看更多帮助和示例。
-
-添加操作的一些说明:
-
-* 临时分区的添加和正式分区的添加操作相似。临时分区的分区范围独立于正式分区。
-* 临时分区可以独立指定一些属性。包括分桶数、副本数、是否是内存表、存储介质等信息。
-
-### 删除临时分区
-
-可以通过 `ALTER TABLE DROP TEMPORARY PARTITION` 语句删除一个表的临时分区:
-
-```
-ALTER TABLE tbl1 DROP TEMPORARY PARTITION tp1;
-```
-
-通过 `HELP ALTER TABLE;` 查看更多帮助和示例。
-
-删除操作的一些说明:
-
-* 删除临时分区,不影响正式分区的数据。
-
-### 替换分区
-
-可以通过 `ALTER TABLE REPLACE PARTITION` 语句将一个表的正式分区替换为临时分区。
-
-```
-ALTER TABLE tbl1 REPLACE PARTITION (p1) WITH TEMPORARY PARTITION (tp1);
-
-ALTER TABLE tbl1 REPLACE PARTITION (p1, p2) WITH TEMPORARY PARTITION (tp1, tp2, tp3);
-
-ALTER TABLE tbl1 REPLACE PARTITION (p1, p2) WITH TEMPORARY PARTITION (tp1, tp2)
-PROPERTIES (
- "strict_range" = "false",
- "use_temp_partition_name" = "true"
-);
-```
-
-通过 `HELP ALTER TABLE;` 查看更多帮助和示例。
-
-替换操作有两个特殊的可选参数:
-
-1. `strict_range`
-
- 默认为 true。
-
- 对于 Range 分区,当该参数为 true 时,表示要被替换的所有正式分区的范围并集需要和替换的临时分区的范围并集完全相同。当置为 false 时,只需要保证替换后,新的正式分区间的范围不重叠即可。
-
- 对于 List 分区,该参数恒为 true。要被替换的所有正式分区的枚举值必须和替换的临时分区枚举值完全相同。
-
- 下面举例说明:
-
- * 示例1
-
- 待替换的分区 p1, p2, p3 的范围 (=> 并集):
-
- ```
- [10, 20), [20, 30), [40, 50) => [10, 30), [40, 50)
- ```
-
- 替换分区 tp1, tp2 的范围(=> 并集):
-
- ```
- [10, 30), [40, 45), [45, 50) => [10, 30), [40, 50)
- ```
-
- 范围并集相同,则可以使用 tp1 和 tp2 替换 p1, p2, p3。
-
- * 示例2
-
- 待替换的分区 p1 的范围 (=> 并集):
-
- ```
- [10, 50) => [10, 50)
- ```
-
- 替换分区 tp1, tp2 的范围(=> 并集):
-
- ```
- [10, 30), [40, 50) => [10, 30), [40, 50)
- ```
-
- 范围并集不相同,如果 `strict_range` 为 true,则不可以使用 tp1 和 tp2 替换 p1。如果为 false,且替换后的两个分区范围 `[10, 30), [40, 50)` 和其他正式分区不重叠,则可以替换。
-
- * 示例3
-
- 待替换的分区 p1, p2 的枚举值(=> 并集):
-
- ```
- (1, 2, 3), (4, 5, 6) => (1, 2, 3, 4, 5, 6)
- ```
-
- 替换分区 tp1, tp2, tp3 的枚举值(=> 并集):
-
- ```
- (1, 2, 3), (4), (5, 6) => (1, 2, 3, 4, 5, 6)
- ```
-
- 枚举值并集相同,可以使用 tp1,tp2,tp3 替换 p1,p2
-
- * 示例4
-
- 待替换的分区 p1, p2,p3 的枚举值(=> 并集):
-
- ```
- (("1","beijing"), ("1", "shanghai")), (("2","beijing"), ("2", "shanghai")), (("3","beijing"), ("3", "shanghai")) => (("1","beijing"), ("1", "shanghai"), ("2","beijing"), ("2", "shanghai"), ("3","beijing"), ("3", "shanghai"))
- ```
-
- 替换分区 tp1, tp2 的枚举值(=> 并集):
-
- ```
- (("1","beijing"), ("1", "shanghai")), (("2","beijing"), ("2", "shanghai"), ("3","beijing"), ("3", "shanghai")) => (("1","beijing"), ("1", "shanghai"), ("2","beijing"), ("2", "shanghai"), ("3","beijing"), ("3", "shanghai"))
- ```
-
- 枚举值并集相同,可以使用 tp1,tp2 替换 p1,p2,p3
-
-2. `use_temp_partition_name`
-
- 默认为 false。当该参数为 false,并且待替换的分区和替换分区的个数相同时,则替换后的正式分区名称维持不变。如果为 true,则替换后,正式分区的名称为替换分区的名称。下面举例说明:
-
- * 示例1
-
- ```
- ALTER TABLE tbl1 REPLACE PARTITION (p1) WITH TEMPORARY PARTITION (tp1);
- ```
-
- `use_temp_partition_name` 默认为 false,则在替换后,分区的名称依然为 p1,但是相关的数据和属性都替换为 tp1 的。
-
- 如果 `use_temp_partition_name` 设置为 true,则在替换后,分区的名称为 tp1。p1 分区不再存在。
-
- * 示例2
-
- ```
- ALTER TABLE tbl1 REPLACE PARTITION (p1, p2) WITH TEMPORARY PARTITION (tp1);
- ```
-
- `use_temp_partition_name` 默认为 false,但因为待替换分区的个数和替换分区的个数不同,则该参数无效。替换后,分区名称为 tp1,p1 和 p2 不再存在。
-
-替换操作的一些说明:
-
-* 分区替换成功后,被替换的分区将被删除且不可恢复。
-
-## 临时分区的导入和查询
-
-用户可以将数据导入到临时分区,也可以指定临时分区进行查询。
-
-1. 导入临时分区
-
- 根据导入方式的不同,指定导入临时分区的语法稍有差别。这里通过示例进行简单说明。
-
- ```
- INSERT INTO tbl TEMPORARY PARTITION(tp1, tp2, ...) SELECT ....
- ```
-
- ```
- curl --location-trusted -u root: -H "label:123" -H "temporary_partitions: tp1, tp2, ..." -T testData http://host:port/api/testDb/testTbl/_stream_load
- ```
-
- ```
- LOAD LABEL example_db.label1
- (
- DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
- INTO TABLE `my_table`
- TEMPORARY PARTITION (tp1, tp2, ...)
- ...
- )
- WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password");
- ```
-
- ```
- CREATE ROUTINE LOAD example_db.test1 ON example_tbl
- COLUMNS(k1, k2, k3, v1, v2, v3 = k1 * 100),
- TEMPORARY PARTITIONS(tp1, tp2, ...),
- WHERE k1 > 100
- PROPERTIES
- (...)
- FROM KAFKA
- (...);
- ```
-
-2. 查询临时分区
-
- ```
- SELECT ... FROM
- tbl1 TEMPORARY PARTITION(tp1, tp2, ...)
- JOIN
- tbl2 TEMPORARY PARTITION(tp1, tp2, ...)
- ON ...
- WHERE ...;
- ```
-
-## 和其他操作的关系
-
-### DROP
-
-* 使用 Drop 操作直接删除数据库或表后,可以通过 Recover 命令恢复数据库或表(限定时间内),但临时分区不会被恢复。
-* 使用 Alter 命令删除正式分区后,可以通过 Recover 命令恢复分区(限定时间内)。操作正式分区和临时分区无关。
-* 使用 Alter 命令删除临时分区后,无法通过 Recover 命令恢复临时分区。
-
-### TRUNCATE
-
-* 使用 Truncate 命令清空表,表的临时分区会被删除,且不可恢复。
-* 使用 Truncate 命令清空正式分区时,不影响临时分区。
-* 不可使用 Truncate 命令清空临时分区。
-
-### ALTER
-
-* 当表存在临时分区时,无法使用 Alter 命令对表进行 Schema Change、Rollup 等变更操作。
-* 当表在进行变更操作时,无法对表添加临时分区。
-
-
-## 最佳实践
-
-1. 原子的覆盖写操作
-
- 某些情况下,用户希望能够重写某一分区的数据,但如果采用先删除再导入的方式进行,在中间会有一段时间无法查看数据。这时,用户可以先创建一个对应的临时分区,将新的数据导入到临时分区后,通过替换操作,原子的替换原有分区,以达到目的。对于非分区表的原子覆盖写操作,请参阅[替换表文档](./alter-table-replace-table.md)
-
-2. 修改分桶数
-
- 某些情况下,用户在创建分区时使用了不合适的分桶数。则用户可以先创建一个对应分区范围的临时分区,并指定新的分桶数。然后通过 `INSERT INTO` 命令将正式分区的数据导入到临时分区中,通过替换操作,原子的替换原有分区,以达到目的。(SQL 示意见本节末尾)
-
-3. 合并或分割分区
-
- 某些情况下,用户希望对分区的范围进行修改,比如合并两个分区,或将一个大分区分割成多个小分区。则用户可以先建立对应合并或分割后范围的临时分区,然后通过 `INSERT INTO` 命令将正式分区的数据导入到临时分区中,通过替换操作,原子的替换原有分区,以达到目的。
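-
-以"修改分桶数"为例,上述第 2、3 条实践的一个简单 SQL 示意如下(表名 tbl1、列 k1、分区 p1 及其范围、分桶数均为假设的示例值):
-
-```
--- 1. 建立与正式分区 p1 范围相同的临时分区,并指定新的分桶数
-ALTER TABLE tbl1 ADD TEMPORARY PARTITION tp1 VALUES [("2020-01-01"), ("2020-02-01"))
-DISTRIBUTED BY HASH(k1) BUCKETS 10;
-
--- 2. 将正式分区 p1 的数据导入临时分区
-INSERT INTO tbl1 TEMPORARY PARTITION(tp1) SELECT * FROM tbl1 PARTITION(p1);
-
--- 3. 原子替换原有分区
-ALTER TABLE tbl1 REPLACE PARTITION (p1) WITH TEMPORARY PARTITION (tp1);
-```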
-
-
-
-
-
-
-
-
-
-
diff --git a/docs/zh-CN/administrator-guide/backup-restore.md b/docs/zh-CN/administrator-guide/backup-restore.md
deleted file mode 100644
index 15f3ff8f59..0000000000
--- a/docs/zh-CN/administrator-guide/backup-restore.md
+++ /dev/null
@@ -1,193 +0,0 @@
----
-{
- "title": "备份与恢复",
- "language": "zh-CN"
-}
----
-
-
-
-# 备份与恢复
-
-Doris 支持将当前数据以文件的形式,通过 broker 备份到远端存储系统中。之后可以通过 恢复 命令,从远端存储系统中将数据恢复到任意 Doris 集群。通过这个功能,Doris 可以支持将数据定期的进行快照备份。也可以通过这个功能,在不同集群间进行数据迁移。
-
-该功能需要 Doris 版本 0.8.2+
-
-使用该功能,需要部署对应远端存储的 broker。如 BOS、HDFS 等。可以通过 `SHOW BROKER;` 查看当前部署的 broker。
-
-## 简要原理说明
-
-### 备份(Backup)
-
-备份操作是将指定表或分区的数据,直接以 Doris 存储的文件的形式,上传到远端仓库中进行存储。当用户提交 Backup 请求后,系统内部会做如下操作:
-
-1. 快照及快照上传
-
- 快照阶段会对指定的表或分区数据文件进行快照。之后,备份都是对快照进行操作。在快照之后,对表进行的更改、导入等操作都不再影响备份的结果。快照只是对当前数据文件产生一个硬链,耗时很少。快照完成后,会开始对这些快照文件进行逐一上传。快照上传由各个 Backend 并发完成。
-
-2. 元数据准备及上传
-
- 数据文件快照上传完成后,Frontend 会首先将对应元数据写成本地文件,然后通过 broker 将本地元数据文件上传到远端仓库,完成最终备份作业。
-
-3. 动态分区表说明
-
- 如果该表是动态分区表,备份之后会自动禁用动态分区属性,在做恢复的时候需要手动将该表的动态分区属性启用,命令如下:
- ```sql
- ALTER TABLE tbl1 SET ("dynamic_partition.enable"="true")
- ```
-
-### 恢复(Restore)
-
-恢复操作需要指定一个远端仓库中已存在的备份,然后将这个备份的内容恢复到本地集群中。当用户提交 Restore 请求后,系统内部会做如下操作:
-
-1. 在本地创建对应的元数据
-
- 这一步首先会在本地集群中,创建恢复对应的表分区等结构。创建完成后,该表可见,但是不可访问。
-
-2. 本地snapshot
-
- 这一步是将上一步创建的表做一个快照。这其实是一个空快照(因为刚创建的表是没有数据的),其目的主要是在 Backend 上产生对应的快照目录,用于之后接收从远端仓库下载的快照文件。
-
-3. 下载快照
-
- 远端仓库中的快照文件,会被下载到对应的上一步生成的快照目录中。这一步由各个 Backend 并发完成。
-
-4. 生效快照
-
- 快照下载完成后,我们要将各个快照映射为当前本地表的元数据。然后重新加载这些快照,使之生效,完成最终的恢复作业。
-
-## 最佳实践
-
-### 备份
-
-当前我们支持最小分区(Partition)粒度的全量备份(增量备份有可能在未来版本支持)。如果需要对数据进行定期备份,首先需要在建表时,合理的规划表的分区及分桶,比如按时间进行分区。然后在之后的运行过程中,按照分区粒度进行定期的数据备份。
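-
-例如,按分区粒度对某张表做一次全量备份的 SQL 示意如下(其中库名 example_db、仓库名 example_repo、表名 tbl1、分区名 p202112、快照名 snapshot_tbl1_p202112 均为假设的示例名称):
-
-```sql
--- 对 example_db 下 tbl1 的 p202112 分区做一次全量备份
-BACKUP SNAPSHOT example_db.snapshot_tbl1_p202112
-TO example_repo
-ON (tbl1 PARTITION (p202112))
-PROPERTIES ("type" = "full");
-```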
-
-### 数据迁移
-
-用户可以先将数据备份到远端仓库,再通过远端仓库将数据恢复到另一个集群,完成数据迁移。因为数据备份是通过快照的形式完成的,所以,在备份作业的快照阶段之后的新的导入数据,是不会备份的。因此,在快照完成后,到恢复作业完成这期间,在原集群上导入的数据,都需要在新集群上同样导入一遍。
-
-建议在迁移完成后,对新旧两个集群并行导入一段时间。完成数据和业务正确性校验后,再将业务迁移到新的集群。
-
-## 重点说明
-
-1. 备份恢复相关的操作目前只允许拥有 ADMIN 权限的用户执行。
-2. 一个 Database 内,只允许有一个正在执行的备份或恢复作业。
-3. 备份和恢复都支持最小分区(Partition)级别的操作,当表的数据量很大时,建议按分区分别执行,以降低失败重试的代价。
-4. 因为备份恢复操作,操作的都是实际的数据文件。所以当一个表的分片过多,或者一个分片有过多的小版本时,可能即使总数据量很小,依然需要备份或恢复很长时间。用户可以通过 `SHOW PARTITIONS FROM table_name;` 和 `SHOW TABLET FROM table_name;` 来查看各个分区的分片数量,以及各个分片的文件版本数量,来预估作业执行时间。文件数量对作业执行的时间影响非常大,所以建议在建表时,合理规划分区分桶,以避免过多的分片。
-5. 当通过 `SHOW BACKUP` 或者 `SHOW RESTORE` 命令查看作业状态时。有可能会在 `TaskErrMsg` 一列中看到错误信息。但只要 `State` 列不为
- `CANCELLED`,则说明作业依然在继续。这些 Task 有可能会重试成功。当然,有些 Task 错误,也会直接导致作业失败。
-6. 如果恢复作业是一次覆盖操作(指定恢复数据到已经存在的表或分区中),那么从恢复作业的 `COMMIT` 阶段开始,当前集群上被覆盖的数据有可能不能再被还原。此时如果恢复作业失败或被取消,有可能造成之前的数据已损坏且无法访问。这种情况下,只能通过再次执行恢复操作,并等待作业完成。因此,我们建议,如无必要,尽量不要使用覆盖的方式恢复数据,除非确认当前数据已不再使用。
-
-## 相关命令
-
-和备份恢复功能相关的命令如下。以下命令,都可以通过 mysql-client 连接 Doris 后,使用 `help cmd;` 的方式查看详细帮助。
-
-1. CREATE REPOSITORY
-
- 创建一个远端仓库路径,用于备份或恢复。该命令需要借助 Broker 进程访问远端存储,不同的 Broker 需要提供不同的参数,具体请参阅 [Broker文档](broker.md),也可以直接通过S3 协议备份到支持AWS S3协议的远程存储上去,具体参考 [创建远程仓库文档](../sql-reference/sql-statements/Data%20Definition/CREATE%20REPOSITORY.md)
-
-2. BACKUP
-
- 执行一次备份操作。
-
-3. SHOW BACKUP
-
- 查看最近一次 backup 作业的执行情况,包括:
-
- * JobId:本次备份作业的 id。
- * SnapshotName:用户指定的本次备份作业的名称(Label)。
- * DbName:备份作业对应的 Database。
- * State:备份作业当前所在阶段:
- * PENDING:作业初始状态。
- * SNAPSHOTING:正在进行快照操作。
- * UPLOAD_SNAPSHOT:快照结束,准备上传。
- * UPLOADING:正在上传快照。
- * SAVE_META:正在本地生成元数据文件。
- * UPLOAD_INFO:上传元数据文件和本次备份作业的信息。
- * FINISHED:备份完成。
- * CANCELLED:备份失败或被取消。
- * BackupObjs:本次备份涉及的表和分区的清单。
- * CreateTime:作业创建时间。
- * SnapshotFinishedTime:快照完成时间。
- * UploadFinishedTime:快照上传完成时间。
- * FinishedTime:本次作业完成时间。
- * UnfinishedTasks:在 `SNAPSHOTTING`,`UPLOADING` 等阶段,会有多个子任务同时进行,这里展示的是当前阶段未完成的子任务的 task id。
- * TaskErrMsg:如果有子任务执行出错,这里会显示对应子任务的错误信息。
- * Status:用于记录在整个作业过程中,可能出现的一些状态信息。
- * Timeout:作业的超时时间,单位是秒。
-
-4. SHOW SNAPSHOT
-
- 查看远端仓库中已存在的备份。
-
- * Snapshot:备份时指定的该备份的名称(Label)。
- * Timestamp:备份的时间戳。
- * Status:该备份是否正常。
-
- 如果在 `SHOW SNAPSHOT` 后指定了 where 子句,则可以显示更详细的备份信息。
-
- * Database:备份时对应的 Database。
- * Details:展示了该备份完整的数据目录结构。
-
-5. RESTORE
-
- 执行一次恢复操作。
-
-6. SHOW RESTORE
-
- 查看最近一次 restore 作业的执行情况,包括:
-
- * JobId:本次恢复作业的 id。
- * Label:用户指定的仓库中备份的名称(Label)。
- * Timestamp:用户指定的仓库中备份的时间戳。
- * DbName:恢复作业对应的 Database。
- * State:恢复作业当前所在阶段:
- * PENDING:作业初始状态。
- * SNAPSHOTING:正在进行本地新建表的快照操作。
- * DOWNLOAD:正在发送下载快照任务。
- * DOWNLOADING:快照正在下载。
- * COMMIT:准备生效已下载的快照。
- * COMMITTING:正在生效已下载的快照。
- * FINISHED:恢复完成。
- * CANCELLED:恢复失败或被取消。
- * AllowLoad:恢复期间是否允许导入。
- * ReplicationNum:恢复指定的副本数。
- * RestoreObjs:本次恢复涉及的表和分区的清单。
- * CreateTime:作业创建时间。
- * MetaPreparedTime:本地元数据生成完成时间。
- * SnapshotFinishedTime:本地快照完成时间。
- * DownloadFinishedTime:远端快照下载完成时间。
- * FinishedTime:本次作业完成时间。
- * UnfinishedTasks:在 `SNAPSHOTTING`,`DOWNLOADING`, `COMMITTING` 等阶段,会有多个子任务同时进行,这里展示的是当前阶段未完成的子任务的 task id。
- * TaskErrMsg:如果有子任务执行出错,这里会显示对应子任务的错误信息。
- * Status:用于记录在整个作业过程中,可能出现的一些状态信息。
- * Timeout:作业的超时时间,单位是秒。
-
-7. CANCEL BACKUP
-
- 取消当前正在执行的备份作业。
-
-8. CANCEL RESTORE
-
- 取消当前正在执行的恢复作业。
-
-9. DROP REPOSITORY
-
- 删除已创建的远端仓库。删除仓库,仅仅是删除该仓库在 Doris 中的映射,不会删除实际的仓库数据。
\ No newline at end of file
diff --git a/docs/zh-CN/administrator-guide/block-rule/sql-block.md b/docs/zh-CN/administrator-guide/block-rule/sql-block.md
deleted file mode 100644
index 6c34d9a05e..0000000000
--- a/docs/zh-CN/administrator-guide/block-rule/sql-block.md
+++ /dev/null
@@ -1,93 +0,0 @@
----
-{
-"title": "SQL黑名单",
-"language": "zh-CN"
-}
----
-
-
-
-# SQL黑名单
-
-该功能仅用于限制查询语句,并且不会限制 explain 语句的执行。
-支持按用户配置SQL黑名单:
-
-1. 通过正则匹配的方式拒绝指定SQL
-
-2. 通过设置partition_num, tablet_num, cardinality, 检查一个查询是否达到其中一个限制
- - partition_num, tablet_num, cardinality 可以一起设置,一旦一个查询达到其中一个限制,查询将会被拦截
-
-## 规则
-
-对SQL规则增删改查
-- 创建SQL阻止规则
- - sql:匹配规则(基于正则匹配,特殊字符需要转义),可选,默认值为 "NULL"
- - sqlHash: sql hash值,用于完全匹配,我们会在`fe.audit.log`打印这个值,可选,这个参数和sql只能二选一,默认值为 "NULL"
- - partition_num: 一个扫描节点会扫描的最大partition数量,默认值为0L
- - tablet_num: 一个扫描节点会扫描的最大tablet数量,默认值为0L
- - cardinality: 一个扫描节点粗略的扫描行数,默认值为0L
- - global:是否全局(所有用户)生效,默认为false
- - enable:是否开启阻止规则,默认为true
-```sql
-CREATE SQL_BLOCK_RULE test_rule
-PROPERTIES(
- "sql"="select * from order_analysis",
- "global"="false",
- "enable"="true",
- "sqlHash"=""
-)
-```
-当我们执行刚才定义在规则里的 sql 时,就会返回异常错误,示例如下:
-```sql
-mysql> select * from order_analysis;
-ERROR 1064 (HY000): errCode = 2, detailMessage = sql match regex sql block rule: test_rule
-```
-
-- 创建 test_rule2,将最大扫描的分区数量限制在30个,最大扫描基数限制在100亿行,示例如下:
-```sql
-CREATE SQL_BLOCK_RULE test_rule2 PROPERTIES("partition_num" = "30", "cardinality"="10000000000","global"="false","enable"="true")
-```
-
-- 查看已配置的SQL阻止规则,不指定规则名则为查看所有规则
-
-```sql
-SHOW SQL_BLOCK_RULE [FOR RULE_NAME]
-```
-- 修改SQL阻止规则,允许对sql/sqlHash/partition_num/tablet_num/cardinality/global/enable等每一项进行修改
- - sql 和 sqlHash 不能同时被设置。这意味着,如果一个rule设置了sql或者sqlHash,则另一个属性将无法被修改
- - sql/sqlHash 和 partition_num/tablet_num/cardinality 不能同时被设置。举个例子,如果一个rule设置了partition_num,那么sql或者sqlHash将无法被修改
-```sql
-ALTER SQL_BLOCK_RULE test_rule PROPERTIES("sql"="select \\* from test_table","enable"="true")
-```
-
-```
-ALTER SQL_BLOCK_RULE test_rule2 PROPERTIES("partition_num" = "10","tablet_num"="300","enable"="true")
-```
-
-- 删除SQL阻止规则,支持多规则,以`,`隔开
-```
-DROP SQL_BLOCK_RULE test_rule1,test_rule2
-```
-
-## 用户规则绑定
-如果配置global=false,则需要配置指定用户的规则绑定,多个规则使用`,`分隔
-```sql
-SET PROPERTY [FOR 'jack'] 'sql_block_rules' = 'test_rule1,test_rule2'
-```
diff --git a/docs/zh-CN/administrator-guide/bloomfilter.md b/docs/zh-CN/administrator-guide/bloomfilter.md
deleted file mode 100644
index 235f34f5e2..0000000000
--- a/docs/zh-CN/administrator-guide/bloomfilter.md
+++ /dev/null
@@ -1,133 +0,0 @@
----
-{
- "title": "BloomFilter索引",
- "language": "zh-CN"
-}
----
-
-
-
-# BloomFilter索引
-
-BloomFilter是由Bloom在1970年提出的一种多哈希函数映射的快速查找算法。通常应用在一些需要快速判断某个元素是否属于集合,但是并不严格要求100%正确的场合,BloomFilter有以下特点:
-
-- 空间效率高的概率型数据结构,用来检查一个元素是否在一个集合中。
-- 对于一个元素检测是否存在的调用,BloomFilter会告诉调用者两个结果之一:可能存在或者一定不存在。
-- 缺点是存在误判,告诉你可能存在,不一定真实存在。
-
-布隆过滤器实际上是由一个超长的二进制位数组和一系列的哈希函数组成。二进制位数组初始全部为0,当给定一个待查询的元素时,这个元素会被一系列哈希函数计算映射出一系列的值,所有的值在位数组的偏移量处置为1。
-
-下图所示为一个 m=18, k=3 (m是该Bit数组的大小,k是Hash函数的个数)的Bloom Filter示例。集合中的 x、y、z 三个元素通过 3 个不同的哈希函数散列到位数组中。当查询元素w时,通过Hash函数计算之后因为有一个比特为0,因此w不在该集合中。
-
-
-
-那么怎么判断某个元素是否在集合中呢?同样是这个元素经过哈希函数计算后得到所有的偏移位置,若这些位置全都为1,则判断这个元素在这个集合中,若有一个不为1,则判断这个元素不在这个集合中。就是这么简单!
-
-## Doris BloomFilter索引及使用使用场景
-
-我们在使用HBase的时候,知道HBase数据块索引提供了一个有效的方法,在访问一个特定的行时用来查找应该读取的HFile的数据块。但是它的效用是有限的。HFile数据块的默认大小是64KB,这个大小不能调整太多。
-
-如果你要查找一个短行,只在整个数据块的起始行键上建立索引无法给你细粒度的索引信息。例如,如果你的行占用100字节存储空间,一个64KB的数据块包含(64 * 1024)/100 = 655.36 ≈ 700行,而你只能把起始行放在索引位上。你要查找的行可能落在特定数据块上的行区间里,但也不是肯定存放在那个数据块上。这有多种情况的可能,或者该行在表里不存在,或者存放在另一个HFile里,甚至在MemStore里。这些情况下,从硬盘读取数据块会带来IO开销,也会滥用数据块缓存。这会影响性能,尤其是当你面对一个巨大的数据集并且有很多并发读用户时。
-
-所以HBase提供了布隆过滤器允许你对存储在每个数据块的数据做一个反向测试。当某行被请求时,先检查布隆过滤器看看该行是否不在这个数据块。布隆过滤器要么确定回答该行不在,要么回答它不知道。这就是为什么我们称它是反向测试。布隆过滤器也可以应用到行里的单元上。当访问某列标识符时先使用同样的反向测试。
-
-布隆过滤器也不是没有代价。存储这个额外的索引层次占用额外的空间。布隆过滤器随着它们的索引对象数据增长而增长,所以行级布隆过滤器比列标识符级布隆过滤器占用空间要少。当空间不是问题时,它们可以帮助你榨干系统的性能潜力。
-
-Doris的BloomFilter索引可以在建表的时候指定,或者通过表的ALTER操作来完成。Bloom Filter本质上是一种位图结构,用于快速的判断一个给定的值是否在一个集合中。这种判断会产生小概率的误判。即如果返回false,则一定不在这个集合内。而如果返回true,则有可能在这个集合内。
-
-BloomFilter索引也是以Block为粒度创建的。每个Block中,指定列的值作为一个集合生成一个BloomFilter索引条目,用于在查询时快速过滤不满足条件的数据。
-
-下面我们通过实例来看看Doris怎么创建BloomFilter索引。
-
-### 创建BloomFilter索引
-
-Doris BloomFilter索引的创建是通过在建表语句的PROPERTIES里加上"bloom_filter_columns"="k1,k2,k3"这个属性,k1,k2,k3是你要创建的BloomFilter索引的Key列名称,例如下面我们对表里的saler_id,category_id创建了BloomFilter索引。
-
-```sql
-CREATE TABLE IF NOT EXISTS sale_detail_bloom (
- sale_date date NOT NULL COMMENT "销售时间",
- customer_id int NOT NULL COMMENT "客户编号",
- saler_id int NOT NULL COMMENT "销售员",
- sku_id int NOT NULL COMMENT "商品编号",
- category_id int NOT NULL COMMENT "商品分类",
- sale_count int NOT NULL COMMENT "销售数量",
- sale_price DECIMAL(12,2) NOT NULL COMMENT "单价",
- sale_amt DECIMAL(20,2) COMMENT "销售总金额"
-)
-Duplicate KEY(sale_date, customer_id,saler_id,sku_id,category_id)
-PARTITION BY RANGE(sale_date)
-(
-PARTITION P_202111 VALUES [('2021-11-01'), ('2021-12-01'))
-)
-DISTRIBUTED BY HASH(saler_id) BUCKETS 10
-PROPERTIES (
-"replication_num" = "3",
-"bloom_filter_columns"="saler_id,category_id",
-"dynamic_partition.enable" = "true",
-"dynamic_partition.time_unit" = "MONTH",
-"dynamic_partition.time_zone" = "Asia/Shanghai",
-"dynamic_partition.start" = "-2147483648",
-"dynamic_partition.end" = "2",
-"dynamic_partition.prefix" = "P_",
-"dynamic_partition.replication_num" = "3",
-"dynamic_partition.buckets" = "3"
-);
-```
-
-### 查看BloomFilter索引
-
-查看我们在表上建立的BloomFilter索引是使用:
-
-```
-SHOW CREATE TABLE
-```
-
-### 删除BloomFilter索引
-
-删除索引即为将索引列从bloom_filter_columns属性中移除:
-
-```
-ALTER TABLE sale_detail_bloom SET ("bloom_filter_columns" = "");
-```
-
-### 修改BloomFilter索引
-
-修改索引即为修改表的bloom_filter_columns属性:
-
-```
-ALTER TABLE sale_detail_bloom SET ("bloom_filter_columns" = "saler_id,category_id,sku_id");
-```
-
-### **Doris BloomFilter使用场景**
-
-满足以下几个条件时可以考虑对某列建立Bloom Filter 索引:
-
-1. 首先 BloomFilter 适用于非前缀过滤。
-
-2. 查询会根据该列高频过滤,而且查询条件大多是 in 和 = 过滤。
-
-3. 不同于 Bitmap,BloomFilter 适用于高基数列,比如 UserID。因为如果创建在低基数的列上,比如“性别”列,则每个 Block 几乎都会包含所有取值,导致 BloomFilter 索引失去意义。
-
-### **Doris BloomFilter使用注意事项**
-
-1. 不支持对Tinyint、Float、Double 类型的列建Bloom Filter索引。
-
-2. Bloom Filter索引只对in和 = 过滤查询有加速效果。
-3. 如果要查看某个查询是否命中了 Bloom Filter 索引,可以通过查询的 Profile 信息查看,示例见下。
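-
-下面是一个示意查询(基于上文创建的 sale_detail_bloom 表,过滤取值仅为假设):对 saler_id 的等值过滤可以借助 BloomFilter 索引快速跳过不满足条件的数据块,随后可在该查询的 Profile 中确认是否命中索引。
-
-```sql
--- saler_id 上建有 BloomFilter 索引,等值过滤可被索引加速
-SELECT sale_date, sku_id, sale_count
-FROM sale_detail_bloom
-WHERE saler_id = 1001;
-```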
diff --git a/docs/zh-CN/administrator-guide/broker.md b/docs/zh-CN/administrator-guide/broker.md
deleted file mode 100644
index 40cdab9948..0000000000
--- a/docs/zh-CN/administrator-guide/broker.md
+++ /dev/null
@@ -1,282 +0,0 @@
----
-{
- "title": "Broker",
- "language": "zh-CN"
-}
----
-
-
-
-# Broker
-
-Broker 是 Doris 集群中一种可选进程,主要用于支持 Doris 读写远端存储上的文件和目录,如 HDFS、BOS 和 AFS 等。
-
-Broker 通过提供一个 RPC 服务端口来提供服务,是一个无状态的 Java 进程,负责为远端存储的读写操作封装一些类 POSIX 的文件操作,如 open,pread,pwrite 等等。除此之外,Broker 不记录任何其他信息,所以包括远端存储的连接信息、文件信息、权限信息等等,都需要通过参数在 RPC 调用中传递给 Broker 进程,才能使得 Broker 能够正确读写文件。
-
-Broker 仅作为一个数据通路,并不参与任何计算,因此仅需占用较少的内存。通常一个 Doris 系统中会部署一个或多个 Broker 进程。并且相同类型的 Broker 会组成一个组,并设定一个 **名称(Broker name)**。
-
-Broker 在 Doris 系统架构中的位置如下:
-
-```
-+----+ +----+
-| FE | | BE |
-+-^--+ +--^-+
- | |
- | |
-+-v---------v-+
-| Broker |
-+------^------+
- |
- |
-+------v------+
-|HDFS/BOS/AFS |
-+-------------+
-```
-
-本文档主要介绍 Broker 在访问不同远端存储时需要的参数,如连接信息、权限认证信息等等。
-
-## 支持的存储系统
-
-不同的 Broker 类型支持不同的存储系统。
-
-1. 社区版 HDFS
-
- * 支持简单认证访问
- * 支持通过 kerberos 认证访问
- * 支持 HDFS HA 模式访问
-
-2. 百度 HDFS/AFS(开源版本不支持)
-
- * 支持通过 ugi 简单认证访问
-
-3. 百度对象存储 BOS(开源版本不支持)
-
- * 支持通过 AK/SK 认证访问
-
-## 需要 Broker 的操作
-
-1. Broker Load
-
- Broker Load 功能通过 Broker 进程读取远端存储上的文件数据并导入到 Doris 中。示例如下:
-
- ```
- LOAD LABEL example_db.label6
- (
- DATA INFILE("bos://my_bucket/input/file")
- INTO TABLE `my_table`
- )
- WITH BROKER "broker_name"
- (
- "bos_endpoint" = "http://bj.bcebos.com",
- "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx",
- "bos_secret_accesskey" = "yyyyyyyyyyyyyyyyyyyy"
- )
- ```
-
- 其中 `WITH BROKER` 以及之后的 Property Map 用于提供 Broker 相关信息。
-
-2. 数据导出(Export)
-
- Export 功能通过 Broker 进程,将 Doris 中存储的数据以文本的格式导出到远端存储的文件中。示例如下:
-
- ```
- EXPORT TABLE testTbl
- TO "hdfs://hdfs_host:port/a/b/c"
- WITH BROKER "broker_name"
- (
- "username" = "xxx",
- "password" = "yyy"
- );
- ```
-
- 其中 `WITH BROKER` 以及之后的 Property Map 用于提供 Broker 相关信息。
-
-3. 创建用于备份恢复的仓库(Create Repository)
-
- 当用户需要使用备份恢复功能时,需要先通过 `CREATE REPOSITORY` 命令创建一个 “仓库”,仓库元信息中记录了所使用的 Broker 以及相关信息。之后的备份恢复操作,会通过 Broker 将数据备份到这个仓库,或从这个仓库读取数据恢复到 Doris 中。示例如下:
-
- ```
- CREATE REPOSITORY `bos_repo`
- WITH BROKER `broker_name`
- ON LOCATION "bos://doris_backup"
- PROPERTIES
- (
- "bos_endpoint" = "http://gz.bcebos.com",
- "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx",
- "bos_secret_accesskey" = "yyyyyyyyyyyyyyyyyyyy"
- );
- ```
-
- 其中 `WITH BROKER` 以及之后的 Property Map 用于提供 Broker 相关信息。
-
-
-## Broker 信息
-
-Broker 的信息包括 **名称(Broker name)** 和 **认证信息** 两部分。通常的语法格式如下:
-
-```
-WITH BROKER "broker_name"
-(
- "username" = "xxx",
- "password" = "yyy",
- "other_prop" = "prop_value",
- ...
-);
-```
-
-### 名称
-
-通常用户需要通过操作命令中的 `WITH BROKER "broker_name"` 子句来指定一个已经存在的 Broker Name。Broker Name 是用户在通过 `ALTER SYSTEM ADD BROKER` 命令添加 Broker 进程时指定的一个名称。一个名称通常对应一个或多个 Broker 进程。Doris 会根据名称选择可用的 Broker 进程。用户可以通过 `SHOW BROKER` 命令查看当前集群中已经存在的 Broker。
-
-**注:Broker Name 只是一个用户自定义名称,不代表 Broker 的类型。**
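-
-例如,下面是添加和查看 Broker 的示意命令(Broker 名称、主机与端口均为假设):
-
-```
-ALTER SYSTEM ADD BROKER broker_name "broker_host1:8000", "broker_host2:8000";
-SHOW BROKER;
-```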
-
-### 认证信息
-
-不同的 Broker 类型,以及不同的访问方式需要提供不同的认证信息。认证信息通常在 `WITH BROKER "broker_name"` 之后的 Property Map 中以 Key-Value 的方式提供。
-
-#### 社区版 HDFS
-
-1. 简单认证
-
- 简单认证即 Hadoop 配置 `hadoop.security.authentication` 为 `simple`。
-
- 使用系统用户访问 HDFS。或者在 Broker 启动的环境变量中添加:```HADOOP_USER_NAME```。
-
- ```
- (
- "username" = "user",
- "password" = ""
- );
- ```
-
- 密码置空即可。
-
-2. Kerberos 认证
-
- 该认证方式需提供以下信息:
-
- * `hadoop.security.authentication`:指定认证方式为 kerberos。
- * `kerberos_principal`:指定 kerberos 的 principal。
- * `kerberos_keytab`:指定 kerberos 的 keytab 文件路径。该文件必须为 Broker 进程所在服务器上的文件的绝对路径。并且可以被 Broker 进程访问。
- * `kerberos_keytab_content`:指定 kerberos 中 keytab 文件内容经过 base64 编码之后的内容。这个跟 `kerberos_keytab` 配置二选一即可。
-
- 示例如下:
-
- ```
- (
- "hadoop.security.authentication" = "kerberos",
- "kerberos_principal" = "doris@YOUR.COM",
- "kerberos_keytab" = "/home/doris/my.keytab"
- )
- ```
- ```
- (
- "hadoop.security.authentication" = "kerberos",
- "kerberos_principal" = "doris@YOUR.COM",
- "kerberos_keytab_content" = "ASDOWHDLAWIDJHWLDKSALDJSDIWALD"
- )
- ```
- 如果采用Kerberos认证方式,则部署Broker进程的时候需要[krb5.conf](https://web.mit.edu/kerberos/krb5-1.12/doc/admin/conf_files/krb5_conf.html)文件,
- krb5.conf文件包含Kerberos的配置信息,通常,您应该将krb5.conf文件安装在目录/etc中。您可以通过设置环境变量KRB5_CONFIG覆盖默认位置。
- krb5.conf文件的内容示例如下:
- ```
- [libdefaults]
- default_realm = DORIS.HADOOP
- default_tkt_enctypes = des3-hmac-sha1 des-cbc-crc
- default_tgs_enctypes = des3-hmac-sha1 des-cbc-crc
- dns_lookup_kdc = true
- dns_lookup_realm = false
-
- [realms]
- DORIS.HADOOP = {
- kdc = kerberos-doris.hadoop.service:7005
- }
- ```
-
-3. HDFS HA 模式
-
- 这个配置用于访问以 HA 模式部署的 HDFS 集群。
-
- * `dfs.nameservices`:指定 hdfs 服务的名字,自定义,如:"dfs.nameservices" = "my_ha"。
- * `dfs.ha.namenodes.xxx`:自定义 namenode 的名字,多个名字以逗号分隔。其中 xxx 为 `dfs.nameservices` 中自定义的名字,如: "dfs.ha.namenodes.my_ha" = "my_nn"。
- * `dfs.namenode.rpc-address.xxx.nn`:指定 namenode 的rpc地址信息。其中 nn 表示 `dfs.ha.namenodes.xxx` 中配置的 namenode 的名字,如:"dfs.namenode.rpc-address.my_ha.my_nn" = "host:port"。
- * `dfs.client.failover.proxy.provider`:指定 client 连接 namenode 的 provider,默认为:org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider。
-
- 示例如下:
-
- ```
- (
- "dfs.nameservices" = "my_ha",
- "dfs.ha.namenodes.my_ha" = "my_namenode1, my_namenode2",
- "dfs.namenode.rpc-address.my_ha.my_namenode1" = "nn1_host:rpc_port",
- "dfs.namenode.rpc-address.my_ha.my_namenode2" = "nn2_host:rpc_port",
- "dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
- )
- ```
-
- HA 模式可以和前面两种认证方式组合,进行集群访问。如通过简单认证访问 HA HDFS:
-
- ```
- (
- "username"="user",
- "password"="passwd",
- "dfs.nameservices" = "my_ha",
- "dfs.ha.namenodes.my_ha" = "my_namenode1, my_namenode2",
- "dfs.namenode.rpc-address.my_ha.my_namenode1" = "nn1_host:rpc_port",
- "dfs.namenode.rpc-address.my_ha.my_namenode2" = "nn2_host:rpc_port",
- "dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
- )
- ```
- 关于HDFS集群的配置可以写入hdfs-site.xml文件中,用户使用Broker进程读取HDFS集群的信息时,只需要填写集群的文件路径名和认证信息即可。
-
-#### 百度对象存储 BOS
-
-**(开源版本不支持)**
-
-1. 通过 AK/SK 访问
-
- * AK/SK:Access Key 和 Secret Key。在百度云安全认证中心可以查看用户的 AK/SK。
- * Region Endpoint:BOS 所在地区的 Endpoint。
- * BOS支持的区域及对应 Endpoint 请查看[获取访问域名](https://cloud.baidu.com/doc/BOS/s/Ck1rk80hn#%E8%8E%B7%E5%8F%96%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D)
-
- 示例如下:
-
- ```
- (
- "bos_endpoint" = "http://bj.bcebos.com",
- "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx",
- "bos_secret_accesskey" = "yyyyyyyyyyyyyyyyyyyyyyyyyy"
- )
- ```
-
-#### 百度 HDFS/AFS
-
-**(开源版本不支持)**
-
-百度 AFS 和 HDFS 仅支持使用 ugi 的简单认证访问。示例如下:
-
-```
-(
- "username" = "user",
- "password" = "passwd"
-);
-```
-
-其中 user 和 passwd 为 Hadoop 的 UGI 配置。
diff --git a/docs/zh-CN/administrator-guide/bucket-shuffle-join.md b/docs/zh-CN/administrator-guide/bucket-shuffle-join.md
deleted file mode 100644
index 67ac4a20af..0000000000
--- a/docs/zh-CN/administrator-guide/bucket-shuffle-join.md
+++ /dev/null
@@ -1,106 +0,0 @@
----
-{
- "title": "Bucket Shuffle Join",
- "language": "zh-CN"
-}
----
-
-
-
-# Bucket Shuffle Join
-
-Bucket Shuffle Join 是在 Doris 0.14 版本中正式加入的新功能。旨在为某些 Join 查询提供本地性优化,来减少数据在节点间的传输耗时,来加速查询。
-
-它的设计、实现和效果可以参阅 [ISSUE 4394](https://github.com/apache/incubator-doris/issues/4394)。
-
-## 名词解释
-
-* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。
-* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。
-* 左表:Join查询时,左边的表。进行Probe操作。可被Join Reorder调整顺序。
-* 右表:Join查询时,右边的表。进行Build操作。可被Join Reorder调整顺序。
-
-## 原理
-Doris支持的常规分布式Join方式包括了shuffle join 和broadcast join。这两种join都会导致不小的网络开销:
-
-举个例子,当前存在A表与B表的Join查询,它的Join方式为HashJoin,不同Join类型的开销如下:
-* **Broadcast Join**: 如果根据数据分布,查询规划出A表有3个执行的HashJoinNode,那么需要将B表全量的发送到3个HashJoinNode,那么它的网络开销是`3B`,它的内存开销也是`3B`。
-* **Shuffle Join**: Shuffle Join会将A,B两张表的数据根据哈希计算分散到集群的节点之中,所以它的网络开销为 ```A + B```,内存开销为`B`。
-
-在FE之中保存了Doris每个表的数据分布信息,如果join语句命中了表的数据分布列,我们应该使用数据分布信息来减少join语句的网络与内存开销,这就是Bucket Shuffle Join的思路来源。
-
-
-
-上面的图片展示了Bucket Shuffle Join的工作原理。SQL语句为 A表 join B表,并且join的等值表达式命中了A的数据分布列。而Bucket Shuffle Join会根据A表的数据分布信息,将B表的数据发送到对应的A表的数据存储计算节点。Bucket Shuffle Join开销如下:
-
-* 网络开销: ``` B < min(3B, A + B) ```
-
-* 内存开销: ``` B <= min(3B, B) ```
-
-可见,相比于Broadcast Join与Shuffle Join,Bucket Shuffle Join有着较为明显的性能优势:它能减少数据在节点间的传输耗时和Join时的内存开销。相对于Doris原有的Join方式,它有着下面的优点:
-
-* 首先,Bucket Shuffle Join降低了网络与内存开销,使一些Join查询具有了更好的性能。尤其是当FE能够执行左表的分区裁剪与桶裁剪时。
-* 其次,与Colocate Join不同,它对于表的数据分布方式并没有侵入性,对用户来说是透明的。它对表的数据分布没有强制性的要求,不容易导致数据倾斜的问题。
-* 最后,它可以为Join Reorder提供更多可能的优化空间。
-
-## 使用方式
-
-### 设置Session变量
-
-将session变量`enable_bucket_shuffle_join`设置为`true`,则FE在进行查询规划时就会默认将能够转换为Bucket Shuffle Join的查询自动规划为Bucket Shuffle Join。
-
-```
-set enable_bucket_shuffle_join = true;
-```
-
-在FE进行分布式查询规划时,优先选择的顺序为 Colocate Join -> Bucket Shuffle Join -> Broadcast Join -> Shuffle Join。但是如果用户显式hint了Join的类型,如:
-
-```
-select * from test join [shuffle] baseall on test.k1 = baseall.k1;
-```
-
-则上述的选择优先顺序则不生效。
-
-该session变量在0.14版本默认为`true`, 而0.13版本需要手动设置为`true`。
-
-### 查看Join的类型
-
-可以通过`explain`命令来查看Join是否为Bucket Shuffle Join:
-
-```
-| 2:HASH JOIN |
-| | join op: INNER JOIN (BUCKET_SHUFFLE) |
-| | hash predicates: |
-| | colocate: false, reason: table not in the same group |
-| | equal join conjunct: `test`.`k1` = `baseall`.`k1`
-```
-
-在Join类型之中会指明使用的Join方式为:`BUCKET_SHUFFLE`。
-
-## Bucket Shuffle Join的规划规则
-
-在绝大多数场景之中,用户只需要默认打开session变量的开关就可以透明的使用这种Join方式带来的性能提升,但是如果了解Bucket Shuffle Join的规划规则,可以帮助我们利用它写出更加高效的SQL。
-
-* Bucket Shuffle Join只生效于Join条件为等值的场景,原因与Colocate Join类似,它们都依赖hash来计算确定的数据分布。
-* 在等值Join条件之中包含两张表的分桶列,当左表的分桶列为等值的Join条件时,它有很大概率会被规划为Bucket Shuffle Join。
-* 由于不同的数据类型的hash值计算结果不同,所以Bucket Shuffle Join要求左表的分桶列的类型与右表等值join列的类型需要保持一致,否则无法进行对应的规划。
-* Bucket Shuffle Join只作用于Doris原生的OLAP表,对于ODBC,MySQL,ES等外表,当其作为左表时是无法规划生效的。
-* 对于分区表,由于每一个分区的数据分布规则可能不同,所以Bucket Shuffle Join只能保证左表为单分区时生效。所以在SQL执行之中,需要尽量使用`where`条件使分区裁剪的策略能够生效。
-* 假如左表为Colocate的表,那么它每个分区的数据分布规则是确定的,Bucket Shuffle Join能在Colocate表上表现更好。
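-
-结合上述规则,下面给出一个示意性的建表与查询示例(表名、列名与取值均为假设):左表按 k1 分桶且 where 条件裁剪到单分区,join 等值条件命中左表分桶列并且两侧类型一致,该查询有较大概率被规划为 Bucket Shuffle Join,可通过 explain 验证:
-
-```
-CREATE TABLE t_left (
-    dt date NOT NULL,
-    k1 int NOT NULL,
-    v1 int
-) DUPLICATE KEY(dt, k1)
-PARTITION BY RANGE(dt)
-(
-    PARTITION p202201 VALUES [('2022-01-01'), ('2022-02-01'))
-)
-DISTRIBUTED BY HASH(k1) BUCKETS 8
-PROPERTIES ("replication_num" = "1");
-
-CREATE TABLE t_right (
-    k1 int NOT NULL,
-    v2 int
-) DUPLICATE KEY(k1)
-DISTRIBUTED BY HASH(k1) BUCKETS 16
-PROPERTIES ("replication_num" = "1");
-
-set enable_bucket_shuffle_join = true;
--- where 条件裁剪到单分区,等值条件命中左表分桶列 k1
-explain select * from t_left join t_right on t_left.k1 = t_right.k1
-where t_left.dt = '2022-01-15';
-```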
diff --git a/docs/zh-CN/administrator-guide/colocation-join.md b/docs/zh-CN/administrator-guide/colocation-join.md
deleted file mode 100644
index 18051b950a..0000000000
--- a/docs/zh-CN/administrator-guide/colocation-join.md
+++ /dev/null
@@ -1,409 +0,0 @@
----
-{
- "title": "Colocation Join",
- "language": "zh-CN"
-}
----
-
-
-
-# Colocation Join
-
-Colocation Join 是在 Doris 0.9 版本中引入的新功能。旨在为某些 Join 查询提供本地性优化,来减少数据在节点间的传输耗时,加速查询。
-
-最初的设计、实现和效果可以参阅 [ISSUE 245](https://github.com/apache/incubator-doris/issues/245)。
-
-Colocation Join 功能经过一次改版,设计和使用方式和最初设计稍有不同。本文档主要介绍 Colocation Join 的原理、实现、使用方式和注意事项。
-
-## 名词解释
-
-* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。
-* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。
-* Colocation Group(CG):一个 CG 中会包含一张及以上的 Table。在同一个 Group 内的 Table 有着相同的 Colocation Group Schema,并且有着相同的数据分片分布。
-* Colocation Group Schema(CGS):用于描述一个 CG 中的 Table,和 Colocation 相关的通用 Schema 信息。包括分桶列类型,分桶数以及副本数等。
-
-## 原理
-
-Colocation Join 功能,是将一组拥有相同 CGS 的 Table 组成一个 CG。并保证这些 Table 对应的数据分片会落在同一个 BE 节点上。使得当 CG 内的表进行分桶列上的 Join 操作时,可以通过直接进行本地数据 Join,减少数据在节点间的传输耗时。
-
-一个表的数据,最终会根据分桶列值 Hash、对桶数取模的后落在某一个分桶内。假设一个 Table 的分桶数为 8,则共有 `[0, 1, 2, 3, 4, 5, 6, 7]` 8 个分桶(Bucket),我们称这样一个序列为一个 `BucketsSequence`。每个 Bucket 内会有一个或多个数据分片(Tablet)。当表为单分区表时,一个 Bucket 内仅有一个 Tablet。如果是多分区表,则会有多个。
-
-为了使得 Table 能够有相同的数据分布,同一 CG 内的 Table 必须保证以下属性相同:
-
-1. 分桶列和分桶数
-
- 分桶列,即在建表语句中 `DISTRIBUTED BY HASH(col1, col2, ...)` 中指定的列。分桶列决定了一张表的数据通过哪些列的值进行 Hash 划分到不同的 Tablet 中。同一 CG 内的 Table 必须保证分桶列的类型和数量完全一致,并且桶数一致,才能保证多张表的数据分片能够一一对应的进行分布控制。
-
-2. 副本数
-
- 同一个 CG 内所有表的所有分区(Partition)的副本数必须一致。如果不一致,可能出现某一个 Tablet 的某一个副本,在同一个 BE 上没有其他的表分片的副本对应。
-
-同一个 CG 内的表,分区的个数、范围以及分区列的类型不要求一致。
-
-在固定了分桶列和分桶数后,同一个 CG 内的表会拥有相同的 BucketsSequence。而副本数决定了每个分桶内的 Tablet 的多个副本,存放在哪些 BE 上。假设 BucketsSequence 为 `[0, 1, 2, 3, 4, 5, 6, 7]`,BE 节点有 `[A, B, C, D]` 4个。则一个可能的数据分布如下:
-
-```
-+---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+
-| 0 | | 1 | | 2 | | 3 | | 4 | | 5 | | 6 | | 7 |
-+---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+
-| A | | B | | C | | D | | A | | B | | C | | D |
-| | | | | | | | | | | | | | | |
-| B | | C | | D | | A | | B | | C | | D | | A |
-| | | | | | | | | | | | | | | |
-| C | | D | | A | | B | | C | | D | | A | | B |
-+---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+
-```
-
-CG 内所有表的数据都会按照上面的规则进行统一分布,这样就保证了,分桶列值相同的数据都在同一个 BE 节点上,可以进行本地数据 Join。
-
-## 使用方式
-
-### 建表
-
-建表时,可以在 `PROPERTIES` 中指定属性 `"colocate_with" = "group_name"`,表示这个表是一个 Colocation Join 表,并且归属于一个指定的 Colocation Group。
-
-示例:
-
-```
-CREATE TABLE tbl (k1 int, v1 int sum)
-DISTRIBUTED BY HASH(k1)
-BUCKETS 8
-PROPERTIES(
- "colocate_with" = "group1"
-);
-```
-
-如果指定的 Group 不存在,则 Doris 会自动创建一个只包含当前这张表的 Group。如果 Group 已存在,则 Doris 会检查当前表是否满足 Colocation Group Schema。如果满足,则会创建该表,并将该表加入 Group。同时,表会根据已存在的 Group 中的数据分布规则创建分片和副本。
-Group 归属于一个 Database,Group 的名字在一个 Database 内唯一。其在内部存储的全名为 `dbId_groupName`,但用户只感知 groupName。
-
-### 删表
-
-当 Group 中最后一张表彻底删除后(彻底删除是指从回收站中删除。通常,一张表通过 `DROP TABLE` 命令删除后,会在回收站默认停留一天的时间后,再删除),该 Group 也会被自动删除。
-
-### 查看 Group
-
-以下命令可以查看集群内已存在的 Group 信息。
-
-```
-SHOW PROC '/colocation_group';
-
-+-------------+--------------+--------------+------------+----------------+----------+----------+
-| GroupId | GroupName | TableIds | BucketsNum | ReplicationNum | DistCols | IsStable |
-+-------------+--------------+--------------+------------+----------------+----------+----------+
-| 10005.10008 | 10005_group1 | 10007, 10040 | 10 | 3 | int(11) | true |
-+-------------+--------------+--------------+------------+----------------+----------+----------+
-```
-
-* GroupId: 一个 Group 的全集群唯一标识,前半部分为 db id,后半部分为 group id。
-* GroupName: Group 的全名。
-* TableIds: 该 Group 包含的 Table 的 id 列表。
-* BucketsNum: 分桶数。
-* ReplicationNum: 副本数。
-* DistCols: Distribution columns,即分桶列类型。
-* IsStable: 该 Group 是否稳定(稳定的定义,见 `Colocation 副本均衡和修复` 一节)。
-
-通过以下命令可以进一步查看一个 Group 的数据分布情况:
-
-```
-SHOW PROC '/colocation_group/10005.10008';
-
-+-------------+---------------------+
-| BucketIndex | BackendIds |
-+-------------+---------------------+
-| 0 | 10004, 10002, 10001 |
-| 1 | 10003, 10002, 10004 |
-| 2 | 10002, 10004, 10001 |
-| 3 | 10003, 10002, 10004 |
-| 4 | 10002, 10004, 10003 |
-| 5 | 10003, 10002, 10001 |
-| 6 | 10003, 10004, 10001 |
-| 7 | 10003, 10004, 10002 |
-+-------------+---------------------+
-```
-
-* BucketIndex: 分桶序列的下标。
-* BackendIds: 分桶中数据分片所在的 BE 节点 id 列表。
-
-> 以上命令需要 ADMIN 权限。暂不支持普通用户查看。
-
-### 修改表 Colocate Group 属性
-
-可以对一个已经创建的表,修改其 Colocation Group 属性。示例:
-
-`ALTER TABLE tbl SET ("colocate_with" = "group2");`
-
-* 如果该表之前没有指定过 Group,则该命令检查 Schema,并将该表加入到该 Group(Group 不存在则会创建)。
-* 如果该表之前有指定其他 Group,则该命令会先将该表从原有 Group 中移除,并加入新 Group(Group 不存在则会创建)。
-
-也可以通过以下命令,删除一个表的 Colocation 属性:
-
-`ALTER TABLE tbl SET ("colocate_with" = "");`
-
-### 其他相关操作
-
-当对一个具有 Colocation 属性的表进行增加分区(ADD PARTITION)、修改副本数时,Doris 会检查修改是否会违反 Colocation Group Schema,如果违反则会拒绝。
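-
-下面是一个示意(表名与分区均为假设,假定该表为 RANGE 分区、所属 Group 的副本数为 3):
-
-```
--- 增加分区,不改变分桶与副本数,满足 Colocation Group Schema,可以执行
-ALTER TABLE colocate_tbl ADD PARTITION p3 VALUES LESS THAN ('2019-07-31');
-
--- 将某分区副本数改为 1,与 Group 的副本数不一致,将被拒绝
-ALTER TABLE colocate_tbl MODIFY PARTITION p1 SET ("replication_num" = "1");
-```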
-
-## Colocation 副本均衡和修复
-
-Colocation 表的副本分布需要遵循 Group 中指定的分布,所以在副本修复和均衡方面和普通分片有所区别。
-
-Group 自身有一个 Stable 属性,当 Stable 为 true 时,表示当前 Group 内的表的所有分片没有正在进行变动,Colocation 特性可以正常使用。当 Stable 为 false 时(Unstable),表示当前 Group 内有部分表的分片正在做修复或迁移,此时,相关表的 Colocation Join 将退化为普通 Join。
-
-### 副本修复
-
-副本只能存储在指定的 BE 节点上。所以当某个 BE 不可用时(宕机、Decommission 等),需要寻找一个新的 BE 进行替换。Doris 会优先寻找负载最低的 BE 进行替换。替换后,该 Bucket 内的所有在旧 BE 上的数据分片都要做修复。迁移过程中,Group 被标记为 Unstable。
-
-### 副本均衡
-
-Doris 会尽力将 Colocation 表的分片均匀分布在所有 BE 节点上。对于普通表的副本均衡,是以单副本为粒度的,即单独为每一个副本寻找负载较低的 BE 节点即可。而 Colocation 表的均衡是 Bucket 级别的,即一个 Bucket 内的所有副本都会一起迁移。我们采用一个简单的均衡算法,即在不考虑副本实际大小,而只根据副本数量,将 BucketsSequence 均匀的分布在所有 BE 上。具体算法可以参阅 `ColocateTableBalancer.java` 中的代码注释。
-
-> 注1:当前的 Colocation 副本均衡和修复算法,对于异构部署的 Doris 集群效果可能不佳。所谓异构部署,即 BE 节点的磁盘容量、数量、磁盘类型(SSD 和 HDD)不一致。在异构部署情况下,可能出现小容量的 BE 节点和大容量的 BE 节点存储了相同的副本数量。
->
-> 注2:当一个 Group 处于 Unstable 状态时,其中的表的 Join 将退化为普通 Join。此时可能会极大降低集群的查询性能。如果不希望系统自动均衡,可以设置 FE 的配置项 `disable_colocate_balance` 来禁止自动均衡。然后在合适的时间打开即可。(具体参阅 `高级操作` 一节)
-
-## 查询
-
-对 Colocation 表的查询方式和普通表一样,用户无需感知 Colocation 属性。如果 Colocation 表所在的 Group 处于 Unstable 状态,将自动退化为普通 Join。
-
-举例说明:
-
-表1:
-
-```
-CREATE TABLE `tbl1` (
- `k1` date NOT NULL COMMENT "",
- `k2` int(11) NOT NULL COMMENT "",
- `v1` int(11) SUM NOT NULL COMMENT ""
-) ENGINE=OLAP
-AGGREGATE KEY(`k1`, `k2`)
-PARTITION BY RANGE(`k1`)
-(
- PARTITION p1 VALUES LESS THAN ('2019-05-31'),
- PARTITION p2 VALUES LESS THAN ('2019-06-30')
-)
-DISTRIBUTED BY HASH(`k2`) BUCKETS 8
-PROPERTIES (
- "colocate_with" = "group1"
-);
-```
-
-表2:
-
-```
-CREATE TABLE `tbl2` (
- `k1` datetime NOT NULL COMMENT "",
- `k2` int(11) NOT NULL COMMENT "",
- `v1` double SUM NOT NULL COMMENT ""
-) ENGINE=OLAP
-AGGREGATE KEY(`k1`, `k2`)
-DISTRIBUTED BY HASH(`k2`) BUCKETS 8
-PROPERTIES (
- "colocate_with" = "group1"
-);
-```
-
-查看查询计划:
-
-```
-DESC SELECT * FROM tbl1 INNER JOIN tbl2 ON (tbl1.k2 = tbl2.k2);
-
-+----------------------------------------------------+
-| Explain String |
-+----------------------------------------------------+
-| PLAN FRAGMENT 0 |
-| OUTPUT EXPRS:`tbl1`.`k1` | |
-| PARTITION: RANDOM |
-| |
-| RESULT SINK |
-| |
-| 2:HASH JOIN |
-| | join op: INNER JOIN |
-| | hash predicates: |
-| | colocate: true |
-| | `tbl1`.`k2` = `tbl2`.`k2` |
-| | tuple ids: 0 1 |
-| | |
-| |----1:OlapScanNode |
-| | TABLE: tbl2 |
-| | PREAGGREGATION: OFF. Reason: null |
-| | partitions=0/1 |
-| | rollup: null |
-| | buckets=0/0 |
-| | cardinality=-1 |
-| | avgRowSize=0.0 |
-| | numNodes=0 |
-| | tuple ids: 1 |
-| | |
-| 0:OlapScanNode |
-| TABLE: tbl1 |
-| PREAGGREGATION: OFF. Reason: No AggregateInfo |
-| partitions=0/2 |
-| rollup: null |
-| buckets=0/0 |
-| cardinality=-1 |
-| avgRowSize=0.0 |
-| numNodes=0 |
-| tuple ids: 0 |
-+----------------------------------------------------+
-```
-如果 Colocation Join 生效,则 Hash Join 节点会显示 `colocate: true`。
-
-如果没有生效,则查询计划如下:
-
-```
-+----------------------------------------------------+
-| Explain String |
-+----------------------------------------------------+
-| PLAN FRAGMENT 0 |
-| OUTPUT EXPRS:`tbl1`.`k1` | |
-| PARTITION: RANDOM |
-| |
-| RESULT SINK |
-| |
-| 2:HASH JOIN |
-| | join op: INNER JOIN (BROADCAST) |
-| | hash predicates: |
-| | colocate: false, reason: group is not stable |
-| | `tbl1`.`k2` = `tbl2`.`k2` |
-| | tuple ids: 0 1 |
-| | |
-| |----3:EXCHANGE |
-| | tuple ids: 1 |
-| | |
-| 0:OlapScanNode |
-| TABLE: tbl1 |
-| PREAGGREGATION: OFF. Reason: No AggregateInfo |
-| partitions=0/2 |
-| rollup: null |
-| buckets=0/0 |
-| cardinality=-1 |
-| avgRowSize=0.0 |
-| numNodes=0 |
-| tuple ids: 0 |
-| |
-| PLAN FRAGMENT 1 |
-| OUTPUT EXPRS: |
-| PARTITION: RANDOM |
-| |
-| STREAM DATA SINK |
-| EXCHANGE ID: 03 |
-| UNPARTITIONED |
-| |
-| 1:OlapScanNode |
-| TABLE: tbl2 |
-| PREAGGREGATION: OFF. Reason: null |
-| partitions=0/1 |
-| rollup: null |
-| buckets=0/0 |
-| cardinality=-1 |
-| avgRowSize=0.0 |
-| numNodes=0 |
-| tuple ids: 1 |
-+----------------------------------------------------+
-```
-
-HASH JOIN 节点会显示对应原因:`colocate: false, reason: group is not stable`。同时会有一个 EXCHANGE 节点生成。
-
-
-## 高级操作
-
-### FE 配置项
-
-* disable\_colocate\_relocate
-
- 是否关闭 Doris 的自动 Colocation 副本修复。默认为 false,即不关闭。该参数只影响 Colocation 表的副本修复,不影响普通表。
-
-* disable\_colocate\_balance
-
- 是否关闭 Doris 的自动 Colocation 副本均衡。默认为 false,即不关闭。该参数只影响 Colocation 表的副本均衡,不影响普通表。
-
-以上参数可以动态修改,设置方式请参阅 `HELP ADMIN SHOW CONFIG;` 和 `HELP ADMIN SET CONFIG;`。
-
-* disable\_colocate\_join
-
- 是否关闭 Colocation Join 功能。在 0.10 及之前的版本,默认为 true,即关闭。在之后的某个版本中将默认为 false,即开启。
-
-* use\_new\_tablet\_scheduler
-
-  在 0.10 及之前的版本中,新的副本调度逻辑与 Colocation Join 功能不兼容,所以在 0.10 及之前版本,如果 `disable_colocate_join = false`,则需设置 `use_new_tablet_scheduler = false`,即关闭新的副本调度器。之后的版本中,`use_new_tablet_scheduler` 将恒为 true。
-
-### HTTP Restful API
-
-Doris 提供了几个和 Colocation Join 有关的 HTTP Restful API,用于查看和修改 Colocation Group。
-
-该 API 实现在 FE 端,使用 `fe_host:fe_http_port` 进行访问。需要 ADMIN 权限。
-
-1. 查看集群的全部 Colocation 信息
-
- ```
- GET /api/colocate
-
- 返回以 Json 格式表示内部 Colocation 信息。
-
- {
- "msg": "success",
- "code": 0,
- "data": {
- "infos": [
- ["10003.12002", "10003_group1", "10037, 10043", "1", "1", "int(11)", "true"]
- ],
- "unstableGroupIds": [],
- "allGroupIds": [{
- "dbId": 10003,
- "grpId": 12002
- }]
- },
- "count": 0
- }
- ```
-
-2. 将 Group 标记为 Stable 或 Unstable
-
- * 标记为 Stable
-
- ```
- POST /api/colocate/group_stable?db_id=10005&group_id=10008
-
- 返回:200
- ```
-
- * 标记为 Unstable
-
- ```
- DELETE /api/colocate/group_stable?db_id=10005&group_id=10008
-
- 返回:200
- ```
-
-3. 设置 Group 的数据分布
-
-   该接口可以强制设置某一 Group 的数据分布。
-
- ```
- POST /api/colocate/bucketseq?db_id=10005&group_id=10008
-
- Body:
- [[10004,10002],[10003,10002],[10002,10004],[10003,10002],[10002,10004],[10003,10002],[10003,10004],[10003,10004],[10003,10004],[10002,10004]]
-
- 返回 200
- ```
- 其中 Body 是以嵌套数组表示的 BucketsSequence 以及每个 Bucket 中分片分布所在 BE 的 id。
-
- 注意,使用该命令,可能需要将 FE 的配置 `disable_colocate_relocate` 和 `disable_colocate_balance` 设为 true。即关闭系统自动的 Colocation 副本修复和均衡。否则可能在修改后,会被系统自动重置。
diff --git a/docs/zh-CN/administrator-guide/config/be_config.md b/docs/zh-CN/administrator-guide/config/be_config.md
deleted file mode 100644
index 5820889df7..0000000000
--- a/docs/zh-CN/administrator-guide/config/be_config.md
+++ /dev/null
@@ -1,1543 +0,0 @@
----
-{
- "title": "BE 配置项",
- "language": "zh-CN"
-}
----
-
-
-
-
-
-# BE 配置项
-
-该文档主要介绍 BE 的相关配置项。
-
-BE 的配置文件 `be.conf` 通常存放在 BE 部署路径的 `conf/` 目录下。 而在 0.14 版本中会引入另一个配置文件 `be_custom.conf`。该配置文件用于记录用户在运行时动态配置并持久化的配置项。
-
-BE 进程启动后,会先读取 `be.conf` 中的配置项,之后再读取 `be_custom.conf` 中的配置项。`be_custom.conf` 中的配置项会覆盖 `be.conf` 中相同的配置项。
-
-## 查看配置项
-
-用户可以通过访问 BE 的 Web 页面查看当前配置项:
-
-`http://be_host:be_webserver_port/varz`
-
-## 设置配置项
-
-BE 的配置项有两种方式进行配置:
-
-1. 静态配置
-
- 在 `conf/be.conf` 文件中添加和设置配置项。`be.conf` 中的配置项会在 BE 进行启动时被读取。没有在 `be.conf` 中的配置项将使用默认值。
-
-2. 动态配置
-
- BE 启动后,可以通过以下命令动态设置配置项。
-
- ```
-    curl -X POST http://{be_ip}:{be_http_port}/api/update_config?{key}={value}
- ```
-
- 在 0.13 版本及之前,通过该方式修改的配置项将在 BE 进程重启后失效。在 0.14 及之后版本中,可以通过以下命令持久化修改后的配置。修改后的配置项存储在 `be_custom.conf` 文件中。
-
- ```
-    curl -X POST http://{be_ip}:{be_http_port}/api/update_config?{key}={value}\&persist=true
- ```
-
-## 应用举例
-
-1. 静态方式修改 `max_compaction_concurrency`
-
- 通过在 `be.conf` 文件中添加:
-
- ```max_compaction_concurrency=5```
-
- 之后重启 BE 进程以生效该配置。
-
-2. 动态方式修改 `streaming_load_max_mb`
-
- BE 启动后,通过下面命令动态设置配置项 `streaming_load_max_mb`:
-
- ```curl -X POST http://{be_ip}:{be_http_port}/api/update_config?streaming_load_max_mb=1024```
-
- 返回值如下,则说明设置成功。
-
- ```
- {
- "status": "OK",
- "msg": ""
- }
- ```
-
- BE 重启后该配置将失效。如果想持久化修改结果,使用如下命令:
-
- ```
- curl -X POST http://{be_ip}:{be_http_port}/api/update_config?streaming_load_max_mb=1024\&persist=true
- ```
-
-## 配置项列表
-
-### `alter_tablet_worker_count`
-
-默认值:3
-
-进行schema change的线程数
-
-### `base_compaction_check_interval_seconds`
-
-默认值:60 (s)
-
-BaseCompaction线程轮询的间隔
-
-### `base_compaction_interval_seconds_since_last_operation`
-
-默认值:86400
-
-BaseCompaction触发条件之一:上一次BaseCompaction距今的间隔
-
-### `base_compaction_num_cumulative_deltas`
-
-默认值:5
-
-BaseCompaction触发条件之一:Cumulative文件数目要达到的限制,达到这个限制之后会触发BaseCompaction
-
-### `base_compaction_write_mbytes_per_sec`
-
-默认值:5(MB)
-
-BaseCompaction任务每秒写磁盘最大速度
-
-### `base_cumulative_delta_ratio`
-
-默认值:0.3 (30%)
-
-BaseCompaction触发条件之一:Cumulative文件大小达到Base文件的比例
-
-### `base_compaction_trace_threshold`
-
-* 类型:int32
-* 描述:打印base compaction的trace信息的阈值,单位秒
-* 默认值:10
-
-base compaction是一个耗时较长的后台操作,为了跟踪其运行信息,可以调整这个阈值参数来控制trace日志的打印。打印信息如下:
-
-```
-W0610 11:26:33.804431 56452 storage_engine.cpp:552] Trace:
-0610 11:23:03.727535 (+ 0us) storage_engine.cpp:554] start to perform base compaction
-0610 11:23:03.728961 (+ 1426us) storage_engine.cpp:560] found best tablet 546859
-0610 11:23:03.728963 (+ 2us) base_compaction.cpp:40] got base compaction lock
-0610 11:23:03.729029 (+ 66us) base_compaction.cpp:44] rowsets picked
-0610 11:24:51.784439 (+108055410us) compaction.cpp:46] got concurrency lock and start to do compaction
-0610 11:24:51.784818 (+ 379us) compaction.cpp:74] prepare finished
-0610 11:26:33.359265 (+101574447us) compaction.cpp:87] merge rowsets finished
-0610 11:26:33.484481 (+125216us) compaction.cpp:102] output rowset built
-0610 11:26:33.484482 (+ 1us) compaction.cpp:106] check correctness finished
-0610 11:26:33.513197 (+ 28715us) compaction.cpp:110] modify rowsets finished
-0610 11:26:33.513300 (+ 103us) base_compaction.cpp:49] compaction finished
-0610 11:26:33.513441 (+ 141us) base_compaction.cpp:56] unused rowsets have been moved to GC queue
-Metrics: {"filtered_rows":0,"input_row_num":3346807,"input_rowsets_count":42,"input_rowsets_data_size":1256413170,"input_segments_num":44,"merge_rowsets_latency_us":101574444,"merged_rows":0,"output_row_num":3346807,"output_rowset_data_size":1228439659,"output_segments_num":6}
-```
-
-### `be_port`
-
-* 类型:int32
-* 描述:BE 上 thrift server 的端口号,用于接收来自 FE 的请求
-* 默认值:9060
-
-### `be_service_threads`
-* 类型:int32
-* 描述:BE 上 thrift server service的执行线程数,代表可以用于执行FE请求的线程数。
-* 默认值:64
-
-### `brpc_max_body_size`
-
-这个配置主要用来修改 brpc 的参数 `max_body_size`。
-
-有时查询失败,在 BE 日志中会出现 `body_size is too large` 的错误信息。这可能发生在 SQL 模式为 multi distinct + 无 group by + 超过1T 数据量的情况下。这个错误表示 brpc 的包大小超过了配置值。此时可以通过调大该配置避免这个错误。
-
-### `brpc_socket_max_unwritten_bytes`
-
-这个配置主要用来修改 brpc 的参数 `socket_max_unwritten_bytes`。
-
-有时查询失败,BE 日志中会出现 `The server is overcrowded` 的错误信息,表示连接上有过多的未发送数据。当查询需要发送较大的bitmap字段时,可能会遇到该问题,此时可能通过调大该配置避免该错误。
-
-### `transfer_data_by_brpc_attachment`
-
-* 类型: bool
-* 描述:该配置用来控制是否将ProtoBuf Request中的RowBatch转移到Controller Attachment后通过brpc发送。ProtoBuf Request的长度超过2G时会报错: Bad request, error_text=[E1003]Fail to compress request,将RowBatch放到Controller Attachment中将更快且避免这个错误。
-* 默认值:false
-
-### `brpc_num_threads`
-
-该配置主要用来修改brpc中bthreads的数量. 该配置的默认值被设置为-1, 这意味着bthreads的数量将被设置为机器的cpu核数。
-
-用户可以将该配置的值调大来获取更好的QPS性能。更多的信息可以参考`https://github.com/apache/incubator-brpc/blob/master/docs/cn/benchmark.md`。
-
-### `brpc_port`
-
-* 类型:int32
-* 描述:BE 上的 brpc 的端口,用于 BE 之间通讯
-* 默认值:8060
-
-### `buffer_pool_clean_pages_limit`
-
-默认值:20G
-
-清理可能被缓冲池保存的Page
-
-### `buffer_pool_limit`
-
-* 类型:string
-* 描述:buffer pool之中最大的可分配内存
-* 默认值:20%
-
-BE缓存池最大的内存可用量,buffer pool是BE新的内存管理结构,通过buffer page来进行内存管理,并能够实现数据的落盘。并发的所有查询的内存申请都会通过buffer pool来申请。当前buffer pool仅作用在**AggregationNode**与**ExchangeNode**。
-
-### `check_auto_compaction_interval_seconds`
-
-* 类型:int32
-* 描述:当自动执行compaction的功能关闭时,检查自动compaction开关是否被开启的时间间隔。
-* 默认值:5
-
-### `check_consistency_worker_count`
-
-默认值:1
-
-计算tablet的校验和(checksum)的工作线程数
-
-### `chunk_reserved_bytes_limit`
-
-默认值:2147483648
-
-Chunk Allocator的reserved bytes限制,默认为2GB,增加这个变量可以提高性能,但是会获得更多其他模块无法使用的空闲内存
-
-### `clear_transaction_task_worker_count`
-
-默认值:1
-
-用于清理事务的线程数
-
-### `clone_worker_count`
-
-默认值:3
-
-用于执行克隆任务的线程数
-
-### `cluster_id`
-
-* 类型:int32
-
-* 描述:配置BE的所属于的集群id。
-
-* 默认值:-1
-
-该值通常由FE通过心跳向BE下发,不需要额外进行配置。当确认某BE属于某一个确定的Doris集群时,可以进行配置,同时需要修改数据目录下的cluster_id文件,使二者相同。
-
-### `column_dictionary_key_ratio_threshold`
-
-默认值:0
-
-字符串类型的取值比例,小于这个比例采用字典压缩算法
-
-### `column_dictionary_key_size_threshold`
-
-默认值:0
-
-字典压缩列大小,小于这个值采用字典压缩算法
-
-### `compaction_tablet_compaction_score_factor`
-
-* 类型:int32
-* 描述:选择tablet进行compaction时,计算 tablet score 的公式中 compaction score的权重。
-* 默认值:1
-
-### `compaction_tablet_scan_frequency_factor`
-
-* 类型:int32
-* 描述:选择tablet进行compaction时,计算 tablet score 的公式中 tablet scan frequency 的权重。
-* 默认值:0
-
-选择一个tablet执行compaction任务时,可以将tablet的scan频率作为一个选择依据,对当前最近一段时间频繁scan的tablet优先执行compaction。
-tablet score可以通过以下公式计算:
-
-tablet_score = compaction_tablet_scan_frequency_factor * tablet_scan_frequency + compaction_tablet_compaction_score_factor * compaction_score
-
-### `compaction_task_num_per_disk`
-
-* 类型:int32
-* 描述:每个磁盘(HDD)可以并发执行的compaction任务数量。
-* 默认值:2
-
-### `compaction_task_num_per_fast_disk`
-
-* 类型:int32
-* 描述:每个高速磁盘(SSD)可以并发执行的compaction任务数量。
-* 默认值:4
-
-### `compress_rowbatches`
-* 类型:bool
-
-* 描述:序列化RowBatch时是否使用Snappy压缩算法进行数据压缩
-
-* 默认值:true
-
-### `create_tablet_worker_count`
-
-默认值:3
-
-BE创建tablet的工作线程数
-
-### `cumulative_compaction_rounds_for_each_base_compaction_round`
-
-* 类型:int32
-* 描述:Compaction任务的生产者每次连续生产多少轮cumulative compaction任务后生产一轮base compaction。
-* 默认值:9
-
-### `disable_auto_compaction`
-
-* 类型:bool
-* 描述:关闭自动执行compaction任务
-* 默认值:false
-
-一般需要为关闭状态,当调试或测试环境中想要手动操作compaction任务时,可以对该配置进行开启
-
-### `cumulative_compaction_budgeted_bytes`
-
-默认值:104857600
-
-BaseCompaction触发条件之一:Singleton文件大小限制,100MB
-
-### `cumulative_compaction_check_interval_seconds`
-
-默认值:10 (s)
-
-CumulativeCompaction线程轮询的间隔
-
-### `cumulative_compaction_skip_window_seconds`
-
-默认值:30 (s)
-
-CumulativeCompaction会跳过最近发布的增量,以防止压缩可能被查询的版本(以防查询计划阶段花费一些时间)。该参数用于设置跳过的窗口时间大小。
-
-### `cumulative_compaction_trace_threshold`
-
-* 类型:int32
-* 描述:打印cumulative compaction的trace信息的阈值,单位秒
-* 默认值:2
-
-与base_compaction_trace_threshold类似。
-
-### disable_compaction_trace_log
-
-* 类型: bool
-* 描述: 关闭compaction的trace日志
-* 默认值: true
-
-如果设置为true,`cumulative_compaction_trace_threshold` 和 `base_compaction_trace_threshold` 将不起作用。并且trace日志将关闭。
-
-### `cumulative_compaction_policy`
-
-* 类型:string
-* 描述:配置 cumulative compaction 阶段的合并策略,目前实现了两种合并策略,num_based和size_based
-* 默认值:size_based
-
-详细说明:num_based 是最初版本的 cumulative compaction 合并策略,做一次 cumulative compaction 之后直接进入 base compaction 流程;size_based 是 num_based 的优化版本,仅当 rowset 的磁盘体积在相同数量级时才进行版本合并,合并之后满足条件的 rowset 晋升到 base compaction 阶段。在大量小批量导入的情况下,它能降低 base compaction 的写入放大率,在读取放大率和空间放大率之间进行权衡,同时减少文件版本的数量。
-
-### `cumulative_size_based_promotion_size_mbytes`
-
-* 类型:int64
-* 描述:在size_based策略下,cumulative compaction的输出rowset总磁盘大小超过了此配置大小,该rowset将用于base compaction。单位是m字节。
-* 默认值:1024
-
-一般情况下,配置在2G以内,为了防止cumulative compaction时间过长,导致版本积压。
-
-### `cumulative_size_based_promotion_ratio`
-
-* 类型:double
-* 描述:在size_based策略下,cumulative compaction的输出rowset总磁盘大小超过base版本rowset的配置比例时,该rowset将用于base compaction。
-* 默认值:0.05
-
-一般情况下,建议配置不要高于0.1,低于0.02。
-
-### `cumulative_size_based_promotion_min_size_mbytes`
-
-* 类型:int64
-* 描述:在size_based策略下,cumulative compaction的输出rowset总磁盘大小低于此配置大小,该rowset将不进行base compaction,仍然处于cumulative compaction流程中。单位是m字节。
-* 默认值:64
-
-一般情况下,配置在512m以内,配置过大会导致base版本早期的大小过小,一直不进行base compaction。
-
-### `cumulative_size_based_compaction_lower_size_mbytes`
-
-* 类型:int64
-* 描述:在size_based策略下,cumulative compaction进行合并时,选出的要进行合并的rowset的总磁盘大小大于此配置时,才按级别策略划分合并。小于这个配置时,直接执行合并。单位是m字节。
-* 默认值:64
-
-一般情况下,配置在128m以内,配置过大会导致cumulative compaction写放大较多。
-
-### `custom_config_dir`
-
-配置 `be_custom.conf` 文件的位置。默认为 `conf/` 目录下。
-
-在某些部署环境下,`conf/` 目录可能因为系统的版本升级被覆盖掉。这会导致用户在运行时持久化修改的配置项也被覆盖。这时,我们可以将 `be_custom.conf` 存储在另一个指定的目录中,以防止配置文件被覆盖。
-
-### `default_num_rows_per_column_file_block`
-* 类型:int32
-* 描述:配置单个RowBlock之中包含多少行的数据。
-* 默认值:1024
-
-### `default_rowset_type`
-* 类型:string
-* 描述:标识BE默认选择的存储格式,可配置的参数为:"**ALPHA**", "**BETA**"。主要起以下两个作用
-1. 当建表的storage_format设置为Default时,通过该配置来选取BE的存储格式。
-2. 进行Compaction时选择BE的存储格式
-* 默认值:BETA
-
-### `delete_worker_count`
-
-默认值:3
-
-执行数据删除任务的线程数
-
-### `disable_mem_pools`
-
-默认值:false
-
-是否禁用内存缓存池,默认不禁用
-
-### `disable_storage_page_cache`
-
-* 类型:bool
-* 描述:是否使用 page cache 进行 index 的缓存,该配置仅在BETA存储格式时生效
-* 默认值:false
-
-### `disk_stat_monitor_interval`
-
-默认值:5 (s)
-
-磁盘状态检查时间间隔
-
-### `doris_cgroups`
-
-默认值:空
-
-分配给doris的cgroups
-
-### `doris_max_pushdown_conjuncts_return_rate`
-
-* 类型:int32
-* 描述:BE在进行HashJoin时,会采取动态分区裁剪的方式将join条件下推到OlapScanner上。当OlapScanner扫描的数据大于32768行时,BE会进行过滤条件检查,如果该过滤条件的过滤率低于该配置,则Doris会停止使用动态分区裁剪的条件进行数据过滤。
-* 默认值:90
-
-
-### `doris_max_scan_key_num`
-
-* 类型:int
-* 描述:用于限制一个查询请求中,scan node 节点能拆分的最大 scan key 的个数。当一个带有条件的查询请求到达 scan node 节点时,scan node 会尝试将查询条件中 key 列相关的条件拆分成多个 scan key range。之后这些 scan key range 会被分配给多个 scanner 线程进行数据扫描。较大的数值通常意味着可以使用更多的 scanner 线程来提升扫描操作的并行度。但在高并发场景下,过多的线程可能会带来更大的调度开销和系统负载,反而会降低查询响应速度。一个经验数值为 50。该配置可以单独进行会话级别的配置,具体可参阅 [变量](../variables.md) 中 `max_scan_key_num` 的说明。
-* 默认值:1024
-
-当在高并发场景下发现并发度无法提升时,可以尝试降低该数值并观察影响。
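-
-如上文所述,该限制也可以通过同名会话变量在会话级别调整,下面是一个示意(取值仅供参考):
-
-```
-SET max_scan_key_num = 50;
-```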
-
-### `doris_scan_range_row_count`
-
-* 类型:int32
-* 描述:BE在进行数据扫描时,会将同一个扫描范围拆分为多个ScanRange。该参数代表了每个ScanRange代表扫描数据范围。通过该参数可以限制单个OlapScanner占用io线程的时间。
-* 默认值:524288
-
-### `doris_scanner_queue_size`
-
-* 类型:int32
-* 描述:TransferThread与OlapScanner之间RowBatch的缓存队列的长度。Doris进行数据扫描时是异步进行的,OlapScanner扫描上来的Rowbatch会放入缓存队列之中,等待上层TransferThread取走。
-* 默认值:1024
-
-### `doris_scanner_row_num`
-
-默认值:16384
-
-每个扫描线程单次执行最多返回的数据行数
-
-### `doris_scanner_thread_pool_queue_size`
-
-* 类型:int32
-* 描述:Scanner线程池的队列长度。在Doris的扫描任务之中,每一个Scanner会作为一个线程task提交到线程池之中等待被调度,而提交的任务数目超过线程池队列的长度之后,后续提交的任务将阻塞直到队列之中有新的空缺。
-* 默认值:102400
-
-### `doris_scanner_thread_pool_thread_num`
-
-* 类型:int32
-* 描述:Scanner线程池线程数目。在Doris的扫描任务之中,每一个Scanner会作为一个线程task提交到线程池之中等待被调度,该参数决定了Scanner线程池的大小。
-* 默认值:48
-
-### `download_low_speed_limit_kbps`
-
-默认值:50 (KB/s)
-
-下载最低限速
-
-### `download_low_speed_time`
-
-默认值:300 (s)
-
-下载时间限制,默认300秒
-
-### `download_worker_count`
-
-默认值:1
-
-下载线程数,默认1个
-
-### `drop_tablet_worker_count`
-
-默认值:3
-
-删除tablet的线程数
-
-### `enable_metric_calculator`
-
-默认值:true
-
-如果设置为 true,metric calculator 将运行,收集BE相关指标信息,如果设置成false将不运行
-
-### `enable_partitioned_aggregation`
-
-* 类型:bool
-* 描述:BE节点是否通过PartitionAggregateNode来实现聚合操作,如果false的话将会执行AggregateNode完成聚合。非特殊需求场景不建议设置为false。
-* 默认值:true
-
-### `enable_prefetch`
-
-* 类型:bool
-* 描述:当使用PartitionedHashTable进行聚合和join计算时,是否进行HashBuket的预取,推荐设置为true。
-* 默认值:true
-
-### `enable_quadratic_probing`
-
-* 类型:bool
-* 描述:当使用PartitionedHashTable时发生Hash冲突时,是否采用平方探测法来解决Hash冲突。该值为false的话,则选用线性探测发来解决Hash冲突。关于平方探测法可参考:[quadratic_probing](https://en.wikipedia.org/wiki/Quadratic_probing)
-* 默认值:true
-
-### `enable_system_metrics`
-
-默认值:true
-
-用户控制打开和关闭系统指标
-
-### `enable_token_check`
-
-默认值:true
-
-用于向前兼容,稍后将被删除
-
-### `es_http_timeout_ms`
-
-默认值:5000 (ms)
-
-通过http连接ES的超时时间,默认是5秒
-
-### `es_scroll_keepalive`
-
-默认值:5m
-
-es scroll Keeplive保持时间,默认5分钟
-
-### `etl_thread_pool_queue_size`
-
-默认值:256
-
-ETL线程池的大小
-
-### `etl_thread_pool_size`
-
-### `exchg_node_buffer_size_bytes`
-
-* 类型:int32
-* 描述:ExchangeNode节点Buffer队列的大小,单位为byte。来自Sender端发送的数据量大于ExchangeNode的Buffer大小之后,后续发送的数据将阻塞直到Buffer腾出可写入的空间。
-* 默认值:10485760
-
-### `file_descriptor_cache_capacity`
-
-默认值:32768
-
-文件句柄缓存的容量,默认缓存32768个文件句柄
-
-### `cache_clean_interval`
-
-默认值:1800 (s)
-
-文件句柄缓存清理的间隔,用于清理长期不用的文件句柄。
-同时也是Segment Cache的清理间隔时间。
-
-### `flush_thread_num_per_store`
-
-默认值:2
-
-每个store用于刷新内存表的线程数
-
-### `force_recovery`
-
-### `fragment_pool_queue_size`
-
-默认值:2048
-
-单节点上能够处理的查询请求上限
-
-### `fragment_pool_thread_num_min`
-
-默认值:64
-
-### `fragment_pool_thread_num_max`
-
-默认值:256
-
-查询线程数,默认最小启动64个线程,后续查询请求动态创建线程,最大创建256个线程
-
-### `heartbeat_service_port`
-
-* 类型:int32
-* 描述:BE 上心跳服务端口(thrift),用于接收来自 FE 的心跳
-* 默认值:9050
-
-### `heartbeat_service_thread_count`
-
-* 类型:int32
-* 描述:执行BE上心跳服务的线程数,默认为1,不建议修改
-* 默认值:1
-
-### `ignore_broken_disk`
-
- 当BE启动时,会检查``storage_root_path`` 配置下的所有路径。
-
- - `ignore_broken_disk=true`
-
- 如果路径不存在或路径下无法进行读写文件(坏盘),将忽略此路径,如果有其他可用路径则不中断启动。
-
- - `ignore_broken_disk=false`
-
- 如果路径不存在或路径下无法进行读写文件(坏盘),将中断启动失败退出。
-
- 默认为false
-
-### `ignore_load_tablet_failure`
-
-* 类型:bool
-* 描述:用来决定在有tablet 加载失败的情况下是否忽略错误,继续启动be
-* 默认值:false
-
-BE启动时,会对每个数据目录单独启动一个线程进行 tablet header 元信息的加载。默认配置下,如果某个数据目录有 tablet 加载失败,则启动进程会终止。同时会在 `be.INFO` 日志中看到如下错误信息:
-
-```
-load tablets from header failed, failed tablets size: xxx, path=xxx
-```
-
-表示该数据目录共有多少 tablet 加载失败。同时,日志中也会有加载失败的 tablet 的具体信息。此时需要人工介入来对错误原因进行排查。排查后,通常有两种方式进行恢复:
-
-1. tablet 信息不可修复,在确保其他副本正常的情况下,可以通过 `meta_tool` 工具将错误的tablet删除。
-2. 将 `ignore_load_tablet_failure` 设置为 true,则 BE 会忽略这些错误的 tablet,正常启动。
-
-### `ignore_rowset_stale_unconsistent_delete`
-
-* 类型:bool
-* 描述:用来决定当删除过期的合并过的rowset后无法构成一致的版本路径时,是否仍要删除。
-* 默认值:false
-
-合并的过期 rowset 版本路径会在半个小时后进行删除。在异常下,删除这些版本会出现构造不出查询一致路径的问题,当配置为false时,程序检查比较严格,程序会直接报错退出。
-当配置为true时,程序会正常运行,忽略这个错误。一般情况下,忽略这个错误不会对查询造成影响,仅会在fe下发了合并过的版本时出现-230错误。
-
-### `inc_rowset_expired_sec`
-
-默认值:1800 (s)
-
-导入激活的数据,存储引擎保留的时间,用于增量克隆
-
-### `index_stream_cache_capacity`
-
-默认值:10737418240
-
-BloomFilter/Min/Max等统计信息缓存的容量
-
-### `kafka_broker_version_fallback`
-
-默认值:0.10.0
-
-如果所依赖的 Kafka Broker 版本低于 routine load 所依赖的 kafka 客户端版本,将使用 kafka_broker_version_fallback 设置的回退版本,有效值为:0.9.0、0.8.2、0.8.1、0.8.0。
-
-### `load_data_reserve_hours`
-
-默认值:4 (小时)
-
-用于mini load。mini load数据文件将在此时间后被删除
-
-### `load_error_log_reserve_hours`
-
-默认值:48(小时)
-
-load错误日志将在此时间后删除
-
-### `load_process_max_memory_limit_bytes`
-
-默认值:107374182400
-
-单节点上所有的导入线程占据的内存上限,默认值:100G
-
-将这些默认值设置得很大,因为我们不想在用户升级 Doris 时影响负载性能。 如有必要,用户应正确设置这些配置。
-
-### `load_process_max_memory_limit_percent`
-
-默认值:80
-
-单节点上所有的导入线程占据的内存上限比例,默认80%
-
-将这些默认值设置得很大,因为我们不想在用户升级 Doris 时影响负载性能。 如有必要,用户应正确设置这些配置。
-
-### `log_buffer_level`
-
-默认值:空
-
-日志刷盘的策略,默认保持在内存中
-
-### `madvise_huge_pages`
-
-默认值:false
-
-是否使用linux内存大页,默认不启用
-
-### `make_snapshot_worker_count`
-
-默认值:5
-
-制作快照的线程数
-
-### `max_client_cache_size_per_host`
-
-默认值:10
-
-每个主机的最大客户端缓存数,BE 中有多种客户端缓存,但目前我们使用相同的缓存大小配置。 如有必要,使用不同的配置来设置不同的客户端缓存。
-
-### `max_compaction_threads`
-
-* 类型:int32
-* 描述:Compaction线程池中线程数量的最大值。
-* 默认值:10
-
-### `max_consumer_num_per_group`
-
-默认值:3
-
-一个数据消费者组中的最大消费者数量,用于routine load
-
-### `min_cumulative_compaction_num_singleton_deltas`
-
-默认值:5
-
-cumulative compaction策略:最小增量文件的数量
-
-### `max_cumulative_compaction_num_singleton_deltas`
-
-默认值:1000
-
-cumulative compaction策略:最大增量文件的数量
-
-### `max_download_speed_kbps`
-
-默认值:50000 (kb/s)
-
-最大下载速度限制
-
-### `max_free_io_buffers`
-
-默认值:128
-
-对于每个 io 缓冲区大小,IoMgr 将保留的最大缓冲区数从 1024B 到 8MB 的缓冲区,最多约为 2GB 的缓冲区。
-
-### `max_garbage_sweep_interval`
-
-默认值:3600
-
-磁盘进行垃圾清理的最大间隔,默认一个小时
-
-### `max_memory_sink_batch_count`
-
-默认值:20
-
-最大外部扫描缓存批次计数,表示缓存 max_memory_sink_batch_count * batch_size 行数据,默认为20,batch_size的默认值为1024,即默认缓存 20 * 1024 行
-
-### `max_percentage_of_error_disk`
-
-* 类型:int32
-* 描述:存储引擎允许存在损坏硬盘的百分比,损坏硬盘超过该比例后,BE将会自动退出。
-* 默认值:0
-
-### `max_pushdown_conditions_per_column`
-
-* 类型:int
-* 描述:用于限制一个查询请求中,针对单个列,能够下推到存储引擎的最大条件数量。在查询计划执行的过程中,一些列上的过滤条件可以下推到存储引擎,这样可以利用存储引擎中的索引信息进行数据过滤,减少查询需要扫描的数据量。比如等值条件、IN 谓词中的条件等。这个参数在绝大多数情况下仅影响包含 IN 谓词的查询。如 `WHERE colA IN (1,2,3,4,...)`。较大的数值意味着 IN 谓词中更多的条件可以推送给存储引擎,但过多的条件可能会导致随机读的增加,某些情况下可能会降低查询效率。该配置可以单独进行会话级别的配置,具体可参阅 [变量](../variables.md) 中 `max_pushdown_conditions_per_column` 的说明。
-* 默认值:1024
-
-* 示例
-
- 表结构为 `id INT, col2 INT, col3 varchar(32), ...`。
-
- 查询请求为 `... WHERE id IN (v1, v2, v3, ...)`
-
- 如果 IN 谓词中的条件数量超过了该配置,则可以尝试增加该配置值,观察查询响应是否有所改善。
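-
-该限制同样可以通过同名会话变量在会话级别调整,下面是一个示意(取值仅供参考):
-
-```
-SET max_pushdown_conditions_per_column = 2048;
-```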
-
-### `max_runnings_transactions_per_txn_map`
-
-默认值:100
-
-txn 管理器中每个 txn_partition_map 的最大 txns 数,这是一种自我保护,以避免在管理器中保存过多的 txns
-
-### `max_send_batch_parallelism_per_job`
-
-* 类型:int
-* 描述:OlapTableSink 发送批处理数据的最大并行度,用户为 `send_batch_parallelism` 设置的值不允许超过 `max_send_batch_parallelism_per_job` ,如果超过, `send_batch_parallelism` 将被设置为 `max_send_batch_parallelism_per_job` 的值。
-* 默认值:5
-
-### `max_tablet_num_per_shard`
-
-默认:1024
-
-每个shard的tablet数目,用于划分tablet,防止单个目录下tablet子目录过多
-
-### `max_tablet_version_num`
-
-* 类型:int
-* 描述:限制单个 tablet 最大 version 的数量。用于防止导入过于频繁,或 compaction 不及时导致的大量 version 堆积问题。当超过限制后,导入任务将被拒绝。
-* 默认值:500
-
-### `mem_limit`
-
-* 类型:string
-* 描述:限制BE进程使用服务器最大内存百分比。用于防止BE内存挤占太多的机器内存,该参数必须大于0,当百分比大于100%之后,该值会默认为100%。
-* 默认值:80%
-
-### `memory_limitation_per_thread_for_schema_change`
-
-默认值:2 (GB)
-
-单个schema change任务允许占用的最大内存
-
-### `memory_maintenance_sleep_time_s`
-
-默认值:10
-
-内存维护迭代之间的休眠时间(以秒为单位)
-
-### `memory_max_alignment`
-
-默认值:16
-
-内存分配的最大对齐字节数
-
-### `read_size`
-
-默认值:8388608
-
-read_size 是发送给操作系统的单次读取大小,需要在延迟和吞吐之间进行权衡:尽量让磁盘保持繁忙,但不引入过多寻道。对于 8MB 的读取,随机 IO 和顺序 IO 的性能相近。
-
-### `min_buffer_size`
-
-默认值:1024
-
-最小读取缓冲区大小(以字节为单位)
-
-### `min_compaction_failure_interval_sec`
-
-* 类型:int32
-* 描述:在 cumulative compaction 过程中,当选中的 tablet 没能成功的进行版本合并,则会等待一段时间后才会再次有可能被选中。等待的这段时间就是这个配置的值。
-* 默认值:5
-* 单位:秒
-
-### `min_compaction_threads`
-
-* 类型:int32
-* 描述:Compaction线程池中线程数量的最小值。
-* 默认值:10
-
-### `min_file_descriptor_number`
-
-默认值:60000
-
-BE进程的文件句柄limit要求的下限
-
-### `min_garbage_sweep_interval`
-
-默认值:180
-
-磁盘进行垃圾清理的最小间隔,时间秒
-
-### `mmap_buffers`
-
-默认值:false
-
-是否使用mmap分配内存,默认不使用
-
-### `num_cores`
-
-* 类型:int32
-* 描述:BE可以使用CPU的核数。当该值为0时,BE将从/proc/cpuinfo之中获取本机的CPU核数。
-* 默认值:0
-
-### `num_disks`
-
-默认值:0
-
-控制机器上的磁盘数量。 如果为 0,则来自系统设置。
-
-### `num_threads_per_core`
-
-默认值:3
-
-控制每个内核运行工作的线程数。 通常选择 2 倍或 3 倍的内核数量。 这使核心保持忙碌而不会导致过度抖动
-
-### `num_threads_per_disk`
-
-默认值:0
-
-每个磁盘的最大线程数也是每个磁盘的最大队列深度
-
-### `number_tablet_writer_threads`
-
-默认值:16
-
-tablet写线程数
-
-### `path_gc_check`
-
-默认值:true
-
-是否启用回收扫描数据线程检查,默认启用
-
-### `path_gc_check_interval_second`
-
-默认值:86400
-
-回收扫描数据线程检查时间间隔,单位秒
-
-### `path_gc_check_step`
-
-默认值:1000
-
-### `path_gc_check_step_interval_ms`
-
-默认值:10 (ms)
-
-### `path_scan_interval_second`
-
-默认值:86400
-
-### `pending_data_expire_time_sec`
-
-默认值:1800
-
-存储引擎保留的未生效数据的最大时长,默认单位:秒
-
-### `periodic_counter_update_period_ms`
-
-默认值:500
-
-更新速率计数器和采样计数器的周期,默认单位:毫秒
-
-### `plugin_path`
-
-默认值:${DORIS_HOME}/plugin
-
-插件路径
-
-### `port`
-
-* 类型:int32
-* 描述:BE单测时使用的端口,在实际环境之中无意义,可忽略。
-* 默认值:20001
-
-### `pprof_profile_dir`
-
-默认值:${DORIS_HOME}/log
-
-pprof profile保存目录
-
-### `priority_networks`
-
-默认值:空
-
-为那些有很多 ip 的服务器声明一个选择策略。 请注意,最多应该有一个 ip 与此列表匹配。 这是一个以分号分隔格式的列表,用 CIDR 表示法,例如 10.10.10.0/24 , 如果没有匹配这条规则的ip,会随机选择一个。
-
-### `priority_queue_remaining_tasks_increased_frequency`
-
-默认值:512
-
- the increased frequency of priority for remaining tasks in BlockingPriorityQueue
-
-### `publish_version_worker_count`
-
-默认值:8
-
-生效版本的线程数
-
-### `pull_load_task_dir`
-
-默认值:${DORIS_HOME}/var/pull_load
-
-拉取 load 任务的目录
-
-### `push_worker_count_high_priority`
-
-默认值:3
-
-导入线程数,用于处理HIGH优先级任务
-
-### `push_worker_count_normal_priority`
-
-默认值:3
-
-导入线程数,用于处理NORMAL优先级任务
-
-### `push_write_mbytes_per_sec`
-
-+ 类型:int32
-+ 描述:导入数据速度控制,默认最快每秒10MB。适用于所有的导入方式。
-+ 单位:MB
-+ 默认值:10
-
-### `query_scratch_dirs`
-
-* 类型:string
-* 描述:BE进行数据落盘时选取的目录来存放临时数据,与存储路径配置类似,多目录之间用;分隔。
-* 默认值:${DORIS_HOME}
-
-### `release_snapshot_worker_count`
-
-默认值:5
-
-释放快照的线程数
-
-### `report_disk_state_interval_seconds`
-
-默认值:60
-
-代理向 FE 报告磁盘状态的间隔时间(秒)
-
-### `report_tablet_interval_seconds`
-
-默认值:60
-
-代理向 FE 报告 olap 表的间隔时间(秒)
-
-### `report_task_interval_seconds`
-
-默认值:10
-
-代理向 FE 报告任务签名的间隔时间(秒)
-
-### `result_buffer_cancelled_interval_time`
-
-默认值:300
-
-结果缓冲区取消时间(单位:秒)
-
-### `routine_load_thread_pool_size`
-
-默认值:10
-
-routine load任务的线程池大小。 这应该大于 FE 配置 'max_concurrent_task_num_per_be'(默认 5)
-
-### `row_nums_check`
-
-默认值:true
-
-是否检查 BE/CE 以及 schema change 前后的行数是否一致。true 为开启,false 为关闭。
-
-### `row_step_for_compaction_merge_log`
-
-* 类型:int64
-* 描述:Compaction执行过程中,每次合并row_step_for_compaction_merge_log行数据会打印一条LOG。如果该参数被设置为0,表示merge过程中不需要打印LOG。
-* 默认值: 0
-* 可动态修改:是
-
-### `scan_context_gc_interval_min`
-
-默认值:5
-
-此配置用于上下文gc线程调度周期 , 注意:单位为分钟,默认为 5 分钟
-
-### `send_batch_thread_pool_thread_num`
-
-* 类型:int32
-* 描述:SendBatch线程池线程数目。在NodeChannel的发送数据任务之中,每一个NodeChannel的SendBatch操作会作为一个线程task提交到线程池之中等待被调度,该参数决定了SendBatch线程池的大小。
-* 默认值:256
-
-### `send_batch_thread_pool_queue_size`
-
-* 类型:int32
-* 描述:SendBatch线程池的队列长度。在NodeChannel的发送数据任务之中,每一个NodeChannel的SendBatch操作会作为一个线程task提交到线程池之中等待被调度,而提交的任务数目超过线程池队列的长度之后,后续提交的任务将阻塞直到队列之中有新的空缺。
-* 默认值:102400
-
-### `serialize_batch`
-
-默认值:false
-
-BE之间rpc通信是否序列化RowBatch,用于查询层之间的数据传输
-
-### `sleep_one_second`
-+ 类型:int32
-+ 描述:全局变量,用于BE线程休眠1秒,不应该被修改
-+ 默认值:1
-
-### `small_file_dir`
-
-默认值:${DORIS_HOME}/lib/small_file/
-
-用于保存 SmallFileMgr 下载的文件的目录
-
-### `snapshot_expire_time_sec`
-
-默认值:172800
-
-快照文件清理的间隔,默认值:48小时
-
-### `status_report_interval`
-
-默认值:5
-
-配置文件报告之间的间隔;单位:秒
-
-### `storage_flood_stage_left_capacity_bytes`
-
-默认值:1073741824
-
-数据目录应该剩下的最小存储空间,默认1G
-
-### `storage_flood_stage_usage_percent`
-
-默认值:95 (95%)
-
-storage_flood_stage_usage_percent 和 storage_flood_stage_left_capacity_bytes 两个配置共同限制了数据目录磁盘容量的最大使用。当这两个阈值都达到时,无法再向该数据目录写入数据。本配置为数据目录的最大已用容量百分比。
-
-### `storage_medium_migrate_count`
-
-默认值:1
-
-执行存储介质迁移任务的线程数
-
-### `storage_page_cache_limit`
-
-默认值:20%
-
-存储页缓存(Storage Page Cache)的容量上限
-
-### `index_page_cache_percentage`
-* 类型:int32
-* 描述:索引页缓存占总页面缓存的百分比,取值为[0, 100]。
-* 默认值:10
-
-### `storage_root_path`
-
-* 类型:string
-
-* 描述:BE数据存储的目录,多目录之间用英文状态的分号`;`分隔。可以通过路径区别存储目录的介质,HDD或SSD。可以添加容量限制在每个路径的末尾,通过英文状态逗号`,`隔开。
-
- 示例1如下:
-
- **注意:如果是SSD磁盘要在目录后面加上`.SSD`,HDD磁盘在目录后面加`.HDD`**
-
- `storage_root_path=/home/disk1/doris.HDD,50;/home/disk2/doris.SSD,10;/home/disk2/doris`
-
- * /home/disk1/doris.HDD,50,表示存储限制为50GB,HDD;
- * /home/disk2/doris.SSD,10,存储限制为10GB,SSD;
- * /home/disk2/doris,存储限制为磁盘最大容量,默认为HDD
-
- 示例2如下:
-
-  **注意:不论HDD磁盘目录还是SSD磁盘目录,文件夹目录名称都无需添加后缀,storage_root_path参数里指定medium即可**
-
- `storage_root_path=/home/disk1/doris,medium:hdd,capacity:50;/home/disk2/doris,medium:ssd,capacity:50`
-
- **说明**
-
-  - /home/disk1/doris,medium:hdd,capacity:50,表示存储限制为50GB,HDD;
- - /home/disk2/doris,medium:ssd,capacity:50,表示存储限制为50GB, SSD;
-
-
-* 默认值:${DORIS_HOME}
-
-### `storage_strict_check_incompatible_old_format`
-* 类型:bool
-* 描述:用来检查不兼容的旧版本格式时是否使用严格的验证方式
-* 默认值: true
-* 可动态修改:否
-
-配置用来检查不兼容的旧版本格式时是否使用严格的验证方式,当含有旧版本的 hdr 格式时,使用严谨的方式时,程序会
-打出 fatal log 并且退出运行;否则,程序仅打印 warn log.
-
-### `streaming_load_max_mb`
-
-* 类型:int64
-* 描述:用于限制数据格式为 csv 的一次 Stream load 导入中,允许的最大数据量。单位 MB。
-* 默认值: 10240
-* 可动态修改:是
-
-Stream Load 一般适用于导入几个GB以内的数据,不适合导入过大的数据。
-
-### `streaming_load_json_max_mb`
-
-* 类型:int64
-* 描述:用于限制数据格式为 json 的一次 Stream load 导入中,允许的最大数据量。单位 MB。
-* 默认值: 100
-* 可动态修改:是
-
-一些数据格式,如 JSON,无法进行拆分处理,必须读取全部数据到内存后才能开始解析,因此,这个值用于限制此类格式数据单次导入最大数据量。
-
-### `streaming_load_rpc_max_alive_time_sec`
-
-默认值:1200
-
-TabletsChannel 的存活时间。如果此时通道没有收到任何数据, 通道将被删除。
-
-### `sync_tablet_meta`
-
-默认值:false
-
-存储引擎是否开sync保留到磁盘上
-
-### `sys_log_dir`
-
-* 类型:string
-* 描述:BE日志数据的存储目录
-* 默认值:${DORIS_HOME}/log
-
-### `sys_log_level`
-
-默认值:INFO
-
-日志级别,INFO < WARNING < ERROR < FATAL
-
-### `sys_log_roll_mode`
-
-默认值:SIZE-MB-1024
-
-日志拆分的大小,每1G拆分一个日志文件
-
-### `sys_log_roll_num`
-
-默认值:10
-
-日志文件保留的数目
-
-### `sys_log_verbose_level`
-
-默认值:10
-
-日志显示的级别,用于控制代码中VLOG开头的日志输出
-
-### `sys_log_verbose_modules`
-
-默认值:空
-
-日志打印的模块,写olap就只打印olap模块下的日志
-
-### `tablet_map_shard_size`
-
-默认值:1
-
-tablet_map_lock 分片大小,值为 2^n, n=0,1,2,3,4 ,这是为了更好地管理tablet
-
-### `tablet_meta_checkpoint_min_interval_secs`
-
-默认值:600 (秒)
-
-TabletMeta Checkpoint线程轮询的时间间隔
-
-### `tablet_meta_checkpoint_min_new_rowsets_num`
-
-默认值:10
-
-TabletMeta Checkpoint的最小Rowset数目
-
-### `tablet_scan_frequency_time_node_interval_second`
-
-* 类型:int64
-* 描述:用来表示记录 metric 'query_scan_count' 的时间间隔。为了计算当前一段时间的tablet的scan频率,需要每隔一段时间记录一次 metric 'query_scan_count'。
-* 默认值:300
-
-### `tablet_stat_cache_update_interval_second`
-
-默认值:300
-
-tablet状态缓存的更新间隔,单位:秒
-
-### `tablet_rowset_stale_sweep_time_sec`
-
-* 类型:int64
-* 描述:用来表示清理合并版本的过期时间,当当前时间 now() 减去一个合并的版本路径中rowset最近创建时间大于tablet_rowset_stale_sweep_time_sec时,对当前路径进行清理,删除这些合并过的rowset, 单位为s。
-* 默认值:1800
-
-当写入过于频繁、磁盘空间不足时,可以适当调小这个时间。不过当这个时间小于5分钟时,可能会引发fe查询不到已经合并过的版本,引发查询-230错误。
-
-### `tablet_writer_open_rpc_timeout_sec`
-
-默认值:60
-
-在远程BE 中打开tablet writer的 rpc 超时。 操作时间短,可设置短超时时间
-
-### `tablet_writer_ignore_eovercrowded`
-
-* 类型:bool
-* 描述:写入时可忽略brpc的'[E1011]The server is overcrowded'错误。
-* 默认值:false
-
-当遇到'[E1011]The server is overcrowded'的错误时,可以调整配置项`brpc_socket_max_unwritten_bytes`,但这个配置项不能动态调整。所以可通过设置此项为`true`来临时避免写失败。注意,此配置项只影响写流程,其他的rpc请求依旧会检查是否overcrowded。
-
-### `tc_free_memory_rate`
-
-默认值:20 (%)
-
-可用内存,取值范围:[0-100]
-
-### `tc_max_total_thread_cache_bytes`
-
-* 类型:int64
-* 描述:用来限制 tcmalloc 中总的线程缓存大小。这个限制不是硬限,因此实际线程缓存使用可能超过这个限制。具体可参阅 [TCMALLOC\_MAX\_TOTAL\_THREAD\_CACHE\_BYTES](https://gperftools.github.io/gperftools/tcmalloc.html)
-* 默认值: 1073741824
-
-如果发现系统在高压力场景下,通过 BE 线程堆栈发现大量线程处于 tcmalloc 的锁竞争阶段,如大量的 `SpinLock` 相关堆栈,则可以尝试增大该参数来提升系统性能。[参考](https://github.com/gperftools/gperftools/issues/1111)
-
-### `tc_use_memory_min`
-
-默认值:10737418240
-
-TCmalloc 的最小内存,当使用的内存小于这个时,不返回给操作系统
-
-### `thrift_client_retry_interval_ms`
-
-* 类型:int64
-* 描述:用来为be的thrift客户端设置重试间隔, 避免fe的thrift server发生雪崩问题,单位为ms。
-* 默认值:1000
-
-### `thrift_connect_timeout_seconds`
-
-默认值:3
-
-默认thrift客户端连接超时时间(单位:秒)
-
-### `thrift_rpc_timeout_ms`
-
-默认值:5000
-
-thrift默认超时时间,默认:5秒
-
-### `thrift_server_type_of_fe`
-
-该配置表示FE的Thrift服务使用的服务模型, 类型为string, 大小写不敏感,该参数需要和fe的thrift_server_type参数的设置保持一致。目前该参数的取值有两个,`THREADED`和`THREAD_POOL`。
-
-若该参数为`THREADED`, 该模型为非阻塞式I/O模型,
-
-若该参数为`THREAD_POOL`, 该模型为阻塞式I/O模型。
-
-### `total_permits_for_compaction_score`
-
-* 类型:int64
-* 描述:被所有的compaction任务所能持有的 "permits" 上限,用来限制compaction占用的内存。
-* 默认值:10000
-* 可动态修改:是
-
-### `trash_file_expire_time_sec`
-
-默认值:259200
-
-回收站清理的间隔,72个小时,当磁盘空间不足时,trash下的文件保存期可不遵守这个参数
-
-### `txn_commit_rpc_timeout_ms`
-
-默认值:10000
-
-txn 提交 rpc 超时,默认10秒
-
-### `txn_map_shard_size`
-
-默认值:128
-
-txn_map_lock 分片大小,取值为2^n,n=0,1,2,3,4。这是一项增强功能,可提高管理 txn 的性能
-
-### `txn_shard_size`
-
-默认值:1024
-
-txn_lock 分片大小,取值为2^n,n=0,1,2,3,4, 这是一项增强功能,可提高提交和发布 txn 的性能
-
-### `unused_rowset_monitor_interval`
-
-默认值:30
-
-清理过期Rowset的时间间隔,单位:秒
-
-### `upload_worker_count`
-
-默认值:1
-
-上传文件最大线程数
-
-### `use_mmap_allocate_chunk`
-
-默认值:false
-
-是否使用 mmap 分配块。如果启用此功能,最好增加 vm.max_map_count 的值(其默认值为 65530),可以以 root 身份通过 `sysctl -w vm.max_map_count=262144` 或 `echo 262144 > /proc/sys/vm/max_map_count` 进行设置。当该配置为 true 时,必须将 chunk_reserved_bytes_limit 设置为一个相对较大的数值,否则性能会非常差。
-
-### `user_function_dir`
-
-默认值:${DORIS_HOME}/lib/udf
-
-udf函数目录
-
-### `webserver_num_workers`
-
-默认值:48
-
-webserver默认工作线程数
-
-### `webserver_port`
-* 类型:int32
-* 描述:BE 上的 http server 的服务端口
-* 默认值:8040
-
-### `write_buffer_size`
-
-默认值:104857600
-
-刷写前缓冲区的大小
-
-### `zone_map_row_num_threshold`
-
-* 类型: int32
-* 描述: 如果一个page中的行数小于这个值就不会创建zonemap,用来减少数据膨胀
-* 默认值: 20
-
-### `aws_log_level`
-
-* 类型: int32
-* 描述: AWS SDK 的日志级别
- ```
- Off = 0,
- Fatal = 1,
- Error = 2,
- Warn = 3,
- Info = 4,
- Debug = 5,
- Trace = 6
- ```
-* 默认值: 3
-
-### `track_new_delete`
-
-* 类型:bool
-* 描述:是否Hook TCmalloc new/delete,目前在Hook中统计thread local MemTracker。
-* 默认值:true
-
-### `mem_tracker_level`
-
-* 类型: int16
-* 描述: MemTracker在Web页面上展示的级别,等于或低于这个级别的MemTracker会在Web页面上展示
- ```
- OVERVIEW = 0
- TASK = 1
- INSTANCE = 2
- VERBOSE = 3
- ```
-* 默认值: 0
-
-### `mem_tracker_consume_min_size_bytes`
-
-* 类型: int32
-* 描述: TCMalloc Hook consume/release MemTracker时的最小长度,小于该值的consume size会持续累加,避免频繁调用MemTracker的consume/release,减小该值会增加consume/release的频率,增大该值会导致MemTracker统计不准,理论上一个MemTracker的统计值与真实值相差 = (mem_tracker_consume_min_size_bytes * 这个MemTracker所在的BE线程数)。
-* 默认值: 1048576
-
-### `memory_leak_detection`
-
-* 类型: bool
-* 描述: 是否启动内存泄漏检测,当 MemTracker 为负值时认为发生了内存泄漏,但实际 MemTracker 记录不准确时也会导致负值,所以这个功能处于实验阶段。
-* 默认值: false
-
-### `max_segment_num_per_rowset`
-
-* 类型: int32
-* 描述: 用于限制导入时,新产生的rowset中的segment数量。如果超过阈值,导入会失败并报错 -238。过多的 segment 会导致compaction占用大量内存引发 OOM 错误。
-* 默认值: 200
-
-### `remote_storage_read_buffer_mb`
-
-* 类型: int32
-* 描述: 读取hdfs或者对象存储上的文件时,使用的缓存大小。
-* 默认值: 16MB
-
-增大这个值,可以减少远端数据读取的调用次数,但会增加内存开销。
-
-### `external_table_connect_timeout_sec`
-
-* 类型: int32
-* 描述: 和外部表建立连接的超时时间。
-* 默认值: 5秒
-
-### `segment_cache_capacity`
-
-* 类型: int32
-* 描述: Segment Cache 缓存的 Segment 最大数量
-* 默认值: 1000000
-
-默认值目前只是一个经验值,可能需要根据实际场景修改。增大该值可以缓存更多的segment从而避免一些IO。减少该值则会降低内存使用。
-
-### `auto_refresh_brpc_channel`
-
-* 类型: bool
-* 描述: 获取brpc连接时,通过hand_shake rpc 判断连接的可用性,如果不可用则重新建立连接
-* 默认值: false
-
-### `high_priority_flush_thread_num_per_store`
-
-* 类型:int32
-* 描述:每个存储路径所分配的用于高优导入任务的 flush 线程数量。
-* 默认值:1
-
-### `routine_load_consumer_pool_size`
-
-* 类型:int32
-* 描述:routine load 所使用的 data consumer 的缓存数量。
-* 默认值:10
-
-### `load_task_high_priority_threshold_second`
-
-* 类型:int32
-* 描述:当一个导入任务的超时时间小于这个阈值是,Doris 将认为他是一个高优任务。高优任务会使用独立的 flush 线程池。
-* 默认:120
-
-### `min_load_rpc_timeout_ms`
-
-* 类型:int32
-* 描述:load 作业中各个rpc 的最小超时时间。
-* 默认:20
-
-### `doris_scan_range_max_mb`
-* 类型: int32
-* 描述: 每个OlapScanner 读取的最大数据量
-* 默认值: 1024
-
-### `string_type_length_soft_limit_bytes`
-* 类型: int32
-* 描述: String 类型最大长度的软限,单位是字节
-* 默认值: 1048576
diff --git a/docs/zh-CN/administrator-guide/config/fe_config.md b/docs/zh-CN/administrator-guide/config/fe_config.md
deleted file mode 100644
index 8b14b7aa18..0000000000
--- a/docs/zh-CN/administrator-guide/config/fe_config.md
+++ /dev/null
@@ -1,2234 +0,0 @@
----
-{
- "title": "FE 配置项",
- "language": "zh-CN"
-}
-
----
-
-
-
-# Doris FE配置参数
-
-该文档主要介绍 FE 的相关配置项。
-
-FE 的配置文件 `fe.conf` 通常存放在 FE 部署路径的 `conf/` 目录下。 而在 0.14 版本中会引入另一个配置文件 `fe_custom.conf`。该配置文件用于记录用户在运行时动态配置并持久化的配置项。
-
-FE 进程启动后,会先读取 `fe.conf` 中的配置项,之后再读取 `fe_custom.conf` 中的配置项。`fe_custom.conf` 中的配置项会覆盖 `fe.conf` 中相同的配置项。
-
-`fe_custom.conf` 文件的位置可以在 `fe.conf` 通过 `custom_config_dir` 配置项配置。
-
-## 查看配置项
-
-FE 的配置项有两种方式进行查看:
-
-1. FE 前端页面查看
-
- 在浏览器中打开 FE 前端页面 `http://fe_host:fe_http_port/variable`。在 `Configure Info` 中可以看到当前生效的 FE 配置项。
-
-2. 通过命令查看
-
- FE 启动后,可以在 MySQL 客户端中,通过以下命令查看 FE 的配置项:
-
- `ADMIN SHOW FRONTEND CONFIG;`
-
- 结果中各列含义如下:
-
- - Key:配置项名称。
- - Value:当前配置项的值。
-   - Type:配置项值类型,如整型、字符串。
- - IsMutable:是否可以动态配置。如果为 true,表示该配置项可以在运行时进行动态配置。如果false,则表示该配置项只能在 `fe.conf` 中配置并且重启 FE 后生效。
- - MasterOnly:是否为 Master FE 节点独有的配置项。如果为 true,则表示该配置项仅在 Master FE 节点有意义,对其他类型的 FE 节点无意义。如果为 false,则表示该配置项在所有 FE 节点中均有意义。
- - Comment:配置项的描述。
-
-## 设置配置项
-
-FE 的配置项有两种方式进行配置:
-
-1. 静态配置
-
- 在 `conf/fe.conf` 文件中添加和设置配置项。`fe.conf` 中的配置项会在 FE 进程启动时被读取。没有在 `fe.conf` 中的配置项将使用默认值。
-
-2. 通过 MySQL 协议动态配置
-
- FE 启动后,可以通过以下命令动态设置配置项。该命令需要管理员权限。
-
- `ADMIN SET FRONTEND CONFIG ("fe_config_name" = "fe_config_value");`
-
- 不是所有配置项都支持动态配置。可以通过 `ADMIN SHOW FRONTEND CONFIG;` 命令结果中的 `IsMutable` 列查看是否支持动态配置。
-
- 如果是修改 `MasterOnly` 的配置项,则该命令会直接转发给 Master FE 并且仅修改 Master FE 中对应的配置项。
-
- **通过该方式修改的配置项将在 FE 进程重启后失效。**
-
- 更多该命令的帮助,可以通过 `HELP ADMIN SET CONFIG;` 命令查看。
-
-3. 通过 HTTP 协议动态配置
-
- 具体请参阅 [Set Config Action](http://doris.apache.org/master/zh-CN/administrator-guide/http-actions/fe/set-config-action.html)
-
- 该方式也可以持久化修改后的配置项。配置项将持久化在 `fe_custom.conf` 文件中,在 FE 重启后仍会生效。
-
-## 应用举例
-
-1. 修改 `async_pending_load_task_pool_size`
-
- 通过 `ADMIN SHOW FRONTEND CONFIG;` 可以查看到该配置项不能动态配置(`IsMutable` 为 false)。则需要在 `fe.conf` 中添加:
-
- `async_pending_load_task_pool_size=20`
-
- 之后重启 FE 进程以生效该配置。
-
-2. 修改 `dynamic_partition_enable`
-
- 通过 `ADMIN SHOW FRONTEND CONFIG;` 可以查看到该配置项可以动态配置(`IsMutable` 为 true)。并且是 Master FE 独有配置。则首先我们可以连接到任意 FE,执行如下命令修改配置:
-
- ```text
-    ADMIN SET FRONTEND CONFIG ("dynamic_partition_enable" = "true");
- ```
-
- 之后可以通过如下命令查看修改后的值:
-
- ```text
- set forward_to_master=true;
- ADMIN SHOW FRONTEND CONFIG;
- ```
-
- 通过以上方式修改后,如果 Master FE 重启或进行了 Master 切换,则配置将失效。可以通过在 `fe.conf` 中直接添加配置项,并重启 FE 后,永久生效该配置项。
-
-3. 修改 `max_distribution_pruner_recursion_depth`
-
- 通过 `ADMIN SHOW FRONTEND CONFIG;` 可以查看到该配置项可以动态配置(`IsMutable` 为 true)。并且不是 Master FE 独有配置。
-
-   同样,我们可以通过动态修改配置的命令修改该配置。因为该配置不是 Master FE 独有配置,所以需要单独连接到不同的 FE,进行动态修改配置的操作,这样才能保证所有 FE 都使用了修改后的配置值。
-
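-例如,可以分别连接到每一个 FE 并执行如下命令(取值仅为示意):
-
-```
-ADMIN SET FRONTEND CONFIG ("max_distribution_pruner_recursion_depth" = "100");
-```
-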
-## 配置项列表
-
-### `max_dynamic_partition_num`
-
-默认值:500
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-用于限制创建动态分区表时可以创建的最大分区数,避免一次创建过多分区。分区数量由动态分区参数中的 `dynamic_partition.start` 和 `dynamic_partition.end` 决定。
-
-### `grpc_max_message_size_bytes`
-
-默认值:1G
-
-用于设置 GRPC 客户端通道的初始流窗口大小,也用于设置最大消息大小。当结果集较大时,可能需要增大该值。
-
-### `min_replication_num_per_tablet`
-
-默认值:1
-
-用于设置单个tablet的最小replication数量。
-
-### `max_replication_num_per_tablet`
-
-默认值:32767
-
-用于设置单个 tablet 的最大 replication 数量。
-
-### `enable_outfile_to_local`
-
-默认值:false
-
-是否允许 outfile 函数将结果导出到本地磁盘
-
-### `enable_access_file_without_broker`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-此配置用于在通过代理访问 bos 或其他云存储时尝试跳过代理
-
-### `enable_bdbje_debug_mode`
-
-默认值:false
-
-如果设置为 true,FE 将在 BDBJE 调试模式下启动,在 Web 页面 `System->bdbje` 可以查看相关信息,否则不可以查看
-
-### `enable_alpha_rowset`
-
-默认值:false
-
-是否支持创建 alpha rowset。默认为 false,只应在紧急情况下使用,此配置应在未来的某个版本中删除
-
-### `enable_http_server_v2`
-
-默认值:从官方 0.14.0 release 版之后默认是 true,之前默认 false
-
-HTTP Server V2 由 SpringBoot 实现, 并采用前后端分离的架构。只有启用 httpv2,用户才能使用新的前端 UI 界面
-
-### `jetty_server_acceptors`
-
-默认值:2
-
-### `jetty_server_selectors`
-
-默认值:4
-
-### `jetty_server_workers`
-
-默认值:0
-
-Jetty 的线程数量由以上三个参数控制。Jetty的线程架构模型非常简单,分为 acceptors、selectors 和 workers 三个线程池。acceptors 负责接受新连接,然后交给 selectors 处理HTTP消息协议的解包,最后由 workers 处理请求。前两个线程池采用非阻塞模型,一个线程可以处理很多 socket 的读写,所以线程池数量较小。
-
-大多数项目,acceptors 线程只需要4个,selectors 线程配置4个足矣。workers 是阻塞性的业务逻辑,往往有较多的数据库操作,需要的线程数量较多,具体数量随应用程序的 QPS 和 IO 事件占比而定。QPS 越高,需要的线程数量越多,IO 占比越高,等待的线程数越多,需要的总线程数也越多。
-
-workers 线程池默认不做设置,根据自己需要进行设置
-
-### `jetty_threadPool_minThreads`
-
-Jetty线程池最小线程数,默认为20
-
-### `jetty_threadPool_maxThreads`
-
-Jetty线程池最大线程数,默认为400
-
-### `jetty_server_max_http_post_size`
-
-默认值:100 * 1024 * 1024 (100MB)
-
-这个是 put 或 post 方法上传文件的最大字节数,默认值:100MB
-
-### **`disable_mini_load`**
-
-是否禁用mini load数据导入方式,默认是:true (禁用)
-
-### `default_max_filter_ratio`
-
-默认值:0
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-可过滤数据(由于数据不规则等原因)的最大百分比。默认值为0,表示严格模式,只要有一条数据被过滤,整个导入就会失败。
-
-### `default_db_data_quota_bytes`
-
-默认值:1PB
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-用于设置默认数据库数据配额大小,设置单个数据库的配额大小可以使用:
-
-```
-设置数据库数据量配额,单位为B/K/KB/M/MB/G/GB/T/TB/P/PB
-ALTER DATABASE db_name SET DATA QUOTA quota;
-查看配置
-show data (其他用法:HELP SHOW DATA)
-```
-
-### `default_db_replica_quota_size`
-
-默认值:1073741824
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-用于设置默认数据库Replica数量配额大小,设置单个数据库配额大小可以使用:
-
-```
--- Set the database replica-count quota
-ALTER DATABASE db_name SET REPLICA QUOTA quota;
--- View the current configuration and usage (see also: HELP SHOW DATA)
-SHOW DATA;
-```
-
-### `enable_batch_delete_by_default`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-创建唯一表时是否添加删除标志列,具体原理参照官方文档:操作手册->数据导入->批量删除
-
-### `recover_with_empty_tablet`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-在某些情况下,某些 tablet 可能会损坏或丢失所有副本。 此时数据已经丢失,损坏的 tablet 会导致整个查询失败,无法查询剩余的健康 tablet。 在这种情况下,您可以将此配置设置为 true。 系统会将损坏的 tablet 替换为空 tablet,以确保查询可以执行。 (但此时数据已经丢失,所以查询结果可能不准确)
-
-### `max_allowed_in_element_num_of_delete`
-
-默认值:1024
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-用于限制 delete 语句中 Predicate 的元素个数
-
-### `cache_result_max_row_count`
-
-默认值:3000
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:false
-
-设置可以缓存的最大行数,详细的原理可以参考官方文档:操作手册->分区缓存
-
-### `cache_last_version_interval_second`
-
-默认值:900
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:false
-
-缓存结果时上一版本的最小间隔,该参数区分离线更新和实时更新
-
-### `cache_enable_partition_mode`
-
-默认值:true
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:false
-
-如果设置为 true,FE 将从 BE cache 中获取数据,该选项适用于部分分区的实时更新。
-
-### `cache_enable_sql_mode`
-
-默认值:true
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:false
-
-如果设置为 true,FE 会启用 sql 结果缓存,该选项适用于离线数据更新场景
-
-| | case1 | case2 | case3 | case4 |
-| ---------------------- | ----- | ----- | ----- | ----- |
-| enable_sql_cache | false | true | true | false |
-| enable_partition_cache | false | false | true | true |
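-
-The four cases above correspond to the two session variables named in the table rows. A hedged sketch of enabling them for the current session (variable names taken from the table itself; see the partition cache manual referenced earlier for details):
-
-```
-SET enable_sql_cache = true;        -- case2: SQL result cache only
-SET enable_partition_cache = true;  -- combined with the line above: case3
-```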
-
-### `min_clone_task_timeout_sec` 和 `max_clone_task_timeout_sec`
-
-默认值:最小3分钟,最大两小时
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-`min_clone_task_timeout_sec` 和 `max_clone_task_timeout_sec` 用于限制克隆任务的最小和最大超时间。 一般情况下,克隆任务的超时时间是通过数据量和最小传输速度(5MB/s)来估计的。 但在特殊情况下,您可能需要手动设置这两个配置,以确保克隆任务不会因超时而失败。
-
-### `agent_task_resend_wait_time_ms`
-
-默认值:5000
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-When the creation time of an agent task has been set, this config decides whether to resend the task: the ReportHandler will resend an agent task only when the current time minus its creation time is greater than `agent_task_resend_wait_time_ms`.
-
-This config is currently mainly used to avoid repeated sending of `PUBLISH_VERSION` agent tasks. Its default value of 5000 is an empirical value. Because there is some delay between submitting an agent task to the agent task queue and submitting it to the BE, increasing this value can effectively avoid sending agent tasks repeatedly,
-
-but it also means that agent tasks whose submission or execution failed will take longer to be executed again.
-
-### `enable_odbc_table`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-Whether ODBC tables are enabled. Disabled by default; it needs to be enabled manually before use. This parameter can be set via:
-
-`ADMIN SET FRONTEND CONFIG ("key" = "value")`
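-
-For example, enabling it at runtime would look like the following sketch:
-
-```
-ADMIN SET FRONTEND CONFIG ("enable_odbc_table" = "true");
-```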
-
-### `enable_spark_load`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-是否临时启用 spark load,默认不启用
-
-### `disable_storage_medium_check`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-如果 disable_storage_medium_check 为true, ReportHandler 将不会检查 tablet 的存储介质, 并使得存储冷却功能失效,默认值为false。当您不关心 tablet 的存储介质是什么时,可以将值设置为true 。
-
-### `drop_backend_after_decommission`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-该配置用于控制系统在成功下线(Decommission) BE 后,是否 Drop 该 BE。如果为 true,则在 BE 成功下线后,会删除掉该 BE 节点。如果为 false,则在 BE 成功下线后,该 BE 会一直处于 DECOMMISSION 状态,但不会被删除。
-
-该配置在某些场景下可以发挥作用。假设一个 Doris 集群的初始状态为每个 BE 节点有一块磁盘。运行一段时间后,系统进行了纵向扩容,即每个 BE 节点新增2块磁盘。因为 Doris 当前还不支持 BE 内部各磁盘间的数据均衡,所以会导致初始磁盘的数据量可能一直远高于新增磁盘的数据量。此时我们可以通过以下操作进行人工的磁盘间均衡:
-
-1. 将该配置项置为 false。
-2. 对某一个 BE 节点,执行 decommission 操作,该操作会将该 BE 上的数据全部迁移到其他节点中。
-3. decommission 操作完成后,该 BE 不会被删除。此时,取消掉该 BE 的 decommission 状态。则数据会开始从其他 BE 节点均衡回这个节点。此时,数据将会均匀的分布到该 BE 的所有磁盘上。
-4. 对所有 BE 节点依次执行 2,3 两个步骤,最终达到所有节点磁盘均衡的目的。
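-
-A sketch of the statements involved in steps 1 to 3 above (the backend address is a placeholder):
-
-```
--- step 1: keep decommissioned BEs instead of dropping them
-ADMIN SET FRONTEND CONFIG ("drop_backend_after_decommission" = "false");
--- step 2: migrate all data off one BE
-ALTER SYSTEM DECOMMISSION BACKEND "be_host:heartbeat_service_port";
--- step 3: after it finishes, cancel the decommission so data balances back
-CANCEL DECOMMISSION BACKEND "be_host:heartbeat_service_port";
-```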
-
-### `period_of_auto_resume_min`
-
-默认值:5 (s)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-自动恢复 Routine load 的周期
-
-### `max_tolerable_backend_down_num`
-
-默认值:0
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-The maximum number of down BEs that can be tolerated while still auto-resuming paused Routine Load jobs. With the default value of 0, Routine Load cannot be automatically resumed as soon as a single BE is down.
-
-### `enable_materialized_view`
-
-默认值:true
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-This config enables or disables the materialized view creation feature. If set to true, the feature is enabled and users can create materialized views with the `CREATE MATERIALIZED VIEW` command. If set to false, materialized views cannot be created.
-
-If creating a materialized view fails with `The materialized view is coming soon` or `The materialized view is disabled`, this config has been set to false and the feature is turned off. It can be re-enabled by changing the config back to true.
-
-This is a dynamic config: it can be changed with a command after the FE process has started, or by editing the FE config file and restarting the FE.
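-
-For reference, a minimal materialized view definition looks like the sketch below (table and column names are hypothetical):
-
-```
-CREATE MATERIALIZED VIEW mv_sum_v1 AS
-SELECT k1, SUM(v1) FROM example_tbl GROUP BY k1;
-```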
-
-### `check_java_version`
-
-默认值:true
-
-Doris 将检查已编译和运行的 Java 版本是否兼容,如果不兼容将抛出Java版本不匹配的异常信息,并终止启动
-
-### `max_running_rollup_job_num_per_table`
-
-默认值:1
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-控制 Rollup 作业并发限制
-
-### `dynamic_partition_enable`
-
-默认值:true
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-是否启用动态分区,默认启用
-
-### `dynamic_partition_check_interval_seconds`
-
-默认值:600秒,10分钟
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-检查动态分区的频率
-
-### `disable_cluster_feature`
-
-默认值:true
-
-是否可以动态配置:true
-
-The multi-cluster feature will be deprecated in version 0.12. Setting this config to true disables all operations related to the cluster feature, including:
-
-1. creating/dropping clusters
-2. adding free BEs, adding BEs to a cluster, decommissioning cluster balance
-3. changing the number of backends of a cluster
-4. linking/migrating databases
-
-### `force_do_metadata_checkpoint`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-如果设置为 true,则无论 jvm 内存使用百分比如何,检查点线程都会创建检查点
-
-### `metadata_checkpoint_memory_threshold`
-
-默认值:60 (60%)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-如果 jvm 内存使用百分比(堆或旧内存池)超过此阈值,则检查点线程将无法工作以避免 OOM。
-
-### `max_distribution_pruner_recursion_depth`
-
-默认值:100
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:false
-
-这将限制哈希分布修剪器的最大递归深度。 例如:其中 a in(5 个元素)和 b in(4 个元素)和 c in(3 个元素)和 d in(2 个元素)。 a/b/c/d 是分布式列,所以递归深度为 5 * 4 * 3 * 2 = 120,大于 100, 因此该分发修剪器将不起作用,只会返回所有 buckets。 增加深度可以支持更多元素的分布修剪,但可能会消耗更多的 CPU
-
-### `using_old_load_usage_pattern`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-如果设置为 true,处理错误的 insert stmt 仍将返回一个标签给用户。 用户可以使用此标签来检查加载作业的状态。 默认值为 false,表示插入操作遇到错误,不带加载标签,直接抛出异常给用户客户端。
-
-### `small_file_dir`
-
-默认值:DORIS_HOME_DIR + “/small_files”
-
-保存小文件的目录
-
-### `max_small_file_size_bytes`
-
-默认值:1M
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-SmallFileMgr 中单个文件存储的最大大小
-
-### `max_small_file_number`
-
-默认值:100
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-SmallFileMgr 中存储的最大文件数
-
-### `max_routine_load_task_num_per_be`
-
-默认值:5
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-The maximum number of concurrent Routine Load tasks per BE. This limits the number of Routine Load tasks sent to a BE, and it should also be smaller than the BE config `routine_load_thread_pool_size` (default 10), which is the size of the Routine Load task thread pool on the BE.
-
-### `max_routine_load_task_concurrent_num`
-
-默认值:5
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-单个 Routine Load 作业的最大并发任务数
-
-### `max_routine_load_job_num`
-
-默认值:100
-
-最大 Routine Load 作业数,包括 NEED_SCHEDULED, RUNNING, PAUSE
-
-### `max_backup_restore_job_num_per_db`
-
-默认值:10
-
-此配置用于控制每个 DB 能够记录的 backup/restore 任务的数量
-
-### `max_running_txn_num_per_db`
-
-默认值:100
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-这个配置主要是用来控制同一个 DB 的并发导入个数的。
-
-当集群中有过多的导入任务正在运行时,新提交的导入任务可能会报错:
-
-```text
-current running txns on db xxx is xx, larger than limit xx
-```
-
-When this error occurs, the number of import jobs currently running in the cluster exceeds this config value. It is recommended to wait on the business side and retry the import.
-
-In general, increasing this config value is not recommended; an overly high concurrency may put excessive load on the system.
-
-### `enable_metric_calculator`
-
-默认值:true
-
-如果设置为 true,指标收集器将作为守护程序计时器运行,以固定间隔收集指标
-
-### `report_queue_size`
-
-默认值: 100
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-这个阈值是为了避免在 FE 中堆积过多的报告任务,可能会导致 OOM 异常等问题。 并且每个 BE 每 1 分钟会报告一次 tablet 信息,因此无限制接收报告是不可接受的。以后我们会优化 tablet 报告的处理速度
-
-**不建议修改这个值**
-
-### `partition_rebalance_max_moves_num_per_selection`
-
-默认值:10
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-Valid only when PartitionRebalancer is used.
-
-### `partition_rebalance_move_expire_after_access`
-
-默认值:600 (s)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-仅在使用 PartitionRebalancer 时有效。 如果更改,缓存的移动将被清除
-
-### tablet_rebalancer_type
-
-默认值:BeLoad
-
-是否为 Master FE 节点独有的配置项:true
-
-rebalancer 类型(忽略大小写):BeLoad、Partition。 如果类型解析失败,默认使用 BeLoad
-
-### `max_balancing_tablets`
-
-默认值:100
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-如果 TabletScheduler 中的 balance tablet 数量超过 `max_balancing_tablets`,则不再进行 balance 检查
-
-### `max_scheduling_tablets`
-
-默认值:2000
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-如果 TabletScheduler 中调度的 tablet 数量超过 `max_scheduling_tablets`, 则跳过检查。
-
-### `disable_balance`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-如果设置为 true,TabletScheduler 将不会做 balance
-
-### `balance_load_score_threshold`
-
-默认值:0.1 (10%)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-集群 balance 百分比的阈值,如果一个BE的负载分数比平均分数低10%,这个后端将被标记为低负载,如果负载分数比平均分数高10%,将被标记为高负载。
-
-### `schedule_slot_num_per_path`
-
-默认值:2
-
-tablet 调度程序中每个路径的默认 slot 数量
-
-### `tablet_repair_delay_factor_second`
-
-默认值:60 (s)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-决定修复 tablet 前的延迟时间因素。
-
-1. VERY_HIGH: repair immediately;
-2. HIGH: delay tablet_repair_delay_factor_second * 1;
-3. NORMAL: delay tablet_repair_delay_factor_second * 2;
-4. LOW: delay tablet_repair_delay_factor_second * 3;
-
-### `es_state_sync_interval_second`
-
-默认值:10
-
-The FE calls the ES API to obtain ES index shard information every `es_state_sync_interval_second`.
-
-### `disable_hadoop_load`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-默认不禁用,将来不推荐使用 hadoop 集群 load。 设置为 true 以禁用这种 load 方式。
-
-### `db_used_data_quota_update_interval_secs`
-
-默认值:300 (s)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-一个主守护线程将每 `db_used_data_quota_update_interval_secs` 更新数据库 txn 管理器的数据库使用数据配额
-
-为了更好的数据导入性能,在数据导入之前的数据库已使用的数据量是否超出配额的检查中,我们并不实时计算数据库已经使用的数据量,而是获取后台线程周期性更新的值。
-
-该配置用于设置更新数据库使用的数据量的值的时间间隔
-
-### `disable_load_job`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-Not disabled by default. If this is set to true:
-
-- all pending load jobs will fail when the begin txn API is called
-- all prepared load jobs will fail when the commit txn API is called
-- all committed load jobs will wait to be published
-
-### `catalog_try_lock_timeout_ms`
-
-默认值:5000 (ms)
-
-是否可以动态配置:true
-
-元数据锁的 tryLock 超时配置。 通常它不需要改变,除非你需要测试一些东西。
-
-### `max_query_retry_time`
-
-默认值:1
-
-是否可以动态配置:true
-
-查询重试次数。 如果我们遇到 RPC 异常并且没有将结果发送给用户,则可能会重试查询。 您可以减少此数字以避免雪崩灾难。
-
-### `remote_fragment_exec_timeout_ms`
-
-默认值:5000 (ms)
-
-是否可以动态配置:true
-
-异步执行远程 fragment 的超时时间。 在正常情况下,异步远程 fragment 将在短时间内执行。 如果系统处于高负载状态,请尝试将此超时设置更长的时间。
-
-### `enable_local_replica_selection`
-
-默认值:false
-
-是否可以动态配置:true
-
-如果设置为 true,Planner 将尝试在与此前端相同的主机上选择 tablet 的副本。
-在以下情况下,这可能会减少网络传输:
-
-1. N 个主机,部署了 N 个 BE 和 N 个 FE。
-
-2. 数据有N个副本。
-
-3. 高并发查询均匀发送到所有 Frontends
-
-In this situation, all Frontends can only use local replicas to serve queries. If you want queries to fall back to non-local replicas when the local replica is unavailable, set `enable_local_replica_selection_fallback` to true.
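-
-Since both items are marked as mutable, enabling them at runtime could look like this sketch:
-
-```
-ADMIN SET FRONTEND CONFIG ("enable_local_replica_selection" = "true");
-ADMIN SET FRONTEND CONFIG ("enable_local_replica_selection_fallback" = "true");
-```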
-
-### `enable_local_replica_selection_fallback`
-
-默认值:false
-
-是否可以动态配置:true
-
-与 enable_local_replica_selection 配合使用,当本地副本不可用时,使用非本地副本服务查询。
-
-### `max_unfinished_load_job`
-
-默认值:1000
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-最大加载任务数,包括 PENDING、ETL、LOADING、QUORUM_FINISHED。 如果超过此数量,则不允许提交加载作业。
-
-### `max_bytes_per_broker_scanner`
-
-默认值:3 * 1024 * 1024 * 1024L (3G)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-broker scanner 程序可以在一个 broker 加载作业中处理的最大字节数。 通常,每个 BE 都有一个 broker scanner 程序。
-
-### `enable_auth_check`
-
-默认值:true
-
-如果设置为 false,则身份验证检查将被禁用,以防新权限系统出现问题。
-
-### `tablet_stat_update_interval_second`
-
-默认值:300,(5分钟)
-
-tablet 状态更新间隔
-所有 FE 将在每个时间间隔从所有 BE 获取 tablet 统计信息
-
-### `storage_flood_stage_usage_percent`
-
-默认值:95 (95%)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-### `storage_flood_stage_left_capacity_bytes`
-
-默认值:
-
-- `storage_flood_stage_usage_percent`: 95 (95%)
-- `storage_flood_stage_left_capacity_bytes`: 1 * 1024 * 1024 * 1024 (1GB)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-If disk usage exceeds `storage_flood_stage_usage_percent` and the remaining capacity falls below `storage_flood_stage_left_capacity_bytes`, the following operations will be rejected:
-
-1. load jobs
-2. restore jobs
-
-### `storage_high_watermark_usage_percent`
-
-默认值:85 (85%)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-### `storage_min_left_capacity_bytes`
-
-默认值: 2 * 1024 * 1024 * 1024 (2GB)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-`storage_high_watermark_usage_percent` limits the maximum percentage of used capacity on a BE storage path, and `storage_min_left_capacity_bytes` limits the minimum remaining capacity of a BE storage path. If both limits are reached, that storage path can no longer be chosen as a tablet storage destination. However, for tablet recovery, these limits may be ignored where possible in order to keep data as intact as possible.
-
-### `backup_job_default_timeout_ms`
-
-默认值:86400 * 1000 (1天)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-备份作业的默认超时时间
-
-### `with_k8s_certs`
-
-默认值:false
-
-如果在本地使用 k8s 部署管理器,请将其设置为 true 并准备证书文件
-
-### `dpp_hadoop_client_path`
-
-默认值:/lib/hadoop-client/hadoop/bin/hadoop
-
-### `dpp_bytes_per_reduce`
-
-默认值:100 * 1024 * 1024L (100M)
-
-### `dpp_default_cluster`
-
-默认值:palo-dpp
-
-### `dpp_default_config_str`
-
-默认值:{
- hadoop_configs : 'mapred.job.priority=NORMAL;mapred.job.map.capacity=50;mapred.job.reduce.capacity=50;mapred.hce.replace.streaming=false;abaci.long.stored.job=true;dce.shuffle.enable=false;dfs.client.authserver.force_stop=true;dfs.client.auth.method=0'
- }
-
-### dpp_config_str
-
-默认值:{
- palo-dpp : {
- hadoop_palo_path : '/dir',
- hadoop_configs : 'fs.default.name=hdfs://host:port;mapred.job.tracker=host:port;hadoop.job.ugi=user,password'
- }
- }
-
-### `enable_deploy_manager`
-
-默认值:disable
-
-如果使用第三方部署管理器部署 Doris,则设置为 true
-
-有效的选项是:
-
-- disable:没有部署管理器
-- k8s:Kubernetes
-- ambari:Ambari
-- local:本地文件(用于测试或 Boxer2 BCC 版本)
-
-### `enable_token_check`
-
-默认值:true
-
-为了向前兼容,稍后将被删除。 下载image文件时检查令牌。
-
-### `expr_depth_limit`
-
-默认值:3000
-
-是否可以动态配置:true
-
-限制 expr 树的深度。 超过此限制可能会导致在持有 db read lock 时分析时间过长。
-
-### `expr_children_limit`
-
-默认值:10000
-
-是否可以动态配置:true
-
-限制 expr 树的 expr 子节点的数量。 超过此限制可能会导致在持有数据库读锁时分析时间过长。
-
-### `proxy_auth_magic_prefix`
-
-默认值:x@8
-
-### `proxy_auth_enable`
-
-默认值:false
-
-### `meta_publish_timeout_ms`
-
-默认值:1000ms
-
-默认元数据发布超时时间
-
-### `disable_colocate_balance`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-This config can be set to true to disable automatic relocation and balancing of colocate tables. If `disable_colocate_balance` is set to true, the ColocateTableBalancer will not relocate or balance colocate tables.
-
-**Note:**
-
-1. In general, there is no need to disable balancing at all.
-2. Once balancing is disabled, unstable colocate tables may never recover.
-3. As a result, the colocate plan cannot be used at query time.
-
-### `query_colocate_join_memory_limit_penalty_factor`
-
-默认值:1
-
-是否可以动态配置:true
-
-colocote join PlanFragment instance 的 memory_limit = exec_mem_limit / min (query_colocate_join_memory_limit_penalty_factor, instance_num)
-
-### `max_connection_scheduler_threads_num`
-
-默认值:4096
-
-The maximum number of threads in the query request scheduler.
-
-The current strategy is to start a dedicated thread to serve each incoming request.
-
-### `qe_max_connection`
-
-默认值:1024
-
-每个 FE 的最大连接数
-
-### `check_consistency_default_timeout_second`
-
-默认值:600 (10分钟)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-单个一致性检查任务的默认超时。 设置足够长以适合您的tablet大小。
-
-### `consistency_check_start_time`
-
-默认值:23
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-一致性检查开始时间
-
-一致性检查器将从 `consistency_check_start_time` 运行到 `consistency_check_end_time`。 默认为 23:00 至 04:00
-
-### `consistency_check_end_time`
-
-默认值:04
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-一致性检查结束时间
-
-一致性检查器将从 `consistency_check_start_time` 运行到 `consistency_check_end_time`。 默认为 23:00 至 04:00
-
-### `export_tablet_num_per_task`
-
-默认值:5
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-每个导出查询计划的 tablet 数量
-
-### `export_task_default_timeout_second`
-
-默认值:2 * 3600 (2小时)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-导出作业的默认超时时间
-
-### `export_running_job_num_limit`
-
-默认值:5
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-运行导出作业的并发限制,默认值为 5,0 表示无限制
-
-### `export_checker_interval_second`
-
-默认值:5
-
-导出检查器的运行间隔
-
-### `default_load_parallelism`
-
-默认值:1
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-单个节点broker load导入的默认并发度。
-如果用户在提交broker load任务时,在properties中自行指定了并发度,则采用用户自定义的并发度。
-此参数将与`max_broker_concurrency`、`min_bytes_per_broker_scanner`等多个配置共同决定导入任务的并发度。
-
-### `max_broker_concurrency`
-
-默认值:10
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-broker scanner 的最大并发数。
-
-### `min_bytes_per_broker_scanner`
-
-默认值:67108864L (64M)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-单个 broker scanner 将读取的最小字节数。
-
-### `catalog_trash_expire_second`
-
-默认值:86400L (1天)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-删除数据库(表/分区)后,您可以使用 RECOVER stmt 恢复它。 这指定了最大数据保留时间。 一段时间后,数据将被永久删除。
-
-### `storage_cooldown_second`
-
-默认值:30 * 24 * 3600L (30天)
-
-创建表(或分区)时,可以指定其存储介质(HDD 或 SSD)。 如果设置为 SSD,这将指定tablet在 SSD 上停留的默认时间。 之后,tablet将自动移动到 HDD。 您可以在 `CREATE TABLE stmt` 中设置存储冷却时间。
-
-### `default_storage_medium`
-
-默认值:HDD
-
-创建表(或分区)时,可以指定其存储介质(HDD 或 SSD)。 如果未设置,则指定创建时的默认介质。
-
-### `max_backend_down_time_second`
-
-默认值:3600 (1小时)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-If a BE has been down for longer than `max_backend_down_time_second`, a BACKEND_DOWN event will be triggered.
-
-### `alter_table_timeout_second`
-
-默认值:86400 (1天)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-ALTER TABLE 请求的最大超时时间。 设置足够长以适合您的表格数据大小
-
-### `capacity_used_percent_high_water`
-
-默认值:0.75 (75%)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-磁盘容量的高水位使用百分比。 这用于计算后端的负载分数
-
-### `clone_distribution_balance_threshold`
-
-默认值:0.2
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-BE副本数的平衡阈值。
-
-### `clone_capacity_balance_threshold`
-
-默认值:0.2
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-* BE 中数据大小的平衡阈值。
-
- 平衡算法为:
-
- 1. 计算整个集群的平均使用容量(AUC)(总数据大小/BE数)
-
- 2. 高水位为(AUC * (1 + clone_capacity_balance_threshold))
-
- 3. 低水位为(AUC * (1 - clone_capacity_balance_threshold))
-
-克隆检查器将尝试将副本从高水位 BE 移动到低水位 BE。
-
-### `replica_delay_recovery_second`
-
-默认值:0
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-The minimum delay, in seconds, between a replica failing and the first attempt to recover it using clone.
-
-### `clone_high_priority_delay_second`
-
-默认值:0
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-高优先级克隆作业的延迟触发时间
-
-### `clone_normal_priority_delay_second`
-
-默认值:300 (5分钟)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-正常优先级克隆作业的延迟触发时间
-
-### `clone_low_priority_delay_second`
-
-默认值:600 (10分钟)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-The delayed trigger time for low-priority clone jobs. A clone job contains a tablet that needs to be cloned (recovered or migrated). If its priority is LOW, execution is delayed by `clone_low_priority_delay_second` after the job is created. This avoids running a large number of clone jobs at the same time just because a host was down for a short while.
-
-Note that this config (and `clone_normal_priority_delay_second`) will not take effect if it is smaller than `clone_checker_interval_second`.
-
-### `clone_max_job_num`
-
-默认值:100
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-低优先级克隆作业的并发数。 高优先级克隆作业的并发性目前是无限的。
-
-### `clone_job_timeout_second`
-
-默认值:7200 (2小时)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-单个克隆作业的默认超时。 设置足够长以适合您的副本大小。 副本数据越大,完成克隆所需的时间就越多
-
-### `clone_checker_interval_second`
-
-默认值:300 (5分钟)
-
-克隆检查器的运行间隔
-
-### `tablet_delete_timeout_second`
-
-默认值:2
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-与 `tablet_create_timeout_second` 含义相同,但在删除 tablet 时使用
-
-### `async_loading_load_task_pool_size`
-
-默认值:10
-
-是否可以动态配置:false
-
-是否为 Master FE 节点独有的配置项:true
-
-`loading_load`任务执行程序池大小。 该池大小限制了正在运行的最大 `loading_load`任务数。
-
-当前,它仅限制 `broker load`的 `loading_load`任务的数量。
-
-### `async_pending_load_task_pool_size`
-
-默认值:10
-
-是否可以动态配置:false
-
-是否为 Master FE 节点独有的配置项:true
-
-`pending_load`任务执行程序池大小。 该池大小限制了正在运行的最大 `pending_load`任务数。
-
-当前,它仅限制 `broker load`和 `spark load`的 `pending_load`任务的数量。
-
-它应该小于 `max_running_txn_num_per_db`的值
-
-### `async_load_task_pool_size`
-
-默认值:10
-
-是否可以动态配置:false
-
-是否为 Master FE 节点独有的配置项:true
-
-此配置只是为了兼容旧版本,此配置已被 `async_loading_load_task_pool_size`取代,以后会被移除。
-
-### `disable_show_stream_load`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-是否禁用显示 stream load 并清除内存中的 stream load 记录。
-
-### `max_stream_load_record_size`
-
-默认值:5000
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-可以存储在内存中的最近 stream load 记录的默认最大数量
-
-### `fetch_stream_load_record_interval_second`
-
-默认值:120
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-获取 stream load 记录间隔
-
-### `desired_max_waiting_jobs`
-
-默认值:100
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-The default maximum number of waiting jobs for routine load and load v2. This is a desired number; in some cases, such as switching the master, the current number may exceed `desired_max_waiting_jobs`.
-
-### `yarn_config_dir`
-
-默认值:PaloFe.DORIS_HOME_DIR + "/lib/yarn-config"
-
-
-The default Yarn config file directory. Before each run of the Yarn command, we check whether the config files exist under this path and create them if they do not.
-
-
-### `yarn_client_path`
-
-默认值:PaloFe.DORIS_HOME_DIR + "/lib/yarn-client/hadoop/bin/yarn"
-
-默认 Yarn 客户端路径
-
-### `spark_launcher_log_dir`
-
-默认值: sys_log_dir + "/spark_launcher_log"
-
-指定的 Spark 启动器日志目录
-
-### `spark_resource_path`
-
-默认值:空
-
-The default Spark dependency path.
-
-### `spark_home_default_dir`
-
-默认值:PaloFe.DORIS_HOME_DIR + "/lib/spark2x"
-
-默认的 Spark home 路径
-
-### `spark_load_default_timeout_second`
-
-默认值:86400 (1天)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-默认 Spark 加载超时时间
-
-### `spark_dpp_version`
-
-默认值:1.0.0
-
-Spark 默认版本号
-
-### `hadoop_load_default_timeout_second`
-
-默认值:86400 * 3 (3天)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-Hadoop 加载超时时间
-
-### `min_load_timeout_second`
-
-默认值:1 (1秒)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-The minimum load timeout, in seconds, applicable to all types of load jobs.
-
-### `max_stream_load_timeout_second`
-
-默认值:259200 (3天)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-stream load 和 mini load 最大超时时间
-
-### `max_load_timeout_second`
-
-默认值:259200 (3天)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-load 最大超时时间,适用于除 stream load 之外的所有类型的加载
-
-### `stream_load_default_timeout_second`
-
-默认值:600 (s)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-默认 stream load 和 mini load 超时时间
-
-### `insert_load_default_timeout_second`
-
-默认值:3600 (1小时)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-默认 insert load 超时时间
-
-### `mini_load_default_timeout_second`
-
-默认值:3600 (1小时)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-默认非 stream load 类型的 mini load 的超时时间
-
-### `broker_load_default_timeout_second`
-
-默认值:14400 (4小时)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-Broker load 的默认超时时间
-
-### `load_running_job_num_limit`
-
-默认值:0
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-Load 任务数量限制,默认0,无限制
-
-### `load_input_size_limit_gb`
-
-默认值:0
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-Load 作业输入的数据大小,默认是0,无限制
-
-### `delete_thread_num`
-
-默认值:10
-
-删除作业的并发线程数
-
-### `load_etl_thread_num_normal_priority`
-
-默认值:10
-
-NORMAL 优先级 etl 加载作业的并发数。
-
-### `load_etl_thread_num_high_priority`
-
-默认值:3
-
-高优先级 etl 加载作业的并发数。
-
-### `load_pending_thread_num_normal_priority`
-
-默认值:10
-
-NORMAL 优先级挂起加载作业的并发数。
-
-### `load_pending_thread_num_high_priority`
-
-默认值:3
-
-高优先级挂起加载作业的并发数。 加载作业优先级定义为 HIGH 或 NORMAL。 所有小批量加载作业都是 HIGH 优先级,其他类型的加载作业是 NORMAL 优先级。 设置优先级是为了避免慢加载作业长时间占用线程。 这只是内部优化的调度策略。 目前,您无法手动指定作业优先级。
-
-### `load_checker_interval_second`
-
-默认值:5 (s)
-
-负载调度器运行间隔。 加载作业将其状态从 PENDING 转移到 LOADING 到 FINISHED。 加载调度程序将加载作业从 PENDING 转移到 LOADING 而 txn 回调会将加载作业从 LOADING 转移到 FINISHED。 因此,当并发未达到上限时,加载作业最多需要一个时间间隔才能完成。
-
-### `max_layout_length_per_row`
-
-默认值:100000
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-一行的最大内存布局长度。 默认为 100 KB。
-在 BE 中,RowBlock 的最大大小为 100MB(在 be.conf 中配置为 `max_unpacked_row_block_size `)。
-每个 RowBlock 包含 1024 行。 因此,一行的最大大小约为 100 KB。
-
-例如。
-schema:k1(int), v1(decimal), v2(varchar(2000))
-那么一行的内存布局长度为:4(int) + 16(decimal) + 2000(varchar) = 2020 (Bytes)
-
-查看所有类型的内存布局长度,在 mysql-client 中运行 `help create table`。
-
-如果要增加此数字以支持一行中的更多列,则还需要增加
-be.conf 中的 `max_unpacked_row_block_size `,但性能影响未知。
-
-### `load_straggler_wait_second`
-
-默认值:300
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-The maximum number of seconds to wait for straggler nodes during a load.
-For example: there are three replicas A, B and C; the load reached quorum (A, B) at time t1 while C has not finished. If (current_time - t1) > 300s, Doris treats C as a failed node, asks the transaction manager to commit the transaction, and reports C as failed to the transaction manager.
-
-This is also used when waiting for publish tasks.
-
-**注意:**这个参数是所有作业的默认值,DBA 可以为单独的作业指定它
-
-### `thrift_server_type`
-
-该配置表示FE的Thrift服务使用的服务模型, 类型为string, 大小写不敏感。
-
-若该参数为 `SIMPLE`, 则使用 `TSimpleServer` 模型, 该模型一般不适用于生产环境,仅限于测试使用。
-
-若该参数为 `THREADED`, 则使用 `TThreadedSelectorServer` 模型,该模型为非阻塞式I/O模型,即主从 Reactor 模型,该模型能及时响应大量的并发连接请求,在多数场景下有较好的表现。
-
-若该参数为 `THREAD_POOL`, 则使用 `TThreadPoolServer` 模型,该模型为阻塞式I/O模型,使用线程池处理用户连接,并发连接数受限于线程池的数量,如果能提前预估并发请求的数量,并且能容忍足够多的线程资源开销,该模型会有较好的性能表现,默认使用该服务模型
-
-### `thrift_server_max_worker_threads`
-
-默认值:4096
-
-Thrift Server最大工作线程数
-
-### `publish_version_interval_ms`
-
-默认值:10 (ms)
-
-两个发布版本操作之间的最小间隔
-
-### `publish_version_timeout_second`
-
-默认值:30 (s)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-一个事务的所有发布版本任务完成的最大等待时间
-
-### `max_create_table_timeout_second`
-
-默认值:60 (s)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-为了在创建表(索引)不等待太久,设置一个最大超时时间
-
-### `tablet_create_timeout_second`
-
-默认值:1(s)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-The maximum waiting time for creating a single replica.
-For example, if you create a table with m tablets and n replicas per tablet,
-the create table request will run for at most (m * n * tablet_create_timeout_second) seconds before timing out.
-
-### `max_mysql_service_task_threads_num`
-
-默认值:4096
-
-mysql 中处理任务的最大线程数。
-
-### `cluster_id`
-
-默认值:-1
-
-如果节点(FE 或 BE)具有相同的集群 id,则将认为它们属于同一个Doris 集群。 Cluster id 通常是主 FE 首次启动时生成的随机整数。 您也可以指定一个。
-
-### `auth_token`
-
-默认值:空
-
-用于内部身份验证的集群令牌。
-
-### `cluster_name`
-
-默认值: Apache doris
-
-集群名称,将显示为网页标题
-
-### `mysql_service_io_threads_num`
-
-默认值:4
-
-mysql 中处理 io 事件的线程数。
-
-### `mysql_service_nio_enabled`
-
-默认值:true
-
-mysql 服务 nio 选项是否启用,默认启用
-
-### `query_port`
-
-默认值:9030
-
-Doris FE 通过 mysql 协议查询连接端口
-
-### `rewrite_count_distinct_to_bitmap_hll`
-
-默认值:true
-
-该变量为 session variable,session 级别生效。
-
-- 类型:boolean
-- Description: **Only for tables of the AGG model.** When this variable is true and a user query contains an aggregate such as count(distinct c1), the count distinct is rewritten to bitmap_union_count(c1) if column c1 is of type BITMAP, or to hll_union_agg(c1) if c1 is of type HLL. If the variable is false, no rewrite happens.
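-
-An illustrative sketch of the rewrite (table and column names are hypothetical; the rewriting itself is done internally by the planner):
-
-```
--- On an AGG-model table where c1 is of type BITMAP:
-SELECT count(DISTINCT c1) FROM example_tbl;
--- is executed as if it were written as:
-SELECT bitmap_union_count(c1) FROM example_tbl;
-```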
-
-### `rpc_port`
-
-默认值:9020
-
-FE Thrift Server的端口
-
-### `thrift_backlog_num`
-
-默认值:1024
-
-The backlog_num of the thrift server. If you increase this backlog_num, make sure its value is larger than the Linux `/proc/sys/net/core/somaxconn` setting.
-
-### `thrift_client_timeout_ms`
-
-默认值:0
-
-The connection timeout and socket timeout config for the thrift server. The default value of `thrift_client_timeout_ms` is set to zero to prevent read timeouts.
-
-### `mysql_nio_backlog_num`
-
-默认值:1024
-
-The backlog_num of the mysql nio server. If you increase this backlog_num, also increase the value in the Linux `/proc/sys/net/core/somaxconn` file.
-
-### `http_backlog_num`
-
-默认值:1024
-
-The backlog_num of the netty http server. If you increase this backlog_num, also increase the value in the Linux `/proc/sys/net/core/somaxconn` file.
-
-### `http_max_line_length`
-
-默认值:4096
-
-The maximum length of a request URL accepted by the HTTP service, in bytes.
-
-### `http_max_header_size`
-
-默认值:8192
-
-The maximum length of a request header accepted by the HTTP service, in bytes.
-
-### `http_max_chunk_size`
-
-默认值:8192
-
-http 上下文 chunk 块的最大尺寸
-
-### `http_port`
-
-默认值:8030
-
-FE http 端口,当前所有 FE http 端口都必须相同
-
-### `max_bdbje_clock_delta_ms`
-
-默认值:5000 (5秒)
-
-设置非主 FE 到主 FE 主机之间的最大可接受时钟偏差。 每当非主 FE 通过 BDBJE 建立到主 FE 的连接时,都会检查该值。 如果时钟偏差大于此值,则放弃连接。
-
-### `ignore_meta_check`
-
-默认值:false
-
-是否可以动态配置:true
-
-如果为 true,非主 FE 将忽略主 FE 与其自身之间的元数据延迟间隙,即使元数据延迟间隙超过 `meta_delay_toleration_second`。 非主 FE 仍将提供读取服务。 当您出于某种原因尝试停止 Master FE 较长时间,但仍希望非 Master FE 可以提供读取服务时,这会很有帮助。
-
-### `metadata_failure_recovery`
-
-默认值:false
-
-如果为 true,FE 将重置 bdbje 复制组(即删除所有可选节点信息)并应该作为 Master 启动。 如果所有可选节点都无法启动,我们可以将元数据复制到另一个节点并将此配置设置为 true 以尝试重新启动 FE。
-
-### `priority_networks`
-
-默认值:空
-
-为那些有很多 ip 的服务器声明一个选择策略。 请注意,最多应该有一个 ip 与此列表匹配。 这是一个以分号分隔格式的列表,用 CIDR 表示法,例如 10.10.10.0/24。 如果没有匹配这条规则的ip,会随机选择一个。
-
-### `txn_rollback_limit`
-
-默认值:100
-
-尝试重新加入组时 bdbje 可以回滚的最大 txn 数
-
-### `max_agent_task_threads_num`
-
-默认值:4096
-
-是否为 Master FE 节点独有的配置项:true
-
-代理任务线程池中处理代理任务的最大线程数。
-
-### `heartbeat_mgr_blocking_queue_size`
-
-默认值:1024
-
-是否为 Master FE 节点独有的配置项:true
-
-在 heartbeat_mgr 中存储心跳任务的阻塞队列大小。
-
-### `heartbeat_mgr_threads_num`
-
-默认值:8
-
-是否为 Master FE 节点独有的配置项:true
-
-heartbeat _mgr 中处理心跳事件的线程数。
-
-### `bdbje_replica_ack_timeout_second`
-
-默认值:10
-
-元数据会同步写入到多个 Follower FE,这个参数用于控制 Master FE 等待 Follower FE 发送 ack 的超时时间。当写入的数据较大时,可能 ack 时间较长,如果超时,会导致写元数据失败,FE 进程退出。此时可以适当调大这个参数。
-
-### `bdbje_lock_timeout_second`
-
-默认值:1
-
-The lock timeout of bdbje operations. If there are many LockTimeoutExceptions in the FE WARN log, you can try increasing this value.
-
-### `bdbje_heartbeat_timeout_second`
-
-默认值:30
-
-master 和 follower 之间 bdbje 的心跳超时。 默认为 30 秒,与 bdbje 中的默认值相同。 如果网络遇到暂时性问题,一些意外的长 Java GC 使您烦恼,您可以尝试增加此值以减少错误超时的机会
-
-### `replica_ack_policy`
-
-默认值:SIMPLE_MAJORITY
-
-选项:ALL, NONE, SIMPLE_MAJORITY
-
-bdbje 的副本 ack 策略。 更多信息,请参见:http://docs.oracle.com/cd/E17277_02/html/java/com/sleepycat/je/Durability.ReplicaAckPolicy.html
-
-### `replica_sync_policy`
-
-默认值:SYNC
-
-选项:SYNC, NO_SYNC, WRITE_NO_SYNC
-
-bdbje 的Follower FE 同步策略。
-
-### `master_sync_policy`
-
-默认值:SYNC
-
-选项:SYNC, NO_SYNC, WRITE_NO_SYNC
-
-Master FE 的 bdbje 同步策略。 如果您只部署一个 Follower FE,请将其设置为“SYNC”。 如果你部署了超过 3 个 Follower FE,你可以将这个和下面的 `replica_sync_policy ` 设置为 WRITE_NO_SYNC。 更多信息,参见:http://docs.oracle.com/cd/E17277_02/html/java/com/sleepycat/je/Durability.SyncPolicy.html
-
-### `meta_delay_toleration_second`
-
-默认值:300 (5分钟)
-
-如果元数据延迟间隔超过 `meta_delay_toleration_second `,非主 FE 将停止提供服务
-
-### `edit_log_roll_num`
-
-默认值:50000
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-The Master FE will save an image every `edit_log_roll_num` meta journals.
-
-### `edit_log_port`
-
-默认值:9010
-
-bdbje端口
-
-### `edit_log_type`
-
-默认值:BDB
-
-编辑日志类型。
-BDB:将日志写入 bdbje
-LOCAL:已弃用。
-
-### `tmp_dir`
-
-默认值:PaloFe.DORIS_HOME_DIR + "/temp_dir"
-
-temp dir 用于保存某些过程的中间结果,例如备份和恢复过程。 这些过程完成后,将清除此目录中的文件。
-
-### `meta_dir`
-
-默认值:PaloFe.DORIS_HOME_DIR + "/doris-meta"
-
-Doris metadata will be saved here. It is strongly recommended to place this directory on storage that is:
-
-1. high in write performance (SSD)
-
-2. safe (RAID)
-
-### `custom_config_dir`
-
-默认值:PaloFe.DORIS_HOME_DIR + "/conf"
-
-自定义配置文件目录
-
-配置 `fe_custom.conf` 文件的位置。默认为 `conf/` 目录下。
-
-在某些部署环境下,`conf/` 目录可能因为系统的版本升级被覆盖掉。这会导致用户在运行是持久化修改的配置项也被覆盖。这时,我们可以将 `fe_custom.conf` 存储在另一个指定的目录中,以防止配置文件被覆盖。
-
-### `log_roll_size_mb`
-
-默认值:1024 (1G)
-
-The maximum size of a single system log or audit log file.
-
-### `sys_log_dir`
-
-默认值:PaloFe.DORIS_HOME_DIR + "/log"
-
-这指定了 FE 日志目录。FE 将产生 2 个日志文件:
-
-1. fe.log:FE 进程的所有日志。
-2. fe.warn.log:FE 进程的所有警告和错误日志。
-
-### `sys_log_level`
-
-默认值:INFO
-
-日志级别,可选项:INFO, WARNING, ERROR, FATAL
-
-### `sys_log_roll_num`
-
-默认值:10
-
-The maximum number of FE log files kept within one `sys_log_roll_interval`. The default is 10, meaning there are at most 10 log files per day.
-
-### `sys_log_verbose_modules`
-
-默认值:{}
-
-详细模块。 VERBOSE 级别由 log4j DEBUG 级别实现。
-
-例如:
- sys_log_verbose_modules = org.apache.doris.catalog
- 这只会打印包 org.apache.doris.catalog 及其所有子包中文件的调试日志。
-
-### `sys_log_roll_interval`
-
-默认值:DAY
-
-可选项:
-
-- DAY: log 前缀是 yyyyMMdd
-- HOUR: log 前缀是 yyyyMMddHH
-
-### `sys_log_delete_age`
-
-默认值:7d
-
-默认为 7 天,如果日志的最后修改时间为 7 天前,则将其删除。
-
-支持格式:
-
-- 7d: 7 天
-- 10h: 10 小时
-- 60m: 60 分钟
-- 120s: 120 秒
-
-### `audit_log_dir`
-
-默认值:PaloFe.DORIS_HOME_DIR + "/log"
-
-这指定了 FE 审计日志目录。审计日志 fe.audit.log 包含所有请求以及相关信息,如 `user, host, cost, status` 等。
-
-### `audit_log_roll_num`
-
-默认值:90
-
-The maximum number of FE audit log files kept within one `audit_log_roll_interval`.
-
-### `audit_log_modules`
-
-默认值:{"slow_query", "query", "load", "stream_load"}
-
-慢查询包含所有开销超过 *qe_slow_log_ms* 的查询
-
-### `qe_slow_log_ms`
-
-默认值:5000 (5秒)
-
-如果查询的响应时间超过此阈值,则会在审计日志中记录为 slow_query。
-
-### `audit_log_roll_interval`
-
-默认值:DAY
-
-可选项:
-
-- DAY: log 前缀是 yyyyMMdd
-- HOUR: log 前缀是 yyyyMMddHH
-
-### `audit_log_delete_age`
-
-默认值:30d
-
-默认为 30 天,如果日志的最后修改时间为 30 天前,则将其删除。
-
-支持格式:
-
-- 7d: 7 天
-- 10h: 10 小时
-- 60m: 60 分钟
-- 120s: 120 秒
-
-### `plugin_dir`
-
-默认值:DORIS_HOME + "/plugins"
-
-插件安装目录
-
-### `plugin_enable`
-
-默认值:true
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-插件是否启用,默认启用
-
-### `label_keep_max_second`
-
-默认值:3 * 24 * 3600 (3天)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-Labels of finished or cancelled load jobs will be removed after `label_keep_max_second`.
-
-1. A removed label can be reused.
-2. Setting a shorter time reduces FE memory usage (the information of all load jobs is kept in memory until it is removed).
-
-Under high-concurrency writes, if a large backlog of jobs builds up and `call frontend service failed` errors appear, and the logs show that metadata writes hold locks for too long, this value can be lowered to 12 hours or even 6 hours.
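-
-Since this item is mutable, lowering it at runtime could look like the sketch below (43200 seconds = 12 hours):
-
-```
-ADMIN SET FRONTEND CONFIG ("label_keep_max_second" = "43200");
-```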
-
-### `streaming_label_keep_max_second`
-
-默认值:43200 (12小时)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-对于一些高频负载工作,例如:INSERT、STREAMING LOAD、ROUTINE_LOAD_TASK 。 如果过期,则删除已完成的作业或任务。
-
-### `history_job_keep_max_second`
-
-默认值:7 * 24 * 3600 (7天)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-某些作业的最大保留时间。 像 schema 更改和 Rollup 作业。
-
-### `label_clean_interval_second`
-
-默认值:4 * 3600 (4小时)
-
-load 标签清理器将每隔 `label_clean_interval_second` 运行一次以清理过时的作业。
-
-### `delete_info_keep_max_second`
-
-默认值:3 * 24 * 3600 (3天)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:false
-
-删除元数据中创建时间大于`delete_info_keep_max_second`的delete信息。
-
-设置较短的时间将减少 FE 内存使用量和镜像文件大小。(因为所有的deleteInfo在被删除之前都存储在内存和镜像文件中)
-
-### `transaction_clean_interval_second`
-
-默认值:30
-
-A transaction in the VISIBLE or ABORTED state will be cleaned up after `transaction_clean_interval_second` seconds. This interval should be kept as short as possible so that each clean cycle finishes quickly.
-
-
-### `default_max_query_instances`
-
-默认值:-1
-
-用户属性max_query_instances小于等于0时,使用该配置,用来限制单个用户同一时刻可使用的查询instance个数。该参数小于等于0表示无限制。
-
-### `use_compact_thrift_rpc`
-
-默认值:true
-
-是否使用压缩格式发送查询计划结构体。开启后,可以降低约50%的查询计划结构体大小,从而避免一些 "send fragment timeout" 错误。
-但是在某些高并发小查询场景下,可能会降低约10%的并发度。
-
-### `disable_tablet_scheduler`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-如果设置为true,将关闭副本修复和均衡逻辑。
-
-
-
-### `enable_force_drop_redundant_replica`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-如果设置为 true,系统会在副本调度逻辑中,立即删除冗余副本。这可能导致部分正在对对应副本写入的导入作业失败,但是会加速副本的均衡和修复速度。
-当集群中有大量等待被均衡或修复的副本时,可以尝试设置此参数,以牺牲部分导入成功率为代价,加速副本的均衡和修复。
-
-### `repair_slow_replica`
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-如果设置为 true,会自动检测compaction比较慢的副本,并将迁移到其他机器,检测条件是 最慢副本的版本计数超过 `min_version_count_indicate_replica_compaction_too_slow` 的值, 且与最快副本的版本计数差异所占比例超过 `valid_version_count_delta_ratio_between_replicas` 的值
-
-### `colocate_group_relocate_delay_second`
-
-默认值:1800
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-重分布一个 Colocation Group 可能涉及大量的tablet迁移。因此,我们需要一个更保守的策略来避免不必要的 Colocation 重分布。
-重分布通常发生在 Doris 检测到有 BE 节点宕机后。这个参数用于推迟对BE宕机的判断。如默认参数下,如果 BE 节点能够在 1800 秒内恢复,则不会触发 Colocation 重分布。
-
-### `allow_replica_on_same_host`
-
-默认值:false
-
-是否可以动态配置:false
-
-是否为 Master FE 节点独有的配置项:false
-
-Whether multiple replicas of the same tablet may be placed on the same host. This parameter is mainly for local testing, to make it easy to start several BEs for testing multi-replica scenarios. Do not use it in non-test environments.
-
-### `min_version_count_indicate_replica_compaction_too_slow`
-
-默认值:300
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-版本计数阈值,用来判断副本做 compaction 的速度是否太慢
-
-### `valid_version_count_delta_ratio_between_replicas`
-
-默认值:0.5
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-最慢副本的版本计数与最快副本的差异有效比率阈值,如果设置 `repair_slow_replica` 为 true,则用于判断是否修复最慢的副本
-
-### `min_bytes_indicate_replica_too_large`
-
-默认值:2 * 1024 * 1024 * 1024 (2G)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-数据大小阈值,用来判断副本的数据量是否太大
-
-### skip_compaction_slower_replica
-
-默认值:true
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:false
-
-如果设置为true,则在选择可查询副本时,将跳过 compaction 较慢的副本
-
-### enable_create_sync_job
-
-开启 MySQL 数据同步作业功能。默认是 false,关闭此功能
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-### sync_commit_interval_second
-
-提交事务的最大时间间隔。若超过了这个时间 channel 中还有数据没有提交,consumer 会通知 channel 提交事务。
-
-默认值:10(秒)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-### min_sync_commit_size
-
-提交事务需满足的最小 event 数量。若 Fe 接收到的 event 数量小于它,会继续等待下一批数据直到时间超过了 `sync_commit_interval_second ` 为止。默认值是 10000 个 events,如果你想修改此配置,请确保此值小于 canal 端的 `canal.instance.memory.buffer.size` 配置(默认16384),否则在 ack 前Fe会尝试获取比 store 队列长度更多的 event,导致 store 队列阻塞至超时为止。
-
-默认值:10000
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-### min_bytes_sync_commit
-
-提交事务需满足的最小数据大小。若 Fe 接收到的数据大小小于它,会继续等待下一批数据直到时间超过了 `sync_commit_interval_second` 为止。默认值是 15 MB,如果你想修改此配置,请确保此值小于 canal 端的 `canal.instance.memory.buffer.size` 和 `canal.instance.memory.buffer.memunit` 的乘积(默认 16 MB),否则在 ack 前 Fe 会尝试获取比 store 空间更大的数据,导致 store 队列阻塞至超时为止。
-
-默认值:15 * 1024 * 1024(15M)
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-### max_sync_task_threads_num
-
-The maximum number of threads in the data sync job thread pool. There is only one such pool in the entire FE; it handles the tasks with which all data sync jobs in the FE send data to BEs. The pool is implemented in the `SyncTaskPool` class.
-
-默认值:10
-
-是否可以动态配置:false
-
-是否为 Master FE 节点独有的配置项:false
diff --git a/docs/zh-CN/administrator-guide/config/user_property.md b/docs/zh-CN/administrator-guide/config/user_property.md
deleted file mode 100644
index ff57234296..0000000000
--- a/docs/zh-CN/administrator-guide/config/user_property.md
+++ /dev/null
@@ -1,73 +0,0 @@
----
-{
- "title": "用户配置项",
- "language": "zh-CN"
-}
----
-
-
-
-# User 配置项
-
-该文档主要介绍了 User 级别的相关配置项。User 级别的配置生效范围为单个用户。每个用户都可以设置自己的 User property。相互不影响。
-
-## 查看配置项
-
-FE 启动后,在 MySQL 客户端,通过下面命令查看 User 的配置项:
-
-`SHOW PROPERTY [FOR user] [LIKE key pattern]`
-
-具体语法可通过命令:`help show property;` 查询。
-
-## 设置配置项
-
-FE 启动后,在MySQL 客户端,通过下面命令修改 User 的配置项:
-
-`SET PROPERTY [FOR 'user'] 'key' = 'value' [, 'key' = 'value']`
-
-具体语法可通过命令:`help set property;` 查询。
-
-User 级别的配置项只会对指定用户生效,并不会影响其他用户的配置。
-
-## 应用举例
-
-1. 修改用户 Billie 的 `max_user_connections`
-
- 通过 `SHOW PROPERTY FOR 'Billie' LIKE '%max_user_connections%';` 查看 Billie 用户当前的最大链接数为 100。
-
- 通过 `SET PROPERTY FOR 'Billie' 'max_user_connections' = '200';` 修改 Billie 用户的当前最大连接数到 200。
-
-## 配置项列表
-
-### max_user_connections
-
- 用户最大的连接数,默认值为100。一般情况不需要更改该参数,除非查询的并发数超过了默认值。
-
-### max_query_instances
-
- The number of query instances a user can use at the same time. The default is -1; values less than or equal to 0 fall back to the FE config `default_max_query_instances`.
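-
- A sketch of setting it for a single user (the user name 'jack' is hypothetical):
-
-```
-SET PROPERTY FOR 'jack' 'max_query_instances' = '1000';
-```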
-
-### resource
-
-### quota
-
-### default_load_cluster
-
-### load_cluster
diff --git a/docs/zh-CN/administrator-guide/dynamic-partition.md b/docs/zh-CN/administrator-guide/dynamic-partition.md
deleted file mode 100644
index 0249b37ecb..0000000000
--- a/docs/zh-CN/administrator-guide/dynamic-partition.md
+++ /dev/null
@@ -1,460 +0,0 @@
----
-{
- "title": "动态分区",
- "language": "zh-CN"
-}
----
-
-
-
-# 动态分区
-
-动态分区是在 Doris 0.12 版本中引入的新功能。旨在对表级别的分区实现生命周期管理(TTL),减少用户的使用负担。
-
-目前实现了动态添加分区及动态删除分区的功能。
-
-动态分区只支持 Range 分区。
-
-## 名词解释
-
-* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。
-* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。
-
-## 原理
-
-在某些使用场景下,用户会将表按照天进行分区划分,每天定时执行例行任务,这时需要使用方手动管理分区,否则可能由于使用方没有创建分区导致数据导入失败,这给使用方带来了额外的维护成本。
-
-通过动态分区功能,用户可以在建表时设定动态分区的规则。FE 会启动一个后台线程,根据用户指定的规则创建或删除分区。用户也可以在运行时对现有规则进行变更。
-
-## 使用方式
-
-动态分区的规则可以在建表时指定,或者在运行时进行修改。当前仅支持对单分区列的分区表设定动态分区规则。
-
-* 建表时指定:
-
- ```
- CREATE TABLE tbl1
- (...)
- PROPERTIES
- (
- "dynamic_partition.prop1" = "value1",
- "dynamic_partition.prop2" = "value2",
- ...
- )
- ```
-
-* 运行时修改
-
- ```
- ALTER TABLE tbl1 SET
- (
- "dynamic_partition.prop1" = "value1",
- "dynamic_partition.prop2" = "value2",
- ...
- )
- ```
-
-### 动态分区规则参数
-
-动态分区的规则参数都以 `dynamic_partition.` 为前缀:
-
-* `dynamic_partition.enable`
-
- 是否开启动态分区特性。可指定为 `TRUE` 或 `FALSE`。如果不填写,默认为 `TRUE`。如果为 `FALSE`,则 Doris 会忽略该表的动态分区规则。
-
-* `dynamic_partition.time_unit`
-
- 动态分区调度的单位。可指定为 `HOUR`、`DAY`、`WEEK`、`MONTH`。分别表示按天、按星期、按月进行分区创建或删除。
-
- 当指定为 `HOUR` 时,动态创建的分区名后缀格式为 `yyyyMMddHH`,例如`2020032501`。小时为单位的分区列数据类型不能为 DATE。
-
- 当指定为 `DAY` 时,动态创建的分区名后缀格式为 `yyyyMMdd`,例如`20200325`。
-
- 当指定为 `WEEK` 时,动态创建的分区名后缀格式为`yyyy_ww`。即当前日期属于这一年的第几周,例如 `2020-03-25` 创建的分区名后缀为 `2020_13`, 表明目前为2020年第13周。
-
- 当指定为 `MONTH` 时,动态创建的分区名后缀格式为 `yyyyMM`,例如 `202003`。
-
-* `dynamic_partition.time_zone`
-
- 动态分区的时区,如果不填写,则默认为当前机器的系统的时区,例如 `Asia/Shanghai`,如果想获取当前支持的时区设置,可以参考 `https://en.wikipedia.org/wiki/List_of_tz_database_time_zones`。
-
-* `dynamic_partition.start`
-
- 动态分区的起始偏移,为负数。根据 `time_unit` 属性的不同,以当天(星期/月)为基准,分区范围在此偏移之前的分区将会被删除。如果不填写,则默认为 `-2147483648`,即不删除历史分区。
-
-* `dynamic_partition.end`
-
- 动态分区的结束偏移,为正数。根据 `time_unit` 属性的不同,以当天(星期/月)为基准,提前创建对应范围的分区。
-
-* `dynamic_partition.prefix`
-
- 动态创建的分区名前缀。
-
-* `dynamic_partition.buckets`
-
- 动态创建的分区所对应的分桶数量。
-
-* `dynamic_partition.replication_num`
-
- 动态创建的分区所对应的副本数量,如果不填写,则默认为该表创建时指定的副本数量。
-
-* `dynamic_partition.start_day_of_week`
-
- 当 `time_unit` 为 `WEEK` 时,该参数用于指定每周的起始点。取值为 1 到 7。其中 1 表示周一,7 表示周日。默认为 1,即表示每周以周一为起始点。
-
-* `dynamic_partition.start_day_of_month`
-
-    When `time_unit` is `MONTH`, this parameter specifies the starting day of each month, with values from 1 to 28, where 1 means the 1st of the month and 28 means the 28th. The default is 1, i.e. every month starts from the 1st. Starting on the 29th, 30th or 31st is not supported, to avoid ambiguity caused by leap years and leap months.
-
-* `dynamic_partition.create_history_partition`
-
- 默认为 false。当置为 true 时,Doris 会自动创建所有分区,具体创建规则见下文。同时,FE 的参数 `max_dynamic_partition_num` 会限制总分区数量,以避免一次性创建过多分区。当期望创建的分区个数大于 `max_dynamic_partition_num` 值时,操作将被禁止。
-
- 当不指定 `start` 属性时,该参数不生效。
-
-* `dynamic_partition.history_partition_num`
-
- 当 `create_history_partition` 为 `true` 时,该参数用于指定创建历史分区数量。默认值为 -1, 即未设置。
-
-* `dynamic_partition.hot_partition_num`
-
- 指定最新的多少个分区为热分区。对于热分区,系统会自动设置其 `storage_medium` 参数为SSD,并且设置 `storage_cooldown_time`。
-
-    In other words, `hot_partition_num` covers the previous n days as well as all future partitions.
-
- 我们举例说明。假设今天是 2021-05-20,按天分区,动态分区的属性设置为:hot_partition_num=2, end=3, start=-3。则系统会自动创建以下分区,并且设置 `storage_medium` 和 `storage_cooldown_time` 参数:
-
- ```
- p20210517:["2021-05-17", "2021-05-18") storage_medium=HDD storage_cooldown_time=9999-12-31 23:59:59
- p20210518:["2021-05-18", "2021-05-19") storage_medium=HDD storage_cooldown_time=9999-12-31 23:59:59
- p20210519:["2021-05-19", "2021-05-20") storage_medium=SSD storage_cooldown_time=2021-05-21 00:00:00
- p20210520:["2021-05-20", "2021-05-21") storage_medium=SSD storage_cooldown_time=2021-05-22 00:00:00
- p20210521:["2021-05-21", "2021-05-22") storage_medium=SSD storage_cooldown_time=2021-05-23 00:00:00
- p20210522:["2021-05-22", "2021-05-23") storage_medium=SSD storage_cooldown_time=2021-05-24 00:00:00
- p20210523:["2021-05-23", "2021-05-24") storage_medium=SSD storage_cooldown_time=2021-05-25 00:00:00
- ```
-
-* `dynamic_partition.reserved_history_periods`
-
- 需要保留的历史分区的时间范围。当`dynamic_partition.time_unit` 设置为 "DAY/WEEK/MONTH" 时,需要以 `[yyyy-MM-dd,yyyy-MM-dd],[...,...]` 格式进行设置。当`dynamic_partition.time_unit` 设置为 "HOUR" 时,需要以 `[yyyy-MM-dd HH:mm:ss,yyyy-MM-dd HH:mm:ss],[...,...]` 的格式来进行设置。如果不设置,默认为 `"NULL"`。
-
-    For example, assume today is 2021-09-06, the table is partitioned by day, and the dynamic partition properties are set to:
-
- ```time_unit="DAY/WEEK/MONTH", end=3, start=-3, reserved_history_periods="[2020-06-01,2020-06-20],[2020-10-31,2020-11-15]"```。
-
- 则系统会自动保留:
-
- ```
- ["2020-06-01","2020-06-20"],
- ["2020-10-31","2020-11-15"]
- ```
-
- 或者
-
- ```time_unit="HOUR", end=3, start=-3, reserved_history_periods="[2020-06-01 00:00:00,2020-06-01 03:00:00]"```.
-
- 则系统会自动保留:
-
- ```
- ["2020-06-01 00:00:00","2020-06-01 03:00:00"]
- ```
-
-    这两个时间段的分区。其中,`reserved_history_periods` 的每一个 `[...,...]` 是一对设置项,两者需要同时被设置,且第一个时间不能大于第二个时间。
-
-#### 创建历史分区规则
-
-当 `create_history_partition` 为 `true`,即开启创建历史分区功能时,Doris 会根据 `dynamic_partition.start` 和 `dynamic_partition.history_partition_num` 来决定创建历史分区的个数。
-
-假设需要创建的历史分区数量为 `expect_create_partition_num`,根据不同的设置具体数量如下:
-
-1. `create_history_partition` = `true`
- - `dynamic_partition.history_partition_num` 未设置,即 -1.
- `expect_create_partition_num` = `end` - `start`;
-
- - `dynamic_partition.history_partition_num` 已设置
-     `expect_create_partition_num` = `end` - max(`start`, `-history_partition_num`);
-
-2. `create_history_partition` = `false`
- 不会创建历史分区,`expect_create_partition_num` = `end` - 0;
-
-当 `expect_create_partition_num` 大于 `max_dynamic_partition_num`(默认500)时,禁止创建过多分区。
-
-**举例说明:**
-
-1. 假设今天是 2021-05-20,按天分区,动态分区的属性设置为:`create_history_partition=true, end=3, start=-3, history_partition_num=1`,则系统会自动创建以下分区:
-
- ```
- p20210519
- p20210520
- p20210521
- p20210522
- p20210523
- ```
-
-2. With `history_partition_num=5` and the other properties the same as in example 1, the system will automatically create the following partitions:
-
- ```
- p20210517
- p20210518
- p20210519
- p20210520
- p20210521
- p20210522
- p20210523
- ```
-
-3. With `history_partition_num=-1`, i.e. the number of history partitions not set, and the other properties the same as in example 1, the system will automatically create the following partitions:
-
- ```
- p20210517
- p20210518
- p20210519
- p20210520
- p20210521
- p20210522
- p20210523
- ```
-
-### 注意事项
-
-动态分区使用过程中,如果因为一些意外情况导致 `dynamic_partition.start` 和 `dynamic_partition.end` 之间的某些分区丢失,那么当前时间与 `dynamic_partition.end` 之间的丢失分区会被重新创建,`dynamic_partition.start`与当前时间之间的丢失分区不会重新创建。
-
-## 示例
-
-1. 表 tbl1 分区列 k1 类型为 DATE,创建一个动态分区规则。按天分区,只保留最近7天的分区,并且预先创建未来3天的分区。
-
- ```
- CREATE TABLE tbl1
- (
- k1 DATE,
- ...
- )
- PARTITION BY RANGE(k1) ()
- DISTRIBUTED BY HASH(k1)
- PROPERTIES
- (
- "dynamic_partition.enable" = "true",
- "dynamic_partition.time_unit" = "DAY",
- "dynamic_partition.start" = "-7",
- "dynamic_partition.end" = "3",
- "dynamic_partition.prefix" = "p",
- "dynamic_partition.buckets" = "32"
- );
- ```
-
- 假设当前日期为 2020-05-29。则根据以上规则,tbl1 会产生以下分区:
-
- ```
- p20200529: ["2020-05-29", "2020-05-30")
- p20200530: ["2020-05-30", "2020-05-31")
- p20200531: ["2020-05-31", "2020-06-01")
- p20200601: ["2020-06-01", "2020-06-02")
- ```
-
- 在第二天,即 2020-05-30,会创建新的分区 `p20200602: ["2020-06-02", "2020-06-03")`
-
-    On 2020-06-06, since `dynamic_partition.start` is set to -7, partitions older than 7 days will be dropped, i.e. partition `p20200529` is dropped.
-
-2. 表 tbl1 分区列 k1 类型为 DATETIME,创建一个动态分区规则。按星期分区,只保留最近2个星期的分区,并且预先创建未来2个星期的分区。
-
- ```
- CREATE TABLE tbl1
- (
- k1 DATETIME,
- ...
- )
- PARTITION BY RANGE(k1) ()
- DISTRIBUTED BY HASH(k1)
- PROPERTIES
- (
- "dynamic_partition.enable" = "true",
- "dynamic_partition.time_unit" = "WEEK",
- "dynamic_partition.start" = "-2",
- "dynamic_partition.end" = "2",
- "dynamic_partition.prefix" = "p",
- "dynamic_partition.buckets" = "8"
- );
- ```
-
-    Assume the current date is 2020-05-29, which is week 22 of 2020, and each week starts on Monday by default. Based on the rules above, tbl1 will produce the following partitions:
-
- ```
- p2020_22: ["2020-05-25 00:00:00", "2020-06-01 00:00:00")
- p2020_23: ["2020-06-01 00:00:00", "2020-06-08 00:00:00")
- p2020_24: ["2020-06-08 00:00:00", "2020-06-15 00:00:00")
- ```
-
- 其中每个分区的起始日期为当周的周一。同时,因为分区列 k1 的类型为 DATETIME,则分区值会补全时分秒部分,且皆为 0。
-
- 在 2020-06-15,即第25周时,会删除2周前的分区,即删除 `p2020_22`。
-
- 在上面的例子中,假设用户指定了周起始日为 `"dynamic_partition.start_day_of_week" = "3"`,即以每周三为起始日。则分区如下:
-
- ```
- p2020_22: ["2020-05-27 00:00:00", "2020-06-03 00:00:00")
- p2020_23: ["2020-06-03 00:00:00", "2020-06-10 00:00:00")
- p2020_24: ["2020-06-10 00:00:00", "2020-06-17 00:00:00")
- ```
-
- 即分区范围为当周的周三到下周的周二。
-
- * 注:2019-12-31 和 2020-01-01 在同一周内,如果分区的起始日期为 2019-12-31,则分区名为 `p2019_53`,如果分区的起始日期为 2020-01-01,则分区名为 `p2020_01`。
-
-3. 表 tbl1 分区列 k1 类型为 DATE,创建一个动态分区规则。按月分区,不删除历史分区,并且预先创建未来2个月的分区。同时设定以每月3号为起始日。
-
- ```
- CREATE TABLE tbl1
- (
- k1 DATE,
- ...
- )
- PARTITION BY RANGE(k1) ()
- DISTRIBUTED BY HASH(k1)
- PROPERTIES
- (
- "dynamic_partition.enable" = "true",
- "dynamic_partition.time_unit" = "MONTH",
- "dynamic_partition.end" = "2",
- "dynamic_partition.prefix" = "p",
- "dynamic_partition.buckets" = "8",
- "dynamic_partition.start_day_of_month" = "3"
- );
- ```
-
-    Assume the current date is 2020-05-29. Based on the rules above, tbl1 will produce the following partitions:
-
- ```
- p202005: ["2020-05-03", "2020-06-03")
- p202006: ["2020-06-03", "2020-07-03")
- p202007: ["2020-07-03", "2020-08-03")
- ```
-
- 因为没有设置 `dynamic_partition.start`,则不会删除历史分区。
-
- 假设今天为 2020-05-20,并设置以每月28号为起始日,则分区范围为:
-
- ```
- p202004: ["2020-04-28", "2020-05-28")
- p202005: ["2020-05-28", "2020-06-28")
- p202006: ["2020-06-28", "2020-07-28")
- ```
-
-## 修改动态分区属性
-
-通过如下命令可以修改动态分区的属性:
-
-```
-ALTER TABLE tbl1 SET
-(
- "dynamic_partition.prop1" = "value1",
- ...
-);
-```
-
-某些属性的修改可能会产生冲突。假设之前分区粒度为 DAY,并且已经创建了如下分区:
-
-```
-p20200519: ["2020-05-19", "2020-05-20")
-p20200520: ["2020-05-20", "2020-05-21")
-p20200521: ["2020-05-21", "2020-05-22")
-```
-
-如果此时将分区粒度改为 MONTH,则系统会尝试创建范围为 `["2020-05-01", "2020-06-01")` 的分区,而该分区的分区范围和已有分区冲突,所以无法创建。而范围为 `["2020-06-01", "2020-07-01")` 的分区可以正常创建。因此,2020-05-22 到 2020-05-30 时间段的分区,需要自行填补。
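-
-One way to fill that gap, sketched below: temporarily disable the dynamic partition rule (as described later in this document), add the missing range manually, then re-enable it. The partition name is hypothetical.
-
-```
-ALTER TABLE tbl1 SET ("dynamic_partition.enable" = "false");
-ALTER TABLE tbl1 ADD PARTITION p20200522 VALUES [("2020-05-22"), ("2020-06-01"));
-ALTER TABLE tbl1 SET ("dynamic_partition.enable" = "true");
-```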
-
-### 查看动态分区表调度情况
-
-通过以下命令可以进一步查看当前数据库下,所有动态分区表的调度情况:
-
-```
-mysql> SHOW DYNAMIC PARTITION TABLES;
-+-----------+--------+----------+-------------+------+--------+---------+-----------+----------------+---------------------+--------+------------------------+----------------------+-------------------------+
-| TableName | Enable | TimeUnit | Start | End | Prefix | Buckets | StartOf | LastUpdateTime | LastSchedulerTime | State | LastCreatePartitionMsg | LastDropPartitionMsg | ReservedHistoryPeriods |
-+-----------+--------+----------+-------------+------+--------+---------+-----------+----------------+---------------------+--------+------------------------+----------------------+-------------------------+
-| d3 | true | WEEK | -3 | 3 | p | 1 | MONDAY | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | [2021-12-01,2021-12-31] |
-| d5 | true | DAY | -7 | 3 | p | 32 | N/A | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL |
-| d4 | true | WEEK | -3 | 3 | p | 1 | WEDNESDAY | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL |
-| d6 | true | MONTH | -2147483648 | 2 | p | 8 | 3rd | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL |
-| d2 | true | DAY | -3 | 3 | p | 32 | N/A | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL |
-| d7 | true | MONTH | -2147483648 | 5 | p | 8 | 24th | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL |
-+-----------+--------+----------+-------------+------+--------+---------+-----------+----------------+---------------------+--------+------------------------+----------------------+-------------------------+
-7 rows in set (0.02 sec)
-```
-
-* LastUpdateTime: 最后一次修改动态分区属性的时间
-* LastSchedulerTime: 最后一次执行动态分区调度的时间
-* State: 最后一次执行动态分区调度的状态
-* LastCreatePartitionMsg: 最后一次执行动态添加分区调度的错误信息
-* LastDropPartitionMsg: 最后一次执行动态删除分区调度的错误信息
-
-## 高级操作
-
-### FE 配置项
-
-* dynamic\_partition\_enable
-
- 是否开启 Doris 的动态分区功能。默认为 false,即关闭。该参数只影响动态分区表的分区操作,不影响普通表。可以通过修改 fe.conf 中的参数并重启 FE 生效。也可以在运行时执行以下命令生效:
-
- MySQL 协议:
-
- `ADMIN SET FRONTEND CONFIG ("dynamic_partition_enable" = "true")`
-
- HTTP 协议:
-
- `curl --location-trusted -u username:password -XGET http://fe_host:fe_http_port/api/_set_config?dynamic_partition_enable=true`
-
- 若要全局关闭动态分区,则设置此参数为 false 即可。
-
-* dynamic\_partition\_check\_interval\_seconds
-
- 动态分区线程的执行频率,默认为600(10分钟),即每10分钟进行一次调度。可以通过修改 fe.conf 中的参数并重启 FE 生效。也可以在运行时执行以下命令修改:
-
- MySQL 协议:
-
- `ADMIN SET FRONTEND CONFIG ("dynamic_partition_check_interval_seconds" = "7200")`
-
- HTTP 协议:
-
- `curl --location-trusted -u username:password -XGET http://fe_host:fe_http_port/api/_set_config?dynamic_partition_check_interval_seconds=432000`
-
-### 动态分区表与手动分区表相互转换
-
-对于一个表来说,动态分区和手动分区可以自由转换,但二者不能同时存在,有且只有一种状态。
-
-#### 手动分区转换为动态分区
-
-如果一个表在创建时未指定动态分区,可以通过 `ALTER TABLE` 在运行时修改动态分区相关属性来转化为动态分区,具体示例可以通过 `HELP ALTER TABLE` 查看。
-
-开启动态分区功能后,Doris 将不再允许用户手动管理分区,会根据动态分区属性来自动管理分区。
-
-**注意**:如果已设定 `dynamic_partition.start`,分区范围在动态分区起始偏移之前的历史分区将会被删除。
-
-#### 动态分区转换为手动分区
-
-通过执行 `ALTER TABLE tbl_name SET ("dynamic_partition.enable" = "false")` 即可关闭动态分区功能,将其转换为手动分区表。
-
-关闭动态分区功能后,Doris 将不再自动管理分区,需要用户手动通过 `ALTER TABLE` 的方式创建或删除分区。
-
-## 常见问题
-
-1. 创建动态分区表后提示 ```Could not create table with dynamic partition when fe config dynamic_partition_enable is false```
-
- 由于动态分区的总开关,也就是 FE 的配置 ```dynamic_partition_enable``` 为 false,导致无法创建动态分区表。
-
- 这时候请修改 FE 的配置文件,增加一行 ```dynamic_partition_enable=true```,并重启 FE。或者执行命令 ADMIN SET FRONTEND CONFIG ("dynamic_partition_enable" = "true") 将动态分区开关打开即可。
diff --git a/docs/zh-CN/administrator-guide/export-manual.md b/docs/zh-CN/administrator-guide/export-manual.md
deleted file mode 100644
index b71f9bfce7..0000000000
--- a/docs/zh-CN/administrator-guide/export-manual.md
+++ /dev/null
@@ -1,202 +0,0 @@
----
-{
- "title": "数据导出",
- "language": "zh-CN"
-}
----
-
-
-
-# 数据导出
-
-数据导出(Export)是 Doris 提供的一种将数据导出的功能。该功能可以将用户指定的表或分区的数据,以文本的格式,通过 Broker 进程导出到远端存储上,如 HDFS/BOS 等。
-
-本文档主要介绍 Export 的基本原理、使用方式、最佳实践以及注意事项。
-
-## 名词解释
-
-* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。
-* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。
-* Broker:Doris 可以通过 Broker 进程对远端存储进行文件操作。
-* Tablet:数据分片。一个表会划分成多个数据分片。
-
-## 原理
-
-用户提交一个 Export 作业后。Doris 会统计这个作业涉及的所有 Tablet。然后对这些 Tablet 进行分组,每组生成一个特殊的查询计划。该查询计划会读取所包含的 Tablet 上的数据,然后通过 Broker 将数据写到远端存储指定的路径中,也可以通过S3协议直接导出到支持S3协议的远端存储上。
-
-总体的调度方式如下:
-
-```
-+--------+
-| Client |
-+---+----+
- | 1. Submit Job
- |
-+---v--------------------+
-| FE |
-| |
-| +-------------------+ |
-| | ExportPendingTask | |
-| +-------------------+ |
-| | 2. Generate Tasks
-| +--------------------+ |
-| | ExportExporingTask | |
-| +--------------------+ |
-| |
-| +-----------+ | +----+ +------+ +---------+
-| | QueryPlan +----------------> BE +--->Broker+---> |
-| +-----------+ | +----+ +------+ | Remote |
-| +-----------+ | +----+ +------+ | Storage |
-| | QueryPlan +----------------> BE +--->Broker+---> |
-| +-----------+ | +----+ +------+ +---------+
-+------------------------+ 3. Execute Tasks
-
-```
-
-1. 用户提交一个 Export 作业到 FE。
-2. FE 的 Export 调度器会通过两阶段来执行一个 Export 作业:
- 1. PENDING:FE 生成 ExportPendingTask,向 BE 发送 snapshot 命令,对所有涉及到的 Tablet 做一个快照。并生成多个查询计划。
- 2. EXPORTING:FE 生成 ExportExportingTask,开始执行查询计划。
-
-### 查询计划拆分
-
-Export 作业会生成多个查询计划,每个查询计划负责扫描一部分 Tablet。每个查询计划扫描的 Tablet 个数由 FE 配置参数 `export_tablet_num_per_task` 指定,默认为 5。即假设一共 100 个 Tablet,则会生成 20 个查询计划。用户也可以在提交作业时,通过作业属性 `tablet_num_per_task` 指定这个数值。
-
-一个作业的多个查询计划顺序执行。
-
-### 查询计划执行
-
-一个查询计划扫描多个分片,将读取的数据以行的形式组织,每 1024 行为一个 batch,调用 Broker 写入到远端存储上。
-
-查询计划遇到错误会整体自动重试 3 次。如果一个查询计划重试 3 次依然失败,则整个作业失败。
-
-Doris 会首先在指定的远端存储的路径中,建立一个名为 `__doris_export_tmp_12345` 的临时目录(其中 `12345` 为作业 id)。导出的数据首先会写入这个临时目录。每个查询计划会生成一个文件,文件名示例:
-
-`export-data-c69fcf2b6db5420f-a96b94c1ff8bccef-1561453713822`
-
-其中 `c69fcf2b6db5420f-a96b94c1ff8bccef` 为查询计划的 query id。`1561453713822` 为文件生成的时间戳。
-
-当所有数据都导出后,Doris 会将这些文件 rename 到用户指定的路径中。
-
-### Broker 参数
-
-Export 需要借助 Broker 进程访问远端存储,不同的 Broker 需要提供不同的参数,具体请参阅 [Broker文档](./broker.md)
-
-## 使用示例
-
-Export 的详细命令可以通过 `HELP EXPORT;` 。举例如下:
-
-```
-EXPORT TABLE db1.tbl1
-PARTITION (p1,p2)
-[WHERE [expr]]
-TO "hdfs://host/path/to/export/"
-PROPERTIES
-(
- "label" = "mylabel",
- "column_separator"=",",
- "columns" = "col1,col2",
- "exec_mem_limit"="2147483648",
- "timeout" = "3600"
-)
-WITH BROKER "hdfs"
-(
- "username" = "user",
- "password" = "passwd"
-);
-```
-
-* `label`:本次导出作业的标识。后续可以使用这个标识查看作业状态。
-* `column_separator`:列分隔符。默认为 `\t`。支持不可见字符,比如 '\x07'。
-* `columns`:要导出的列,使用英文状态逗号隔开,如果不填这个参数默认是导出表的所有列。
-* `line_delimiter`:行分隔符。默认为 `\n`。支持不可见字符,比如 '\x07'。
-* `exec_mem_limit`: 表示 Export 作业中,一个查询计划在单个 BE 上的内存使用限制。默认 2GB。单位字节。
-* `timeout`:作业超时时间。默认 2小时。单位秒。
-* `tablet_num_per_task`:每个查询计划分配的最大分片数。默认为 5。
-
-提交作业后,可以通过 `SHOW EXPORT` 命令查询导入作业状态。结果举例如下:
-
-```
- JobId: 14008
- Label: mylabel
- State: FINISHED
- Progress: 100%
- TaskInfo: {"partitions":["*"],"exec mem limit":2147483648,"column separator":",","line delimiter":"\n","tablet num":1,"broker":"hdfs","coord num":1,"db":"default_cluster:db1","tbl":"tbl3"}
- Path: bos://bj-test-cmy/export/
-CreateTime: 2019-06-25 17:08:24
- StartTime: 2019-06-25 17:08:28
-FinishTime: 2019-06-25 17:08:34
- Timeout: 3600
- ErrorMsg: N/A
-```
-
-* JobId:作业的唯一 ID
-* Label:自定义作业标识
-* State:作业状态:
- * PENDING:作业待调度
- * EXPORTING:数据导出中
- * FINISHED:作业成功
- * CANCELLED:作业失败
-* Progress:作业进度。该进度以查询计划为单位。假设一共 10 个查询计划,当前已完成 3 个,则进度为 30%。
-* TaskInfo:以 Json 格式展示的作业信息:
- * db:数据库名
- * tbl:表名
- * partitions:指定导出的分区。`*` 表示所有分区。
- * exec mem limit:查询计划内存使用限制。单位字节。
- * column separator:导出文件的列分隔符。
- * line delimiter:导出文件的行分隔符。
- * tablet num:涉及的总 Tablet 数量。
- * broker:使用的 broker 的名称。
- * coord num:查询计划的个数。
-* Path:远端存储上的导出路径。
-* CreateTime/StartTime/FinishTime:作业的创建时间、开始调度时间和结束时间。
-* Timeout:作业超时时间。单位是秒。该时间从 CreateTime 开始计算。
-* ErrorMsg:如果作业出现错误,这里会显示错误原因。
-
-## 最佳实践
-
-### 查询计划的拆分
-
-一个 Export 作业有多少查询计划需要执行,取决于总共有多少 Tablet,以及一个查询计划最多可以分配多少个 Tablet。因为多个查询计划是串行执行的,所以如果让一个查询计划处理更多的分片,则可以减少作业的执行时间。但如果查询计划出错(比如调用 Broker 的 RPC 失败,远端存储出现抖动等),过多的 Tablet 会导致一个查询计划的重试成本变高。所以需要合理安排查询计划的个数以及每个查询计划所需要扫描的分片数,在执行时间和执行成功率之间做出平衡。一般建议一个查询计划扫描的数据量在 3-5 GB内(一个表的 Tablet 的大小以及个数可以通过 `SHOW TABLET FROM tbl_name;` 语句查看。)。
-
-### exec\_mem\_limit
-
-通常一个 Export 作业的查询计划只有 `扫描`-`导出` 两部分,不涉及需要太多内存的计算逻辑。所以通常 2GB 的默认内存限制可以满足需求。但在某些场景下,比如一个查询计划,在同一个 BE 上需要扫描的 Tablet 过多,或者 Tablet 的数据版本过多时,可能会导致内存不足。此时需要通过这个参数设置更大的内存,比如 4GB、8GB 等。
-
-## 注意事项
-
-* 不建议一次性导出大量数据。一个 Export 作业建议的导出数据量最大在几十 GB。过大的导出会导致更多的垃圾文件和更高的重试成本。
-* 如果表数据量过大,建议按照分区导出。
-* 在 Export 作业运行过程中,如果 FE 发生重启或切主,则 Export 作业会失败,需要用户重新提交。
-* 如果 Export 作业运行失败,在远端存储中产生的 `__doris_export_tmp_xxx` 临时目录,以及已经生成的文件不会被删除,需要用户手动删除。
-* 如果 Export 作业运行成功,在远端存储中产生的 `__doris_export_tmp_xxx` 目录,根据远端存储的文件系统语义,可能会保留,也可能会被清除。比如在百度对象存储(BOS)中,通过 rename 操作将一个目录中的最后一个文件移走后,该目录也会被删除。如果该目录没有被清除,用户可以手动清除。
-* 当 Export 运行完成后(成功或失败),FE 发生重启或切主,则 `SHOW EXPORT` 展示的作业的部分信息会丢失,无法查看。
-* Export 作业只会导出 Base 表的数据,不会导出 Rollup Index 的数据。
-* Export 作业会扫描数据,占用 IO 资源,可能会影响系统的查询延迟。
-
-## 相关配置
-
-### FE
-
-* `export_checker_interval_second`:Export 作业调度器的调度间隔,默认为 5 秒。设置该参数需重启 FE。
-* `export_running_job_num_limit`:正在运行的 Export 作业数量限制。如果超过,则作业将等待并处于 PENDING 状态。默认为 5,可以运行时调整。
-* `export_task_default_timeout_second`:Export 作业默认超时时间。默认为 2 小时。可以运行时调整。
-* `export_tablet_num_per_task`:一个查询计划负责的最大分片数。默认为 5。
-
diff --git a/docs/zh-CN/administrator-guide/export_with_mysql_dump.md b/docs/zh-CN/administrator-guide/export_with_mysql_dump.md
deleted file mode 100644
index 1bd664fabf..0000000000
--- a/docs/zh-CN/administrator-guide/export_with_mysql_dump.md
+++ /dev/null
@@ -1,41 +0,0 @@
----
-{
-"title": "使用mysqldump数据导出表结构或者数据",
-"language": "zh-CN"
-}
----
-
-
-
-# 使用mysqldump数据导出表结构或者数据
-Doris 在0.15 之后的版本已经支持通过`mysqldump` 工具导出数据或者表结构
-
-## 使用示例
-### 导出
- 1. 导出 test 数据库中的 table1 表:`mysqldump -h127.0.0.1 -P9030 -uroot --no-tablespaces --databases test --tables table1`
- 2. 导出 test 数据库中的 table1 表结构:`mysqldump -h127.0.0.1 -P9030 -uroot --no-tablespaces --databases test --tables table1 --no-data`
- 3. 导出 test1, test2 数据库中所有表:`mysqldump -h127.0.0.1 -P9030 -uroot --no-tablespaces --databases test1 test2`
- 4. 导出所有数据库和表 `mysqldump -h127.0.0.1 -P9030 -uroot --no-tablespaces --all-databases`
-
-更多的使用参数可以参考 `mysqldump` 的使用手册
-### 导入
- `mysqldump` 导出的结果可以重定向到文件中,之后可以通过 source 命令导入到Doris 中 `source filename.sql`
-## 注意
- 1. 由于Doris 中没有mysql 里的 tablespace 概念,因此在使用mysqldump 时要加上 `--no-tablespaces` 参数
- 2. 使用mysqldump 导出数据和表结构仅用于开发测试或者数据量很小的情况,请勿用于大数据量的生产环境
diff --git a/docs/zh-CN/administrator-guide/ldap.md b/docs/zh-CN/administrator-guide/ldap.md
deleted file mode 100644
index 5511fa15c0..0000000000
--- a/docs/zh-CN/administrator-guide/ldap.md
+++ /dev/null
@@ -1,177 +0,0 @@
----
-{
- "title": "LDAP",
- "language": "zh-CN"
-}
----
-
-
-
-# LDAP
-
-接入第三方LDAP服务为Doris提供验证登录和组授权服务。
-
-LDAP验证登录指的是接入LDAP服务的密码验证来补充Doris的验证登录。Doris优先使用LDAP验证用户密码,如果LDAP服务中不存在该用户则继续使用Doris验证密码,如果LDAP密码正确但是Doris中没有对应账户则创建临时用户登录Doris。
-
-LDAP组授权是将LDAP中的group映射到Doris中的Role,如果用户在LDAP中属于多个用户组,登录Doris后用户将获得所有组对应Role的权限,要求组名与Role名字相同。
-
-## 名词解释
-
-* LDAP: 轻量级目录访问协议,能够实现账号密码的集中管理。
-* 权限 Privilege:权限作用的对象是节点、数据库或表。不同的权限代表不同的操作许可。
-* 角色 Role:Doris可以创建自定义命名的角色。角色可以被看做是一组权限的集合。
-
-## 启用LDAP认证
-### server端配置
-
-需要在fe/conf/ldap.conf文件中配置LDAP基本信息,另有LDAP管理员密码需要使用sql语句进行设置。
-
-#### 配置fe/conf/ldap.conf文件:
-* ldap_authentication_enabled = false
- 设置值为“true”启用LDAP验证;当值为“false”时,不启用LDAP验证,该配置文件的其他配置项都无效。
-
-* ldap_host = 127.0.0.1
- LDAP服务ip。
-
-* ldap_port = 389
- LDAP服务端口,默认明文传输端口为389,目前Doris的LDAP功能仅支持明文密码传输。
-
-* ldap_admin_name = cn=admin,dc=domain,dc=com
- LDAP管理员账户“Distinguished Name”。当用户使用LDAP验证登录Doris时,Doris会绑定该管理员账户在LDAP中搜索用户信息。
-
-* ldap_user_basedn = ou=people,dc=domain,dc=com
- Doris在LDAP中搜索用户信息时的base dn。
-
-* ldap_user_filter = (&(uid={login}))
- Doris在LDAP中搜索用户信息时的过滤条件,占位符“{login}”会被替换为登录用户名。必须保证通过该过滤条件搜索的用户唯一,否则Doris无法通过LDAP验证密码,登录时会出现“ERROR 5081 (42000): user is not unique in LDAP server.”的错误信息。
-
- 例如使用LDAP用户节点uid属性作为登录Doris的用户名可以配置该项为:
- ldap_user_filter = (&(uid={login}));
- 使用LDAP用户邮箱前缀作为用户名可配置该项:
- ldap_user_filter = (&(mail={login}@baidu.com))。
-
-* ldap_group_basedn = ou=group,dc=domain,dc=com
- Doris在LDAP中搜索组信息时的base dn。如果不配置该项,将不启用LDAP组授权。
-
-#### 设置LDAP管理员密码:
-配置好ldap.conf文件后启动fe,使用root或admin账号登录Doris,执行sql:
-```
-set ldap_admin_password = 'ldap_admin_password';
-```
-
-### Client端配置
-客户端使用LDAP验证需要启用mysql客户端明文验证插件,使用命令行登录Doris可以使用下面两种方式之一启用mysql明文验证插件:
-
-* 设置环境变量 LIBMYSQL_ENABLE_CLEARTEXT_PLUGIN 的值为 1。
-
-  例如在 Linux 或者 Mac 环境中可以使用:
- ```
- echo "export LIBMYSQL_ENABLE_CLEARTEXT_PLUGIN=1" >> ~/.bash_profile && source ~/.bash_profile
- ```
-
-* 每次登录Doris时添加参数“--enable-cleartext-plugin”:
- ```
- mysql -hDORIS_HOST -PDORIS_PORT -u user -p --enable-cleartext-plugin
-
- 输入ldap密码
- ```
-
-## LDAP认证详解
-LDAP密码验证和组授权是Doris密码验证和授权的补充,开启LDAP功能并不能完全替代Doris的密码验证和授权,而是与Doris密码验证和授权并存。
-
-### LDAP验证登录详解
-开启LDAP后,用户在Doris和LDAP中存在以下几种情况:
-
-|LDAP用户|Doris用户|密码|登录情况|登录Doris的用户|
-|--|--|--|--|--|
-|存在|存在|LDAP密码|登录成功|Doris用户|
-|存在|存在|Doris密码|登录失败|无|
-|不存在|存在|Doris密码|登录成功|Doris用户|
-|存在|不存在|LDAP密码|登录成功|Ldap临时用户|
-
-开启LDAP后,用户使用mysql client登录时,Doris会先通过LDAP服务验证用户密码,如果LDAP存在用户且密码正确,Doris则使用该用户登录;此时Doris若存在对应账户则直接登录该账户,如果不存在对应账户则为用户创建临时账户并登录该账户。临时账户具有相应的权限(参见LDAP组授权),仅对当前连接有效,Doris不会创建该用户,也不会产生创建用户的元数据。
-如果LDAP服务中不存在登录用户,则使用Doris进行密码认证。
-
-以下假设已开启LDAP认证,配置ldap_user_filter = (&(uid={login})),且其他配置项都正确,客户端设置环境变量LIBMYSQL_ENABLE_CLEARTEXT_PLUGIN=1
-
-例如:
-
-#### 1:Doris和LDAP中都存在账户:
-
-存在Doris账户:jack@'172.10.1.10',密码:123456
-LDAP用户节点存在属性:uid: jack 用户密码:abcdef
-使用以下命令登录Doris可以登录jack@'172.10.1.10'账户:
-```
-mysql -hDoris_HOST -PDoris_PORT -ujack -pabcdef
-```
-
-使用以下命令将登录失败:
-```
-mysql -hDoris_HOST -PDoris_PORT -ujack -p123456
-```
-
-#### 2:LDAP中存在用户,Doris中不存在对应账户:
-
-LDAP用户节点存在属性:uid: jack 用户密码:abcdef
-使用以下命令创建临时用户并登录jack@'%',临时用户具有基本权限 DatabasePrivs:Select_priv, 用户退出登录后Doris将删除该临时用户:
-```
-mysql -hDoris_HOST -PDoris_PORT -ujack -pabcdef
-```
-
-#### 3:LDAP不存在用户:
-
-存在Doris账户:jack@'172.10.1.10',密码:123456
-使用Doris密码登录账户,成功:
-```
-mysql -hDoris_HOST -PDoris_PORT -ujack -p123456
-```
-
-### LDAP组授权详解
-
-LDAP用户dn若是LDAP组节点的“member”属性的值,则Doris认为用户属于该组。LDAP组授权是将LDAP中的group映射到Doris中的role,并将所有对应的role权限授予登录用户,用户退出登录后Doris会撤销对应的role权限。在使用LDAP组授权前,应该在Doris中创建相应的role,并为role授权。
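-
-例如,可以预先在Doris中创建与LDAP组同名的role并授权(示意SQL,库名与权限仅为举例):
-
-```
-CREATE ROLE doris_rd;
-GRANT SELECT_PRIV ON example_db.* TO ROLE 'doris_rd';
-```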
-
-登录用户权限跟Doris用户和组权限有关,见下表:
-|LDAP用户|Doris用户|登录用户的权限|
-|--|--|--|
-|存在|存在|LDAP组权限 + Doris用户权限|
-|不存在|存在|Doris用户权限|
-|存在|不存在|LDAP组权限|
-
-如果登录的用户为临时用户,且不存在组权限,则该用户默认具有information_schema的select_priv权限
-
-举例:
-若LDAP用户dn是LDAP组节点“member”属性的值,则认为用户属于该组,Doris会截取组dn的第一个RDN作为组名。
-例如用户dn为“uid=jack,ou=aidp,dc=domain,dc=com”, 组信息如下:
-```
-dn: cn=doris_rd,ou=group,dc=domain,dc=com
-objectClass: groupOfNames
-member: uid=jack,ou=aidp,dc=domain,dc=com
-```
-则组名为doris_rd。
-
-假如jack还属于LDAP组doris_qa、doris_pm;Doris存在role:doris_rd、doris_qa、doris_pm,在使用LDAP验证登录后,用户不但具有该账户原有的权限,还将获得role doris_rd、doris_qa和doris_pm的权限。
-
-## LDAP验证的局限
-
-* 目前Doris的LDAP功能只支持明文密码验证,即用户登录时,密码在client与fe之间、fe与LDAP服务之间以明文的形式传输。
-* 当前的LDAP验证只支持在mysql协议下进行密码验证,如果使用Http接口则无法使用LDAP用户进行验证。
-* 临时用户不具有用户属性。
-
diff --git a/docs/zh-CN/administrator-guide/load-data/batch-delete-manual.md b/docs/zh-CN/administrator-guide/load-data/batch-delete-manual.md
deleted file mode 100644
index e86cc895b6..0000000000
--- a/docs/zh-CN/administrator-guide/load-data/batch-delete-manual.md
+++ /dev/null
@@ -1,205 +0,0 @@
----
-{
- "title": "批量删除",
- "language": "zh-CN"
-}
----
-
-
-
-# 批量删除
-目前 Doris 支持 broker load、routine load、stream load 等多种导入方式。对于数据的删除,目前只能通过 delete 语句进行:每执行一次 delete 都会生成一个新的数据版本,如果频繁删除会严重影响查询性能;并且 delete 是通过生成一个空的 rowset 来记录删除条件实现的,每次读取都要对删除条件进行过滤,条件较多时同样会影响性能。对比其他系统,Greenplum 的实现方式更像传统数据库产品,Snowflake 则通过 merge 语法实现。
-
-对于类似 CDC 数据导入的场景,数据中 insert 和 delete 一般是穿插出现的。面对这种场景,现有的导入方式无法满足需求:即使能够分离出 insert 和 delete,也只能解决导入的问题,仍然解决不了删除的问题。使用批量删除功能可以满足这些场景的需求。
-数据导入有三种合并方式:
-1. APPEND: 数据全部追加到现有数据中
-2. DELETE: 删除所有与导入数据key 列值相同的行
-3. MERGE: 根据 DELETE ON 的条件决定是 APPEND 还是 DELETE
-
-## 原理
-通过增加一个隐藏列`__DORIS_DELETE_SIGN__`实现。因为我们只在 unique 模型上做批量删除,因此只需要增加一个类型为 bool、聚合函数为 replace 的隐藏列即可。在 BE 上,各种聚合写入流程都和正常列一样。读取方案有两个:
-
-1. 在 FE 遇到 * 等扩展时去掉 `__DORIS_DELETE_SIGN__`,并且默认加上 `__DORIS_DELETE_SIGN__ != true` 的条件;
-2. BE 读取时都会加上这一列进行判断,通过条件确定是否删除。
-
-### 导入
-
-导入时,FE 在解析阶段将隐藏列的值设置成 `DELETE ON` 表达式的值,其他的聚合行为和 replace 聚合列相同。
-
-### 读取
-
-读取时在所有存在隐藏列的olapScanNode上增加`__DORIS_DELETE_SIGN__ != true` 的条件,be 不感知这一过程,正常执行
-
-### Cumulative Compaction
-
-Cumulative Compaction 时将隐藏列看作正常的列处理,Compaction逻辑没有变化
-
-### Base Compaction
-
-Base Compaction 时要将标记为删除的行删掉,以减少数据占用的空间。
-
-### 语法
-导入的语法设计方面,主要是增加一个指定删除标记列的 column 映射,并且需要在导入数据中增加这一列。各个导入方式的设置方法如下。
-
-#### stream load
-
-stream load 的写法是在 header 中的 columns 字段增加一个设置删除标记列的字段,示例:
-` -H "columns: k1, k2, label_c3" -H "merge_type: [MERGE|APPEND|DELETE]" -H "delete: label_c3=1"`
-
-#### broker load
-
-在`PROPERTIES` 处设置删除标记列的字段
-
-```
-LOAD LABEL db1.label1
-(
- [MERGE|APPEND|DELETE] DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file1")
- INTO TABLE tbl1
- COLUMNS TERMINATED BY ","
- (tmp_c1,tmp_c2, label_c3)
- SET
- (
-        id=tmp_c2,
-        name=tmp_c1
- )
- [DELETE ON label=true]
-
-)
-WITH BROKER 'broker'
-(
- "username"="user",
- "password"="pass"
-)
-PROPERTIES
-(
- "timeout" = "3600"
-
-);
-
-```
-
-#### routine load
-
-routine load 在 `columns` 字段增加映射,映射方式同上,示例如下:
-
-```
- CREATE ROUTINE LOAD example_db.test1 ON example_tbl
- [WITH MERGE|APPEND|DELETE]
- COLUMNS(k1, k2, k3, v1, v2, label),
- WHERE k1 > 100 and k2 like "%doris%"
- [DELETE ON label=true]
- PROPERTIES
- (
- "desired_concurrent_number"="3",
- "max_batch_interval" = "20",
- "max_batch_rows" = "300000",
- "max_batch_size" = "209715200",
- "strict_mode" = "false"
- )
- FROM KAFKA
- (
- "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092",
- "kafka_topic" = "my_topic",
- "kafka_partitions" = "0,1,2,3",
- "kafka_offsets" = "101,0,0,200"
- );
-```
-
-## 启用批量删除支持
-启用批量删除支持有两种形式:
-1. 通过在 FE 配置文件中增加 `enable_batch_delete_by_default=true`,重启 FE 后新建的表都支持批量删除,此选项默认为 false
-
-2. 对于没有更改上述 FE 配置或已存在的不支持批量删除功能的表,可以使用如下语句:
-`ALTER TABLE tablename ENABLE FEATURE "BATCH_DELETE"` 来启用批量删除。本操作本质上是一个 schema change 操作,操作立即返回,可以通过`show alter table column` 来确认操作是否完成。
-
-如需确定一个表是否支持批量删除,可以通过设置 session variable 来显示隐藏列:`SET show_hidden_columns=true`,之后使用`desc tablename`,如果输出中有`__DORIS_DELETE_SIGN__` 列则支持,如果没有则不支持。
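-
-示意操作如下(`tablename` 为占位表名):
-
-```
-SET show_hidden_columns=true;
-DESC tablename;
-```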
-
-## 注意
-1. 由于除 stream load 外的导入操作在 Doris 内部有可能乱序执行,因此在使用 `MERGE` 方式导入时,如果不是 stream load,需要与 load sequence 一起使用,具体的语法可以参照 sequence 列相关的文档
-2. `DELETE ON` 条件只能与 MERGE 一起使用
-
-## 使用示例
-下面以stream load 为例 展示下使用方式
-1. 正常导入数据:
-```
-curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv" -H "merge_type: APPEND" -T ~/table1_data http://127.0.0.1:8130/api/test/table1/_stream_load
-```
-其中的APPEND 条件可以省略,与下面的语句效果相同:
-```
-curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv" -T ~/table1_data http://127.0.0.1:8130/api/test/table1/_stream_load
-```
-2. 将与导入数据key 相同的数据全部删除
-```
-curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv" -H "merge_type: DELETE" -T ~/table1_data http://127.0.0.1:8130/api/test/table1/_stream_load
-```
-假设导入表中原有数据为:
-```
-+--------+----------+----------+------+
-| siteid | citycode | username | pv |
-+--------+----------+----------+------+
-| 3 | 2 | tom | 2 |
-| 4 | 3 | bush | 3 |
-| 5 | 3 | helen | 3 |
-+--------+----------+----------+------+
-```
-导入数据为:
-```
-3,2,tom,0
-```
-导入后数据变成:
-```
-+--------+----------+----------+------+
-| siteid | citycode | username | pv |
-+--------+----------+----------+------+
-| 4 | 3 | bush | 3 |
-| 5 | 3 | helen | 3 |
-+--------+----------+----------+------+
-```
-3. 将导入数据中满足 `siteid=1` 的行按 DELETE 语义处理(删除表中与其 key 列相同的行),其余行按 APPEND 语义追加:
-```
-curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv" -H "merge_type: MERGE" -H "delete: siteid=1" -T ~/table1_data http://127.0.0.1:8130/api/test/table1/_stream_load
-```
-假设导入前数据为:
-```
-+--------+----------+----------+------+
-| siteid | citycode | username | pv |
-+--------+----------+----------+------+
-| 4 | 3 | bush | 3 |
-| 5 | 3 | helen | 3 |
-| 1 | 1 | jim | 2 |
-+--------+----------+----------+------+
-```
- 导入数据为:
-```
-2,1,grace,2
-3,2,tom,2
-1,1,jim,2
-```
-导入后为:
-```
-+--------+----------+----------+------+
-| siteid | citycode | username | pv |
-+--------+----------+----------+------+
-| 4 | 3 | bush | 3 |
-| 2 | 1 | grace | 2 |
-| 3 | 2 | tom | 2 |
-| 5 | 3 | helen | 3 |
-+--------+----------+----------+------+
-```
\ No newline at end of file
diff --git a/docs/zh-CN/administrator-guide/load-data/binlog-load-manual.md b/docs/zh-CN/administrator-guide/load-data/binlog-load-manual.md
deleted file mode 100644
index 8862a0a113..0000000000
--- a/docs/zh-CN/administrator-guide/load-data/binlog-load-manual.md
+++ /dev/null
@@ -1,502 +0,0 @@
----
-{
- "title": "Binlog Load",
- "language": "zh-CN"
-}
----
-
-
-
-
-# Binlog Load
-Binlog Load 提供了一种使 Doris 增量同步用户在 MySQL 数据库中对数据更新操作的 CDC(Change Data Capture)功能。
-
-## 适用场景
-
-* INSERT/UPDATE/DELETE支持
-* 过滤Query
-* 暂不兼容DDL语句
-
-## 名词解释
-1. Frontend(FE):Doris 系统的元数据和调度节点。在导入流程中主要负责导入 plan 生成和导入任务的调度工作。
-2. Backend(BE):Doris 系统的计算和存储节点。在导入流程中主要负责数据的 ETL 和存储。
-3. Canal:阿里巴巴开源的Mysql Binlog日志解析工具。提供增量数据订阅&消费等功能。
-4. Batch:Canal发送到客户端的一批数据,具有全局唯一自增的ID。
-5. SyncJob:用户提交的一个数据同步作业。
-6. Receiver: 负责订阅并接收Canal的数据。
-7. Consumer: 负责分发Receiver接收的数据到各个Channel。
-8. Channel: 接收Consumer分发的数据的渠道,创建发送数据的子任务,控制单个表事务的开启、提交、终止。
-9. Task:Channel向Be发送数据的子任务。
-
-## 基本原理
-在第一期的设计中,Binlog Load需要依赖canal作为中间媒介,让canal伪造成一个从节点去获取Mysql主节点上的Binlog并解析,再由Doris去获取Canal上解析好的数据,主要涉及Mysql端、Canal端以及Doris端,总体数据流向如下:
-
-```
-+---------------------------------------------+
-| Mysql |
-+----------------------+----------------------+
- | Binlog
-+----------------------v----------------------+
-| Canal Server |
-+-------------------+-----^-------------------+
- Get | | Ack
-+-------------------|-----|-------------------+
-| FE | | |
-| +-----------------|-----|----------------+ |
-| | Sync Job | | | |
-| | +------------v-----+-----------+ | |
-| | | Canal Client | | |
-| | | +-----------------------+ | | |
-| | | | Receiver | | | |
-| | | +-----------------------+ | | |
-| | | +-----------------------+ | | |
-| | | | Consumer | | | |
-| | | +-----------------------+ | | |
-| | +------------------------------+ | |
-| +----+---------------+--------------+----+ |
-| | | | |
-| +----v-----+ +-----v----+ +-----v----+ |
-| | Channel1 | | Channel2 | | Channel3 | |
-| | [Table1] | | [Table2] | | [Table3] | |
-| +----+-----+ +-----+----+ +-----+----+ |
-| | | | |
-| +--|-------+ +---|------+ +---|------+|
-| +---v------+| +----v-----+| +----v-----+||
-| +----------+|+ +----------+|+ +----------+|+|
-| | Task |+ | Task |+ | Task |+ |
-| +----------+ +----------+ +----------+ |
-+----------------------+----------------------+
- | | |
-+----v-----------------v------------------v---+
-| Coordinator |
-| BE |
-+----+-----------------+------------------+---+
- | | |
-+----v---+ +---v----+ +----v---+
-| BE | | BE | | BE |
-+--------+ +--------+ +--------+
-
-```
-
-如上图,用户向FE提交一个数据同步作业。
-
-FE会为每个数据同步作业启动一个canal client,来向canal server端订阅并获取数据。
-
-client中的receiver将负责通过Get命令接收数据,每获取到一个数据batch,都会由consumer根据对应表分发到不同的channel,每个channel都会为此数据batch产生一个发送数据的子任务Task。
-
-在FE上,一个Task是channel向BE发送数据的子任务,里面包含分发到当前channel的同一个batch的数据。
-
-channel控制着单个表事务的开始、提交、终止。一个事务周期内,一般会从consumer获取到多个batch的数据,因此会产生多个向BE发送数据的子任务Task,在提交事务成功前,这些Task不会实际生效。
-
-满足一定条件时(比如超过一定时间、达到提交最大数据大小),consumer将会阻塞并通知各个channel提交事务。
-
-当且仅当所有channel都提交成功,才会通过Ack命令通知canal并继续获取并消费数据。
-
-如果有任意channel提交失败,将会重新从上一次消费成功的位置获取数据并再次提交(已提交成功的channel不会再次提交以保证幂等性)。
-
-整个数据同步作业中,FE通过以上流程不断的从canal获取数据并提交到BE,来完成数据同步。
-
-## 配置Mysql端
-
-在Mysql Cluster模式的主从同步中,二进制日志文件(Binlog)记录了主节点上的所有数据变化,数据在Cluster的多个节点间同步、备份都要通过Binlog日志进行,从而提高集群的可用性。架构通常由一个主节点(负责写)和一个或多个从节点(负责读)构成,所有在主节点上发生的数据变更将会复制给从节点。
-
-**注意:目前必须要使用Mysql 5.7及以上的版本才能支持Binlog Load功能。**
-
-要打开 MySQL 的二进制日志(Binlog)功能,需要编辑 my.cnf 配置文件,进行如下设置:
-
-```
-[mysqld]
-log-bin = mysql-bin # 开启 binlog
-binlog-format=ROW # 选择 ROW 模式
-```
-
-### Mysql端说明
-
-在Mysql上,Binlog命名格式为mysql-bin.000001、mysql-bin.000002... ,满足一定条件时mysql会去自动切分Binlog日志:
-
-1. mysql重启了
-2. 客户端输入命令flush logs
-3. binlog文件大小超过1G
-
-要定位Binlog的最新的消费位置,可以通过binlog文件名和position(偏移量)。
-
-例如,各个从节点上会保存当前消费到的binlog位置,方便随时断开连接、重新连接和继续消费。
-
-```
---------------------- ---------------------
-| Slave | read | Master |
-| FileName/Position | <<<--------------------------- | Binlog Files |
---------------------- ---------------------
-```
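-
-例如,可以在 MySQL 主节点上执行以下标准命令查看当前最新的 binlog 文件名与偏移量(示意,输出列因版本而异):
-
-```
-SHOW MASTER STATUS;
-```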
-
-对于主节点来说,它只负责写入Binlog,多个从节点可以同时连接到一台主节点上,消费Binlog日志的不同部分,互相之间不会影响。
-
-Binlog日志支持两种主要格式(此外还有混合模式mixed-based):
-
-```
-statement-based格式: Binlog只保存主节点上执行的sql语句,从节点将其复制到本地重新执行
-row-based格式: Binlog会记录主节点的每一行所有列的数据的变更信息,从节点会复制并执行每一行的变更到本地
-```
-
-第一种格式只写入了执行的sql语句,虽然日志量会很少,但是有下列缺点
-
- 1. 没有保存每一行实际的数据
- 2. 在主节点上执行的UDF、随机、时间函数会在从节点上结果不一致
- 3. limit语句执行顺序可能不一致
-
-因此我们需要选择第二种格式,才能从Binlog日志中解析出每一行数据。
-
-在row-based格式下,Binlog会记录每一条binlog event的时间戳,server id,偏移量等信息,如下面一条带有两条insert语句的事务:
-
-```
-begin;
-insert into canal_test.test_tbl values (3, 300);
-insert into canal_test.test_tbl values (4, 400);
-commit;
-```
-
-对应将会有四条binlog event,其中一条begin event,两条insert event,一条commit event:
-
-```
-SET TIMESTAMP=1538238301/*!*/;
-BEGIN
-/*!*/.
-# at 211935643
-# at 211935698
-#180930 0:25:01 server id 1 end_log_pos 211935698 Table_map: 'canal_test'.'test_tbl' mapped to number 25
-#180930 0:25:01 server id 1 end_log_pos 211935744 Write_rows: table-id 25 flags: STMT_END_F
-...
-'/*!*/;
-### INSERT INTO canal_test.test_tbl
-### SET
-### @1=1
-### @2=100
-# at 211935744
-#180930 0:25:01 server id 1 end_log_pos 211935771 Xid = 2681726641
-...
-'/*!*/;
-### INSERT INTO canal_test.test_tbl
-### SET
-### @1=2
-### @2=200
-# at 211935771
-#180930 0:25:01 server id 1 end_log_pos 211939510 Xid = 2681726641
-COMMIT/*!*/;
-```
-
-如上图所示,每条Insert event中包含了修改的数据。在进行Delete/Update操作时,一条event还能包含多行数据,使得Binlog日志更加的紧密。
-
-
-
-### 开启GTID模式 [可选]
-一个全局事务Id(global transaction identifier)标识出了一个曾在主节点上提交过的事务,在全局都是唯一有效的。开启了Binlog后,GTID会被写入到Binlog文件中,与事务一一对应。
-
-要打开 MySQL 的 GTID 模式,需要编辑 my.cnf 配置文件,进行如下设置:
-
-```
-gtid-mode=on // 开启gtid模式
-enforce-gtid-consistency=1 // 强制gtid和事务的一致性
-```
-
-在GTID模式下,主服务器可以不需要Binlog的文件名和偏移量,就能很方便的追踪事务、恢复数据、复制副本。
-
-在GTID模式下,由于GTID的全局有效性,从节点将不再需要通过保存文件名和偏移量来定位主节点上的Binlog位置,而通过数据本身就可以定位了。在进行数据同步中,从节点会跳过执行任意被识别为已执行的GTID事务。
-
-GTID 的表现形式为一对坐标,`source_id` 标识出主节点,`transaction_id` 表示此事务在主节点上执行的顺序(最大 2^63 - 1)。
-
-```
-GTID = source_id:transaction_id
-```
-
-例如,在同一主节点上执行的第23个事务的gtid为
-
-```
-3E11FA47-71CA-11E1-9E33-C80AA9429562:23
-```
-
-## 配置Canal端
-canal是属于阿里巴巴otter项目下的一个子项目,主要用途是基于 MySQL 数据库增量日志解析,提供增量数据订阅和消费,用于解决跨机房同步的业务场景,建议使用canal 1.1.5及以上版本,[下载地址](https://github.com/alibaba/canal/releases),下载完成后,请按以下步骤完成部署。
-
-1. 解压canal deployer
-2. 在conf文件夹下新建目录并重命名,作为instance的根目录,目录名即后文提到的destination
-3. 修改instance配置文件(可拷贝conf/example/instance.properties)
-
- ```
- vim conf/{your destination}/instance.properties
- ```
- ```
- ## canal instance serverId
- canal.instance.mysql.slaveId = 1234
-    ## mysql address
- canal.instance.master.address = 127.0.0.1:3306
- ## mysql username/password
- canal.instance.dbUsername = canal
- canal.instance.dbPassword = canal
- ```
-
-4. 启动
-
- ```
- sh bin/startup.sh
- ```
-
-5. 验证启动成功
-
- ```
- cat logs/{your destination}/{your destination}.log
- ```
- ```
- 2013-02-05 22:50:45.636 [main] INFO c.a.o.c.i.spring.support.PropertyPlaceholderConfigurer - Loading properties file from class path resource [canal.properties]
- 2013-02-05 22:50:45.641 [main] INFO c.a.o.c.i.spring.support.PropertyPlaceholderConfigurer - Loading properties file from class path resource [xxx/instance.properties]
- 2013-02-05 22:50:45.803 [main] INFO c.a.otter.canal.instance.spring.CanalInstanceWithSpring - start CannalInstance for 1-xxx
- 2013-02-05 22:50:45.810 [main] INFO c.a.otter.canal.instance.spring.CanalInstanceWithSpring - start successful....
- ```
-
-### canal端说明
-
-canal通过伪造自己的mysql dump协议,去伪装成一个从节点,获取主节点的Binlog日志并解析。
-
-canal server上可启动多个instance,一个instance可看作一个从节点,每个instance由下面几个部分组成:
-
-```
--------------------------------------------------
-| Server |
-| -------------------------------------------- |
-| | Instance 1 | |
-| | ----------- ----------- ----------- | |
-| | | Parser | | Sink | | Store | | |
-| | ----------- ----------- ----------- | |
-| | ----------------------------------- | |
-| | | MetaManager | | |
-| | ----------------------------------- | |
-| -------------------------------------------- |
--------------------------------------------------
-```
-
-* parser:数据源接入,模拟slave协议和master进行交互,协议解析
-* sink:parser和store链接器,进行数据过滤,加工,分发的工作
-* store:数据存储
-* meta manager:元数据管理模块
-
-每个instance都有自己在cluster内的唯一标识,即server Id。
-
-在canal server内,instance用字符串表示,此唯一字符串被记为destination,canal client需要通过destination连接到对应的instance。
-
-**注意:canal client和canal instance是一一对应的**,Binlog Load已限制多个数据同步作业不能连接到同一个destination。
-
-数据在instance内的流向是binlog -> parser -> sink -> store。
-
-instance通过parser模块解析binlog日志,解析出来的数据缓存在store里面,当用户向FE提交一个数据同步作业时,会启动一个canal client订阅并获取对应instance中的store内的数据。
-
-store实际上是一个环形的队列,用户可以自行配置它的长度和存储空间。
-
-
-
-store通过三个指针去管理队列内的数据:
-
-1. get指针:get指针代表客户端最后获取到的位置。
-2. ack指针:ack指针记录着最后消费成功的位置。
-3. put指针:代表sink模块最后写入store成功的位置。
-
-```
-canal client异步获取store中数据
-
- get 0 get 1 get 2 put
- | | | ...... |
- v v v v
---------------------------------------------------------------------- store环形队列
- ^ ^
- | |
- ack 0 ack 1
-```
-
-canal client调用get命令时,canal server会产生数据batch发送给client,并右移get指针,client可以获取多个batch,直到get指针赶上put指针为止。
-
-当消费数据成功时,client会返回ack + batch Id通知已消费成功了,并右移ack指针,store会从队列中删除此batch的数据,腾出空间来从上游sink模块获取数据,并右移put指针。
-
-当数据消费失败时,client会返回rollback通知消费失败,store会将get指针重置左移到ack指针位置,使下一次client获取的数据能再次从ack指针处开始。
-
-和Mysql中的从节点一样,canal也需要去保存client最新消费到的位置。canal中所有元数据(如GTID、Binlog位置)都是由MetaManager去管理的,目前元数据默认以json格式持久化在instance根目录下的meta.dat文件内。
-
-## 基本操作
-
-### 配置目标表属性
-
-用户需要先在Doris端创建好与Mysql端对应的目标表
-
-Binlog Load只能支持Unique类型的目标表,且必须激活目标表的Batch Delete功能。
-
-开启Batch Delete的方法可以参考`help alter table`中的批量删除功能。
-
-示例:
-
-```
--- create target table
-CREATE TABLE `test1` (
- `a` int(11) NOT NULL COMMENT "",
- `b` int(11) NOT NULL COMMENT ""
-) ENGINE=OLAP
-UNIQUE KEY(`a`)
-COMMENT "OLAP"
-DISTRIBUTED BY HASH(`a`) BUCKETS 8;
-
--- enable batch delete
-ALTER TABLE canal_test.test1 ENABLE FEATURE "BATCH_DELETE";
-```
-
-### 创建同步作业
-创建数据同步作业的详细语法,可以在连接到 Doris 后执行 `HELP CREATE SYNC JOB;` 查看。这里主要详细介绍创建作业时的注意事项,并在本节末尾给出一个示意性的创建示例。
-
-* job_name
-
- `job_name`是数据同步作业在当前数据库内的唯一标识,相同`job_name`的作业只能有一个在运行。
-
-* channel_desc
-
- `channel_desc `用来定义任务下的数据通道,可表示mysql源表到doris目标表的映射关系。在设置此项时,如果存在多个映射关系,必须满足mysql源表应该与doris目标表是一一对应关系,其他的任何映射关系(如一对多关系),检查语法时都被视为不合法。
-
-* column_mapping
-
- `column_mapping`主要指mysql源表和doris目标表的列之间的映射关系,如果不指定,FE会默认源表和目标表的列按顺序一一对应。但是我们依然建议显式的指定列的映射关系,这样当目标表的结构发生变化(比如增加一个 nullable 的列),数据同步作业依然可以进行。否则,当发生上述变动后,因为列映射关系不再一一对应,导入将报错。
-
-* binlog_desc
-
- `binlog_desc`中的属性定义了对接远端Binlog地址的一些必要信息,目前可支持的对接类型只有canal方式,所有的配置项前都需要加上canal前缀。
-
- 1. `canal.server.ip`: canal server的地址
- 2. `canal.server.port`: canal server的端口
- 3. `canal.destination`: 前文提到的instance的字符串标识
- 4. `canal.batchSize`: 每批从canal server处获取的batch大小的最大值,默认8192
- 5. `canal.username`: instance的用户名
- 6. `canal.password`: instance的密码
- 7. `canal.debug`: 设置为true时,会将batch和每一行数据的详细信息都打印出来,会影响性能。
-
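-一个完整的创建语句示意如下(库表名、地址等均为假设值,实际语法以 `HELP CREATE SYNC JOB;` 为准):
-
-```
-CREATE SYNC test_db.job1
-(
-    FROM mysql_db.source_tbl INTO test1
-)
-FROM BINLOG
-(
-    "type" = "canal",
-    "canal.server.ip" = "127.0.0.1",
-    "canal.server.port" = "11111",
-    "canal.destination" = "example",
-    "canal.username" = "canal",
-    "canal.password" = "canal"
-);
-```
-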
-### 查看作业状态
-
-查看作业状态的具体命令和示例可以通过 `HELP SHOW SYNC JOB;` 命令查看。
-
-返回结果集的参数意义如下:
-
-* State
-
- 作业当前所处的阶段。作业状态之间的转换如下图所示:
-
- ```
- +-------------+
- create job | PENDING | resume job
- +-----------+ <-------------+
- | +-------------+ |
- +----v-------+ +-------+----+
- | RUNNING | pause job | PAUSED |
- | +-----------------------> |
- +----+-------+ run error +-------+----+
- | +-------------+ |
- | | CANCELLED | |
- +-----------> <-------------+
- stop job +-------------+ stop job
- system error
- ```
- 作业提交之后状态为PENDING,由FE调度执行启动canal client后状态变成RUNNING,用户可以通过 STOP/PAUSE/RESUME 三个命令来控制作业的停止,暂停和恢复,操作后作业状态分别为CANCELLED/PAUSED/RUNNING。
-
- 作业的最终阶段只有一个CANCELLED,当作业状态变为CANCELLED后,将无法再次恢复。当作业发生了错误时,若错误是不可恢复的,状态会变成CANCELLED,否则会变成PAUSED。
-
-* Channel
-
- 作业所有源表到目标表的映射关系。
-
-* Status
-
- 当前binlog的消费位置(若设置了GTID模式,会显示GTID),以及doris端执行时间相比mysql端的延迟时间。
-
-* JobConfig
-
- 对接的远端服务器信息,如canal server的地址与连接instance的destination
-
-### 控制作业
-用户可以通过 STOP/PAUSE/RESUME 三个命令来控制作业的停止,暂停和恢复。可以通过`HELP STOP SYNC JOB`; `HELP PAUSE SYNC JOB`; 以及 `HELP RESUME SYNC JOB`; 三个命令查看帮助和示例。
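-
-示意用法如下(`job_name` 为占位作业名):
-
-```
-PAUSE SYNC JOB job_name;
-RESUME SYNC JOB job_name;
-STOP SYNC JOB job_name;
-```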
-
-## 相关参数
-
-### CANAL配置
-
-下面配置属于canal端的配置,主要通过修改 conf 目录下的 canal.properties 调整配置值。
-
-* `canal.ip`
-
- canal server的ip地址
-
-* `canal.port`
-
- canal server的端口
-
-* `canal.instance.memory.buffer.size`
-
- canal端的store环形队列的队列长度,必须设为2的幂次方,默认长度16384。此值等于canal端能缓存event数量的最大值,也直接决定了Doris端一个事务内所能容纳的最大event数量。建议将它改的足够大,防止Doris端一个事务内能容纳的数据量上限太小,导致提交事务太过频繁造成数据的版本堆积。
-
-* `canal.instance.memory.buffer.memunit`
-
- canal端默认一个event所占的空间,默认空间为1024 bytes。此值乘上store环形队列的队列长度等于store的空间最大值,比如store队列长度为16384,则store的空间为16MB。但是,一个event的实际大小并不等于此值,而是由这个event内有多少行数据和每行数据的长度决定的,比如一张只有两列的表的insert event只有30字节,但delete event可能达到数千字节,这是因为通常delete event的行数比insert event多。
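-
-例如,可在 canal.properties 中按如下方式调整上述两项(示意配置,取值仅为举例):
-
-```
-canal.instance.memory.buffer.size = 32768
-canal.instance.memory.buffer.memunit = 1024
-```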
-
-### FE配置
-
-下面配置属于数据同步作业的系统级别配置,主要通过修改 fe.conf 来调整配置值。
-
-* `enable_create_sync_job`
-
- 开启数据同步作业功能。默认为 false,关闭此功能。
-
-* `sync_commit_interval_second`
-
- 提交事务的最大时间间隔。若超过了这个时间channel中还有数据没有提交,consumer会通知channel提交事务。
-
-* `min_sync_commit_size`
-
- 提交事务需满足的最小event数量。若Fe接收到的event数量小于它,会继续等待下一批数据直到时间超过了`sync_commit_interval_second `为止。默认值是10000个events,如果你想修改此配置,请确保此值小于canal端的`canal.instance.memory.buffer.size`配置(默认16384),否则在ack前Fe会尝试获取比store队列长度更多的event,导致store队列阻塞至超时为止。
-
-* `min_bytes_sync_commit`
-
- 提交事务需满足的最小数据大小。若Fe接收到的数据大小小于它,会继续等待下一批数据直到时间超过了`sync_commit_interval_second `为止。默认值是15MB,如果你想修改此配置,请确保此值小于canal端的`canal.instance.memory.buffer.size`和`canal.instance.memory.buffer.memunit`的乘积(默认16MB),否则在ack前Fe会尝试获取比store空间更大的数据,导致store队列阻塞至超时为止。
-
-* `max_bytes_sync_commit`
-
- 提交事务时的数据大小的最大值。若Fe接收到的数据大小大于它,会立即提交事务并发送已积累的数据。默认值是64MB,如果你想修改此配置,请确保此值大于canal端的`canal.instance.memory.buffer.size`和`canal.instance.memory.buffer.memunit`的乘积(默认16MB)和`min_bytes_sync_commit`。
-
-* `max_sync_task_threads_num`
-
- 数据同步作业线程池中的最大线程数量。此线程池整个FE中只有一个,用于处理FE中所有数据同步作业向BE发送数据的任务task,线程池的实现在`SyncTaskPool`类。
-
-## 常见问题
-
-1. 修改表结构是否会影响数据同步作业?
-
- 会影响。数据同步作业并不能禁止`alter table`的操作,当表结构发生了变化,如果列的映射无法匹配,可能导致作业发生错误暂停,建议通过在数据同步作业中显式指定列映射关系,或者通过增加 Nullable 列或带 Default 值的列来减少这类问题。
-
-2. 删除了数据库后数据同步作业还会继续运行吗?
-
- 不会。删除数据库后的几秒日志中可能会出现找不到元数据的错误,之后该数据同步作业会被FE的定时调度检查时停止。
-
-3. 多个数据同步作业可以配置相同的`ip:port + destination`吗?
-
- 不能。创建数据同步作业时会检查`ip:port + destination`与已存在的作业是否重复,防止出现多个作业连接到同一个instance的情况。
-
-4. 为什么数据同步时浮点类型的数据精度在Mysql端和Doris端不一样?
-
- Doris本身浮点类型的精度与Mysql不一样。可以选择用Decimal类型代替。
-
\ No newline at end of file
diff --git a/docs/zh-CN/administrator-guide/load-data/broker-load-manual.md b/docs/zh-CN/administrator-guide/load-data/broker-load-manual.md
deleted file mode 100644
index 9c7ea16401..0000000000
--- a/docs/zh-CN/administrator-guide/load-data/broker-load-manual.md
+++ /dev/null
@@ -1,544 +0,0 @@
----
-{
- "title": "Broker Load",
- "language": "zh-CN"
-}
----
-
-
-
-# Broker Load
-
-Broker load 是一个异步的导入方式,支持的数据源取决于 Broker 进程支持的数据源。
-
-用户需要通过 MySQL 协议 创建 Broker load 导入,并通过查看导入命令检查导入结果。
-
-## 适用场景
-
-* 源数据在 Broker 可以访问的存储系统中,如 HDFS。
-* 数据量在 几十到百GB 级别。
-
-## 名词解释
-
-1. Frontend(FE):Doris 系统的元数据和调度节点。在导入流程中主要负责导入 plan 生成和导入任务的调度工作。
-2. Backend(BE):Doris 系统的计算和存储节点。在导入流程中主要负责数据的 ETL 和存储。
-3. Broker:Broker 为一个独立的无状态进程。封装了文件系统接口,提供 Doris 读取远端存储系统中文件的能力。
-4. Plan:导入执行计划,BE 会执行导入执行计划将数据导入到 Doris 系统中。
-
-## 基本原理
-
-用户在提交导入任务后,FE 会生成对应的 Plan 并根据目前 BE 的个数和文件的大小,将 Plan 分给 多个 BE 执行,每个 BE 执行一部分导入数据。
-
-BE 在执行的过程中会从 Broker 拉取数据,在对数据 transform 之后将数据导入系统。所有 BE 均完成导入,由 FE 最终决定导入是否成功。
-
-```
- +
- | 1. user create broker load
- v
- +----+----+
- | |
- | FE |
- | |
- +----+----+
- |
- | 2. BE etl and load the data
- +--------------------------+
- | | |
-+---v---+ +--v----+ +---v---+
-| | | | | |
-| BE | | BE | | BE |
-| | | | | |
-+---+-^-+ +---+-^-+ +--+-^--+
- | | | | | |
- | | | | | | 3. pull data from broker
-+---v-+-+ +---v-+-+ +--v-+--+
-| | | | | |
-|Broker | |Broker | |Broker |
-| | | | | |
-+---+-^-+ +---+-^-+ +---+-^-+
- | | | | | |
-+---v-+-----------v-+----------v-+-+
-| HDFS/BOS/AFS cluster |
-| |
-+----------------------------------+
-
-```
-
-## 基本操作
-
-### 创建导入
-
-Broker load 创建导入语句
-
-语法:
-
-```
-LOAD LABEL db_name.label_name
-(data_desc, ...)
-WITH BROKER broker_name broker_properties
-[PROPERTIES (key1=value1, ... )]
-
-* data_desc:
-
- DATA INFILE ('file_path', ...)
- [NEGATIVE]
- INTO TABLE tbl_name
- [PARTITION (p1, p2)]
- [COLUMNS TERMINATED BY separator ]
- [(col1, ...)]
- [PRECEDING FILTER predicate]
- [SET (k1=f1(xx), k2=f2(xx))]
- [WHERE predicate]
-
-* broker_properties:
-
- (key1=value1, ...)
-```
-示例:
-
-```
-LOAD LABEL db1.label1
-(
- DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file1")
- INTO TABLE tbl1
- COLUMNS TERMINATED BY ","
- (tmp_c1,tmp_c2)
- SET
- (
- id=tmp_c2,
- name=tmp_c1
- ),
- DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file2")
- INTO TABLE tbl2
- COLUMNS TERMINATED BY ","
- (col1, col2)
- where col1 > 1
-)
-WITH BROKER 'broker'
-(
- "username"="user",
- "password"="pass"
-)
-PROPERTIES
-(
- "timeout" = "3600"
-);
-
-```
-
-创建导入的详细语法执行 ```HELP BROKER LOAD``` 查看语法帮助。这里主要介绍 Broker load 的创建导入语法中参数意义和注意事项。
-
-#### Label
-
-导入任务的标识。每个导入任务,都有一个在单 database 内部唯一的 Label。Label 是用户在导入命令中自定义的名称。通过这个 Label,用户可以查看对应导入任务的执行情况。
-
-Label 的另一个作用,是防止用户重复导入相同的数据。**强烈推荐用户同一批次数据使用相同的label。这样同一批次数据的重复请求只会被接受一次,保证了 At-Most-Once 语义**
-
-当 Label 对应的导入作业状态为 CANCELLED 时,可以再次使用该 Label 提交导入作业。
-
-#### 数据描述类参数
-
-数据描述类参数主要指的是 Broker load 创建导入语句中的属于 ```data_desc``` 部分的参数。每组 ```data_desc ``` 主要表述了本次导入涉及到的数据源地址,ETL 函数,目标表及分区等信息。
-
-下面主要对数据描述类的部分参数详细解释:
-
-+ 多表导入
-
- Broker load 支持一次导入任务涉及多张表,每个 Broker load 导入任务可在多个 ``` data_desc ``` 声明多张表来实现多表导入。每个单独的 ```data_desc``` 还可以指定属于该表的数据源地址。Broker load 保证了单次导入的多张表之间原子性成功或失败。
-
-+ negative
-
- ```data_desc```中还可以设置数据取反导入。这个功能主要用于,当数据表中聚合列的类型都为 SUM 类型时。如果希望撤销某一批导入的数据。则可以通过 `negative` 参数导入同一批数据。Doris 会自动为这一批数据在聚合列上数据取反,以达到消除同一批数据的功能。
-
-+ partition
-
- 在 ```data_desc``` 中可以指定待导入表的 partition 信息,如果待导入数据不属于指定的 partition 则不会被导入。同时,不在指定 Partition 的数据会被认为是错误数据。
-
-+ set column mapping
-
- 在 ```data_desc``` 中的 SET 语句负责设置列函数变换,这里的列函数变换支持所有查询的等值表达式变换。如果原始数据的列和表中的列不一一对应,就需要用到这个属性。
-
-+ preceding filter predicate
-
- 用于过滤原始数据。原始数据是未经列映射、转换的数据。用户可以在对转换前的数据前进行一次过滤,选取期望的数据,再进行转换。
-
-+ where predicate
-
- 在 ```data_desc``` 中的 WHERE 语句中负责过滤已经完成 transform 的数据,被 filter 的数据不会进入容忍率的统计中。如果多个 data_desc 中声明了同一张表的多个条件的话,则会 merge 同一张表的多个条件,merge 策略是 AND 。
-
-#### 导入作业参数
-
-导入作业参数主要指的是 Broker load 创建导入语句中的属于 ```opt_properties```部分的参数。导入作业参数是作用于整个导入作业的。
-
-下面主要对导入作业参数的部分参数详细解释:
-
-+ timeout
-
- 导入作业的超时时间(以秒为单位),用户可以在 ```opt_properties``` 中自行设置每个导入的超时时间。导入任务在设定的 timeout 时间内未完成则会被系统取消,变成 CANCELLED。Broker load 的默认导入超时时间为4小时。
-
- 通常情况下,用户不需要手动设置导入任务的超时时间。当在默认超时时间内无法完成导入时,可以手动设置任务的超时时间。
-
- > 推荐超时时间
- >
- > 总文件大小(MB) / 用户 Doris 集群最慢导入速度(MB/s) > timeout > ((总文件大小(MB) * 待导入的表及相关 Roll up 表的个数) / (10 * 导入并发数) )
-
- > 导入并发数见文档最后的导入系统配置说明,公式中的 10 为目前的导入限速 10MB/s。
-
- > 例如一个 1G 的待导入数据,待导入表包含3个 Rollup 表,当前的导入并发数为 3。则 timeout 的 最小值为 ```(1 * 1024 * 3 ) / (10 * 3) = 102 秒```
-
- 由于每个 Doris 集群的机器环境不同且集群并发的查询任务也不同,所以用户 Doris 集群的最慢导入速度需要用户自己根据历史的导入任务速度进行推测。
-
-+ max\_filter\_ratio
-
-    导入任务的最大容忍率,默认为 0,即不容忍错误数据,取值范围是 0~1。当导入的错误率超过该值,则导入失败。
-
- 如果用户希望忽略错误的行,可以通过设置这个参数大于 0,来保证导入可以成功。
-
- 计算公式为:
-
- ``` max_filter_ratio = (dpp.abnorm.ALL / (dpp.abnorm.ALL + dpp.norm.ALL ) ) ```
-
- ```dpp.abnorm.ALL``` 表示数据质量不合格的行数。如类型不匹配,列数不匹配,长度不匹配等等。
-
- ```dpp.norm.ALL``` 指的是导入过程中正确数据的条数。可以通过 ```SHOW LOAD``` 命令查询导入任务的正确数据量。
-
- 原始文件的行数 = `dpp.abnorm.ALL + dpp.norm.ALL`
-
-+ exec\_mem\_limit
-
- 导入内存限制。默认是 2GB。单位为字节。
-
-+ strict\_mode
-
- Broker load 导入可以开启 strict mode 模式。开启方式为 ```properties ("strict_mode" = "true")``` 。默认的 strict mode 为关闭。
-
- strict mode 模式的意思是:对于导入过程中的列类型转换进行严格过滤。严格过滤的策略如下:
-
- 1. 对于列类型转换来说,如果 strict mode 为true,则错误的数据将被 filter。这里的错误数据是指:原始数据并不为空值,在参与列类型转换后结果为空值的这一类数据。
-
- 2. 对于导入的某列由函数变换生成时,strict mode 对其不产生影响。
-
- 3. 对于导入的某列类型包含范围限制的,如果原始数据能正常通过类型转换,但无法通过范围限制的,strict mode 对其也不产生影响。例如:如果类型是 decimal(1,0), 原始数据为 10,则属于可以通过类型转换但不在列声明的范围内。这种数据 strict 对其不产生影响。
-+ merge\_type
-    数据的合并类型,一共支持三种:APPEND、DELETE、MERGE。其中 APPEND 是默认值,表示这批数据全部需要追加到现有数据中;DELETE 表示删除与这批数据 key 相同的所有行;MERGE 语义需要与 DELETE ON 条件联合使用,表示满足 DELETE ON 条件的数据按照 DELETE 语义处理,其余的按照 APPEND 语义处理。
-
-#### strict mode 与 source data 的导入关系
-
-这里以列类型为 TinyInt 来举例
-
->注:当表中的列允许导入空值时
-
-|source data | source data example | string to int | strict_mode | result|
-|------------|---------------------|-----------------|--------------------|---------|
-|空值 | \N | N/A | true or false | NULL|
-|not null | aaa or 2000 | NULL | true | invalid data(filtered)|
-|not null | aaa | NULL | false | NULL|
-|not null | 1 | 1 | true or false | correct data|
-
-这里以列类型为 Decimal(1,0) 举例
-
->注:当表中的列允许导入空值时
-
-|source data | source data example | string to int | strict_mode | result|
-|------------|---------------------|-----------------|--------------------|--------|
-|空值 | \N | N/A | true or false | NULL|
-|not null | aaa | NULL | true | invalid data(filtered)|
-|not null | aaa | NULL | false | NULL|
-|not null | 1 or 10 | 1 | true or false | correct data|
-
-> 注意:10 虽然是一个超过范围的值,但是因为其类型符合 decimal的要求,所以 strict mode对其不产生影响。10 最后会在其他 ETL 处理流程中被过滤。但不会被 strict mode 过滤。
-
-#### Broker 参数
-
-Broker Load 需要借助 Broker 进程访问远端存储,不同的 Broker 需要提供不同的参数,具体请参阅 [Broker文档](../broker.md)
-
-### 查看导入
-
-Broker load 导入方式由于是异步的,所以用户必须将创建导入的 Label 记录,并且在**查看导入命令中使用 Label 来查看导入结果**。查看导入命令在所有导入方式中是通用的,具体语法可执行 ```HELP SHOW LOAD``` 查看。
-
-示例:
-
-```
-mysql> show load order by createtime desc limit 1\G
-*************************** 1. row ***************************
- JobId: 76391
- Label: label1
- State: FINISHED
- Progress: ETL:100%; LOAD:100%
- Type: BROKER
- EtlInfo: unselected.rows=4; dpp.abnorm.ALL=15; dpp.norm.ALL=28133376
- TaskInfo: cluster:N/A; timeout(s):10800; max_filter_ratio:5.0E-5
- ErrorMsg: N/A
- CreateTime: 2019-07-27 11:46:42
- EtlStartTime: 2019-07-27 11:46:44
- EtlFinishTime: 2019-07-27 11:46:44
- LoadStartTime: 2019-07-27 11:46:44
-LoadFinishTime: 2019-07-27 11:50:16
- URL: http://192.168.1.1:8040/api/_load_error_log?file=__shard_4/error_log_insert_stmt_4bb00753932c491a-a6da6e2725415317_4bb00753932c491a_a6da6e2725415317
- JobDetails: {"Unfinished backends":{"9c3441027ff948a0-8287923329a2b6a7":[10002]},"ScannedRows":2390016,"TaskNumber":1,"All backends":{"9c3441027ff948a0-8287923329a2b6a7":[10002]},"FileNumber":1,"FileSize":1073741824}
-```
-
-下面主要介绍了查看导入命令返回结果集中参数意义:
-
-+ JobId
-
- 导入任务的唯一ID,每个导入任务的 JobId 都不同,由系统自动生成。与 Label 不同的是,JobId永远不会相同,而 Label 则可以在导入任务失败后被复用。
-
-+ Label
-
- 导入任务的标识。
-
-+ State
-
- 导入任务当前所处的阶段。在 Broker load 导入过程中主要会出现 PENDING 和 LOADING 这两个导入中的状态。如果 Broker load 处于 PENDING 状态,则说明当前导入任务正在等待被执行;LOADING 状态则表示正在执行中。
-
- 导入任务的最终阶段有两个:CANCELLED 和 FINISHED,当 Load job 处于这两个阶段时,导入完成。其中 CANCELLED 为导入失败,FINISHED 为导入成功。
-
-+ Progress
-
- 导入任务的进度描述。分为两种进度:ETL 和 LOAD,对应了导入流程的两个阶段 ETL 和 LOADING。目前 Broker load 由于只有 LOADING 阶段,所以 ETL 则会永远显示为 `100%`
-
- LOAD 的进度范围为:0~100%。
-
-    ```LOAD 进度 = 当前完成导入的表个数 / 本次导入任务涉及的总表个数 * 100%```
-
- **如果所有导入表均完成导入,此时 LOAD 的进度为 99%** 导入进入到最后生效阶段,整个导入完成后,LOAD 的进度才会改为 100%。
-
- 导入进度并不是线性的。所以如果一段时间内进度没有变化,并不代表导入没有在执行。
-
-+ Type
-
- 导入任务的类型。Broker load 的 type 取值只有 BROKER。
-
-+ EtlInfo
-
- 主要显示了导入的数据量指标 ```unselected.rows``` , ```dpp.norm.ALL``` 和 ```dpp.abnorm.ALL```。用户可以根据第一个数值判断 where 条件过滤了多少行,后两个指标验证当前导入任务的错误率是否超过 ```max_filter_ratio```。
-
- 三个指标之和就是原始数据量的总行数。
-
-+ TaskInfo
-
- 主要显示了当前导入任务参数,也就是创建 Broker load 导入任务时用户指定的导入任务参数,包括:`cluster`,`timeout` 和`max_filter_ratio`。
-
-+ ErrorMsg
-
-    在导入任务状态为 CANCELLED 时,会显示失败的原因,显示分两部分:type 和 msg。如果导入任务成功则显示 ```N/A```。
-
- type的取值意义:
-
- ```
- USER_CANCEL: 用户取消的任务
- ETL_RUN_FAIL:在ETL阶段失败的导入任务
- ETL_QUALITY_UNSATISFIED:数据质量不合格,也就是错误数据率超过了 max_filter_ratio
- LOAD_RUN_FAIL:在LOADING阶段失败的导入任务
- TIMEOUT:导入任务没在超时时间内完成
- UNKNOWN:未知的导入错误
- ```
-
-+ CreateTime/EtlStartTime/EtlFinishTime/LoadStartTime/LoadFinishTime
-
- 这几个值分别代表导入创建的时间,ETL阶段开始的时间,ETL阶段完成的时间,Loading阶段开始的时间和整个导入任务完成的时间。
-
- Broker load 导入由于没有 ETL 阶段,所以其 EtlStartTime, EtlFinishTime, LoadStartTime 被设置为同一个值。
-
- 导入任务长时间停留在 CreateTime,而 LoadStartTime 为 N/A 则说明目前导入任务堆积严重。用户可减少导入提交的频率。
-
- ```
- LoadFinishTime - CreateTime = 整个导入任务所消耗时间
- LoadFinishTime - LoadStartTime = 整个 Broker load 导入任务执行时间 = 整个导入任务所消耗时间 - 导入任务等待的时间
- ```
-
-+ URL
-
-    导入任务的错误数据样例,访问 URL 地址即可获取本次导入的错误数据样例。当本次导入不存在错误数据时,URL 字段则为 N/A。
-
-+ JobDetails
-
- 显示一些作业的详细运行状态。包括导入文件的个数、总大小(字节)、子任务个数、已处理的原始行数,运行子任务的 BE 节点 Id,未完成的 BE 节点 Id。
-
- ```
- {"Unfinished backends":{"9c3441027ff948a0-8287923329a2b6a7":[10002]},"ScannedRows":2390016,"TaskNumber":1,"All backends":{"9c3441027ff948a0-8287923329a2b6a7":[10002]},"FileNumber":1,"FileSize":1073741824}
- ```
-
- 其中已处理的原始行数,每 5 秒更新一次。该行数仅用于展示当前的进度,不代表最终实际的处理行数。实际处理行数以 EtlInfo 中显示的为准。
-
-### 取消导入
-
-当 Broker load 作业状态不为 CANCELLED 或 FINISHED 时,可以被用户手动取消。取消时需要指定待取消导入任务的 Label 。取消导入命令语法可执行 ```HELP CANCEL LOAD```查看。
-
-## 相关系统配置
-
-### FE 配置
-
-下面几个配置属于 Broker load 的系统级别配置,也就是作用于所有 Broker load 导入任务的配置。主要通过修改 ``` fe.conf```来调整配置值。
-
-+ min\_bytes\_per\_broker\_scanner/max\_bytes\_per\_broker\_scanner/max\_broker\_concurrency
-
- 前两个配置限制了单个 BE 处理的数据量的最小和最大值。第三个配置限制了一个作业的最大的导入并发数。最小处理的数据量,最大并发数,源文件的大小和当前集群 BE 的个数 **共同决定了本次导入的并发数**。
-
- ```
- 本次导入并发数 = Math.min(源文件大小/最小处理量,最大并发数,当前BE节点个数)
- 本次导入单个BE的处理量 = 源文件大小/本次导入的并发数
- ```
-
- 通常一个导入作业支持的最大数据量为 `max_bytes_per_broker_scanner * BE 节点数`。如果需要导入更大数据量,则需要适当调整 `max_bytes_per_broker_scanner` 参数的大小。
-
- 默认配置:
-
- ```
- 参数名:min_bytes_per_broker_scanner, 默认 64MB,单位bytes。
- 参数名:max_broker_concurrency, 默认 10。
- 参数名:max_bytes_per_broker_scanner,默认 3G,单位bytes。
- ```
-
-## 最佳实践
-
-### 应用场景
-
-使用 Broker load 最适合的场景就是原始数据在文件系统(HDFS,BOS,AFS)中的场景。其次,由于 Broker load 是单次导入中唯一的一种异步导入方式,所以如果用户在导入大文件时需要异步接入,也可以考虑使用 Broker load。
-
-### 数据量
-
-这里仅讨论单个 BE 的情况,如果用户集群有多个 BE 则下面标题中的数据量应该乘以 BE 个数来计算。比如:如果用户有3个 BE,则 3G 以下(包含)则应该乘以 3,也就是 9G 以下(包含)。
-
-+ 3G 以下(包含)
-
- 用户可以直接提交 Broker load 创建导入请求。
-
-+ 3G 以上
-
- 由于单个导入 BE 最大的处理量为 3G,超过 3G 的待导入文件就需要通过调整 Broker load 的导入参数来实现大文件的导入。
-
- 1. 根据当前 BE 的个数和原始文件的大小修改单个 BE 的最大扫描量和最大并发数。
-
- ```
- 修改 fe.conf 中配置
-
- max_broker_concurrency = BE 个数
- 当前导入任务单个 BE 处理的数据量 = 原始文件大小 / max_broker_concurrency
- max_bytes_per_broker_scanner >= 当前导入任务单个 BE 处理的数据量
-
- 比如一个 100G 的文件,集群的 BE 个数为 10 个
- max_broker_concurrency = 10
- max_bytes_per_broker_scanner >= 10G = 100G / 10
-
- ```
-
- 修改后,所有的 BE 会并发的处理导入任务,每个 BE 处理原始文件的一部分。
-
- *注意:上述两个 FE 中的配置均为系统配置,也就是说其修改是作用于所有的 Broker load的任务的。*
-
- 2. 在创建导入的时候自定义当前导入任务的 timeout 时间
-
- ```
- 当前导入任务单个 BE 处理的数据量 / 用户 Doris 集群最慢导入速度(MB/s) >= 当前导入任务的 timeout 时间 >= 当前导入任务单个 BE 处理的数据量 / 10M/s
-
- 比如一个 100G 的文件,集群的 BE 个数为 10个
- timeout >= 1000s = 10G / 10M/s
-
- ```
-
- 3. 当用户发现第二步计算出的 timeout 时间超过系统默认的导入最大超时时间 4小时
-
- 这时候不推荐用户将导入最大超时时间直接改大来解决问题。单个导入时间如果超过默认的导入最大超时时间4小时,最好是通过切分待导入文件并且分多次导入来解决问题。主要原因是:单次导入超过4小时的话,导入失败后重试的时间成本很高。
-
- 可以通过如下公式计算出 Doris 集群期望最大导入文件数据量:
-
- ```
- 期望最大导入文件数据量 = 14400s * 10M/s * BE 个数
- 比如:集群的 BE 个数为 10个
- 期望最大导入文件数据量 = 14400s * 10M/s * 10 = 1440000M ≈ 1440G
-
- 注意:一般用户的环境可能达不到 10M/s 的速度,所以建议超过 500G 的文件都进行文件切分,再导入。
-
- ```
-
-### 作业调度
-
-系统会限制一个集群内,正在运行的 Broker Load 作业数量,以防止同时运行过多的 Load 作业。
-
-首先, FE 的配置参数:`desired_max_waiting_jobs` 会限制一个集群内,未开始或正在运行(作业状态为 PENDING 或 LOADING)的 Broker Load 作业数量。默认为 100。如果超过这个阈值,新提交的作业将会被直接拒绝。
-
-一个 Broker Load 作业会被分为 pending task 和 loading task 阶段。其中 pending task 负责获取导入文件的信息,而 loading task 会发送给BE执行具体的导入任务。
-
-FE 的配置参数 `async_pending_load_task_pool_size` 用于限制同时运行的 pending task 的任务数量。也相当于控制了实际正在运行的导入任务数量。该参数默认为 10。也就是说,假设用户提交了100个Load作业,同时只会有10个作业会进入 LOADING 状态开始执行,而其他作业处于 PENDING 等待状态。
-
-FE 的配置参数 `async_loading_load_task_pool_size` 用于限制同时运行的 loading task 的任务数量。一个 Broker Load 作业会有 1 个 pending task 和多个 loading task (等于 LOAD 语句中 DATA INFILE 子句的个数)。所以 `async_loading_load_task_pool_size` 应该大于等于 `async_pending_load_task_pool_size`。
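-
-例如,可在 fe.conf 中按如下方式调整这几项(示意配置,取值沿用上文默认值,`async_loading_load_task_pool_size` 需不小于 `async_pending_load_task_pool_size`):
-
-```
-desired_max_waiting_jobs = 100
-async_pending_load_task_pool_size = 10
-async_loading_load_task_pool_size = 10
-```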
-
-### 性能分析
-
-可以在提交 LOAD 作业前,先执行 `set enable_profile=true` 打开会话变量。然后提交导入作业。待导入作业完成后,可以在 FE 的 web 页面的 `Queries` 标签中查看到导入作业的 Profile。
-
-这个 Profile 可以帮助分析导入作业的运行状态。
-
-当前只有作业成功执行后,才能查看 Profile。
-
-### 完整例子
-
-数据情况:用户数据在 HDFS 中,文件地址为 hdfs://abc.com:8888/store_sales, hdfs 的认证用户名为 root, 密码为 password, 数据量大小约为 30G,希望导入到数据库 bj_sales 的表 store_sales 中。
-
-集群情况:集群的 BE 个数约为 3 个,Broker 名称均为 broker。
-
-+ step1: 经过上述方法的计算,本次导入的单个 BE 导入量为 10G,则需要先修改 FE 的配置,将单个 BE 导入最大量修改为:
-
- ```
- max_bytes_per_broker_scanner = 10737418240
-
- ```
-
-+ step2: 经计算,本次导入的时间大约为 1000s,并未超过默认超时时间,可不配置导入自定义超时时间。
-
-+ step3:创建导入语句
-
- ```
- LOAD LABEL bj_sales.store_sales_broker_load_01
- (
- DATA INFILE("hdfs://abc.com:8888/store_sales")
- INTO TABLE store_sales
- )
- WITH BROKER 'broker'
- ("username"="root", "password"="password");
- ```
-
-## 常见问题
-
-* 导入报错:`Scan bytes per broker scanner exceed limit:xxx`
-
- 请参照文档中最佳实践部分,修改 FE 配置项 `max_bytes_per_broker_scanner` 和 `max_broker_concurrency`
-
-* 导入报错:`failed to send batch` 或 `TabletWriter add batch with unknown id`
-
- 请参照 [导入手册](./load-manual.md) 中 **通用系统配置** 中 **BE 配置**,适当修改 `query_timeout` 和 `streaming_load_rpc_max_alive_time_sec`。
-
-* 导入报错:`LOAD_RUN_FAIL; msg:Invalid Column Name:xxx`
-
-  如果是 PARQUET 或者 ORC 格式的数据,需要使文件头的列名与 Doris 表中的列名一致,如:
- ```
- (tmp_c1,tmp_c2)
- SET
- (
- id=tmp_c2,
- name=tmp_c1
- )
- ```
- 代表获取在parquet或orc中以(tmp_c1, tmp_c2)为列名的列,映射到doris表中的(id, name)列。如果没有设置set, 则以column中的列作为映射。
-
- 注:如果使用某些hive版本直接生成的orc文件,orc文件中的表头并非hive meta数据,而是(_col0, _col1, _col2, ...), 可能导致Invalid Column Name错误,那么则需要使用set进行映射
-
diff --git a/docs/zh-CN/administrator-guide/load-data/delete-manual.md b/docs/zh-CN/administrator-guide/load-data/delete-manual.md
deleted file mode 100644
index 6bfdeab57e..0000000000
--- a/docs/zh-CN/administrator-guide/load-data/delete-manual.md
+++ /dev/null
@@ -1,189 +0,0 @@
----
-{
- "title": "Delete",
- "language": "zh-CN"
-}
----
-
-
-
-# Delete
-
-Delete不同于其他导入方式,它是一个同步过程。和Insert into相似,所有的Delete操作在Doris中是一个独立的导入作业,一般Delete语句需要指定表和分区以及删除的条件来筛选要删除的数据,并将会同时删除base表和rollup表的数据。
-
-## 语法
-
-主要的Delete语法如下:
-
-```
-DELETE FROM table_name [PARTITION partition_name]
-WHERE
-column_name1 op value[ AND column_name2 op value ...];
-```
-
-示例1:
-
-```
-DELETE FROM my_table PARTITION p1 WHERE k1 = 3;
-```
-
-示例2:
-
-```
-DELETE FROM my_table PARTITION p1 WHERE k1 < 3 AND k2 = "abc";
-```
-
-下面介绍删除语句中使用到的参数:
-
-* PARTITION
-
- Delete语句的目标分区,若未指定,则此表必须为单分区表,否则无法delete
-
-* WHERE
-
- Delete语句的条件语句,所有删除语句都必须指定WHERE语句
-
-说明:
-
-1. `Where`语句中的op的类型可包括`=, >, <, >=, <=, !=, in, not in`。
-2. `Where`语句中的列只能是`key`列
-3. 当选定的`key`列不存在某个 rollup 表内时,无法进行 delete
-4. 条件语句中各个条件只能是`and`关系,如希望达成`or`可将条件分别写入两个 delete 语句中
-5. 如果指定表为 RANGE 或者 LIST 分区表,则必须指定 `PARTITION`。如果是单分区表,可以不指定。
-6. 不同于 Insert into 命令,delete 不能手动指定`label`,有关 label 的概念可以查看[Insert Into文档](./insert-into-manual.md)
-
-## 返回结果
-
-Delete命令是一个SQL命令,返回结果是同步的,分为以下几种:
-
-1. 执行成功
-
- 如果Delete顺利执行完成并可见,将返回下列结果,`Query OK`表示成功
-
- ```
- mysql> delete from test_tbl PARTITION p1 where k1 = 1;
- Query OK, 0 rows affected (0.04 sec)
- {'label':'delete_e7830c72-eb14-4cb9-bbb6-eebd4511d251', 'status':'VISIBLE', 'txnId':'4005'}
- ```
-
-2. 提交成功,但未可见
-
-    Doris的事务提交分为两步:提交和发布版本,只有完成了发布版本步骤,结果才对用户可见。若已经提交成功,那么就可以认为最终一定会发布成功。Doris会尝试在提交完后等待发布一段时间,如果超时,即使发布版本还未完成也会优先返回给用户,提示用户提交已经完成。若Delete已经提交并执行,但是仍未发布版本和可见,将返回下列结果
-
- ```
- mysql> delete from test_tbl PARTITION p1 where k1 = 1;
- Query OK, 0 rows affected (0.04 sec)
- {'label':'delete_e7830c72-eb14-4cb9-bbb6-eebd4511d251', 'status':'COMMITTED', 'txnId':'4005', 'err':'delete job is committed but may be taking effect later' }
- ```
-
- 结果会同时返回一个json字符串:
-
-    `affected rows`表示此次删除影响的行数。由于Doris的删除目前是逻辑删除,因此这个值恒为0。
-
- `label`为自动生成的 label,是该导入作业的标识。每个导入作业,都有一个在单 database 内部唯一的 Label。
-
- `status`表示数据删除是否可见,如果可见,显示`VISIBLE`,如果不可见,显示`COMMITTED`。
-
- `txnId`为这个Delete job对应的事务id
-
- `err`字段会显示一些本次删除的详细信息
-
-3. 提交失败,事务取消
-
- 如果Delete语句没有提交成功,将会被Doris自动中止,返回下列结果
-
- ```
- mysql> delete from test_tbl partition p1 where k1 > 80;
- ERROR 1064 (HY000): errCode = 2, detailMessage = {错误原因}
- ```
-
- 示例:
-
- 比如说一个超时的删除,将会返回timeout时间和未完成的`(tablet=replica)`
-
- ```
- mysql> delete from test_tbl partition p1 where k1 > 80;
- ERROR 1064 (HY000): errCode = 2, detailMessage = failed to delete replicas from job: 4005, Unfinished replicas:10000=60000, 10001=60000, 10002=60000
- ```
-
- **综上,对于Delete操作返回结果的正确处理逻辑为:**
-
- 1. 如果返回结果为`ERROR 1064 (HY000)`,则表示删除失败
-
- 2. 如果返回结果为`Query OK`,则表示删除执行成功
-
- 1. 如果`status`为`COMMITTED`,表示数据仍不可见,用户可以稍等一段时间再用`show delete`命令查看结果
- 2. 如果`status`为`VISIBLE`,表示数据删除成功。
-
-## 可配置项
-
-### FE配置
-
-**TIMEOUT配置**
-
-总体来说,Doris的删除作业的超时时间限制在30秒到5分钟时间内,具体时间可通过下面配置项调整
-
-* `tablet_delete_timeout_second`
-
- delete自身的超时时间是可受指定分区下tablet的数量弹性改变的,此项配置为平均一个tablet所贡献的timeout时间,默认值为2。
-
- 假设此次删除所指定分区下有5个tablet,那么可提供给delete的timeout时间为10秒,由于低于最低超时时间30秒,因此最终超时时间为30秒。
-
-* `load_straggler_wait_second`
-
- 如果用户预估的数据量确实比较大,使得5分钟的上限不足时,用户可以通过此项调整timeout上限,默认值为300。
-
- **TIMEOUT的具体计算规则为(秒)**
-
- `TIMEOUT = MIN(load_straggler_wait_second, MAX(30, tablet_delete_timeout_second * tablet_num))`
-
-* `query_timeout`
-
- 因为delete本身是一个SQL命令,因此删除语句也会受session限制,timeout还受Session中的`query_timeout`值影响,可以通过`SET query_timeout = xxx`来增加超时时间,单位是秒。
-
-**IN谓词配置**
-
-* `max_allowed_in_element_num_of_delete`
-
- 如果用户在使用in谓词时需要占用的元素比较多,用户可以通过此项调整允许携带的元素上限,默认值为1024。
-
-## 查看历史记录
-
-1. 用户可以通过show delete语句查看历史上已执行完成的删除记录
-
- 语法
-
- ```
- SHOW DELETE [FROM db_name]
- ```
-
- 示例
-
- ```
- mysql> show delete from test_db;
- +-----------+---------------+---------------------+-----------------+----------+
- | TableName | PartitionName | CreateTime | DeleteCondition | State |
- +-----------+---------------+---------------------+-----------------+----------+
- | empty_tbl | p3 | 2020-04-15 23:09:35 | k1 EQ "1" | FINISHED |
- | test_tbl | p4 | 2020-04-15 23:09:53 | k1 GT "80" | FINISHED |
- +-----------+---------------+---------------------+-----------------+----------+
- 2 rows in set (0.00 sec)
- ```
-
diff --git a/docs/zh-CN/administrator-guide/load-data/insert-into-manual.md b/docs/zh-CN/administrator-guide/load-data/insert-into-manual.md
deleted file mode 100644
index 9d66d1f07e..0000000000
--- a/docs/zh-CN/administrator-guide/load-data/insert-into-manual.md
+++ /dev/null
@@ -1,310 +0,0 @@
----
-{
- "title": "Insert Into",
- "language": "zh-CN"
-}
----
-
-
-
-# Insert Into
-
-Insert Into 语句的使用方式和 MySQL 等数据库中 Insert Into 语句的使用方式类似。但在 Doris 中,所有的数据写入都是一个独立的导入作业。所以这里将 Insert Into 也作为一种导入方式介绍。
-
-主要的 Insert Into 命令包含以下两种:
-
-* INSERT INTO tbl SELECT ...
-* INSERT INTO tbl (col1, col2, ...) VALUES (1, 2, ...), (1,3, ...);
-
-其中第二种命令仅用于 Demo,不要使用在测试或生产环境中。
-
-## 基本操作
-
-### 创建导入
-
-Insert Into 命令需要通过 MySQL 协议提交,创建导入请求会同步返回导入结果。
-
-语法:
-
-```
-INSERT INTO table_name [partition_info] [WITH LABEL label] [col_list] [query_stmt] [VALUES];
-```
-
-示例:
-
-```
-INSERT INTO tbl2 WITH LABEL label1 SELECT * FROM tbl3;
-INSERT INTO tbl1 VALUES ("qweasdzxcqweasdzxc"), ("a");
-```
-
-**注意**
-
-当需要使用 `CTE(Common Table Expressions)` 作为 insert 操作中的查询部分时,必须指定 `WITH LABEL` 和 column list 部分。示例
-
-```
-INSERT INTO tbl1 WITH LABEL label1
-WITH cte1 AS (SELECT * FROM tbl1), cte2 AS (SELECT * FROM tbl2)
-SELECT k1 FROM cte1 JOIN cte2 WHERE cte1.k1 = 1;
-
-
-INSERT INTO tbl1 (k1)
-WITH cte1 AS (SELECT * FROM tbl1), cte2 AS (SELECT * FROM tbl2)
-SELECT k1 FROM cte1 JOIN cte2 WHERE cte1.k1 = 1;
-```
-
-下面主要介绍创建导入语句中使用到的参数:
-
-+ partition\_info
-
- 导入表的目标分区,如果指定目标分区,则只会导入符合目标分区的数据。如果没有指定,则默认值为这张表的所有分区。
-
-+ col\_list
-
-    导入表的目标列,可以以任意的顺序存在。如果没有指定目标列,那么默认值是这张表的所有列。如果表中的某个列不在目标列中,那么这个列需要有默认值,否则 Insert Into 就会执行失败。
-
- 如果查询语句的结果列类型与目标列的类型不一致,那么会调用隐式类型转化,如果不能够进行转化,那么 Insert Into 语句会报语法解析错误。
-
-+ query\_stmt
-
- 通过一个查询语句,将查询语句的结果导入到 Doris 系统中的其他表。查询语句支持任意 Doris 支持的 SQL 查询语法。
-
-+ VALUES
-
- 用户可以通过 VALUES 语法插入一条或者多条数据。
-
- *注意:VALUES 方式仅适用于导入几条数据作为导入 DEMO 的情况,完全不适用于任何测试和生产环境。Doris 系统本身也不适合单条数据导入的场景。建议使用 INSERT INTO SELECT 的方式进行批量导入。*
-
-* WITH LABEL
-
- INSERT 操作作为一个导入任务,也可以指定一个 label。如果不指定,则系统会自动指定一个 UUID 作为 label。
-
- 该功能需要 0.11+ 版本。
-
- *注意:建议指定 Label 而不是由系统自动分配。如果由系统自动分配,但在 Insert Into 语句执行过程中,因网络错误导致连接断开等,则无法得知 Insert Into 是否成功。而如果指定 Label,则可以再次通过 Label 查看任务结果。*
-
-### 导入结果
-
-Insert Into 本身就是一个 SQL 命令,其返回结果会根据执行结果的不同,分为以下几种:
-
-1. 结果集为空
-
- 如果 insert 对应 select 语句的结果集为空,则返回如下:
-
- ```
- mysql> insert into tbl1 select * from empty_tbl;
- Query OK, 0 rows affected (0.02 sec)
- ```
-
- `Query OK` 表示执行成功。`0 rows affected` 表示没有数据被导入。
-
-2. 结果集不为空
-
- 在结果集不为空的情况下。返回结果分为如下几种情况:
-
- 1. Insert 执行成功并可见:
-
- ```
- mysql> insert into tbl1 select * from tbl2;
- Query OK, 4 rows affected (0.38 sec)
- {'label':'insert_8510c568-9eda-4173-9e36-6adc7d35291c', 'status':'visible', 'txnId':'4005'}
-
- mysql> insert into tbl1 with label my_label1 select * from tbl2;
- Query OK, 4 rows affected (0.38 sec)
- {'label':'my_label1', 'status':'visible', 'txnId':'4005'}
-
- mysql> insert into tbl1 select * from tbl2;
- Query OK, 2 rows affected, 2 warnings (0.31 sec)
- {'label':'insert_f0747f0e-7a35-46e2-affa-13a235f4020d', 'status':'visible', 'txnId':'4005'}
-
- mysql> insert into tbl1 select * from tbl2;
- Query OK, 2 rows affected, 2 warnings (0.31 sec)
- {'label':'insert_f0747f0e-7a35-46e2-affa-13a235f4020d', 'status':'committed', 'txnId':'4005'}
- ```
-
- `Query OK` 表示执行成功。`4 rows affected` 表示总共有4行数据被导入。`2 warnings` 表示被过滤的行数。
-
- 同时会返回一个 json 串:
-
- ```
- {'label':'my_label1', 'status':'visible', 'txnId':'4005'}
- {'label':'insert_f0747f0e-7a35-46e2-affa-13a235f4020d', 'status':'committed', 'txnId':'4005'}
- {'label':'my_label1', 'status':'visible', 'txnId':'4005', 'err':'some other error'}
- ```
-
- `label` 为用户指定的 label 或自动生成的 label。Label 是该 Insert Into 导入作业的标识。每个导入作业,都有一个在单 database 内部唯一的 Label。
-
- `status` 表示导入数据是否可见。如果可见,显示 `visible`,如果不可见,显示 `committed`。
-
- `txnId` 为这个 insert 对应的导入事务的 id。
-
- `err` 字段会显示一些其他非预期错误。
-
- 当需要查看被过滤的行时,用户可以通过如下语句
-
- ```
- show load where label="xxx";
- ```
-
- 返回结果中的 URL 可以用于查询错误的数据,具体见后面 **查看错误行** 小结。
-
- **数据不可见是一个临时状态,这批数据最终是一定可见的**
-
- 可以通过如下语句查看这批数据的可见状态:
-
- ```
- show transaction where id=4005;
- ```
-
- 返回结果中的 `TransactionStatus` 列如果为 `visible`,则表述数据可见。
-
- 2. Insert 执行失败
-
- 执行失败表示没有任何数据被成功导入,并返回如下:
-
- ```
- mysql> insert into tbl1 select * from tbl2 where k1 = "a";
- ERROR 1064 (HY000): all partitions have no load data. url: http://10.74.167.16:8042/api/_load_error_log?file=__shard_2/error_log_insert_stmt_ba8bb9e158e4879-ae8de8507c0bf8a2_ba8bb9e158e4879_ae8de8507c0bf8a2
- ```
-
- 其中 `ERROR 1064 (HY000): all partitions have no load data` 显示失败原因。后面的 url 可以用于查询错误的数据,具体见后面 **查看错误行** 小结。
-
-**综上,对于 insert 操作返回结果的正确处理逻辑应为:**
-
-1. 如果返回结果为 `ERROR 1064 (HY000)`,则表示导入失败。
-2. 如果返回结果为 `Query OK`,则表示执行成功。
- 1. 如果 `rows affected` 为 0,表示结果集为空,没有数据被导入。
- 2. 如果 `rows affected` 大于 0:
- 1. 如果 `status` 为 `committed`,表示数据还不可见。需要通过 `show transaction` 语句查看状态直到 `visible`
- 2. 如果 `status` 为 `visible`,表示数据导入成功。
- 3. 如果 `warnings` 大于 0,表示有数据被过滤,可以通过 `show load` 语句获取 url 查看被过滤的行。
-
-### SHOW LAST INSERT
-
-在上一小节中我们介绍了如何根据 insert 操作的返回结果进行后续处理。但一些语言的 mysql 类库中很难获取返回结果中的 json 字符串。因此,Doris 还提供了 `SHOW LAST INSERT` 命令来显式地获取最近一次 insert 操作的结果。
-
-当执行完一个 insert 操作后,可以在同一 session 连接中执行 `SHOW LAST INSERT`。该命令会返回最近一次insert 操作的结果,如:
-
-```
-mysql> show last insert\G
-*************************** 1. row ***************************
- TransactionId: 64067
- Label: insert_ba8f33aea9544866-8ed77e2844d0cc9b
- Database: default_cluster:db1
- Table: t1
-TransactionStatus: VISIBLE
- LoadedRows: 2
- FilteredRows: 0
-```
-
-该命令会返回 insert 以及对应事务的详细信息。因此,用户可以在每次执行完 insert 操作后,继续执行 `show last insert` 命令来获取 insert 的结果。
-
-> 注意:该命令只会返回在同一 session 连接中,最近一次 insert 操作的结果。如果连接断开或更换了新的连接,则将返回空集。
-
-## 相关系统配置
-
-### FE 配置
-
-+ timeout
-
- 导入任务的超时时间(以秒为单位),导入任务在设定的 timeout 时间内未完成则会被系统取消,变成 CANCELLED。
-
-    目前 Insert Into 并不支持自定义导入的 timeout 时间,所有 Insert Into 导入的超时时间是统一的,默认的 timeout 时间为1小时。如果导入的源文件无法在规定时间内完成导入,则需要调整 FE 的参数```insert_load_default_timeout_second```。
-
-    同时 Insert Into 语句受到 Session 变量 `query_timeout` 的限制。可以通过 `SET query_timeout = xxx;` 来增加超时时间,单位是秒。
-
-### Session 变量
-
-+ enable\_insert\_strict
-
- Insert Into 导入本身不能控制导入可容忍的错误率。用户只能通过 `enable_insert_strict` 这个 Session 参数用来控制。
-
- 当该参数设置为 false 时,表示至少有一条数据被正确导入,则返回成功。如果有失败数据,则还会返回一个 Label。
-
- 当该参数设置为 true 时,表示如果有一条数据错误,则导入失败。
-
- 默认为 false。可通过 `SET enable_insert_strict = true;` 来设置。
-
-+ query\_timeout
-
- Insert Into 本身也是一个 SQL 命令,因此 Insert Into 语句也受到 Session 变量 `query_timeout` 的限制。可以通过 `SET query_timeout = xxx;` 来增加超时时间,单位是秒。
-
-## 最佳实践
-
-### 应用场景
-1. 用户希望仅导入几条假数据,验证一下 Doris 系统的功能。此时适合使用 INSERT INTO VALUES 的语法。
-2. 用户希望将已经在 Doris 表中的数据进行 ETL 转换并导入到一个新的 Doris 表中,此时适合使用 INSERT INTO SELECT 语法。
-3. 用户可以创建一种外部表,如 MySQL 外部表映射一张 MySQL 系统中的表。或者创建 Broker 外部表来映射 HDFS 上的数据文件。然后通过 INSERT INTO SELECT 语法将外部表中的数据导入到 Doris 表中存储。
-
-### 数据量
-Insert Into 对数据量没有限制,大数据量导入也可以支持。但 Insert Into 有默认的超时时间,用户预估的导入数据量过大,就需要修改系统的 Insert Into 导入超时时间。
-
-```
-导入数据量 = 36G 约≤ 3600s * 10M/s
-其中 10M/s 是最大导入限速,用户需要根据当前集群情况计算出平均的导入速度来替换公式中的 10M/s
-```
-
-### 完整例子
-
-用户有一张表 store\_sales 在数据库 sales 中,用户又创建了一张表叫 bj\_store\_sales 也在数据库 sales 中,用户希望将 store\_sales 中销售记录在 bj 的数据导入到这张新建的表 bj\_store\_sales 中。导入的数据量约为:10G。
-
-```
-store_sales schema:
-(id, total, user_id, sale_timestamp, region)
-
-bj_store_sales schema:
-(id, total, user_id, sale_timestamp)
-
-```
-
-集群情况:用户当前集群的平均导入速度约为 5M/s
-
-+ Step1: 判断是否要修改 Insert Into 的默认超时时间
-
- ```
- 计算导入的大概时间
- 10G / 5M/s = 2000s
-
- 修改 FE 配置
- insert_load_default_timeout_second = 2000
- ```
-
-+ Step2:创建导入任务
-
- 由于用户是希望将一张表中的数据做 ETL 并导入到目标表中,所以应该使用 Insert into query\_stmt 方式导入。
-
- ```
- INSERT INTO bj_store_sales WITH LABEL `label` SELECT id, total, user_id, sale_timestamp FROM store_sales where region = "bj";
- ```
-
-## 常见问题
-
-* 查看错误行
-
- 由于 Insert Into 无法控制错误率,只能通过 `enable_insert_strict` 设置为完全容忍错误数据或完全忽略错误数据。因此如果 `enable_insert_strict` 设为 true,则 Insert Into 可能会失败。而如果 `enable_insert_strict` 设为 false,则可能出现仅导入了部分合格数据的情况。
-
- 当返回结果中提供了 url 字段时,可以通过以下命令查看错误行:
-
- ```SHOW LOAD WARNINGS ON "url";```
-
- 示例:
-
- ```SHOW LOAD WARNINGS ON "http://ip:port/api/_load_error_log?file=__shard_13/error_log_insert_stmt_d2cac0a0a16d482d-9041c949a4b71605_d2cac0a0a16d482d_9041c949a4b71605";```
-
- 错误的原因通常如:源数据列长度超过目的数据列长度、列类型不匹配、分区不匹配、列顺序不匹配等等。
diff --git a/docs/zh-CN/administrator-guide/load-data/load-json-format.md b/docs/zh-CN/administrator-guide/load-data/load-json-format.md
deleted file mode 100644
index b7e9451b3e..0000000000
--- a/docs/zh-CN/administrator-guide/load-data/load-json-format.md
+++ /dev/null
@@ -1,470 +0,0 @@
----
-{
- "title": "导入 Json 格式数据",
- "language": "zh-CN"
-}
----
-
-
-
-# 导入 Json 格式数据
-
-Doris 从 0.12 版本开始支持 Json 格式的数据导入。
-
-## 支持的导入方式
-
-目前只有以下导入方式支持 Json 格式的数据导入:
-
-* Stream Load
-* Routine Load
-
-关于以上导入方式的具体说明,请参阅相关文档。本文档主要介绍在这些导入方式中关于 Json 部分的使用说明。
-
-## 支持的 Json 格式
-
-当前仅支持以下两种 Json 格式:
-
-1. 以 Array 表示的多行数据
-
- 以 Array 为根节点的 Json 格式。Array 中的每个元素表示要导入的一行数据,通常是一个 Object。示例如下:
-
- ```
- [
- { "id": 123, "city" : "beijing"},
- { "id": 456, "city" : "shanghai"},
- ...
- ]
- ```
-
- ```
- [
- { "id": 123, "city" : { "name" : "beijing", "region" : "haidian"}},
- { "id": 456, "city" : { "name" : "beijing", "region" : "chaoyang"}},
- ...
- ]
- ```
-
- 这种方式通常用于 Stream Load 导入方式,以便在一批导入数据中表示多行数据。
-
- 这种方式必须配合设置 `strip_outer_array=true` 使用。Doris在解析时会将数组展开,然后依次解析其中的每一个 Object 作为一行数据。
-
-2. 以 Object 表示的单行数据
-
- 以 Object 为根节点的 Json 格式。整个 Object 即表示要导入的一行数据。示例如下:
-
- ```
- { "id": 123, "city" : "beijing"}
- ```
-
- ```
- { "id": 123, "city" : { "name" : "beijing", "region" : "haidian" }}
- ```
-
- 这种方式通常用于 Routine Load 导入方式,如表示 Kafka 中的一条消息,即一行数据。
-
-## Json Path
-
-Doris 支持通过 Json Path 抽取 Json 中指定的数据。
-
-**注:因为对于 Array 类型的数据,Doris 会先进行数组展开,最终按照 Object 格式进行单行处理。所以本文档之后的示例都以单个 Object 格式的 Json 数据进行说明。**
-
-* 不指定 Json Path
-
- 如果没有指定 Json Path,则 Doris 会默认使用表中的列名查找 Object 中的元素。示例如下:
-
- 表中包含两列: `id`, `city`
-
- Json 数据如下:
-
- ```
- { "id": 123, "city" : "beijing"}
- ```
-
- 则 Doris 会使用 `id`, `city` 进行匹配,得到最终数据 `123` 和 `beijing`。
-
- 如果 Json 数据如下:
-
- ```
- { "id": 123, "name" : "beijing"}
- ```
-
- 则使用 `id`, `city` 进行匹配,得到最终数据 `123` 和 `null`。
-
-* 指定 Json Path
-
- 通过一个 Json 数据的形式指定一组 Json Path。数组中的每个元素表示一个要抽取的列。示例如下:
-
- ```
- ["$.id", "$.name"]
- ```
- ```
- ["$.id.sub_id", "$.name[0]", "$.city[0]"]
- ```
-
- Doris 会使用指定的 Json Path 进行数据匹配和抽取。
-
-* 匹配非基本类型
-
- 前面的示例最终匹配到的数值都是基本类型,如整型、字符串等。Doris 当前暂不支持复合类型,如 Array、Map 等。所以当匹配到一个非基本类型时,Doris 会将该类型转换为 Json 格式的字符串,并以字符串类型进行导入。示例如下:
-
- Json 数据为:
-
- ```
- { "id": 123, "city" : { "name" : "beijing", "region" : "haidian" }}
- ```
-
- Json Path 为 `["$.city"]`。则匹配到的元素为:
-
- ```
- { "name" : "beijing", "region" : "haidian" }
- ```
-
- 该元素会被转换为字符串进行后续导入操作:
-
- ```
- "{'name':'beijing','region':'haidian'}"
- ```
-
-* 匹配失败
-
- 当匹配失败时,将会返回 `null`。示例如下:
-
- Json 数据为:
-
- ```
- { "id": 123, "name" : "beijing"}
- ```
-
- Json Path 为 `["$.id", "$.info"]`。则匹配到的元素为 `123` 和 `null`。
-
- Doris 当前不区分 Json 数据中表示的 null 值,和匹配失败时产生的 null 值。假设 Json 数据为:
-
- ```
- { "id": 123, "name" : null }
- ```
-
- 则使用以下两种 Json Path 会获得相同的结果:`123` 和 `null`。
-
- ```
- ["$.id", "$.name"]
- ```
- ```
- ["$.id", "$.info"]
- ```
-
-* 完全匹配失败
-
- 为防止一些参数设置错误导致的误操作。Doris 在尝试匹配一行数据时,如果所有列都匹配失败,则会认为这个是一个错误行。假设 Json 数据为:
-
- ```
- { "id": 123, "city" : "beijing" }
- ```
-
- 如果 Json Path 错误的写为(或者不指定 Json Path 时,表中的列不包含 `id` 和 `city`):
-
- ```
- ["$.ad", "$.infa"]
- ```
-
- 则会导致完全匹配失败,则该行会标记为错误行,而不是产出 `null, null`。
-
-## Json Path 和 Columns
-
-Json Path 用于指定如何对 JSON 格式中的数据进行抽取,而 Columns 指定列的映射和转换关系。两者可以配合使用。
-
-换句话说,相当于通过 Json Path,将一个 Json 格式的数据,按照 Json Path 中指定的列顺序进行了列的重排。之后,可以通过 Columns,将这个重排后的源数据和表的列进行映射。举例如下:
-
-数据内容:
-
-```
-{"k1" : 1, "k2": 2}
-```
-
-表结构:
-
-`k2 int, k1 int`
-
-导入语句1(以 Stream Load 为例):
-
-```
-curl -v --location-trusted -u root: -H "format: json" -H "jsonpaths: [\"$.k2\", \"$.k1\"]" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
-```
-
-导入语句1中,仅指定了 Json Path,没有指定 Columns。其中 Json Path 的作用是将 Json 数据按照 Json Path 中字段的顺序进行抽取,之后会按照表结构的顺序进行写入。最终导入的数据结果如下:
-
-```
-+------+------+
-| k1 | k2 |
-+------+------+
-| 2 | 1 |
-+------+------+
-```
-
-会看到,实际的 k1 列导入了 Json 数据中的 "k2" 列的值。这是因为,Json 中字段名称并不等同于表结构中字段的名称。我们需要显式的指定这两者之间的映射关系。
-
-导入语句2:
-
-```
-curl -v --location-trusted -u root: -H "format: json" -H "jsonpaths: [\"$.k2\", \"$.k1\"]" -H "columns: k2, k1" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
-```
-
-相比于导入语句1,这里增加了 Columns 字段,用于描述列的映射关系,按 `k2, k1` 的顺序。即按 Json Path 中字段的顺序抽取后,指定第一列为表中 k2 列的值,而第二列为表中 k1 列的值。最终导入的数据结果如下:
-
-```
-+------+------+
-| k1 | k2 |
-+------+------+
-| 1 | 2 |
-+------+------+
-```
-
-当然,如其他导入一样,可以在 Columns 中进行列的转换操作。示例如下:
-
-```
-curl -v --location-trusted -u root: -H "format: json" -H "jsonpaths: [\"$.k2\", \"$.k1\"]" -H "columns: k2, tmp_k1, k1 = tmp_k1 * 100" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
-```
-
-上述示例会将 k1 的值乘以 100 后导入。最终导入的数据结果如下:
-
-```
-+------+------+
-| k1 | k2 |
-+------+------+
-| 100 | 2 |
-+------+------+
-```
-
-## NULL 和 Default 值
-
-示例数据如下:
-
-```
-[
- {"k1": 1, "k2": "a"},
- {"k1": 2},
- {"k1": 3, "k2": "c"},
-]
-```
-
-表结构为:`k1 int null, k2 varchar(32) null default "x"`
-
-导入语句如下:
-
-```
-curl -v --location-trusted -u root: -H "format: json" -H "strip_outer_array: true" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
-```
-
-用户可能期望的导入结果如下,即对于缺失的列,填写默认值。
-
-```
-+------+------+
-| k1 | k2 |
-+------+------+
-| 1 | a |
-+------+------+
-| 2 | x |
-+------+------+
-| 3 | c |
-+------+------+
-```
-
-但实际的导入结果如下,即对于缺失的列,补上了 NULL。
-
-```
-+------+------+
-| k1 | k2 |
-+------+------+
-| 1 | a |
-+------+------+
-| 2 | NULL |
-+------+------+
-| 3 | c |
-+------+------+
-```
-
-这是因为通过导入语句中的信息,Doris 并不知道 “缺失的列是表中的 k2 列”。
-如果要对以上数据按照期望结果导入,则导入语句如下:
-
-```
-curl -v --location-trusted -u root: -H "format: json" -H "strip_outer_array: true" -H "jsonpaths: [\"$.k1\", \"$.k2\"]" -H "columns: k1, tmp_k2, k2 = ifnull(tmp_k2, 'x')" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
-```
-
-## LargeInt与Decimal
-
-Doris支持LargeInt与Decimal等数据范围更大、数据精度更高的数据类型。但是由于Doris使用的RapidJSON库对于数字类型能够解析的最大范围为Int64与Double,这导致了通过Json导入LargeInt或Decimal时可能会出现精度丢失、数据转换出错等问题。
-
-示例数据如下:
-
-```
-[
- {"k1": 1, "k2":9999999999999.999999 }
-]
-```
-
-
-导入k2列类型为`Decimal(16, 9)`,数据为:`9999999999999.999999`。在进行Json导入时,由于Double转换的精度丢失导致了导入的数据为:`10000000000000.0002`,引发了导入出错。
-
-为了解决这个问题,Doris在导入时提供了 `num_as_string`的开关。Doris在解析Json数据时会将数字类型转为字符串,然后在确保不会出现精度丢失的情况下进行导入。
-
-```
-curl -v --location-trusted -u root: -H "format: json" -H "num_as_string: true" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
-```
-
-但是开启这个开关会引起一些意想不到的副作用。Doris 当前暂不支持复合类型,如 Array、Map 等。所以当匹配到一个非基本类型时,Doris 会将该类型转换为 Json 格式的字符串,而`num_as_string`会同样将复合类型的数字转换为字符串,举个例子:
-
-Json 数据为:
-
- { "id": 123, "city" : { "name" : "beijing", "city_id" : 1 }}
-
-不开启`num_as_string`时,导入的city列的数据为:
-
-`{ "name" : "beijing", "city_id" : 1 }`
-
-而开启了`num_as_string`时,导入的city列的数据为:
-
-`{ "name" : "beijing", "city_id" : "1" }`
-
-注意,复合类型中原先为数字 1 的 city_id 被当作字符串处理并加上了引号,与原始数据相比发生了变化。
-
-所以在使用 Json 导入时,要尽量避免 LargeInt、Decimal 与复合类型同时导入。如果无法避免,则需要充分了解开启 `num_as_string` 后对复合类型导入的**副作用**。
-
-
-## 应用示例
-
-### Stream Load
-
-因为 Json 格式的不可拆分特性,所以在使用 Stream Load 导入 Json 格式的文件时,文件内容会被全部加载到内存后,才开始处理。因此,如果文件过大的话,可能会占用较多的内存。
-
-假设表结构为:
-
-```
-id INT NOT NULL,
-city VARCHAR NULL,
-code INT NULL
-```
-
-1. 导入单行数据1
-
- ```
- {"id": 100, "city": "beijing", "code" : 1}
- ```
-
- * 不指定 Json Path
-
- ```
- curl --location-trusted -u user:passwd -H "format: json" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load
- ```
-
- 导入结果:
-
- ```
- 100 beijing 1
- ```
-
- * 指定 Json Path
-
- ```
- curl --location-trusted -u user:passwd -H "format: json" -H "jsonpaths: [\"$.id\",\"$.city\",\"$.code\"]" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load
- ```
-
- 导入结果:
-
- ```
- 100 beijing 1
- ```
-
-2. 导入单行数据2
-
- ```
- {"id": 100, "content": {"city": "beijing", "code" : 1}}
- ```
-
- * 指定 Json Path
-
- ```
- curl --location-trusted -u user:passwd -H "format: json" -H "jsonpaths: [\"$.id\",\"$.content.city\",\"$.content.code\"]" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load
- ```
-
- 导入结果:
-
- ```
- 100 beijing 1
- ```
-
-3. 导入多行数据
-
- ```
- [
- {"id": 100, "city": "beijing", "code" : 1},
- {"id": 101, "city": "shanghai"},
- {"id": 102, "city": "tianjin", "code" : 3},
- {"id": 103, "city": "chongqing", "code" : 4},
- {"id": 104, "city": ["zhejiang", "guangzhou"], "code" : 5},
- {
- "id": 105,
- "city": {
- "order1": ["guangzhou"]
- },
- "code" : 6
- }
- ]
- ```
-
- * 指定 Json Path
-
- ```
- curl --location-trusted -u user:passwd -H "format: json" -H "jsonpaths: [\"$.id\",\"$.city\",\"$.code\"]" -H "strip_outer_array: true" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load
- ```
-
- 导入结果:
-
- ```
- 100 beijing 1
- 101 shanghai NULL
- 102 tianjin 3
- 103 chongqing 4
- 104 ["zhejiang","guangzhou"] 5
- 105 {"order1":["guangzhou"]} 6
- ```
-
-4. 对导入数据进行转换
-
- 数据依然是示例3中的多行数据,现需要对导入数据中的 `code` 列加1后导入。
-
- ```
- curl --location-trusted -u user:passwd -H "format: json" -H "jsonpaths: [\"$.id\",\"$.city\",\"$.code\"]" -H "strip_outer_array: true" -H "columns: id, city, tmpc, code=tmpc+1" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load
- ```
-
- 导入结果:
-
- ```
- 100 beijing 2
- 101 shanghai NULL
- 102 tianjin 4
- 103 chongqing 5
- 104 ["zhejiang","guangzhou"] 6
- 105 {"order1":["guangzhou"]} 7
- ```
-
-### Routine Load
-
-Routine Load 对 Json 数据的处理原理和 Stream Load 相同。在此不再赘述。
-
-对于 Kafka 数据源,每个 Message 中的内容被视作一个完整的 Json 数据。如果一个 Message 中以 Array 格式表示多行数据,则会导入多行,而 Kafka 的 offset 只会增加 1。而如果一个 Array 格式的 Json 表示多行数据,但因为 Json 格式错误导致解析失败,则错误行只会增加 1(因为解析失败,实际上 Doris 无法判断其中包含多少行数据,只能按一行错误数据记录)。
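-
-作为参考,下面给出一个消费 Json 格式 Kafka 数据的 Routine Load 作业示意(库表名、topic、broker 地址均为假设值,具体属性写法以 `HELP ROUTINE LOAD;` 为准):
-
-```
-CREATE ROUTINE LOAD db1.kafka_json_job ON tbl1
-COLUMNS(id, city, code)
-PROPERTIES
-(
-    "format" = "json",
-    "jsonpaths" = "[\"$.id\",\"$.city\",\"$.code\"]",
-    "strip_outer_array" = "true"
-)
-FROM KAFKA
-(
-    "kafka_broker_list" = "broker1:9092",
-    "kafka_topic" = "my_topic"
-);
-```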
diff --git a/docs/zh-CN/administrator-guide/load-data/load-manual.md b/docs/zh-CN/administrator-guide/load-data/load-manual.md
deleted file mode 100644
index 2b82075f35..0000000000
--- a/docs/zh-CN/administrator-guide/load-data/load-manual.md
+++ /dev/null
@@ -1,227 +0,0 @@
----
-{
- "title": "导入总览",
- "language": "zh-CN"
-}
----
-
-
-
-# 导入总览
-
-导入(Load)功能就是将用户的原始数据导入到 Doris 中。导入成功后,用户即可通过 Mysql 客户端查询数据。
-
-Doris 支持多种导入方式。建议先完整阅读本文档,再根据所选择的导入方式,查看各自导入方式的详细文档。
-
-## 基本概念
-
-1. Frontend(FE):Doris 系统的元数据和调度节点。在导入流程中主要负责导入规划生成和导入任务的调度工作。
-2. Backend(BE):Doris 系统的计算和存储节点。在导入流程中主要负责数据的 ETL 和存储。
-3. Broker:Broker 为一个独立的无状态进程。封装了文件系统接口,提供 Doris 读取远端存储系统中文件的能力。
-4. 导入作业(Load job):导入作业读取用户提交的源数据,转换或清洗后,将数据导入到 Doris 系统中。导入完成后,数据即可被用户查询到。
-5. Label:所有导入作业都有一个 Label。Label 在一个数据库内唯一,可由用户指定或系统自动生成,用于标识一个导入作业。相同的 Label 仅可用于一个成功的导入作业。
-6. MySQL 协议/HTTP 协议:Doris 提供两种访问协议接口。 MySQL 协议和 HTTP 协议。部分导入方式使用 MySQL 协议接口提交作业,部分导入方式使用 HTTP 协议接口提交作业。
-
-## 导入方式
-
-为适配不同的数据导入需求,Doris 系统提供了6种不同的导入方式。每种导入方式支持不同的数据源,存在不同的使用方式(异步,同步)。
-
-所有导入方式都支持 csv 数据格式。其中 Broker load 还支持 parquet 和 orc 数据格式。
-
-每个导入方式的说明请参阅单个导入方式的操作手册。
-
-* Broker load
-
- 通过 Broker 进程访问并读取外部数据源(如 HDFS)导入到 Doris。用户通过 Mysql 协议提交导入作业后,异步执行。通过 `SHOW LOAD` 命令查看导入结果。
-
-* Stream load
-
- 用户通过 HTTP 协议提交请求并携带原始数据创建导入。主要用于快速将本地文件或数据流中的数据导入到 Doris。导入命令同步返回导入结果。
-
-* Insert
-
- 类似 MySQL 中的 Insert 语句,Doris 提供 `INSERT INTO tbl SELECT ...;` 的方式从 Doris 的表中读取数据并导入到另一张表。或者通过 `INSERT INTO tbl VALUES(...);` 插入单条数据。
-
-* Multi load
-
- 用户通过 HTTP 协议提交多个导入作业。Multi Load 可以保证多个导入作业的原子生效。
-
-* Routine load
-
- 用户通过 MySQL 协议提交例行导入作业,生成一个常驻线程,不间断的从数据源(如 Kafka)中读取数据并导入到 Doris 中。
-
-* 通过S3协议直接导入
-
- 用户通过S3协议直接导入数据,用法和Broker Load 类似
-
-## 基本原理
-
-### 导入执行流程
-
-```
-+---------+ +---------+ +----------+ +-----------+
-| | | | | | | |
-| PENDING +----->+ ETL +----->+ LOADING +----->+ FINISHED |
-| | | | | | | |
-+---------+ +---+-----+ +----+-----+ +-----------+
- | | |
- | | |
- | | |
- | | | +-----------+
- | | | | |
- +---------------+-----------------+------------> CANCELLED |
- | |
- +-----------+
-
-```
-
-如上图,一个导入作业主要经过上面4个阶段。
-
-+ PENDING(非必须): 该阶段只有 Broker Load 才有。Broker Load 被用户提交后会短暂停留在这个阶段,直到被 FE 中的 Scheduler 调度。 其中 Scheduler 的调度间隔为5秒。
-
-+ ETL(非必须): 该阶段在版本 0.10.0(包含) 之前存在,主要是用于将原始数据按照用户声明的方式进行变换,并且过滤不满足条件的原始数据。在 0.10.0 后的版本,ETL 阶段不再存在,其中数据 transform 的工作被合并到 LOADING 阶段。
-
-+ LOADING: 该阶段在版本 0.10.0(包含)之前主要用于将变换后的数据推到对应的 BE 存储中。在 0.10.0 后的版本,该阶段先对数据进行清洗和变换,然后将数据发送到 BE 存储中。当所有导入数据均完成导入后,进入等待生效过程,此时 Load job 依旧是 LOADING。
-
-+ FINISHED: 在 Load job 涉及的所有数据均生效后,Load job 的状态变成 FINISHED。FINISHED 后导入的数据均可查询。
-
-+ CANCELLED: 在作业 FINISHED 之前,作业都可能被取消并进入 CANCELLED 状态。如用户手动取消,或导入出现错误等。CANCELLED 也是 Load Job 的最终状态,不可被再次执行。
-
-上述阶段中,除了 PENDING 到 LOADING 阶段是由 Scheduler 轮询调度的,其他阶段之间的转移都是通过回调机制实现的。
-
-### Label 和 原子性
-
-Doris 对所有导入方式提供原子性保证。即保证同一个导入作业内的数据,原子生效。不会出现仅导入部分数据的情况。
-
-同时,每一个导入作业都有一个由用户指定或者系统自动生成的 Label。Label 在一个 Database 内唯一。当一个 Label 对应的导入作业成功后,不可再重复使用该 Label 提交导入作业。如果 Label 对应的导入作业失败,则可以重复使用。
-
-用户可以通过 Label 机制,来保证 Label 对应的数据最多被导入一次,即At-Most-Once 语义。
-
-## 同步和异步
-
-Doris 目前的导入方式分为两类:同步和异步。如果是外部程序接入 Doris 的导入功能,需要先判断所使用的导入方式属于哪一类,再确定接入逻辑。
-
-### 同步
-
-同步导入方式即用户创建导入任务,Doris 同步执行导入,执行完成后返回用户导入结果。用户可直接根据创建导入任务命令返回的结果同步判断导入是否成功。
-
-同步类型的导入方式有: **Stream load**,**Insert**。
-
-操作步骤:
-
-1. 用户(外部系统)创建导入任务。
-2. Doris 返回导入结果。
-3. 用户(外部系统)判断导入结果,如果失败可以再次提交导入任务。
-
-*注意:如果用户使用的导入方式是同步返回的,且导入的数据量过大,则创建导入请求可能会花很长时间才能返回结果。*
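-
-例如,使用 Insert 进行同步导入时,语句返回即代表导入结束,可直接根据返回结果判断成败(表名为假设值):
-
-```
-INSERT INTO tbl2 SELECT * FROM tbl1 WHERE k1 > 100;
-```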
-
-### 异步
-异步导入方式即用户创建导入任务后,Doris 直接返回创建成功。**创建成功不代表数据已经导入**。导入任务会被异步执行,用户在创建成功后,需要通过轮询的方式发送查看命令查看导入作业的状态。如果创建失败,则可以根据失败信息,判断是否需要再次创建。
-
-异步类型的导入方式有:**Broker load**,**Multi load**。
-
-操作步骤:
-
-1. 用户(外部系统)创建导入任务。
-2. Doris 返回导入创建结果。
-3. 用户(外部系统)判断导入创建结果,成功则进入步骤 4;失败则回到步骤 1,重试创建导入。
-4. 用户(外部系统)轮询查看导入任务,直到状态变为 FINISHED 或 CANCELLED。
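-
-例如,对于异步的 Broker Load,可以通过类似如下语句轮询作业状态(Label 为假设值):
-
-```
-SHOW LOAD WHERE LABEL = "broker_load_example" ORDER BY CreateTime DESC LIMIT 1;
-```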
-
-### 注意事项
-无论是异步还是同步的导入类型,都不应该在 Doris 返回导入失败或导入创建失败后无休止地重试。**外部系统应在有限次数重试并失败后保留失败信息,大部分多次重试均失败的问题,都是使用方法或数据本身的问题。**
-
-## 内存限制
-
-用户可以通过设置参数来限制单个导入的内存使用,以防止导入占用过多的内存而导致系统OOM。
-不同导入方式限制内存的方式略有不同,可以参阅各自的导入手册查看。
-
-一个导入作业通常会分布在多个 Backend 上执行,导入内存限制的是一个导入作业,在单个 Backend 上的内存使用,而不是在整个集群的内存使用。
-
-同时,每个 Backend 会设置可用于导入的内存的总体上限。具体配置参阅下面的通用系统配置小节。这个配置限制了所有在该 Backend 上运行的导入任务的总体内存使用上限。
-
-较小的内存限制可能会影响导入效率,因为导入流程可能会因为内存达到上限而频繁的将内存中的数据写回磁盘。而过大的内存限制可能导致当导入并发较高时,系统OOM。所以,需要根据需求,合理的设置导入的内存限制。
-
-## 最佳实践
-
-用户在接入 Doris 导入时,一般会采用程序接入的方式,这样可以保证数据被定期的导入到 Doris 中。下面主要说明了程序接入 Doris 的最佳实践。
-
-1. 选择合适的导入方式:根据数据源所在位置选择导入方式。例如:如果原始数据存放在 HDFS 上,则使用 Broker load 导入。
-2. 确定导入方式的协议:如果选择了 Broker load 导入方式,则外部系统需要能使用 MySQL 协议定期提交和查看导入作业。
-3. 确定导入方式的类型:导入方式为同步或异步。比如 Broker load 为异步导入方式,则外部系统在提交创建导入后,必须调用查看导入命令,根据查看导入命令的结果来判断导入是否成功。
-4. 制定 Label 生成策略:Label 生成策略需满足,每一批次数据唯一且固定的原则。这样 Doris 就可以保证 At-Most-Once。
-5. 程序自身保证 At-Least-Once:外部系统需要保证自身的 At-Least-Once,这样就可以保证导入流程的 Exactly-Once。
-
-## 通用系统配置
-
-下面主要解释了几个所有导入方式均通用的系统级别的配置。
-
-### FE 配置
-
-以下配置属于 FE 的系统配置,可以通过修改 FE 的配置文件 ```fe.conf``` 来修改配置。
-
-+ max\_load\_timeout\_second 和 min\_load\_timeout\_second
-
- 这两个配置含义为:最大的导入超时时间,最小的导入超时时间,以秒为单位。默认的最大超时时间为3天, 默认的最小超时时间为1秒。用户自定义的导入超时时间不可超过这个范围。该参数通用于所有的导入方式。
-
-+ desired\_max\_waiting\_jobs
-
- 在等待队列中的导入任务个数最大值,默认为100。当在 FE 中处于 PENDING 状态(也就是等待执行的)导入个数超过该值,新的导入请求则会被拒绝。
-
- 此配置仅对异步执行的导入有效,当异步执行的导入等待个数超过默认值,则后续的创建导入请求会被拒绝。
-
-+ max\_running\_txn\_num\_per\_db
-
-  这个配置的含义是:每个 Database 中正在运行的导入的最大个数(不区分导入类型,统一计数)。默认的最大导入并发为 100。当当前 Database 正在运行的导入个数超过最大值时,后续的导入不会被执行。如果是同步导入作业,则导入会被拒绝;如果是异步导入作业,则作业会在队列中等待。
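-
-上述 FE 配置当前的实际取值,可以通过如下语句查看(示意):
-
-```
-ADMIN SHOW FRONTEND CONFIG LIKE "%load%";
-```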
-
-### BE 配置
-
-以下配置属于 BE 的系统配置,可以通过修改 BE 的配置文件 ```be.conf``` 来修改配置。
-
-+ push\_write\_mbytes\_per\_sec
-
- BE 上单个 Tablet 的写入速度限制。默认是 10,即 10MB/s。通常 BE 对单个 Tablet 的最大写入速度,根据 Schema 以及系统的不同,大约在 10-30MB/s 之间。可以适当调整这个参数来控制导入速度。
-
-+ write\_buffer\_size
-
- 导入数据在 BE 上会先写入一个 memtable,memtable 达到阈值后才会写回磁盘。默认大小是 100MB。过小的阈值可能导致 BE 上存在大量的小文件。可以适当提高这个阈值减少文件数量。但过大的阈值可能导致 RPC 超时,见下面的配置说明。
-
-+ tablet\_writer\_rpc\_timeout\_sec
-
- 导入过程中,发送一个 Batch(1024行)的 RPC 超时时间。默认 600 秒。因为该 RPC 可能涉及多个 memtable 的写盘操作,所以可能会因为写盘导致 RPC 超时,可以适当调整这个超时时间来减少超时错误(如 `send batch fail` 错误)。同时,如果调大 `write_buffer_size` 配置,也需要适当调大这个参数。
-
-+ streaming\_load\_rpc\_max\_alive\_time\_sec
-
- 在导入过程中,Doris 会为每一个 Tablet 开启一个 Writer,用于接收数据并写入。这个参数指定了 Writer 的等待超时时间。如果在这个时间内,Writer 没有收到任何数据,则 Writer 会被自动销毁。当系统处理速度较慢时,Writer 可能长时间接收不到下一批数据,导致导入报错:`TabletWriter add batch with unknown id`。此时可适当增大这个配置。默认为 600 秒。
-
-* load\_process\_max\_memory\_limit\_bytes 和 load\_process\_max\_memory\_limit\_percent
-
- 这两个参数,限制了单个 Backend 上,可用于导入任务的内存上限。分别是最大内存和最大内存百分比。`load_process_max_memory_limit_percent` 默认为 80,表示对 Backend 总内存限制的百分比(总内存限制 `mem_limit` 默认为 80%,表示对物理内存的百分比)。即假设物理内存为 M,则默认导入内存限制为 M * 80% * 80%。
-
- `load_process_max_memory_limit_bytes` 默认为 100GB。系统会在两个参数中取较小者,作为最终的 Backend 导入内存使用上限。
-
-+ label\_keep\_max\_second
-
-  设置导入任务记录的保留时间。已经完成的(FINISHED 或 CANCELLED)导入任务记录会保留在 Doris 系统中一段时间,时间由此参数决定。参数默认值为 3 天。该参数通用于所有类型的导入任务。
-
-### 列映射
-假设导入数据为 `1,2,3`,表有 `c1,c2,c3` 三列。如果数据直接导入表中,可以使用语句 `COLUMNS(c1,c2,c3)`,该语句等价于 `COLUMNS(tmp_c1,tmp_c2,tmp_c3,c1=tmp_c1,c2=tmp_c2,c3=tmp_c3)`。
-如果想在导入数据时执行变换或者使用临时变量,则变换或者临时变量一定要按照使用的顺序指定。例如 `COLUMNS(tmp_c1,tmp_c2,tmp_c3, c1 = tmp_c1 +1, c2= c1+1, c3 =c2+1)` 等价于 `COLUMNS(tmp_c1,tmp_c2,tmp_c3, c1 = tmp_c1 +1, c2= tmp_c1 +1+1, c3 =tmp_c1 +1+1+1)`。
-在使用某个表达式时,这个表达式一定要在前面已经定义,例如如下语句是不合法的:`COLUMNS(tmp_c1,tmp_c2,tmp_c3, c1 = c1+1, c2 = temp + 1, temp = tmp_c1 +1, c3 =c2+1)`。
-
diff --git a/docs/zh-CN/administrator-guide/load-data/routine-load-manual.md b/docs/zh-CN/administrator-guide/load-data/routine-load-manual.md
deleted file mode 100644
index 4a68aef24c..0000000000
--- a/docs/zh-CN/administrator-guide/load-data/routine-load-manual.md
+++ /dev/null
@@ -1,335 +0,0 @@
----
-{
- "title": "Routine Load",
- "language": "zh-CN"
-}
----
-
-
-
-# Routine Load
-
-例行导入(Routine Load)功能为用户提供了一种自动从指定数据源进行数据导入的功能。
-
-本文档主要介绍该功能的实现原理、使用方式以及最佳实践。
-
-## 名词解释
-
-* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。
-* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。
-* RoutineLoadJob:用户提交的一个例行导入作业。
-* JobScheduler:例行导入作业调度器,用于调度和拆分一个 RoutineLoadJob 为多个 Task。
-* Task:RoutineLoadJob 被 JobScheduler 根据规则拆分的子任务。
-* TaskScheduler:任务调度器。用于调度 Task 的执行。
-
-## 原理
-
-```
- +---------+
- | Client |
- +----+----+
- |
-+-----------------------------+
-| FE | |
-| +-----------v------------+ |
-| | | |
-| | Routine Load Job | |
-| | | |
-| +---+--------+--------+--+ |
-| | | | |
-| +---v--+ +---v--+ +---v--+ |
-| | task | | task | | task | |
-| +--+---+ +---+--+ +---+--+ |
-| | | | |
-+-----------------------------+
- | | |
- v v v
- +---+--+ +--+---+ ++-----+
- | BE | | BE | | BE |
- +------+ +------+ +------+
-
-```
-
-如上图,Client 向 FE 提交一个例行导入作业。
-
-FE 通过 JobScheduler 将一个导入作业拆分成若干个 Task。每个 Task 负责导入指定的一部分数据。Task 被 TaskScheduler 分配到指定的 BE 上执行。
-
-在 BE 上,一个 Task 被视为一个普通的导入任务,通过 Stream Load 的导入机制进行导入。导入完成后,向 FE 汇报。
-
-FE 中的 JobScheduler 根据汇报结果,继续生成后续新的 Task,或者对失败的 Task 进行重试。
-
-整个例行导入作业通过不断的产生新的 Task,来完成数据不间断的导入。
-
-## Kafka 例行导入
-
-当前我们仅支持从 Kafka 系统进行例行导入。该部分会详细介绍 Kafka 例行导入使用方式和最佳实践。
-
-### 使用限制
-
-1. 支持无认证的 Kafka 访问,以及通过 SSL 方式认证的 Kafka 集群。
-2. 支持的消息格式为 csv, json 文本格式。csv 每一个 message 为一行,且行尾**不包含**换行符。
-3. 默认支持 Kafka 0.10.0(含)以上版本。如果要使用 Kafka 0.10.0 以下版本(0.9.0, 0.8.2, 0.8.1, 0.8.0),需要修改 BE 的配置,将 `kafka_broker_version_fallback` 的值设置为要兼容的旧版本;或者在创建 Routine Load 时直接设置 `property.broker.version.fallback` 的值为要兼容的旧版本。使用旧版本的代价是 Routine Load 的部分新特性可能无法使用,如根据时间设置 Kafka 分区的 offset。
-
-### 创建例行导入任务
-
-创建例行导入任务的详细语法可以在连接到 Doris 后,执行 `HELP ROUTINE LOAD;` 查看。这里主要详细介绍创建作业时的注意事项。
-
-* columns_mapping
-
- `columns_mapping` 主要用于指定表结构和 message 中的列映射关系,以及一些列的转换。如果不指定,Doris 会默认 message 中的列和表结构的列按顺序一一对应。虽然在正常情况下,如果源数据正好一一对应,则不指定也可以进行正常的数据导入。但是我们依然强烈建议用户**显式的指定列映射关系**。这样当表结构发生变化(比如增加一个 nullable 的列),或者源文件发生变化(比如增加了一列)时,导入任务依然可以继续进行。否则,当发生上述变动后,因为列映射关系不再一一对应,导入将报错。
-
- 在 `columns_mapping` 中我们同样可以使用一些内置函数进行列的转换。但需要注意函数参数对应的实际列类型。举例说明:
-
- 假设用户需要导入只包含 `k1` 一列的表,列类型为 `int`。并且需要将源文件中的 null 值转换为 0。该功能可以通过 `ifnull` 函数实现。正确的使用方式如下:
-
- `COLUMNS (xx, k1=ifnull(xx, "0"))`
-
- 注意这里我们使用 `"0"` 而不是 `0`,虽然 `k1` 的类型为 `int`。因为对于导入任务来说,源数据中的列类型都为 `varchar`,所以这里 `xx` 虚拟列的类型也为 `varchar`。所以我们需要使用 `"0"` 来进行对应的匹配,否则 `ifnull` 函数无法找到参数为 `(varchar, int)` 的函数签名,将出现错误。
-
- 再举例,假设用户需要导入只包含 `k1` 一列的表,列类型为 `int`。并且需要将源文件中的对应列进行处理:将负数转换为正数,而将正数乘以 100。这个功能可以通过 `case when` 函数实现,正确写法应如下:
-
- `COLUMNS (xx, k1 = case when xx < 0 then cast(-xx as varchar) else cast((xx + '100') as varchar) end)`
-
- 注意这里我们需要将 `case when` 中所有的参数都最终转换为 varchar,才能得到期望的结果。
-
-* where_predicates
-
- `where_predicates` 中的的列的类型,已经是实际的列类型了,所以无需向 `columns_mapping` 那样强制的转换为 varchar 类型。按照实际的列类型书写即可。
-
-* desired\_concurrent\_number
-
- `desired_concurrent_number` 用于指定一个例行作业期望的并发度。即一个作业,最多有多少 task 同时在执行。对于 Kafka 导入而言,当前的实际并发度计算如下:
-
- ```
-  Min(partition num, desired_concurrent_number, Config.max_routine_load_task_concurrent_num)
- ```
-
-  其中 `Config.max_routine_load_task_concurrent_num` 是系统的一个默认的最大并发数限制。这是一个 FE 配置,可以通过修改该配置调整。默认为 5。
-
- 其中 partition num 指订阅的 Kafka topic 的 partition 数量。
-
-* max\_batch\_interval/max\_batch\_rows/max\_batch\_size
-
- 这三个参数用于控制单个任务的执行时间。其中任意一个阈值达到,则任务结束。其中 `max_batch_rows` 用于记录从 Kafka 中读取到的数据行数。`max_batch_size` 用于记录从 Kafka 中读取到的数据量,单位是字节。目前一个任务的消费速率大约为 5-10MB/s。
-
- 那么假设一行数据 500B,用户希望每 100MB 或 10 秒为一个 task。100MB 的预期处理时间是 10-20 秒,对应的行数约为 200000 行。则一个合理的配置为:
-
- ```
- "max_batch_interval" = "10",
- "max_batch_rows" = "200000",
- "max_batch_size" = "104857600"
- ```
-
- 以上示例中的参数也是这些配置的默认参数。
-
-* max\_error\_number
-
- `max_error_number` 用于控制错误率。在错误率过高的时候,作业会自动暂停。因为整个作业是面向数据流的,且由于数据流的无边界性,我们无法像其他导入任务一样,通过一个错误比例来计算错误率。因此这里提供了一种新的计算方式,来计算数据流中的错误比例。
-
- 我们设定了一个采样窗口。窗口的大小为 `max_batch_rows * 10`。在一个采样窗口内,如果错误行数超过 `max_error_number`,则作业被暂停。如果没有超过,则下一个窗口重新开始计算错误行数。
-
- 我们假设 `max_batch_rows` 为 200000,则窗口大小为 2000000。设 `max_error_number` 为 20000,即用户预期每 2000000 行的错误行为 20000。即错误率为 1%。但是因为不是每批次任务正好消费 200000 行,所以窗口的实际范围是 [2000000, 2200000],即有 10% 的统计误差。
-
- 错误行不包括通过 where 条件过滤掉的行。但是包括没有对应的 Doris 表中的分区的行。
-
-* data\_source\_properties
-
- `data_source_properties` 中可以指定消费具体的 Kafka partition。如果不指定,则默认消费所订阅的 topic 的所有 partition。
-
- 注意,当显式的指定了 partition,则导入作业不会再动态的检测 Kafka partition 的变化。如果没有指定,则会根据 kafka partition 的变化,动态调整需要消费的 partition。
-
-* strict\_mode
-
- Routine load 导入可以开启 strict mode 模式。开启方式为在 job\_properties 中增加 ```"strict_mode" = "true"``` 。默认的 strict mode 为关闭。
-
- strict mode 模式的意思是:对于导入过程中的列类型转换进行严格过滤。严格过滤的策略如下:
-
- 1. 对于列类型转换来说,如果 strict mode 为true,则错误的数据将被 filter。这里的错误数据是指:原始数据并不为空值,在参与列类型转换后结果为空值的这一类数据。
-
- 2. 对于导入的某列由函数变换生成时,strict mode 对其不产生影响。
-
- 3. 对于导入的某列类型包含范围限制的,如果原始数据能正常通过类型转换,但无法通过范围限制的,strict mode 对其也不产生影响。例如:如果类型是 decimal(1,0), 原始数据为 10,则属于可以通过类型转换但不在列声明的范围内。这种数据 strict 对其不产生影响。
-* merge\_type
-  数据的合并类型。共支持 APPEND、DELETE、MERGE 三种类型:APPEND 是默认值,表示这批数据全部追加到现有数据中;DELETE 表示删除与这批数据 key 相同的所有行;MERGE 语义需要与 delete 条件联合使用,表示满足 delete 条件的数据按照 DELETE 语义处理,其余的按照 APPEND 语义处理。
-
-#### strict mode 与 source data 的导入关系
-
-这里以列类型为 TinyInt 来举例
-
->注:当表中的列允许导入空值时
-
-|source data | source data example | string to int | strict_mode | result|
-|------------|---------------------|-----------------|--------------------|---------|
-|空值 | \N | N/A | true or false | NULL|
-|not null | aaa or 2000 | NULL | true | invalid data(filtered)|
-|not null | aaa | NULL | false | NULL|
-|not null | 1 | 1 | true or false | correct data|
-
-这里以列类型为 Decimal(1,0) 举例
-
->注:当表中的列允许导入空值时
-
-|source data | source data example | string to int | strict_mode | result|
-|------------|---------------------|-----------------|--------------------|--------|
-|空值 | \N | N/A | true or false | NULL|
-|not null | aaa | NULL | true | invalid data(filtered)|
-|not null | aaa | NULL | false | NULL|
-|not null | 1 or 10 | 1 | true or false | correct data|
-
-> 注意:10 虽然是一个超过范围的值,但是因为其类型符合 decimal的要求,所以 strict mode对其不产生影响。10 最后会在其他 ETL 处理流程中被过滤。但不会被 strict mode 过滤。
-
-#### 访问 SSL 认证的 Kafka 集群
-
-访问 SSL 认证的 Kafka 集群需要用户提供用于认证 Kafka Broker 公钥的证书文件(ca.pem)。如果 Kafka 集群同时开启了客户端认证,则还需提供客户端的公钥(client.pem)、密钥文件(client.key),以及密钥密码。这里所需的文件需要先通过 `CREATE FILE` 命令上传到 Doris 中,**并且 catalog 名称为 `kafka`**。`CREATE FILE` 命令的具体帮助可以参见 `HELP CREATE FILE;`。这里给出示例:
-
-1. 上传文件
-
- ```
- CREATE FILE "ca.pem" PROPERTIES("url" = "https://example_url/kafka-key/ca.pem", "catalog" = "kafka");
-    CREATE FILE "client.key" PROPERTIES("url" = "https://example_url/kafka-key/client.key", "catalog" = "kafka");
- CREATE FILE "client.pem" PROPERTIES("url" = "https://example_url/kafka-key/client.pem", "catalog" = "kafka");
- ```
-
-2. 创建例行导入作业
-
- ```
- CREATE ROUTINE LOAD db1.job1 on tbl1
- PROPERTIES
- (
- "desired_concurrent_number"="1"
- )
- FROM KAFKA
- (
- "kafka_broker_list"= "broker1:9091,broker2:9091",
- "kafka_topic" = "my_topic",
- "property.security.protocol" = "ssl",
- "property.ssl.ca.location" = "FILE:ca.pem",
- "property.ssl.certificate.location" = "FILE:client.pem",
- "property.ssl.key.location" = "FILE:client.key",
- "property.ssl.key.password" = "abcdefg"
- );
- ```
-
-> Doris 通过 Kafka 的 C++ API `librdkafka` 来访问 Kafka 集群。`librdkafka` 所支持的参数可以参阅其官方配置文档。
-
-
-### 查看导入作业状态
-
-查看**作业**状态的具体命令和示例可以通过 `HELP SHOW ROUTINE LOAD;` 命令查看。
-
-查看**任务**运行状态的具体命令和示例可以通过 `HELP SHOW ROUTINE LOAD TASK;` 命令查看。
-
-只能查看当前正在运行中的任务,已结束和未开始的任务无法查看。
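-
-例如(作业名为假设值):
-
-```
-SHOW ROUTINE LOAD FOR db1.job1;
-SHOW ROUTINE LOAD TASK WHERE JobName = "job1";
-```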
-
-### 修改作业属性
-
-用户可以修改已经创建的作业。具体说明可以通过 `HELP ALTER ROUTINE LOAD;` 命令查看。或参阅 [ALTER ROUTINE LOAD](../../sql-reference/sql-statements/Data%20Manipulation/alter-routine-load.md)。
-
-### 作业控制
-
-用户可以通过 `STOP/PAUSE/RESUME` 三个命令来控制作业的停止,暂停和重启。可以通过 `HELP STOP ROUTINE LOAD;`, `HELP PAUSE ROUTINE LOAD;` 以及 `HELP RESUME ROUTINE LOAD;` 三个命令查看帮助和示例。
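-
-例如(作业名为假设值):
-
-```
-PAUSE ROUTINE LOAD FOR db1.job1;
-RESUME ROUTINE LOAD FOR db1.job1;
-STOP ROUTINE LOAD FOR db1.job1;
-```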
-
-## 其他说明
-
-1. 例行导入作业和 ALTER TABLE 操作的关系
-
- * 例行导入不会阻塞 SCHEMA CHANGE 和 ROLLUP 操作。但是注意如果 SCHEMA CHANGE 完成后,列映射关系无法匹配,则会导致作业的错误数据激增,最终导致作业暂停。建议通过在例行导入作业中显式指定列映射关系,以及通过增加 Nullable 列或带 Default 值的列来减少这类问题。
- * 删除表的 Partition 可能会导致导入数据无法找到对应的 Partition,作业进入暂停。
-
-2. 例行导入作业和其他导入作业的关系(LOAD, DELETE, INSERT)
-
- * 例行导入和其他 LOAD 作业以及 INSERT 操作没有冲突。
- * 当执行 DELETE 操作时,对应表分区不能有任何正在执行的导入任务。所以在执行 DELETE 操作前,可能需要先暂停例行导入作业,并等待已下发的 task 全部完成后,才可以执行 DELETE。
-
-3. 例行导入作业和 DROP DATABASE/TABLE 操作的关系
-
- 当例行导入对应的 database 或 table 被删除后,作业会自动 CANCEL。
-
-4. kafka 类型的例行导入作业和 kafka topic 的关系
-
-    当用户创建例行导入时声明的 `kafka_topic` 在 Kafka 集群中不存在时:
-
- * 如果用户 kafka 集群的 broker 设置了 `auto.create.topics.enable = true`,则 `kafka_topic` 会先被自动创建,自动创建的 partition 个数是由**用户方的kafka集群**中的 broker 配置 `num.partitions` 决定的。例行作业会正常的不断读取该 topic 的数据。
- * 如果用户 kafka 集群的 broker 设置了 `auto.create.topics.enable = false`, 则 topic 不会被自动创建,例行作业会在没有读取任何数据之前就被暂停,状态为 `PAUSED`。
-
- 所以,如果用户希望当 kafka topic 不存在的时候,被例行作业自动创建的话,只需要将**用户方的kafka集群**中的 broker 设置 `auto.create.topics.enable = true` 即可。
-5. 在网络隔离的环境中可能出现的问题
-
-    在有些环境中存在网段和域名解析的隔离措施,所以需要注意:
-
-    1. 创建 Routine Load 任务中指定的 Broker list 必须能够被 Doris 服务访问;
-    2. Kafka 中如果配置了 `advertised.listeners`,则 `advertised.listeners` 中的地址必须能够被 Doris 服务访问。
-
-6. 关于指定消费的 Partition 和 Offset
-
- Doris 支持指定 Partition 和 Offset 开始消费。新版中还支持了指定时间点进行消费的功能。这里说明下对应参数的配置关系。
-
- 有三个相关参数:
-
- * `kafka_partitions`:指定待消费的 partition 列表,如:"0, 1, 2, 3"。
- * `kafka_offsets`:指定每个分区的起始offset,必须和 `kafka_partitions` 列表个数对应。如:"1000, 1000, 2000, 2000"
- * `property.kafka_default_offset`:指定分区默认的起始offset。
-
- 在创建导入作业时,这三个参数可以有以下组合:
-
- | 组合 | `kafka_partitions` | `kafka_offsets` | `property.kafka_default_offset` | 行为 |
- |---|---|---|---|---|
- |1| No | No | No | 系统会自动查找topic对应的所有分区并从 OFFSET_END 开始消费 |
- |2| No | No | Yes | 系统会自动查找topic对应的所有分区并从 default offset 指定的位置开始消费|
- |3| Yes | No | No | 系统会从指定分区的 OFFSET_END 开始消费 |
- |4| Yes | Yes | No | 系统会从指定分区的指定offset 处开始消费 |
- |5| Yes | No | Yes | 系统会从指定分区,default offset 指定的位置开始消费 |
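-
-    例如,组合 4(同时指定 partition 和 offset)的一个作业定义示意如下(作业名、表名、topic 等均为假设值):
-
-    ```
-    CREATE ROUTINE LOAD db1.job2 ON tbl1
-    FROM KAFKA
-    (
-        "kafka_broker_list" = "broker1:9092",
-        "kafka_topic" = "my_topic",
-        "kafka_partitions" = "0, 1, 2, 3",
-        "kafka_offsets" = "1000, 1000, 2000, 2000"
-    );
-    ```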
-
-7. STOP 和 PAUSE 的区别
-
-    FE 会自动定期清理 STOP 状态的 ROUTINE LOAD,而 PAUSE 状态的则可以被再次恢复启用。
-
-## 相关参数
-
-一些系统配置参数会影响例行导入的使用。
-
-1. max\_routine\_load\_task\_concurrent\_num
-
- FE 配置项,默认为 5,可以运行时修改。该参数限制了一个例行导入作业最大的子任务并发数。建议维持默认值。设置过大,可能导致同时并发的任务数过多,占用集群资源。
-
-2. max\_routine_load\_task\_num\_per\_be
-
- FE 配置项,默认为5,可以运行时修改。该参数限制了每个 BE 节点最多并发执行的子任务个数。建议维持默认值。如果设置过大,可能导致并发任务数过多,占用集群资源。
-
-3. max\_routine\_load\_job\_num
-
-    FE 配置项,默认为 100,可以运行时修改。该参数限制了例行导入作业的总数,包括 NEED_SCHEDULED, RUNNING, PAUSE 这些状态。超过后,不能再提交新的作业。
-
-4. max\_consumer\_num\_per\_group
-
- BE 配置项,默认为 3。该参数表示一个子任务中最多生成几个 consumer 进行数据消费。对于 Kafka 数据源,一个 consumer 可能消费一个或多个 kafka partition。假设一个任务需要消费 6 个 kafka partition,则会生成 3 个 consumer,每个 consumer 消费 2 个 partition。如果只有 2 个 partition,则只会生成 2 个 consumer,每个 consumer 消费 1 个 partition。
-
-5. push\_write\_mbytes\_per\_sec
-
- BE 配置项。默认为 10,即 10MB/s。该参数为导入通用参数,不限于例行导入作业。该参数限制了导入数据写入磁盘的速度。对于 SSD 等高性能存储设备,可以适当增加这个限速。
-
-6. max\_tolerable\_backend\_down\_num
-
-    FE 配置项,默认值是 0。在满足某些条件下,Doris 可以将 PAUSED 的任务重新调度,即变回 RUNNING 状态。该参数为 0 代表只有所有 BE 节点都处于 alive 状态时才允许重新调度。
-
-7. period\_of\_auto\_resume\_min
-
-    FE 配置项,默认是 5 分钟。Doris 的重新调度只会在 5 分钟这个周期内最多尝试 3 次。如果 3 次都失败则锁定当前任务,后续不再进行自动调度,但可通过人为干预进行手动恢复。
-
-## keyword
- ROUTINE,LOAD
diff --git a/docs/zh-CN/administrator-guide/load-data/s3-load-manual.md b/docs/zh-CN/administrator-guide/load-data/s3-load-manual.md
deleted file mode 100644
index 3c9b6c5d88..0000000000
--- a/docs/zh-CN/administrator-guide/load-data/s3-load-manual.md
+++ /dev/null
@@ -1,94 +0,0 @@
----
-{
-"title": "S3 Load",
-"language": "zh-CN"
-}
----
-
-
-
-# S3 Load
-
-从0.14 版本开始,Doris 支持通过S3协议直接从支持S3协议的在线存储系统导入数据。
-
-本文档主要介绍如何导入 AWS S3 中存储的数据。也支持从其他支持 S3 协议的对象存储系统导入,如百度云的 BOS、阿里云的 OSS 和腾讯云的 COS 等。
-
-## 适用场景
-
-* 源数据在支持 S3 协议的存储系统中,如 S3、BOS 等。
-* 数据量在几十到几百 GB 级别。
-
-## 准备工作
-1. 准备 AK 和 SK
-   首先需要找到或者重新生成 AWS `Access keys`,可以在 AWS console 的 `My Security Credentials` 中找到生成方式,如下图所示:
-   ![AK_SK](/images/aws_ak_sk.png)
-   选择 `Create New Access Key`,注意保存生成的 AK 和 SK。
-2. 准备 REGION 和 ENDPOINT
-   REGION 可以在创建桶的时候选择,也可以在桶列表中查看到。ENDPOINT 可以通过 REGION 在如下页面查到:[AWS 文档](https://docs.aws.amazon.com/general/latest/gr/s3.html#s3_region)
-
-其他云存储系统可以在相应的文档中找到与 S3 兼容的相关信息。
-
-## 开始导入
-导入方式和Broker Load 基本相同,只需要将 `WITH BROKER broker_name ()` 语句替换成如下部分
-```
- WITH S3
- (
- "AWS_ENDPOINT" = "AWS_ENDPOINT",
- "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY",
- "AWS_SECRET_KEY"="AWS_SECRET_KEY",
- "AWS_REGION" = "AWS_REGION"
- )
-```
-
-完整示例如下
-```
- LOAD LABEL example_db.exmpale_label_1
- (
- DATA INFILE("s3://your_bucket_name/your_file.txt")
- INTO TABLE load_test
- COLUMNS TERMINATED BY ","
- )
- WITH S3
- (
- "AWS_ENDPOINT" = "AWS_ENDPOINT",
- "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY",
- "AWS_SECRET_KEY"="AWS_SECRET_KEY",
- "AWS_REGION" = "AWS_REGION"
- )
- PROPERTIES
- (
- "timeout" = "3600"
- );
-```
-
-## 常见问题
-
-S3 SDK 默认使用 virtual-hosted style 方式。但某些对象存储系统可能没开启或没支持 virtual-hosted style 方式的访问,此时我们可以添加 `use_path_style` 参数来强制使用 path style 方式:
-
-```
- WITH S3
- (
- "AWS_ENDPOINT" = "AWS_ENDPOINT",
- "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY",
- "AWS_SECRET_KEY"="AWS_SECRET_KEY",
- "AWS_REGION" = "AWS_REGION",
- "use_path_style" = "true"
- )
-```
diff --git a/docs/zh-CN/administrator-guide/load-data/sequence-column-manual.md b/docs/zh-CN/administrator-guide/load-data/sequence-column-manual.md
deleted file mode 100644
index e9ffd0d16f..0000000000
--- a/docs/zh-CN/administrator-guide/load-data/sequence-column-manual.md
+++ /dev/null
@@ -1,208 +0,0 @@
----
-{
- "title": "sequence列",
- "language": "zh-CN"
-}
----
-
-
-
-# sequence列
-sequence 列目前只支持 Uniq 模型。Uniq 模型主要针对需要唯一主键的场景,可以保证主键唯一性约束,但是由于使用 REPLACE 聚合方式,在同一批次中导入的数据,替换顺序不做保证,详细介绍可以参考[这里](../../getting-started/data-model-rollup.md)。替换顺序无法保证,则无法确定最终导入到表中的具体数据,存在不确定性。
-
-为了解决这个问题,Doris支持了sequence列,通过用户在导入时指定sequence列,相同key列下,REPLACE聚合类型的列将按照sequence列的值进行替换,较大值可以替换较小值,反之则无法替换。该方法将顺序的确定交给了用户,由用户控制替换顺序。
-
-## 原理
-通过增加一个隐藏列`__DORIS_SEQUENCE_COL__`实现,该列的类型由用户在建表时指定,在导入时确定该列具体值,并依据该值对REPLACE列进行替换。
-
-### 建表
-
-创建Uniq表时,将按照用户指定类型自动添加一个隐藏列`__DORIS_SEQUENCE_COL__`
-
-### 导入
-
-导入时,fe在解析的过程中将隐藏列的值设置成 `order by` 表达式的值(broker load和routine load),或者`function_column.sequence_col`表达式的值(stream load), value列将按照该值进行替换。隐藏列`__DORIS_SEQUENCE_COL__`的值既可以设置为数据源中一列,也可以是表结构中的一列。
-
-### 读取
-
-请求包含 value 列时需要额外读取 `__DORIS_SEQUENCE_COL__` 列,该列用于在相同 key 列下,作为 REPLACE 聚合函数替换顺序的依据,较大值可以替换较小值,反之则不能替换。
-
-### Cumulative Compaction
-
-Cumulative Compaction 时和读取过程原理相同
-
-### Base Compaction
-
-Base Compaction 时和读取过程原理相同
-
-### 语法
-建表时,在 property 中增加了一个属性,用来标识 `__DORIS_SEQUENCE_COL__` 的类型。
-导入的语法设计方面,主要是增加一个从 sequence 列到其他 column 的映射,各个导入方式的设置将在下面介绍。
-
-#### 建表
-创建Uniq表时,可以指定sequence列类型
-```
-PROPERTIES (
-    "function_column.sequence_type" = 'Date'
-);
-```
-sequence_type用来指定sequence列的类型,可以为整型和时间类型
-
-#### stream load
-
-stream load 的写法是在header中的`function_column.sequence_col`字段添加隐藏列对应的source_sequence的映射, 示例
-```
-curl --location-trusted -u root -H "columns: k1,k2,source_sequence,v1,v2" -H "function_column.sequence_col: source_sequence" -T testData http://host:port/api/testDb/testTbl/_stream_load
-```
-
-#### broker load
-
-在`ORDER BY` 处设置隐藏列映射的source_sequence字段
-
-```
-LOAD LABEL db1.label1
-(
- DATA INFILE("hdfs://host:port/user/data/*/test.txt")
- INTO TABLE `tbl1`
- COLUMNS TERMINATED BY ","
- (k1,k2,source_sequence,v1,v2)
- ORDER BY source_sequence
-)
-WITH BROKER 'broker'
-(
- "username"="user",
- "password"="pass"
-)
-PROPERTIES
-(
- "timeout" = "3600"
-);
-
-```
-
-#### routine load
-
-映射方式同上,示例如下
-
-```
- CREATE ROUTINE LOAD example_db.test1 ON example_tbl
- [WITH MERGE|APPEND|DELETE]
- COLUMNS(k1, k2, source_sequence, v1, v2),
- WHERE k1 > 100 and k2 like "%doris%"
- [ORDER BY source_sequence]
- PROPERTIES
- (
- "desired_concurrent_number"="3",
- "max_batch_interval" = "20",
- "max_batch_rows" = "300000",
- "max_batch_size" = "209715200",
- "strict_mode" = "false"
- )
- FROM KAFKA
- (
- "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092",
- "kafka_topic" = "my_topic",
- "kafka_partitions" = "0,1,2,3",
- "kafka_offsets" = "101,0,0,200"
- );
-```
-
-## 启用sequence column支持
-在新建表时如果设置了`function_column.sequence_type` ,则新建表将支持sequence column。
-对于一个不支持sequence column的表,如果想要使用该功能,可以使用如下语句:
-`ALTER TABLE example_db.my_table ENABLE FEATURE "SEQUENCE_LOAD" WITH PROPERTIES ("function_column.sequence_type" = "Date")` 来启用。
-如果要确定一个表是否支持 sequence column,可以通过设置 session variable `SET show_hidden_columns=true` 来显示隐藏列,之后使用 `desc tablename`,如果输出中有 `__DORIS_SEQUENCE_COL__` 列则表示支持,如果没有则不支持。
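-
-例如(表名沿用上文示例,仅为示意):
-
-```
-SET show_hidden_columns = true;
-desc example_db.my_table;
-```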
-
-## 使用示例
-下面以stream load 为例 展示下使用方式
-1. 创建支持sequence column的表
-
-表结构如下:
-```
-MySQL > desc test_table;
-+-------------+--------------+------+-------+---------+---------+
-| Field | Type | Null | Key | Default | Extra |
-+-------------+--------------+------+-------+---------+---------+
-| user_id | BIGINT | No | true | NULL | |
-| date | DATE | No | true | NULL | |
-| group_id | BIGINT | No | true | NULL | |
-| modify_date | DATE | No | false | NULL | REPLACE |
-| keyword | VARCHAR(128) | No | false | NULL | REPLACE |
-+-------------+--------------+------+-------+---------+---------+
-```
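-
-上述表结构可以由类似下面的建表语句得到(分桶数、副本数等为示意用的假设值):
-
-```
-CREATE TABLE test_table
-(
-    user_id BIGINT NOT NULL,
-    `date` DATE NOT NULL,
-    group_id BIGINT NOT NULL,
-    modify_date DATE NOT NULL,
-    keyword VARCHAR(128) NOT NULL
-)
-UNIQUE KEY(user_id, `date`, group_id)
-DISTRIBUTED BY HASH(user_id) BUCKETS 1
-PROPERTIES
-(
-    "function_column.sequence_type" = "Date",
-    "replication_num" = "1"
-);
-```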
-
-2. 正常导入数据:
-
-导入如下数据
-```
-1 2020-02-22 1 2020-02-22 a
-1 2020-02-22 1 2020-02-22 b
-1 2020-02-22 1 2020-03-05 c
-1 2020-02-22 1 2020-02-26 d
-1 2020-02-22 1 2020-02-22 e
-1 2020-02-22 1 2020-02-22 b
-```
-此处以stream load为例, 将sequence column映射为modify_date列
-```
-curl --location-trusted -u root: -H "function_column.sequence_col: modify_date" -T testData http://host:port/api/test/test_table/_stream_load
-```
-结果为
-```
-MySQL > select * from test_table;
-+---------+------------+----------+-------------+---------+
-| user_id | date | group_id | modify_date | keyword |
-+---------+------------+----------+-------------+---------+
-| 1 | 2020-02-22 | 1 | 2020-03-05 | c |
-+---------+------------+----------+-------------+---------+
-```
-在这次导入中,因sequence column的值(也就是modify_date中的值)中'2020-03-05'为最大值,所以keyword列中最终保留了c。
-
-3. 替换顺序的保证
-
-上述步骤完成后,接着导入如下数据
-```
-1 2020-02-22 1 2020-02-22 a
-1 2020-02-22 1 2020-02-23 b
-```
-查询数据
-```
-MySQL [test]> select * from test_table;
-+---------+------------+----------+-------------+---------+
-| user_id | date | group_id | modify_date | keyword |
-+---------+------------+----------+-------------+---------+
-| 1 | 2020-02-22 | 1 | 2020-03-05 | c |
-+---------+------------+----------+-------------+---------+
-```
-由于新导入的数据的sequence column都小于表中已有的值,无法替换
-再尝试导入如下数据
-```
-1 2020-02-22 1 2020-02-22 a
-1 2020-02-22 1 2020-03-23 w
-```
-查询数据
-```
-MySQL [test]> select * from test_table;
-+---------+------------+----------+-------------+---------+
-| user_id | date | group_id | modify_date | keyword |
-+---------+------------+----------+-------------+---------+
-| 1 | 2020-02-22 | 1 | 2020-03-23 | w |
-+---------+------------+----------+-------------+---------+
-```
-此时就可以替换表中原有的数据
\ No newline at end of file
diff --git a/docs/zh-CN/administrator-guide/load-data/spark-load-manual.md b/docs/zh-CN/administrator-guide/load-data/spark-load-manual.md
deleted file mode 100644
index 4f1666275d..0000000000
--- a/docs/zh-CN/administrator-guide/load-data/spark-load-manual.md
+++ /dev/null
@@ -1,596 +0,0 @@
----
-{
- "title": "Spark Load",
- "language": "zh-CN"
-}
----
-
-
-
-# Spark Load
-
-Spark load 通过外部的 Spark 资源实现对导入数据的预处理,提高 Doris 大数据量的导入性能并且节省 Doris 集群的计算资源。主要用于初次迁移,大数据量导入 Doris 的场景。
-
-Spark load 是一种异步导入方式,用户需要通过 MySQL 协议创建 Spark 类型导入任务,并通过 `SHOW LOAD` 查看导入结果。
-
-## 适用场景
-
-* 源数据在 Spark 可以访问的存储系统中,如 HDFS。
-* 数据量在 几十 GB 到 TB 级别。
-
-## 名词解释
-
-1. Frontend(FE):Doris 系统的元数据和调度节点。在导入流程中主要负责导入任务的调度工作。
-2. Backend(BE):Doris 系统的计算和存储节点。在导入流程中主要负责数据写入及存储。
-3. Spark ETL:在导入流程中主要负责数据的 ETL 工作,包括全局字典构建(BITMAP类型)、分区、排序、聚合等。
-4. Broker:Broker 为一个独立的无状态进程。封装了文件系统接口,提供 Doris 读取远端存储系统中文件的能力。
-5. 全局字典: 保存了数据从原始值到编码值映射的数据结构,原始值可以是任意数据类型,而编码后的值为整型;全局字典主要应用于精确去重预计算的场景。
-
-## 基本原理
-
-### 基本流程
-
-用户通过 MySQL 客户端提交 Spark 类型导入任务,FE记录元数据并返回用户提交成功。
-
-Spark load 任务的执行主要分为以下5个阶段。
-
-1. FE 调度提交 ETL 任务到 Spark 集群执行。
-2. Spark 集群执行 ETL 完成对导入数据的预处理。包括全局字典构建(BITMAP类型)、分区、排序、聚合等。
-3. ETL 任务完成后,FE 获取预处理过的每个分片的数据路径,并调度相关的 BE 执行 Push 任务。
-4. BE 通过 Broker 读取数据,转化为 Doris 底层存储格式。
-5. FE 调度生效版本,完成导入任务。
-
-```
- +
- | 0. User create spark load job
- +----v----+
- | FE |---------------------------------+
- +----+----+ |
- | 3. FE send push tasks |
- | 5. FE publish version |
- +------------+------------+ |
- | | | |
-+---v---+ +---v---+ +---v---+ |
-| BE | | BE | | BE | |1. FE submit Spark ETL job
-+---^---+ +---^---+ +---^---+ |
- |4. BE push with broker | |
-+---+---+ +---+---+ +---+---+ |
-|Broker | |Broker | |Broker | |
-+---^---+ +---^---+ +---^---+ |
- | | | |
-+---+------------+------------+---+ 2.ETL +-------------v---------------+
-| HDFS +-------> Spark cluster |
-| <-------+ |
-+---------------------------------+ +-----------------------------+
-
-```
-
-## 全局字典
-### 适用场景
-目前Doris中Bitmap列是使用类库```Roaringbitmap```实现的,而```Roaringbitmap```的输入数据类型只能是整型,因此如果要在导入流程中实现对于Bitmap列的预计算,那么就需要将输入数据的类型转换成整型。
-
-在Doris现有的导入流程中,全局字典的数据结构是基于Hive表实现的,保存了原始值到编码值的映射。
-### 构建流程
-1. 读取上游数据源的数据,生成一张hive临时表,记为`hive_table`。
-2. 从`hive_table`中抽取待去重字段的去重值,生成一张新的hive表,记为`distinct_value_table`。
-3. 新建一张全局字典表,记为`dict_table`;一列为原始值,一列为编码后的值。
-4. 将`distinct_value_table`与`dict_table`做left join,计算出新增的去重值集合,然后对这个集合使用窗口函数进行编码,此时去重列原始值就多了一列编码后的值,最后将这两列的数据写回`dict_table`。
-5. 将`dict_table`与`hive_table`做join,完成`hive_table`中原始值替换成整型编码值的工作。
-6. `hive_table`会被下一步数据预处理的流程所读取,经过计算后导入到Doris中。
-
-## 数据预处理(DPP)
-### 基本流程
-1. 从数据源读取数据,上游数据源可以是HDFS文件,也可以是Hive表。
-2. 对读取到的数据进行字段映射,表达式计算以及根据分区信息生成分桶字段`bucket_id`。
-3. 根据Doris表的rollup元数据生成RollupTree。
-4. 遍历RollupTree,进行分层的聚合操作,下一个层级的rollup可以由上一个层的rollup计算得来。
-5. 每次完成聚合计算后,会对数据根据`bucket_id`进行分桶然后写入HDFS中。
-6. 后续broker会拉取HDFS中的文件然后导入Doris Be中。
-
-## Hive Bitmap UDF
-
-Spark 支持将 hive 生成的 bitmap 数据直接导入到 Doris。详见 [hive-bitmap-udf 文档](../../extending-doris/hive-bitmap-udf.md)
-
-## 基本操作
-
-### 配置 ETL 集群
-
-Spark作为一种外部计算资源在Doris中用来完成ETL工作,未来可能还有其他的外部资源会加入到Doris中使用,如Spark/GPU用于查询,HDFS/S3用于外部存储,MapReduce用于ETL等,因此我们引入resource management来管理Doris使用的这些外部资源。
-
-提交 Spark 导入任务之前,需要配置执行 ETL 任务的 Spark 集群。
-
-语法:
-
-```sql
--- create spark resource
-CREATE EXTERNAL RESOURCE resource_name
-PROPERTIES
-(
- type = spark,
- spark_conf_key = spark_conf_value,
- working_dir = path,
- broker = broker_name,
- broker.property_key = property_value
-)
-
--- drop spark resource
-DROP RESOURCE resource_name
-
--- show resources
-SHOW RESOURCES
-SHOW PROC "/resources"
-
--- privileges
-GRANT USAGE_PRIV ON RESOURCE resource_name TO user_identity
-GRANT USAGE_PRIV ON RESOURCE resource_name TO ROLE role_name
-
-REVOKE USAGE_PRIV ON RESOURCE resource_name FROM user_identity
-REVOKE USAGE_PRIV ON RESOURCE resource_name FROM ROLE role_name
-```
-
-#### 创建资源
-
-`resource_name` 为 Doris 中配置的 Spark 资源的名字。
-
-`PROPERTIES` 是 Spark 资源相关参数,如下:
-
-- `type`:资源类型,必填,目前仅支持 spark。
-
-- Spark 相关参数如下:
- - `spark.master`: 必填,目前支持yarn,spark://host:port。
- - `spark.submit.deployMode`: Spark 程序的部署模式,必填,支持 cluster,client 两种。
- - `spark.hadoop.yarn.resourcemanager.address`: master为yarn时必填。
- - `spark.hadoop.fs.defaultFS`: master为yarn时必填。
- - 其他参数为可选,参考http://spark.apache.org/docs/latest/configuration.html
-- `working_dir`: ETL 使用的目录。spark作为ETL资源使用时必填。例如:hdfs://host:port/tmp/doris。
-- `broker`: broker 名字。spark作为ETL资源使用时必填。需要使用`ALTER SYSTEM ADD BROKER` 命令提前完成配置。
- - `broker.property_key`: broker读取ETL生成的中间文件时需要指定的认证信息等。
-
-示例:
-
-```sql
--- yarn cluster 模式
-CREATE EXTERNAL RESOURCE "spark0"
-PROPERTIES
-(
- "type" = "spark",
- "spark.master" = "yarn",
- "spark.submit.deployMode" = "cluster",
- "spark.jars" = "xxx.jar,yyy.jar",
- "spark.files" = "/tmp/aaa,/tmp/bbb",
- "spark.executor.memory" = "1g",
- "spark.yarn.queue" = "queue0",
- "spark.hadoop.yarn.resourcemanager.address" = "127.0.0.1:9999",
- "spark.hadoop.fs.defaultFS" = "hdfs://127.0.0.1:10000",
- "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris",
- "broker" = "broker0",
- "broker.username" = "user0",
- "broker.password" = "password0"
-);
-
--- spark standalone client 模式
-CREATE EXTERNAL RESOURCE "spark1"
-PROPERTIES
-(
- "type" = "spark",
- "spark.master" = "spark://127.0.0.1:7777",
- "spark.submit.deployMode" = "client",
- "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris",
- "broker" = "broker1"
-);
-```
-
-#### 查看资源
-
-普通账户只能看到自己有USAGE_PRIV使用权限的资源。
-
-root和admin账户可以看到所有的资源。
-
-#### 资源权限
-
-资源权限通过GRANT REVOKE来管理,目前仅支持USAGE_PRIV使用权限。
-
-可以将USAGE_PRIV权限赋予某个用户或者某个角色,角色的使用与之前一致。
-
-```sql
--- 授予spark0资源的使用权限给用户user0
-GRANT USAGE_PRIV ON RESOURCE "spark0" TO "user0"@"%";
-
--- 授予spark0资源的使用权限给角色role0
-GRANT USAGE_PRIV ON RESOURCE "spark0" TO ROLE "role0";
-
--- 授予所有资源的使用权限给用户user0
-GRANT USAGE_PRIV ON RESOURCE * TO "user0"@"%";
-
--- 授予所有资源的使用权限给角色role0
-GRANT USAGE_PRIV ON RESOURCE * TO ROLE "role0";
-
--- 撤销用户user0的spark0资源使用权限
-REVOKE USAGE_PRIV ON RESOURCE "spark0" FROM "user0"@"%";
-```
-
-### 配置 SPARK 客户端
-
-FE底层通过执行spark-submit的命令去提交spark任务,因此需要为FE配置spark客户端,建议使用2.4.5或以上的spark2官方版本,[spark下载地址](https://archive.apache.org/dist/spark/),下载完成后,请按步骤完成以下配置。
-
-#### 配置 SPARK_HOME 环境变量
-
-将spark客户端放在FE同一台机器上的目录下,并在FE的配置文件配置`spark_home_default_dir`项指向此目录,此配置项默认为FE根目录下的 `lib/spark2x`路径,此项不可为空。
-
-#### 配置 SPARK 依赖包
-
-将spark客户端下的jars文件夹内所有jar包归档打包成一个zip文件,并在FE的配置文件配置`spark_resource_path`项指向此zip文件,若此配置项为空,则FE会尝试寻找FE根目录下的`lib/spark2x/jars/spark-2x.zip`文件,若没有找到则会报文件不存在的错误。
-
-当提交spark load任务时,会将归档好的依赖文件上传至远端仓库,默认仓库路径挂在`working_dir/{cluster_id}`目录下,并以`__spark_repository__{resource_name}`命名,表示集群内的一个resource对应一个远端仓库,远端仓库目录结构参考如下:
-
-```
-__spark_repository__spark0/
- |-__archive_1.0.0/
- | |-__lib_990325d2c0d1d5e45bf675e54e44fb16_spark-dpp-1.0.0-jar-with-dependencies.jar
- | |-__lib_7670c29daf535efe3c9b923f778f61fc_spark-2x.zip
- |-__archive_1.1.0/
- | |-__lib_64d5696f99c379af2bee28c1c84271d5_spark-dpp-1.1.0-jar-with-dependencies.jar
- | |-__lib_1bbb74bb6b264a270bc7fca3e964160f_spark-2x.zip
- |-__archive_1.2.0/
- | |-...
-```
-
-除了 spark 依赖(默认以 `spark-2x.zip` 命名),FE 还会上传 DPP 的依赖包至远端仓库。若此次 spark load 提交的所有依赖文件都已存在远端仓库,则不需要再上传依赖,省下原来每次重复上传大量文件的时间。
-
-### 配置 YARN 客户端
-
-FE底层通过执行yarn命令去获取正在运行的application的状态以及杀死application,因此需要为FE配置yarn客户端,建议使用2.5.2或以上的hadoop2官方版本,[hadoop下载地址](https://archive.apache.org/dist/hadoop/common/),下载完成后,请按步骤完成以下配置。
-
-#### 配置 YARN 可执行文件路径
-
-将下载好的yarn客户端放在FE同一台机器的目录下,并在FE配置文件配置`yarn_client_path`项指向yarn的二进制可执行文件,默认为FE根目录下的`lib/yarn-client/hadoop/bin/yarn`路径。
-
-(可选) 当FE通过yarn客户端去获取application的状态或者杀死application时,默认会在FE根目录下的`lib/yarn-config`路径下生成执行yarn命令所需的配置文件,此路径可通过在FE配置文件配置`yarn_config_dir`项修改,目前生成的配置文件包括`core-site.xml`和`yarn-site.xml`。
-
-### 创建导入
-
-语法:
-
-```sql
-LOAD LABEL load_label
- (data_desc, ...)
- WITH RESOURCE resource_name
- [resource_properties]
- [PROPERTIES (key1=value1, ... )]
-
-* load_label:
- db_name.label_name
-
-* data_desc:
- DATA INFILE ('file_path', ...)
- [NEGATIVE]
- INTO TABLE tbl_name
- [PARTITION (p1, p2)]
- [COLUMNS TERMINATED BY separator ]
- [(col1, ...)]
- [COLUMNS FROM PATH AS (col2, ...)]
- [SET (k1=f1(xx), k2=f2(xx))]
- [WHERE predicate]
-
- DATA FROM TABLE hive_external_tbl
- [NEGATIVE]
- INTO TABLE tbl_name
- [PARTITION (p1, p2)]
- [SET (k1=f1(xx), k2=f2(xx))]
- [WHERE predicate]
-
-* resource_properties:
- (key2=value2, ...)
-```
-示例1:上游数据源为hdfs文件的情况
-
-```sql
-LOAD LABEL db1.label1
-(
- DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file1")
- INTO TABLE tbl1
- COLUMNS TERMINATED BY ","
- (tmp_c1,tmp_c2)
- SET
- (
- id=tmp_c2,
- name=tmp_c1
- ),
- DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file2")
- INTO TABLE tbl2
- COLUMNS TERMINATED BY ","
- (col1, col2)
- where col1 > 1
-)
-WITH RESOURCE 'spark0'
-(
- "spark.executor.memory" = "2g",
- "spark.shuffle.compress" = "true"
-)
-PROPERTIES
-(
- "timeout" = "3600"
-);
-
-```
-
-示例2:上游数据源是hive表的情况
-
-```sql
-step 1:新建hive外部表
-CREATE EXTERNAL TABLE hive_t1
-(
- k1 INT,
- K2 SMALLINT,
- k3 varchar(50),
- uuid varchar(100)
-)
-ENGINE=hive
-properties
-(
-"database" = "tmp",
-"table" = "t1",
-"hive.metastore.uris" = "thrift://0.0.0.0:8080"
-);
-
-step 2: 提交load命令,要求导入的 doris 表中的列必须在 hive 外部表中存在。
-LOAD LABEL db1.label1
-(
- DATA FROM TABLE hive_t1
- INTO TABLE tbl1
- SET
- (
- uuid=bitmap_dict(uuid)
- )
-)
-WITH RESOURCE 'spark0'
-(
- "spark.executor.memory" = "2g",
- "spark.shuffle.compress" = "true"
-)
-PROPERTIES
-(
- "timeout" = "3600"
-);
-
-```
-
-示例3:上游数据源是hive binary类型情况
-
-```sql
-step 1:新建hive外部表
-CREATE EXTERNAL TABLE hive_t1
-(
- k1 INT,
- K2 SMALLINT,
- k3 varchar(50),
- uuid varchar(100) //hive中的类型为binary
-)
-ENGINE=hive
-properties
-(
-"database" = "tmp",
-"table" = "t1",
-"hive.metastore.uris" = "thrift://0.0.0.0:8080"
-);
-
-step 2: 提交load命令,要求导入的 doris 表中的列必须在 hive 外部表中存在。
-LOAD LABEL db1.label1
-(
- DATA FROM TABLE hive_t1
- INTO TABLE tbl1
- SET
- (
- uuid=binary_bitmap(uuid)
- )
-)
-WITH RESOURCE 'spark0'
-(
- "spark.executor.memory" = "2g",
- "spark.shuffle.compress" = "true"
-)
-PROPERTIES
-(
- "timeout" = "3600"
-);
-
-```
-
-创建导入的详细语法执行 ```HELP SPARK LOAD``` 查看语法帮助。这里主要介绍 Spark load 的创建导入语法中参数意义和注意事项。
-
-#### Label
-
-导入任务的标识。每个导入任务,都有一个在单 database 内部唯一的 Label。具体规则与 `Broker Load` 一致。
-
-#### 数据描述类参数
-
-目前支持的数据源有CSV和hive table。其他规则与 `Broker Load` 一致。
-
-#### 导入作业参数
-
-导入作业参数主要指的是 Spark load 创建导入语句中的属于 ```opt_properties```部分的参数。导入作业参数是作用于整个导入作业的。规则与 `Broker Load` 一致。
-
-#### Spark资源参数
-
-Spark资源需要提前配置到 Doris系统中并且赋予用户USAGE_PRIV权限后才能使用 Spark load。
-
-当用户有临时性的需求,比如增加任务使用的资源而修改 Spark configs,可以在这里设置,设置仅对本次任务生效,并不影响 Doris 集群中已有的配置。
-
-```sql
-WITH RESOURCE 'spark0'
-(
- "spark.driver.memory" = "1g",
- "spark.executor.memory" = "3g"
-)
-```
-#### 数据源为hive表时的导入
-目前如果期望在导入流程中将hive表作为数据源,那么需要先新建一张类型为hive的外部表,
-然后提交导入命令时指定外部表的表名即可。
-
-#### 导入流程构建全局字典
-适用于doris表聚合列的数据类型为bitmap类型。
-在load命令中指定需要构建全局字典的字段即可,格式为:```doris字段名称=bitmap_dict(hive表字段名称)```
-需要注意的是目前只有在上游数据源为hive表时才支持全局字典的构建。
-
-#### hive binary(bitmap)类型列的导入
-适用于doris表聚合列的数据类型为bitmap类型,且数据源hive表中对应列的数据类型为binary(通过FE中spark-dpp中的org.apache.doris.load.loadv2.dpp.BitmapValue类序列化)类型。
-无需构建全局字典,在load命令中指定相应字段即可,格式为:```doris字段名称=binary_bitmap(hive表字段名称)```
-同样,目前只有在上游数据源为hive表时才支持binary(bitmap)类型的数据导入。
-
-### 查看导入
-
-Spark load 导入方式同 Broker load 一样都是异步的,所以用户必须将创建导入的 Label 记录,并且在**查看导入命令中使用 Label 来查看导入结果**。查看导入命令在所有导入方式中是通用的,具体语法可执行 ```HELP SHOW LOAD``` 查看。
-
-示例:
-
-```
-mysql> show load order by createtime desc limit 1\G
-*************************** 1. row ***************************
- JobId: 76391
- Label: label1
- State: FINISHED
- Progress: ETL:100%; LOAD:100%
- Type: SPARK
- EtlInfo: unselected.rows=4; dpp.abnorm.ALL=15; dpp.norm.ALL=28133376
- TaskInfo: cluster:cluster0; timeout(s):10800; max_filter_ratio:5.0E-5
- ErrorMsg: N/A
- CreateTime: 2019-07-27 11:46:42
- EtlStartTime: 2019-07-27 11:46:44
- EtlFinishTime: 2019-07-27 11:49:44
- LoadStartTime: 2019-07-27 11:49:44
-LoadFinishTime: 2019-07-27 11:50:16
- URL: http://1.1.1.1:8089/proxy/application_1586619723848_0035/
- JobDetails: {"ScannedRows":28133395,"TaskNumber":1,"FileNumber":1,"FileSize":200000}
-```
-
-返回结果集中参数意义可以参考 Broker load。不同点如下:
-
-+ State
-
- 导入任务当前所处的阶段。任务提交之后状态为 PENDING,提交 Spark ETL 之后状态变为 ETL,ETL 完成之后 FE 调度 BE 执行 push 操作状态变为 LOADING,push 完成并且版本生效后状态变为 FINISHED。
-
- 导入任务的最终阶段有两个:CANCELLED 和 FINISHED,当 Load job 处于这两个阶段时导入完成。其中 CANCELLED 为导入失败,FINISHED 为导入成功。
-
-+ Progress
-
- 导入任务的进度描述。分为两种进度:ETL 和 LOAD,对应了导入流程的两个阶段 ETL 和 LOADING。
-
- LOAD 的进度范围为:0~100%。
-
- ```LOAD 进度 = 当前已完成所有replica导入的tablet个数 / 本次导入任务的总tablet个数 * 100%```
-
-  **如果所有导入表均完成导入,此时 LOAD 的进度为 99%**,导入进入到最后的生效阶段;整个导入完成后,LOAD 的进度才会改为 100%。
-
- 导入进度并不是线性的。所以如果一段时间内进度没有变化,并不代表导入没有在执行。
-
-+ Type
-
- 导入任务的类型。Spark load 为 SPARK。
-
-+ CreateTime/EtlStartTime/EtlFinishTime/LoadStartTime/LoadFinishTime
-
- 这几个值分别代表导入创建的时间,ETL 阶段开始的时间,ETL 阶段完成的时间,LOADING 阶段开始的时间和整个导入任务完成的时间。
-
-+ JobDetails
-
- 显示一些作业的详细运行状态,ETL 结束的时候更新。包括导入文件的个数、总大小(字节)、子任务个数、已处理的原始行数等。
-
- ```{"ScannedRows":139264,"TaskNumber":1,"FileNumber":1,"FileSize":940754064}```
-
-+ URL
-
- 可复制输入到浏览器,跳转至相应application的web界面
-
-### 查看 spark launcher 提交日志
-
-有时用户需要查看spark任务提交过程中产生的详细日志,日志默认保存在FE根目录下`log/spark_launcher_log`路径下,并以`spark_launcher_{load_job_id}_{label}.log`命名,日志会在此目录下保存一段时间,当FE元数据中的导入信息被清理时,相应的日志也会被清理,默认保存时间为3天。
-
-### 取消导入
-
-当 Spark load 作业状态不为 CANCELLED 或 FINISHED 时,可以被用户手动取消。取消时需要指定待取消导入任务的 Label 。取消导入命令语法可执行 ```HELP CANCEL LOAD```查看。
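-
-例如(Label 为假设值):
-
-```
-CANCEL LOAD FROM db1 WHERE LABEL = "label1";
-```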
-
-
-
-## 相关系统配置
-
-### FE 配置
-
-下面配置属于 Spark load 的系统级别配置,也就是作用于所有 Spark load 导入任务的配置。主要通过修改 ``` fe.conf```来调整配置值。
-
-+ `enable_spark_load`
-
- 开启 Spark load 和创建 resource 功能。默认为 false,关闭此功能。
-
-+ `spark_load_default_timeout_second`
-
- 任务默认超时时间为259200秒(3天)。
-
-+ `spark_home_default_dir`
-
- spark客户端路径 (`fe/lib/spark2x`) 。
-
-+ `spark_resource_path`
-
- 打包好的spark依赖文件路径(默认为空)。
-
-+ `spark_launcher_log_dir`
-
- spark客户端的提交日志存放的目录(`fe/log/spark_launcher_log`)。
-
-+ `yarn_client_path`
-
- yarn二进制可执行文件路径 (`fe/lib/yarn-client/hadoop/bin/yarn`) 。
-
-+ `yarn_config_dir`
-
- yarn配置文件生成路径 (`fe/lib/yarn-config`) 。
-
-
-## 最佳实践
-
-### 应用场景
-
-使用 Spark load 最适合的场景就是原始数据在文件系统(HDFS)中,数据量在 几十 GB 到 TB 级别。小数据量还是建议使用 Stream load 或者 Broker load。
-
-
-
-## 常见问题
-
-* 使用Spark load时没有在spark客户端的spark-env.sh配置`HADOOP_CONF_DIR`环境变量。
-
-如果`HADOOP_CONF_DIR`环境变量没有设置,会报 `When running with master 'yarn' either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment.` 错误。
-
-* 使用Spark load时`spark_home_default_dir`配置项没有指定spark客户端根目录。
-
-提交Spark job时用到spark-submit命令,如果`spark_home_default_dir`设置错误,会报 `Cannot run program "xxx/bin/spark-submit": error=2, No such file or directory` 错误。
-
-* 使用Spark load时`spark_resource_path`配置项没有指向打包好的zip文件。
-
-如果`spark_resource_path`没有设置正确,会报`File xxx/jars/spark-2x.zip does not exist` 错误。
-
-* 使用Spark load时`yarn_client_path`配置项没有指定yarn的可执行文件。
-
-如果`yarn_client_path`没有设置正确,会报`yarn client does not exist in path: xxx/yarn-client/hadoop/bin/yarn` 错误
-
-
-
-
-
-
diff --git a/docs/zh-CN/administrator-guide/load-data/stream-load-manual.md b/docs/zh-CN/administrator-guide/load-data/stream-load-manual.md
deleted file mode 100644
index a5ba62acc4..0000000000
--- a/docs/zh-CN/administrator-guide/load-data/stream-load-manual.md
+++ /dev/null
@@ -1,415 +0,0 @@
----
-{
- "title": "Stream load",
- "language": "zh-CN"
-}
----
-
-
-
-# Stream load
-
-Stream load 是一个同步的导入方式,用户通过发送 HTTP 协议发送请求将本地文件或数据流导入到 Doris 中。Stream load 同步执行导入并返回导入结果。用户可直接通过请求的返回体判断本次导入是否成功。
-
-Stream load 主要适用于导入本地文件,或通过程序导入数据流中的数据。
-
-## 基本原理
-
-下图展示了 Stream load 的主要流程,省略了一些导入细节。
-
-```
- ^ +
- | |
- | | 1A. User submit load to FE
- | |
- | +--v-----------+
- | | FE |
-5. Return result to user | +--+-----------+
- | |
- | | 2. Redirect to BE
- | |
- | +--v-----------+
- +---+Coordinator BE| 1B. User submit load to BE
- +-+-----+----+-+
- | | |
- +-----+ | +-----+
- | | | 3. Distrbute data
- | | |
- +-v-+ +-v-+ +-v-+
- |BE | |BE | |BE |
- +---+ +---+ +---+
-```
-
-Stream load 中,Doris 会选定一个节点作为 Coordinator 节点。该节点负责接收数据并将数据分发到其他数据节点。
-
-用户通过 HTTP 协议提交导入命令。如果提交到 FE,则 FE 会通过 HTTP redirect 指令将请求转发给某一个 BE。用户也可以直接提交导入命令给某一指定 BE。
-
-导入的最终结果由 Coordinator BE 返回给用户。
-
-## 支持数据格式
-
-目前 Stream Load 支持两种数据格式:CSV(文本)和 JSON。
-
-## 基本操作
-### 创建导入
-
-Stream load 通过 HTTP 协议提交和传输数据。这里通过 `curl` 命令展示如何提交导入。
-
-用户也可以通过其他 HTTP client 进行操作。
-
-```
-curl --location-trusted -u user:passwd [-H ""...] -T data.file -XPUT http://fe_host:http_port/api/{db}/{table}/_stream_load
-
-Header 中支持属性见下面的 ‘导入任务参数’ 说明
-格式为: -H "key1:value1"
-```
-
-示例:
-
-```
-curl --location-trusted -u root -T date -H "label:123" http://abc.com:8030/api/test/date/_stream_load
-```
-创建导入的详细语法帮助可执行 ```HELP STREAM LOAD``` 查看,下面主要介绍创建 Stream load 时部分参数的意义。
-
-#### 签名参数
-
-+ user/passwd
-
- Stream load 由于创建导入的协议使用的是 HTTP 协议,通过 Basic access authentication 进行签名。Doris 系统会根据签名验证用户身份和导入权限。
-
-#### 导入任务参数
-
-Stream load 由于使用的是 HTTP 协议,所以所有导入任务有关的参数均设置在 Header 中。下面主要介绍了 Stream load 导入任务参数的部分参数意义。
-
-+ label
-
- 导入任务的标识。每个导入任务,都有一个在单 database 内部唯一的 label。label 是用户在导入命令中自定义的名称。通过这个 label,用户可以查看对应导入任务的执行情况。
-
- label 的另一个作用,是防止用户重复导入相同的数据。**强烈推荐用户同一批次数据使用相同的 label。这样同一批次数据的重复请求只会被接受一次,保证了 At-Most-Once**
-
- 当 label 对应的导入作业状态为 CANCELLED 时,该 label 可以再次被使用。
-
-+ column_separator
-
- 用于指定导入文件中的列分隔符,默认为\t。如果是不可见字符,则需要加\x作为前缀,使用十六进制来表示分隔符。
-
- 如hive文件的分隔符\x01,需要指定为-H "column_separator:\x01"。
-
- 可以使用多个字符的组合作为列分隔符。
-
-+ line_delimiter
-
- 用于指定导入文件中的换行符,默认为\n。
-
-  可以使用多个字符的组合作为换行符。
-
-+ max\_filter\_ratio
-
-  导入任务的最大容忍率,默认为 0,即不容忍任何错误行,取值范围是 0~1。当导入的错误率超过该值,则导入失败。
-
- 如果用户希望忽略错误的行,可以通过设置这个参数大于 0,来保证导入可以成功。
-
- 计算公式为:
-
- ``` (dpp.abnorm.ALL / (dpp.abnorm.ALL + dpp.norm.ALL ) ) > max_filter_ratio ```
-
- ```dpp.abnorm.ALL``` 表示数据质量不合格的行数。如类型不匹配,列数不匹配,长度不匹配等等。
-
- ```dpp.norm.ALL``` 指的是导入过程中正确数据的条数。可以通过 ```SHOW LOAD``` 命令查询导入任务的正确数据量。
-
- 原始文件的行数 = `dpp.abnorm.ALL + dpp.norm.ALL`
-
-+ where
-
- 导入任务指定的过滤条件。Stream load 支持对原始数据指定 where 语句进行过滤。被过滤的数据将不会被导入,也不会参与 filter ratio 的计算,但会被计入```num_rows_unselected```。
-
-+ partition
-
- 待导入表的 Partition 信息,如果待导入数据不属于指定的 Partition 则不会被导入。这些数据将计入 ```dpp.abnorm.ALL ```
-
-+ columns
-
- 待导入数据的函数变换配置,目前 Stream load 支持的函数变换方法包含列的顺序变化以及表达式变换,其中表达式变换的方法与查询语句的一致。
-
- ```
- 列顺序变换例子:原始数据有三列(src_c1,src_c2,src_c3), 目前doris表也有三列(dst_c1,dst_c2,dst_c3)
-
- 如果原始表的src_c1列对应目标表dst_c1列,原始表的src_c2列对应目标表dst_c2列,原始表的src_c3列对应目标表dst_c3列,则写法如下:
- columns: dst_c1, dst_c2, dst_c3
-
- 如果原始表的src_c1列对应目标表dst_c2列,原始表的src_c2列对应目标表dst_c3列,原始表的src_c3列对应目标表dst_c1列,则写法如下:
- columns: dst_c2, dst_c3, dst_c1
-
- 表达式变换例子:原始文件有两列,目标表也有两列(c1,c2)但是原始文件的两列均需要经过函数变换才能对应目标表的两列,则写法如下:
- columns: tmp_c1, tmp_c2, c1 = year(tmp_c1), c2 = month(tmp_c2)
- 其中 tmp_*是一个占位符,代表的是原始文件中的两个原始列。
- ```
-
-+ exec\_mem\_limit
-
- 导入内存限制。默认为 2GB,单位为字节。
-
-+ strict\_mode
-
- Stream load 导入可以开启 strict mode 模式。开启方式为在 HEADER 中声明 ```strict_mode=true``` 。默认的 strict mode 为关闭。
-
- strict mode 模式的意思是:对于导入过程中的列类型转换进行严格过滤。严格过滤的策略如下:
-
- 1. 对于列类型转换来说,如果 strict mode 为true,则错误的数据将被 filter。这里的错误数据是指:原始数据并不为空值,在参与列类型转换后结果为空值的这一类数据。
-
- 2. 对于导入的某列由函数变换生成时,strict mode 对其不产生影响。
-
- 3. 对于导入的某列类型包含范围限制的,如果原始数据能正常通过类型转换,但无法通过范围限制的,strict mode 对其也不产生影响。例如:如果类型是 decimal(1,0), 原始数据为 10,则属于可以通过类型转换但不在列声明的范围内。这种数据 strict 对其不产生影响。
-+ merge\_type
-  数据的合并类型。共支持 APPEND、DELETE、MERGE 三种类型:APPEND 是默认值,表示这批数据全部追加到现有数据中;DELETE 表示删除与这批数据 key 相同的所有行;MERGE 语义需要与 delete 条件联合使用,表示满足 delete 条件的数据按照 DELETE 语义处理,其余的按照 APPEND 语义处理。
-
-+ two\_phase\_commit
-
- Stream load 导入可以开启两阶段事务提交模式。开启方式为在 HEADER 中声明 ```two_phase_commit=true``` 。默认的两阶段批量事务提交为关闭。
- 两阶段批量事务提交模式的意思是:Stream load过程中,数据写入完成即会返回信息给用户,此时数据不可见,事务状态为PRECOMMITTED,用户手动触发commit操作之后,数据才可见。
-
- 1. 用户可以调用如下接口对stream load事务触发commit操作:
- ```
- curl -X PUT --location-trusted -u user:passwd -H "txn_id:txnId" -H "txn_operation:commit" http://fe_host:http_port/api/{db}/_stream_load_2pc
- ```
- 或
- ```
- curl -X PUT --location-trusted -u user:passwd -H "txn_id:txnId" -H "txn_operation:commit" http://be_host:webserver_port/api/{db}/_stream_load_2pc
- ```
- 2. 用户可以调用如下接口对stream load事务触发abort操作:
- ```
- curl -X PUT --location-trusted -u user:passwd -H "txn_id:txnId" -H "txn_operation:abort" http://fe_host:http_port/api/{db}/_stream_load_2pc
- ```
- 或
- ```
- curl -X PUT --location-trusted -u user:passwd -H "txn_id:txnId" -H "txn_operation:abort" http://be_host:webserver_port/api/{db}/_stream_load_2pc
- ```
-
-#### strict mode 与 source data 的导入关系
-
-这里以列类型为 TinyInt 来举例
-
->注:当表中的列允许导入空值时
-
-|source data | source data example | string to int | strict_mode | result|
-|------------|---------------------|-----------------|--------------------|---------|
-|空值 | \N | N/A | true or false | NULL|
-|not null | aaa or 2000 | NULL | true | invalid data(filtered)|
-|not null | aaa | NULL | false | NULL|
-|not null | 1 | 1 | true or false | correct data|
-
-这里以列类型为 Decimal(1,0) 举例
-
->注:当表中的列允许导入空值时
-
-|source data | source data example | string to int | strict_mode | result|
-|------------|---------------------|-----------------|--------------------|--------|
-|空值 | \N | N/A | true or false | NULL|
-|not null | aaa | NULL | true | invalid data(filtered)|
-|not null | aaa | NULL | false | NULL|
-|not null | 1 or 10 | 1 | true or false | correct data|
-
-> 注意:10 虽然是一个超过范围的值,但是因为其类型符合 decimal的要求,所以 strict mode对其不产生影响。10 最后会在其他 ETL 处理流程中被过滤。但不会被 strict mode 过滤。
-
-
-### 返回结果
-
-由于 Stream load 是一种同步的导入方式,所以导入的结果会通过创建导入的返回值直接返回给用户。
-
-示例:
-
-```
-{
- "TxnId": 1003,
- "Label": "b6f3bc78-0d2c-45d9-9e4c-faa0a0149bee",
- "Status": "Success",
- "ExistingJobStatus": "FINISHED", // optional
- "Message": "OK",
- "NumberTotalRows": 1000000,
- "NumberLoadedRows": 1000000,
- "NumberFilteredRows": 1,
- "NumberUnselectedRows": 0,
- "LoadBytes": 40888898,
- "LoadTimeMs": 2144,
- "BeginTxnTimeMs": 1,
- "StreamLoadPutTimeMs": 2,
- "ReadDataTimeMs": 325,
- "WriteDataTimeMs": 1933,
- "CommitAndPublishTimeMs": 106,
- "ErrorURL": "http://192.168.1.1:8042/api/_load_error_log?file=__shard_0/error_log_insert_stmt_db18266d4d9b4ee5-abb00ddd64bdf005_db18266d4d9b4ee5_abb00ddd64bdf005"
-}
-```
-
-下面主要解释了 Stream load 导入结果参数:
-
-+ TxnId:导入的事务ID。用户可不感知。
-
-+ Label:导入 Label。由用户指定或系统自动生成。
-
-+ Status:导入完成状态。
-
- "Success":表示导入成功。
-
- "Publish Timeout":该状态也表示导入已经完成,只是数据可能会延迟可见,无需重试。
-
- "Label Already Exists":Label 重复,需更换 Label。
-
- "Fail":导入失败。
-
-+ ExistingJobStatus:已存在的 Label 对应的导入作业的状态。
-
- 这个字段只有在当 Status 为 "Label Already Exists" 时才会显示。用户可以通过这个状态,知晓已存在 Label 对应的导入作业的状态。"RUNNING" 表示作业还在执行,"FINISHED" 表示作业成功。
-
-+ Message:导入错误信息。
-
-+ NumberTotalRows:导入总处理的行数。
-
-+ NumberLoadedRows:成功导入的行数。
-
-+ NumberFilteredRows:数据质量不合格的行数。
-
-+ NumberUnselectedRows:被 where 条件过滤的行数。
-
-+ LoadBytes:导入的字节数。
-
-+ LoadTimeMs:导入完成时间。单位毫秒。
-
-+ BeginTxnTimeMs:向Fe请求开始一个事务所花费的时间,单位毫秒。
-
-+ StreamLoadPutTimeMs:向Fe请求获取导入数据执行计划所花费的时间,单位毫秒。
-
-+ ReadDataTimeMs:读取数据所花费的时间,单位毫秒。
-
-+ WriteDataTimeMs:执行写入数据操作所花费的时间,单位毫秒。
-
-+ CommitAndPublishTimeMs:向Fe请求提交并且发布事务所花费的时间,单位毫秒。
-
-+ ErrorURL:如果有数据质量问题,通过访问这个 URL 查看具体错误行。
-
-> 注意:由于 Stream load 是同步的导入方式,所以并不会在 Doris 系统中记录导入信息,用户无法异步的通过查看导入命令看到 Stream load。使用时需监听创建导入请求的返回值获取导入结果。
-
-### 取消导入
-
-用户无法手动取消 Stream load,Stream load 在超时或者导入错误后会被系统自动取消。
-
-## 相关系统配置
-
-### FE 配置
-
-+ stream\_load\_default\_timeout\_second
-
- 导入任务的超时时间(以秒为单位),导入任务在设定的 timeout 时间内未完成则会被系统取消,变成 CANCELLED。
-
- 默认的 timeout 时间为 600 秒。如果导入的源文件无法在规定时间内完成导入,用户可以在 stream load 请求中设置单独的超时时间。
-
- 或者调整 FE 的参数```stream_load_default_timeout_second``` 来设置全局的默认超时时间。
-
-### BE 配置
-
-+ streaming\_load\_max\_mb
-
- Stream load 的最大导入大小,默认为 10G,单位是 MB。如果用户的原始文件超过这个值,则需要调整 BE 的参数 ```streaming_load_max_mb```。
-
-## 最佳实践
-
-### 应用场景
-
-使用 Stream load 的最合适场景就是原始文件在内存中,或者在磁盘中。其次,由于 Stream load 是一种同步的导入方式,所以用户如果希望用同步方式获取导入结果,也可以使用这种导入。
-
-### 数据量
-
-由于 Stream load 的原理是由 BE 发起的导入并分发数据,建议的导入数据量在 1G 到 10G 之间。由于默认的最大 Stream load 导入数据量为 10G,所以如果要导入超过 10G 的文件需要修改 BE 的配置 ```streaming_load_max_mb```
-
-```
-比如:待导入文件大小为15G
-修改 BE 配置 streaming_load_max_mb 为 16000 即可。
-```
-
-Stream load 的默认超时为 300秒,按照 Doris 目前最大的导入限速来看,约超过 3G 的文件就需要修改导入任务默认超时时间了。
-
-```
-导入任务超时时间 = 导入数据量 / 10M/s (具体的平均导入速度需要用户根据自己的集群情况计算)
-例如:导入一个 10G 的文件
-timeout = 1000s 等于 10G / 10M/s
-```
-
-### 完整例子
-数据情况: 数据在发送导入请求端的本地磁盘路径 /home/store_sales 中,导入的数据量约为 15G,希望导入到数据库 bj_sales 的表 store_sales 中。
-
-集群情况:Stream load 的并发数不受集群大小影响。
-
-+ step1: 导入文件大小是否超过默认的最大导入大小10G
-
- ```
- 修改 BE conf
- streaming_load_max_mb = 16000
- ```
-+ step2: 计算大概的导入时间是否超过默认 timeout 值
-
- ```
- 导入时间 ≈ 15000 / 10 = 1500s
- 超过了默认的 timeout 时间,需要修改 FE 的配置
- stream_load_default_timeout_second = 1500
- ```
-
-+ step3:创建导入任务
-
- ```
- curl --location-trusted -u user:password -T /home/store_sales -H "label:abc" http://abc.com:8000/api/bj_sales/store_sales/_stream_load
- ```
-
-## 常见问题
-
-* Label Already Exists
-
- Stream load 的 Label 重复排查步骤如下:
-
- 1. 是否和其他导入方式已经存在的导入 Label 冲突:
-
- 由于 Doris 系统中导入的 Label 不区分导入方式,所以存在其他导入方式使用了相同 Label 的问题。
-
- 通过 ```SHOW LOAD WHERE LABEL = “xxx”```,其中 xxx 为重复的 Label 字符串,查看是否已经存在一个 FINISHED 导入的 Label 和用户申请创建的 Label 相同。
-
- 2. 是否 Stream load 同一个作业被重复提交了
-
-    由于 Stream load 是通过 HTTP 协议提交创建导入任务的,一般各个语言的 HTTP Client 均会自带请求重试逻辑。Doris 系统在接收到第一个请求后,已经开始操作 Stream load,但是由于没有及时将结果返回给 Client 端,Client 端会再次重试创建请求。这时候 Doris 系统由于已经在操作第一个请求,第二个请求就会被报 Label Already Exists。
-
- 排查上述可能的方法:使用 Label 搜索 FE Master 的日志,看是否存在同一个 Label 出现了两次 ```redirect load action to destination= ``` 的情况。如果有就说明,请求被 Client 端重复提交了。
-
-    建议用户根据当前请求的数据量,计算出大致的导入时间,并根据导入超时时间,将 Client 端的请求超时时间改成大于导入超时时间的值,避免请求被 Client 端多次提交。
-
- 3. Connection reset 异常
-
-    社区版 0.14.0 及之前的版本在启用 Http V2 之后可能出现 connection reset 异常。原因是其内置的 Web 容器是 Tomcat,而 Tomcat 对 307 (Temporary Redirect) 协议的实现存在问题:Tomcat 在做 307 跳转之前就开始了数据传输,造成 BE 收到数据请求时缺少认证信息,因此在使用 Stream load 导入大数据量时会出现 connection reset 异常。之后内置容器改成了 Jetty,解决了这个问题。如果你遇到这个问题,请升级你的 Doris 或者禁用 Http V2(`enable_http_server_v2=false`)。
-
-    升级以后,同时将你程序的 http client 版本升级到 `4.5.13`,并在你的 pom.xml 文件中引入下面的依赖:
-
- ```xml
-    <dependency>
-        <groupId>org.apache.httpcomponents</groupId>
-        <artifactId>httpclient</artifactId>
-        <version>4.5.13</version>
-    </dependency>
- ```
-
-
-
-
-
diff --git a/docs/zh-CN/administrator-guide/materialized_view.md b/docs/zh-CN/administrator-guide/materialized_view.md
deleted file mode 100644
index 1e0a169a07..0000000000
--- a/docs/zh-CN/administrator-guide/materialized_view.md
+++ /dev/null
@@ -1,488 +0,0 @@
----
-{
- "title": "物化视图",
- "language": "zh-CN"
-}
----
-
-
-
-# 物化视图
-
-物化视图是将预先计算(根据定义好的 SELECT 语句)好的数据集,存储在 Doris 中的一个特殊的表。
-
-物化视图的出现,主要是为了满足用户既能对原始明细数据做任意维度的分析,也能快速地对固定维度进行分析查询的需求。
-
-## 适用场景
-
-+ 分析需求覆盖明细数据查询以及固定维度查询两方面。
-+ 查询仅涉及表中的很小一部分列或行。
-+ 查询包含一些耗时处理操作,比如:时间很久的聚合操作等。
-+ 查询需要匹配不同前缀索引。
-
-## 优势
-
-+ 对于那些经常重复的使用相同的子查询结果的查询性能大幅提升。
-+ Doris自动维护物化视图的数据,无论是新的导入,还是删除操作都能保证base 表和物化视图表的数据一致性。无需任何额外的人工维护成本。
-+ 查询时,会自动匹配到最优物化视图,并直接从物化视图中读取数据。
-
-*自动维护物化视图的数据会造成一些维护开销,会在后面的物化视图的局限性中展开说明。*
-
-## 物化视图 VS Rollup
-
-在没有物化视图功能之前,用户一般都是使用 Rollup 功能通过预聚合方式提升查询效率的。但是 Rollup 具有一定的局限性,它不能基于明细模型做预聚合。
-
-物化视图则在覆盖了 Rollup 的功能的同时,还能支持更丰富的聚合函数。所以物化视图其实是 Rollup 的一个超集。
-
-也就是说,之前 `ALTER TABLE ADD ROLLUP` 语法支持的功能现在均可以通过 `CREATE MATERIALIZED VIEW` 实现。
-
-## 使用物化视图
-
-Doris 系统提供了一整套对物化视图的 DDL 语法,包括创建,查看,删除。DDL 的语法和 PostgreSQL, Oracle都是一致的。
-
-### 创建物化视图
-
-这里首先你要根据你的查询语句的特点来决定创建一个什么样的物化视图。这里并不是说你的物化视图定义和你的某个查询语句一模一样就最好。这里有两个原则:
-
-1. 从查询语句中**抽象**出,多个查询共有的分组和聚合方式作为物化视图的定义。
-2. 不需要给所有维度组合都创建物化视图。
-
-首先第一点,如果一个物化视图抽象出来后,多个查询都可以匹配到这张物化视图,这种物化视图的效果最好。因为物化视图的维护本身也需要消耗资源。
-
-如果物化视图只和某个特殊的查询很贴合,而其他查询均用不到这个物化视图。则会导致这张物化视图的性价比不高,既占用了集群的存储资源,还不能为更多的查询服务。
-
-所以用户需要结合自己的查询语句,以及数据维度信息去抽象出一些物化视图的定义。
-
-第二点就是,在实际的分析查询中,并不会覆盖到所有的维度分析。所以给常用的维度组合创建物化视图即可,从而达到一个空间和时间上的平衡。
-
-创建物化视图是一个异步的操作,也就是说用户成功提交创建任务后,Doris 会在后台对存量的数据进行计算,直到创建成功。
-
-具体的语法可以通过 MySQL 协议连接 Doris 并输入下面命令查看:
-
-```
-HELP CREATE MATERIALIZED VIEW
-```
-
-### 支持聚合函数
-
-目前物化视图创建语句支持的聚合函数有:
-
-+ SUM, MIN, MAX (Version 0.12)
-+ COUNT, BITMAP\_UNION, HLL\_UNION (Version 0.13)
-
-+ BITMAP\_UNION 的形式必须为:`BITMAP_UNION(TO_BITMAP(COLUMN))` column 列的类型只能是整数(largeint也不支持), 或者 `BITMAP_UNION(COLUMN)` 且 base 表为 AGG 模型。
-+ HLL\_UNION 的形式必须为:`HLL_UNION(HLL_HASH(COLUMN))` column 列的类型不能是 DECIMAL , 或者 `HLL_UNION(COLUMN)` 且 base 表为 AGG 模型。
-
-### 更新策略
-
-为保证物化视图表和 Base 表的数据一致性, Doris 会将导入,删除等对 base 表的操作都同步到物化视图表中。并且通过增量更新的方式来提升更新效率。通过事务方式来保证原子性。
-
-比如如果用户通过 INSERT 命令插入数据到 base 表中,则这条数据会同步插入到物化视图中。当 base 表和物化视图表均写入成功后,INSERT 命令才会成功返回。
-
-### 查询自动匹配
-
-物化视图创建成功后,用户的查询不需要发生任何改变,也就是还是查询的 base 表。Doris 会根据当前查询的语句去自动选择一个最优的物化视图,从物化视图中读取数据并计算。
-
-用户可以通过 EXPLAIN 命令来检查当前查询是否使用了物化视图。
-
-物化视图中的聚合和查询中聚合的匹配关系:
-
-| 物化视图聚合 | 查询中聚合 |
-| ---------- | -------- |
-| sum | sum |
-| min | min |
-| max | max |
-| count | count |
-| bitmap\_union | bitmap\_union, bitmap\_union\_count, count(distinct) |
-| hll\_union | hll\_raw\_agg, hll\_union\_agg, ndv, approx\_count\_distinct |
-
-其中 bitmap 和 hll 的聚合函数在查询匹配到物化视图后,查询的聚合算子会根据物化视图的表结构进行一个改写。详见下文的最佳实践 2。
-
-### 查询物化视图
-
-通过下面命令,可以查看当前表都有哪些物化视图,以及它们的表结构:
-
-```
-MySQL [test]> desc mv_test all;
-+-----------+---------------+-----------------+----------+------+-------+---------+--------------+
-| IndexName | IndexKeysType | Field | Type | Null | Key | Default | Extra |
-+-----------+---------------+-----------------+----------+------+-------+---------+--------------+
-| mv_test | DUP_KEYS | k1 | INT | Yes | true | NULL | |
-| | | k2 | BIGINT | Yes | true | NULL | |
-| | | k3 | LARGEINT | Yes | true | NULL | |
-| | | k4 | SMALLINT | Yes | false | NULL | NONE |
-| | | | | | | | |
-| mv_2 | AGG_KEYS | k2 | BIGINT | Yes | true | NULL | |
-| | | k4 | SMALLINT | Yes | false | NULL | MIN |
-| | | k1 | INT | Yes | false | NULL | MAX |
-| | | | | | | | |
-| mv_3 | AGG_KEYS | k1 | INT | Yes | true | NULL | |
-| | | to_bitmap(`k2`) | BITMAP | No | false | | BITMAP_UNION |
-| | | | | | | | |
-| mv_1 | AGG_KEYS | k4 | SMALLINT | Yes | true | NULL | |
-| | | k1 | BIGINT | Yes | false | NULL | SUM |
-| | | k3 | LARGEINT | Yes | false | NULL | SUM |
-| | | k2 | BIGINT | Yes | false | NULL | MIN |
-+-----------+---------------+-----------------+----------+------+-------+---------+--------------+
-```
-
-可以看到当前 `mv_test` 表一共有三张物化视图:mv\_1, mv\_2 和 mv\_3,以及它们的表结构。
-
-### 删除物化视图
-
-如果用户不再需要物化视图,则可以通过命令删除物化视图。
-
-具体的语法可以通过 MySQL 协议连接 Doris 并输入下面命令查看:
-
-```
-HELP DROP MATERIALIZED VIEW
-```
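-
-例如,删除某张表上的一个物化视图大致如下(表名与物化视图名仅为示意):
-
-```
-DROP MATERIALIZED VIEW mv_name ON table_name;
-```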
-
-## 最佳实践1
-
-使用物化视图一般分为以下几个步骤:
-
-1. 创建物化视图
-2. 异步检查物化视图是否构建完成
-3. 查询并自动匹配物化视图
-
-**首先是第一步:创建物化视图**
-
-假设用户有一张销售记录明细表,存储了每个交易的交易id,销售员,售卖门店,销售时间,以及金额。建表语句为:
-
-```
-create table sales_records(record_id int, seller_id int, store_id int, sale_date date, sale_amt bigint) distributed by hash(record_id) properties("replication_num" = "1");
-```
-这张 `sales_records` 的表结构如下:
-
-```
-MySQL [test]> desc sales_records;
-+-----------+--------+------+-------+---------+-------+
-| Field | Type | Null | Key | Default | Extra |
-+-----------+--------+------+-------+---------+-------+
-| record_id | INT | Yes | true | NULL | |
-| seller_id | INT | Yes | true | NULL | |
-| store_id | INT | Yes | true | NULL | |
-| sale_date | DATE | Yes | false | NULL | NONE |
-| sale_amt | BIGINT | Yes | false | NULL | NONE |
-+-----------+--------+------+-------+---------+-------+
-```
-
-这时候如果用户经常对不同门店的销售量进行一个分析查询,则可以给这个 `sales_records` 表创建一张以售卖门店分组,对相同售卖门店的销售额求和的一个物化视图。创建语句如下:
-
-```
-MySQL [test]> create materialized view store_amt as select store_id, sum(sale_amt) from sales_records group by store_id;
-```
-
-返回如下结果,则说明创建物化视图任务提交成功。
-
-```
-Query OK, 0 rows affected (0.012 sec)
-```
-
-**第二步:检查物化视图是否构建完成**
-
-由于创建物化视图是一个异步的操作,用户在提交完创建物化视图任务后,需要异步的通过命令检查物化视图是否构建完成。命令如下:
-
-```
-SHOW ALTER TABLE ROLLUP FROM db_name; (Version 0.12)
-SHOW ALTER TABLE MATERIALIZED VIEW FROM db_name; (Version 0.13)
-```
-
-这个命令中 `db_name` 是一个参数, 你需要替换成自己真实的 db 名称。命令的结果是显示这个 db 的所有创建物化视图的任务。结果如下:
-
-```
-+-------+---------------+---------------------+---------------------+---------------+-----------------+----------+---------------+-----------+-------------------------------------------------------------------------------------------------------------------------+----------+---------+
-| JobId | TableName | CreateTime | FinishedTime | BaseIndexName | RollupIndexName | RollupId | TransactionId | State | Msg | Progress | Timeout |
-+-------+---------------+---------------------+---------------------+---------------+-----------------+----------+---------------+-----------+-------------------------------------------------------------------------------------------------------------------------+----------+---------+
-| 22036 | sales_records | 2020-07-30 20:04:28 | 2020-07-30 20:04:57 | sales_records | store_amt | 22037 | 5008 | FINISHED | | NULL | 86400 |
-+-------+---------------+---------------------+---------------------+---------------+-----------------+----------+---------------+-----------+-------------------------------------------------------------------------------------------------------------------------+----------+---------+
-```
-
-其中 TableName 指的是物化视图的数据来自于哪个表,RollupIndexName 是物化视图的名称。其中比较重要的指标是 State。
-
-当创建物化视图任务的 State 已经变成 FINISHED 后,就说明这个物化视图已经创建成功了。这就意味着,查询的时候有可能自动匹配到这张物化视图了。
-
-**第三步:查询**
-
-当创建完成物化视图后,用户再查询不同门店的销售量时,就会直接从刚才创建的物化视图 `store_amt` 中读取聚合好的数据,达到提升查询效率的效果。
-
-用户的查询依旧指定查询 `sales_records` 表,比如:
-
-```
-SELECT store_id, sum(sale_amt) FROM sales_records GROUP BY store_id;
-```
-
-上面查询就能自动匹配到 `store_amt`。用户可以通过下面命令,检验当前查询是否匹配到了合适的物化视图。
-
-```
-EXPLAIN SELECT store_id, sum(sale_amt) FROM sales_records GROUP BY store_id;
-+-----------------------------------------------------------------------------+
-| Explain String |
-+-----------------------------------------------------------------------------+
-| PLAN FRAGMENT 0 |
-| OUTPUT EXPRS: `store_id` | sum(`sale_amt`) |
-| PARTITION: UNPARTITIONED |
-| |
-| RESULT SINK |
-| |
-| 4:EXCHANGE |
-| |
-| PLAN FRAGMENT 1 |
-| OUTPUT EXPRS: |
-| PARTITION: HASH_PARTITIONED: `store_id` |
-| |
-| STREAM DATA SINK |
-| EXCHANGE ID: 04 |
-| UNPARTITIONED |
-| |
-| 3:AGGREGATE (merge finalize) |
-| | output: sum( sum(`sale_amt`)) |
-| | group by: `store_id` |
-| | |
-| 2:EXCHANGE |
-| |
-| PLAN FRAGMENT 2 |
-| OUTPUT EXPRS: |
-| PARTITION: RANDOM |
-| |
-| STREAM DATA SINK |
-| EXCHANGE ID: 02 |
-| HASH_PARTITIONED: `store_id` |
-| |
-| 1:AGGREGATE (update serialize) |
-| | STREAMING |
-| | output: sum(`sale_amt`) |
-| | group by: `store_id` |
-| | |
-| 0:OlapScanNode |
-| TABLE: sales_records |
-| PREAGGREGATION: ON |
-| partitions=1/1 |
-| rollup: store_amt |
-| tabletRatio=10/10 |
-| tabletList=22038,22040,22042,22044,22046,22048,22050,22052,22054,22056 |
-| cardinality=0 |
-| avgRowSize=0.0 |
-| numNodes=1 |
-+-----------------------------------------------------------------------------+
-45 rows in set (0.006 sec)
-```
-
-其中最重要的就是 OlapScanNode 中的 rollup 属性。可以看到当前查询的 rollup 显示的是 `store_amt`。也就是说查询已经正确匹配到物化视图 `store_amt`, 并直接从物化视图中读取数据了。
-
-## 最佳实践2 PV,UV
-
-业务场景: 计算广告的 UV,PV
-
-假设用户的原始广告点击数据存储在 Doris,那么针对广告 PV, UV 查询就可以通过创建 `bitmap_union` 的物化视图来提升查询速度。
-
-通过下面语句首先创建一个存储广告点击数据明细的表,包含每条点击的点击时间,点击的是什么广告,通过什么渠道点击,以及点击的用户是谁。
-
-```
-MySQL [test]> create table advertiser_view_record(time date, advertiser varchar(10), channel varchar(10), user_id int) distributed by hash(time) properties("replication_num" = "1");
-Query OK, 0 rows affected (0.014 sec)
-```
-原始的广告点击数据表结构为:
-
-```
-MySQL [test]> desc advertiser_view_record;
-+------------+-------------+------+-------+---------+-------+
-| Field | Type | Null | Key | Default | Extra |
-+------------+-------------+------+-------+---------+-------+
-| time | DATE | Yes | true | NULL | |
-| advertiser | VARCHAR(10) | Yes | true | NULL | |
-| channel | VARCHAR(10) | Yes | false | NULL | NONE |
-| user_id | INT | Yes | false | NULL | NONE |
-+------------+-------------+------+-------+---------+-------+
-4 rows in set (0.001 sec)
-```
-
-1. 创建物化视图
-
- 由于用户想要查询的是广告的 UV 值,也就是需要对相同广告的用户进行一个精确去重,则查询一般为:
-
- ```
- SELECT advertiser, channel, count(distinct user_id) FROM advertiser_view_record GROUP BY advertiser, channel;
- ```
-
- 针对这种求 UV 的场景,我们就可以创建一个带 `bitmap_union` 的物化视图从而达到一个预先精确去重的效果。
-
- 在 Doris 中,`count(distinct)` 聚合的结果和 `bitmap_union_count`聚合的结果是完全一致的。而`bitmap_union_count` 等于 `bitmap_union` 的结果求 count, 所以如果查询中**涉及到 `count(distinct)` 则通过创建带 `bitmap_union` 聚合的物化视图方可加快查询**。
-
- 针对这个 case,则可以创建一个根据广告和渠道分组,对 `user_id` 进行精确去重的物化视图。
-
- ```
- MySQL [test]> create materialized view advertiser_uv as select advertiser, channel, bitmap_union(to_bitmap(user_id)) from advertiser_view_record group by advertiser, channel;
- Query OK, 0 rows affected (0.012 sec)
- ```
-
- *注意:因为本身 user\_id 是一个 INT 类型,所以在 Doris 中需要先将字段通过函数 `to_bitmap` 转换为 bitmap 类型然后才可以进行 `bitmap_union` 聚合。*
-
- 创建完成后, 广告点击明细表和物化视图表的表结构如下:
-
- ```
- MySQL [test]> desc advertiser_view_record all;
- +------------------------+---------------+----------------------+-------------+------+-------+---------+--------------+
- | IndexName | IndexKeysType | Field | Type | Null | Key | Default | Extra |
- +------------------------+---------------+----------------------+-------------+------+-------+---------+--------------+
- | advertiser_view_record | DUP_KEYS | time | DATE | Yes | true | NULL | |
- | | | advertiser | VARCHAR(10) | Yes | true | NULL | |
- | | | channel | VARCHAR(10) | Yes | false | NULL | NONE |
- | | | user_id | INT | Yes | false | NULL | NONE |
- | | | | | | | | |
- | advertiser_uv | AGG_KEYS | advertiser | VARCHAR(10) | Yes | true | NULL | |
- | | | channel | VARCHAR(10) | Yes | true | NULL | |
- | | | to_bitmap(`user_id`) | BITMAP | No | false | | BITMAP_UNION |
- +------------------------+---------------+----------------------+-------------+------+-------+---------+--------------+
- ```
-
-2. 查询自动匹配
-
- 当物化视图表创建完成后,查询广告 UV 时,Doris就会自动从刚才创建好的物化视图 `advertiser_uv` 中查询数据。比如原始的查询语句如下:
-
- ```
- SELECT advertiser, channel, count(distinct user_id) FROM advertiser_view_record GROUP BY advertiser, channel;
- ```
-
- 在选中物化视图后,实际的查询会转化为:
-
- ```
- SELECT advertiser, channel, bitmap_union_count(to_bitmap(user_id)) FROM advertiser_uv GROUP BY advertiser, channel;
- ```
-
- 通过 EXPLAIN 命令可以检验到 Doris 是否匹配到了物化视图:
-
- ```
- MySQL [test]> explain SELECT advertiser, channel, count(distinct user_id) FROM advertiser_view_record GROUP BY advertiser, channel;
- +-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- | Explain String |
- +-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- | PLAN FRAGMENT 0 |
- | OUTPUT EXPRS: `advertiser` | `channel` | bitmap_union_count(`default_cluster:test`.`advertiser_view_record`.`mv_bitmap_union_user_id`) |
- | PARTITION: UNPARTITIONED |
- | |
- | RESULT SINK |
- | |
- | 4:EXCHANGE |
- | |
- | PLAN FRAGMENT 1 |
- | OUTPUT EXPRS: |
- | PARTITION: HASH_PARTITIONED: `advertiser`, `channel` |
- | |
- | STREAM DATA SINK |
- | EXCHANGE ID: 04 |
- | UNPARTITIONED |
- | |
- | 3:AGGREGATE (merge finalize) |
- | | output: bitmap_union_count( bitmap_union_count(`default_cluster:test`.`advertiser_view_record`.`mv_bitmap_union_user_id`)) |
- | | group by: `advertiser`, `channel` |
- | | |
- | 2:EXCHANGE |
- | |
- | PLAN FRAGMENT 2 |
- | OUTPUT EXPRS: |
- | PARTITION: RANDOM |
- | |
- | STREAM DATA SINK |
- | EXCHANGE ID: 02 |
- | HASH_PARTITIONED: `advertiser`, `channel` |
- | |
- | 1:AGGREGATE (update serialize) |
- | | STREAMING |
- | | output: bitmap_union_count(`default_cluster:test`.`advertiser_view_record`.`mv_bitmap_union_user_id`) |
- | | group by: `advertiser`, `channel` |
- | | |
- | 0:OlapScanNode |
- | TABLE: advertiser_view_record |
- | PREAGGREGATION: ON |
- | partitions=1/1 |
- | rollup: advertiser_uv |
- | tabletRatio=10/10 |
- | tabletList=22084,22086,22088,22090,22092,22094,22096,22098,22100,22102 |
- | cardinality=0 |
- | avgRowSize=0.0 |
- | numNodes=1 |
- +-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- 45 rows in set (0.030 sec)
- ```
-
- 在 EXPLAIN 的结果中,首先可以看到 OlapScanNode 的 rollup 属性值为 advertiser_uv。也就是说,查询会直接扫描物化视图的数据。说明匹配成功。
-
- 其次对于 `user_id` 字段求 `count(distinct)` 被改写为求 `bitmap_union_count(to_bitmap)`。也就是通过 bitmap 的方式来达到精确去重的效果。
-
-
-## 最佳实践3
-
-业务场景:匹配更丰富的前缀索引
-
-用户的原始表有 (k1, k2, k3) 三列。其中 k1, k2 为前缀索引列。这时候如果用户查询条件中包含 `where k1=1 and k2=2` 就能通过索引加速查询。
-
-但是有些情况下,用户的过滤条件无法匹配到前缀索引,比如 `where k3=3`。则无法通过索引提升查询速度。
-
-创建以 k3 作为第一列的物化视图就可以解决这个问题。
-
-1. 创建物化视图
-
- ```
- CREATE MATERIALIZED VIEW mv_1 as SELECT k3, k2, k1 FROM tableA ORDER BY k3;
- ```
-
- 通过上面语法创建完成后,物化视图中既保留了完整的明细数据,且物化视图的前缀索引为 k3 列。表结构如下:
-
- ```
- MySQL [test]> desc tableA all;
- +-----------+---------------+-------+------+------+-------+---------+-------+
- | IndexName | IndexKeysType | Field | Type | Null | Key | Default | Extra |
- +-----------+---------------+-------+------+------+-------+---------+-------+
- | tableA | DUP_KEYS | k1 | INT | Yes | true | NULL | |
- | | | k2 | INT | Yes | true | NULL | |
- | | | k3 | INT | Yes | true | NULL | |
- | | | | | | | | |
- | mv_1 | DUP_KEYS | k3 | INT | Yes | true | NULL | |
- | | | k2 | INT | Yes | false | NULL | NONE |
- | | | k1 | INT | Yes | false | NULL | NONE |
- +-----------+---------------+-------+------+------+-------+---------+-------+
- ```
-
-2. 查询匹配
-
-    这时候如果用户的查询中存在 k3 列的过滤条件,比如:
-
- ```
-    select k1, k2, k3 from tableA where k3=3;
- ```
-
- 这时候查询就会直接从刚才创建的 mv_1 物化视图中读取数据。物化视图对 k3 是存在前缀索引的,查询效率也会提升。
-
-
-## 局限性
-
-1. 物化视图的聚合函数的参数不支持表达式,仅支持单列,比如:sum(a+b) 不支持。
-2. 如果删除语句的条件列,在物化视图中不存在,则不能进行删除操作。如果一定要删除数据,则需要先将物化视图删除,然后方可删除数据。
-3. 单表上过多的物化视图会影响导入的效率:导入数据时,物化视图和 base 表数据是同步更新的,如果一张表的物化视图表超过 10 张,则有可能导致导入速度很慢,这就像单次导入需要同时导入 10 张表的数据一样。
-4. 相同列,不同聚合函数,不能同时出现在一张物化视图中,比如:select sum(a), min(a) from table 不支持。
-5. 物化视图针对 Unique Key 数据模型,只能改变列顺序,不能起到聚合的作用,所以在 Unique Key 模型上不能通过创建物化视图的方式对数据进行粗粒度聚合操作。
-
-## 异常错误
-1. DATA_QUALITY_ERR: "The data quality does not satisfy, please check your data"
- 由于数据质量问题导致物化视图创建失败。
- 注意:bitmap类型仅支持正整型, 如果原始数据中存在负数,会导致物化视图创建失败
diff --git a/docs/zh-CN/administrator-guide/multi-tenant.md b/docs/zh-CN/administrator-guide/multi-tenant.md
deleted file mode 100644
index fb1cac4d62..0000000000
--- a/docs/zh-CN/administrator-guide/multi-tenant.md
+++ /dev/null
@@ -1,222 +0,0 @@
----
-{
- "title": "多租户和资源划分",
- "language": "zh-CN"
-}
----
-
-
-
-# 多租户和资源划分
-
-Doris 的多租户和资源隔离方案,主要目的是为了多用户在同一 Doris 集群内进行数据操作时,减少相互之间的干扰,能够将集群资源更合理的分配给各用户。
-
-该方案主要分为两部分,一是集群内节点级别的资源组划分,二是针对单个查询的资源限制。
-
-## Doris 中的节点
-
-首先先简单介绍一下 Doris 的节点组成。一个 Doris 集群中有两类节点:Frontend(FE) 和 Backend(BE)。
-
-FE 主要负责元数据管理、集群管理、用户请求的接入和查询计划的解析等工作。
-
-BE 主要负责数据存储、查询计划的执行等工作。
-
-FE 不参与用户数据的处理计算等工作,因此是一个资源消耗较低的节点。而 BE 负责所有的数据计算、任务处理,属于资源消耗型的节点。因此,本文所介绍的资源划分及资源限制方案,都是针对 BE 节点的。FE 节点因为资源消耗相对较低,并且还可以横向扩展,因此通常无需做资源上的隔离和限制,FE 节点由所有用户共享即可。
-
-## 节点资源划分
-
-节点资源划分,是指将一个 Doris 集群内的 BE 节点设置标签(Tag),标签相同的 BE 节点组成一个资源组(Resource Group)。资源组可以看作是数据存储和计算的一个管理单元。下面我们通过一个具体示例,来介绍资源组的使用方式。
-
-1. 为 BE 节点设置标签
-
- 假设当前 Doris 集群有 6 个 BE 节点。分别为 host[1-6]。在初始情况下,所有节点都属于一个默认资源组(Default)。
-
- 我们可以使用以下命令将这6个节点划分成3个资源组:group_a、group_b、group_c:
-
- ```sql
- alter system modify backend "host1:9050" set ("tag.location" = "group_a");
- alter system modify backend "host2:9050" set ("tag.location" = "group_a");
- alter system modify backend "host3:9050" set ("tag.location" = "group_b");
- alter system modify backend "host4:9050" set ("tag.location" = "group_b");
- alter system modify backend "host5:9050" set ("tag.location" = "group_c");
- alter system modify backend "host6:9050" set ("tag.location" = "group_c");
- ```
-
- 这里我们将 `host[1-2]` 组成资源组 `group_a`,`host[3-4]` 组成资源组 `group_b`,`host[5-6]` 组成资源组 `group_c`。
-
- > 注:一个 BE 只支持设置一个 Tag。
-
-2. 按照资源组分配数据分布
-
-    资源组划分好后,我们可以将用户数据的不同副本分布在不同资源组内。假设有一张用户表 UserTable,我们希望在3个资源组内各存放一个副本,则可以通过如下建表语句实现:
-
- ```sql
- create table UserTable
- (k1 int, k2 int)
- distributed by hash(k1) buckets 1
-    properties(
-        "replication_allocation" = "tag.location.group_a:1, tag.location.group_b:1, tag.location.group_c:1"
-    )
- ```
-
- 这样一来,表 UserTable 中的数据,将会以3副本的形式,分别存储在资源组 group_a、group_b、group_c所在的节点中。
-
- 下图展示了当前的节点划分和数据分布:
-
- ```
- ┌────────────────────────────────────────────────────┐
- │ │
- │ ┌──────────────────┐ ┌──────────────────┐ │
- │ │ host1 │ │ host2 │ │
- │ │ ┌─────────────┐ │ │ │ │
- │ group_a │ │ replica1 │ │ │ │ │
- │ │ └─────────────┘ │ │ │ │
- │ │ │ │ │ │
- │ └──────────────────┘ └──────────────────┘ │
- │ │
- ├────────────────────────────────────────────────────┤
- ├────────────────────────────────────────────────────┤
- │ │
- │ ┌──────────────────┐ ┌──────────────────┐ │
- │ │ host3 │ │ host4 │ │
- │ │ │ │ ┌─────────────┐ │ │
- │ group_b │ │ │ │ replica2 │ │ │
- │ │ │ │ └─────────────┘ │ │
- │ │ │ │ │ │
- │ └──────────────────┘ └──────────────────┘ │
- │ │
- ├────────────────────────────────────────────────────┤
- ├────────────────────────────────────────────────────┤
- │ │
- │ ┌──────────────────┐ ┌──────────────────┐ │
- │ │ host5 │ │ host6 │ │
- │ │ │ │ ┌─────────────┐ │ │
- │ group_c │ │ │ │ replica3 │ │ │
- │ │ │ │ └─────────────┘ │ │
- │ │ │ │ │ │
- │ └──────────────────┘ └──────────────────┘ │
- │ │
- └────────────────────────────────────────────────────┘
- ```
-
-3. 使用不同资源组进行数据查询
-
- 在前两步执行完成后,我们就可以通过设置用户的资源使用权限,来限制某一用户的查询,只能使用指定资源组中的节点来执行。
-
- 比如我们可以通过以下语句,限制 user1 只能使用 `group_a` 资源组中的节点进行数据查询,user2 只能使用 `group_b` 资源组,而 user3 可以同时使用 3 个资源组:
-
- ```sql
- set property for 'user1' 'resource_tags.location' = 'group_a';
- set property for 'user2' 'resource_tags.location' = 'group_b';
- set property for 'user3' 'resource_tags.location' = 'group_a, group_b, group_c';
- ```
-
- 设置完成后,user1 在发起对 UserTable 表的查询时,只会访问 `group_a` 资源组内节点上的数据副本,并且查询仅会使用 `group_a` 资源组内的节点计算资源。而 user3 的查询可以使用任意资源组内的副本和计算资源。
-
-    这样,我们通过对节点的划分,以及对用户的资源使用限制,实现了不同用户查询上的物理资源隔离。更进一步,我们可以给不同的业务部门创建不同的用户,并限制每个用户使用不同的资源组,以避免不同业务部门之间的资源干扰。比如集群内有一张业务表需要共享给所有9个业务部门使用,但是希望能够尽量避免不同部门之间的资源抢占,则我们可以为这张表创建3个副本,分别存储在3个资源组中。接下来,我们为9个业务部门创建9个用户,每3个用户限制使用一个资源组。这样,资源的竞争程度就由9降低到了3。
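-
-    按部门创建用户并绑定资源组的一个示意(用户名与密码均为假设):
-
-    ```sql
-    create user 'dept1_user' identified by 'password';
-    set property for 'dept1_user' 'resource_tags.location' = 'group_a';
-    ```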
-
-    另一方面,针对在线和离线任务的隔离,我们也可以利用资源组的方式实现。比如我们可以将节点划分为 Online 和 Offline 两个资源组。表数据依然以3副本的方式存储,其中 2 个副本存放在 Online 资源组,1 个副本存放在 Offline 资源组。Online 资源组主要用于高并发低延迟的在线数据服务,而一些大查询或离线ETL操作,则可以使用 Offline 资源组中的节点执行。从而实现在统一集群内同时提供在线和离线服务的能力。
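-
-    例如,假设已按前述方式为 BE 分别设置了 online 和 offline 两个标签(标签名仅为示意),则建表时可以这样指定副本分布(表名同样仅为示意):
-
-    ```sql
-    create table ServiceTable (k1 int, k2 int)
-    distributed by hash(k1) buckets 1
-    properties(
-        "replication_allocation" = "tag.location.online:2, tag.location.offline:1"
-    );
-    ```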
-
-## 单查询资源限制
-
-前面提到的资源组方法是节点级别的资源隔离和限制。而在资源组内,依然可能发生资源抢占问题。比如前文提到的将3个业务部门安排在同一资源组内。虽然降低了资源竞争程度,但是这3个部门的查询依然有可能相互影响。
-
-因此,除了资源组方案外,Doris 还提供了对单查询的资源限制功能。
-
-目前 Doris 对单查询的资源限制主要分为 CPU 和 内存限制两方面。
-
-1. 内存限制
-
- Doris 可以限制一个查询被允许使用的最大内存开销。以保证集群的内存资源不会被某一个查询全部占用。我们可以通过以下方式设置内存限制:
-
- ```
- // 设置会话变量 exec_mem_limit。则之后该会话内(连接内)的所有查询都使用这个内存限制。
- set exec_mem_limit=1G;
- // 设置全局变量 exec_mem_limit。则之后所有新会话(新连接)的所有查询都使用这个内存限制。
- set global exec_mem_limit=1G;
- // 在 SQL 中设置变量 exec_mem_limit。则该变量仅影响这个 SQL。
- select /*+ SET_VAR(exec_mem_limit=1G) */ id, name from tbl where xxx;
- ```
-
-    Doris 的查询引擎是基于全内存的 MPP 查询框架,因此当一个查询的内存使用超过限制后,查询会被终止。当一个查询无法在合理的内存限制下运行时,就需要通过一些 SQL 优化手段,或者集群扩容的方式来解决了。
-
-2. CPU 限制
-
- 用户可以通过以下方式限制查询的 CPU 资源:
-
- ```
- // 设置会话变量 cpu_resource_limit。则之后该会话内(连接内)的所有查询都使用这个CPU限制。
-    set cpu_resource_limit = 2;
- // 设置用户的属性 cpu_resource_limit,则所有该用户的查询情况都使用这个CPU限制。该属性的优先级高于会话变量 cpu_resource_limit
- set property for 'user1' 'cpu_resource_limit' = '3';
- ```
-
- `cpu_resource_limit` 的取值是一个相对值,取值越大则能够使用的 CPU 资源越多。但一个查询能使用的CPU上限也取决于表的分区分桶数。原则上,一个查询的最大 CPU 使用量和查询涉及到的 tablet 数量正相关。极端情况下,假设一个查询仅涉及到一个 tablet,则即使 `cpu_resource_limit` 设置一个较大值,也仅能使用 1 个 CPU 资源。
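-
-    可以通过下面的语句确认当前会话或指定用户实际生效的限制值(以下仅为示意,用户名为假设):
-
-    ```
-    SHOW VARIABLES LIKE 'exec_mem_limit';
-    SHOW VARIABLES LIKE 'cpu_resource_limit';
-    SHOW PROPERTY FOR 'user1' LIKE '%cpu_resource_limit%';
-    ```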
-
-通过内存和 CPU 的资源限制,我们可以在一个资源组内对用户的查询进行更细粒度的资源划分。比如我们可以让部分时效性要求不高,但是计算量很大的离线任务使用更少的CPU资源和更多的内存资源。而部分延迟敏感的在线任务,使用更多的CPU资源以及合理的内存资源。
-
-## 最佳实践和向前兼容
-
-Tag 划分和 CPU 限制是 0.15 版本中的新功能。为了保证可以从老版本平滑升级,Doris 做了如下的向前兼容:
-
-1. 每个 BE 节点会有一个默认的 Tag:`"tag.location": "default"`。
-2. 通过 `alter system add backend` 语句新增的 BE 节点也会默认设置 Tag:`"tag.location": "default"`。
-3. 所有表的副本分布默认修改为:`"tag.location.default:xx"`。其中 xx 为原副本数量。
-4. 用户依然可以通过 `"replication_num" = "xx"` 在建表语句中指定副本数,这种属性将会自动转换成:`"tag.location.default:xx"`。从而保证无需修改原建表语句。
-5. 默认情况下,单查询的内存限制为单节点2GB,CPU资源无限制,和原有行为保持一致。且用户的 `resource_tags.location` 属性为空,即默认情况下,用户可以访问任意 Tag 的 BE,和原有行为保持一致。
-
-这里我们给出一个从原集群升级到 0.15 版本后,开始使用资源划分功能的步骤示例:
-
-1. 关闭数据修复与均衡逻辑
-
-    因为升级后,BE的默认Tag为 `"tag.location": "default"`,而表的默认副本分布为:`"tag.location.default:xx"`。所以如果直接修改 BE 的 Tag,系统会自动检测到副本分布的变化,从而开始数据重分布。这可能会占用部分系统资源。所以我们可以在修改 Tag 前,先关闭数据修复与均衡逻辑,以保证我们在规划资源时,不会有副本重分布的操作。
-
- ```
- ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");
- ADMIN SET FRONTEND CONFIG ("disable_tablet_scheduler" = "true");
- ```
-
-2. 设置 Tag 和表副本分布
-
- 接下来可以通过 `alter system modify backend` 语句进行 BE 的 Tag 设置。以及通过 `alter table` 语句修改表的副本分布策略。示例如下:
-
- ```
- alter system modify backend "host1:9050, 1212:9050" set ("tag.location" = "group_a");
- alter table my_table modify partition p1 set ("replication_allocation" = "tag.location.group_a:2");
- ```
-
-3. 开启数据修复与均衡逻辑
-
- 在 Tag 和副本分布都设置完毕后,我们可以开启数据修复与均衡逻辑来触发数据的重分布了。
-
- ```
- ADMIN SET FRONTEND CONFIG ("disable_balance" = "false");
- ADMIN SET FRONTEND CONFIG ("disable_tablet_scheduler" = "false");
- ```
-
- 该过程根据涉及到的数据量会持续一段时间。并且会导致部分 colocation table 无法进行 colocation 规划(因为副本在迁移中)。可以通过 ` show proc "/cluster_balance/"` 来查看进度。也可以通过 `show proc "/statistic"` 中 `UnhealthyTabletNum` 的数量来判断进度。当 `UnhealthyTabletNum` 降为 0 时,则代表数据重分布完毕。
-
-4. 设置用户的资源标签权限。
-
- 等数据重分布完毕后。我们就可以开始设置用户的资源标签权限了。因为默认情况下,用户的 `resource_tags.location` 属性为空,即可以访问任意 Tag 的 BE。所以在前面步骤中,不会影响到已有用户的正常查询。当 `resource_tags.location` 属性非空时,用户将被限制访问指定 Tag 的 BE。
-
-通过以上4步,我们可以较为平滑的在原有集群升级后,使用资源划分功能。
diff --git a/docs/zh-CN/administrator-guide/operation/be-olap-error-code.md b/docs/zh-CN/administrator-guide/operation/be-olap-error-code.md
deleted file mode 100644
index 7acfc77f93..0000000000
--- a/docs/zh-CN/administrator-guide/operation/be-olap-error-code.md
+++ /dev/null
@@ -1,265 +0,0 @@
----
-{
- "title": "BE端OLAP函数的返回值说明",
- "language": "zh-CN"
-}
-
----
-
-
-
-# BE端OLAP函数的返回值说明
-
-
-
-| 返回值名称 | 返回值 | 返回值说明 |
-| ------------------------------------------------ | ------ | ------------------------------------------------------------ |
-| OLAP_SUCCESS | 0 | 成功 |
-| OLAP_ERR_OTHER_ERROR | -1 | 其他错误 |
-| OLAP_REQUEST_FAILED | -2 | 请求失败 |
-| 系统错误代码,例如文件系统内存和其他系统调用失败 | | |
-| OLAP_ERR_OS_ERROR | -100 | 操作系统错误 |
-| OLAP_ERR_DIR_NOT_EXIST | -101 | 目录不存在错误 |
-| OLAP_ERR_FILE_NOT_EXIST | -102 | 文件不存在错误 |
-| OLAP_ERR_CREATE_FILE_ERROR | -103 | 创建文件错误 |
-| OLAP_ERR_MALLOC_ERROR | -104 | 内存分配错误 |
-| OLAP_ERR_STL_ERROR | -105 | 标准模板库错误 |
-| OLAP_ERR_IO_ERROR | -106 | IO错误 |
-| OLAP_ERR_MUTEX_ERROR | -107 | 互斥锁错误 |
-| OLAP_ERR_PTHREAD_ERROR | -108 | POSIX thread错误 |
-| OLAP_ERR_NETWORK_ERROR | -109 | 网络异常错误 |
-| OLAP_ERR_UB_FUNC_ERROR | -110 | |
-| OLAP_ERR_COMPRESS_ERROR | -111 | 数据压缩错误 |
-| OLAP_ERR_DECOMPRESS_ERROR | -112 | 数据解压缩错误 |
-| OLAP_ERR_UNKNOWN_COMPRESSION_TYPE | -113 | 未知的数据压缩类型 |
-| OLAP_ERR_MMAP_ERROR | -114 | 内存映射文件错误 |
-| OLAP_ERR_RWLOCK_ERROR | -115 | 读写锁错误 |
-| OLAP_ERR_READ_UNENOUGH | -116 | 读取内存不够异常 |
-| OLAP_ERR_CANNOT_CREATE_DIR | -117 | 不能创建目录异常 |
-| OLAP_ERR_UB_NETWORK_ERROR | -118 | 网络异常 |
-| OLAP_ERR_FILE_FORMAT_ERROR | -119 | 文件格式异常 |
-| OLAP_ERR_EVAL_CONJUNCTS_ERROR | -120 | |
-| OLAP_ERR_COPY_FILE_ERROR | -121 | 拷贝文件错误 |
-| OLAP_ERR_FILE_ALREADY_EXIST | -122 | 文件已经存在错误 |
-| 通用错误代码 | | |
-| OLAP_ERR_NOT_INITED | -200 | 不能初始化异常 |
-| OLAP_ERR_FUNC_NOT_IMPLEMENTED | -201 | 函数不能执行异常 |
-| OLAP_ERR_CALL_SEQUENCE_ERROR | -202 | 调用SEQUENCE异常 |
-| OLAP_ERR_INPUT_PARAMETER_ERROR | -203 | 输入参数错误 |
-| OLAP_ERR_BUFFER_OVERFLOW | -204 | 内存缓冲区溢出错误 |
-| OLAP_ERR_CONFIG_ERROR | -205 | 配置错误 |
-| OLAP_ERR_INIT_FAILED | -206 | 初始化失败 |
-| OLAP_ERR_INVALID_SCHEMA | -207 | 无效的Schema |
-| OLAP_ERR_CHECKSUM_ERROR | -208 | 检验值错误 |
-| OLAP_ERR_SIGNATURE_ERROR | -209 | 签名错误 |
-| OLAP_ERR_CATCH_EXCEPTION | -210 | 捕捉到异常 |
-| OLAP_ERR_PARSE_PROTOBUF_ERROR | -211 | 解析Protobuf出错 |
-| OLAP_ERR_SERIALIZE_PROTOBUF_ERROR | -212 | Protobuf序列化错误 |
-| OLAP_ERR_WRITE_PROTOBUF_ERROR | -213 | Protobuf写错误 |
-| OLAP_ERR_VERSION_NOT_EXIST | -214 | tablet版本不存在错误 |
-| OLAP_ERR_TABLE_NOT_FOUND | -215 | 未找到tablet错误 |
-| OLAP_ERR_TRY_LOCK_FAILED | -216 | 尝试锁失败 |
-| OLAP_ERR_OUT_OF_BOUND | -218 | 内存越界 |
-| OLAP_ERR_UNDERFLOW | -219 | underflow错误 |
-| OLAP_ERR_FILE_DATA_ERROR | -220 | 文件数据错误 |
-| OLAP_ERR_TEST_FILE_ERROR | -221 | 测试文件错误 |
-| OLAP_ERR_INVALID_ROOT_PATH | -222 | 无效的根目录 |
-| OLAP_ERR_NO_AVAILABLE_ROOT_PATH | -223 | 没有有效的根目录 |
-| OLAP_ERR_CHECK_LINES_ERROR | -224 | 检查行数错误 |
-| OLAP_ERR_INVALID_CLUSTER_INFO | -225 | 无效的Cluster信息 |
-| OLAP_ERR_TRANSACTION_NOT_EXIST | -226 | 事务不存在 |
-| OLAP_ERR_DISK_FAILURE | -227 | 磁盘错误 |
-| OLAP_ERR_TRANSACTION_ALREADY_COMMITTED | -228 | 事务已提交 |
-| OLAP_ERR_TRANSACTION_ALREADY_VISIBLE | -229 | 事务可见 |
-| OLAP_ERR_VERSION_ALREADY_MERGED | -230 | 版本已合并 |
-| OLAP_ERR_LZO_DISABLED | -231 | LZO已禁用 |
-| OLAP_ERR_DISK_REACH_CAPACITY_LIMIT | -232 | 磁盘到达容量限制 |
-| OLAP_ERR_TOO_MANY_TRANSACTIONS | -233 | 太多事务积压未完成 |
-| OLAP_ERR_INVALID_SNAPSHOT_VERSION | -234 | 无效的快照版本 |
-| OLAP_ERR_TOO_MANY_VERSION | -235 | tablet的数据版本超过了最大限制(默认500) |
-| OLAP_ERR_NOT_INITIALIZED | -236 | 不能初始化 |
-| OLAP_ERR_ALREADY_CANCELLED | -237 | 已经被取消 |
-| OLAP_ERR_TOO_MANY_SEGMENTS | -238 | 通常出现在同一批导入数据量过大的情况,从而导致某一个 tablet 的 Segment 文件过多 |
-| 命令执行异常代码 | | |
-| OLAP_ERR_CE_CMD_PARAMS_ERROR | -300 | 命令参数错误 |
-| OLAP_ERR_CE_BUFFER_TOO_SMALL | -301 | 缓冲区太小 |
-| OLAP_ERR_CE_CMD_NOT_VALID | -302 | 无效的命令 |
-| OLAP_ERR_CE_LOAD_TABLE_ERROR | -303 | 加载数据表错误 |
-| OLAP_ERR_CE_NOT_FINISHED | -304 | 命令没有执行成功 |
-| OLAP_ERR_CE_TABLET_ID_EXIST | -305 | tablet Id已存在错误 |
-| OLAP_ERR_CE_TRY_CE_LOCK_ERROR | -306 | 尝试获取执行命令锁错误 |
-| Tablet错误异常代码 | | |
-| OLAP_ERR_TABLE_VERSION_DUPLICATE_ERROR | -400 | tablet副本版本错误 |
-| OLAP_ERR_TABLE_VERSION_INDEX_MISMATCH_ERROR | -401 | tablet版本索引不匹配异常 |
-| OLAP_ERR_TABLE_INDEX_VALIDATE_ERROR | -402 | 这里不检查tablet的初始版本,因为如果在一个tablet进行schema-change时重新启动 BE,我们可能会遇到空tablet异常 |
-| OLAP_ERR_TABLE_INDEX_FIND_ERROR | -403 | 无法获得第一个Block块位置 或者找到最后一行Block块失败会引发此异常 |
-| OLAP_ERR_TABLE_CREATE_FROM_HEADER_ERROR | -404 | 无法加载Tablet的时候会触发此异常 |
-| OLAP_ERR_TABLE_CREATE_META_ERROR | -405 | 无法创建Tablet(更改schema),Base tablet不存在 ,会触发此异常 |
-| OLAP_ERR_TABLE_ALREADY_DELETED_ERROR | -406 | tablet已经被删除 |
-| 存储引擎错误代码 | | |
-| OLAP_ERR_ENGINE_INSERT_EXISTS_TABLE | -500 | 添加相同的tablet两次,添加tablet到相同数据目录两次,新tablet为空,旧tablet存在。会触发此异常 |
-| OLAP_ERR_ENGINE_DROP_NOEXISTS_TABLE | -501 | 删除不存在的表 |
-| OLAP_ERR_ENGINE_LOAD_INDEX_TABLE_ERROR | -502 | 加载tablet_meta失败,cumulative rowset无效的segment group meta,会引发此异常 |
-| OLAP_ERR_TABLE_INSERT_DUPLICATION_ERROR | -503 | 表插入重复 |
-| OLAP_ERR_DELETE_VERSION_ERROR | -504 | 删除版本错误 |
-| OLAP_ERR_GC_SCAN_PATH_ERROR | -505 | GC扫描路径错误 |
-| OLAP_ERR_ENGINE_INSERT_OLD_TABLET | -506 | 当 BE 正在重新启动并且较旧的tablet已添加到垃圾收集队列但尚未删除时,在这种情况下,由于 data_dirs 是并行加载的,稍后加载的tablet可能比以前加载的tablet旧,这不应被确认为失败,所以此时返回该代码 |
-| Fetch Handler错误代码 | | |
-| OLAP_ERR_FETCH_OTHER_ERROR | -600 | FetchHandler其他错误 |
-| OLAP_ERR_FETCH_TABLE_NOT_EXIST | -601 | FetchHandler表不存在 |
-| OLAP_ERR_FETCH_VERSION_ERROR | -602 | FetchHandler版本错误 |
-| OLAP_ERR_FETCH_SCHEMA_ERROR | -603 | FetchHandler Schema错误 |
-| OLAP_ERR_FETCH_COMPRESSION_ERROR | -604 | FetchHandler压缩错误 |
-| OLAP_ERR_FETCH_CONTEXT_NOT_EXIST | -605 | FetchHandler上下文不存在 |
-| OLAP_ERR_FETCH_GET_READER_PARAMS_ERR | -606 | FetchHandler GET读参数错误 |
-| OLAP_ERR_FETCH_SAVE_SESSION_ERR | -607 | FetchHandler保存会话错误 |
-| OLAP_ERR_FETCH_MEMORY_EXCEEDED | -608 | FetchHandler内存超出异常 |
-| 读异常错误代码 | | |
-| OLAP_ERR_READER_IS_UNINITIALIZED | -700 | 读不能初始化 |
-| OLAP_ERR_READER_GET_ITERATOR_ERROR | -701 | 获取读迭代器错误 |
-| OLAP_ERR_CAPTURE_ROWSET_READER_ERROR | -702 | 当前Rowset读错误 |
-| OLAP_ERR_READER_READING_ERROR | -703 | 初始化列数据失败,cumulative rowset 的列数据无效 ,会返回该异常代码 |
-| OLAP_ERR_READER_INITIALIZE_ERROR | -704 | 读初始化失败 |
-| BaseCompaction异常代码信息 | | |
-| OLAP_ERR_BE_VERSION_NOT_MATCH | -800 | BE Compaction 版本不匹配错误 |
-| OLAP_ERR_BE_REPLACE_VERSIONS_ERROR | -801 | BE Compaction 替换版本错误 |
-| OLAP_ERR_BE_MERGE_ERROR | -802 | BE Compaction合并错误 |
-| OLAP_ERR_CAPTURE_ROWSET_ERROR | -804 | 找不到Rowset对应的版本 |
-| OLAP_ERR_BE_SAVE_HEADER_ERROR | -805 | BE Compaction保存Header错误 |
-| OLAP_ERR_BE_INIT_OLAP_DATA | -806 | BE Compaction 初始化OLAP数据错误 |
-| OLAP_ERR_BE_TRY_OBTAIN_VERSION_LOCKS | -807 | BE Compaction 尝试获得版本锁错误 |
-| OLAP_ERR_BE_NO_SUITABLE_VERSION | -808 | BE Compaction 没有合适的版本 |
-| OLAP_ERR_BE_TRY_BE_LOCK_ERROR | -809 | 其他base compaction正在运行,尝试获取锁失败 |
-| OLAP_ERR_BE_INVALID_NEED_MERGED_VERSIONS | -810 | 无效的Merge版本 |
-| OLAP_ERR_BE_ERROR_DELETE_ACTION | -811 | BE执行删除操作错误 |
-| OLAP_ERR_BE_SEGMENTS_OVERLAPPING | -812 | cumulative point有重叠的Rowset异常 |
-| OLAP_ERR_BE_CLONE_OCCURRED | -813 | 将压缩任务提交到线程池后可能会发生克隆任务,并且选择用于压缩的行集可能会发生变化。 在这种情况下,不应执行当前的压缩任务。 返回该代码 |
-| PUSH异常代码 | | |
-| OLAP_ERR_PUSH_INIT_ERROR | -900 | 无法初始化读取器,无法创建表描述符,无法初始化内存跟踪器,不支持的文件格式类型,无法打开扫描仪,无法获取元组描述符,为元组分配内存失败,都会返回该代码 |
-| OLAP_ERR_PUSH_DELTA_FILE_EOF | -901 | |
-| OLAP_ERR_PUSH_VERSION_INCORRECT | -902 | PUSH版本不正确 |
-| OLAP_ERR_PUSH_SCHEMA_MISMATCH | -903 | PUSH Schema不匹配 |
-| OLAP_ERR_PUSH_CHECKSUM_ERROR | -904 | PUSH校验值错误 |
-| OLAP_ERR_PUSH_ACQUIRE_DATASOURCE_ERROR | -905 | PUSH 获取数据源错误 |
-| OLAP_ERR_PUSH_CREAT_CUMULATIVE_ERROR | -906 | PUSH 创建CUMULATIVE错误代码 |
-| OLAP_ERR_PUSH_BUILD_DELTA_ERROR | -907 | 推送的增量文件有错误的校验码 |
-| OLAP_ERR_PUSH_VERSION_ALREADY_EXIST | -908 | PUSH的版本已经存在 |
-| OLAP_ERR_PUSH_TABLE_NOT_EXIST | -909 | PUSH的表不存在 |
-| OLAP_ERR_PUSH_INPUT_DATA_ERROR | -910 | PUSH的数据无效,可能是长度,数据类型等问题 |
-| OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST | -911 | 将事务提交给引擎时,发现Rowset存在,但Rowset ID 不一样 |
-| OLAP_ERR_PUSH_BATCH_PROCESS_REMOVED | -912 | 删除了推送批处理过程 |
-| OLAP_ERR_PUSH_COMMIT_ROWSET | -913 | PUSH Commit Rowset |
-| OLAP_ERR_PUSH_ROWSET_NOT_FOUND | -914 | PUSH Rowset没有发现 |
-| SegmentGroup异常代码 | | |
-| OLAP_ERR_INDEX_LOAD_ERROR | -1000 | 加载索引错误 |
-| OLAP_ERR_INDEX_EOF | -1001 | |
-| OLAP_ERR_INDEX_CHECKSUM_ERROR | -1002 | 校验码验证错误,加载索引对应的Segment 错误。 |
-| OLAP_ERR_INDEX_DELTA_PRUNING | -1003 | 索引增量修剪 |
-| OLAPData异常代码信息 | | |
-| OLAP_ERR_DATA_ROW_BLOCK_ERROR | -1100 | 数据行Block块错误 |
-| OLAP_ERR_DATA_FILE_TYPE_ERROR | -1101 | 数据文件类型错误 |
-| OLAP_ERR_DATA_EOF | -1102 | |
-| OLAP数据写错误代码 | | |
-| OLAP_ERR_WRITER_INDEX_WRITE_ERROR | -1200 | 索引写错误 |
-| OLAP_ERR_WRITER_DATA_WRITE_ERROR | -1201 | 数据写错误 |
-| OLAP_ERR_WRITER_ROW_BLOCK_ERROR | -1202 | Row Block块写错误 |
-| OLAP_ERR_WRITER_SEGMENT_NOT_FINALIZED | -1203 | 在添加新Segment之前,上一Segment未完成 |
-| RowBlock错误代码 | | |
-| OLAP_ERR_ROWBLOCK_DECOMPRESS_ERROR | -1300 | Rowblock解压缩错误 |
-| OLAP_ERR_ROWBLOCK_FIND_ROW_EXCEPTION | -1301 | 获取Block Entry失败 |
-| OLAP_ERR_ROWBLOCK_READ_INFO_ERROR | -1302 | 读取Rowblock信息错误 |
-| Tablet元数据错误 | | |
-| OLAP_ERR_HEADER_ADD_VERSION | -1400 | tablet元数据增加版本 |
-| OLAP_ERR_HEADER_DELETE_VERSION | -1401 | tablet元数据删除版本 |
-| OLAP_ERR_HEADER_ADD_PENDING_DELTA | -1402 | tablet元数据添加待处理增量 |
-| OLAP_ERR_HEADER_ADD_INCREMENTAL_VERSION | -1403 | tablet元数据添加自增版本 |
-| OLAP_ERR_HEADER_INVALID_FLAG | -1404 | tablet元数据无效的标记 |
-| OLAP_ERR_HEADER_PUT | -1405 | tablet元数据PUT操作 |
-| OLAP_ERR_HEADER_DELETE | -1406 | tablet元数据DELETE操作 |
-| OLAP_ERR_HEADER_GET | -1407 | tablet元数据GET操作 |
-| OLAP_ERR_HEADER_LOAD_INVALID_KEY | -1408 | tablet元数据加载无效Key |
-| OLAP_ERR_HEADER_FLAG_PUT | -1409 | |
-| OLAP_ERR_HEADER_LOAD_JSON_HEADER | -1410 | tablet元数据加载JSON Header |
-| OLAP_ERR_HEADER_INIT_FAILED | -1411 | tablet元数据Header初始化失败 |
-| OLAP_ERR_HEADER_PB_PARSE_FAILED | -1412 | tablet元数据 Protobuf解析失败 |
-| OLAP_ERR_HEADER_HAS_PENDING_DATA | -1413 | tablet元数据有待处理的数据 |
-| TabletSchema异常代码信息 | | |
-| OLAP_ERR_SCHEMA_SCHEMA_INVALID | -1500 | Tablet Schema无效 |
-| OLAP_ERR_SCHEMA_SCHEMA_FIELD_INVALID | -1501 | Tablet Schema 字段无效 |
-| SchemaHandler异常代码信息 | | |
-| OLAP_ERR_ALTER_MULTI_TABLE_ERR | -1600 | ALTER 多表错误 |
-| OLAP_ERR_ALTER_DELTA_DOES_NOT_EXISTS | -1601 | 获取所有数据源失败,Tablet无版本 |
-| OLAP_ERR_ALTER_STATUS_ERR | -1602 | 检查行号失败,内部排序失败,行块排序失败,这些都会返回该代码 |
-| OLAP_ERR_PREVIOUS_SCHEMA_CHANGE_NOT_FINISHED | -1603 | 先前的Schema更改未完成 |
-| OLAP_ERR_SCHEMA_CHANGE_INFO_INVALID | -1604 | Schema变更信息无效 |
-| OLAP_ERR_QUERY_SPLIT_KEY_ERR | -1605 | 查询 Split key 错误 |
-| OLAP_ERR_DATA_QUALITY_ERR | -1606 | 模式更改/物化视图期间数据质量问题导致的错误 |
-| Column File错误代码 | | |
-| OLAP_ERR_COLUMN_DATA_LOAD_BLOCK | -1700 | 加载列数据块错误 |
-| OLAP_ERR_COLUMN_DATA_RECORD_INDEX | -1701 | 加载数据记录索引错误 |
-| OLAP_ERR_COLUMN_DATA_MAKE_FILE_HEADER | -1702 | |
-| OLAP_ERR_COLUMN_DATA_READ_VAR_INT | -1703 | 无法从Stream中读取列数据 |
-| OLAP_ERR_COLUMN_DATA_PATCH_LIST_NUM | -1704 | |
-| OLAP_ERR_COLUMN_STREAM_EOF | -1705 | 如果数据流结束,返回该代码 |
-| OLAP_ERR_COLUMN_READ_STREAM | -1706 | 块大小大于缓冲区大小,压缩剩余大小小于Stream头大小,读取流失败 这些情况下会抛出该异常 |
-| OLAP_ERR_COLUMN_STREAM_NOT_EXIST | -1707 | Stream为空,不存在,未找到数据流 等情况下返回该异常代码 |
-| OLAP_ERR_COLUMN_VALUE_NULL | -1708 | 列值为空异常 |
-| OLAP_ERR_COLUMN_SEEK_ERROR | -1709 | 如果通过schema变更添加列,由于schema变更可能导致列索引存在,返回这个异常代码 |
-| DeleteHandler错误代码 | | |
-| OLAP_ERR_DELETE_INVALID_CONDITION | -1900 | 删除条件无效 |
-| OLAP_ERR_DELETE_UPDATE_HEADER_FAILED | -1901 | 删除更新Header错误 |
-| OLAP_ERR_DELETE_SAVE_HEADER_FAILED | -1902 | 删除保存header错误 |
-| OLAP_ERR_DELETE_INVALID_PARAMETERS | -1903 | 删除参数无效 |
-| OLAP_ERR_DELETE_INVALID_VERSION | -1904 | 删除版本无效 |
-| Cumulative Handler错误代码 | | |
-| OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSIONS | -2000 | Cumulative没有合适的版本 |
-| OLAP_ERR_CUMULATIVE_REPEAT_INIT | -2001 | Cumulative Repeat 初始化错误 |
-| OLAP_ERR_CUMULATIVE_INVALID_PARAMETERS | -2002 | Cumulative参数无效 |
-| OLAP_ERR_CUMULATIVE_FAILED_ACQUIRE_DATA_SOURCE | -2003 | Cumulative获取数据源失败 |
-| OLAP_ERR_CUMULATIVE_INVALID_NEED_MERGED_VERSIONS | -2004 | Cumulative无有效需要合并版本 |
-| OLAP_ERR_CUMULATIVE_ERROR_DELETE_ACTION | -2005 | Cumulative删除操作错误 |
-| OLAP_ERR_CUMULATIVE_MISS_VERSION | -2006 | rowsets缺少版本 |
-| OLAP_ERR_CUMULATIVE_CLONE_OCCURRED | -2007 | 将压缩任务提交到线程池后可能会发生克隆任务,并且选择用于压缩的行集可能会发生变化。在这种情况下,不应执行当前的压缩任务,否则会触发该异常 |
-| OLAPMeta异常代码 | | |
-| OLAP_ERR_META_INVALID_ARGUMENT | -3000 | 元数据参数无效 |
-| OLAP_ERR_META_OPEN_DB | -3001 | 打开DB元数据错误 |
-| OLAP_ERR_META_KEY_NOT_FOUND | -3002 | 元数据key没发现 |
-| OLAP_ERR_META_GET | -3003 | GET元数据错误 |
-| OLAP_ERR_META_PUT | -3004 | PUT元数据错误 |
-| OLAP_ERR_META_ITERATOR | -3005 | 元数据迭代器错误 |
-| OLAP_ERR_META_DELETE | -3006 | 删除元数据错误 |
-| OLAP_ERR_META_ALREADY_EXIST | -3007 | 元数据已经存在错误 |
-| Rowset错误代码 | | |
-| OLAP_ERR_ROWSET_WRITER_INIT | -3100 | Rowset写初始化错误 |
-| OLAP_ERR_ROWSET_SAVE_FAILED | -3101 | Rowset保存失败 |
-| OLAP_ERR_ROWSET_GENERATE_ID_FAILED | -3102 | Rowset生成ID失败 |
-| OLAP_ERR_ROWSET_DELETE_FILE_FAILED | -3103 | Rowset删除文件失败 |
-| OLAP_ERR_ROWSET_BUILDER_INIT | -3104 | Rowset初始化构建失败 |
-| OLAP_ERR_ROWSET_TYPE_NOT_FOUND | -3105 | Rowset类型没有发现 |
-| OLAP_ERR_ROWSET_ALREADY_EXIST | -3106 | Rowset已经存在 |
-| OLAP_ERR_ROWSET_CREATE_READER | -3107 | Rowset创建读对象失败 |
-| OLAP_ERR_ROWSET_INVALID | -3108 | Rowset无效 |
-| OLAP_ERR_ROWSET_LOAD_FAILED | -3109 | Rowset加载失败 |
-| OLAP_ERR_ROWSET_READER_INIT | -3110 | Rowset读对象初始化失败 |
-| OLAP_ERR_ROWSET_READ_FAILED | -3111 | Rowset读失败 |
-| OLAP_ERR_ROWSET_INVALID_STATE_TRANSITION | -3112 | Rowset无效的事务状态 |
-
-
-
diff --git a/docs/zh-CN/administrator-guide/operation/disk-capacity.md b/docs/zh-CN/administrator-guide/operation/disk-capacity.md
deleted file mode 100644
index 8616f63575..0000000000
--- a/docs/zh-CN/administrator-guide/operation/disk-capacity.md
+++ /dev/null
@@ -1,163 +0,0 @@
----
-{
- "title": "磁盘空间管理",
- "language": "zh-CN"
-}
----
-
-
-
-# 磁盘空间管理
-
-本文档主要介绍和磁盘存储空间有关的系统参数和处理策略。
-
-Doris 的数据磁盘空间如果不加以控制,会因磁盘写满而导致进程挂掉。因此我们监测磁盘的使用率和剩余空间,通过设置不同的警戒水位,来控制 Doris 系统中的各项操作,尽量避免发生磁盘被写满的情况。
-
-## 名词解释
-
-* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。
-* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。
-* Data Dir:数据目录,在 BE 配置文件 `be.conf` 的 `storage_root_path` 中指定的各个数据目录。通常一个数据目录对应一个磁盘,因此下文中 **磁盘** 也指代一个数据目录。
-
-## 基本原理
-
-BE 定期(每隔一分钟)会向 FE 汇报一次磁盘使用情况。FE 记录这些统计值,并根据这些统计值,限制不同的操作请求。
-
-在 FE 中分别设置了 **高水位(High Watermark)** 和 **危险水位(Flood Stage)** 两级阈值。危险水位高于高水位。当磁盘使用率高于高水位时,Doris 会限制某些操作的执行(如副本均衡等)。而如果高于危险水位,则会禁止某些操作的执行(如导入)。
-
-同时,在 BE 上也设置了 **危险水位(Flood Stage)**。考虑到 FE 并不能完全及时地检测到 BE 上的磁盘使用情况,也无法控制某些 BE 自身运行的操作(如 Compaction),因此 BE 上的危险水位用于 BE 主动拒绝和停止某些操作,达到自我保护的目的。
-
-## FE 参数
-
-**高水位:**
-
-```
-storage_high_watermark_usage_percent 默认 85 (85%)。
-storage_min_left_capacity_bytes 默认 2GB。
-```
-
-当磁盘空间使用率**大于** `storage_high_watermark_usage_percent`,**或者** 磁盘空间剩余大小**小于** `storage_min_left_capacity_bytes` 时,该磁盘不会再被作为以下操作的目的路径:
-
-* Tablet 均衡操作(Balance)
-* Colocation 表数据分片的重分布(Relocation)
-* Decommission
-
-**危险水位:**
-
-```
-storage_flood_stage_usage_percent 默认 95 (95%)。
-storage_flood_stage_left_capacity_bytes 默认 1GB。
-```
-
-当磁盘空间使用率**大于** `storage_flood_stage_usage_percent`,**并且** 磁盘空间剩余大小**小于** `storage_flood_stage_left_capacity_bytes` 时,该磁盘不会再被作为以下操作的目的路径,并禁止某些操作:
-
-* Tablet 均衡操作(Balance)
-* Colocation 表数据分片的重分布(Relocation)
-* 副本补齐
-* 恢复操作(Restore)
-* 数据导入(Load/Insert)
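-
-上述 FE 参数可以按如下方式在运行时查看和调整(以下仅为示意,配置项是否支持动态修改以实际版本为准):
-
-```
-ADMIN SHOW FRONTEND CONFIG LIKE "%storage%";
-ADMIN SET FRONTEND CONFIG ("storage_high_watermark_usage_percent" = "90");
-```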
-
-## BE 参数
-
-**危险水位:**
-
-```
-capacity_used_percent_flood_stage 默认 95 (95%)。
-capacity_min_left_bytes_flood_stage 默认 1GB。
-```
-
-当磁盘空间使用率**大于** `capacity_used_percent_flood_stage`,**并且** 磁盘空间剩余大小**小于** `capacity_min_left_bytes_flood_stage` 时,该磁盘上的以下操作会被禁止:
-
-* Base/Cumulative Compaction。
-* 数据写入。包括各种导入操作。
-* Clone Task。通常发生于副本修复或均衡时。
-* Push Task。发生在 Hadoop 导入的 Loading 阶段,下载文件。
-* Alter Task。Schema Change 或 Rollup 任务。
-* Download Task。恢复操作的 Downloading 阶段。
-
-## 磁盘空间释放
-
-当磁盘空间高于高水位甚至危险水位后,很多操作都会被禁止。此时可以先确认各个 BE 的磁盘使用情况(见下面的示例命令),再尝试通过以下方式减少磁盘使用率,恢复系统。
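-
-例如,可以通过下面的命令查看各 BE 的磁盘容量和使用率等信息(具体输出列以实际版本为准):
-
-```
-SHOW PROC '/backends';
-```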
-
-* 删除表或分区
-
- 通过删除表或分区的方式,能够快速降低磁盘空间使用率,恢复集群。**注意:只有 `DROP` 操作可以达到快速降低磁盘空间使用率的目的,`DELETE` 操作不可以。**
-
- ```
- DROP TABLE tbl;
- ALTER TABLE tbl DROP PARTITION p1;
- ```
-
-* 扩容 BE
-
-  扩容后,数据分片会自动均衡到磁盘使用率较低的 BE 节点上。扩容操作会根据数据量及节点数量不同,在数小时或数天后使集群达到均衡状态。
-
-* 修改表或分区的副本
-
- 可以将表或分区的副本数降低。比如默认3副本可以降低为2副本。该方法虽然降低了数据的可靠性,但是能够快速的降低磁盘使用率,使集群恢复正常。该方法通常用于紧急恢复系统。请在恢复后,通过扩容或删除数据等方式,降低磁盘使用率后,将副本数恢复为 3。
-
- 修改副本操作为瞬间生效,后台会自动异步的删除多余的副本。
-
- ```
- ALTER TABLE tbl MODIFY PARTITION p1 SET("replication_num" = "2");
- ```
-
-* 删除多余文件
-
-  当 BE 进程已经因为磁盘写满而挂掉并无法启动时(此现象可能因 FE 或 BE 检测不及时而发生),需要通过删除数据目录下的一些临时文件,保证 BE 进程能够启动。以下目录中的文件可以直接删除:
-
- * log/:日志目录下的日志文件。
- * snapshot/: 快照目录下的快照文件。
- * trash/:回收站中的文件。
-
- **这种操作会对 [从 BE 回收站中恢复数据](./tablet-restore-tool.md) 产生影响。**
-
- 如果BE还能够启动,则可以使用`ADMIN CLEAN TRASH ON(BackendHost:BackendHeartBeatPort);`来主动清理临时文件,会清理 **所有** trash文件和过期snapshot文件,**这将影响从回收站恢复数据的操作** 。
-
- 如果不手动执行`ADMIN CLEAN TRASH`,系统仍将会在几分钟至几十分钟内自动执行清理,这里分为两种情况:
- * 如果磁盘占用未达到 **危险水位(Flood Stage)** 的90%,则会清理过期trash文件和过期snapshot文件,此时会保留一些近期文件而不影响恢复数据。
- * 如果磁盘占用已达到 **危险水位(Flood Stage)** 的90%,则会清理 **所有** trash文件和过期snapshot文件, **此时会影响从回收站恢复数据的操作** 。
-  自动执行的时间间隔可以通过配置项中的 `max_garbage_sweep_interval` 和 `min_garbage_sweep_interval` 更改。
-
- 出现由于缺少trash文件而导致恢复失败的情况时,可能返回如下结果:
-
- ```
- {"status": "Fail","msg": "can find tablet path in trash"}
- ```
-
-* 删除数据文件(危险!!!)
-
- 当以上操作都无法释放空间时,需要通过删除数据文件来释放空间。数据文件在指定数据目录的 `data/` 目录下。删除数据分片(Tablet)必须先确保该 Tablet 至少有一个副本是正常的,否则**删除唯一副本会导致数据丢失**。假设我们要删除 id 为 12345 的 Tablet:
-
- * 找到 Tablet 对应的目录,通常位于 `data/shard_id/tablet_id/` 下。如:
-
- ```data/0/12345/```
-
- * 记录 tablet id 和 schema hash。其中 schema hash 为上一步目录的下一级目录名。如下为 352781111:
-
- ```data/0/12345/352781111```
-
- * 删除数据目录:
-
- ```rm -rf data/0/12345/```
-
- * 删除 Tablet 元数据(具体参考 [Tablet 元数据管理工具](./tablet-meta-tool.md))
-
-    ```./lib/meta_tool --operation=delete_header --root_path=/path/to/root_path --tablet_id=12345 --schema_hash=352781111```
\ No newline at end of file
diff --git a/docs/zh-CN/administrator-guide/operation/doris-error-code.md b/docs/zh-CN/administrator-guide/operation/doris-error-code.md
deleted file mode 100644
index 122c0d1b59..0000000000
--- a/docs/zh-CN/administrator-guide/operation/doris-error-code.md
+++ /dev/null
@@ -1,179 +0,0 @@
----
-{
- "title": "Doris错误代码表",
- "language": "zh-CN"
-}
-
----
-
-
-
-# Doris错误代码表
-
-| 错误码 | 错误信息 |
-| :----- | :----------------------------------------------------------- |
-| 1005 | 创建表格失败,在返回错误信息中给出具体原因 |
-| 1007 | 数据库已经存在,不能创建同名的数据库 |
-| 1008 | 数据库不存在,无法删除 |
-| 1044 | 数据库对用户未授权,不能访问 |
-| 1045 | 用户名及密码不匹配,不能访问系统 |
-| 1046 | 没有指定要查询的目标数据库 |
-| 1047 | 用户输入了无效的操作指令 |
-| 1049 | 用户指定了无效的数据库 |
-| 1050 | 数据表已经存在 |
-| 1051 | 无效的数据表 |
-| 1052 | 指定的列名有歧义,不能唯一确定对应列 |
-| 1053 | 为Semi-Join/Anti-Join查询指定了非法的数据列 |
-| 1054 | 指定的列在表中不存在 |
-| 1058 | 查询语句中选择的列数目与查询结果的列数目不一致 |
-| 1060 | 列名重复 |
-| 1064 | 没有存活的Backend节点 |
-| 1066 | 查询语句中出现了重复的表别名 |
-| 1094 | 线程ID无效 |
-| 1095 | 非线程的拥有者不能终止线程的运行 |
-| 1096 | 查询语句没有指定要查询或操作的数据表 |
-| 1102 | 数据库名不正确 |
-| 1104 | 数据表名不正确 |
-| 1105 | 其它错误 |
-| 1110 | 子查询中指定了重复的列 |
-| 1111 | 在Where从句中非法使用聚合函数 |
-| 1113 | 新建表的列集合不能为空 |
-| 1115 | 使用了不支持的字符集 |
-| 1130 | 客户端使用了未被授权的IP地址来访问系统 |
-| 1132 | 无权限修改用户密码 |
-| 1141 | 撤销用户权限时指定了用户不具备的权限 |
-| 1142 | 用户执行了未被授权的操作 |
-| 1166 | 列名不正确 |
-| 1193 | 使用了无效的系统变量名 |
-| 1203 | 用户使用的活跃连接数超过了限制 |
-| 1211 | 不允许创建新用户 |
-| 1227 | 拒绝访问,用户执行了无权限的操作 |
-| 1228 | 会话变量不能通过SET GLOBAL指令来修改 |
-| 1229 | 全局变量应通过SET GLOBAL指令来修改 |
-| 1230 | 相关的系统变量没有缺省值 |
-| 1231 | 给某系统变量设置了无效值 |
-| 1232 | 给某系统变量设置了错误数据类型的值 |
-| 1248 | 没有给内联视图设置别名 |
-| 1251 | 客户端不支持服务器请求的身份验证协议;请升级MySQL客户端 |
-| 1286 | 配置的存储引擎不正确 |
-| 1298 | 配置的时区不正确 |
-| 1347 | 对象与期望的类型不匹配 |
-| 1353 | SELECT和视图的字段列表具有不同的列数 |
-| 1364 | 字段不允许NULL值,但是没有设置缺省值 |
-| 1372 | 密码长度不够 |
-| 1396 | 用户执行的操作运行失败 |
-| 1471 | 指定表不允许插入数据 |
-| 1507 | 删除不存在的分区,且没有指定如果存在才删除的条件 |
-| 1508 | 无法删除所有分区,请改用DROP TABLE |
-| 1517 | 出现了重复的分区名字 |
-| 1567 | 分区的名字不正确 |
-| 1621 | 指定的系统变量是只读的 |
-| 1735 | 表中不存在指定的分区名 |
-| 1748 | 不能将数据插入具有空分区的表中。使用“ SHOW PARTITIONS FROM tbl”来查看此表的当前分区 |
-| 1749 | 表分区不存在 |
-| 5000 | 指定的表不是OLAP表 |
-| 5001 | 指定的PROC路径无效 |
-| 5002 | 必须在列置换中明确指定列名 |
-| 5003 | Key列应排在Value列之前 |
-| 5004 | 表至少应包含1个Key列 |
-| 5005 | 集群ID无效 |
-| 5006 | 无效的查询规划 |
-| 5007 | 冲突的查询规划 |
-| 5008 | 数据插入提示:仅适用于有分区的数据表 |
-| 5009 | PARTITION子句对于INSERT到未分区表中无效 |
-| 5010 | 列数不等于SELECT语句的选择列表数 |
-| 5011 | 无法解析表引用 |
-| 5012 | 指定的值不是一个有效数字 |
-| 5013 | 不支持的时间单位 |
-| 5014 | 表状态不正常 |
-| 5015 | 分区状态不正常 |
-| 5016 | 分区上存在数据导入任务 |
-| 5017 | 指定列不是Key列 |
-| 5018 | 值的格式无效 |
-| 5019 | 数据副本与版本不匹配 |
-| 5021 | BE节点已离线 |
-| 5022 | 非分区表中的分区数不是1 |
-| 5023 | alter语句中无任何操作 |
-| 5024 | 任务执行超时 |
-| 5025 | 数据插入操作失败 |
-| 5026 | 通过SELECT语句创建表时使用了不支持的数据类型 |
-| 5027 | 没有设置指定的参数 |
-| 5028 | 没有找到指定的集群 |
-| 5030 | 某用户没有访问集群的权限 |
-| 5031 | 没有指定参数或参数无效 |
-| 5032 | 没有指定集群实例数目 |
-| 5034 | 集群名已经存在 |
-| 5035 | 集群已经存在 |
-| 5036 | 集群中BE节点不足 |
-| 5037 | 删除集群之前,必须删除集群中的所有数据库 |
-| 5037 | 集群中不存在这个ID的BE节点 |
-| 5038 | 没有指定集群名字 |
-| 5040 | 未知的集群 |
-| 5041 | 没有集群名字 |
-| 5042 | 没有权限 |
-| 5043 | 实例数目应大于0 |
-| 5046 | 源集群不存在 |
-| 5047 | 目标集群不存在 |
-| 5048 | 源数据库不存在 |
-| 5049 | 目标数据库不存在 |
-| 5050 | 没有选择集群,请输入集群 |
-| 5051 | 应先将源数据库连接到目标数据库 |
-| 5052 | 集群内部错误:BE节点错误信息 |
-| 5053 | 没有从源数据库到目标数据库的迁移任务 |
-| 5054 | 指定数据库已经连接到目标数据库,或正在迁移数据 |
-| 5055 | 数据连接或者数据迁移不能在同一集群内执行 |
-| 5056 | 不能删除数据库:它被关联至其它数据库或正在迁移数据 |
-| 5056 | 不能重命名数据库:它被关联至其它数据库或正在迁移数据 |
-| 5056 | 集群中BE节点不足 |
-| 5056 | 集群内已存在指定数目的BE节点 |
-| 5059 | 集群中存在处于下线状态的BE节点 |
-| 5062 | 不正确的群集名称(名称'default_cluster'是保留名称) |
-| 5063 | 类型名不正确 |
-| 5064 | 通用错误提示 |
-| 5063 | Colocate功能已被管理员禁用 |
-| 5063 | colocate数据表不存在 |
-| 5063 | Colocate表必须是OLAP表 |
-| 5063 | Colocate表应该具有同样的副本数目 |
-| 5063 | Colocate表应该具有同样的分桶数目 |
-| 5063 | Colocate表的分区列数目必须一致 |
-| 5063 | Colocate表的分区列的数据类型必须一致 |
-| 5064 | 指定表不是colocate表 |
-| 5065 | 指定的操作是无效的 |
-| 5065 | 指定的时间单位是非法的,正确的单位包括:HOUR / DAY / WEEK / MONTH |
-| 5066 | 动态分区起始值应该小于0 |
-| 5066 | 动态分区起始值不是有效的数字 |
-| 5066 | 动态分区结束值应该大于0 |
-| 5066 | 动态分区结束值不是有效的数字 |
-| 5066 | 动态分区结束值为空 |
-| 5067 | 动态分区分桶数应该大于0 |
-| 5067 | 动态分区分桶值不是有效的数字 |
-| 5066 | 动态分区分桶值为空 |
-| 5068 | 是否允许动态分区的值不是有效的布尔值:true或者false |
-| 5069 | 指定的动态分区名前缀是非法的 |
-| 5070 | 指定的操作被禁止了 |
-| 5071 | 动态分区副本数应该大于0 |
-| 5072 | 动态分区副本值不是有效的数字 |
-| 5073 | 原始创建表stmt为空 |
-| 5074 | 创建历史动态分区参数:create_history_partition无效,期望的是:true或者false |
-| 5076 | 指定的保留历史分区时间段为空 |
-| 5077 | 指定的保留历史分区时间段无效 |
-| 5078 | 指定的保留历史分区时间段必须是成对的时间 |
-| 5079 | 指定的保留历史分区时间段对应位置的第一个时间比第二个时间大(起始时间大于结束时间) |
-
diff --git a/docs/zh-CN/administrator-guide/operation/metadata-operation.md b/docs/zh-CN/administrator-guide/operation/metadata-operation.md
deleted file mode 100644
index b2534325d7..0000000000
--- a/docs/zh-CN/administrator-guide/operation/metadata-operation.md
+++ /dev/null
@@ -1,403 +0,0 @@
----
-{
- "title": "元数据运维",
- "language": "zh-CN"
-}
----
-
-
-
-# 元数据运维
-
-本文档主要介绍在实际生产环境中,如何对 Doris 的元数据进行管理。包括 FE 节点建议的部署方式、一些常用的操作方法、以及常见错误的解决方法。
-
-在阅读本文档之前,请先阅读 [Doris 元数据设计文档](../../internal/metadata-design.md) 了解 Doris 元数据的工作原理。
-
-## 重要提示
-
-* 当前元数据的设计是无法向后兼容的。即如果新版本有新增的元数据结构变动(可以查看 FE 代码中的 `FeMetaVersion.java` 文件中是否有新增的 VERSION),那么在升级到新版本后,通常是无法再回滚到旧版本的。所以,在升级 FE 之前,请务必按照 [升级文档](../../installing/upgrade.md) 中的操作,测试元数据兼容性。
-
-## 元数据目录结构
-
-我们假设在 fe.conf 中指定的 `meta_dir` 的路径为 `/path/to/palo-meta`。那么一个正常运行中的 Doris 集群,元数据的目录结构应该如下:
-
-```
-/path/to/palo-meta/
- |-- bdb/
- | |-- 00000000.jdb
- | |-- je.config.csv
- | |-- je.info.0
- | |-- je.info.0.lck
- | |-- je.lck
- | `-- je.stat.csv
- `-- image/
- |-- ROLE
- |-- VERSION
- `-- image.xxxx
-```
-
-1. bdb 目录
-
- 我们将 [bdbje](https://www.oracle.com/technetwork/database/berkeleydb/overview/index-093405.html) 作为一个分布式的 kv 系统,存放元数据的 journal。这个 bdb 目录相当于 bdbje 的 “数据目录”。
-
- 其中 `.jdb` 后缀的是 bdbje 的数据文件。这些数据文件会随着元数据 journal 的不断增多而越来越多。当 Doris 定期做完 image 后,旧的日志就会被删除。所以正常情况下,这些数据文件的总大小从几 MB 到几 GB 不等(取决于使用 Doris 的方式,如导入频率等)。当数据文件的总大小大于 10GB,则可能需要怀疑是否是因为 image 没有成功,或者分发 image 失败导致的历史 journal 一直无法删除。
-
- `je.info.0` 是 bdbje 的运行日志。这个日志中的时间是 UTC+0 时区的。我们可能在后面的某个版本中修复这个问题。通过这个日志,也可以查看一些 bdbje 的运行情况。
-
-2. image 目录
-
- image 目录用于存放 Doris 定期生成的元数据镜像文件。通常情况下,你会看到有一个 `image.xxxxx` 的镜像文件。其中 `xxxxx` 是一个数字。这个数字表示该镜像包含 `xxxxx` 号之前的所有元数据 journal。而这个文件的生成时间(通过 `ls -al` 查看即可)通常就是镜像的生成时间。
-
-    你也可能会看到一个 `image.ckpt` 文件。这是一个正在生成的元数据镜像。通过 `du -sh` 命令应该可以看到这个文件大小在不断变大,说明镜像内容正在写入这个文件。当镜像写完后,会自动重命名为一个新的 `image.xxxxx` 并替换旧的 image 文件。
-
- 只有角色为 Master 的 FE 才会主动定期生成 image 文件。每次生成完后,都会推送给其他非 Master 角色的 FE。当确认其他所有 FE 都收到这个 image 后,Master FE 会删除 bdbje 中旧的元数据 journal。所以,如果 image 生成失败,或者 image 推送给其他 FE 失败时,都会导致 bdbje 中的数据不断累积。
-
- `ROLE` 文件记录了 FE 的类型(FOLLOWER 或 OBSERVER),是一个文本文件。
-
- `VERSION` 文件记录了这个 Doris 集群的 cluster id,以及用于各个节点之间访问认证的 token,也是一个文本文件。
-
- `ROLE` 文件和 `VERSION` 文件只可能同时存在,或同时不存在(如第一次启动时)。
-
-## 基本操作
-
-### 启动单节点 FE
-
-单节点 FE 是最基本的一种部署方式。一个完整的 Doris 集群,至少需要一个 FE 节点。当只有一个 FE 节点时,这个节点的类型为 Follower,角色为 Master。
-
-1. 第一次启动
-
- 1. 假设在 fe.conf 中指定的 `meta_dir` 的路径为 `/path/to/palo-meta`。
- 2. 确保 `/path/to/palo-meta` 已存在,权限正确,且目录为空。
- 3. 直接通过 `sh bin/start_fe.sh` 即可启动。
- 4. 启动后,你应该可以在 fe.log 中看到如下日志:
-
- * Palo FE starting...
- * image does not exist: /path/to/palo-meta/image/image.0
- * transfer from INIT to UNKNOWN
- * transfer from UNKNOWN to MASTER
- * the very first time to open bdb, dbname is 1
- * start fencing, epoch number is 1
- * finish replay in xxx msec
- * QE service start
- * thrift server started
-
- 以上日志不一定严格按照这个顺序,但基本类似。
-
- 5. 单节点 FE 的第一次启动通常不会遇到问题。如果你没有看到以上日志,一般来说是没有仔细按照文档步骤操作,请仔细阅读相关 wiki。
-
-2. 重启
-
- 1. 直接使用 `sh bin/start_fe.sh` 可以重新启动已经停止的 FE 节点。
- 2. 重启后,你应该可以在 fe.log 中看到如下日志:
-
- * Palo FE starting...
- * finished to get cluster id: xxxx, role: FOLLOWER and node name: xxxx
- * 如果重启前还没有 image 产生,则会看到:
- * image does not exist: /path/to/palo-meta/image/image.0
-
- * 如果重启前有 image 产生,则会看到:
- * start load image from /path/to/palo-meta/image/image.xxx. is ckpt: false
- * finished load image in xxx ms
-
- * transfer from INIT to UNKNOWN
- * replayed journal id is xxxx, replay to journal id is yyyy
- * transfer from UNKNOWN to MASTER
- * finish replay in xxx msec
- * master finish replay journal, can write now.
- * begin to generate new image: image.xxxx
- * start save image to /path/to/palo-meta/image/image.ckpt. is ckpt: true
- * finished save image /path/to/palo-meta/image/image.ckpt in xxx ms. checksum is xxxx
- * push image.xxx to other nodes. totally xx nodes, push successed xx nodes
- * QE service start
- * thrift server started
-
- 以上日志不一定严格按照这个顺序,但基本类似。
-
-3. 常见问题
-
- 对于单节点 FE 的部署,启停通常不会遇到什么问题。如果有问题,请先参照相关 wiki,仔细核对你的操作步骤。
-
-### 添加 FE
-
-添加 FE 流程在 [部署和升级文档](../../installing/install-deploy.md#增加%20FE%20节点) 有详细介绍,不再赘述。这里主要说明一些注意事项,以及常见问题。
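-
-其中,在元数据层面添加 FE 的核心语句示意如下(主机名与端口均为示例,端口为新 FE 的 edit_log_port,默认为 9010;前两条语句按要添加的 FE 类型二选一,添加并启动新 FE 后可通过最后一条语句确认状态):
-
-```
-ALTER SYSTEM ADD FOLLOWER "new_fe_host:9010";
-ALTER SYSTEM ADD OBSERVER "new_fe_host:9010";
-SHOW FRONTENDS;
-```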
-
-1. 注意事项
-
- * 在添加新的 FE 之前,一定先确保当前的 Master FE 运行正常(连接是否正常,JVM 是否正常,image 生成是否正常,bdbje 数据目录是否过大等等)
- * 第一次启动新的 FE,一定确保添加了 `--helper` 参数指向 Master FE。再次启动时可不用添加 `--helper`。(如果指定了 `--helper`,FE 会直接询问 helper 节点自己的角色,如果没有指定,FE会尝试从 `palo-meta/image/` 目录下的 `ROLE` 和 `VERSION` 文件中获取信息)。
- * 第一次启动新的 FE,一定确保这个 FE 的 `meta_dir` 已经创建、权限正确且为空。
- * 启动新的 FE,和执行 `ALTER SYSTEM ADD FOLLOWER/OBSERVER` 语句在元数据添加 FE,这两个操作的顺序没有先后要求。如果先启动了新的 FE,而没有执行语句,则新的 FE 日志中会一直滚动 `current node is not added to the group. please add it first.` 字样。当执行语句后,则会进入正常流程。
- * 请确保前一个 FE 添加成功后,再添加下一个 FE。
- * 建议直接连接到 MASTER FE 执行 `ALTER SYSTEM ADD FOLLOWER/OBSERVER` 语句。
-
-2. 常见问题
-
- 1. this node is DETACHED
-
- 当第一次启动一个待添加的 FE 时,如果 Master FE 上的 palo-meta/bdb 中的数据很大,则可能在待添加的 FE 日志中看到 `this node is DETACHED.` 字样。这时,bdbje 正在复制数据,你可以看到待添加的 FE 的 `bdb/` 目录正在变大。这个过程通常会在数分钟不等(取决于 bdbje 中的数据量)。之后,fe.log 中可能会有一些 bdbje 相关的错误堆栈信息。如果最终日志中显示 `QE service start` 和 `thrift server started`,则通常表示启动成功。可以通过 mysql-client 连接这个 FE 尝试操作。如果没有出现这些字样,则可能是 bdbje 复制日志超时等问题。这时,直接再次重启这个 FE,通常即可解决问题。
-
- 2. 各种原因导致添加失败
-
- * 如果添加的是 OBSERVER,因为 OBSERVER 类型的 FE 不参与元数据的多数写,理论上可以随意启停。因此,对于添加 OBSERVER 失败的情况。可以直接杀死 OBSERVER FE 的进程,清空 OBSERVER 的元数据目录后,重新进行一遍添加流程。
-
- * 如果添加的是 FOLLOWER,因为 FOLLOWER 是参与元数据多数写的。所以有可能FOLLOWER 已经加入 bdbje 选举组内。如果这时只有两个 FOLLOWER 节点(包括 MASTER),那么停掉一个 FE,可能导致另一个 FE 也因无法进行多数写而退出。此时,我们应该先通过 `ALTER SYSTEM DROP FOLLOWER` 命令,从元数据中删除新添加的 FOLLOWER 节点,然后再杀死 FOLLOWER 进程,清空元数据,重新进行一遍添加流程。
-
-
-### 删除 FE
-
-通过 `ALTER SYSTEM DROP FOLLOWER/OBSERVER` 命令即可删除对应类型的 FE。以下有几点注意事项:
-
-* 对于 OBSERVER 类型的 FE,直接 DROP 即可,无风险。
-
-* 对于 FOLLOWER 类型的 FE。首先,应保证在有奇数个 FOLLOWER 的情况下(3个或以上),开始删除操作。
-
- 1. 如果删除非 MASTER 角色的 FE,建议连接到 MASTER FE,执行 DROP 命令,再杀死进程即可。
- 2. 如果要删除 MASTER FE,先确认有奇数个 FOLLOWER FE 并且运行正常。然后先杀死 MASTER FE 的进程。这时会有某一个 FE 被选举为 MASTER。在确认剩下的 FE 运行正常后,连接到新的 MASTER FE,执行 DROP 命令删除之前老的 MASTER FE 即可。
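-
-对应的语句示意如下(主机名与端口均为示例,端口为该 FE 的 edit_log_port):
-
-```
-ALTER SYSTEM DROP FOLLOWER "fe_host:9010";
-ALTER SYSTEM DROP OBSERVER "fe_host:9010";
-```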
-
-## 高级操作
-
-### 故障恢复
-
-FE 有可能因为某些原因出现无法启动 bdbje、FE 之间无法同步等问题。现象包括无法进行元数据写操作、没有 MASTER 等等。这时,我们需要手动操作来恢复 FE。手动恢复 FE 的大致原理,是先通过当前 `meta_dir` 中的元数据,启动一个新的 MASTER,然后再逐台添加其他 FE。请严格按照如下步骤操作:
-
-1. 首先,停止所有 FE 进程,同时停止一切业务访问。保证在元数据恢复期间,不会因为外部访问导致其他不可预期的问题。
-
-2. 确认哪个 FE 节点的元数据是最新:
-
- * 首先,**务必先备份所有 FE 的 `meta_dir` 目录。**
- * 通常情况下,Master FE 的元数据是最新的。可以查看 `meta_dir/image` 目录下,image.xxxx 文件的后缀,数字越大,则表示元数据越新。
- * 通常,通过比较所有 FOLLOWER FE 的 image 文件,找出最新的元数据即可。
- * 之后,我们要使用这个拥有最新元数据的 FE 节点,进行恢复。
- * 如果使用 OBSERVER 节点的元数据进行恢复会比较麻烦,建议尽量选择 FOLLOWER 节点。
-
-3. 以下操作都在由第2步中选择出来的 FE 节点上进行。
-
- 1. 如果该节点是一个 OBSERVER,先将 `meta_dir/image/ROLE` 文件中的 `role=OBSERVER` 改为 `role=FOLLOWER`。(从 OBSERVER 节点恢复会比较麻烦,先按这里的步骤操作,后面会有单独说明)
- 2. 在 fe.conf 中添加配置:`metadata_failure_recovery=true`。
- 3. 执行 `sh bin/start_fe.sh` 启动这个 FE。
- 4. 如果正常,这个 FE 会以 MASTER 的角色启动,类似于前面 `启动单节点 FE` 一节中的描述。在 fe.log 应该会看到 `transfer from XXXX to MASTER` 等字样。
- 5. 启动完成后,先连接到这个 FE,执行一些查询导入,检查是否能够正常访问。如果不正常,有可能是操作有误,建议仔细阅读以上步骤,用之前备份的元数据再试一次。如果还是不行,问题可能就比较严重了。
- 6. 如果成功,通过 `show frontends;` 命令,应该可以看到之前所添加的所有 FE,并且当前 FE 是 master。
- 7. 将 fe.conf 中的 `metadata_failure_recovery=true` 配置项删除,或者设置为 `false`,然后重启这个 FE(**重要**)。
-
-
- > 如果你是从一个 OBSERVER 节点的元数据进行恢复的,那么完成如上步骤后,通过 `show frontends;` 语句你会发现,当前这个 FE 的角色为 OBSERVER,但是 `IsMaster` 显示为 `true`。这是因为,这里看到的 “OBSERVER” 是记录在 Doris 的元数据中的,而是否是 master,是记录在 bdbje 的元数据中的。因为我们是从一个 OBSERVER 节点恢复的,所以这里出现了不一致。请按如下步骤修复这个问题(这个问题我们会在之后的某个版本修复):
-
- > 1. 先把除了这个 “OBSERVER” 以外的所有 FE 节点 DROP 掉。
- > 2. 通过 `ADD FOLLOWER` 命令,添加一个新的 FOLLOWER FE,假设在 hostA 上。
- > 3. 在 hostA 上启动一个全新的 FE,通过 `--helper` 的方式加入集群。
- > 4. 启动成功后,通过 `show frontends;` 语句,你应该能看到两个 FE,一个是之前的 OBSERVER,一个是新添加的 FOLLOWER,并且 OBSERVER 是 master。
- > 5. 确认这个新的 FOLLOWER 是可以正常工作之后,用这个新的 FOLLOWER 的元数据,重新执行一遍故障恢复操作。
- > 6. 以上这些步骤的目的,其实就是人为的制造出一个 FOLLOWER 节点的元数据,然后用这个元数据,重新开始故障恢复。这样就避免了从 OBSERVER 恢复元数据所遇到的不一致的问题。
-
- > `metadata_failure_recovery=true` 的含义是,清空 "bdbje" 的元数据。这样 bdbje 就不会再联系之前的其他 FE 了,而作为一个独立的 FE 启动。这个参数只有在恢复启动时才需要设置为 true。恢复完成后,一定要设置为 false,否则一旦重启,bdbje 的元数据又会被清空,导致其他 FE 无法正常工作。
-
-4. 第3步执行成功后,我们再通过 `ALTER SYSTEM DROP FOLLOWER/OBSERVER` 命令,将之前的其他的 FE 从元数据删除后,按加入新 FE 的方式,重新把这些 FE 添加一遍。
-
-5. 如果以上操作正常,则恢复完毕。
-
-### FE 类型变更
-
-如果你需要将当前已有的 FOLLOWER/OBSERVER 类型的 FE,变更为 OBSERVER/FOLLOWER 类型,请先按照前面所述的方式删除 FE,再添加对应类型的 FE 即可
-
-### FE 迁移
-
-如果你需要将一个 FE 从当前节点迁移到另一个节点,分以下几种情况。
-
-1. 非 MASTER 节点的 FOLLOWER,或者 OBSERVER 迁移
-
- 直接添加新的 FOLLOWER/OBSERVER 成功后,删除旧的 FOLLOWER/OBSERVER 即可。
-
-2. 单节点 MASTER 迁移
-
- 当只有一个 FE 时,参考 `故障恢复` 一节。将 FE 的 palo-meta 目录拷贝到新节点上,按照 `故障恢复` 一节中,步骤3的方式启动新的 MASTER
-
-3. 一组 FOLLOWER 从一组节点迁移到另一组新的节点
-
- 在新的节点上部署 FE,通过添加 FOLLOWER 的方式先加入新节点。再逐台 DROP 掉旧节点即可。在逐台 DROP 的过程中,MASTER 会自动选择在新的 FOLLOWER 节点上。
-
-### 更换 FE 端口
-
-FE 目前有以下几个端口
-
-* edit_log_port:bdbje 的通信端口
-* http_port:http 端口,也用于推送 image
-* rpc_port:FE 的 thrift server port
-* query_port:Mysql 连接端口
-
-1. edit_log_port
-
- 如果需要更换这个端口,则需要参照 `故障恢复` 一节中的操作,进行恢复。因为该端口已经被持久化到 bdbje 自己的元数据中(同时也记录在 Doris 自己的元数据中),需要通过设置 `metadata_failure_recovery=true` 来清空 bdbje 的元数据。
-
-2. http_port
-
- 所有 FE 的 http_port 必须保持一致。所以如果要修改这个端口,则所有 FE 都需要修改并重启。修改这个端口,在多 FOLLOWER 部署的情况下会比较复杂(涉及到鸡生蛋蛋生鸡的问题...),所以不建议有这种操作。如果必须,直接按照 `故障恢复` 一节中的操作吧。
-
-3. rpc_port
-
- 修改配置后,直接重启 FE 即可。Master FE 会通过心跳将新的端口告知 BE。只有 Master FE 的这个端口会被使用。但仍然建议所有 FE 的端口保持一致。
-
-4. query_port
-
- 修改配置后,直接重启 FE 即可。这个只影响到 mysql 的连接目标。
-
-
-### 从 FE 内存中恢复元数据
-
-在某些极端情况下,磁盘上 image 文件可能会损坏,但是内存中的元数据是完好的,此时我们可以先从内存中 dump 出元数据,再替换掉磁盘上的 image 文件,来恢复元数据,整个**不停查询服务**的操作步骤如下:
-1. 集群停止所有 Load,Create,Alter 操作
-2. 执行以下命令,从 Master FE 内存中 dump 出元数据:(下面称为 image_mem)
-```
-curl -u $root_user:$password http://$master_hostname:8030/dump
-```
-3. 执行以下命令,验证生成的 image_mem 文件的完整性和正确性:
-```
-sh start_fe.sh --image path_to_image_mem
-```
-> 注意:`path_to_image_mem` 是 image_mem 文件的路径。
->
-> 如果文件有效会输出 `Load image success. Image file /absolute/path/to/image.xxxxxx is valid`。
->
-> 如果文件无效会输出 `Load image failed. Image file /absolute/path/to/image.xxxxxx is invalid`。
-4. 依次用 image_mem 文件替换掉 OBSERVER/FOLLOWER FE 节点上`meta_dir/image`目录下的 image 文件,重启 FOLLOWER FE 节点,
-确认元数据和查询服务都正常
-5. 用 image_mem 文件替换掉 Master FE 节点上`meta_dir/image`目录下的 image 文件,重启 Master FE 节点,
-确认 FE Master 切换正常, Master FE 节点可以通过 checkpoint 正常生成新的 image 文件
-6. 集群恢复所有 Load,Create,Alter 操作
-
-**注意:如果 Image 文件很大,整个操作过程耗时可能会很长,所以在此期间,要确保 Master FE 不会通过 checkpoint 生成新的 image 文件。
-当观察到 Master FE 节点上 `meta_dir/image`目录下的 `image.ckpt` 文件快和 `image.xxx` 文件一样大时,可以直接删除掉`image.ckpt` 文件。**
-
-### 查看 BDBJE 中的数据
-
-FE 的元数据日志以 Key-Value 的方式存储在 BDBJE 中。某些异常情况下,可能因为元数据错误而无法启动 FE。在这种情况下,Doris 提供一种方式可以帮助用户查询 BDBJE 中存储的数据,以方便进行问题排查。
-
-首先需在 fe.conf 中增加配置:`enable_bdbje_debug_mode=true`,之后通过 `sh start_fe.sh --daemon` 启动 FE。
-
-此时,FE 将进入 debug 模式,仅会启动 http server 和 MySQL server,并打开 BDBJE 实例,但不会进行任何元数据的加载及后续其他启动流程。
-
-这时,我们可以通过访问 FE 的 web 页面,或通过 MySQL 客户端连接到 Doris 后,通过 `show proc "/bdbje";` 来查看 BDBJE 中存储的数据。
-
-```
-mysql> show proc "/bdbje";
-+----------+---------------+---------+
-| DbNames | JournalNumber | Comment |
-+----------+---------------+---------+
-| 110589 | 4273 | |
-| epochDB | 4 | |
-| metricDB | 430694 | |
-+----------+---------------+---------+
-```
-
-第一级目录会展示 BDBJE 中所有的 database 名称,以及每个 database 中的 entry 数量。
-
-```
-mysql> show proc "/bdbje/110589";
-+-----------+
-| JournalId |
-+-----------+
-| 1 |
-| 2 |
-
-...
-| 114858 |
-| 114859 |
-| 114860 |
-| 114861 |
-+-----------+
-4273 rows in set (0.06 sec)
-```
-
-进入第二级,则会罗列指定 database 下的所有 entry 的 key。
-
-```
-mysql> show proc "/bdbje/110589/114861";
-+-----------+--------------+---------------------------------------------+
-| JournalId | OpType | Data |
-+-----------+--------------+---------------------------------------------+
-| 114861 | OP_HEARTBEAT | org.apache.doris.persist.HbPackage@6583d5fb |
-+-----------+--------------+---------------------------------------------+
-1 row in set (0.05 sec)
-```
-
-第三级则可以展示指定 key 的 value 信息。
-
-## 最佳实践
-
-FE 的部署推荐,在 [安装与部署文档](../../installing/install-deploy.md) 中有介绍,这里再做一些补充。
-
-* **如果你并不十分了解 FE 元数据的运行逻辑,或者没有足够 FE 元数据的运维经验,我们强烈建议在实际使用中,只部署一个 FOLLOWER 类型的 FE 作为 MASTER,其余 FE 都是 OBSERVER,这样可以减少很多复杂的运维问题!** 不用过于担心 MASTER 单点故障导致无法进行元数据写操作。首先,如果你配置合理,FE 作为 java 进程很难挂掉。其次,如果 MASTER 磁盘损坏(概率非常低),我们也可以用 OBSERVER 上的元数据,通过 `故障恢复` 的方式手动恢复。
-
-* FE 进程的 JVM 一定要保证足够的内存。我们**强烈建议** FE 的 JVM 内存至少在 10GB 以上,推荐 32GB 至 64GB。并且部署监控来监控 JVM 的内存使用情况。因为如果FE出现OOM,可能导致元数据写入失败,造成一些**无法恢复**的故障!
-
-* FE 所在节点要有足够的磁盘空间,以防止元数据过大导致磁盘空间不足。同时 FE 日志也会占用十几G 的磁盘空间。
-
-## 其他常见问题
-
-1. fe.log 中一直滚动 `meta out of date. current time: xxx, synchronized time: xxx, has log: xxx, fe type: xxx`
-
- 这个通常是因为 FE 无法选举出 Master。比如配置了 3 个 FOLLOWER,但是只启动了一个 FOLLOWER,则这个 FOLLOWER 会出现这个问题。通常,只要把剩余的 FOLLOWER 启动起来就可以了。如果启动起来后,仍然没有解决问题,那么可能需要按照 `故障恢复` 一节中的方式,手动进行恢复。
-
-2. `Clock delta: xxxx ms. between Feeder: xxxx and this Replica exceeds max permissible delta: xxxx ms.`
-
- bdbje 要求各个节点之间的时钟误差不能超过一定阈值。如果超过,节点会异常退出。我们默认设置的阈值为 5000 ms,由 FE 的参数 `max_bdbje_clock_delta_ms` 控制,可以酌情修改。但我们建议使用 ntp 等时钟同步方式保证 Doris 集群各主机的时钟同步。
-
-
-3. `image/` 目录下的镜像文件很久没有更新
-
- Master FE 会默认每 50000 条元数据 journal,生成一个镜像文件。在一个频繁使用的集群中,通常每隔半天到几天的时间,就会生成一个新的 image 文件。如果你发现 image 文件已经很久没有更新了(比如超过一个星期),则可以顺序的按照如下方法,查看具体原因:
-
- 1. 在 Master FE 的 fe.log 中搜索 `memory is not enough to do checkpoint. Committed memroy xxxx Bytes, used memory xxxx Bytes.` 字样。如果找到,则说明当前 FE 的 JVM 内存不足以用于生成镜像(通常我们需要预留一半的 FE 内存用于 image 的生成)。那么需要增加 JVM 的内存并重启 FE 后,再观察。每次 Master FE 重启后,都会直接生成一个新的 image。也可用这种重启方式,主动地生成新的 image。注意,如果是多 FOLLOWER 部署,那么当你重启当前 Master FE 后,另一个 FOLLOWER FE 会变成 MASTER,则后续的 image 生成会由新的 Master 负责。因此,你可能需要修改所有 FOLLOWER FE 的 JVM 内存配置。
-
- 2. 在 Master FE 的 fe.log 中搜索 `begin to generate new image: image.xxxx`。如果找到,则说明开始生成 image 了。检查这个线程的后续日志,如果出现 `checkpoint finished save image.xxxx`,则说明 image 写入成功。如果出现 `Exception when generate new image file`,则生成失败,需要查看具体的错误信息。
-
-
-4. `bdb/` 目录的大小非常大,达到几个G或更多
-
- 如果在排除无法生成新的 image 的错误后,bdb 目录在一段时间内依然很大。则可能是因为 Master FE 推送 image 不成功。可以在 Master FE 的 fe.log 中搜索 `push image.xxxx to other nodes. totally xx nodes, push successed yy nodes`。如果 yy 比 xx 小,则说明有的 FE 没有被推送成功。可以在 fe.log 中查看到具体的错误 `Exception when pushing image file. url = xxx`。
-
- 同时,你也可以在 FE 的配置文件中添加配置:`edit_log_roll_num=xxxx`。该参数设定了每多少条元数据 journal,做一次 image。默认是 50000。可以适当改小这个数字,使得 image 更加频繁,从而加速删除旧的 journal。
-
-5. FOLLOWER FE 接连挂掉
-
- 因为 Doris 的元数据采用多数写策略,即一条元数据 journal 必须至少写入多数个 FOLLOWER FE 后(比如 3 个 FOLLOWER,必须写成功 2 个),才算成功。而如果写入失败,FE 进程会主动退出。那么假设有 A、B、C 三个 FOLLOWER,C 先挂掉,然后 B 再挂掉,那么 A 也会跟着挂掉。所以如 `最佳实践` 一节中所述,如果你没有丰富的元数据运维经验,不建议部署多 FOLLOWER。
-
-6. fe.log 中出现 `get exception when try to close previously opened bdb database. ignore it`
-
- 如果后面有 `ignore it` 字样,通常无需处理。如果你有兴趣,可以在 `BDBEnvironment.java` 搜索这个错误,查看相关注释说明。
-
-7. 从 `show frontends;` 看,某个 FE 的 `Join` 列为 `true`,但是实际该 FE 不正常
-
-    通过 `show frontends;` 查看到的 `Join` 列,如果为 `true`,仅表示这个 FE **曾经加入过** 集群,并不能表示其当前仍然正常地存在于集群中。如果为 `false`,则表示这个 FE **从未加入过** 集群。
-
-8. 关于 FE 的配置 `master_sync_policy`, `replica_sync_policy` 和 `txn_rollback_limit`
-
- `master_sync_policy` 用于指定当 Leader FE 写元数据日志时,是否调用 fsync(), `replica_sync_policy` 用于指定当 FE HA 部署时,其他 Follower FE 在同步元数据时,是否调用 fsync()。在早期的 Doris 版本中,这两个参数默认是 `WRITE_NO_SYNC`,即都不调用 fsync()。在最新版本的 Doris 中,默认已修改为 `SYNC`,即都调用 fsync()。调用 fsync() 会显著降低元数据写盘的效率。在某些环境下,IOPS 可能降至几百,延迟增加到2-3ms(但对于 Doris 元数据操作依然够用)。因此我们建议以下配置:
-
- 1. 对于单 Follower FE 部署,`master_sync_policy` 设置为 `SYNC`,防止 FE 系统宕机导致元数据丢失。
- 2. 对于多 Follower FE 部署,可以将 `master_sync_policy` 和 `replica_sync_policy` 设为 `WRITE_NO_SYNC`,因为我们认为多个系统同时宕机的概率非常低。
-
- 如果在单 Follower FE 部署中,`master_sync_policy` 设置为 `WRITE_NO_SYNC`,则可能出现 FE 系统宕机导致元数据丢失。这时如果有其他 Observer FE 尝试重启时,可能会报错:
-
- ```
- Node xxx must rollback xx total commits(numPassedDurableCommits of which were durable) to the earliest point indicated by transaction xxxx in order to rejoin the replication group, but the transaction rollback limit of xxx prohibits this.
- ```
-
-    意思是有部分已经持久化的事务需要回滚,但条数超过了上限。这里我们的默认上限是 100,可以通过设置 `txn_rollback_limit` 改变。该操作仅用于尝试正常启动 FE,但已丢失的元数据无法恢复。
diff --git a/docs/zh-CN/administrator-guide/operation/multi-tenant.md b/docs/zh-CN/administrator-guide/operation/multi-tenant.md
deleted file mode 100644
index dc818cd3ff..0000000000
--- a/docs/zh-CN/administrator-guide/operation/multi-tenant.md
+++ /dev/null
@@ -1,239 +0,0 @@
----
-{
- "title": "多租户(弃用)",
- "language": "zh-CN"
-}
----
-
-
-
-# 多租户(已弃用)
-
-该功能已弃用。新方案请参阅:[多租户和资源划分](../multi-tenant.md)。
-
-## 背景
-Doris 作为一款 PB 级别的在线报表与多维分析数据库,对外通过开放云提供云端的数据库服务,并且对于每个云上的客户都单独部署了一套物理集群。对内,一套物理集群部署了多个业务,对于隔离性要求比较高的业务单独搭建了集群。针对以上存在几点问题:
-
-- 部署多套物理集群维护代价大(升级、功能上线、bug修复)。
-- 一个用户的查询或者查询引起的bug经常会影响其他用户。
-- 实际生产环境单机只能部署一个BE进程。而多个BE可以更好的解决胖节点问题。并且对于join、聚合操作可以提供更高的并发度。
-
-综合以上三点,Doris需要新的多租户方案,既能做到较好的资源隔离和故障隔离,同时也能减少维护的代价,满足公有云和私有云的需求。
-
-## 设计原则
-
-- 使用简单
-- 开发代价小
-- 方便现有集群的迁移
-
-## 名词解释
-
-- FE: Frontend,即 Doris 中用于元数据管理及查询规划的模块。
-- BE: Backend,即 Doris 中用于存储和查询数据的模块。
-- Master: FE 的一种角色。一个Doris集群只有一个Master,其他的FE为Observer或者Follower。
-- instance:一个 BE 进程即是一个 instance。
-- host:单个物理机
-- cluster:即一个集群,由多个instance组成。
-- 租户:一个cluster属于一个租户。cluster和租户之间是一对一关系。
-- database:一个用户创建的数据库
-
-## 主要思路
-
-- 一个host上部署多个BE的instance,在进程级别做资源隔离。
-- 多个instance形成一个cluster,一个cluster分配给一个业务独立的租户。
-- FE增加cluster这一级并负责cluster的管理。
-- CPU,IO,内存等资源隔离采用cgroup。
-
-## 设计方案
-
-为了能够达到隔离的目的,引入了**虚拟cluster**的概念。
-
-1. cluster表示一个虚拟的集群,由多个BE的instance组成。多个cluster共享FE。
-2. 一个host上可以启动多个instance。cluster创建时,选取任意指定数量的instance,组成一个cluster。
-3. 创建cluster的同时,会创建一个名为superuser的账户,隶属于该cluster。superuser可以对cluster进行管理、创建数据库、分配权限等。
-4. Doris启动后,会创建一个默认的cluster:default_cluster。如果用户不希望使用多cluster的功能,则会提供这个默认的cluster,并隐藏多cluster的其他操作细节。
-
-具体架构如下图:
-
-
-## SQL 接口
-
-- 登录
-
- 默认集群登录名: user_name@default_cluster 或者 user_name
-
- 自定义集群登录名:user_name@cluster_name
-
- `mysqlclient -h host -P port -u user_name@cluster_name -p password`
-
-- 添加、删除、下线(decommission)以及取消下线BE
-
- `ALTER SYSTEM ADD BACKEND "host:port"`
- `ALTER SYSTEM DROP BACKEND "host:port"`
- `ALTER SYSTEM DECOMMISSION BACKEND "host:port"`
- `CANCEL DECOMMISSION BACKEND "host:port"`
-
- 强烈建议使用 DECOMMISSION 而不是 DROP 来删除 BACKEND。DECOMMISSION 操作会首先将需要下线节点上的数据拷贝到集群内其他instance上。之后,才会真正下线。
-
-- 创建集群,并指定superuser账户的密码
-
- `CREATE CLUSTER cluster_name PROPERTIES ("instance_num" = "10") identified by "password"`
-
-- 进入一个集群
-
- `ENTER cluster_name`
-
-- 集群扩容、缩容
-
- `ALTER CLUSTER cluster_name PROPERTIES ("instance_num" = "10")`
-
- 当指定的实例个数多于cluster现有be的个数,则为扩容,如果少于则为缩容。
-
-- 链接、迁移db
-
- `LINK DATABASE src_cluster_name.db_name dest_cluster_name.db_name`
-
- 软链一个cluster的db到另外一个cluster的db ,对于需要临时访问其他cluster的db却不需要进行实际数据迁移的用户可以采用这种方式。
-
- `MIGRATE DATABASE src_cluster_name.db_name dest_cluster_name.db_name`
-
- 如果需要对db进行跨cluster的迁移,在链接之后,执行migrate对数据进行实际的迁移。
-
- 迁移不影响当前两个db的查询、导入等操作,这是一个异步的操作,可以通过`SHOW MIGRATIONS`查看迁移的进度。
-
-- 删除集群
-
- `DROP CLUSTER cluster_name`
-
-    删除集群前,需要先手动删除集群内的所有 database。
-
-- 其他
-
- `SHOW CLUSTERS`
-
- 展示系统内已经创建的集群。只有root用户有该权限。
-
- `SHOW BACKENDS`
-
- 查看集群内的BE instance。
-
- `SHOW MIGRATIONS`
-
- 展示当前正在进行的db迁移任务。执行完db的迁移后可以通过此命令查看迁移的进度。
-
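-把上述命令串起来,一个(已弃用功能的)端到端流程示意如下,其中集群名、库名、实例数均为假设值:
-
-```
-CREATE CLUSTER test_cluster PROPERTIES ("instance_num" = "3") IDENTIFIED BY "password";
-ENTER test_cluster;
-
-# 将 default_cluster 中的 db1 软链到 test_cluster,再做实际迁移
-LINK DATABASE default_cluster.db1 test_cluster.db1;
-MIGRATE DATABASE default_cluster.db1 test_cluster.db1;
-SHOW MIGRATIONS;
-
-# 按需扩容
-ALTER CLUSTER test_cluster PROPERTIES ("instance_num" = "5");
-```
-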
-## 详细设计
-
-1. 命名空间隔离
-
- 为了引入多租户,需要对系统内的cluster之间的命名空间进行隔离。
-
-    Doris现有的元数据采用的是 image + journal 的方式(元数据的设计见相关文档)。Doris会把涉及元数据的操作记录为一个 journal(操作日志),然后定时地按照**图1**的方式写成 image,加载的时候按照写入的顺序读即可。但是这样就带来一个问题:已经写入的格式不容易修改。比如记录数据分布的元数据格式为 database+table+tablet+replica 嵌套,如果按照以往的方式要做 cluster 之间的命名空间隔离,则需要在 database 上增加一层 cluster,内部元数据的层级变为 cluster+database+table+tablet+replica,如**图2**所示。但加一层带来的问题有:
-
- - 增加一层带来的元数据改动,不兼容,需要按照图2的方式cluster+db+table+tablet+replica层级写,这样就改变了以往的元数据组织方式,老版本的升级会比较麻烦,比较理想的方式是按照图3在现有元数据的格式下顺序写入cluster的元数据。
-
-    - 代码里所有用到 db、user 等的地方,都需要加一层 cluster:工作量大、改动的地方多、层级深,多数代码都会获取 db,现有功能几乎都要改一遍,并且需要在 db 的锁的基础上嵌套一层 cluster 的锁。
-
- 
-
-    综上,这里采用了给 db、user 名加前缀的方式,来隔离 cluster 之间 db、user 名字冲突的问题。
-
- 如下,所有的sql输入涉及db名、user名的,都需要根据自己所在的cluster来拼写db、user的全名。
-
- 
-
- 采用这种方式以上两个问题不再有。元数据的组织方式也比较简单。即采用**图3**每个cluster记录下属于自己cluster的db、user,以及节点即可。
-
-2. BE 节点管理
-
- 每个cluster都有属于自己的一组instance,可以通过`SHOW BACKENDS`查看,为了区分出instance属于哪个cluster以及使用情况,BE引入了多个状态:
-
- - free:当一个BE节点被加入系统内,此时be不属于任何cluster的时候处于空闲状态
- - using:当创建集群、或者扩容被选取到一个cluster内则处于使用中。
-    - cluster decommission:如果执行缩容,则正在执行缩容的 be 处于此状态。结束后,be 状态变为 free。
- - system decommission:be正在下线中。下线完成后,该be将会被永久删除。
-
-    只有root用户可以通过`SHOW PROC "/backends"`中cluster这一项,查看集群内所有be是否被使用。该项为空则为空闲,否则为使用中。`SHOW BACKENDS`只能看到所在cluster的节点。以下是be节点状态变化的示意图。
-
- 
-
-3. 创建集群
-
- 只有root用户可以创建一个cluster,并指定任意数量的BE instance。
-
- 支持在相同机器上选取多个instance。选择instance的大致原则是:尽可能选取不同机器上的be并且使所有机器上使用的be数尽可能均匀。
-
- 对于使用来讲,每一个user、db都属于一个cluster(root除外)。为了创建user、db,首先需要进入一个cluster。在创建cluster的时候系统会默认生成这个cluster的管理员,即superuser账户。superuser具有在所属cluster内创建db、user,以及查看be节点数的权限。所有的非root用户登录必须指定一个cluster,即`user_name@cluster_name`。
-
- 只有root用户可以通过`SHOW CLUSTER`查看系统内所有的cluster,并且可以通过@不同的集群名来进入不同的cluster。对于除了root之外的用户cluster都是不可见的。
-
- 为了兼容老版本Doris内置了一个名字叫做default_cluster的集群,这个名字在创建集群的时候不能使用。
-
- 
-
-4. 集群扩容
-
-    集群扩容的流程同创建集群,会优先选取当前不在该集群内的 host 上的 BE instance,选取的原则同创建集群。
-
-5. 集群缩容、CLUSTER DECOMMISSION
-
- 用户可以通过设置 cluster 的 instance num 来进行集群缩容。
-
- 集群的缩容会优先在BE instance 数量最多的 host 上选取 instance 进行下线。
-
- 用户也可以直接使用 `ALTER CLUSTER DECOMMISSION BACKEND` 来指定BE,进行集群缩容。
-
-
-
-6. 建表
-
-    为了保证高可用,每个分片的副本必须在不同的机器上。所以建表时,选择副本所在be的策略为在每个host上随机选取一个be,然后从这些be中随机选取所需副本数量的be。总体上做到每个机器上分片分布均匀。
-
- 因此,假如需要创建一个3副本的分片,即使cluster包含3个或以上的instance,但是只有2个或以下的host,依然不能创建该分片。
-
-7. 负载均衡
-
-    负载均衡的粒度为cluster级别,cluster之间不做负载均衡。但是负载的计算是在host一级进行的,而一个host上可能存在多个不同cluster的BE instance。cluster内,会通过每个host上所有分片数目、存储使用率计算负载,然后把负载高的机器上的分片往负载低的机器上拷贝(详见负载均衡相关文档)。
-
-8. LINK DATABASE(软链)
-
- 多个集群之间可以通过软链的方式访问彼此的数据。链接的级别为不同cluster的db。
-
- 通过在一个cluster内,添加需要访问的其他cluster的db的信息,来访问其他cluster中的db。
-
- 当查询链接的db时,所使用的计算以及存储资源为源db所在cluster的资源。
-
- 被软链的db不能在源cluster中删除。只有链接的db被删除后,才可以删除源db。而删除链接db,不会删除源db。
-
-9. MIGRATE DATABASE
-
- db可以在cluster之间进行物理迁移。
-
- 要迁移db,必须先链接db。执行迁移后数据会迁移到链接的db所在的cluster,并且执行迁移后源db被删除,链接断开。
-
-    数据的迁移,复用了负载均衡以及副本恢复中复制数据的流程(详见负载均衡相关文档)。具体实现上,在执行`MIGRATE`命令后,Doris会在元数据中,将源db的所有副本所属的cluster,修改为目的cluster。
-
- Doris会定期检查集群内机器之间是否均衡、副本是否齐全、是否有多余的副本。db的迁移即借用了这个流程,在检查副本齐全的时候同时检查副本所在的be是否属于该cluster,如果不属于,则记入要恢复的副本。并且副本多余要删除的时候会优先删除cluster外的副本,然后再按照现有的策略选择:宕机的be的副本->clone的副本->版本落后的副本->负载高的host上的副本,直到副本没有多余。
-
-
-
-10. BE的进程隔离
-
-    为了实现be进程之间实际cpu、io以及内存的隔离,需要依赖于be的部署。部署的时候需要在外围配置cgroup,把要部署的be的进程都写入cgroup。如果要实现io的物理隔离,各be配置的数据存放路径需要在不同磁盘上,这里不做过多介绍。
-
diff --git a/docs/zh-CN/administrator-guide/operation/tablet-repair-and-balance.md b/docs/zh-CN/administrator-guide/operation/tablet-repair-and-balance.md
deleted file mode 100644
index eb23662432..0000000000
--- a/docs/zh-CN/administrator-guide/operation/tablet-repair-and-balance.md
+++ /dev/null
@@ -1,775 +0,0 @@
----
-{
- "title": "数据副本管理",
- "language": "zh-CN"
-}
----
-
-
-
-# 数据副本管理
-
-从 0.9.0 版本开始,Doris 引入了优化后的副本管理策略,同时支持了更为丰富的副本状态查看工具。本文档主要介绍 Doris 数据副本均衡、修复方面的调度策略,以及副本管理的运维方法。帮助用户更方便的掌握和管理集群中的副本状态。
-
-> Colocation 属性的表的副本修复和均衡可以参阅 `docs/documentation/cn/administrator-guide/colocation-join.md`
-
-## 名词解释
-
-1. Tablet:Doris 表的逻辑分片,一个表有多个分片。
-2. Replica:分片的副本,默认一个分片有3个副本。
-3. Healthy Replica:健康副本,副本所在 Backend 存活,且副本的版本完整。
-4. TabletChecker(TC):是一个常驻的后台线程,用于定期扫描所有的 Tablet,检查这些 Tablet 的状态,并根据检查结果,决定是否将 tablet 发送给 TabletScheduler。
-5. TabletScheduler(TS):是一个常驻的后台线程,用于处理由 TabletChecker 发来的需要修复的 Tablet。同时也会进行集群副本均衡的工作。
-6. TabletSchedCtx(TSC):是一个 tablet 的封装。当 TC 选择一个 tablet 后,会将其封装为一个 TSC,发送给 TS。
-7. Storage Medium:存储介质。Doris 支持对分区粒度指定不同的存储介质,包括 SSD 和 HDD。副本调度策略也是针对不同的存储介质分别调度的。
-
-```
-
- +--------+ +-----------+
- | Meta | | Backends |
- +---^----+ +------^----+
- | | | 3. Send clone tasks
- 1. Check tablets | | |
- +--------v------+ +-----------------+
- | TabletChecker +--------> TabletScheduler |
- +---------------+ +-----------------+
- 2. Waiting to be scheduled
-
-
-```
-
-上图是一个简化的工作流程。
-
-
-## 副本状态
-
-一个 Tablet 的多个副本,可能因为某些情况导致状态不一致。Doris 会尝试自动修复这些状态不一致的副本,让集群尽快从错误状态中恢复。
-
-**一个 Replica 的健康状态有以下几种:**
-
-1. BAD
-
- 即副本损坏。包括但不限于磁盘故障、BUG等引起的副本不可恢复的损毁状态。
-
-2. VERSION\_MISSING
-
- 版本缺失。Doris 中每一批次导入都对应一个数据版本。而一个副本的数据由多个连续的版本组成。而由于导入错误、延迟等原因,可能导致某些副本的数据版本不完整。
-
-3. HEALTHY
-
- 健康副本。即数据正常的副本,并且副本所在的 BE 节点状态正常(心跳正常且不处于下线过程中)
-
-
-**一个 Tablet 的健康状态由其所有副本的状态决定,有以下几种:**
-
-1. REPLICA\_MISSING
-
- 副本缺失。即存活副本数小于期望副本数。
-
-2. VERSION\_INCOMPLETE
-
- 存活副本数大于等于期望副本数,但其中健康副本数小于期望副本数。
-
-3. REPLICA\_RELOCATING
-
- 拥有等于 replication num 的版本完整的存活副本数,但是部分副本所在的 BE 节点处于 unavailable 状态(比如 decommission 中)
-
-4. REPLICA\_MISSING\_IN\_CLUSTER
-
- 当使用多 cluster 方式时,健康副本数大于等于期望副本数,但在对应 cluster 内的副本数小于期望副本数。
-
-5. REDUNDANT
-
- 副本冗余。健康副本都在对应 cluster 内,但数量大于期望副本数。或者有多余的 unavailable 副本。
-
-6. FORCE\_REDUNDANT
-
-    这是一个特殊状态。只会在期望副本数大于等于可用节点数,并且 Tablet 处于副本缺失状态时出现。这种情况下,需要先删除一个副本,以保证有可用节点用于创建新副本。
-
-7. COLOCATE\_MISMATCH
-
- 针对 Colocation 属性的表的分片状态。表示分片副本与 Colocation Group 的指定的分布不一致。
-
-8. COLOCATE\_REDUNDANT
-
- 针对 Colocation 属性的表的分片状态。表示 Colocation 表的分片副本冗余。
-
-9. HEALTHY
-
- 健康分片,即条件[1-8]都不满足。
-
-## 副本修复
-
-TabletChecker 作为常驻的后台进程,会定期检查所有分片的状态。对于非健康状态的分片,将会交给 TabletScheduler 进行调度和修复。修复的实际操作,都由 BE 上的 clone 任务完成。FE 只负责生成这些 clone 任务。
-
-> 注1:副本修复的主要思想是先通过创建或补齐使得分片的副本数达到期望值,然后再删除多余的副本。
->
-> 注2:一个 clone 任务就是完成从一个指定远端 BE 拷贝指定数据到指定目的端 BE 的过程。
-
-针对不同的状态,我们采用不同的修复方式:
-
-1. REPLICA\_MISSING/REPLICA\_RELOCATING
-
- 选择一个低负载的,可用的 BE 节点作为目的端。选择一个健康副本作为源端。clone 任务会从源端拷贝一个完整的副本到目的端。对于副本补齐,我们会直接选择一个可用的 BE 节点,而不考虑存储介质。
-
-2. VERSION\_INCOMPLETE
-
- 选择一个相对完整的副本作为目的端。选择一个健康副本作为源端。clone 任务会从源端尝试拷贝缺失的版本到目的端的副本。
-
-3. REPLICA\_MISSING\_IN\_CLUSTER
-
- 这种状态处理方式和 REPLICA\_MISSING 相同。
-
-4. REDUNDANT
-
- 通常经过副本修复后,分片会有冗余的副本。我们选择一个冗余副本将其删除。冗余副本的选择遵从以下优先级:
- 1. 副本所在 BE 已经下线
- 2. 副本已损坏
- 3. 副本所在 BE 失联或在下线中
- 4. 副本处于 CLONE 状态(该状态是 clone 任务执行过程中的一个中间状态)
- 5. 副本有版本缺失
- 6. 副本所在 cluster 不正确
- 7. 副本所在 BE 节点负载高
-
-5. FORCE\_REDUNDANT
-
-    不同于 REDUNDANT:此时虽然 Tablet 有副本缺失,但已经没有额外的可用节点用于创建新的副本了,所以必须先删除一个副本,以腾出一个可用节点用于创建新的副本。
- 删除副本的顺序同 REDUNDANT。
-
-6. COLOCATE\_MISMATCH
-
- 从 Colocation Group 中指定的副本分布 BE 节点中选择一个作为目的节点进行副本补齐。
-
-7. COLOCATE\_REDUNDANT
-
- 删除一个非 Colocation Group 中指定的副本分布 BE 节点上的副本。
-
-Doris 在选择副本节点时,不会将同一个 Tablet 的副本部署在同一个 host 的不同 BE 上。保证了即使同一个 host 上的所有 BE 都挂掉,也不会造成全部副本丢失。
-
-### 调度优先级
-
-TabletScheduler 里等待被调度的分片会根据状态不同,赋予不同的优先级。优先级高的分片将会被优先调度。目前有以下几种优先级。
-
-1. VERY\_HIGH
-
- * REDUNDANT。对于有副本冗余的分片,我们优先处理。虽然逻辑上来讲,副本冗余的紧急程度最低,但是因为这种情况处理起来最快且可以快速释放资源(比如磁盘空间等),所以我们优先处理。
- * FORCE\_REDUNDANT。同上。
-
-2. HIGH
-
- * REPLICA\_MISSING 且多数副本缺失(比如3副本丢失了2个)
- * VERSION\_INCOMPLETE 且多数副本的版本缺失
- * COLOCATE\_MISMATCH 我们希望 Colocation 表相关的分片能够尽快修复完成。
- * COLOCATE\_REDUNDANT
-
-3. NORMAL
-
- * REPLICA\_MISSING 但多数存活(比如3副本丢失了1个)
- * VERSION\_INCOMPLETE 但多数副本的版本完整
- * REPLICA\_RELOCATING 且多数副本需要 relocate(比如3副本有2个)
-
-4. LOW
-
- * REPLICA\_MISSING\_IN\_CLUSTER
- * REPLICA\_RELOCATING 但多数副本 stable
-
-### 手动优先级
-
-系统会自动判断调度优先级。但是有些时候,用户希望某些表或分区的分片能够更快的被修复。因此我们提供一个命令,用户可以指定某个表或分区的分片被优先修复:
-
-`ADMIN REPAIR TABLE tbl [PARTITION (p1, p2, ...)];`
-
-这个命令,告诉 TC,在扫描 Tablet 时,对需要优先修复的表或分区中的有问题的 Tablet,给予 VERY\_HIGH 的优先级。
-
-> 注:这个命令只是一个 hint,并不能保证一定能修复成功,并且优先级也会随 TS 的调度而发生变化。并且当 Master FE 切换或重启后,这些信息都会丢失。
-
-可以通过以下命令取消优先级:
-
-`ADMIN CANCEL REPAIR TABLE tbl [PARTITION (p1, p2, ...)];`
-
-### 优先级调度
-
-优先级保证了损坏严重的分片能够优先被修复,提高系统可用性。但是如果高优先级的修复任务一直失败,则会导致低优先级的任务一直得不到调度。因此,我们会根据任务的运行状态,动态的调整任务的优先级,保证所有任务都有机会被调度到。
-
-* 连续5次调度失败(如无法获取资源,无法找到合适的源端或目的端等),则优先级会被下调。
-* 持续 30 分钟未被调度,则上调优先级。
-* 同一 tablet 任务的优先级至少间隔 5 分钟才会被调整一次。
-
-同时为了保证初始优先级的权重,我们规定,初始优先级为 VERY\_HIGH 的,最低被下调到 NORMAL。而初始优先级为 LOW 的,最多被上调为 HIGH。这里的优先级调整,也会调整用户手动设置的优先级。
-
-## 副本均衡
-
-Doris 会自动进行集群内的副本均衡。目前支持两种均衡策略:负载均衡和分区均衡。负载均衡适合需要兼顾节点磁盘使用率和节点副本数量的场景;而分区均衡会使每个分区的副本都均匀分布在各个节点,避免热点,适合对分区读写要求比较高的场景。但是,分区均衡不考虑磁盘使用率,使用分区均衡时需要注意磁盘的使用情况。均衡策略只能在 FE 启动前通过 [tablet_rebalancer_type](../config/fe_config.md#配置项列表) 配置,不支持运行时切换。
-
-### 负载均衡
-
-负载均衡的主要思想是,对某些分片,先在低负载的节点上创建一个副本,然后再删除这些分片在高负载节点上的副本。同时,因为不同存储介质的存在,在同一个集群内的不同 BE 节点上,可能存在一种或两种存储介质。我们要求存储介质为 A 的分片在均衡后,尽量依然存储在存储介质 A 中。所以我们根据存储介质,对集群的 BE 节点进行划分。然后针对不同的存储介质的 BE 节点集合,进行负载均衡调度。
-
-同样,副本均衡会保证不会将同一个 Tablet 的副本部署在同一个 host 的 BE 上。
-
-#### BE 节点负载
-
-我们用 ClusterLoadStatistics(CLS)表示一个 cluster 中各个 Backend 的负载均衡情况。TabletScheduler 根据这个统计值,来触发集群均衡。我们当前通过 **磁盘使用率** 和 **副本数量** 两个指标,为每个BE计算一个 loadScore,作为 BE 的负载分数。分数越高,表示该 BE 的负载越重。
-
-磁盘使用率和副本数量各有一个权重系数,分别为 **capacityCoefficient** 和 **replicaNumCoefficient**,其 **和恒为 1**。其中 capacityCoefficient 会根据实际磁盘使用率动态调整。当一个 BE 的总体磁盘使用率在 50% 以下,则 capacityCoefficient 值为 0.5,如果磁盘使用率在 75%(可通过 FE 配置项 `capacity_used_percent_high_water` 配置)以上,则值为 1。如果使用率介于 50% ~ 75% 之间,则该权重系数平滑增加,公式为:
-
-`capacityCoefficient= 2 * 磁盘使用率 - 0.5`
-
-该权重系数保证当磁盘使用率过高时,该 Backend 的负载分数会更高,以保证尽快降低这个 BE 的负载。
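-
-按上述规则做一个简单的数值示意:假设某 BE 的总体磁盘使用率为 60%(介于 50% ~ 75% 之间),则
-
-```
-capacityCoefficient   = 2 * 0.6 - 0.5 = 0.7
-replicaNumCoefficient = 1 - 0.7       = 0.3
-```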
-
-TabletScheduler 会每隔 20s 更新一次 CLS。
-
-### 分区均衡
-
-分区均衡的主要思想是,将每个分区在各个 Backend 上的 replica 数量差(即 partition skew)减少到最小。因此只考虑副本个数,不考虑磁盘使用率。
-为了使迁移次数尽量少,分区均衡使用二维贪心的策略:优先均衡 partition skew 最大的分区;均衡某个分区时,会尽量选择能使整个 cluster 在各个 Backend 上的 replica 数量差(即 cluster skew/total skew)减小的方向。
-
-#### skew 统计
-
-skew 统计信息由`ClusterBalanceInfo`表示。其中,`partitionInfoBySkew`以 partition skew 为 key 排序,便于找到 max partition skew;`beByTotalReplicaCount`则是以 Backend 上的所有 replica 个数为 key 排序。`ClusterBalanceInfo`同样保存在 CLS 中,同样每 20s 更新一次。
-
-max partition skew 的分区可能有多个,采用随机的方式选择一个分区计算。
-
-### 均衡策略
-
-TabletScheduler 在每轮调度时,都会通过 LoadBalancer 来选择一定数目的健康分片作为 balance 的候选分片。在下一次调度时,会尝试根据这些候选分片,进行均衡调度。
-
-## 资源控制
-
-无论是副本修复还是均衡,都是通过副本在各个 BE 之间拷贝完成的。如果同一台 BE 同一时间执行过多的任务,则会带来不小的 IO 压力。因此,Doris 在调度时控制了每个节点上能够执行的任务数目。最小的资源控制单位是磁盘(即在 be.conf 中指定的一个数据路径)。我们默认为每块磁盘配置两个 slot 用于副本修复。一个 clone 任务会占用源端和目的端各一个 slot。如果 slot 数目为零,则不会再对这块磁盘分配任务。该 slot 个数可以通过 FE 的 `schedule_slot_num_per_path` 参数配置。
-
-另外,我们默认为每块磁盘提供 2 个单独的 slot 用于均衡任务。目的是防止高负载的节点因为 slot 被修复任务占用,而无法通过均衡释放空间。
-
-## 副本状态查看
-
-副本状态查看主要是查看副本的状态,以及副本修复和均衡任务的运行状态。这些状态大部分都**仅存在于** Master FE 节点中。因此,以下命令需直连到 Master FE 执行。
-
-### 副本状态
-
-1. 全局状态检查
-
- 通过 `SHOW PROC '/statistic';` 命令可以查看整个集群的副本状态。
-
- ```
- +----------+-----------------------------+----------+--------------+----------+-----------+------------+--------------------+-----------------------+
- | DbId | DbName | TableNum | PartitionNum | IndexNum | TabletNum | ReplicaNum | UnhealthyTabletNum | InconsistentTabletNum |
- +----------+-----------------------------+----------+--------------+----------+-----------+------------+--------------------+-----------------------+
- | 35153636 | default_cluster:DF_Newrisk | 3 | 3 | 3 | 96 | 288 | 0 | 0 |
- | 48297972 | default_cluster:PaperData | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
- | 5909381 | default_cluster:UM_TEST | 7 | 7 | 10 | 320 | 960 | 1 | 0 |
- | Total | 240 | 10 | 10 | 13 | 416 | 1248 | 1 | 0 |
- +----------+-----------------------------+----------+--------------+----------+-----------+------------+--------------------+-----------------------+
- ```
-
- 其中 `UnhealthyTabletNum` 列显示了对应的 Database 中,有多少 Tablet 处于非健康状态。`InconsistentTabletNum` 列显示了对应的 Database 中,有多少 Tablet 处于副本不一致的状态。最后一行 `Total` 行对整个集群进行了统计。正常情况下 `UnhealthyTabletNum` 和 `InconsistentTabletNum` 应为0。如果不为零,可以进一步查看具体有哪些 Tablet。如上图中,UM_TEST 数据库有 1 个 Tablet 状态不健康,则可以使用以下命令查看具体是哪一个 Tablet。
-
- `SHOW PROC '/statistic/5909381';`
-
- 其中 `5909381` 为对应的 DbId。
-
- ```
- +------------------+---------------------+
- | UnhealthyTablets | InconsistentTablets |
- +------------------+---------------------+
- | [40467980] | [] |
- +------------------+---------------------+
- ```
-
- 上图会显示具体的不健康的 Tablet ID(40467980)。后面我们会介绍如何查看一个具体的 Tablet 的各个副本的状态。
-
-2. 表(分区)级别状态检查
-
- 用户可以通过以下命令查看指定表或分区的副本状态,并可以通过 WHERE 语句对状态进行过滤。如查看表 tbl1 中,分区 p1 和 p2 上状态为 OK 的副本:
-
- `ADMIN SHOW REPLICA STATUS FROM tbl1 PARTITION (p1, p2) WHERE STATUS = "OK";`
-
- ```
- +----------+-----------+-----------+---------+-------------------+--------------------+------------------+------------+------------+-------+--------+--------+
- | TabletId | ReplicaId | BackendId | Version | LastFailedVersion | LastSuccessVersion | CommittedVersion | SchemaHash | VersionNum | IsBad | State | Status |
- +----------+-----------+-----------+---------+-------------------+--------------------+------------------+------------+------------+-------+--------+--------+
- | 29502429 | 29502432 | 10006 | 2 | -1 | 2 | 1 | -1 | 2 | false | NORMAL | OK |
- | 29502429 | 36885996 | 10002 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK |
- | 29502429 | 48100551 | 10007 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK |
- | 29502433 | 29502434 | 10001 | 2 | -1 | 2 | 1 | -1 | 2 | false | NORMAL | OK |
- | 29502433 | 44900737 | 10004 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK |
- | 29502433 | 48369135 | 10006 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK |
- +----------+-----------+-----------+---------+-------------------+--------------------+------------------+------------+------------+-------+--------+--------+
- ```
-
- 这里会展示所有副本的状态。其中 `IsBad` 列为 `true` 则表示副本已经损坏。而 `Status` 列则会显示另外的其他状态。具体的状态说明,可以通过 `HELP ADMIN SHOW REPLICA STATUS;` 查看帮助。
-
- `ADMIN SHOW REPLICA STATUS` 命令主要用于查看副本的健康状态。用户还可以通过以下命令查看指定表中副本的一些额外信息:
-
- `SHOW TABLETS FROM tbl1;`
-
- ```
- +----------+-----------+-----------+------------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------------+--------------+----------------------+----------------------+----------------------+
- | TabletId | ReplicaId | BackendId | SchemaHash | Version | VersionHash | LstSuccessVersion | LstSuccessVersionHash | LstFailedVersion | LstFailedVersionHash | LstFailedTime | DataSize | RowCount | State | LstConsistencyCheckTime | CheckVersion | CheckVersionHash | VersionCount | PathHash | MetaUrl | CompactionStatus |
- +----------+-----------+-----------+------------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------------+--------------+----------------------+----------------------+----------------------+
- | 29502429 | 29502432 | 10006 | 1421156361 | 2 | 0 | 2 | 0 | -1 | 0 | N/A | 784 | 0 | NORMAL | N/A | -1 | -1 | 2 | -5822326203532286804 | url | url |
- | 29502429 | 36885996 | 10002 | 1421156361 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | 784 | 0 | NORMAL | N/A | -1 | -1 | 2 | -1441285706148429853 | url | url |
- | 29502429 | 48100551 | 10007 | 1421156361 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | 784 | 0 | NORMAL | N/A | -1 | -1 | 2 | -4784691547051455525 | url | url |
- +----------+-----------+-----------+------------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------------+--------------+----------------------+----------------------+----------------------+
- ```
-
- 上图展示了包括副本大小、行数、版本数量、所在数据路径等一些额外的信息。
-
- > 注:这里显示的 `State` 列的内容不代表副本的健康状态,而是副本处于某种任务下的状态,比如 CLONE、SCHEMA\_CHANGE、ROLLUP 等。
-
- 此外,用户也可以通过以下命令,查看指定表或分区的副本分布情况,来检查副本分布是否均匀。
-
- `ADMIN SHOW REPLICA DISTRIBUTION FROM tbl1;`
-
- ```
- +-----------+------------+-------+---------+
- | BackendId | ReplicaNum | Graph | Percent |
- +-----------+------------+-------+---------+
- | 10000 | 7 | | 7.29 % |
- | 10001 | 9 | | 9.38 % |
- | 10002 | 7 | | 7.29 % |
- | 10003 | 7 | | 7.29 % |
- | 10004 | 9 | | 9.38 % |
- | 10005 | 11 | > | 11.46 % |
- | 10006 | 18 | > | 18.75 % |
- | 10007 | 15 | > | 15.62 % |
- | 10008 | 13 | > | 13.54 % |
- +-----------+------------+-------+---------+
- ```
-
- 这里分别展示了表 tbl1 的副本在各个 BE 节点上的个数、百分比,以及一个简单的图形化显示。
-
-3. Tablet 级别状态检查
-
- 当我们要定位到某个具体的 Tablet 时,可以使用如下命令来查看一个具体的 Tablet 的状态。如查看 ID 为 29502553 的 tablet:
-
- `SHOW TABLET 29502553;`
-
- ```
- +------------------------+-----------+---------------+-----------+----------+----------+-------------+----------+--------+---------------------------------------------------------------------------+
- | DbName | TableName | PartitionName | IndexName | DbId | TableId | PartitionId | IndexId | IsSync | DetailCmd |
- +------------------------+-----------+---------------+-----------+----------+----------+-------------+----------+--------+---------------------------------------------------------------------------+
- | default_cluster:test | test | test | test | 29502391 | 29502428 | 29502427 | 29502428 | true | SHOW PROC '/dbs/29502391/29502428/partitions/29502427/29502428/29502553'; |
- +------------------------+-----------+---------------+-----------+----------+----------+-------------+----------+--------+---------------------------------------------------------------------------+
- ```
-
- 上图显示了这个 tablet 所对应的数据库、表、分区、上卷表等信息。用户可以复制 `DetailCmd` 命令中的命令继续执行:
-
- `SHOW PROC '/dbs/29502391/29502428/partitions/29502427/29502428/29502553';`
-
- ```
- +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+----------+------------------+
- | ReplicaId | BackendId | Version | VersionHash | LstSuccessVersion | LstSuccessVersionHash | LstFailedVersion | LstFailedVersionHash | LstFailedTime | SchemaHash | DataSize | RowCount | State | IsBad | VersionCount | PathHash | MetaUrl | CompactionStatus |
- +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+----------+------------------+
- | 43734060 | 10004 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | -1 | 784 | 0 | NORMAL | false | 2 | -8566523878520798656 | url | url |
- | 29502555 | 10002 | 2 | 0 | 2 | 0 | -1 | 0 | N/A | -1 | 784 | 0 | NORMAL | false | 2 | 1885826196444191611 | url | url |
- | 39279319 | 10007 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | -1 | 784 | 0 | NORMAL | false | 2 | 1656508631294397870 | url | url |
- +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+----------+------------------+
- ```
-
-    上图显示了对应 Tablet 的所有副本情况。这里显示的内容和 `SHOW TABLETS FROM tbl1;` 的内容相同。但这里可以清楚地知道,一个具体的 Tablet 的所有副本的状态。
-
-### 副本调度任务
-
-1. 查看等待被调度的任务
-
- `SHOW PROC '/cluster_balance/pending_tablets';`
-
- ```
- +----------+--------+-----------------+---------+----------+----------+-------+---------+--------+----------+---------+---------------------+---------------------+---------------------+----------+------+-------------+---------------+---------------------+------------+---------------------+--------+---------------------+-------------------------------+
- | TabletId | Type | Status | State | OrigPrio | DynmPrio | SrcBe | SrcPath | DestBe | DestPath | Timeout | Create | LstSched | LstVisit | Finished | Rate | FailedSched | FailedRunning | LstAdjPrio | VisibleVer | VisibleVerHash | CmtVer | CmtVerHash | ErrMsg |
- +----------+--------+-----------------+---------+----------+----------+-------+---------+--------+----------+---------+---------------------+---------------------+---------------------+----------+------+-------------+---------------+---------------------+------------+---------------------+--------+---------------------+-------------------------------+
- | 4203036 | REPAIR | REPLICA_MISSING | PENDING | HIGH | LOW | -1 | -1 | -1 | -1 | 0 | 2019-02-21 15:00:20 | 2019-02-24 11:18:41 | 2019-02-24 11:18:41 | N/A | N/A | 2 | 0 | 2019-02-21 15:00:43 | 1 | 0 | 2 | 0 | unable to find source replica |
- +----------+--------+-----------------+---------+----------+----------+-------+---------+--------+----------+---------+---------------------+---------------------+---------------------+----------+------+-------------+---------------+---------------------+------------+---------------------+--------+---------------------+-------------------------------+
- ```
-
- 各列的具体含义如下:
-
- * TabletId:等待调度的 Tablet 的 ID。一个调度任务只针对一个 Tablet
- * Type:任务类型,可以是 REPAIR(修复) 或 BALANCE(均衡)
- * Status:该 Tablet 当前的状态,如 REPLICA\_MISSING(副本缺失)
- * State:该调度任务的状态,可能为 PENDING/RUNNING/FINISHED/CANCELLED/TIMEOUT/UNEXPECTED
- * OrigPrio:初始的优先级
- * DynmPrio:当前动态调整后的优先级
- * SrcBe:源端 BE 节点的 ID
- * SrcPath:源端 BE 节点的路径的 hash 值
- * DestBe:目的端 BE 节点的 ID
- * DestPath:目的端 BE 节点的路径的 hash 值
- * Timeout:当任务被调度成功后,这里会显示任务的超时时间,单位秒
- * Create:任务被创建的时间
- * LstSched:上一次任务被调度的时间
- * LstVisit:上一次任务被访问的时间。这里“被访问”指包括被调度,任务执行汇报等和这个任务相关的被处理的时间点
- * Finished:任务结束时间
- * Rate:clone 任务的数据拷贝速率
- * FailedSched:任务调度失败的次数
- * FailedRunning:任务执行失败的次数
- * LstAdjPrio:上一次优先级调整的时间
- * CmtVer/CmtVerHash/VisibleVer/VisibleVerHash:用于执行 clone 任务的 version 信息
- * ErrMsg:任务被调度和运行过程中,出现的错误信息
-
-2. 查看正在运行的任务
-
- `SHOW PROC '/cluster_balance/running_tablets';`
-
- 其结果中各列的含义和 `pending_tablets` 相同。
-
-3. 查看已结束任务
-
- `SHOW PROC '/cluster_balance/history_tablets';`
-
- 我们默认只保留最近 1000 个完成的任务。其结果中各列的含义和 `pending_tablets` 相同。如果 `State` 列为 `FINISHED`,则说明任务正常完成。如果为其他,则可以根据 `ErrMsg` 列的错误信息查看具体原因。
-
-## 集群负载及调度资源查看
-
-1. 集群负载
-
- 通过以下命令可以查看集群当前的负载情况:
-
- `SHOW PROC '/cluster_balance/cluster_load_stat';`
-
- 首先看到的是对不同存储介质的划分:
-
- ```
- +---------------+
- | StorageMedium |
- +---------------+
- | HDD |
- | SSD |
- +---------------+
- ```
-
- 点击某一种存储介质,可以看到包含该存储介质的 BE 节点的均衡状态:
-
- `SHOW PROC '/cluster_balance/cluster_load_stat/HDD';`
-
- ```
- +----------+-----------------+-----------+---------------+----------------+-------------+------------+----------+-----------+--------------------+-------+
- | BeId | Cluster | Available | UsedCapacity | Capacity | UsedPercent | ReplicaNum | CapCoeff | ReplCoeff | Score | Class |
- +----------+-----------------+-----------+---------------+----------------+-------------+------------+----------+-----------+--------------------+-------+
- | 10003 | default_cluster | true | 3477875259079 | 19377459077121 | 17.948 | 493477 | 0.5 | 0.5 | 0.9284678149967587 | MID |
- | 10002 | default_cluster | true | 3607326225443 | 19377459077121 | 18.616 | 496928 | 0.5 | 0.5 | 0.948660871419998 | MID |
- | 10005 | default_cluster | true | 3523518578241 | 19377459077121 | 18.184 | 545331 | 0.5 | 0.5 | 0.9843539990641831 | MID |
- | 10001 | default_cluster | true | 3535547090016 | 19377459077121 | 18.246 | 558067 | 0.5 | 0.5 | 0.9981869446537612 | MID |
- | 10006 | default_cluster | true | 3636050364835 | 19377459077121 | 18.764 | 547543 | 0.5 | 0.5 | 1.0011489897614072 | MID |
- | 10004 | default_cluster | true | 3506558163744 | 15501967261697 | 22.620 | 468957 | 0.5 | 0.5 | 1.0228319835582569 | MID |
- | 10007 | default_cluster | true | 4036460478905 | 19377459077121 | 20.831 | 551645 | 0.5 | 0.5 | 1.057279369420761 | MID |
- | 10000 | default_cluster | true | 4369719923760 | 19377459077121 | 22.551 | 547175 | 0.5 | 0.5 | 1.0964036415787461 | MID |
- +----------+-----------------+-----------+---------------+----------------+-------------+------------+----------+-----------+--------------------+-------+
- ```
-
- 其中一些列的含义如下:
-
- * Available:为 true 表示 BE 心跳正常,且没有处于下线中
- * UsedCapacity:字节,BE 上已使用的磁盘空间大小
- * Capacity:字节,BE 上总的磁盘空间大小
- * UsedPercent:百分比,BE 上的磁盘空间使用率
- * ReplicaNum:BE 上副本数量
- * CapCoeff/ReplCoeff:磁盘空间和副本数的权重系数
- * Score:负载分数。分数越高,负载越重
- * Class:根据负载情况分类,LOW/MID/HIGH。均衡调度会将高负载节点上的副本迁往低负载节点
-
- 用户可以进一步查看某个 BE 上各个路径的使用率,比如 ID 为 10001 这个 BE:
-
- `SHOW PROC '/cluster_balance/cluster_load_stat/HDD/10001';`
-
- ```
- +------------------+------------------+---------------+---------------+---------+--------+----------------------+
- | RootPath | DataUsedCapacity | AvailCapacity | TotalCapacity | UsedPct | State | PathHash |
- +------------------+------------------+---------------+---------------+---------+--------+----------------------+
- | /home/disk4/palo | 498.757 GB | 3.033 TB | 3.525 TB | 13.94 % | ONLINE | 4883406271918338267 |
- | /home/disk3/palo | 704.200 GB | 2.832 TB | 3.525 TB | 19.65 % | ONLINE | -5467083960906519443 |
- | /home/disk1/palo | 512.833 GB | 3.007 TB | 3.525 TB | 14.69 % | ONLINE | -7733211489989964053 |
- | /home/disk2/palo | 881.955 GB | 2.656 TB | 3.525 TB | 24.65 % | ONLINE | 4870995507205544622 |
- | /home/disk5/palo | 694.992 GB | 2.842 TB | 3.525 TB | 19.36 % | ONLINE | 1916696897889786739 |
- +------------------+------------------+---------------+---------------+---------+--------+----------------------+
- ```
-
- 这里显示了指定 BE 上,各个数据路径的磁盘使用率情况。
-
-2. 调度资源
-
- 用户可以通过以下命令,查看当前各个节点的 slot 使用情况:
-
- `SHOW PROC '/cluster_balance/working_slots';`
-
- ```
- +----------+----------------------+------------+------------+-------------+----------------------+
- | BeId | PathHash | AvailSlots | TotalSlots | BalanceSlot | AvgRate |
- +----------+----------------------+------------+------------+-------------+----------------------+
- | 10000 | 8110346074333016794 | 2 | 2 | 2 | 2.459007474009069E7 |
- | 10000 | -5617618290584731137 | 2 | 2 | 2 | 2.4730105014001578E7 |
- | 10001 | 4883406271918338267 | 2 | 2 | 2 | 1.6711402709780257E7 |
- | 10001 | -5467083960906519443 | 2 | 2 | 2 | 2.7540126380326536E7 |
- | 10002 | 9137404661108133814 | 2 | 2 | 2 | 2.417217089806745E7 |
- | 10002 | 1885826196444191611 | 2 | 2 | 2 | 1.6327378456676323E7 |
- +----------+----------------------+------------+------------+-------------+----------------------+
- ```
-
- 这里以数据路径为粒度,展示了当前 slot 的使用情况。其中 `AvgRate` 为历史统计的该路径上 clone 任务的拷贝速率,单位是字节/秒。
-
-3. 优先修复查看
-
- 以下命令,可以查看通过 `ADMIN REPAIR TABLE` 命令设置的优先修复的表或分区。
-
- `SHOW PROC '/cluster_balance/priority_repair';`
-
- 其中 `RemainingTimeMs` 表示,这些优先修复的内容,将在这个时间后,被自动移出优先修复队列。以防止优先修复一直失败导致资源被占用。
-
-### 调度器统计状态查看
-
-我们收集了 TabletChecker 和 TabletScheduler 在运行过程中的一些统计信息,可以通过以下命令查看:
-
-`SHOW PROC '/cluster_balance/sched_stat';`
-
-```
-+---------------------------------------------------+-------------+
-| Item | Value |
-+---------------------------------------------------+-------------+
-| num of tablet check round | 12041 |
-| cost of tablet check(ms) | 7162342 |
-| num of tablet checked in tablet checker | 18793506362 |
-| num of unhealthy tablet checked in tablet checker | 7043900 |
-| num of tablet being added to tablet scheduler | 1153 |
-| num of tablet schedule round | 49538 |
-| cost of tablet schedule(ms) | 49822 |
-| num of tablet being scheduled | 4356200 |
-| num of tablet being scheduled succeeded | 320 |
-| num of tablet being scheduled failed | 4355594 |
-| num of tablet being scheduled discard | 286 |
-| num of tablet priority upgraded | 0 |
-| num of tablet priority downgraded | 1096 |
-| num of clone task | 230 |
-| num of clone task succeeded | 228 |
-| num of clone task failed | 2 |
-| num of clone task timeout | 2 |
-| num of replica missing error | 4354857 |
-| num of replica version missing error | 967 |
-| num of replica relocating | 0 |
-| num of replica redundant error | 90 |
-| num of replica missing in cluster error | 0 |
-| num of balance scheduled | 0 |
-+---------------------------------------------------+-------------+
-```
-
-各行含义如下:
-
-* num of tablet check round:Tablet Checker 检查次数
-* cost of tablet check(ms):Tablet Checker 检查总耗时
-* num of tablet checked in tablet checker:Tablet Checker 检查过的 tablet 数量
-* num of unhealthy tablet checked in tablet checker:Tablet Checker 检查过的不健康的 tablet 数量
-* num of tablet being added to tablet scheduler:被提交到 Tablet Scheduler 中的 tablet 数量
-* num of tablet schedule round:Tablet Scheduler 运行次数
-* cost of tablet schedule(ms):Tablet Scheduler 运行总耗时
-* num of tablet being scheduled:被调度的 Tablet 总数量
-* num of tablet being scheduled succeeded:被成功调度的 Tablet 总数量
-* num of tablet being scheduled failed:调度失败的 Tablet 总数量
-* num of tablet being scheduled discard:调度失败且被抛弃的 Tablet 总数量
-* num of tablet priority upgraded:优先级上调次数
-* num of tablet priority downgraded:优先级下调次数
-* num of clone task:生成的 clone 任务数量
-* num of clone task succeeded:clone 任务成功的数量
-* num of clone task failed:clone 任务失败的数量
-* num of clone task timeout:clone 任务超时的数量
-* num of replica missing error:检查的状态为副本缺失的 tablet 的数量
-* num of replica version missing error:检查的状态为版本缺失的 tablet 的数量(该统计值包括了 num of replica relocating 和 num of replica missing in cluster error)
-* num of replica relocating:检查的状态为 replica relocating 的 tablet 的数量
-* num of replica redundant error:检查的状态为副本冗余的 tablet 的数量
-* num of replica missing in cluster error:检查的状态为不在对应 cluster 的 tablet 的数量
-* num of balance scheduled:均衡调度的次数
-
-> 注:以上状态都只是历史累加值。我们也在 FE 的日志中,定期打印了这些统计信息,其中括号内的数值表示自上次统计信息打印以来,各个统计值的变化数量。
-
-## 相关配置说明
-
-### 可调整参数
-
-以下可调整参数均为 fe.conf 中可配置参数,列表之后附有运行时查看和修改这些参数的示例。
-
-* use\_new\_tablet\_scheduler
-
- * 说明:是否启用新的副本调度方式。新的副本调度方式即本文档介绍的副本调度方式。
- * 默认值:true
- * 重要性:高
-
-* tablet\_repair\_delay\_factor\_second
-
- * 说明:对于不同的调度优先级,我们会延迟不同的时间后开始修复。以防止因为例行重启、升级等过程中,产生大量不必要的副本修复任务。此参数为一个基准系数。对于 HIGH 优先级,延迟为 基准系数 * 1;对于 NORMAL 优先级,延迟为 基准系数 * 2;对于 LOW 优先级,延迟为 基准系数 * 3。即优先级越低,延迟等待时间越长。如果用户想尽快修复副本,可以适当降低该参数。
- * 默认值:60秒
- * 重要性:高
-
-* schedule\_slot\_num\_per\_path
-
-    * 说明:默认分配给每块磁盘用于副本修复的 slot 数目。该数目表示一块磁盘能同时运行的副本修复任务数。如果想以更快的速度修复副本,可以适当调高这个参数。但数值越高,可能对 IO 影响越大。
- * 默认值:2
- * 重要性:高
-
-* balance\_load\_score\_threshold
-
-    * 说明:集群均衡的阈值。默认为 0.1,即 10%。当一个 BE 节点的 load score 与平均 load score 的差距不超过 10% 时,我们认为这个节点是均衡的。如果想让集群负载更加平均,可以适当调低这个参数。
- * 默认值:0.1
- * 重要性:中
-
-* storage\_high\_watermark\_usage\_percent 和 storage\_min\_left\_capacity\_bytes
-
- * 说明:这两个参数,分别表示一个磁盘的最大空间使用率上限,以及最小的空间剩余下限。当一块磁盘的空间使用率大于上限,或者剩余空间小于下限时,该磁盘将不再作为均衡调度的目的地址。
- * 默认值:0.85 和 1048576000 (1GB)
- * 重要性:中
-
-* disable\_balance
-
-    * 说明:控制是否关闭均衡功能。当副本处于均衡过程中时,有些功能,如 ALTER TABLE 等将会被禁止,而均衡可能持续很长时间。因此,如果用户希望尽快执行这些被禁止的操作,可以将该参数设为 true,以关闭均衡调度。
- * 默认值:false
- * 重要性:中
-
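-如果上述某个参数属于 FE 支持动态修改的配置项,也可以参考下面的方式在运行时查看和调整,而无需重启 FE(仅为示意,各参数是否支持动态修改请以实际版本为准):
-
-```
-# 查看与均衡相关的当前配置
-ADMIN SHOW FRONTEND CONFIG LIKE "%balance%";
-
-# 运行时修改,以 disable_balance 为例
-ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");
-```
-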
-### 不可调整参数
-
-以下参数暂不支持修改,仅作说明。
-
-* TabletChecker 调度间隔
-
- TabletChecker 每20秒进行一次检查调度。
-
-* TabletScheduler 调度间隔
-
- TabletScheduler 每5秒进行一次调度
-
-* TabletScheduler 每批次调度个数
-
- TabletScheduler 每次调度最多 50 个 tablet。
-
-* TabletScheduler 最大等待调度和运行中任务数
-
- 最大等待调度任务数和运行中任务数为 2000。当超过 2000 后,TabletChecker 将不再产生新的调度任务给 TabletScheduler。
-
-* TabletScheduler 最大均衡任务数
-
- 最大均衡任务数为 500。当超过 500 后,将不再产生新的均衡任务。
-
-* 每块磁盘用于均衡任务的 slot 数目
-
- 每块磁盘用于均衡任务的 slot 数目为2。这个 slot 独立于用于副本修复的 slot。
-
-* 集群均衡情况更新间隔
-
- TabletScheduler 每隔 20 秒会重新计算一次集群的 load score。
-
-* Clone 任务的最小和最大超时时间
-
- 一个 clone 任务超时时间范围是 3min ~ 2hour。具体超时时间通过 tablet 的大小计算。计算公式为 (tablet size) / (5MB/s)。当一个 clone 任务运行失败 3 次后,该任务将终止。
-
-* 动态优先级调整策略
-
- 优先级最小调整间隔为 5min。当一个 tablet 调度失败5次后,会调低优先级。当一个 tablet 30min 未被调度时,会调高优先级。
-
-## 相关问题
-
-* 在某些情况下,默认的副本修复和均衡策略可能会导致网络被打满(多发生在千兆网卡,且每台 BE 的磁盘数量较多的情况下)。此时需要调整一些参数来减少同时进行的均衡和修复任务数。
-
-* 目前针对 Colocate Table 的副本的均衡策略无法保证同一个 Tablet 的副本不会分布在同一个 host 的 BE 上。但 Colocate Table 的副本的修复策略会检测到这种分布错误并校正。但可能会出现,校正后,均衡策略再次认为副本不均衡而重新均衡。从而导致在两种状态间不停交替,无法使 Colocate Group 达成稳定。针对这种情况,我们建议在使用 Colocate 属性时,尽量保证集群是同构的,以减小副本分布在同一个 host 上的概率。
-
-## 最佳实践
-
-### 控制并管理集群的副本修复和均衡进度
-
-在大多数情况下,通过默认的参数配置,Doris 都可以自动的进行副本修复和集群均衡。但是某些情况下,我们需要通过人工介入调整参数,来达到一些特殊的目的。如优先修复某个表或分区、禁止集群均衡以降低集群负载、优先修复非 colocation 的表数据等等。
-
-本小节主要介绍如何通过修改参数,来控制并管理集群的副本修复和均衡进度。
-
-1. 删除损坏副本
-
- 某些情况下,Doris 可能无法自动检测某些损坏的副本,从而导致查询或导入在损坏的副本上频繁报错。此时我们需要手动删除已损坏的副本。该方法可以适用于:删除版本数过高导致 -235 错误的副本、删除文件已损坏的副本等等。
-
- 首先,找到副本对应的 tablet id,假设为 10001。通过 `show tablet 10001;` 并执行其中的 `show proc` 语句可以查看对应的 tablet 的各个副本详情。
-
- 假设需要删除的副本的 backend id 是 20001。则执行以下语句将副本标记为 `bad`:
-
- ```
- ADMIN SET REPLICA STATUS PROPERTIES("tablet_id" = "10001", "backend_id" = "20001", "status" = "bad");
- ```
-
- 此时,再次通过 `show proc` 语句可以看到对应的副本的 `IsBad` 列值为 `true`。
-
- 被标记为 `bad` 的副本不会再参与导入和查询。同时副本修复逻辑会自动补充一个新的副本。
-
-2. 优先修复某个表或分区
-
- `help admin repair table;` 查看帮助。该命令会尝试优先修复指定表或分区的tablet。
-
-3. 停止均衡任务
-
- 均衡任务会占用一定的网络带宽和IO资源。如果希望停止新的均衡任务的产生,可以通过以下命令:
-
- ```
- ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");
- ```
-
-4. 停止所有副本调度任务
-
- 副本调度任务包括均衡和修复任务。这些任务都会占用一定的网络带宽和IO资源。可以通过以下命令停止所有副本调度任务(不包括已经在运行的,包括 colocation 表和普通表):
-
- ```
- ADMIN SET FRONTEND CONFIG ("disable_tablet_scheduler" = "true");
- ```
-
-5. 停止所有 colocation 表的副本调度任务。
-
- colocation 表的副本调度和普通表是分开独立运行的。某些情况下,用户可能希望先停止对 colocation 表的均衡和修复工作,而将集群资源用于普通表的修复,则可以通过以下命令:
-
- ```
- ADMIN SET FRONTEND CONFIG ("disable_colocate_balance" = "true");
- ```
-
-6. 使用更保守的策略修复副本
-
- Doris 在检测到副本缺失、BE宕机等情况下,会自动修复副本。但为了减少一些抖动导致的错误(如BE短暂宕机),Doris 会延迟触发这些任务。
-
- * `tablet_repair_delay_factor_second` 参数。默认 60 秒。根据修复任务优先级的不同,会推迟 60秒、120秒、180秒后开始触发修复任务。可以通过以下命令延长这个时间,这样可以容忍更长的异常时间,以避免触发不必要的修复任务:
-
- ```
- ADMIN SET FRONTEND CONFIG ("tablet_repair_delay_factor_second" = "120");
- ```
-
-7. 使用更保守的策略触发 colocation group 的重分布
-
- colocation group 的重分布可能伴随着大量的 tablet 迁移。`colocate_group_relocate_delay_second` 用于控制重分布的触发延迟。默认 1800秒。如果某台 BE 节点可能长时间下线,可以尝试调大这个参数,以避免不必要的重分布:
-
- ```
- ADMIN SET FRONTEND CONFIG ("colocate_group_relocate_delay_second" = "3600");
- ```
-
-8. 更快速的副本均衡
-
-    Doris 的副本均衡逻辑会先增加一个正常副本,然后再删除老的副本,以达到副本迁移的目的。而在删除老副本时,Doris会等待这个副本上已经开始执行的导入任务完成,以避免均衡任务影响导入任务。但这样会降低均衡逻辑的执行速度。此时可以通过修改以下参数,让 Doris 忽略这个等待,直接删除老副本:
-
- ```
- ADMIN SET FRONTEND CONFIG ("enable_force_drop_redundant_replica" = "true");
- ```
-
- 这种操作可能会导致均衡期间部分导入任务失败(需要重试),但会显著加速均衡速度。
-
-总体来讲,当我们需要将集群快速恢复到正常状态时,可以考虑按照以下思路处理(列表之后附有对应的 SQL 汇总示意):
-
-1. 找到导致高优任务报错的tablet,将有问题的副本置为 bad。
-2. 通过 `admin repair` 语句高优修复某些表。
-3. 停止副本均衡逻辑以避免占用集群资源,等集群恢复后,再开启即可。
-4. 使用更保守的策略触发修复任务,以应对 BE 频繁宕机导致的雪崩效应。
-5. 按需关闭 colocation 表的调度任务,集中集群资源修复其他高优数据。
-
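-与上述思路对应的 SQL 汇总示意如下(tablet_id、backend_id、表名及参数取值均为示例,需按实际情况替换,各命令的详细说明见上文各小节):
-
-```
-# 1. 将确认损坏的副本置为 bad
-ADMIN SET REPLICA STATUS PROPERTIES("tablet_id" = "10001", "backend_id" = "20001", "status" = "bad");
-
-# 2. 高优修复指定表或分区
-ADMIN REPAIR TABLE tbl1 PARTITION (p1, p2);
-
-# 3. 暂停均衡调度,待集群恢复后再改回 false
-ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");
-
-# 4. 使用更保守的策略触发修复
-ADMIN SET FRONTEND CONFIG ("tablet_repair_delay_factor_second" = "120");
-
-# 5. 按需关闭 colocation 表的调度
-ADMIN SET FRONTEND CONFIG ("disable_colocate_balance" = "true");
-```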
-
-
diff --git a/docs/zh-CN/administrator-guide/orthogonal-bitmap-manual.md b/docs/zh-CN/administrator-guide/orthogonal-bitmap-manual.md
deleted file mode 100644
index 238ac04a97..0000000000
--- a/docs/zh-CN/administrator-guide/orthogonal-bitmap-manual.md
+++ /dev/null
@@ -1,161 +0,0 @@
----
-{
- "title": "正交的BITMAP计算",
- "language": "zh-CN"
-}
----
-
-
-
-# 正交的BITMAP计算
-
-## 背景
-
-Doris原有的Bitmap聚合函数设计比较通用,但对亿级别以上bitmap大基数的交并集计算性能较差。排查后端be的bitmap聚合函数逻辑,发现主要有两个原因。一是当bitmap基数较大时,如bitmap大小超过1g,网络/磁盘IO处理时间比较长;二是后端be实例在scan数据后全部传输到顶层节点进行求交和并运算,给顶层单节点带来压力,成为处理瓶颈。
-
-解决思路是将bitmap列的值按照range划分,不同range的值存储在不同的分桶中,保证了不同分桶的bitmap值是正交的。当查询时,先分别对不同分桶中的正交bitmap进行聚合计算,然后顶层节点直接将聚合计算后的值合并汇总,并输出。如此会大大提高计算效率,解决了顶层单节点计算瓶颈问题。
-
-## 使用指南
-
-1. 建表,增加hid列,表示bitmap列值id范围, 作为hash分桶列
-2. 使用场景
-
-### Create table
-
-建表时需要使用聚合模型,数据类型是 bitmap , 聚合函数是 bitmap_union
-
-```
-CREATE TABLE `user_tag_bitmap` (
- `tag` bigint(20) NULL COMMENT "用户标签",
- `hid` smallint(6) NULL COMMENT "分桶id",
- `user_id` bitmap BITMAP_UNION NULL COMMENT ""
-) ENGINE=OLAP
-AGGREGATE KEY(`tag`, `hid`)
-COMMENT "OLAP"
-DISTRIBUTED BY HASH(`hid`) BUCKETS 3
-```
-表schema增加hid列,表示id范围, 作为hash分桶列。
-
-注:hid数和BUCKETS要设置合理,hid数设置至少是BUCKETS的5倍以上,以使数据hash分桶尽量均衡
-
-### Data Load
-
-```
-LOAD LABEL user_tag_bitmap_test
-(
-DATA INFILE('hdfs://abc')
-INTO TABLE user_tag_bitmap
-COLUMNS TERMINATED BY ','
-(tmp_tag, tmp_user_id)
-SET (
-tag = tmp_tag,
-hid = ceil(tmp_user_id/5000000),
-user_id = to_bitmap(tmp_user_id)
-)
-)
-注意:5000000这个数不固定,可按需调整
-...
-```
-数据格式:
-```
-11111111,1
-11111112,2
-11111113,3
-11111114,4
-...
-```
-注:第一列代表用户标签,由中文转换成数字
-
-load数据时,对用户bitmap值range范围纵向切割,例如,用户id在1-5000000范围内的hid值相同,hid值相同的行会分配到一个分桶内,如此每个分桶内的bitmap都是正交的。可以利用桶内bitmap值正交的特性,进行交并集计算,计算结果会被shuffle至top节点聚合。
-
-
-
-#### bitmap_orthogonal_intersect
-
-求bitmap交集函数
-
-语法:
-
- orthogonal_bitmap_intersect(bitmap_column, column_to_filter, filter_values)
-
-参数:
-
- 第一个参数是Bitmap列,第二个参数是用来过滤的维度列,第三个参数是变长参数,含义是过滤维度列的不同取值
-
-说明:
-
- 查询规划上聚合分2层,在第一层be节点(update、serialize)先按filter_values为key进行hash聚合,然后对所有key的bitmap求交集,结果序列化后发送至第二层be节点(merge、finalize),在第二层be节点对所有来源于第一层节点的bitmap值循环求并集
-
-样例:
-```
-select BITMAP_COUNT(orthogonal_bitmap_intersect(user_id, tag, 13080800, 11110200)) from user_tag_bitmap where tag in (13080800, 11110200);
-
-```
-
-#### orthogonal_bitmap_intersect_count
-
-求bitmap交集count函数,语法同原版intersect_count,但实现不同
-
-语法:
-
- orthogonal_bitmap_intersect_count(bitmap_column, column_to_filter, filter_values)
-
-参数:
-
- 第一个参数是Bitmap列,第二个参数是用来过滤的维度列,第三个参数开始是变长参数,含义是过滤维度列的不同取值
-
-说明:
-
- 查询规划聚合上分2层,在第一层be节点(update、serialize)先按filter_values为key进行hash聚合,然后对所有key的bitmap求交集,再对交集结果求count,count值序列化后发送至第二层be节点(merge、finalize),在第二层be节点对所有来源于第一层节点的count值循环求sum
-
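-样例(沿用上文建表与导入的示例数据,标签值仅为示意):
-```
-select orthogonal_bitmap_intersect_count(user_id, tag, 13080800, 11110200) from user_tag_bitmap where tag in (13080800, 11110200);
-```
-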
-#### orthogonal_bitmap_union_count
-
-求bitmap并集count函数,语法同原版bitmap_union_count,但实现不同。
-
-语法:
-
- orthogonal_bitmap_union_count(bitmap_column)
-
-参数:
-
- 参数类型是bitmap,是待求并集count的列
-
-说明:
-
- 查询规划上分2层,在第一层be节点(update、serialize)对所有bitmap求并集,再对并集的结果bitmap求count,count值序列化后发送至第二层be节点(merge、finalize),在第二层be节点对所有来源于第一层节点的count值循环求sum
-
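-样例(同样沿用上文的示例数据):
-```
-select orthogonal_bitmap_union_count(user_id) from user_tag_bitmap where tag in (13080800, 11110200);
-```
-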
-### 使用场景
-
-符合对bitmap进行正交计算的场景,如在用户行为分析中,计算留存,漏斗,用户画像等。
-
-
-人群圈选:
-
-```
- select orthogonal_bitmap_intersect_count(user_id, tag, 13080800, 11110200) from user_tag_bitmap where tag in (13080800, 11110200);
- 注:13080800、11110200代表用户标签
-```
-
-计算user_id的去重值:
-
-```
-select orthogonal_bitmap_union_count(user_id) from user_tag_bitmap where tag in (13080800, 11110200);
-
-```
diff --git a/docs/zh-CN/administrator-guide/outfile.md b/docs/zh-CN/administrator-guide/outfile.md
deleted file mode 100644
index 3a3b024997..0000000000
--- a/docs/zh-CN/administrator-guide/outfile.md
+++ /dev/null
@@ -1,192 +0,0 @@
----
-{
- "title": "导出查询结果集",
- "language": "zh-CN"
-}
----
-
-
-
-# 导出查询结果集
-
-本文档介绍如何使用 `SELECT INTO OUTFILE` 命令进行查询结果的导出操作。
-
-## 语法
-
-`SELECT INTO OUTFILE` 语句可以将查询结果导出到文件中。目前支持通过 Broker 进程, 通过 S3 协议, 或直接通过 HDFS 协议,导出到远端存储,如 HDFS,S3,BOS,COS(腾讯云)上。语法如下
-
-```
-query_stmt
-INTO OUTFILE "file_path"
-[format_as]
-[properties]
-```
-
-* `file_path`
-
- `file_path` 指向文件存储的路径以及文件前缀。如 `hdfs://path/to/my_file_`。
-
- 最终的文件名将由 `my_file_`,文件序号以及文件格式后缀组成。其中文件序号由0开始,数量为文件被分割的数量。如:
-
- ```
- my_file_abcdefg_0.csv
- my_file_abcdefg_1.csv
-    my_file_abcdefg_2.csv
- ```
-
-* `[format_as]`
-
- ```
- FORMAT AS CSV
- ```
-
-    指定导出格式。支持 csv、parquet、csv_with_names、csv_with_names_and_types,默认为 CSV。
-
-
-* `[properties]`
-
- 指定相关属性。目前支持通过 Broker 进程, 或通过 S3 协议进行导出。
-
- + Broker 相关属性需加前缀 `broker.`。具体参阅[Broker 文档](./broker.html)。
-    + HDFS 相关属性需加前缀 `hdfs.`,其中 `hdfs.fs.defaultFS` 用于填写 namenode 地址和端口,属于必填项。
-    + S3 协议则直接填写 S3 协议相关配置即可。
-
- ```
- ("broker.prop_key" = "broker.prop_val", ...)
- or
- ("hdfs.fs.defaultFS" = "xxx", "hdfs.hdfs_user" = "xxx")
- or
- ("AWS_ENDPOINT" = "xxx", ...)
- ```
-
- 其他属性:
-
- ```
- ("key1" = "val1", "key2" = "val2", ...)
- ```
-
- 目前支持以下属性:
-
- * `column_separator`:列分隔符,仅对 CSV 格式适用。默认为 `\t`。
- * `line_delimiter`:行分隔符,仅对 CSV 格式适用。默认为 `\n`。
- * `max_file_size`:单个文件的最大大小。默认为 1GB。取值范围在 5MB 到 2GB 之间。超过这个大小的文件将会被切分。
- * `schema`:PARQUET 文件schema信息。仅对 PARQUET 格式适用。导出文件格式为PARQUET时,必须指定`schema`。
-
-## 并发导出
-
-默认情况下,查询结果集的导出是非并发的,也就是单点导出。如果用户希望查询结果集可以并发导出,需要满足以下条件:
-
-1. session variable 'enable_parallel_outfile' 开启并发导出: ```set enable_parallel_outfile = true;```
-2. 导出方式为 S3 , 或者 HDFS, 而不是使用 broker
-3. 查询可以满足并发导出的需求,比如顶层不包含 sort 等单点节点。(后面会举例说明,哪种属于不可并发导出结果集的查询)
-
-满足以上三个条件,就能触发并发导出查询结果集了。并发度 = ```be_instance_num * parallel_fragment_exec_instance_num```
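-
-一个并发导出的使用示意如下(S3 地址与密钥均为占位值,S3 相关属性名以实际版本的 OUTFILE 文档为准):
-
-```
-set enable_parallel_outfile = true;
-
-select k1, k2 from tbl1
-into outfile "s3://bucket/result_"
-format as csv
-properties (
-    "AWS_ENDPOINT" = "http://s3.example.com",
-    "AWS_ACCESS_KEY" = "your_ak",
-    "AWS_SECRET_KEY" = "your_sk",
-    "AWS_REGION" = "your_region"
-);
-```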
-
-### 如何验证结果集被并发导出
-
-用户通过 session 变量设置开启并发导出后,如果想验证当前查询是否能进行并发导出,则可以通过下面这个方法。
-
-```
-explain select xxx from xxx where xxx into outfile "s3://xxx" format as csv properties ("AWS_ENDPOINT" = "xxx", ...);
-```
-
-对查询进行 explain 后,Doris 会返回该查询的规划,如果你发现 ```RESULT FILE SINK``` 出现在 ```PLAN FRAGMENT 1``` 中,就说明导出并发开启成功了。
-如果 ```RESULT FILE SINK``` 出现在 ```PLAN FRAGMENT 0``` 中,则说明当前查询不能进行并发导出 (当前查询不同时满足并发导出的三个条件)。
-
-```
-并发导出的规划示例:
-+-----------------------------------------------------------------------------+
-| Explain String |
-+-----------------------------------------------------------------------------+
-| PLAN FRAGMENT 0 |
-| OUTPUT EXPRS: | | | |
-| PARTITION: UNPARTITIONED |
-| |
-| RESULT SINK |
-| |
-| 1:EXCHANGE |
-| |
-| PLAN FRAGMENT 1 |
-| OUTPUT EXPRS:`k1` + `k2` |
-| PARTITION: HASH_PARTITIONED: `default_cluster:test`.`multi_tablet`.`k1` |
-| |
-| RESULT FILE SINK |
-| FILE PATH: s3://ml-bd-repo/bpit_test/outfile_1951_ |
-| STORAGE TYPE: S3 |
-| |
-| 0:OlapScanNode |
-| TABLE: multi_tablet |
-+-----------------------------------------------------------------------------+
-```
-
-## 使用示例
-
-具体参阅[OUTFILE 文档](../sql-reference/sql-statements/Data%20Manipulation/OUTFILE.md)。
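-
-这里先给出一个直接通过 HDFS 协议导出的最简示意(HDFS 地址、用户等均为占位值,完整参数请以上述文档为准):
-
-```
-SELECT * FROM tbl1
-INTO OUTFILE "hdfs://host:port/path/to/result_"
-FORMAT AS CSV
-PROPERTIES
-(
-    "hdfs.fs.defaultFS" = "hdfs://host:port",
-    "hdfs.hdfs_user" = "work",
-    "column_separator" = ",",
-    "max_file_size" = "100MB"
-);
-```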
-
-## 返回结果
-
-导出命令为同步命令。命令返回,即表示操作结束。同时会返回一行结果来展示导出的执行结果。
-
-如果正常导出并返回,则结果如下:
-
-```
-mysql> select * from tbl1 limit 10 into outfile "file:///home/work/path/result_";
-+------------+-----------+----------+--------------------------------------------------------------------+
-| FileNumber | TotalRows | FileSize | URL |
-+------------+-----------+----------+--------------------------------------------------------------------+
-| 1 | 2 | 8 | file:///192.168.1.10/home/work/path/result_{fragment_instance_id}_ |
-+------------+-----------+----------+--------------------------------------------------------------------+
-1 row in set (0.05 sec)
-```
-
-* FileNumber:最终生成的文件个数。
-* TotalRows:结果集行数。
-* FileSize:导出文件总大小。单位字节。
-* URL:如果是导出到本地磁盘,则这里显示具体导出到哪个 Compute Node。
-
-如果进行了并发导出,则会返回多行数据。
-
-```
-+------------+-----------+----------+--------------------------------------------------------------------+
-| FileNumber | TotalRows | FileSize | URL |
-+------------+-----------+----------+--------------------------------------------------------------------+
-| 1 | 3 | 7 | file:///192.168.1.10/home/work/path/result_{fragment_instance_id}_ |
-| 1 | 2 | 4 | file:///192.168.1.11/home/work/path/result_{fragment_instance_id}_ |
-+------------+-----------+----------+--------------------------------------------------------------------+
-2 rows in set (2.218 sec)
-```
-
-如果执行错误,则会返回错误信息,如:
-
-```
-mysql> SELECT * FROM tbl INTO OUTFILE ...
-ERROR 1064 (HY000): errCode = 2, detailMessage = Open broker writer failed ...
-```
-
-## 注意事项
-
-* 如果不开启并发导出,查询结果是由单个 BE 节点,单线程导出的。因此导出时间和导出结果集大小正相关。开启并发导出可以降低导出的时间。
-* 导出命令不会检查文件及文件路径是否存在。是否会自动创建路径、或是否会覆盖已存在文件,完全由远端存储系统的语义决定。
-* 如果在导出过程中出现错误,可能会有导出文件残留在远端存储系统上。Doris 不会清理这些文件。需要用户手动清理。
-* 导出命令的超时时间同查询的超时时间。可以通过 `SET query_timeout=xxx` 进行设置。
-* 对于结果集为空的查询,依然会产生一个大小为0的文件。
-* 文件切分会保证一行数据完整的存储在单一文件中。因此文件的大小并不严格等于 `max_file_size`。
-* 对于部分输出为非可见字符的函数,如 BITMAP、HLL 类型,输出为 `\N`,即 NULL。
-* 目前部分地理信息函数,如 `ST_Point` 的输出类型为 VARCHAR,但实际输出值为经过编码的二进制字符。当前这些函数会输出乱码。对于地理函数,请使用 `ST_AsText` 进行输出。
diff --git a/docs/zh-CN/administrator-guide/partition_cache.md b/docs/zh-CN/administrator-guide/partition_cache.md
deleted file mode 100644
index a14ac5a837..0000000000
--- a/docs/zh-CN/administrator-guide/partition_cache.md
+++ /dev/null
@@ -1,197 +0,0 @@
-# 分区缓存
-
-## 需求场景
-大部分数据分析场景是写少读多,数据写入一次,多次频繁读取,比如一张报表涉及的维度和指标,数据在凌晨一次性计算好,但每天有数百甚至数千次的页面访问,因此非常适合把结果集缓存起来。在数据分析或BI应用中,存在下面的业务场景:
-* **高并发场景**,Doris可以较好的支持高并发,但单台服务器无法承载太高的QPS
-* **复杂图表的看板**,复杂的Dashboard或者大屏类应用,数据来自多张表,每个页面有数十个查询,虽然每个查询只有数十毫秒,但是总体查询时间会在数秒
-* **趋势分析**,给定日期范围的查询,指标按日显示,比如查询最近7天内的用户数的趋势,这类查询数据量大,查询范围广,查询时间往往需要数十秒
-* **用户重复查询**,如果产品没有防重刷机制,用户因手误或其他原因重复刷新页面,导致提交大量的重复的SQL
-
-以上四种场景,在应用层的解决方案,把查询结果放到Redis中,周期性的更新缓存或者用户手工刷新缓存,但是这个方案有如下问题:
-* **数据不一致**,无法感知数据的更新,导致用户经常看到旧的数据
-* **命中率低**,缓存整个查询结果,如果数据实时写入,缓存频繁失效,命中率低且系统负载较重
-* **额外成本**,引入外部缓存组件,会带来系统复杂度,增加额外成本
-
-## 解决方案
-本分区缓存策略可以解决上面的问题,优先保证数据一致性,在此基础上细化缓存粒度,提升命中率,因此有如下特点:
-* 用户无需担心数据一致性,通过版本来控制缓存失效,缓存的数据和从BE中查询的数据是一致的
-* 没有额外的组件和成本,缓存结果存储在BE的内存中,用户可以根据需要调整缓存内存大小
-* 实现了两种缓存策略,SQLCache和PartitionCache,后者缓存粒度更细
-* 用一致性哈希解决BE节点上下线的问题,BE中的缓存算法是改进的LRU
-
-## SQLCache
-SQLCache按SQL的签名、查询的表的分区ID、分区最新版本来存储和获取缓存。三者组合确定一个缓存数据集,任何一个变化了,如SQL有变化,如查询字段或条件不一样,或数据更新后版本变化了,会导致命中不了缓存。
-
-如果多张表Join,使用最近更新的分区ID和最新的版本号,如果其中一张表更新了,会导致分区ID或版本号不一样,也一样命中不了缓存。
-
-SQLCache,更适合T+1更新的场景,凌晨数据更新,首次查询从BE中获取结果放入到缓存中,后续相同查询从缓存中获取。实时更新数据也可以使用,但是可能存在命中率低的问题,可以参考如下PartitionCache。
-
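-开启 SQLCache 后,相同 SQL 的重复查询即可命中缓存。一个简单的使用示意如下(变量开关详见下文“使用方式”一节,库表沿用本文 PartitionCache 部分的示例):
-
-```
-set enable_sql_cache=true;
-SELECT eventdate,count(userid) FROM testdb.appevent GROUP BY eventdate ORDER BY eventdate;
-```
-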
-## PartitionCache
-
-### 设计原理
-1. SQL可以并行拆分,Q = Q1 ∪ Q2 ... ∪ Qn,R= R1 ∪ R2 ... ∪ Rn,Q为查询语句,R为结果集
-2. 拆分为只读分区和可更新分区,只读分区缓存,更新分区不缓存
-
-如上,查询最近7天的每天用户数,如按日期分区,数据只写当天分区,当天之外的其他分区的数据,都是固定不变的,在相同的查询SQL下,查询某个不更新分区的指标都是固定的。如下,在2020-03-09当天查询前7天的用户数,2020-03-03至2020-03-07的数据来自缓存,2020-03-08第一次查询来自分区,后续的查询来自缓存,2020-03-09因为当天在不停写入,所以来自分区。
-
-因此,查询N天的数据,数据更新最近的D天,每天只是日期范围不一样相似的查询,只需要查询D个分区即可,其他部分都来自缓存,可以有效降低集群负载,减少查询时间。
-
-```
-MySQL [(none)]> SELECT eventdate,count(userid) FROM testdb.appevent WHERE eventdate>="2020-03-03" AND eventdate<="2020-03-09" GROUP BY eventdate ORDER BY eventdate;
-+------------+-----------------+
-| eventdate | count(`userid`) |
-+------------+-----------------+
-| 2020-03-03 | 15 |
-| 2020-03-04 | 20 |
-| 2020-03-05 | 25 |
-| 2020-03-06 | 30 |
-| 2020-03-07 | 35 |
-| 2020-03-08 | 40 | //第一次来自分区,后续来自缓存
-| 2020-03-09 | 25 | //来自分区
-+------------+-----------------+
-7 rows in set (0.02 sec)
-```
-
-在PartitionCache中,缓存第一级Key是去掉了分区条件后的SQL的128位MD5签名,下面是改写后的待签名的SQL:
-```
-SELECT eventdate,count(userid) FROM testdb.appevent GROUP BY eventdate ORDER BY eventdate;
-```
-缓存的第二级Key是查询结果集的分区字段的内容,比如上面查询结果的eventdate列的内容,二级Key的附属信息是分区的版本号和版本更新时间。
-
-下面演示上面SQL在2020-03-09当天第一次执行的流程:
-1. 从缓存中获取数据
-```
-+------------+-----------------+
-| 2020-03-03 | 15 |
-| 2020-03-04 | 20 |
-| 2020-03-05 | 25 |
-| 2020-03-06 | 30 |
-| 2020-03-07 | 35 |
-+------------+-----------------+
-```
-2. 从BE中获取数据的SQL和数据
-```
-SELECT eventdate,count(userid) FROM testdb.appevent WHERE eventdate>="2020-03-08" AND eventdate<="2020-03-09" GROUP BY eventdate ORDER BY eventdate;
-
-+------------+-----------------+
-| 2020-03-08 | 40 |
-+------------+-----------------+
-| 2020-03-09 | 25 |
-+------------+-----------------+
-```
-3. 最后发送给终端的数据
-```
-+------------+-----------------+
-| eventdate | count(`userid`) |
-+------------+-----------------+
-| 2020-03-03 | 15 |
-| 2020-03-04 | 20 |
-| 2020-03-05 | 25 |
-| 2020-03-06 | 30 |
-| 2020-03-07 | 35 |
-| 2020-03-08 | 40 |
-| 2020-03-09 | 25 |
-+------------+-----------------+
-```
-4. 发送给缓存的数据
-```
-+------------+-----------------+
-| 2020-03-08 | 40 |
-+------------+-----------------+
-```
-
-Partition缓存,适合按日期分区,部分分区实时更新,查询SQL较为固定。
-
-分区字段也可以是其他字段,但是需要保证只有少量分区更新。
-
-### 一些限制
-* 只支持OlapTable,其他存储如MySQL的表没有版本信息,无法感知数据是否更新
-* 只支持按分区字段分组,不支持按其他字段分组,按其他字段分组,该分组数据都有可能被更新,会导致缓存都失效
-* 只支持结果集的前半部分、后半部分以及全部命中缓存,不支持结果集被缓存数据分割成几个部分
-
-## 使用方式
-### 开启SQLCache
-确保fe.conf的cache_enable_sql_mode=true(默认是true)
-```
-vim fe/conf/fe.conf
-cache_enable_sql_mode=true
-```
-在MySQL命令行中设置变量
-```
-MySQL [(none)]> set [global] enable_sql_cache=true;
-```
-注:global是全局变量,不加指当前会话变量
-
-### 开启PartitionCache
-确保fe.conf的cache_enable_partition_mode=true(默认是true)
-```
-vim fe/conf/fe.conf
-cache_enable_partition_mode=true
-```
-在MySQL命令行中设置变量
-```
-MySQL [(none)]> set [global] enable_partition_cache=true;
-```
-
-如果同时开启了两个缓存策略,下面的参数,需要注意一下:
-```
-cache_last_version_interval_second=900
-```
-如果分区最新版本的时间距当前时间的间隔大于 cache_last_version_interval_second,则会优先缓存整个查询结果;如果小于这个间隔,且符合 PartitionCache 的条件,则按 PartitionCache 的方式缓存数据。
-
-### 监控
-FE的监控项:
-```
-query_table //Query中有表的数量
-query_olap_table //Query中有Olap表的数量
-cache_mode_sql //识别缓存模式为sql的Query数量
-cache_hit_sql //模式为sql的Query命中Cache的数量
-query_mode_partition //识别缓存模式为Partition的Query数量
-cache_hit_partition //通过Partition命中的Query数量
-partition_all //Query中扫描的所有分区
-partition_hit //通过Cache命中的分区数量
-
-Cache命中率 = (cache_hit_sql + cache_hit_partition) / query_olap_table
-Partition命中率 = partition_hit / partition_all
-```
-
-BE的监控项:
-```
-query_cache_memory_total_byte //Cache内存大小
-query_query_cache_sql_total_count //Cache的SQL的数量
-query_cache_partition_total_count //Cache分区数量
-
-SQL平均数据大小 = cache_memory_total / cache_sql_total
-Partition平均数据大小 = cache_memory_total / cache_partition_total
-```
-
-其他监控:
-可以从Grafana中查看BE节点的CPU和内存指标,Query统计中的Query Percentile等指标,配合Cache参数的调整来达成业务目标。
-
-
-### 优化参数
-FE的配置项cache_result_max_row_count,查询结果集放入缓存的最大行数,可以根据实际情况调整,但建议不要设置过大,避免过多占用内存,超过这个大小的结果集不会被缓存。
-```
-vim fe/conf/fe.conf
-cache_result_max_row_count=3000
-```
-
-BE最大分区数量cache_max_partition_count,指每个SQL对应的最大分区数,如果是按日期分区,能缓存2年多的数据,假如想保留更长时间的缓存,请把这个参数设置得更大,同时修改cache_result_max_row_count的参数。
-```
-vim be/conf/be.conf
-cache_max_partition_count=1024
-```
-
-BE中缓存内存设置,由query_cache_max_size和query_cache_elasticity_size两个参数组成(单位MB)。内存超过query_cache_max_size + query_cache_elasticity_size时会开始清理,并把内存控制到query_cache_max_size以下。可以根据BE节点数量、节点内存大小和缓存命中率来设置这两个参数。
-```
-query_cache_max_size_mb=256
-query_cache_elasticity_size_mb=128
-```
-计算方法:
-
-假如缓存10K个Query,每个Query缓存1000行,每行是128个字节,分布在10台BE上,则每个BE需要128M内存(10K*1000*128/10)。
-
-## 未尽事项
-* T+1的数据,是否也可以用Partition缓存? 目前不支持
-* 类似的SQL,之前查询了2个指标,现在查询3个指标,是否可以利用2个指标的缓存? 目前不支持
-* 按日期分区,但是需要按周维度汇总数据,是否可用PartitionCache? 目前不支持
diff --git a/docs/zh-CN/administrator-guide/privilege.md b/docs/zh-CN/administrator-guide/privilege.md
deleted file mode 100644
index 1e59b2999c..0000000000
--- a/docs/zh-CN/administrator-guide/privilege.md
+++ /dev/null
@@ -1,234 +0,0 @@
----
-{
- "title": "权限管理",
- "language": "zh-CN"
-}
----
-
-
-
-# 权限管理
-
-Doris 新的权限管理系统参照了 Mysql 的权限管理机制,做到了表级别细粒度的权限控制,基于角色的权限访问控制,并且支持白名单机制。
-
-## 名词解释
-
-1. 用户标识 user_identity
-
- 在权限系统中,一个用户被识别为一个 User Identity(用户标识)。用户标识由两部分组成:username 和 userhost。其中 username 为用户名,由英文大小写组成。userhost 表示该用户链接来自的 IP。user_identity 以 username@'userhost' 的方式呈现,表示来自 userhost 的 username。
-
- user_identity 的另一种表现方式为 username@['domain'],其中 domain 为域名,可以通过 DNS 或 BNS(百度名字服务)解析为一组 ip。最终表现为一组 username@'userhost',所以后面我们统一使用 username@'userhost' 来表示。
-
-2. 权限 Privilege
-
- 权限作用的对象是节点、数据库或表。不同的权限代表不同的操作许可。
-
-3. 角色 Role
-
- Doris可以创建自定义命名的角色。角色可以被看做是一组权限的集合。新创建的用户可以被赋予某一角色,则自动被赋予该角色所拥有的权限。后续对角色的权限变更,也会体现在所有属于该角色的用户权限上。
-
-4. 用户属性 user_property
-
- 用户属性直接附属于某一用户,而不是用户标识。即 cmy@'192.%' 和 cmy@['domain'] 都拥有同一组用户属性,该属性属于用户 cmy,而不是 cmy@'192.%' 或 cmy@['domain']。
-
- 用户属性包括但不限于: 用户最大连接数、导入集群配置等等。
-
-## 支持的操作
-
-1. 创建用户:CREATE USER
-2. 删除用户:DROP USER
-3. 授权:GRANT
-4. 撤权:REVOKE
-5. 创建角色:CREATE ROLE
-6. 删除角色:DROP ROLE
-7. 查看当前用户权限:SHOW GRANTS
-8. 查看所有用户权限:SHOW ALL GRANTS
-9. 查看已创建的角色:SHOW ROLES
-10. 查看用户属性:SHOW PROPERTY
-
-关于以上命令的详细帮助,可以通过 mysql 客户端连接 Doris 后,使用 help + command 获取帮助。如 `HELP CREATE USER`。
-
-## 权限类型
-
-Doris 目前支持以下几种权限
-
-1. Node_priv
-
- 节点变更权限。包括 FE、BE、BROKER 节点的添加、删除、下线等操作。目前该权限只能授予 Root 用户。
-
-2. Grant_priv
-
- 权限变更权限。允许执行包括授权、撤权、添加/删除/变更 用户/角色 等操作。
-
-3. Select_priv
-
- 对数据库、表的只读权限。
-
-4. Load_priv
-
- 对数据库、表的写权限。包括 Load、Insert、Delete 等。
-
-5. Alter_priv
-
- 对数据库、表的更改权限。包括重命名 库/表、添加/删除/变更 列、添加/删除 分区等操作。
-
-6. Create_priv
-
- 创建数据库、表、视图的权限。
-
-7. Drop_priv
-
- 删除数据库、表、视图的权限。
-
-8. Usage_priv
-
- 资源的使用权限。
-
-
-## 权限层级
-
-同时,根据权限适用范围的不同,我们将库表的权限分为以下三个层级(各层级对应的 GRANT 示例见本节末尾):
-
-1. GLOBAL LEVEL:全局权限。即通过 GRANT 语句授予的 `*.*` 上的权限。被授予的权限适用于任意数据库中的任意表。
-2. DATABASE LEVEL:数据库级权限。即通过 GRANT 语句授予的 `db.*` 上的权限。被授予的权限适用于指定数据库中的任意表。
-3. TABLE LEVEL:表级权限。即通过 GRANT 语句授予的 `db.tbl` 上的权限。被授予的权限适用于指定数据库中的指定表。
-
-将资源的权限分为以下两个层级:
-
-1. GLOBAL LEVEL:全局权限。即通过 GRANT 语句授予的 `*` 上的权限。被授予的权限适用于资源。
-2. RESOURCE LEVEL: 资源级权限。即通过 GRANT 语句授予的 `resource_name` 上的权限。被授予的权限适用于指定资源。
-
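-上述各层级对应的 GRANT 写法示意如下(用户名、库表名、资源名均为假设,且需事先创建好对应的用户与资源):
-
-```
-GRANT SELECT_PRIV ON *.* TO 'jack'@'%';                       -- GLOBAL LEVEL
-GRANT LOAD_PRIV ON db1.* TO 'jack'@'%';                       -- DATABASE LEVEL
-GRANT ALTER_PRIV ON db1.tbl1 TO 'jack'@'%';                   -- TABLE LEVEL
-GRANT USAGE_PRIV ON RESOURCE 'spark_resource' TO 'jack'@'%';  -- RESOURCE LEVEL
-```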
-
-## ADMIN/GRANT 权限说明
-
-ADMIN\_PRIV 和 GRANT\_PRIV 权限同时拥有**授予权限**的权限,较为特殊。这里对和这两个权限相关的操作逐一说明。
-
-1. CREATE USER
-
- * 拥有 ADMIN 权限,或任意层级的 GRANT 权限的用户可以创建新用户。
-
-2. DROP USER
-
- * 只有 ADMIN 权限可以删除用户。
-
-3. CREATE/DROP ROLE
-
- * 只有 ADMIN 权限可以创建角色。
-
-4. GRANT/REVOKE
-
- * 拥有 ADMIN 权限,或者 GLOBAL 层级 GRANT 权限的用户,可以授予或撤销任意用户的权限。
- * 拥有 DATABASE 层级 GRANT 权限的用户,可以授予或撤销任意用户对指定数据库的权限。
- * 拥有 TABLE 层级 GRANT 权限的用户,可以授予或撤销任意用户对指定数据库中指定表的权限。
-
-5. SET PASSWORD
-
- * 拥有 ADMIN 权限,或者 GLOBAL 层级 GRANT 权限的用户,可以设置任意用户的密码。
- * 普通用户可以设置自己对应的 UserIdentity 的密码。自己对应的 UserIdentity 可以通过 `SELECT CURRENT_USER();` 命令查看。
- * 拥有非 GLOBAL 层级 GRANT 权限的用户,不可以设置已存在用户的密码,仅能在创建用户时指定密码。
-
-
-## 一些说明
-
-1. Doris 初始化时,会自动创建如下用户和角色:
-
- 1. operator 角色:该角色拥有 Node\_priv 和 Admin\_priv,即对Doris的所有权限。后续某个升级版本中,我们可能会将该角色的权限限制为 Node\_priv,即仅授予节点变更权限。以满足某些云上部署需求。
-
- 2. admin 角色:该角色拥有 Admin\_priv,即除节点变更以外的所有权限。
-
- 3. root@'%':root 用户,允许从任意节点登陆,角色为 operator。
-
- 4. admin@'%':admin 用户,允许从任意节点登陆,角色为 admin。
-
-2. 不支持删除或更改默认创建的角色或用户的权限。
-
-3. operator 角色的用户有且只有一个。admin 角色的用户可以创建多个。
-
-4. 一些可能产生冲突的操作说明
-
- 1. 域名与ip冲突:
-
- 假设创建了如下用户:
-
- CREATE USER cmy@['domain'];
-
- 并且授权:
-
- GRANT SELECT_PRIV ON \*.\* TO cmy@['domain']
-
- 该 domain 被解析为两个 ip:ip1 和 ip2
-
- 假设之后,我们对 cmy@'ip1' 进行一次单独授权:
-
- GRANT ALTER_PRIV ON \*.\* TO cmy@'ip1';
-
- 则 cmy@'ip1' 的权限会被修改为 SELECT\_PRIV, ALTER\_PRIV。并且当我们再次变更 cmy@['domain'] 的权限时,cmy@'ip1' 也不会跟随改变。
-
- 2. 重复ip冲突:
-
- 假设创建了如下用户:
-
- CREATE USER cmy@'%' IDENTIFIED BY "12345";
-
- CREATE USER cmy@'192.%' IDENTIFIED BY "abcde";
-
- 在优先级上,'192.%' 优先于 '%',因此,当用户 cmy 从 192.168.1.1 这台机器尝试使用密码 '12345' 登陆 Doris 会被拒绝。
-
-5. 忘记密码
-
-    如果忘记了密码无法登录 Doris,可以在 Doris FE 节点所在机器,使用如下命令无密码登录 Doris:
-
- `mysql-client -h 127.0.0.1 -P query_port -uroot`
-
-    登录后,可以通过 SET PASSWORD 命令重置密码。
-
-6. 任何用户都不能重置 root 用户的密码,除了 root 用户自己。
-
-7. ADMIN\_PRIV 权限只能在 GLOBAL 层级授予或撤销。
-
-8. 拥有 GLOBAL 层级 GRANT_PRIV 其实等同于拥有 ADMIN\_PRIV,因为该层级的 GRANT\_PRIV 有授予任意权限的权限,请谨慎使用。
-
-9. `current_user()` 和 `user()`
-
- 用户可以通过 `SELECT current_user();` 和 `SELECT user();` 分别查看 `current_user` 和 `user`。其中 `current_user` 表示当前用户是以哪种身份通过认证系统的,而 `user` 则是用户当前实际的 `user_identity`。举例说明:
-
-    假设创建了 `user1@'192.%'` 这个用户,然后一位来自 192.168.10.1 的用户 user1 登录了系统,则此时的 `current_user` 为 `user1@'192.%'`,而 `user` 为 `user1@'192.168.10.1'`。
-
- 所有的权限都是赋予某一个 `current_user` 的,真实用户拥有对应的 `current_user` 的所有权限。
-
-## 最佳实践
-
-这里举例一些 Doris 权限系统的使用场景。
-
-1. 场景一
-
- Doris 集群的使用者分为管理员(Admin)、开发工程师(RD)和用户(Client)。其中管理员拥有整个集群的所有权限,主要负责集群的搭建、节点管理等。开发工程师负责业务建模,包括建库建表、数据的导入和修改等。用户访问不同的数据库和表来获取数据。
-
- 在这种场景下,可以为管理员赋予 ADMIN 权限或 GRANT 权限。对 RD 赋予对任意或指定数据库表的 CREATE、DROP、ALTER、LOAD、SELECT 权限。对 Client 赋予对任意或指定数据库表 SELECT 权限。同时,也可以通过创建不同的角色,来简化对多个用户的授权操作。
-
-2. 场景二
-
-    一个集群内有多个业务,每个业务可能使用一个或多个数据库。每个业务需要管理自己的用户。在这种场景下,管理员用户可以为每个数据库创建一个拥有 DATABASE 层级 GRANT 权限的用户。该用户仅能将指定数据库的权限授予其他用户。
-
-3. 黑名单
-
-    Doris 本身不支持黑名单,只有白名单功能,但我们可以通过某些方式来模拟黑名单。假设先创建了名为 `cmy@'192.%'` 的用户,表示允许来自 `192.*` 的用户登录。此时如果想禁止来自 `192.168.10.1` 的用户登录,则可以再创建一个用户 `cmy@'192.168.10.1'`,并设置一个新的密码。因为 `192.168.10.1` 的优先级高于 `192.%`,所以来自 `192.168.10.1` 的用户将不能再使用旧密码进行登录。
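-
-    一个最小化的示意如下(cmy 及密码仅为示例值):
-
-    ```
-    -- 允许来自 192.* 的 cmy 用户使用密码 12345 登录
-    CREATE USER cmy@'192.%' IDENTIFIED BY "12345";
-
-    -- 为 192.168.10.1 这个更精确的 userhost 单独设置另一个密码,
-    -- 由于其优先级更高,来自该 IP 的用户将无法再用旧密码登录,从而起到"黑名单"的效果
-    CREATE USER cmy@'192.168.10.1' IDENTIFIED BY "other_passwd";
-    ```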
-
-
diff --git a/docs/zh-CN/administrator-guide/query_cache.md b/docs/zh-CN/administrator-guide/query_cache.md
deleted file mode 100644
index cb2bdb119d..0000000000
--- a/docs/zh-CN/administrator-guide/query_cache.md
+++ /dev/null
@@ -1,155 +0,0 @@
----
-{
- "title": "QUERY CACHE",
- "language": "zh-CN"
-}
----
-
-
-# QUERY CACHE
-
-## 1 需求
-
-虽然在数据库存储层也做了对应的缓存,但这种存储层缓存一般针对的是查询内容,粒度也太小,通常只有在表中数据没有变更时才能发挥作用,因此并不能减少业务系统对数据库进行增删改查所带来的庞大 IO 压力。于是数据库缓存技术应运而生:通过对热点数据的高速缓存,提高应用的响应速度,极大缓解后端数据库的压力。
-
-- 高并发场景
- Doris可以较好地支持高并发,但单台服务器无法承载太高的QPS
-
-- 复杂图表的看板
- 复杂的Dashboard或者大屏类应用,数据来自多张表,每个页面有数十个查询,虽然每个查询只有数十毫秒,但是总体查询时间会在数秒
-
-- 趋势分析
- 给定日期范围的查询,指标按日显示,比如查询最近7天内的用户数的趋势,这类查询数据量大,查询范围广,查询时间往往需要数十秒
-
-- 用户重复查询
- 如果产品没有防重刷机制,用户因手误或其他原因重复刷新页面,导致提交大量的重复的SQL
-
-以上四种场景,一种在应用层的解决方案是把查询结果放到Redis中,周期性地更新缓存或者用户手动刷新缓存,但是这个方案有如下问题:
-
-- 数据不一致
- 无法感知数据的更新,导致用户经常看到旧的数据
-
-- 命中率低
- 缓存整个查询结果,如果数据实时写入,缓存频繁失效,命中率低且系统负载较重
-
-- 额外成本
- 引入外部缓存组件,会带来系统复杂度,增加额外成本
-
-## 2 解决方案
-
-目前我们设计出结果缓存和分区缓存两个模块
-
-## 3 名词解释
-
-1. 结果缓存 result_cache
-
-    针对用户的 SQL 直接缓存查询的结果集合。
-
-2. 分区缓存 partition_cache
-
-    在 partition 粒度做针对每个分区查询的结果缓存。
-
-## 4 设计原理
-
-### 1 结果缓存 `result_cache`
-
-result_cache 分为两种:第一种为 result_cache_ttl,第二种为 result_cache_version。
-
-#### `result_cache_ttl`
-
-result_cache_ttl 变量设置在用户 Session 中,用户可自定义是否开启,通过 TTL 时间来确定用户的 SQL 是否使用缓存,`这里数据变更时不保证数据的正确性`。
-缓存按照用户的 connect id 和查询的 SQL 来存储和获取;超过缓存失效时间则无法命中缓存,该缓存也会被清理。
-
-#### `result_cache_version`
-
-result_cache_version 按SQL的签名、查询的表的分区ID、分区最新版本来存储和获取缓存。三者组合确定一个缓存数据集,任何一个变化了,如SQL有变化,如查询字段或条件不一样,或数据更新后版本变化了,会导致命中不了缓存。
-
-如果多张表Join,使用最近更新的分区ID和最新的版本号,如果其中一张表更新了,会导致分区ID或版本号不一样,也一样命中不了缓存。
-
-### 2 分区缓存 `partition_cache`
-
-1. SQL可以并行拆分,Q = Q1 ∪ Q2 ... ∪ Qn,R= R1 ∪ R2 ... ∪ Rn,Q为查询语句,R为结果集
-2. 拆分为只读分区和可更新分区,只读分区缓存,更新分区不缓存
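-
-下面用一个按日期分区表上的趋势查询作为示意(表名、列名均为假设),说明可被按分区拆分、从而适合 partition_cache 的查询形态:
-
-```
--- 假设 example_tbl 按 event_day 分区,历史分区不再变更
--- 查询可按分区拆分为 Q1...Q7,历史分区的子结果可以被缓存复用,只有最新分区需要实时计算
-SELECT event_day, COUNT(DISTINCT user_id) AS uv
-FROM example_tbl
-WHERE event_day BETWEEN '2022-03-01' AND '2022-03-07'
-GROUP BY event_day;
-```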
-
-## 5 使用场景
-
-|缓存类型|使用场景|
-|--|--|
-|result_cache_ttl|主要解决高QPS,用户重复查询的场景|
-|result_cache_version|主要解决整张表长时间没有变更的场景|
-|partition_cache|主要解决历史分区不变更的场景|
-
-## 6 参数
-
-### fe
-
-#### cache 开关
-
-1. `enable_result_cache_ttl`
-- 解释:result_cache_ttl 结果缓存的开关
-- 默认值:false
-
-2. `enable_result_cache_version`
-- 解释:针对 table 版本的结果集缓存的开关
-- 默认值:false
-
-3. `enable_partition_cache`
-- 解释:分区缓存的开关
-- 默认值:false
-
-#### 每个查询是否缓存的限制
-
-1. `cache_per_query_max_row_count`
-- 缓存每个查询最大的行数
-- 默认值 3000
-
-2. `cache_per_query_max_size_in_bytes`
-- 每个查询可缓存结果的最大大小,单位 bytes
-- 默认值 1MB
-
-3. `result_cache_ttl_in_milliseconds`
-- result cache 缓存时长
-- 默认值 3s
-
-### be
-
-1. `cache_max_partition_count`
-- partition cache 最大缓存分区数
-- 默认值:1024
-
-2. `cache_max_size_in_mb` `cache_elasticity_size_in_mb`
-- BE 中缓存内存设置,有两个参数:cache_max_size_in_mb 和 cache_elasticity_size_in_mb。内存超过 cache_max_size_in_mb + cache_elasticity_size_in_mb 后会开始清理,并把内存控制到 cache_max_size_in_mb 以下。可以根据 BE 节点数量、节点内存大小和缓存命中率来设置这两个参数。
-
-## 7 如何使用
-
-- 开启 enable_result_cache_ttl
-```
-set global enable_result_cache_ttl = true;
-```
-
-- 开启 enable_result_cache_version
-```
-set global enable_result_cache_version = true;
-```
-
-- 开启 enable_partition_cache
-```
-set global enable_partition_cache = true;
-```
diff --git a/docs/zh-CN/administrator-guide/resource-management.md b/docs/zh-CN/administrator-guide/resource-management.md
deleted file mode 100644
index 897fb53e80..0000000000
--- a/docs/zh-CN/administrator-guide/resource-management.md
+++ /dev/null
@@ -1,170 +0,0 @@
----
-{
- "title": "资源管理",
- "language": "zh-CN"
-}
----
-
-
-
-# 资源管理
-
-为了节省Doris集群内的计算、存储资源,Doris需要引入一些其他外部资源来完成相关的工作,如Spark/GPU用于查询,HDFS/S3用于外部存储,Spark/MapReduce用于ETL, 通过ODBC连接外部存储等,因此我们引入资源管理机制来管理Doris使用的这些外部资源。
-
-
-
-## 基本概念
-
-一个资源包含名字、类型等基本信息,名字为全局唯一,不同类型的资源包含不同的属性,具体参考各资源的介绍。
-
-资源的创建和删除只能由拥有 `admin` 权限的用户进行操作。一个资源隶属于整个Doris集群。拥有 `admin` 权限的用户可以将使用权限`usage_priv` 赋给普通用户。可参考`HELP GRANT`或者权限文档。
-
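-例如,将名为 spark0 的资源的使用权限授予普通用户 user0(名称仅为示意,具体语法以 `HELP GRANT` 为准):
-
-```sql
-GRANT USAGE_PRIV ON RESOURCE "spark0" TO "user0"@"%";
-```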
-
-
-## 具体操作
-
-资源管理主要有三个命令:`CREATE RESOURCE`,`DROP RESOURCE` 和 `SHOW RESOURCES`,分别为创建、删除和查看资源。这三个命令的具体语法可以通过MySQL客户端连接到 Doris 后,执行 `HELP cmd` 的方式查看帮助。
-
-1. CREATE RESOURCE
-
- 语法
-
- ```sql
- CREATE [EXTERNAL] RESOURCE "resource_name"
- PROPERTIES ("key"="value", ...);
- ```
-
- 在创建资源的命令中,用户必须提供以下信息:
-
- * `resource_name` 为 Doris 中配置的资源的名字。
- * `PROPERTIES` 是资源相关参数,如下:
- * `type`:资源类型,必填,目前仅支持 spark与odbc_catalog。
- * 其他参数见各资源介绍。
-
-2. DROP RESOURCE
-
- 该命令可以删除一个已存在的资源。具体操作见:`HELP DROP RESOURCE`
-
-3. SHOW RESOURCES
-
- 该命令可以查看用户有使用权限的资源。具体操作见:`HELP SHOW RESOURCES`
-
-
-
-## 支持的资源
-
-目前 Doris 能够支持以下两种资源:
-
-* Spark 资源:完成 ETL 工作。
-* ODBC 资源:查询和导入外部表的数据。
-
-下面将分别展示两种资源的使用方式。
-
-### Spark
-
-#### 参数
-
-##### Spark 相关参数如下:
-
-`spark.master`: 必填,目前支持yarn,spark://host:port。
-
-`spark.submit.deployMode`: Spark 程序的部署模式,必填,支持 cluster,client 两种。
-
-`spark.hadoop.yarn.resourcemanager.address`: master为yarn时必填。
-
-`spark.hadoop.fs.defaultFS`: master为yarn时必填。
-
-其他参数为可选,参考 [Spark Configuration](http://spark.apache.org/docs/latest/configuration.html)。
-
-
-
-##### 如果Spark用于ETL,还需要指定以下参数:
-
-`working_dir`: ETL 使用的目录。spark作为ETL资源使用时必填。例如:hdfs://host:port/tmp/doris。
-
-`broker`: broker 名字。spark作为ETL资源使用时必填。需要使用`ALTER SYSTEM ADD BROKER` 命令提前完成配置。
-
- * `broker.property_key`: broker读取ETL生成的中间文件时需要指定的认证信息等。
-
-
-
-#### 示例
-
-创建 yarn cluster 模式,名为 spark0 的 Spark 资源。
-
-```sql
-CREATE EXTERNAL RESOURCE "spark0"
-PROPERTIES
-(
- "type" = "spark",
- "spark.master" = "yarn",
- "spark.submit.deployMode" = "cluster",
- "spark.jars" = "xxx.jar,yyy.jar",
- "spark.files" = "/tmp/aaa,/tmp/bbb",
- "spark.executor.memory" = "1g",
- "spark.yarn.queue" = "queue0",
- "spark.hadoop.yarn.resourcemanager.address" = "127.0.0.1:9999",
- "spark.hadoop.fs.defaultFS" = "hdfs://127.0.0.1:10000",
- "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris",
- "broker" = "broker0",
- "broker.username" = "user0",
- "broker.password" = "password0"
-);
-```
-
-### ODBC
-
-#### 参数
-
-##### ODBC 相关参数如下:
-
-`type`: 必填,且必须为`odbc_catalog`。作为resource的类型标识。
-
-`user`: 外部表的账号,必填。
-
-`password`: 外部表的密码,必填。
-
-`host`: 外部表的连接ip地址,必填。
-
-`port`: 外部表的连接端口,必填。
-
-`odbc_type`: 标示外部表的类型,当前doris支持`mysql`与`oracle`,未来可能支持更多的数据库。引用该resource的ODBC外表必填,旧的mysql外表选填。
-
-`driver`: 标示外部表使用的driver动态库,引用该resource的ODBC外表必填,旧的mysql外表选填。
-
-
-具体如何使用,可以参考 [ODBC of Doris](../extending-doris/odbc-of-doris.html)
-
-#### 示例
-
-创建名为 oracle_odbc 的 odbc_catalog 类型资源,用于访问 Oracle。
-
-```sql
-CREATE EXTERNAL RESOURCE `oracle_odbc`
-PROPERTIES (
-"type" = "odbc_catalog",
-"host" = "192.168.0.1",
-"port" = "8086",
-"user" = "test",
-"password" = "test",
-"database" = "test",
-"odbc_type" = "oracle",
-"driver" = "Oracle 19 ODBC driver"
-);
-```
\ No newline at end of file
diff --git a/docs/zh-CN/administrator-guide/runtime-filter.md b/docs/zh-CN/administrator-guide/runtime-filter.md
deleted file mode 100644
index ca9f908ed9..0000000000
--- a/docs/zh-CN/administrator-guide/runtime-filter.md
+++ /dev/null
@@ -1,282 +0,0 @@
----
-{
- "title": "Runtime Filter",
- "language": "zh-CN"
-}
----
-
-
-
-# Runtime Filter
-
-Runtime Filter 是在 Doris 0.15 版本中正式加入的新功能。旨在为某些 Join 查询在运行时动态生成过滤条件,来减少扫描的数据量,避免不必要的I/O和网络传输,从而加速查询。
-
-它的设计、实现和效果可以参阅 [ISSUE 6116](https://github.com/apache/incubator-doris/issues/6116)。
-
-## 名词解释
-
-* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。
-* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。
-* 左表:Join查询时,左边的表。进行Probe操作。可被Join Reorder调整顺序。
-* 右表:Join查询时,右边的表。进行Build操作。可被Join Reorder调整顺序。
-* Fragment:FE会将具体的SQL语句的执行转化为对应的Fragment并下发到BE进行执行。BE上执行对应Fragment,并将结果汇聚返回给FE。
-* Join on clause: `A join B on A.a=B.b`中的`A.a=B.b`,在查询规划时基于此生成join conjuncts,包含join Build和Probe使用的expr,其中Build expr在Runtime Filter中称为src expr,Probe expr在Runtime Filter中称为target expr。
-
-## 原理
-
-Runtime Filter在查询规划时生成,在HashJoinNode中构建,在ScanNode中应用。
-
-举个例子,当前存在T1表与T2表的Join查询,它的Join方式为HashJoin,T1是一张事实表,数据行数为100000,T2是一张维度表,数据行数为2000,Doris join的实际情况是:
-```
-| > HashJoinNode <
-| | |
-| | 100000 | 2000
-| | |
-| OlapScanNode OlapScanNode
-| ^ ^
-| | 100000 | 2000
-| T1 T2
-|
-```
-显而易见,对 T2 的扫描要远远快于 T1。如果我们主动等待一段时间再扫描 T1,等 T2 将扫描到的数据记录交给 HashJoinNode 后,HashJoinNode 可以根据 T2 的数据计算出一个过滤条件,比如 T2 数据的最大值和最小值,或者构建一个 Bloom Filter;接着将这个过滤条件发给等待扫描 T1 的 ScanNode,后者应用该过滤条件,把过滤后的数据交给 HashJoinNode,从而减少 probe hash table 的次数和网络开销。这个过滤条件就是 Runtime Filter,效果如下:
-```
-| > HashJoinNode <
-| | |
-| | 6000 | 2000
-| | |
-| OlapScanNode OlapScanNode
-| ^ ^
-| | 100000 | 2000
-| T1 T2
-|
-```
-如果能将过滤条件(Runtime Filter)下推到存储引擎,则某些情况下可以利用索引来直接减少扫描的数据量,从而大大减少扫描耗时,效果如下:
-```
-| > HashJoinNode <
-| | |
-| | 6000 | 2000
-| | |
-| OlapScanNode OlapScanNode
-| ^ ^
-| | 6000 | 2000
-| T1 T2
-|
-```
-可见,和谓词下推、分区裁剪不同,Runtime Filter是在运行时动态生成的过滤条件,即在查询运行时解析join on clause确定过滤表达式,并将表达式广播给正在读取左表的ScanNode,从而减少扫描的数据量,进而减少probe hash table的次数,避免不必要的I/O和网络传输。
-
-Runtime Filter主要用于大表join小表的优化,如果左表的数据量太小,或者右表的数据量太大,则Runtime Filter可能不会取得预期效果。
-
-## 使用方式
-
-### Runtime Filter查询选项
-
-与Runtime Filter相关的查询选项信息,请参阅以下部分:
-
-- 第一个查询选项是调整使用的Runtime Filter类型,大多数情况下,您只需要调整这一个选项,其他选项保持默认即可。
-
- - `runtime_filter_type`: 包括Bloom Filter、MinMax Filter、IN predicate、IN Or Bloom Filter,默认会使用IN Or Bloom Filter,部分情况下同时使用Bloom Filter、MinMax Filter、IN predicate时性能更高。
-
-- 其他查询选项通常仅在某些特定场景下,才需进一步调整以达到最优效果。通常只在性能测试后,针对资源密集型、运行耗时足够长且频率足够高的查询进行优化。
-
- - `runtime_filter_mode`: 用于调整Runtime Filter的下推策略,包括OFF、LOCAL、GLOBAL三种策略,默认设置为GLOBAL策略
-
- - `runtime_filter_wait_time_ms`: 左表的ScanNode等待每个Runtime Filter的时间,默认1000ms
-
- - `runtime_filters_max_num`: 每个查询可应用的Runtime Filter中Bloom Filter的最大数量,默认10
-
- - `runtime_bloom_filter_min_size`: Runtime Filter中Bloom Filter的最小长度,默认1048576(1M)
-
- - `runtime_bloom_filter_max_size`: Runtime Filter中Bloom Filter的最大长度,默认16777216(16M)
-
- - `runtime_bloom_filter_size`: Runtime Filter中Bloom Filter的默认长度,默认2097152(2M)
-
- - `runtime_filter_max_in_num`: 如果join右表数据行数大于这个值,我们将不生成IN predicate,默认1024
-
-下面对查询选项做进一步说明。
-
-#### 1.runtime_filter_type
-使用的Runtime Filter类型。
-
-**类型**: 数字(1, 2, 4, 8)或者相对应的助记符字符串(IN, BLOOM_FILTER, MIN_MAX, ```IN_OR_BLOOM_FILTER```),默认8(```IN_OR_BLOOM_FILTER```),使用多个时用逗号分隔,注意需要加引号,或者将任意多个类型的数字相加,例如:
-```
-set runtime_filter_type="BLOOM_FILTER,IN,MIN_MAX";
-```
-等价于:
-```
-set runtime_filter_type=7;
-```
-
-**使用注意事项**
-
-- **IN or Bloom Filter**: 根据右表在执行过程中的真实行数,由系统自动判断使用 IN predicate 还是 Bloom Filter
-  - 默认在右表数据行数少于1024时会使用IN predicate(可通过session变量中的`runtime_filter_max_in_num`调整),否则使用Bloom filter。
-- **Bloom Filter**: 有一定的误判率,导致过滤的数据比预期少一点,但不会导致最终结果不准确,在大部分情况下Bloom Filter都可以提升性能或对性能没有显著影响,但在部分情况下会导致性能降低。
- - Bloom Filter构建和应用的开销较高,所以当过滤率较低时,或者左表数据量较少时,Bloom Filter可能会导致性能降低。
- - 目前只有左表的Key列应用Bloom Filter才能下推到存储引擎,而测试结果显示Bloom Filter不下推到存储引擎时往往会导致性能降低。
- - 目前Bloom Filter仅在ScanNode上使用表达式过滤时有短路(short-circuit)逻辑,即当假阳性率过高时,不继续使用Bloom Filter,但当Bloom Filter下推到存储引擎后没有短路逻辑,所以当过滤率较低时可能导致性能降低。
-
-- **MinMax Filter**: 包含最大值和最小值,从而过滤小于最小值和大于最大值的数据,MinMax Filter的过滤效果与join on clause中Key列的类型和左右表数据分布有关。
- - 当join on clause中Key列的类型为int/bigint/double等时,极端情况下,如果左右表的最大最小值相同则没有效果,反之右表最大值小于左表最小值,或右表最小值大于左表最大值,则效果最好。
- - 当join on clause中Key列的类型为varchar等时,应用MinMax Filter往往会导致性能降低。
-
-- **IN predicate**: 根据join on clause中Key列在右表上的所有值构建IN predicate,使用构建的IN predicate在左表上过滤,相比Bloom Filter构建和应用的开销更低,在右表数据量较少时往往性能更高。
- - 默认只有右表数据行数少于1024才会下推(可通过session变量中的`runtime_filter_max_in_num`调整)。
- - 目前IN predicate已实现合并方法。
- - 当同时指定In predicate和其他filter,并且in的过滤数值没达到runtime_filter_max_in_num时,会尝试把其他filter去除掉。原因是In predicate是精确的过滤条件,即使没有其他filter也可以高效过滤,如果同时使用则其他filter会做无用功。目前仅在Runtime filter的生产者和消费者处于同一个fragment时才会有去除非in filter的逻辑。
-
-#### 2.runtime_filter_mode
-用于控制Runtime Filter在instance之间传输的范围。
-
-**类型**: 数字(0, 1, 2)或者相对应的助记符字符串(OFF, LOCAL, GLOBAL),默认2(GLOBAL)。
-
-**使用注意事项**
-
-LOCAL:相对保守,构建的Runtime Filter只能在同一个instance(查询执行的最小单元)上同一个Fragment中使用,即Runtime Filter生产者(构建Filter的HashJoinNode)和消费者(使用RuntimeFilter的ScanNode)在同一个Fragment,比如broadcast join的一般场景;
-
-GLOBAL:相对激进,除满足LOCAL策略的场景外,还可以将Runtime Filter合并后通过网络传输到不同instance上的不同Fragment中使用,比如Runtime Filter生产者和消费者在不同Fragment,比如shuffle join。
-
-大多数情况下GLOBAL策略可以在更广泛的场景对查询进行优化,但在有些shuffle join中生成和合并Runtime Filter的开销超过给查询带来的性能优势,可以考虑更改为LOCAL策略。
-
-如果集群中涉及的join查询不会因为Runtime Filter而提高性能,您可以将设置更改为OFF,从而完全关闭该功能。
-
-在不同Fragment上构建和应用Runtime Filter时,需要合并Runtime Filter的原因和策略可参阅 [ISSUE 6116](https://github.com/apache/incubator-doris/issues/6116)
-
-#### 3.runtime_filter_wait_time_ms
-Runtime Filter的等待耗时。
-
-**类型**: 整数,默认1000,单位ms
-
-**使用注意事项**
-
-在开启Runtime Filter后,左表的ScanNode会为每一个分配给自己的Runtime Filter等待一段时间再扫描数据,即如果ScanNode被分配了3个Runtime Filter,那么它最多会等待3000ms。
-
-因为Runtime Filter的构建和合并均需要时间,ScanNode会尝试将等待时间内到达的Runtime Filter下推到存储引擎,如果超过等待时间后,ScanNode会使用已经到达的Runtime Filter直接开始扫描数据。
-
-如果Runtime Filter在ScanNode开始扫描之后到达,则ScanNode不会将该Runtime Filter下推到存储引擎,而是对已经从存储引擎扫描上来的数据,在ScanNode上基于该Runtime Filter使用表达式过滤,之前已经扫描的数据则不会应用该Runtime Filter,这样得到的中间数据规模会大于最优解,但可以避免严重的劣化。
-
-如果集群比较繁忙,并且集群上有许多资源密集型或长耗时的查询,可以考虑增加等待时间,以避免复杂查询错过优化机会。如果集群负载较轻,并且集群上有许多只需要几秒的小查询,可以考虑减少等待时间,以避免每个查询增加1s的延迟。
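-
-例如,可以按如下方式在会话级别调整该等待时间(数值仅为示意,应结合实际负载测试后确定):
-
-```
-set runtime_filter_wait_time_ms = 3000;
-```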
-
-#### 4.runtime_filters_max_num
-每个查询生成的Runtime Filter中Bloom Filter数量的上限。
-
-**类型**: 整数,默认10
-
-**使用注意事项**
-目前仅对Bloom Filter的数量进行限制,因为相比MinMax Filter和IN predicate,Bloom Filter构建和应用的代价更高。
-
-如果生成的Bloom Filter超过允许的最大数量,则保留选择性大的Bloom Filter,选择性大意味着预期可以过滤更多的行。这个设置可以防止Bloom Filter耗费过多的内存开销而导致潜在的问题。
-```
-选择性=(HashJoinNode Cardinality / HashJoinNode left child Cardinality)
--- 因为目前FE拿到的Cardinality不准确,所以这里Bloom Filter计算的选择性与实际会有偏差,因此最终可能只是随机保留了部分Bloom Filter。
-```
-仅在对涉及大表间join的某些长耗时查询进行调优时,才需要调整此查询选项。
-
-#### 5.Bloom Filter长度相关参数
-包括`runtime_bloom_filter_min_size`、`runtime_bloom_filter_max_size`、`runtime_bloom_filter_size`,用于确定Runtime Filter使用的Bloom Filter数据结构的大小(以字节为单位)。
-
-**类型**: 整数
-
-**使用注意事项**
-因为需要保证每个HashJoinNode构建的Bloom Filter长度相同才能合并,所以目前在FE查询规划时计算Bloom Filter的长度。
-
-如果能拿到join右表统计信息中的数据行数(Cardinality),会尝试根据Cardinality估计Bloom Filter的最佳大小,并四舍五入到最接近的2的幂(以2为底的log值)。如果无法拿到右表的Cardinality,则会使用默认的Bloom Filter长度`runtime_bloom_filter_size`。`runtime_bloom_filter_min_size`和`runtime_bloom_filter_max_size`用于限制最终使用的Bloom Filter长度最小和最大值。
-
-更大的Bloom Filter在处理高基数的输入集时更有效,但需要消耗更多的内存。假如查询中需要过滤高基数列(比如含有数百万个不同的取值),可以考虑增加`runtime_bloom_filter_size`的值进行一些基准测试,这有助于使Bloom Filter过滤的更加精准,从而获得预期的性能提升。
-
-Bloom Filter的有效性取决于查询的数据分布,因此通常仅对一些特定查询额外调整其Bloom Filter长度,而不是全局修改,一般仅在对涉及大表间join的某些长耗时查询进行调优时,才需要调整此查询选项。
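-
-例如,针对某个需要过滤高基数列的长耗时查询,可以在会话中临时调整(数值仅为示意,应通过基准测试确定):
-
-```
-set runtime_filter_type="BLOOM_FILTER";
-set runtime_bloom_filter_size=4194304;   -- 4MB
-```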
-
-### 查看query生成的Runtime Filter
-
-`explain` 命令显示的查询计划中包括每个Fragment使用的join on clause信息,以及Fragment生成和使用Runtime Filter的注释,从而可以确认是否将Runtime Filter应用到了期望的join on clause上。
-- 生成Runtime Filter的Fragment包含的注释例如`runtime filters: filter_id[type] <- table.column`。
-- 使用Runtime Filter的Fragment包含的注释例如`runtime filters: filter_id[type] -> table.column`。
-
-下面例子中的查询使用了一个ID为RF000的Runtime Filter。
-```
-CREATE TABLE test (t1 INT) DISTRIBUTED BY HASH (t1) BUCKETS 2 PROPERTIES("replication_num" = "1");
-INSERT INTO test VALUES (1), (2), (3), (4);
-
-CREATE TABLE test2 (t2 INT) DISTRIBUTED BY HASH (t2) BUCKETS 2 PROPERTIES("replication_num" = "1");
-INSERT INTO test2 VALUES (3), (4), (5);
-
-EXPLAIN SELECT t1 FROM test JOIN test2 where test.t1 = test2.t2;
-+-------------------------------------------------------------------+
-| Explain String |
-+-------------------------------------------------------------------+
-| PLAN FRAGMENT 0 |
-| OUTPUT EXPRS:`t1` |
-| |
-| 4:EXCHANGE |
-| |
-| PLAN FRAGMENT 1 |
-| OUTPUT EXPRS: |
-| PARTITION: HASH_PARTITIONED: `default_cluster:ssb`.`test`.`t1` |
-| |
-| 2:HASH JOIN |
-| | join op: INNER JOIN (BUCKET_SHUFFLE) |
-| | equal join conjunct: `test`.`t1` = `test2`.`t2` |
-| | runtime filters: RF000[in] <- `test2`.`t2` |
-| | |
-| |----3:EXCHANGE |
-| | |
-| 0:OlapScanNode |
-| TABLE: test |
-| runtime filters: RF000[in] -> `test`.`t1` |
-| |
-| PLAN FRAGMENT 2 |
-| OUTPUT EXPRS: |
-| PARTITION: HASH_PARTITIONED: `default_cluster:ssb`.`test2`.`t2` |
-| |
-| 1:OlapScanNode |
-| TABLE: test2 |
-+-------------------------------------------------------------------+
--- 上面`runtime filters`的行显示了`PLAN FRAGMENT 1`的`2:HASH JOIN`生成了ID为RF000的IN predicate,
--- 其中`test2`.`t2`的key values仅在运行时可知,
--- 在`0:OlapScanNode`使用了该IN predicate用于在读取`test`.`t1`时过滤不必要的数据。
-
-SELECT t1 FROM test JOIN test2 where test.t1 = test2.t2;
--- 返回2行结果[3, 4];
-
--- 通过query的profile(set enable_profile=true;)可以查看查询内部工作的详细信息,
--- 包括每个Runtime Filter是否下推、等待耗时、以及OLAP_SCAN_NODE从prepare到接收到Runtime Filter的总时长。
-RuntimeFilter:in:
- - HasPushDownToEngine: true
- - AWaitTimeCost: 0ns
- - EffectTimeCost: 2.76ms
-
--- 此外,在profile的OLAP_SCAN_NODE中还可以查看Runtime Filter下推后的过滤效果和耗时。
- - RowsVectorPredFiltered: 9.320008M (9320008)
- - VectorPredEvalTime: 364.39ms
-```
-
-## Runtime Filter的规划规则
-1. 只支持对join on clause中的等值条件生成Runtime Filter,不包括Null-safe条件,因为其可能会过滤掉join左表的null值。
-2. 不支持将Runtime Filter下推到left outer、full outer、anti join的左表;
-3. 不支持src expr或target expr是常量;
-4. 不支持src expr和target expr相等;
-5. 不支持src expr的类型等于`HLL`或者`BITMAP`;
-6. 目前仅支持将Runtime Filter下推给OlapScanNode;
-7. 不支持target expr包含NULL-checking表达式,比如`COALESCE/IFNULL/CASE`,因为当outer join上层其他join的join on clause包含NULL-checking表达式并生成Runtime Filter时,将这个Runtime Filter下推到outer join的左表时可能导致结果不正确;
-8. 不支持target expr中的列(slot)无法在原始表中找到某个等价列;
-9. 不支持列传导,这包含两种情况:
- - 一是例如join on clause包含A.k = B.k and B.k = C.k时,目前C.k只可以下推给B.k,而不可以下推给A.k;
- - 二是例如join on clause包含A.a + B.b = C.c,如果A.a可以列传导到B.a,即A.a和B.a是等价的列,那么可以用B.a替换A.a,然后可以尝试将Runtime Filter下推给B(如果A.a和B.a不是等价列,则不能下推给B,因为target expr必须与唯一一个join左表绑定);
-10. Target expr和src expr的类型必须相等,因为Bloom Filter基于hash,若类型不等则会尝试将target expr的类型转换为src expr的类型;
-11. 不支持`PlanNode.Conjuncts`生成的Runtime Filter下推,与HashJoinNode的`eqJoinConjuncts`和`otherJoinConjuncts`不同,`PlanNode.Conjuncts`生成的Runtime Filter在测试中发现可能会导致错误的结果,例如`IN`子查询转换为join时,自动生成的join on clause将保存在`PlanNode.Conjuncts`中,此时应用Runtime Filter可能会导致结果缺少一些行。
diff --git a/docs/zh-CN/administrator-guide/segment-v2-usage.md b/docs/zh-CN/administrator-guide/segment-v2-usage.md
deleted file mode 100644
index 1309182587..0000000000
--- a/docs/zh-CN/administrator-guide/segment-v2-usage.md
+++ /dev/null
@@ -1,157 +0,0 @@
----
-{
- "title": "Segment V2 升级手册",
- "language": "zh-CN"
-}
----
-
-
-
-# Segment V2 升级手册
-
-## 背景
-
-Doris 0.12 版本中实现了新的存储格式:Segment V2,引入词典压缩、bitmap索引、page cache等优化,能够提升系统性能。
-
-0.12 版本会同时支持读写原有的 Segment V1(以下简称V1) 和新的 Segment V2(以下简称V2) 两种格式。如果原有数据想使用 V2 相关特性,需通过命令将 V1 转换成 V2 格式。
-
-本文档主要介绍从 0.11 版本升级至 0.12 版本后,如何转换和使用 V2 格式。
-
-V2 格式的表可以支持以下新的特性:
-
-1. bitmap 索引
-2. 内存表
-3. page cache
-4. 字典压缩
-5. 延迟物化(Lazy Materialization)
-
-**从 0.13 版本开始,新建表的默认存储格式将为 Segment V2**
-
-## 集群升级
-
-0.12 版本仅支持从 0.11 版本升级,不支持从 0.11 之前的版本升级。请先确保升级前的 Doris 集群版本为 0.11。
-
-0.12 版本有两个 V2 相关的重要参数:
-
-* `default_rowset_type`:FE 一个全局变量(Global Variable)设置,默认为 "alpha",即 V1 版本。
-* `default_rowset_type`:BE 的一个配置项,默认为 "ALPHA",即 V1 版本。
-
-保持上述配置默认的话,按常规步骤对集群升级后,原有集群数据的存储格式不会变更,即依然为 V1 格式。如果对 V2 格式没有需求,则继续正常使用集群即可,无需做任何额外操作。所有原有数据、以及新导入的数据,都依然是 V1 版本。
-
-## V2 格式转换
-
-### 已有表数据转换成 V2
-
-对于已有表数据的格式转换,Doris 提供两种方式:
-
-1. 创建一个 V2 格式的特殊 Rollup
-
- 该方式会针对指定表,创建一个 V2 格式的特殊 Rollup。创建完成后,新的 V2 格式的 Rollup 会和原有表格式数据并存。用户可以指定对 V2 格式的 Rollup 进行查询验证。
-
- 该方式主要用于对 V2 格式的验证,因为不会修改原有表数据,因此可以安全的进行 V2 格式的数据验证,而不用担心表数据因格式转换而损坏。通常先使用这个方式对数据进行校验,之后再使用方法2对整个表进行数据格式转换。
-
- 操作步骤如下:
-
- ```
- ## 创建 V2 格式的 Rollup
-
- ALTER TABLE table_name ADD ROLLUP table_name (columns) PROPERTIES ("storage_format" = "v2");
- ```
-
-    其中,Rollup 的名称必须为表名。columns 字段可以任意填写,系统不会检查该字段的合法性。该语句会自动生成一个名为 `__v2_table_name` 的 Rollup,并且该 Rollup 包含表的全部列。
-
- 通过以下语句查看创建进度:
-
- ```
- SHOW ALTER TABLE ROLLUP;
- ```
-
- 创建完成后,可以通过 `DESC table_name ALL;` 查看到名为 `__v2_table_name` 的 Rollup。
-
- 之后,通过如下命令,切换到 V2 格式查询:
-
- ```
- set use_v2_rollup = true;
- select * from table_name limit 10;
- ```
-
-    `use_v2_rollup` 这个变量会强制查询名为 `__v2_table_name` 的 Rollup,并且不会考虑其他 Rollup 的命中条件。所以该参数仅用于对 V2 格式数据进行验证。
-
-2. 转换现有表数据格式
-
- 该方式相当于给指定的表发送一个 schema change 作业,作业完成后,表的所有数据会被转换成 V2 格式。该方法不会保留原有 v1 格式,所以请先使用方法1进行格式验证。
-
- ```
- ALTER TABLE table_name SET ("storage_format" = "v2");
- ```
-
- 之后通过如下命令查看作业进度:
-
- ```
- SHOW ALTER TABLE COLUMN;
- ```
-
- 作业完成后,该表的所有数据(包括Rollup)都转换为了 V2。且 V1 版本的数据已被删除。如果该表是分区表,则之后创建的分区也都是 V2 格式。
-
- **V2 格式的表不能重新转换为 V1**
-
-### 创建新的 V2 格式的表
-
-在不改变默认配置参数的情况下,用户可以创建 V2 格式的表:
-
-```
-CREATE TABLE tbl_name
-(
- k1 INT,
- k2 INT
-)
-DISTRIBUTED BY HASH(k1) BUCKETS 1
-PROPERTIES
-(
- "storage_format" = "v2"
-);
-```
-
-在 `properties` 中指定 `"storage_format" = "v2"` 后,该表将使用 V2 格式创建。如果是分区表,则之后创建的分区也都是 V2 格式。
-
-### 全量格式转换(试验功能,不推荐)
-
-通过以下方式可以开启整个集群的全量数据格式转换(V1 -> V2)。全量数据转换是通过 BE 后台的数据 compaction 过程异步进行的。
-**该功能目前并没有很好的方式查看或控制转换进度,并且无法保证数据能够转换完成。可能导致同一张表长期处于同时包含两种数据格式的状态。因此建议使用 ALTER TABLE 针对性的转换。**
-
-1. 从 BE 开启全量格式转换
-
- 在 `be.conf` 中添加变量 `default_rowset_type=BETA` 并重启 BE 节点。在之后的 compaction 流程中,数据会自动从 V1 转换成 V2。
-
-2. 从 FE 开启全量格式转换
-
- 通过 mysql 客户端连接 Doris 后,执行如下语句:
-
- `SET GLOBAL default_rowset_type = beta;`
-
- 执行完成后,FE 会通过心跳将信息发送给 BE,之后 BE 的 compaction 流程中,数据会自动从 V1 转换成 V2。
-
-    FE 的配置参数优先级高于 BE 的配置。即使 BE 中的配置 `default_rowset_type` 为 ALPHA,只要 FE 配置为 beta,BE 依然会开始进行 V1 到 V2 的数据格式转换。
-
- **建议先通过对单独表的数据格式转换验证后,再进行全量转换。全量转换的时间比较长,且进度依赖于 compaction 的进度。**可能出现 compaction 无法完成的情况,因此需要通过显式的执行 `ALTER TABLE` 操作进行个别表的数据格式转换。
-
-3. 查看全量转换进度
-
- 全量转换进度须通过脚本查看。脚本位置为代码库的 `tools/show_segment_status/` 目录。请参阅其中的 `README` 文档查看使用帮助。
diff --git a/docs/zh-CN/administrator-guide/small-file-mgr.md b/docs/zh-CN/administrator-guide/small-file-mgr.md
deleted file mode 100644
index d107e5e639..0000000000
--- a/docs/zh-CN/administrator-guide/small-file-mgr.md
+++ /dev/null
@@ -1,104 +0,0 @@
----
-{
- "title": "文件管理器",
- "language": "zh-CN"
-}
----
-
-
-
-# 文件管理器
-
-Doris 中的一些功能需要使用一些用户自定义的文件。比如用于访问外部数据源的公钥、密钥文件、证书文件等等。文件管理器提供这样一个功能,能够让用户预先上传这些文件并保存在 Doris 系统中,然后可以在其他命令中引用或访问。
-
-## 名词解释
-
-* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。
-* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。
-* BDBJE:Oracle Berkeley DB Java Edition。FE 中用于持久化元数据的分布式嵌入式数据库。
-* SmallFileMgr:文件管理器。负责创建并维护用户的文件。
-
-## 基本概念
-
-文件是指用户创建并保存在 Doris 中的文件。
-
-一个文件由 `数据库名称(database)`、`分类(catalog)` 和 `文件名(file_name)` 共同定位。同时每个文件也有一个全局唯一的 id(file_id),作为系统内的标识。
-
-文件的创建和删除只能由拥有 `admin` 权限的用户进行操作。一个文件隶属于一个数据库。对某一数据库拥有访问权限(查询、导入、修改等等)的用户都可以使用该数据库下创建的文件。
-
-## 具体操作
-
-文件管理主要有三个命令:`CREATE FILE`,`SHOW FILE` 和 `DROP FILE`,分别为创建、查看和删除文件。这三个命令的具体语法可以通过连接到 Doris 后,执行 `HELP cmd;` 的方式查看帮助。
-
-1. CREATE FILE
-
- 在创建文件的命令中,用户必须提供以下信息:
-
- * file_name:文件名。用户自定义,在一个 catalog 内唯一即可。
- * catalog:文件所属分类。用户自定义,在一个 database 内唯一即可。
-
- > Doris 也有一些特殊的分类名称供特定的命令使用。
-
- > 1. kafka
-
- > 当在例行导入命令中指定数据源为 Kafka,并且需要引用到文件时,Doris 会默认从 catalog 名为 "kafka" 的分类中查找文件。
-
- * url:文件的下载地址。目前仅支持无认证的 http 下载地址。该下载地址仅用于在执行创建文件命令时,从这个地址下载文件。当文件成功创建并保存在 Doris 中后,该地址将不再被使用。
- * md5:可选项。文件的 MD5 值。如果用户提供该值,将在文件下载后进行 MD5 值的校验。校验失败则文件创建失败。
-
- 文件创建成功后,文件相关的信息将持久化在 Doris 中。用户可以通过 `SHOW FILE` 命令查看已经创建成功的文件。
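-
-    一个创建文件的示意如下(文件名、URL 等均为假设值,具体语法请以 `HELP CREATE FILE;` 为准):
-
-    ```
-    CREATE FILE "ca.pem"
-    IN my_database
-    PROPERTIES
-    (
-        "url" = "http://example.com/ca.pem",
-        "catalog" = "kafka"
-    );
-    ```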
-
-2. SHOW FILE
-
- 该命令可以查看已经创建成功的文件。具体操作见:`HELP SHOW FILE;`
-
-3. DROP FILE
-
- 该命令可以删除一个已经创建的文件。具体操作见:`HELP DROP FILE;`
-
-## 实现细节
-
-### 创建和删除文件
-
-当用户执行 `CREATE FILE` 命令后,FE 会从给定的 URL 下载文件。并将文件的内容以 Base64 编码的形式直接保存在 FE 的内存中。同时会将文件内容以及文件相关的元信息持久化在 BDBJE 中。所有被创建的文件,其元信息和文件内容都会常驻于 FE 的内存中。如果 FE 宕机重启,也会从 BDBJE 中加载元信息和文件内容到内存中。当文件被删除时,会直接从 FE 内存中删除相关信息,同时也从 BDBJE 中删除持久化的信息。
-
-### 文件的使用
-
-如果是 FE 端需要使用创建的文件,则 SmallFileMgr 会直接将 FE 内存中的数据保存为本地文件,存储在指定的目录中,并返回本地的文件路径供使用。
-
-如果是 BE 端需要使用创建的文件,BE 会通过 FE 的 http 接口 `/api/get_small_file` 将文件内容下载到 BE 上指定的目录中,供使用。同时,BE 也会在内存中记录当前已经下载过的文件的信息。当 BE 请求一个文件时,会先查看本地文件是否存在并校验。如果校验通过,则直接返回本地文件路径。如果校验失败,则会删除本地文件,重新从 FE 下载。当 BE 重启时,会预先加载本地的文件到内存中。
-
-## 使用限制
-
-因为文件元信息和内容都存储于 FE 的内存中,所以默认仅支持上传大小在 1MB 以内的文件,并且总文件数量限制为 100 个。可以通过下一小节介绍的配置项进行修改。
-
-## 相关配置
-
-1. FE 配置
-
- * `small_file_dir`:用于存放上传文件的路径,默认为 FE 运行目录的 `small_files/` 目录下。
- * `max_small_file_size_bytes`:单个文件大小限制,单位为字节。默认为 1MB。大于该配置的文件创建将会被拒绝。
- * `max_small_file_number`:一个 Doris 集群支持的总文件数量。默认为 100。当创建的文件数超过这个值后,后续的创建将会被拒绝。
-
- > 如果需要上传更多文件或提高单个文件的大小限制,可以通过 `ADMIN SET CONFIG` 命令修改 `max_small_file_size_bytes` 和 `max_small_file_number` 参数。但文件数量和大小的增加,会导致 FE 内存使用量的增加。
-
-2. BE 配置
-
- * `small_file_dir`:用于存放从 FE 下载的文件的路径,默认为 BE 运行目录的 `lib/small_files/` 目录下。
diff --git a/docs/zh-CN/administrator-guide/sql-mode.md b/docs/zh-CN/administrator-guide/sql-mode.md
deleted file mode 100644
index c2ebca85f6..0000000000
--- a/docs/zh-CN/administrator-guide/sql-mode.md
+++ /dev/null
@@ -1,76 +0,0 @@
----
-{
- "title": "SQL MODE",
- "language": "zh-CN"
-}
----
-
-
-
-# SQL MODE
-
-Doris 新支持的 sql mode 参照了 MySQL 的 sql mode 管理机制,每个客户端都能设置自己的 sql mode,拥有 Admin 权限的数据库管理员可以设置全局 sql mode。
-
-## sql mode 介绍
-
-sql mode使用户能在不同风格的sql语法和数据校验严格度间做切换,使Doris对其他数据库有更好的兼容性。例如在一些数据库里,'||'符号是一个字符串连接符,但在Doris里却是与'or'等价的,这时用户只需要使用sql mode切换到自己想要的风格。每个客户端都能设置sql mode,并在当前对话中有效,只有拥有Admin权限的用户可以设置全局sql mode。
-
-## 原理
-
-sql mode 用一个64位的 Long 型存储在 SessionVariables 中,这个数值的每一个二进制位都代表一个 mode 的开启/禁用(1表示开启,0表示禁用)状态,只要知道每一种 mode 具体对应哪一位,我们就可以通过位运算方便快速地对 sql mode 进行校验和操作。
-
-每一次对sql mode的查询,都会对此Long型进行一次解析,变成用户可读的字符串形式,同理,用户发送给服务器的sql mode字符串,会被解析成能够存储在SessionVariables中的Long型。
-
-已被设置好的全局sql mode会被持久化,因此对全局sql mode的操作总是只需一次,即使程序重启后仍可以恢复上一次的全局sql mode。
-
-## 操作方式
-
-1、设置sql mode
-
-```
-set global sql_mode = ""
-set session sql_mode = ""
-```
->目前Doris的默认sql mode为空。
->设置global sql mode需要Admin权限,并会影响所有在此后连接的客户端。
->设置session sql mode只会影响当前对话客户端,默认为session方式。
-
-2、查询sql mode
-
-```
-select @@global.sql_mode
-select @@session.sql_mode
-```
->除了这种方式,你还可以通过下面方式返回所有session variables来查看当前sql mode
-
-```
-show global variables
-show session variables
-```
-
-## 已支持mode
-
-1. `PIPES_AS_CONCAT`
-
- 在此模式下,'||'符号是一种字符串连接符号(同CONCAT()函数),而不是'OR'符号的同义词。(e.g., `'a'||'b' = 'ab'`, `1||0 = '10'`)
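-
-    一个简单的示例如下(仅为示意):
-
-    ```
-    set session sql_mode = "PIPES_AS_CONCAT";
-    select 'a'||'b';   -- 返回 'ab',而不再按逻辑或求值
-    ```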
-
-## 复合mode
-
-(后续补充)
\ No newline at end of file
diff --git a/docs/zh-CN/administrator-guide/time-zone.md b/docs/zh-CN/administrator-guide/time-zone.md
deleted file mode 100644
index 20e784f4d5..0000000000
--- a/docs/zh-CN/administrator-guide/time-zone.md
+++ /dev/null
@@ -1,91 +0,0 @@
----
-{
- "title": "时区",
- "language": "zh-CN"
-}
----
-
-
-
-# 时区
-
-Doris 支持多时区设置
-
-## 名词解释
-
-* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。
-* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。
-
-## 基本概念
-
-Doris 内部存在多个时区相关参数
-
-* system_time_zone :
- 当服务器启动时,会根据机器设置时区自动设置,设置后不可修改。
-
-* time_zone :
- 服务器当前时区,区分session级别和global级别
-
-## 具体操作
-
-1. show variables like '%time_zone%'
-
- 查看当前时区相关配置
-
-2. SET time_zone = 'Asia/Shanghai'
-
- 该命令可以设置session级别的时区,连接断开后失效
-
-3. SET global time_zone = 'Asia/Shanghai'
-
- 该命令可以设置global级别的时区参数,fe会将参数持久化,连接断开后不失效
-
-### 时区的影响
-
-时区设置会影响对时区敏感的时间值的显示和存储。
-
-包括NOW()或CURTIME()等时间函数显示的值,也包括show load, show backends中的时间值。
-
-但不会影响 create table 中时间类型分区列的 less than 值,也不会影响存储为 date/datetime 类型的值的显示。
-
-受时区影响的函数:
-
-* `FROM_UNIXTIME`:给定一个 UTC 时间戳,返回指定时区的日期时间:如 `FROM_UNIXTIME(0)`, 返回 CST 时区:`1970-01-01 08:00:00`。
-* `UNIX_TIMESTAMP`:给定一个指定时区日期时间,返回 UTC 时间戳:如 CST 时区 `UNIX_TIMESTAMP('1970-01-01 08:00:00')`,返回 `0`。
-* `CURTIME`:返回指定时区时间。
-* `NOW`:返回指定时区的当前日期时间。
-* `CONVERT_TZ`:将一个日期时间从一个指定时区转换到另一个指定时区。
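-
-下面的示例展示了时区设置对上述函数的影响(与前文描述一致,仅为示意):
-
-```
-SET time_zone = 'Asia/Shanghai';
-SELECT FROM_UNIXTIME(0);                          -- 返回 1970-01-01 08:00:00
-SELECT UNIX_TIMESTAMP('1970-01-01 08:00:00');     -- 返回 0
-
-SET time_zone = '+00:00';
-SELECT FROM_UNIXTIME(0);                          -- 返回 1970-01-01 00:00:00
-```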
-
-## 使用限制
-
-时区值可以使用几种格式给出,不区分大小写:
-
-* 表示UTC偏移量的字符串,如'+10:00'或'-6:00'
-
-* 标准时区格式,如"Asia/Shanghai"、"America/Los_Angeles"
-
-* 不支持缩写时区格式,如"MET"、"CTT"。因为缩写时区在不同场景下存在歧义,不建议使用。
-
-* 为了兼容Doris,支持CST缩写时区,内部会将CST转换为"Asia/Shanghai"的中国标准时区
-
-## 时区格式列表
-
-[List of tz database time zones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
-
diff --git a/docs/zh-CN/administrator-guide/update.md b/docs/zh-CN/administrator-guide/update.md
deleted file mode 100644
index c994c8a6c7..0000000000
--- a/docs/zh-CN/administrator-guide/update.md
+++ /dev/null
@@ -1,126 +0,0 @@
----
-{
- "title": "更新",
- "language": "zh-CN"
-}
----
-
-
-
-# 更新
-
-如果我们需要修改或更新 Doris 中的数据,就可以使用 UPDATE 命令来操作。
-
-## 适用场景
-
-+ 对满足某些条件的行,修改其取值。
-+ 点更新,小范围更新,待更新的行最好是整个表的非常小一部分。
-+ update 命令只能在 Unique 数据模型的表中操作。
-
-## 名词解释
-
-1. Unique 模型:Doris 系统中的一种数据模型。将列分为两类,Key 和 Value。当用户导入相同 Key 的行时,后者的 Value 会覆盖已有的 Value。与 Mysql 中的 Unique 含义一致。
-
-## 基本原理
-
-利用查询引擎自身的 where 过滤逻辑,从待更新表中筛选出需要被更新的行。再利用 Unique 模型自带的 Value 列新数据替换旧数据的逻辑,将待更新的行变更后,再重新插入到表中。从而实现行级别更新。
-
-举例说明
-
-假设 Doris 中存在一张订单表,其中 订单id 是 Key 列,订单状态,订单金额是 Value 列。数据状态如下:
-
-|订单id | 订单金额| 订单状态|
-|---|---|---|
-| 1 | 100| 待付款 |
-
-这时候,用户点击付款后,Doris 系统需要将订单id 为 '1' 的订单状态变更为 '待发货', 就需要用到 Update 功能。
-
-```
-UPDATE order SET 订单状态='待发货' WHERE 订单id=1;
-```
-
-用户执行 UPDATE 命令后,系统会进行如下三步:
-
-+ 第一步:读取满足 WHERE 订单id=1 的行
- (1,100,'待付款')
-+ 第二步:变更该行的订单状态,从'待付款'改为'待发货'
- (1,100,'待发货')
-+ 第三步:将更新后的行再插入回表中,从而达到更新的效果。
-
-  |订单id | 订单金额| 订单状态|
-  |---|---|---|
-  | 1 | 100| 待付款 |
-  | 1 | 100 | 待发货 |
-
-  由于表 order 是 UNIQUE 模型,所以相同 Key 的行,只有后者才会生效,所以最终效果如下:
-
-  |订单id | 订单金额| 订单状态|
-  |---|---|---|
-  | 1 | 100 | 待发货 |
-
-## 基本操作
-
-### UPDATE 语法
-
-```UPDATE table_name SET value=xxx WHERE condition;```
-
-+ `table_name`: 待更新的表,必须是 UNIQUE 模型的表才能进行更新。
-
-+ value=xxx: 待更新的列,等式左边必须是表的 value 列。等式右边可以是常量,也可以是某个表中某列的表达式变换。
- 比如 value = 1, 则待更新的列值会变为1。
- 比如 value = value +1, 则待更新的列值会自增1。
-
-+ condition:只有满足 condition 的行才会被更新。condition 必须是一个结果为 Boolean 类型的表达式。
- 比如 k1 = 1, 则只有当 k1 列值为1的行才会被更新。
- 比如 k1 = k2, 则只有 k1 列值和 k2 列一样的行才会被更新。
- 不支持不填写condition,也就是不支持全表更新。
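-
-结合上面的语法,一个最小化的示例如下(表名、列名仅为示意,表需为 UNIQUE 模型):
-
-```
-UPDATE test_tbl SET v2 = v2 + 1 WHERE k1 = 10;
-```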
-
-### 同步
-
-Update 语法在 Doris 中是一个同步语法,即 Update 语句执行成功,更新就成功了,数据可见。
-
-### 性能
-
-Update 语句的性能和待更新的行数,以及 condition 的检索效率密切相关。
-
-+ 待更新的行数:待更新的行数越多,Update 语句的速度就会越慢。这和导入的原理是一致的。
-  Doris 的更新比较适合偶发更新的场景,比如修改个别行的值。
- Doris 并不适合大批量的修改数据。大批量修改会使得 Update 语句运行时间很久。
-
-+ condition 的检索效率:Doris 的 Update 实现原理是先将满足 condition 的行读取出来,所以如果 condition 的检索效率高,则 Update 的速度也会快。
-  condition 列最好能命中索引或者分区分桶裁剪,这样 Doris 就不需要扫描全表,可以快速定位到需要更新的行,从而提升更新效率。
- 强烈不推荐 condition 列中包含 UNIQUE 模型的 value 列。
-
-### 并发控制
-
-默认情况下,并不允许同一时间对同一张表并发进行多个 Update 操作。
-
-主要原因是,Doris 目前支持的是行更新,这意味着,即使用户声明的是 ```SET v2 = 1```,实际上,其他所有的 Value 列也会被覆盖一遍(尽管值没有变化)。
-
-这就会存在一个问题,如果同时有两个 Update 操作对同一行进行更新,那么其行为可能是不确定的。也就是可能存在脏数据。
-
-但在实际应用中,如果用户自己可以保证即使并发更新,也不会同时对同一行进行操作的话,就可以手动打开并发限制。通过修改 FE 配置 ```enable_concurrent_update```。当配置值为 true 时,则对更新并发无限制。
-
-## 使用风险
-
-由于 Doris 目前支持的是行更新,并且采用的是读取后再写入的两步操作,则如果 Update 语句和其他导入或 Delete 语句刚好修改的是同一行时,存在不确定的数据结果。
-
-所以用户在使用的时候,一定要注意*用户侧自己*进行 Update 语句和其他 DML 语句的并发控制。
-
-## 版本
-
-Doris Version 0.15.x +
diff --git a/docs/zh-CN/administrator-guide/variables.md b/docs/zh-CN/administrator-guide/variables.md
deleted file mode 100644
index 219e86049e..0000000000
--- a/docs/zh-CN/administrator-guide/variables.md
+++ /dev/null
@@ -1,492 +0,0 @@
----
-{
- "title": "变量",
- "language": "zh-CN"
-}
----
-
-
-
-# 变量
-
-本文档主要介绍当前支持的变量(variables)。
-
-Doris 中的变量参考 MySQL 中的变量设置。但部分变量仅用于兼容一些 MySQL 客户端协议,并不产生其在 MySQL 数据库中的实际意义。
-
-## 变量设置与查看
-
-### 查看
-
-可以通过 `SHOW VARIABLES [LIKE 'xxx'];` 查看所有或指定的变量。如:
-
-```
-SHOW VARIABLES;
-SHOW VARIABLES LIKE '%time_zone%';
-```
-
-### 设置
-
-部分变量可以设置全局生效或仅当前会话生效。设置全局生效后,后续新的会话连接中会沿用设置值。而设置仅当前会话生效,则变量仅对当前会话产生作用。
-
-仅当前会话生效,通过 `SET var_name=xxx;` 语句来设置。如:
-
-```
-SET exec_mem_limit = 137438953472;
-SET forward_to_master = true;
-SET time_zone = "Asia/Shanghai";
-```
-
-全局生效,通过 `SET GLOBAL var_name=xxx;` 设置。如:
-
-```
-SET GLOBAL exec_mem_limit = 137438953472
-```
-
-> 注1:只有 ADMIN 用户可以设置变量的全局生效。
-> 注2:全局生效的变量不影响当前会话的变量值,仅影响新的会话中的变量。
-
-既支持当前会话生效又支持全局生效的变量包括:
-
-* `time_zone`
-* `wait_timeout`
-* `sql_mode`
-* `enable_profile`
-* `query_timeout`
-* `exec_mem_limit`
-* `batch_size`
-* `allow_partition_column_nullable`
-* `insert_visible_timeout_ms`
-* `enable_fold_constant_by_be`
-
-只支持全局生效的变量包括:
-
-* `default_rowset_type`
-
-同时,变量设置也支持常量表达式。如:
-
-```
-SET exec_mem_limit = 10 * 1024 * 1024 * 1024;
-SET forward_to_master = concat('tr', 'u', 'e');
-```
-
-### 在查询语句中设置变量
-
-在一些场景中,我们可能需要对某些查询有针对性的设置变量。
-通过使用SET_VAR提示可以在查询中设置会话变量(在单个语句内生效)。例子:
-
-```
-SELECT /*+ SET_VAR(exec_mem_limit = 8589934592) */ name FROM people ORDER BY name;
-SELECT /*+ SET_VAR(query_timeout = 1, enable_partition_cache=true) */ sleep(3);
-```
-
-注意注释必须以/*+ 开头,并且只能跟随在SELECT之后。
-
-## 支持的变量
-
-* `SQL_AUTO_IS_NULL`
-
- 用于兼容 JDBC 连接池 C3P0。 无实际作用。
-
-* `auto_increment_increment`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `autocommit`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `batch_size`
-
- 用于指定在查询执行过程中,各个节点传输的单个数据包的行数。默认一个数据包的行数为 1024 行,即源端节点每产生 1024 行数据后,打包发给目的节点。
-
- 较大的行数,会在扫描大数据量场景下提升查询的吞吐,但可能会在小查询场景下增加查询延迟。同时,也会增加查询的内存开销。建议设置范围 1024 至 4096。
-
-* `character_set_client`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `character_set_connection`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `character_set_results`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `character_set_server`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `codegen_level`
-
- 用于设置 LLVM codegen 的等级。(当前未生效)。
-
-* `collation_connection`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `collation_database`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `collation_server`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `delete_without_partition`
-
-    设置为 true 时,当使用 delete 命令删除分区表数据时,可以不指定分区,delete 操作将会自动应用到所有分区。
-
-    但注意,自动应用到所有分区可能导致 delete 命令触发大量子任务,从而耗时较长。如无必要,不建议开启。
-
-* `disable_colocate_join`
-
- 控制是否启用 [Colocation Join](./colocation-join.md) 功能。默认为 false,表示启用该功能。true 表示禁用该功能。当该功能被禁用后,查询规划将不会尝试执行 Colocation Join。
-
-* `enable_bucket_shuffle_join`
-
- 控制是否启用 [Bucket Shuffle Join](./bucket-shuffle-join.md) 功能。默认为 true,表示启用该功能。false 表示禁用该功能。当该功能被禁用后,查询规划将不会尝试执行 Bucket Shuffle Join。
-
-* `disable_streaming_preaggregations`
-
- 控制是否开启流式预聚合。默认为 false,即开启。当前不可设置,且默认开启。
-
-* `enable_insert_strict`
-
- 用于设置通过 INSERT 语句进行数据导入时,是否开启 `strict` 模式。默认为 false,即不开启 `strict` 模式。关于该模式的介绍,可以参阅 [这里](./load-data/insert-into-manual.md)。
-
-* `enable_spilling`
-
- 用于设置是否开启大数据量落盘排序。默认为 false,即关闭该功能。当用户未指定 ORDER BY 子句的 LIMIT 条件,同时设置 `enable_spilling` 为 true 时,才会开启落盘排序。该功能启用后,会使用 BE 数据目录下 `doris-scratch/` 目录存放临时的落盘数据,并在查询结束后,清空临时数据。
-
- 该功能主要用于使用有限的内存进行大数据量的排序操作。
-
- 注意,该功能为实验性质,不保证稳定性,请谨慎开启。
-
-* `exec_mem_limit`
-
- 用于设置单个查询的内存限制。默认为 2GB,单位为B/K/KB/M/MB/G/GB/T/TB/P/PB, 默认为B。
-
- 该参数用于限制一个查询计划中,单个查询计划的实例所能使用的内存。一个查询计划可能有多个实例,一个 BE 节点可能执行一个或多个实例。所以该参数并不能准确限制一个查询在整个集群的内存使用,也不能准确限制一个查询在单一 BE 节点上的内存使用。具体需要根据生成的查询计划判断。
-
- 通常只有在一些阻塞节点(如排序节点、聚合节点、Join 节点)上才会消耗较多的内存,而其他节点(如扫描节点)中,数据为流式通过,并不会占用较多的内存。
-
- 当出现 `Memory Exceed Limit` 错误时,可以尝试指数级增加该参数,如 4G、8G、16G 等。
-
-* `forward_to_master`
-
- 用户设置是否将一些show 类命令转发到 Master FE 节点执行。默认为 `true`,即转发。Doris 中存在多个 FE 节点,其中一个为 Master 节点。通常用户可以连接任意 FE 节点进行全功能操作。但部分信息查看指令,只有从 Master FE 节点才能获取详细信息。
-
- 如 `SHOW BACKENDS;` 命令,如果不转发到 Master FE 节点,则仅能看到节点是否存活等一些基本信息,而转发到 Master FE 则可以获取包括节点启动时间、最后一次心跳时间等更详细的信息。
-
- 当前受该参数影响的命令如下:
-
- 1. `SHOW FRONTENDS;`
-
- 转发到 Master 可以查看最后一次心跳信息。
-
- 2. `SHOW BACKENDS;`
-
- 转发到 Master 可以查看启动时间、最后一次心跳信息、磁盘容量信息。
-
- 3. `SHOW BROKER;`
-
- 转发到 Master 可以查看启动时间、最后一次心跳信息。
-
- 4. `SHOW TABLET;`/`ADMIN SHOW REPLICA DISTRIBUTION;`/`ADMIN SHOW REPLICA STATUS;`
-
- 转发到 Master 可以查看 Master FE 元数据中存储的 tablet 信息。正常情况下,不同 FE 元数据中 tablet 信息应该是一致的。当出现问题时,可以通过这个方法比较当前 FE 和 Master FE 元数据的差异。
-
- 5. `SHOW PROC;`
-
- 转发到 Master 可以查看 Master FE 元数据中存储的相关 PROC 的信息。主要用于元数据比对。
-
-* `init_connect`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `interactive_timeout`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `enable_profile`
-
- 用于设置是否需要查看查询的 profile。默认为 false,即不需要 profile。
-
- 默认情况下,只有在查询发生错误时,BE 才会发送 profile 给 FE,用于查看错误。正常结束的查询不会发送 profile。发送 profile 会产生一定的网络开销,对高并发查询场景不利。
- 当用户希望对一个查询的 profile 进行分析时,可以将这个变量设为 true 后,发送查询。查询结束后,可以通过在当前连接的 FE 的 web 页面查看到 profile:
-
- `fe_host:fe_http_port/query`
-
- 其中会显示最近100条,开启 `enable_profile` 的查询的 profile。
-
-* `language`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `license`
-
- 显示 Doris 的 License。无其他作用。
-
-* `load_mem_limit`
-
- 用于指定导入操作的内存限制。默认为 0,即表示不使用该变量,而采用 `exec_mem_limit` 作为导入操作的内存限制。
-
-    这个变量仅用于 INSERT 操作。因为 INSERT 操作涉及查询和导入两个部分,如果用户不设置此变量,则查询和导入操作各自的内存限制均为 `exec_mem_limit`。否则,INSERT 的查询部分内存限制为 `exec_mem_limit`,而导入部分限制为 `load_mem_limit`。
-
- 其他导入方式,如 BROKER LOAD,STREAM LOAD 的内存限制依然使用 `exec_mem_limit`。
-
-* `lower_case_table_names`
-
- 用于控制用户表表名大小写是否敏感。
-
- 值为 0 时,表名大小写敏感。默认为0。
-
- 值为 1 时,表名大小写不敏感,doris在存储和查询时会将表名转换为小写。
- 优点是在一条语句中可以使用表名的任意大小写形式,下面的sql是正确的:
- ```
- mysql> show tables;
- +------------------+
- | Tables_in_testdb |
- +------------------+
- | cost |
- +------------------+
-
- mysql> select * from COST where COst.id < 100 order by cost.id;
- ```
- 缺点是建表后无法获得建表语句中指定的表名,`show tables` 查看的表名为指定表名的小写。
-
- 值为 2 时,表名大小写不敏感,doris存储建表语句中指定的表名,查询时转换为小写进行比较。
- 优点是`show tables` 查看的表名为建表语句中指定的表名;
- 缺点是同一语句中只能使用表名的一种大小写形式,例如对`cost` 表使用表名 `COST` 进行查询:
- ```
- mysql> select * from COST where COST.id < 100 order by COST.id;
- ```
-
- 该变量兼容MySQL。需在集群初始化时通过fe.conf 指定 `lower_case_table_names=`进行配置,集群初始化完成后无法通过`set` 语句修改该变量,也无法通过重启、升级集群修改该变量。
-
- information_schema中的系统视图表名不区分大小写,当`lower_case_table_names`值为 0 时,表现为 2。
-
-* `max_allowed_packet`
-
- 用于兼容 JDBC 连接池 C3P0。 无实际作用。
-
-* `max_pushdown_conditions_per_column`
-
- 该变量的具体含义请参阅 [BE 配置项](./config/be_config.md) 中 `max_pushdown_conditions_per_column` 的说明。该变量默认置为 -1,表示使用 `be.conf` 中的配置值。如果设置大于 0,则当前会话中的查询会使用该变量值,而忽略 `be.conf` 中的配置值。
-
-* `max_scan_key_num`
-
- 该变量的具体含义请参阅 [BE 配置项](./config/be_config.md) 中 `doris_max_scan_key_num` 的说明。该变量默认置为 -1,表示使用 `be.conf` 中的配置值。如果设置大于 0,则当前会话中的查询会使用该变量值,而忽略 `be.conf` 中的配置值。
-
-* `net_buffer_length`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `net_read_timeout`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `net_write_timeout`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `parallel_exchange_instance_num`
-
- 用于设置执行计划中,一个上层节点接收下层节点数据所使用的 exchange node 数量。默认为 -1,即表示 exchange node 数量等于下层节点执行实例的个数(默认行为)。当设置大于0,并且小于下层节点执行实例的个数,则 exchange node 数量等于设置值。
-
- 在一个分布式的查询执行计划中,上层节点通常有一个或多个 exchange node 用于接收来自下层节点在不同 BE 上的执行实例的数据。通常 exchange node 数量等于下层节点执行实例数量。
-
- 在一些聚合查询场景下,如果底层需要扫描的数据量较大,但聚合之后的数据量很小,则可以尝试修改此变量为一个较小的值,可以降低此类查询的资源开销。如在 DUPLICATE KEY 明细模型上进行聚合查询的场景。
-
-* `parallel_fragment_exec_instance_num`
-
- 针对扫描节点,设置其在每个 BE 节点上,执行实例的个数。默认为 1。
-
-    一个查询计划通常会产生一组 scan range,即需要扫描的数据范围。这些数据分布在多个 BE 节点上。一个 BE 节点会有一个或多个 scan range。默认情况下,每个 BE 节点的一组 scan range 只由一个执行实例处理。当机器资源比较充裕时,可以增加该变量的值,让更多的执行实例同时处理一组 scan range,从而提升查询效率。
-
- 而 scan 实例的数量决定了上层其他执行节点,如聚合节点,join 节点的数量。因此相当于增加了整个查询计划执行的并发度。修改该参数会对大查询效率提升有帮助,但较大数值会消耗更多的机器资源,如CPU、内存、磁盘IO。
-
-* `query_cache_size`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `query_cache_type`
-
- 用于兼容 JDBC 连接池 C3P0。 无实际作用。
-
-* `query_timeout`
-
- 用于设置查询超时。该变量会作用于当前连接中所有的查询语句,以及 INSERT 语句。默认为 5 分钟,单位为秒。
-
-* `resource_group`
-
- 暂不使用。
-* `send_batch_parallelism`
-
- 用于设置执行 InsertStmt 操作时发送批处理数据的默认并行度,如果并行度的值超过 BE 配置中的 `max_send_batch_parallelism_per_job`,那么作为协调点的 BE 将使用 `max_send_batch_parallelism_per_job` 的值。
-
-* `sql_mode`
-
- 用于指定 SQL 模式,以适应某些 SQL 方言。关于 SQL 模式,可参阅 [这里](./sql-mode.md)。
-
-* `sql_safe_updates`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `sql_select_limit`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `system_time_zone`
-
- 显示当前系统时区。不可更改。
-
-* `time_zone`
-
- 用于设置当前会话的时区。时区会对某些时间函数的结果产生影响。关于时区,可以参阅 [这里](./time-zone.md)。
-
-* `tx_isolation`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `tx_read_only`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `transaction_read_only`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `transaction_isolation`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `version`
-
- 用于兼容 MySQL 客户端。无实际作用。
-
-* `performance_schema`
-
- 用于兼容 8.0.16及以上版本的MySQL JDBC。无实际作用。
-
-* `version_comment`
-
- 用于显示 Doris 的版本。不可更改。
-
-* `wait_timeout`
-
- 用于设置空闲连接的连接时长。当一个空闲连接在该时长内与 Doris 没有任何交互,则 Doris 会主动断开这个链接。默认为 8 小时,单位为秒。
-
-* `default_rowset_type`
-
- 用于设置计算节点存储引擎默认的存储格式。当前支持的存储格式包括:alpha/beta。
-
-* `use_v2_rollup`
-
- 用于控制查询使用segment v2存储格式的rollup索引获取数据。该变量用于上线segment v2的时候,进行验证使用;其他情况,不建议使用。
-
-* `rewrite_count_distinct_to_bitmap_hll`
-
- 是否将 bitmap 和 hll 类型的 count distinct 查询重写为 bitmap_union_count 和 hll_union_agg 。
-
-* `prefer_join_method`
-
- 在选择join的具体实现方式是broadcast join还是shuffle join时,如果broadcast join cost和shuffle join cost相等时,优先选择哪种join方式。
-
- 目前该变量的可选值为"broadcast" 或者 "shuffle"。
-
-* `allow_partition_column_nullable`
-
- 建表时是否允许分区列为NULL。默认为true,表示允许为NULL。false 表示分区列必须被定义为NOT NULL
-
-* `insert_visible_timeout_ms`
-
- 在执行insert语句时,导入动作(查询和插入)完成后,还需要等待事务提交,使数据可见。此参数控制等待数据可见的超时时间,默认为10000,最小为1000。
-
-* `enable_exchange_node_parallel_merge`
-
- 在一个排序的查询之中,一个上层节点接收下层节点有序数据时,会在exchange node上进行对应的排序来保证最终的数据是有序的。但是单线程进行多路数据归并时,如果数据量过大,会导致exchange node的单点的归并瓶颈。
-
-    Doris在这部分进行了优化处理,如果下层的数据节点过多,exchange node 会启动多线程进行并行归并来加速排序过程。该参数默认为False,即表示 exchange node 不采取并行的归并排序,来减少额外的CPU和内存消耗。
-
-* `extract_wide_range_expr`
-
-    用于控制是否开启「宽泛公因式提取」的优化。取值有两种:true 和 false。默认情况下开启。
-
-* `enable_fold_constant_by_be`
-
- 用于控制常量折叠的计算方式。默认是 `false`,即在 `FE` 进行计算;若设置为 `true`,则通过 `RPC` 请求经 `BE` 计算。
-
-* `cpu_resource_limit`
-
- 用于限制一个查询的资源开销。这是一个实验性质的功能。目前的实现是限制一个查询在单个节点上的scan线程数量。限制了scan线程数,从底层返回的数据速度变慢,从而限制了查询整体的计算资源开销。假设设置为 2,则一个查询在单节点上最多使用2个scan线程。
-
- 该参数会覆盖 `parallel_fragment_exec_instance_num` 的效果。即假设 `parallel_fragment_exec_instance_num` 设置为4,而该参数设置为2。则单个节点上的4个执行实例会共享最多2个扫描线程。
-
- 该参数会被 user property 中的 `cpu_resource_limit` 配置覆盖。
-
- 默认 -1,即不限制。
-
-* `disable_join_reorder`
-
-    用于关闭所有系统自动的 join reorder 算法。取值有两种:true 和 false。默认情况下关闭,也就是采用系统自动的 join reorder 算法。设置为 true 后,系统会关闭所有自动排序的算法,采用 SQL 原始的表顺序执行 join。
-
-* `return_object_data_as_binary`
-
-    用于标识是否在 select 结果中返回 bitmap/hll 结果。在 select into outfile 语句中,如果导出文件格式为 csv,则会将 bitmap/hll 数据进行 base64 编码;如果是 parquet 文件格式,则会把数据作为 byte array 存储。
-
-* `block_encryption_mode`
-
-    可以通过 block_encryption_mode 参数控制块加密模式,默认值为空。当使用 AES 算法加密时相当于 `AES_128_ECB`,当使用 SM4 算法加密时相当于 `SM4_128_ECB`。
-    可选值:
-```
- AES_128_ECB,
- AES_192_ECB,
- AES_256_ECB,
- AES_128_CBC,
- AES_192_CBC,
- AES_256_CBC,
- AES_128_CFB,
- AES_192_CFB,
- AES_256_CFB,
- AES_128_CFB1,
- AES_192_CFB1,
- AES_256_CFB1,
- AES_128_CFB8,
- AES_192_CFB8,
- AES_256_CFB8,
- AES_128_CFB128,
- AES_192_CFB128,
- AES_256_CFB128,
- AES_128_CTR,
- AES_192_CTR,
- AES_256_CTR,
- AES_128_OFB,
- AES_192_OFB,
- AES_256_OFB,
- SM4_128_ECB,
- SM4_128_CBC,
- SM4_128_CFB128,
- SM4_128_OFB,
- SM4_128_CTR,
-```
-
-* `enable_infer_predicate`
-
-    用于控制是否进行谓词推导。取值有两种:true 和 false。默认情况下关闭,系统不进行谓词推导,采用原始的谓词进行相关操作。设置为 true 后,进行谓词扩展。
-
diff --git a/new-docs/zh-CN/advanced/alter-table/replace-table.md b/docs/zh-CN/advanced/alter-table/replace-table.md
similarity index 100%
rename from new-docs/zh-CN/advanced/alter-table/replace-table.md
rename to docs/zh-CN/advanced/alter-table/replace-table.md
diff --git a/new-docs/zh-CN/advanced/alter-table/schema-change.md b/docs/zh-CN/advanced/alter-table/schema-change.md
similarity index 100%
rename from new-docs/zh-CN/advanced/alter-table/schema-change.md
rename to docs/zh-CN/advanced/alter-table/schema-change.md
diff --git a/new-docs/zh-CN/advanced/best-practice/debug-log.md b/docs/zh-CN/advanced/best-practice/debug-log.md
similarity index 100%
rename from new-docs/zh-CN/advanced/best-practice/debug-log.md
rename to docs/zh-CN/advanced/best-practice/debug-log.md
diff --git a/new-docs/zh-CN/advanced/best-practice/import-analysis.md b/docs/zh-CN/advanced/best-practice/import-analysis.md
similarity index 100%
rename from new-docs/zh-CN/advanced/best-practice/import-analysis.md
rename to docs/zh-CN/advanced/best-practice/import-analysis.md
diff --git a/new-docs/zh-CN/advanced/best-practice/query-analysis.md b/docs/zh-CN/advanced/best-practice/query-analysis.md
similarity index 100%
rename from new-docs/zh-CN/advanced/best-practice/query-analysis.md
rename to docs/zh-CN/advanced/best-practice/query-analysis.md
diff --git a/new-docs/zh-CN/advanced/broker.md b/docs/zh-CN/advanced/broker.md
similarity index 100%
rename from new-docs/zh-CN/advanced/broker.md
rename to docs/zh-CN/advanced/broker.md
diff --git a/new-docs/zh-CN/advanced/cache/partition-cache.md b/docs/zh-CN/advanced/cache/partition-cache.md
similarity index 100%
rename from new-docs/zh-CN/advanced/cache/partition-cache.md
rename to docs/zh-CN/advanced/cache/partition-cache.md
diff --git a/new-docs/zh-CN/advanced/join-optimization/bucket-shuffle-join.md b/docs/zh-CN/advanced/join-optimization/bucket-shuffle-join.md
similarity index 100%
rename from new-docs/zh-CN/advanced/join-optimization/bucket-shuffle-join.md
rename to docs/zh-CN/advanced/join-optimization/bucket-shuffle-join.md
diff --git a/new-docs/zh-CN/advanced/join-optimization/colocation-join.md b/docs/zh-CN/advanced/join-optimization/colocation-join.md
similarity index 100%
rename from new-docs/zh-CN/advanced/join-optimization/colocation-join.md
rename to docs/zh-CN/advanced/join-optimization/colocation-join.md
diff --git a/new-docs/zh-CN/advanced/join-optimization/runtime-filter.md b/docs/zh-CN/advanced/join-optimization/runtime-filter.md
similarity index 100%
rename from new-docs/zh-CN/advanced/join-optimization/runtime-filter.md
rename to docs/zh-CN/advanced/join-optimization/runtime-filter.md
diff --git a/new-docs/zh-CN/advanced/materialized-view.md b/docs/zh-CN/advanced/materialized-view.md
similarity index 100%
rename from new-docs/zh-CN/advanced/materialized-view.md
rename to docs/zh-CN/advanced/materialized-view.md
diff --git a/new-docs/zh-CN/advanced/orthogonal-bitmap-manual.md b/docs/zh-CN/advanced/orthogonal-bitmap-manual.md
similarity index 100%
rename from new-docs/zh-CN/advanced/orthogonal-bitmap-manual.md
rename to docs/zh-CN/advanced/orthogonal-bitmap-manual.md
diff --git a/new-docs/zh-CN/advanced/partition/dynamic-partition.md b/docs/zh-CN/advanced/partition/dynamic-partition.md
similarity index 100%
rename from new-docs/zh-CN/advanced/partition/dynamic-partition.md
rename to docs/zh-CN/advanced/partition/dynamic-partition.md
diff --git a/new-docs/zh-CN/advanced/partition/table-tmp-partition.md b/docs/zh-CN/advanced/partition/table-tmp-partition.md
similarity index 100%
rename from new-docs/zh-CN/advanced/partition/table-tmp-partition.md
rename to docs/zh-CN/advanced/partition/table-tmp-partition.md
diff --git a/new-docs/zh-CN/advanced/resource.md b/docs/zh-CN/advanced/resource.md
similarity index 100%
rename from new-docs/zh-CN/advanced/resource.md
rename to docs/zh-CN/advanced/resource.md
diff --git a/new-docs/zh-CN/advanced/small-file-mgr.md b/docs/zh-CN/advanced/small-file-mgr.md
similarity index 100%
rename from new-docs/zh-CN/advanced/small-file-mgr.md
rename to docs/zh-CN/advanced/small-file-mgr.md
diff --git a/new-docs/zh-CN/advanced/time-zone.md b/docs/zh-CN/advanced/time-zone.md
similarity index 100%
rename from new-docs/zh-CN/advanced/time-zone.md
rename to docs/zh-CN/advanced/time-zone.md
diff --git a/new-docs/zh-CN/advanced/variables.md b/docs/zh-CN/advanced/variables.md
similarity index 100%
rename from new-docs/zh-CN/advanced/variables.md
rename to docs/zh-CN/advanced/variables.md
diff --git a/docs/zh-CN/administrator-guide/vectorized-execution-engine.md b/docs/zh-CN/advanced/vectorized-execution-engine.md
similarity index 100%
rename from docs/zh-CN/administrator-guide/vectorized-execution-engine.md
rename to docs/zh-CN/advanced/vectorized-execution-engine.md
diff --git a/docs/zh-CN/benchmark/samples.md b/docs/zh-CN/benchmark/samples.md
deleted file mode 100644
index 8ee915936c..0000000000
--- a/docs/zh-CN/benchmark/samples.md
+++ /dev/null
@@ -1,57 +0,0 @@
----
-{
- "title": "使用示例",
- "language": "zh-CN"
-}
----
-
-
-
-# 使用示例
-
-Doris 代码库中提供了丰富的使用示例,能够帮助 Doris 用户快速上手体验 Doris 的功能。
-
-## 示例说明
-
-示例代码都存放在 Doris 代码库的 [`samples/`](https://github.com/apache/incubator-doris/tree/master/samples) 目录下。
-
-```
-.
-├── connect
-├── doris-demo
-├── insert
-└── mini_load
-```
-
-* `connect/`
-
- 该目录下主要展示了各个程序语言连接 Doris 的代码示例。
-
-* `doris-demo/`
-
- 该目下主要以 Maven 工程的形式,展示了 Doris 多个功能的代码示例。如 spark-connector 和 flink-connector 的使用示例、与 Spring 框架集成的示例、Stream Load 导入示例等等。
-
-* `insert/`
-
- 该目录展示了通过 python 或 shell 脚本调用 Doris 的 Insert 命令导入数据的一些代码示例。
-
-* `miniload/`
-
- 该目录展示了通过 python 调用 mini load 进行数据导入的代码示例。但因为 mini load 功能已由 stream load 功能代替,建议使用 stream load 功能进行数据导入。
\ No newline at end of file
diff --git a/new-docs/zh-CN/benchmark/ssb.md b/docs/zh-CN/benchmark/ssb.md
similarity index 100%
rename from new-docs/zh-CN/benchmark/ssb.md
rename to docs/zh-CN/benchmark/ssb.md
diff --git a/docs/zh-CN/benchmark/star-schema-benchmark.md b/docs/zh-CN/benchmark/star-schema-benchmark.md
deleted file mode 100644
index 9d9b1b405f..0000000000
--- a/docs/zh-CN/benchmark/star-schema-benchmark.md
+++ /dev/null
@@ -1,182 +0,0 @@
----
-{
- "title": "Star-Schema-Benchmark 测试",
- "language": "zh-CN"
-}
----
-
-
-
-# Star Schema Benchmark
-
-[Star Schema Benchmark(SSB)](https://www.cs.umb.edu/~poneil/StarSchemaB.PDF) 是一个轻量级的数仓场景下的性能测试集。SSB基于 [TPC-H](http://www.tpc.org/tpch/) 提供了一个简化版的星型模型数据集,主要用于测试在星型模型下,多表关联查询的性能表现。
-
-本文档主要介绍如何在 Doris 中通过 SSB 进行初步的性能测试。
-
-> 注1:包括 SSB 在内的标准测试集通常和实际业务场景差距较大,并且部分测试会针对测试集进行参数调优。所以标准测试集的测试结果仅能反映数据库在特定场景下的性能表现。建议用户使用实际业务数据进行进一步的测试。
->
-> 注2:本文档涉及的操作都在 CentOS 7 环境进行。
-
-## 环境准备
-
-请先参照 [官方文档](http://doris.incubator.apache.org/master/zh-CN/installing/install-deploy.html) 进行 Doris 的安装部署,以获得一个正常运行中的 Doris 集群(至少包含 1 FE,1 BE)。
-
-以下文档中涉及的脚本都存放在 Doris 代码库的 `tools/ssb-tools/` 下。
-
-## 数据准备
-
-### 1. 下载安装 SSB 数据生成工具。
-
-执行以下脚本下载并编译 [ssb-dbgen](https://github.com/electrum/ssb-dbgen.git) 工具。
-
-```
-sh build-ssb-dbgen.sh
-```
-
-安装成功后,将在 `ssb-dbgen/` 目录下生成 `dbgen` 二进制文件。
-
-### 2. 生成 SSB 测试集
-
-执行以下脚本生成 SSB 数据集:
-
-```
-sh gen-ssb-data.sh -s 100 -c 100
-```
-
-> 注1:通过 `sh gen-ssb-data.sh -h` 查看脚本帮助。
->
-> 注2:数据会以 `.tbl` 为后缀生成在 `ssb-data/` 目录下。文件总大小约60GB。生成时间可能在数分钟到1小时不等。
->
-> 注3:`-s 100` 表示测试集大小系数为 100,`-c 100` 表示并发100个线程生成 lineorder 表的数据。`-c` 参数也决定了最终 lineorder 表的文件数量。参数越大,文件数越多,每个文件越小。
-
-在 `-s 100` 参数下,生成的数据集大小为:
-
-|Table |Rows |Size | File Number |
-|---|---|---|---|
-|lineorder| 6亿(600037902) | 60GB | 100|
-|customer|300万(3000000) |277M |1|
-|part|140万(1400000) | 116M|1|
-|supplier|20万(200000) |17M |1|
-|date| 2556|228K |1|
-
-3. 建表
-
- 复制 [create-tables.sql](https://github.com/apache/incubator-doris/tree/master/tools/ssb-tools/create-tables.sql) 中的建表语句,在 Doris 中执行。
-
-4. 导入数据
-
- 0. 准备 'doris-cluster.conf' 文件。
-
- 在调用导入脚本前,需要将 FE 的 ip 端口等信息写在 `doris-cluster.conf` 文件中。
-
- 文件位置和 `load-dimension-data.sh` 平级。
-
- 文件内容包括 FE 的 ip,HTTP 端口,用户名,密码以及待导入数据的 DB 名称:
-
- ```
- export FE_HOST="xxx"
- export FE_HTTP_PORT="8030"
- export USER="root"
- export PASSWORD='xxx'
- export DB="ssb"
- ```
-
- 1. 导入 4 张维度表数据(customer, part, supplier and date)
-
- 因为这4张维表数据量较小,导入较简单,我们使用以下命令先导入这4表的数据:
-
- `sh load-dimension-data.sh`
-
- 2. 导入事实表 lineorder。
-
- 通过以下命令导入 lineorder 表数据:
-
- `sh load-fact-data.sh -c 5`
-
- `-c 5` 表示启动 5 个并发线程导入(默认为3)。在单 BE 节点情况下,由 `sh gen-ssb-data.sh -s 100 -c 100` 生成的 lineorder 数据,使用 `sh load-fact-data.sh -c 3` 的导入时间约为 10min。内存开销约为 5-6GB。如果开启更多线程,可以加快导入速度,但会增加额外的内存开销。
-
- > 注:为获得更快的导入速度,你可以在 be.conf 中添加 `flush_thread_num_per_store=5` 后重启BE。该配置表示每个数据目录的写盘线程数,默认为2。较大的数据可以提升写数据吞吐,但可能会增加 IO Util。(参考值:1块机械磁盘,在默认为2的情况下,导入过程中的 IO Util 约为12%,设置为5时,IO Util 约为26%。如果是 SSD 盘,则几乎为 0)。
-
-5. 检查导入数据
-
- ```
- select count(*) from part;
- select count(*) from customer;
- select count(*) from supplier;
- select count(*) from date;
- select count(*) from lineorder;
- ```
-
- 数据量应和生成数据的行数一致。
-
-## 查询测试
-
-SSB 测试集共 4 组 14 个 SQL。查询语句在 [queries/](https://github.com/apache/incubator-doris/tree/master/tools/ssb-tools/queries) 目录下。
-
-## 测试报告
-
-以下测试报告基于 Doris [branch-0.15](https://github.com/apache/incubator-doris/tree/branch-0.15) 分支代码测试,仅供参考。(更新时间:2021年10月25号)
-
-1. 硬件环境
-
- * 1 FE + 1-3 BE 混部
- * CPU:96core, Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz
- * 内存:384GB
- * 硬盘:1块机械硬盘
- * 网卡:万兆网卡
-
-2. 数据集
-
- |Table |Rows |Origin Size | Compacted Size(1 Replica) |
- |---|---|---|---|
- |lineorder| 6亿(600037902) | 60 GB | 14.846 GB |
- |customer|300万(3000000) |277 MB | 414.741 MB |
- |part|140万(1400000) | 116 MB | 38.277 MB |
- |supplier|20万(200000) |17 MB | 27.428 MB |
- |date| 2556|228 KB | 275.804 KB |
-
-3. 测试结果
-
- |Query |Time(ms) (1 BE) | Time(ms) (3 BE) | Parallelism | Runtime Filter Mode |
- |---|---|---|---|---|
- | q1.1 | 200 | 140 | 8 | IN |
- | q1.2 | 90 | 80 | 8 | IN |
- | q1.3 | 90 | 80 | 8 | IN |
- | q2.1 | 1100 | 400 | 8 | BLOOM_FILTER |
- | q2.2 | 900 | 330 | 8 | BLOOM_FILTER |
- | q2.3 | 790 | 320 | 8 | BLOOM_FILTER |
- | q3.1 | 3100 | 1280 | 8 | BLOOM_FILTER |
- | q3.2 | 700 | 270 | 8 | BLOOM_FILTER |
- | q3.3 | 540 | 270 | 8 | BLOOM_FILTER |
- | q3.4 | 560 | 240 | 8 | BLOOM_FILTER |
- | q4.1 | 2820 | 1150 | 8 | BLOOM_FILTER |
- | q4.2 | 1430 | 670 | 8 | BLOOM_FILTER |
- | q4.2 | 1750 | 1030 | 8 | BLOOM_FILTER |
-
- > 注1:“这个测试集和你的生产环境相去甚远,请对他保持怀疑态度!”
- >
- > 注2:测试结果为多次执行取平均值(Page Cache 会起到一定加速作用)。并且数据经过充分的 compaction (如果在刚导入数据后立刻测试,则查询延迟可能高于本测试结果)
- >
- > 注3:因环境受限,本测试使用的硬件规格较高,但整个测试过程中不会消耗如此多的硬件资源。其中内存消耗在 10GB 以内,CPU使用率在 10% 以内。
- >
- > 注4:Parallelism 表示查询并发度,通过 `set parallel_fragment_exec_instance_num=8` 设置。
- >
- > 注5:Runtime Filter Mode 是 Runtime Filter 的类型,通过 `set runtime_filter_type="BLOOM_FILTER"` 设置。([Runtime Filter](http://doris.incubator.apache.org/master/zh-CN/administrator-guide/runtime-filter.html) 功能对 SSB 测试集效果显著。因为该测试集中,Join 算子右表的数据可以对左表起到很好的过滤作用。你可以尝试通过 `set runtime_filter_mode=off` 关闭该功能,看看查询延迟的变化。)
-
diff --git a/docs/zh-CN/benchmark/systemd.md b/docs/zh-CN/benchmark/systemd.md
deleted file mode 100644
index 21de382bfe..0000000000
--- a/docs/zh-CN/benchmark/systemd.md
+++ /dev/null
@@ -1,31 +0,0 @@
----
-{
- "title": "Systemd",
- "language": "zh-CN"
-}
----
-
-
-
-# Systemd
-
-Doris 代码库中提供了 Systemd 配置文件,可以帮助用户在 Linux 中方便地控制 Doris 服务的启停。
-
-配置文件请前往 [代码库](https://github.com/apache/incubator-doris/tree/master/tools/systemd) 查看。
diff --git a/new-docs/zh-CN/benchmark/tpc-h.md b/docs/zh-CN/benchmark/tpc-h.md
similarity index 100%
rename from new-docs/zh-CN/benchmark/tpc-h.md
rename to docs/zh-CN/benchmark/tpc-h.md
diff --git a/new-docs/zh-CN/data-operate/export/export-manual.md b/docs/zh-CN/data-operate/export/export-manual.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/export/export-manual.md
rename to docs/zh-CN/data-operate/export/export-manual.md
diff --git a/new-docs/zh-CN/data-operate/export/export_with_mysql_dump.md b/docs/zh-CN/data-operate/export/export_with_mysql_dump.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/export/export_with_mysql_dump.md
rename to docs/zh-CN/data-operate/export/export_with_mysql_dump.md
diff --git a/new-docs/zh-CN/data-operate/export/outfile.md b/docs/zh-CN/data-operate/export/outfile.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/export/outfile.md
rename to docs/zh-CN/data-operate/export/outfile.md
diff --git a/new-docs/zh-CN/data-operate/import/import-scenes/external-storage-load.md b/docs/zh-CN/data-operate/import/import-scenes/external-storage-load.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/import-scenes/external-storage-load.md
rename to docs/zh-CN/data-operate/import/import-scenes/external-storage-load.md
diff --git a/new-docs/zh-CN/data-operate/import/import-scenes/external-table-load.md b/docs/zh-CN/data-operate/import/import-scenes/external-table-load.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/import-scenes/external-table-load.md
rename to docs/zh-CN/data-operate/import/import-scenes/external-table-load.md
diff --git a/new-docs/zh-CN/data-operate/import/import-scenes/jdbc-load.md b/docs/zh-CN/data-operate/import/import-scenes/jdbc-load.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/import-scenes/jdbc-load.md
rename to docs/zh-CN/data-operate/import/import-scenes/jdbc-load.md
diff --git a/new-docs/zh-CN/data-operate/import/import-scenes/kafka-load.md b/docs/zh-CN/data-operate/import/import-scenes/kafka-load.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/import-scenes/kafka-load.md
rename to docs/zh-CN/data-operate/import/import-scenes/kafka-load.md
diff --git a/new-docs/zh-CN/data-operate/import/import-scenes/load-atomicity.md b/docs/zh-CN/data-operate/import/import-scenes/load-atomicity.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/import-scenes/load-atomicity.md
rename to docs/zh-CN/data-operate/import/import-scenes/load-atomicity.md
diff --git a/new-docs/zh-CN/data-operate/import/import-scenes/load-data-convert.md b/docs/zh-CN/data-operate/import/import-scenes/load-data-convert.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/import-scenes/load-data-convert.md
rename to docs/zh-CN/data-operate/import/import-scenes/load-data-convert.md
diff --git a/new-docs/zh-CN/data-operate/import/import-scenes/load-strict-mode.md b/docs/zh-CN/data-operate/import/import-scenes/load-strict-mode.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/import-scenes/load-strict-mode.md
rename to docs/zh-CN/data-operate/import/import-scenes/load-strict-mode.md
diff --git a/new-docs/zh-CN/data-operate/import/import-scenes/local-file-load.md b/docs/zh-CN/data-operate/import/import-scenes/local-file-load.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/import-scenes/local-file-load.md
rename to docs/zh-CN/data-operate/import/import-scenes/local-file-load.md
diff --git a/new-docs/zh-CN/data-operate/import/import-way/binlog-load-manual.md b/docs/zh-CN/data-operate/import/import-way/binlog-load-manual.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/import-way/binlog-load-manual.md
rename to docs/zh-CN/data-operate/import/import-way/binlog-load-manual.md
diff --git a/new-docs/zh-CN/data-operate/import/import-way/broker-load-manual.md b/docs/zh-CN/data-operate/import/import-way/broker-load-manual.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/import-way/broker-load-manual.md
rename to docs/zh-CN/data-operate/import/import-way/broker-load-manual.md
diff --git a/new-docs/zh-CN/data-operate/import/import-way/insert-into-manual.md b/docs/zh-CN/data-operate/import/import-way/insert-into-manual.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/import-way/insert-into-manual.md
rename to docs/zh-CN/data-operate/import/import-way/insert-into-manual.md
diff --git a/new-docs/zh-CN/data-operate/import/import-way/load-json-format.md b/docs/zh-CN/data-operate/import/import-way/load-json-format.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/import-way/load-json-format.md
rename to docs/zh-CN/data-operate/import/import-way/load-json-format.md
diff --git a/new-docs/zh-CN/data-operate/import/import-way/routine-load-manual.md b/docs/zh-CN/data-operate/import/import-way/routine-load-manual.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/import-way/routine-load-manual.md
rename to docs/zh-CN/data-operate/import/import-way/routine-load-manual.md
diff --git a/new-docs/zh-CN/data-operate/import/import-way/s3-load-manual.md b/docs/zh-CN/data-operate/import/import-way/s3-load-manual.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/import-way/s3-load-manual.md
rename to docs/zh-CN/data-operate/import/import-way/s3-load-manual.md
diff --git a/new-docs/zh-CN/data-operate/import/import-way/spark-load-manual.md b/docs/zh-CN/data-operate/import/import-way/spark-load-manual.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/import-way/spark-load-manual.md
rename to docs/zh-CN/data-operate/import/import-way/spark-load-manual.md
diff --git a/new-docs/zh-CN/data-operate/import/import-way/stream-load-manual.md b/docs/zh-CN/data-operate/import/import-way/stream-load-manual.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/import-way/stream-load-manual.md
rename to docs/zh-CN/data-operate/import/import-way/stream-load-manual.md
diff --git a/new-docs/zh-CN/data-operate/import/load-manual.md b/docs/zh-CN/data-operate/import/load-manual.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/import/load-manual.md
rename to docs/zh-CN/data-operate/import/load-manual.md
diff --git a/new-docs/zh-CN/data-operate/update-delete/batch-delete-manual.md b/docs/zh-CN/data-operate/update-delete/batch-delete-manual.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/update-delete/batch-delete-manual.md
rename to docs/zh-CN/data-operate/update-delete/batch-delete-manual.md
diff --git a/new-docs/zh-CN/data-operate/update-delete/delete-manual.md b/docs/zh-CN/data-operate/update-delete/delete-manual.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/update-delete/delete-manual.md
rename to docs/zh-CN/data-operate/update-delete/delete-manual.md
diff --git a/new-docs/zh-CN/data-operate/update-delete/sequence-column-manual.md b/docs/zh-CN/data-operate/update-delete/sequence-column-manual.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/update-delete/sequence-column-manual.md
rename to docs/zh-CN/data-operate/update-delete/sequence-column-manual.md
diff --git a/new-docs/zh-CN/data-operate/update-delete/update.md b/docs/zh-CN/data-operate/update-delete/update.md
similarity index 100%
rename from new-docs/zh-CN/data-operate/update-delete/update.md
rename to docs/zh-CN/data-operate/update-delete/update.md
diff --git a/new-docs/zh-CN/data-table/advance-usage.md b/docs/zh-CN/data-table/advance-usage.md
similarity index 100%
rename from new-docs/zh-CN/data-table/advance-usage.md
rename to docs/zh-CN/data-table/advance-usage.md
diff --git a/new-docs/zh-CN/data-table/basic-usage.md b/docs/zh-CN/data-table/basic-usage.md
similarity index 100%
rename from new-docs/zh-CN/data-table/basic-usage.md
rename to docs/zh-CN/data-table/basic-usage.md
diff --git a/new-docs/zh-CN/data-table/best-practice.md b/docs/zh-CN/data-table/best-practice.md
similarity index 100%
rename from new-docs/zh-CN/data-table/best-practice.md
rename to docs/zh-CN/data-table/best-practice.md
diff --git a/new-docs/zh-CN/data-table/data-model.md b/docs/zh-CN/data-table/data-model.md
similarity index 100%
rename from new-docs/zh-CN/data-table/data-model.md
rename to docs/zh-CN/data-table/data-model.md
diff --git a/new-docs/zh-CN/data-table/data-partition.md b/docs/zh-CN/data-table/data-partition.md
similarity index 100%
rename from new-docs/zh-CN/data-table/data-partition.md
rename to docs/zh-CN/data-table/data-partition.md
diff --git a/new-docs/zh-CN/data-table/hit-the-rollup.md b/docs/zh-CN/data-table/hit-the-rollup.md
similarity index 100%
rename from new-docs/zh-CN/data-table/hit-the-rollup.md
rename to docs/zh-CN/data-table/hit-the-rollup.md
diff --git a/new-docs/zh-CN/data-table/index/bitmap-index.md b/docs/zh-CN/data-table/index/bitmap-index.md
similarity index 100%
rename from new-docs/zh-CN/data-table/index/bitmap-index.md
rename to docs/zh-CN/data-table/index/bitmap-index.md
diff --git a/new-docs/zh-CN/data-table/index/bloomfilter.md b/docs/zh-CN/data-table/index/bloomfilter.md
similarity index 100%
rename from new-docs/zh-CN/data-table/index/bloomfilter.md
rename to docs/zh-CN/data-table/index/bloomfilter.md
diff --git a/new-docs/zh-CN/data-table/index/prefix-index.md b/docs/zh-CN/data-table/index/prefix-index.md
similarity index 100%
rename from new-docs/zh-CN/data-table/index/prefix-index.md
rename to docs/zh-CN/data-table/index/prefix-index.md
diff --git a/new-docs/zh-CN/ecosystem/audit-plugin.md b/docs/zh-CN/ecosystem/audit-plugin.md
similarity index 100%
rename from new-docs/zh-CN/ecosystem/audit-plugin.md
rename to docs/zh-CN/ecosystem/audit-plugin.md
diff --git a/new-docs/zh-CN/ecosystem/datax.md b/docs/zh-CN/ecosystem/datax.md
similarity index 100%
rename from new-docs/zh-CN/ecosystem/datax.md
rename to docs/zh-CN/ecosystem/datax.md
diff --git a/docs/zh-CN/extending-doris/doris-manager/cluster-managenent.md b/docs/zh-CN/ecosystem/doris-manager/cluster-managenent.md
similarity index 100%
rename from docs/zh-CN/extending-doris/doris-manager/cluster-managenent.md
rename to docs/zh-CN/ecosystem/doris-manager/cluster-managenent.md
diff --git a/docs/zh-CN/extending-doris/doris-manager/compiling-deploying.md b/docs/zh-CN/ecosystem/doris-manager/compiling-deploying.md
similarity index 100%
rename from docs/zh-CN/extending-doris/doris-manager/compiling-deploying.md
rename to docs/zh-CN/ecosystem/doris-manager/compiling-deploying.md
diff --git a/docs/zh-CN/extending-doris/doris-manager/initializing.md b/docs/zh-CN/ecosystem/doris-manager/initializing.md
similarity index 100%
rename from docs/zh-CN/extending-doris/doris-manager/initializing.md
rename to docs/zh-CN/ecosystem/doris-manager/initializing.md
diff --git a/docs/zh-CN/extending-doris/doris-manager/space-list.md b/docs/zh-CN/ecosystem/doris-manager/space-list.md
similarity index 100%
rename from docs/zh-CN/extending-doris/doris-manager/space-list.md
rename to docs/zh-CN/ecosystem/doris-manager/space-list.md
diff --git a/docs/zh-CN/extending-doris/doris-manager/space-management.md b/docs/zh-CN/ecosystem/doris-manager/space-management.md
similarity index 100%
rename from docs/zh-CN/extending-doris/doris-manager/space-management.md
rename to docs/zh-CN/ecosystem/doris-manager/space-management.md
diff --git a/docs/zh-CN/extending-doris/doris-manager/system-settings.md b/docs/zh-CN/ecosystem/doris-manager/system-settings.md
similarity index 100%
rename from docs/zh-CN/extending-doris/doris-manager/system-settings.md
rename to docs/zh-CN/ecosystem/doris-manager/system-settings.md
diff --git a/new-docs/zh-CN/ecosystem/external-table/doris-on-es.md b/docs/zh-CN/ecosystem/external-table/doris-on-es.md
similarity index 100%
rename from new-docs/zh-CN/ecosystem/external-table/doris-on-es.md
rename to docs/zh-CN/ecosystem/external-table/doris-on-es.md
diff --git a/new-docs/zh-CN/ecosystem/external-table/hive-of-doris.md b/docs/zh-CN/ecosystem/external-table/hive-of-doris.md
similarity index 100%
rename from new-docs/zh-CN/ecosystem/external-table/hive-of-doris.md
rename to docs/zh-CN/ecosystem/external-table/hive-of-doris.md
diff --git a/new-docs/zh-CN/ecosystem/external-table/iceberg-of-doris.md b/docs/zh-CN/ecosystem/external-table/iceberg-of-doris.md
similarity index 100%
rename from new-docs/zh-CN/ecosystem/external-table/iceberg-of-doris.md
rename to docs/zh-CN/ecosystem/external-table/iceberg-of-doris.md
diff --git a/new-docs/zh-CN/ecosystem/external-table/odbc-of-doris.md b/docs/zh-CN/ecosystem/external-table/odbc-of-doris.md
similarity index 100%
rename from new-docs/zh-CN/ecosystem/external-table/odbc-of-doris.md
rename to docs/zh-CN/ecosystem/external-table/odbc-of-doris.md
diff --git a/new-docs/zh-CN/ecosystem/flink-doris-connector.md b/docs/zh-CN/ecosystem/flink-doris-connector.md
similarity index 100%
rename from new-docs/zh-CN/ecosystem/flink-doris-connector.md
rename to docs/zh-CN/ecosystem/flink-doris-connector.md
diff --git a/new-docs/zh-CN/ecosystem/logstash.md b/docs/zh-CN/ecosystem/logstash.md
similarity index 100%
rename from new-docs/zh-CN/ecosystem/logstash.md
rename to docs/zh-CN/ecosystem/logstash.md
diff --git a/new-docs/zh-CN/ecosystem/plugin-development-manual.md b/docs/zh-CN/ecosystem/plugin-development-manual.md
similarity index 100%
rename from new-docs/zh-CN/ecosystem/plugin-development-manual.md
rename to docs/zh-CN/ecosystem/plugin-development-manual.md
diff --git a/new-docs/zh-CN/ecosystem/seatunnel/flink-sink.md b/docs/zh-CN/ecosystem/seatunnel/flink-sink.md
similarity index 100%
rename from new-docs/zh-CN/ecosystem/seatunnel/flink-sink.md
rename to docs/zh-CN/ecosystem/seatunnel/flink-sink.md
diff --git a/new-docs/zh-CN/ecosystem/seatunnel/spark-sink.md b/docs/zh-CN/ecosystem/seatunnel/spark-sink.md
similarity index 100%
rename from new-docs/zh-CN/ecosystem/seatunnel/spark-sink.md
rename to docs/zh-CN/ecosystem/seatunnel/spark-sink.md
diff --git a/new-docs/zh-CN/ecosystem/spark-doris-connector.md b/docs/zh-CN/ecosystem/spark-doris-connector.md
similarity index 100%
rename from new-docs/zh-CN/ecosystem/spark-doris-connector.md
rename to docs/zh-CN/ecosystem/spark-doris-connector.md
diff --git a/new-docs/zh-CN/ecosystem/udf/contribute-udf.md b/docs/zh-CN/ecosystem/udf/contribute-udf.md
similarity index 100%
rename from new-docs/zh-CN/ecosystem/udf/contribute-udf.md
rename to docs/zh-CN/ecosystem/udf/contribute-udf.md
diff --git a/new-docs/zh-CN/ecosystem/udf/native-user-defined-function.md b/docs/zh-CN/ecosystem/udf/native-user-defined-function.md
similarity index 100%
rename from new-docs/zh-CN/ecosystem/udf/native-user-defined-function.md
rename to docs/zh-CN/ecosystem/udf/native-user-defined-function.md
diff --git a/new-docs/zh-CN/ecosystem/udf/remote-user-defined-function.md b/docs/zh-CN/ecosystem/udf/remote-user-defined-function.md
similarity index 100%
rename from new-docs/zh-CN/ecosystem/udf/remote-user-defined-function.md
rename to docs/zh-CN/ecosystem/udf/remote-user-defined-function.md
diff --git a/docs/zh-CN/extending-doris/audit-plugin.md b/docs/zh-CN/extending-doris/audit-plugin.md
deleted file mode 100644
index df9387da87..0000000000
--- a/docs/zh-CN/extending-doris/audit-plugin.md
+++ /dev/null
@@ -1,119 +0,0 @@
----
-{
- "title": "审计日志插件",
- "language": "zh-CN"
-}
----
-
-
-
-# 审计日志插件
-
-Doris 的审计日志插件是在 FE 的插件框架基础上开发的。是一个可选插件。用户可以在运行时安装或卸载这个插件。
-
-该插件可以将 FE 的审计日志定期的导入到指定 Doris 集群中,以方便用户通过 SQL 对审计日志进行查看和分析。
-
-## 编译、配置和部署
-
-### FE 配置
-
-FE 的插件框架当前是实验性功能,在 Doris 中默认关闭。需要在 FE 的配置文件中增加 `plugin_enable = true` 来启用插件框架。
-
-### AuditLoader 配置
-
-auditloader plugin的配置位于`${DORIS}/fe_plugins/auditloader/src/main/assembly/`.
-
-打开 `plugin.conf` 进行配置。配置项说明参见注释。
-
-### 编译
-
-在 Doris 代码目录下执行 `sh build_plugin.sh` 后,会在 `fe_plugins/output` 目录下得到 `auditloader.zip` 文件。
-
-### 部署
-
-您可以将这个文件放置在一个 http 服务器上,或者拷贝`auditloader.zip`(或者解压`auditloader.zip`)到所有 FE 的指定目录下。这里我们使用后者。
-
-### 安装
-
-部署完成后,安装插件前,需要创建之前在 `plugin.conf` 中指定的审计数据库和表。其中建表语句如下:
-
-```
-create table doris_audit_tbl__
-(
- query_id varchar(48) comment "Unique query id",
- time datetime not null comment "Query start time",
- client_ip varchar(32) comment "Client IP",
- user varchar(64) comment "User name",
- db varchar(96) comment "Database of this query",
- state varchar(8) comment "Query result state. EOF, ERR, OK",
- query_time bigint comment "Query execution time in millisecond",
- scan_bytes bigint comment "Total scan bytes of this query",
- scan_rows bigint comment "Total scan rows of this query",
- return_rows bigint comment "Returned rows of this query",
- stmt_id int comment "An incremental id of statement",
- is_query tinyint comment "Is this statement a query. 1 or 0",
- frontend_ip varchar(32) comment "Frontend ip of executing this statement",
- cpu_time_ms bigint comment "Total scan cpu time in millisecond of this query",
- sql_hash varchar(48) comment "Hash value for this query",
- peak_memory_bytes bigint comment "Peak memory bytes used on all backends of this query",
- stmt string comment "The original statement, trimmed if longer than 2G "
-) engine=OLAP
-duplicate key(query_id, time, client_ip)
-partition by range(time) ()
-distributed by hash(query_id) buckets 1
-properties(
- "dynamic_partition.time_unit" = "DAY",
- "dynamic_partition.start" = "-30",
- "dynamic_partition.end" = "3",
- "dynamic_partition.prefix" = "p",
- "dynamic_partition.buckets" = "1",
- "dynamic_partition.enable" = "true",
- "replication_num" = "3"
-);
-```
-
->**注意**
->
-> 上面表结构中的 stmt 字段为 string 类型,该类型只能在 0.15 及之后的版本中使用;之前的版本请将该字段类型改为 varchar
-
-其中 `dynamic_partition` 属性根据自己的需要,选择审计日志保留的天数。
-
-之后,连接到 Doris 后使用 `INSTALL PLUGIN` 命令完成安装。安装成功后,可以通过 `SHOW PLUGINS` 看到已经安装的插件,并且状态为 `INSTALLED`。
-
-完成后,插件会不断的以指定的时间间隔将审计日志插入到这个表中。
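-
-下面给出一个安装与验证的示意示例(仅供参考,其中的 zip 路径与审计库名均为假设值,请按 `plugin.conf` 中的实际配置调整):
-
-```sql
--- 从 FE 本地目录安装插件(路径为示例)
-INSTALL PLUGIN FROM "/home/doris/plugins/auditloader.zip";
-
--- 查看插件列表,确认状态为 INSTALLED
-SHOW PLUGINS;
-
--- 审计日志入表后即可用 SQL 分析,例如查询耗时最长的 10 条语句
-SELECT time, client_ip, user, query_time, stmt
-FROM doris_audit_db__.doris_audit_tbl__
-ORDER BY query_time DESC
-LIMIT 10;
-```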
-
diff --git a/docs/zh-CN/extending-doris/datax.md b/docs/zh-CN/extending-doris/datax.md
deleted file mode 100644
index 6c11f5b94d..0000000000
--- a/docs/zh-CN/extending-doris/datax.md
+++ /dev/null
@@ -1,104 +0,0 @@
----
-{
- "title": "DataX doriswriter",
- "language": "zh-CN"
-}
----
-
-
-
-# DataX doriswriter
-
-[DataX](https://github.com/alibaba/DataX) doriswriter 插件,用于通过 DataX 同步其他数据源的数据到 Doris 中。
-
-这个插件是利用Doris的Stream Load 功能进行数据导入的。需要配合 DataX 服务一起使用。
-
-## 关于 DataX
-
-DataX 是阿里云 DataWorks数据集成 的开源版本,在阿里巴巴集团内被广泛使用的离线数据同步工具/平台。DataX 实现了包括 MySQL、Oracle、SqlServer、Postgre、HDFS、Hive、ADS、HBase、TableStore(OTS)、MaxCompute(ODPS)、Hologres、DRDS 等各种异构数据源之间高效的数据同步功能。
-
-更多信息请参阅: `https://github.com/alibaba/DataX/`
-
-## 使用手册
-
-DataX doriswriter 插件代码 [这里](https://github.com/apache/incubator-doris/tree/master/extension/DataX)。
-
-这个目录包含插件代码以及 DataX 项目的开发环境。
-
-doriswriter 插件依赖的 DataX 代码中的一些模块。而这些模块并没有在 Maven 官方仓库中。所以我们在开发 doriswriter 插件时,需要下载完整的 DataX 代码库,才能进行插件的编译和开发。
-
-### 目录结构
-
-1. `doriswriter/`
-
- 这个目录是 doriswriter 插件的代码目录。这个目录中的所有代码,都托管在 Apache Doris 的代码库中。
-
- doriswriter 插件帮助文档在这里:`doriswriter/doc`
-
-2. `init-env.sh`
-
- 这个脚本主要用于构建 DataX 开发环境,它主要进行了以下操作:
-
- 1. 将 DataX 代码库 clone 到本地。
- 2. 将 `doriswriter/` 目录软链到 `DataX/doriswriter` 目录。
- 3. 在 `DataX/pom.xml` 文件中添加 `doriswriter` 模块。
- 4. 将 `DataX/core/pom.xml` 文件中的 httpclient 版本从 4.5 改为 4.5.13.
-
- > httpclient v4.5 在处理 307 转发时有bug。
-
- 这个脚本执行后,开发者就可以进入 `DataX/` 目录开始开发或编译了。因为做了软链,所以任何对 `DataX/doriswriter` 目录中文件的修改,都会反映到 `doriswriter/` 目录中,方便开发者提交代码。
-
-### 编译
-
-1. 运行 `init-env.sh`
-2. 按需修改 `DataX/doriswriter` 中的代码。
-3. 编译 doriswriter:
-
- 1. 单独编译 doriswriter 插件:
-
- `mvn clean install -pl plugin-rdbms-util,doriswriter -DskipTests`
-
- 2. 编译整个 DataX 项目:
-
- `mvn package assembly:assembly -Dmaven.test.skip=true`
-
- 产出在 `target/datax/datax/`.
-
- > hdfsreader, hdfswriter and oscarwriter 这三个插件需要额外的jar包。如果你并不需要这些插件,可以在 `DataX/pom.xml` 中删除这些插件的模块。
-
- 3. 编译错误
-
- 如遇到如下编译错误:
-
- ```
- Could not find artifact com.alibaba.datax:datax-all:pom:0.0.1-SNAPSHOT ...
- ```
-
- 可尝试以下方式解决:
-
- 1. 下载 [alibaba-datax-maven-m2-20210928.tar.gz](https://doris-thirdparty-repo.bj.bcebos.com/thirdparty/alibaba-datax-maven-m2-20210928.tar.gz)
- 2. 解压后,将得到的 `alibaba/datax/` 目录,拷贝到所使用的 maven 对应的 `.m2/repository/com/alibaba/` 下。
- 3. 再次尝试编译。
-
-4. 按需提交修改。
-
-### 示例
-
-doriswriter 插件的使用说明请参阅 [这里](https://github.com/apache/incubator-doris/blob/master/extension/DataX/doriswriter/doc/doriswriter.md)
diff --git a/docs/zh-CN/extending-doris/doris-on-es.md b/docs/zh-CN/extending-doris/doris-on-es.md
deleted file mode 100644
index 7840b6be27..0000000000
--- a/docs/zh-CN/extending-doris/doris-on-es.md
+++ /dev/null
@@ -1,588 +0,0 @@
----
-{
- "title": "Doris On ES",
- "language": "zh-CN"
-}
----
-
-
-
-# Doris On ES
-
-Doris-On-ES将Doris的分布式查询规划能力和ES(Elasticsearch)的全文检索能力相结合,提供更完善的OLAP分析场景解决方案:
-
- 1. ES中的多index分布式Join查询
- 2. Doris和ES中的表联合查询,更复杂的全文检索过滤
-
-本文档主要介绍该功能的实现原理、使用方式等。
-
-## 名词解释
-
-### Doris相关
-* FE:Frontend,Doris 的前端节点,负责元数据管理和请求接入
-* BE:Backend,Doris 的后端节点,负责查询执行和数据存储
-
-### ES相关
-* DataNode:ES的数据存储与计算节点
-* MasterNode:ES的Master节点,管理元数据、节点、数据分布等
-* scroll:ES内置的数据集游标特性,用来对数据进行流式扫描和过滤
-* _source: 导入时传入的原始JSON格式文档内容
-* doc_values: ES/Lucene 中字段的列式存储定义
-* keyword: 字符串类型字段,ES/Lucene不会对文本内容进行分词处理
-* text: 字符串类型字段,ES/Lucene会对文本内容进行分词处理,分词器需要用户指定,默认为standard英文分词器
-
-
-## 使用方法
-
-### 创建ES索引
-
-```
-PUT test
-{
- "settings": {
- "index": {
- "number_of_shards": "1",
- "number_of_replicas": "0"
- }
- },
- "mappings": {
- "doc": { // ES 7.x版本之后创建索引时不需要指定type,会有一个默认且唯一的`_doc` type
- "properties": {
- "k1": {
- "type": "long"
- },
- "k2": {
- "type": "date"
- },
- "k3": {
- "type": "keyword"
- },
- "k4": {
- "type": "text",
- "analyzer": "standard"
- },
- "k5": {
- "type": "float"
- }
- }
- }
- }
-}
-```
-
-### ES索引导入数据
-
-```
-POST /_bulk
-{"index":{"_index":"test","_type":"doc"}}
-{ "k1" : 100, "k2": "2020-01-01", "k3": "Trying out Elasticsearch", "k4": "Trying out Elasticsearch", "k5": 10.0}
-{"index":{"_index":"test","_type":"doc"}}
-{ "k1" : 100, "k2": "2020-01-01", "k3": "Trying out Doris", "k4": "Trying out Doris", "k5": 10.0}
-{"index":{"_index":"test","_type":"doc"}}
-{ "k1" : 100, "k2": "2020-01-01", "k3": "Doris On ES", "k4": "Doris On ES", "k5": 10.0}
-{"index":{"_index":"test","_type":"doc"}}
-{ "k1" : 100, "k2": "2020-01-01", "k3": "Doris", "k4": "Doris", "k5": 10.0}
-{"index":{"_index":"test","_type":"doc"}}
-{ "k1" : 100, "k2": "2020-01-01", "k3": "ES", "k4": "ES", "k5": 10.0}
-```
-
-### Doris中创建ES外表
-
-```
-CREATE EXTERNAL TABLE `test` (
- `k1` bigint(20) COMMENT "",
- `k2` datetime COMMENT "",
- `k3` varchar(20) COMMENT "",
- `k4` varchar(100) COMMENT "",
- `k5` float COMMENT ""
-) ENGINE=ELASTICSEARCH // ENGINE必须是Elasticsearch
-PROPERTIES (
-"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200",
-"index" = "test",
-"type" = "doc",
-
-"user" = "root",
-"password" = "root"
-);
-```
-
-参数说明:
-
-参数 | 说明
----|---
-**hosts** | ES集群地址,可以是一个或多个,也可以是ES前端的负载均衡地址
-**index** | 对应的ES的index名字,支持alias,如果使用doc_value,需要使用真实的名称
-**type** | index的type,不指定的情况会使用_doc
-**user** | ES集群用户名
-**password** | 对应用户的密码信息
-
-* ES 7.x之前的集群请注意在建表的时候选择正确的**索引类型type**
-* 认证方式目前仅支持 Http Basic 认证,并且需要确保该用户有访问 /\_cluster/state/、\_nodes/http 等路径以及 index 的读权限;如果集群未开启安全认证,则不需要设置用户名和密码
-* Doris表中的列名需要和ES中的字段名完全匹配,字段类型应该保持一致
-* **ENGINE**必须是 **Elasticsearch**
-
-##### 过滤条件下推
-`Doris On ES`一个重要的功能就是过滤条件的下推: 过滤条件下推给ES,这样只有真正满足条件的数据才会被返回,能够显著的提高查询性能和降低Doris和Elasticsearch的CPU、memory、IO使用量
-
-下面的操作符(Operators)会被优化成如下ES Query:
-
-| SQL syntax | ES 5.x+ syntax |
-|-------|:---:|
-| = | term query|
-| in | terms query |
-| > , < , >= , ⇐ | range query |
-| and | bool.filter |
-| or | bool.should |
-| not | bool.must_not |
-| not in | bool.must_not + terms query |
-| is\_not\_null | exists query |
-| is\_null | bool.must_not + exists query |
-| esquery | ES原生json形式的QueryDSL |
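-
-下面给出一个示意查询,展示上表中几类过滤条件被转换后下推给 ES 的效果(表名沿用前文创建的外表 `test`):
-
-```sql
--- k1 上的范围条件转换为 range query,k3 上的等值条件转换为 term query,
--- in 转换为 terms query,多个条件以 and 组合后对应 bool.filter,整体下推到 ES 执行
-SELECT k1, k3
-FROM test
-WHERE k1 >= 100
-  AND k3 = 'Doris'
-  AND k2 IN ('2020-01-01', '2020-01-02');
-```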
-
-##### 数据类型映射
-
-Doris\ES | byte | short | integer | long | float | double| keyword | text | date
-------------- | ------------- | ------ | ---- | ----- | ---- | ------ | ----| --- | --- |
-tinyint | √ | | | | | | | |
-smallint | √ | √ | | | | | | |
-int | √ | √ | √ | | | | | |
-bigint | √ | √ | √ | √ | | | | |
-float | | | | | √ | | | |
-double | | | | | | √ | | |
-char | | | | | | | √ | √ |
-varchar | | | | | | | √ | √ |
-date | | | | | | | | | √|
-datetime | | | | | | | | | √|
-
-
-### 启用列式扫描优化查询速度(enable\_docvalue\_scan=true)
-
-```
-CREATE EXTERNAL TABLE `test` (
- `k1` bigint(20) COMMENT "",
- `k2` datetime COMMENT "",
- `k3` varchar(20) COMMENT "",
- `k4` varchar(100) COMMENT "",
- `k5` float COMMENT ""
-) ENGINE=ELASTICSEARCH
-PROPERTIES (
-"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200",
-"index" = "test",
-"type" = "doc",
-"user" = "root",
-"password" = "root",
-
-"enable_docvalue_scan" = "true"
-);
-```
-
-参数说明:
-
-参数 | 说明
----|---
-**enable\_docvalue\_scan** | 是否开启通过ES/Lucene列式存储获取查询字段的值,默认为false
-
-开启后Doris从ES中获取数据会遵循以下两个原则:
-
-* **尽力而为**: 自动探测要读取的字段是否开启列式存储(doc_value: true),如果获取的字段全部有列存,Doris会从列式存储中获取所有字段的值
-* **自动降级**: 如果要获取的字段只要有一个字段没有列存,所有字段的值都会从行存`_source`中解析获取
-
-##### 优势:
-
-默认情况下,Doris On ES会从行存也就是`_source`中获取所需的所有列,`_source`的存储采用的行式+json的形式存储,在批量读取性能上要劣于列式存储,尤其在只需要少数列的情况下尤为明显,只获取少数列的情况下,docvalue的性能大约是_source性能的十几倍
-
-##### 注意
-1. `text`类型的字段在ES中是没有列式存储,因此如果要获取的字段值有`text`类型字段会自动降级为从`_source`中获取
-2. 在获取的字段数量过多的情况下(`>= 25`),从`docvalue`中获取字段值的性能会和从`_source`中获取字段值基本一样
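-
-结合上面的规则,下面给出一个示意对比(沿用前文的外表 `test`,其中 `k4` 为 `text` 类型):
-
-```sql
--- 只查询开启了 doc_values 的列,可以全部走列式存储读取,性能更好
-SELECT k1, k5 FROM test WHERE k1 > 100;
-
--- 查询包含 text 类型的 k4,所有字段都会降级为从 _source 中解析获取
-SELECT k1, k4 FROM test WHERE k1 > 100;
-```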
-
-
-### 探测keyword类型字段(enable\_keyword\_sniff=true)
-
-```
-CREATE EXTERNAL TABLE `test` (
- `k1` bigint(20) COMMENT "",
- `k2` datetime COMMENT "",
- `k3` varchar(20) COMMENT "",
- `k4` varchar(100) COMMENT "",
- `k5` float COMMENT ""
-) ENGINE=ELASTICSEARCH
-PROPERTIES (
-"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200",
-"index" = "test",
-"type" = "doc",
-"user" = "root",
-"password" = "root",
-
-"enable_keyword_sniff" = "true"
-);
-```
-
-参数说明:
-
-参数 | 说明
----|---
-**enable\_keyword\_sniff** | 是否对ES中字符串类型分词类型(**text**) `fields` 进行探测,获取额外的未分词(**keyword**)字段名(multi-fields机制)
-
-在ES中可以不建立index直接进行数据导入,这时候ES会自动创建一个新的索引,针对字符串类型的字段ES会创建一个既有`text`类型的字段又有`keyword`类型的字段,这就是ES的multi fields特性,mapping如下:
-
-```
-"k4": {
- "type": "text",
- "fields": {
- "keyword": {
- "type": "keyword",
- "ignore_above": 256
- }
- }
-}
-```
-对k4进行条件过滤时比如=,Doris On ES会将查询转换为ES的TermQuery
-
-SQL过滤条件:
-
-```
-k4 = "Doris On ES"
-```
-
-转换成ES的query DSL为:
-
-```
-"term" : {
- "k4": "Doris On ES"
-
-}
-```
-
-因为k4的第一字段类型为`text`,在数据导入的时候就会根据k4设置的分词器(如果没有设置,就是standard分词器)进行分词处理得到doris、on、es三个Term,如下ES analyze API分析:
-
-```
-POST /_analyze
-{
- "analyzer": "standard",
- "text": "Doris On ES"
-}
-```
-分词的结果是:
-
-```
-{
- "tokens": [
- {
- "token": "doris",
- "start_offset": 0,
- "end_offset": 5,
- "type": "",
- "position": 0
- },
- {
- "token": "on",
- "start_offset": 6,
- "end_offset": 8,
- "type": "",
- "position": 1
- },
- {
- "token": "es",
- "start_offset": 9,
- "end_offset": 11,
- "type": "",
- "position": 2
- }
- ]
-}
-```
-查询时使用的是:
-
-```
-"term" : {
- "k4": "Doris On ES"
-}
-```
-`Doris On ES`这个term匹配不到词典中的任何term,不会返回任何结果,而启用`enable_keyword_sniff: true`会自动将`k4 = "Doris On ES"`转换成`k4.keyword = "Doris On ES"`来完全匹配SQL语义,转换后的ES query DSL为:
-
-```
-"term" : {
- "k4.keyword": "Doris On ES"
-}
-```
-
-`k4.keyword` 的类型是`keyword`,数据写入ES中是一个完整的term,所以可以匹配
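-
-也就是说,开启该参数后,下面第一条查询可以按 SQL 语义精确匹配;如果确实需要按分词结果检索,可以改用 `esquery` 下推 match 查询(以下仅为示意):
-
-```sql
--- enable_keyword_sniff = true 时,= 被转换为 k4.keyword 上的 term query,实现精确匹配
-SELECT * FROM test WHERE k4 = 'Doris On ES';
-
--- 需要按分词结果检索时,可通过 esquery 下推 match 查询
-SELECT * FROM test WHERE esquery(k4, '{"match": {"k4": "doris"}}');
-```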
-
-### 开启节点自动发现, 默认为true(es\_nodes\_discovery=true)
-
-```
-CREATE EXTERNAL TABLE `test` (
- `k1` bigint(20) COMMENT "",
- `k2` datetime COMMENT "",
- `k3` varchar(20) COMMENT "",
- `k4` varchar(100) COMMENT "",
- `k5` float COMMENT ""
-) ENGINE=ELASTICSEARCH
-PROPERTIES (
-"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200",
-"index" = "test",
-"type" = "doc",
-"user" = "root",
-"password" = "root",
-
-"nodes_discovery" = "true"
-);
-```
-
-参数说明:
-
-参数 | 说明
----|---
-**es\_nodes\_discovery** | 是否开启es节点发现,默认为true
-
-当配置为 true 时,Doris 将从 ES 找到所有可用的相关数据节点(在上面分配的分片)。如果 ES 数据节点的地址不能被 Doris BE 访问到,则应设置为 false,例如 ES 集群部署在与公共 Internet 隔离的内网、用户只能通过代理访问的场景。
-
-### ES集群是否开启https访问模式,如果开启应设置为`true`,默认为false(http\_ssl\_enabled=true)
-
-```
-CREATE EXTERNAL TABLE `test` (
- `k1` bigint(20) COMMENT "",
- `k2` datetime COMMENT "",
- `k3` varchar(20) COMMENT "",
- `k4` varchar(100) COMMENT "",
- `k5` float COMMENT ""
-) ENGINE=ELASTICSEARCH
-PROPERTIES (
-"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200",
-"index" = "test",
-"type" = "doc",
-"user" = "root",
-"password" = "root",
-
-"http_ssl_enabled" = "true"
-);
-```
-
-参数说明:
-
-参数 | 说明
----|---
-**http\_ssl\_enabled** | ES集群是否开启https访问模式
-
-目前 FE/BE 的实现方式为信任所有证书,这是临时解决方案,后续会使用用户配置的真实证书
-
-### 查询用法
-
-完成在Doris中建立ES外表后,除了无法使用Doris中的数据模型(rollup、预聚合、物化视图等)外并无区别
-
-#### 基本查询
-
-```
-select * from es_table where k1 > 1000 and k3 ='term' or k4 like 'fu*z_'
-```
-
-#### 扩展的esquery(field, QueryDSL)
-通过`esquery(field, QueryDSL)`函数将一些无法用sql表述的query如match_phrase、geoshape等下推给ES进行过滤处理,`esquery`的第一个列名参数用于关联`index`,第二个参数是ES的基本`Query DSL`的json表述,使用花括号`{}`包含,json的`root key`有且只能有一个,如match_phrase、geo_shape、bool等
-
-match_phrase查询:
-
-```
-select * from es_table where esquery(k4, '{
- "match_phrase": {
- "k4": "doris on es"
- }
- }');
-```
-geo相关查询:
-
-```
-select * from es_table where esquery(k4, '{
- "geo_shape": {
- "location": {
- "shape": {
- "type": "envelope",
- "coordinates": [
- [
- 13,
- 53
- ],
- [
- 14,
- 52
- ]
- ]
- },
- "relation": "within"
- }
- }
- }');
-```
-
-bool查询:
-
-```
-select * from es_table where esquery(k4, ' {
- "bool": {
- "must": [
- {
- "terms": {
- "k1": [
- 11,
- 12
- ]
- }
- },
- {
- "terms": {
- "k2": [
- 100
- ]
- }
- }
- ]
- }
- }');
-```
-
-
-
-## 原理
-
-```
-+----------------------------------------------+
-| |
-| Doris +------------------+ |
-| | FE +--------------+-------+
-| | | Request Shard Location
-| +--+-------------+-+ | |
-| ^ ^ | |
-| | | | |
-| +-------------------+ +------------------+ | |
-| | | | | | | | |
-| | +----------+----+ | | +--+-----------+ | | |
-| | | BE | | | | BE | | | |
-| | +---------------+ | | +--------------+ | | |
-+----------------------------------------------+ |
- | | | | | | |
- | | | | | | |
- | HTTP SCROLL | | HTTP SCROLL | |
-+-----------+---------------------+------------+ |
-| | v | | v | | |
-| | +------+--------+ | | +------+-------+ | | |
-| | | | | | | | | | |
-| | | DataNode | | | | DataNode +<-----------+
-| | | | | | | | | | |
-| | | +<--------------------------------+
-| | +---------------+ | | |--------------| | | |
-| +-------------------+ +------------------+ | |
-| Same Physical Node | |
-| | |
-| +-----------------------+ | |
-| | | | |
-| | MasterNode +<-----------------+
-| ES | | |
-| +-----------------------+ |
-+----------------------------------------------+
-
-
-```
-
-1. 创建ES外表后,FE会请求建表指定的主机,获取所有节点的HTTP端口信息以及index的shard分布信息等,如果请求失败会顺序遍历host列表直至成功或完全失败
-
-2. 查询时会根据FE得到的一些节点信息和index的元数据信息,生成查询计划并发给对应的BE节点
-
-3. BE节点会根据`就近原则`即优先请求本地部署的ES节点,BE通过`HTTP Scroll`方式流式的从ES index的每个分片中并发的从`_source`或`docvalue`中获取数据
-
-4. Doris计算完结果后,返回给用户
-
-## 最佳实践
-
-### 时间类型字段使用建议
-
-在ES中,时间类型的字段使用十分灵活,但是在Doris On ES中如果对时间类型字段的类型设置不当,则会造成过滤条件无法下推
-
-创建索引时对时间类型格式的设置做最大程度的格式兼容:
-
-```
- "dt": {
- "type": "date",
- "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
- }
-```
-
-在Doris中建立该字段时建议设置为`date`或`datetime`,也可以设置为`varchar`类型, 使用如下SQL语句都可以直接将过滤条件下推至ES:
-
-```
-select * from doe where k2 > '2020-06-21';
-
-select * from doe where k2 < '2020-06-21 12:00:00';
-
-select * from doe where k2 < 1593497011;
-
-select * from doe where k2 < now();
-
-select * from doe where k2 < date_format(now(), '%Y-%m-%d');
-```
-
-注意:
-
-* 在ES中如果不对时间类型的字段设置`format`, 默认的时间类型字段格式为
-
-```
-strict_date_optional_time||epoch_millis
-```
-
-* 导入到ES的日期字段如果是时间戳需要转换成`ms`, ES内部处理时间戳都是按照`ms`进行处理的, 否则Doris On ES会出现显示错误
-
-### 获取ES元数据字段`_id`
-
-导入文档在不指定`_id`的情况下ES会给每个文档分配一个全局唯一的`_id`即主键, 用户也可以在导入时为文档指定一个含有特殊业务意义的`_id`; 如果需要在Doris On ES中获取该字段值,建表时可以增加类型为`varchar`的`_id`字段:
-
-```
-CREATE EXTERNAL TABLE `doe` (
- `_id` varchar COMMENT "",
- `city` varchar COMMENT ""
-) ENGINE=ELASTICSEARCH
-PROPERTIES (
-"hosts" = "http://127.0.0.1:8200",
-"user" = "root",
-"password" = "root",
-"index" = "doe",
-"type" = "doc"
-}
-```
-
-注意:
-
-1. `_id`字段的过滤条件仅支持`=`和`in`两种
-2. `_id`字段只能是`varchar`类型
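-
-例如,下面的示意查询按 `_id` 获取文档(其中的 `_id` 取值为假设值):
-
-```sql
--- _id 上仅支持 = 和 in 两种过滤条件
-SELECT _id, city FROM doe WHERE _id = 'g1Wuex8BikGBYjpJpzcN';
-
-SELECT _id, city FROM doe WHERE _id IN ('id_1', 'id_2');
-```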
-
-## Q&A
-
-1. Doris On ES对ES的版本要求
-
- ES 主版本需大于 5;ES 在 2.x 之前和 5.x 之后数据的扫描方式不同,目前仅支持 5.x 之后的版本
-
-2. 是否支持X-Pack认证的ES集群
-
- 支持所有使用HTTP Basic认证方式的ES集群
-3. 一些查询比请求ES慢很多
-
- 是,比如_count相关的query等,ES内部会直接读取满足条件的文档个数相关的元数据,不需要对真实的数据进行过滤
-
-4. 聚合操作是否可以下推
-
- 目前Doris On ES不支持聚合操作如sum, avg, min/max 等下推,计算方式是批量流式的从ES获取所有满足条件的文档,然后在Doris中进行计算
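-
-    例如下面的示意聚合查询,`k1 > 1000` 这类过滤条件仍会下推给 ES,但 count/sum 等聚合在 Doris 侧完成:
-
-    ```sql
-    SELECT k3, COUNT(*) AS cnt, SUM(k5) AS total
-    FROM es_table
-    WHERE k1 > 1000
-    GROUP BY k3;
-    ```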
-
diff --git a/docs/zh-CN/extending-doris/flink-doris-connector.md b/docs/zh-CN/extending-doris/flink-doris-connector.md
deleted file mode 100644
index cd12cdd886..0000000000
--- a/docs/zh-CN/extending-doris/flink-doris-connector.md
+++ /dev/null
@@ -1,497 +0,0 @@
----
-{
-
- "title": "Flink Doris Connector",
- "language": "zh-CN"
-
-}
----
-
-
-
-# Flink Doris Connector
-
-Flink Doris Connector 可以支持通过 Flink 操作(读取、插入、修改、删除) Doris 中存储的数据。
-
-代码库地址:https://github.com/apache/incubator-doris-flink-connector
-
-* 可以将 `Doris` 表映射为 `DataStream` 或者 `Table`。
-
->**注意:**
->
->1. 修改和删除只支持在 Unique Key 模型上
->2. 目前的删除仅支持通过 Flink CDC 方式接入数据时自动删除;如果通过其他方式接入数据,删除需要自行实现。Flink CDC 的数据删除使用方式参照本文档最后一节
-
-## 版本兼容
-
-| Connector | Flink | Doris | Java | Scala |
-| --------- | ----- | ------ | ---- |------|
-| 1.11.6-2.12-xx | 1.11.x | 0.13+ | 8 | 2.12 |
-| 1.12.7-2.12-xx | 1.12.x | 0.13+ | 8 | 2.12 |
-| 1.13.5-2.12-xx | 1.13.x | 0.13+ | 8 | 2.12 |
-| 1.14.4-2.12-xx | 1.14.x | 0.13+ | 8 | 2.12 |
-
-## 编译与安装
-
-准备工作
-
-1.修改`custom_env.sh.tpl`文件,重命名为`custom_env.sh`
-
-2.指定thrift安装目录
-
-```bash
-##源文件内容
-#export THRIFT_BIN=
-#export MVN_BIN=
-#export JAVA_HOME=
-
-##修改如下,MacOS为例
-export THRIFT_BIN=/opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift
-#export MVN_BIN=
-#export JAVA_HOME=
-
-安装 `thrift` 0.13.0 版本(注意:`Doris` 0.15 和最新的版本基于 `thrift` 0.13.0 构建, 之前的版本依然使用`thrift` 0.9.3 构建)
- Windows:
- 1.下载:`http://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.exe`(下载目录自己指定)
- 2.修改thrift-0.13.0.exe 为 thrift
-
- MacOS:
- 1. 下载:`brew install thrift@0.13.0`
- 2. 默认下载地址:/opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift
-
-
- 注:MacOS执行 `brew install thrift@0.13.0` 可能会报找不到版本的错误,解决方法如下,在终端执行:
- 1. `brew tap-new $USER/local-tap`
- 2. `brew extract --version='0.13.0' thrift $USER/local-tap`
- 3. `brew install thrift@0.13.0`
- 参考链接: `https://gist.github.com/tonydeng/02e571f273d6cce4230dc8d5f394493c`
-
- Linux:
- 1.下载源码包:`wget https://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.tar.gz`
- 2.安装依赖:`yum install -y autoconf automake libtool cmake ncurses-devel openssl-devel lzo-devel zlib-devel gcc gcc-c++`
- 3.`tar zxvf thrift-0.13.0.tar.gz`
- 4.`cd thrift-0.13.0`
- 5.`./configure --without-tests`
- 6.`make`
- 7.`make install`
- 安装完成后查看版本:thrift --version
- 注:如果编译过Doris,则不需要安装thrift,可以直接使用 $DORIS_HOME/thirdparty/installed/bin/thrift
-```
-
-在源码目录下执行:
-
-```bash
-sh build.sh
-
- Usage:
- build.sh --flink version --scala version # specify flink and scala version
- build.sh --tag # this is a build from tag
- e.g.:
- build.sh --flink 1.14.3 --scala 2.12
- build.sh --tag
-
-然后按照你需要的版本执行命令编译即可,例如:
-sh build.sh --flink 1.14.3 --scala 2.12
-```
-
-> 注:如果你是从 tag 检出的源码,则可以直接执行 `sh build.sh --tag`,而无需指定 flink 和 scala 的版本。因为 tag 源码中的版本是固定的。比如 `1.13.5-2.12-1.0.1` 表示 flink 版本 1.13.5,scala 版本 2.12,connector 版本 1.0.1。
-
-编译成功后,会在 `target/` 目录下生成文件,如:`flink-doris-connector-1.14_2.12-1.0.0-SNAPSHOT.jar` 。将此文件复制到 `Flink` 的 `ClassPath` 中即可使用 `Flink-Doris-Connector` 。例如, `Local` 模式运行的 `Flink` ,将此文件放入 `jars/` 文件夹下。 `Yarn` 集群模式运行的 `Flink` ,则将此文件放入预部署包中。
-
-**备注**
-
-1. Doris FE 需要在配置文件中启用 http v2
-2. Scala 版本目前支持2.12和2.11
-
-conf/fe.conf
-
-```
-enable_http_server_v2 = true
-```
-
-## 使用 Maven 管理
-
-添加 flink-doris-connector 和必要的 Flink Maven 依赖
-
-Flink 1.13.* 及以前的版本
-```xml
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-java</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-streaming-java_${scala.version}</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-clients_${scala.version}</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<!-- flink table -->
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-table-common</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-table-api-java-bridge_${scala.version}</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-table-planner-blink_${scala.version}</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<!-- flink-doris-connector -->
-<dependency>
-  <groupId>org.apache.doris</groupId>
-  <artifactId>flink-doris-connector-1.13_2.12</artifactId>
-  <version>1.0.3</version>
-</dependency>
-```
-Flink 1.14.* 版本
-```xml
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-java</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-streaming-java_${scala.version}</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-clients_${scala.version}</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<!-- flink table -->
-<dependency>
-  <groupId>org.apache.flink</groupId>
-  <artifactId>flink-table-planner_${scala.version}</artifactId>
-  <version>${flink.version}</version>
-  <scope>provided</scope>
-</dependency>
-<!-- flink-doris-connector -->
-<dependency>
-  <groupId>org.apache.doris</groupId>
-  <artifactId>flink-doris-connector-1.14_2.12</artifactId>
-  <version>1.0.3</version>
-</dependency>
-```
-
-**备注**
-
-1.请根据不同的 Flink 和 Scala 版本替换对应的 Connector 和 Flink 依赖版本。
-2.目前maven中仅提供了scala2.12版本的包,2.11版本的包需要自行编译,参考上面编译安装小节。
-
-## 使用方法
-
-Flink 读写 Doris 数据主要有三种方式
-
-* SQL
-* DataStream
-* DataSet
-
-### 参数配置
-
-Flink Doris Connector Sink 的内部实现是通过 `Stream Load` 服务向 Doris 写入数据, 同时也支持 `Stream Load` 请求参数的配置设定
-
-参数配置方法
-* SQL 使用 `WITH` 参数 `sink.properties.` 配置
-* DataStream 使用方法`DorisExecutionOptions.builder().setStreamLoadProp(Properties)`配置
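-
-例如,SQL 方式下可以在 `WITH` 中附加 Stream Load 参数(以下表名与字段仅为示意,完整的建表示例见下文 SQL 小节):
-
-```sql
-CREATE TABLE flink_doris_sink_with_props (
-    name STRING,
-    age INT
-    )
-    WITH (
-      'connector' = 'doris',
-      'fenodes' = 'FE_IP:8030',
-      'table.identifier' = 'db.table',
-      'username' = 'root',
-      'password' = '',
-      -- Stream Load 参数:以 JSON 格式导入
-      'sink.properties.format' = 'json',
-      'sink.properties.strip_outer_array' = 'true'
-);
-```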
-
-### SQL
-
-* Source
-
-```sql
-CREATE TABLE flink_doris_source (
- name STRING,
- age INT,
- price DECIMAL(5,2),
- sale DOUBLE
- )
- WITH (
- 'connector' = 'doris',
- 'fenodes' = '$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT',
- 'table.identifier' = '$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME',
- 'username' = '$YOUR_DORIS_USERNAME',
- 'password' = '$YOUR_DORIS_PASSWORD'
-);
-```
-
-* Sink
-
-```sql
-CREATE TABLE flink_doris_sink (
- name STRING,
- age INT,
- price DECIMAL(5,2),
- sale DOUBLE
- )
- WITH (
- 'connector' = 'doris',
- 'fenodes' = '$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT',
- 'table.identifier' = '$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME',
- 'username' = '$YOUR_DORIS_USERNAME',
- 'password' = '$YOUR_DORIS_PASSWORD'
-);
-```
-
-* Insert
-
-```sql
-INSERT INTO flink_doris_sink select name,age,price,sale from flink_doris_source
-```
-
-### DataStream
-
-* Source
-
-```java
- Properties properties = new Properties();
- properties.put("fenodes","FE_IP:8030");
- properties.put("username","root");
- properties.put("password","");
- properties.put("table.identifier","db.table");
- env.addSource(new DorisSourceFunction(
- new DorisStreamOptions(properties),
- new SimpleListDeserializationSchema()
- )
- ).print();
-```
-
-* Sink
-
-Json 数据流
-
-```java
-Properties pro = new Properties();
-pro.setProperty("format", "json");
-pro.setProperty("strip_outer_array", "true");
-env.fromElements(
- "{\"longitude\": \"116.405419\", \"city\": \"北京\", \"latitude\": \"39.916927\"}"
- )
- .addSink(
- DorisSink.sink(
- DorisReadOptions.builder().build(),
- DorisExecutionOptions.builder()
- .setBatchSize(3)
- .setBatchIntervalMs(0l)
- .setMaxRetries(3)
- .setStreamLoadProp(pro).build(),
- DorisOptions.builder()
- .setFenodes("FE_IP:8030")
- .setTableIdentifier("db.table")
- .setUsername("root")
- .setPassword("").build()
- ));
-
-```
-
-Json 数据流
-
-```java
-env.fromElements(
- "{\"longitude\": \"116.405419\", \"city\": \"北京\", \"latitude\": \"39.916927\"}"
- )
- .addSink(
- DorisSink.sink(
- DorisOptions.builder()
- .setFenodes("FE_IP:8030")
- .setTableIdentifier("db.table")
- .setUsername("root")
- .setPassword("").build()
- ));
-```
-
-RowData 数据流
-
-```java
-DataStream<RowData> source = env.fromElements("")
- .map(new MapFunction<String, RowData>() {
- @Override
- public RowData map(String value) throws Exception {
- GenericRowData genericRowData = new GenericRowData(3);
- genericRowData.setField(0, StringData.fromString("北京"));
- genericRowData.setField(1, 116.405419);
- genericRowData.setField(2, 39.916927);
- return genericRowData;
- }
- });
-
-String[] fields = {"city", "longitude", "latitude"};
-LogicalType[] types = {new VarCharType(), new DoubleType(), new DoubleType()};
-
-source.addSink(
- DorisSink.sink(
- fields,
- types,
- DorisReadOptions.builder().build(),
- DorisExecutionOptions.builder()
- .setBatchSize(3)
- .setBatchIntervalMs(0L)
- .setMaxRetries(3)
- .build(),
- DorisOptions.builder()
- .setFenodes("FE_IP:8030")
- .setTableIdentifier("db.table")
- .setUsername("root")
- .setPassword("").build()
- ));
-```
-
-### DataSet
-
-* Sink
-
-```java
-MapOperator<String, RowData> data = env.fromElements("")
- .map(new MapFunction<String, RowData>() {
- @Override
- public RowData map(String value) throws Exception {
- GenericRowData genericRowData = new GenericRowData(3);
- genericRowData.setField(0, StringData.fromString("北京"));
- genericRowData.setField(1, 116.405419);
- genericRowData.setField(2, 39.916927);
- return genericRowData;
- }
- });
-
-DorisOptions dorisOptions = DorisOptions.builder()
- .setFenodes("FE_IP:8030")
- .setTableIdentifier("db.table")
- .setUsername("root")
- .setPassword("").build();
-DorisReadOptions readOptions = DorisReadOptions.defaults();
-DorisExecutionOptions executionOptions = DorisExecutionOptions.defaults();
-
-LogicalType[] types = {new VarCharType(), new DoubleType(), new DoubleType()};
-String[] fields = {"city", "longitude", "latitude"};
-
-DorisDynamicOutputFormat outputFormat = new DorisDynamicOutputFormat(
- dorisOptions, readOptions, executionOptions, types, fields
- );
-
-outputFormat.open(0, 1);
-data.output(outputFormat);
-outputFormat.close();
-```
-
-## 配置
-
-### 通用配置项
-
-| Key | Default Value | Comment |
-| -------------------------------- | ----------------- | ------------------------------------------------------------ |
-| fenodes | -- | Doris FE http 地址 |
-| table.identifier | -- | Doris 表名,如:db1.tbl1 |
-| username | -- | 访问 Doris 的用户名 |
-| password | -- | 访问 Doris 的密码 |
-| doris.request.retries | 3 | 向 Doris 发送请求的重试次数 |
-| doris.request.connect.timeout.ms | 30000 | 向 Doris 发送请求的连接超时时间 |
-| doris.request.read.timeout.ms | 30000 | 向 Doris 发送请求的读取超时时间 |
-| doris.request.query.timeout.s | 3600 | 查询 Doris 的超时时间,默认值为1小时,-1表示无超时限制 |
-| doris.request.tablet.size | Integer.MAX_VALUE | 一个 Partition 对应的 Doris Tablet 个数。此数值设置越小,则会生成越多的 Partition,从而提升 Flink 侧的并行度,但同时会对 Doris 造成更大的压力。 |
-| doris.batch.size | 1024 | 一次从 BE 读取数据的最大行数。增大此数值可减少 Flink 与 Doris 之间建立连接的次数。 从而减轻网络延迟所带来的的额外时间开销。 |
-| doris.exec.mem.limit | 2147483648 | 单个查询的内存限制。默认为 2GB,单位为字节 |
-| doris.deserialize.arrow.async | false | 是否支持异步转换 Arrow 格式到 flink-doris-connector 迭代所需的 RowBatch |
-| doris.deserialize.queue.size | 64 | 异步转换 Arrow 格式的内部处理队列,当 doris.deserialize.arrow.async 为 true 时生效 |
-| doris.read.field | -- | 读取 Doris 表的列名列表,多列之间使用逗号分隔 |
-| doris.filter.query | -- | 过滤读取数据的表达式,此表达式透传给 Doris。Doris 使用此表达式完成源端数据过滤。 |
-| sink.batch.size | 10000 | 单次写 BE 的最大行数 |
-| sink.max-retries | 1 | 写 BE 失败之后的重试次数 |
-| sink.batch.interval | 10s | flush 间隔时间,超过该时间后异步线程将缓存中的数据写入 BE。默认值为 10 秒,支持时间单位 ms、s、min、h 和 d。设置为 0 表示关闭定期写入。 |
-| sink.properties.* | -- | Stream Load 的导入参数 |
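-
-作为补充,下面的 Flink SQL 示例演示了其中 `doris.read.field` 与 `doris.filter.query` 两个读取参数的用法(表名与取值仅为示意):
-
-```sql
-CREATE TABLE flink_doris_source_pruned (
-    name STRING,
-    age INT
-    )
-    WITH (
-      'connector' = 'doris',
-      'fenodes' = 'FE_IP:8030',
-      'table.identifier' = 'db.table',
-      'username' = 'root',
-      'password' = '',
-      -- 只读取 name、age 两列,并把过滤条件透传给 Doris 在源端过滤
-      'doris.read.field' = 'name,age',
-      'doris.filter.query' = 'age > 18'
-);
-```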
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/new-docs/.vuepress/theme/index.js b/new-docs/.vuepress/theme/index.js
deleted file mode 100644
index d882e00623..0000000000
--- a/new-docs/.vuepress/theme/index.js
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-module.exports = {
- extend: "@vuepress/theme-default"
-};
diff --git a/new-docs/.vuepress/theme/layouts/Article.vue b/new-docs/.vuepress/theme/layouts/Article.vue
deleted file mode 100644
index e23c358893..0000000000
--- a/new-docs/.vuepress/theme/layouts/Article.vue
+++ /dev/null
@@ -1,71 +0,0 @@
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/new-docs/.vuepress/theme/layouts/Layout.vue b/new-docs/.vuepress/theme/layouts/Layout.vue
deleted file mode 100644
index 98ad313485..0000000000
--- a/new-docs/.vuepress/theme/layouts/Layout.vue
+++ /dev/null
@@ -1,57 +0,0 @@
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/new-docs/.vuepress/theme/styles/index.styl b/new-docs/.vuepress/theme/styles/index.styl
deleted file mode 100644
index 52c89d31e0..0000000000
--- a/new-docs/.vuepress/theme/styles/index.styl
+++ /dev/null
@@ -1,21 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-/* Override style of sidebar's sub-directory */
-.sidebar-group.is-sub-group > .sidebar-heading:not(.clickable) {
- opacity: 1 !important;
-}
diff --git a/new-docs/.vuepress/vuepress.textClipping b/new-docs/.vuepress/vuepress.textClipping
deleted file mode 100644
index d943526db7..0000000000
Binary files a/new-docs/.vuepress/vuepress.textClipping and /dev/null differ
diff --git a/new-docs/README.md b/new-docs/README.md
deleted file mode 100644
index 2de95bbcc6..0000000000
--- a/new-docs/README.md
+++ /dev/null
@@ -1,255 +0,0 @@
-
-
-# Doris Document
-
-[Vuepress](https://github.com/vuejs/vuepress.git) is used as our document site generator. Configurations are in `./docs/.vuepress` folder.
-
-## Getting Started
-
-Download and install [nodejs](http://nodejs.cn/download/)
-
-```bash
-npm config set registry https://registry.npm.taobao.org # Only if you are in Mainland China.
-cd docs && npm install
-npm run dev
-```
-
-Open your browser and navigate to `localhost:8080/en/` or `localhost:8080/zh-CN/`.
-
-## Docs' Directories
-
-```bash
- .
- ├─ docs/
- │ ├─ .vuepress
- │ │ ├─ dist // Built site files.
- │ │ ├─ public // Assets
- │ │ ├─ sidebar // Side bar configurations.
- │ │ │ ├─ en.js
- │ │ │ └─ zh-CN.js
- │ ├─ theme // Global styles and customizations.
- │ └─ config.js // Vuepress configurations.
- ├─ zh-CN/
- │ ├─ xxxx.md
- │ └─ README.md // Will be rendered as entry page.
- └─ en/
- ├─ one.md
- └─ README.md // Will be rendered as entry page.
-```
-
-## Start Writing
-
-1. Write markdown files in both languages and put them in the separate folders `./en/` and `./zh-CN/`. **Files in the two folders must have the same names.**
-
- ```bash
- .
- ├─ en/
- │ ├─ one.md
- │ └─ two.md
- └─ zh-CN/
- │ ├─ one.md
- │ └─ two.md
- ```
-
-2. A frontmatter like the one below should always be at the top of each file:
-
- ```markdown
- ---
- {
- "title": "Backup and Recovery", // sidebar title
- "language": "en" // writing language
- }
- ---
- ```
-
-3. Assets are in `.vuepress/public/`.
-
- Assuming there is a PNG at `.vuepress/public/images/image_x.png`, it can be referenced like this:
-
- ```markdown
- 
- ```
-
-4. Remember to update the sidebar configurations in `.vuepress/sidebar/` after adding a new file or a folder.
-
- Assuming that the directories are:
-
- ```bash
- .
- ├─ en/
- │ ├─ subfolder
- │ │ ├─ one.md
- │ │ └─ two.md
- │ └─ three.md
- └─ zh-CN/
- ├─ subfolder
- │ ├─ one.md
- │ └─ two.md
- └─ three.md
- ```
-
- Then the sidebar configurations would be like:
-
- ```javascript
- // .vuepress/sidebar/en.js`
- module.exports = [
- {
- title: "subfolder name",
- directoryPath: "subfolder/",
- children: ["one", "two"]
- },
- "three"
- ]
- ```
-
- ```javascript
- // .vuepress/sidebar/zh-CN.js
- module.exports = [
- {
- title: "文件夹名称",
- directoryPath: "subfolder/",
- children: ["one", "two"]
- },
- "three"
- ]
- ```
-
-5. Run `npm run lint` before starting a PR.
-
- If the markdown files do not follow the rules, lint errors will be printed in the console, for example:
-
-```shell
-
-en/administrator-guide/alter-table/alter-table-bitmap-index.md:92 MD040/fenced-code-language Fenced code blocks should have a language specified [Context: " ```"]
-en/administrator-guide/alter-table/alter-table-rollup.md:45 MD040/fenced-code-language Fenced code blocks should have a language specified [Context: "```"]
-en/administrator-guide/alter-table/alter-table-rollup.md:77 MD040/fenced-code-language Fenced code blocks should have a language specified [Context: "```"]
-en/administrator-guide/alter-table/alter-table-rollup.md:178 MD046/code-block-style Code block style [Expected: fenced; Actual: indented]
-en/administrator-guide/alter-table/alter-table-schema-change.md:50 MD040/fenced-code-language Fenced code blocks should have a language specified [Context: "```"]
-en/administrator-guide/alter-table/alter-table-schema-change.md:82 MD040/fenced-code-language Fenced code blocks should have a language specified [Context: "```"]
-en/administrator-guide/alter-table/alter-table-schema-change.md:127 MD040/fenced-code-language Fenced code blocks should have a language specified [Context: "```"]
-en/administrator-guide/alter-table/alter-table-schema-change.md:144 MD040/fenced-code-language Fenced code blocks should have a language specified [Context: "```"]
-en/administrator-guide/alter-table/alter-table-schema-change.md:153 MD040/fenced-code-language Fenced code blocks should have a language specified [Context: "```"]
-en/administrator-guide/alter-table/alter-table-schema-change.md:199 MD046/code-block-style Code block style [Expected: fenced; Actual: indented]
-en/administrator-guide/backup-restore.md:45:1 MD029/ol-prefix Ordered list item prefix [Expected: 1; Actual: 2; Style: 1/1/1]
-en/administrator-guide/backup-restore.md:57:1 MD029/ol-prefix Ordered list item prefix [Expected: 1; Actual: 2; Style: 1/1/1]
-en/administrator-guide/backup-restore.md:61:1 MD029/ol-prefix Ordered list item prefix [Expected: 1; Actual: 3; Style: 1/1/1]
-npm ERR! code ELIFECYCLE
-npm ERR! errno 1
-npm ERR! docs@ lint: `markdownlint '**/*.md' -f`
-npm ERR! Exit status 1
-npm ERR!
-npm ERR! Failed at the docs@ lint script.
-
-```
-
-## FullText search
-
-We use [Algolia DocSearch](https://docsearch.algolia.com/) as our fulltext search engine.
-
-Note that the [config.json from DocSearch](https://github.com/algolia/docsearch-configs/blob/master/configs/apache_doris.json) should be updated whenever a new language or branch is created.
-
-For more details on the DocSearch configuration, please refer to [Configuration of DocSearch](https://docsearch.algolia.com/docs/config-file).
-
-## Deployment
-
-Just start a PR, and all things will be done automatically.
-
-## What Travis Does
-
-Once a PR is accepted, Travis CI will be triggered to build and deploy the whole website within its own branch. Here is what `.travis.yml` does:
-
-1. Prepare the Node.js and VuePress environment.
-
-2. Use the current branch's name as the relative URL path in `.vuepress/config.js` (which is the `base` property).
-
-3. Build the documents into a website with VuePress.
-
-4. Fetch the asf-site repo to a local directory, and copy `.vuepress/dist/` into `{BRANCH}/`.
-
-5. Push the new site to the asf-site repo with a `GitHub Token` (which is preset in the Travis console as a variable used in `.travis.yml`).
-
-## asf-site repository
-
-Finally, the asf-site repository will look like:
-
-```bash
-.
-├─ master/
-│ ├─ en/
-│ │ ├─ subfolder
-│ │ │ ├─ one.md
-│ │ └─ three.md
-│ └─ zh-CN/
-│ ├─ subfolder
-│ │ ├─ one.md
-│ └─ three.md
-├─ incubating-0.11/
-│ ├─ en/
-│ │ ├─ subfolder
-│ │ │ ├─ one.md
-│ │ └─ three.md
-│ └─ zh-CN/
-│ ├─ subfolder
-│ │ ├─ one.md
-│ └─ three.md
-├─ index.html // user entry, and auto redirected to master folder
-└─ versions.json // all versions that can be selected on the website are defined here
-```
-
-And the `versions.json` is like:
-
-```json
-{
- "en": [
- {
- "text": "Versions", // dropdown label
- "items": [
- {
- "text": "master", // dropdown-item label
- "link": "/../master/en/installing/compilation.html", // entry page for this version
- "target": "_blank"
- },
- {
- "text": "branch-0.11",
- "link": "/../branch-0.11/en/installing/compilation.html",
- "target": "_blank"
- }
- ]
- }
- ],
- "zh-CN": [
- {
- "text": "版本",
- "items": [
- {
- "text": "master",
- "link": "/../master/zh-CN/installing/compilation.html",
- "target": "_blank"
- },
- {
- "text": "branch-0.11",
- "link": "/../branch-0.11/zh-CN/installing/compilation.html",
- "target": "_blank"
- }
- ]
- }
- ]
-}
-```
diff --git a/new-docs/build_help_zip.sh b/new-docs/build_help_zip.sh
deleted file mode 100755
index 82fc8761dc..0000000000
--- a/new-docs/build_help_zip.sh
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-##############################################################
-# This script is used to build help doc zip file
-##############################################################
-
-#!/bin/bash
-
-set -eo pipefail
-
-ROOT=`dirname "$0"`
-ROOT=`cd "$ROOT"; pwd`
-
-BUILD_DIR=build
-HELP_DIR=contents
-HELP_ZIP_FILE=help-resource.zip
-SQL_REF_DOC_DIR=zh-CN/sql-reference/
-
-cd $ROOT
-rm -rf $BUILD_DIR $HELP_DIR $HELP_ZIP_FILE
-mkdir -p $BUILD_DIR $HELP_DIR
-
-cp -r $SQL_REF_DOC_DIR/* $HELP_DIR/
-
-zip -r $HELP_ZIP_FILE $HELP_DIR
-mv $HELP_ZIP_FILE $BUILD_DIR/
-
-
diff --git a/new-docs/en/README.md b/new-docs/en/README.md
deleted file mode 100644
index d5ddba4ad6..0000000000
--- a/new-docs/en/README.md
+++ /dev/null
@@ -1,95 +0,0 @@
----
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-home: true
-heroImage: /images/home/banner-stats.png
-heroBgImage: /images/home/hero-bg.png
-heroText:
- - Welcome to
- - Apache Doris
-tagline: A fast MPP database for all modern analytics on big data.
-structure:
- title: Apache Doris
- subTitle:
- descriptions:
- - Apache Doris is a modern MPP analytical database product. It can provide sub-second queries and efficient real-time data analysis. With its distributed architecture, up to 10PB level datasets will be well supported and easy to operate.
- - Apache Doris can meet various data analysis demands, including history data reports, real-time data analysis, interactive data analysis, and exploratory data analysis. Make your data analysis easier!
- image: /images/home/structure-fresh.png
- actionText: Learn More
- actionLink: /en/getting-started/basic-usage
-features:
- title: Apache Doris Core Features
- subTitle:
- list:
- - title: Modern MPP architecture
- icon: /images/home/struct.png
- - title: Getting result of a query within one second
- icon: /images/home/clock.png
- - title: Support standard SQL language, compatible with MySQL protocol
- icon: /images/home/sql.png
- - title: Vectorized SQL executor
- icon: /images/home/program.png
- - title: Effective data model for aggregation
- icon: /images/home/aggr.png
- - title: Rollup, novel pre-computation mechanism
- icon: /images/home/rollup.png
- - title: High performance, high availability, high reliability
- icon: /images/home/cpu.png
- - title: Easy to operate, elastic data warehouse for big data
- icon: /images/home/dev.png
-cases:
- title: Apache Doris Users
- subTitle:
- list:
- - logo: /images/home/logo-meituan.png
- alt: 美团
- - logo: /images/home/logo-xiaomi.png
- alt: 小米
- - logo: /images/home/logo-jd.png
- alt: 京东
- - logo: /images/home/logo-huawei.png
- alt: 华为
- - logo: /images/home/logo-baidu.png
- alt: 百度
- - logo: /images/home/logo-weibo.png
- alt: 新浪微博
- - logo: /images/home/logo-zuoyebang.png
- alt: 作业帮
- - logo: /images/home/logo-vipkid.png
- alt: Vipkid
- - logo: /images/home/logo-360.png
- alt: 360
- - logo: /images/home/logo-shopee.png
- alt: Shopee
- - logo: /images/home/logo-tantan.png
- alt: 探探
- - logo: /images/home/logo-kuaishou.png
- alt: 快手
- - logo: /images/home/logo-sohu.png
- alt: 搜狐
- - logo: /images/home/logo-yidian.png
- alt: 一点资讯
- - logo: /images/home/logo-dingdong.png
- alt: 叮咚买菜
- - logo: /images/home/logo-youdao.png
- alt: 有道
-actionText: Quick Start →
-actionLink: /en/get-starting/get-starting
-articleText: Latest News
-articleLink: /en/article/article-list
----
diff --git a/new-docs/en/admin-manual/http-actions/cancel-label.md b/new-docs/en/admin-manual/http-actions/cancel-label.md
deleted file mode 100644
index e60393e021..0000000000
--- a/new-docs/en/admin-manual/http-actions/cancel-label.md
+++ /dev/null
@@ -1,64 +0,0 @@
----
-{
- "title": "CANCEL LABEL",
- "language": "en"
-}
----
-
-
-
-# CANCEL LABEL
-## description
- NAME:
- cancel_label: cancel a transaction with label
-
- SYNOPSIS
- curl -u user:passwd -XPOST http://host:port/api/{db}/_cancel?label={label}
-
- DESCRIPTION
-
- This is to cancel a transaction with the specified label.
-
- RETURN VALUES
-
- Return a JSON format string:
-
- Status:
- Success: cancel succeed
- Others: cancel failed
- Message: Error message if cancel failed
-
- ERRORS
-
-## example
-
- 1. Cancel the transaction with label "testLabel" on database "testDb"
-
- curl -u root -XPOST http://host:port/api/testDb/_cancel?label=testLabel
-
-## keyword
-
- CANCEL, LABEL
-
-
-
-
-
-
diff --git a/new-docs/en/admin-manual/http-actions/check-reset-rpc-cache.md b/new-docs/en/admin-manual/http-actions/check-reset-rpc-cache.md
deleted file mode 100644
index cbe3137e5c..0000000000
--- a/new-docs/en/admin-manual/http-actions/check-reset-rpc-cache.md
+++ /dev/null
@@ -1,47 +0,0 @@
----
-{
- "title": "CHECK/RESET Stub Cache",
- "language": "zh-CN"
-}
----
-
-
-
-# CHECK/RESET Stub Cache
-## description
-
-### Check Stub Cache
- Check whether the connection cache is available
-
- Description: Check whether the connection cache is available. The maximum payload size is 10 MB.
- METHOD: GET
- URI: http://be_host:be_http_port/api/check_rpc_channel/{host_to_check}/{remote_brpc_port}/{payload_size}
-
-### Reset Stub Cache
- This API is used to reset the brpc connection cache. `{endpoints}` can be `all` to clear all caches, or `host1:port1,host2:port2,...` to clear the caches of the specified targets.
-
- Description: Reset connection cache
- METHOD: GET
- URI: http://be_host:be_http_port/api/reset_rpc_channel/{endpoints}
-## example
-
- curl -X GET "http://host:port/api/check_rpc_channel/host2/8060/1024000"
- curl -X GET "http://host:port/api/reset_rpc_channel/all"
-
diff --git a/new-docs/en/admin-manual/http-actions/compaction-action.md b/new-docs/en/admin-manual/http-actions/compaction-action.md
deleted file mode 100644
index f753cea238..0000000000
--- a/new-docs/en/admin-manual/http-actions/compaction-action.md
+++ /dev/null
@@ -1,211 +0,0 @@
----
-{
- "title": "Compaction Action",
- "language": "en"
-}
----
-
-
-
-# Compaction Action
-
-This API is used to view the overall compaction status of a BE node or the compaction status of a specified tablet. It can also be used to manually trigger Compaction.
-
-## View Compaction status
-
-### The overall compaction status of the node
-
-```
-curl -X GET http://be_host:webserver_port/api/compaction/run_status
-```
-
-Return JSON:
-
-```
-{
- "CumulativeCompaction": {
- "/home/disk1" : [10001, 10002],
- "/home/disk2" : [10003]
- },
- "BaseCompaction": {
- "/home/disk1" : [10001, 10002],
- "/home/disk2" : [10003]
- }
-}
-```
-
-This structure shows, for each compaction type, the data directories and the ids of the tablets that are currently running a compaction task of that type.
-
-### Specify the compaction status of the tablet
-
-```
-curl -X GET http://be_host:webserver_port/api/compaction/show?tablet_id=xxxx
-```
-
-If the tablet does not exist, an error in JSON format is returned:
-
-```
-{
- "status": "Fail",
- "msg": "Tablet not found"
-}
-```
-
-If the tablet exists, the result is returned in JSON format:
-
-```
-{
- "cumulative policy type": "NUM_BASED",
- "cumulative point": 50,
- "last cumulative failure time": "2019-12-16 18:13:43.224",
- "last base failure time": "2019-12-16 18:13:23.320",
- "last cumu success time": "2019-12-16 18:12:15.110",
- "last base success time": "2019-12-16 18:11:50.780",
- "rowsets": [
- "[0-48] 10 DATA OVERLAPPING 574.00 MB",
- "[49-49] 2 DATA OVERLAPPING 574.00 B",
- "[50-50] 0 DELETE NONOVERLAPPING 574.00 B",
- "[51-51] 5 DATA OVERLAPPING 574.00 B"
- ],
- "missing_rowsets": [],
- "stale version path": [
- {
- "path id": "2",
- "last create time": "2019-12-16 18:11:15.110 +0800",
- "path list": "2-> [0-24] -> [25-48]"
- },
- {
- "path id": "1",
- "last create time": "2019-12-16 18:13:15.110 +0800",
- "path list": "1-> [25-40] -> [40-48]"
- }
- ]
-}
-```
-
-Explanation of results:
-
-* cumulative policy type: The cumulative compaction policy type used by the current tablet.
-* cumulative point: The version boundary between base and cumulative compaction. Versions before the point (exclusive) are handled by base compaction; versions after it (inclusive) are handled by cumulative compaction.
-* last cumulative failure time: The time when the last cumulative compaction failed. By default, cumulative compaction is attempted on this tablet again after 10 minutes.
-* last base failure time: The time when the last base compaction failed. By default, base compaction is attempted on this tablet again after 10 minutes.
-* rowsets: The current rowset collection of this tablet. [0-48] means a rowset covering versions 0 to 48. The second number is the number of segments in the rowset. `DELETE` indicates a delete version. `OVERLAPPING` and `NONOVERLAPPING` indicate whether data between segments overlaps.
-* missing_rowsets: The missing rowsets.
-* stale version path: The merged version paths of the rowsets that have already been merged in this tablet. It is an array, and each element represents one merged path. Each element has three attributes: path id is the version path id, last create time is the creation time of the most recent rowset on the path, and path list is the list of versions on the path. By default, all rowsets on a path are deleted half an hour after its last create time.
-
-### Examples
-
-```
-curl -X GET http://192.168.10.24:8040/api/compaction/show?tablet_id=10015
-```
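-
-A small sketch of pulling individual fields out of the response, assuming `jq` is available on the client (host, port and tablet id are placeholders):
-
-```
-curl -s -X GET http://192.168.10.24:8040/api/compaction/show?tablet_id=10015 | jq '{point: ."cumulative point", rowsets: (.rowsets | length)}'
-```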
-
-## Manually trigger Compaction
-
-```
-curl -X POST http://be_host:webserver_port/api/compaction/run?tablet_id=xxxx\&compact_type=cumulative
-```
-
-Only one manual compaction task can be performed at a time. The value of `compact_type` is either `base` or `cumulative`.
-
-If the tablet does not exist, an error in JSON format is returned:
-
-```
-{
- "status": "Fail",
- "msg": "Tablet not found"
-}
-```
-
-If the compaction task fails to be triggered, an error in JSON format is returned:
-
-```
-{
- "status": "Fail",
- "msg": "fail to execute compaction, error = -2000"
-}
-```
-
-If the compaction task is triggered successfully, the result is returned in JSON format:
-
-```
-{
- "status": "Success",
- "msg": "compaction task is successfully triggered."
-}
-```
-
-Explanation of results:
-
-* status: The trigger status. It is Success when the task is triggered successfully, and Fail when the task cannot be triggered for some reason (for example, no suitable version is available).
-* msg: The detailed success or failure message.
-
-### Examples
-
-```
-curl -X POST http://192.168.10.24:8040/api/compaction/run?tablet_id=10015\&compact_type=cumulative
-```
-
-## Manual Compaction execution status
-
-```
-curl -X GET http://be_host:webserver_port/api/compaction/run_status?tablet_id=xxxx
-```
-If the tablet does not exist, an error in JSON format is returned:
-
-```
-{
- "status": "Fail",
- "msg": "Tablet not found"
-}
-```
-
-If the tablet exists and no manual compaction task is running on it, the following JSON is returned:
-
-```
-{
- "status" : "Success",
- "run_status" : false,
- "msg" : "this tablet_id is not running",
- "tablet_id" : 11308,
- "compact_type" : ""
-}
-```
-
-If the tablet exists and a manual compaction task is running on it, the following JSON is returned:
-
-```
-{
- "status" : "Success",
- "run_status" : true,
- "msg" : "this tablet_id is running",
- "tablet_id" : 11308,
- "compact_type" : "cumulative"
-}
-```
-
-Explanation of results:
-
-* run_status: Whether a manual compaction task is currently running on this tablet.
-
-### Examples
-
-```
-curl -X GET http://192.168.10.24:8040/api/compaction/run_status?tablet_id=10015
-```
-
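-The trigger and status endpoints above can be combined into a simple wait loop. A minimal sketch, assuming `jq` is installed on the client; host, port and tablet id are placeholders:
-
-```
-#!/bin/bash
-# Sketch: trigger a cumulative compaction and wait until the manual task finishes.
-BE="http://192.168.10.24:8040"
-TABLET_ID=10015
-
-# Trigger the compaction task.
-curl -s -X POST "${BE}/api/compaction/run?tablet_id=${TABLET_ID}&compact_type=cumulative"
-
-# Poll the manual compaction status every 5 seconds until run_status becomes false.
-while [ "$(curl -s -X GET "${BE}/api/compaction/run_status?tablet_id=${TABLET_ID}" | jq -r '.run_status')" = "true" ]; do
-    sleep 5
-done
-echo "manual compaction on tablet ${TABLET_ID} is no longer running"
-```
-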
diff --git a/new-docs/en/admin-manual/http-actions/connection-action.md b/new-docs/en/admin-manual/http-actions/connection-action.md
deleted file mode 100644
index d7d81e0516..0000000000
--- a/new-docs/en/admin-manual/http-actions/connection-action.md
+++ /dev/null
@@ -1,42 +0,0 @@
----
-{
- "title": "CONNECTION",
- "language": "en"
-}
----
-
-
-
-# CONNECTION
-
-Get the last query_id of a connection via its connection_id.
-
-```
-curl -X GET http://fe_host:fe_http_port/api/connection?connection_id=123
-```
-
-If the connection_id does not exist, a 404 NOT FOUND error is returned.
-
-If the connection_id exists, the last query_id of that connection is returned:
-```
-{
- "query_id" : 9133b7efa92a44c8-8ed4b44772ec2a0c
-}
-```
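-
-As a rough sketch, one way to find a connection id is the Id column of `SHOW PROCESSLIST` in the MySQL client; host, ports and the id 123 below are placeholders:
-
-```
-# List current connections, then ask the FE for the last query_id of one of them.
-mysql -h fe_host -P 9030 -uroot -e "SHOW PROCESSLIST"
-curl -X GET http://fe_host:fe_http_port/api/connection?connection_id=123
-```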
diff --git a/new-docs/en/admin-manual/http-actions/fe-get-log-file.md b/new-docs/en/admin-manual/http-actions/fe-get-log-file.md
deleted file mode 100644
index 5a7595ad58..0000000000
--- a/new-docs/en/admin-manual/http-actions/fe-get-log-file.md
+++ /dev/null
@@ -1,74 +0,0 @@
----
-{
- "title": "get\\_log\\_file",
- "language": "en"
-}
----
-
-
-
-# get\_log\_file
-
-To get FE log via HTTP
-
-## Types of FE log
-
-1. fe.audit.log (Audit log)
-
- The audit log records all executed statements. The audit log file names are formatted as follows:
-
- ```
- fe.audit.log # The latest audit log
- fe.audit.log.20190603.1 # The historical audit log. The smaller the sequence number, the newer the log.
- fe.audit.log.20190603.2
- fe.audit.log.20190602.1
- ...
- ```
-
-## Example
-
-1. Get the list of specified type of logs
-
- Example
-
- `curl -v -X HEAD -uuser:passwd http://fe_host:http_port/api/get_log_file?type=fe.audit.log`
-
- Returns:
-
- ```
- HTTP/1.1 200 OK
- file_infos: {"fe.audit.log":24759,"fe.audit.log.20190528.1":132934}
- content-type: text/html
- connection: keep-alive
- ```
-
- In the response header, the `file_infos` field lists the files and their sizes in JSON format.
-
-2. Download files
-
- Example:
-
- ```
- curl -X GET -uuser:passwd http://fe_host:http_port/api/get_log_file?type=fe.audit.log\&file=fe.audit.log.20190528.1
- ```
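-
- To save the file locally instead of printing it to the terminal, curl's standard `-o` option can be used (the file name below is just an example):
-
- ```
- curl -X GET -uuser:passwd -o fe.audit.log.20190528.1 http://fe_host:http_port/api/get_log_file?type=fe.audit.log\&file=fe.audit.log.20190528.1
- ```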
-
-## Notes
-
-ADMIN privilege is required.
diff --git a/new-docs/en/admin-manual/http-actions/fe/backends-action.md b/new-docs/en/admin-manual/http-actions/fe/backends-action.md
deleted file mode 100644
index 17589dd95d..0000000000
--- a/new-docs/en/admin-manual/http-actions/fe/backends-action.md
+++ /dev/null
@@ -1,70 +0,0 @@
----
-{
- "title": "Backends Action",
- "language": "zh-CN"
-}
----
-
-
-
-# Backends Action
-
-## Request
-
-```
-GET /api/backends
-```
-
-## Description
-
-The Backends Action returns the list of Backend nodes, including each Backend's IP, port and other information.
-
-## Path parameters
-
-None
-
-## Query parameters
-
-* `is_alive`
-
- Optional. Whether to return only the BE nodes that are alive. The default is false, which means all BE nodes are returned.
-
-## Request body
-
-None
-
-## Response
-
-```
-{
- "msg": "success",
- "code": 0,
- "data": {
- "backends": [
- {
- "ip": "192.1.1.1",
- "http_port": 8040,
- "is_alive": true
- }
- ]
- },
- "count": 0
-}
-```
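-
-## Examples
-
-A minimal sketch; fe_host and fe_http_port are placeholders, and `-u user:passwd` may be needed if your FE requires HTTP basic authentication:
-
-```
-# Return all BE nodes
-curl -X GET http://fe_host:fe_http_port/api/backends
-
-# Return only the BE nodes that are alive
-curl -X GET http://fe_host:fe_http_port/api/backends?is_alive=true
-```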
diff --git a/new-docs/en/admin-manual/http-actions/fe/bootstrap-action.md b/new-docs/en/admin-manual/http-actions/fe/bootstrap-action.md
deleted file mode 100644
index 1f30ca3498..0000000000
--- a/new-docs/en/admin-manual/http-actions/fe/bootstrap-action.md
+++ /dev/null
@@ -1,121 +0,0 @@
----
-{
- "title": "Bootstrap Action",
- "language": "en"
-}
----
-
-
-
-# Bootstrap Action
-
-## Request
-
-`GET /api/bootstrap`
-
-## Description
-
-It is used to check whether the FE has started. When no parameters are provided, only the startup status is returned. If `token` and `cluster_id` are provided, more detailed information is returned.
-
-## Path parameters
-
-none
-
-## Query parameters
-
-* `cluster_id`
-
- The cluster id. It can be viewed in the file `palo-meta/image/VERSION`.
-
-* `token`
-
- Cluster token. It can be viewed in the file `palo-meta/image/VERSION`.
-
-## Request body
-
-none
-
-## Response
-
-* No parameters provided
-
- ```
- {
- "msg": "OK",
- "code": 0,
- "data": null,
- "count": 0
- }
- ```
-
- A code of 0 means that the FE node has started successfully. Error codes other than 0 indicate other errors.
-
-* Provide `token` and `cluster_id`
-
- ```
- {
- "msg": "OK",
- "code": 0,
- "data": {
- "queryPort": 9030,
- "rpcPort": 9020,
- "maxReplayedJournal": 17287
- },
- "count": 0
- }
- ```
-
- * `queryPort` is the MySQL protocol port of the FE node.
- * `rpcPort` is the thrift RPC port of the FE node.
- * `maxReplayedJournal` represents the maximum metadata journal id that has been replayed by the FE node.
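-
-The endpoint can also serve as a simple liveness probe. A minimal sketch, assuming `jq` is available on the client; fe_host and http_port are placeholders:
-
-```
-# Exit non-zero if the FE has not started successfully (code != 0).
-code=$(curl -s http://fe_host:http_port/api/bootstrap | jq -r '.code')
-[ "${code}" = "0" ] || exit 1
-```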
-
-## Examples
-
-1. No parameters
-
- ```
- GET /api/bootstrap
-
- Response:
- {
- "msg": "OK",
- "code": 0,
- "data": null,
- "count": 0
- }
- ```
-
-2. Provide `token` and `cluster_id`
-
- ```
- GET /api/bootstrap?cluster_id=935437471&token=ad87f6dd-c93f-4880-bcdb-8ca8c9ab3031
-
- Response:
- {
- "msg": "OK",
- "code": 0,
- "data": {
- "queryPort": 9030,
- "rpcPort": 9020,
- "maxReplayedJournal": 17287
- },
- "count": 0
- }
- ```
\ No newline at end of file
diff --git a/new-docs/en/admin-manual/http-actions/fe/cancel-load-action.md b/new-docs/en/admin-manual/http-actions/fe/cancel-load-action.md
deleted file mode 100644
index 1ca4f5fcd7..0000000000
--- a/new-docs/en/admin-manual/http-actions/fe/cancel-load-action.md
+++ /dev/null
@@ -1,96 +0,0 @@
----
-{
- "title": "Cancel Load Action",
- "language": "en"
-}
----
-
-
-
-# Cancel Load Action
-
-## Request
-
-`POST /api/<db>/_cancel`
-
-## Description
-
-Used to cancel the load transaction of the specified label.
-
-## Path parameters
-
-* `<db>`
-
- Specify the database name
-
-## Query parameters
-
-* `