diff --git a/docs/en/docs/ecosystem/external-table/multi-catalog.md b/docs/en/docs/ecosystem/external-table/multi-catalog.md index 6f82c30153..0c7ddf75a9 100644 --- a/docs/en/docs/ecosystem/external-table/multi-catalog.md +++ b/docs/en/docs/ecosystem/external-table/multi-catalog.md @@ -232,7 +232,77 @@ Query OK, 1000 rows affected (0.28 sec) ### Connect Elasticsearch -TODO +> 1. 5.x and later versions are supported. +> 2. In 5.x and 6.x, if an index contains multiple types, only the first one is used by default. + +The following example creates a Catalog connection named es to the specified ES and turns off node discovery. + +``` +CREATE CATALOG es PROPERTIES ( + "type"="es", + "elasticsearch.hosts"="http://192.168.120.12:29200", + "elasticsearch.nodes_discovery"="false" +); +``` + +Once created, you can view the catalog with the `SHOW CATALOGS` command: + +``` +mysql> SHOW CATALOGS; ++-----------+-------------+----------+ +| CatalogId | CatalogName | Type | ++-----------+-------------+----------+ +| 0 | internal | internal | +| 11003 | es | es | ++-----------+-------------+----------+ +2 rows in set (0.02 sec) +``` + +Switch to the es catalog with the `SWITCH` command and view the databases in it (there is only one database, default_db, which is associated with all indexes) + +``` +mysql> SWITCH es; +Query OK, 0 rows affected (0.00 sec) + +mysql> SHOW DATABASES; ++------------+ +| Database | ++------------+ +| default_db | ++------------+ + +mysql> show tables; ++----------------------+ +| Tables_in_default_db | ++----------------------+ +| test | +| test2 | ++----------------------+ +``` + +Query example + +``` +mysql> select * from test; ++------------+-------------+--------+-------+ +| test4 | test2 | test3 | test1 | ++------------+-------------+--------+-------+ +| 2022-08-08 | hello world | 2.415 | test2 | +| 2022-08-08 | hello world | 3.1415 | test1 | ++------------+-------------+--------+-------+ +``` + +## Parameter description: + +Parameter | Description +---|--- +**elasticsearch.hosts** | ES Connection Address, 
may be one or more nodes; a load-balancer address is also accepted +**elasticsearch.username** | Username for ES +**elasticsearch.password** | Password for the user +**elasticsearch.doc_value_scan** | Whether to enable ES/Lucene columnar storage to get the values of the queried fields; the default is false +**elasticsearch.keyword_sniff** | Whether to detect string-type text.fields in ES to obtain the additional not-analyzed keyword field name (multi-fields mechanism) +**elasticsearch.nodes_discovery** | Whether to enable ES node discovery; the default is true. In a network-isolated environment, set this parameter to false so that only the specified nodes are connected. +**elasticsearch.ssl** | Whether the ES cluster enables https access mode; the current FE/BE implementation trusts all certificates ## Column Type Mapping diff --git a/docs/sidebars.json b/docs/sidebars.json index 8721e9b76f..a4bfea09f6 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -174,6 +174,7 @@ "type": "category", "label": "Expansion table", "items": [ + "ecosystem/external-table/multi-catalog", "ecosystem/external-table/doris-on-es", "ecosystem/external-table/hudi-external-table", "ecosystem/external-table/iceberg-of-doris", diff --git a/docs/zh-CN/docs/ecosystem/external-table/multi-catalog.md b/docs/zh-CN/docs/ecosystem/external-table/multi-catalog.md index fdc5c34444..dff1f0a4dd 100644 --- a/docs/zh-CN/docs/ecosystem/external-table/multi-catalog.md +++ b/docs/zh-CN/docs/ecosystem/external-table/multi-catalog.md @@ -232,8 +232,77 @@ Query OK, 1000 rows affected (0.28 sec) ### 连接 Elasticsearch -TODO +> 1. 支持 5.x 及以上版本。 +> 2. 
5.x 和 6.x 中一个 index 中的多个 type 默认取第一个 +以下示例,用于创建一个名为 es 的 Catalog 连接指定的 ES,并关闭节点发现功能。 + +``` +CREATE CATALOG es PROPERTIES ( + "type"="es", + "elasticsearch.hosts"="http://192.168.120.12:29200", + "elasticsearch.nodes_discovery"="false" +); +``` + +创建后,可以通过 `SHOW CATALOGS` 命令查看 catalog: + +``` +mysql> SHOW CATALOGS; ++-----------+-------------+----------+ +| CatalogId | CatalogName | Type | ++-----------+-------------+----------+ +| 0 | internal | internal | +| 11003 | es | es | ++-----------+-------------+----------+ +2 rows in set (0.02 sec) +``` + +通过 `SWITCH` 命令切换到 es catalog,并查看其中的数据库(只有一个 default_db 关联所有 index) + +``` +mysql> SWITCH es; +Query OK, 0 rows affected (0.00 sec) + +mysql> SHOW DATABASES; ++------------+ +| Database | ++------------+ +| default_db | ++------------+ + +mysql> show tables; ++----------------------+ +| Tables_in_default_db | ++----------------------+ +| test | +| test2 | ++----------------------+ +``` + +查询示例 + +``` +mysql> select * from test; ++------------+-------------+--------+-------+ +| test4 | test2 | test3 | test1 | ++------------+-------------+--------+-------+ +| 2022-08-08 | hello world | 2.415 | test2 | +| 2022-08-08 | hello world | 3.1415 | test1 | ++------------+-------------+--------+-------+ +``` + +## 参数说明: + +参数 | 说明 +---|--- +**elasticsearch.hosts** | ES 地址,可以是一个或多个,也可以是 ES 的负载均衡地址 +**elasticsearch.username** | ES 用户名 +**elasticsearch.password** | 对应用户的密码信息 +**elasticsearch.doc_value_scan** | 是否开启通过 ES/Lucene 列式存储获取查询字段的值,默认为 false +**elasticsearch.keyword_sniff** | 是否对 ES 中字符串类型分词类型 text.fields 进行探测,获取额外的未分词 keyword 字段名 multi-fields 机制 +**elasticsearch.nodes_discovery** | 是否开启 ES 节点发现,默认为 true,在网络隔离环境下设置为 false,只连接指定节点 +**elasticsearch.ssl** | ES 是否开启 https 访问模式,目前在 fe/be 实现方式为信任所有 ## 列类型映射 diff --git a/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/EsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/EsUtil.java index c9b89e8f5d..5e9b25ffc5 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/EsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/EsUtil.java @@ -397,8 +397,9 @@ public class EsUtil { case "integer": return Type.INT; case "long": - case "unsigned_long": return Type.BIGINT; + case "unsigned_long": + return Type.LARGEINT; case "float": case "half_float": return Type.FLOAT; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SimpleScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SimpleScheduler.java index 5cb66e7a8b..1b1cc6bfa4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SimpleScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SimpleScheduler.java @@ -126,6 +126,9 @@ public class SimpleScheduler { public static TNetworkAddress getHost(ImmutableMap backends, Reference backendIdRef) throws UserException { + if (backends.isEmpty()) { + throw new UserException(SystemInfoService.NO_SCAN_NODE_BACKEND_AVAILABLE_MSG); + } long id = nextId.getAndIncrement() % backends.size(); Map.Entry backendEntry = backends.entrySet().stream().skip(id).filter( e -> isAvailable(e.getValue())).findFirst().orElse(null);