From 200b558156ef8ec041655dbdd83eab8491a76f31 Mon Sep 17 00:00:00 2001
From: "jiafeng.zhang"
Date: Thu, 11 Aug 2022 09:57:26 +0800
Subject: [PATCH] [typo](doc)spark load uses kerberos authentication method
 (#11662)

spark load uses kerberos authentication method
---
 .../import/import-way/spark-load-manual.md | 50 +++++++++++++++----
 .../import/import-way/spark-load-manual.md | 42 +++++++++++++++-
 2 files changed, 82 insertions(+), 10 deletions(-)

diff --git a/docs/en/docs/data-operate/import/import-way/spark-load-manual.md b/docs/en/docs/data-operate/import/import-way/spark-load-manual.md
index d801d3af54..49dfc5628b 100644
--- a/docs/en/docs/data-operate/import/import-way/spark-load-manual.md
+++ b/docs/en/docs/data-operate/import/import-way/spark-load-manual.md
@@ -153,7 +153,11 @@ PROPERTIES
     spark_conf_key = spark_conf_value,
     working_dir = path,
     broker = broker_name,
-    broker.property_key = property_value
+    broker.property_key = property_value,
+    hadoop.security.authentication = kerberos,
+    kerberos_principal = doris@YOUR.COM,
+    kerberos_keytab = /home/doris/my.keytab,
+    kerberos_keytab_content = ASDOWHDLAWIDJHWLDKSALDJSDIWALD
 )
 
 -- drop spark resource
@@ -178,7 +182,6 @@ REVOKE USAGE_PRIV ON RESOURCE resource_name FROM ROLE role_name
 
 `Properties` are the parameters related to spark resources, as follows:
 
 - `type`: resource type, required. Currently, only spark is supported.
-
 - Spark related parameters are as follows:
 - `spark.master`: required, yarn is supported at present, `spark://host:port`.
@@ -190,11 +193,12 @@ REVOKE USAGE_PRIV ON RESOURCE resource_name FROM ROLE role_name
 - `spark.hadoop.fs.defaultFS`: required when master is yarn.
 - Other parameters are optional; refer to `http://spark.apache.org/docs/latest/configuration.html`
-
 - `working_dir`: directory used by ETL. Required when Spark is used as an ETL resource. For example: `hdfs://host:port/tmp/doris`.
-
+- `hadoop.security.authentication`: Specify the authentication method as kerberos.
+- `kerberos_principal`: Specify the principal of kerberos.
+- `kerberos_keytab`: Specify the path to the kerberos keytab file. It must be the absolute path of a file on the server where the Broker process runs, and the Broker process must be able to access it.
+- `kerberos_keytab_content`: Specify the base64-encoded content of the kerberos keytab file. This property and `kerberos_keytab` are mutually exclusive; configure exactly one of them.
 - `broker`: the name of the broker. Required when Spark is used as an ETL resource. The broker must be added in advance with the `ALTER SYSTEM ADD BROKER` command.
-
 - `broker.property_key`: the authentication information that the broker needs when reading the intermediate files generated by ETL.
 
 Example:
@@ -231,6 +235,38 @@ PROPERTIES
 );
 ```
 
+**Spark Load supports Kerberos authentication**
+
+If Spark Load needs to access a Hadoop cluster secured with Kerberos authentication, you only need to specify the following parameters when creating the Spark resource:
+
+- `hadoop.security.authentication`: Specify the authentication method as kerberos.
+- `kerberos_principal`: Specify the principal of kerberos.
+- `kerberos_keytab`: Specify the path to the kerberos keytab file. It must be the absolute path of a file on the server where the Broker process runs, and the Broker process must be able to access it.
+- `kerberos_keytab_content`: Specify the base64-encoded content of the kerberos keytab file. This property and `kerberos_keytab` are mutually exclusive; configure exactly one of them (a sketch of this form follows).
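+
+A minimal sketch of the `kerberos_keytab_content` form, for illustration only: the resource name `spark_kerberos_content` and the base64 string are placeholders, and the remaining properties mirror the full example under `Example:` below, which uses the `kerberos_keytab` path form instead.
+
+```sql
+-- Hypothetical sketch: authenticate with an inline base64-encoded keytab
+-- instead of a keytab file path. kerberos_keytab and kerberos_keytab_content
+-- are mutually exclusive, so only one of them may be set.
+CREATE EXTERNAL RESOURCE "spark_kerberos_content"
+PROPERTIES
+(
+    "type" = "spark",
+    "spark.master" = "yarn",
+    "spark.submit.deployMode" = "cluster",
+    "spark.hadoop.yarn.resourcemanager.address" = "127.0.0.1:9999",
+    "spark.hadoop.fs.defaultFS" = "hdfs://127.0.0.1:10000",
+    "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris",
+    "broker" = "broker0",
+    "hadoop.security.authentication" = "kerberos",
+    "kerberos_principal" = "doris@YOUR.COM",
+    "kerberos_keytab_content" = "ASDOWHDLAWIDJHWLDKSALDJSDIWALD"
+);
+```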
+
+Example:
+
+```sql
+CREATE EXTERNAL RESOURCE "spark_on_kerberos"
+PROPERTIES
+(
+    "type" = "spark",
+    "spark.master" = "yarn",
+    "spark.submit.deployMode" = "cluster",
+    "spark.jars" = "xxx.jar,yyy.jar",
+    "spark.files" = "/tmp/aaa,/tmp/bbb",
+    "spark.executor.memory" = "1g",
+    "spark.yarn.queue" = "queue0",
+    "spark.hadoop.yarn.resourcemanager.address" = "127.0.0.1:9999",
+    "spark.hadoop.fs.defaultFS" = "hdfs://127.0.0.1:10000",
+    "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris",
+    "broker" = "broker0",
+    "hadoop.security.authentication" = "kerberos",
+    "kerberos_principal" = "doris@YOUR.COM",
+    "kerberos_keytab" = "/home/doris/my.keytab"
+);
+```
+
 **Show resources**
 
 Ordinary accounts can only see the resources on which they have `USAGE_PRIV`.
@@ -248,22 +284,18 @@ You can use the `USAGE_PRIV` permission is given to a user or a role, and the ro
 GRANT USAGE_PRIV ON RESOURCE "spark0" TO "user0"@"%";
-
 -- Grant permission to the spark0 resource to role ROLE0
 GRANT USAGE_PRIV ON RESOURCE "spark0" TO ROLE "role0";
-
 -- Grant permission to all resources to user user0
 GRANT USAGE_PRIV ON RESOURCE * TO "user0"@"%";
-
 -- Grant permission to all resources to role ROLE0
 GRANT USAGE_PRIV ON RESOURCE * TO ROLE "role0";
-
 -- Revoke the spark0 resource permission of user user0
 REVOKE USAGE_PRIV ON RESOURCE "spark0" FROM "user0"@"%";
diff --git a/docs/zh-CN/docs/data-operate/import/import-way/spark-load-manual.md b/docs/zh-CN/docs/data-operate/import/import-way/spark-load-manual.md
index b6269ea176..d8bc642296 100644
--- a/docs/zh-CN/docs/data-operate/import/import-way/spark-load-manual.md
+++ b/docs/zh-CN/docs/data-operate/import/import-way/spark-load-manual.md
@@ -126,7 +126,11 @@ PROPERTIES
     spark_conf_key = spark_conf_value,
     working_dir = path,
     broker = broker_name,
-    broker.property_key = property_value
+    broker.property_key = property_value,
+    hadoop.security.authentication = kerberos,
+    kerberos_principal = doris@YOUR.COM,
+    kerberos_keytab = /home/doris/my.keytab,
+    kerberos_keytab_content = ASDOWHDLAWIDJHWLDKSALDJSDIWALD
 )
 
 -- drop spark resource
@@ -158,6 +162,10 @@ REVOKE USAGE_PRIV ON RESOURCE resource_name FROM ROLE role_name
 - `spark.hadoop.fs.defaultFS`: required when master is yarn.
 - Other parameters are optional; refer to http://spark.apache.org/docs/latest/configuration.html
 - `working_dir`: directory used by ETL. Required when Spark is used as an ETL resource. For example: hdfs://host:port/tmp/doris.
+- `hadoop.security.authentication`: Specify the authentication method as kerberos.
+- `kerberos_principal`: Specify the principal of kerberos.
+- `kerberos_keytab`: Specify the path to the kerberos keytab file. It must be the absolute path of a file on the server where the Broker process runs, and the Broker process must be able to access it.
+- `kerberos_keytab_content`: Specify the base64-encoded content of the kerberos keytab file. This property and `kerberos_keytab` are mutually exclusive; configure exactly one of them.
 - `broker`: the broker name. Required when Spark is used as an ETL resource. The broker must be configured in advance with the `ALTER SYSTEM ADD BROKER` command.
 - `broker.property_key`: authentication information and other properties that the broker needs when reading the intermediate files generated by ETL.
 
@@ -195,6 +203,38 @@ PROPERTIES
 );
 ```
 
+**Spark Load supports Kerberos authentication**
+
+If Spark Load needs to access a Hadoop cluster secured with Kerberos authentication, you only need to specify the following parameters when creating the Spark resource:
+
+- `hadoop.security.authentication`: Specify the authentication method as kerberos.
+- `kerberos_principal`: Specify the principal of kerberos.
+- `kerberos_keytab`: Specify the path to the kerberos keytab file. It must be the absolute path of a file on the server where the Broker process runs, and the Broker process must be able to access it.
+- `kerberos_keytab_content`: Specify the base64-encoded content of the kerberos keytab file. This property and `kerberos_keytab` are mutually exclusive; configure exactly one of them.
+
+Example:
+
+```sql
+CREATE EXTERNAL RESOURCE "spark_on_kerberos"
+PROPERTIES
+(
+    "type" = "spark",
+    "spark.master" = "yarn",
+    "spark.submit.deployMode" = "cluster",
+    "spark.jars" = "xxx.jar,yyy.jar",
+    "spark.files" = "/tmp/aaa,/tmp/bbb",
+    "spark.executor.memory" = "1g",
"spark.yarn.queue" = "queue0", + "spark.hadoop.yarn.resourcemanager.address" = "127.0.0.1:9999", + "spark.hadoop.fs.defaultFS" = "hdfs://127.0.0.1:10000", + "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris", + "broker" = "broker0", + "hadoop.security.authentication" = "kerberos", + "kerberos_principal" = "doris@YOUR.COM", + "kerberos_keytab" = "/home/doris/my.keytab" +); +``` + **查看资源** 普通账户只能看到自己有 USAGE_PRIV 使用权限的资源。