From d44c4b1300535aa729a215f053c856ae49fd7a54 Mon Sep 17 00:00:00 2001 From: Mingyu Chen Date: Wed, 1 Mar 2023 17:08:36 +0800 Subject: [PATCH] [improvement][fix](catalog) check required properties when creating catalog and fix jdbc catalog issue (#17209) Check required properties when creating catalog. To avoid some strange error when missing required properties This PR add checks for: hms catalog: check the validation of dfs.ha properties jdbc catalog: check jdbc_url, driver_url, driver_class is set. Fix NPE when init MasterCatalogExecutor The MasterCatalogExecutor may be called by FrontendServiceImpl from BE, which does not have ConnectionContext. Add more jdbc url param to resolve Chinese issue add useUnicode=true&characterEncoding=utf-8 by default in jdbc catalog when connecting to MySQL Update FAQ doc of catalog --- docs/en/docs/lakehouse/multi-catalog/faq.md | 14 +++++ .../zh-CN/docs/lakehouse/multi-catalog/faq.md | 14 +++++ .../apache/doris/catalog/HdfsResource.java | 1 + .../apache/doris/catalog/JdbcResource.java | 53 +++++++++++++++-- .../apache/doris/datasource/CatalogMgr.java | 8 ++- .../doris/datasource/ExternalCatalog.java | 4 ++ .../doris/datasource/HMSExternalCatalog.java | 36 ++++++++++++ .../doris/datasource/JdbcExternalCatalog.java | 16 +++++ .../doris/qe/MasterCatalogExecutor.java | 7 ++- .../doris/datasource/CatalogMgrTest.java | 58 +++++++++++++++++++ 10 files changed, 202 insertions(+), 9 deletions(-) diff --git a/docs/en/docs/lakehouse/multi-catalog/faq.md b/docs/en/docs/lakehouse/multi-catalog/faq.md index e350507bd3..0c163cdc33 100644 --- a/docs/en/docs/lakehouse/multi-catalog/faq.md +++ b/docs/en/docs/lakehouse/multi-catalog/faq.md @@ -67,3 +67,17 @@ under the License. `"hive.exec.orc.split.strategy" = "BI"` Other options: HYBRID (default), ETL. + +6. An error is reported when connecting to SQLServer through JDBC Catalog: `unable to find valid certification path to requested target` + + Please add `trustServerCertificate=true` option in `jdbc_url`. + +7. When connecting to the MySQL database through the JDBC Catalog, the Chinese characters are garbled, or the Chinese character condition query is incorrect + + Please add `useUnicode=true&characterEncoding=utf-8` in `jdbc_url` + + > Note: After version 1.2.3, these parameters will be automatically added when using JDBC Catalog to connect to the MySQL database. + +8. An error is reported when connecting to the MySQL database through the JDBC Catalog: `Establishing SSL connection without server's identity verification is not recommended` + + Please add `useSSL=true` in `jdbc_url` diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/faq.md b/docs/zh-CN/docs/lakehouse/multi-catalog/faq.md index 9d20ff653c..a1cbd53db6 100644 --- a/docs/zh-CN/docs/lakehouse/multi-catalog/faq.md +++ b/docs/zh-CN/docs/lakehouse/multi-catalog/faq.md @@ -67,3 +67,17 @@ under the License. `"hive.exec.orc.split.strategy" = "BI"` 其他选项:HYBRID(默认),ETL。 + +6. 通过 JDBC Catalog 连接 SQLServer 报错:`unable to find valid certification path to requested target` + + 请在 `jdbc_url` 中添加 `trustServerCertificate=true` 选项。 + +7. 通过 JDBC Catalog 连接 MySQL 数据库,中文字符乱码,或中文字符条件查询不正确 + + 请在 `jdbc_url` 中添加 `useUnicode=true&characterEncoding=utf-8` + + > 注:1.2.3 版本后,使用 JDBC Catalog 连接 MySQL 数据库,会自动添加这些参数。 + +8. 通过 JDBC Catalog 连接 MySQL 数据库报错:`Establishing SSL connection without server's identity verification is not recommended` + + 请在 `jdbc_url` 中添加 `useSSL=true` diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java index 0421949ff1..d812d8ec53 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java @@ -51,6 +51,7 @@ public class HdfsResource extends Resource { public static String HADOOP_KERBEROS_KEYTAB = "hadoop.kerberos.keytab"; public static String HADOOP_SHORT_CIRCUIT = "dfs.client.read.shortcircuit"; public static String HADOOP_SOCKET_PATH = "dfs.domain.socket.path"; + public static String DSF_NAMESERVICES = "dfs.nameservices"; @SerializedName(value = "properties") private Map properties; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java index c02666d928..d43098b7db 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java @@ -247,21 +247,36 @@ public class JdbcResource extends Resource { String newJdbcUrl = jdbcUrl.replaceAll(" ", ""); String dbType = parseDbType(newJdbcUrl); if (dbType.equals(MYSQL)) { - // 1. `yearIsDateType` is a parameter of JDBC, and the default is true. + // `yearIsDateType` is a parameter of JDBC, and the default is true. // We force the use of `yearIsDateType=false` - // 2. `tinyInt1isBit` is a parameter of JDBC, and the default is true. + newJdbcUrl = checkAndSetJdbcBoolParam(newJdbcUrl, "yearIsDateType", "true", "false"); + // `tinyInt1isBit` is a parameter of JDBC, and the default is true. // We force the use of `tinyInt1isBit=false`, so that for mysql type tinyint, // it will convert to Doris tinyint, not bit. - newJdbcUrl = checkJdbcUrlParam(newJdbcUrl, "yearIsDateType", "true", "false"); - newJdbcUrl = checkJdbcUrlParam(newJdbcUrl, "tinyInt1isBit", "true", "false"); + newJdbcUrl = checkAndSetJdbcBoolParam(newJdbcUrl, "tinyInt1isBit", "true", "false"); + // set useUnicode and characterEncoding to false and utf-8 + newJdbcUrl = checkAndSetJdbcBoolParam(newJdbcUrl, "useUnicode", "false", "true"); + newJdbcUrl = checkAndSetJdbcParam(newJdbcUrl, "characterEncoding", "utf-8"); } if (dbType.equals(MYSQL) || dbType.equals(POSTGRESQL)) { - newJdbcUrl = checkJdbcUrlParam(newJdbcUrl, "useCursorFetch", "false", "true"); + newJdbcUrl = checkAndSetJdbcBoolParam(newJdbcUrl, "useCursorFetch", "false", "true"); } return newJdbcUrl; } - private static String checkJdbcUrlParam(String jdbcUrl, String params, String unexpectedVal, String expectedVal) { + /** + * Check jdbcUrl param, if the param is not set, set it to the expected value. + * If the param is set to an unexpected value, replace it with the expected value. + * If the param is set to the expected value, do nothing. + * + * @param jdbcUrl + * @param params + * @param unexpectedVal + * @param expectedVal + * @return + */ + private static String checkAndSetJdbcBoolParam(String jdbcUrl, String params, String unexpectedVal, + String expectedVal) { String unexpectedParams = params + "=" + unexpectedVal; String expectedParams = params + "=" + expectedVal; if (jdbcUrl.contains(expectedParams)) { @@ -280,4 +295,30 @@ public class JdbcResource extends Resource { } return jdbcUrl; } + + /** + * Check jdbcUrl param, if the param is set, do thing. + * If the param is not set, set it to expected value. + * + * @param jdbcUrl + * @param params + * @param value + * @return + */ + private static String checkAndSetJdbcParam(String jdbcUrl, String params, String expectedVal) { + String expectedParams = params + "=" + expectedVal; + if (jdbcUrl.contains(expectedParams)) { + return jdbcUrl; + } else { + if (jdbcUrl.contains("?")) { + if (jdbcUrl.charAt(jdbcUrl.length() - 1) != '?') { + jdbcUrl += "&"; + } + } else { + jdbcUrl += "?"; + } + jdbcUrl += expectedParams; + } + return jdbcUrl; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogMgr.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogMgr.java index d3513feb7a..9ab8ecae4d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogMgr.java @@ -250,7 +250,10 @@ public class CatalogMgr implements Writable, GsonPostProcessable { } long id = Env.getCurrentEnv().getNextId(); CatalogLog log = CatalogFactory.constructorCatalogLog(id, stmt); - replayCreateCatalog(log); + CatalogIf catalog = replayCreateCatalog(log); + if (catalog instanceof ExternalCatalog) { + ((ExternalCatalog) catalog).checkProperties(); + } Env.getCurrentEnv().getEditLog().logCatalogLog(OperationType.OP_CREATE_CATALOG, log); } finally { writeUnlock(); @@ -458,11 +461,12 @@ public class CatalogMgr implements Writable, GsonPostProcessable { /** * Reply for create catalog event. */ - public void replayCreateCatalog(CatalogLog log) throws DdlException { + public CatalogIf replayCreateCatalog(CatalogLog log) throws DdlException { writeLock(); try { CatalogIf catalog = CatalogFactory.constructorFromLog(log); addCatalog(catalog); + return catalog; } finally { writeUnlock(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java index 607c22144d..6f4ff316aa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java @@ -26,6 +26,7 @@ import org.apache.doris.catalog.external.HMSExternalDatabase; import org.apache.doris.catalog.external.IcebergExternalDatabase; import org.apache.doris.catalog.external.JdbcExternalDatabase; import org.apache.doris.cluster.ClusterNamespace; +import org.apache.doris.common.DdlException; import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.common.util.Util; @@ -150,6 +151,9 @@ public abstract class ExternalCatalog implements CatalogIf, Wr // hms client, read properties from hive-site.xml, es client protected abstract void initLocalObjectsImpl(); + // check if all required properties are set when creating catalog + public void checkProperties() throws DdlException { + } /** * eg: diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java index a4d1bab514..0cf7afdc53 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java @@ -24,9 +24,11 @@ import org.apache.doris.catalog.HdfsResource; import org.apache.doris.catalog.external.ExternalDatabase; import org.apache.doris.catalog.external.HMSExternalDatabase; import org.apache.doris.common.Config; +import org.apache.doris.common.DdlException; import org.apache.doris.datasource.hive.PooledHiveMetaStoreClient; import org.apache.doris.datasource.hive.event.MetastoreNotificationFetchException; +import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.hadoop.conf.Configuration; @@ -63,6 +65,40 @@ public class HMSExternalCatalog extends ExternalCatalog { catalogProperty = new CatalogProperty(resource, props); } + @Override + public void checkProperties() throws DdlException { + super.checkProperties(); + // check the dfs.ha properties + // 'dfs.nameservices'='your-nameservice', + // 'dfs.ha.namenodes.your-nameservice'='nn1,nn2', + // 'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007', + // 'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007', + // 'dfs.client.failover.proxy.provider.your-nameservice'='xxx' + String dfsNameservices = catalogProperty.getOrDefault(HdfsResource.DSF_NAMESERVICES, ""); + if (Strings.isNullOrEmpty(dfsNameservices)) { + return; + } + String namenodes = catalogProperty.getOrDefault("dfs.ha.namenodes." + dfsNameservices, ""); + if (Strings.isNullOrEmpty(namenodes)) { + throw new DdlException("Missing dfs.ha.namenodes." + dfsNameservices + " property"); + } + String[] names = namenodes.split(","); + for (String name : names) { + String address = catalogProperty.getOrDefault("dfs.namenode.rpc-address." + dfsNameservices + "." + name, + ""); + if (Strings.isNullOrEmpty(address)) { + throw new DdlException( + "Missing dfs.namenode.rpc-address." + dfsNameservices + "." + name + " property"); + } + } + String failoverProvider = catalogProperty.getOrDefault("dfs.client.failover.proxy.provider." + dfsNameservices, + ""); + if (Strings.isNullOrEmpty(failoverProvider)) { + throw new DdlException( + "Missing dfs.client.failover.proxy.provider." + dfsNameservices + " property"); + } + } + public String getHiveMetastoreUris() { return catalogProperty.getOrDefault(HMSResource.HIVE_METASTORE_URIS, ""); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/JdbcExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/JdbcExternalCatalog.java index 8281b735d1..1bcf418570 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/JdbcExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/JdbcExternalCatalog.java @@ -39,6 +39,12 @@ import java.util.Map; public class JdbcExternalCatalog extends ExternalCatalog { private static final Logger LOG = LogManager.getLogger(JdbcExternalCatalog.class); + private static final List REQUIRED_PROPERTIES = Lists.newArrayList( + JdbcResource.JDBC_URL, + JdbcResource.DRIVER_URL, + JdbcResource.DRIVER_CLASS + ); + // Must add "transient" for Gson to ignore this field, // or Gson will throw exception with HikariCP private transient JdbcClient jdbcClient; @@ -50,6 +56,16 @@ public class JdbcExternalCatalog extends ExternalCatalog { this.catalogProperty = new CatalogProperty(resource, processCompatibleProperties(props)); } + @Override + public void checkProperties() throws DdlException { + super.checkProperties(); + for (String requiredProperty : REQUIRED_PROPERTIES) { + if (!catalogProperty.getProperties().containsKey(requiredProperty)) { + throw new DdlException("Required property '" + requiredProperty + "' is missing"); + } + } + } + @Override public void onClose() { super.onClose(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/MasterCatalogExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/MasterCatalogExecutor.java index a26c687e10..e13d2daefd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/MasterCatalogExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/MasterCatalogExecutor.java @@ -40,7 +40,12 @@ public class MasterCatalogExecutor { public MasterCatalogExecutor() { ctx = ConnectContext.get(); - waitTimeoutMs = ctx.getSessionVariable().getQueryTimeoutS() * 1000; + if (ctx == null) { + // The method may be called by FrontendServiceImpl from BE, which does not have ConnectContext. + waitTimeoutMs = 300 * 1000; + } else { + waitTimeoutMs = ctx.getSessionVariable().getQueryTimeoutS() * 1000; + } } public void forward(long catalogId, long dbId) throws Exception { diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/CatalogMgrTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/CatalogMgrTest.java index 8cbf023328..7c8f10a1dc 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/CatalogMgrTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/CatalogMgrTest.java @@ -44,6 +44,7 @@ import org.apache.doris.catalog.external.HMSExternalDatabase; import org.apache.doris.catalog.external.HMSExternalTable; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.DdlException; +import org.apache.doris.common.ExceptionChecker; import org.apache.doris.common.FeConstants; import org.apache.doris.datasource.hive.HiveMetaStoreCache; import org.apache.doris.datasource.hive.HiveMetaStoreCache.HivePartitionValues; @@ -548,4 +549,61 @@ public class CatalogMgrTest extends TestWithFeService { partitionNameToIdMap, idToUniqueIdsMap, singleUidToColumnRangeMap, partitionValuesMap); } + @Test + public void testInvalidCreateCatalogProperties() throws Exception { + String createCatalogSql = "CREATE CATALOG bad_hive1 PROPERTIES (\n" + + " 'type'='hms',\n" + + " 'hive.metastore.uris' = 'thrift://172.21.0.1:7004',\n" + + " 'hadoop.username' = 'hive',\n" + + " 'dfs.nameservices'='your-nameservice',\n" + + " 'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',\n" + + " 'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',\n" + + " 'dfs.client.failover.proxy.provider.your-nameservice'" + + "='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'\n" + + ");"; + CreateCatalogStmt createStmt1 = (CreateCatalogStmt) parseAndAnalyzeStmt(createCatalogSql); + ExceptionChecker.expectThrowsWithMsg(DdlException.class, "Missing dfs.ha.namenodes.your-nameservice property", + () -> mgr.createCatalog(createStmt1)); + + createCatalogSql = "CREATE CATALOG bad_hive2 PROPERTIES (\n" + + " 'type'='hms',\n" + + " 'hive.metastore.uris' = 'thrift://172.21.0.1:7004',\n" + + " 'hadoop.username' = 'hive',\n" + + " 'dfs.nameservices'='your-nameservice',\n" + + " 'dfs.ha.namenodes.your-nameservice'='nn1,nn2',\n" + + " 'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',\n" + + " 'dfs.client.failover.proxy.provider.your-nameservice'" + + "='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'\n" + + ");"; + CreateCatalogStmt createStmt2 = (CreateCatalogStmt) parseAndAnalyzeStmt(createCatalogSql); + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Missing dfs.namenode.rpc-address.your-nameservice.nn1 property", + () -> mgr.createCatalog(createStmt2)); + + createCatalogSql = "CREATE CATALOG good_hive PROPERTIES (\n" + + " 'type'='hms',\n" + + " 'hive.metastore.uris' = 'thrift://172.21.0.1:7004',\n" + + " 'hadoop.username' = 'hive',\n" + + " 'dfs.nameservices'='your-nameservice',\n" + + " 'dfs.ha.namenodes.your-nameservice'='nn1,nn2',\n" + + " 'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',\n" + + " 'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007'\n" + + ");"; + CreateCatalogStmt createStmt3 = (CreateCatalogStmt) parseAndAnalyzeStmt(createCatalogSql); + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Missing dfs.client.failover.proxy.provider.your-nameservice property", + () -> mgr.createCatalog(createStmt3)); + + createCatalogSql = "CREATE CATALOG bad_jdbc PROPERTIES (\n" + + " \"type\"=\"jdbc\",\n" + + " \"user\"=\"root\",\n" + + " \"password\"=\"123456\",\n" + + " \"jdbc_url\" = \"jdbc:mysql://127.0.0.1:3306/demo\",\n" + + " \"driver_class\" = \"com.mysql.jdbc.Driver\"\n" + + ")"; + CreateCatalogStmt createStmt4 = (CreateCatalogStmt) parseAndAnalyzeStmt(createCatalogSql); + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Required property 'driver_url' is missing", + () -> mgr.createCatalog(createStmt4)); + } }