[improvement][fix](catalog) check required properties when creating catalog and fix jdbc catalog issue (#17209)

Check required properties when creating catalog.
To avoid some strange error when missing required properties

This PR adds checks for:

hms catalog: validate the dfs.ha (HDFS high-availability) properties

jdbc catalog: check that jdbc_url, driver_url, and driver_class are set.

Fix NPE when init MasterCatalogExecutor
The MasterCatalogExecutor may be called by FrontendServiceImpl from BE, which does not have ConnectionContext.

Add more jdbc url param to resolve Chinese issue

add useUnicode=true&characterEncoding=utf-8 by default in jdbc catalog when connecting to MySQL

Update FAQ doc of catalog
This commit is contained in:
Mingyu Chen
2023-03-01 17:08:36 +08:00
committed by GitHub
parent a5bd71c03a
commit d44c4b1300
10 changed files with 202 additions and 9 deletions

View File

@ -51,6 +51,7 @@ public class HdfsResource extends Resource {
public static String HADOOP_KERBEROS_KEYTAB = "hadoop.kerberos.keytab";
public static String HADOOP_SHORT_CIRCUIT = "dfs.client.read.shortcircuit";
public static String HADOOP_SOCKET_PATH = "dfs.domain.socket.path";
public static String DSF_NAMESERVICES = "dfs.nameservices";
@SerializedName(value = "properties")
private Map<String, String> properties;

View File

@ -247,21 +247,36 @@ public class JdbcResource extends Resource {
String newJdbcUrl = jdbcUrl.replaceAll(" ", "");
String dbType = parseDbType(newJdbcUrl);
if (dbType.equals(MYSQL)) {
// 1. `yearIsDateType` is a parameter of JDBC, and the default is true.
// `yearIsDateType` is a parameter of JDBC, and the default is true.
// We force the use of `yearIsDateType=false`
// 2. `tinyInt1isBit` is a parameter of JDBC, and the default is true.
newJdbcUrl = checkAndSetJdbcBoolParam(newJdbcUrl, "yearIsDateType", "true", "false");
// `tinyInt1isBit` is a parameter of JDBC, and the default is true.
// We force the use of `tinyInt1isBit=false`, so that for mysql type tinyint,
// it will convert to Doris tinyint, not bit.
newJdbcUrl = checkJdbcUrlParam(newJdbcUrl, "yearIsDateType", "true", "false");
newJdbcUrl = checkJdbcUrlParam(newJdbcUrl, "tinyInt1isBit", "true", "false");
newJdbcUrl = checkAndSetJdbcBoolParam(newJdbcUrl, "tinyInt1isBit", "true", "false");
// set useUnicode and characterEncoding to false and utf-8
newJdbcUrl = checkAndSetJdbcBoolParam(newJdbcUrl, "useUnicode", "false", "true");
newJdbcUrl = checkAndSetJdbcParam(newJdbcUrl, "characterEncoding", "utf-8");
}
if (dbType.equals(MYSQL) || dbType.equals(POSTGRESQL)) {
newJdbcUrl = checkJdbcUrlParam(newJdbcUrl, "useCursorFetch", "false", "true");
newJdbcUrl = checkAndSetJdbcBoolParam(newJdbcUrl, "useCursorFetch", "false", "true");
}
return newJdbcUrl;
}
private static String checkJdbcUrlParam(String jdbcUrl, String params, String unexpectedVal, String expectedVal) {
/**
* Check jdbcUrl param, if the param is not set, set it to the expected value.
* If the param is set to an unexpected value, replace it with the expected value.
* If the param is set to the expected value, do nothing.
*
* @param jdbcUrl
* @param params
* @param unexpectedVal
* @param expectedVal
* @return
*/
private static String checkAndSetJdbcBoolParam(String jdbcUrl, String params, String unexpectedVal,
String expectedVal) {
String unexpectedParams = params + "=" + unexpectedVal;
String expectedParams = params + "=" + expectedVal;
if (jdbcUrl.contains(expectedParams)) {
@ -280,4 +295,30 @@ public class JdbcResource extends Resource {
}
return jdbcUrl;
}
/**
 * Ensure that the given jdbc url parameter is present.
 * If the parameter is already set to the expected value, do nothing.
 * If the parameter is not set, append "params=expectedVal" to the url,
 * using '?' or '&amp;' as the separator depending on what the url already contains.
 *
 * @param jdbcUrl the jdbc connection url to inspect
 * @param params the parameter name, e.g. "characterEncoding"
 * @param expectedVal the value to append when the parameter is absent
 * @return the jdbc url, updated if the parameter was missing
 */
private static String checkAndSetJdbcParam(String jdbcUrl, String params, String expectedVal) {
    String expectedParams = params + "=" + expectedVal;
    if (jdbcUrl.contains(expectedParams)) {
        // already set to the expected value, nothing to do
        return jdbcUrl;
    }
    if (!jdbcUrl.contains("?")) {
        // no query string yet, start one
        jdbcUrl += "?";
    } else {
        char last = jdbcUrl.charAt(jdbcUrl.length() - 1);
        // append a separator only if the url does not already end with one,
        // so we never emit "??" or "&&"
        if (last != '?' && last != '&') {
            jdbcUrl += "&";
        }
    }
    jdbcUrl += expectedParams;
    return jdbcUrl;
}
}

View File

@ -250,7 +250,10 @@ public class CatalogMgr implements Writable, GsonPostProcessable {
}
long id = Env.getCurrentEnv().getNextId();
CatalogLog log = CatalogFactory.constructorCatalogLog(id, stmt);
replayCreateCatalog(log);
CatalogIf catalog = replayCreateCatalog(log);
if (catalog instanceof ExternalCatalog) {
((ExternalCatalog) catalog).checkProperties();
}
Env.getCurrentEnv().getEditLog().logCatalogLog(OperationType.OP_CREATE_CATALOG, log);
} finally {
writeUnlock();
@ -458,11 +461,12 @@ public class CatalogMgr implements Writable, GsonPostProcessable {
/**
* Replay for create catalog event.
*/
public void replayCreateCatalog(CatalogLog log) throws DdlException {
public CatalogIf replayCreateCatalog(CatalogLog log) throws DdlException {
writeLock();
try {
CatalogIf catalog = CatalogFactory.constructorFromLog(log);
addCatalog(catalog);
return catalog;
} finally {
writeUnlock();
}

View File

@ -26,6 +26,7 @@ import org.apache.doris.catalog.external.HMSExternalDatabase;
import org.apache.doris.catalog.external.IcebergExternalDatabase;
import org.apache.doris.catalog.external.JdbcExternalDatabase;
import org.apache.doris.cluster.ClusterNamespace;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;
import org.apache.doris.common.util.Util;
@ -150,6 +151,9 @@ public abstract class ExternalCatalog implements CatalogIf<ExternalDatabase>, Wr
// hms client, read properties from hive-site.xml, es client
protected abstract void initLocalObjectsImpl();
/**
 * Check that all required properties are set when creating a catalog.
 * The base implementation accepts any property set; subclasses override
 * this to validate their own required properties and throw DdlException
 * on a missing or invalid one.
 */
public void checkProperties() throws DdlException {
}
/**
* eg:

View File

@ -24,9 +24,11 @@ import org.apache.doris.catalog.HdfsResource;
import org.apache.doris.catalog.external.ExternalDatabase;
import org.apache.doris.catalog.external.HMSExternalDatabase;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.datasource.hive.PooledHiveMetaStoreClient;
import org.apache.doris.datasource.hive.event.MetastoreNotificationFetchException;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
@ -63,6 +65,40 @@ public class HMSExternalCatalog extends ExternalCatalog {
catalogProperty = new CatalogProperty(resource, props);
}
@Override
public void checkProperties() throws DdlException {
    super.checkProperties();
    // If HDFS HA is configured, every related property must be present:
    // 'dfs.nameservices'='your-nameservice',
    // 'dfs.ha.namenodes.your-nameservice'='nn1,nn2',
    // 'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',
    // 'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',
    // 'dfs.client.failover.proxy.provider.your-nameservice'='xxx'
    String dfsNameservices = catalogProperty.getOrDefault(HdfsResource.DSF_NAMESERVICES, "");
    if (Strings.isNullOrEmpty(dfsNameservices)) {
        // HA not configured, nothing to validate
        return;
    }
    String namenodes = catalogProperty.getOrDefault("dfs.ha.namenodes." + dfsNameservices, "");
    if (Strings.isNullOrEmpty(namenodes)) {
        throw new DdlException("Missing dfs.ha.namenodes." + dfsNameservices + " property");
    }
    for (String rawName : namenodes.split(",")) {
        // tolerate whitespace around the comma-separated namenode ids, e.g. "nn1, nn2",
        // which would otherwise make the rpc-address lookup fail spuriously
        String name = rawName.trim();
        if (name.isEmpty()) {
            continue;
        }
        String address = catalogProperty.getOrDefault(
                "dfs.namenode.rpc-address." + dfsNameservices + "." + name, "");
        if (Strings.isNullOrEmpty(address)) {
            throw new DdlException(
                    "Missing dfs.namenode.rpc-address." + dfsNameservices + "." + name + " property");
        }
    }
    String failoverProvider = catalogProperty.getOrDefault(
            "dfs.client.failover.proxy.provider." + dfsNameservices, "");
    if (Strings.isNullOrEmpty(failoverProvider)) {
        throw new DdlException(
                "Missing dfs.client.failover.proxy.provider." + dfsNameservices + " property");
    }
}
// Returns the configured hive.metastore.uris, or "" if the property is not set.
public String getHiveMetastoreUris() {
return catalogProperty.getOrDefault(HMSResource.HIVE_METASTORE_URIS, "");
}

View File

@ -39,6 +39,12 @@ import java.util.Map;
public class JdbcExternalCatalog extends ExternalCatalog {
private static final Logger LOG = LogManager.getLogger(JdbcExternalCatalog.class);
private static final List<String> REQUIRED_PROPERTIES = Lists.newArrayList(
JdbcResource.JDBC_URL,
JdbcResource.DRIVER_URL,
JdbcResource.DRIVER_CLASS
);
// Must add "transient" for Gson to ignore this field,
// or Gson will throw exception with HikariCP
private transient JdbcClient jdbcClient;
@ -50,6 +56,16 @@ public class JdbcExternalCatalog extends ExternalCatalog {
this.catalogProperty = new CatalogProperty(resource, processCompatibleProperties(props));
}
@Override
public void checkProperties() throws DdlException {
    super.checkProperties();
    // Reject a create-catalog request that is missing any of jdbc_url,
    // driver_url or driver_class. A key mapped to a blank value is treated
    // the same as a missing key, since a blank jdbc_url etc. is equally unusable.
    Map<String, String> properties = catalogProperty.getProperties();
    for (String requiredProperty : REQUIRED_PROPERTIES) {
        String value = properties.get(requiredProperty);
        if (value == null || value.trim().isEmpty()) {
            throw new DdlException("Required property '" + requiredProperty + "' is missing");
        }
    }
}
@Override
public void onClose() {
super.onClose();

View File

@ -40,7 +40,12 @@ public class MasterCatalogExecutor {
public MasterCatalogExecutor() {
    ctx = ConnectContext.get();
    // As written, ctx.getSessionVariable() was dereferenced BEFORE the null
    // check below, which made the guard dead code and threw an NPE whenever
    // ctx was null. Branch first, dereference only on the non-null path.
    if (ctx == null) {
        // May be called by FrontendServiceImpl from BE, which has no
        // ConnectContext; fall back to a fixed 5-minute timeout.
        waitTimeoutMs = 300 * 1000;
    } else {
        waitTimeoutMs = ctx.getSessionVariable().getQueryTimeoutS() * 1000;
    }
}
public void forward(long catalogId, long dbId) throws Exception {

View File

@ -44,6 +44,7 @@ import org.apache.doris.catalog.external.HMSExternalDatabase;
import org.apache.doris.catalog.external.HMSExternalTable;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.ExceptionChecker;
import org.apache.doris.common.FeConstants;
import org.apache.doris.datasource.hive.HiveMetaStoreCache;
import org.apache.doris.datasource.hive.HiveMetaStoreCache.HivePartitionValues;
@ -548,4 +549,61 @@ public class CatalogMgrTest extends TestWithFeService {
partitionNameToIdMap, idToUniqueIdsMap, singleUidToColumnRangeMap, partitionValuesMap);
}
/**
 * Verifies that createCatalog rejects property sets missing required HMS HA
 * or JDBC properties, and that each failure carries a specific error message.
 */
@Test
public void testInvalidCreateCatalogProperties() throws Exception {
// case 1: HA nameservice declared but 'dfs.ha.namenodes.<ns>' is missing
String createCatalogSql = "CREATE CATALOG bad_hive1 PROPERTIES (\n"
+ " 'type'='hms',\n"
+ " 'hive.metastore.uris' = 'thrift://172.21.0.1:7004',\n"
+ " 'hadoop.username' = 'hive',\n"
+ " 'dfs.nameservices'='your-nameservice',\n"
+ " 'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',\n"
+ " 'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',\n"
+ " 'dfs.client.failover.proxy.provider.your-nameservice'"
+ "='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'\n"
+ ");";
CreateCatalogStmt createStmt1 = (CreateCatalogStmt) parseAndAnalyzeStmt(createCatalogSql);
ExceptionChecker.expectThrowsWithMsg(DdlException.class, "Missing dfs.ha.namenodes.your-nameservice property",
() -> mgr.createCatalog(createStmt1));
// case 2: namenode list declared but the rpc-address for nn1 is missing
createCatalogSql = "CREATE CATALOG bad_hive2 PROPERTIES (\n"
+ " 'type'='hms',\n"
+ " 'hive.metastore.uris' = 'thrift://172.21.0.1:7004',\n"
+ " 'hadoop.username' = 'hive',\n"
+ " 'dfs.nameservices'='your-nameservice',\n"
+ " 'dfs.ha.namenodes.your-nameservice'='nn1,nn2',\n"
+ " 'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',\n"
+ " 'dfs.client.failover.proxy.provider.your-nameservice'"
+ "='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'\n"
+ ");";
CreateCatalogStmt createStmt2 = (CreateCatalogStmt) parseAndAnalyzeStmt(createCatalogSql);
ExceptionChecker.expectThrowsWithMsg(DdlException.class,
"Missing dfs.namenode.rpc-address.your-nameservice.nn1 property",
() -> mgr.createCatalog(createStmt2));
// case 3: all namenodes addressed but the failover proxy provider is missing
// (catalog is named "good_hive" yet still must be rejected)
createCatalogSql = "CREATE CATALOG good_hive PROPERTIES (\n"
+ " 'type'='hms',\n"
+ " 'hive.metastore.uris' = 'thrift://172.21.0.1:7004',\n"
+ " 'hadoop.username' = 'hive',\n"
+ " 'dfs.nameservices'='your-nameservice',\n"
+ " 'dfs.ha.namenodes.your-nameservice'='nn1,nn2',\n"
+ " 'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',\n"
+ " 'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007'\n"
+ ");";
CreateCatalogStmt createStmt3 = (CreateCatalogStmt) parseAndAnalyzeStmt(createCatalogSql);
ExceptionChecker.expectThrowsWithMsg(DdlException.class,
"Missing dfs.client.failover.proxy.provider.your-nameservice property",
() -> mgr.createCatalog(createStmt3));
// case 4: jdbc catalog missing the required 'driver_url' property
createCatalogSql = "CREATE CATALOG bad_jdbc PROPERTIES (\n"
+ " \"type\"=\"jdbc\",\n"
+ " \"user\"=\"root\",\n"
+ " \"password\"=\"123456\",\n"
+ " \"jdbc_url\" = \"jdbc:mysql://127.0.0.1:3306/demo\",\n"
+ " \"driver_class\" = \"com.mysql.jdbc.Driver\"\n"
+ ")";
CreateCatalogStmt createStmt4 = (CreateCatalogStmt) parseAndAnalyzeStmt(createCatalogSql);
ExceptionChecker.expectThrowsWithMsg(DdlException.class,
"Required property 'driver_url' is missing",
() -> mgr.createCatalog(createStmt4));
}
}