[Feature-wip](MySQL Load)Support cancel query for mysql load (#17233)
Notice some changes: 1. Support cancel query for mysql load 2. Change the thread pool for mysql load manager. 3. Fix secure path check logic 4. Fix some doc errors
This commit is contained in:
@ -102,7 +102,7 @@ PROPERTIES ("strict_mode" = "true")
|
||||
```
|
||||
1. The only difference between the syntax of importing server level local files and importing client side files is whether the 'LOCAL' keyword is added after the 'LOAD DATA' keyword.
|
||||
2. FE may have multiple nodes; importing server level files can only import files local to the FE node the client is connected to, and cannot import files local to other FE nodes.
|
||||
3. Server side load was disabled by default. Enable it by setting `mysql_load_server_secure_path` with a secure path. All the load file should be under this path. Recommend create a `local_import_data` directory under `DORIS_HOME` to load data.
|
||||
3. Server side load was disabled by default. Enable it by setting `mysql_load_server_secure_path` with a secure path. All the load file should be under this path.
|
||||
|
||||
### Return result
|
||||
Since MySQL load is a synchronous import method, the imported results are returned to the user through SQL syntax.
|
||||
@ -113,6 +113,10 @@ Query OK, 1 row affected (0.17 sec)
|
||||
Records: 1 Deleted: 0 Skipped: 0 Warnings: 0
|
||||
```
|
||||
|
||||
### Configuration
|
||||
1. `mysql_load_thread_pool`: the thread pool size for a single FE node, 4 threads by default. The block queue size is 5 times of `mysql_load_thread_pool`. So FE can accept 4 + 4*5 = 24 requests at one time. Increase this configuration if the parallelism is larger than 24.
|
||||
2. `mysql_load_server_secure_path`: the secure path for loading data from the server. Empty path by default means that server load is not allowed. Recommend to create a `local_import_data` directory under `DORIS_HOME` to load data if you want to enable it.
|
||||
|
||||
## Notice
|
||||
|
||||
1. If you see this `LOAD DATA LOCAL INFILE file request rejected due to restrictions on access` message, you should connect mysql with the `mysql --local-infile=1` command to enable the client to load local files.
|
||||
|
||||
@ -121,8 +121,8 @@ This import method can still guarantee the atomicity of a batch of import tasks,
|
||||
```sql
|
||||
LOAD DATA LOCAL
|
||||
INFILE 'testData'
|
||||
PARTITION (p1, p2)
|
||||
INTO TABLE testDb.testTbl
|
||||
PARTITION (p1, p2)
|
||||
PROPERTIES ("max_filter_ratio"="0.2")
|
||||
```
|
||||
|
||||
@ -141,8 +141,8 @@ This import method can still guarantee the atomicity of a batch of import tasks,
|
||||
```sql
|
||||
LOAD DATA LOCAL
|
||||
INFILE 'testData'
|
||||
PARTITION (p1, p2)
|
||||
INTO TABLE testDb.testTbl
|
||||
PARTITION (p1, p2)
|
||||
IGNORE 1 LINES
|
||||
```
|
||||
|
||||
|
||||
@ -103,7 +103,7 @@ PROPERTIES ("strict_mode"="true")
|
||||
```
|
||||
1. 导入服务端本地文件的语法和导入客户端语法的唯一区别是`LOAD DATA`关键词后面是否加入`LOCAL`关键字.
|
||||
2. FE为多节点部署, 导入服务端文件功能只能够导入客户端连接的FE节点, 无法导入其他FE节点本地的文件.
|
||||
3. 服务端导入默认是关闭, 通过设置FE的配置`mysql_load_server_secure_path`开启, 导入文件的必须在该目录下.建议在`DORIS_HOME`目录下创建一个`local_import_data`目录用于导入数据.
|
||||
3. 服务端导入默认是关闭, 通过设置FE的配置`mysql_load_server_secure_path`开启, 导入文件的必须在该目录下.
|
||||
|
||||
### 返回结果
|
||||
|
||||
@ -115,6 +115,10 @@ Query OK, 1 row affected (0.17 sec)
|
||||
Records: 1 Deleted: 0 Skipped: 0 Warnings: 0
|
||||
```
|
||||
|
||||
### 配置项
|
||||
1. `mysql_load_thread_pool`控制单个FE中MySQL Load并发执行线程个数, 默认为4. 线程池的排队队列大小为`mysql_load_thread_pool`的5倍, 因此默认情况下, 可以并发提交的任务为 4 + 4*5 = 24个. 如果并发个数超过24时, 可以调大该配置项.
|
||||
2. `mysql_load_server_secure_path`服务端导入的安全路径, 默认为空, 即不允许服务端导入. 如需开启这个功能, 建议在`DORIS_HOME`目录下创建一个`local_import_data`目录, 用于导入数据.
|
||||
|
||||
## 注意事项
|
||||
|
||||
1. 如果客户端出现`LOAD DATA LOCAL INFILE file request rejected due to restrictions on access`错误, 需要用`mysql --local-infile=1`命令来打开客户端的导入功能.
|
||||
|
||||
@ -120,8 +120,8 @@ INTO TABLE tbl_name
|
||||
```sql
|
||||
LOAD DATA LOCAL
|
||||
INFILE 'testData'
|
||||
PARTITION (p1, p2)
|
||||
INTO TABLE testDb.testTbl
|
||||
PARTITION (p1, p2)
|
||||
PROPERTIES ("max_filter_ratio"="0.2")
|
||||
```
|
||||
|
||||
@ -140,8 +140,8 @@ INTO TABLE tbl_name
|
||||
```sql
|
||||
LOAD DATA LOCAL
|
||||
INFILE 'testData'
|
||||
PARTITION (p1, p2)
|
||||
INTO TABLE testDb.testTbl
|
||||
PARTITION (p1, p2)
|
||||
IGNORE 1 LINES
|
||||
```
|
||||
|
||||
|
||||
@ -2056,5 +2056,8 @@ public class Config extends ConfigBase {
|
||||
*/
|
||||
@ConfField(mutable = false, masterOnly = false)
|
||||
public static String mysql_load_server_secure_path = "";
|
||||
|
||||
@ConfField(mutable = false, masterOnly = false)
|
||||
public static int mysql_load_thread_pool = 4;
|
||||
}
|
||||
|
||||
|
||||
@ -39,6 +39,7 @@ import com.google.common.collect.Lists;
|
||||
import org.checkerframework.checker.nullness.qual.Nullable;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
@ -429,10 +430,15 @@ public class LoadStmt extends DdlStmt {
|
||||
throw new AnalysisException("Load local data from fe local is not enabled. If you want to use it,"
|
||||
+ " plz set the `mysql_load_server_secure_path` for FE to be a right path.");
|
||||
} else {
|
||||
if (!(path.startsWith(Config.mysql_load_server_secure_path))) {
|
||||
throw new AnalysisException("Local file should be under the secure path of FE.");
|
||||
File file = new File(path);
|
||||
try {
|
||||
if (!(file.getCanonicalPath().startsWith(Config.mysql_load_server_secure_path))) {
|
||||
throw new AnalysisException("Local file should be under the secure path of FE.");
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
if (!new File(path).exists()) {
|
||||
if (!file.exists()) {
|
||||
throw new AnalysisException("File: " + path + " is not exists.");
|
||||
}
|
||||
}
|
||||
|
||||
@ -22,6 +22,7 @@ import org.apache.doris.analysis.Expr;
|
||||
import org.apache.doris.analysis.LoadStmt;
|
||||
import org.apache.doris.catalog.Env;
|
||||
import org.apache.doris.cluster.ClusterNamespace;
|
||||
import org.apache.doris.common.Config;
|
||||
import org.apache.doris.common.LoadException;
|
||||
import org.apache.doris.common.ThreadPoolManager;
|
||||
import org.apache.doris.common.UserException;
|
||||
@ -54,6 +55,7 @@ import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ThreadPoolExecutor;
|
||||
|
||||
public class MysqlLoadManager {
|
||||
@ -62,12 +64,52 @@ public class MysqlLoadManager {
|
||||
private final ThreadPoolExecutor mysqlLoadPool;
|
||||
private final TokenManager tokenManager;
|
||||
|
||||
private static class MySqlLoadContext {
|
||||
private boolean finished;
|
||||
private HttpPut request;
|
||||
private boolean isCancelled;
|
||||
|
||||
public MySqlLoadContext() {
|
||||
this.finished = false;
|
||||
this.isCancelled = false;
|
||||
}
|
||||
|
||||
public boolean isFinished() {
|
||||
return finished;
|
||||
}
|
||||
|
||||
public void setFinished(boolean finished) {
|
||||
this.finished = finished;
|
||||
}
|
||||
|
||||
public HttpPut getRequest() {
|
||||
return request;
|
||||
}
|
||||
|
||||
public void setRequest(HttpPut request) {
|
||||
this.request = request;
|
||||
}
|
||||
|
||||
public boolean isCancelled() {
|
||||
return isCancelled;
|
||||
}
|
||||
|
||||
public void setCancelled(boolean cancelled) {
|
||||
isCancelled = cancelled;
|
||||
}
|
||||
}
|
||||
|
||||
private final Map<String, MySqlLoadContext> loadContextMap = new ConcurrentHashMap<>();
|
||||
|
||||
|
||||
public MysqlLoadManager(TokenManager tokenManager) {
|
||||
this.mysqlLoadPool = ThreadPoolManager.newDaemonCacheThreadPool(4, "Mysql Load", true);
|
||||
int poolSize = Config.mysql_load_thread_pool;
|
||||
// MySqlLoad pool can accept 4 + 4 * 5 = 24 requests by default.
|
||||
this.mysqlLoadPool = ThreadPoolManager.newDaemonFixedThreadPool(poolSize, poolSize * 5, "Mysql Load", true);
|
||||
this.tokenManager = tokenManager;
|
||||
}
|
||||
|
||||
public LoadJobRowResult executeMySqlLoadJobFromStmt(ConnectContext context, LoadStmt stmt)
|
||||
public LoadJobRowResult executeMySqlLoadJobFromStmt(ConnectContext context, LoadStmt stmt, String loadId)
|
||||
throws IOException, UserException {
|
||||
LoadJobRowResult loadResult = new LoadJobRowResult();
|
||||
// Mysql data load only have one data desc
|
||||
@ -75,11 +117,21 @@ public class MysqlLoadManager {
|
||||
List<String> filePaths = dataDesc.getFilePaths();
|
||||
String database = ClusterNamespace.getNameFromFullName(dataDesc.getDbName());
|
||||
String table = dataDesc.getTableName();
|
||||
int oldTimeout = context.getExecTimeout();
|
||||
int newTimeOut = extractTimeOut(dataDesc);
|
||||
if (newTimeOut > oldTimeout) {
|
||||
// set exec timeout avoid by killed TimeoutChecker
|
||||
context.setExecTimeout(newTimeOut);
|
||||
}
|
||||
String token = tokenManager.acquireToken();
|
||||
LOG.info("execute MySqlLoadJob for id: {}.", loadId);
|
||||
try (final CloseableHttpClient httpclient = HttpClients.createDefault()) {
|
||||
for (String file : filePaths) {
|
||||
InputStreamEntity entity = getInputStreamEntity(context, dataDesc.isClientLocal(), file);
|
||||
InputStreamEntity entity = getInputStreamEntity(context, dataDesc.isClientLocal(), file, loadId);
|
||||
HttpPut request = generateRequestForMySqlLoad(entity, dataDesc, database, table, token);
|
||||
MySqlLoadContext loadContext = new MySqlLoadContext();
|
||||
loadContext.setRequest(request);
|
||||
loadContextMap.put(loadId, loadContext);
|
||||
try (final CloseableHttpResponse response = httpclient.execute(request)) {
|
||||
String body = EntityUtils.toString(response.getEntity());
|
||||
JsonObject result = JsonParser.parseString(body).getAsJsonObject();
|
||||
@ -91,10 +143,51 @@ public class MysqlLoadManager {
|
||||
loadResult.incSkipped(result.get("NumberFilteredRows").getAsInt());
|
||||
}
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
LOG.warn("Execute mysql load {} failed", loadId, t);
|
||||
// drain the data from client conn util empty packet received, otherwise the connection will be reset
|
||||
if (loadContextMap.containsKey(loadId) && !loadContextMap.get(loadId).isFinished()) {
|
||||
LOG.warn("not drained yet, try reading left data from client connection for load {}.", loadId);
|
||||
ByteBuffer buffer = context.getMysqlChannel().fetchOnePacket();
|
||||
// MySql client will send an empty packet when eof
|
||||
while (buffer != null && buffer.limit() != 0) {
|
||||
buffer = context.getMysqlChannel().fetchOnePacket();
|
||||
}
|
||||
LOG.debug("Finished reading the left bytes.");
|
||||
}
|
||||
// make cancel message to user
|
||||
if (loadContextMap.containsKey(loadId) && loadContextMap.get(loadId).isCancelled()) {
|
||||
throw new LoadException("Cancelled");
|
||||
} else {
|
||||
throw t;
|
||||
}
|
||||
} finally {
|
||||
loadContextMap.remove(loadId);
|
||||
// revert the exec timeout
|
||||
if (newTimeOut > oldTimeout) {
|
||||
context.setExecTimeout(oldTimeout);
|
||||
}
|
||||
}
|
||||
return loadResult;
|
||||
}
|
||||
|
||||
public void cancelMySqlLoad(String loadId) {
|
||||
if (loadContextMap.containsKey(loadId)) {
|
||||
loadContextMap.get(loadId).setCancelled(true);
|
||||
loadContextMap.get(loadId).getRequest().abort();
|
||||
LOG.info("Cancel MySqlLoad with id {}", loadId);
|
||||
} else {
|
||||
LOG.info("Load id: {} may be already finished.", loadId);
|
||||
}
|
||||
}
|
||||
|
||||
public int extractTimeOut(DataDescription desc) {
|
||||
if (desc.getProperties() != null && desc.getProperties().containsKey(LoadStmt.TIMEOUT_PROPERTY)) {
|
||||
return Integer.parseInt(desc.getProperties().get(LoadStmt.TIMEOUT_PROPERTY));
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
private String getColumns(DataDescription desc) {
|
||||
if (desc.getFileFieldNames() != null) {
|
||||
List<String> fields = desc.getFileFieldNames();
|
||||
@ -114,14 +207,18 @@ public class MysqlLoadManager {
|
||||
return null;
|
||||
}
|
||||
|
||||
private InputStreamEntity getInputStreamEntity(ConnectContext context, boolean isClientLocal, String file)
|
||||
private InputStreamEntity getInputStreamEntity(
|
||||
ConnectContext context,
|
||||
boolean isClientLocal,
|
||||
String file,
|
||||
String loadId)
|
||||
throws IOException {
|
||||
InputStream inputStream;
|
||||
if (isClientLocal) {
|
||||
// mysql client will check the file exist.
|
||||
replyClientForReadFile(context, file);
|
||||
inputStream = new ByteBufferNetworkInputStream();
|
||||
fillByteBufferAsync(context, (ByteBufferNetworkInputStream) inputStream);
|
||||
fillByteBufferAsync(context, (ByteBufferNetworkInputStream) inputStream, loadId);
|
||||
} else {
|
||||
// server side file had already check after analyze.
|
||||
inputStream = Files.newInputStream(Paths.get(file));
|
||||
@ -137,7 +234,7 @@ public class MysqlLoadManager {
|
||||
context.getMysqlChannel().sendAndFlush(serializer.toByteBuffer());
|
||||
}
|
||||
|
||||
private void fillByteBufferAsync(ConnectContext context, ByteBufferNetworkInputStream inputStream) {
|
||||
private void fillByteBufferAsync(ConnectContext context, ByteBufferNetworkInputStream inputStream, String loadId) {
|
||||
mysqlLoadPool.submit(() -> {
|
||||
ByteBuffer buffer;
|
||||
try {
|
||||
@ -147,7 +244,11 @@ public class MysqlLoadManager {
|
||||
inputStream.fillByteBuffer(buffer);
|
||||
buffer = context.getMysqlChannel().fetchOnePacket();
|
||||
}
|
||||
if (loadContextMap.containsKey(loadId)) {
|
||||
loadContextMap.get(loadId).setFinished(true);
|
||||
}
|
||||
} catch (IOException | InterruptedException e) {
|
||||
LOG.warn("Failed fetch packet from mysql client for load: " + loadId, e);
|
||||
throw new RuntimeException(e);
|
||||
} finally {
|
||||
inputStream.markFinished();
|
||||
|
||||
@ -199,6 +199,7 @@ public class StmtExecutor implements ProfileWriter {
|
||||
private QueryPlannerProfile plannerProfile = new QueryPlannerProfile();
|
||||
private String stmtName;
|
||||
private PrepareStmt prepareStmt;
|
||||
private String mysqlLoadId;
|
||||
|
||||
// The result schema if "dry_run_query" is true.
|
||||
// Only one column to indicate the real return row numbers.
|
||||
@ -1010,6 +1011,9 @@ public class StmtExecutor implements ProfileWriter {
|
||||
if (coordRef != null) {
|
||||
coordRef.cancel();
|
||||
}
|
||||
if (mysqlLoadId != null) {
|
||||
Env.getCurrentEnv().getLoadManager().getMysqlLoadManager().cancelMySqlLoad(mysqlLoadId);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle kill statement.
|
||||
@ -1900,8 +1904,10 @@ public class StmtExecutor implements ProfileWriter {
|
||||
+ " to load client local file.");
|
||||
return;
|
||||
}
|
||||
String loadId = UUID.randomUUID().toString();
|
||||
mysqlLoadId = loadId;
|
||||
LoadJobRowResult submitResult = loadManager.getMysqlLoadManager()
|
||||
.executeMySqlLoadJobFromStmt(context, loadStmt);
|
||||
.executeMySqlLoadJobFromStmt(context, loadStmt, loadId);
|
||||
context.getState().setOk(submitResult.getRecords(), submitResult.getWarnings(),
|
||||
submitResult.toString());
|
||||
} else {
|
||||
|
||||
@ -240,4 +240,47 @@ public class LoadStmtTest {
|
||||
Assert.assertNull(stmt.getLabel().getDbName());
|
||||
Assert.assertEquals(EtlJobType.LOCAL_FILE, stmt.getEtlJobType());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMySqlLoadPath(@Injectable DataDescription desc) throws UserException, IOException {
|
||||
File temp = File.createTempFile("testMySqlLoadData_path", ".txt");
|
||||
String parentPath = temp.getParent();
|
||||
String fakePath = parentPath + "/../fake_path";
|
||||
new Expectations() {
|
||||
{
|
||||
desc.isClientLocal();
|
||||
minTimes = 0;
|
||||
result = false;
|
||||
|
||||
desc.getFilePaths();
|
||||
minTimes = 0;
|
||||
result = Lists.newArrayList(fakePath);
|
||||
|
||||
desc.toSql();
|
||||
minTimes = 0;
|
||||
result = "XXX";
|
||||
|
||||
desc.getTableName();
|
||||
minTimes = 0;
|
||||
result = "testTbl";
|
||||
|
||||
desc.analyzeFullDbName(null, (Analyzer) any);
|
||||
minTimes = 0;
|
||||
result = "testCluster:testDb";
|
||||
|
||||
desc.getMergeType();
|
||||
minTimes = 0;
|
||||
result = LoadTask.MergeType.APPEND;
|
||||
}
|
||||
};
|
||||
|
||||
LoadStmt stmt = new LoadStmt(desc, Maps.newHashMap(), "");
|
||||
Config.mysql_load_server_secure_path = parentPath;
|
||||
try {
|
||||
stmt.analyze(analyzer);
|
||||
} catch (AnalysisException ae) {
|
||||
Assert.assertEquals("errCode = 2, detailMessage = Local file should be under the secure path of FE.",
|
||||
ae.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user