[enhancement](stats) Retry when loading stats (#21849)

This commit is contained in:
AKIRA
2023-07-31 17:33:20 +08:00
committed by GitHub
parent afb6a57aa8
commit e72a012ada
5 changed files with 98 additions and 6 deletions

View File

@ -17,6 +17,7 @@
package org.apache.doris.common;
import org.apache.doris.metric.Metric;
import org.apache.doris.metric.Metric.MetricUnit;
import org.apache.doris.metric.MetricLabel;
@ -45,7 +46,6 @@ import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;
/**
* ThreadPoolManager is a helper class for construct daemon thread pool with limit thread and memory resource.
* thread names in thread pool are formatted as poolName-ID, where ID is a unique, sequentially assigned integer.
@ -134,6 +134,15 @@ public class ThreadPoolManager {
poolName, needRegisterMetric);
}
/**
 * Builds a fixed-size thread pool backed by a bounded queue, using a caller-supplied
 * rejection handler.
 *
 * @param numThread used as both the core and the maximum pool size
 * @param queueSize capacity of the {@link LinkedBlockingQueue} that holds pending tasks
 * @param poolName forwarded unchanged to {@code newDaemonThreadPool}
 * @param needRegisterMetric forwarded unchanged to {@code newDaemonThreadPool}
 * @param handler forwarded to {@code newDaemonThreadPool} as the rejected-execution handler
 * @return the executor created by {@code newDaemonThreadPool}
 */
public static ThreadPoolExecutor newDaemonFixedThreadPool(int numThread, int queueSize,
        String poolName,
        boolean needRegisterMetric,
        RejectedExecutionHandler handler) {
    // Bounded queue: at most queueSize tasks wait before the handler is consulted.
    LinkedBlockingQueue<Runnable> boundedQueue = new LinkedBlockingQueue<>(queueSize);
    return newDaemonThreadPool(numThread, numThread, KEEP_ALIVE_TIME, TimeUnit.SECONDS,
            boundedQueue, handler, poolName, needRegisterMetric);
}
public static <T> ThreadPoolExecutor newDaemonFixedPriorityThreadPool(int numThread, int initQueueSize,
Comparator<T> comparator, Class<T> tClass,
String poolName, boolean needRegisterMetric) {

View File

@ -0,0 +1,24 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.qe;
/**
 * Unchecked exception reporting that an internal query could not be executed.
 * It always carries the underlying failure as its cause so callers can inspect it.
 */
public class InternalQueryExecutionException extends RuntimeException {

    /**
     * Creates the exception.
     *
     * @param message human-readable description of the failure
     * @param cause the exception that made the internal query fail
     */
    public InternalQueryExecutionException(String message, Throwable cause) {
        super(message, cause);
    }
}

View File

@ -2505,7 +2505,7 @@ public class StmtExecutor {
coord.exec();
} catch (Exception e) {
queryScheduleSpan.recordException(e);
throw new RuntimeException("Failed to execute internal SQL. " + Util.getRootCauseMessage(e), e);
throw new InternalQueryExecutionException(e.getMessage() + Util.getRootCauseMessage(e), e);
} finally {
queryScheduleSpan.end();
}

View File

@ -19,6 +19,8 @@ package org.apache.doris.statistics;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.ThreadPoolManager;
import org.apache.doris.qe.InternalQueryExecutionException;
import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
import org.apache.doris.statistics.util.StatisticsUtil;
@ -27,16 +29,23 @@ import org.apache.logging.log4j.Logger;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.ThreadPoolExecutor.DiscardOldestPolicy;
public class ColumnStatisticsCacheLoader extends StatisticsCacheLoader<Optional<ColumnStatistic>> {
private static final Logger LOG = LogManager.getLogger(ColumnStatisticsCacheLoader.class);
// Executor on which failed statistics loads are retried in the background.
// Arguments, in order: thread count, work-queue capacity, pool name, whether to register
// pool metrics, and the rejection handler. DiscardOldestPolicy drops the oldest queued
// task to make room for a new one when the queue is full.
private static final ThreadPoolExecutor singleThreadPool = ThreadPoolManager.newDaemonFixedThreadPool(
StatisticConstants.RETRY_LOAD_THREAD_POOL_SIZE,
StatisticConstants.RETRY_LOAD_QUEUE_SIZE, "STATS_RELOAD",
true,
new DiscardOldestPolicy());
@Override
protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) {
// Load from statistics table.
Optional<ColumnStatistic> columnStatistic = loadFromStatsTable(key.tableId,
key.idxId, key.colName);
Optional<ColumnStatistic> columnStatistic = loadFromStatsTable(key);
if (columnStatistic.isPresent()) {
return columnStatistic;
}
@ -52,8 +61,14 @@ public class ColumnStatisticsCacheLoader extends StatisticsCacheLoader<Optional<
return columnStatistic;
}
private Optional<ColumnStatistic> loadFromStatsTable(long tableId, long idxId, String colName) {
List<ResultRow> columnResults = StatisticsRepository.loadColStats(tableId, idxId, colName);
private Optional<ColumnStatistic> loadFromStatsTable(StatisticsCacheKey key) {
List<ResultRow> columnResults = null;
try {
columnResults = StatisticsRepository.loadColStats(key.tableId, key.idxId, key.colName);
} catch (InternalQueryExecutionException e) {
retryLoad(key);
return Optional.empty();
}
ColumnStatistic columnStatistics;
try {
columnStatistics = StatisticsUtil.deserializeToColumnStatistics(columnResults);
@ -67,4 +82,42 @@ public class ColumnStatisticsCacheLoader extends StatisticsCacheLoader<Optional<
return Optional.of(columnStatistics);
}
}
/**
 * Hands the load for {@code key} over to the retry pool so it is attempted again
 * asynchronously. The submitted task starts with its retry counter at 1.
 */
private void retryLoad(StatisticsCacheKey key) {
    RetryTask firstAttempt = new RetryTask(key, 1);
    singleThreadPool.submit(firstAttempt);
}
/**
 * Background task that re-attempts loading the statistics of one column and, on success,
 * puts the deserialized result into the statistics cache.
 *
 * <p>When the load fails with {@link InternalQueryExecutionException} the task resubmits
 * itself to the retry pool until its counter reaches
 * {@code StatisticConstants.LOAD_RETRY_TIMES}; every failure is logged so that a
 * permanently failing load does not disappear silently.
 */
private static class RetryTask implements Runnable {
    private final StatisticsCacheKey key;
    // Number of the current attempt; incremented right before each resubmission.
    private int retryTimes;

    /**
     * @param key identifies the column statistics to load
     * @param retryTimes initial value of the attempt counter
     */
    public RetryTask(StatisticsCacheKey key, int retryTimes) {
        this.key = key;
        this.retryTimes = retryTimes;
    }

    @Override
    public void run() {
        List<ResultRow> columnResults;
        try {
            columnResults = StatisticsRepository.loadColStats(key.tableId, key.idxId, key.colName);
        } catch (InternalQueryExecutionException e) {
            String target = "tableId=" + key.tableId + ", idxId=" + key.idxId
                    + ", colName=" + key.colName;
            if (retryTimes < StatisticConstants.LOAD_RETRY_TIMES) {
                LOG.warn("Failed to load column statistics (" + target + "), attempt "
                        + retryTimes + " of " + StatisticConstants.LOAD_RETRY_TIMES
                        + ", will retry: " + e.getMessage());
                retryTimes++;
                singleThreadPool.submit(this);
            } else {
                // Retries exhausted: keep the stack trace of the last failure for diagnosis.
                LOG.warn("Give up loading column statistics (" + target + ") after "
                        + retryTimes + " attempts", e);
            }
            return;
        }
        ColumnStatistic columnStatistics;
        try {
            columnStatistics = StatisticsUtil.deserializeToColumnStatistics(columnResults);
        } catch (Exception e) {
            LOG.warn("Exception to deserialize column statistics", e);
            return;
        }
        // Nothing is cached when deserialization returns null.
        if (columnStatistics != null) {
            Env.getCurrentEnv().getStatisticsCache().putCache(key, columnStatistics);
        }
    }
}
}

View File

@ -79,6 +79,12 @@ public class StatisticConstants {
public static final int STATISTIC_INTERNAL_TABLE_REPLICA_NUM = 3;
// Capacity of the work queue of the retry pool created in ColumnStatisticsCacheLoader.
public static final int RETRY_LOAD_QUEUE_SIZE = 1000;
// Number of threads of the retry pool created in ColumnStatisticsCacheLoader.
public static final int RETRY_LOAD_THREAD_POOL_SIZE = 1;
// Upper bound for a retry task's attempt counter: the task resubmits itself only while
// its counter is below this value.
public static final int LOAD_RETRY_TIMES = 3;
static {
STATISTICS_DB_BLACK_LIST.add(SystemInfoService.DEFAULT_CLUSTER
+ ClusterNamespace.CLUSTER_DELIMITER + FeConstants.INTERNAL_DB_NAME);