calibrate: refactor metrics error (#44451)

ref pingcap/tidb#43212
2023-06-16 17:25:10 +08:00
parent 052c17f4f6
commit 841aed8d95
2 changed files with 41 additions and 26 deletions
--- a/executor/calibrate_resource.go
+++ b/executor/calibrate_resource.go
@ -183,6 +183,11 @@ func (e *calibrateResourceExec) Next(ctx context.Context, req *chunk.Chunk) erro
 	return e.staticCalibrate(ctx, req, exec)
 }

+var (
+	errLowUsage          = errors.Errorf("The workload in selected time window is too low, with which TiDB is unable to reach a capacity estimation; please select another time window with higher workload, or calibrate resource by hardware instead")
+	errNoCPUQuotaMetrics = errors.Normalize("There is no CPU quota metrics, %v")
+)
+
 func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk.Chunk, exec sqlexec.RestrictedSQLExecutor) error {
 	startTs, endTs, err := e.parseCalibrateDuration(ctx)
 	if err != nil {
@ -193,11 +198,11 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk

 	totalKVCPUQuota, err := getTiKVTotalCPUQuota(ctx, exec)
 	if err != nil {
-		return err
+		return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
 	}
 	totalTiDBCPU, err := getTiDBTotalCPUQuota(ctx, exec)
 	if err != nil {
-		return err
+		return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
 	}
 	rus, err := getRUPerSec(ctx, e.ctx, exec, startTime, endTime)
 	if err != nil {
@ -243,10 +248,10 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk
 		tikvCPUs.next()
 	}
 	if len(quotas) < 5 {
-		return errors.Errorf("There are too few metrics points available in selected time window")
+		return errLowUsage
 	}
 	if float64(len(quotas))/float64(len(quotas)+lowCount) <= percentOfPass {
-		return errors.Errorf("The workload in selected time window is too low, with which TiDB is unable to reach a capacity estimation; please select another time window with higher workload, or calibrate resource by hardware instead")
+		return errLowUsage
 	}
 	sort.Slice(quotas, func(i, j int) bool {
 		return quotas[i] > quotas[j]
@ -274,11 +279,11 @@ func (e *calibrateResourceExec) staticCalibrate(ctx context.Context, req *chunk.

 	totalKVCPUQuota, err := getTiKVTotalCPUQuota(ctx, exec)
 	if err != nil {
-		return err
+		return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
 	}
 	totalTiDBCPU, err := getTiDBTotalCPUQuota(ctx, exec)
 	if err != nil {
-		return err
+		return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
 	}

 	// The default workload to calculate the RU capacity.
@ -378,9 +383,6 @@ func getValuesFromMetrics(ctx context.Context, sctx sessionctx.Context, exec sql
 	if err != nil {
 		return nil, errors.Trace(err)
 	}
-	if len(rows) == 0 {
-		return nil, errors.Errorf("metrics '%s' is empty", metrics)
-	}
 	ret := make([]*timePointValue, 0, len(rows))
 	for _, row := range rows {
 		if tp, err := row.GetTime(0).AdjustedGoTime(sctx.GetSessionVars().Location()); err == nil {
--- a/executor/calibrate_resource_test.go
+++ b/executor/calibrate_resource_test.go
@ -89,24 +89,30 @@ func TestCalibrateResource(t *testing.T) {
 		return time
 	}

-	mockData := map[string][][]types.Datum{
-		"tikv_cpu_quota": {
-			types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", 8.0),
-			types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-1", 8.0),
-			types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-2", 8.0),
-			types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", 8.0),
-			types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", 8.0),
-			types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", 8.0),
-		},
-		"tidb_server_maxprocs": {
-			types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 40.0),
-			types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 40.0),
-		},
-	}
+	mockData := make(map[string][][]types.Datum)
 	ctx := context.WithValue(context.Background(), "__mockMetricsTableData", mockData)
 	ctx = failpoint.WithHook(ctx, func(_ context.Context, fpname string) bool {
 		return fpName == fpname
 	})
+	rs, err = tk.Exec("CALIBRATE RESOURCE")
+	require.NoError(t, err)
+	require.NotNil(t, rs)
+	err = rs.Next(ctx, rs.NewChunk(nil))
+	// When the mock metrics are empty, the error is always `pd unavailable`, so don't check the details.
+	require.ErrorContains(t, err, "There is no CPU quota metrics, query metric error: pd unavailable")
+
+	mockData["tikv_cpu_quota"] = [][]types.Datum{
+		types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", 8.0),
+		types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-1", 8.0),
+		types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-2", 8.0),
+		types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", 8.0),
+		types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", 8.0),
+		types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", 8.0),
+	}
+	mockData["tidb_server_maxprocs"] = [][]types.Datum{
+		types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 40.0),
+		types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 40.0),
+	}
 	tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE").Check(testkit.Rows("69768"))
 	tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE WORKLOAD TPCC").Check(testkit.Rows("69768"))
 	tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE WORKLOAD OLTP_READ_WRITE").Check(testkit.Rows("55823"))
@ -396,7 +402,7 @@ func TestCalibrateResource(t *testing.T) {
 	require.NoError(t, err)
 	require.NotNil(t, rs)
 	err = rs.Next(ctx, rs.NewChunk(nil))
-	require.ErrorContains(t, err, "There are too few metrics points available in selected time window")
+	require.ErrorContains(t, err, "The workload in selected time window is too low")

 	ru3 := [][]types.Datum{
 		types.MakeDatums(datetime("2020-02-12 10:25:00"), 2200.0),
@ -436,7 +442,7 @@ func TestCalibrateResource(t *testing.T) {
 	require.NoError(t, err)
 	require.NotNil(t, rs)
 	err = rs.Next(ctx, rs.NewChunk(nil))
-	require.ErrorContains(t, err, "There are too few metrics points available in selected time window")
+	require.ErrorContains(t, err, "The workload in selected time window is too low")

 	// flash back to init data.
 	mockData["resource_manager_resource_unit"] = ru1
@ -547,7 +553,14 @@ func TestCalibrateResource(t *testing.T) {
 	require.NoError(t, err)
 	require.NotNil(t, rs)
 	err = rs.Next(ctx, rs.NewChunk(nil))
-	require.ErrorContains(t, err, "There are too few metrics points available in selected time window")
+	require.ErrorContains(t, err, "The workload in selected time window is too low")
+
+	delete(mockData, "process_cpu_usage")
+	rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:35:00' END_TIME '2020-02-12 10:45:00'")
+	require.NoError(t, err)
+	require.NotNil(t, rs)
+	err = rs.Next(ctx, rs.NewChunk(nil))
+	require.ErrorContains(t, err, "query metric error: pd unavailable")
 }

 type mockResourceGroupProvider struct {