calibrate: refactor metrics error (#44451)

ref pingcap/tidb#43212
This commit is contained in:
Yongbo Jiang
2023-06-16 17:25:10 +08:00
committed by GitHub
parent 052c17f4f6
commit 841aed8d95
2 changed files with 41 additions and 26 deletions

View File

@ -183,6 +183,11 @@ func (e *calibrateResourceExec) Next(ctx context.Context, req *chunk.Chunk) erro
return e.staticCalibrate(ctx, req, exec)
}
// Shared errors returned by calibrateResourceExec when the metrics it reads
// cannot support a capacity estimation.
var (
// errLowUsage is returned by dynamicCalibrate when fewer than 5 entries were
// collected in quotas, or when len(quotas)/(len(quotas)+lowCount) is at or
// below percentOfPass.
errLowUsage = errors.Errorf("The workload in selected time window is too low, with which TiDB is unable to reach a capacity estimation; please select another time window with higher workload, or calibrate resource by hardware instead")
// errNoCPUQuotaMetrics is the error template used when getTiKVTotalCPUQuota
// or getTiDBTotalCPUQuota fails; callers fill %v with the underlying error
// message via FastGenByArgs(err.Error()).
errNoCPUQuotaMetrics = errors.Normalize("There is no CPU quota metrics, %v")
)
func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk.Chunk, exec sqlexec.RestrictedSQLExecutor) error {
startTs, endTs, err := e.parseCalibrateDuration(ctx)
if err != nil {
@ -193,11 +198,11 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk
totalKVCPUQuota, err := getTiKVTotalCPUQuota(ctx, exec)
if err != nil {
return err
return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
}
totalTiDBCPU, err := getTiDBTotalCPUQuota(ctx, exec)
if err != nil {
return err
return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
}
rus, err := getRUPerSec(ctx, e.ctx, exec, startTime, endTime)
if err != nil {
@ -243,10 +248,10 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk
tikvCPUs.next()
}
if len(quotas) < 5 {
return errors.Errorf("There are too few metrics points available in selected time window")
return errLowUsage
}
if float64(len(quotas))/float64(len(quotas)+lowCount) <= percentOfPass {
return errors.Errorf("The workload in selected time window is too low, with which TiDB is unable to reach a capacity estimation; please select another time window with higher workload, or calibrate resource by hardware instead")
return errLowUsage
}
sort.Slice(quotas, func(i, j int) bool {
return quotas[i] > quotas[j]
@ -274,11 +279,11 @@ func (e *calibrateResourceExec) staticCalibrate(ctx context.Context, req *chunk.
totalKVCPUQuota, err := getTiKVTotalCPUQuota(ctx, exec)
if err != nil {
return err
return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
}
totalTiDBCPU, err := getTiDBTotalCPUQuota(ctx, exec)
if err != nil {
return err
return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
}
// The default workload to calculate the RU capacity.
@ -378,9 +383,6 @@ func getValuesFromMetrics(ctx context.Context, sctx sessionctx.Context, exec sql
if err != nil {
return nil, errors.Trace(err)
}
if len(rows) == 0 {
return nil, errors.Errorf("metrics '%s' is empty", metrics)
}
ret := make([]*timePointValue, 0, len(rows))
for _, row := range rows {
if tp, err := row.GetTime(0).AdjustedGoTime(sctx.GetSessionVars().Location()); err == nil {

View File

@ -89,24 +89,30 @@ func TestCalibrateResource(t *testing.T) {
return time
}
mockData := map[string][][]types.Datum{
"tikv_cpu_quota": {
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", 8.0),
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-1", 8.0),
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-2", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", 8.0),
},
"tidb_server_maxprocs": {
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 40.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 40.0),
},
}
mockData := make(map[string][][]types.Datum)
ctx := context.WithValue(context.Background(), "__mockMetricsTableData", mockData)
ctx = failpoint.WithHook(ctx, func(_ context.Context, fpname string) bool {
return fpName == fpname
})
rs, err = tk.Exec("CALIBRATE RESOURCE")
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
// Because the error is always `pd unavailable` when the mock metrics are empty, don't check the details.
require.ErrorContains(t, err, "There is no CPU quota metrics, query metric error: pd unavailable")
mockData["tikv_cpu_quota"] = [][]types.Datum{
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", 8.0),
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-1", 8.0),
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-2", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", 8.0),
}
mockData["tidb_server_maxprocs"] = [][]types.Datum{
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 40.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 40.0),
}
tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE").Check(testkit.Rows("69768"))
tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE WORKLOAD TPCC").Check(testkit.Rows("69768"))
tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE WORKLOAD OLTP_READ_WRITE").Check(testkit.Rows("55823"))
@ -396,7 +402,7 @@ func TestCalibrateResource(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
require.ErrorContains(t, err, "There are too few metrics points available in selected time window")
require.ErrorContains(t, err, "The workload in selected time window is too low")
ru3 := [][]types.Datum{
types.MakeDatums(datetime("2020-02-12 10:25:00"), 2200.0),
@ -436,7 +442,7 @@ func TestCalibrateResource(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
require.ErrorContains(t, err, "There are too few metrics points available in selected time window")
require.ErrorContains(t, err, "The workload in selected time window is too low")
// flash back to init data.
mockData["resource_manager_resource_unit"] = ru1
@ -547,7 +553,14 @@ func TestCalibrateResource(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
require.ErrorContains(t, err, "There are too few metrics points available in selected time window")
require.ErrorContains(t, err, "The workload in selected time window is too low")
delete(mockData, "process_cpu_usage")
rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:35:00' END_TIME '2020-02-12 10:45:00'")
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
require.ErrorContains(t, err, "query metric error: pd unavailable")
}
type mockResourceGroupProvider struct {