From d06fd67764e6f3d21d73e5d658677a12d65c8dfd Mon Sep 17 00:00:00 2001 From: Zhou Kunqin <25057648+time-and-fate@users.noreply.github.com> Date: Tue, 17 Aug 2021 16:02:00 +0800 Subject: [PATCH] statistics: fix "data too long" error when dumping stats from table with new collation data (#27033) --- executor/analyze.go | 2 +- statistics/handle/handle.go | 6 ++++++ statistics/handle/handle_test.go | 21 +++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/executor/analyze.go b/executor/analyze.go index 67cc4cbd54..0f3bcee345 100644 --- a/executor/analyze.go +++ b/executor/analyze.go @@ -1270,7 +1270,7 @@ workLoop: // When it's new collation data, we need to use its collate key instead of original value because only // the collate key can ensure the correct ordering. // This is also corresponding to similar operation in (*statistics.Column).GetColumnRowCount(). - if ft.EvalType() == types.ETString { + if ft.EvalType() == types.ETString && ft.Tp != mysql.TypeEnum && ft.Tp != mysql.TypeSet { val.SetBytes(collate.GetCollator(ft.Collate).Key(val.GetString())) } sampleItems = append(sampleItems, &statistics.SampleItem{ diff --git a/statistics/handle/handle.go b/statistics/handle/handle.go index 48e4997c56..1c2a89fe0f 100644 --- a/statistics/handle/handle.go +++ b/statistics/handle/handle.go @@ -1287,6 +1287,12 @@ func (h *Handle) histogramFromStorage(reader *statsReader, tableID int64, colID } else { sc := &stmtctx.StatementContext{TimeZone: time.UTC} d := rows[i].GetDatum(2, &fields[2].Column.FieldType) + // When there's new collation data, the histogram bounds (the collate keys) might be + // longer than the FieldType.Flen of this column. + // We change tp to TypeBlob to bypass the length check here. 
+ if tp.EvalType() == types.ETString && tp.Tp != mysql.TypeEnum && tp.Tp != mysql.TypeSet { + tp = types.NewFieldType(mysql.TypeBlob) + } lowerBound, err = d.ConvertTo(sc, tp) if err != nil { return nil, errors.Trace(err) diff --git a/statistics/handle/handle_test.go b/statistics/handle/handle_test.go index 9cfd064477..533a15ad54 100644 --- a/statistics/handle/handle_test.go +++ b/statistics/handle/handle_test.go @@ -2867,6 +2867,27 @@ func (s *testSerialStatsSuite) TestCorrelationWithDefinedCollate(c *C) { c.Assert(rows[0][5], Equals, "-1.000000") } +func (s *testSerialStatsSuite) TestLoadHistogramWithCollate(c *C) { + defer cleanEnv(c, s.store, s.do) + testKit := testkit.NewTestKit(c, s.store) + collate.SetNewCollationEnabledForTest(true) + defer collate.SetNewCollationEnabledForTest(false) + testKit.MustExec("use test") + testKit.MustExec("drop table if exists t") + testKit.MustExec("create table t(a varchar(10) collate utf8mb4_unicode_ci);") + testKit.MustExec("insert into t values('abcdefghij');") + testKit.MustExec("insert into t values('abcdufghij');") + testKit.MustExec("analyze table t with 0 topn;") + do := s.do + h := do.StatsHandle() + is := do.InfoSchema() + tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + c.Assert(err, IsNil) + tblInfo := tbl.Meta() + _, err = h.TableStatsFromStorage(tblInfo, tblInfo.ID, true, 0) + c.Assert(err, IsNil) +} + func (s *testSerialStatsSuite) TestFastAnalyzeColumnHistWithNullValue(c *C) { defer cleanEnv(c, s.store, s.do) testKit := testkit.NewTestKit(c, s.store)