statistics: fix "data too long" error when dumping stats from table with new collation data (#27033)

This commit is contained in:
Zhou Kunqin
2021-08-17 16:02:00 +08:00
committed by GitHub
parent b7860acc9f
commit d06fd67764
3 changed files with 28 additions and 1 deletions

View File

@ -1270,7 +1270,7 @@ workLoop:
// When it's new collation data, we need to use its collate key instead of original value because only
// the collate key can ensure the correct ordering.
// This is also corresponding to similar operation in (*statistics.Column).GetColumnRowCount().
if ft.EvalType() == types.ETString {
if ft.EvalType() == types.ETString && ft.Tp != mysql.TypeEnum && ft.Tp != mysql.TypeSet {
val.SetBytes(collate.GetCollator(ft.Collate).Key(val.GetString()))
}
sampleItems = append(sampleItems, &statistics.SampleItem{

View File

@ -1287,6 +1287,12 @@ func (h *Handle) histogramFromStorage(reader *statsReader, tableID int64, colID
} else {
sc := &stmtctx.StatementContext{TimeZone: time.UTC}
d := rows[i].GetDatum(2, &fields[2].Column.FieldType)
// When there's new collation data, the length of bounds of histogram(the collate key) might be
// longer than the FieldType.Flen of this column.
// We change it to TypeBlob to bypass the length check here.
if tp.EvalType() == types.ETString && tp.Tp != mysql.TypeEnum && tp.Tp != mysql.TypeSet {
tp = types.NewFieldType(mysql.TypeBlob)
}
lowerBound, err = d.ConvertTo(sc, tp)
if err != nil {
return nil, errors.Trace(err)

View File

@ -2867,6 +2867,27 @@ func (s *testSerialStatsSuite) TestCorrelationWithDefinedCollate(c *C) {
c.Assert(rows[0][5], Equals, "-1.000000")
}
// TestLoadHistogramWithCollate checks that table statistics can be read back
// from storage without error when the new collation framework is enabled and
// the analyzed column is a varchar(10) with collation utf8mb4_unicode_ci whose
// values fill the full declared length.
func (s *testSerialStatsSuite) TestLoadHistogramWithCollate(c *C) {
	defer cleanEnv(c, s.store, s.do)
	tk := testkit.NewTestKit(c, s.store)

	// Turn the new collation framework on for this test only.
	collate.SetNewCollationEnabledForTest(true)
	defer collate.SetNewCollationEnabledForTest(false)

	// Build the table, fill it with two 10-character values and analyze it
	// with TopN disabled.
	setupSQLs := []string{
		"use test",
		"drop table if exists t",
		"create table t(a varchar(10) collate utf8mb4_unicode_ci);",
		"insert into t values('abcdefghij');",
		"insert into t values('abcdufghij');",
		"analyze table t with 0 topn;",
	}
	for _, stmt := range setupSQLs {
		tk.MustExec(stmt)
	}

	statsHandle := s.do.StatsHandle()
	infoSchema := s.do.InfoSchema()
	table, err := infoSchema.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
	c.Assert(err, IsNil)

	// Loading the freshly analyzed statistics from storage must succeed.
	meta := table.Meta()
	_, err = statsHandle.TableStatsFromStorage(meta, meta.ID, true, 0)
	c.Assert(err, IsNil)
}
func (s *testSerialStatsSuite) TestFastAnalyzeColumnHistWithNullValue(c *C) {
defer cleanEnv(c, s.store, s.do)
testKit := testkit.NewTestKit(c, s.store)