From aef905cb16804bd429424e02a33ce5047ed7e754 Mon Sep 17 00:00:00 2001 From: YangKeao Date: Wed, 21 Sep 2022 23:39:02 -0400 Subject: [PATCH] executor: convert json numeric value to float64 in hash (#38065) close pingcap/tidb#38049 --- executor/aggfuncs/aggfunc_test.go | 3 +-- executor/aggfuncs/func_count_distinct.go | 2 +- executor/executor_test.go | 34 ++++++++++++++++++++++++ types/json_binary.go | 6 +++++ util/codec/codec.go | 7 +++-- util/codec/codec_test.go | 3 +++ 6 files changed, 50 insertions(+), 5 deletions(-) diff --git a/executor/aggfuncs/aggfunc_test.go b/executor/aggfuncs/aggfunc_test.go index 1d1a98a2d2..537b5af7ac 100644 --- a/executor/aggfuncs/aggfunc_test.go +++ b/executor/aggfuncs/aggfunc_test.go @@ -189,8 +189,7 @@ func distinctUpdateMemDeltaGens(srcChk *chunk.Chunk, dataType *types.FieldType) case mysql.TypeJSON: jsonVal := row.GetJSON(0) bytes := make([]byte, 0) - bytes = append(bytes, jsonVal.TypeCode) - bytes = append(bytes, jsonVal.Value...) + bytes = jsonVal.HashValue(bytes) val = string(bytes) memDelta = int64(len(val)) default: diff --git a/executor/aggfuncs/func_count_distinct.go b/executor/aggfuncs/func_count_distinct.go index b90310c72e..2b4e50a29d 100644 --- a/executor/aggfuncs/func_count_distinct.go +++ b/executor/aggfuncs/func_count_distinct.go @@ -402,7 +402,7 @@ func evalAndEncode( if err != nil || isNull { break } - encodedBytes = appendJSON(encodedBytes, buf, val) + encodedBytes = val.HashValue(encodedBytes) case types.ETString: var val string val, isNull, err = arg.EvalString(sctx, row) diff --git a/executor/executor_test.go b/executor/executor_test.go index 9b469f0705..87b58a9f5a 100644 --- a/executor/executor_test.go +++ b/executor/executor_test.go @@ -5981,6 +5981,40 @@ func TestIsFastPlan(t *testing.T) { } } +func TestCountDistinctJSON(t *testing.T) { + store := testkit.CreateMockStore(t) + + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(j JSON)") + tk.MustExec("insert into t values('2010')") + tk.MustExec("insert into t values('2011')") + tk.MustExec("insert into t values('2012')") + tk.MustExec("insert into t values('2010.000')") + tk.MustExec("insert into t values(cast(? as JSON))", uint64(math.MaxUint64)) + tk.MustExec("insert into t values(cast(? as JSON))", float64(math.MaxUint64)) + + tk.MustQuery("select count(distinct j) from t").Check(testkit.Rows("5")) +} + +func TestHashJoinJSON(t *testing.T) { + store := testkit.CreateMockStore(t) + + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(id int(11), j JSON, d DOUBLE)") + tk.MustExec("insert into t values(0, '2010', 2010)") + tk.MustExec("insert into t values(1, '2011', 2011)") + tk.MustExec("insert into t values(2, '2012', 2012)") + tk.MustExec("insert into t values(3, cast(? as JSON), ?)", uint64(math.MaxUint64), float64(math.MaxUint64)) + + tk.MustQuery("select /*+inl_hash_join(t2)*/ t1.id, t2.id from t t1 join t t2 on t1.j = t2.d;").Check(testkit.Rows("0 0", "1 1", "2 2")) +} + func TestBinaryStrNumericOperator(t *testing.T) { store := testkit.CreateMockStore(t) diff --git a/types/json_binary.go b/types/json_binary.go index 1b03c56781..badbb47a1c 100644 --- a/types/json_binary.go +++ b/types/json_binary.go @@ -538,6 +538,12 @@ func (bj BinaryJSON) HashValue(buf []byte) []byte { } else { buf = append(buf, bj.Value...) } + case JSONTypeCodeUint64: + if bj.GetUint64() == uint64(float64(bj.GetUint64())) { + buf = appendBinaryFloat64(buf, float64(bj.GetUint64())) + } else { + buf = append(buf, bj.Value...) + } case JSONTypeCodeArray: elemCount := int(jsonEndian.Uint32(bj.Value)) for i := 0; i < elemCount; i++ { diff --git a/util/codec/codec.go b/util/codec/codec.go index a57fd38576..ef29be1cdb 100644 --- a/util/codec/codec.go +++ b/util/codec/codec.go @@ -386,7 +386,8 @@ func encodeHashChunkRowIdx(sc *stmtctx.StatementContext, row chunk.Row, tp *type b = (*[unsafe.Sizeof(v)]byte)(unsafe.Pointer(&v))[:] case mysql.TypeJSON: flag = jsonFlag - b = row.GetBytes(idx) + json := row.GetJSON(idx) + b = json.HashValue(b) default: return 0, nil, errors.Errorf("unsupport column type for encode %d", tp.GetType()) } @@ -645,7 +646,9 @@ func HashChunkSelected(sc *stmtctx.StatementContext, h []hash.Hash64, chk *chunk isNull[i] = !ignoreNull } else { buf[0] = jsonFlag - b = column.GetBytes(i) + json := column.GetJSON(i) + b = b[:0] + b = json.HashValue(b) } // As the golang doc described, `Hash.Write` never returns an error.. diff --git a/util/codec/codec_test.go b/util/codec/codec_test.go index 7d2741ba1e..61447827e5 100644 --- a/util/codec/codec_test.go +++ b/util/codec/codec_test.go @@ -1187,6 +1187,9 @@ func TestHashChunkRow(t *testing.T) { testHashChunkRowEqual(t, "x", []byte("x"), true) testHashChunkRowEqual(t, "x", []byte("y"), false) + + testHashChunkRowEqual(t, types.CreateBinaryJSON(int64(1)), types.CreateBinaryJSON(float64(1.0)), true) + testHashChunkRowEqual(t, types.CreateBinaryJSON(uint64(math.MaxUint64)), types.CreateBinaryJSON(float64(math.MaxUint64)), false) } func TestValueSizeOfSignedInt(t *testing.T) {