planner: fix that vector index output empty result when pk is non-int type (#57629)

close pingcap/tidb#57627
This commit is contained in:
Yiding Cui
2024-11-27 09:38:29 +08:00
committed by GitHub
parent dcc9dcc1ea
commit feb34ecfee
5 changed files with 92 additions and 16 deletions

View File

@ -9,21 +9,25 @@ go_test(
],
data = glob(["testdata/**"]),
flaky = True,
shard_count = 4,
shard_count = 5,
deps = [
"//pkg/config",
"//pkg/domain",
"//pkg/domain/infosync",
"//pkg/meta/model",
"//pkg/parser/model",
"//pkg/planner",
"//pkg/planner/core",
"//pkg/planner/core/base",
"//pkg/planner/core/resolve",
"//pkg/session",
"//pkg/store/mockstore",
"//pkg/testkit",
"//pkg/testkit/testdata",
"//pkg/testkit/testfailpoint",
"//pkg/testkit/testmain",
"//pkg/testkit/testsetup",
"//pkg/types",
"//pkg/util/plancodec",
"@com_github_pingcap_tipb//go-tipb",
"@com_github_stretchr_testify//require",

View File

@ -24,12 +24,16 @@ import (
"github.com/pingcap/tidb/pkg/domain/infosync"
"github.com/pingcap/tidb/pkg/meta/model"
pmodel "github.com/pingcap/tidb/pkg/parser/model"
"github.com/pingcap/tidb/pkg/planner"
"github.com/pingcap/tidb/pkg/planner/core"
"github.com/pingcap/tidb/pkg/planner/core/base"
"github.com/pingcap/tidb/pkg/planner/core/resolve"
"github.com/pingcap/tidb/pkg/session"
"github.com/pingcap/tidb/pkg/store/mockstore"
"github.com/pingcap/tidb/pkg/testkit"
"github.com/pingcap/tidb/pkg/testkit/testdata"
"github.com/pingcap/tidb/pkg/testkit/testfailpoint"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/plancodec"
"github.com/pingcap/tipb/go-tipb"
"github.com/stretchr/testify/require"
@ -244,3 +248,67 @@ func TestANNInexWithSimpleCBO(t *testing.T) {
testkit.SetTiFlashReplica(t, dom, "test", "t1")
tk.MustUseIndex("select * from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 1", "vector_index")
}
// TestANNIndexWithNonIntClusteredPk plans an ANN query against a table whose
// primary key is the composite (a, b) rather than a single integer column, and
// checks the table scan at the bottom of the optimized plan: it must carry the
// vector index information and cover one full range bounded by the
// MinNotNull / MaxValue sentinels.
func TestANNIndexWithNonIntClusteredPk(t *testing.T) {
	store := testkit.CreateMockStoreWithSchemaLease(t, 1*time.Second, mockstore.WithMockTiFlash(2))
	tk := testkit.NewTestKit(t, store)

	mockTiFlash := infosync.NewMockTiFlash()
	infosync.SetMockTiFlash(mockTiFlash)
	defer func() {
		// Close the mock status server while holding the mock's lock.
		mockTiFlash.Lock()
		mockTiFlash.StatusServer.Close()
		mockTiFlash.Unlock()
	}()
	testfailpoint.Enable(t, "github.com/pingcap/tidb/pkg/ddl/MockCheckVectorIndexProcess", `return(1)`)

	// Schema and data: a vector column plus a two-column primary key.
	tk.MustExec("use test")
	tk.MustExec("drop table if exists t1")
	tk.MustExec(`
create table t1 (
vec vector(3),
a int,
b int,
c vector(3),
d vector,
primary key (a, b)
)
`)
	tk.MustExec("alter table t1 set tiflash replica 1;")
	tk.MustExec("alter table t1 add vector index ((vec_cosine_distance(vec))) USING HNSW;")
	tk.MustExec("insert into t1 values ('[1,1,1]', 1, 1, '[1,1,1]', '[1,1,1]')")
	testkit.SetTiFlashReplica(t, domain.GetDomain(tk.Session()), "test", "t1")

	// Parse, preprocess and optimize the query by hand so that the physical
	// plan tree itself can be inspected.
	sctx := tk.Session()
	parsed, err := session.Parse(sctx, "select * from t1 use index(vector_index) order by vec_cosine_distance(vec, '[1,1,1]') limit 1")
	require.NoError(t, err)
	require.Len(t, parsed, 1)

	preprocessRet := &core.PreprocessorReturn{}
	wrapped := resolve.NewNodeW(parsed[0])
	err = core.Preprocess(context.Background(), sctx, wrapped, core.WithPreprocessorReturn(preprocessRet))
	require.NoError(t, err)

	var optimized base.Plan
	optimized, _, err = planner.Optimize(context.Background(), sctx, wrapped, preprocessRet.InfoSchema)
	require.NoError(t, err)
	physicalPlan, isPhysical := optimized.(base.PhysicalPlan)
	require.True(t, isPhysical)

	// Follow the first child at every level until a leaf is reached; the leaf
	// is expected to be the PhysicalTableReader.
	leaf := physicalPlan
	for len(leaf.Children()) > 0 {
		leaf = leaf.Children()[0]
	}
	reader, isReader := leaf.(*core.PhysicalTableReader)
	require.True(t, isReader)
	scan, err := reader.GetTableScan()
	require.NoError(t, err)

	// The scan must carry the extra vector index information.
	require.NotNil(t, scan.AnnIndexExtra)
	require.Len(t, scan.Ranges, 1)
	// It must be a full scan ...
	require.Equal(t, "[-inf,+inf]", scan.Ranges[0].String())
	// ... whose -inf and +inf bounds use the expected datum kinds.
	require.Equal(t, types.KindMinNotNull, scan.Ranges[0].LowVal[0].Kind())
	require.Equal(t, types.KindMaxValue, scan.Ranges[0].HighVal[0].Kind())
}

View File

@ -761,6 +761,19 @@ func compareCandidates(sctx base.PlanContext, prop *property.PhysicalProperty, l
}
func isMatchProp(ds *logicalop.DataSource, path *util.AccessPath, prop *property.PhysicalProperty) bool {
if prop.VectorProp.VectorHelper != nil && path.Index != nil && path.Index.VectorInfo != nil {
if path.Index == nil || path.Index.VectorInfo == nil {
return false
}
if ds.TableInfo.Columns[path.Index.Columns[0].Offset].ID != prop.VectorProp.Column.ID {
return false
}
if model.IndexableFnNameToDistanceMetric[prop.VectorProp.DistanceFnName.L] != path.Index.VectorInfo.DistanceMetric {
return false
}
return true
}
var isMatchProp bool
if path.IsIntHandlePath {
pkCol := ds.GetPKIsHandleCol()
@ -808,19 +821,6 @@ func isMatchProp(ds *logicalop.DataSource, path *util.AccessPath, prop *property
}
}
}
if prop.VectorProp.VectorHelper != nil && path.Index.VectorInfo != nil {
if path.Index == nil || path.Index.VectorInfo == nil {
return false
}
if ds.TableInfo.Columns[path.Index.Columns[0].Offset].ID != prop.VectorProp.Column.ID {
return false
}
if model.IndexableFnNameToDistanceMetric[prop.VectorProp.DistanceFnName.L] != path.Index.VectorInfo.DistanceMetric {
return false
}
return true
}
return isMatchProp
}

View File

@ -1194,15 +1194,19 @@ func getPossibleAccessPaths(ctx base.PlanContext, tableHints *hint.PlanHints, in
continue
}
}
path := &util.AccessPath{Index: index}
if index.VectorInfo != nil {
// Because the value of `TiFlashReplica.Available` changes as the user modifies the replica, it is not ideal for the state of the index to change accordingly.
// So the current way to use vector indexes is to require the TiFlash replica to be available.
if !tblInfo.TiFlashReplica.Available {
continue
}
path := genTiFlashPath(tblInfo)
path.StoreType = kv.TiFlash
path.Index = index
publicPaths = append(publicPaths, path)
continue
}
path := &util.AccessPath{Index: index}
publicPaths = append(publicPaths, path)
}
}

View File

@ -156,7 +156,7 @@ func (path *AccessPath) IsTiKVTablePath() bool {
// IsTiFlashSimpleTablePath returns true if it's a TiFlash path and will not use any special indexes like vector index.
//
// A path is reported as simple only when its store type is TiFlash and it has
// no Index attached. The handle-kind flags (IsIntHandlePath /
// IsCommonHandlePath) are deliberately not consulted, so a TiFlash path that
// carries an index is never reported as a simple table path, whatever kind of
// handle the table uses.
func (path *AccessPath) IsTiFlashSimpleTablePath() bool {
	return path.StoreType == kv.TiFlash && path.Index == nil
}
// SplitCorColAccessCondFromFilters moves the necessary filters in the form of index_col = correlated_col to access conditions.