!6642 新增hnsw & ivfflat reloptions

Merge pull request !6642 from 吉文克/datavec_new_init
This commit is contained in:
opengauss_bot
2024-11-09 09:45:44 +00:00
committed by Gitee
12 changed files with 30 additions and 109 deletions

View File

@ -28,7 +28,6 @@
uint64 (*BitHammingDistance)(uint32 bytes, unsigned char *ax, unsigned char *bx, uint64 distance);
double (*BitJaccardDistance)(uint32 bytes, unsigned char *ax, unsigned char *bx, uint64 ab, uint64 aa, uint64 bb);
static THR_LOCAL bool BitvecNeedInitialization = true;
/*
* Allocate and initialize a new bit vector
@ -65,11 +64,6 @@ Datum hamming_distance(PG_FUNCTION_ARGS)
VarBit *a = PG_GETARG_VARBIT_P(0);
VarBit *b = PG_GETARG_VARBIT_P(1);
if (BitvecNeedInitialization) {
BitvecInit();
BitvecNeedInitialization = false;
}
CheckDims(a, b);
PG_RETURN_FLOAT8((double)BitHammingDistance(VARBITBYTES(a), VARBITS(a), VARBITS(b), 0));
@ -84,11 +78,6 @@ Datum jaccard_distance(PG_FUNCTION_ARGS)
VarBit *a = PG_GETARG_VARBIT_P(0);
VarBit *b = PG_GETARG_VARBIT_P(1);
if (BitvecNeedInitialization) {
BitvecInit();
BitvecNeedInitialization = false;
}
CheckDims(a, b);
PG_RETURN_FLOAT8(BitJaccardDistance(VARBITBYTES(a), VARBITS(a), VARBITS(b), 0, 0, 0));

View File

@ -64,44 +64,6 @@
#define MarkGUCPrefixReserved(x) EmitWarningsOnPlaceholders(x)
uint32 datavec_index;
/*
 * Store the caller-supplied slot number in datavec_index.
 *
 * get_session_context() and init_session_vars() use datavec_index to address
 * this module's entry in u_sess->attr.attr_common.extension_session_vars_array.
 */
void set_extension_index(uint32 index)
{
    datavec_index = index;
}
/*
 * Return the datavec context stored in the current session's extension
 * variables array, creating it through init_session_vars() when the slot
 * is still NULL.
 */
datavec_session_context *get_session_context()
{
    if (u_sess->attr.attr_common.extension_session_vars_array[datavec_index] != NULL) {
        return (datavec_session_context *)u_sess->attr.attr_common.extension_session_vars_array[datavec_index];
    }

    init_session_vars();

    /*
     * Read the slot again through u_sess instead of caching a pointer into the
     * array: init_session_vars() calls RepallocSessionVarsArrayIfNecessary()
     * (presumably able to move the array -- confirm) before filling the slot.
     */
    return (datavec_session_context *)u_sess->attr.attr_common.extension_session_vars_array[datavec_index];
}
void init_session_vars(void)
{
RepallocSessionVarsArrayIfNecessary();
datavec_session_context *ctx =
(datavec_session_context *)MemoryContextAllocZero(u_sess->self_mem_cxt, sizeof(datavec_session_context));
u_sess->attr.attr_common.extension_session_vars_array[datavec_index] = ctx;
ctx->hnsw_ef_search = 0;
ctx->ivfflat_probes = 0;
DefineCustomIntVariable("hnsw.ef_search", "Sets the size of the dynamic candidate list for search",
"Valid range is 1..1000.", &(get_session_context()->hnsw_ef_search), HNSW_DEFAULT_EF_SEARCH,
HNSW_MIN_EF_SEARCH, HNSW_MAX_EF_SEARCH, PGC_USERSET, 0, NULL, NULL, NULL);
MarkGUCPrefixReserved("hnsw");
DefineCustomIntVariable("ivfflat.probes", "Sets the number of probes", "Valid range is 1..lists.",
&(get_session_context()->ivfflat_probes), IVFFLAT_DEFAULT_PROBES, IVFFLAT_MIN_LISTS,
IVFFLAT_MAX_LISTS, PGC_USERSET, 0, NULL, NULL, NULL);
MarkGUCPrefixReserved("ivfflat");
}
/*
* Ensure same dimensions
*/

View File

@ -2566,7 +2566,7 @@ static void InitSqlConfigureNamesInt()
NULL,
NULL,
NULL},
{{"hnsw.ef_search",
{{"hnsw_ef_search",
PGC_USERSET,
NODE_ALL,
QUERY_TUNING_OTHER,
@ -2579,7 +2579,7 @@ static void InitSqlConfigureNamesInt()
NULL,
NULL,
NULL},
{{"ivfflat.probes",
{{"ivfflat_probes",
PGC_USERSET,
NODE_ALL,
QUERY_TUNING_OTHER,

View File

@ -61,6 +61,7 @@
#include "workload/workload.h"
#include "parser/scanner.h"
#include "pgstat.h"
#include "access/datavec/bitvec.h"
THR_LOCAL knl_session_context* u_sess;
@ -1478,6 +1479,7 @@ static void knl_u_libsw_init(knl_u_libsw_context* libsw_cxt)
/*
 * Initialize the datavec part of a session context.
 *
 * Runs BitvecInit() (declared in access/datavec/bitvec.h; presumably sets up
 * the bit-vector distance routines -- confirm there) and resets both
 * search-time settings in datavec_cxt to 0.
 */
static void knl_u_datavec_init(knl_u_datavec_context* datavec_cxt)
{
    BitvecInit();

    datavec_cxt->hnsw_ef_search = 0;
    datavec_cxt->ivfflat_probes = 0;
}
@ -1594,6 +1596,8 @@ void knl_session_init(knl_session_context* sess_cxt)
#ifdef ENABLE_HTAP
knl_u_imcstore_init(&sess_cxt->imcstore_ctx);
#endif
knl_u_datavec_init(&sess_cxt->datavec_ctx);
MemoryContextSeal(sess_cxt->top_mem_cxt);
}

View File

@ -17,6 +17,8 @@
#include "miscadmin.h"
#include "knl/knl_variable.h"
#include "access/datavec/hnsw.h"
#include "access/datavec/ivfflat.h"
#include "access/gist_private.h"
#include "access/hash.h"
#include "access/nbtree.h"
@ -123,6 +125,7 @@ static relopt_bool boolRelOpts[] = {
{{"compress_diff_convert", "Whether do diiffer convert in compression", RELOPT_KIND_HEAP | RELOPT_KIND_BTREE},
false},
{{"deduplication", "Enables \"deduplication\" feature for btree index", RELOPT_KIND_BTREE}, false},
{{ "enable_pq", "Whether to enable PQ", RELOPT_KIND_HNSW }, HNSW_DEFAULT_ENABLE_PQ },
/* list terminator */
{{NULL}}};
@ -254,6 +257,20 @@ static relopt_int intRelOpts[] = {
7},
{{ "collate", "set relation default collation", RELOPT_KIND_HEAP }, 0, 0, 2000000000 },
{{ "relrewrite", "set relation relrewrite", RELOPT_KIND_HEAP | RELOPT_KIND_TOAST }, 0, 0, 2000000000 },
{{ "m", "Max number of connections", RELOPT_KIND_HNSW }, HNSW_DEFAULT_M, HNSW_MIN_M, HNSW_MAX_M },
{{ "ef_construction", "Size of the dynamic candidate list for construction", RELOPT_KIND_HNSW },
HNSW_DEFAULT_EF_CONSTRUCTION,
HNSW_MIN_EF_CONSTRUCTION,
HNSW_MAX_EF_CONSTRUCTION },
{{ "pq_m", "Number of PQ subquantizer", RELOPT_KIND_HNSW }, HNSW_DEFAULT_PQ_M, HNSW_MIN_PQ_M, HNSW_MAX_PQ_M },
{{ "pq_ksub", "Number of centroids for each PQ subquantizer", RELOPT_KIND_HNSW },
    HNSW_DEFAULT_PQ_KSUB,
    HNSW_MIN_PQ_KSUB,
    /*
     * Upper bound must be the MAX constant, matching the add_int_reloption()
     * registration this table entry replaces; using the MIN constant here
     * would collapse the accepted range to a single value.
     */
    HNSW_MAX_PQ_KSUB },
{{ "lists", "Number of inverted lists", RELOPT_KIND_IVFFLAT },
IVFFLAT_DEFAULT_LISTS,
IVFFLAT_MIN_LISTS,
IVFFLAT_MAX_LISTS },
/* list terminator */
{{NULL}}
};
@ -469,7 +486,7 @@ static relopt_string stringRelOpts[] = {
},
{
{"storage_type", "Specifies the Table accessor routines",
RELOPT_KIND_HEAP | RELOPT_KIND_BTREE | RELOPT_KIND_TOAST | RELOPT_KIND_DATAVEC},
RELOPT_KIND_HEAP | RELOPT_KIND_BTREE | RELOPT_KIND_TOAST | RELOPT_KIND_HNSW},
strlen(TABLE_ACCESS_METHOD_ASTORE),
false,
ValidateStrOptTableAccessMethod,

View File

@ -34,24 +34,6 @@
#include "utils/selfuncs.h"
int hnsw_lock_tranche_id;
static relopt_kind hnsw_relopt_kind;
static THR_LOCAL bool HnswNeedInitialization = true;
/*
* Initialize index options and variables
*/
void HnswInit(void)
{
hnsw_relopt_kind = RELOPT_KIND_DATAVEC;
add_int_reloption(hnsw_relopt_kind, "m", "Max number of connections", HNSW_DEFAULT_M, HNSW_MIN_M, HNSW_MAX_M);
add_int_reloption(hnsw_relopt_kind, "ef_construction", "Size of the dynamic candidate list for construction",
HNSW_DEFAULT_EF_CONSTRUCTION, HNSW_MIN_EF_CONSTRUCTION, HNSW_MAX_EF_CONSTRUCTION);
add_int_reloption(hnsw_relopt_kind, "pq_m", "Number of PQ subquantizer", HNSW_DEFAULT_PQ_M, HNSW_MIN_PQ_M,
HNSW_MAX_PQ_M);
add_int_reloption(hnsw_relopt_kind, "pq_ksub", "Number of centroids for each PQ subquantizer", HNSW_DEFAULT_PQ_KSUB,
HNSW_MIN_PQ_KSUB, HNSW_MAX_PQ_KSUB);
add_bool_reloption(hnsw_relopt_kind, "enable_pq", "Whether to enable PQ", HNSW_DEFAULT_ENABLE_PQ);
}
/*
* Estimate the cost of an index scan
@ -114,11 +96,7 @@ static bytea *hnswoptions_internal(Datum reloptions, bool validate)
int numoptions;
HnswOptions *rdopts;
if (HnswNeedInitialization) {
HnswInit();
HnswNeedInitialization = false;
}
options = parseRelOptions(reloptions, validate, hnsw_relopt_kind, &numoptions);
options = parseRelOptions(reloptions, validate, RELOPT_KIND_HNSW, &numoptions);
rdopts = (HnswOptions *)allocateReloptStruct(sizeof(HnswOptions), options, numoptions);
fillRelOptions((void *)rdopts, sizeof(HnswOptions), options, numoptions, validate, tab, lengthof(tab));

View File

@ -32,19 +32,6 @@
#include "utils/selfuncs.h"
#include "utils/spccache.h"
static relopt_kind ivfflat_relopt_kind;
static THR_LOCAL bool IvfflatNeedInitialization = true;
/*
* Initialize index options and variables
*/
void IvfflatInit(void)
{
ivfflat_relopt_kind = add_reloption_kind();
add_int_reloption(ivfflat_relopt_kind, "lists", "Number of inverted lists", IVFFLAT_DEFAULT_LISTS,
IVFFLAT_MIN_LISTS, IVFFLAT_MAX_LISTS);
}
/*
* Estimate the cost of an index scan
*/
@ -131,12 +118,7 @@ static bytea *ivfflatoptions_internal(Datum reloptions, bool validate)
int numoptions;
IvfflatOptions *rdopts;
if (IvfflatNeedInitialization) {
IvfflatInit();
IvfflatNeedInitialization = false;
}
options = parseRelOptions(reloptions, validate, ivfflat_relopt_kind, &numoptions);
options = parseRelOptions(reloptions, validate, RELOPT_KIND_IVFFLAT, &numoptions);
rdopts = (IvfflatOptions *)allocateReloptStruct(sizeof(IvfflatOptions), options, numoptions);
fillRelOptions((void *)rdopts, sizeof(IvfflatOptions), options, numoptions, validate, tab, lengthof(tab));

View File

@ -531,7 +531,6 @@ Datum HnswNormValue(const HnswTypeInfo *typeInfo, Oid collation, Datum value);
bool HnswCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value);
Buffer HnswNewBuffer(Relation index, ForkNumber forkNum);
void HnswInitPage(Buffer buf, Page page);
void HnswInit(void);
List *HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation,
int m, bool inserting, HnswElement skipElement, IndexScanDesc scan = NULL);
HnswElement HnswGetEntryPoint(Relation index);

View File

@ -296,7 +296,6 @@ Buffer IvfflatNewBuffer(Relation index, ForkNumber forkNum);
void IvfflatInitPage(Buffer buf, Page page);
void IvfflatInitRegisterPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state);
PGDLLEXPORT void IvfflatParallelBuildMain(const BgWorkerContext *bwc);
void IvfflatInit(void);
const IvfflatTypeInfo *IvfflatGetTypeInfo(Relation index);
Datum ivfflathandler(PG_FUNCTION_ARGS);

View File

@ -79,15 +79,5 @@ Datum binary_quantize(PG_FUNCTION_ARGS);
Datum subvector(PG_FUNCTION_ARGS);
Datum vector_mul(PG_FUNCTION_ARGS);
Datum vector_concat(PG_FUNCTION_ARGS);
void set_extension_index(uint32 index);
void init_session_vars(void);
/*
 * Per-session datavec settings. An instance is stored in the session's
 * extension variables array and returned by get_session_context().
 */
typedef struct datavec_session_context {
    int hnsw_ef_search; /* storage for the "hnsw.ef_search" setting */
    int ivfflat_probes; /* storage for the "ivfflat.probes" setting */
} datavec_session_context;
extern uint32 datavec_index;
extern datavec_session_context *get_session_context();
#endif

View File

@ -51,9 +51,10 @@ typedef enum relopt_kind {
RELOPT_KIND_NPARSER = (1 << 12), /* text search configuration options defined by ngram */
RELOPT_KIND_CBTREE = (1 << 13),
RELOPT_KIND_PPARSER = (1 << 14), /* text search configuration options defined by pound */
RELOPT_KIND_DATAVEC = (1 << 15),
RELOPT_KIND_IVFFLAT = (1 << 15),
RELOPT_KIND_HNSW = (1 << 16),
/* if you add a new kind, make sure you update "last_default" too */
RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_DATAVEC,
RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_HNSW,
/* some compilers treat enums as signed ints, so we can't use 1 << 31 */
RELOPT_KIND_MAX = (1 << 30)
} relopt_kind;

View File

@ -5467,7 +5467,7 @@ static void CheckCleanCodeWarningInfo(const int baseNum, const int currentNum,
return;
}
#define BASE_GLOBAL_VARIABLE_NUM 240
#define BASE_GLOBAL_VARIABLE_NUM 237
#define CMAKE_CMD_BUF_LEN 1000