(bm25Index) add check, bugfix

(cherry picked from commit 661811eb6c9a71dd0e4123e523eb969b0f866b29)
This commit is contained in:
Eurekaxun
2025-05-05 14:31:39 +08:00
committed by wlff234
parent 1be61741a0
commit 8eef5af22d
7 changed files with 51 additions and 9 deletions

View File

@ -2765,7 +2765,7 @@ static void InitSqlConfigureNamesReal()
gettext_noop("k1 factor for bm25 metrix."),
NULL},
&u_sess->attr.attr_sql.bm25_k1,
1.5,
1.2,
0.0,
3.0,
NULL,

View File

@ -797,6 +797,9 @@ ObjectAddress DefineIndex(Oid relationId, IndexStmt* stmt, Oid indexRelationId,
*/
lockmode = concurrent ? ShareUpdateExclusiveLock : ShareLock;
rel = heap_open(relationId, lockmode);
if (RelationIsPartitioned(rel) && strcmp(stmt->accessMethod, "bm25") != 0) {
elog(ERROR, "%s index is not supported for partition table.", (stmt->accessMethod));
}
bool segment = get_rel_segment(rel);
TableCreateSupport indexCreateSupport{(int)COMPRESS_TYPE_NONE, false, false, false, false, false, true, false};

View File

@ -22,6 +22,7 @@
*/
#include "postgres.h"
#include "access/multi_redo_api.h"
#include "access/datavec/bm25.h"
/*
@ -47,6 +48,9 @@ static void bm25costestimate_internal(PlannerInfo *root, IndexPath *path, double
PGDLLEXPORT PG_FUNCTION_INFO_V1(bm25build);
Datum bm25build(PG_FUNCTION_ARGS)
{
if (IsExtremeRedo()) {
elog(ERROR, "bm25 index do not support extreme rto.");
}
Relation heap = (Relation)PG_GETARG_POINTER(0);
Relation index = (Relation)PG_GETARG_POINTER(1);
IndexInfo *indexinfo = (IndexInfo *)PG_GETARG_POINTER(2);
@ -58,8 +62,11 @@ Datum bm25build(PG_FUNCTION_ARGS)
/*
 * bm25buildempty
 *
 * fmgr-callable wrapper around bm25buildempty_internal().
 *
 * Argument 0 is the index Relation, passed as a pointer.
 * Raises ERROR when IsExtremeRedo() reports true; otherwise delegates to
 * bm25buildempty_internal() and returns void.
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(bm25buildempty);
Datum bm25buildempty(PG_FUNCTION_ARGS)
{
    /* Refuse to run at all while extreme redo is in effect. */
    if (IsExtremeRedo()) {
        elog(ERROR, "bm25 index do not support extreme rto.");
    }

    bm25buildempty_internal((Relation)PG_GETARG_POINTER(0));
    PG_RETURN_VOID();
}
@ -130,6 +137,9 @@ Datum bm25costestimate(PG_FUNCTION_ARGS)
PGDLLEXPORT PG_FUNCTION_INFO_V1(bm25insert);
Datum bm25insert(PG_FUNCTION_ARGS)
{
if (IsExtremeRedo()) {
elog(ERROR, "bm25 index do not support extreme rto.");
}
Relation rel = (Relation)PG_GETARG_POINTER(0);
Datum *values = (Datum *)PG_GETARG_POINTER(1);
bool *isnull = reinterpret_cast<bool *>(PG_GETARG_POINTER(2));
@ -160,6 +170,9 @@ Datum bm25delete(PG_FUNCTION_ARGS)
PGDLLEXPORT PG_FUNCTION_INFO_V1(bm25bulkdelete);
Datum bm25bulkdelete(PG_FUNCTION_ARGS)
{
if (IsExtremeRedo()) {
elog(ERROR, "bm25 index do not support extreme rto.");
}
IndexVacuumInfo *info = (IndexVacuumInfo *)PG_GETARG_POINTER(0);
IndexBulkDeleteResult *volatile stats = (IndexBulkDeleteResult *)PG_GETARG_POINTER(1);
PG_RETURN_POINTER(stats);
@ -168,6 +181,9 @@ Datum bm25bulkdelete(PG_FUNCTION_ARGS)
PGDLLEXPORT PG_FUNCTION_INFO_V1(bm25vacuumcleanup);
Datum bm25vacuumcleanup(PG_FUNCTION_ARGS)
{
if (IsExtremeRedo()) {
elog(ERROR, "bm25 index do not support extreme rto.");
}
IndexVacuumInfo *info = (IndexVacuumInfo *)PG_GETARG_POINTER(0);
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *)PG_GETARG_POINTER(1);
PG_RETURN_POINTER(stats);

View File

@ -418,6 +418,7 @@ static void InsertDocForwardItem(Relation index, uint32 docId, BM25TokenizedDocD
AllocateForwardIdxForToken(index, tokenizedDoc.tokenCount, forwardStart, forwardEnd, metaForwardPage, forkNum);
forwardStartBlkno = metaForwardPage->startPage;
MarkBufferDirty(metabuf);
UnlockReleaseBuffer(metabuf);
uint64 tokenIdx = *forwardStart;
BlockNumber curStep = tokenIdx / BM25_DOC_FORWARD_MAX_COUNT_IN_PAGE;
@ -1053,11 +1054,24 @@ static void BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo, BM25
FreeBuildState(buildstate);
}
/*
 * BuildIndexCheck
 *
 * Pre-build validation of the index's column types.
 *
 * Walks every attribute of the index's tuple descriptor and raises ERROR
 * if any of them has type TEXTARRAYOID. Returns normally otherwise.
 */
static void BuildIndexCheck(Relation index)
{
    TupleDesc desc = RelationGetDescr(index);
    const int attrCount = desc->natts;

    for (int attno = 0; attno < attrCount; ++attno) {
        if (desc->attrs[attno].atttypid != TEXTARRAYOID) {
            continue;
        }
        elog(ERROR, "bm25 index is not supported currently for datatype: text array.");
    }
}
IndexBuildResult* bm25build_internal(Relation heap, Relation index, IndexInfo *indexInfo)
{
IndexBuildResult *result;
BM25BuildState buildstate;
BuildIndexCheck(index);
BuildIndex(heap, index, indexInfo, &buildstate, MAIN_FORKNUM);
result = (IndexBuildResult *)palloc(sizeof(IndexBuildResult));
@ -1071,6 +1085,7 @@ void bm25buildempty_internal(Relation index)
IndexBuildResult *result;
BM25BuildState buildstate;
BuildIndexCheck(index);
BuildIndex(NULL, index, NULL, &buildstate, MAIN_FORKNUM);
}

View File

@ -36,6 +36,8 @@
#include "access/datavec/bm25heap.h"
#include "access/datavec/bm25.h"
#define BM25_HEAP_DEFAULT_CAPACITY 100
typedef struct BM25QueryToken {
BlockNumber tokenPostingBlock;
float qTokenMaxScore;
@ -534,7 +536,7 @@ static void DocIdsGetHeapCtids(Relation index, BM25EntryPages &entryPages, BM25S
}
BM25DocumentItem *docItem = (BM25DocumentItem*)((char *)page + sizeof(PageHeaderData) +
offset * BM25_DOCUMENT_ITEM_SIZE);
if (docItem->isActived) {
if (!docItem->isActived) {
UnlockReleaseBuffer(buf);
elog(ERROR, "Read invalid doc.");
}
@ -550,7 +552,9 @@ static void BM25IndexScan(Relation index, BM25QueryTokensInfo &queryTokenInfo, u
return;
}
BM25Scorer scorer = BM25Scorer(u_sess->attr.attr_sql.bm25_k1, u_sess->attr.attr_sql.bm25_b, avgdl);
MaxMinHeap<float> heap(so->expectedCandNums);
size_t capacity = so->expectedCandNums == 0 ? BM25_HEAP_DEFAULT_CAPACITY : so->expectedCandNums;
MaxMinHeap<float> heap(capacity);
if (so->expectedCandNums == 0) {
SearchTaat(index, queryTokenInfo, heap, docNums, scorer, so->docIdMask);
} else {
@ -558,9 +562,9 @@ static void BM25IndexScan(Relation index, BM25QueryTokensInfo &queryTokenInfo, u
}
uint32 docId;
int size = heap.size();
int64 size = heap.size();
so->candDocs = (BM25ScanData*)palloc0(sizeof(BM25ScanData) * size);
for (auto i = size - 1; i >= 0; --i) {
for (int64 i = size - 1; i >= 0; --i) {
docId = heap.top().id;
so->candDocs[i].docId = docId;
so->candDocs[i].score = heap.top().val;

View File

@ -169,6 +169,10 @@ uint32 BM25AllocateDocId(Relation index)
if (unlikely(metapBuf->magicNumber != BM25_MAGIC_NUMBER))
elog(ERROR, "bm25 index is not valid");
docId = metapBuf->nextDocId;
if (unlikely(docId == BM25_INVALID_DOC_ID)) {
elog(ERROR, "bm25 doc id exhausted, please rebuild index.");
}
metapBuf->nextDocId++;
MarkBufferDirty(buf);
UnlockReleaseBuffer(buf);
@ -220,7 +224,7 @@ BlockNumber SeekBlocknoForDoc(Relation index, uint32 docId, BlockNumber startBlk
Page page;
BlockNumber docBlkno = startBlkno;
for (int i = 0; i < step; ++i) {
if (unlikely(BlockNumberIsValid(docBlkno))) {
if (unlikely(!BlockNumberIsValid(docBlkno))) {
elog(ERROR, "SeekBlocknoForDoc: Invalid Block Number.");
}
buf = ReadBuffer(index, docBlkno);

View File

@ -132,8 +132,8 @@ typedef struct BM25DocForwardMetaPageData {
typedef BM25DocForwardMetaPageData *BM25DocForwardMetaPage;
/*
 * BM25DocForwardItem
 *
 * A pair of 32-bit values: a token id and a token hash.
 * NOTE(review): presumably one entry of a document's forward index, going by
 * the type name -- confirm against the code that reads and writes these items.
 *
 * Both members are declared exactly once, as uint32. Declaring them as bool
 * as well would be a member redeclaration error, and a bool could not hold a
 * full 32-bit id or hash.
 */
typedef struct BM25DocForwardItem {
    uint32 tokenId;
    uint32 tokenHash;
} BM25DocForwardItem;
typedef struct BM25DocumentItem {