(bm25Index)add check,bugfix
(cherry picked from commit 661811eb6c9a71dd0e4123e523eb969b0f866b29)
This commit is contained in:
@ -2765,7 +2765,7 @@ static void InitSqlConfigureNamesReal()
|
||||
gettext_noop("k1 factor for bm25 metrix."),
|
||||
NULL},
|
||||
&u_sess->attr.attr_sql.bm25_k1,
|
||||
1.5,
|
||||
1.2,
|
||||
0.0,
|
||||
3.0,
|
||||
NULL,
|
||||
|
||||
@ -797,6 +797,9 @@ ObjectAddress DefineIndex(Oid relationId, IndexStmt* stmt, Oid indexRelationId,
|
||||
*/
|
||||
lockmode = concurrent ? ShareUpdateExclusiveLock : ShareLock;
|
||||
rel = heap_open(relationId, lockmode);
|
||||
if (RelationIsPartitioned(rel) && strcmp(stmt->accessMethod, "bm25") != 0) {
|
||||
elog(ERROR, "%s index is not supported for partition table.", (stmt->accessMethod));
|
||||
}
|
||||
|
||||
bool segment = get_rel_segment(rel);
|
||||
TableCreateSupport indexCreateSupport{(int)COMPRESS_TYPE_NONE, false, false, false, false, false, true, false};
|
||||
|
||||
@ -22,6 +22,7 @@
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
#include "access/multi_redo_api.h"
|
||||
#include "access/datavec/bm25.h"
|
||||
|
||||
/*
|
||||
@ -47,6 +48,9 @@ static void bm25costestimate_internal(PlannerInfo *root, IndexPath *path, double
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(bm25build);
|
||||
Datum bm25build(PG_FUNCTION_ARGS)
|
||||
{
|
||||
if (IsExtremeRedo()) {
|
||||
elog(ERROR, "bm25 index do not support extreme rto.");
|
||||
}
|
||||
Relation heap = (Relation)PG_GETARG_POINTER(0);
|
||||
Relation index = (Relation)PG_GETARG_POINTER(1);
|
||||
IndexInfo *indexinfo = (IndexInfo *)PG_GETARG_POINTER(2);
|
||||
@ -58,8 +62,11 @@ Datum bm25build(PG_FUNCTION_ARGS)
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(bm25buildempty);
|
||||
Datum bm25buildempty(PG_FUNCTION_ARGS)
|
||||
{
|
||||
if (IsExtremeRedo()) {
|
||||
elog(ERROR, "bm25 index do not support extreme rto.");
|
||||
}
|
||||
Relation index = (Relation)PG_GETARG_POINTER(0);
|
||||
//bm25buildempty_internal(index);
|
||||
bm25buildempty_internal(index);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
@ -130,6 +137,9 @@ Datum bm25costestimate(PG_FUNCTION_ARGS)
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(bm25insert);
|
||||
Datum bm25insert(PG_FUNCTION_ARGS)
|
||||
{
|
||||
if (IsExtremeRedo()) {
|
||||
elog(ERROR, "bm25 index do not support extreme rto.");
|
||||
}
|
||||
Relation rel = (Relation)PG_GETARG_POINTER(0);
|
||||
Datum *values = (Datum *)PG_GETARG_POINTER(1);
|
||||
bool *isnull = reinterpret_cast<bool *>(PG_GETARG_POINTER(2));
|
||||
@ -160,6 +170,9 @@ Datum bm25delete(PG_FUNCTION_ARGS)
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(bm25bulkdelete);
|
||||
Datum bm25bulkdelete(PG_FUNCTION_ARGS)
|
||||
{
|
||||
if (IsExtremeRedo()) {
|
||||
elog(ERROR, "bm25 index do not support extreme rto.");
|
||||
}
|
||||
IndexVacuumInfo *info = (IndexVacuumInfo *)PG_GETARG_POINTER(0);
|
||||
IndexBulkDeleteResult *volatile stats = (IndexBulkDeleteResult *)PG_GETARG_POINTER(1);
|
||||
PG_RETURN_POINTER(stats);
|
||||
@ -168,6 +181,9 @@ Datum bm25bulkdelete(PG_FUNCTION_ARGS)
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(bm25vacuumcleanup);
|
||||
Datum bm25vacuumcleanup(PG_FUNCTION_ARGS)
|
||||
{
|
||||
if (IsExtremeRedo()) {
|
||||
elog(ERROR, "bm25 index do not support extreme rto.");
|
||||
}
|
||||
IndexVacuumInfo *info = (IndexVacuumInfo *)PG_GETARG_POINTER(0);
|
||||
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *)PG_GETARG_POINTER(1);
|
||||
PG_RETURN_POINTER(stats);
|
||||
|
||||
@ -418,6 +418,7 @@ static void InsertDocForwardItem(Relation index, uint32 docId, BM25TokenizedDocD
|
||||
AllocateForwardIdxForToken(index, tokenizedDoc.tokenCount, forwardStart, forwardEnd, metaForwardPage, forkNum);
|
||||
forwardStartBlkno = metaForwardPage->startPage;
|
||||
MarkBufferDirty(metabuf);
|
||||
UnlockReleaseBuffer(metabuf);
|
||||
|
||||
uint64 tokenIdx = *forwardStart;
|
||||
BlockNumber curStep = tokenIdx / BM25_DOC_FORWARD_MAX_COUNT_IN_PAGE;
|
||||
@ -1053,11 +1054,24 @@ static void BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo, BM25
|
||||
FreeBuildState(buildstate);
|
||||
}
|
||||
|
||||
/*
 * BuildIndexCheck
 *
 * Pre-build validation of the index's column types. Every attribute in the
 * index's tuple descriptor is inspected; if any has type TEXTARRAYOID the
 * build is rejected through elog(ERROR, ...). Returns normally when no such
 * attribute is found.
 */
static void BuildIndexCheck(Relation index)
{
    TupleDesc desc = RelationGetDescr(index);
    int attno = 0;

    while (attno < desc->natts) {
        if (desc->attrs[attno].atttypid == TEXTARRAYOID) {
            elog(ERROR, "bm25 index is not supported currently for datatype: text array.");
        }
        attno++;
    }
}
|
||||
|
||||
IndexBuildResult* bm25build_internal(Relation heap, Relation index, IndexInfo *indexInfo)
|
||||
{
|
||||
IndexBuildResult *result;
|
||||
BM25BuildState buildstate;
|
||||
|
||||
BuildIndexCheck(index);
|
||||
BuildIndex(heap, index, indexInfo, &buildstate, MAIN_FORKNUM);
|
||||
|
||||
result = (IndexBuildResult *)palloc(sizeof(IndexBuildResult));
|
||||
@ -1071,6 +1085,7 @@ void bm25buildempty_internal(Relation index)
|
||||
IndexBuildResult *result;
|
||||
BM25BuildState buildstate;
|
||||
|
||||
BuildIndexCheck(index);
|
||||
BuildIndex(NULL, index, NULL, &buildstate, MAIN_FORKNUM);
|
||||
}
|
||||
|
||||
|
||||
@ -36,6 +36,8 @@
|
||||
#include "access/datavec/bm25heap.h"
|
||||
#include "access/datavec/bm25.h"
|
||||
|
||||
#define BM25_HEAP_DEFAULT_CAPACITY 100
|
||||
|
||||
typedef struct BM25QueryToken {
|
||||
BlockNumber tokenPostingBlock;
|
||||
float qTokenMaxScore;
|
||||
@ -534,7 +536,7 @@ static void DocIdsGetHeapCtids(Relation index, BM25EntryPages &entryPages, BM25S
|
||||
}
|
||||
BM25DocumentItem *docItem = (BM25DocumentItem*)((char *)page + sizeof(PageHeaderData) +
|
||||
offset * BM25_DOCUMENT_ITEM_SIZE);
|
||||
if (docItem->isActived) {
|
||||
if (!docItem->isActived) {
|
||||
UnlockReleaseBuffer(buf);
|
||||
elog(ERROR, "Read invalid doc.");
|
||||
}
|
||||
@ -550,7 +552,9 @@ static void BM25IndexScan(Relation index, BM25QueryTokensInfo &queryTokenInfo, u
|
||||
return;
|
||||
}
|
||||
BM25Scorer scorer = BM25Scorer(u_sess->attr.attr_sql.bm25_k1, u_sess->attr.attr_sql.bm25_b, avgdl);
|
||||
MaxMinHeap<float> heap(so->expectedCandNums);
|
||||
|
||||
size_t capacity = so->expectedCandNums == 0 ? BM25_HEAP_DEFAULT_CAPACITY : so->expectedCandNums;
|
||||
MaxMinHeap<float> heap(capacity);
|
||||
if (so->expectedCandNums == 0) {
|
||||
SearchTaat(index, queryTokenInfo, heap, docNums, scorer, so->docIdMask);
|
||||
} else {
|
||||
@ -558,9 +562,9 @@ static void BM25IndexScan(Relation index, BM25QueryTokensInfo &queryTokenInfo, u
|
||||
}
|
||||
|
||||
uint32 docId;
|
||||
int size = heap.size();
|
||||
int64 size = heap.size();
|
||||
so->candDocs = (BM25ScanData*)palloc0(sizeof(BM25ScanData) * size);
|
||||
for (auto i = size - 1; i >= 0; --i) {
|
||||
for (int64 i = size - 1; i >= 0; --i) {
|
||||
docId = heap.top().id;
|
||||
so->candDocs[i].docId = docId;
|
||||
so->candDocs[i].score = heap.top().val;
|
||||
|
||||
@ -169,6 +169,10 @@ uint32 BM25AllocateDocId(Relation index)
|
||||
if (unlikely(metapBuf->magicNumber != BM25_MAGIC_NUMBER))
|
||||
elog(ERROR, "bm25 index is not valid");
|
||||
docId = metapBuf->nextDocId;
|
||||
if (unlikely(docId == BM25_INVALID_DOC_ID)) {
|
||||
elog(ERROR, "bm25 doc id exhausted, please rebuild index.");
|
||||
}
|
||||
|
||||
metapBuf->nextDocId++;
|
||||
MarkBufferDirty(buf);
|
||||
UnlockReleaseBuffer(buf);
|
||||
@ -220,7 +224,7 @@ BlockNumber SeekBlocknoForDoc(Relation index, uint32 docId, BlockNumber startBlk
|
||||
Page page;
|
||||
BlockNumber docBlkno = startBlkno;
|
||||
for (int i = 0; i < step; ++i) {
|
||||
if (unlikely(BlockNumberIsValid(docBlkno))) {
|
||||
if (unlikely(!BlockNumberIsValid(docBlkno))) {
|
||||
elog(ERROR, "SeekBlocknoForDoc: Invalid Block Number.");
|
||||
}
|
||||
buf = ReadBuffer(index, docBlkno);
|
||||
|
||||
@ -132,8 +132,8 @@ typedef struct BM25DocForwardMetaPageData {
|
||||
typedef BM25DocForwardMetaPageData *BM25DocForwardMetaPage;
|
||||
|
||||
typedef struct BM25DocForwardItem {
|
||||
bool tokenId;
|
||||
bool tokenHash;
|
||||
uint32 tokenId;
|
||||
uint32 tokenHash;
|
||||
} BM25DocForwardItem;
|
||||
|
||||
typedef struct BM25DocumentItem {
|
||||
|
||||
Reference in New Issue
Block a user