code for Global-Partition-Index feature

Signed-off-by: xiliu <xiliu_h@163.com>
This commit is contained in:
xiliu
2020-08-25 15:10:14 +08:00
parent 339cd59f26
commit c040d78287
157 changed files with 12502 additions and 939 deletions

View File

@ -28,6 +28,7 @@
typedef struct IndexBuildResult {
double heap_tuples; /* # of tuples seen in parent table */
double index_tuples; /* # of tuples inserted into index */
/*
 * NOTE(review): presumably an array of tuple counts used when building a
 * global partition index (one entry per partition?). The element count and
 * who allocates/frees the array are not visible here -- confirm.
 */
double* global_index_tuples;
} IndexBuildResult;
/*
@ -75,7 +76,7 @@ typedef struct IndexBulkDeleteResult {
} IndexBulkDeleteResult;
/*
 * Typedef for callback function to determine if a tuple is bulk-deletable.
 * The callback receives the item pointer, an opaque state pointer and, in the
 * three-argument form, a partition oid.
 * NOTE(review): partOid is presumably the partition that owns the referenced
 * heap tuple when the index is a global partition index -- confirm.
 */
typedef bool (*IndexBulkDeleteCallback)(ItemPointer itemptr, void* state);
typedef bool (*IndexBulkDeleteCallback)(ItemPointer itemptr, void* state, Oid partOid);
/* struct definitions appear in relscan.h */
typedef struct IndexScanDescData* IndexScanDesc;
@ -166,4 +167,33 @@ extern void systable_endscan_ordered(SysScanDesc sysscan);
HeapTuple systable_getnext_back(SysScanDesc sysscan);
/*
* global partition index access method support routines (in genam.c)
*/
/*
 * Working state for a scan that goes through a global partition index (GPI).
 * GPIScanDesc is the pointer form of this struct.
 */
typedef struct GPIScanDescData {
HTAB* fakeRelationTable; /* hash table of fake partition relations and partitions */
Bitmapset* invisiblePartMap; /* cached set of partition oids that are invisible in the GPI */
Relation parentRelation; /* parent relation of the partition */
Relation fakePartRelation; /* fake relation that stands in for the partition */
Partition partition; /* partition used to build the fake partition relation */
Oid currPartOid; /* oid of the partition the GPI scan is currently working on */
} GPIScanDescData;
typedef GPIScanDescData* GPIScanDesc;
/*
 * Tell whether currScanPartOid differs from the partition oid the GPI scan is
 * currently working on.
 *
 * Returns true only when gpiScan is a valid pointer and its currPartOid is
 * not equal to currScanPartOid; returns false when the oids match or when
 * gpiScan is not a valid pointer.
 */
inline bool GPIScanCheckPartOid(GPIScanDesc gpiScan, Oid currScanPartOid)
{
    return PointerIsValid(gpiScan) && gpiScan->currPartOid != currScanPartOid;
}
/*
 * GPI scan-state routines (declarations only; the definitions are not part of
 * this header).
 * NOTE(review): GPIScanInit takes a GPIScanDesc* and so presumably allocates
 * the descriptor for the caller, with GPIScanEnd releasing it -- confirm
 * against the implementation.
 */
extern void GPIScanInit(GPIScanDesc* gpiScan);
extern void GPIScanEnd(GPIScanDesc gpiScan);
extern bool GPIGetNextPartRelation(GPIScanDesc gpiScan, MemoryContext cxt, LOCKMODE lmode);
extern void GPISetCurrPartOid(GPIScanDesc gpiScan, Oid partOid);
extern Oid GPIGetCurrPartOid(const GPIScanDesc gpiScan);
#endif /* GENAM_H */

View File

@ -171,6 +171,8 @@ typedef HashMetaPageData* HashMetaPage;
MAXALIGN_DOWN( \
PageGetPageSize(page) - SizeOfPageHeaderData - sizeof(ItemIdData) - MAXALIGN(sizeof(HashPageOpaqueData)))
/*
 * The hash AM's name for the AM-reserved index tuple flag bit.
 * NOTE(review): presumably flags tuples that were moved by a bucket split -- confirm.
 */
#define INDEX_MOVED_BY_SPLIT_MASK INDEX_AM_RESERVED_BIT
/* Minimum and default fill factor for hash indexes. NOTE(review): presumably percentages -- confirm. */
#define HASH_MIN_FILLFACTOR 10
#define HASH_DEFAULT_FILLFACTOR 75

View File

@ -1125,6 +1125,16 @@ extern MinimalTuple heapFormMinimalTuple(HeapTuple tuple, TupleDesc tuple_desc);
extern MinimalTuple heapFormMinimalTuple(HeapTuple tuple, TupleDesc tuple_desc, Page page);
/* Support for cleaning up GPI metadata. */
/*
 * Predicate applied to a single Datum.
 * NOTE(review): presumably returns true when the invisible tuple carrying
 * that value must be kept -- confirm against the implementation.
 */
typedef bool (*KeepInvisbleTupleFunc)(Datum checkDatum);
/*
 * Bundles a table oid, an attribute number and a KeepInvisbleTupleFunc.
 * NOTE(review): presumably checkKeepFunc is applied to attribute checkAttnum
 * of tuples belonging to tableOid -- confirm.
 */
typedef struct KeepInvisbleOpt {
Oid tableOid;
int checkAttnum;
KeepInvisbleTupleFunc checkKeepFunc;
} KeepInvisbleOpt;
/*
 * checkKeepFunc defaults to NULL. The default argument is C++ syntax, so this
 * header has to be compiled as C++.
 */
bool HeapKeepInvisbleTuple(HeapTuple tuple, TupleDesc tupleDesc, KeepInvisbleTupleFunc checkKeepFunc = NULL);
// for ut test
extern HeapTuple test_HeapUncompressTup2(HeapTuple tuple, TupleDesc tuple_desc, Page dict_page);

View File

@ -40,7 +40,7 @@ typedef struct IndexTupleData {
*
* 15th (high) bit: has nulls
* 14th bit: has var-width attributes
* 13th bit: unused
* 13th bit: AM-defined meaning
* 12-0 bit: size of tuple
* ---------------
*/
@ -61,7 +61,7 @@ typedef IndexAttributeBitMapData* IndexAttributeBitMap;
* t_info manipulation macros
*/
#define INDEX_SIZE_MASK 0x1FFF
/* bit 0x2000 is not used at present */
#define INDEX_AM_RESERVED_BIT 0x2000 /* reserved for index-AM specific usage */
#define INDEX_VAR_MASK 0x4000
#define INDEX_NULL_MASK 0x8000
@ -113,6 +113,7 @@ typedef IndexAttributeBitMapData* IndexAttributeBitMap;
extern IndexTuple index_form_tuple(TupleDesc tuple_descriptor, Datum* values, const bool* isnull);
extern Datum nocache_index_getattr(IndexTuple tup, uint32 attnum, TupleDesc tuple_desc);
extern void index_deform_tuple(IndexTuple tup, TupleDesc tuple_descriptor, Datum* values, bool* isnull);
extern IndexTuple index_truncate_tuple(TupleDesc tupleDescriptor, IndexTuple olditup, int new_indnatts);
extern IndexTuple CopyIndexTuple(IndexTuple source);
#endif /* ITUP_H */

View File

@ -134,29 +134,6 @@ typedef struct BTMetaPageData {
#define BTREE_DEFAULT_FILLFACTOR 90
#define BTREE_NONLEAF_FILLFACTOR 70
/*
* Test whether two btree entries are "the same".
*
* Old comments:
* In addition, we must guarantee that all tuples in the index are unique,
* in order to satisfy some assumptions in Lehman and Yao. The way that we
* do this is by generating a new OID for every insertion that we do in the
* tree. This adds eight bytes to the size of btree index tuples. Note
* that we do not use the OID as part of a composite key; the OID only
* serves as a unique identifier for a given index tuple (logical position
* within a page).
*
* New comments:
* actually, we must guarantee that all tuples in A LEVEL
* are unique, not in ALL INDEX. So, we can use the t_tid
* as unique identifier for a given index tuple (logical position
* within a level). - vadim 04/09/97
*/
/*
 * BTTidSame: true when two item pointers (passed by value) agree in both
 * halves of the block id (bi_hi, bi_lo) and in ip_posid. Each argument is
 * expanded more than once, so pass expressions without side effects.
 */
#define BTTidSame(i1, i2) \
((i1).ip_blkid.bi_hi == (i2).ip_blkid.bi_hi && (i1).ip_blkid.bi_lo == (i2).ip_blkid.bi_lo && \
(i1).ip_posid == (i2).ip_posid)
/* BTEntrySame: compares two index tuples (passed by pointer) through their t_tid fields only. */
#define BTEntrySame(i1, i2) BTTidSame((i1)->t_tid, (i2)->t_tid)
/*
* In general, the btree code tries to localize its knowledge about
* page layout to a couple of routines. However, we need a special
@ -266,10 +243,11 @@ typedef struct xl_btree_insert {
* Note: the four XLOG_BTREE_SPLIT xl_info codes all use this data record.
* The _L and _R variants indicate whether the inserted tuple went into the
* left or right split page (and thus, whether newitemoff and the new item
* are stored or not). The _ROOT variants indicate that we are splitting
* the root page, and thus that a newroot record rather than an insert or
* split record should follow. Note that a split record never carries a
* metapage update --- we'll do that in the parent-level update.
* are stored or not). The _HIGHKEY variants indicate that we have explicitly
* logged the left page's high key value; otherwise redo should use the right
* page's leftmost key as the left page's high key. _HIGHKEY is specified for
* internal pages, where the right page's leftmost key is suppressed, and for
* leaf pages of covering indexes, where the high key has its non-key
* attributes truncated.
*
* Backup Blk 0: original page / new left page
*
@ -392,6 +370,74 @@ typedef struct xl_btree_newroot {
#define SizeOfBtreeNewroot (offsetof(xl_btree_newroot, level) + sizeof(uint32))
/*
* INCLUDE B-Tree indexes have non-key attributes. These are extra
* attributes that may be returned by index-only scans, but do not influence
* the order of items in the index (formally, non-key attributes are not
* considered to be part of the key space). Non-key attributes are only
* present in leaf index tuples whose item pointers actually point to heap
* tuples. All other types of index tuples (collectively, "pivot" tuples)
* only have key attributes, since pivot tuples only ever need to represent
* how the key space is separated. In general, any B-Tree index that has
* more than one level (i.e. any index that does not just consist of a
* metapage and a single leaf root page) must have some number of pivot
* tuples, since pivot tuples are used for traversing the tree.
*
* We store the number of attributes present inside pivot tuples by abusing
* their item pointer offset field, since pivot tuples never need to store a
* real offset (downlinks only need to store a block number). The offset
* field only stores the number of attributes when the INDEX_ALT_TID_MASK
* bit is set (we never assume that pivot tuples must explicitly store the
* number of attributes, and currently do not bother storing the number of
* attributes unless indnkeyatts actually differs from indnatts).
* INDEX_ALT_TID_MASK is only used for pivot tuples at present, though it's
* possible that it will be used within non-pivot tuples in the future. Do
* not assume that a tuple with INDEX_ALT_TID_MASK set must be a pivot
* tuple.
*
* The 12 least significant offset bits are used to represent the number of
* attributes in INDEX_ALT_TID_MASK tuples, leaving 4 bits that are reserved
* for future use (BT_RESERVED_OFFSET_MASK bits). BT_N_KEYS_OFFSET_MASK should
* be large enough to store any number <= INDEX_MAX_KEYS.
*/
#define INDEX_ALT_TID_MASK INDEX_AM_RESERVED_BIT
#define BT_RESERVED_OFFSET_MASK 0xF000
#define BT_N_KEYS_OFFSET_MASK 0x0FFF
/* Get/set downlink block number */
#define BTreeInnerTupleGetDownLink(itup) ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid))
#define BTreeInnerTupleSetDownLink(itup, blkno) ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno))
/*
 * Get/set leaf page highkey's link. During the second phase of deletion, the
 * target leaf page's high key may point to an ancestor page (at all other
 * times, the leaf level high key's link is not used). See the nbtree README
 * for full details.
 *
 * The link is kept in the block-number part of the tuple's t_tid.
 * BTreeTupleSetTopParent also calls BTreeTupleSetNAtts(itup, 0), so besides
 * storing blkno it records an attribute count of zero in the tuple.
 * itup is expanded twice by the setter; pass an expression without side effects.
 */
#define BTreeTupleGetTopParent(itup) ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid))
#define BTreeTupleSetTopParent(itup, blkno) \
do { \
ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno)); \
BTreeTupleSetNAtts((itup), 0); \
} while (0)
/*
 * Get/set number of attributes within B-tree index tuple. Asserts should be
 * removed when BT_RESERVED_OFFSET_MASK bits will be used.
 *
 * BTreeTupleGetNAtts: when INDEX_ALT_TID_MASK is set in t_info, yields the
 * offset-number part of t_tid masked with BT_N_KEYS_OFFSET_MASK (after
 * asserting that no BT_RESERVED_OFFSET_MASK bit is set); otherwise yields
 * IndexRelationGetNumberOfAttributes(rel).
 *
 * BTreeTupleSetNAtts: sets INDEX_ALT_TID_MASK in t_info and stores n, masked
 * with BT_N_KEYS_OFFSET_MASK, in the offset-number part of t_tid, overwriting
 * whatever offset was there.
 *
 * Both macros expand itup (and the setter expands n) more than once, so pass
 * expressions without side effects.
 */
#define BTreeTupleGetNAtts(itup, rel) \
((itup)->t_info & INDEX_ALT_TID_MASK \
? (AssertMacro((ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_RESERVED_OFFSET_MASK) == 0), \
ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_N_KEYS_OFFSET_MASK) \
: IndexRelationGetNumberOfAttributes(rel))
#define BTreeTupleSetNAtts(itup, n) \
do { \
(itup)->t_info |= INDEX_ALT_TID_MASK; \
Assert(((n) & BT_RESERVED_OFFSET_MASK) == 0); \
ItemPointerSetOffsetNumber(&(itup)->t_tid, (n) & BT_N_KEYS_OFFSET_MASK); \
} while (0)
/*
* Operator strategy numbers for B-tree have been moved to access/skey.h,
* because many places need to use them in ScanKeyInit() calls.
@ -437,7 +483,7 @@ typedef struct xl_btree_newroot {
/*
 * Stack entry that chains to another BTStackData through bts_parent.
 * NOTE(review): bts_btentry appears here with two types (IndexTupleData and
 * BlockNumber); a compilable header can keep only one of the two
 * declarations -- confirm which one the final file carries.
 */
typedef struct BTStackData {
BlockNumber bts_blkno;
OffsetNumber bts_offset;
IndexTupleData bts_btentry;
BlockNumber bts_btentry;
struct BTStackData* bts_parent;
} BTStackData;
@ -473,6 +519,7 @@ typedef struct BTScanPosItem { /* what we remember about each match */
ItemPointerData heapTid; /* TID of referenced heap item */
OffsetNumber indexOffset; /* index item's location within page */
LocationIndex tupleOffset; /* IndexTuple's offset in workspace, if any */
Oid partitionOid; /* partition table oid in workspace, if any */
} BTScanPosItem;
typedef struct BTScanPosData {
@ -675,6 +722,7 @@ extern bool _bt_first(IndexScanDesc scan, ScanDirection dir);
extern bool _bt_next(IndexScanDesc scan, ScanDirection dir);
extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost);
extern bool _bt_gettuple_internal(IndexScanDesc scan, ScanDirection dir);
extern bool _bt_check_natts(const Relation index, Page page, OffsetNumber offnum);
/*
* prototypes for functions in nbtutils.c
@ -699,6 +747,7 @@ extern void _bt_end_vacuum_callback(int code, Datum arg);
extern Size BTreeShmemSize(void);
extern void BTreeShmemInit(void);
extern void _bt_finish_split(Relation rel, Buffer lbuf, BTStack stack);
extern IndexTuple _bt_nonkey_truncate(Relation idxrel, IndexTuple olditup);
/*
* prototypes for functions in nbtsort.c

View File

@ -259,6 +259,7 @@ extern int8 heaprel_get_compression_from_modes(int16 modes);
extern void CheckGetServerIpAndPort(const char* Address, List** AddrList, bool IsCheck, int real_addr_max);
extern void CheckFoldernameOrFilenamesOrCfgPtah(const char* OptStr, char* OptType);
extern void CheckWaitCleanGpi(const char* value);
extern void ForbidToSetOptionsForPSort(List* options);
extern void ForbidOutUsersToSetInnerOptions(List* user_options);

View File

@ -124,14 +124,16 @@ typedef HBktTblScanDescData* HBktTblScanDesc;
typedef struct IndexScanDescData {
AbsIdxScanDescData sd;
/* scan parameters */
Relation heapRelation; /* heap relation descriptor, or NULL */
Relation indexRelation; /* index relation descriptor */
Snapshot xs_snapshot; /* snapshot to see */
int numberOfKeys; /* number of index qualifier conditions */
int numberOfOrderBys; /* number of ordering operators */
ScanKey keyData; /* array of index qualifier descriptors */
ScanKey orderByData; /* array of ordering op descriptors */
bool xs_want_itup; /* caller requests index tuples */
Relation heapRelation; /* heap relation descriptor, or NULL */
Relation indexRelation; /* index relation descriptor */
GPIScanDesc xs_gpi_scan; /* global partition index scan use information */
Snapshot xs_snapshot; /* snapshot to see */
int numberOfKeys; /* number of index qualifier conditions */
int numberOfOrderBys; /* number of ordering operators */
ScanKey keyData; /* array of index qualifier descriptors */
ScanKey orderByData; /* array of ordering op descriptors */
bool xs_want_itup; /* caller requests index tuples */
bool xs_want_ext_oid; /* global partition index need partition oid */
/* signaling to index AM about killing index tuples */
bool kill_prior_tuple; /* last-returned tuple is dead */
@ -162,6 +164,20 @@ typedef struct IndexScanDescData {
#define SizeofIndexScanDescData (offsetof(IndexScanDescData, xs_ctbuf_hdr) + SizeofHeapTupleHeader)
/*
 * Get partition heap oid for bitmap index scan.
 *
 * Yields indexRelation->rd_partHeapOid when the scan has a non-NULL index
 * relation for which RelationIsPartition() holds; yields InvalidOid in every
 * other case. (scan) is expanded several times, so pass an expression without
 * side effects.
 */
#define IndexScanGetPartHeapOid(scan) \
((scan)->indexRelation != NULL \
? (RelationIsPartition((scan)->indexRelation) ? (scan)->indexRelation->rd_partHeapOid : InvalidOid) \
: InvalidOid)
/*
 * When the global partition index is used for index scanning,
 * checks whether the partition table needs to be
 * switched each time an indextuple is obtained.
 *
 * True only when xs_want_ext_oid is set and GPIScanCheckPartOid() returns
 * true for the scan's xs_gpi_scan and the rd_id of its heap relation.
 * Once xs_want_ext_oid is set, heapRelation is dereferenced without a NULL
 * check, so the scan must have a heap relation in that case.
 */
#define IndexScanNeedSwitchPartRel(scan) \
((scan)->xs_want_ext_oid && GPIScanCheckPartOid((scan)->xs_gpi_scan, (scan)->heapRelation->rd_id))
typedef struct HBktIdxScanDescData {
AbsIdxScanDescData sd;
Relation rs_rd; /* heap relation descriptor */

View File

@ -106,6 +106,9 @@ typedef struct tupleDesc {
int tdrefcount; /* reference count, or -1 if not counting */
} * TupleDesc;
/*
 * Accessor for the i'th attribute of tupdesc.
 * Expands to the attrs[i] element itself; no address is taken.
 */
#define TupleDescAttr(tupdesc, i) ((tupdesc)->attrs[(i)])
extern TupleDesc CreateTemplateTupleDesc(int natts, bool hasoid);
extern TupleDesc CreateTupleDesc(int natts, bool hasoid, Form_pg_attribute* attrs);
@ -149,7 +152,4 @@ extern bool tupledesc_have_pck(TupleConstr* constr);
extern void copyDroppedAttribute(Form_pg_attribute target, Form_pg_attribute source);
/*
 * Accessor for the i'th attribute of tupdesc.
 * This form expands to the address of the attrs[i] element.
 */
#define TupleDescAttr(tupdesc, i) (&(tupdesc)->attrs[(i)])
#endif /* TUPDESC_H */