4928 lines
179 KiB
C++
4928 lines
179 KiB
C++
/*
|
|
* Copyright (c) 2020 Huawei Technologies Co.,Ltd.
|
|
*
|
|
* openGauss is licensed under Mulan PSL v2.
|
|
* You can use this software according to the terms and conditions of the Mulan PSL v2.
|
|
* You may obtain a copy of Mulan PSL v2 at:
|
|
*
|
|
* http://license.coscl.org.cn/MulanPSL2
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
|
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
|
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
* See the Mulan PSL v2 for more details.
|
|
* ---------------------------------------------------------------------------------------
|
|
*
|
|
* cstore_am.cpp
|
|
* routines to support ColStore
|
|
*
|
|
* IDENTIFICATION
|
|
* src/gausskernel/storage/cstore/cstore_am.cpp
|
|
*
|
|
* ---------------------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
#include "knl/knl_variable.h"
|
|
#include <fcntl.h>
|
|
#include <sys/file.h>
|
|
#include "access/tableam.h"
|
|
#include "access/tuptoaster.h"
|
|
#include "access/xact.h"
|
|
#include "catalog/catalog.h"
|
|
#include "catalog/indexing.h"
|
|
#include "utils/aiomem.h"
|
|
#include "utils/fmgroids.h"
|
|
#include "utils/snapmgr.h"
|
|
#include "utils/datum.h"
|
|
#include "utils/relcache.h"
|
|
#include "pgstat.h"
|
|
#include "catalog/pg_type.h"
|
|
#include "access/cstore_am.h"
|
|
#include "storage/custorage.h"
|
|
#include "storage/remote_read.h"
|
|
#include "utils/builtins.h"
|
|
#include "access/nbtree.h"
|
|
#include "utils/numeric.h"
|
|
#include "utils/numeric_gs.h"
|
|
#include "storage/cucache_mgr.h"
|
|
#include "storage/cstore/cstore_compress.h"
|
|
#include "access/heapam.h"
|
|
#include "access/sysattr.h"
|
|
#include "executor/instrument.h"
|
|
#include "utils/date.h"
|
|
#include "utils/rel.h"
|
|
#include "utils/rel_gs.h"
|
|
#include "access/heapam.h"
|
|
#include "vecexecutor/vecnodes.h"
|
|
#include "vecexecutor/vecnoderowtovector.h"
|
|
#include "access/cstore_roughcheck_func.h"
|
|
#include "utils/snapmgr.h"
|
|
#include "catalog/storage.h"
|
|
#include "miscadmin.h"
|
|
#include "access/htup.h"
|
|
#include "access/cstore_rewrite.h"
|
|
#include "replication/dataqueue.h"
|
|
#include "securec_check.h"
|
|
#include "commands/tablespace.h"
|
|
#include "workload/workload.h"
|
|
|
|
#ifdef PGXC
|
|
#include "pgxc/pgxc.h"
|
|
#include "pgxc/redistrib.h"
|
|
#endif
|
|
|
|
/*
 * Tracing helper for the cstore scan: when this->m_timing_on is set, invoke
 * TRACK_START for this scan's plan node id and the given step id.
 * The expansion reads `this`, so it can only be used inside CStore member
 * functions; the do/while(0) wrapper makes it behave as a single statement.
 */
#define CSTORESCAN_TRACE_START(_step_id)                     \
    do {                                                     \
        if (unlikely(this->m_timing_on)) {                   \
            TRACK_START(this->m_plan_node_id, (_step_id));   \
        }                                                    \
    } while (0)
|
|
|
|
/*
 * Counterpart of CSTORESCAN_TRACE_START: when this->m_timing_on is set, invoke
 * TRACK_END for this scan's plan node id and the given step id.
 * Only usable inside CStore member functions because it reads `this`.
 */
#define CSTORESCAN_TRACE_END(_step_id)                     \
    do {                                                   \
        if (unlikely(this->m_timing_on)) {                 \
            TRACK_END(this->m_plan_node_id, (_step_id));   \
        }                                                  \
    } while (0)
|
|
|
|
/*
 * Lower bound applied to t_thrd.cstore_cxt.cstore_prefetch_count when
 * LoadCUDescIfNeed() finishes an ADIO load round (the count is set to
 * Max(number of CUDescs loaded, this value)).
 */
#define CSTORE_MIN_PREFETCH_COUNT 8
|
|
|
|
/*
 * Bind the fill functions of accessed-column slot `slot` to the template
 * instantiations for attribute length `len`:
 *   - colFillFun[0] / colFillFun[1] : FillVector<false, len> / FillVector<true, len>
 *   - m_fillVectorByTids[slot]      : FillVectorByTids<len>
 *   - m_fillVectorLateRead[slot]    : FillVectorLateRead<len>
 * `len` is used as a template argument, so it must be a compile-time constant.
 */
#define InitFillColFunction(slot, len)                                               \
    do {                                                                             \
        m_colFillFunArrary[slot].colFillFun[0] = &CStore::FillVector<false, len>;    \
        m_colFillFunArrary[slot].colFillFun[1] = &CStore::FillVector<true, len>;     \
        m_fillVectorByTids[slot] = &CStore::FillVectorByTids<len>;                   \
        m_fillVectorLateRead[slot] = &CStore::FillVectorLateRead<len>;               \
    } while (0)
|
|
|
|
/*
 * Default constructor: puts every member into an "empty scan" state
 * (NULL pointers, zero counters, -1 for the plan node / index markers).
 * No memory is allocated here; InitScan() does the real set-up.
 */
CStore::CStore()
    : m_relation(NULL), m_scanMemContext(NULL), m_perScanMemCnxt(NULL), m_snapshot(NULL),
      m_colId(NULL), m_sysColId(NULL), m_lateRead(NULL),
      m_cuStorage(NULL), m_CUDescInfo(NULL), m_virtualCUDescInfo(NULL), m_CUDescIdx(NULL),
      m_lastNumCUDescIdx(0), m_prefetch_quantity(0), m_prefetch_threshold(0), m_load_finish(false),
      m_scanPosInCU(NULL), m_RCFuncs(NULL),
      m_fillVectorByTids(NULL), m_fillVectorLateRead(NULL), m_colFillFunArrary(NULL),
      m_fillMinMaxFunc(NULL), m_scanFunc(NULL),
      m_plan_node_id(-1),
      m_colNum(0), m_sysColNum(0), m_NumLoadCUDesc(0), m_NumCUDescIdx(0),
      m_delMaskCUId(InValidCUID),
      m_cursor(0), m_rowCursorInCU(0), m_startCUID(0), m_endCUID(0),
      m_hasDeadRow(false), m_needRCheck(false), m_onlyConstCol(false), m_timing_on(false),
      m_rangeScanInRedis({false, 0, 0}),
      m_useBtreeIndex(false), m_firstColIdx(0), m_cuDescIdx(-1), m_laterReadCtidColIdx(-1)
{
    // Maintenance rule: anything allocated in the constructor or in the
    // init-scan functions must be released explicitly on the tear-down path.
    // Do not rely on a memory context reset to reclaim it, otherwise the
    // cstore index rescan path leaks memory.
}
|
|
|
|
/*
|
|
* @Description: assign to function point according to different data type.
|
|
* @in - proj: Projection information.
|
|
*/
|
|
void CStore::BindingFp(CStoreScanState* state)
|
|
{
|
|
int i = 0;
|
|
Relation rel = state->ss_currentRelation;
|
|
ProjectionInfo* proj = state->ps.ps_ProjInfo;
|
|
|
|
if (proj->pi_maxOrmin) {
|
|
Assert(list_length(proj->pi_maxOrmin) == list_length(proj->pi_acessedVarNumbers));
|
|
m_scanFunc = &CStore::CStoreMinMaxScan;
|
|
m_fillMinMaxFunc = (fillMinMaxFuncPtr*)palloc0(sizeof(fillMinMaxFuncPtr) * m_colNum);
|
|
|
|
for (i = 0; i < m_colNum; ++i) {
|
|
switch (rel->rd_att->attrs[m_colId[i]]->atttypid) {
|
|
case CHAROID:
|
|
case INT2OID:
|
|
case INT4OID:
|
|
case INT8OID:
|
|
case OIDOID:
|
|
case DATEOID:
|
|
case TIMEOID:
|
|
case TIMESTAMPOID: {
|
|
m_fillMinMaxFunc[i] = &CStore::FillColMinMax;
|
|
break;
|
|
}
|
|
default: {
|
|
m_fillMinMaxFunc[i] = NULL;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < m_colNum; ++i) {
|
|
m_CUDescInfo[i] = New(CurrentMemoryContext) LoadCUDescCtl(m_startCUID);
|
|
if (m_colId[i] > rel->rd_att->natts - 1 || m_colId[i] < 0) {
|
|
continue;
|
|
}
|
|
switch (rel->rd_att->attrs[m_colId[i]]->attlen) {
|
|
case sizeof(char):
|
|
InitFillColFunction(i, (int)sizeof(char));
|
|
break;
|
|
case sizeof(int16):
|
|
InitFillColFunction(i, (int)sizeof(int16));
|
|
break;
|
|
case sizeof(int32):
|
|
InitFillColFunction(i, (int)sizeof(int32));
|
|
break;
|
|
case sizeof(Datum):
|
|
InitFillColFunction(i, (int)sizeof(Datum));
|
|
break;
|
|
case 12:
|
|
InitFillColFunction(i, 12);
|
|
break;
|
|
case 16:
|
|
InitFillColFunction(i, 16);
|
|
break;
|
|
case -1:
|
|
InitFillColFunction(i, -1);
|
|
break;
|
|
case -2:
|
|
InitFillColFunction(i, -2);
|
|
break;
|
|
default:
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
|
(errmsg("unsupported data type length %d of column \"%s\" of relation \"%s\" ",
|
|
(int)rel->rd_att->attrs[m_colId[i]]->attlen,
|
|
NameStr(rel->rd_att->attrs[m_colId[i]]->attname),
|
|
RelationGetRelationName(rel)))));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * @Description: build the column bookkeeping needed to fill output vectors.
 *   - Translates the projection's accessed var numbers (1-based) into the
 *     0-based m_colId[] array, rejecting column ids that are outside the tuple
 *     descriptor or that refer to a dropped column.
 *   - Marks the columns listed in pi_lateAceessVarNumbers as late-read.
 *   - Allocates the per-column arrays and binds the fill functions (BindingFp).
 *   - Records the requested system columns in m_sysColId[].
 *   - Creates the virtual CUDesc control block when only system/const columns
 *     are accessed.
 * All allocations are made in m_scanMemContext.
 * @in - state: scan state; its projection info is read.
 */
void CStore::InitFillVecEnv(CStoreScanState* state)
{
    // Everything allocated below has to survive until the scan is torn down,
    // so allocate from m_scanMemContext for the whole function.
    AutoContextSwitch newMemCnxt(m_scanMemContext);

    ProjectionInfo* proj = state->ps.ps_ProjInfo;
    if (proj->pi_acessedVarNumbers != NIL) {
        List* pColList = proj->pi_acessedVarNumbers;

        m_colNum = list_length(pColList);
        m_colId = (int*)palloc(sizeof(int) * m_colNum);
        // palloc0 => every column starts as "not late read"
        m_lateRead = (bool*)palloc0(sizeof(bool) * m_colNum);

        int i = 0;
        ListCell* cell = NULL;

        // Initialize which columns should be accessed
        foreach (cell, pColList) {
            // var numbers are 1-based, m_colId[] is 0-based
            Assert(lfirst_int(cell) > 0);
            int colId = lfirst_int(cell) - 1;

            // The lower bound used to be guarded by the Assert only, so a
            // non-positive var number in a release build indexed attrs[] out
            // of bounds. Reject both ends of the range explicitly.
            if (colId < 0 || colId >= m_relation->rd_att->natts) {
                ereport(ERROR, (errcode(ERRCODE_UNDEFINED_COLUMN),
                                errmsg("column %d does not exist", colId)));
            }

            if (m_relation->rd_att->attrs[colId]->attisdropped) {
                ereport(ERROR, (errcode(ERRCODE_UNDEFINED_COLUMN),
                                errmsg("column %s does not exist",
                                       NameStr(m_relation->rd_att->attrs[colId]->attname))));
            }
            m_colId[i] = colId;
            i++;
        }

        // Initialize which columns will be late read
        foreach (cell, proj->pi_lateAceessVarNumbers) {
            int colId = lfirst_int(cell) - 1;
            for (i = 0; i < m_colNum; ++i) {
                if (colId == m_colId[i]) {
                    m_lateRead[i] = true;
                    break;
                }
            }
        }

        m_scanPosInCU = (int*)palloc0(sizeof(int) * m_colNum);
        m_CUDescInfo = (LoadCUDescCtl**)palloc(sizeof(LoadCUDescCtl*) * m_colNum);
        m_colFillFunArrary = (colFillArray*)palloc(sizeof(colFillArray) * m_colNum);
        m_fillVectorByTids = (FillVectorByTidsFun*)palloc(sizeof(FillVectorByTidsFun) * m_colNum);
        m_fillVectorLateRead = (FillVectorLateReadFun*)palloc(sizeof(FillVectorLateReadFun) * m_colNum);

        // fills m_CUDescInfo[] and the function pointer arrays allocated above
        BindingFp(state);
    }

    // Init sys columns
    if (proj->pi_sysAttrList != NIL) {
        ListCell* cell = NULL;
        List* pSysList = proj->pi_sysAttrList;
        m_sysColNum = list_length(pSysList);
        m_sysColId = (int*)palloc(sizeof(int) * m_sysColNum);
        int i = 0;
        foreach (cell, pSysList) {
            m_sysColId[i++] = lfirst_int(cell);
        }
    }

    m_onlyConstCol = proj->pi_const;

#ifdef USE_ASSERT_CHECKING
    if (m_onlyConstCol) {
        Assert(m_colNum == 0 && m_sysColNum == 0);
    }
#endif

    // only access sys columns or const columns
    if (OnlySysOrConstCol()) {
        m_virtualCUDescInfo = New(CurrentMemoryContext) LoadCUDescCtl(m_startCUID);
    }
}
|
|
|
|
/*
 * @Description: look up one rough-check function per scan key and store them
 *   in m_RCFuncs[]. Does nothing when the scan has no keys.
 *   Each key's cs_attno is used as an index into m_colId[] to find the
 *   attribute whose type selects the function.
 * @in - state: scan state providing the scan keys and the relation.
 */
void CStore::InitRoughCheckEnv(CStoreScanState* state)
{
    // m_RCFuncs has to live as long as the scan, so allocate it from m_scanMemContext.
    AutoContextSwitch newMemCnxt(m_scanMemContext);

    const int keyCount = state->csss_NumScanKeys;
    if (keyCount <= 0) {
        return;
    }

    CStoreScanKey keys = state->csss_ScanKeys;
    Form_pg_attribute* attrs = state->ss_currentRelation->rd_att->attrs;

    m_RCFuncs = (RoughCheckFunc*)palloc(sizeof(RoughCheckFunc) * keyCount);
    for (int k = 0; k < keyCount; k++) {
        int attrIdx = m_colId[keys[k].cs_attno];
        m_RCFuncs[k] = GetRoughCheckFunc(attrs[attrIdx]->atttypid, keys[k].cs_strategy, keys[k].cs_collation);
    }
}
|
|
|
|
/*
 * @Description: prepare this CStore object for scanning.
 *   Creates the two private memory contexts, one CUStorage per non-dropped
 *   attribute, the CUDesc index ring (filled with 0xFF bytes), resets the
 *   cursors and prefetch counters, then sets the scan range and initializes
 *   the fill-vector and rough-check environments.
 * @in - state: scan state (relation, projection info, scan keys, plan node).
 * @in - snapshot: snapshot remembered in m_snapshot for this scan.
 */
void CStore::InitScan(CStoreScanState* state, Snapshot snapshot)
{
    Assert(state && state->ps.ps_ProjInfo);

    // Private memory contexts come first; both are children of the caller's
    // current context.
    m_scanMemContext = AllocSetContextCreate(CurrentMemoryContext,
                                             "cstore scan memory context",
                                             ALLOCSET_DEFAULT_MINSIZE,
                                             ALLOCSET_DEFAULT_INITSIZE,
                                             ALLOCSET_DEFAULT_MAXSIZE);
    m_perScanMemCnxt = AllocSetContextCreate(CurrentMemoryContext,
                                             "cstore scan per scan memory context",
                                             ALLOCSET_DEFAULT_MINSIZE,
                                             ALLOCSET_DEFAULT_INITSIZE,
                                             ALLOCSET_DEFAULT_MAXSIZE);

    // default scan entry point; BindingFp() may replace it
    m_scanFunc = &CStore::CStoreScan;

    // From here on allocate from m_scanMemContext, which lives until the scan
    // is torn down.
    AutoContextSwitch newMemCnxt(m_scanMemContext);
    m_relation = state->ss_currentRelation;

    const int attrCount = m_relation->rd_att->natts;
    Form_pg_attribute* attrs = m_relation->rd_att->attrs;

    m_cuStorage = (CUStorage**)palloc(sizeof(CUStorage*) * attrCount);
    for (int att = 0; att < attrCount; ++att) {
        if (attrs[att]->attisdropped) {
            // dropped columns get no storage object
            m_cuStorage[att] = NULL;
            continue;
        }
        // NOTE(review): this is overwritten for every live column, so despite
        // its name m_firstColIdx ends up as the LAST non-dropped column - confirm
        // that any valid column is acceptable to its users.
        m_firstColIdx = att;
        // Here we must use physical column id
        CFileNode cFileNode(m_relation->rd_node, attrs[att]->attnum, MAIN_FORKNUM);
        m_cuStorage[att] = New(CurrentMemoryContext) CUStorage(cFileNode);
    }

    // CUDesc index ring: every byte set to 0xFF
    m_CUDescIdx = (int*)palloc(sizeof(int) * u_sess->attr.attr_storage.max_loaded_cudesc);
    errno_t rc = memset_s((char*)m_CUDescIdx,
                          sizeof(int) * u_sess->attr.attr_storage.max_loaded_cudesc,
                          0xFF,
                          sizeof(int) * u_sess->attr.attr_storage.max_loaded_cudesc);
    securec_check(rc, "\0", "\0");

    // cursors and counters
    m_cursor = 0;
    m_rowCursorInCU = 0;
    m_colNum = 0;
    m_NumCUDescIdx = 0;

    // prefetch budget: the smaller of a quarter of the CU cache size and
    // cstore_prefetch_quantity * 1024
    m_prefetch_quantity = 0;
    m_prefetch_threshold =
        Min(CUCache->m_cstoreMaxSize / 4, u_sess->attr.attr_storage.cstore_prefetch_quantity * 1024LL);

    m_snapshot = snapshot;
    m_rangeScanInRedis = state->rangeScanInRedis;

    SetScanRange();

    InitFillVecEnv(state);

    InitRoughCheckEnv(state);

    /* remember node id of this plan */
    m_plan_node_id = state->ps.plan->plan_node_id;
}
|
|
|
|
/*
 * Destructor: only clears the pointer members. It frees nothing itself;
 * the resources are released in Destroy().
 */
CStore::~CStore()
{
    // relation / snapshot / memory contexts
    m_relation = NULL;
    m_snapshot = NULL;
    m_scanMemContext = NULL;
    m_perScanMemCnxt = NULL;

    // column bookkeeping
    m_colId = NULL;
    m_sysColId = NULL;
    m_lateRead = NULL;
    m_scanPosInCU = NULL;

    // storage and CUDesc state
    m_cuStorage = NULL;
    m_CUDescInfo = NULL;
    m_virtualCUDescInfo = NULL;
    m_CUDescIdx = NULL;

    // function tables
    m_RCFuncs = NULL;
    m_colFillFunArrary = NULL;
    m_fillVectorByTids = NULL;
    m_fillVectorLateRead = NULL;
    m_fillMinMaxFunc = NULL;
}
|
|
|
|
void CStore::Destroy()
|
|
{
|
|
if (m_relation != NULL) {
|
|
int attNo = m_relation->rd_att->natts;
|
|
if (m_cuStorage) {
|
|
for (int i = 0; i < attNo; ++i) {
|
|
if (m_cuStorage[i])
|
|
DELETE_EX(m_cuStorage[i]);
|
|
else {
|
|
Assert(m_relation->rd_att->attrs[i]->attisdropped);
|
|
if (!m_relation->rd_att->attrs[i]->attisdropped) {
|
|
ereport(WARNING, (errmsg("m_cuStorage[%d] is NULL for a valid column", i)));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// only access sys columns or const columns
|
|
if (OnlySysOrConstCol()) {
|
|
Assert(m_virtualCUDescInfo);
|
|
DELETE_EX(m_virtualCUDescInfo);
|
|
}
|
|
|
|
if (m_CUDescInfo) {
|
|
for (int i = 0; i < m_colNum; ++i) {
|
|
DELETE_EX(m_CUDescInfo[i]);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Important:
|
|
* 1. all objects by NEW() must be freed by DELETE_EX() above;
|
|
* 2. all spaces by palloc()/palloc0() can be freed either pfree() or deleting
|
|
* these memory context following.
|
|
*/
|
|
Assert(m_scanMemContext && m_perScanMemCnxt);
|
|
MemoryContextDelete(m_perScanMemCnxt);
|
|
MemoryContextDelete(m_scanMemContext);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* @Description: prefetch one cu according by the given column
|
|
* @Param[IN] col: column id
|
|
* @Param[IN] count: prefetch cache count
|
|
* @Param[IN] cudesc: cu describe
|
|
* @Param[IN/OUT] dList: adio dispatch list
|
|
* @See also:
|
|
*/
|
|
void CStore::CUPrefetch(CUDesc* cudesc, int col, AioDispatchCUDesc_t** dList, int& count, File* vfdList)
|
|
{
|
|
CU* cu_ptr = NULL;
|
|
bool found = false;
|
|
int slotId = CACHE_BLOCK_INVALID_IDX;
|
|
AioDispatchCUDesc_t* aioDescp = NULL;
|
|
|
|
// same value CU
|
|
if (cudesc->cu_size == 0 || cudesc->IsNullCU() || cudesc->IsSameValCU()) {
|
|
return;
|
|
}
|
|
|
|
/* it is better to check all delete and do not prefetch, but actually load cu does not take care of it */
|
|
uint64 load_offset = m_cuStorage[col]->GetAlignCUOffset(cudesc->cu_pointer);
|
|
int head_padding_size = cudesc->cu_pointer - load_offset;
|
|
int load_size = m_cuStorage[col]->GetAlignCUSize(head_padding_size + cudesc->cu_size);
|
|
/* need check and add sitution cu store in many files,
|
|
now if we found, jump the cu, we should think about more to deal with the CU in adio module */
|
|
if (!m_cuStorage[col]->IsCUStoreInOneFile(load_offset, load_size)) {
|
|
ereport(LOG,
|
|
(errmodule(MOD_ADIO),
|
|
errmsg("CUPrefetch: skip cloumn(%d), cuid(%u), offset(%lu), size(%d) ",
|
|
col,
|
|
cudesc->cu_id,
|
|
cudesc->cu_pointer,
|
|
cudesc->cu_size)));
|
|
return;
|
|
}
|
|
|
|
DataSlotTag dataSlotTag = CUCache->InitCUSlotTag((RelFileNodeOld *)&m_relation->rd_node, col, cudesc->cu_id,
|
|
cudesc->cu_pointer);
|
|
// find whether already in CUCache, ReserveDataBlock can also find CU in cache,
|
|
// but i still add FindDataBlock here for efficient
|
|
// here we ignore the enter block times
|
|
slotId = CUCache->FindDataBlock(&dataSlotTag, false);
|
|
if (IsValidCacheSlotID(slotId)) {
|
|
ereport(DEBUG1,
|
|
(errmodule(MOD_ADIO),
|
|
errmsg("prefetch find cu cache: relid(%u), column(%d), load cuid(%u)",
|
|
m_relation->rd_node.relNode,
|
|
col,
|
|
cudesc->cu_id)));
|
|
CUCache->UnPinDataBlock(slotId);
|
|
return;
|
|
}
|
|
|
|
slotId = CUCache->ReserveDataBlock(&dataSlotTag, cudesc->cu_size, found);
|
|
if (found) {
|
|
CUCache->UnPinDataBlock(slotId);
|
|
return;
|
|
}
|
|
|
|
/* ReserveDataBlock, load_buf ,fd, offset allocate before adio_share_alloc becasue these can auto rollback */
|
|
File file = m_cuStorage[col]->GetCUFileFd(load_offset);
|
|
uint64 file_offset = m_cuStorage[col]->GetCUOffsetInFile(load_offset);
|
|
|
|
aioDescp = (AioDispatchCUDesc_t*)adio_share_alloc(sizeof(AioDispatchCUDesc_t));
|
|
|
|
cu_ptr = CUCache->GetCUBuf(slotId);
|
|
Assert(cu_ptr);
|
|
|
|
cu_ptr->m_head_padding_size = head_padding_size;
|
|
cu_ptr->m_adio_error = false;
|
|
cu_ptr->m_inCUCache = true;
|
|
cu_ptr->m_compressedLoadBuf = (char*)CStoreMemAlloc::Palloc(load_size, false);
|
|
cu_ptr->m_compressedBuf = cu_ptr->m_compressedLoadBuf + cu_ptr->m_head_padding_size;
|
|
cu_ptr->m_compressedBufSize = cudesc->cu_size;
|
|
cu_ptr->SetCUSize(cudesc->cu_size);
|
|
cu_ptr->m_cache_compressed = true;
|
|
|
|
/* iocb filled in later */
|
|
aioDescp->aiocb.data = 0;
|
|
aioDescp->aiocb.aio_fildes = 0;
|
|
aioDescp->aiocb.aio_lio_opcode = 0;
|
|
aioDescp->aiocb.u.c.buf = 0;
|
|
aioDescp->aiocb.u.c.nbytes = 0;
|
|
aioDescp->aiocb.u.c.offset = 0;
|
|
|
|
aioDescp->cuDesc.buf = cu_ptr->m_compressedLoadBuf;
|
|
aioDescp->cuDesc.offset = file_offset;
|
|
aioDescp->cuDesc.size = load_size;
|
|
aioDescp->cuDesc.fd = file;
|
|
vfdList[count] = file;
|
|
aioDescp->cuDesc.io_error = &cu_ptr->m_adio_error;
|
|
aioDescp->cuDesc.slotId = slotId; // slotId maybe CACHE_BLOCK_INVALID_IDX
|
|
aioDescp->cuDesc.cu_pointer = cudesc->cu_pointer;
|
|
aioDescp->cuDesc.reqType = CUListPrefetchType;
|
|
aioDescp->aiocb.aio_reqprio = CompltrPriority(aioDescp->cuDesc.reqType);
|
|
|
|
dList[count] = aioDescp;
|
|
io_prep_pread((struct iocb*)dList[count],
|
|
aioDescp->cuDesc.fd,
|
|
aioDescp->cuDesc.buf,
|
|
aioDescp->cuDesc.size,
|
|
aioDescp->cuDesc.offset);
|
|
count++;
|
|
|
|
CUCache->TerminateCU(false); // already record in dList[count]
|
|
|
|
Assert(IsValidCacheSlotID(slotId));
|
|
CUCache->UnPinDataBlock(slotId);
|
|
CUCache->CULWLockDisown(slotId);
|
|
|
|
ereport(DEBUG1,
|
|
(errmodule(MOD_ADIO),
|
|
errmsg("CUPrefetch: relid(%u), slotId(%d), col_id(%d), cu_id(%u), cu_size(%d), cu_point(%lu)",
|
|
m_relation->rd_node.relNode,
|
|
slotId,
|
|
col,
|
|
cudesc->cu_id,
|
|
cudesc->cu_size,
|
|
cudesc->cu_pointer)));
|
|
|
|
/* check need submint io */
|
|
if (count >= MAX_CU_PREFETCH_REQSIZ) {
|
|
int tmp_count = count;
|
|
|
|
HOLD_INTERRUPTS();
|
|
FileAsyncCURead(dList, count);
|
|
count = 0;
|
|
RESUME_INTERRUPTS();
|
|
|
|
FileAsyncCUClose(vfdList, tmp_count);
|
|
// stat cu hdd asyn read
|
|
pgstatCountCUHDDAsynRead4SessionLevel(tmp_count);
|
|
pgstat_count_cu_hdd_asyn(m_relation, tmp_count);
|
|
}
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* @Description: cstore scan use this api to prefetch, load CU in vector, preload, organize CU and CU cache,
|
|
* use io_in_process lock to protect io and leave uncompress for next scan, add flag to know which CU load by ADIO(need
|
|
* uncompress) and when scan it, do crc check, decompress, free compressed buf, set flag no need again aio completer
|
|
* thread only do unlock io_in_process
|
|
* @See also:
|
|
*/
|
|
void CStore::CUListPrefetch()
|
|
{
|
|
/* cudesc not load, so no need prefetch */
|
|
if (m_lastNumCUDescIdx == m_NumCUDescIdx) {
|
|
return;
|
|
}
|
|
|
|
/* virtual cu not need load */
|
|
if (OnlySysOrConstCol()) {
|
|
return;
|
|
}
|
|
|
|
t_thrd.cstore_cxt.InProgressAioCUDispatch =
|
|
(AioDispatchCUDesc_t**)palloc(sizeof(AioDispatchCUDesc_t*) * MAX_CU_PREFETCH_REQSIZ);
|
|
AioDispatchCUDesc_t** dList = t_thrd.cstore_cxt.InProgressAioCUDispatch;
|
|
t_thrd.cstore_cxt.InProgressAioCUDispatchCount = 0;
|
|
|
|
File* vfdList = (File*)palloc(sizeof(File) * MAX_CU_PREFETCH_REQSIZ);
|
|
errno_t rc =
|
|
memset_s((char*)vfdList, sizeof(File) * MAX_CU_PREFETCH_REQSIZ, 0xFF, sizeof(File) * MAX_CU_PREFETCH_REQSIZ);
|
|
securec_check(rc, "\0", "\0");
|
|
|
|
// load CU each column
|
|
int cuDescIdxTmp = 0;
|
|
for (int col = 0; col < m_colNum; col++) {
|
|
/* late read, no need prefetch */
|
|
if (IsLateRead(col)) {
|
|
continue;
|
|
}
|
|
/* vector load cu */
|
|
for (int cuDescIdx = m_lastNumCUDescIdx; cuDescIdx != m_NumCUDescIdx; IncLoadCuDescIdx(cuDescIdx)) {
|
|
cuDescIdxTmp = cuDescIdx;
|
|
CUDesc* cudesc = &(m_CUDescInfo[col]->cuDescArray[m_CUDescIdx[cuDescIdx]]);
|
|
CUPrefetch(cudesc, m_colId[col], dList, t_thrd.cstore_cxt.InProgressAioCUDispatchCount, vfdList);
|
|
}
|
|
}
|
|
if (t_thrd.cstore_cxt.InProgressAioCUDispatchCount > 0) {
|
|
int tmp_count = t_thrd.cstore_cxt.InProgressAioCUDispatchCount;
|
|
|
|
HOLD_INTERRUPTS();
|
|
FileAsyncCURead(dList, t_thrd.cstore_cxt.InProgressAioCUDispatchCount);
|
|
t_thrd.cstore_cxt.InProgressAioCUDispatchCount = 0;
|
|
RESUME_INTERRUPTS();
|
|
|
|
FileAsyncCUClose(vfdList, tmp_count);
|
|
// stat cu hdd asyn read
|
|
pgstatCountCUHDDAsynRead4SessionLevel(tmp_count);
|
|
pgstat_count_cu_hdd_asyn(m_relation, tmp_count);
|
|
}
|
|
|
|
pfree(dList);
|
|
pfree(vfdList);
|
|
t_thrd.cstore_cxt.InProgressAioCUDispatch = NULL;
|
|
t_thrd.cstore_cxt.InProgressAioCUDispatchCount = 0;
|
|
|
|
ereport(DEBUG1,
|
|
(errmodule(MOD_ADIO),
|
|
errmsg("CUListPrefetch: relation(%s), cloumns(%d), cuid from %u to %u ",
|
|
RelationGetRelationName(m_relation),
|
|
m_colNum,
|
|
m_CUDescInfo[0]->cuDescArray[m_CUDescIdx[m_lastNumCUDescIdx]].cu_id,
|
|
m_CUDescInfo[0]->cuDescArray[m_CUDescIdx[cuDescIdxTmp]].cu_id)));
|
|
|
|
m_lastNumCUDescIdx = m_NumCUDescIdx;
|
|
}
|
|
|
|
/*
|
|
* @Description: aio clean up CU status
|
|
* @See also:
|
|
*/
|
|
void CUListPrefetchAbort()
|
|
{
|
|
int count = t_thrd.cstore_cxt.InProgressAioCUDispatchCount;
|
|
int already_submit_count = u_sess->storage_cxt.AsyncSubmitIOCount;
|
|
AioDispatchCUDesc_t** dList = t_thrd.cstore_cxt.InProgressAioCUDispatch;
|
|
|
|
if (t_thrd.cstore_cxt.InProgressAioCUDispatchCount == 0) {
|
|
return;
|
|
}
|
|
ereport(LOG, (errmsg("aio cu prefetch: aio dispatch count(%d)", count)));
|
|
for (int i = already_submit_count; i < count; i++) {
|
|
if (dList[i] == NULL) {
|
|
continue;
|
|
}
|
|
CUCache->AbortCU(dList[i]->cuDesc.slotId);
|
|
adio_share_free(dList[i]);
|
|
dList[i] = NULL;
|
|
}
|
|
t_thrd.cstore_cxt.InProgressAioCUDispatch = NULL;
|
|
t_thrd.cstore_cxt.InProgressAioCUDispatchCount = 0;
|
|
u_sess->storage_cxt.AsyncSubmitIOCount = 0;
|
|
}
|
|
|
|
/*
|
|
* @Description: api function aio clean up CU status
|
|
* @See also:
|
|
*/
|
|
void CStoreAbortCU()
|
|
{
|
|
/* Don't support columnar table in single node mode */
|
|
if (!IS_SINGLE_NODE) {
|
|
CUCache->TerminateCU(true);
|
|
CUListPrefetchAbort();
|
|
}
|
|
}
|
|
|
|
/*
|
|
* @Description: AbortCU in verify process.
|
|
*/
|
|
void VerifyAbortCU()
|
|
{
|
|
CUCache->TerminateVerifyCU();
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* @Description: Similiar to CStoreScan, but moving an entire "row" (same CUID) of CU and
|
|
* Corresponding CUDescs and bitmaps.
|
|
* Used so far on CStore partition merging.
|
|
* Supports ADIO.
|
|
* Attention: by using CUStorage->LoadCU we use CStoreMemAlloc::Palloc to
|
|
* palloc CU_ptr(s) assigned to BatchCUData->CUptrData. So
|
|
* they have to be CStoreMemAlloc::Pfree later when finished.
|
|
* @See also: ATExecCStoreMergePartition
|
|
*/
|
|
void CStore::CStoreScanWithCU(_in_ CStoreScanState* state, __inout BatchCUData* batchCUData, _in_ bool isVerify)
|
|
{
|
|
// step1: The number of holding CUDesc is max_loaded_cudesc
|
|
// if we load all CUDesc once, the memory will not enough.
|
|
// So we load CUdesc once for max_loaded_cudesc
|
|
LoadCUDescIfNeed();
|
|
|
|
// step2: Do RoughCheck if need
|
|
// elimiate CU by min/max value of CU.
|
|
// Necessary for ADIO.
|
|
RoughCheckIfNeed(state);
|
|
|
|
/*
|
|
* step3: Have CU hitted
|
|
* we will not fill vector because no CU is hitted
|
|
*/
|
|
ADIO_RUN()
|
|
{
|
|
if (unlikely(m_cursor == m_NumCUDescIdx)) {
|
|
return;
|
|
}
|
|
}
|
|
ADIO_ELSE()
|
|
{
|
|
if (unlikely(m_NumLoadCUDesc == 0)) {
|
|
return;
|
|
}
|
|
}
|
|
ADIO_END();
|
|
|
|
/*
|
|
* step 4:
|
|
* load CUDescs and CUs of the row that is currently being processed
|
|
*
|
|
* 1. Number of processed rows with CUDescs
|
|
* 2. Number of deadrows. Should be 0 because we do not deal with deadrows in copying CUs
|
|
*/
|
|
int cuDescIdx = m_CUDescIdx[m_cursor];
|
|
Form_pg_attribute* attrs = m_relation->rd_att->attrs;
|
|
for (int i = 0; i < m_colNum; ++i) {
|
|
/* colIdx is pysical column id */
|
|
int colIdx = m_colId[i];
|
|
if (attrs[colIdx]->attisdropped) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OPERATION),
|
|
(errmsg("Cannot load CUDesc and CU for a dropped column \"%s\" of table \"%s\"",
|
|
NameStr(attrs[colIdx]->attname),
|
|
RelationGetRelationName(m_relation)))));
|
|
}
|
|
|
|
CUDesc* cuDescPtr = &(m_CUDescInfo[i]->cuDescArray[cuDescIdx]);
|
|
CU* cuDataPtr =
|
|
New(CurrentMemoryContext) CU(attrs[colIdx]->attlen, attrs[colIdx]->atttypmod, attrs[colIdx]->atttypid);
|
|
|
|
/* CStoreMemAlloc::Palloc is used in LoadCU. Need toCStoreMemAlloc::Pfree later */
|
|
if (cuDescPtr->cu_size > 0) {
|
|
m_cuStorage[colIdx]->LoadCU(cuDataPtr,
|
|
cuDescPtr->cu_pointer,
|
|
cuDescPtr->cu_size,
|
|
g_instance.attr.attr_storage.enable_adio_function,
|
|
false);
|
|
|
|
if (cuDataPtr->IsVerified(cuDescPtr->magic) == false) {
|
|
addBadBlockStat(
|
|
&m_cuStorage[colIdx]->m_cnode.m_rnode, ColumnId2ColForkNum(m_cuStorage[colIdx]->m_cnode.m_attid));
|
|
|
|
if (RelationNeedsWAL(m_relation) && CanRemoteRead()) {
|
|
ereport(WARNING,
|
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
|
(errmsg("invalid CU in cu_id %u of relation \"%s\" file %s offset %lu, try to remote read",
|
|
cuDescPtr->cu_id,
|
|
RelationGetRelationName(m_relation),
|
|
relcolpath(m_cuStorage[colIdx]),
|
|
cuDescPtr->cu_pointer)),
|
|
handle_in_client(true)));
|
|
|
|
m_cuStorage[colIdx]->RemoteLoadCU(cuDataPtr,
|
|
cuDescPtr->cu_pointer,
|
|
cuDescPtr->cu_size,
|
|
g_instance.attr.attr_storage.enable_adio_function,
|
|
false);
|
|
|
|
if (cuDataPtr->IsVerified(cuDescPtr->magic)) {
|
|
m_cuStorage[colIdx]->OverwriteCU(
|
|
cuDataPtr->m_compressedBuf, cuDescPtr->cu_pointer, cuDescPtr->cu_size, false);
|
|
} else {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
|
(errmsg("fail to remote read CU, data corrupted in network"))));
|
|
}
|
|
} else {
|
|
int elevel = ERROR;
|
|
if (isVerify) {
|
|
elevel = WARNING;
|
|
}
|
|
ereport(elevel,
|
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
|
(errmsg("CU verification failed. The node is %s, invalid CU in cu_id %u of relation %s,"
|
|
"file %s offset %lu",
|
|
g_instance.attr.attr_common.PGXCNodeName,
|
|
cuDescPtr->cu_id,
|
|
RelationGetRelationName(m_relation),
|
|
relcolpath(m_cuStorage[colIdx]),
|
|
cuDescPtr->cu_pointer)),
|
|
handle_in_client(true)));
|
|
}
|
|
}
|
|
}
|
|
|
|
*batchCUData->CUDescData[colIdx] = *cuDescPtr;
|
|
batchCUData->CUptrData[colIdx] = cuDataPtr;
|
|
GetCUDeleteMaskIfNeed(cuDescPtr->cu_id, m_snapshot);
|
|
}
|
|
|
|
batchCUData->hasValue = true;
|
|
|
|
batchCUData->CopyDelMask(m_hasDeadRow ? m_cuDelMask : NULL);
|
|
|
|
// step5: refresh cursor
|
|
// Since we are moving CU in whole, we just move the cursor
|
|
IncLoadCuDescIdx(m_cursor);
|
|
|
|
// We should never have dived into rows in CU in this function
|
|
Assert(m_rowCursorInCU == 0);
|
|
|
|
// step6: prefetch if need
|
|
ADIO_RUN()
|
|
{
|
|
CUListPrefetch();
|
|
}
|
|
ADIO_END();
|
|
}
|
|
|
|
// CStoreScan
|
|
// Scan ColStore table and fill vecBatchOut
|
|
void CStore::CStoreScan(_in_ CStoreScanState* state, _out_ VectorBatch* vecBatchOut)
|
|
{
|
|
// step1: The number of holding CUDesc is max_loaded_cudesc
|
|
// if we load all CUDesc once, the memory will not enough.
|
|
// So we load CUdesc once for max_loaded_cudesc
|
|
CSTORESCAN_TRACE_START(LOAD_CU_DESC);
|
|
LoadCUDescIfNeed();
|
|
CSTORESCAN_TRACE_END(LOAD_CU_DESC);
|
|
|
|
// step2: Do RoughCheck if need
|
|
// elimiate CU by min/max value of CU.
|
|
CSTORESCAN_TRACE_START(MIN_MAX_CHECK);
|
|
RoughCheckIfNeed(state);
|
|
CSTORESCAN_TRACE_END(MIN_MAX_CHECK);
|
|
|
|
// step3: Have CU hitted
|
|
// we will not fill vector because no CU is hitted
|
|
ADIO_RUN()
|
|
{
|
|
if (unlikely(m_cursor == m_NumCUDescIdx)) {
|
|
return;
|
|
}
|
|
}
|
|
ADIO_ELSE()
|
|
{
|
|
if (unlikely(m_NumLoadCUDesc == 0)) {
|
|
return;
|
|
}
|
|
}
|
|
ADIO_END();
|
|
|
|
// step4: Fill VecBatch
|
|
CSTORESCAN_TRACE_START(FILL_BATCH);
|
|
int deadRows = FillVecBatch(vecBatchOut);
|
|
CSTORESCAN_TRACE_END(FILL_BATCH);
|
|
|
|
// step5: refresh cursor
|
|
RefreshCursor(vecBatchOut->m_rows, deadRows);
|
|
|
|
// step6: prefetch if need
|
|
ADIO_RUN()
|
|
{
|
|
CSTORESCAN_TRACE_START(PREFETCH_CU_LIST);
|
|
CUListPrefetch();
|
|
CSTORESCAN_TRACE_END(PREFETCH_CU_LIST);
|
|
}
|
|
ADIO_END();
|
|
}
|
|
|
|
/*
|
|
* @Description: calculate how many cudescs loaded
|
|
* @Param[IN] end: array idx end
|
|
* @Param[IN] start: array idx start
|
|
* @Return: count of cudescs loaded
|
|
* @See also:
|
|
*/
|
|
int CStore::LoadCudescMinus(int start, int end) const
|
|
{
|
|
if (end >= start) {
|
|
return end - start;
|
|
}
|
|
|
|
return u_sess->attr.attr_storage.max_loaded_cudesc - start + end;
|
|
}
|
|
|
|
/*
|
|
* @Description: check whether cudesc array is full
|
|
* @Param[IN] end: array idx end
|
|
* @Param[IN] start: array idx start
|
|
* @Return: true- have empty slot, false no free slot
|
|
* @See also:
|
|
*/
|
|
bool CStore::HasEnoughCuDescSlot(int start, int end) const
|
|
{
|
|
return LoadCudescMinus(start, end) < u_sess->attr.attr_storage.max_loaded_cudesc - 1;
|
|
}
|
|
|
|
/*
|
|
* @Description: check whether meet load cudesc condition and set load cudesc array idx and so on
|
|
* @Param[IN] cudesc_idx: load cudesc array idx
|
|
* @Return: true--need load cudesc, false-- not need
|
|
* @See also:
|
|
*/
|
|
bool CStore::NeedLoadCUDesc(int32& cudesc_idx)
|
|
{
|
|
bool need_load = false;
|
|
ADIO_RUN()
|
|
{
|
|
// check load condition, first not load finish, second not exceed prefetch count
|
|
if (!m_load_finish && (m_cursor == m_NumCUDescIdx || LoadCudescMinus(m_cursor, m_NumCUDescIdx) <=
|
|
t_thrd.cstore_cxt.cstore_prefetch_count / 2)) {
|
|
need_load = true;
|
|
m_prefetch_quantity = 0;
|
|
cudesc_idx = m_NumCUDescIdx;
|
|
}
|
|
}
|
|
ADIO_ELSE()
|
|
{
|
|
if (m_cursor >= m_NumLoadCUDesc) {
|
|
need_load = true;
|
|
m_cursor = 0;
|
|
cudesc_idx = 0;
|
|
}
|
|
}
|
|
ADIO_END();
|
|
|
|
return need_load;
|
|
}
|
|
|
|
// The number of holding CUDesc is max_loaded_cudesc
|
|
// if we load all CUDesc once, the memory will not enough.
|
|
// So we load CUdesc once for max_loaded_cudesc
|
|
void CStore::LoadCUDescIfNeed()
|
|
{
|
|
uint32 last_load_num = 0;
|
|
int32 cudesc_idx = 0; // cudesc_idx set 0 when buffer io, and set to m_NumCUDescIdx when adio
|
|
|
|
if (!NeedLoadCUDesc(cudesc_idx)) {
|
|
return;
|
|
}
|
|
|
|
m_NumLoadCUDesc = 0;
|
|
|
|
Assert(m_perScanMemCnxt);
|
|
// we reset when a batch of CUs have been scanned and handled.
|
|
MemoryContextReset(m_perScanMemCnxt);
|
|
#ifdef MEMORY_CONTEXT_CHECKING
|
|
MemoryContextCheck(m_perScanMemCnxt->parent, m_perScanMemCnxt->parent->session_id > 0);
|
|
#endif
|
|
|
|
// Load CUDesc into m_cuDescInfo for all accessed columns
|
|
if (m_colNum > 0) {
|
|
last_load_num = m_CUDescInfo[0]->curLoadNum;
|
|
}
|
|
|
|
do {
|
|
bool found = false;
|
|
for (int i = 0; i < m_colNum; ++i) {
|
|
Assert(m_colId[i] >= 0);
|
|
// if enable adio, load one cu for caculate prefetch quantity
|
|
found =
|
|
LoadCUDesc(m_colId[i], m_CUDescInfo[i], g_instance.attr.attr_storage.enable_adio_function, m_snapshot);
|
|
}
|
|
|
|
if (likely(m_colNum > 1 && m_CUDescInfo[0]->curLoadNum > 0)) {
|
|
CheckConsistenceOfCUDescCtl();
|
|
/* check the first CUDesc for all columns */
|
|
CheckConsistenceOfCUDesc(0);
|
|
/* check the last CUDesc for all columns */
|
|
if (m_CUDescInfo[0]->curLoadNum > 1) {
|
|
CheckConsistenceOfCUDesc(m_CUDescInfo[0]->curLoadNum - 1);
|
|
}
|
|
}
|
|
|
|
if (m_colNum > 0) {
|
|
for (int j = (int)m_CUDescInfo[0]->lastLoadNum; j != (int)m_CUDescInfo[0]->curLoadNum;
|
|
IncLoadCuDescIdx(j)) {
|
|
m_CUDescIdx[cudesc_idx] = j;
|
|
IncLoadCuDescIdx(cudesc_idx);
|
|
}
|
|
m_NumLoadCUDesc += LoadCudescMinus(m_CUDescInfo[0]->lastLoadNum, m_CUDescInfo[0]->curLoadNum);
|
|
}
|
|
|
|
ADIO_RUN()
|
|
{
|
|
// if found ,we need to check prefetch quantity and decide whether need load more cudesc
|
|
if (found && m_prefetch_quantity < m_prefetch_threshold &&
|
|
HasEnoughCuDescSlot(last_load_num, m_CUDescInfo[0]->curLoadNum)) {
|
|
continue;
|
|
}
|
|
// load finish, set lastLoadNum to backup values
|
|
for (int i = 0; i < m_colNum; ++i) {
|
|
m_CUDescInfo[i]->lastLoadNum = last_load_num;
|
|
}
|
|
if (m_colNum > 0) {
|
|
// give an min prefetch count here,because we need prefetch window to control whether need prefetch
|
|
t_thrd.cstore_cxt.cstore_prefetch_count = Max(m_NumLoadCUDesc, CSTORE_MIN_PREFETCH_COUNT);
|
|
ereport(DEBUG1,
|
|
(errmodule(MOD_ADIO),
|
|
errmsg("LoadCUDesc: columns(%d), count(%d), quantity(%d)",
|
|
m_colNum,
|
|
m_NumLoadCUDesc,
|
|
m_prefetch_quantity)));
|
|
}
|
|
break;
|
|
}
|
|
ADIO_ELSE()
|
|
{
|
|
break;
|
|
}
|
|
ADIO_END();
|
|
} while (1);
|
|
|
|
// sys columns and const columns
|
|
if (m_colNum > 0 && m_sysColNum != 0) {
|
|
// access normal columns and sys columns, use normal column's CUDesc
|
|
m_virtualCUDescInfo = m_CUDescInfo[0];
|
|
} else if (OnlySysOrConstCol()) {
|
|
// only system columns or const columns, use the first column's CUDesc
|
|
Assert(m_virtualCUDescInfo);
|
|
LoadCUDesc(m_firstColIdx, m_virtualCUDescInfo, false, m_snapshot);
|
|
|
|
for (int j = (int)m_virtualCUDescInfo->lastLoadNum; j != (int)m_virtualCUDescInfo->curLoadNum;
|
|
IncLoadCuDescIdx(j)) {
|
|
m_CUDescIdx[cudesc_idx] = j;
|
|
IncLoadCuDescIdx(cudesc_idx);
|
|
}
|
|
m_NumLoadCUDesc = LoadCudescMinus(m_virtualCUDescInfo->lastLoadNum, m_virtualCUDescInfo->curLoadNum);
|
|
// adio used it, but no need add ADIO_RUN(), for buffer io it is no use
|
|
t_thrd.cstore_cxt.cstore_prefetch_count = m_NumLoadCUDesc;
|
|
}
|
|
|
|
// Load new CUs need do rough check
|
|
m_needRCheck = true;
|
|
|
|
// before RoughCheck, m_NumCUDescIdx is length of loaded CUDesc info
|
|
BFIO_RUN()
|
|
{
|
|
m_NumCUDescIdx = m_NumLoadCUDesc;
|
|
}
|
|
BFIO_END();
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* @Description: increase load cudesc array idx
|
|
* @Param[IN/OUT] idx: idx of array
|
|
* @See also:
|
|
*/
|
|
void CStore::IncLoadCuDescIdx(int& idx) const
|
|
{
|
|
idx++;
|
|
|
|
ADIO_RUN()
|
|
{
|
|
if (idx >= u_sess->attr.attr_storage.max_loaded_cudesc) {
|
|
idx = 0;
|
|
}
|
|
}
|
|
ADIO_END();
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* @Description: Set CU range for Range Scan In Redistribute
|
|
*
|
|
* @return: void
|
|
*/
|
|
void CStore::SetScanRange()
|
|
{
|
|
Oid cudescOid = m_relation->rd_rel->relcudescrelid;
|
|
uint32 maxCuId = CStore::GetMaxCUID(cudescOid, m_relation->rd_att, m_snapshot);
|
|
uint32 CUCount = maxCuId - FirstCUID;
|
|
|
|
uint32 startCUID = FirstCUID + 1;
|
|
uint32 endCUID = maxCuId;
|
|
|
|
if (m_rangeScanInRedis.isRangeScanInRedis) {
|
|
ItemPointerData start_ctid;
|
|
ItemPointerData end_ctid;
|
|
|
|
RelationGetCtids(m_relation, &start_ctid, &end_ctid);
|
|
|
|
startCUID = RedisCtidGetBlockNumber(&start_ctid);
|
|
endCUID = RedisCtidGetBlockNumber(&end_ctid);
|
|
CUCount = endCUID - startCUID + 1;
|
|
}
|
|
|
|
m_startCUID = startCUID;
|
|
m_endCUID = endCUID;
|
|
}
|
|
|
|
void CStore::RefreshCursor(int row, int deadRows)
|
|
{
|
|
int cuRowCount = 0;
|
|
int idx = m_CUDescIdx[m_cursor];
|
|
|
|
if (likely(m_CUDescInfo != NULL)) {
|
|
cuRowCount = m_CUDescInfo[0]->cuDescArray[idx].row_count;
|
|
} else {
|
|
Assert(m_virtualCUDescInfo);
|
|
cuRowCount = m_virtualCUDescInfo->cuDescArray[idx].row_count;
|
|
}
|
|
|
|
m_rowCursorInCU = m_rowCursorInCU + row + deadRows;
|
|
|
|
Assert(m_rowCursorInCU <= cuRowCount);
|
|
if (unlikely(m_rowCursorInCU == cuRowCount)) {
|
|
IncLoadCuDescIdx(m_cursor);
|
|
m_rowCursorInCU = 0;
|
|
if (likely(m_scanPosInCU != NULL)) {
|
|
Assert(m_colNum > 0);
|
|
errno_t rc = memset_s(m_scanPosInCU, sizeof(int) * m_colNum, 0, sizeof(int) * m_colNum);
|
|
securec_check(rc, "", "");
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* @Description: cudesc rough check
|
|
* @Param[IN] cuDescIdx:index of load cudesc info
|
|
* @Param[IN] nkeys: keys of scanKey
|
|
* @Param[IN] scanKey: cstore scan key
|
|
* @Return: true--hit, false--not hit
|
|
* @See also:
|
|
*/
|
|
bool CStore::RoughCheck(CStoreScanKey scanKey, int nkeys, int cuDescIdx)
|
|
{
|
|
bool hitCU = true;
|
|
|
|
for (int j = 0; j < nkeys; j++) {
|
|
int seq = scanKey[j].cs_attno;
|
|
CUDesc* cudesc = &(m_CUDescInfo[seq]->cuDescArray[cuDescIdx]);
|
|
bool isNullKey = scanKey[j].cs_flags & SK_ISNULL;
|
|
if ((cudesc->IsNullCU() && !isNullKey) || cudesc->IsNoMinMaxCU())
|
|
continue;
|
|
if (isNullKey)
|
|
hitCU = cudesc->CUHasNull() || cudesc->IsNullCU();
|
|
else
|
|
hitCU = m_RCFuncs[j](cudesc, scanKey[j].cs_argument);
|
|
if (!hitCU)
|
|
break;
|
|
}
|
|
return hitCU;
|
|
}
|
|
|
|
/*
 * @Description: rough-check the CUDesc batch loaded by LoadCUDescIfNeed() and keep
 *               in m_CUDescIdx only the entries that hit the scan keys. When the
 *               plan node is instrumented, also maintain the rough check statistics
 *               (hit / filtered CU counts and the number of filtered live rows).
 * @Param[IN] state: cstore scan state providing the scan keys and the instrument
 * @See also: RoughCheck()
 */
void CStore::RoughCheckIfNeed(_in_ CStoreScanState* state)
{
    int nkeys = state->csss_NumScanKeys;
    CStoreScanKey scanKey = state->csss_ScanKeys;
    PlanState* planstate = (PlanState*)state;
    uint32 curLoadNum;
    uint32 lastLoadNum;

    // m_needRCheck is false when the loaded CUs have already been rough checked;
    // it is set to true again each time a new batch of CUDesc is loaded.
    if (likely(!m_needRCheck)) {
        return;
    }

    // m_colNum == 0 means there are no normal columns
    if (likely(nkeys == 0 || scanKey == NULL || m_colNum == 0)) {
        /* when no where condition, we also need set m_lastNumCUDescIdx and m_NumCUDescIdx for prefetch once */
        ADIO_RUN()
        {
            m_NumCUDescIdx = (m_NumCUDescIdx + m_NumLoadCUDesc) % u_sess->attr.attr_storage.max_loaded_cudesc;
            m_needRCheck = false;
        }
        ADIO_END();
        return;
    }

    int pos = 0;
    bool hitCU = true;
    int cudesc_idx_tmp = 0;

    ADIO_RUN()
    {
        /* pos is rough check start point */
        cudesc_idx_tmp = m_NumCUDescIdx;
        pos = m_NumCUDescIdx;
    }
    ADIO_END();

    lastLoadNum = m_CUDescInfo[0]->lastLoadNum;
    curLoadNum = m_CUDescInfo[0]->curLoadNum;
    for (int i = (int)lastLoadNum; i != (int)curLoadNum; IncLoadCuDescIdx(i), IncLoadCuDescIdx(cudesc_idx_tmp)) {
        hitCU = RoughCheck(scanKey, nkeys, i);
        if (hitCU) {
            // keep the CU that hit; CUs that do not hit are filtered out by not copying them
            ADIO_RUN()
            {
                m_CUDescIdx[pos] = m_CUDescIdx[cudesc_idx_tmp];
            }
            ADIO_ELSE()
            {
                m_CUDescIdx[pos] = m_CUDescIdx[i];
            }
            ADIO_END();

            IncLoadCuDescIdx(pos);
        }

        if (planstate->instrument) {
            RCInfo* rcPtr = &(planstate->instrument->rcInfo);

            if (!hitCU) {
                // the whole CU is filtered: count its rows, then subtract the rows
                // already marked deleted in the delete bitmap of this CU.
                int seq = scanKey[0].cs_attno;
                CUDesc* cudesc = &(m_CUDescInfo[seq]->cuDescArray[i]);
                planstate->instrument->nfiltered1 += cudesc->row_count;

                Relation cuDescRel = heap_open(m_relation->rd_rel->relcudescrelid, AccessShareLock);
                Relation idxRel = index_open(cuDescRel->rd_rel->relcudescidx, AccessShareLock);
                ScanKeyData key[2];

                // look up the virtual delete column's cudesc tuple of this CU
                ScanKeyInit(&key[0], (AttrNumber)CUDescColIDAttr, BTEqualStrategyNumber,
                    F_INT4EQ, Int32GetDatum(VitrualDelColID));
                ScanKeyInit(&key[1], (AttrNumber)CUDescCUIDAttr, BTEqualStrategyNumber,
                    F_OIDEQ, UInt32GetDatum(cudesc->cu_id));
                SysScanDesc cuDescScan = systable_beginscan_ordered(cuDescRel, idxRel, m_snapshot, 2, key);

                HeapTuple tmpTup = NULL;

                if ((tmpTup = systable_getnext_ordered(cuDescScan, ForwardScanDirection)) != NULL) {
                    bool isNull = false;
                    uint32 deadRowCount = 0;
                    uint32 rowCount = DatumGetUInt32(fastgetattr(tmpTup, CUDescRowCountAttr,
                        cuDescRel->rd_att, &isNull));
                    Datum v = fastgetattr(tmpTup, CUDescCUPointerAttr, cuDescRel->rd_att, &isNull);
                    if (!isNull) {
                        int8* bitmap = (int8*)PG_DETOAST_DATUM(DatumGetPointer(v));
                        unsigned char delBitMap[MaxDelBitmapSize];
                        uint32 storedBytes = (uint32)VARSIZE_ANY_EXHDR(bitmap);
                        uint32 nBytes = (rowCount + 7) / 8;
                        errno_t rc = memcpy_s(delBitMap, MaxDelBitmapSize, VARDATA_ANY(bitmap), storedBytes);
                        securec_check(rc, "", "");
                        // never count bits beyond what was copied: the rest of
                        // delBitMap is uninitialized stack memory.
                        if (nBytes > storedBytes) {
                            nBytes = storedBytes;
                        }
                        for (uint32 j = 0; j < nBytes; j++) {
                            deadRowCount += NumberOfBit1Set[delBitMap[j]];
                        }
                        /* because new memory may be created, so we have to check and free in time. */
                        if ((Pointer)bitmap != DatumGetPointer(v)) {
                            pfree_ext(bitmap);
                        }
                    }
                    planstate->instrument->nfiltered1 -= deadRowCount;
                }
                systable_endscan_ordered(cuDescScan);
                index_close(idxRel, AccessShareLock);
                heap_close(cuDescRel, AccessShareLock);

                rcPtr->IncNoneCUNum();
            } else {
                rcPtr->IncSomeCUNum();
            }
            planstate->instrument->needRCInfo = true;
        }
    }

    ADIO_RUN()
    {
        /* m_NumCUDescIdx is rough check end point, so need update here */
        if (pos == m_NumCUDescIdx) {
            m_NumCUDescIdx = (m_NumCUDescIdx + m_NumLoadCUDesc) % u_sess->attr.attr_storage.max_loaded_cudesc;
        } else {
            m_NumCUDescIdx = pos;
        }
    }
    ADIO_ELSE()
    {
        m_NumLoadCUDesc = pos;
    }
    ADIO_END();

    // the rough check of this batch is done
    m_needRCheck = false;
}
|
|
|
|
void CStore::InitReScan()
|
|
{
|
|
/* Set scan cu range */
|
|
SetScanRange();
|
|
for (int i = 0; i < m_colNum; ++i) {
|
|
m_CUDescInfo[i]->Reset(m_startCUID);
|
|
}
|
|
|
|
int totalSize = 0;
|
|
errno_t rc = 0;
|
|
if (likely(m_scanPosInCU != NULL)) {
|
|
Assert(m_colNum > 0);
|
|
totalSize = sizeof(int) * m_colNum;
|
|
rc = memset_s(m_scanPosInCU, totalSize, 0, totalSize);
|
|
securec_check(rc, "", "");
|
|
}
|
|
|
|
m_delMaskCUId = InValidCUID;
|
|
m_hasDeadRow = false;
|
|
m_prefetch_quantity = 0;
|
|
|
|
m_load_finish = false;
|
|
if (m_CUDescIdx != NULL) {
|
|
totalSize = sizeof(int) * u_sess->attr.attr_storage.max_loaded_cudesc;
|
|
rc = memset_s(m_CUDescIdx, totalSize, 0xFF, totalSize);
|
|
securec_check(rc, "\0", "\0");
|
|
}
|
|
|
|
// only access sys columns or const columns
|
|
if (OnlySysOrConstCol()) {
|
|
Assert(m_virtualCUDescInfo);
|
|
m_virtualCUDescInfo->Reset(m_startCUID);
|
|
}
|
|
|
|
// m_sysColNum shouldn't be reset or changed.
|
|
// m_colNum shouldn't be reset or changed.
|
|
m_NumLoadCUDesc = 0;
|
|
m_NumCUDescIdx = 0;
|
|
m_lastNumCUDescIdx = 0;
|
|
m_cursor = 0;
|
|
m_rowCursorInCU = 0;
|
|
m_cuDescIdx = -1;
|
|
m_laterReadCtidColIdx = -1;
|
|
|
|
m_needRCheck = false;
|
|
}
|
|
|
|
/*
 * @Description: switch the scan to another partition relation and rebuild the
 *               CUStorage object of every column that is not dropped.
 * @Param[IN] rel: the new partition relation to scan
 */
void CStore::InitPartReScan(Relation rel)
{
    Assert(m_cuStorage);

    // change to the new partition relation.
    m_relation = rel;
    const int natts = m_relation->rd_att->natts;

    // the objects created below live until the destructor is called,
    // so allocate them in m_scanMemContext, which is kept until the end.
    AutoContextSwitch scanCnxtGuard(m_scanMemContext);

    // the new partition has different file handles, so the old *m_cuStorage*
    // entry is destroyed (which closes its open fd) and a new object is
    // created for the next partition.
    for (int attIdx = 0; attIdx < natts; ++attIdx) {
        if (!m_relation->rd_att->attrs[attIdx]->attisdropped) {
            if (m_cuStorage[attIdx]) {
                DELETE_EX(m_cuStorage[attIdx]);
            }

            // Here we must use physical column id
            CFileNode cFileNode(m_relation->rd_node, m_relation->rd_att->attrs[attIdx]->attnum, MAIN_FORKNUM);
            m_cuStorage[attIdx] = New(CurrentMemoryContext) CUStorage(cFileNode);
        }
    }
}
|
|
|
|
// FORCE_INLINE
|
|
bool CStore::IsEndScan() const
|
|
{
|
|
// all CUDesc already scanned
|
|
ADIO_RUN()
|
|
{
|
|
return (m_cursor == m_NumCUDescIdx && m_load_finish) ? true : false;
|
|
}
|
|
ADIO_ELSE()
|
|
{
|
|
return (m_NumCUDescIdx == 0) ? true : false;
|
|
}
|
|
ADIO_END();
|
|
}
|
|
|
|
// Return the late-read flag stored in m_lateRead at position *id*.
FORCE_INLINE
bool CStore::IsLateRead(int id) const
{
    Assert(m_lateRead != NULL);
    return *(m_lateRead + id);
}
|
|
|
|
void CStore::ResetLateRead()
|
|
{
|
|
for (int i = 0; i < m_colNum; ++i)
|
|
m_lateRead[i] = false;
|
|
}
|
|
|
|
/*
|
|
* @Description: set m_timing_on according state->ps.instrument and its timer
|
|
* @IN state: cstore scan state
|
|
* @Return: true if instrument::need_timer is set true; otherwise return false
|
|
* @See also:
|
|
*/
|
|
void CStore::SetTiming(CStoreScanState* state)
|
|
{
|
|
m_timing_on = (NULL != ((ScanState*)state)->ps.instrument && ((ScanState*)state)->ps.instrument->need_timer);
|
|
}
|
|
|
|
/*
 * @Description: build the output batch for an index scan. The tids are taken
 *               from the last column of *idxOut*; normal columns are either
 *               copied from the index output or fetched by tid, then system
 *               columns are filled, and finally the row count is fixed up when
 *               only const columns are accessed.
 * @Param[IN] state: index scan state (index output column mapping, index type)
 * @Param[IN] idxOut: batch produced by the index scan, tid vector last
 * @Param[OUT] vbout: batch to fill
 */
void CStore::ScanByTids(_in_ CStoreIndexScanState* state, _in_ VectorBatch* idxOut, _out_ VectorBatch* vbout)
{
    Assert(state && idxOut && vbout);
    Assert(idxOut->m_cols >= 1);

    CSTORESCAN_TRACE_START(SCAN_BY_TID);

    int* indexOutBaseTabAttr = state->m_indexOutBaseTabAttr;
    int indexOutAttrNo = state->m_indexOutAttrNo;

    /* set if use btree index */
    m_useBtreeIndex = (state->m_indexScan == NULL);

    /*
     * Pre-Step: For const-targetlist, set output rows.
     */
    vbout->m_rows = idxOut->m_rows;

    // the tid vector is the last column of the index output
    ScalarVector* tids = idxOut->m_arr + idxOut->m_cols - 1;

    // Step 1: Fill normal column Vector according to tid
    CSTORESCAN_TRACE_START(FILL_VECTOR_BATCH_BY_TID);
    for (int col = 0; col < m_colNum; col++) {
        int colIdx = m_colId[col];
        ScalarVector* outVec = vbout->m_arr + colIdx;

        // find out whether this column has already been produced by the index scan
        int idxOutPos = -1;
        for (int k = 0; k < indexOutAttrNo; k++) {
            if (colIdx == indexOutBaseTabAttr[k] - 1) {
                idxOutPos = k;
                break;
            }
        }

        if (idxOutPos >= 0) {
            // shallow copy of the index scan output into the output vector
            FillVectorByIndex(colIdx, tids, idxOut->m_arr + idxOutPos, outVec);
        } else {
            Assert(m_fillVectorByTids[col]);
            (this->*m_fillVectorByTids[col])(colIdx, tids, outVec);
        }
        vbout->m_rows = outVec->m_rows;
    }
    CSTORESCAN_TRACE_END(FILL_VECTOR_BATCH_BY_TID);

    // Step 2: Fill system columns if needed
    for (int sysCol = 0; sysCol < m_sysColNum; sysCol++) {
        int sysColIdx = m_sysColId[sysCol];
        ScalarVector* sysVec = vbout->GetSysVector(sysColIdx);

        switch (sysColIdx) {
            case SelfItemPointerAttributeNumber:
                FillSysVecByTid<SelfItemPointerAttributeNumber>(tids, sysVec);
                break;
            case TableOidAttributeNumber:
                FillSysVecByTid<TableOidAttributeNumber>(tids, sysVec);
                break;
            case XC_NodeIdAttributeNumber:
                FillSysVecByTid<XC_NodeIdAttributeNumber>(tids, sysVec);
                break;
            case MinTransactionIdAttributeNumber:
                FillSysVecByTid<MinTransactionIdAttributeNumber>(tids, sysVec);
                break;
            default:
                ereport(ERROR,
                    (errcode(ERRCODE_DATATYPE_MISMATCH),
                        (errmsg("Cannot to fill unsupported system column %d for column store table", sysColIdx))));
                break;
        }

        vbout->m_rows = sysVec->m_rows;
    }

    // Step 3: fill const columns if needed
    if (unlikely(m_onlyConstCol)) {
        // only the row count has to be set: count the tids that point to live rows
        ScalarVector* firstVec = vbout->m_arr;
        ScalarValue* tidValue = tids->m_vals;
        uint32 loadedCUId = InValidCUID;
        int liveRows = 0;

        for (int row = 0; row < tids->m_rows; row++) {
            ItemPointer tidPtr = (ItemPointer)&tidValue[row];
            uint32 rowCUId = ItemPointerGetBlockNumber(tidPtr);

            // Note that the row offset kept in a tid starts from 1
            uint32 rowOffset = ItemPointerGetOffsetNumber(tidPtr) - 1;

            // Get the delete mask when moving on to another CU
            if (loadedCUId != rowCUId) {
                loadedCUId = rowCUId;
                GetCUDeleteMaskIfNeed(loadedCUId, m_snapshot);
            }

            // It is a live row, not a dead row
            if (m_delMaskCUId != InValidCUID && !IsDeadRow(loadedCUId, rowOffset)) {
                ++liveRows;
            }
        }

        firstVec->m_rows = liveRows;
        vbout->m_rows = firstVec->m_rows;
    }

    CSTORESCAN_TRACE_END(SCAN_BY_TID);
}
|
|
|
|
// form a cudes tuple for deleting bitmap
|
|
HeapTuple CStore::FormVCCUDescTup(
|
|
_in_ TupleDesc cudesc, _in_ const char* delMask, _in_ uint32 cuId, _in_ int32 rowCount, _in_ uint32 magic)
|
|
{
|
|
Datum values[CUDescMaxAttrNum] = {0};
|
|
bool nulls[CUDescMaxAttrNum] = {0};
|
|
text* tmpCuPointData = NULL;
|
|
|
|
values[CUDescColIDAttr - 1] = Int32GetDatum(VitrualDelColID);
|
|
nulls[CUDescColIDAttr - 1] = false;
|
|
|
|
values[CUDescCUIDAttr - 1] = UInt32GetDatum(cuId);
|
|
nulls[CUDescCUIDAttr - 1] = false;
|
|
|
|
values[CUDescRowCountAttr - 1] = Int32GetDatum(rowCount);
|
|
nulls[CUDescRowCountAttr - 1] = false;
|
|
|
|
values[CUDescCUMagicAttr - 1] = UInt32GetDatum(magic);
|
|
nulls[CUDescCUMagicAttr - 1] = false;
|
|
|
|
// deleting bitmap maybe be NULL, when any updating
|
|
// or deleting never happens.
|
|
if (delMask) {
|
|
nulls[CUDescCUPointerAttr - 1] = false;
|
|
|
|
int delMaskBytes = (rowCount + 7) / 8;
|
|
tmpCuPointData = cstring_to_text_with_len((const char*)delMask, delMaskBytes);
|
|
values[CUDescCUPointerAttr - 1] = PointerGetDatum(tmpCuPointData);
|
|
Assert(VARSIZE_ANY_EXHDR(PointerGetDatum(tmpCuPointData)) == (uint32)delMaskBytes);
|
|
} else
|
|
nulls[CUDescCUPointerAttr - 1] = true;
|
|
|
|
// the other fields are useless, so set them null.
|
|
nulls[CUDescMinAttr - 1] = true;
|
|
nulls[CUDescMaxAttr - 1] = true;
|
|
nulls[CUDescCUModeAttr - 1] = true;
|
|
nulls[CUDescSizeAttr - 1] = true;
|
|
nulls[CUDescCUExtraAttr - 1] = true;
|
|
|
|
HeapTuple newTup = (HeapTuple)tableam_tops_form_tuple(cudesc, values, nulls, HEAP_TUPLE);
|
|
|
|
// ok, the temp data has been copied to newTup.
|
|
// now we must free it before returning.
|
|
if (tmpCuPointData != NULL) {
|
|
pfree_ext(tmpCuPointData);
|
|
}
|
|
return newTup;
|
|
}
|
|
|
|
// pCudescTupDesc: Cudesc tuple description.
|
|
// pCudesc: a CUDesc object holding all information about a complete Cudesc Tuple.
|
|
// values[]: used during forming tuple.
|
|
// nulls[]: used during forming tuple.
|
|
// pColAttr: attribute data of one column, who matches pCudesc above, for column-store table.
|
|
HeapTuple CStore::FormCudescTuple(_in_ CUDesc* pCudesc, _in_ TupleDesc pCudescTupDesc,
|
|
_in_ Datum pTupVals[CUDescMaxAttrNum], _in_ bool pTupNulls[CUDescMaxAttrNum], _in_ Form_pg_attribute pColAttr)
|
|
{
|
|
errno_t rc = memset_s(pTupNulls, CUDescMaxAttrNum, false, CUDescMaxAttrNum);
|
|
securec_check(rc, "\0", "\0");
|
|
|
|
pTupVals[CUDescColIDAttr - 1] = Int32GetDatum(pColAttr->attnum);
|
|
pTupVals[CUDescCUIDAttr - 1] = UInt32GetDatum(pCudesc->cu_id);
|
|
|
|
int minDataLen = 0, maxDataLen = 0;
|
|
char *minDataPtr = NULL, *maxDataPtr = NULL;
|
|
|
|
if (pColAttr->attlen > 0) {
|
|
if (pColAttr->attbyval) {
|
|
// Now we use int8 to store
|
|
minDataLen = maxDataLen = sizeof(Datum);
|
|
} else if (pColAttr->attlen <= MIN_MAX_LEN) {
|
|
minDataLen = maxDataLen = pColAttr->attlen;
|
|
} else {
|
|
Assert(minDataLen == 0 && maxDataLen == 0);
|
|
}
|
|
|
|
minDataPtr = pCudesc->cu_min;
|
|
maxDataPtr = pCudesc->cu_max;
|
|
} else {
|
|
Assert(pCudesc->cu_min[0] >= 0 && pCudesc->cu_min[0] < MIN_MAX_LEN);
|
|
Assert(pCudesc->cu_max[0] >= 0 && pCudesc->cu_max[0] < MIN_MAX_LEN);
|
|
|
|
minDataLen = pCudesc->cu_min[0];
|
|
minDataPtr = pCudesc->cu_min + 1;
|
|
|
|
maxDataLen = pCudesc->cu_max[0];
|
|
maxDataPtr = pCudesc->cu_max + 1;
|
|
}
|
|
pTupVals[CUDescMinAttr - 1] = PointerGetDatum(cstring_to_text_with_len(minDataPtr, minDataLen));
|
|
pTupVals[CUDescMaxAttr - 1] = PointerGetDatum(cstring_to_text_with_len(maxDataPtr, maxDataLen));
|
|
|
|
pTupVals[CUDescRowCountAttr - 1] = Int32GetDatum(pCudesc->row_count);
|
|
pTupVals[CUDescCUModeAttr - 1] = Int32GetDatum(pCudesc->cu_mode);
|
|
pTupVals[CUDescSizeAttr - 1] = Int32GetDatum(pCudesc->cu_size);
|
|
|
|
text* tmpStr3 = cstring_to_text_with_len((const char*)&pCudesc->cu_pointer, sizeof(CUPointer));
|
|
pTupVals[CUDescCUPointerAttr - 1] = PointerGetDatum(tmpStr3);
|
|
pTupVals[CUDescCUMagicAttr - 1] = UInt32GetDatum(pCudesc->magic);
|
|
Assert(pTupVals[CUDescCUMagicAttr - 1] > 0);
|
|
|
|
// add attribute extra and set null flag.
|
|
pTupNulls[CUDescCUExtraAttr - 1] = true;
|
|
|
|
return (HeapTuple)tableam_tops_form_tuple(pCudescTupDesc, pTupVals, pTupNulls, HEAP_TUPLE);
|
|
}
|
|
|
|
/* description: future plan-refact CStore::LoadCUDesc() with DeformCudescTuple(). */
|
|
void CStore::DeformCudescTuple(
|
|
_in_ HeapTuple pCudescTup, _in_ TupleDesc pCudescTupDesc, _in_ Form_pg_attribute pColAttr, _out_ CUDesc* pCudesc)
|
|
{
|
|
errno_t rc = EOK;
|
|
bool isnull = false;
|
|
|
|
pCudesc->cu_id = DatumGetUInt32(fastgetattr(pCudescTup, CUDescCUIDAttr, pCudescTupDesc, &isnull));
|
|
Assert(!isnull);
|
|
|
|
// Put min value into cudesc->min
|
|
char* valPtr = DatumGetPointer(fastgetattr(pCudescTup, CUDescMinAttr, pCudescTupDesc, &isnull));
|
|
if (!isnull) {
|
|
if (pColAttr->attlen > 0) {
|
|
if (pColAttr->attbyval) {
|
|
Assert((int)VARSIZE_ANY_EXHDR(valPtr) == sizeof(Datum));
|
|
rc = memcpy_s(pCudesc->cu_min, MIN_MAX_LEN, VARDATA_ANY(valPtr), sizeof(Datum));
|
|
securec_check(rc, "", "");
|
|
} else if (pColAttr->attlen <= MIN_MAX_LEN) {
|
|
Assert((int)VARSIZE_ANY_EXHDR(valPtr) == pColAttr->attlen);
|
|
rc = memcpy_s(pCudesc->cu_min, MIN_MAX_LEN, VARDATA_ANY(valPtr), pColAttr->attlen);
|
|
securec_check(rc, "", "");
|
|
} else {
|
|
Assert(pCudesc->cu_min[0] == 0);
|
|
}
|
|
} else {
|
|
pCudesc->cu_min[0] = VARSIZE_ANY_EXHDR(valPtr);
|
|
if (pCudesc->cu_min[0] > 0) {
|
|
Assert(pCudesc->cu_min[0] < MIN_MAX_LEN);
|
|
rc = memcpy_s(pCudesc->cu_min + 1, (MIN_MAX_LEN - 1), VARDATA_ANY(valPtr), pCudesc->cu_min[0]);
|
|
securec_check(rc, "", "");
|
|
} else {
|
|
Assert(pCudesc->cu_min[0] == 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Put max value into cudesc->max
|
|
valPtr = DatumGetPointer(fastgetattr(pCudescTup, CUDescMaxAttr, pCudescTupDesc, &isnull));
|
|
if (!isnull) {
|
|
if (pColAttr->attlen > 0) {
|
|
if (pColAttr->attbyval) {
|
|
Assert((int)VARSIZE_ANY_EXHDR(valPtr) == sizeof(Datum));
|
|
rc = memcpy_s(pCudesc->cu_max, MIN_MAX_LEN, VARDATA_ANY(valPtr), sizeof(Datum));
|
|
securec_check(rc, "", "");
|
|
} else if (pColAttr->attlen <= MIN_MAX_LEN) {
|
|
Assert((int)VARSIZE_ANY_EXHDR(valPtr) == pColAttr->attlen);
|
|
rc = memcpy_s(pCudesc->cu_max, MIN_MAX_LEN, VARDATA_ANY(valPtr), pColAttr->attlen);
|
|
securec_check(rc, "", "");
|
|
} else {
|
|
Assert(pCudesc->cu_max[0] == 0);
|
|
}
|
|
} else {
|
|
pCudesc->cu_max[0] = VARSIZE_ANY_EXHDR(valPtr);
|
|
if (pCudesc->cu_max[0] > 0) {
|
|
Assert(pCudesc->cu_max[0] < MIN_MAX_LEN);
|
|
rc = memcpy_s(pCudesc->cu_max + 1, (MIN_MAX_LEN - 1), VARDATA_ANY(valPtr), pCudesc->cu_max[0]);
|
|
securec_check(rc, "", "");
|
|
} else {
|
|
Assert(pCudesc->cu_max[0] == 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
pCudesc->row_count = DatumGetInt32(fastgetattr(pCudescTup, CUDescRowCountAttr, pCudescTupDesc, &isnull));
|
|
Assert(!isnull);
|
|
|
|
// Put CUMode into cudesc->cumode
|
|
pCudesc->cu_mode = DatumGetInt32(fastgetattr(pCudescTup, CUDescCUModeAttr, pCudescTupDesc, &isnull));
|
|
Assert(!isnull);
|
|
|
|
// Put cusize into cudesc->cu_size
|
|
pCudesc->cu_size = DatumGetInt32(fastgetattr(pCudescTup, CUDescSizeAttr, pCudescTupDesc, &isnull));
|
|
Assert(!isnull);
|
|
|
|
// Put CUPointer into cudesc->cuPointer
|
|
char* cu_ptr = DatumGetPointer(fastgetattr(pCudescTup, CUDescCUPointerAttr, pCudescTupDesc, &isnull));
|
|
if (!isnull) {
|
|
Assert(VARSIZE_ANY_EXHDR(cu_ptr) == sizeof(CUPointer));
|
|
pCudesc->cu_pointer = *(CUPointer*)VARDATA_ANY(cu_ptr);
|
|
} else
|
|
Assert(pCudesc->cu_pointer == 0);
|
|
|
|
// Put magic into cudesc->magic
|
|
pCudesc->magic = DatumGetUInt32(fastgetattr(pCudescTup, CUDescCUMagicAttr, pCudescTupDesc, &isnull));
|
|
Assert(!isnull);
|
|
}
|
|
|
|
bool CStore::IsTheWholeCuDeleted(int rowsInCu)
|
|
{
|
|
return m_hasDeadRow && IsTheWholeCuDeleted((char*)m_cuDelMask, rowsInCu);
|
|
}
|
|
|
|
/*
 * Split an input value into its factors so that: n = 64 * a + 8 * b + 1 * c,
 * with b in [0, 7] and c in [0, 7].
 */
static inline void compute_factors_of_n(unsigned int n, unsigned int& a, unsigned int& b, unsigned int& c)
{
    const unsigned int below64 = n % 64; /* what is left after taking out the 64s */

    a = n / 64;
    b = below64 / 8;
    c = below64 % 8;
}
|
|
|
|
/*
|
|
* @Description: check whether all tuples within this CU have been deleted.
|
|
* if so, return true; otherwise return false.
|
|
* @IN rowsInCu: how many tuples to hold within this bitmap
|
|
* @IN delBitmapPtr: deleted bitmap
|
|
* @Return: true if all tuples within this bitmap have been deleted.
|
|
* false if any tuple is live.
|
|
* @See also:
|
|
*/
|
|
bool CStore::IsTheWholeCuDeleted(char* delBitmapPtr, int rowsInCu)
|
|
{
|
|
static const uint8 map[] = {0, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F, 0xFF};
|
|
unsigned int numUint64 = 0;
|
|
unsigned int numUint8 = 0;
|
|
unsigned int mapIdx = 0;
|
|
|
|
/*
|
|
* numUint64 means how many Uint64 data to use when the number of values
|
|
* is rowsInCu; that is (rowsInCu/64) because Uint64 holds 64 bits.
|
|
* numUint8 means how many Uint8 data to use excluding (numUint64 * 64)
|
|
* values; that is ( (rowsInCu - numUint64 * 64) / 8 ). also we exclude
|
|
* the last half-byte.
|
|
*/
|
|
compute_factors_of_n((unsigned int)rowsInCu, numUint64, numUint8, mapIdx);
|
|
|
|
/* compare quickly by taking *delBitmapPtr* as uint64 array. */
|
|
uint64* uint64Item = (uint64*)delBitmapPtr;
|
|
for (unsigned int i = 0; i < numUint64; ++i) {
|
|
if (*uint64Item != 0xFFFFFFFFFFFFFFFF) {
|
|
return false;
|
|
}
|
|
++uint64Item;
|
|
}
|
|
|
|
/* compare the remainings by taking them as char array. */
|
|
uint8* uint8Item = (uint8*)uint64Item;
|
|
for (unsigned int i = 0; i < numUint8; ++i) {
|
|
if (*uint8Item != 0xFF) {
|
|
return false;
|
|
}
|
|
++uint8Item;
|
|
}
|
|
|
|
/*
|
|
* if (rowsInCu != 8*N), the last byte must be handled specially.
|
|
* we will use *map[]* to compare directly and quickly.
|
|
*/
|
|
if (mapIdx != 0) {
|
|
return (*uint8Item == map[mapIdx]);
|
|
}
|
|
|
|
/* ok, the whole cu is deleted. */
|
|
return true;
|
|
}
|
|
|
|
// Build a Datum from the min (or max) value kept in a same-value CUDesc.
//   pCudesc:    CUDesc object; must be a same-value CU.
//   pColAttr:   attribute data of the column.
//   min:        true to read cu_min, false to read cu_max.
//   shouldFree: set to true when the returned Datum points to memory palloc'ed
//               here, false when the Datum is returned by value.
Datum CStore::CudescTupGetMinMaxDatum(
    _in_ CUDesc* pCudesc, _in_ Form_pg_attribute pColAttr, _in_ bool min, _out_ bool* shouldFree)
{
    Assert(pCudesc->IsSameValCU());

    char* src = min ? pCudesc->cu_min : pCudesc->cu_max;

    // case 1: attlen > 0 && attlen <= sizeof(Datum), passed by value
    if (pColAttr->attbyval) {
        *shouldFree = false;
        return (*(Datum*)src);
    }

    *shouldFree = true;

    errno_t rc = EOK;
    char* copy = NULL;

    if (pColAttr->attlen > (int)sizeof(Datum)) {
        // case 2: attlen > sizeof(Datum) && attlen <= MIN_MAX_LEN
        Assert(pColAttr->attlen <= MIN_MAX_LEN);
        copy = (char*)palloc(pColAttr->attlen);
        rc = memcpy_s(copy, pColAttr->attlen, src, pColAttr->attlen);
        securec_check(rc, "", "");
        return PointerGetDatum(copy);
    }

    if (pColAttr->attlen == -1) {
        // case 3: attlen == -1, including empty string ( not null string ).
        // src[0] is the data length; rebuild a varlena with a short header.
        Assert((int)src[0] >= 0 && (int)src[0] < MIN_MAX_LEN);
        copy = (char*)palloc(src[0] + VARHDRSZ_SHORT);
        SET_VARSIZE_SHORT(copy, src[0] + VARHDRSZ_SHORT);
        if (src[0] > 0) {
            rc = memcpy_s(copy + VARHDRSZ_SHORT, src[0], src + 1, src[0]);
            securec_check(rc, "", "");
        }
        return PointerGetDatum(copy);
    }

    // case 4: attlen == -2; src[0] is the length, which counts the ending '\0'
    Assert((int)src[0] > 0 && (int)src[0] < MIN_MAX_LEN);
    Assert(src[(int)src[0]] == '\0');
    copy = (char*)palloc(src[0]);
    rc = memcpy_s(copy, src[0], src + 1, src[0]);
    securec_check(rc, "", "");
    return PointerGetDatum(copy);
}
|
|
|
|
// set Cudesc Mode after min/max value have been computed.
|
|
// return true if need to write cu file. otherwise return false.
|
|
bool CStore::SetCudescModeForMinMaxVal(_in_ bool fullNulls, _in_ bool hasMinMaxFunc, _in_ bool hasNull,
|
|
_in_ int maxVarStrLen, _in_ int attlen, __inout CUDesc* cuDescPtr)
|
|
{
|
|
if (!fullNulls) {
|
|
// if hasNull is true, it's the NULL bitmap that stores the all null info.
|
|
// it must exists. so don't call SetSameValCU() when this CU has null values.
|
|
// attlen should 0 be larger than 0 and smaller or equal to 8
|
|
if ((attlen > 0 && attlen <= (int)sizeof(Datum)) && hasMinMaxFunc && !hasNull &&
|
|
(*((Datum*)(cuDescPtr->cu_min)) == *((Datum*)(cuDescPtr->cu_max)))) {
|
|
cuDescPtr->SetSameValCU();
|
|
} else if ((attlen < 0) && maxVarStrLen < MIN_MAX_LEN && hasMinMaxFunc && !hasNull) {
|
|
Assert(cuDescPtr->cu_min[0] < MIN_MAX_LEN);
|
|
Assert(cuDescPtr->cu_max[0] < MIN_MAX_LEN);
|
|
|
|
if (cuDescPtr->cu_min[0] == cuDescPtr->cu_max[0] &&
|
|
(memcmp(cuDescPtr->cu_min + 1, cuDescPtr->cu_max + 1, cuDescPtr->cu_min[0]) == 0)) {
|
|
cuDescPtr->SetSameValCU();
|
|
}
|
|
} else if ((attlen > (int)sizeof(Datum) && attlen <= MIN_MAX_LEN) && hasMinMaxFunc && !hasNull) {
|
|
if (memcmp(cuDescPtr->cu_min, cuDescPtr->cu_max, attlen) == 0) {
|
|
cuDescPtr->SetSameValCU();
|
|
}
|
|
} else {
|
|
if (hasMinMaxFunc) {
|
|
if (hasNull)
|
|
cuDescPtr->SetCUHasNull();
|
|
else
|
|
cuDescPtr->SetNormalCU();
|
|
} else
|
|
cuDescPtr->SetNoMinMaxCU();
|
|
}
|
|
} else
|
|
cuDescPtr->SetNullCU();
|
|
|
|
return (!cuDescPtr->IsNullCU() && !cuDescPtr->IsSameValCU());
|
|
}
|
|
|
|
// set *cudesc* mode for one column with new value to be added.
|
|
// *attlen* is from Form_pg_attribute.attlen.
|
|
// *attval* is computed by the DEFAULT expression.
|
|
// true returned if new values must be written into cu files,
|
|
// otherwise false returned.
|
|
bool CStore::SetCudescModeForTheSameVal(
|
|
_in_ bool fullNulls, _in_ FuncSetMinMax SetMinMaxFunc, _in_ int attlen, _in_ Datum attval, __inout CUDesc* cudesc)
|
|
{
|
|
if (!fullNulls) {
|
|
/* flag to set the first value */
|
|
bool first = true;
|
|
|
|
if (SetMinMaxFunc == NULL) {
|
|
cudesc->SetNoMinMaxCU();
|
|
} else if (attlen > 0 && attlen <= MIN_MAX_LEN) {
|
|
(SetMinMaxFunc)(attval, cudesc, &first);
|
|
cudesc->SetSameValCU();
|
|
} else if (attlen < 0) {
|
|
char* ptr = DatumGetPointer(attval);
|
|
int len = VARSIZE_ANY(ptr);
|
|
if (len < MIN_MAX_LEN) {
|
|
(SetMinMaxFunc)(attval, cudesc, &first);
|
|
cudesc->SetSameValCU();
|
|
} else
|
|
cudesc->SetNormalCU();
|
|
} else {
|
|
cudesc->SetNormalCU();
|
|
}
|
|
} else
|
|
cudesc->SetNullCU();
|
|
|
|
return (!cudesc->IsNullCU() && !cudesc->IsSameValCU());
|
|
}
|
|
|
|
// We add a virtual column for marking deleted rows
|
|
// The VC is divided into CUs. The cuDesc of VC includes colId, cuid,
|
|
// row_count, del_mask, cu_mode, magic.
|
|
void CStore::SaveVCCUDesc(Oid cudescOid, uint32 cuId, int rowCount, uint32 magic, int options, const char* delBitmap)
|
|
{
|
|
Relation cudescHeapRel = heap_open(cudescOid, RowExclusiveLock);
|
|
Relation cudescIndexRel = index_open(cudescHeapRel->rd_rel->relcudescidx, RowExclusiveLock);
|
|
TupleDesc tupdesc = RelationGetDescr(cudescHeapRel);
|
|
|
|
Datum values[CUDescMaxAttrNum];
|
|
bool nulls[CUDescMaxAttrNum];
|
|
text* tmpCuPointData = NULL;
|
|
|
|
errno_t rc = memset_s(nulls, CUDescMaxAttrNum, true, CUDescMaxAttrNum);
|
|
securec_check(rc, "\0", "\0");
|
|
|
|
values[CUDescColIDAttr - 1] = Int32GetDatum(VitrualDelColID);
|
|
nulls[CUDescColIDAttr - 1] = false;
|
|
|
|
values[CUDescCUIDAttr - 1] = UInt32GetDatum(cuId);
|
|
nulls[CUDescCUIDAttr - 1] = false;
|
|
|
|
values[CUDescRowCountAttr - 1] = Int32GetDatum(rowCount);
|
|
nulls[CUDescRowCountAttr - 1] = false;
|
|
|
|
values[CUDescCUMagicAttr - 1] = UInt32GetDatum(magic);
|
|
nulls[CUDescCUMagicAttr - 1] = false;
|
|
|
|
if (delBitmap != NULL) {
|
|
nulls[CUDescCUPointerAttr - 1] = false;
|
|
|
|
int delMaskBytes = (rowCount + 7) / 8;
|
|
tmpCuPointData = cstring_to_text_with_len((const char*)delBitmap, delMaskBytes);
|
|
values[CUDescCUPointerAttr - 1] = PointerGetDatum(tmpCuPointData);
|
|
Assert(VARSIZE_ANY_EXHDR(PointerGetDatum(tmpCuPointData)) == (uint32)delMaskBytes);
|
|
}
|
|
|
|
HeapTuple tup = (HeapTuple)tableam_tops_form_tuple(tupdesc, values, nulls, HEAP_TUPLE);
|
|
|
|
// We always generate xlog for cudesc tuple
|
|
options &= (~TABLE_INSERT_SKIP_WAL);
|
|
(void)heap_insert(cudescHeapRel, tup, GetCurrentCommandId(true), options, NULL);
|
|
index_insert(cudescIndexRel,
|
|
values,
|
|
nulls,
|
|
&(tup->t_self),
|
|
cudescHeapRel,
|
|
cudescIndexRel->rd_index->indisunique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
|
|
|
|
heap_freetuple(tup);
|
|
tup = NULL;
|
|
|
|
index_close(cudescIndexRel, RowExclusiveLock);
|
|
heap_close(cudescHeapRel, RowExclusiveLock);
|
|
|
|
if (tmpCuPointData != NULL) {
|
|
pfree_ext(tmpCuPointData);
|
|
}
|
|
}
|
|
|
|
/*
 * Return the CU id of the last CUDesc row of the first non-dropped column,
 * or FirstCUID when no row is returned by the scan.
 *
 * cudescHeap       : OID of the CUDesc heap relation.
 * cstoreRelTupDesc : tuple descriptor of the column-store relation.
 * snapshotArg      : snapshot for the scan; when NULL, SnapshotAny is used so
 *                    that rows of aborted or crashed transactions are seen too.
 */
uint32 CStore::GetMaxCUID(Oid cudescHeap, TupleDesc cstoreRelTupDesc, Snapshot snapshotArg)
{
    ScanKeyData colKey;
    HeapTuple lastTuple;
    bool isnull = false;

    Snapshot scanSnapshot = snapshotArg ? snapshotArg : SnapshotAny;

    /* pick the attribute number of the first column that is not dropped */
    int attrId = 0;
    int pos = 0;
    while (pos < cstoreRelTupDesc->natts && cstoreRelTupDesc->attrs[pos]->attisdropped) {
        ++pos;
    }
    if (pos < cstoreRelTupDesc->natts) {
        attrId = cstoreRelTupDesc->attrs[pos]->attnum;
    }
    Assert(attrId > 0);

    /* open the CUDesc relation together with its index */
    Relation descHeap = heap_open(cudescHeap, AccessShareLock);
    TupleDesc descTupDesc = RelationGetDescr(descHeap);
    Relation descIndex = index_open(descHeap->rd_rel->relcudescidx, AccessShareLock);
    uint32 maxCuId = FirstCUID;

    /* index lookup restricted to col_id == attrId */
    ScanKeyInit(&colKey, (AttrNumber)CUDescColIDAttr, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(attrId));

    SysScanDesc descScan = systable_beginscan_ordered(descHeap, descIndex, scanSnapshot, 1, &colKey);

    /* scanning backward, the first tuple returned is the column's last CU description */
    lastTuple = systable_getnext_ordered(descScan, BackwardScanDirection);
    if (lastTuple != NULL) {
        maxCuId = DatumGetUInt32(fastgetattr(lastTuple, CUDescCUIDAttr, descTupDesc, &isnull));
        Assert(!isnull);
    }

    systable_endscan_ordered(descScan);

    index_close(descIndex, AccessShareLock);
    heap_close(descHeap, AccessShareLock);

    return maxCuId;
}
|
|
|
|
/*
 * Get the max CU id from a cbtree index relation: walk every TID returned by
 * a forward scan under the active snapshot and keep the largest block number.
 * Returns FirstCUID when the scan yields nothing larger.
 */
static uint32 GetMaxCuIdFromCbtreeIndex(Relation heapRel, Relation idxRel)
{
    uint32 largest = FirstCUID;

    IndexScanDesc scan = (IndexScanDesc)index_beginscan(heapRel, idxRel, GetActiveSnapshot(), 0, 0);
    index_rescan(scan, NULL, 0, NULL, 0);

    for (ItemPointer tid = index_getnext_tid(scan, ForwardScanDirection); tid != NULL;
         tid = index_getnext_tid(scan, ForwardScanDirection)) {
        /* the block number part of the TID is used as the CU id */
        uint32 cuId = ItemPointerGetBlockNumber(tid);
        largest = (cuId > largest) ? cuId : largest;
    }

    index_endscan(scan);

    return largest;
}
|
|
|
|
/* get max CU id from cgin index relation */
|
|
static uint32 GetMaxCuIdFromCginIndex(Relation heapRel, Relation idxRel)
|
|
{
|
|
uint32 maxCuID = FirstCUID;
|
|
|
|
TupleDesc tidDesc = CreateTemplateTupleDesc(1, false);
|
|
TupleDescInitEntry(tidDesc, 1, "tid", TIDOID, -1, 0);
|
|
VectorBatch *tids = New(CurrentMemoryContext)VectorBatch(CurrentMemoryContext, tidDesc);
|
|
|
|
IndexScanDesc indexScan = index_beginscan_bitmap(idxRel, GetActiveSnapshot(), 0);
|
|
/* If sort is NULL, tids only contain 1 row to get the max cu ID */
|
|
int64 nTids = index_column_getbitmap(indexScan, NULL, tids);
|
|
if (nTids == 1) {
|
|
/* get the max cu ID */
|
|
ScalarVector *pVector = &tids->m_arr[0];
|
|
ItemPointer tid = (ItemPointer)(pVector->m_vals);
|
|
maxCuID = ItemPointerGetBlockNumber(tid);
|
|
}
|
|
index_endscan(indexScan);
|
|
FreeTupleDesc(tidDesc);
|
|
|
|
return maxCuID;
|
|
}
|
|
|
|
/*
 * Return the largest CU id referenced by any index in indexRel (a List of
 * opened index Relations), or FirstCUID when the list yields nothing larger.
 * Only cbtree and cgin indexes are expected; any other access method trips
 * an assertion.
 */
uint32 CStore::GetMaxIndexCUID(Relation heapRel, List *indexRel)
{
    uint32 result = FirstCUID;
    uint32 candidate = 0;
    ListCell *cell = NULL;

    foreach (cell, indexRel) {
        Relation idxRel = (Relation)lfirst(cell);

        if (idxRel->rd_rel->relam == CBTREE_AM_OID) {
            candidate = GetMaxCuIdFromCbtreeIndex(heapRel, idxRel);
        } else if (idxRel->rd_rel->relam == CGIN_AM_OID) {
            candidate = GetMaxCuIdFromCginIndex(heapRel, idxRel);
        } else {
            /* unsupported index access method */
            Assert(0);
        }

        result = (candidate > result) ? candidate : result;
    }

    return result;
}
|
|
|
|
// get the max cu pointer form cu desc
|
|
CUPointer CStore::GetMaxCUPointerFromDesc(_in_ int attrno, _in_ Oid cudescHeap)
|
|
{
|
|
// Open the CUDesc relation and its index.
|
|
Relation cudescHeapRel = heap_open(cudescHeap, AccessShareLock);
|
|
TupleDesc cudescTupDesc = RelationGetDescr(cudescHeapRel);
|
|
Relation cudescIndexRel = index_open(cudescHeapRel->rd_rel->relcudescidx, AccessShareLock);
|
|
|
|
// Setup scan key to fetch from the index by col_id.
|
|
ScanKeyData key;
|
|
ScanKeyInit(&key, (AttrNumber)CUDescColIDAttr, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(attrno));
|
|
|
|
// Any snapshot is used to get the newest CU Pointer, which includes the aborted or crashed transactions.
|
|
SysScanDesc cudesc_scan = systable_beginscan_ordered(cudescHeapRel, cudescIndexRel, SnapshotAny, 1, &key);
|
|
|
|
// Optimize for geting last CU description of column.
|
|
// Use BackwardScanDirection scan
|
|
CUPointer maxCUPointer = 0;
|
|
HeapTuple tup = NULL;
|
|
while ((tup = systable_getnext_ordered(cudesc_scan, BackwardScanDirection)) != NULL) {
|
|
bool isnull = true;
|
|
uint32 cuSize = DatumGetInt32(fastgetattr(tup, CUDescSizeAttr, cudescTupDesc, &isnull));
|
|
Assert(!isnull);
|
|
char* cuBegin = DatumGetPointer(fastgetattr(tup, CUDescCUPointerAttr, cudescTupDesc, &isnull));
|
|
Assert(!isnull);
|
|
uint64 cuEnd = *((uint64*)VARDATA_ANY(cuBegin)) + cuSize;
|
|
|
|
if (cuEnd > maxCUPointer) {
|
|
maxCUPointer = cuEnd;
|
|
}
|
|
}
|
|
|
|
systable_endscan_ordered(cudesc_scan);
|
|
cudesc_scan = NULL;
|
|
|
|
index_close(cudescIndexRel, AccessShareLock);
|
|
cudescIndexRel = NULL;
|
|
heap_close(cudescHeapRel, AccessShareLock);
|
|
cudescHeapRel = NULL;
|
|
return maxCUPointer;
|
|
}
|
|
|
|
// get the max cu pointer
|
|
CUPointer CStore::GetMaxCUPointer(_in_ int attrno, _in_ Relation rel)
|
|
{
|
|
CUPointer maxPointerfromCudesc = 0;
|
|
CUPointer maxPointerfromCufile = 0;
|
|
|
|
// get max cu pointer from cudesc
|
|
maxPointerfromCudesc = CStore::GetMaxCUPointerFromDesc(attrno, rel->rd_rel->relcudescrelid);
|
|
|
|
/* if process is killed when insert CU to the end of CU file, when process restart the transaction of insert will
|
|
* abort the GetMaxCUPointerFromDesc which using DirtySnapshot will not get the max cu pointer so need check the CU
|
|
* file size and compare which one is bigger
|
|
*
|
|
* get max cu pointer from cu file
|
|
*/
|
|
maxPointerfromCufile = GetColDataFileSize(rel, attrno);
|
|
|
|
// cu file may partital write when process is killed, so allign the maxPointerfromCufile to ALIGNOF_CUSIZE (8K)
|
|
int align_size = RelationIsTsStore(rel) ? ALIGNOF_TIMESERIES_CUSIZE : ALIGNOF_CUSIZE;
|
|
maxPointerfromCufile = CUAlignUtils::AlignCuSize(maxPointerfromCufile, align_size);
|
|
|
|
return Max(maxPointerfromCudesc, maxPointerfromCufile);
|
|
}
|
|
|
|
// This function will save the CU description of CU into CUDesc table
|
|
// which is a rowstore table. thus we can leverage the visibility check of
|
|
// rowstore. Note that we use attribute number in order to support
|
|
// 'alter table add/drop table'.
|
|
// attno is physical attribute number
|
|
void CStore::SaveCUDesc(_in_ Relation rel, _in_ CUDesc* cuDescPtr, _in_ int col, int options)
|
|
{
|
|
Assert(rel != NULL);
|
|
Assert(col >= 0);
|
|
|
|
if (rel->rd_att->attrs[col]->attisdropped) {
|
|
ereport(PANIC,
|
|
(errmsg("Cannot save CUDesc for a dropped column \"%s\" of table \"%s\"",
|
|
NameStr(rel->rd_att->attrs[col]->attname),
|
|
RelationGetRelationName(rel))));
|
|
}
|
|
|
|
Relation cudesc_rel = heap_open(rel->rd_rel->relcudescrelid, RowExclusiveLock);
|
|
Relation idx_rel = index_open(cudesc_rel->rd_rel->relcudescidx, RowExclusiveLock);
|
|
|
|
Datum values[CUDescMaxAttrNum];
|
|
bool nulls[CUDescMaxAttrNum];
|
|
HeapTuple tup = CStore::FormCudescTuple(cuDescPtr, cudesc_rel->rd_att, values, nulls, rel->rd_att->attrs[col]);
|
|
|
|
// We always generate xlog for cudesc tuple
|
|
options &= (~TABLE_INSERT_SKIP_WAL);
|
|
|
|
(void)heap_insert(cudesc_rel, tup, GetCurrentCommandId(true), options, NULL);
|
|
index_insert(idx_rel,
|
|
values,
|
|
nulls,
|
|
&(tup->t_self),
|
|
cudesc_rel,
|
|
idx_rel->rd_index->indisunique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
|
|
|
|
heap_freetuple(tup);
|
|
pfree(DatumGetPointer(values[CUDescMinAttr - 1]));
|
|
pfree(DatumGetPointer(values[CUDescMaxAttr - 1]));
|
|
pfree(DatumGetPointer(values[CUDescCUPointerAttr - 1]));
|
|
|
|
index_close(idx_rel, RowExclusiveLock);
|
|
heap_close(cudesc_rel, RowExclusiveLock);
|
|
}
|
|
|
|
/*
|
|
* Load CUDesc information of column according to loadInfoPtr
|
|
* LoadCUDescCtrl include maxCUDescNum for this load, because if we load all
|
|
* it need big memory to hold
|
|
* this function is special for adio, third param adio_work control adio like enable_adio_function.
|
|
* because GetLivedRowNumbers should not work in adio model
|
|
*/
|
|
bool CStore::LoadCUDesc(
|
|
_in_ int col, __inout LoadCUDescCtl* loadCUDescInfoPtr, _in_ bool prefetch_control, _in_ Snapshot snapShot)
|
|
{
|
|
ScanKeyData key[3];
|
|
HeapTuple tup;
|
|
errno_t rc = EOK;
|
|
bool found = false;
|
|
int loadNum = 0;
|
|
|
|
Assert(col >= 0);
|
|
Assert(loadCUDescInfoPtr);
|
|
if (col >= m_relation->rd_att->natts) {
|
|
ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR),
|
|
errmsg("col index exceed col number, col:%d, number:%d", col, m_relation->rd_att->natts)));
|
|
}
|
|
/*
|
|
* we will reset m_perScanMemCnxt when switch to the next batch of cudesc data.
|
|
* so the spaces only used for this batch should be managed by m_perScanMemCnxt.
|
|
*/
|
|
AutoContextSwitch newMemCnxt(m_perScanMemCnxt);
|
|
|
|
ADIO_RUN()
|
|
{
|
|
loadCUDescInfoPtr->lastLoadNum = loadCUDescInfoPtr->curLoadNum;
|
|
}
|
|
ADIO_ELSE()
|
|
{
|
|
loadCUDescInfoPtr->lastLoadNum = 0;
|
|
loadCUDescInfoPtr->curLoadNum = 0;
|
|
}
|
|
ADIO_END();
|
|
|
|
CUDesc* cuDescArray = loadCUDescInfoPtr->cuDescArray;
|
|
/*
|
|
* Open the CUDesc relation and its index
|
|
*/
|
|
Relation cudesc_rel = heap_open(m_relation->rd_rel->relcudescrelid, AccessShareLock);
|
|
TupleDesc cudesc_tupdesc = cudesc_rel->rd_att;
|
|
Relation idx_rel = index_open(cudesc_rel->rd_rel->relcudescidx, AccessShareLock);
|
|
bool needLengthInfo = m_relation->rd_att->attrs[col]->attlen < 0;
|
|
/* Convert logical id is to physical id of attribute */
|
|
int attid = m_relation->rd_att->attrs[col]->attnum;
|
|
|
|
/*
|
|
* Setup scan key to fetch from the index by attid and CU ID range.
|
|
*/
|
|
ScanKeyInit(&key[0], (AttrNumber)CUDescColIDAttr, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(attid));
|
|
|
|
ScanKeyInit(&key[1],
|
|
(AttrNumber)CUDescCUIDAttr,
|
|
BTGreaterEqualStrategyNumber,
|
|
F_OIDGE,
|
|
UInt32GetDatum(loadCUDescInfoPtr->nextCUID));
|
|
|
|
ScanKeyInit(&key[2], (AttrNumber)CUDescCUIDAttr, BTLessEqualStrategyNumber, F_OIDLE, UInt32GetDatum(m_endCUID));
|
|
|
|
snapShot = (snapShot == NULL) ? GetActiveSnapshot() : snapShot;
|
|
|
|
Assert(snapShot != NULL);
|
|
|
|
SysScanDesc cudesc_scan = systable_beginscan_ordered(cudesc_rel, idx_rel, snapShot, 3, key);
|
|
/* Scan cudesc tuple order by cuid ascending order */
|
|
while ((tup = systable_getnext_ordered(cudesc_scan, ForwardScanDirection)) != NULL) {
|
|
Datum values[CUDescCUExtraAttr] = {0};
|
|
bool isnull[CUDescCUExtraAttr] = {0};
|
|
char* valPtr = NULL;
|
|
|
|
/* here use heap_deform_tuple() because cudesc tupe stored was bad.
|
|
* min and max are var length but store in middle of tuple, if we use fastgetattr()
|
|
* here may cause high cpu cost.
|
|
* by the way, it is better store tupe in form of
|
|
* attribute 1: fixed length
|
|
* attribute 2: fixed length
|
|
* ...... : fixed length
|
|
* attribute n: var length
|
|
* attribute n+1: var length
|
|
* ...... : var length
|
|
*/
|
|
heap_deform_tuple(tup, cudesc_tupdesc, values, isnull);
|
|
|
|
uint32 cu_id = DatumGetUInt32(values[CUDescCUIDAttr - 1]);
|
|
Assert(!isnull[CUDescCUIDAttr - 1]);
|
|
|
|
if (IsDicVCU(cu_id))
|
|
continue;
|
|
|
|
/* Put cusize into cudesc->cu_size */
|
|
int32 cu_size = DatumGetInt32(values[CUDescSizeAttr - 1]);
|
|
Assert(!isnull[CUDescSizeAttr - 1]);
|
|
|
|
ADIO_RUN()
|
|
{
|
|
loadNum = (int)loadCUDescInfoPtr->curLoadNum;
|
|
IncLoadCuDescIdx(loadNum);
|
|
/* case1: check whether can load more ;case 2: m_virtualCUDescInfo check here whether array overflow */
|
|
if (m_CUDescIdx[m_cursor] == loadNum || !HasEnoughCuDescSlot(loadCUDescInfoPtr->lastLoadNum, loadNum)) {
|
|
break;
|
|
}
|
|
m_prefetch_quantity += cu_size;
|
|
}
|
|
ADIO_ELSE()
|
|
{
|
|
if (!loadCUDescInfoPtr->HasFreeSlot())
|
|
break;
|
|
}
|
|
ADIO_END();
|
|
|
|
cuDescArray[loadCUDescInfoPtr->curLoadNum].cu_size = cu_size;
|
|
cuDescArray[loadCUDescInfoPtr->curLoadNum].xmin = HeapTupleGetRawXmin(tup);
|
|
cuDescArray[loadCUDescInfoPtr->curLoadNum].cu_id = cu_id;
|
|
loadCUDescInfoPtr->nextCUID = cu_id;
|
|
|
|
/* Parallel scan CU divide. */
|
|
if (u_sess->stream_cxt.producer_dop > 1 &&
|
|
(cu_id % u_sess->stream_cxt.producer_dop != (uint32)u_sess->stream_cxt.smp_id))
|
|
continue;
|
|
|
|
/* Put min value into cudesc->min */
|
|
if (!isnull[CUDescMinAttr - 1]) {
|
|
char* minPtr = cuDescArray[loadCUDescInfoPtr->curLoadNum].cu_min;
|
|
int len_1 = MIN_MAX_LEN;
|
|
valPtr = DatumGetPointer(values[CUDescMinAttr - 1]);
|
|
if (needLengthInfo) {
|
|
*minPtr = VARSIZE_ANY_EXHDR(valPtr);
|
|
minPtr = minPtr + 1;
|
|
len_1 -= 1;
|
|
}
|
|
rc = memcpy_s(minPtr, len_1, VARDATA_ANY(valPtr), VARSIZE_ANY_EXHDR(valPtr));
|
|
securec_check(rc, "", "");
|
|
}
|
|
/* Put max value into cudesc->max */
|
|
if (!isnull[CUDescMaxAttr - 1]) {
|
|
char* maxPtr = cuDescArray[loadCUDescInfoPtr->curLoadNum].cu_max;
|
|
int len_2 = MIN_MAX_LEN;
|
|
valPtr = DatumGetPointer(values[CUDescMaxAttr - 1]);
|
|
if (needLengthInfo) {
|
|
*maxPtr = VARSIZE_ANY_EXHDR(valPtr);
|
|
maxPtr = maxPtr + 1;
|
|
len_2 -= 1;
|
|
}
|
|
rc = memcpy_s(maxPtr, len_2, VARDATA_ANY(valPtr), VARSIZE_ANY_EXHDR(valPtr));
|
|
securec_check(rc, "", "");
|
|
}
|
|
|
|
cuDescArray[loadCUDescInfoPtr->curLoadNum].row_count = DatumGetInt32(values[CUDescRowCountAttr - 1]);
|
|
Assert(!isnull[CUDescRowCountAttr - 1]);
|
|
|
|
/* Put CUMode into cudesc->cumode */
|
|
cuDescArray[loadCUDescInfoPtr->curLoadNum].cu_mode = DatumGetInt32(values[CUDescCUModeAttr - 1]);
|
|
Assert(!isnull[CUDescCUModeAttr - 1]);
|
|
|
|
/* Put CUPointer into cudesc->cuPointer */
|
|
Assert(col != VitrualDelColID);
|
|
Assert(!isnull[CUDescCUPointerAttr - 1]);
|
|
valPtr = DatumGetPointer(values[CUDescCUPointerAttr - 1]);
|
|
rc = memcpy_s(&cuDescArray[loadCUDescInfoPtr->curLoadNum].cu_pointer,
|
|
sizeof(CUPointer),
|
|
VARDATA_ANY(valPtr),
|
|
sizeof(CUPointer));
|
|
securec_check(rc, "", "");
|
|
Assert(VARSIZE_ANY_EXHDR(valPtr) == sizeof(CUPointer));
|
|
|
|
/* Put magic into cudesc->magic */
|
|
cuDescArray[loadCUDescInfoPtr->curLoadNum].magic = DatumGetUInt32(values[CUDescCUMagicAttr - 1]);
|
|
Assert(!isnull[CUDescCUMagicAttr - 1]);
|
|
|
|
found = true;
|
|
|
|
IncLoadCuDescIdx(*(int*)&loadCUDescInfoPtr->curLoadNum);
|
|
/* only load one cu for adio,because we need caculate cu size for prefetch quantity */
|
|
if (prefetch_control) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
systable_endscan_ordered(cudesc_scan);
|
|
index_close(idx_rel, AccessShareLock);
|
|
heap_close(cudesc_rel, AccessShareLock);
|
|
|
|
ADIO_RUN()
|
|
{
|
|
if (tup == NULL) {
|
|
/* no tup found means prefetch finish */
|
|
m_load_finish = true;
|
|
}
|
|
}
|
|
ADIO_END();
|
|
|
|
if (found) {
|
|
/* nextCUID must be greater than loaded cudesc */
|
|
loadCUDescInfoPtr->nextCUID++;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
 * Fill one output batch from the CU currently addressed by m_cursor.
 *
 * Steps: (1) normal columns, either read directly or, for late-read columns,
 * represented by a ctid vector filled once; (2) system columns; (3) the
 * constant-only case, where just the live row count is computed; (4) dropped
 * columns, which are set to all-NULL with the batch's row count.
 *
 * vecBatchOut: batch to fill; m_rows is set from the vectors filled.
 * Returns the dead-row count reported by the last fill routine that ran
 * (or counted in step 3).
 */
int CStore::FillVecBatch(_out_ VectorBatch* vecBatchOut)
{
    Assert(vecBatchOut);

    int idx = m_CUDescIdx[m_cursor];
    int deadRows = 0, i;
    this->m_cuDescIdx = idx;
    bool hasCtidForLateRead = false;

    /* Step 1: fill normal columns if need */
    for (i = 0; i < m_colNum; ++i) {
        int colIdx = m_colId[i];

        if (likely(colIdx >= 0)) {
            /*
             * The dropped-column check indexes the attribute array, so it must
             * only run once colIdx is known to be non-negative.
             */
            if (m_relation->rd_att->attrs[colIdx]->attisdropped) {
                ereport(PANIC,
                    (errmsg("Cannot fill VecBatch for a dropped column \"%s\" of table \"%s\"",
                        NameStr(m_relation->rd_att->attrs[colIdx]->attname),
                        RelationGetRelationName(m_relation))));
            }

            Assert(colIdx < vecBatchOut->m_cols);

            ScalarVector* vec = vecBatchOut->m_arr + colIdx;
            CUDesc* cuDescPtr = m_CUDescInfo[i]->cuDescArray + idx;
            GetCUDeleteMaskIfNeed(cuDescPtr->cu_id, m_snapshot);

            // We can't late read data
            if (!IsLateRead(i)) {
                /* fill function variant 1 handles dead rows, variant 0 does not */
                int funIdx = m_hasDeadRow ? 1 : 0;
                deadRows = (this->*m_colFillFunArrary[i].colFillFun[funIdx])(i, cuDescPtr, vec);
            } else {
                // We haven't fill ctid for late read columns
                if (!hasCtidForLateRead) {
                    if (!m_hasDeadRow)
                        deadRows = FillTidForLateRead<false>(cuDescPtr, vec);
                    else
                        deadRows = FillTidForLateRead<true>(cuDescPtr, vec);

                    hasCtidForLateRead = true;
                    this->m_laterReadCtidColIdx = colIdx;
                } else {
                    /* further late-read columns only take over the row count */
                    vec->m_rows = vecBatchOut->m_rows;
                }
            }
            vecBatchOut->m_rows = vec->m_rows;
        }
    }

    // Step 2: fill sys columns if need
    for (i = 0; i < m_sysColNum; ++i) {
        int sysColIdx = m_sysColId[i];
        ScalarVector* sysVec = vecBatchOut->GetSysVector(sysColIdx);
        deadRows = FillSysColVector(sysColIdx, m_virtualCUDescInfo->cuDescArray + idx, sysVec);
        vecBatchOut->m_rows = sysVec->m_rows;
    }

    // Step 3: fill const columns if need
    if (unlikely(m_onlyConstCol)) {
        // We only set row count
        CUDesc* cuDescPtr = m_virtualCUDescInfo->cuDescArray + idx;
        int liveRows = 0, leftSize = cuDescPtr->row_count - m_rowCursorInCU;
        ScalarVector* vec = vecBatchOut->m_arr;
        errno_t rc = memset_s(vec->m_flag, sizeof(uint8) * BatchMaxSize, 0, sizeof(uint8) * BatchMaxSize);
        securec_check(rc, "", "");
        Assert(deadRows == 0 && leftSize > 0);

        GetCUDeleteMaskIfNeed(cuDescPtr->cu_id, m_snapshot);

        /* count rows from the cursor on until the batch is full or the CU ends */
        for (i = 0; i < leftSize && liveRows < BatchMaxSize; i++) {
            if (IsDeadRow(cuDescPtr->cu_id, i + m_rowCursorInCU))
                ++deadRows;
            else
                ++liveRows;
        }
        vec->m_rows = liveRows;
        vecBatchOut->m_rows = vec->m_rows;
    }

    /* Step 4: fill other columns if need, most likely for the dropped column */
    for (i = 0; i < vecBatchOut->m_cols; i++) {
        if (m_relation->rd_att->attrs[i]->attisdropped) {
            ScalarVector* vec = vecBatchOut->m_arr + i;
            vec->m_rows = vecBatchOut->m_rows;
            vec->SetAllNull();
        }
    }

    return deadRows;
}
|
|
|
|
// Fill vector of column
|
|
template <bool hasDeadRow, int attlen>
|
|
int CStore::FillVector(_in_ int seq, _in_ CUDesc* cuDescPtr, _out_ ScalarVector* vec)
|
|
{
|
|
int colIdx = this->m_colId[seq];
|
|
int pos = 0;
|
|
int deadRows = 0;
|
|
|
|
// reset the flag value
|
|
errno_t rc = memset_s(vec->m_flag, sizeof(uint8) * BatchMaxSize, 0, sizeof(uint8) * BatchMaxSize);
|
|
securec_check(rc, "", "");
|
|
|
|
// step 1: Caculate how many rows left
|
|
int leftRows = cuDescPtr->row_count - this->m_rowCursorInCU;
|
|
Assert(leftRows > 0);
|
|
|
|
// step 2: CU is filled with all NULL values
|
|
if (cuDescPtr->IsNullCU()) {
|
|
for (int i = 0; i < leftRows && pos < BatchMaxSize; ++i) {
|
|
if (hasDeadRow && this->IsDeadRow(cuDescPtr->cu_id, i + this->m_rowCursorInCU)) {
|
|
++deadRows;
|
|
continue;
|
|
}
|
|
vec->SetNull(pos);
|
|
++pos;
|
|
}
|
|
vec->m_rows = pos;
|
|
return deadRows;
|
|
}
|
|
|
|
// step 3: If min and max are equal, no CU is stored
|
|
if (cuDescPtr->IsSameValCU()) {
|
|
for (int i = 0; i < leftRows && pos < BatchMaxSize; ++i) {
|
|
if (hasDeadRow && this->IsDeadRow(cuDescPtr->cu_id, i + this->m_rowCursorInCU)) {
|
|
++deadRows;
|
|
continue;
|
|
}
|
|
|
|
if (attlen > 0 && attlen <= 8) {
|
|
Datum cuMin = *(Datum*)(cuDescPtr->cu_min);
|
|
vec->m_vals[pos] = cuMin;
|
|
} else if (attlen == 12 || attlen == 16) {
|
|
Datum cuMin = PointerGetDatum(cuDescPtr->cu_min);
|
|
vec->AddVar(cuMin, pos);
|
|
} else {
|
|
Datum cuMin = PointerGetDatum(cuDescPtr->cu_min + 1);
|
|
Size len = (Size)(unsigned char)cuDescPtr->cu_min[0];
|
|
Assert(len < MIN_MAX_LEN);
|
|
|
|
// Convert string into varattrib_1b
|
|
// It is safe because len < MIN_MAX_LEN
|
|
char tmpStr[MIN_MAX_LEN + 4];
|
|
if (attlen == -1) {
|
|
Size varLen = len + VARHDRSZ_SHORT;
|
|
SET_VARSIZE_SHORT(tmpStr, varLen);
|
|
rc = memcpy_s(VARDATA_ANY(tmpStr), sizeof(tmpStr) - VARHDRSZ_SHORT, DatumGetPointer(cuMin), len);
|
|
securec_check(rc, "", "");
|
|
cuMin = PointerGetDatum(tmpStr);
|
|
}
|
|
vec->AddVar(cuMin, pos);
|
|
}
|
|
++pos;
|
|
}
|
|
vec->m_rows = pos;
|
|
return deadRows;
|
|
}
|
|
|
|
// step 4: Get CU data. Add a 'this' pointer to help sourceinsight understands
|
|
// this is a member function reference.
|
|
int slotId = CACHE_BLOCK_INVALID_IDX;
|
|
CSTORESCAN_TRACE_START(GET_CU_DATA);
|
|
CU* cuPtr = this->GetCUData(cuDescPtr, colIdx, attlen, slotId);
|
|
CSTORESCAN_TRACE_END(GET_CU_DATA);
|
|
|
|
// step 5: CUToVector
|
|
pos = cuPtr->ToVector<attlen, hasDeadRow>(
|
|
vec, leftRows, this->m_rowCursorInCU, this->m_scanPosInCU[seq], deadRows, this->m_cuDelMask);
|
|
|
|
if (IsValidCacheSlotID(slotId)) {
|
|
// CU is pinned
|
|
CUCache->UnPinDataBlock(slotId);
|
|
} else
|
|
Assert(false);
|
|
|
|
vec->m_rows = pos;
|
|
return deadRows;
|
|
}
|
|
|
|
/*
 * Copy the values of srcVec whose TID (same position in tids) refers to a
 * live row into destVec, appending at destVec->m_rows.
 *
 * colIdx  : column index; only checked by the assertion here.
 * tids    : TID vector, block number = CU id, offset number = row + 1.
 * srcVec  : source values, one per TID.
 * destVec : destination; m_rows is advanced for each value appended.
 */
void CStore::FillVectorByIndex(
    _in_ int colIdx, _in_ ScalarVector* tids, _in_ ScalarVector* srcVec, _out_ ScalarVector* destVec)
{
    Assert(colIdx >= 0 && tids && destVec && srcVec);

    ScalarValue* outVals = destVec->m_vals;
    ScalarValue* inVals = srcVec->m_vals;
    ScalarValue* tidVals = tids->m_vals;

    uint32 loadedCUId = InValidCUID;

    for (int row = 0; row < tids->m_rows; row++) {
        ItemPointer tid = (ItemPointer)&tidVals[row];
        uint32 rowCUId = ItemPointerGetBlockNumber(tid);

        /* offsets stored in a TID start from 1 */
        uint32 offsetInCU = ItemPointerGetOffsetNumber(tid) - 1;

        /* Step 1: fetch the delete mask when entering another CU */
        if (rowCUId != loadedCUId) {
            loadedCUId = rowCUId;
            GetCUDeleteMaskIfNeed(loadedCUId, m_snapshot);
        }

        /* Step 2: only live rows of a CU with a loaded delete mask are copied */
        if (m_delMaskCUId == InValidCUID || IsDeadRow(loadedCUId, offsetInCU)) {
            continue;
        }

        if (srcVec->IsNull(row)) {
            destVec->SetNull(destVec->m_rows++);
        } else {
            outVals[destVec->m_rows++] = inVals[row];
        }
    }
}
|
|
|
|
template <int sysColOid>
|
|
void CStore::FillSysVecByTid(_in_ ScalarVector* tids, _out_ ScalarVector* destVec)
|
|
{
|
|
Assert(tids && destVec);
|
|
uint32 curCUId = InValidCUID, thisCUId, rowOffset;
|
|
ScalarValue* destValue = destVec->m_vals;
|
|
ScalarValue* tidValue = tids->m_vals;
|
|
destVec->m_rows = 0;
|
|
TransactionId xmin = InvalidTransactionId;
|
|
|
|
for (int i = 0; i < tids->m_rows; i++) {
|
|
ItemPointer tidPtr = (ItemPointer)&tidValue[i];
|
|
thisCUId = ItemPointerGetBlockNumber(tidPtr);
|
|
|
|
// Note that tidPointer->rowOffset start from 1
|
|
rowOffset = ItemPointerGetOffsetNumber(tidPtr) - 1;
|
|
|
|
// Step 1: Get CUDesc and delmask if need
|
|
if (curCUId != thisCUId) {
|
|
curCUId = thisCUId;
|
|
this->GetCUDeleteMaskIfNeed(curCUId, m_snapshot);
|
|
|
|
if (this->m_delMaskCUId != InValidCUID && sysColOid == MinTransactionIdAttributeNumber) {
|
|
xmin = this->GetCUXmin(curCUId);
|
|
}
|
|
}
|
|
// Step 2: It is a live row, not a dead row
|
|
// We need fill vector
|
|
if (this->m_delMaskCUId != InValidCUID && !this->IsDeadRow(curCUId, rowOffset)) {
|
|
switch (sysColOid) {
|
|
case SelfItemPointerAttributeNumber: {
|
|
destValue[destVec->m_rows++] = *(ScalarValue*)tidPtr;
|
|
break;
|
|
}
|
|
case XC_NodeIdAttributeNumber: {
|
|
destValue[destVec->m_rows++] = u_sess->pgxc_cxt.PGXCNodeIdentifier;
|
|
break;
|
|
}
|
|
case TableOidAttributeNumber: {
|
|
destValue[destVec->m_rows++] = RelationGetRelid(m_relation);
|
|
break;
|
|
}
|
|
case MinTransactionIdAttributeNumber: {
|
|
destValue[destVec->m_rows++] = xmin;
|
|
break;
|
|
}
|
|
default:
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
|
(errmsg("Cannot to fill unsupported system column %d for column store table", sysColOid))));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* @Description: fill vector by tid in cstore scan late read.
|
|
* @in colIdx: the index of the this column.
|
|
* @in tids: the tid vector.
|
|
* @in cuDescPtr: the pointer to the CUDesc.
|
|
* @out vec: the output ScalarVector.
|
|
* @template attlen: the length of this column.
|
|
*/
|
|
template <int attlen>
|
|
void CStore::FillVectorLateRead(
|
|
_in_ int colIdx, _in_ ScalarVector* tids, _in_ CUDesc* cuDescPtr, _out_ ScalarVector* vec)
|
|
{
|
|
ScalarValue* tidVals = tids->m_vals;
|
|
ItemPointer tidPtr = NULL;
|
|
|
|
uint32 tmpCuId = InValidCUID;
|
|
uint32 tmpOffset = 0;
|
|
|
|
int pos = 0;
|
|
|
|
// Case 1: It is full of NULL value
|
|
if (cuDescPtr->IsNullCU()) {
|
|
for (int rowCnt = 0; rowCnt < tids->m_rows; ++rowCnt) {
|
|
tidPtr = (ItemPointer)(tidVals + rowCnt);
|
|
tmpCuId = ItemPointerGetBlockNumber(tidPtr);
|
|
tmpOffset = ItemPointerGetOffsetNumber(tidPtr) - 1;
|
|
if (this->IsDeadRow(cuDescPtr->cu_id, tmpOffset)) {
|
|
continue;
|
|
}
|
|
vec->SetNull(pos);
|
|
pos++;
|
|
}
|
|
|
|
vec->m_rows = pos;
|
|
return;
|
|
}
|
|
|
|
// Case 2: It is full of the same value
|
|
if (cuDescPtr->IsSameValCU()) {
|
|
for (int rowCnt = 0; rowCnt < tids->m_rows; ++rowCnt) {
|
|
tidPtr = (ItemPointer)(tidVals + rowCnt);
|
|
tmpCuId = ItemPointerGetBlockNumber(tidPtr);
|
|
tmpOffset = ItemPointerGetOffsetNumber(tidPtr) - 1;
|
|
if (this->IsDeadRow(cuDescPtr->cu_id, tmpOffset)) {
|
|
continue;
|
|
}
|
|
|
|
if (attlen > 0 && attlen <= 8) {
|
|
Datum cuMin = *(Datum*)(cuDescPtr->cu_min);
|
|
vec->m_vals[pos] = cuMin;
|
|
} else if (attlen == 12 || attlen == 16) {
|
|
Datum cuMin = PointerGetDatum(cuDescPtr->cu_min);
|
|
vec->AddVar(cuMin, pos);
|
|
} else {
|
|
Datum cuMin = PointerGetDatum(cuDescPtr->cu_min + 1);
|
|
Size len = (Size)(unsigned char)cuDescPtr->cu_min[0];
|
|
Assert(len < MIN_MAX_LEN);
|
|
|
|
// Convert string into varattrib_1b
|
|
// It is safe because len < MIN_MAX_LEN
|
|
char tmpStr[MIN_MAX_LEN + VARHDRSZ];
|
|
if (attlen == -1) {
|
|
SET_VARSIZE_SHORT(tmpStr, len + VARHDRSZ_SHORT);
|
|
errno_t rc =
|
|
memcpy_s(tmpStr + VARHDRSZ_SHORT, sizeof(tmpStr) - VARHDRSZ_SHORT, DatumGetPointer(cuMin), len);
|
|
securec_check(rc, "\0", "\0");
|
|
cuMin = PointerGetDatum(tmpStr);
|
|
}
|
|
|
|
vec->AddVar(cuMin, pos);
|
|
}
|
|
pos++;
|
|
}
|
|
|
|
vec->m_rows = pos;
|
|
return;
|
|
}
|
|
|
|
// Case 3: It is a normal CU
|
|
int slotId = CACHE_BLOCK_INVALID_IDX;
|
|
CSTORESCAN_TRACE_START(GET_CU_DATA_LATER_READ);
|
|
CU* cuPtr = this->GetCUData(cuDescPtr, colIdx, attlen, slotId);
|
|
CSTORESCAN_TRACE_END(GET_CU_DATA_LATER_READ);
|
|
|
|
if (cuPtr->HasNullValue()) {
|
|
pos = cuPtr->ToVectorLateRead<attlen, true>(tids, vec);
|
|
} else {
|
|
pos = cuPtr->ToVectorLateRead<attlen, false>(tids, vec);
|
|
}
|
|
|
|
if (IsValidCacheSlotID(slotId)) {
|
|
// CU is pinned
|
|
CUCache->UnPinDataBlock(slotId);
|
|
} else {
|
|
Assert(false);
|
|
}
|
|
|
|
vec->m_rows = pos;
|
|
return;
|
|
}
|
|
|
|
template <int attlen>
|
|
void CStore::FillVectorByTids(_in_ int colIdx, _in_ ScalarVector* tids, _out_ ScalarVector* vec)
|
|
{
|
|
ScalarValue* tidVals = tids->m_vals;
|
|
ItemPointer tidPtr = NULL;
|
|
|
|
uint32 curCUId = InValidCUID;
|
|
uint32 tmpCuId = InValidCUID;
|
|
|
|
uint32 tmpOffset = 0;
|
|
uint32 firstOffset = 0;
|
|
uint32 nextOffset = 0;
|
|
|
|
CUDesc cuDesc;
|
|
int pos = 0;
|
|
int contiguous = 0;
|
|
bool found = false;
|
|
|
|
// we will do only once Pin/Unpin cache within the same cu.
|
|
bool needLoadCu = false;
|
|
int slot = CACHE_BLOCK_INVALID_IDX;
|
|
CU* lastCU = NULL;
|
|
errno_t rc = EOK;
|
|
|
|
// Main copy procedure: copy each value into the output vector. Be careful
|
|
// to reuse previous value's CU and CU descriptor.
|
|
for (int rowCnt = 0; rowCnt < tids->m_rows; ++rowCnt) {
|
|
// Note that tidPointer->tmpOffset start from 1
|
|
tidPtr = (ItemPointer)(tidVals + rowCnt);
|
|
tmpCuId = ItemPointerGetBlockNumber(tidPtr);
|
|
tmpOffset = ItemPointerGetOffsetNumber(tidPtr) - 1;
|
|
|
|
// Step 1: Get CUDesc and deletion mask if needed
|
|
if (curCUId != tmpCuId) {
|
|
if (lastCU != NULL) {
|
|
// switch to new cu. so at first unpin the
|
|
// previous cu cache as earlier as possible.
|
|
Assert(slot != CACHE_BLOCK_INVALID_IDX);
|
|
CUCache->UnPinDataBlock(slot);
|
|
|
|
// reset after unpin action.
|
|
lastCU = NULL;
|
|
slot = CACHE_BLOCK_INVALID_IDX;
|
|
}
|
|
|
|
// fetch cudesc tuple and deletion bitmap.
|
|
curCUId = tmpCuId;
|
|
found = this->GetCUDesc(colIdx, curCUId, &cuDesc, this->m_snapshot);
|
|
if (!found) {
|
|
if (m_useBtreeIndex) {
|
|
m_delMaskCUId = InValidCUID;
|
|
continue;
|
|
} else {
|
|
Assert(false);
|
|
ereport(FATAL,
|
|
(errmsg("compression unit descriptor not found, table(%s), column(%s), relfilenode(%u/%u/%u), "
|
|
"cuid(%u)).",
|
|
RelationGetRelationName(this->m_relation),
|
|
NameStr(this->m_relation->rd_att->attrs[colIdx]->attname),
|
|
this->m_relation->rd_node.spcNode,
|
|
this->m_relation->rd_node.dbNode,
|
|
this->m_relation->rd_node.relNode,
|
|
curCUId)));
|
|
}
|
|
} else {
|
|
this->GetCUDeleteMaskIfNeed(curCUId, this->m_snapshot);
|
|
}
|
|
|
|
// indicate to load data if needed when switch to a new cu.
|
|
needLoadCu = true;
|
|
}
|
|
|
|
// check if the current cu is valid(visible)
|
|
if (m_delMaskCUId == InValidCUID)
|
|
continue;
|
|
|
|
// step 2: compute how many data contiguous within the same cu.
|
|
contiguous = 0;
|
|
nextOffset = tmpOffset;
|
|
firstOffset = tmpOffset;
|
|
|
|
while (tmpCuId == curCUId // within the same cu.
|
|
&& tmpOffset == nextOffset // contiguous offset.
|
|
&& !this->IsDeadRow(curCUId, tmpOffset)) // it's a dead data.
|
|
{
|
|
++contiguous;
|
|
++nextOffset;
|
|
|
|
if (unlikely(++rowCnt == tids->m_rows))
|
|
break;
|
|
|
|
// fetch and check the next data
|
|
// contiguous within the same cu.
|
|
tidPtr = (ItemPointer)(tidVals + rowCnt);
|
|
tmpCuId = ItemPointerGetBlockNumber(tidPtr);
|
|
tmpOffset = ItemPointerGetOffsetNumber(tidPtr) - 1;
|
|
}
|
|
|
|
if (unlikely(contiguous == 0)) {
|
|
// this is a dead data, so check the next data.
|
|
Assert(this->IsDeadRow(curCUId, tmpOffset));
|
|
continue;
|
|
} else if (tmpCuId != curCUId || !this->IsDeadRow(curCUId, tmpOffset)) {
|
|
// if it's the first data of the next cu,
|
|
// or the first new offset within the same cu is not a
|
|
// dead data, we have to check it again.
|
|
--rowCnt;
|
|
}
|
|
|
|
/*
|
|
* step 3: fill the output vector.
|
|
* Case 1: It is full of NULL value
|
|
*/
|
|
if (cuDesc.IsNullCU()) {
|
|
for (int k = 0; k < contiguous; ++k)
|
|
vec->SetNull(pos++);
|
|
continue;
|
|
}
|
|
|
|
// Case 2: It is full of the same value
|
|
if (cuDesc.IsSameValCU()) {
|
|
if (attlen > 0 && attlen <= 8) {
|
|
Datum cuMin = *(Datum*)(cuDesc.cu_min);
|
|
ScalarValue* dest = vec->m_vals + pos;
|
|
|
|
// batch assign cuMin to dest[contiguous].
|
|
for (uint32 k = 0; k < ((uint32)contiguous >> 2); ++k) {
|
|
*dest++ = cuMin;
|
|
*dest++ = cuMin;
|
|
*dest++ = cuMin;
|
|
*dest++ = cuMin;
|
|
}
|
|
for (int k = 0; k < (contiguous & 0x03); ++k) {
|
|
*dest++ = cuMin;
|
|
}
|
|
pos += contiguous;
|
|
} else if (attlen == 12 || attlen == 16) {
|
|
// NB: be careful AddVar() will insert 1B header
|
|
// before each value.
|
|
Datum cuMin = PointerGetDatum(cuDesc.cu_min);
|
|
for (int k = 0; k < contiguous; ++k)
|
|
vec->AddVar(cuMin, pos++);
|
|
} else {
|
|
Datum cuMin = PointerGetDatum(cuDesc.cu_min + 1);
|
|
Size len = (Size)(unsigned char)cuDesc.cu_min[0];
|
|
Assert(len < MIN_MAX_LEN);
|
|
|
|
// Convert string into varattrib_1b
|
|
// It is safe because len < MIN_MAX_LEN
|
|
char tmpStr[MIN_MAX_LEN + 4];
|
|
if (attlen == -1) {
|
|
SET_VARSIZE_SHORT(tmpStr, len + VARHDRSZ_SHORT);
|
|
rc =
|
|
memcpy_s(tmpStr + VARHDRSZ_SHORT, sizeof(tmpStr) - VARHDRSZ_SHORT, DatumGetPointer(cuMin), len);
|
|
securec_check(rc, "", "");
|
|
cuMin = PointerGetDatum(tmpStr);
|
|
}
|
|
|
|
for (int k = 0; k < contiguous; ++k)
|
|
vec->AddVar(cuMin, pos++);
|
|
}
|
|
|
|
continue;
|
|
}
|
|
|
|
// Case 3: It is a normal CU
|
|
CU* cuPtr = lastCU;
|
|
|
|
if (unlikely(needLoadCu)) {
|
|
Assert(lastCU == NULL);
|
|
Assert(slot == CACHE_BLOCK_INVALID_IDX);
|
|
|
|
// load new cu data, and then reset the flag.
|
|
CSTORESCAN_TRACE_START(GET_CU_DATA_FROM_CACHE);
|
|
cuPtr = this->GetCUData(&cuDesc, colIdx, attlen, slot);
|
|
CSTORESCAN_TRACE_END(GET_CU_DATA_FROM_CACHE);
|
|
lastCU = cuPtr;
|
|
needLoadCu = false;
|
|
}
|
|
Assert(cuPtr);
|
|
|
|
if (cuPtr->m_nulls == NULL) {
|
|
Assert(!cuPtr->HasNullValue());
|
|
switch (attlen) {
|
|
case sizeof(char):
|
|
case sizeof(int16):
|
|
case sizeof(int32): {
|
|
// because the source is of 1/2/4 bytes length, and
|
|
// the destination is 8 bytes, so assign each item
|
|
// in for loop.
|
|
ScalarValue* dest = vec->m_vals + pos;
|
|
for (int k = 0; k < contiguous; ++k)
|
|
*dest++ = cuPtr->GetValue<attlen, false>(firstOffset++);
|
|
pos += contiguous;
|
|
break;
|
|
}
|
|
case sizeof(Datum): {
|
|
// because the source and the destination are both of 8 bytes
|
|
// length, so copy all data in one batch by memcpy().
|
|
rc = memcpy_s((char*)(vec->m_vals + pos),
|
|
(size_t)(uint32)contiguous << 3,
|
|
((uint64*)cuPtr->m_srcData + firstOffset),
|
|
(size_t)(uint32)contiguous << 3);
|
|
securec_check(rc, "\0", "\0");
|
|
pos += contiguous;
|
|
break;
|
|
}
|
|
case -1:
|
|
case -2: {
|
|
// Total bytes to be copied is calculated from the offset table.
|
|
int32* offset = cuPtr->m_offset + firstOffset;
|
|
int32 obase = offset[0];
|
|
if (contiguous > cuDesc.row_count || contiguous >= (int)(cuPtr->m_offsetSize / sizeof(int32))) {
|
|
ereport(defence_errlevel(), (errcode(ERRCODE_DATA_CORRUPTED),
|
|
errmsg("Tid and CUDesc, CUId: %u, colId: %d, contiguous: %d, row count: %d.",
|
|
curCUId, colIdx, contiguous, cuDesc.row_count),
|
|
errdetail("please reindex the relation. relation info: name \"%s\", namespace id %u, id %u, relfilenode %u/%u/%u",
|
|
RelationGetRelationName(this->m_relation), RelationGetNamespace(this->m_relation), RelationGetRelid(this->m_relation),
|
|
this->m_relation->rd_node.spcNode, this->m_relation->rd_node.dbNode, this->m_relation->rd_node.relNode)));
|
|
}
|
|
int totalBytes = offset[contiguous] - obase;
|
|
char* src = cuPtr->m_srcData + obase;
|
|
// We can copy all elements in one batch together and fix the pointer
|
|
// by adding the memory base. It is manaully unrolled the loop to give
|
|
// a strong hint to compiler
|
|
char* base = vec->AddVars(src, totalBytes) - obase;
|
|
ScalarValue* dest = vec->m_vals + pos;
|
|
for (uint32 k = 0; k < ((uint32)contiguous >> 2); ++k) {
|
|
*dest++ = (ScalarValue)(base + *offset++);
|
|
*dest++ = (ScalarValue)(base + *offset++);
|
|
*dest++ = (ScalarValue)(base + *offset++);
|
|
*dest++ = (ScalarValue)(base + *offset++);
|
|
}
|
|
for (int k = 0; k < (contiguous & 0x3); k++)
|
|
*dest++ = (ScalarValue)(base + *offset++);
|
|
pos += contiguous;
|
|
break;
|
|
}
|
|
case 12:
|
|
case 16: {
|
|
// NB: be careful AddVar() will insert 1B header
|
|
// before each value.
|
|
for (int k = 0; k < contiguous; ++k) {
|
|
ScalarValue value = cuPtr->GetValue<attlen, false>(firstOffset++);
|
|
vec->AddVar(PointerGetDatum(value), pos++);
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
Assert(0);
|
|
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), (errmsg("unsupported datatype branch"))));
|
|
break;
|
|
}
|
|
} else {
|
|
Assert(cuPtr->HasNullValue());
|
|
|
|
// in normal case take care null values.
|
|
for (int k = 0; k < contiguous; ++k) {
|
|
if (unlikely(cuPtr->IsNull(firstOffset)))
|
|
vec->SetNull(pos);
|
|
else {
|
|
ScalarValue value = cuPtr->GetValue<attlen, true>(firstOffset);
|
|
switch (attlen) {
|
|
case sizeof(char):
|
|
case sizeof(int16):
|
|
case sizeof(int32):
|
|
case sizeof(Datum):
|
|
vec->m_vals[pos] = value;
|
|
break;
|
|
case 12:
|
|
case 16:
|
|
case -1:
|
|
case -2:
|
|
vec->AddVar(PointerGetDatum(value), pos);
|
|
break;
|
|
default:
|
|
Assert(0);
|
|
ereport(
|
|
ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), (errmsg("unsupported datatype branch"))));
|
|
break;
|
|
}
|
|
}
|
|
|
|
++firstOffset;
|
|
++pos;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (lastCU != NULL) {
|
|
// Unpin the last used cu cache.
|
|
Assert(slot != CACHE_BLOCK_INVALID_IDX);
|
|
CUCache->UnPinDataBlock(slot);
|
|
}
|
|
|
|
vec->m_rows = pos;
|
|
}
|
|
|
|
/*
 * Fill every late-read column of the batch.
 *
 * The first late-read column encountered is used as the ctid carrier: its vector is
 * handed as the tid input to the fill routine of every other late-read column, and
 * it is itself filled last, with the same vector acting as both input and output.
 * Nothing happens when no late-read column with a non-negative batch index exists.
 */
void CStore::FillScanBatchLateIfNeed(__inout VectorBatch* vecBatch)
{
    ScalarVector* ctidVector = NULL;
    int firstLateIdx = -1;

    // Pass 1: remember the first late-read column, fill all the later ones from it.
    for (int scanIdx = 0; scanIdx < m_colNum; ++scanIdx) {
        const int batchCol = m_colId[scanIdx];
        if (!IsLateRead(scanIdx) || batchCol < 0) {
            continue;
        }
        Assert(batchCol < vecBatch->m_cols);

        if (ctidVector == NULL) {
            // This column carries the ctids; it is filled in pass 2.
            ctidVector = &vecBatch->m_arr[batchCol];
            firstLateIdx = scanIdx;
            continue;
        }

        CUDesc* desc = &m_CUDescInfo[scanIdx]->cuDescArray[m_cuDescIdx];
        GetCUDeleteMaskIfNeed(desc->cu_id, m_snapshot);
        (this->*m_fillVectorLateRead[scanIdx])(batchCol, ctidVector, desc, &vecBatch->m_arr[batchCol]);
    }

    if (firstLateIdx < 0) {
        return;
    }

    // Pass 2: fill the ctid-carrying column itself.
    const int carrierCol = m_colId[firstLateIdx];
    Assert(IsLateRead(firstLateIdx) && carrierCol >= 0);

    CUDesc* carrierDesc = &m_CUDescInfo[firstLateIdx]->cuDescArray[m_cuDescIdx];
    GetCUDeleteMaskIfNeed(carrierDesc->cu_id, m_snapshot);
    (this->*m_fillVectorLateRead[firstLateIdx])(carrierCol, ctidVector, carrierDesc, &vecBatch->m_arr[carrierCol]);
}
|
|
|
|
// We fill vector with ctid, because these columns can be read as late as possible.
|
|
// After finishing qual, read these columns.
|
|
template <bool hasDeadRow>
|
|
int CStore::FillTidForLateRead(_in_ CUDesc* cuDescPtr, _out_ ScalarVector* vec)
|
|
{
|
|
Assert(cuDescPtr && vec);
|
|
uint32 cur_cuid = cuDescPtr->cu_id;
|
|
int leftSize = cuDescPtr->row_count - m_rowCursorInCU;
|
|
int pos = 0, deadRows = 0;
|
|
Assert(leftSize > 0);
|
|
|
|
for (int i = 0; i < leftSize && pos < BatchMaxSize; i++) {
|
|
if (unlikely(hasDeadRow && IsDeadRow(cuDescPtr->cu_id, i + m_rowCursorInCU))) {
|
|
++deadRows;
|
|
} else {
|
|
// because sizeof(*itemPtr) is not the same to
|
|
// sizeof(vec->m_vals[0]), so zero it at first.
|
|
vec->m_vals[pos] = 0;
|
|
ItemPointer itemPtr = (ItemPointer)&vec->m_vals[pos];
|
|
|
|
// Note that itemPtr->offset start from 1
|
|
ItemPointerSet(itemPtr, cur_cuid, i + m_rowCursorInCU + 1);
|
|
++pos;
|
|
}
|
|
}
|
|
vec->m_rows = pos;
|
|
return deadRows;
|
|
}
|
|
|
|
/*
 * Fill a vector with the values of one system column for the live rows of a CU.
 *
 * Rows are taken from m_rowCursorInCU up to the end of the CU, stopping early once
 * BatchMaxSize values have been produced. The vector's flag array is cleared first
 * and the delete mask of the CU is loaded before any row is examined.
 *
 * Supported columns: ctid, node id, table oid and xmin; any other column raises an
 * ERROR. Returns the number of dead rows skipped; vec->m_rows is set to the number
 * of values written.
 */
int CStore::FillSysColVector(_in_ int colIdx, _in_ CUDesc* cuDescPtr, _out_ ScalarVector* vec)
{
    Assert(cuDescPtr && vec);

    const uint32 cuId = cuDescPtr->cu_id;
    const int remaining = cuDescPtr->row_count - m_rowCursorInCU;
    Assert(remaining > 0);

    errno_t rc = memset_s(vec->m_flag, sizeof(uint8) * BatchMaxSize, 0, sizeof(uint8) * BatchMaxSize);
    securec_check(rc, "", "");
    GetCUDeleteMaskIfNeed(cuId, m_snapshot);

    int written = 0;
    int skipped = 0;

    for (int i = 0; i < remaining && written < BatchMaxSize; ++i) {
        if (IsDeadRow(cuId, i + m_rowCursorInCU)) {
            ++skipped;
        } else {
            switch (colIdx) {
                case SelfItemPointerAttributeNumber: {
                    /* description: future plan-set vec->m_desc */
                    vec->m_desc.typeId = INT8OID;

                    // Clear the whole slot, then overlay the (narrower) item pointer on it.
                    ScalarValue* cell = vec->m_vals + written;
                    *cell = 0;

                    // Item pointer offsets are 1-based.
                    ItemPointerSet((ItemPointer)cell, cuId, i + m_rowCursorInCU + 1);
                    break;
                }
                case XC_NodeIdAttributeNumber:
                    vec->m_vals[written] = u_sess->pgxc_cxt.PGXCNodeIdentifier;
                    break;
                case TableOidAttributeNumber:
                    vec->m_vals[written] = RelationGetRelid(m_relation);
                    break;
                case MinTransactionIdAttributeNumber:
                    vec->m_vals[written] = cuDescPtr->xmin;
                    break;
                default:
                    ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), (errmsg("Column store don't support"))));
                    break;
            }
            ++written;
        }
    }

    vec->m_rows = written;
    return skipped;
}
|
|
|
|
/*
|
|
* Get CUDesc of column according to cuid.
|
|
*/
|
|
bool CStore::GetCUDesc(_in_ int col, _in_ uint32 cuid, _out_ CUDesc* cuDescPtr, _in_ Snapshot snapShot)
|
|
{
|
|
ScanKeyData key[2];
|
|
HeapTuple tup;
|
|
bool found = false;
|
|
errno_t rc = EOK;
|
|
Assert(col >= 0);
|
|
|
|
// we will reset m_perScanMemCnxt when switch to the next batch of cudesc data.
|
|
// so the spaces only used for this batch should be managed by m_perScanMemCnxt.
|
|
AutoContextSwitch newMemCnxt(m_perScanMemCnxt);
|
|
|
|
/*
|
|
* Open the CUDesc relation and its index
|
|
*/
|
|
Relation cudesc_rel = heap_open(m_relation->rd_rel->relcudescrelid, AccessShareLock);
|
|
TupleDesc cudesc_tupdesc = cudesc_rel->rd_att;
|
|
Relation idx_rel = index_open(cudesc_rel->rd_rel->relcudescidx, AccessShareLock);
|
|
bool isFixedLen = m_relation->rd_att->attrs[col]->attlen > 0 ? true : false;
|
|
// Convert logical id is to physical id of attribute
|
|
int attid = m_relation->rd_att->attrs[col]->attnum;
|
|
|
|
/*
|
|
* Setup scan key to fetch from the index by attid.
|
|
*/
|
|
ScanKeyInit(&key[0], (AttrNumber)CUDescColIDAttr, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(attid));
|
|
|
|
ScanKeyInit(&key[1], (AttrNumber)CUDescCUIDAttr, BTEqualStrategyNumber, F_OIDEQ, UInt32GetDatum(cuid));
|
|
|
|
snapShot = (snapShot == NULL) ? GetActiveSnapshot() : snapShot;
|
|
Assert(snapShot != NULL);
|
|
|
|
SysScanDesc cudesc_scan = systable_beginscan_ordered(cudesc_rel, idx_rel, snapShot, 2, key);
|
|
// only loop once
|
|
while ((tup = systable_getnext_ordered(cudesc_scan, ForwardScanDirection)) != NULL) {
|
|
Datum values[CUDescCUExtraAttr] = {0};
|
|
bool isnull[CUDescCUExtraAttr] = {0};
|
|
char* valPtr = NULL;
|
|
|
|
heap_deform_tuple(tup, cudesc_tupdesc, values, isnull);
|
|
|
|
uint32 cu_id = DatumGetUInt32(values[CUDescCUIDAttr - 1]);
|
|
Assert(!isnull[CUDescCUIDAttr - 1] && cu_id == cuid && found == false);
|
|
|
|
cuDescPtr->xmin = HeapTupleGetRawXmin(tup);
|
|
|
|
cuDescPtr->cu_id = cu_id;
|
|
|
|
// Put min value into cudesc->min
|
|
if (!isnull[CUDescMinAttr - 1]) {
|
|
char* minPtr = cuDescPtr->cu_min;
|
|
char len_1 = MIN_MAX_LEN;
|
|
valPtr = DatumGetPointer(values[CUDescMinAttr - 1]);
|
|
if (!isFixedLen) {
|
|
*minPtr = (char)VARSIZE_ANY_EXHDR(valPtr);
|
|
minPtr = minPtr + 1;
|
|
len_1 -= 1;
|
|
}
|
|
rc = memcpy_s(minPtr, len_1, VARDATA_ANY(valPtr), VARSIZE_ANY_EXHDR(valPtr));
|
|
securec_check(rc, "", "");
|
|
}
|
|
// Put max value into cudesc->max
|
|
if (!isnull[CUDescMaxAttr - 1]) {
|
|
char* maxPtr = cuDescPtr->cu_max;
|
|
char len_2 = MIN_MAX_LEN;
|
|
valPtr = DatumGetPointer(values[CUDescMaxAttr - 1]);
|
|
if (!isFixedLen) {
|
|
*maxPtr = VARSIZE_ANY_EXHDR(valPtr);
|
|
maxPtr = maxPtr + 1;
|
|
len_2 -= 1;
|
|
}
|
|
rc = memcpy_s(maxPtr, len_2, VARDATA_ANY(valPtr), VARSIZE_ANY_EXHDR(valPtr));
|
|
securec_check(rc, "", "");
|
|
}
|
|
|
|
cuDescPtr->row_count = DatumGetInt32(values[CUDescRowCountAttr - 1]);
|
|
Assert(!isnull[CUDescRowCountAttr - 1]);
|
|
|
|
// Put CUMode into cudesc->cumode
|
|
cuDescPtr->cu_mode = DatumGetInt32(values[CUDescCUModeAttr - 1]);
|
|
Assert(!isnull[CUDescCUModeAttr - 1]);
|
|
|
|
// Put cusize into cudesc->cu_size
|
|
cuDescPtr->cu_size = DatumGetInt32(values[CUDescSizeAttr - 1]);
|
|
Assert(!isnull[CUDescSizeAttr - 1]);
|
|
|
|
// Put CUPointer into cudesc->cuPointer
|
|
char* cu_ptr = DatumGetPointer(values[CUDescCUPointerAttr - 1]);
|
|
Assert(!isnull[CUDescCUPointerAttr - 1] && cu_ptr);
|
|
rc = memcpy_s(&cuDescPtr->cu_pointer, sizeof(CUPointer), VARDATA_ANY(cu_ptr), sizeof(CUPointer));
|
|
securec_check(rc, "", "");
|
|
Assert(VARSIZE_ANY_EXHDR(cu_ptr) == sizeof(CUPointer));
|
|
|
|
cuDescPtr->magic = DatumGetUInt32(values[CUDescCUMagicAttr - 1]);
|
|
Assert(!isnull[CUDescCUMagicAttr - 1]);
|
|
found = true;
|
|
}
|
|
systable_endscan_ordered(cudesc_scan);
|
|
index_close(idx_rel, AccessShareLock);
|
|
heap_close(cudesc_rel, AccessShareLock);
|
|
|
|
return found;
|
|
}
|
|
|
|
/*
 * Make sure the delete bitmap cached in this object belongs to CU `cuid`.
 *
 * Nothing is done when m_delMaskCUId already equals `cuid`. Otherwise the CUDesc
 * relation is scanned through its index with the key (VitrualDelColID, cuid),
 * using `snapShot` or, when that is NULL, the active snapshot:
 *   - tuple found, bitmap attribute NULL  -> m_hasDeadRow = false
 *   - tuple found, bitmap attribute set   -> m_hasDeadRow = true and the bitmap is
 *                                            copied into m_cuDelMask
 *   - in both cases m_delMaskCUId is set to `cuid`
 *   - no tuple and the snapshot's xmin precedes the global xmin -> ERROR
 *     ("Snapshot too old.")
 *   - no tuple otherwise -> m_delMaskCUId = InValidCUID when m_useBtreeIndex is
 *     set, PANIC when it is not
 */
void CStore::GetCUDeleteMaskIfNeed(_in_ uint32 cuid, _in_ Snapshot snapShot)
{
    // The delete mask of this CU is already loaded.
    if (m_delMaskCUId == cuid) {
        return;
    }

    // m_perScanMemCnxt is reset when switching to the next batch of cudesc data,
    // so everything allocated for this lookup is placed in that context.
    AutoContextSwitch newMemCnxt(m_perScanMemCnxt);

    // Open the CUDesc relation and its index.
    Relation descRel = heap_open(m_relation->rd_rel->relcudescrelid, AccessShareLock);
    TupleDesc descTupDesc = descRel->rd_att;
    Relation descIdx = index_open(descRel->rd_rel->relcudescidx, AccessShareLock);

    // Index scan keys: (virtual delete column id, cu id).
    ScanKeyData scanKeys[2];
    ScanKeyInit(
        &scanKeys[0], (AttrNumber)CUDescColIDAttr, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(VitrualDelColID));
    ScanKeyInit(&scanKeys[1], (AttrNumber)CUDescCUIDAttr, BTEqualStrategyNumber, F_OIDEQ, UInt32GetDatum(cuid));

    if (snapShot == NULL) {
        snapShot = GetActiveSnapshot();
    }
    Assert(snapShot != NULL);

    SysScanDesc descScan = systable_beginscan_ordered(descRel, descIdx, snapShot, 2, scanKeys);

    bool found = false;
    HeapTuple tup = systable_getnext_ordered(descScan, ForwardScanDirection);
    if (tup != NULL) {
        bool isnull = false;
        Datum maskDatum = fastgetattr(tup, CUDescCUPointerAttr, descTupDesc, &isnull);

        if (isnull) {
            m_hasDeadRow = false;
        } else {
            m_hasDeadRow = true;

            int8* bitmap = (int8*)PG_DETOAST_DATUM(DatumGetPointer(maskDatum));
            errno_t rc = memcpy_s(m_cuDelMask, MaxDelBitmapSize, VARDATA_ANY(bitmap), VARSIZE_ANY_EXHDR(bitmap));
            securec_check(rc, "", "");

            // Detoasting may have produced a fresh copy; release it right away.
            if ((Pointer)bitmap != DatumGetPointer(maskDatum)) {
                pfree_ext(bitmap);
            }
        }

        found = true;
    }

    systable_endscan_ordered(descScan);
    index_close(descIdx, AccessShareLock);
    heap_close(descRel, AccessShareLock);

    if (found) {
        m_delMaskCUId = cuid;
        return;
    }

    // No delete-bitmap tuple is visible to this snapshot.
    TransactionId currGlobalXmin = pg_atomic_read_u64(&t_thrd.xact_cxt.ShmemVariableCache->recentGlobalXmin);
    Assert(snapShot->xmin > 0);

    if (TransactionIdPrecedes(snapShot->xmin, currGlobalXmin)) {
        ereport(ERROR,
            (errcode(ERRCODE_SNAPSHOT_INVALID),
                (errmsg("Snapshot too old."),
                    errdetail("Could not get the old version of CUDeleteBitmap, RecentGlobalXmin: %lu, "
                              "snapShot->xmin: %lu, snapShot->xmax: %lu",
                        currGlobalXmin,
                        snapShot->xmin,
                        snapShot->xmax),
                    errhint("This is a safe error report, will not impact data consistency, retry your query if "
                            "needed."))));
    } else if (m_useBtreeIndex) {
        m_delMaskCUId = InValidCUID;
    } else {
        ereport(PANIC,
            (errmsg("CU Delete bitmap is missing."),
                errdetail("There might be some issue about cu %u delete bitmap, Please contact HW engineers "
                          "for support.",
                    cuid)));
    }
}
|
|
|
|
/*
 * Placeholder implementation: no lookup is performed, none of the arguments is
 * read, slotId is left untouched, and NULL is always returned.
 */
CU* CStore::GetUnCompressCUData(
    Relation rel, int col, uint32 cuid, _out_ int& slotId, ForkNumber forkNum, bool enterCache) const
{
    CU* noCu = NULL;
    return noCu;
}
|
|
|
|
/*
 * Sanity-check a CU buffer against its CUDesc before it is handed to a caller.
 *
 * Checks, in order: source data pointer present, offset table present when the CU
 * needs one, magic matches, row count derived from the offset table matches, and
 * CU size matches. Each failed check is reported at defence_errlevel().
 *
 * `col` is only used in the report text (table column number).
 */
void CStore::CheckConsistenceOfCUData(CUDesc* cuDescPtr, CU* cu, AttrNumber col) const
{
    /*
     * This memory barrier prevents unordered read, which may cause using NOT-uncompress-completed CU.
     * We must add memory barrier before returning cuPtr in every branch of function GetCUData.
     */
#ifdef __aarch64__
    pg_memory_barrier();
#endif

    /* 1. the source data pointer must be set. */
    if (cu->m_srcData == NULL) {
        ereport(defence_errlevel(),
            (errcode(ERRCODE_INTERNAL_ERROR),
                errmsg("The m_srcData ptr of CU is NULL in CheckConsistenceOfCUData."),
                errdetail("relation info: name \"%s\", namespace id %u, id %u, relfilenode %u/%u/%u",
                    RelationGetRelationName(m_relation),
                    RelationGetNamespace(m_relation),
                    RelationGetRelid(m_relation),
                    m_relation->rd_node.spcNode,
                    m_relation->rd_node.dbNode,
                    m_relation->rd_node.relNode),
                errdetail_internal("CU info: table column %d, id %u, offset %lu, size %d, row count %d",
                    col,
                    cuDescPtr->cu_id,
                    cuDescPtr->cu_pointer,
                    cuDescPtr->cu_size,
                    cuDescPtr->row_count)));
    }

    /* 2. an offset table is required for negative value sizes and for CUs holding NULLs. */
    if (cu->m_offset == NULL && (cu->m_eachValSize < 0 || cu->HasNullValue())) {
        ereport(defence_errlevel(),
            (errcode(ERRCODE_INTERNAL_ERROR),
                errmsg("The m_offset ptr of CU is NULL in CheckConsistenceOfCUData."),
                errdetail("relation info: name \"%s\", namespace id %u, id %u, relfilenode %u/%u/%u",
                    RelationGetRelationName(m_relation),
                    RelationGetNamespace(m_relation),
                    RelationGetRelid(m_relation),
                    m_relation->rd_node.spcNode,
                    m_relation->rd_node.dbNode,
                    m_relation->rd_node.relNode),
                errdetail_internal("CU info: table column %d, id %u, offset %lu, size %d, row count %d",
                    col,
                    cuDescPtr->cu_id,
                    cuDescPtr->cu_pointer,
                    cuDescPtr->cu_size,
                    cuDescPtr->row_count)));
    }

    /* 3. the magic number must agree with the CUDesc. */
    if (cu->m_magic != cuDescPtr->magic) {
        ereport(defence_errlevel(),
            (errcode(ERRCODE_INTERNAL_ERROR),
                errmsg("magic mismatch between cached CU data and CUDesc, CUDesc's magic %u, CU's magic %u",
                    cuDescPtr->magic,
                    cu->m_magic),
                errdetail("relation info: name \"%s\", namespace id %u, id %u, relfilenode %u/%u/%u",
                    RelationGetRelationName(m_relation),
                    RelationGetNamespace(m_relation),
                    RelationGetRelid(m_relation),
                    m_relation->rd_node.spcNode,
                    m_relation->rd_node.dbNode,
                    m_relation->rd_node.relNode),
                errdetail_internal("CU info: table column %d, id %u, offset %lu, size %d, row count %d",
                    col,
                    cuDescPtr->cu_id,
                    cuDescPtr->cu_pointer,
                    cuDescPtr->cu_size,
                    cuDescPtr->row_count)));
    }

    /*
     * 4. when an offset table exists it holds row_count + 1 int32 entries;
     * see also CU::FormValuesOffset().
     */
    if (cu->m_offsetSize > 0 && (cu->m_offsetSize / (int)sizeof(int32)) != (cuDescPtr->row_count + 1)) {
        ereport(defence_errlevel(),
            (errcode(ERRCODE_INTERNAL_ERROR),
                errmsg("row_count mismatch between cached CU data and CUDesc, CUDesc's row_count %d, CU's "
                       "row_count %d",
                    cuDescPtr->row_count,
                    ((cu->m_offsetSize / (int)sizeof(int32)) - 1)),
                errdetail("relation info: name \"%s\", namespace id %u, id %u, relfilenode %u/%u/%u",
                    RelationGetRelationName(m_relation),
                    RelationGetNamespace(m_relation),
                    RelationGetRelid(m_relation),
                    m_relation->rd_node.spcNode,
                    m_relation->rd_node.dbNode,
                    m_relation->rd_node.relNode),
                errdetail_internal("CU info: table column %d, id %u, offset %lu, size %d, magic %u",
                    col,
                    cuDescPtr->cu_id,
                    cuDescPtr->cu_pointer,
                    cuDescPtr->cu_size,
                    cuDescPtr->magic)));
    }

    /* 5. the CU size must agree with the CUDesc. */
    if (cu->m_cuSize != (uint32)cuDescPtr->cu_size) {
        ereport(defence_errlevel(),
            (errcode(ERRCODE_INTERNAL_ERROR),
                errmsg("cu_size mismatch between cached CU data and CUDesc, CUDesc's cu_size %u, CU's cu_size %u",
                    (uint32)cuDescPtr->cu_size,
                    cu->m_cuSize),
                errdetail("relation info: name \"%s\", namespace id %u, id %u, relfilenode %u/%u/%u",
                    RelationGetRelationName(m_relation),
                    RelationGetNamespace(m_relation),
                    RelationGetRelid(m_relation),
                    m_relation->rd_node.spcNode,
                    m_relation->rd_node.dbNode,
                    m_relation->rd_node.relNode),
                errdetail_internal("CU info: table column %d, id %u, offset %lu, row count %d, magic %u",
                    col,
                    cuDescPtr->cu_id,
                    cuDescPtr->cu_pointer,
                    cuDescPtr->row_count,
                    cuDescPtr->magic)));
    }
}
|
|
|
|
// Text fragment used in the corrupted-CU reports below: yields "incorrect checksum"
// when ret_code equals CU_ERR_CRC and "incorrect magic" for every other value.
#define GetUncompressErrMsg(ret_code) ((CU_ERR_CRC == (ret_code)) ? "incorrect checksum" : "incorrect magic")
|
|
|
|
// Put the CU in the cache and return a pointer to the CU data.
|
|
// The CU is returned pinned, callers must unpin it when finished.
|
|
// 1. Record a fetch (read).
|
|
// 2. Look for the CU in the cache via FindDataBlock() first.
|
|
// This should succeed most of the time, and it is fast.
|
|
// 3. If FindDataBlock() cannot get the cu then use InsertCU().
|
|
// 4. If FindDataBlock() or InsertCU() discover the CU is already in the cache then
|
|
// Record the cache hit, and return the CU buffer and the cache entry.
|
|
// 5. If InsertCU() does not find an entry, it reserves memory,
|
|
// a CU decriptor slot, and a CU data slot.
|
|
// 6. Load the CU from disk and setup the CU data slot and Check the CRC.
|
|
// 7. Uncompress the CU data buffer, if necessary
|
|
// 8. Free the compressed buffer.
|
|
// 9. Update the memory reservation.
|
|
// 10.Resume the busy CUbuffer, wakeup any threads waiting for
|
|
// the cache entry.
|
|
CU* CStore::GetCUData(CUDesc* cuDescPtr, int colIdx, int valSize, int& slotId)
|
|
{
|
|
/*
|
|
* we will reset m_PerScanMemCnxt when switch to the next batch of cudesc data.
|
|
* so the spaces only used for this batch should be managed by m_PerScanMemCnxt,
|
|
* including the peices of space used in the decompression.
|
|
*/
|
|
if (m_relation->rd_att->attrs[colIdx]->attisdropped) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OPERATION),
|
|
(errmsg("Cannot get CUData for a dropped column \"%s\" of table \"%s\"",
|
|
NameStr(m_relation->rd_att->attrs[colIdx]->attname),
|
|
RelationGetRelationName(m_relation)))));
|
|
}
|
|
|
|
AutoContextSwitch newMemCnxt(this->m_perScanMemCnxt);
|
|
|
|
CU* cuPtr = NULL;
|
|
Form_pg_attribute* attrs = m_relation->rd_att->attrs;
|
|
CUUncompressedRetCode retCode = CU_OK;
|
|
bool hasFound = false;
|
|
DataSlotTag dataSlotTag =
|
|
CUCache->InitCUSlotTag((RelFileNodeOld *)&m_relation->rd_node, colIdx, cuDescPtr->cu_id, cuDescPtr->cu_pointer);
|
|
|
|
// Record a fetch (read).
|
|
// The fetch count is the sum of the hits and reads.
|
|
if (m_rowCursorInCU == 0) {
|
|
pgstat_count_buffer_read(m_relation);
|
|
}
|
|
|
|
RETRY_LOAD_CU:
|
|
|
|
// Look for the CU in the cache first, this is quick and
|
|
// should succeed most of the time.
|
|
slotId = CUCache->FindDataBlock(&dataSlotTag, (m_rowCursorInCU == 0));
|
|
|
|
// If the CU is not in the cache, reserve it.
|
|
// Get a cache slot, reserve memory, and put it in the hashtable.
|
|
// ReserveDataBlock() may block waiting for space or CU Cache slots
|
|
if (IsValidCacheSlotID(slotId)) {
|
|
hasFound = true;
|
|
} else {
|
|
hasFound = false;
|
|
slotId = CUCache->ReserveDataBlock(&dataSlotTag, cuDescPtr->cu_size, hasFound);
|
|
}
|
|
|
|
// Use the cached CU
|
|
cuPtr = CUCache->GetCUBuf(slotId);
|
|
cuPtr->m_inCUCache = true;
|
|
cuPtr->SetAttInfo(valSize, attrs[colIdx]->atttypmod, attrs[colIdx]->atttypid);
|
|
|
|
// If the CU was already in the cache, return it.
|
|
if (hasFound) {
|
|
// Wait for a read to complete, if still in progress
|
|
if (CUCache->DataBlockWaitIO(slotId)) {
|
|
CUCache->UnPinDataBlock(slotId);
|
|
ereport(LOG,
|
|
(errmodule(MOD_CACHE),
|
|
errmsg("CU wait IO find an error, need to reload! table(%s), column(%s), relfilenode(%u/%u/%u), "
|
|
"cuid(%u)",
|
|
RelationGetRelationName(m_relation),
|
|
NameStr(m_relation->rd_att->attrs[colIdx]->attname),
|
|
m_relation->rd_node.spcNode,
|
|
m_relation->rd_node.dbNode,
|
|
m_relation->rd_node.relNode,
|
|
cuDescPtr->cu_id)));
|
|
goto RETRY_LOAD_CU;
|
|
}
|
|
|
|
// when cstore scan first access CU, count mem_hit
|
|
if (m_rowCursorInCU == 0) {
|
|
// Record cache hit.
|
|
pgstat_count_buffer_hit(m_relation);
|
|
// stat CU SSD hit
|
|
pgstatCountCUMemHit4SessionLevel();
|
|
pgstat_count_cu_mem_hit(m_relation);
|
|
}
|
|
|
|
if (!cuPtr->m_cache_compressed) {
|
|
CheckConsistenceOfCUData(cuDescPtr, cuPtr, (AttrNumber)(colIdx + 1));
|
|
return cuPtr;
|
|
}
|
|
if (cuPtr->m_cache_compressed) {
|
|
retCode = CUCache->StartUncompressCU(cuDescPtr, slotId, this->m_plan_node_id, this->m_timing_on, ALIGNOF_CUSIZE);
|
|
if (retCode == CU_RELOADING) {
|
|
CUCache->UnPinDataBlock(slotId);
|
|
ereport(LOG, (errmodule(MOD_CACHE),
|
|
errmsg("The CU is being reloaded by remote read thread. Retry to load CU! table(%s), "
|
|
"column(%s), relfilenode(%u/%u/%u), cuid(%u)",
|
|
RelationGetRelationName(m_relation), NameStr(m_relation->rd_att->attrs[colIdx]->attname),
|
|
m_relation->rd_node.spcNode, m_relation->rd_node.dbNode, m_relation->rd_node.relNode,
|
|
cuDescPtr->cu_id)));
|
|
goto RETRY_LOAD_CU;
|
|
} else if (retCode == CU_ERR_ADIO) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_IO_ERROR),
|
|
errmodule(MOD_ADIO),
|
|
errmsg("Load CU failed in adio! table(%s), column(%s), relfilenode(%u/%u/%u), cuid(%u)",
|
|
RelationGetRelationName(m_relation),
|
|
NameStr(m_relation->rd_att->attrs[colIdx]->attname),
|
|
m_relation->rd_node.spcNode,
|
|
m_relation->rd_node.dbNode,
|
|
m_relation->rd_node.relNode,
|
|
cuDescPtr->cu_id)));
|
|
} else if (retCode == CU_ERR_CRC || retCode == CU_ERR_MAGIC) {
|
|
/* Prefech CU contains incorrect checksum */
|
|
addBadBlockStat(
|
|
&m_cuStorage[colIdx]->m_cnode.m_rnode, ColumnId2ColForkNum(m_cuStorage[colIdx]->m_cnode.m_attid));
|
|
|
|
if (RelationNeedsWAL(m_relation) && CanRemoteRead()) {
|
|
/* clear CacheBlockInProgressIO and CacheBlockInProgressUncompress but not free cu buffer */
|
|
CUCache->TerminateCU(false);
|
|
ereport(WARNING,
|
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
|
(errmsg("invalid CU in cu_id %u of relation %s file %s offset %lu, prefetch %s, try to "
|
|
"remote read",
|
|
cuDescPtr->cu_id,
|
|
RelationGetRelationName(m_relation),
|
|
relcolpath(m_cuStorage[colIdx]),
|
|
cuDescPtr->cu_pointer,
|
|
GetUncompressErrMsg(retCode))),
|
|
handle_in_client(true)));
|
|
|
|
/* remote load cu */
|
|
retCode = GetCUDataFromRemote(cuDescPtr, cuPtr, colIdx, valSize, slotId);
|
|
if (retCode == CU_RELOADING) {
|
|
/* other thread in remote read */
|
|
CUCache->UnPinDataBlock(slotId);
|
|
ereport(LOG, (errmodule(MOD_CACHE),
|
|
errmsg("The CU is being reloaded by remote read thread. Retry to load CU! table(%s), "
|
|
"column(%s), relfilenode(%u/%u/%u), cuid(%u)",
|
|
RelationGetRelationName(m_relation),
|
|
NameStr(m_relation->rd_att->attrs[colIdx]->attname), m_relation->rd_node.spcNode,
|
|
m_relation->rd_node.dbNode, m_relation->rd_node.relNode, cuDescPtr->cu_id)));
|
|
goto RETRY_LOAD_CU;
|
|
}
|
|
} else {
|
|
// unlogged table can not remote read
|
|
CUCache->TerminateCU(true);
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
|
(errmsg("invalid CU in cu_id %u of relation %s file %s offset %lu, prefetch %s",
|
|
cuDescPtr->cu_id,
|
|
RelationGetRelationName(m_relation),
|
|
relcolpath(m_cuStorage[colIdx]),
|
|
cuDescPtr->cu_pointer,
|
|
GetUncompressErrMsg(retCode)),
|
|
errdetail("Can not remote read for unlogged/temp table. Should truncate table and "
|
|
"re-import data."),
|
|
handle_in_client(true))));
|
|
}
|
|
} else {
|
|
Assert(retCode == CU_OK);
|
|
}
|
|
}
|
|
|
|
CheckConsistenceOfCUData(cuDescPtr, cuPtr, (AttrNumber)(colIdx + 1));
|
|
return cuPtr;
|
|
}
|
|
|
|
// stat CU hdd sync read
|
|
pgstatCountCUHDDSyncRead4SessionLevel();
|
|
pgstat_count_cu_hdd_sync(m_relation);
|
|
|
|
m_cuStorage[colIdx]->LoadCU(
|
|
cuPtr, cuDescPtr->cu_pointer, cuDescPtr->cu_size, g_instance.attr.attr_storage.enable_adio_function, true);
|
|
|
|
ADIO_RUN()
|
|
{
|
|
ereport(DEBUG1,
|
|
(errmodule(MOD_ADIO),
|
|
errmsg("GetCUData:relation(%s), colIdx(%d), load cuid(%u), slotId(%d)",
|
|
RelationGetRelationName(m_relation),
|
|
colIdx,
|
|
cuDescPtr->cu_id,
|
|
slotId)));
|
|
}
|
|
ADIO_END();
|
|
|
|
// Mark the CU as no longer io busy, and wake any waiters
|
|
CUCache->DataBlockCompleteIO(slotId);
|
|
|
|
retCode = CUCache->StartUncompressCU(cuDescPtr, slotId, this->m_plan_node_id, this->m_timing_on, ALIGNOF_CUSIZE);
|
|
if (retCode == CU_RELOADING) {
|
|
CUCache->UnPinDataBlock(slotId);
|
|
ereport(LOG,
|
|
(errmodule(MOD_CACHE),
|
|
errmsg("The CU is being reloaded by remote read thread. Retry to load CU! table(%s), column(%s), "
|
|
"relfilenode(%u/%u/%u), cuid(%u)",
|
|
RelationGetRelationName(m_relation),
|
|
NameStr(m_relation->rd_att->attrs[colIdx]->attname),
|
|
m_relation->rd_node.spcNode,
|
|
m_relation->rd_node.dbNode,
|
|
m_relation->rd_node.relNode,
|
|
cuDescPtr->cu_id)));
|
|
goto RETRY_LOAD_CU;
|
|
} else if (retCode == CU_ERR_CRC || retCode == CU_ERR_MAGIC) {
|
|
/* Sync load CU contains incorrect checksum */
|
|
addBadBlockStat(
|
|
&m_cuStorage[colIdx]->m_cnode.m_rnode, ColumnId2ColForkNum(m_cuStorage[colIdx]->m_cnode.m_attid));
|
|
|
|
if (RelationNeedsWAL(m_relation) && CanRemoteRead()) {
|
|
/* clear CacheBlockInProgressIO and CacheBlockInProgressUncompress but not free cu buffer */
|
|
CUCache->TerminateCU(false);
|
|
ereport(WARNING,
|
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
|
(errmsg(
|
|
"invalid CU in cu_id %u of relation %s file %s offset %lu, sync load %s, try to remote read",
|
|
cuDescPtr->cu_id,
|
|
RelationGetRelationName(m_relation),
|
|
relcolpath(m_cuStorage[colIdx]),
|
|
cuDescPtr->cu_pointer,
|
|
GetUncompressErrMsg(retCode)),
|
|
handle_in_client(true))));
|
|
|
|
/* remote load cu */
|
|
retCode = GetCUDataFromRemote(cuDescPtr, cuPtr, colIdx, valSize, slotId);
|
|
if (retCode == CU_RELOADING) {
|
|
/* other thread in remote read */
|
|
CUCache->UnPinDataBlock(slotId);
|
|
ereport(LOG,
|
|
(errmodule(MOD_CACHE),
|
|
errmsg("The CU is being reloaded by remote read thread. Retry to load CU! table(%s), "
|
|
"column(%s), relfilenode(%u/%u/%u), cuid(%u)",
|
|
RelationGetRelationName(m_relation), NameStr(m_relation->rd_att->attrs[colIdx]->attname),
|
|
m_relation->rd_node.spcNode, m_relation->rd_node.dbNode, m_relation->rd_node.relNode,
|
|
cuDescPtr->cu_id)));
|
|
goto RETRY_LOAD_CU;
|
|
}
|
|
} else {
|
|
// unlogged table can not remote read
|
|
CUCache->TerminateCU(true);
|
|
ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED),
|
|
(errmsg("invalid CU in cu_id %u of relation %s file %s offset %lu, sync load %s", cuDescPtr->cu_id,
|
|
RelationGetRelationName(m_relation), relcolpath(m_cuStorage[colIdx]), cuDescPtr->cu_pointer,
|
|
GetUncompressErrMsg(retCode)),
|
|
errdetail("Can not remote read for unlogged/temp table. Should truncate table and re-import "
|
|
"data."))));
|
|
}
|
|
}
|
|
|
|
Assert(retCode == CU_OK);
|
|
|
|
if (t_thrd.vacuum_cxt.VacuumCostActive) {
|
|
// cu cache misses, so we update vacuum stats
|
|
t_thrd.vacuum_cxt.VacuumCostBalance += u_sess->attr.attr_storage.VacuumCostPageMiss;
|
|
}
|
|
|
|
CheckConsistenceOfCUData(cuDescPtr, cuPtr, (AttrNumber)(colIdx + 1));
|
|
return cuPtr;
|
|
}
|
|
|
|
/*
|
|
* @Description: Only call by CStore::GetCUData(), for remote load cu
|
|
* @IN/OUT cuDescPtr: cu desc ptr
|
|
* @IN/OUT cuPtr: cu ptr
|
|
* @IN/OUT colIdx: columm idx
|
|
* @IN/OUT slotId: slot id, must be pinned
|
|
* @IN/OUT valSize: value size
|
|
* @Return: CU Uncompressed Return Code
|
|
* @See also: CStore::GetCUData
|
|
*/
|
|
CUUncompressedRetCode CStore::GetCUDataFromRemote(
|
|
CUDesc* cuDescPtr, CU* cuPtr, int colIdx, int valSize, const int& slotId)
|
|
{
|
|
Form_pg_attribute* attrs = m_relation->rd_att->attrs;
|
|
CUUncompressedRetCode retCode = CU_OK;
|
|
|
|
/* reuse memory and check if have some other session is updating it concurrently. */
|
|
if (CUCache->ReserveCstoreDataBlockWithSlotId(slotId)) {
|
|
cuPtr = CUCache->GetCUBuf(slotId);
|
|
cuPtr->m_inCUCache = true;
|
|
cuPtr->SetAttInfo(valSize, attrs[colIdx]->atttypmod, attrs[colIdx]->atttypid);
|
|
|
|
/*
|
|
* remote load need CU compressed. (cuPtr->m_compressedLoadBuf != NULL)
|
|
* if CU uncompressed, means other thread remote read cu already and uncompress it.
|
|
*/
|
|
CUCache->AcquireCompressLock(slotId);
|
|
|
|
if (cuPtr->m_cache_compressed) {
|
|
m_cuStorage[colIdx]->RemoteLoadCU(cuPtr,
|
|
cuDescPtr->cu_pointer,
|
|
cuDescPtr->cu_size,
|
|
g_instance.attr.attr_storage.enable_adio_function,
|
|
true);
|
|
|
|
if (cuPtr->IsVerified(cuDescPtr->magic))
|
|
m_cuStorage[colIdx]->OverwriteCU(
|
|
cuPtr->m_compressedBuf, cuDescPtr->cu_pointer, cuDescPtr->cu_size, false);
|
|
}
|
|
|
|
CUCache->RealeseCompressLock(slotId);
|
|
|
|
CUCache->DataBlockCompleteIO(slotId);
|
|
} else {
|
|
if (CUCache->DataBlockWaitIO(slotId)) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_IO_ERROR),
|
|
errmodule(MOD_CACHE),
|
|
errmsg("There is an IO error when remote read CU in cu_id %u of relation %s file %s offset %lu. "
|
|
"slotId %d, column \"%s\" ",
|
|
cuDescPtr->cu_id,
|
|
RelationGetRelationName(m_relation),
|
|
relcolpath(m_cuStorage[colIdx]),
|
|
cuDescPtr->cu_pointer,
|
|
slotId,
|
|
NameStr(m_relation->rd_att->attrs[colIdx]->attname))));
|
|
}
|
|
}
|
|
|
|
retCode = CUCache->StartUncompressCU(cuDescPtr, slotId, this->m_plan_node_id, this->m_timing_on, ALIGNOF_CUSIZE);
|
|
if (retCode == CU_ERR_CRC || retCode == CU_ERR_MAGIC) {
|
|
/* remote load crc error */
|
|
CUCache->TerminateCU(true);
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
|
(errmsg("invalid CU in cu_id %u of relation %s file %s offset %lu, remote read %s",
|
|
cuDescPtr->cu_id,
|
|
RelationGetRelationName(m_relation),
|
|
relcolpath(m_cuStorage[colIdx]),
|
|
cuDescPtr->cu_pointer,
|
|
GetUncompressErrMsg(retCode)))));
|
|
}
|
|
|
|
return retCode;
|
|
}
|
|
|
|
/*
|
|
* @Description: scan virtual cudesc to calculate row count
|
|
* @Param[IN] col: column id
|
|
* @Param[IN/OUT] loadCUDescInfoPtr: load info ptr for cudesc
|
|
* @Param[IN] snapShot: scan snapShot
|
|
* @Return: true -- need load again; false -- load finish
|
|
* @See also: only called by GetLivedRowNumbers
|
|
*/
|
|
bool CStore::GetCURowCount(_in_ int col, __inout LoadCUDescCtl* loadCUDescInfoPtr, _in_ Snapshot snapShot)
|
|
{
|
|
ScanKeyData key[2];
|
|
HeapTuple tup;
|
|
bool isnull = false;
|
|
bool found = false;
|
|
|
|
Assert(col >= 0);
|
|
Assert(loadCUDescInfoPtr);
|
|
|
|
// we will reset m_perScanMemCnxt when switch to the next batch of cudesc data.
|
|
// so the spaces only used for this batch should be managed by m_perScanMemCnxt.
|
|
AutoContextSwitch newMemCnxt(m_perScanMemCnxt);
|
|
|
|
loadCUDescInfoPtr->lastLoadNum = 0;
|
|
loadCUDescInfoPtr->curLoadNum = 0;
|
|
|
|
CUDesc* cuDescArray = loadCUDescInfoPtr->cuDescArray;
|
|
int attid = m_relation->rd_att->attrs[col]->attnum;
|
|
Relation cudesc_rel = heap_open(m_relation->rd_rel->relcudescrelid, AccessShareLock);
|
|
TupleDesc cudesc_tupdesc = cudesc_rel->rd_att;
|
|
Relation idx_rel = index_open(cudesc_rel->rd_rel->relcudescidx, AccessShareLock);
|
|
|
|
ScanKeyInit(&key[0], (AttrNumber)CUDescColIDAttr, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(attid));
|
|
ScanKeyInit(&key[1],
|
|
(AttrNumber)CUDescCUIDAttr,
|
|
BTGreaterEqualStrategyNumber,
|
|
F_OIDGE,
|
|
UInt32GetDatum(loadCUDescInfoPtr->nextCUID));
|
|
snapShot = (snapShot == NULL) ? GetActiveSnapshot() : snapShot;
|
|
|
|
SysScanDesc cudesc_scan = systable_beginscan_ordered(cudesc_rel, idx_rel, snapShot, 2, key);
|
|
while ((tup = systable_getnext_ordered(cudesc_scan, ForwardScanDirection)) != NULL) {
|
|
uint32 cu_id = DatumGetUInt32(fastgetattr(tup, CUDescCUIDAttr, cudesc_tupdesc, &isnull));
|
|
Assert(!isnull);
|
|
|
|
if (IsDicVCU(cu_id))
|
|
continue;
|
|
|
|
if (!loadCUDescInfoPtr->HasFreeSlot())
|
|
break;
|
|
|
|
cuDescArray[loadCUDescInfoPtr->curLoadNum].cu_id = cu_id;
|
|
loadCUDescInfoPtr->nextCUID = cu_id;
|
|
|
|
cuDescArray[loadCUDescInfoPtr->curLoadNum].row_count =
|
|
DatumGetInt32(fastgetattr(tup, CUDescRowCountAttr, cudesc_tupdesc, &isnull));
|
|
Assert(!isnull);
|
|
|
|
loadCUDescInfoPtr->curLoadNum++;
|
|
found = true;
|
|
}
|
|
|
|
systable_endscan_ordered(cudesc_scan);
|
|
index_close(idx_rel, AccessShareLock);
|
|
heap_close(cudesc_rel, AccessShareLock);
|
|
|
|
if (found) {
|
|
// nextCUID must be greater than loaded cudesc
|
|
loadCUDescInfoPtr->nextCUID++;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Get the lived row numbers of relation.
|
|
*/
|
|
int64 CStore::GetLivedRowNumbers(int64* totaldeadrows)
|
|
{
|
|
int64 rowNumbers = 0;
|
|
LoadCUDescCtl loadInfo(m_startCUID);
|
|
|
|
*totaldeadrows = 0;
|
|
while (GetCURowCount(m_firstColIdx, &loadInfo, m_snapshot)) {
|
|
CUDesc* cuDescArray = loadInfo.cuDescArray;
|
|
for (uint32 i = 0; i < loadInfo.curLoadNum; ++i) {
|
|
GetCUDeleteMaskIfNeed(cuDescArray[i].cu_id, m_snapshot);
|
|
rowNumbers += cuDescArray[i].row_count;
|
|
if (m_hasDeadRow) {
|
|
int nBytes = (cuDescArray[i].row_count + 7) / 8;
|
|
for (int j = 0; j < nBytes; ++j) {
|
|
*totaldeadrows += NumberOfBit1Set[m_cuDelMask[j]];
|
|
rowNumbers -= NumberOfBit1Set[m_cuDelMask[j]];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
loadInfo.Destroy();
|
|
|
|
return rowNumbers;
|
|
}
|
|
|
|
// It is to judge the row whether dead.
|
|
bool CStore::IsDeadRow(uint32 cuid, uint32 row) const
|
|
{
|
|
Assert(cuid == m_delMaskCUId);
|
|
|
|
/* show any tuples including deleted tuples just for analyse */
|
|
if (u_sess->attr.attr_common.XactReadOnly && u_sess->attr.attr_storage.enable_show_any_tuples)
|
|
return false;
|
|
return (m_hasDeadRow && ((m_cuDelMask[row >> 3] & (1 << (row % 8))) != 0));
|
|
}
|
|
|
|
// Run one scan step by dispatching to the member function stored in m_scanFunc.
void CStore::RunScan(_in_ CStoreScanState* state, _out_ VectorBatch* vecBatchOut)
{
    (this->*m_scanFunc)(state, vecBatchOut);
}
|
|
|
|
// unlink cu files: 16385_c1.0 16385_c1.1 16385_c1.2 ...
|
|
static void CStoreUnlinkCuDataFiles(CUStorage* cuStorage)
|
|
{
|
|
int fileId = 0;
|
|
char tmpFileName[MAXPGPATH];
|
|
|
|
while (1) {
|
|
if (!cuStorage->IsDataFileExist(fileId))
|
|
break;
|
|
|
|
cuStorage->GetFileName(tmpFileName, MAXPGPATH, fileId);
|
|
if (unlink(tmpFileName)) {
|
|
ereport(WARNING, (errmsg("could not unlink file \"%s\": %m", tmpFileName)));
|
|
}
|
|
++fileId;
|
|
}
|
|
}
|
|
|
|
// unlink bcm files: 16385_c1_bcm 16385_c1_bcm.1 16385_c1_bcm.2 ...
|
|
static void CStoreUnlinkCuBcmFiles(CUStorage* cuStorage)
|
|
{
|
|
int fileId = 0;
|
|
char tmpFileName[MAXPGPATH];
|
|
|
|
while (1) {
|
|
if (!cuStorage->IsBcmFileExist(fileId))
|
|
break;
|
|
|
|
cuStorage->GetBcmFileName(tmpFileName, fileId);
|
|
if (unlink(tmpFileName)) {
|
|
ereport(WARNING, (errmsg("could not unlink file \"%s\": %m", tmpFileName)));
|
|
}
|
|
++fileId;
|
|
}
|
|
}
|
|
|
|
// invalid column space cache
|
|
void CStore::InvalidRelSpaceCache(RelFileNode* rnode)
|
|
{
|
|
CFileNode cFileNode(*rnode, VirtualSpaceCacheColID, MAIN_FORKNUM);
|
|
CStoreAllocator::InvalidColSpaceCache(cFileNode);
|
|
}
|
|
|
|
/* unlink data file for one column of a relation. */
|
|
void CStore::UnlinkColDataFile(const RelFileNode& rnode, AttrNumber attrnum, bool bcmIncluded)
|
|
{
|
|
Assert(attrnum > 0);
|
|
CFileNode cFileNode(rnode, attrnum, MAIN_FORKNUM);
|
|
|
|
CStoreAllocator::InvalidColSpaceCache(cFileNode);
|
|
|
|
CUStorage cuStorage(cFileNode);
|
|
|
|
if (!t_thrd.xact_cxt.xactDelayDDL) {
|
|
/* unlink data files: C1.0, C1.1 ... */
|
|
CStoreUnlinkCuDataFiles(&cuStorage);
|
|
} else {
|
|
ereport(LOG,
|
|
(errmsg(
|
|
"delay unlinking column file %u/%u/%u att %d", rnode.spcNode, rnode.dbNode, rnode.relNode, attrnum)));
|
|
}
|
|
|
|
if (bcmIncluded) {
|
|
/* unlink bcm file: C1_bcm, C1_bcm.1 ... */
|
|
CStoreUnlinkCuBcmFiles(&cuStorage);
|
|
}
|
|
|
|
cuStorage.Destroy();
|
|
}
|
|
|
|
/* DONT call in redo */
|
|
void CStore::CreateStorage(Relation rel, Oid newRelFileNode)
|
|
{
|
|
TupleDesc desc = RelationGetDescr(rel);
|
|
int nattrs = desc->natts;
|
|
Form_pg_attribute* attrs = desc->attrs;
|
|
char relpersistence = rel->rd_rel->relpersistence;
|
|
|
|
RelFileNode rd_node = rel->rd_node;
|
|
if (OidIsValid(newRelFileNode)) {
|
|
// use the new filenode if *newRelFileNode* is valid.
|
|
rd_node.relNode = newRelFileNode;
|
|
}
|
|
|
|
for (int i = 0; i < nattrs; i++) {
|
|
if (attrs[i]->attisdropped)
|
|
continue;
|
|
int attrid = attrs[i]->attnum;
|
|
|
|
CFileNode cnode(rd_node, attrid, MAIN_FORKNUM);
|
|
|
|
// create cu file in disk.
|
|
CUStorage* custorage = New(CurrentMemoryContext) CUStorage(cnode);
|
|
Assert(custorage);
|
|
custorage->CreateStorage(0, false);
|
|
DELETE_EX(custorage);
|
|
|
|
// log and insert into the pending delete list.
|
|
CStoreRelCreateStorage(&rd_node, attrid, relpersistence, rel->rd_rel->relowner);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* @Description: truncate column data files which relation CREATE and TRUNCATE in same XACT block
|
|
* @IN rel: column relation
|
|
*/
|
|
void CStore::TruncateStorageInSameXact(Relation rel)
|
|
{
|
|
TupleDesc desc = RelationGetDescr(rel);
|
|
int nattrs = desc->natts;
|
|
Form_pg_attribute* attrs = desc->attrs;
|
|
RelFileNode rd_node = rel->rd_node;
|
|
uint64 totalSize = 0;
|
|
|
|
/*
|
|
* This relfilenode will be truncated, so we should invaild the blocks at
|
|
* the bcm element array.
|
|
*/
|
|
BCMArrayDropAllBlocks(rd_node);
|
|
|
|
/* make CU data cache invalid before truncate its data file */
|
|
CUCache->DropRelationCUCache(rd_node);
|
|
|
|
for (int i = 0; i < nattrs; i++) {
|
|
int attrid = attrs[i]->attnum;
|
|
CFileNode cnode(rd_node, attrid, MAIN_FORKNUM);
|
|
|
|
// calculate each column size
|
|
totalSize += GetSMgrRelSize(&rd_node, rel->rd_backend, (attrid + FirstColForkNum));
|
|
|
|
// invalid column space cache
|
|
CStoreAllocator::InvalidColSpaceCache(cnode);
|
|
|
|
CUStorage* custorage = New(CurrentMemoryContext) CUStorage(cnode);
|
|
|
|
// data file
|
|
custorage->TruncateDataFile();
|
|
|
|
// bcm buffer
|
|
RelFileNodeBackend relfilenodebackend = {rd_node, rel->rd_backend};
|
|
DropRelFileNodeBuffers(relfilenodebackend, ColumnId2ColForkNum(attrid), 0);
|
|
|
|
// bcm
|
|
custorage->TruncateBcmFile();
|
|
|
|
DELETE_EX(custorage);
|
|
}
|
|
|
|
// decrease the permanent space on users' record
|
|
perm_space_decrease(rel->rd_rel->relowner, totalSize, RelationUsesSpaceType(rel->rd_rel->relpersistence));
|
|
}
|
|
|
|
/*
|
|
* @Describe: Get max and min value from cu.
|
|
*
|
|
* @in - cuDescPtr Cu information
|
|
* @in - pos current number of rows.
|
|
* @out - vec ScalarVector struct, storage a column data.
|
|
*/
|
|
void CStore::FillColMinMax(CUDesc* cuDescPtr, ScalarVector* vec, int pos)
|
|
{
|
|
if (!cuDescPtr->IsNullCU()) {
|
|
vec->m_vals[pos] = *(ScalarValue*)cuDescPtr->cu_min;
|
|
vec->m_vals[pos + 1] = *(ScalarValue*)cuDescPtr->cu_max;
|
|
} else {
|
|
vec->SetNull(pos);
|
|
vec->SetNull(pos + 1);
|
|
}
|
|
|
|
vec->m_rows = pos + 2;
|
|
}
|
|
|
|
/*
|
|
* @Describe: We only read cudesc for getting min/max value if no dead rows
|
|
* Bypass optimization for special SQL case which are like 'select min(col1), max(col2) from t'
|
|
* @in - state CStore Scan State.
|
|
* @out - vecBatchOut store data struct.
|
|
*/
|
|
void CStore::CStoreMinMaxScan(_in_ CStoreScanState* state, _out_ VectorBatch* vecBatchOut)
|
|
{
|
|
int pos = 0;
|
|
|
|
/*
|
|
* Step 1: The number of holding CUDesc is max_loaded_cudesc
|
|
* if we load all CUDesc once, the memory will not enough.
|
|
* So we load CUdesc once for max_loaded_cudesc
|
|
*/
|
|
CSTORESCAN_TRACE_START(LOAD_CU_DESC);
|
|
LoadCUDescIfNeed();
|
|
CSTORESCAN_TRACE_END(LOAD_CU_DESC);
|
|
|
|
CSTORESCAN_TRACE_START(MIN_MAX_CHECK);
|
|
RoughCheckIfNeed(state);
|
|
CSTORESCAN_TRACE_END(MIN_MAX_CHECK);
|
|
|
|
/* Step 2: Is end of scan ? */
|
|
if (IsEndScan())
|
|
return;
|
|
|
|
/*
|
|
* Step 3: Fill min/max if cudesc has min/max, or fill all data.
|
|
* case1. The cudesc of all columns have min/max and no dead rows
|
|
* case2. There are dead rows
|
|
* case3. Some columns don't have min/max at all now. And some columns have.
|
|
* for example, numeric column.
|
|
* description: future plan-Save min/max for numeric column when load data
|
|
*/
|
|
int idx = m_CUDescIdx[m_cursor];
|
|
int maxVecRows = 0;
|
|
bool needFixRows = false, onlyFillMinMax = true;
|
|
int deadRows = 0;
|
|
|
|
CSTORESCAN_TRACE_START(FILL_BATCH);
|
|
for (int i = 0; i < m_colNum; ++i) {
|
|
int colIdx = m_colId[i];
|
|
|
|
ScalarVector* vec = vecBatchOut->m_arr + colIdx;
|
|
CUDesc* cuDescPtr = m_CUDescInfo[i]->cuDescArray + idx;
|
|
GetCUDeleteMaskIfNeed(cuDescPtr->cu_id, m_snapshot);
|
|
|
|
Assert(0 == pos);
|
|
if (!m_hasDeadRow && !cuDescPtr->IsNoMinMaxCU() && this->m_fillMinMaxFunc[i]) {
|
|
(this->*m_fillMinMaxFunc[i])(cuDescPtr, vec, pos);
|
|
} else {
|
|
int funIdx = m_hasDeadRow ? 1 : 0;
|
|
deadRows = (this->*m_colFillFunArrary[i].colFillFun[funIdx])(i, cuDescPtr, vec);
|
|
if (vec->m_rows > 1)
|
|
onlyFillMinMax = false;
|
|
}
|
|
|
|
/* We need fix if exist rows be not inconformity, and mark max rows. */
|
|
if (vec->m_rows != maxVecRows) {
|
|
if (0 != maxVecRows)
|
|
needFixRows = true;
|
|
|
|
if (vec->m_rows > maxVecRows)
|
|
maxVecRows = vec->m_rows;
|
|
}
|
|
}
|
|
CSTORESCAN_TRACE_END(FILL_BATCH);
|
|
|
|
/*
|
|
* Step 4: Fix the m_rows of VecBatch and padding value for some column if need.
|
|
* Because some column only fill min/max and some column fill batch data for some
|
|
* special corner cases
|
|
*/
|
|
if (needFixRows) {
|
|
for (int i = 0; i < m_colNum; ++i) {
|
|
int colIdx = m_colId[i];
|
|
ScalarVector* vec = vecBatchOut->m_arr + colIdx;
|
|
if (vec->m_rows != maxVecRows) {
|
|
for (int k = vec->m_rows; k < maxVecRows; ++k) {
|
|
/* Fix rows, set this value to null which is not affect min/max result. */
|
|
vec->SetNull(k);
|
|
}
|
|
}
|
|
vec->m_rows = maxVecRows;
|
|
}
|
|
}
|
|
|
|
vecBatchOut->m_rows = maxVecRows;
|
|
|
|
/* Step 5: Refresh cursor if need */
|
|
if (onlyFillMinMax) {
|
|
IncLoadCuDescIdx(m_cursor);
|
|
m_rowCursorInCU = 0;
|
|
if (likely(m_scanPosInCU != NULL)) {
|
|
Assert(m_colNum > 0);
|
|
errno_t rc = memset_s(m_scanPosInCU, sizeof(int) * m_colNum, 0, sizeof(int) * m_colNum);
|
|
securec_check(rc, "", "");
|
|
}
|
|
|
|
} else {
|
|
RefreshCursor(vecBatchOut->m_rows, deadRows);
|
|
}
|
|
|
|
pos = vecBatchOut->m_rows;
|
|
Assert(pos <= BatchMaxSize);
|
|
}
|
|
|
|
/*
|
|
* @Description: get cu xmin by cu id
|
|
* @IN cuid: cu id
|
|
* @Return: cu xmin
|
|
*/
|
|
TransactionId CStore::GetCUXmin(uint32 cuid)
|
|
{
|
|
// get first not dropped column
|
|
int colid = CStoreGetfstColIdx(m_relation);
|
|
|
|
// cu xmin use the cudec record xmin which column 0
|
|
CUDesc cudesc;
|
|
bool found = this->GetCUDesc(colid, cuid, &cudesc, m_snapshot);
|
|
if (!found) {
|
|
Assert(false);
|
|
ereport(FATAL,
|
|
(errmsg("compression unit descriptor(talbe \"%s\", column \"%s\", cuid %u) not found",
|
|
RelationGetRelationName(m_relation),
|
|
NameStr(m_relation->rd_att->attrs[colid]->attname),
|
|
cuid)));
|
|
}
|
|
|
|
return cudesc.xmin;
|
|
}
|
|
|
|
/*
|
|
* @Description: return the col idx which is filled with ctid.
|
|
* @Return: m_laterReadCtidColIdx
|
|
*/
|
|
int CStore::GetLateReadCtid() const
|
|
{
|
|
return m_laterReadCtidColIdx;
|
|
}
|
|
|
|
void CStore::CheckConsistenceOfCUDesc(int cudescIdx) const
|
|
{
|
|
CUDesc* firstCUDesc = m_CUDescInfo[0]->cuDescArray + cudescIdx;
|
|
CUDesc* checkCUDesc = NULL;
|
|
for (int col = 1; col < m_colNum; ++col) {
|
|
checkCUDesc = m_CUDescInfo[col]->cuDescArray + cudescIdx;
|
|
if (checkCUDesc->cu_id == firstCUDesc->cu_id && checkCUDesc->row_count == firstCUDesc->row_count) {
|
|
continue;
|
|
}
|
|
ereport(defence_errlevel(),
|
|
(errcode(ERRCODE_INTERNAL_ERROR),
|
|
errmsg(
|
|
"Inconsistent of CUDesc(table column, CUDesc index, CU id, number of rows) during batch loading, "
|
|
"CUDesc[%d] (%d %d %u %d), CUDesc[%d] (%d %d %u %d)",
|
|
0,
|
|
(m_colId[0] + 1),
|
|
cudescIdx,
|
|
firstCUDesc->cu_id,
|
|
firstCUDesc->row_count,
|
|
col,
|
|
(m_colId[col] + 1),
|
|
cudescIdx,
|
|
checkCUDesc->cu_id,
|
|
checkCUDesc->row_count),
|
|
errdetail("relation info: name \"%s\", namespace id %u, id %u, relfilenode %u/%u/%u",
|
|
RelationGetRelationName(m_relation),
|
|
RelationGetNamespace(m_relation),
|
|
RelationGetRelid(m_relation),
|
|
m_relation->rd_node.spcNode,
|
|
m_relation->rd_node.dbNode,
|
|
m_relation->rd_node.relNode)));
|
|
}
|
|
}
|
|
|
|
void CStore::CheckConsistenceOfCUDescCtl(void)
|
|
{
|
|
LoadCUDescCtl* firstCUDescCtl = m_CUDescInfo[0];
|
|
LoadCUDescCtl* checkCUdescCtl = NULL;
|
|
|
|
for (int i = 1; i < m_colNum; ++i) {
|
|
checkCUdescCtl = m_CUDescInfo[i];
|
|
if (checkCUdescCtl->nextCUID == firstCUDescCtl->nextCUID &&
|
|
checkCUdescCtl->lastLoadNum == firstCUDescCtl->lastLoadNum &&
|
|
checkCUdescCtl->curLoadNum == firstCUDescCtl->curLoadNum) {
|
|
continue;
|
|
}
|
|
ereport(defence_errlevel(),
|
|
(errcode(ERRCODE_INTERNAL_ERROR),
|
|
errmsg("Inconsistent of CUDescCtl(table column, next CU ID, last load number, current load number) "
|
|
"during batch loading, "
|
|
"CUDescCtl[%d] is (%d %u %u %u), CUDescCtl[%d] is (%d %u %u %u)",
|
|
0,
|
|
(m_colId[0] + 1),
|
|
firstCUDescCtl->nextCUID,
|
|
firstCUDescCtl->lastLoadNum,
|
|
firstCUDescCtl->curLoadNum,
|
|
i,
|
|
(m_colId[i] + 1),
|
|
checkCUdescCtl->nextCUID,
|
|
checkCUdescCtl->lastLoadNum,
|
|
checkCUdescCtl->curLoadNum),
|
|
errdetail("relation info: name \"%s\", namespace id %u, id %u, relfilenode %u/%u/%u",
|
|
RelationGetRelationName(m_relation),
|
|
RelationGetNamespace(m_relation),
|
|
RelationGetRelid(m_relation),
|
|
m_relation->rd_node.spcNode,
|
|
m_relation->rd_node.dbNode,
|
|
m_relation->rd_node.relNode)));
|
|
}
|
|
}
|
|
|
|
void CStore::IncLoadCuDescCursor()
|
|
{
|
|
m_cursor++;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Init the things needed by delta scan
|
|
*
|
|
* @in node: cstore scan state
|
|
* @in snapshot: the snapshot used to scan delta relation
|
|
* @no return
|
|
*/
|
|
void InitScanDeltaRelation(CStoreScanState* node, Snapshot snapshot)
|
|
{
|
|
Relation deltaRelation;
|
|
TableScanDesc deltaScanDesc;
|
|
Relation cstoreRel = node->ss_currentRelation;
|
|
|
|
if (node->ss_currentRelation == NULL)
|
|
return;
|
|
|
|
deltaRelation = heap_open(cstoreRel->rd_rel->reldeltarelid, AccessShareLock);
|
|
deltaScanDesc = tableam_scan_begin(deltaRelation, snapshot, 0, NULL);
|
|
|
|
node->ss_currentDeltaRelation = deltaRelation;
|
|
node->ss_currentDeltaScanDesc = deltaScanDesc;
|
|
node->ss_deltaScan = false;
|
|
node->ss_deltaScanEnd = false;
|
|
ExecAssignScanType(node, RelationGetDescr(deltaRelation));
|
|
}
|
|
|
|
/*
|
|
* Fill the junk(system) columns for delta scan
|
|
*
|
|
* @in sysIdx: the idx of system column
|
|
* @inout outBatch: the vector batch to fill
|
|
* @in slot: the tuple slot from delta relation
|
|
* @in deltaRelation: the delta relation
|
|
* @no return
|
|
*/
|
|
static void FillDeltaSysColumn(int sysIdx, VectorBatch* outBatch, TupleTableSlot* slot, Relation deltaRelation)
|
|
{
|
|
ScalarVector* destVec = outBatch->GetSysVector(sysIdx);
|
|
ScalarValue* destValue = destVec->m_vals;
|
|
int idx = outBatch->m_rows;
|
|
switch (sysIdx) {
|
|
case SelfItemPointerAttributeNumber: {
|
|
destValue[idx] = 0;
|
|
ItemPointer destTid = (ItemPointer)(destValue + idx);
|
|
*destTid = ((HeapTuple) slot->tts_tuple)->t_self;
|
|
break;
|
|
}
|
|
case XC_NodeIdAttributeNumber: {
|
|
destValue[idx] = u_sess->pgxc_cxt.PGXCNodeIdentifier;
|
|
break;
|
|
}
|
|
case TableOidAttributeNumber: {
|
|
destValue[idx] = RelationGetRelid(deltaRelation);
|
|
break;
|
|
}
|
|
case MinTransactionIdAttributeNumber: {
|
|
destValue[idx] = HeapTupleGetRawXmin((HeapTuple)slot->tts_tuple);
|
|
break;
|
|
}
|
|
default:
|
|
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("Column store don't support")));
|
|
break;
|
|
}
|
|
destVec->m_rows = idx + 1;
|
|
}
|
|
|
|
/*
|
|
* put one tuple slot into the out vector batch
|
|
*
|
|
* @in node: cstore scan state
|
|
* @inout outBatch: the vector batch to fill
|
|
* @in slot: the tuple slot from delta relation
|
|
* @in tmpContext: the context which is used for per tuple
|
|
* @Return true if outBatch is full, else return false.
|
|
*/
|
|
static bool FillOneDeltaTuple(
|
|
CStoreScanState* node, VectorBatch* outBatch, TupleTableSlot* slot, MemoryContext tmpContext)
|
|
{
|
|
int sysIndex = 0;
|
|
ListCell* lc = NULL;
|
|
List* sysVarList = node->ps.ps_ProjInfo->pi_sysAttrList;
|
|
|
|
/* Fill the sys columns */
|
|
foreach (lc, sysVarList) {
|
|
sysIndex = lfirst_int(lc);
|
|
FillDeltaSysColumn(sysIndex, outBatch, slot, node->ss_currentDeltaRelation);
|
|
}
|
|
|
|
/* Fill the normal columns */
|
|
return VectorizeOneTuple(outBatch, slot, tmpContext);
|
|
}
|
|
|
|
/*
|
|
* scan the delta relation and fill the vector batch
|
|
*
|
|
* @in node: cstore scan state
|
|
* @inout outBatch: the vector batch to fill
|
|
* @in indexqual: the original qual on index columns used
|
|
* @no return
|
|
*/
|
|
void ScanDeltaStore(CStoreScanState* node, VectorBatch* outBatch, List* indexqual)
|
|
{
|
|
if (node->ss_deltaScanEnd)
|
|
return;
|
|
|
|
/* For SMP, only the first thread scan delta table. */
|
|
if (u_sess->stream_cxt.smp_id != 0)
|
|
return;
|
|
|
|
bool hasIndexFilter = (list_length(indexqual) > 0);
|
|
HeapTuple tuple = NULL;
|
|
TupleTableSlot* slot = node->ss_ScanTupleSlot;
|
|
TableScanDesc scandesc = (TableScanDesc)(node->ss_currentDeltaScanDesc);
|
|
ExprContext* econtext = node->ps.ps_ExprContext;
|
|
ResetExprContext(econtext);
|
|
|
|
while (true) {
|
|
tuple = (HeapTuple) tableam_scan_getnexttuple(scandesc, ForwardScanDirection);
|
|
|
|
if (tuple != NULL) {
|
|
(void)ExecStoreTuple(tuple, /* tuple to store */
|
|
slot, /* slot to store in */
|
|
scandesc->rs_cbuf, /* buffer associated with this tuple */
|
|
false); /* pfree this pointer */
|
|
|
|
/* If there is index qual, use it to filter the delta rows before */
|
|
if (hasIndexFilter) {
|
|
econtext->ecxt_scantuple = slot;
|
|
if (!ExecQual(indexqual, econtext, false)) {
|
|
(void)ExecClearTuple(slot);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
/* put the tuple into out batch */
|
|
if (FillOneDeltaTuple(node, outBatch, slot, econtext->ecxt_per_tuple_memory)) {
|
|
(void)ExecClearTuple(slot);
|
|
break;
|
|
}
|
|
} else {
|
|
(void)ExecClearTuple(slot);
|
|
node->ss_deltaScanEnd = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!BatchIsNull(outBatch))
|
|
node->ss_deltaScan = true;
|
|
}
|
|
|
|
/*
|
|
* clean the scan desc and close the delta relation
|
|
*
|
|
* @in node: cstore scan state
|
|
* @no return
|
|
*/
|
|
void EndScanDeltaRelation(CStoreScanState* node)
|
|
{
|
|
if (node->ss_currentDeltaScanDesc) {
|
|
tableam_scan_end((TableScanDesc)(node->ss_currentDeltaScanDesc));
|
|
ExecCloseScanRelation(node->ss_currentDeltaRelation);
|
|
}
|
|
}
|
|
|
|
// CStoreHeapBeginScan - begin cstore relation scan
|
|
// Initialize CStoreScanDesc data structure
|
|
CStoreScanDesc CStoreBeginScan(Relation relation, int colNum, int16* colIdx, Snapshot snapshot, bool scanDelta)
|
|
{
|
|
Assert(colNum > 0 && colIdx);
|
|
// Step 1: Create CStoreScanState structure
|
|
CStoreScanDesc scanstate;
|
|
scanstate = makeNode(CStoreScanState);
|
|
scanstate->ps.plan = (Plan*)makeNode(CStoreScan);
|
|
scanstate->ps.ps_ProjInfo = makeNode(ProjectionInfo);
|
|
ProjectionInfo* projInfo = scanstate->ps.ps_ProjInfo;
|
|
|
|
// Step 2: Construct accessed columns
|
|
List *accessAttrList = NULL;
|
|
List *sysAttrList = NULL;
|
|
for (int i = 0; i < colNum; ++i) {
|
|
/* dropped column and not system column */
|
|
if (colIdx[i] >= 0 && relation->rd_att->attrs[colIdx[i] - 1]->attisdropped)
|
|
continue;
|
|
if (colIdx[i] >= 0)
|
|
accessAttrList = lappend_int(accessAttrList, colIdx[i]);
|
|
else
|
|
sysAttrList = lappend_int(sysAttrList, colIdx[i]);
|
|
}
|
|
projInfo->pi_acessedVarNumbers = accessAttrList;
|
|
projInfo->pi_sysAttrList = sysAttrList;
|
|
projInfo->pi_const = false;
|
|
|
|
// Step 3: Init CStoreScan
|
|
scanstate->ss_currentRelation = relation;
|
|
scanstate->csss_NumScanKeys = 0;
|
|
scanstate->csss_ScanKeys = NULL;
|
|
/*
|
|
* increment relation ref count while scanning relation
|
|
*
|
|
* This is just to make really sure the relcache entry won't go away while
|
|
* the scan has a pointer to it. Caller should be holding the rel open
|
|
* anyway, so this is redundant in all normal scenarios...
|
|
*/
|
|
RelationIncrementReferenceCount(relation);
|
|
scanstate->m_CStore = New(CurrentMemoryContext) CStore();
|
|
scanstate->m_CStore->InitScan(scanstate, snapshot);
|
|
|
|
// Step 4: Initialize scanBatch
|
|
scanstate->m_pScanBatch =
|
|
New(CurrentMemoryContext) VectorBatch(CurrentMemoryContext, scanstate->ss_currentRelation->rd_att);
|
|
if (projInfo->pi_sysAttrList)
|
|
scanstate->m_pScanBatch->CreateSysColContainer(CurrentMemoryContext, projInfo->pi_sysAttrList);
|
|
|
|
// Step5: Initialize delta scan
|
|
if (scanDelta) {
|
|
scanstate->ss_ScanTupleSlot = MakeTupleTableSlot();
|
|
ExprContext* econtext = makeNode(ExprContext);
|
|
econtext->ecxt_per_tuple_memory = AllocSetContextCreate(CurrentMemoryContext,
|
|
"cstore delta scan",
|
|
ALLOCSET_DEFAULT_MINSIZE,
|
|
ALLOCSET_DEFAULT_INITSIZE,
|
|
ALLOCSET_DEFAULT_MAXSIZE);
|
|
scanstate->ps.ps_ExprContext = econtext;
|
|
InitScanDeltaRelation(scanstate, (snapshot != NULL ? snapshot : GetActiveSnapshot()));
|
|
} else {
|
|
scanstate->ss_currentDeltaRelation = NULL;
|
|
scanstate->ss_currentDeltaScanDesc = NULL;
|
|
scanstate->ss_deltaScan = false;
|
|
scanstate->ss_deltaScanEnd = true;
|
|
}
|
|
|
|
return scanstate;
|
|
}
|
|
|
|
/*
|
|
* GetCStoreNextBatch
|
|
* We can call these function like this: CStoreScanDesc cstoreScanDesc = CStoreBeginScan();
|
|
*/
|
|
VectorBatch* CStoreGetNextBatch(CStoreScanDesc cstoreScanState)
|
|
{
|
|
VectorBatch* vecBatch = cstoreScanState->m_pScanBatch;
|
|
vecBatch->Reset();
|
|
cstoreScanState->m_CStore->RunScan(cstoreScanState, vecBatch);
|
|
|
|
/* scan delta table */
|
|
if (cstoreScanState->m_CStore->IsEndScan() && BatchIsNull(vecBatch)) {
|
|
ScanDeltaStore(cstoreScanState, vecBatch, NULL);
|
|
Assert(vecBatch != NULL);
|
|
vecBatch->FixRowCount();
|
|
}
|
|
|
|
return vecBatch;
|
|
}
|
|
|
|
/*
|
|
* @Description: Scan an entire "row" (same CUID) of CU and Corresponding CUDescs
|
|
* and bitmaps into BatchCUData.
|
|
* Used so far on CStore partition merging.
|
|
* Supports ADIO.
|
|
* Attention: by using CUStorage->LoadCU we use CStoreMemAlloc::Palloc to
|
|
* palloc CU_ptr(s) assigned to BatchCUData->CUptrData. So
|
|
* they have to be CStoreMemAlloc::Pfree later when finished.
|
|
* @See also: ATExecCStoreMergePartition
|
|
*/
|
|
void CStoreScanNextTrunkOfCU(_in_ CStoreScanDesc cstoreScanState, __inout BatchCUData* tmpCUData)
|
|
{
|
|
cstoreScanState->m_CStore->CStoreScanWithCU(cstoreScanState, tmpCUData);
|
|
}
|
|
|
|
/* The scan is finished when both the CU scan and the delta scan have ended. */
FORCE_INLINE
bool CStoreIsEndScan(CStoreScanDesc cstoreScanState)
{
    if (!cstoreScanState->m_CStore->IsEndScan()) {
        return false;
    }
    return cstoreScanState->ss_deltaScanEnd;
}
|
|
|
|
// Clean up cstoreScanState memory
|
|
// NOTICE: VectorBatch must clean by MemoryContext
|
|
void CStoreEndScan(CStoreScanDesc cstoreScanState)
|
|
{
|
|
ProjectionInfo* projInfo = cstoreScanState->ps.ps_ProjInfo;
|
|
/*
|
|
* decrement relation reference count and free scan descriptor storage
|
|
*/
|
|
RelationDecrementReferenceCount(cstoreScanState->ss_currentRelation);
|
|
|
|
// Free memory
|
|
Assert(cstoreScanState->m_CStore != NULL);
|
|
Assert(projInfo);
|
|
DELETE_EX(cstoreScanState->m_CStore);
|
|
// NOTICE: will not clean memory which alloc in memeory context
|
|
delete cstoreScanState->m_pScanBatch;
|
|
cstoreScanState->m_pScanBatch = NULL;
|
|
|
|
// end scan delta table
|
|
if (cstoreScanState->ss_currentDeltaRelation != NULL) {
|
|
ExecDropSingleTupleTableSlot(cstoreScanState->ss_ScanTupleSlot);
|
|
ExecFreeExprContext(&cstoreScanState->ps);
|
|
EndScanDeltaRelation(cstoreScanState);
|
|
}
|
|
|
|
if (projInfo->pi_acessedVarNumbers) {
|
|
list_free(projInfo->pi_acessedVarNumbers);
|
|
}
|
|
|
|
if (projInfo->pi_sysAttrList) {
|
|
list_free(projInfo->pi_sysAttrList);
|
|
}
|
|
|
|
if (projInfo->pi_PackLateAccessVarNumbers) {
|
|
list_free(projInfo->pi_PackLateAccessVarNumbers);
|
|
}
|
|
pfree_ext(projInfo);
|
|
pfree_ext(cstoreScanState->ps.plan);
|
|
}
|
|
|
|
// CStoreRelGetCUNum
|
|
// Get CU numbers of relation by now
|
|
uint32 CStoreRelGetCUNumByNow(CStoreScanDesc cstoreScanState)
|
|
{
|
|
ScanKeyData key;
|
|
HeapTuple tup;
|
|
bool isnull = false;
|
|
Relation relation = cstoreScanState->m_CStore->m_relation;
|
|
|
|
/*
|
|
* Open the CUDesc relation and its index
|
|
*/
|
|
Relation cudesc_rel = heap_open(relation->rd_rel->relcudescrelid, AccessShareLock);
|
|
TupleDesc cudesc_tupdesc = cudesc_rel->rd_att;
|
|
Relation idx_rel = index_open(cudesc_rel->rd_rel->relcudescidx, AccessShareLock);
|
|
|
|
int attid = relation->rd_att->attrs[0]->attnum;
|
|
|
|
if (relation->rd_att->attrs[0]->attisdropped) {
|
|
int fstColIdx = CStoreGetfstColIdx(relation);
|
|
attid = relation->rd_att->attrs[fstColIdx]->attnum;
|
|
}
|
|
|
|
/* Setup scan key to fetch from the index by col_id. */
|
|
ScanKeyInit(&key, (AttrNumber)CUDescColIDAttr, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(attid));
|
|
|
|
SysScanDesc cudesc_scan = systable_beginscan_ordered(cudesc_rel, idx_rel, SnapshotNow, 1, &key);
|
|
|
|
uint32 max_cuid = FirstCUID;
|
|
|
|
/*
|
|
* Optimize for geting last CU description of column.
|
|
* Use BackwardScanDirection scan
|
|
*/
|
|
if ((tup = systable_getnext_ordered(cudesc_scan, BackwardScanDirection)) != NULL) {
|
|
max_cuid = DatumGetUInt32(fastgetattr(tup, CUDescCUIDAttr, cudesc_tupdesc, &isnull));
|
|
}
|
|
systable_endscan_ordered(cudesc_scan);
|
|
index_close(idx_rel, AccessShareLock);
|
|
heap_close(cudesc_rel, AccessShareLock);
|
|
|
|
return (max_cuid - FirstCUID);
|
|
}
|
|
|
|
/*
|
|
* @Description: Delete the column information from the cudesc table for a CStore table.
|
|
* It's used for Alter CStore Table Drop Column
|
|
* @Param[IN] rel: the target relation
|
|
* @Param[IN] attrnum: column attrnum to be dropped
|
|
* @Return: void
|
|
* @See also:
|
|
*/
|
|
void CStoreDropColumnInCuDesc(Relation rel, AttrNumber attrnum)
|
|
{
|
|
ScanKeyData key[1];
|
|
SysScanDesc scan;
|
|
HeapTuple tup;
|
|
Oid cudescOid = rel->rd_rel->relcudescrelid;
|
|
Relation cudescHeap = heap_open(cudescOid, RowExclusiveLock);
|
|
|
|
int attrno = attrnum;
|
|
ScanKeyInit(&key[0], (AttrNumber)CUDescColIDAttr, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(attrno));
|
|
|
|
scan = systable_beginscan(cudescHeap, rel->rd_rel->relcudescidx, false, NULL, 1, key);
|
|
|
|
while (HeapTupleIsValid(tup = systable_getnext(scan))) {
|
|
simple_heap_delete(cudescHeap, &tup->t_self);
|
|
}
|
|
|
|
systable_endscan(scan);
|
|
|
|
heap_close(cudescHeap, RowExclusiveLock);
|
|
}
|
|
|
|
/*
|
|
* @Description: get the first colum index that is not dropped
|
|
* @Param[IN] rel: the target relation
|
|
* @Return: the first column index that is not dropped
|
|
* @See also:
|
|
*/
|
|
int CStoreGetfstColIdx(Relation rel)
|
|
{
|
|
for (int i = 0; i < rel->rd_att->natts; i++) {
|
|
if (!rel->rd_att->attrs[i]->attisdropped)
|
|
return i;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* covert cu pointer to bigint
|
|
*
|
|
* VCU will not covert. and maybe cover to negative value
|
|
*/
|
|
Datum cupointer_bigint(PG_FUNCTION_ARGS)
|
|
{
|
|
/* physical length of a text* */
|
|
Datum text_datum = PG_GETARG_DATUM(0);
|
|
Size text_size = (toast_raw_datum_size(text_datum) - VARHDRSZ);
|
|
|
|
CUPointer cu_pointer = 0;
|
|
|
|
/* size == sizeof(CUPointer) AND not toast tuple */
|
|
if (text_size != sizeof(CUPointer) || !VARATT_IS_SHORT(text_datum)) {
|
|
/* not CUPointer */
|
|
PG_RETURN_INT64(0);
|
|
}
|
|
|
|
int rc = memcpy_s(&cu_pointer, sizeof(CUPointer), VARDATA_ANY(text_datum), sizeof(CUPointer));
|
|
securec_check(rc, "", "");
|
|
Assert(VARSIZE_ANY_EXHDR(text_datum) == sizeof(CUPointer));
|
|
|
|
PG_RETURN_INT64(cu_pointer);
|
|
}
|
|
|
|
/*
 * Open the cudesc table of the given relation together with its index (both with
 * AccessShareLock) and prepare the two equality scan keys used by CheckItemIsAlive().
 * The relations stay open until Destroy() is called.
 */
CUDescScan::CUDescScan(_in_ Relation relation)
    : m_cudesc(NULL),
      m_cudescIndex(NULL),
      m_snapshot(NULL),
      m_cuids(NIL),
      m_deletemasks(NIL),
      m_needFreeMasks(NIL),
      m_valids(NIL)
{
    /* The index id is read from the opened cudesc relation, so the table must be opened first. */
    m_cudesc = heap_open(relation->rd_rel->relcudescrelid, AccessShareLock);
    m_cudescIndex = index_open(m_cudesc->rd_rel->relcudescidx, AccessShareLock);

    /*
     * m_scanKey[0]: col_id = VitrualDelColID, fixed for the lifetime of this object.
     * m_scanKey[1]: cu_id, initialised with a placeholder 0 here; the real CUID is
     * stored into sk_argument right before each scan.
     */
    ScanKeyInit(&m_scanKey[0],
                (AttrNumber)CUDescColIDAttr,
                BTEqualStrategyNumber,
                F_INT4EQ,
                Int32GetDatum(VitrualDelColID));
    ScanKeyInit(&m_scanKey[1],
                (AttrNumber)CUDescCUIDAttr,
                BTEqualStrategyNumber,
                F_OIDEQ,
                UInt32GetDatum(0));
}
|
|
|
|
/*
 * The destructor only clears the members. Closing the relations and freeing the
 * cached lists is done by Destroy(), not here.
 */
CUDescScan::~CUDescScan()
{
    /* cache lists */
    m_valids = NIL;
    m_needFreeMasks = NIL;
    m_deletemasks = NIL;
    m_cuids = NIL;

    /* borrowed snapshot and relation handles */
    m_snapshot = NULL;
    m_cudescIndex = NULL;
    m_cudesc = NULL;
}
|
|
|
|
void CUDescScan::FreeCache()
|
|
{
|
|
ListCell* needFreeMaskCell = NULL;
|
|
ListCell* maskCell = NULL;
|
|
forboth (needFreeMaskCell, m_needFreeMasks, maskCell, m_deletemasks) {
|
|
if ((bool)lfirst_int(needFreeMaskCell)) {
|
|
pfree(lfirst(maskCell));
|
|
}
|
|
}
|
|
|
|
list_free_ext(m_cuids);
|
|
list_free_ext(m_deletemasks);
|
|
list_free_ext(m_needFreeMasks);
|
|
list_free_ext(m_valids);
|
|
}
|
|
|
|
void CUDescScan::Destroy()
|
|
{
|
|
index_close(m_cudescIndex, AccessShareLock);
|
|
heap_close(m_cudesc, AccessShareLock);
|
|
FreeCache();
|
|
}
|
|
|
|
/*
 * Switch to another snapshot for the following CheckItemIsAlive() calls.
 * The cached results were produced under the previous snapshot, so they are dropped.
 */
void CUDescScan::ResetSnapshot(Snapshot snapshot)
{
    FreeCache();
    m_snapshot = snapshot;
}
|
|
|
|
bool CUDescScan::CheckAliveInCache(uint32 CUId, uint32 rownum, bool* found)
|
|
{
|
|
ListCell* cuidCell = NULL;
|
|
ListCell* delmaskCell = NULL;
|
|
ListCell* validCell = NULL;
|
|
|
|
forthree (cuidCell, m_cuids, delmaskCell, m_deletemasks, validCell, m_valids) {
|
|
/* Oid type is also unit32. */
|
|
uint32 cachedCUId = (uint32)lfirst_oid(cuidCell);
|
|
if (cachedCUId == CUId) {
|
|
*found = true;
|
|
bool valid = (bool)lfirst_int(validCell);
|
|
if (valid) {
|
|
int8* bitmap = (int8*)lfirst(delmaskCell);
|
|
unsigned char* cachedDelMask = (unsigned char*)VARDATA_ANY(bitmap);
|
|
if (cachedDelMask == NULL) {
|
|
/* All rows are alive*/
|
|
return true;
|
|
} else {
|
|
return !IsDeadRow(rownum, cachedDelMask);
|
|
}
|
|
} else {
|
|
*found = false;
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
*found = false;
|
|
return false;
|
|
}
|
|
|
|
bool CUDescScan::CheckItemIsAlive(ItemPointer tid)
|
|
{
|
|
HeapTuple tup;
|
|
bool isnull = false;
|
|
bool isAlive = false;
|
|
|
|
uint32 CUId = ItemPointerGetBlockNumber(tid);
|
|
uint32 rownum = ItemPointerGetOffsetNumber(tid) - 1;
|
|
|
|
/* First check in cache, if not found, we do follow scan. */
|
|
bool foundInCache = false;
|
|
isAlive = CheckAliveInCache(CUId, rownum, &foundInCache);
|
|
if (foundInCache) {
|
|
return isAlive;
|
|
}
|
|
|
|
/* We set m_scanKey[0]=VitrualDelColID in constructor func. Here we set m_scanKey[1]=CUID */
|
|
m_scanKey[1].sk_argument = UInt32GetDatum(CUId);
|
|
|
|
TupleDesc cudescTupdesc = m_cudesc->rd_att;
|
|
SysScanDesc scanDesc = systable_beginscan_ordered(m_cudesc, m_cudescIndex, m_snapshot, 2, m_scanKey);
|
|
|
|
int8* delMask = NULL;
|
|
bool needFreeMask = false;
|
|
bool valid = false;
|
|
|
|
if ((tup = systable_getnext_ordered(scanDesc, ForwardScanDirection)) != NULL) {
|
|
/* Put CUPointer into cudesc->cuPointer. */
|
|
Datum v = fastgetattr(tup, CUDescCUPointerAttr, cudescTupdesc, &isnull);
|
|
if (isnull) {
|
|
/* All rows are alvie. */
|
|
isAlive = true;
|
|
} else {
|
|
int8* bitmap = (int8*)PG_DETOAST_DATUM(DatumGetPointer(v));
|
|
unsigned char* cuDelMask = (unsigned char*)VARDATA_ANY(bitmap);
|
|
isAlive = !IsDeadRow(rownum, cuDelMask);
|
|
|
|
/* Because new memory may be created, so we have to check and free in time. */
|
|
if ((Pointer)bitmap != DatumGetPointer(v)) {
|
|
needFreeMask = true;
|
|
}
|
|
|
|
/* Record the mask. */
|
|
delMask = bitmap;
|
|
}
|
|
valid = true;
|
|
} else {
|
|
isAlive = false;
|
|
valid = false;
|
|
}
|
|
|
|
systable_endscan_ordered(scanDesc);
|
|
|
|
/* Save the sacn result in cache. */
|
|
lappend_oid(m_cuids, (Oid)CUId);
|
|
lappend(m_deletemasks, delMask);
|
|
lappend_int(m_needFreeMasks, (int)needFreeMask);
|
|
lappend_int(m_valids, (int)valid);
|
|
|
|
return isAlive;
|
|
}
|
|
|
|
/*
 * Test the delete bit of one row. The mask stores eight rows per byte: row / 8 selects
 * the byte and row % 8 the bit inside it (least significant bit first). A set bit
 * means the row is dead.
 */
inline bool CUDescScan::IsDeadRow(uint32 row, unsigned char* cuDelMask)
{
    const unsigned char maskByte = cuDelMask[row / 8];
    const uint32 bitInByte = row & 7;

    return ((maskByte >> bitInByte) & 1) != 0;
}
|