first commit for openGauss server
This commit is contained in:
995
src/gausskernel/runtime/executor/nodeSamplescan.cpp
Executable file
995
src/gausskernel/runtime/executor/nodeSamplescan.cpp
Executable file
@ -0,0 +1,995 @@
|
||||
/*
|
||||
* Copyright (c) 2020 Huawei Technologies Co.,Ltd.
|
||||
*
|
||||
* openGauss is licensed under Mulan PSL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PSL v2.
|
||||
* You may obtain a copy of Mulan PSL v2 at:
|
||||
*
|
||||
* http://license.coscl.org.cn/MulanPSL2
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PSL v2 for more details.
|
||||
* -------------------------------------------------------------------------
|
||||
* nodeSamplescan.cpp
|
||||
*
|
||||
* Support routines for sample scans of relations (table sampling).
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/gausskernel/runtime/executor/nodeSamplescan.cpp
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
#include "knl/knl_variable.h"
|
||||
|
||||
#include "access/hash.h"
|
||||
#include "access/relscan.h"
|
||||
#include "access/tableam.h"
|
||||
#include "commands/vacuum.h"
|
||||
#include "executor/executor.h"
|
||||
#include "executor/nodeSamplescan.h"
|
||||
#include "executor/nodeSeqscan.h"
|
||||
#include "miscadmin.h"
|
||||
#include "pgstat.h"
|
||||
#ifdef PGXC
|
||||
#include "pgxc/pgxc.h"
|
||||
#endif
|
||||
#include "storage/predicate.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/rel.h"
|
||||
#include "utils/rel_gs.h"
|
||||
#include "utils/snapmgr.h"
|
||||
#include "utils/tqual.h"
|
||||
#include "vecexecutor/vecnodecstorescan.h"
|
||||
#include "nodes/execnodes.h"
|
||||
|
||||
static double sample_random_fract(void);
|
||||
|
||||
/*
|
||||
* Description: Initialize relation descriptor for sample table scan.
|
||||
*
|
||||
* Parameters:
|
||||
* @in scanstate: ScanState information
|
||||
* @in currentRelation: relation being scanned
|
||||
*
|
||||
* Returns: HeapScanDesc
|
||||
*/
|
||||
AbsTblScanDesc InitSampleScanDesc(ScanState* scanstate, Relation currentRelation)
|
||||
{
|
||||
bool allow_sync = false;
|
||||
bool use_bulkread = false;
|
||||
AbsTblScanDesc current_scan_desc = NULL;
|
||||
SampleScanParams* sample_scan_info = &scanstate->sampleScanInfo;
|
||||
|
||||
/* Need scan all block. */
|
||||
if (sample_scan_info->sampleType == BERNOULLI_SAMPLE) {
|
||||
allow_sync = true;
|
||||
|
||||
/*
|
||||
* Use bulkread, since we're scanning all pages. But pagemode visibility
|
||||
* checking is a win only at larger sampling fractions.
|
||||
*/
|
||||
use_bulkread = true;
|
||||
} else {
|
||||
allow_sync = false;
|
||||
|
||||
/*
|
||||
* Bulkread buffer access strategy probably makes sense unless we're
|
||||
* scanning a very small fraction of the table.
|
||||
*/
|
||||
use_bulkread = (((BaseTableSample*)sample_scan_info->tsm_state)->percent[0] >= 1);
|
||||
}
|
||||
|
||||
current_scan_desc = abs_tbl_beginscan_sampling(
|
||||
currentRelation, scanstate->ps.state->es_snapshot, 0, NULL, use_bulkread, allow_sync, scanstate);
|
||||
|
||||
return current_scan_desc;
|
||||
}
|
||||
/*
 * Description: Fetch the next sampled heap tuple for a row-store scan node.
 *
 * Parameters:
 *	@in node: sequential scan state owning the scan descriptor and the sampler
 *
 * Returns: HeapTuple returned by RowTableSample::scanSample()
 */
static inline HeapTuple SampleFetchNextTuple(SeqScanState* node)
{
    /* Hand the node's scan accessor to the underlying heap scan before sampling. */
    GetHeapScanDesc(node->ss_currentScanDesc)->rs_ss_accessor = node->ss_scanaccessor;

    /* The sampler stored in tsm_state decides which tuple is produced next. */
    RowTableSample* sampler = (RowTableSample*)node->sampleScanInfo.tsm_state;
    return sampler->scanSample();
}
|
||||
|
||||
/*
|
||||
* Description: Get the next tuple from the table for sample scan.
|
||||
*
|
||||
* Parameters:
|
||||
* @in node: ScanState information
|
||||
*
|
||||
* Returns: TupleTableSlot
|
||||
*
|
||||
*/
|
||||
TupleTableSlot* SeqSampleNext(SeqScanState* node)
|
||||
{
|
||||
TupleTableSlot* slot = node->ss_ScanTupleSlot;
|
||||
HeapTuple tuple = SampleFetchNextTuple(node);
|
||||
|
||||
return ExecMakeTupleSlot(tuple, GetHeapScanDesc(node->ss_currentScanDesc), slot);
|
||||
}
|
||||
|
||||
/*
 * Description: Produce the next sampled tuple of a hash-bucket table as a tuple slot.
 * When the current bucket yields no more tuples, the scan moves to the next
 * bucket and the sampler state is reset before sampling continues.
 *
 * Parameters:
 *	@in node: sequential scan state of the sample scan
 *
 * Returns: TupleTableSlot built by ExecMakeTupleSlot() from the sampled tuple
 */
TupleTableSlot* HbktSeqSampleNext(SeqScanState* node)
{
    TupleTableSlot* resultSlot = node->ss_ScanTupleSlot;
    HeapTuple sampled = NULL;
    HBktTblScanDesc bucketScan = (HBktTblScanDesc)node->ss_currentScanDesc;

    Assert(node->ss_currentScanDesc->type == T_ScanDesc_HBucket);

    if (bucketScan != NULL) {
        for (;;) {
            sampled = SampleFetchNextTuple(node);

            /* Stop on the first tuple found, or once there is no further bucket to switch to. */
            if (sampled != NULL || !hbkt_sampling_scan_nextbucket(bucketScan)) {
                break;
            }

            /* A new bucket starts with a fresh sampler state. */
            (((RowTableSample*)node->sampleScanInfo.tsm_state)->resetSampleScan)();
        }
    }

    return ExecMakeTupleSlot(sampled, GetHeapScanDesc(node->ss_currentScanDesc), resultSlot);
}
|
||||
|
||||
/*
|
||||
* Description: Get seed value.
|
||||
*
|
||||
* Parameters: null
|
||||
*
|
||||
* Returns: void
|
||||
*/
|
||||
void BaseTableSample::getSeed()
|
||||
{
|
||||
Datum datum;
|
||||
bool isnull = false;
|
||||
ExprContext* econtext = sampleScanState->ps.ps_ExprContext;
|
||||
ExprState* repeatable = sampleScanState->sampleScanInfo.repeatable;
|
||||
|
||||
if (NULL != repeatable) {
|
||||
datum = ExecEvalExprSwitchContext(repeatable, econtext, &isnull, NULL);
|
||||
if (isnull) {
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
|
||||
errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
|
||||
}
|
||||
|
||||
/*
|
||||
* The REPEATABLE parameter has been coerced to float8 by the parser.
|
||||
* The reason for using float8 at the SQL level is that it will
|
||||
* produce unsurprising results both for users used to databases that
|
||||
* accept only integers in the REPEATABLE clause and for those who
|
||||
* might expect that REPEATABLE works like setseed() (a float in the
|
||||
* range from -1 to 1).
|
||||
*
|
||||
* We use hashfloat8() to convert the supplied value into a suitable
|
||||
* seed. For regression-testing purposes, that has the convenient
|
||||
* property that REPEATABLE(0) gives a machine-independent result.
|
||||
*/
|
||||
seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
|
||||
} else {
|
||||
seed = random();
|
||||
}
|
||||
|
||||
if (seed > 0) {
|
||||
gs_srandom(seed);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Description: Get percent value.
|
||||
* Parameters: null
|
||||
* Returns: void
|
||||
*/
|
||||
void BaseTableSample::getPercent()
|
||||
{
|
||||
int i = 0;
|
||||
ListCell* arg = NULL;
|
||||
bool isnull = false;
|
||||
|
||||
ExprContext* econtext = sampleScanState->ps.ps_ExprContext;
|
||||
List* args = sampleScanState->sampleScanInfo.args;
|
||||
Datum* params = (Datum*)palloc0(list_length(args) * sizeof(Datum));
|
||||
|
||||
Assert(list_length(args));
|
||||
percent = (double*)palloc0(SAMPLEARGSNUM * sizeof(double));
|
||||
|
||||
foreach (arg, args) {
|
||||
ExprState* argstate = (ExprState*)lfirst(arg);
|
||||
|
||||
params[i] = ExecEvalExprSwitchContext(argstate, econtext, &isnull, NULL);
|
||||
if (isnull) {
|
||||
ereport(
|
||||
ERROR, (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT), errmsg("TABLESAMPLE parameter cannot be null")));
|
||||
}
|
||||
|
||||
percent[i] = DatumGetFloat4(params[i]);
|
||||
|
||||
if (percent[i] < MIN_PERCENT_ARG || percent[i] > MAX_PERCENT_ARG || isnan(percent[i])) {
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT), errmsg("sample percentage must be between 0 and 100")));
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Description: Get next random block number.
|
||||
* Parameters: null
|
||||
* Returns: void
|
||||
*/
|
||||
void BaseTableSample::system_nextsampleblock()
|
||||
{
|
||||
BlockNumber blockindex = 0;
|
||||
|
||||
/* We should start from currentBlock + 1. */
|
||||
for (blockindex = currentBlock + 1; blockindex < totalBlockNum; blockindex++) {
|
||||
if (sample_random_fract() < percent[SYSTEM_SAMPLE]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (blockindex < totalBlockNum) {
|
||||
currentBlock = blockindex;
|
||||
} else {
|
||||
currentBlock = InvalidBlockNumber;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Description: Get sequential next offset.
|
||||
* Parameters: null
|
||||
* Returns: void
|
||||
*/
|
||||
void BaseTableSample::system_nextsampletuple()
|
||||
{
|
||||
OffsetNumber tupoffset = currentOffset;
|
||||
|
||||
/* Advance to next possible offset on page */
|
||||
if (tupoffset == InvalidOffsetNumber) {
|
||||
tupoffset = FirstOffsetNumber;
|
||||
} else {
|
||||
tupoffset++;
|
||||
}
|
||||
|
||||
if (tupoffset > curBlockMaxoffset) {
|
||||
tupoffset = InvalidOffsetNumber;
|
||||
}
|
||||
|
||||
currentOffset = tupoffset;
|
||||
}
|
||||
|
||||
/*
|
||||
* Description: Get sequential next block.
|
||||
* Parameters: null
|
||||
* Returns: void
|
||||
*/
|
||||
void BaseTableSample::bernoulli_nextsampleblock()
|
||||
{
|
||||
if (currentBlock + 1 < totalBlockNum) {
|
||||
currentBlock++;
|
||||
} else {
|
||||
currentBlock = InvalidBlockNumber;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Description: Get random next block.
|
||||
* Parameters: null
|
||||
* Returns: void
|
||||
*/
|
||||
void BaseTableSample::bernoulli_nextsampletuple()
|
||||
{
|
||||
OffsetNumber tupoffset = currentOffset;
|
||||
|
||||
/* Advance to first/next tuple in block */
|
||||
if (tupoffset == InvalidOffsetNumber) {
|
||||
tupoffset = FirstOffsetNumber;
|
||||
} else {
|
||||
tupoffset++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Loop over tuple offsets until finding suitable TID or reaching end of
|
||||
* block.
|
||||
*/
|
||||
for (; tupoffset <= curBlockMaxoffset; tupoffset++) {
|
||||
if (sample_random_fract() < percent[BERNOULLI_SAMPLE]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (tupoffset > curBlockMaxoffset) {
|
||||
tupoffset = InvalidOffsetNumber;
|
||||
}
|
||||
|
||||
currentOffset = tupoffset;
|
||||
}
|
||||
|
||||
/*
|
||||
* Description: Initialize base tableSample info.
|
||||
*
|
||||
* Parameters:
|
||||
* @in scanstate: ScanState information
|
||||
*
|
||||
* Returns: void
|
||||
*/
|
||||
BaseTableSample::BaseTableSample(void* scanstate)
|
||||
: runState(GETMAXBLOCK),
|
||||
totalBlockNum(0),
|
||||
currentBlock(InvalidBlockNumber),
|
||||
currentOffset(InvalidOffsetNumber),
|
||||
curBlockMaxoffset(InvalidOffsetNumber),
|
||||
finished(false)
|
||||
{
|
||||
TableSampleType sampleType;
|
||||
bool vectorized = ((ScanState*)scanstate)->ps.vectorized;
|
||||
|
||||
sampleScanState = (ScanState*)scanstate;
|
||||
sampleType = ((ScanState*)scanstate)->sampleScanInfo.sampleType;
|
||||
getPercent();
|
||||
getSeed();
|
||||
|
||||
/* Save vecsample ScanState if it is vectorized. */
|
||||
if (vectorized) {
|
||||
vecsampleScanState = (CStoreScanState*)scanstate;
|
||||
}
|
||||
|
||||
/* We can transform hybrid to system or bernoulli for optimize according to value of args. */
|
||||
if ((sampleType == BERNOULLI_SAMPLE) ||
|
||||
(sampleType == HYBRID_SAMPLE && percent[SYSTEM_SAMPLE] == MAX_PERCENT_ARG)) {
|
||||
percent[BERNOULLI_SAMPLE] = (sampleType == BERNOULLI_SAMPLE) ? percent[0] / MAX_PERCENT_ARG
|
||||
: percent[BERNOULLI_SAMPLE] / MAX_PERCENT_ARG;
|
||||
nextSampleBlock_function = &BaseTableSample::bernoulli_nextsampleblock;
|
||||
nextSampleTuple_function = &BaseTableSample::bernoulli_nextsampletuple;
|
||||
} else if ((sampleType == SYSTEM_SAMPLE) ||
|
||||
(sampleType == HYBRID_SAMPLE && percent[BERNOULLI_SAMPLE] == MAX_PERCENT_ARG)) {
|
||||
percent[SYSTEM_SAMPLE] =
|
||||
(sampleType == SYSTEM_SAMPLE) ? percent[0] / MAX_PERCENT_ARG : percent[SYSTEM_SAMPLE] / MAX_PERCENT_ARG;
|
||||
nextSampleBlock_function = &BaseTableSample::system_nextsampleblock;
|
||||
nextSampleTuple_function = &BaseTableSample::system_nextsampletuple;
|
||||
} else {
|
||||
Assert(sampleType == HYBRID_SAMPLE);
|
||||
percent[SYSTEM_SAMPLE] = percent[SYSTEM_SAMPLE] / MAX_PERCENT_ARG;
|
||||
percent[BERNOULLI_SAMPLE] = percent[BERNOULLI_SAMPLE] / MAX_PERCENT_ARG;
|
||||
nextSampleBlock_function = &BaseTableSample::system_nextsampleblock;
|
||||
nextSampleTuple_function = &BaseTableSample::bernoulli_nextsampletuple;
|
||||
}
|
||||
scanTupState = 0;
|
||||
}
|
||||
|
||||
/*
 * Description: Drop the references held by the sampler. Nothing is freed
 * here; the pointers are only cleared.
 */
BaseTableSample::~BaseTableSample()
{
    percent = NULL;
    vecsampleScanState = NULL;
    sampleScanState = NULL;
}
|
||||
|
||||
/*
|
||||
* Description: Reset Sample Scan parameter.
|
||||
*
|
||||
* Parameters: null
|
||||
*
|
||||
* Returns: void
|
||||
*/
|
||||
void BaseTableSample::resetSampleScan()
|
||||
{
|
||||
runState = GETMAXBLOCK;
|
||||
totalBlockNum = 0;
|
||||
currentOffset = InvalidOffsetNumber;
|
||||
currentBlock = InvalidBlockNumber;
|
||||
curBlockMaxoffset = InvalidOffsetNumber;
|
||||
finished = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Description: Initialize Sample Scan parameter.
|
||||
*
|
||||
* Parameters:
|
||||
* @in scanstate: ScanState information
|
||||
*
|
||||
* Returns: void
|
||||
*/
|
||||
RowTableSample::RowTableSample(ScanState* scanstate) : BaseTableSample(scanstate)
|
||||
{}
|
||||
|
||||
/*
 * Description: The row sampler has no state of its own to clean up.
 */
RowTableSample::~RowTableSample()
{
    /* Intentionally empty. */
}
|
||||
|
||||
/*
|
||||
* Description: Get max offset for current block.
|
||||
*
|
||||
* Parameters: null
|
||||
*
|
||||
* Returns: void
|
||||
*/
|
||||
void RowTableSample::getMaxOffset()
|
||||
{
|
||||
HeapScanDesc heapscan = NULL;
|
||||
AbsTblScanDesc scan = sampleScanState->ss_currentScanDesc;
|
||||
bool pagemode = GetHeapScanDesc(scan)->rs_pageatatime;
|
||||
Page page;
|
||||
|
||||
Assert(BlockNumberIsValid(currentBlock));
|
||||
|
||||
if (scanTupState == NEWBLOCK) {
|
||||
abs_tbl_getpage(scan, currentBlock);
|
||||
}
|
||||
|
||||
/*
|
||||
* When not using pagemode, we must lock the buffer during tuple
|
||||
* visibility checks.
|
||||
*/
|
||||
heapscan = GetHeapScanDesc(scan);
|
||||
if (!pagemode) {
|
||||
LockBuffer(heapscan->rs_cbuf, BUFFER_LOCK_SHARE);
|
||||
}
|
||||
|
||||
page = (Page)BufferGetPage(heapscan->rs_cbuf);
|
||||
curBlockMaxoffset = PageGetMaxOffsetNumber(page);
|
||||
|
||||
/* Found visible tuple, return it. */
|
||||
if (!pagemode) {
|
||||
LockBuffer(heapscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Description: Scan tuple according to currentblock and current currentoffset.
|
||||
*
|
||||
* Parameters: null
|
||||
*
|
||||
* Returns: ScanValid (the flag which identify the tuple is valid or not)
|
||||
*/
|
||||
ScanValid RowTableSample::scanTup()
|
||||
{
|
||||
HeapScanDesc scan = GetHeapScanDesc(sampleScanState->ss_currentScanDesc);
|
||||
bool pagemode = scan->rs_pageatatime;
|
||||
HeapTuple tuple = &(scan->rs_ctup);
|
||||
Snapshot snapshot = scan->rs_snapshot;
|
||||
ItemId itemid;
|
||||
Page page;
|
||||
bool all_visible = false;
|
||||
bool visible = false;
|
||||
|
||||
if (scanTupState == NEWBLOCK) {
|
||||
if (BlockNumberIsValid(currentBlock)) {
|
||||
/*
|
||||
* Report our new scan position for synchronization purposes.
|
||||
*
|
||||
* Note: we do this before checking for end of scan so that the
|
||||
* final state of the position hint is back at the start of the
|
||||
* rel. That's not strictly necessary, but otherwise when you run
|
||||
* the same query multiple times the starting position would shift
|
||||
* a little bit backwards on every invocation, which is confusing.
|
||||
* We don't guarantee any specific ordering in general, though.
|
||||
*/
|
||||
if (scan->rs_syncscan) {
|
||||
ss_report_location(scan->rs_rd, currentBlock);
|
||||
}
|
||||
} else {
|
||||
if (scan->rs_inited) {
|
||||
if (BufferIsValid(scan->rs_cbuf)) {
|
||||
ReleaseBuffer(scan->rs_cbuf);
|
||||
}
|
||||
scan->rs_cbuf = InvalidBuffer;
|
||||
scan->rs_cblock = InvalidBlockNumber;
|
||||
scan->rs_inited = false;
|
||||
}
|
||||
|
||||
tuple->t_data = NULL;
|
||||
|
||||
return INVALIDBLOCKNO;
|
||||
}
|
||||
|
||||
if (!scan->rs_inited) {
|
||||
scan->rs_inited = true;
|
||||
}
|
||||
|
||||
scanTupState = NONEWBLOCK;
|
||||
}
|
||||
|
||||
Assert(currentBlock < scan->rs_nblocks);
|
||||
|
||||
/* Current block alreadly have be readed. */
|
||||
if (currentOffset == InvalidOffsetNumber) {
|
||||
/*
|
||||
* If we get here, it means we've exhausted the items on this page and
|
||||
* it's time to move to the next.
|
||||
*/
|
||||
if (!pagemode) {
|
||||
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
|
||||
}
|
||||
|
||||
return INVALIDOFFSET;
|
||||
}
|
||||
|
||||
/*
|
||||
* When not using pagemode, we must lock the buffer during tuple
|
||||
* visibility checks.
|
||||
*/
|
||||
if (!pagemode) {
|
||||
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
|
||||
}
|
||||
|
||||
page = (Page)BufferGetPage(scan->rs_cbuf);
|
||||
all_visible = PageIsAllVisible(page) && !(snapshot->takenDuringRecovery);
|
||||
|
||||
/* Skip invalid tuple pointers. */
|
||||
itemid = PageGetItemId(page, currentOffset);
|
||||
if (!ItemIdIsNormal(itemid)) {
|
||||
return NEXTDATA;
|
||||
}
|
||||
|
||||
tuple->t_data = (HeapTupleHeader)PageGetItem(page, itemid);
|
||||
tuple->t_len = ItemIdGetLength(itemid);
|
||||
HeapTupleCopyBaseFromPage(tuple, page);
|
||||
ItemPointerSet(&(tuple->t_self), currentBlock, currentOffset);
|
||||
|
||||
if (all_visible) {
|
||||
visible = true;
|
||||
} else {
|
||||
BufferDesc* bufHdr = GetBufferDescriptor(scan->rs_cbuf - 1);
|
||||
bool isTmpLock = false;
|
||||
|
||||
if (!LWLockHeldByMe(bufHdr->content_lock)) {
|
||||
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
|
||||
isTmpLock = true;
|
||||
}
|
||||
|
||||
visible = HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot, scan->rs_cbuf);
|
||||
|
||||
if (isTmpLock) {
|
||||
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
|
||||
}
|
||||
}
|
||||
|
||||
/* in pagemode, heapgetpage did this for us */
|
||||
if (!pagemode) {
|
||||
CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, scan->rs_cbuf, snapshot);
|
||||
}
|
||||
|
||||
if (visible) {
|
||||
/* Found visible tuple, return it. */
|
||||
if (!pagemode) {
|
||||
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
|
||||
}
|
||||
|
||||
/* Count successfully-fetched tuples as heap fetches */
|
||||
pgstat_count_heap_getnext(scan->rs_rd);
|
||||
|
||||
elog(DEBUG2,
|
||||
"Get one tuple [currentBlock: %u, currentOffset: %u] for relation: %s on %s.",
|
||||
currentBlock,
|
||||
currentOffset,
|
||||
NameStr(scan->rs_rd->rd_rel->relname),
|
||||
g_instance.attr.attr_common.PGXCNodeName);
|
||||
|
||||
return VALIDDATA;
|
||||
}
|
||||
|
||||
return NEXTDATA;
|
||||
}
|
||||
|
||||
/*
|
||||
* Description: Get sample tuple for row table.
|
||||
*
|
||||
* Parameters: null
|
||||
*
|
||||
* Returns: HeapTuple
|
||||
*/
|
||||
HeapTuple RowTableSample::scanSample()
|
||||
{
|
||||
HeapScanDesc scan = GetHeapScanDesc(sampleScanState->ss_currentScanDesc);
|
||||
HeapTuple tuple = &(scan->rs_ctup);
|
||||
|
||||
if (finished == true) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Return NULL if no data or percent value is 0. */
|
||||
if ((scan->rs_nblocks == 0) ||
|
||||
(sampleScanState->sampleScanInfo.sampleType == BERNOULLI_SAMPLE && percent[0] == 0) ||
|
||||
(sampleScanState->sampleScanInfo.sampleType == SYSTEM_SAMPLE && percent[0] == 0) ||
|
||||
(sampleScanState->sampleScanInfo.sampleType == HYBRID_SAMPLE && percent[BERNOULLI_SAMPLE] == 0 &&
|
||||
percent[SYSTEM_SAMPLE] == 0)) {
|
||||
Assert(!BufferIsValid(scan->rs_cbuf));
|
||||
tuple->t_data = NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
switch (runState) {
|
||||
/* Get num of max block. */
|
||||
case GETMAXBLOCK: {
|
||||
totalBlockNum = scan->rs_nblocks;
|
||||
runState = GETBLOCKNO;
|
||||
elog(DEBUG2,
|
||||
"Get %u blocks for relation: %s on %s.",
|
||||
totalBlockNum,
|
||||
NameStr(scan->rs_rd->rd_rel->relname),
|
||||
g_instance.attr.attr_common.PGXCNodeName);
|
||||
break;
|
||||
}
|
||||
case GETBLOCKNO: {
|
||||
/* Get current block no with method of function. */
|
||||
(this->*nextSampleBlock_function)();
|
||||
|
||||
if (BlockNumberIsValid(currentBlock)) {
|
||||
runState = GETMAXOFFSET;
|
||||
} else {
|
||||
runState = GETDATA;
|
||||
}
|
||||
|
||||
scanTupState = NEWBLOCK;
|
||||
break;
|
||||
}
|
||||
case GETMAXOFFSET: {
|
||||
getMaxOffset();
|
||||
runState = GETOFFSET;
|
||||
elog(DEBUG2,
|
||||
"Get %d tuples in blockno: %u for relation: %s on %s.",
|
||||
curBlockMaxoffset,
|
||||
currentBlock,
|
||||
NameStr(scan->rs_rd->rd_rel->relname),
|
||||
g_instance.attr.attr_common.PGXCNodeName);
|
||||
break;
|
||||
}
|
||||
case GETOFFSET: {
|
||||
(this->*nextSampleTuple_function)();
|
||||
|
||||
runState = GETDATA;
|
||||
break;
|
||||
}
|
||||
case GETDATA: {
|
||||
ScanValid scanState = scanTup();
|
||||
|
||||
switch (scanState) {
|
||||
case VALIDDATA: {
|
||||
runState = GETOFFSET;
|
||||
return &(scan->rs_ctup);
|
||||
}
|
||||
case NEXTDATA: {
|
||||
runState = GETOFFSET;
|
||||
break;
|
||||
}
|
||||
case INVALIDBLOCKNO: {
|
||||
/* All block alreadly be scaned finish. */
|
||||
finished = true;
|
||||
return NULL;
|
||||
}
|
||||
case INVALIDOFFSET: {
|
||||
runState = GETBLOCKNO;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Description: Initialize Sample Scan parameter for CStoreScanState.
|
||||
*
|
||||
* Parameters:
|
||||
* @in scanstate: CStoreScanState information
|
||||
*
|
||||
* Returns: void
|
||||
*/
|
||||
ColumnTableSample::ColumnTableSample(CStoreScanState* scanstate)
|
||||
: BaseTableSample(scanstate), currentCuId(0), batchRowCount(0)
|
||||
{
|
||||
offsetIds = (uint16*)palloc0(sizeof(uint16) * BatchMaxSize);
|
||||
errno_t rc = memset_s(offsetIds, sizeof(uint16) * BatchMaxSize, 0, sizeof(uint16) * BatchMaxSize);
|
||||
securec_check(rc, "", "");
|
||||
|
||||
/* Create new VectorBatch for construct tids to get sample VectorBatch. */
|
||||
TupleDesc tupdesc = CreateTemplateTupleDesc(1, false);
|
||||
TupleDescInitEntry(tupdesc, (AttrNumber)1, "tids", INT8OID, -1, 0);
|
||||
tids = New(CurrentMemoryContext) VectorBatch(CurrentMemoryContext, tupdesc);
|
||||
}
|
||||
|
||||
/*
 * Description: Release the tid batch and the offset buffer owned by the
 * column sampler.
 */
ColumnTableSample::~ColumnTableSample()
{
    if (tids != NULL) {
        delete tids;
        tids = NULL;
    }

    if (offsetIds != NULL) {
        pfree_ext(offsetIds);
        offsetIds = NULL;
    }
}
|
||||
|
||||
/*
|
||||
* Description: Reset Vec Sample Scan parameter.
|
||||
*
|
||||
* Parameters: null
|
||||
*
|
||||
* Returns: void
|
||||
*/
|
||||
void ColumnTableSample::resetVecSampleScan()
|
||||
{
|
||||
currentCuId = 0;
|
||||
batchRowCount = 0;
|
||||
|
||||
/* Reset common parameters for table sample. */
|
||||
(((ColumnTableSample*)vecsampleScanState->sampleScanInfo.tsm_state)->resetSampleScan)();
|
||||
|
||||
if (tids) {
|
||||
tids->Reset();
|
||||
}
|
||||
|
||||
if (offsetIds) {
|
||||
errno_t rc = memset_s(offsetIds, sizeof(uint16) * BatchMaxSize, 0, sizeof(uint16) * BatchMaxSize);
|
||||
securec_check(rc, "", "");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Description: Get current block max offset.
|
||||
*
|
||||
* Parameters: null
|
||||
*
|
||||
* Returns: void
|
||||
*/
|
||||
void ColumnTableSample::getMaxOffset()
|
||||
{
|
||||
CUDesc cu_desc;
|
||||
int fstColIdx = 0;
|
||||
Assert(BlockNumberIsValid(currentBlock));
|
||||
curBlockMaxoffset = InvalidOffsetNumber;
|
||||
|
||||
/* If the first column has dropped, we should change the index of first column. */
|
||||
if (vecsampleScanState->ss_currentRelation->rd_att->attrs[0]->attisdropped) {
|
||||
fstColIdx = CStoreGetfstColIdx(vecsampleScanState->ss_currentRelation);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get CUDesc of column according to currentCuId.
|
||||
*/
|
||||
if (vecsampleScanState->m_CStore->GetCUDesc(fstColIdx, currentCuId, &cu_desc, GetActiveSnapshot()) != true) {
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* We try our best to keep the rules of acquiring tuples about row relations:
|
||||
* 1). ignore to sample tuples dead
|
||||
* 2). ignore to sample tuples recently dead
|
||||
* 3). ignore to sample tuples being inserted in progress by other transactions
|
||||
* 4). ignore to sample tuples being deleted in progress by our transactions
|
||||
* 5). ignore to sample tuples being deleted in progress by other transactions
|
||||
* SnapshotNow can satisfy the rule 1) 2) 3) 4), so it's used here.
|
||||
*/
|
||||
vecsampleScanState->m_CStore->GetCUDeleteMaskIfNeed(currentCuId, GetActiveSnapshot());
|
||||
|
||||
/* Quit this loop quickly if all the tuples are dead in this CU unit. */
|
||||
if (vecsampleScanState->m_CStore->IsTheWholeCuDeleted(cu_desc.row_count)) {
|
||||
return;
|
||||
}
|
||||
|
||||
curBlockMaxoffset = cu_desc.row_count;
|
||||
}
|
||||
|
||||
/*
|
||||
* Description: Get sample VectorBatch by tids(CuId+offsetId).
|
||||
*
|
||||
* Parameters:
|
||||
* @in state: CStoreScanState information
|
||||
* @in cuId: CuId of current CU
|
||||
* @in maxOffset: max Offset of current CU
|
||||
* @in offsetIds: random offsetIds of current CU
|
||||
* @in tids: construct VectorBatch of tids by cuId and offsetIds
|
||||
* @in vbout: return values of VectorBatch
|
||||
*
|
||||
* Returns: void
|
||||
*/
|
||||
void ColumnTableSample::getBatchBySamples(VectorBatch* vbout)
|
||||
{
|
||||
ScalarVector* vec = tids->m_arr;
|
||||
tids->Reset();
|
||||
|
||||
/* Fill VectorBatch of tids with CuId and offsetId. */
|
||||
for (int j = 0; j < batchRowCount; j++) {
|
||||
/* We can be sure it is not dead row. */
|
||||
vec->m_vals[j] = 0;
|
||||
ItemPointer itemPtr = (ItemPointer)&vec->m_vals[j];
|
||||
|
||||
/* Note that itemPtr->offset start from 1 */
|
||||
ItemPointerSet(itemPtr, currentCuId, offsetIds[j]);
|
||||
}
|
||||
vec->m_rows = batchRowCount;
|
||||
tids->m_rows = vec->m_rows;
|
||||
|
||||
/* Scan VectorBatch by tids. */
|
||||
if (!BatchIsNull(tids)) {
|
||||
CStoreIndexScanState* indexScanState = makeNode(CStoreIndexScanState);
|
||||
indexScanState->m_indexOutAttrNo = 0;
|
||||
|
||||
vecsampleScanState->m_CStore->ScanByTids(indexScanState, tids, vbout);
|
||||
vecsampleScanState->m_CStore->ResetLateRead();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Description: Scan each offsets and get sample VectorBatch by tids.
|
||||
*
|
||||
* Parameters:
|
||||
* @in pOutBatch: return values of VectorBatch
|
||||
*
|
||||
* Returns: ScanValid (the flag which identify the tuple is valid or not)
|
||||
*/
|
||||
ScanValid ColumnTableSample::scanBatch(VectorBatch* pOutBatch)
|
||||
{
|
||||
Assert(BlockNumberIsValid(currentBlock));
|
||||
|
||||
/* Current block alreadly have be readed. */
|
||||
if (currentOffset == InvalidOffsetNumber) {
|
||||
if (batchRowCount > 0) {
|
||||
/*
|
||||
* If we get here, it means we've exhausted the items on this CU and
|
||||
* it's time to move to the next CU.
|
||||
*/
|
||||
getBatchBySamples(pOutBatch);
|
||||
|
||||
errno_t rc = memset_s(offsetIds, sizeof(uint16) * BatchMaxSize, 0, sizeof(uint16) * BatchMaxSize);
|
||||
securec_check(rc, "", "");
|
||||
}
|
||||
|
||||
return INVALIDOFFSET;
|
||||
}
|
||||
|
||||
if (!vecsampleScanState->m_CStore->IsDeadRow(currentCuId, (uint32)currentOffset)) {
|
||||
elog(DEBUG2,
|
||||
"Get one tuple [currentCuId: %u, currentOffset: %u] for relation: %s on %s.",
|
||||
currentCuId,
|
||||
currentOffset,
|
||||
NameStr(vecsampleScanState->ss_currentRelation->rd_rel->relname),
|
||||
g_instance.attr.attr_common.PGXCNodeName);
|
||||
|
||||
/* Get current row from CU and fill into vector until to finish one batch. */
|
||||
offsetIds[batchRowCount++] = currentOffset;
|
||||
if (batchRowCount >= BatchMaxSize) {
|
||||
getBatchBySamples(pOutBatch);
|
||||
|
||||
batchRowCount = 0;
|
||||
errno_t rc = memset_s(offsetIds, sizeof(uint16) * BatchMaxSize, 0, sizeof(uint16) * BatchMaxSize);
|
||||
securec_check(rc, "", "");
|
||||
|
||||
return VALIDDATA;
|
||||
}
|
||||
}
|
||||
|
||||
return NEXTDATA;
|
||||
}
|
||||
|
||||
/*
|
||||
* Description: Get sample VectoBatch for column table.
|
||||
*
|
||||
* Parameters:
|
||||
* @in pOutBatch: return values of VectorBatch
|
||||
*
|
||||
* Returns: void
|
||||
*/
|
||||
void ColumnTableSample::scanVecSample(VectorBatch* pOutBatch)
|
||||
{
|
||||
/* Return NULL if finish scan or percent value is 0. */
|
||||
if ((finished == true) || (vecsampleScanState->sampleScanInfo.sampleType == BERNOULLI_SAMPLE && percent[0] == 0) ||
|
||||
(vecsampleScanState->sampleScanInfo.sampleType == SYSTEM_SAMPLE && percent[0] == 0) ||
|
||||
(vecsampleScanState->sampleScanInfo.sampleType == HYBRID_SAMPLE && percent[BERNOULLI_SAMPLE] == 0 &&
|
||||
percent[SYSTEM_SAMPLE] == 0)) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
switch (runState) {
|
||||
case GETMAXBLOCK: {
|
||||
/* Get num of max CU. */
|
||||
totalBlockNum = CStoreRelGetCUNumByNow((CStoreScanDesc)vecsampleScanState);
|
||||
runState = GETBLOCKNO;
|
||||
elog(DEBUG2,
|
||||
"Get %u CUs for relation: %s on %s.",
|
||||
totalBlockNum,
|
||||
NameStr(vecsampleScanState->ss_currentRelation->rd_rel->relname),
|
||||
g_instance.attr.attr_common.PGXCNodeName);
|
||||
break;
|
||||
}
|
||||
case GETBLOCKNO: {
|
||||
/* Get random or sequence CUId as current block. */
|
||||
(this->*nextSampleBlock_function)();
|
||||
|
||||
if (!BlockNumberIsValid(currentBlock)) {
|
||||
/* All block alreadly be scaned finish. */
|
||||
finished = true;
|
||||
return;
|
||||
}
|
||||
|
||||
currentCuId = currentBlock + FirstCUID + 1;
|
||||
runState = GETMAXOFFSET;
|
||||
break;
|
||||
}
|
||||
case GETMAXOFFSET: {
|
||||
getMaxOffset();
|
||||
|
||||
if (InvalidOffsetNumber == curBlockMaxoffset) {
|
||||
runState = GETBLOCKNO;
|
||||
} else {
|
||||
runState = GETOFFSET;
|
||||
}
|
||||
|
||||
elog(DEBUG2,
|
||||
"Get %d tuples in CUNo: %u for relation: %s on %s.",
|
||||
curBlockMaxoffset,
|
||||
currentBlock,
|
||||
NameStr(vecsampleScanState->ss_currentRelation->rd_rel->relname),
|
||||
g_instance.attr.attr_common.PGXCNodeName);
|
||||
break;
|
||||
}
|
||||
case GETOFFSET: {
|
||||
(this->*nextSampleTuple_function)();
|
||||
|
||||
runState = GETDATA;
|
||||
break;
|
||||
}
|
||||
case GETDATA: {
|
||||
ScanValid scanState = scanBatch(pOutBatch);
|
||||
|
||||
switch (scanState) {
|
||||
case VALIDDATA: {
|
||||
runState = GETOFFSET;
|
||||
return;
|
||||
}
|
||||
case NEXTDATA: {
|
||||
runState = GETOFFSET;
|
||||
break;
|
||||
}
|
||||
case INVALIDOFFSET: {
|
||||
runState = GETBLOCKNO;
|
||||
|
||||
/* Return the last batch if filled and get new CU and batch. */
|
||||
if (batchRowCount > 0) {
|
||||
batchRowCount = 0;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static double sample_random_fract(void)
|
||||
{
|
||||
return ((double)gs_random() + 1) / ((double)MAX_RANDOM_VALUE + 2);
|
||||
}
|
||||
Reference in New Issue
Block a user