Fall back to the normal (serial) case when the DSM segment is full. Add on_dsm_detach.
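dsm_create() now returns NULL with a WARNING, instead of raising an ERROR, when every parallel-context slot is already in use. InitializeParallelDSM() detects the NULL, switches to TopMemoryContext, allocates a backend-private knl_u_parallel_context in its place, and forces pcxt->nworkers to 0, so the query simply runs serially. dsm_detach() gains a newMemCtx flag that selects between tearing down the dedicated memory context and pfree'ing the backend-private allocations; DestroyParallelContext() passes pcxt->nworkers > 0 to pick the mode. A PostgreSQL-style on_dsm_detach() callback list is added so per-segment cleanup can run at detach time. Tuple queue reader setup also moves out of ExecGather() into the new ExecParallelCreateReaders(), and readers are now destroyed centrally in ExecParallelFinish().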
@@ -182,8 +182,8 @@ static bool ExecParallelInitializeDSM(PlanState *planstate, ExecParallelInitiali
     if (planstate->plan->parallel_aware) {
         switch (nodeTag(planstate)) {
             case T_SeqScanState:
-                ExecSeqScanInitializeDSM((SeqScanState *)planstate, d->pcxt, cxt->pwCtx->pscan_num);
-                cxt->pwCtx->pscan_num++;
+                ExecSeqScanInitializeDSM((SeqScanState *)planstate, d->pcxt, cxt->pwCtx->queryInfo.pscan_num);
+                cxt->pwCtx->queryInfo.pscan_num++;
                 break;
             default:
                 break;
@@ -211,10 +211,10 @@ static shm_mq_handle **ExecParallelSetupTupleQueues(ParallelContext *pcxt, bool
      * otherwise, find the already allocated space.
      */
     if (!reinitialize) {
-        cxt->pwCtx->tupleQueue = (char *)palloc0(PARALLEL_TUPLE_QUEUE_SIZE * (Size)pcxt->nworkers);
+        cxt->pwCtx->queryInfo.tupleQueue = (char *)palloc0(PARALLEL_TUPLE_QUEUE_SIZE * (Size)pcxt->nworkers);
     }
-    Assert(cxt->pwCtx->tupleQueue != NULL);
-    char *tqueuespace = cxt->pwCtx->tupleQueue;
+    Assert(cxt->pwCtx->queryInfo.tupleQueue != NULL);
+    char *tqueuespace = cxt->pwCtx->queryInfo.tupleQueue;

     /* Create the queues, and become the receiver for each. */
     for (int i = 0; i < pcxt->nworkers; ++i) {
@@ -227,6 +227,27 @@ static shm_mq_handle **ExecParallelSetupTupleQueues(ParallelContext *pcxt, bool
     return responseq;
 }

+/*
+ * Set up tuple queue readers to read the results of a parallel subplan.
+ * All the workers are expected to return tuples matching tupDesc.
+ *
+ * This is separate from ExecInitParallelPlan() because we can launch the
+ * worker processes and let them start doing something before we do this.
+ */
+void ExecParallelCreateReaders(ParallelExecutorInfo *pei, TupleDesc tupDesc)
+{
+    Assert(pei->reader == NULL);
+    int nworkers = pei->pcxt->nworkers_launched;
+    if (nworkers > 0) {
+        pei->reader = (TupleQueueReader **)palloc((uint32)nworkers * sizeof(TupleQueueReader *));
+        for (int i = 0; i < nworkers; i++) {
+            shm_mq_set_handle(pei->tqueue[i], pei->pcxt->worker[i].bgwhandle);
+            pei->reader[i] = CreateTupleQueueReader(pei->tqueue[i], tupDesc);
+        }
+    }
+}
+
+
 /*
  * Re-initialize the parallel executor info such that it can be reused by
  * workers.
@@ -235,6 +256,7 @@ void ExecParallelReinitialize(ParallelExecutorInfo *pei)
 {
     ReinitializeParallelDSM(pei->pcxt);
     pei->tqueue = ExecParallelSetupTupleQueues(pei->pcxt, true);
+    pei->reader = NULL;
     pei->finished = false;
 }

@@ -289,21 +311,23 @@ ParallelExecutorInfo *ExecInitParallelPlan(PlanState *planstate, EState *estate,
      * asked for has been allocated or initialized yet, though, so do that.
      */
     MemoryContext oldcontext = MemoryContextSwitchTo(cxt->memCtx);
+    ParallelQueryInfo queryInfo;
+    int rc = memset_s(&queryInfo, sizeof(ParallelQueryInfo), 0, sizeof(ParallelQueryInfo));
+    securec_check(rc, "", "");
+
     /* Store serialized PlannedStmt. */
-    cxt->pwCtx->pstmt_space = ExecSerializePlan(planstate->plan, estate);
+    queryInfo.pstmt_space = ExecSerializePlan(planstate->plan, estate);

     /* Store serialized ParamListInfo. */
-    cxt->pwCtx->param_space = (char *)palloc0(param_len);
-    cxt->pwCtx->param_len = param_len;
-    SerializeParamList(estate->es_param_list_info, cxt->pwCtx->param_space, param_len);
+    queryInfo.param_space = (char *)palloc0(param_len);
+    queryInfo.param_len = param_len;
+    SerializeParamList(estate->es_param_list_info, queryInfo.param_space, param_len);

     /* Allocate space for each worker's BufferUsage; no need to initialize. */
-    cxt->pwCtx->bufUsage = (BufferUsage *)palloc0(sizeof(BufferUsage) * pcxt->nworkers);
-    pei->buffer_usage = cxt->pwCtx->bufUsage;
-
-    /* Set up tuple queues. */
-    pei->tqueue = ExecParallelSetupTupleQueues(pcxt, false);
+    queryInfo.bufUsage = (BufferUsage *)palloc0(sizeof(BufferUsage) * pcxt->nworkers);
+    pei->buffer_usage = queryInfo.bufUsage;
+
+    /* We don't need the TupleQueueReaders yet, though. */
+    pei->reader = NULL;

     /*
      * If instrumentation options were supplied, allocate space for the
@@ -311,19 +335,19 @@ ParallelExecutorInfo *ExecInitParallelPlan(PlanState *planstate, EState *estate,
      * during ExecParallelInitializeDSM.
      */
     if (estate->es_instrument) {
-        cxt->pwCtx->instrumentation = (SharedExecutorInstrumentation *)palloc0(instrumentation_len);
-        cxt->pwCtx->instrumentation->instrument_options = estate->es_instrument;
-        cxt->pwCtx->instrumentation->instrument_offset = instrument_offset;
-        cxt->pwCtx->instrumentation->num_workers = nworkers;
-        cxt->pwCtx->instrumentation->num_plan_nodes = e.nnodes;
-        Instrumentation *instrument = GetInstrumentationArray(cxt->pwCtx->instrumentation);
+        queryInfo.instrumentation = (SharedExecutorInstrumentation *)palloc0(instrumentation_len);
+        queryInfo.instrumentation->instrument_options = estate->es_instrument;
+        queryInfo.instrumentation->instrument_offset = instrument_offset;
+        queryInfo.instrumentation->num_workers = nworkers;
+        queryInfo.instrumentation->num_plan_nodes = e.nnodes;
+        Instrumentation *instrument = GetInstrumentationArray(queryInfo.instrumentation);
         for (int i = 0; i < nworkers * e.nnodes; ++i) {
             InstrInit(&instrument[i], estate->es_instrument);
         }
-        pei->instrumentation = cxt->pwCtx->instrumentation;
+        pei->instrumentation = queryInfo.instrumentation;
     }

-    cxt->pwCtx->pscan = (ParallelHeapScanDesc *)palloc0(sizeof(ParallelHeapScanDesc) * e.nnodes);
+    queryInfo.pscan = (ParallelHeapScanDesc *)palloc0(sizeof(ParallelHeapScanDesc) * e.nnodes);

     /*
      * Give parallel-aware nodes a chance to initialize their shared data.
@@ -331,9 +355,14 @@ ParallelExecutorInfo *ExecInitParallelPlan(PlanState *planstate, EState *estate,
      * if it exists.
      */
     d.pcxt = pcxt;
-    d.instrumentation = cxt->pwCtx->instrumentation;
+    d.instrumentation = queryInfo.instrumentation;
     d.nnodes = 0;

+    cxt->pwCtx->queryInfo = queryInfo;
+
+    /* Set up the tuple queues that the workers will write into. */
+    pei->tqueue = ExecParallelSetupTupleQueues(pcxt, false);
+
     /* Here we switch to old context, cause heap_beginscan_parallel need malloc memory */
     (void)MemoryContextSwitchTo(oldcontext);
     (void)ExecParallelInitializeDSM(planstate, &d);
@@ -397,12 +426,43 @@ void ExecParallelFinish(ParallelExecutorInfo *pei)
     if (pei->finished)
         return;

-    /* First, wait for the workers to finish. */
+    int nworkers = pei->pcxt->nworkers_launched;
+    int i;
+
+    /*
+     * Detach from tuple queues ASAP, so that any still-active workers will
+     * notice that no further results are wanted.
+     */
+    if (pei->tqueue != NULL) {
+        for (i = 0; i < nworkers; i++) {
+            shm_mq_detach(pei->tqueue[i]);
+        }
+        pfree(pei->tqueue);
+        pei->tqueue = NULL;
+    }
+
+    /*
+     * While we're waiting for the workers to finish, let's get rid of the
+     * tuple queue readers. (Any other local cleanup could be done here too.)
+     */
+    if (pei->reader != NULL) {
+        for (i = 0; i < nworkers; i++) {
+            DestroyTupleQueueReader(pei->reader[i]);
+        }
+        pfree(pei->reader);
+        pei->reader = NULL;
+    }
+
+    /* Now wait for the workers to finish. */
     WaitForParallelWorkersToFinish(pei->pcxt);

-    /* Next, accumulate buffer usage. */
-    for (int i = 0; i < pei->pcxt->nworkers; ++i)
+    /*
+     * Next, accumulate buffer usage. (This must wait for the workers to
+     * finish, or we might get incomplete data.)
+     */
+    for (i = 0; i < nworkers; ++i) {
         InstrAccumParallelQuery(&pei->buffer_usage[i]);
+    }

     /* Finally, accumulate instrumentation, if any. */
     if (pei->instrumentation) {
@@ -436,7 +496,7 @@ static DestReceiver *ExecParallelGetReceiver(void *seg)
     Assert(seg != NULL);
     knl_u_parallel_context *cxt = (knl_u_parallel_context *)seg;

-    char *mqspace = cxt->pwCtx->tupleQueue;
+    char *mqspace = cxt->pwCtx->queryInfo.tupleQueue;
     mqspace += t_thrd.bgworker_cxt.ParallelWorkerNumber * PARALLEL_TUPLE_QUEUE_SIZE;
     shm_mq *mq = (shm_mq *)mqspace;
     shm_mq_set_sender(mq, t_thrd.proc);
@@ -451,10 +511,10 @@ static QueryDesc *ExecParallelGetQueryDesc(void *seg, DestReceiver *receiver, in
     knl_u_parallel_context *cxt = (knl_u_parallel_context *)seg;

     /* Reconstruct leader-supplied PlannedStmt. */
-    PlannedStmt *pstmt = (PlannedStmt *)stringToNode(cxt->pwCtx->pstmt_space);
+    PlannedStmt *pstmt = (PlannedStmt *)stringToNode(cxt->pwCtx->queryInfo.pstmt_space);

     /* Reconstruct ParamListInfo. */
-    ParamListInfo paramLI = RestoreParamList(cxt->pwCtx->param_space, cxt->pwCtx->param_len);
+    ParamListInfo paramLI = RestoreParamList(cxt->pwCtx->queryInfo.param_space, cxt->pwCtx->queryInfo.param_len);

     /*
      * Create a QueryDesc for the query.
@@ -553,7 +613,7 @@ void ParallelQueryMain(void *seg)
     /* Set up DestReceiver, SharedExecutorInstrumentation, and QueryDesc. */
     knl_u_parallel_context *cxt = (knl_u_parallel_context *)seg;
     DestReceiver *receiver = ExecParallelGetReceiver(seg);
-    SharedExecutorInstrumentation *instrumentation = cxt->pwCtx->instrumentation;
+    SharedExecutorInstrumentation *instrumentation = cxt->pwCtx->queryInfo.instrumentation;
     if (instrumentation != NULL)
         instrument_options = instrumentation->instrument_options;
     QueryDesc *queryDesc = ExecParallelGetQueryDesc(seg, receiver, instrument_options);
@@ -568,7 +628,7 @@ void ParallelQueryMain(void *seg)
     ExecutorFinish(queryDesc);

     /* Report buffer usage during parallel execution. */
-    BufferUsage *buffer_usage = cxt->pwCtx->bufUsage;
+    BufferUsage *buffer_usage = cxt->pwCtx->queryInfo.bufUsage;
     InstrEndParallelQuery(&buffer_usage[t_thrd.bgworker_cxt.ParallelWorkerNumber]);

     /* Report instrumentation data if any instrumentation options are set. */
@@ -121,7 +121,6 @@ GatherState *ExecInitGather(Gather *node, EState *estate, int eflags)
 TupleTableSlot *ExecGather(GatherState *node)
 {
     TupleTableSlot *fslot = node->funnel_slot;
-    int i;
     TupleTableSlot *slot = NULL;
     TupleTableSlot *resultSlot = NULL;
     ExprDoneCond isDone;
@@ -143,8 +142,6 @@ TupleTableSlot *ExecGather(GatherState *node)
      * parallel mode is active then we can try to fire up some workers.
      */
     if (gather->num_workers > 0 && IsInParallelMode()) {
-        bool got_any_worker = false;
-
         /* Initialize the workers required to execute Gather node. */
         if (!node->pei)
             node->pei = ExecInitParallelPlan(node->ps.lefttree, estate, gather->num_workers);
@@ -157,31 +154,28 @@ TupleTableSlot *ExecGather(GatherState *node)
         LaunchParallelWorkers(pcxt);

         /* Set up tuple queue readers to read the results. */
-        if (pcxt->nworkers > 0) {
-            node->nreaders = 0;
-            node->reader = (TupleQueueReader **)palloc(pcxt->nworkers * sizeof(TupleQueueReader *));
-
-            for (i = 0; i < pcxt->nworkers; ++i) {
-                if (pcxt->worker[i].bgwhandle == NULL)
-                    continue;
-
-                shm_mq_set_handle(node->pei->tqueue[i], pcxt->worker[i].bgwhandle);
-                node->reader[node->nreaders++] =
-                    CreateTupleQueueReader(node->pei->tqueue[i], fslot->tts_tupleDescriptor);
-                got_any_worker = true;
-            }
-        }
-
-        /* No workers? Then never mind. */
-        if (!got_any_worker) {
-            ExecShutdownGatherWorkers(node);
-        } else {
+        if (pcxt->nworkers_launched > 0) {
+            ExecParallelCreateReaders(node->pei, fslot->tts_tupleDescriptor);
+
+            /* Make a working array showing the active readers */
+            node->nreaders = pcxt->nworkers_launched;
+            Size readerSize = node->nreaders * sizeof(TupleQueueReader *);
+            node->reader = (TupleQueueReader **)palloc(readerSize);
+
+            int rc = memcpy_s(node->reader, readerSize, node->pei->reader, readerSize);
+            securec_check(rc, "", "");
+
             t_thrd.subrole = BACKGROUND_LEADER;
+        } else {
+            /* No workers? Then never mind. */
+            node->nreaders = 0;
+            node->reader = NULL;
         }
+        node->nextreader = 0;
     }

     /* Run plan locally if no workers or not single-copy. */
-    node->need_to_scan_locally = (node->reader == NULL) ||
+    node->need_to_scan_locally = (node->nreaders == 0) ||
         (!gather->single_copy && u_sess->attr.attr_sql.parallel_leader_participation);
     node->initialized = true;
 }
@@ -261,10 +255,10 @@ static TupleTableSlot *gather_getnext(GatherState *gatherstate)
     PlanState *outerPlan = outerPlanState(gatherstate);
     TupleTableSlot *fslot = gatherstate->funnel_slot;

-    while (gatherstate->reader != NULL || gatherstate->need_to_scan_locally) {
+    while (gatherstate->nreaders > 0 || gatherstate->need_to_scan_locally) {
         CHECK_FOR_INTERRUPTS();

-        if (gatherstate->reader != NULL) {
+        if (gatherstate->nreaders > 0) {
             HeapTuple tup = gather_readnext(gatherstate);
             if (HeapTupleIsValid(tup)) {
                 (void)ExecStoreTuple(tup, /* tuple to store */
@@ -306,15 +300,13 @@ static HeapTuple gather_readnext(GatherState *gatherstate)
         HeapTuple tup = TupleQueueReaderNext(reader, true, &readerdone);

         /*
-         * If this reader is done, remove it. If all readers are done,
-         * clean up remaining worker state.
+         * If this reader is done, remove it from our working array of active
+         * readers. If all readers are done, we're outta here.
          */
         if (readerdone) {
             Assert(!tup);
-            DestroyTupleQueueReader(reader);
             --gatherstate->nreaders;
             if (gatherstate->nreaders == 0) {
-                ExecShutdownGatherWorkers(gatherstate);
                 return NULL;
             }
             Size remainSize = sizeof(TupleQueueReader *) * (gatherstate->nreaders - gatherstate->nextreader);
@@ -366,9 +358,7 @@ static HeapTuple gather_readnext(GatherState *gatherstate)
 /* ----------------------------------------------------------------
  * ExecShutdownGatherWorkers
  *
- * Destroy the parallel workers. Collect all the stats after
- * workers are stopped, else some work done by workers won't be
- * accounted.
+ * Stop all the parallel workers.
  * ----------------------------------------------------------------
  */
 static void ExecShutdownGatherWorkers(GatherState *node)
@@ -377,14 +367,8 @@ static void ExecShutdownGatherWorkers(GatherState *node)
     if (node->pei != NULL)
         ExecParallelFinish(node->pei);

-    /* Shut down tuple queue readers before shutting down workers. */
-    if (node->reader != NULL) {
-        for (int i = 0; i < node->nreaders; ++i)
-            DestroyTupleQueueReader(node->reader[i]);
-
-        pfree(node->reader);
-        node->reader = NULL;
-    }
+    /* Flush local copy of reader array */
+    pfree_ext(node->reader);
 }

 /* ----------------------------------------------------------------
@@ -672,12 +672,12 @@ void ExecSeqScanInitializeDSM(SeqScanState *node, ParallelContext *pcxt, int nod
     knl_u_parallel_context *cxt = (knl_u_parallel_context *)pcxt->seg;

     /* Here we can't use palloc, cause we have switch to old memctx in ExecInitParallelPlan */
-    cxt->pwCtx->pscan[nodeid] = (ParallelHeapScanDesc)MemoryContextAllocZero(cxt->memCtx, node->pscan_len);
-    heap_parallelscan_initialize(cxt->pwCtx->pscan[nodeid], node->pscan_len, node->ss_currentRelation,
+    cxt->pwCtx->queryInfo.pscan[nodeid] = (ParallelHeapScanDesc)MemoryContextAllocZero(cxt->memCtx, node->pscan_len);
+    heap_parallelscan_initialize(cxt->pwCtx->queryInfo.pscan[nodeid], node->pscan_len, node->ss_currentRelation,
         estate->es_snapshot);
-    cxt->pwCtx->pscan[nodeid]->plan_node_id = node->ps.plan->plan_node_id;
+    cxt->pwCtx->queryInfo.pscan[nodeid]->plan_node_id = node->ps.plan->plan_node_id;
     node->ss_currentScanDesc =
-        (AbsTblScanDesc)heap_beginscan_parallel(node->ss_currentRelation, cxt->pwCtx->pscan[nodeid]);
+        (AbsTblScanDesc)heap_beginscan_parallel(node->ss_currentRelation, cxt->pwCtx->queryInfo.pscan[nodeid]);
 }

 /* ----------------------------------------------------------------
@@ -691,9 +691,9 @@ void ExecSeqScanInitializeWorker(SeqScanState *node, void *context)
     ParallelHeapScanDesc pscan = NULL;
     knl_u_parallel_context *cxt = (knl_u_parallel_context *)context;

-    for (int i = 0; i < cxt->pwCtx->pscan_num; i++) {
-        if (node->ps.plan->plan_node_id == cxt->pwCtx->pscan[i]->plan_node_id) {
-            pscan = cxt->pwCtx->pscan[i];
+    for (int i = 0; i < cxt->pwCtx->queryInfo.pscan_num; i++) {
+        if (node->ps.plan->plan_node_id == cxt->pwCtx->queryInfo.pscan[i]->plan_node_id) {
+            pscan = cxt->pwCtx->queryInfo.pscan[i];
             break;
         }
     }
@@ -452,12 +452,9 @@ TupleQueueReader *CreateTupleQueueReader(shm_mq_handle *handle, TupleDesc tupled
  */
 void DestroyTupleQueueReader(TupleQueueReader *reader)
 {
-    if (reader->queue != NULL) {
-        shm_mq_detach(reader->queue);
-        reader->queue = NULL;
-    }
-    if (reader->remapinfo != NULL)
+    if (reader->remapinfo != NULL) {
         pfree(reader->remapinfo);
+    }
     pfree(reader);
 }

@@ -132,8 +132,22 @@ void InitializeParallelDSM(ParallelContext *pcxt, const void *snap)
      */
     pcxt->seg = dsm_create();

-    knl_u_parallel_context *cxt = (knl_u_parallel_context *)pcxt->seg;
-    MemoryContext oldcontext = MemoryContextSwitchTo(cxt->memCtx);
+    MemoryContext oldcontext = NULL;
+    knl_u_parallel_context *cxt = NULL;
+    if (pcxt->seg != NULL) {
+        cxt = (knl_u_parallel_context *)pcxt->seg;
+        oldcontext = MemoryContextSwitchTo(cxt->memCtx);
+    } else {
+        /* DSM segment is full, use backend private memory, set worker to 0, fallback to serial query */
+        oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+        pcxt->nworkers = 0;
+        pcxt->seg = palloc(sizeof(knl_u_parallel_context));
+        cxt = (knl_u_parallel_context *)pcxt->seg;
+        cxt->memCtx = TopMemoryContext;
+        cxt->pwCtx = (ParallelInfoContext*)palloc0(sizeof(ParallelInfoContext));
+        cxt->used = true;
+        slist_init(&cxt->on_detach);
+    }

     /* Initialize fixed-size state in shared memory. */
     cxt->pwCtx->database_id = u_sess->proc_cxt.MyDatabaseId;
@@ -644,7 +658,7 @@ void DestroyParallelContext(ParallelContext *pcxt)
      * stored there.
      */
     if (pcxt->seg != NULL) {
-        dsm_detach(&(pcxt->seg));
+        dsm_detach(&(pcxt->seg), pcxt->nworkers > 0 ? true : false);
         pcxt->seg = NULL;
     }

@@ -1052,6 +1066,7 @@ void ParallelWorkerMain(Datum main_arg)

     /* Report success. */
     pq_putmessage('X', NULL, 0);
+    pq_stop_redirect_to_shm_mq();
 }

 /*
@@ -31,6 +31,13 @@
 #include "utils/memutils.h"
 #include "postmaster/bgworker_internals.h"

+/* Backend-local tracking for on-detach callbacks. */
+typedef struct __dsm_segment_detach_callback {
+    on_dsm_detach_callback function;
+    Datum arg;
+    slist_node node;
+} dsm_segment_detach_callback;
+
 #ifdef __USE_NUMA
 static void RestoreCpuAffinity(cpu_set_t *cpuset)
 {
@@ -44,10 +51,33 @@ static void RestoreCpuAffinity(cpu_set_t *cpuset)
 }
 #endif

-void dsm_detach(void **seg)
+/*
+ * newMemCtx indicates whether the memctx backing this context was newly
+ * created. When it is false we just need to pfree the individual
+ * allocations; when it is true we can instead call MemoryContextDelete
+ * to delete the whole newly created memctx.
+ */
+void dsm_detach(void **seg, bool newMemCtx)
 {
+    Assert(seg != NULL);
     Assert(*seg != NULL);
     knl_u_parallel_context *ctx = (knl_u_parallel_context *)*seg;
+
+    /*
+     * Invoke registered callbacks. Just in case one of those callbacks
+     * throws a further error that brings us back here, pop the callback
+     * before invoking it, to avoid infinite error recursion.
+     */
+    while (!slist_is_empty(&ctx->on_detach)) {
+        slist_node *node = slist_pop_head_node(&ctx->on_detach);
+        dsm_segment_detach_callback *cb = slist_container(dsm_segment_detach_callback, node, node);
+        on_dsm_detach_callback function = cb->function;
+        Datum arg = cb->arg;
+        pfree(cb);
+
+        function(seg, arg);
+    }
+
+    if (newMemCtx) {
 #ifdef __USE_NUMA
     RestoreCpuAffinity(ctx->pwCtx->cpuset);
 #endif
@@ -55,6 +85,10 @@ void dsm_detach(void **seg)
     ctx->memCtx = NULL;
     ctx->pwCtx = NULL;
     ctx->used = false;
+    } else {
+        pfree(ctx->pwCtx);
+        pfree(ctx);
+    }
 }

 void *dsm_create(void)
@@ -69,11 +103,24 @@ void *dsm_create(void)
             (void)MemoryContextSwitchTo(oldContext);

             u_sess->parallel_ctx[i].used = true;
+            slist_init(&u_sess->parallel_ctx[i].on_detach);
             return &(u_sess->parallel_ctx[i]);
         }
     }

-    ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("too many dynamic shared memory segments")));
+    ereport(WARNING, (errmsg("too many dynamic shared memory segments")));
     return NULL;
 }
+
+/*
+ * Register an on-detach callback for a dynamic shared memory segment.
+ */
+void on_dsm_detach(void *seg, on_dsm_detach_callback function, Datum arg)
+{
+    dsm_segment_detach_callback *cb =
+        (dsm_segment_detach_callback*)MemoryContextAlloc(TopMemoryContext, sizeof(dsm_segment_detach_callback));
+    cb->function = function;
+    cb->arg = arg;
+    knl_u_parallel_context *ctx = (knl_u_parallel_context *)seg;
+    slist_push_head(&ctx->on_detach, &cb->node);
+}
@@ -21,15 +21,18 @@
 typedef struct SharedExecutorInstrumentation SharedExecutorInstrumentation;

 typedef struct ParallelExecutorInfo {
-    PlanState *planstate;
-    ParallelContext *pcxt;
-    BufferUsage *buffer_usage;
-    SharedExecutorInstrumentation *instrumentation;
-    shm_mq_handle **tqueue;
-    bool finished;
+    PlanState *planstate;                           /* plan subtree we're running in parallel */
+    ParallelContext *pcxt;                          /* parallel context we're using */
+    BufferUsage *buffer_usage;                      /* points to bufusage area in DSM */
+    SharedExecutorInstrumentation *instrumentation; /* optional */
+    bool finished;                                  /* set true by ExecParallelFinish */
+    /* These two arrays have pcxt->nworkers_launched entries: */
+    shm_mq_handle **tqueue;                         /* tuple queues for worker output */
+    struct TupleQueueReader **reader;               /* tuple reader/writer support */
 } ParallelExecutorInfo;

 extern ParallelExecutorInfo *ExecInitParallelPlan(PlanState *planstate, EState *estate, int nworkers);
+extern void ExecParallelCreateReaders(ParallelExecutorInfo *pei, TupleDesc tupDesc);
 extern void ExecParallelFinish(ParallelExecutorInfo *pei);
 extern void ExecParallelCleanup(ParallelExecutorInfo *pei);
 extern void ExecParallelReinitialize(ParallelExecutorInfo *pei);
@@ -2046,6 +2046,26 @@ typedef struct knl_u_ext_fdw_context {
 /* Info need to pass from leader to worker */
 struct ParallelHeapScanDescData;
 typedef uint64 XLogRecPtr;
+typedef struct ParallelQueryInfo {
+    struct SharedExecutorInstrumentation *instrumentation;
+    BufferUsage *bufUsage;
+    char *tupleQueue;
+    char *pstmt_space;
+    char *param_space;
+    Size param_len;
+    int pscan_num;
+    ParallelHeapScanDescData **pscan;
+} ParallelQueryInfo;
+
+struct BTShared;
+struct SharedSort;
+typedef struct ParallelBtreeInfo {
+    char *queryText;
+    BTShared *btShared;
+    SharedSort *sharedSort;
+    SharedSort *sharedSort2;
+} ParallelBtreeInfo;
+
 typedef struct ParallelInfoContext {
     Oid database_id;
     Oid authenticated_user_id;
@@ -2060,11 +2080,6 @@ typedef struct ParallelInfoContext {
     BackendId parallel_master_backend_id;
     TimestampTz xact_ts;
     TimestampTz stmt_ts;
-    char *pstmt_space;
-    char *param_space;
-    Size param_len;
-    int pscan_num;
-    ParallelHeapScanDescData **pscan;
     int usedComboCids;
     struct ComboCidKeyData *comboCids;
     char *tsnapspace;
@@ -2092,14 +2107,17 @@ typedef struct ParallelInfoContext {
     TransactionId *ParallelCurrentXids;
     char *library_name;
     char *function_name;
-    BufferUsage *bufUsage;
-    char *tupleQueue;
-    struct SharedExecutorInstrumentation *instrumentation;
     char *namespace_search_path;
 #ifdef __USE_NUMA
     int numaNode;
     cpu_set_t *cpuset;
 #endif
+
+    union {
+        ParallelQueryInfo queryInfo;  /* parameters for parallel query only */
+        ParallelBtreeInfo btreeInfo;  /* parameters for parallel create index(btree) only */
+    };
+
     /* Mutex protects remaining fields. */
     slock_t mutex;
     /* Maximum XactLastRecEnd of any worker. */
@@ -2108,8 +2126,9 @@ typedef struct ParallelInfoContext {

 typedef struct knl_u_parallel_context {
     ParallelInfoContext *pwCtx;
-    MemoryContext memCtx;
-    bool used;
+    MemoryContext memCtx;     /* memory context used to malloc memory */
+    slist_head on_detach;     /* On-detach callbacks. */
+    bool used;                /* used or not */
 } knl_u_parallel_context;

 enum knl_session_status {
@@ -26,7 +26,7 @@
 /* Functions that create or remove mappings. */
 extern void *dsm_create(void);
 #define dsm_attach(dsmHandle)
-extern void dsm_detach(void **seg);
+extern void dsm_detach(void **seg, bool newMemCtx);

 /* Resource management functions. */
 #define dsm_pin_mapping(dsmSegment)
@@ -41,8 +41,7 @@ extern void dsm_detach(void **seg);
 #define dsm_segment_handle(dsmSegment)

 /* Cleanup hooks. */
-#define on_dsm_detach(dsmSegment, callbackFunc, arg)
-#define cancel_on_dsm_detach(dsmSegment, callbackFunc, arg)
-#define reset_on_dsm_detach
+typedef void (*on_dsm_detach_callback) (void *seg, Datum arg);
+extern void on_dsm_detach(void *seg, on_dsm_detach_callback function, Datum arg);

 #endif /* DSM_H */
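To make the cleanup-hook contract above concrete, the standalone C program below mirrors the same pop-before-invoke, LIFO callback mechanism using plain pointers instead of the kernel's slist and Datum types. Every name in it (segment, register_on_detach, detach, report) is an illustrative stand-in, not part of this commit.

#include <stdio.h>
#include <stdlib.h>

typedef void (*detach_callback)(void *seg, void *arg);

typedef struct callback_node {
    detach_callback function;
    void *arg;
    struct callback_node *next;   /* singly linked; head insertion gives LIFO order */
} callback_node;

typedef struct segment {
    callback_node *on_detach;     /* plays the role of knl_u_parallel_context.on_detach */
} segment;

static void register_on_detach(segment *seg, detach_callback fn, void *arg)
{
    callback_node *cb = malloc(sizeof(callback_node));
    cb->function = fn;
    cb->arg = arg;
    cb->next = seg->on_detach;    /* push at the head, like slist_push_head() */
    seg->on_detach = cb;
}

static void detach(segment *seg)
{
    /* Pop each callback before invoking it, so an error raised by the
     * callback cannot re-run it and recurse forever -- the same rationale
     * as the comment in dsm_detach() above. */
    while (seg->on_detach != NULL) {
        callback_node *cb = seg->on_detach;
        seg->on_detach = cb->next;
        detach_callback fn = cb->function;
        void *arg = cb->arg;
        free(cb);
        fn(seg, arg);
    }
}

static void report(void *seg, void *arg)
{
    (void)seg;
    printf("cleanup: %s\n", (const char *)arg);
}

int main(void)
{
    segment seg = { NULL };
    register_on_detach(&seg, report, "registered first, runs last");
    register_on_detach(&seg, report, "registered last, runs first");
    detach(&seg);                 /* callbacks fire in LIFO order */
    return 0;
}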