Fallback to normal case when dsm segment is full. Add on_dsm_detach.

This commit is contained in:
TotaJ
2020-10-12 14:59:09 +08:00
parent 13b34b53cd
commit b84b4cc418
9 changed files with 325 additions and 201 deletions

View File

@ -145,14 +145,14 @@ static bool ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateConte
/* Call estimators for parallel-aware nodes. */ /* Call estimators for parallel-aware nodes. */
if (planstate->plan->parallel_aware) { if (planstate->plan->parallel_aware) {
switch (nodeTag(planstate)) { switch (nodeTag(planstate)) {
case T_SeqScanState: case T_SeqScanState:
ExecSeqScanEstimate((SeqScanState *)planstate, e->pcxt); ExecSeqScanEstimate((SeqScanState *)planstate, e->pcxt);
break; break;
default: default:
break; break;
} }
} }
return planstate_tree_walker(planstate, (bool (*)())ExecParallelEstimate, e); return planstate_tree_walker(planstate, (bool (*)())ExecParallelEstimate, e);
} }
@ -179,16 +179,16 @@ static bool ExecParallelInitializeDSM(PlanState *planstate, ExecParallelInitiali
knl_u_parallel_context *cxt = (knl_u_parallel_context *)d->pcxt->seg; knl_u_parallel_context *cxt = (knl_u_parallel_context *)d->pcxt->seg;
/* Call initializers for parallel-aware plan nodes. */ /* Call initializers for parallel-aware plan nodes. */
if (planstate->plan->parallel_aware) { if (planstate->plan->parallel_aware) {
switch (nodeTag(planstate)) { switch (nodeTag(planstate)) {
case T_SeqScanState: case T_SeqScanState:
ExecSeqScanInitializeDSM((SeqScanState *)planstate, d->pcxt, cxt->pwCtx->pscan_num); ExecSeqScanInitializeDSM((SeqScanState *)planstate, d->pcxt, cxt->pwCtx->queryInfo.pscan_num);
cxt->pwCtx->pscan_num++; cxt->pwCtx->queryInfo.pscan_num++;
break; break;
default: default:
break; break;
} }
} }
return planstate_tree_walker(planstate, (bool (*)())ExecParallelInitializeDSM, d); return planstate_tree_walker(planstate, (bool (*)())ExecParallelInitializeDSM, d);
} }
@ -211,10 +211,10 @@ static shm_mq_handle **ExecParallelSetupTupleQueues(ParallelContext *pcxt, bool
* otherwise, find the already allocated space. * otherwise, find the already allocated space.
*/ */
if (!reinitialize) { if (!reinitialize) {
cxt->pwCtx->tupleQueue = (char *)palloc0(PARALLEL_TUPLE_QUEUE_SIZE * (Size)pcxt->nworkers); cxt->pwCtx->queryInfo.tupleQueue = (char *)palloc0(PARALLEL_TUPLE_QUEUE_SIZE * (Size)pcxt->nworkers);
} }
Assert(cxt->pwCtx->tupleQueue != NULL); Assert(cxt->pwCtx->queryInfo.tupleQueue != NULL);
char *tqueuespace = cxt->pwCtx->tupleQueue; char *tqueuespace = cxt->pwCtx->queryInfo.tupleQueue;
/* Create the queues, and become the receiver for each. */ /* Create the queues, and become the receiver for each. */
for (int i = 0; i < pcxt->nworkers; ++i) { for (int i = 0; i < pcxt->nworkers; ++i) {
@ -227,6 +227,27 @@ static shm_mq_handle **ExecParallelSetupTupleQueues(ParallelContext *pcxt, bool
return responseq; return responseq;
} }
/*
* Set up tuple queue readers to read the results of a parallel subplan.
* All the workers are expected to return tuples matching tupDesc.
*
* This is separate from ExecInitParallelPlan() because we can launch the
* worker processes and let them start doing something before we do this.
*/
void ExecParallelCreateReaders(ParallelExecutorInfo *pei, TupleDesc tupDesc)
{
Assert(pei->reader == NULL);
int nworkers = pei->pcxt->nworkers_launched;
if (nworkers > 0) {
pei->reader = (TupleQueueReader **)palloc((uint32)nworkers * sizeof(TupleQueueReader *));
for (int i = 0; i < nworkers; i++) {
shm_mq_set_handle(pei->tqueue[i], pei->pcxt->worker[i].bgwhandle);
pei->reader[i] = CreateTupleQueueReader(pei->tqueue[i], tupDesc);
}
}
}
/* /*
* Re-initialize the parallel executor info such that it can be reused by * Re-initialize the parallel executor info such that it can be reused by
* workers. * workers.
@ -235,6 +256,7 @@ void ExecParallelReinitialize(ParallelExecutorInfo *pei)
{ {
ReinitializeParallelDSM(pei->pcxt); ReinitializeParallelDSM(pei->pcxt);
pei->tqueue = ExecParallelSetupTupleQueues(pei->pcxt, true); pei->tqueue = ExecParallelSetupTupleQueues(pei->pcxt, true);
pei->reader = NULL;
pei->finished = false; pei->finished = false;
} }
@ -289,21 +311,23 @@ ParallelExecutorInfo *ExecInitParallelPlan(PlanState *planstate, EState *estate,
* asked for has been allocated or initialized yet, though, so do that. * asked for has been allocated or initialized yet, though, so do that.
*/ */
MemoryContext oldcontext = MemoryContextSwitchTo(cxt->memCtx); MemoryContext oldcontext = MemoryContextSwitchTo(cxt->memCtx);
ParallelQueryInfo queryInfo;
int rc = memset_s(&queryInfo, sizeof(ParallelQueryInfo), 0, sizeof(ParallelQueryInfo));
securec_check(rc, "", "");
/* Store serialized PlannedStmt. */ /* Store serialized PlannedStmt. */
cxt->pwCtx->pstmt_space = ExecSerializePlan(planstate->plan, estate); queryInfo.pstmt_space = ExecSerializePlan(planstate->plan, estate);
/* Store serialized ParamListInfo. */ /* Store serialized ParamListInfo. */
cxt->pwCtx->param_space = (char *)palloc0(param_len); queryInfo.param_space = (char *)palloc0(param_len);
cxt->pwCtx->param_len = param_len; queryInfo.param_len = param_len;
SerializeParamList(estate->es_param_list_info, cxt->pwCtx->param_space, param_len); SerializeParamList(estate->es_param_list_info, queryInfo.param_space, param_len);
/* Allocate space for each worker's BufferUsage; no need to initialize. */ /* Allocate space for each worker's BufferUsage; no need to initialize. */
cxt->pwCtx->bufUsage = (BufferUsage *)palloc0(sizeof(BufferUsage) * pcxt->nworkers); queryInfo.bufUsage = (BufferUsage *)palloc0(sizeof(BufferUsage) * pcxt->nworkers);
pei->buffer_usage = cxt->pwCtx->bufUsage; pei->buffer_usage = queryInfo.bufUsage;
/* We don't need the TupleQueueReaders yet, though. */
/* Set up tuple queues. */ pei->reader = NULL;
pei->tqueue = ExecParallelSetupTupleQueues(pcxt, false);
/* /*
* If instrumentation options were supplied, allocate space for the * If instrumentation options were supplied, allocate space for the
@ -311,19 +335,19 @@ ParallelExecutorInfo *ExecInitParallelPlan(PlanState *planstate, EState *estate,
* during ExecParallelInitializeDSM. * during ExecParallelInitializeDSM.
*/ */
if (estate->es_instrument) { if (estate->es_instrument) {
cxt->pwCtx->instrumentation = (SharedExecutorInstrumentation *)palloc0(instrumentation_len); queryInfo.instrumentation = (SharedExecutorInstrumentation *)palloc0(instrumentation_len);
cxt->pwCtx->instrumentation->instrument_options = estate->es_instrument; queryInfo.instrumentation->instrument_options = estate->es_instrument;
cxt->pwCtx->instrumentation->instrument_offset = instrument_offset; queryInfo.instrumentation->instrument_offset = instrument_offset;
cxt->pwCtx->instrumentation->num_workers = nworkers; queryInfo.instrumentation->num_workers = nworkers;
cxt->pwCtx->instrumentation->num_plan_nodes = e.nnodes; queryInfo.instrumentation->num_plan_nodes = e.nnodes;
Instrumentation *instrument = GetInstrumentationArray(cxt->pwCtx->instrumentation); Instrumentation *instrument = GetInstrumentationArray(queryInfo.instrumentation);
for (int i = 0; i < nworkers * e.nnodes; ++i) { for (int i = 0; i < nworkers * e.nnodes; ++i) {
InstrInit(&instrument[i], estate->es_instrument); InstrInit(&instrument[i], estate->es_instrument);
} }
pei->instrumentation = cxt->pwCtx->instrumentation; pei->instrumentation = queryInfo.instrumentation;
} }
cxt->pwCtx->pscan = (ParallelHeapScanDesc *)palloc0(sizeof(ParallelHeapScanDesc) * e.nnodes); queryInfo.pscan = (ParallelHeapScanDesc *)palloc0(sizeof(ParallelHeapScanDesc) * e.nnodes);
/* /*
* Give parallel-aware nodes a chance to initialize their shared data. * Give parallel-aware nodes a chance to initialize their shared data.
@ -331,9 +355,14 @@ ParallelExecutorInfo *ExecInitParallelPlan(PlanState *planstate, EState *estate,
* if it exists. * if it exists.
*/ */
d.pcxt = pcxt; d.pcxt = pcxt;
d.instrumentation = cxt->pwCtx->instrumentation; d.instrumentation = queryInfo.instrumentation;
d.nnodes = 0; d.nnodes = 0;
cxt->pwCtx->queryInfo = queryInfo;
/* Set up the tuple queues that the workers will write into. */
pei->tqueue = ExecParallelSetupTupleQueues(pcxt, false);
/* Here we switch to old context, cause heap_beginscan_parallel need malloc memory */ /* Here we switch to old context, cause heap_beginscan_parallel need malloc memory */
(void)MemoryContextSwitchTo(oldcontext); (void)MemoryContextSwitchTo(oldcontext);
(void)ExecParallelInitializeDSM(planstate, &d); (void)ExecParallelInitializeDSM(planstate, &d);
@ -397,12 +426,43 @@ void ExecParallelFinish(ParallelExecutorInfo *pei)
if (pei->finished) if (pei->finished)
return; return;
/* First, wait for the workers to finish. */ int nworkers = pei->pcxt->nworkers_launched;
int i;
/*
* Detach from tuple queues ASAP, so that any still-active workers will
* notice that no further results are wanted.
*/
if (pei->tqueue != NULL) {
for (i = 0; i < nworkers; i++) {
shm_mq_detach(pei->tqueue[i]);
}
pfree(pei->tqueue);
pei->tqueue = NULL;
}
/*
* While we're waiting for the workers to finish, let's get rid of the
* tuple queue readers. (Any other local cleanup could be done here too.)
*/
if (pei->reader != NULL) {
for (i = 0; i < nworkers; i++) {
DestroyTupleQueueReader(pei->reader[i]);
}
pfree(pei->reader);
pei->reader = NULL;
}
/* Now wait for the workers to finish. */
WaitForParallelWorkersToFinish(pei->pcxt); WaitForParallelWorkersToFinish(pei->pcxt);
/* Next, accumulate buffer usage. */ /*
for (int i = 0; i < pei->pcxt->nworkers; ++i) * Next, accumulate buffer usage. (This must wait for the workers to
* finish, or we might get incomplete data.)
*/
for (i = 0; i < nworkers; ++i) {
InstrAccumParallelQuery(&pei->buffer_usage[i]); InstrAccumParallelQuery(&pei->buffer_usage[i]);
}
/* Finally, accumulate instrumentation, if any. */ /* Finally, accumulate instrumentation, if any. */
if (pei->instrumentation) { if (pei->instrumentation) {
@ -436,7 +496,7 @@ static DestReceiver *ExecParallelGetReceiver(void *seg)
Assert(seg != NULL); Assert(seg != NULL);
knl_u_parallel_context *cxt = (knl_u_parallel_context *)seg; knl_u_parallel_context *cxt = (knl_u_parallel_context *)seg;
char *mqspace = cxt->pwCtx->tupleQueue; char *mqspace = cxt->pwCtx->queryInfo.tupleQueue;
mqspace += t_thrd.bgworker_cxt.ParallelWorkerNumber * PARALLEL_TUPLE_QUEUE_SIZE; mqspace += t_thrd.bgworker_cxt.ParallelWorkerNumber * PARALLEL_TUPLE_QUEUE_SIZE;
shm_mq *mq = (shm_mq *)mqspace; shm_mq *mq = (shm_mq *)mqspace;
shm_mq_set_sender(mq, t_thrd.proc); shm_mq_set_sender(mq, t_thrd.proc);
@ -451,10 +511,10 @@ static QueryDesc *ExecParallelGetQueryDesc(void *seg, DestReceiver *receiver, in
knl_u_parallel_context *cxt = (knl_u_parallel_context *)seg; knl_u_parallel_context *cxt = (knl_u_parallel_context *)seg;
/* Reconstruct leader-supplied PlannedStmt. */ /* Reconstruct leader-supplied PlannedStmt. */
PlannedStmt *pstmt = (PlannedStmt *)stringToNode(cxt->pwCtx->pstmt_space); PlannedStmt *pstmt = (PlannedStmt *)stringToNode(cxt->pwCtx->queryInfo.pstmt_space);
/* Reconstruct ParamListInfo. */ /* Reconstruct ParamListInfo. */
ParamListInfo paramLI = RestoreParamList(cxt->pwCtx->param_space, cxt->pwCtx->param_len); ParamListInfo paramLI = RestoreParamList(cxt->pwCtx->queryInfo.param_space, cxt->pwCtx->queryInfo.param_len);
/* /*
* Create a QueryDesc for the query. * Create a QueryDesc for the query.
@ -553,7 +613,7 @@ void ParallelQueryMain(void *seg)
/* Set up DestReceiver, SharedExecutorInstrumentation, and QueryDesc. */ /* Set up DestReceiver, SharedExecutorInstrumentation, and QueryDesc. */
knl_u_parallel_context *cxt = (knl_u_parallel_context *)seg; knl_u_parallel_context *cxt = (knl_u_parallel_context *)seg;
DestReceiver *receiver = ExecParallelGetReceiver(seg); DestReceiver *receiver = ExecParallelGetReceiver(seg);
SharedExecutorInstrumentation *instrumentation = cxt->pwCtx->instrumentation; SharedExecutorInstrumentation *instrumentation = cxt->pwCtx->queryInfo.instrumentation;
if (instrumentation != NULL) if (instrumentation != NULL)
instrument_options = instrumentation->instrument_options; instrument_options = instrumentation->instrument_options;
QueryDesc *queryDesc = ExecParallelGetQueryDesc(seg, receiver, instrument_options); QueryDesc *queryDesc = ExecParallelGetQueryDesc(seg, receiver, instrument_options);
@ -568,7 +628,7 @@ void ParallelQueryMain(void *seg)
ExecutorFinish(queryDesc); ExecutorFinish(queryDesc);
/* Report buffer usage during parallel execution. */ /* Report buffer usage during parallel execution. */
BufferUsage *buffer_usage = cxt->pwCtx->bufUsage; BufferUsage *buffer_usage = cxt->pwCtx->queryInfo.bufUsage;
InstrEndParallelQuery(&buffer_usage[t_thrd.bgworker_cxt.ParallelWorkerNumber]); InstrEndParallelQuery(&buffer_usage[t_thrd.bgworker_cxt.ParallelWorkerNumber]);
/* Report instrumentation data if any instrumentation options are set. */ /* Report instrumentation data if any instrumentation options are set. */

View File

@ -121,7 +121,6 @@ GatherState *ExecInitGather(Gather *node, EState *estate, int eflags)
TupleTableSlot *ExecGather(GatherState *node) TupleTableSlot *ExecGather(GatherState *node)
{ {
TupleTableSlot *fslot = node->funnel_slot; TupleTableSlot *fslot = node->funnel_slot;
int i;
TupleTableSlot *slot = NULL; TupleTableSlot *slot = NULL;
TupleTableSlot *resultSlot = NULL; TupleTableSlot *resultSlot = NULL;
ExprDoneCond isDone; ExprDoneCond isDone;
@ -143,8 +142,6 @@ TupleTableSlot *ExecGather(GatherState *node)
* parallel mode is active then we can try to fire up some workers. * parallel mode is active then we can try to fire up some workers.
*/ */
if (gather->num_workers > 0 && IsInParallelMode()) { if (gather->num_workers > 0 && IsInParallelMode()) {
bool got_any_worker = false;
/* Initialize the workers required to execute Gather node. */ /* Initialize the workers required to execute Gather node. */
if (!node->pei) if (!node->pei)
node->pei = ExecInitParallelPlan(node->ps.lefttree, estate, gather->num_workers); node->pei = ExecInitParallelPlan(node->ps.lefttree, estate, gather->num_workers);
@ -157,31 +154,28 @@ TupleTableSlot *ExecGather(GatherState *node)
LaunchParallelWorkers(pcxt); LaunchParallelWorkers(pcxt);
/* Set up tuple queue readers to read the results. */ /* Set up tuple queue readers to read the results. */
if (pcxt->nworkers > 0) { if (pcxt->nworkers_launched > 0) {
node->nreaders = 0; ExecParallelCreateReaders(node->pei, fslot->tts_tupleDescriptor);
node->reader = (TupleQueueReader **)palloc(pcxt->nworkers * sizeof(TupleQueueReader *));
for (i = 0; i < pcxt->nworkers; ++i) { /* Make a working array showing the active readers */
if (pcxt->worker[i].bgwhandle == NULL) node->nreaders = pcxt->nworkers_launched;
continue; Size readerSize = node->nreaders * sizeof(TupleQueueReader *);
node->reader = (TupleQueueReader **)palloc(readerSize);
shm_mq_set_handle(node->pei->tqueue[i], pcxt->worker[i].bgwhandle); int rc = memcpy_s(node->reader, readerSize, node->pei->reader, readerSize);
node->reader[node->nreaders++] = securec_check(rc, "", "");
CreateTupleQueueReader(node->pei->tqueue[i], fslot->tts_tupleDescriptor);
got_any_worker = true;
}
}
/* No workers? Then never mind. */
if (!got_any_worker) {
ExecShutdownGatherWorkers(node);
} else {
t_thrd.subrole = BACKGROUND_LEADER; t_thrd.subrole = BACKGROUND_LEADER;
} else {
/* No workers? Then never mind. */
node->nreaders = 0;
node->reader = NULL;
} }
node->nextreader = 0;
} }
/* Run plan locally if no workers or not single-copy. */ /* Run plan locally if no workers or not single-copy. */
node->need_to_scan_locally = (node->reader == NULL) || node->need_to_scan_locally = (node->nreaders == 0) ||
(!gather->single_copy && u_sess->attr.attr_sql.parallel_leader_participation); (!gather->single_copy && u_sess->attr.attr_sql.parallel_leader_participation);
node->initialized = true; node->initialized = true;
} }
@ -261,10 +255,10 @@ static TupleTableSlot *gather_getnext(GatherState *gatherstate)
PlanState *outerPlan = outerPlanState(gatherstate); PlanState *outerPlan = outerPlanState(gatherstate);
TupleTableSlot *fslot = gatherstate->funnel_slot; TupleTableSlot *fslot = gatherstate->funnel_slot;
while (gatherstate->reader != NULL || gatherstate->need_to_scan_locally) { while (gatherstate->nreaders > 0 || gatherstate->need_to_scan_locally) {
CHECK_FOR_INTERRUPTS(); CHECK_FOR_INTERRUPTS();
if (gatherstate->reader != NULL) { if (gatherstate->nreaders > 0) {
HeapTuple tup = gather_readnext(gatherstate); HeapTuple tup = gather_readnext(gatherstate);
if (HeapTupleIsValid(tup)) { if (HeapTupleIsValid(tup)) {
(void)ExecStoreTuple(tup, /* tuple to store */ (void)ExecStoreTuple(tup, /* tuple to store */
@ -306,15 +300,13 @@ static HeapTuple gather_readnext(GatherState *gatherstate)
HeapTuple tup = TupleQueueReaderNext(reader, true, &readerdone); HeapTuple tup = TupleQueueReaderNext(reader, true, &readerdone);
/* /*
* If this reader is done, remove it. If all readers are done, * If this reader is done, remove it from our working array of active
* clean up remaining worker state. * readers. If all readers are done, we're outta here.
*/ */
if (readerdone) { if (readerdone) {
Assert(!tup); Assert(!tup);
DestroyTupleQueueReader(reader);
--gatherstate->nreaders; --gatherstate->nreaders;
if (gatherstate->nreaders == 0) { if (gatherstate->nreaders == 0) {
ExecShutdownGatherWorkers(gatherstate);
return NULL; return NULL;
} }
Size remainSize = sizeof(TupleQueueReader *) * (gatherstate->nreaders - gatherstate->nextreader); Size remainSize = sizeof(TupleQueueReader *) * (gatherstate->nreaders - gatherstate->nextreader);
@ -366,9 +358,7 @@ static HeapTuple gather_readnext(GatherState *gatherstate)
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
* ExecShutdownGatherWorkers * ExecShutdownGatherWorkers
* *
* Destroy the parallel workers. Collect all the stats after * Stop all the parallel workers.
* workers are stopped, else some work done by workers won't be
* accounted.
* ---------------------------------------------------------------- * ----------------------------------------------------------------
*/ */
static void ExecShutdownGatherWorkers(GatherState *node) static void ExecShutdownGatherWorkers(GatherState *node)
@ -377,14 +367,8 @@ static void ExecShutdownGatherWorkers(GatherState *node)
if (node->pei != NULL) if (node->pei != NULL)
ExecParallelFinish(node->pei); ExecParallelFinish(node->pei);
/* Shut down tuple queue readers before shutting down workers. */ /* Flush local copy of reader array */
if (node->reader != NULL) { pfree_ext(node->reader);
for (int i = 0; i < node->nreaders; ++i)
DestroyTupleQueueReader(node->reader[i]);
pfree(node->reader);
node->reader = NULL;
}
} }
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------

View File

@ -672,12 +672,12 @@ void ExecSeqScanInitializeDSM(SeqScanState *node, ParallelContext *pcxt, int nod
knl_u_parallel_context *cxt = (knl_u_parallel_context *)pcxt->seg; knl_u_parallel_context *cxt = (knl_u_parallel_context *)pcxt->seg;
/* Here we can't use palloc, cause we have switch to old memctx in ExecInitParallelPlan */ /* Here we can't use palloc, cause we have switch to old memctx in ExecInitParallelPlan */
cxt->pwCtx->pscan[nodeid] = (ParallelHeapScanDesc)MemoryContextAllocZero(cxt->memCtx, node->pscan_len); cxt->pwCtx->queryInfo.pscan[nodeid] = (ParallelHeapScanDesc)MemoryContextAllocZero(cxt->memCtx, node->pscan_len);
heap_parallelscan_initialize(cxt->pwCtx->pscan[nodeid], node->pscan_len, node->ss_currentRelation, heap_parallelscan_initialize(cxt->pwCtx->queryInfo.pscan[nodeid], node->pscan_len, node->ss_currentRelation,
estate->es_snapshot); estate->es_snapshot);
cxt->pwCtx->pscan[nodeid]->plan_node_id = node->ps.plan->plan_node_id; cxt->pwCtx->queryInfo.pscan[nodeid]->plan_node_id = node->ps.plan->plan_node_id;
node->ss_currentScanDesc = node->ss_currentScanDesc =
(AbsTblScanDesc)heap_beginscan_parallel(node->ss_currentRelation, cxt->pwCtx->pscan[nodeid]); (AbsTblScanDesc)heap_beginscan_parallel(node->ss_currentRelation, cxt->pwCtx->queryInfo.pscan[nodeid]);
} }
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
@ -691,9 +691,9 @@ void ExecSeqScanInitializeWorker(SeqScanState *node, void *context)
ParallelHeapScanDesc pscan = NULL; ParallelHeapScanDesc pscan = NULL;
knl_u_parallel_context *cxt = (knl_u_parallel_context *)context; knl_u_parallel_context *cxt = (knl_u_parallel_context *)context;
for (int i = 0; i < cxt->pwCtx->pscan_num; i++) { for (int i = 0; i < cxt->pwCtx->queryInfo.pscan_num; i++) {
if (node->ps.plan->plan_node_id == cxt->pwCtx->pscan[i]->plan_node_id) { if (node->ps.plan->plan_node_id == cxt->pwCtx->queryInfo.pscan[i]->plan_node_id) {
pscan = cxt->pwCtx->pscan[i]; pscan = cxt->pwCtx->queryInfo.pscan[i];
break; break;
} }
} }

View File

@ -452,12 +452,9 @@ TupleQueueReader *CreateTupleQueueReader(shm_mq_handle *handle, TupleDesc tupled
*/ */
void DestroyTupleQueueReader(TupleQueueReader *reader) void DestroyTupleQueueReader(TupleQueueReader *reader)
{ {
if (reader->queue != NULL) { if (reader->remapinfo != NULL) {
shm_mq_detach(reader->queue);
reader->queue = NULL;
}
if (reader->remapinfo != NULL)
pfree(reader->remapinfo); pfree(reader->remapinfo);
}
pfree(reader); pfree(reader);
} }

View File

@ -132,8 +132,22 @@ void InitializeParallelDSM(ParallelContext *pcxt, const void *snap)
*/ */
pcxt->seg = dsm_create(); pcxt->seg = dsm_create();
knl_u_parallel_context *cxt = (knl_u_parallel_context *)pcxt->seg; MemoryContext oldcontext = NULL;
MemoryContext oldcontext = MemoryContextSwitchTo(cxt->memCtx); knl_u_parallel_context *cxt = NULL;
if (pcxt->seg != NULL) {
cxt = (knl_u_parallel_context *)pcxt->seg;
oldcontext = MemoryContextSwitchTo(cxt->memCtx);
} else {
/* DSM segment is full, use backend private memory, set worker to 0, fallback to serial query */
oldcontext = MemoryContextSwitchTo(TopMemoryContext);
pcxt->nworkers = 0;
pcxt->seg = palloc(sizeof(knl_u_parallel_context));
cxt = (knl_u_parallel_context *)pcxt->seg;
cxt->memCtx = TopMemoryContext;
cxt->pwCtx = (ParallelInfoContext*)palloc0(sizeof(ParallelInfoContext));
cxt->used = true;
slist_init(&cxt->on_detach);
}
/* Initialize fixed-size state in shared memory. */ /* Initialize fixed-size state in shared memory. */
cxt->pwCtx->database_id = u_sess->proc_cxt.MyDatabaseId; cxt->pwCtx->database_id = u_sess->proc_cxt.MyDatabaseId;
@ -644,7 +658,7 @@ void DestroyParallelContext(ParallelContext *pcxt)
* stored there. * stored there.
*/ */
if (pcxt->seg != NULL) { if (pcxt->seg != NULL) {
dsm_detach(&(pcxt->seg)); dsm_detach(&(pcxt->seg), pcxt->nworkers > 0 ? true : false);
pcxt->seg = NULL; pcxt->seg = NULL;
} }
@ -1052,6 +1066,7 @@ void ParallelWorkerMain(Datum main_arg)
/* Report success. */ /* Report success. */
pq_putmessage('X', NULL, 0); pq_putmessage('X', NULL, 0);
pq_stop_redirect_to_shm_mq();
} }
/* /*

View File

@ -31,6 +31,13 @@
#include "utils/memutils.h" #include "utils/memutils.h"
#include "postmaster/bgworker_internals.h" #include "postmaster/bgworker_internals.h"
/* Backend-local tracking for on-detach callbacks. */
typedef struct __dsm_segment_detach_callback {
on_dsm_detach_callback function;
Datum arg;
slist_node node;
} dsm_segment_detach_callback;
#ifdef __USE_NUMA #ifdef __USE_NUMA
static void RestoreCpuAffinity(cpu_set_t *cpuset) static void RestoreCpuAffinity(cpu_set_t *cpuset)
{ {
@ -44,17 +51,44 @@ static void RestoreCpuAffinity(cpu_set_t *cpuset)
} }
#endif #endif
void dsm_detach(void **seg) /*
* newMemCtx means whether the memctx is new created or not.
* When newMemCtx is true, we just need to call pfree to free the mem,
* or we can call MemoryContextDelete to jsut delete the whole new created
* memctx.
*/
void dsm_detach(void **seg, bool newMemCtx)
{ {
Assert(seg != NULL);
Assert(*seg != NULL); Assert(*seg != NULL);
knl_u_parallel_context *ctx = (knl_u_parallel_context *)*seg; knl_u_parallel_context *ctx = (knl_u_parallel_context *)*seg;
/*
* Invoke registered callbacks. Just in case one of those callbacks
* throws a further error that brings us back here, pop the callback
* before invoking it, to avoid infinite error recursion.
*/
while (!slist_is_empty(&ctx->on_detach)) {
slist_node *node = slist_pop_head_node(&ctx->on_detach);
dsm_segment_detach_callback *cb = slist_container(dsm_segment_detach_callback, node, node);
on_dsm_detach_callback function = cb->function;
Datum arg = cb->arg;
pfree(cb);
function(seg, arg);
}
if (newMemCtx) {
#ifdef __USE_NUMA #ifdef __USE_NUMA
RestoreCpuAffinity(ctx->pwCtx->cpuset); RestoreCpuAffinity(ctx->pwCtx->cpuset);
#endif #endif
MemoryContextDelete(ctx->memCtx); MemoryContextDelete(ctx->memCtx);
ctx->memCtx = NULL; ctx->memCtx = NULL;
ctx->pwCtx = NULL; ctx->pwCtx = NULL;
ctx->used = false; ctx->used = false;
} else {
pfree(ctx->pwCtx);
pfree(ctx);
}
} }
void *dsm_create(void) void *dsm_create(void)
@ -69,11 +103,24 @@ void *dsm_create(void)
(void)MemoryContextSwitchTo(oldContext); (void)MemoryContextSwitchTo(oldContext);
u_sess->parallel_ctx[i].used = true; u_sess->parallel_ctx[i].used = true;
slist_init(&u_sess->parallel_ctx[i].on_detach);
return &(u_sess->parallel_ctx[i]); return &(u_sess->parallel_ctx[i]);
} }
} }
ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("too many dynamic shared memory segments"))); ereport(WARNING, (errmsg("too many dynamic shared memory segments")));
return NULL; return NULL;
} }
/*
* Register an on-detach callback for a dynamic shared memory segment.
*/
void on_dsm_detach(void *seg, on_dsm_detach_callback function, Datum arg)
{
dsm_segment_detach_callback *cb =
(dsm_segment_detach_callback*)MemoryContextAlloc(TopMemoryContext, sizeof(dsm_segment_detach_callback));
cb->function = function;
cb->arg = arg;
knl_u_parallel_context *ctx = (knl_u_parallel_context *)seg;
slist_push_head(&ctx->on_detach, &cb->node);
}

View File

@ -1,38 +1,41 @@
/* -------------------------------------------------------------------- /* --------------------------------------------------------------------
* execParallel.h * execParallel.h
* POSTGRES parallel execution interface * POSTGRES parallel execution interface
* *
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* src/include/executor/execParallel.h * src/include/executor/execParallel.h
* -------------------------------------------------------------------- * --------------------------------------------------------------------
*/ */
#ifndef EXECPARALLEL_H #ifndef EXECPARALLEL_H
#define EXECPARALLEL_H #define EXECPARALLEL_H
#include "access/parallel.h" #include "access/parallel.h"
#include "nodes/execnodes.h" #include "nodes/execnodes.h"
#include "nodes/parsenodes.h" #include "nodes/parsenodes.h"
#include "nodes/plannodes.h" #include "nodes/plannodes.h"
typedef struct SharedExecutorInstrumentation SharedExecutorInstrumentation; typedef struct SharedExecutorInstrumentation SharedExecutorInstrumentation;
typedef struct ParallelExecutorInfo { typedef struct ParallelExecutorInfo {
PlanState *planstate; PlanState *planstate; /* plan subtree we're running in parallel */
ParallelContext *pcxt; ParallelContext *pcxt; /* parallel context we're using */
BufferUsage *buffer_usage; BufferUsage *buffer_usage; /* points to bufusage area in DSM */
SharedExecutorInstrumentation *instrumentation; SharedExecutorInstrumentation *instrumentation; /* optional */
shm_mq_handle **tqueue; bool finished; /* set true by ExecParallelFinish */
bool finished; /* These two arrays have pcxt->nworkers_launched entries: */
} ParallelExecutorInfo; shm_mq_handle **tqueue; /* tuple queues for worker output */
struct TupleQueueReader **reader; /* tuple reader/writer support */
extern ParallelExecutorInfo *ExecInitParallelPlan(PlanState *planstate, EState *estate, int nworkers); } ParallelExecutorInfo;
extern void ExecParallelFinish(ParallelExecutorInfo *pei);
extern void ExecParallelCleanup(ParallelExecutorInfo *pei); extern ParallelExecutorInfo *ExecInitParallelPlan(PlanState *planstate, EState *estate, int nworkers);
extern void ExecParallelReinitialize(ParallelExecutorInfo *pei); extern void ExecParallelCreateReaders(ParallelExecutorInfo *pei, TupleDesc tupDesc);
extern void ExecParallelFinish(ParallelExecutorInfo *pei);
extern void ParallelQueryMain(void *seg); extern void ExecParallelCleanup(ParallelExecutorInfo *pei);
#endif /* EXECPARALLEL_H */ extern void ExecParallelReinitialize(ParallelExecutorInfo *pei);
extern void ParallelQueryMain(void *seg);
#endif /* EXECPARALLEL_H */

View File

@ -2046,6 +2046,26 @@ typedef struct knl_u_ext_fdw_context {
/* Info need to pass from leader to worker */ /* Info need to pass from leader to worker */
struct ParallelHeapScanDescData; struct ParallelHeapScanDescData;
typedef uint64 XLogRecPtr; typedef uint64 XLogRecPtr;
typedef struct ParallelQueryInfo {
struct SharedExecutorInstrumentation *instrumentation;
BufferUsage *bufUsage;
char *tupleQueue;
char *pstmt_space;
char *param_space;
Size param_len;
int pscan_num;
ParallelHeapScanDescData **pscan;
} ParallelQueryInfo;
struct BTShared;
struct SharedSort;
typedef struct ParallelBtreeInfo {
char *queryText;
BTShared *btShared;
SharedSort *sharedSort;
SharedSort *sharedSort2;
} ParallelBtreeInfo;
typedef struct ParallelInfoContext { typedef struct ParallelInfoContext {
Oid database_id; Oid database_id;
Oid authenticated_user_id; Oid authenticated_user_id;
@ -2060,11 +2080,6 @@ typedef struct ParallelInfoContext {
BackendId parallel_master_backend_id; BackendId parallel_master_backend_id;
TimestampTz xact_ts; TimestampTz xact_ts;
TimestampTz stmt_ts; TimestampTz stmt_ts;
char *pstmt_space;
char *param_space;
Size param_len;
int pscan_num;
ParallelHeapScanDescData **pscan;
int usedComboCids; int usedComboCids;
struct ComboCidKeyData *comboCids; struct ComboCidKeyData *comboCids;
char *tsnapspace; char *tsnapspace;
@ -2092,14 +2107,17 @@ typedef struct ParallelInfoContext {
TransactionId *ParallelCurrentXids; TransactionId *ParallelCurrentXids;
char *library_name; char *library_name;
char *function_name; char *function_name;
BufferUsage *bufUsage;
char *tupleQueue;
struct SharedExecutorInstrumentation *instrumentation;
char *namespace_search_path; char *namespace_search_path;
#ifdef __USE_NUMA #ifdef __USE_NUMA
int numaNode; int numaNode;
cpu_set_t *cpuset; cpu_set_t *cpuset;
#endif #endif
union {
ParallelQueryInfo queryInfo; /* parameters for parallel query only */
ParallelBtreeInfo btreeInfo; /* parameters for parallel create index(btree) only */
};
/* Mutex protects remaining fields. */ /* Mutex protects remaining fields. */
slock_t mutex; slock_t mutex;
/* Maximum XactLastRecEnd of any worker. */ /* Maximum XactLastRecEnd of any worker. */
@ -2108,8 +2126,9 @@ typedef struct ParallelInfoContext {
typedef struct knl_u_parallel_context { typedef struct knl_u_parallel_context {
ParallelInfoContext *pwCtx; ParallelInfoContext *pwCtx;
MemoryContext memCtx; MemoryContext memCtx; /* memory context used to malloc memory */
bool used; slist_head on_detach; /* On-detach callbacks. */
bool used; /* used or not */
} knl_u_parallel_context; } knl_u_parallel_context;
enum knl_session_status { enum knl_session_status {

View File

@ -1,48 +1,47 @@
/* ------------------------------------------------------------------------- /* -------------------------------------------------------------------------
* *
* dsm.h * dsm.h
* manage dynamic shared memory segments * manage dynamic shared memory segments
* *
* Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* src/include/storage/dsm.h * src/include/storage/dsm.h
* *
* ------------------------------------------------------------------------- * -------------------------------------------------------------------------
*/ */
#ifndef DSM_H #ifndef DSM_H
#define DSM_H #define DSM_H
#define DSM_MAX_ITEM_PER_QUERY 8 #define DSM_MAX_ITEM_PER_QUERY 8
/* Startup and shutdown functions. */ /* Startup and shutdown functions. */
#define dsm_cleanup_using_control_segment(oldControlHandle) #define dsm_cleanup_using_control_segment(oldControlHandle)
#define dsm_postmaster_startup(shmemHeader) #define dsm_postmaster_startup(shmemHeader)
#define dsm_backend_shutdown #define dsm_backend_shutdown
#define dsm_detach_all #define dsm_detach_all
#define dsm_set_control_handle(dsmHandle) #define dsm_set_control_handle(dsmHandle)
/* Functions that create or remove mappings. */ /* Functions that create or remove mappings. */
extern void *dsm_create(void); extern void *dsm_create(void);
#define dsm_attach(dsmHandle) #define dsm_attach(dsmHandle)
extern void dsm_detach(void **seg); extern void dsm_detach(void **seg, bool newMemCtx);
/* Resource management functions. */ /* Resource management functions. */
#define dsm_pin_mapping(dsmSegment) #define dsm_pin_mapping(dsmSegment)
#define dsm_unpin_mapping(dsmSegment) #define dsm_unpin_mapping(dsmSegment)
#define dsm_pin_segment(dsmSegment) #define dsm_pin_segment(dsmSegment)
#define dsm_unpin_segment(dsmHandle) #define dsm_unpin_segment(dsmHandle)
#define dsm_find_mapping(dsmHandle) #define dsm_find_mapping(dsmHandle)
/* Informational functions. */ /* Informational functions. */
#define dsm_segment_address(dsmSegment) #define dsm_segment_address(dsmSegment)
#define dsm_segment_map_length(dsmSegment) #define dsm_segment_map_length(dsmSegment)
#define dsm_segment_handle(dsmSegment) #define dsm_segment_handle(dsmSegment)
/* Cleanup hooks. */ /* Cleanup hooks. */
#define on_dsm_detach(dsmSegment, callbackFunc, arg) typedef void (*on_dsm_detach_callback) (void *seg, Datum arg);
#define cancel_on_dsm_detach(dsmSegment, callbackFunc, arg) extern void on_dsm_detach(void *seg, on_dsm_detach_callback function, Datum arg);
#define reset_on_dsm_detach
#endif /* DSM_H */ #endif /* DSM_H */