/* -------------------------------------------------------------------------
 *
 * shmem.cpp
 *    create shared memory and initialize shared memory data structures.
 *
 * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/gausskernel/storage/ipc/shmem.cpp
 *
 * openGauss processes share one or more regions of shared memory.
 * The shared memory is created by a postmaster and is inherited
 * by each backend via fork() (or, in some ports, via other OS-specific
 * methods).  The routines in this file are used for allocating and
 * binding to shared memory data structures.
 *
 * NOTES:
 *    (a) There are three kinds of shared memory data structures
 *  available to openGauss: fixed-size structures, queues and hash
 *  tables.  Fixed-size structures contain things like global variables
 *  for a module and should never be allocated after the shared memory
 *  initialization phase.  Hash tables have a fixed maximum size, but
 *  their actual size can vary dynamically.  When entries are added
 *  to the table, more space is allocated.  Queues link data structures
 *  that have been allocated either within fixed-size structures or as hash
 *  buckets.  Each shared data structure has a string name to identify
 *  it (assigned in the module that declares it).
 *
 *    (b) During initialization, each module looks for its
 *  shared data structures in a hash table called the "Shmem Index".
 *  If the data structure is not present, the caller can allocate
 *  a new one and initialize it.  If the data structure is present,
 *  the caller "attaches" to the structure by initializing a pointer
 *  in the local address space.
 *    The shmem index has two purposes: first, it gives us
 *  a simple model of how the world looks when a backend process
 *  initializes.  If something is present in the shmem index,
 *  it is initialized.  If it is not, it is uninitialized.  Second,
 *  the shmem index allows us to allocate shared memory on demand
 *  instead of trying to preallocate structures and hard-wire the
 *  sizes and locations in header files.  If you are using a lot
 *  of shared memory in a lot of different places (and changing
 *  things during development), this is important.
 *
 *    (c) In standard Unix-ish environments, individual backends do not
 *  need to re-establish their local pointers into shared memory, because
 *  they inherit correct values of those variables via fork() from the
 *  postmaster.  However, this does not work in the EXEC_BACKEND case.
 *  In ports using EXEC_BACKEND, new backends have to set up their local
 *  pointers using the method described in (b) above.
 *
 *    (d) memory allocation model: shared memory can never be
 *  freed, once allocated.  Each hash table has its own free list,
 *  so hash buckets can be reused when an item is deleted.  However,
 *  if one hash table grows very large and then shrinks, its space
 *  cannot be redistributed to other tables.  We could build a simple
 *  hash bucket garbage collector if need be.  Right now, it seems
 *  unnecessary.
 *
 * -------------------------------------------------------------------------
 */
#include "postgres.h"
#include "knl/knl_variable.h"

#include "access/transam.h"
#include "miscadmin.h"
#include "storage/lock/lwlock.h"
#include "storage/pg_shmem.h"
#include "storage/shmem.h"
#include "storage/spin.h"

/*
 * Index hash table for named structures allocated from the process heap.
 * It starts out NULL, is created on first use by InitHeapmemIndex() and is
 * looked up by HeapmemInitStruct().  Despite living in this file it is not
 * the shared-memory "ShmemIndex"; that one is kept in
 * t_thrd.shemem_ptr_cxt.ShmemIndex.
 */
static HTAB* HeapmemIndex = NULL; /* primary index hashtable for heap-allocated structures */

/*
 * InitShmemAccess() --- set up basic pointers to shared memory.
 *
 * Note: the argument should be declared "PGShmemHeader *seghdr",
 * but we use void to avoid having to include ipc.h in shmem.h.
*/ void InitShmemAccess(void *seghdr) { PGShmemHeader* shmhdr = (PGShmemHeader*)seghdr; t_thrd.shemem_ptr_cxt.ShmemSegHdr = shmhdr; t_thrd.shemem_ptr_cxt.ShmemBase = (void*)shmhdr; t_thrd.shemem_ptr_cxt.ShmemEnd = (char*)t_thrd.shemem_ptr_cxt.ShmemBase + shmhdr->totalsize; } /* * InitShmemAllocation() --- set up shared-memory space allocation. * * This should be called only in the postmaster or a standalone backend. */ void InitShmemAllocation(void) { PGShmemHeader* shmhdr = t_thrd.shemem_ptr_cxt.ShmemSegHdr; Assert(shmhdr != NULL); /* * Initialize the spinlock used by ShmemAlloc. We have to do the space * allocation the hard way, since obviously ShmemAlloc can't be called * yet. */ t_thrd.shemem_ptr_cxt.ShmemLock = (slock_t*)(((char*)shmhdr) + shmhdr->freeoffset); shmhdr->freeoffset += MAXALIGN(sizeof(slock_t)); Assert(shmhdr->freeoffset <= shmhdr->totalsize); SpinLockInit(t_thrd.shemem_ptr_cxt.ShmemLock); /* ShmemIndex can't be set up yet (need LWLocks first) */ shmhdr->index = NULL; t_thrd.shemem_ptr_cxt.ShmemIndex = (HTAB*)NULL; /* * Initialize ShmemVariableCache for transaction manager. (This doesn't * really belong here, but not worth moving.) */ t_thrd.xact_cxt.ShmemVariableCache = (VariableCache)ShmemAlloc(sizeof(*t_thrd.xact_cxt.ShmemVariableCache)); errno_t rc = memset_s(t_thrd.xact_cxt.ShmemVariableCache, sizeof(*t_thrd.xact_cxt.ShmemVariableCache), 0, sizeof(*t_thrd.xact_cxt.ShmemVariableCache)); securec_check(rc, "\0", "\0"); #ifdef ENABLE_MOT /* * Allow non backend (MOT) threads to access ShmemVariableCache for transaction manager. */ if (g_instance.mot_cxt.shmemVariableCache == NULL) { g_instance.mot_cxt.shmemVariableCache = t_thrd.xact_cxt.ShmemVariableCache; } #endif } /* * ShmemAlloc -- allocate max-aligned chunk from shared memory * * Assumes ShmemLock and ShmemSegHdr are initialized. * * Returns: real pointer to memory or NULL if we are out * of space. Has to return a real pointer in order * to be compatible with malloc(). 
*/ void* ShmemAlloc(Size size) { Size newStart; Size newFree; Size newSize; void* newSpace = NULL; /* use volatile pointer to prevent code rearrangement */ volatile PGShmemHeader* shmemseghdr = t_thrd.shemem_ptr_cxt.ShmemSegHdr; /* * Ensure all space is adequately aligned. We used to only MAXALIGN this * space but experience has proved that on modern systems that is not good * enough. Many parts of the system are very sensitive to critical data * structures getting split across cache line boundaries. To avoid that, * attempt to align the beginning of the allocation to a cache line * boundary. The calling code will still need to be careful about how it * uses the allocated space - e.g. by padding each element in an array of * structures out to a power-of-two size - but without this, even that * won't be sufficient. */ newSize = CACHELINEALIGN(size); /* @MDshmem01 */ Assert(shmemseghdr != NULL); SpinLockAcquire(t_thrd.shemem_ptr_cxt.ShmemLock); /* * Small requests need only be aligned on MAXALIGN boundaries * This is already assured by rounding up all size requests to MAXALIGN. * So the buffer is placed immediatly after the last. */ newStart = shmemseghdr->freeoffset; newSpace = (void*)((char*)t_thrd.shemem_ptr_cxt.ShmemBase + newStart); /* * Align the address of large requests, since they may be used for I/O * buffers. I/O buffers have a stricter alignment, potentially a multiple * of 512 or more (for Direct I/O or other uses). Adjust the newSize to * include the additional offset introduced by the new alignment. */ if (newSize >= BLCKSZ) { void* bufSpace = NULL; bufSpace = (void*)BUFFERALIGN(newSpace); newSize += (Size)((char*)bufSpace - (char*)newSpace); newSpace = bufSpace; } /* Calculate the expected offset of the next allocated space */ newFree = newStart + newSize; /* * If the offset of the next allocated space is not beyond the available space, * adjust the offset to include the new space. 
* Otherwise, leave the offset unchanged, and return NULL * (discarding the calculations done earlier). */ if (newFree <= shmemseghdr->totalsize) { shmemseghdr->freeoffset = newFree; } else { newSpace = NULL; } SpinLockRelease(t_thrd.shemem_ptr_cxt.ShmemLock); if (newSpace == NULL) ereport(WARNING, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory"))); return newSpace; } /* * HeapMemAlloc * allocate memory from heap */ void* HeapMemAlloc(Size size) { Assert(size > 0); void* ptr = malloc(size); if (ptr == NULL) ereport(WARNING, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); return ptr; } /* * ShmemAddrIsValid -- test if an address refers to shared memory * * Returns TRUE if the pointer points within the shared memory segment. */ bool ShmemAddrIsValid(const void* addr) { return (addr >= t_thrd.shemem_ptr_cxt.ShmemBase) && (addr < t_thrd.shemem_ptr_cxt.ShmemEnd); } /* * InitShmemIndex() --- set up or attach to shmem index table. */ void InitShmemIndex(void) { HASHCTL info; int hash_flags; /* * Create the shared memory shmem index. * * Since ShmemInitHash calls ShmemInitStruct, which expects the ShmemIndex * hashtable to exist already, we have a bit of a circularity problem in * initializing the ShmemIndex itself. The special "ShmemIndex" hash * table name will tell ShmemInitStruct to fake it. */ info.keysize = SHMEM_INDEX_KEYSIZE; info.entrysize = sizeof(ShmemIndexEnt); hash_flags = HASH_ELEM; t_thrd.shemem_ptr_cxt.ShmemIndex = ShmemInitHash("ShmemIndex", SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE, &info, hash_flags); } /* * ShmemInitHash -- Create and initialize, or attach to, a * shared memory hash table. * * We assume caller is doing some kind of synchronization * so that two processes don't try to create/initialize the same * table at once. (In practice, all creations are done in the postmaster * process; child processes should always be attaching to existing tables.) * * max_size is the estimated maximum number of hashtable entries. 
This is * not a hard limit, but the access efficiency will degrade if it is * exceeded substantially (since it's used to compute directory size and * the hash table buckets will get overfull). * * init_size is the number of hashtable entries to preallocate. For a table * whose maximum size is certain, this should be equal to max_size; that * ensures that no run-time out-of-shared-memory failures can occur. * * Note: before Postgres 9.0, this function returned NULL for some failure * cases. Now, it always throws error instead, so callers need not check * for NULL. */ HTAB* ShmemInitHash(const char* name, /* table string name for shmem index */ long init_size, /* initial table size */ long max_size, /* max size of the table */ HASHCTL* infoP, /* info about key and bucket size */ int hash_flags) /* info about infoP */ { bool found = false; void* location = NULL; /* * Hash tables allocated in shared memory have a fixed directory; it can't * grow or other backends wouldn't be able to find it. So, make sure we * make it big enough to start with. * * The shared memory allocator must be specified too. 
*/ infoP->dsize = infoP->max_dsize = hash_select_dirsize(max_size); infoP->alloc = ShmemAlloc; hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE; /* look it up in the shmem index */ location = ShmemInitStruct(name, hash_get_shared_size(infoP, hash_flags), &found); /* * if it already exists, attach to it rather than allocate and initialize * new space */ if (found) hash_flags |= HASH_ATTACH; /* Pass location of hashtable header to hash_create */ infoP->hctl = (HASHHDR*)location; return hash_create(name, init_size, infoP, hash_flags); } static void InitHeapmemIndex(void) { Size size; HASHCTL info; int hash_flags; void* structPtr = NULL; errno_t errorno = EOK; errorno = memset_s(&info, sizeof(HASHCTL), 0, sizeof(HASHCTL)); securec_check_c(errorno, "\0", "\0"); /* Now, we use ShmemIndexLock, maybe HeapIndexLock in future */ LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE); info.keysize = SHMEM_INDEX_KEYSIZE; info.entrysize = sizeof(ShmemIndexEnt); hash_flags = HASH_ELEM; info.dsize = info.max_dsize = hash_select_dirsize(SHMEM_INDEX_SIZE); info.alloc = HeapMemAlloc; hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE; size = hash_get_shared_size(&info, hash_flags), structPtr = HeapMemAlloc(size); if (structPtr == NULL) { LWLockRelease(ShmemIndexLock); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("can not malloc memory for HeapmemIndex" "HeapmemIndex (%lu bytes requested)", (unsigned long)size))); } /* Pass location of hashtable header to hash_create */ info.hctl = (HASHHDR*)structPtr; HeapmemIndex = hash_create("HeapmemIndex", SHMEM_INDEX_SIZE, &info, hash_flags); LWLockRelease(ShmemIndexLock); } void* HeapmemInitStruct(const char* name, Size size, bool* foundPtr) { ShmemIndexEnt* result = NULL; void* structPtr = NULL; if (HeapmemIndex == NULL) InitHeapmemIndex(); /* Now, we use ShmemIndexLock, maybe HeapIndexLock in future */ LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE); /* look it up in the shmem index */ result = 
(ShmemIndexEnt*)hash_search(HeapmemIndex, name, HASH_ENTER_NULL, foundPtr); if (result == NULL) { LWLockRelease(ShmemIndexLock); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("could not create HeapMemIndex entry for data structure \"%s\"", name))); } if (*foundPtr) { /* * Structure is in the shmem index so someone else has allocated it * already. The size better be the same as the size we are trying to * initialize to, or there is a name conflict (or worse). */ if (result->size != size) { LWLockRelease(ShmemIndexLock); ereport(ERROR, (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH), errmsg("HeapMemIndex entry size is wrong for data structure" " \"%s\": expected %lu, actual %lu", name, (unsigned long)size, (unsigned long)result->size))); } structPtr = result->location; } else { /* It isn't in the table yet. allocate and initialize it */ structPtr = HeapMemAlloc(size); if (structPtr == NULL) { /* out of memory; remove the failed ShmemIndex entry */ hash_search(HeapmemIndex, name, HASH_REMOVE, NULL); LWLockRelease(ShmemIndexLock); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("not enough heap memory for data structure" " \"%s\" (%lu bytes requested)", name, (unsigned long)size))); } result->size = size; result->location = structPtr; } LWLockRelease(ShmemIndexLock); return structPtr; } HTAB* HeapMemInitHash(const char* name, /* table string name for shmem index */ long init_size, /* initial table size */ long max_size, /* max size of the table */ HASHCTL* infoP, /* info about key and bucket size */ int hash_flags) /* info about infoP */ { bool found = false; void* location = NULL; /* * Hash tables allocated in shared memory have a fixed directory; it can't * grow or other backends wouldn't be able to find it. So, make sure we * make it big enough to start with. * * The shared memory allocator must be specified too. 
*/ infoP->dsize = infoP->max_dsize = hash_select_dirsize(max_size); infoP->alloc = HeapMemAlloc; hash_flags |= HASH_HEAP_MEM | HASH_ALLOC | HASH_DIRSIZE; location = HeapmemInitStruct(name, hash_get_shared_size(infoP, hash_flags), &found); /* * if it already exists, attach to it rather than allocate and initialize * new space */ if (found) hash_flags |= HASH_ATTACH; /* Pass location of hashtable header to hash_create */ infoP->hctl = (HASHHDR*)location; return hash_create(name, init_size, infoP, hash_flags); } /* clean up (reset) this hash table in heap memory. */ void HeapMemResetHash(HTAB* hashtbl, const char* tabname) { HASH_SEQ_STATUS seq_scan = {NULL, 0, NULL}; void* hentry = NULL; hash_seq_init(&seq_scan, hashtbl); while ((hentry = hash_seq_search(&seq_scan)) != NULL) { /* as we know, hash entry consists of two parts: * KEY part + VALUE part. * and KEY part is the first position, so that * point(hash entry) = point(key of hash entry) */ if (NULL == hash_search(hashtbl, hentry, HASH_REMOVE, NULL)) { ereport(PANIC, (errmsg("corrupt during reset shared hash table \"%s\"", tabname))); } } } /* * ShmemInitStruct -- Create/attach to a structure in shared memory. * * This is called during initialization to find or allocate * a data structure in shared memory. If no other process * has created the structure, this routine allocates space * for it. If it exists already, a pointer to the existing * structure is returned. * * Returns: pointer to the object. *foundPtr is set TRUE if the object was * already in the shmem index (hence, already initialized). * * Note: before Postgres 9.0, this function returned NULL for some failure * cases. Now, it always throws error instead, so callers need not check * for NULL. 
*/
void* ShmemInitStruct(const char* name, Size size, bool* foundPtr)
{
    ShmemIndexEnt* entry = NULL;
    void* addr = NULL;

    LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);

    if (t_thrd.shemem_ptr_cxt.ShmemIndex == NULL) {
        PGShmemHeader* hdr = t_thrd.shemem_ptr_cxt.ShmemSegHdr;

        /* Without an index, the only legal request is for the index itself. */
        Assert(strcmp(name, "ShmemIndex") == 0);

        if (!IsUnderPostmaster) {
            /*
             * Bootstrapping: the shmem index does not exist yet, so we must
             * be creating it.
             *
             * Note that ShmemIndexLock is released before the index has been
             * initialized.  That should be fine, because no other process
             * can be accessing shared memory at this point.
             */
            Assert(hdr->index == NULL);
            addr = ShmemAlloc(size);
            if (addr == NULL) {
                LWLockRelease(ShmemIndexLock);
                ereport(ERROR,
                    (errcode(ERRCODE_OUT_OF_MEMORY),
                        errmsg("not enough shared memory for data structure"
                               " \"%s\" (%lu bytes requested)",
                            name,
                            (unsigned long)size)));
            }
            hdr->index = addr;
            *foundPtr = FALSE;
        } else {
            /* A (non-standalone) backend attaching to the existing index. */
            Assert(hdr->index != NULL);
            addr = hdr->index;
            *foundPtr = TRUE;
        }

        LWLockRelease(ShmemIndexLock);
        return addr;
    }

    /* Find the entry for this name, creating it if it is not there yet. */
    entry = (ShmemIndexEnt*)hash_search(t_thrd.shemem_ptr_cxt.ShmemIndex, name, HASH_ENTER_NULL, foundPtr);
    if (entry == NULL) {
        LWLockRelease(ShmemIndexLock);
        ereport(ERROR,
            (errcode(ERRCODE_OUT_OF_MEMORY),
                errmsg("could not create ShmemIndex entry for data structure \"%s\"", name)));
    }

    if (!*foundPtr) {
        /* New entry: obtain the space and record it in the index. */
        addr = ShmemAlloc(size);
        if (addr == NULL) {
            /* Out of memory; take the half-made index entry back out. */
            hash_search(t_thrd.shemem_ptr_cxt.ShmemIndex, name, HASH_REMOVE, NULL);
            LWLockRelease(ShmemIndexLock);
            ereport(ERROR,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                    errmsg("not enough shared memory for data structure"
                           " \"%s\" (%lu bytes requested)",
                        name,
                        (unsigned long)size)));
        }
        entry->size = size;
        entry->location = addr;
    } else {
        /*
         * Somebody already allocated this structure.  Its recorded size has
         * to match the size requested now; otherwise two different
         * structures are using the same name (or worse).
         */
        if (entry->size != size) {
            LWLockRelease(ShmemIndexLock);
            ereport(ERROR,
                (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH),
                    errmsg("ShmemIndex entry size is wrong for data structure"
                           " \"%s\": expected %lu, actual %lu",
                        name,
                        (unsigned long)size,
                        (unsigned long)entry->size)));
        }
        addr = entry->location;
    }

    LWLockRelease(ShmemIndexLock);

    Assert(ShmemAddrIsValid(addr));

    return addr;
}

/*
 * add_size -- sum of two Size values, with an overflow check.
 *
 * Reports ERROR if the sum does not fit in Size.
 */
Size add_size(Size s1, Size s2)
{
    Size sum = s1 + s2;

    /* Size is assumed to be unsigned: a wrapped sum is smaller than an operand. */
    if (sum < s1 || sum < s2)
        ereport(
            ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("requested shared memory size overflows size_t")));

    return sum;
}

/*
 * mul_size -- product of two Size values, with an overflow check.
 *
 * Returns 0 if either operand is 0.  Reports ERROR if the product does not
 * fit in Size.
 */
Size mul_size(Size s1, Size s2)
{
    Size product = 0;

    if (s1 != 0 && s2 != 0) {
        product = s1 * s2;

        /* Size is assumed to be unsigned: dividing back detects a wrapped product. */
        if (product / s2 != s1)
            ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("requested shared memory size overflows size_t")));
    }

    return product;
}