Avoid repeatedly calling smgrnblocks() in read query on standby. Now, we only add cached nblocks in main fork.
2286 lines
78 KiB
C++
2286 lines
78 KiB
C++
/* -------------------------------------------------------------------------
|
|
*
|
|
* storage.cpp
|
|
* code to create and destroy physical storage for relations
|
|
*
|
|
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* src/common/backend/catalog/storage.cpp
|
|
*
|
|
* NOTES
|
|
* Some of this code used to be in storage/smgr/smgr.c, and the
|
|
* function names still reflect that.
|
|
*
|
|
* -------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include <unistd.h>
|
|
#include "postgres.h"
|
|
#include "knl/knl_variable.h"
|
|
|
|
#include "access/cstore_am.h"
|
|
#include "access/dfs/dfs_insert.h"
|
|
#include "access/visibilitymap.h"
|
|
#include "access/xact.h"
|
|
#include "access/xlog.h"
|
|
#include "access/xloginsert.h"
|
|
#include "access/xlogutils.h"
|
|
#include "catalog/catalog.h"
|
|
#include "catalog/dependency.h"
|
|
#include "catalog/dfsstore_ctlg.h"
|
|
#include "catalog/storage.h"
|
|
#include "catalog/storage_gtt.h"
|
|
#include "catalog/storage_xlog.h"
|
|
#include "catalog/pg_hashbucket_fn.h"
|
|
#include "commands/tablespace.h"
|
|
#include "pgxc/pgxc.h"
|
|
#include "storage/freespace.h"
|
|
#include "storage/lmgr.h"
|
|
#include "storage/smgr.h"
|
|
#include "threadpool/threadpool.h"
|
|
#include "utils/fmgroids.h"
|
|
#include "utils/memutils.h"
|
|
#include "utils/rel.h"
|
|
#include "utils/rel_gs.h"
|
|
#include "utils/snapmgr.h"
|
|
#include "utils/syscache.h"
|
|
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
#include "tsdb/cache/part_cachemgr.h"
|
|
#include "tsdb/cache/partid_cachemgr.h"
|
|
#include "tsdb/storage/part.h"
|
|
#include "tsdb/utils/constant_def.h"
|
|
#include "tsdb/utils/ts_pg_cudesc.h"
|
|
#include "tsdb/utils/ts_relcache.h"
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
/*
|
|
* We keep a list of all relations (represented as RelFileNode values)
|
|
* that have been created or deleted in the current transaction. When
|
|
* a relation is created, we create the physical file immediately, but
|
|
* remember it so that we can delete the file again if the current
|
|
* transaction is aborted. Conversely, a deletion request is NOT
|
|
* executed immediately, but is just entered in the list. When and if
|
|
* the transaction commits, we can delete the physical file.
|
|
*
|
|
* To handle subtransactions, every entry is marked with its transaction
|
|
* nesting level. At subtransaction commit, we reassign the subtransaction's
|
|
* entries to the parent nesting level. At subtransaction abort, we can
|
|
* immediately execute the abort-time actions for all entries of the current
|
|
* nesting level.
|
|
*
|
|
* NOTE: the list is kept in t_thrd.top_mem_cxt to be sure it won't disappear
|
|
* unbetimes. It'd probably be OK to keep it in u_sess->top_transaction_mem_cxt,
|
|
* but I'm being paranoid.
|
|
*/
|
|
|
|
typedef struct PendingRelDelete {
|
|
RelFileNode relnode; /* relation that may need to be deleted */
|
|
ForkNumber forknum; /* MAIN_FORKNUM for row table; or valid column ForkNum */
|
|
BackendId backend; /* InvalidBackendId if not a temp rel */
|
|
Oid relOid; /* InvalidOid if not a global temp rel */
|
|
Oid ownerid; /* owner id for user space statistics */
|
|
bool atCommit; /* T=delete at commit; F=delete at abort */
|
|
int nestLevel; /* xact nesting level of request */
|
|
struct PendingRelDelete* next; /* linked-list link */
|
|
} PendingRelDelete;
|
|
|
|
/*
 * NOTE(review): not referenced in the visible part of this file; presumably a
 * default element count for an array of column main-file nodes -- confirm
 * against its users.
 */
#define ColMainFileNodesDefNum 16
|
|
|
|
typedef struct PendingDfsDelete {
|
|
StringInfo filename;
|
|
Oid ownerid; /* owner id for user space statistics */
|
|
uint64 filesize; /* file size of dfs file */
|
|
TransactionId xid; /* the transaction id */
|
|
bool atCommit; /* T=delete at commit; F=delete at abort */
|
|
} PendingDfsDelete;
|
|
|
|
/*
 * Thread-local string buffer, NULL until something assigns it.
 * NOTE(review): it is not written or read in the visible part of this file;
 * the name suggests a root path for a virtual-file store -- confirm.
 */
THR_LOCAL StringInfo vf_store_root = NULL;
|
|
|
|
typedef struct MapperFileOptions {
|
|
char filesystem[NAMEDATALEN]; /* like "hdfs" */
|
|
char address[MAXPGPATH]; /* like "10.185.178.239:25000, ..." */
|
|
char cfgpath[MAXPGPATH]; /* like "/opt/config" */
|
|
char tblpath[MAXPGPATH]; /* like "/user/tbl_mppdb" */
|
|
} MapperFileOptions;
|
|
|
|
extern int64 calculate_relation_size(RelFileNode* rfn, BackendId backend, ForkNumber forknum);
|
|
extern int64 calculate_relation_bucket_dir_size(RelFileNode *rfn, BackendId backend, ForkNumber forknum);
|
|
|
|
void DropDfsDirectory(ColFileNode* colFileNode, bool cfgFromMapper);
|
|
void DropMapperFile(RelFileNode fNode);
|
|
static int GetConnConfig(RelFileNode fNode, MapperFileOptions* options);
|
|
static int SetConnConfig(RelFileNode fNode, DfsSrvOptions* srvOptions, StringInfo storePath, int64 timestamp);
|
|
|
|
extern bool find_tmptable_cache_key(Oid relNode);
|
|
extern void make_tmptable_cache_key(Oid relNode);
|
|
|
|
/* before creating storage and inserting into the pending
|
|
* delete list, we first set the right backend and check
|
|
* to need wal logs.
|
|
*/
|
|
static void StorageSetBackendAndLogged(_in_ char relpersistence, _out_ BackendId* backend, _out_ bool* needs_wal)
|
|
{
|
|
switch (relpersistence) {
|
|
case RELPERSISTENCE_TEMP:
|
|
*backend = InvalidBackendId;
|
|
if (STMT_RETRY_ENABLED) {
|
|
*needs_wal = true;
|
|
} else {
|
|
*needs_wal = false;
|
|
}
|
|
break;
|
|
case RELPERSISTENCE_GLOBAL_TEMP:
|
|
*backend = BackendIdForTempRelations;
|
|
*needs_wal = false;
|
|
break;
|
|
case RELPERSISTENCE_UNLOGGED:
|
|
*backend = InvalidBackendId;
|
|
*needs_wal = false;
|
|
break;
|
|
case RELPERSISTENCE_PERMANENT:
|
|
*backend = InvalidBackendId;
|
|
*needs_wal = true;
|
|
break;
|
|
default:
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid relpersistence: %c", relpersistence)));
|
|
return; /* placate compiler */
|
|
}
|
|
}
|
|
|
|
/* Add the relation to the list of stuff to delete at abort.
|
|
* if it's a row-storage table, *whichAttr* must is *AllTheAttrs*.
|
|
* if it's a column-storage table, *whichAttr* >= *AllTheAttrs*.
|
|
*/
|
|
static void InsertStorageIntoPendingList(_in_ const RelFileNode* rnode, _in_ AttrNumber attrnum, _in_ BackendId backend,
|
|
_in_ Oid ownerid, _in_ bool atCommit, _in_ bool isDfsTruncate = false, Relation rel = NULL)
|
|
{
|
|
PendingRelDelete* pending = (PendingRelDelete*)MemoryContextAlloc(
|
|
SESS_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), sizeof(PendingRelDelete));
|
|
pending->relnode = *rnode;
|
|
|
|
if (AttrNumberIsForUserDefinedAttr(attrnum))
|
|
pending->forknum = ColumnId2ColForkNum(attrnum);
|
|
else {
|
|
pending->forknum = MAIN_FORKNUM;
|
|
|
|
/*
|
|
* For dfs table, we use special forknum to differentiate different operation
|
|
* If it's PAX_DFS_FORKNUM, means we will drop/create dfs table directory and mapper files
|
|
* on CN (remove file list for DN). It's used when drop/create dfs table.
|
|
* If it's PAX_DFS_TRUNCATE_FORKNUM, we will truncate the dfs table, it will read from
|
|
* the mapper file and mapper filelist to remove the corresponding files,
|
|
* under the dfs table directory
|
|
*/
|
|
if (IsDfsStor(attrnum)) {
|
|
if (!isDfsTruncate) {
|
|
pending->forknum = PAX_DFS_FORKNUM;
|
|
} else {
|
|
if (!IS_PGXC_COORDINATOR)
|
|
pending->forknum = PAX_DFS_TRUNCATE_FORKNUM;
|
|
}
|
|
}
|
|
}
|
|
pending->backend = backend;
|
|
pending->relOid = InvalidOid;
|
|
pending->ownerid = ownerid;
|
|
pending->atCommit = atCommit; /* false: delete if abort; true: delete if commit */
|
|
pending->nestLevel = GetCurrentTransactionNestLevel();
|
|
pending->next = u_sess->catalog_cxt.pendingDeletes;
|
|
u_sess->catalog_cxt.pendingDeletes = pending;
|
|
|
|
if (RELATION_IS_GLOBAL_TEMP(rel)) {
|
|
pending->relOid = RelationGetRelid(rel);
|
|
} else if (!u_sess->attr.attr_sql.enable_cluster_resize) {
|
|
/* Lock RelFileNode to control concurrent with Catchup Thread */
|
|
LockRelFileNode(*rnode, AccessExclusiveLock);
|
|
}
|
|
}
|
|
|
|
/*
 * RelationCreateStorageInternal
 *      Create the main fork for rnode, WAL-log the creation when the
 *      persistence class requires it, and register the file for deletion at
 *      abort.
 *
 * rnode:          file node to create; its bucketNode must be DIR_BUCKET_ID
 *                 or InvalidBktId (asserted).
 * relpersistence: RELPERSISTENCE_* code of the relation.
 * ownerid:        owner recorded in the pending-delete entry.
 * bucketlist:     passed through to smgropen() and log_smgrcreate().
 * rel:            optional relation; used for global temp bookkeeping.
 */
static void RelationCreateStorageInternal(RelFileNode rnode, char relpersistence, Oid ownerid, const oidvector* bucketlist = NULL, Relation rel = NULL)
{
    Assert(rnode.bucketNode == DIR_BUCKET_ID || rnode.bucketNode == InvalidBktId);

    BackendId backendId;
    bool walNeeded = false;
    StorageSetBackendAndLogged(relpersistence, &backendId, &walNeeded);

    SMgrRelation smgrRel = smgropen(rnode, backendId, 0, bucketlist);
    smgrcreate(smgrRel, MAIN_FORKNUM, false);

    if (walNeeded) {
        log_smgrcreate(&smgrRel->smgr_rnode.node, MAIN_FORKNUM, bucketlist);
    }

    /*
     * Add the relation to the list of stuff to delete at abort.
     * Only non-bucket files and bucket directories are recorded, never the
     * individual bucket files: deleting a bucket directory keeps the
     * directory and deletes all the bucket files under it.
     */
    if (!BUCKET_NODE_IS_VALID(rnode.bucketNode)) {
        InsertStorageIntoPendingList(&rnode, InvalidAttrNumber, backendId, ownerid, false, false, rel);

        /* a bucket directory additionally gets its bucket files created */
        if (rnode.bucketNode == DIR_BUCKET_ID) {
            smgrcreatebuckets(smgrRel, MAIN_FORKNUM, false);
        }
    }

    /* remember global temp table storage info in the local hash */
    if (rel && relpersistence == RELPERSISTENCE_GLOBAL_TEMP) {
        remember_gtt_storage_info(rnode, rel);
    }
}
|
|
|
|
/*
|
|
* RelationCreateStorage
|
|
* Create physical storage for a relation.
|
|
*
|
|
* Create the underlying disk file storage for the relation. This only
|
|
* creates the main fork; additional forks are created lazily by the
|
|
* modules that need them.
|
|
*
|
|
* This function is transactional. The creation is WAL-logged, and if the
|
|
* transaction aborts later on, the storage will be destroyed.
|
|
*/
|
|
void RelationCreateStorage(RelFileNode rnode, char relpersistence, Oid ownerid,
|
|
Oid bucketOid, Oid relfilenode, Relation rel)
|
|
{
|
|
if (OidIsValid(bucketOid) && (bucketOid != VirtualBktOid)) {
|
|
BucketCreateStorage(rnode, bucketOid, ownerid, relfilenode);
|
|
} else {
|
|
RelationCreateStorageInternal(rnode, relpersistence, ownerid, NULL, rel);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* CStoreRelCreateStorage
|
|
* Create physical storage for a column of column-storage relation.
|
|
*/
|
|
void CStoreRelCreateStorage(RelFileNode* rnode, AttrNumber attrnum, char relpersistence, Oid ownerid)
|
|
{
|
|
Assert(AttrNumberIsForUserDefinedAttr(attrnum));
|
|
|
|
BackendId backend = InvalidBackendId;
|
|
bool needs_wal = false;
|
|
StorageSetBackendAndLogged(relpersistence, &backend, &needs_wal);
|
|
|
|
if (needs_wal)
|
|
log_smgrcreate(rnode, (ForkNumber)ColumnId2ColForkNum(attrnum));
|
|
|
|
/* Add the relation to the list of stuff to delete at abort */
|
|
InsertStorageIntoPendingList(rnode, attrnum, backend, ownerid, false);
|
|
}
|
|
|
|
/*
 * BucketCreateStorage
 *      Prepare storage for a hash-bucketed relation.
 *
 * rnode:       base relfilenode of the relation.
 * bucketOid:   oid used to look up the relation's bucket list.
 * ownerid:     owner recorded when the bucket directory is created.
 * relfilenode: when valid and t_thrd.xact_cxt.inheritFileNode is set, any
 *              bucket main-fork file that already exists is unlinked (with a
 *              WARNING) and no bucket directory is created.  Otherwise the
 *              bucket directory is created as permanent storage.
 */
void BucketCreateStorage(RelFileNode rnode, Oid bucketOid, Oid ownerid, Oid relfilenode)
{
    oidvector* buckets = searchHashBucketByOid(bucketOid);

    /* true when an existing relfilenode is being inherited */
    bool reuseFileNode = OidIsValid(relfilenode) && t_thrd.xact_cxt.inheritFileNode == true;

    RelFileNodeBackend target;
    target.node = rnode;
    target.backend = InvalidBackendId;

    for (int idx = 0; idx < buckets->dim1; idx++) {
        if (!reuseFileNode) {
            continue;
        }

        /* remove the last shrink remains */
        target.node.bucketNode = buckets->values[idx];
        SMgrRelation bucketRel = smgropen(target.node, InvalidBackendId);
        if (!smgrexists(bucketRel, MAIN_FORKNUM)) {
            continue;
        }
        smgrdounlink(bucketRel, false);
        ereport(WARNING,
            (errmsg("delete file for shrink remains %d/%d/%d_b%d",
                target.node.spcNode, target.node.dbNode, target.node.relNode, target.node.bucketNode)));
    }

    if (!reuseFileNode) {
        /* Create Bucket Dir in datanode */
        target.node.bucketNode = DIR_BUCKET_ID;
        RelationCreateStorageInternal(target.node, RELPERSISTENCE_PERMANENT, ownerid, buckets);
        smgrclosenode(target);
    }
}
|
|
|
|
/*
|
|
* Perform XLogInsert of a XLOG_SMGR_CREATE record to WAL.
|
|
*/
|
|
void log_smgrcreate(RelFileNode* rnode, ForkNumber forkNum, const oidvector* bucketlist)
|
|
{
|
|
xl_smgr_create xlrec;
|
|
|
|
/*
|
|
* Make an XLOG entry reporting the file creation.
|
|
*/
|
|
xlrec.forkNum = forkNum;
|
|
RelFileNodeRelCopy(xlrec.rnode, *rnode);
|
|
|
|
XLogBeginInsert();
|
|
XLogRegisterData((char*)&xlrec, sizeof(xlrec));
|
|
if(rnode->bucketNode == DIR_BUCKET_ID) {
|
|
XLogRegisterData((char*)bucketlist, offsetof(oidvector, values) + (bucketlist->dim1 * sizeof(Oid)));
|
|
}
|
|
XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE, false, rnode->bucketNode);
|
|
|
|
}
|
|
|
|
/*
 * CStoreRelDropStorage
 *      Schedule every column file of a column-store relation for deletion
 *      at commit.
 *
 * rel:     column-store relation (asserted); supplies the tuple descriptor
 *          and the backend id.
 * rnode:   file node the column files belong to (the relation's own node or
 *          a partition's node, depending on the caller).
 * ownerid: owner recorded in each pending-delete entry.
 */
static void CStoreRelDropStorage(Relation rel, RelFileNode* rnode, Oid ownerid)
{
    Assert((RelationIsColStore(rel)));

    TupleDesc tupdesc = RelationGetDescr(rel);
    const int columnCount = tupdesc->natts;
    Form_pg_attribute* columns = tupdesc->attrs;

    /* add all the cu files to the list of stuff to delete at commit */
    int col = 0;
    while (col < columnCount) {
        InsertStorageIntoPendingList(rnode, columns[col]->attnum, rel->rd_backend, ownerid, true);
        ++col;
    }
}
|
|
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
namespace Tsdb {
|
|
/*
|
|
* Insert part storage file (field/time data fiel) into pending list
|
|
* The storage file is determined by partition_rnode + part_id
|
|
*/
|
|
void InsertPartStorageIntoPendingList(_in_ RelFileNode* partition_rnode, _in_ AttrNumber part_id,
|
|
_in_ BackendId backend, _in_ Oid ownerid, _in_ bool atCommit)
|
|
{
|
|
Assert(part_id >= 2000);
|
|
PendingRelDelete* pending = (PendingRelDelete*)MemoryContextAlloc(
|
|
SESS_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), sizeof(PendingRelDelete));
|
|
pending->relnode = *partition_rnode;
|
|
pending->forknum = ColumnId2ColForkNum(part_id);
|
|
pending->backend = backend;
|
|
pending->ownerid = ownerid;
|
|
pending->atCommit = atCommit; /* false: delete if abort; true: delete if commit */
|
|
pending->nestLevel = GetCurrentTransactionNestLevel();
|
|
pending->next = u_sess->catalog_cxt.pendingDeletes;
|
|
u_sess->catalog_cxt.pendingDeletes = pending;
|
|
|
|
/* Lock RelFileNode to control concurrent with Catchup Thread */
|
|
LockRelFileNode(*partition_rnode, RowExclusiveLock);
|
|
}
|
|
|
|
/*
|
|
* Drop given parts(cudesc rel + field&time data file).
|
|
* @param partition_id: the given parts should be in same partition
|
|
* @param partition_rnode: partition's relfilenode
|
|
* @param backend
|
|
* @param ownerid
|
|
* @param target_cudesc_relids: parts to drop
|
|
*/
|
|
void DropPartStorage(
|
|
Oid partition_id, RelFileNode* partition_rnode, BackendId backend, Oid ownerid, List* target_cudesc_relids)
|
|
{
|
|
if (list_length(target_cudesc_relids) == 0) {
|
|
return;
|
|
}
|
|
List* cudesc_valid_oids = NIL;
|
|
List* cudesc_valid_part_id = NIL;
|
|
ListCell* cell = NULL;
|
|
bool result = false;
|
|
Tsdb::TsCUDesc tscudesc;
|
|
foreach(cell, target_cudesc_relids) {
|
|
Oid cudesc_oid = lfirst_oid(cell);
|
|
Relation cudesc_rel = heap_open(cudesc_oid, AccessShareLock);
|
|
TsCUDescUtils tscudesc_util(cudesc_rel);
|
|
tscudesc.cudesc->Reset();
|
|
result = tscudesc_util.get_part_desc(tscudesc);
|
|
if (result) {
|
|
cudesc_valid_oids = lappend_oid(cudesc_valid_oids, cudesc_oid);
|
|
cudesc_valid_part_id = lappend_oid(cudesc_valid_part_id, tscudesc.cudesc->cu_id);
|
|
ereport(DEBUG3, (errmsg("DropPartStorage, partition_oid=%u, cudesc oid=%d, part_id= %u",
|
|
partition_id, cudesc_oid, tscudesc.cudesc->cu_id)));
|
|
} else {
|
|
ereport(WARNING, (errmsg("DropPartStorage failed, cudesc oid=%u", cudesc_oid)));
|
|
}
|
|
heap_close(cudesc_rel, AccessShareLock);
|
|
}
|
|
if (list_length(cudesc_valid_oids) > 0) {
|
|
performTsCudescDeletion(cudesc_valid_oids);
|
|
foreach(cell, cudesc_valid_part_id) {
|
|
uint32 part_id = lfirst_oid(cell);
|
|
InsertPartStorageIntoPendingList(partition_rnode, part_id, backend, ownerid, true);
|
|
InsertPartStorageIntoPendingList(
|
|
partition_rnode, part_id + TsConf::TIME_FILE_OFFSET, backend, ownerid, true);
|
|
}
|
|
}
|
|
list_free_ext(cudesc_valid_oids);
|
|
list_free_ext(cudesc_valid_part_id);
|
|
}
|
|
}
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
|
|
/*
|
|
* - Brief: Drop column for a CStore table which includes
|
|
* 1. Delete the column information from the cudesc
|
|
* 2. Schedule unlinking of physical storage of the column at transaction commit.
|
|
* - Parameter:
|
|
* @rel: target relation to drop column
|
|
* @attrnum: the column to drop
|
|
* @ownerid: owerid for the target table
|
|
* - Return:
|
|
* no return value
|
|
*/
|
|
void CStoreRelDropColumn(Relation rel, AttrNumber attrnum, Oid ownerid)
|
|
{
|
|
Assert(RelationIsCUFormat(rel));
|
|
|
|
if (!RELATION_IS_PARTITIONED(rel)) {
|
|
CStoreDropColumnInCuDesc(rel, attrnum);
|
|
InsertStorageIntoPendingList(&rel->rd_node, attrnum, rel->rd_backend, ownerid, true);
|
|
} else {
|
|
List* partitions = NIL;
|
|
ListCell* cell = NULL;
|
|
Partition partition = NULL;
|
|
Relation partRel = NULL;
|
|
partitions = relationGetPartitionList(rel, AccessExclusiveLock);
|
|
|
|
foreach (cell, partitions) {
|
|
partition = (Partition)lfirst(cell);
|
|
partRel = partitionGetRelation(rel, partition);
|
|
|
|
CStoreDropColumnInCuDesc(partRel, attrnum);
|
|
InsertStorageIntoPendingList(&partRel->rd_node, attrnum, partRel->rd_backend, ownerid, true);
|
|
|
|
releaseDummyRelation(&partRel);
|
|
}
|
|
|
|
releasePartitionList(rel, &partitions, AccessExclusiveLock);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* RelationDropStorage
|
|
* Schedule unlinking of physical storage at transaction commit.
|
|
*/
|
|
void RelationDropStorage(Relation rel, bool isDfsTruncate)
|
|
{
|
|
// global temp table files may not exist
|
|
if (RELATION_IS_GLOBAL_TEMP(rel)) {
|
|
if (rel->rd_smgr == NULL) {
|
|
/* Open it at the smgr level if not already done */
|
|
RelationOpenSmgr(rel);
|
|
}
|
|
if (!smgrexists(rel->rd_smgr, MAIN_FORKNUM)) {
|
|
return;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* First we must push the column file, column bcm file to the pendingDeletes and
|
|
* then push the logical table file to pendingDeletes.
|
|
* When delete the file, smgrDoPendingDeletes will first delete the logical table file,
|
|
* and it will call DropRelFileNodeAllBuffers to make all buffer invaild, include the
|
|
* column bcm buffer, then we can drop the column file and column bcm file.
|
|
* Examples: if a column table relfilenode is 16384, it will create 16384, 16384_C1.0,
|
|
* 16384_C1_bcm... push into pendingDeletes
|
|
* push: pop:
|
|
* 16384(third) 16384(make all buffer invaild, unlink file)
|
|
* 16384_C1_bcm(second) 16384_C1_bcm(unlink file)
|
|
* 16384_C1.0(first) 16384_C1.0(unlink file)
|
|
*/
|
|
if (RelationUsesSpaceType(rel->rd_rel->relpersistence) == SP_TEMP) {
|
|
make_tmptable_cache_key(rel->rd_rel->relfilenode);
|
|
}
|
|
if (RelationIsCUFormat(rel)) {
|
|
CStoreRelDropStorage(rel, &rel->rd_node, rel->rd_rel->relowner);
|
|
}
|
|
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
if (RelationIsTsStore(rel)) {
|
|
/* rel is partition relation */
|
|
List* cudesc_relid_list = search_all_cudesc(rel->rd_id, true);
|
|
if (list_length(cudesc_relid_list) > 0) {
|
|
Tsdb::DropPartStorage(rel->rd_id, &(rel->rd_node), rel->rd_backend,
|
|
rel->rd_rel->relowner, cudesc_relid_list);
|
|
}
|
|
if (g_instance.attr.attr_common.enable_tsdb) {
|
|
Tsdb::PartCacheMgr::GetInstance().clear_partition_cache(rel->rd_id);
|
|
}
|
|
list_free_ext(cudesc_relid_list);
|
|
}
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
|
|
if (RelationIsPAXFormat(rel)) {
|
|
/*
|
|
* For dfs table, if it's drop table statement, we will drop the dfs storage
|
|
* on main CN.
|
|
* If it's truncate table statement, we will drop the dfs storage on DN.
|
|
*/
|
|
if (isDfsTruncate) {
|
|
if (!IS_PGXC_COORDINATOR)
|
|
DropDfsStorage(rel, true);
|
|
} else {
|
|
/* drop dfs table */
|
|
DropDfsStorage(rel, false);
|
|
}
|
|
}
|
|
|
|
/* Drop all bucket's storage of this relation if there is any */
|
|
if (RELATION_CREATE_BUCKET(rel)) {
|
|
BucketDropStorage(rel, NULL);
|
|
} else {
|
|
/* Add the relation to the list of stuff to delete at commit */
|
|
InsertStorageIntoPendingList(
|
|
&rel->rd_node, InvalidAttrNumber, rel->rd_backend, rel->rd_rel->relowner, true, isDfsTruncate, rel);
|
|
}
|
|
|
|
/*
|
|
* NOTE: if the relation was created in this transaction, it will now be
|
|
* present in the pending-delete list twice, once with atCommit true and
|
|
* once with atCommit false. Hence, it will be physically deleted at end
|
|
* of xact in either case (and the other entry will be ignored by
|
|
* smgrDoPendingDeletes, so no error will occur). We could instead remove
|
|
* the existing list entry and delete the physical file immediately, but
|
|
* for now I'll keep the logic simple.
|
|
*/
|
|
RelationCloseSmgr(rel);
|
|
}
|
|
|
|
/*
|
|
* PartitionDropStorage
|
|
* Schedule unlinking of physical storage at transaction commit.
|
|
*/
|
|
void PartitionDropStorage(Relation rel, Partition part)
|
|
{
|
|
/*
|
|
* First we must push the column file, column bcm file to the pendingDeletes and
|
|
* then push the logical table file to pendingDeletes.
|
|
* When delete the file, smgrDoPendingDeletes will first delete the logical table file,
|
|
* and it will call DropRelFileNodeAllBuffers to make all buffer invaild, include the
|
|
* column bcm buffer, then we can drop the column file and column bcm file.
|
|
* Examples: if a column table relfilenode is 16384, it will create 16384, 16384_C1.0,
|
|
* 16384_C1_bcm... push into pendingDeletes
|
|
* push: pop:
|
|
* 16384(third) 16384(make all buffer invaild, unlink file)
|
|
* 16384_C1_bcm(second) 16384_C1_bcm(unlink file)
|
|
* 16384_C1.0(first) 16384_C1.0(unlink file)
|
|
*/
|
|
if (RelationIsColStore(rel)) {
|
|
CStoreRelDropStorage(rel, &part->pd_node, rel->rd_rel->relowner);
|
|
}
|
|
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
if (RelationIsTsStore(rel)) {
|
|
List* cudesc_relid_list = search_all_cudesc(part->pd_id, true);
|
|
if (list_length(cudesc_relid_list) > 0) {
|
|
Tsdb::DropPartStorage(
|
|
part->pd_id, &(part->pd_node), rel->rd_backend, rel->rd_rel->relowner, cudesc_relid_list);
|
|
}
|
|
if (g_instance.attr.attr_common.enable_tsdb) {
|
|
Tsdb::PartCacheMgr::GetInstance().clear_partition_cache(part->pd_id);
|
|
}
|
|
list_free_ext(cudesc_relid_list);
|
|
}
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
|
|
/* Drop all bucket's storage of this relation if there is any */
|
|
if (RELATION_OWN_BUCKETKEY(rel)) {
|
|
BucketDropStorage(rel, part);
|
|
} else {
|
|
/* Add the relation to the list of stuff to delete at commit */
|
|
InsertStorageIntoPendingList(&part->pd_node, InvalidAttrNumber, rel->rd_backend, rel->rd_rel->relowner, true);
|
|
}
|
|
|
|
/*
|
|
* NOTE: if the relation was created in this transaction, it will now be
|
|
* present in the pending-delete list twice, once with atCommit true and
|
|
* once with atCommit false. Hence, it will be physically deleted at end
|
|
* of xact in either case (and the other entry will be ignored by
|
|
* smgrDoPendingDeletes, so no error will occur). We could instead remove
|
|
* the existing list entry and delete the physical file immediately, but
|
|
* for now I'll keep the logic simple.
|
|
*/
|
|
PartitionCloseSmgr(part);
|
|
}
|
|
|
|
/*
 * BucketDropStorage
 *      Schedule the storage of a bucketed relation, or of one of its
 *      partitions, for deletion at commit.
 *
 * relation:  the relation; its rd_backend must be InvalidBackendId
 *            (asserted).
 * partition: required (asserted) when the relation is partitioned, in which
 *            case the partition's file node is used; otherwise ignored.
 */
void BucketDropStorage(Relation relation, Partition partition)
{
    Assert(relation->rd_backend == InvalidBackendId);

    RelFileNodeBackend target;
    target.backend = InvalidBackendId;

    if (!RelationIsPartitioned(relation)) {
        target.node = relation->rd_node;
    } else {
        Assert(PointerIsValid(partition));
        target.node = partition->pd_node;
    }

    /* Add the file node to the list of stuff to delete at commit */
    InsertStorageIntoPendingList(
        &target.node, InvalidAttrNumber, InvalidBackendId, relation->rd_rel->relowner, true);

    /*
     * NOTE: if the relation was created in this transaction, it will now be
     * present in the pending-delete list twice, once with atCommit true and
     * once with atCommit false.  Hence, it will be physically deleted at end
     * of xact in either case (and the other entry will be ignored by
     * smgrDoPendingDeletes, so no error will occur).  We could instead remove
     * the existing list entry and delete the physical file immediately, but
     * for now the logic is kept simple.
     */
    smgrclosenode(target);
}
|
|
|
|
/*
 * RelationPreserveStorage
 *      Remove from the session's pending-delete list every entry that refers
 *      to rnode (per RelFileNodeRelEquals) and has the given atCommit flag,
 *      so that the corresponding deletion is no longer performed.
 *
 * rnode:    file node whose pending deletions are cancelled.
 * atCommit: which kind of entry to cancel (true = delete-at-commit entries,
 *           false = delete-at-abort entries).
 */
void RelationPreserveStorage(RelFileNode rnode, bool atCommit)
{
    /* 'link' always points at the pointer that leads to the current entry */
    PendingRelDelete** link = &u_sess->catalog_cxt.pendingDeletes;

    while (*link != NULL) {
        PendingRelDelete* entry = *link;

        if (RelFileNodeRelEquals(rnode, entry->relnode) && entry->atCommit == atCommit) {
            /* unlink and free the entry; 'link' now leads to its successor */
            *link = entry->next;
            pfree(entry);
        } else {
            /* unrelated entry, leave it alone and step over it */
            link = &entry->next;
        }
    }
}
|
|
|
|
/*
|
|
* RelationTruncate
|
|
* Physically truncate a relation to the specified number of blocks.
|
|
*
|
|
* This includes getting rid of any buffers for the blocks that are to be
|
|
* dropped.
|
|
*/
|
|
void RelationTruncate(Relation rel, BlockNumber nblocks)
|
|
{
|
|
bool fsm = false;
|
|
bool vm = false;
|
|
bool bcm = false;
|
|
|
|
/* decrease the permanent space on users' record */
|
|
uint64 size = GetSMgrRelSize(&(rel->rd_node), rel->rd_backend, InvalidForkNumber);
|
|
size -= nblocks * BLCKSZ; /* Ignore FSM VM BCM reserved space */
|
|
perm_space_decrease(rel->rd_rel->relowner, size, RelationUsesSpaceType(rel->rd_rel->relpersistence));
|
|
|
|
/* Open it at the smgr level if not already done */
|
|
RelationOpenSmgr(rel);
|
|
|
|
/*
|
|
* Make sure smgr_targblock etc aren't pointing somewhere past new end
|
|
*/
|
|
rel->rd_smgr->smgr_targblock = InvalidBlockNumber;
|
|
rel->rd_smgr->smgr_fsm_nblocks = InvalidBlockNumber;
|
|
rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber;
|
|
rel->rd_smgr->smgr_cached_nblocks = InvalidBlockNumber;
|
|
|
|
for (int i = 0; i < rel->rd_smgr->smgr_bcmarry_size; i++)
|
|
rel->rd_smgr->smgr_bcm_nblocks[i] = InvalidBlockNumber;
|
|
|
|
/* Truncate the FSM first if it exists */
|
|
fsm = smgrexists(rel->rd_smgr, FSM_FORKNUM);
|
|
if (fsm)
|
|
FreeSpaceMapTruncateRel(rel, nblocks);
|
|
|
|
/* Truncate the visibility map too if it exists. */
|
|
vm = smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM);
|
|
if (vm)
|
|
visibilitymap_truncate(rel, nblocks);
|
|
|
|
/* Truncate the bcm too if it exists. */
|
|
bcm = smgrexists(rel->rd_smgr, BCM_FORKNUM);
|
|
if (bcm)
|
|
BCM_truncate(rel);
|
|
|
|
/* skip truncating if global temp table index does not exist */
|
|
if (RELATION_IS_GLOBAL_TEMP(rel) && !smgrexists(rel->rd_smgr, MAIN_FORKNUM)) {
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* We WAL-log the truncation before actually truncating, which means
|
|
* trouble if the truncation fails. If we then crash, the WAL replay
|
|
* likely isn't going to succeed in the truncation either, and cause a
|
|
* PANIC. It's tempting to put a critical section here, but that cure
|
|
* would be worse than the disease. It would turn a usually harmless
|
|
* failure to truncate, that might spell trouble at WAL replay, into a
|
|
* certain PANIC.
|
|
*/
|
|
if (RelationNeedsWAL(rel)) {
|
|
/*
|
|
* Make an XLOG entry reporting the file truncation.
|
|
*/
|
|
XLogRecPtr lsn;
|
|
xl_smgr_truncate xlrec;
|
|
|
|
xlrec.blkno = nblocks;
|
|
RelFileNodeRelCopy(xlrec.rnode, rel->rd_node);
|
|
|
|
XLogBeginInsert();
|
|
XLogRegisterData((char*)&xlrec, sizeof(xlrec));
|
|
|
|
lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE, false, rel->rd_node.bucketNode);
|
|
|
|
/*
|
|
* Flush, because otherwise the truncation of the main relation might
|
|
* hit the disk before the WAL record, and the truncation of the FSM
|
|
* or visibility map. If we crashed during that window, we'd be left
|
|
* with a truncated heap, but the FSM or visibility map would still
|
|
* contain entries for the non-existent heap pages.
|
|
*/
|
|
if (fsm || vm)
|
|
XLogWaitFlush(lsn);
|
|
}
|
|
|
|
if (!RELATION_IS_GLOBAL_TEMP(rel)) {
|
|
/* Lock RelFileNode to control concurrent with Catchup Thread */
|
|
LockRelFileNode(rel->rd_node, AccessExclusiveLock);
|
|
}
|
|
|
|
/* Do the real work */
|
|
smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks);
|
|
}
|
|
|
|
/*
 * PartitionTruncate
 *      Physically truncate one partition to the specified number of blocks.
 *
 * parent:  the partitioned relation; decides whether WAL is needed.
 * part:    the partition to truncate.
 * nblocks: number of blocks the partition's main fork keeps.
 *
 * The partition is wrapped in a fake relation for the duration of the call.
 * Besides the main fork, the FSM, visibility map and BCM forks are truncated
 * when they exist, and the owner's space accounting is reduced.
 */
void PartitionTruncate(Relation parent, Partition part, BlockNumber nblocks)
{
    Relation rel = NULL;
    bool fsm = false;
    bool vm = false;
    bool bcm = false;

    /* Open it at the smgr level if not already done */
    PartitionOpenSmgr(part);
    /* transform partition to fake relation */
    rel = partitionGetRelation(parent, part);

    /*
     * Decrease the permanent space on the owner's record (ignoring FSM, VM
     * and BCM reserved space).  The number of kept bytes is computed in
     * 64 bits: BlockNumber * BLCKSZ would otherwise wrap once the kept part
     * reaches 4 GiB.  The subtraction is clamped so that it cannot wrap
     * around when the reported size is smaller than the kept part.
     */
    uint64 size = GetSMgrRelSize(&(rel->rd_node), rel->rd_backend, InvalidForkNumber);
    uint64 keptBytes = (uint64)nblocks * BLCKSZ;
    size = (size > keptBytes) ? (size - keptBytes) : 0;
    perm_space_decrease(rel->rd_rel->relowner, size, RelationUsesSpaceType(rel->rd_rel->relpersistence));

    /*
     * Make sure smgr_targblock etc aren't pointing somewhere past new end.
     * The cached main-fork block count is invalidated as well, as
     * RelationTruncate() does, so that a stale count cannot be used after
     * the file has shrunk.
     */
    rel->rd_smgr->smgr_targblock = InvalidBlockNumber;
    rel->rd_smgr->smgr_fsm_nblocks = InvalidBlockNumber;
    rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber;
    rel->rd_smgr->smgr_cached_nblocks = InvalidBlockNumber;

    for (int i = 0; i < rel->rd_smgr->smgr_bcmarry_size; i++)
        rel->rd_smgr->smgr_bcm_nblocks[i] = InvalidBlockNumber;

    /* Truncate the FSM first if it exists */
    fsm = smgrexists(rel->rd_smgr, FSM_FORKNUM);
    if (fsm)
        FreeSpaceMapTruncateRel(rel, nblocks);

    /* Truncate the visibility map too if it exists. */
    vm = smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM);
    if (vm)
        visibilitymap_truncate(rel, nblocks);

    /* Truncate the bcm too if it exists. */
    bcm = smgrexists(rel->rd_smgr, BCM_FORKNUM);
    if (bcm)
        BCM_truncate(rel);

    if (RelationNeedsWAL(parent)) {
        /* Make an XLOG entry reporting the file truncation. */
        XLogRecPtr lsn;
        xl_smgr_truncate xlrec;

        xlrec.blkno = nblocks;
        RelFileNodeRelCopy(xlrec.rnode, part->pd_node);

        XLogBeginInsert();
        XLogRegisterData((char*)&xlrec, sizeof(xlrec));

        lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE, false, part->pd_node.bucketNode);

        /*
         * Flush, because otherwise the truncation of the main relation might
         * hit the disk before the WAL record, and the truncation of the
         * visibility map. If we crashed during that window, we'd be left
         * with a truncated heap, but the visibility map would still
         * contain entries for the non-existent heap pages.
         */
        if (fsm || vm)
            XLogWaitFlush(lsn);
    }

    /* Lock RelFileNode to control concurrency with the Catchup Thread */
    LockRelFileNode(rel->rd_node, AccessExclusiveLock);

    /* Do the real work */
    smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks);

    /* release fake relation */
    releaseDummyRelation(&rel);
}
|
|
|
|
static inline bool smgrCheckPendingNumberOverHashThreshold(bool isCommit)
|
|
{
|
|
PendingRelDelete* pending = NULL;
|
|
PendingRelDelete* next = NULL;
|
|
int nestLevel = GetCurrentTransactionNestLevel();
|
|
|
|
uint4 pending_cnt = 0;
|
|
for (pending = u_sess->catalog_cxt.pendingDeletes; pending != NULL; pending = next) {
|
|
next = pending->next;
|
|
if (pending->nestLevel >= nestLevel) {
|
|
if (pending->atCommit == isCommit) {
|
|
if (!IsValidColForkNum(pending->forknum)) {
|
|
pending_cnt++;
|
|
if (pending_cnt > DROP_BUFFER_USING_HASH_DEL_REL_NUM_THRESHOLD) {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
void smgrDoDropBufferUsingScan(bool isCommit)
|
|
{
|
|
PendingRelDelete* pending = NULL;
|
|
PendingRelDelete* next = NULL;
|
|
int nestLevel = GetCurrentTransactionNestLevel();
|
|
int rnode_len = 0;
|
|
|
|
RelFileNode rnodes[DROP_BUFFER_USING_HASH_DEL_REL_NUM_THRESHOLD];
|
|
for (pending = u_sess->catalog_cxt.pendingDeletes; pending != NULL; pending = next) {
|
|
next = pending->next;
|
|
if (pending->nestLevel >= nestLevel) {
|
|
if (pending->atCommit == isCommit) {
|
|
if (!IsValidColForkNum(pending->forknum)) {
|
|
rnodes[rnode_len++] = pending->relnode;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
DropRelFileNodeAllBuffersUsingScan(rnodes, rnode_len);
|
|
}
|
|
|
|
void smgrDoDropBufferUsingHashTbl(bool isCommit)
|
|
{
|
|
PendingRelDelete* pending = NULL;
|
|
PendingRelDelete* next = NULL;
|
|
|
|
int nestLevel = GetCurrentTransactionNestLevel();
|
|
HTAB* relfilenode_hashtbl = relfilenode_hashtbl_create();
|
|
int enter_cnt = 0;
|
|
bool found = false;
|
|
for (pending = u_sess->catalog_cxt.pendingDeletes; pending != NULL; pending = next) {
|
|
next = pending->next;
|
|
if (pending->nestLevel >= nestLevel) {
|
|
/* do deletion if called for */
|
|
if (pending->atCommit == isCommit) {
|
|
if (!IsValidColForkNum(pending->forknum)) {
|
|
(void)hash_search(relfilenode_hashtbl, &(pending->relnode), HASH_ENTER, &found);
|
|
if (!found) {
|
|
enter_cnt++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* At least one relnode founded */
|
|
if (enter_cnt > 0) {
|
|
DropRelFileNodeAllBuffersUsingHash(relfilenode_hashtbl);
|
|
}
|
|
hash_destroy(relfilenode_hashtbl);
|
|
relfilenode_hashtbl = NULL;
|
|
}
|
|
|
|
static inline void smgrDoDropBuffers(bool isCommit)
|
|
{
|
|
bool over_hash_thresh = smgrCheckPendingNumberOverHashThreshold(isCommit);
|
|
if (over_hash_thresh) {
|
|
smgrDoDropBufferUsingHashTbl(isCommit);
|
|
} else {
|
|
smgrDoDropBufferUsingScan(isCommit);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
|
|
*
|
|
* This also runs when aborting a subxact; we want to clean up a failed
|
|
* subxact immediately.
|
|
*
|
|
* Note: It's possible that we're being asked to remove a relation that has
|
|
* no physical storage in any fork. In particular, it's possible that we're
|
|
* cleaning up an old temporary relation for which RemovePgTempFiles has
|
|
* already recovered the physical storage.
|
|
*/
|
|
void smgrDoPendingDeletes(bool isCommit)
|
|
{
|
|
int nestLevel = GetCurrentTransactionNestLevel();
|
|
PendingRelDelete* pending = NULL;
|
|
PendingRelDelete* prev = NULL;
|
|
PendingRelDelete* next = NULL;
|
|
|
|
ColMainFileNodesCreate();
|
|
|
|
smgrDoDropBuffers(isCommit);
|
|
|
|
for (pending = u_sess->catalog_cxt.pendingDeletes; pending != NULL; pending = next) {
|
|
next = pending->next;
|
|
if (pending->nestLevel < nestLevel) {
|
|
/* outer-level entries should not be processed yet */
|
|
prev = pending;
|
|
} else {
|
|
/* unlink list entry first, so we don't retry on failure */
|
|
if (prev != NULL)
|
|
prev->next = next;
|
|
else
|
|
u_sess->catalog_cxt.pendingDeletes = next;
|
|
/* do deletion if called for */
|
|
if (pending->atCommit == isCommit) {
|
|
if (!IsValidColForkNum(pending->forknum)) {
|
|
RowRelationDoDeleteFiles(
|
|
pending->relnode, pending->backend, pending->ownerid, pending->relOid, isCommit);
|
|
|
|
/*
|
|
* "CREATE/DROP hdfs table" will use Two-Phrases Commit Transaction,
|
|
* in which FinishPreparedTransactionPhase2() just does what
|
|
* smgrDoPendingDeletes() will do, so it is not necessary to
|
|
* drop hdfs directory here. FinishPreparedTransactionPhase2()
|
|
* will do the job.
|
|
* see FinishPreparedTransactionPhase2() for more details.
|
|
*/
|
|
} else {
|
|
ColumnRelationDoDeleteFiles(
|
|
&pending->relnode, pending->forknum, pending->backend, pending->ownerid);
|
|
#ifdef ENABLE_MULTIPLE_NODES
|
|
uint16 partid = ColForkNum2ColumnId(pending->forknum);
|
|
if (g_instance.attr.attr_common.enable_tsdb && partid >= TsConf::FIRST_PARTID && partid % 2 == 0) {
|
|
PartIdMgr::GetInstance().free_part_id(&pending->relnode, partid);
|
|
}
|
|
#endif /* ENABLE_MULTIPLE_NODES */
|
|
}
|
|
} else {
|
|
/* roll back */
|
|
if (IsTruncateDfsForkNum(pending->forknum)) {
|
|
if (!IS_PGXC_COORDINATOR) {
|
|
/* clear mapper if truncate roll back */
|
|
DropMapperFile(pending->relnode);
|
|
DropDfsFilelist(pending->relnode);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (IsValidPaxDfsForkNum(pending->forknum)) {
|
|
/* clear mapper file */
|
|
DropMapperFile(pending->relnode);
|
|
}
|
|
|
|
/* must explicitly free the list entry */
|
|
pfree(pending);
|
|
/* prev does not change */
|
|
}
|
|
}
|
|
ColMainFileNodesDestroy();
|
|
|
|
/* just for "vacuum full" to delete files in hdfs */
|
|
if (u_sess->catalog_cxt.pendingDfsDeletes)
|
|
doPendingDfsDelete(isCommit, NULL);
|
|
}
|
|
|
|
/*
|
|
* smgrGetPendingDeletes() -- Get a list of non-temp relations to be deleted.
|
|
*
|
|
* The return value is the number of relations scheduled for termination.
|
|
* *ptr is set to point to a freshly-palloc'd array of RelFileNodes.
|
|
* If there are no relations to be deleted, *ptr is set to NULL.
|
|
*
|
|
* Only non-temporary relations are included in the returned list. This is OK
|
|
* because the list is used only in contexts where temporary relations don't
|
|
* matter: we're either writing to the two-phase state file (and transactions
|
|
* that have touched temp tables can't be prepared) or we're writing to xlog
|
|
* (and all temporary files will be zapped if we restart anyway, so no need
|
|
* for redo to do it also).
|
|
*
|
|
* Note that the list does not include anything scheduled for termination
|
|
* by upper-level transactions.
|
|
*/
|
|
int smgrGetPendingDeletes(bool forCommit, ColFileNodeRel** ptr)
|
|
{
|
|
int nestLevel = GetCurrentTransactionNestLevel();
|
|
int nrels;
|
|
ColFileNodeRel* rptrRel = NULL;
|
|
PendingRelDelete* pending = NULL;
|
|
|
|
nrels = 0;
|
|
for (pending = u_sess->catalog_cxt.pendingDeletes; pending != NULL; pending = pending->next) {
|
|
if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit && pending->backend == InvalidBackendId)
|
|
nrels++;
|
|
}
|
|
if (nrels == 0) {
|
|
*ptr = NULL;
|
|
return 0;
|
|
}
|
|
|
|
rptrRel = (ColFileNodeRel*)palloc(nrels * sizeof(ColFileNodeRel));
|
|
*ptr = rptrRel;
|
|
|
|
for (pending = u_sess->catalog_cxt.pendingDeletes; pending != NULL; pending = pending->next) {
|
|
if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit && pending->backend == InvalidBackendId) {
|
|
rptrRel->filenode.spcNode = pending->relnode.spcNode;
|
|
rptrRel->filenode.dbNode = pending->relnode.dbNode;
|
|
rptrRel->filenode.relNode = pending->relnode.relNode;
|
|
rptrRel->forknum = pending->forknum;
|
|
rptrRel->ownerid = pending->ownerid;
|
|
/* Add bucketid into forknum */
|
|
forknum_add_bucketid(rptrRel->forknum, pending->relnode.bucketNode);
|
|
rptrRel++;
|
|
}
|
|
}
|
|
return nrels;
|
|
}
|
|
|
|
/*
|
|
* PostPrepare_smgr -- Clean up after a successful PREPARE
|
|
*
|
|
* What we have to do here is throw away the in-memory state about pending
|
|
* relation deletes. It's all been recorded in the 2PC state file and
|
|
* it's no longer smgr's job to worry about it.
|
|
*/
|
|
void PostPrepare_smgr(void)
|
|
{
|
|
PendingRelDelete* pending = NULL;
|
|
PendingRelDelete* next = NULL;
|
|
|
|
for (pending = u_sess->catalog_cxt.pendingDeletes; pending != NULL; pending = next) {
|
|
next = pending->next;
|
|
u_sess->catalog_cxt.pendingDeletes = next;
|
|
/* must explicitly free the list entry */
|
|
pfree(pending);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* AtSubCommit_smgr() --- Take care of subtransaction commit.
|
|
*
|
|
* Reassign all items in the pending-deletes list to the parent transaction.
|
|
*/
|
|
void AtSubCommit_smgr(void)
|
|
{
|
|
int nestLevel = GetCurrentTransactionNestLevel();
|
|
PendingRelDelete* pending = NULL;
|
|
|
|
for (pending = u_sess->catalog_cxt.pendingDeletes; pending != NULL; pending = pending->next) {
|
|
if (pending->nestLevel >= nestLevel)
|
|
pending->nestLevel = nestLevel - 1;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* AtSubAbort_smgr() --- Take care of subtransaction abort.
|
|
*
|
|
* Delete created relations and forget about deleted relations.
|
|
* We can execute these operations immediately because we know this
|
|
* subtransaction will not commit.
|
|
*/
|
|
void AtSubAbort_smgr(void)
|
|
{
|
|
smgrDoPendingDeletes(false);
|
|
}
|
|
|
|
/*
 * smgr_redo_create() -- replay the creation of a relation fork.
 *
 * rnode:   relfilenode of the relation being created.
 * forkNum: fork to create; for column storage the column id is encoded in it.
 * data:    start of the xl_smgr_create record; when rnode.bucketNode is
 *          DIR_BUCKET_ID an oidvector with the bucket list follows the
 *          fixed-size part of the record.
 */
void smgr_redo_create(RelFileNode rnode, ForkNumber forkNum, char *data)
{
    if (IsValidColForkNum(forkNum)) {
        /* column file: the column id is hidden in forkNum */
        CFileNode cFileNode(rnode, ColForkNum2ColumnId(forkNum), MAIN_FORKNUM);
        CUStorage* cuStorage = New(CurrentMemoryContext) CUStorage(cFileNode);
        Assert(cuStorage);
        TablespaceCreateDbspace(rnode, true);
        cuStorage->CreateStorage(0, true);
        DELETE_EX(cuStorage);
        return;
    }

    /* row storage */
    const bool isBucketDir = (rnode.bucketNode == DIR_BUCKET_ID);
    oidvector* bucketlist = isBucketDir ? (oidvector*)(data + sizeof(xl_smgr_create)) : NULL;

    SMgrRelation reln = smgropen(rnode, InvalidBackendId, 0, bucketlist);
    smgrcreate(reln, forkNum, true);
    if (isBucketDir) {
        smgrcreatebuckets(reln, forkNum, true);
    }
}
|
|
/*
 * xlog_block_smgr_redo_truncate() -- replay a relation truncation.
 *
 * rnode: relfilenode to truncate.
 * blkno: new length of the relation in blocks.
 * lsn:   LSN passed to UpdateMinRecoveryPoint() before the truncation.
 *
 * The main fork is created if missing, the minimum recovery point is
 * advanced, and then, under an AccessExclusiveLock on the relfilenode, the
 * main fork, the FSM and the visibility map are truncated.
 */
void xlog_block_smgr_redo_truncate(RelFileNode rnode, BlockNumber blkno, XLogRecPtr lsn)
{
    SMgrRelation smgrRel = smgropen(rnode, InvalidBackendId);

    /* create the main fork first in case it does not exist */
    smgrcreate(smgrRel, MAIN_FORKNUM, true);

    /* cover this record with the minimum recovery point before truncating */
    UpdateMinRecoveryPoint(lsn, false);

    LockRelFileNode(rnode, AccessExclusiveLock);

    smgrtruncate(smgrRel, MAIN_FORKNUM, blkno);
    XLogTruncateRelation(rnode, MAIN_FORKNUM, blkno);

    /* the FSM and visibility-map helpers take a Relation, so use a fake entry */
    Relation fakeRel = CreateFakeRelcacheEntry(rnode);
    if (smgrexists(smgrRel, FSM_FORKNUM)) {
        FreeSpaceMapTruncateRel(fakeRel, blkno);
    }
    if (smgrexists(smgrRel, VISIBILITYMAP_FORKNUM)) {
        visibilitymap_truncate(fakeRel, blkno);
    }
    FreeFakeRelcacheEntry(fakeRel);

    UnlockRelFileNode(rnode, AccessExclusiveLock);
}
|
|
/*
 * smgr_redo() -- redo entry point for smgr WAL records.
 *
 * Dispatches on the record's info bits: XLOG_SMGR_CREATE goes to
 * smgr_redo_create() and XLOG_SMGR_TRUNCATE to
 * xlog_block_smgr_redo_truncate(). Any other op code is a PANIC.
 */
void smgr_redo(XLogReaderState* record)
{
    XLogRecPtr lsn = record->EndRecPtr;
    uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

    /* Backup blocks are not used in smgr records */
    Assert(!XLogRecHasAnyBlockRefs(record));

    switch (info) {
        case XLOG_SMGR_CREATE: {
            xl_smgr_create* createRec = (xl_smgr_create*)XLogRecGetData(record);
            RelFileNode rnode;

            RelFileNodeCopy(rnode, createRec->rnode, XLogRecGetBucketId(record));
            /* for a column file the attribute id is hidden in forkNum */
            smgr_redo_create(rnode, createRec->forkNum, (char *)createRec);
            break;
        }
        case XLOG_SMGR_TRUNCATE: {
            xl_smgr_truncate* truncRec = (xl_smgr_truncate*)XLogRecGetData(record);
            RelFileNode rnode;

            RelFileNodeCopy(rnode, truncRec->rnode, XLogRecGetBucketId(record));

            /*
             * The callee forcibly creates the relation if it doesn't exist
             * (which suggests it was dropped later in the WAL sequence), so
             * the log can be replayed as best we can until the drop is seen.
             *
             * It also updates the minimum recovery point to cover this
             * record before truncating: once the relation is truncated
             * there is no going back, and the WAL-first rule that the buffer
             * manager enforces for normal updates has to be applied manually
             * here. If the truncation then fails, the system cannot start up
             * until the underlying problem is fixed; updating the recovery
             * point afterwards instead would leave a window in which the
             * WAL-first rule could be violated.
             */
            xlog_block_smgr_redo_truncate(rnode, truncRec->blkno, lsn);
            break;
        }
        default:
            ereport(PANIC, (errmsg("smgr_redo: unknown op code %u", info)));
            break;
    }
}
|
|
|
|
/*
 * smgrApplyXLogTruncateRelation() -- apply an smgr truncate record: close the
 * smgr node named by the record, then pass the record and its target block
 * count to XLogTruncateRelation() for the main fork.
 */
void smgrApplyXLogTruncateRelation(XLogReaderState* record)
{
    xl_smgr_truncate* truncRec = (xl_smgr_truncate*)XLogRecGetData(record);
    RelFileNodeBackend target;

    target.backend = InvalidBackendId;
    RelFileNodeCopy(target.node, truncRec->rnode, XLogRecGetBucketId(record));

    smgrclosenode(target);

    XLogTruncateRelation(record, target.node, MAIN_FORKNUM, truncRec->blkno);
}
|
|
|
|
/*
|
|
* Brief : drop hdfs directories.
|
|
* Input : pFileNode, array of ColFileNode,
|
|
* : rels, number of relation in pFileNode,
|
|
* : dropDir, drop hdfs directory with transaction status,
|
|
* : cfgFromMapper, true if call is from xlog redo;
|
|
* false if call from FinishPrepareTransactionPhase2()
|
|
* Output : None.
|
|
* Return Value : None.
|
|
* Notes : None.
|
|
*/
|
|
void ClearDfsStorage(ColFileNode* pFileNode, int nrels, bool dropDir, bool cfgFromMapper)
|
|
{
|
|
ColFileNode* colFileNode = NULL;
|
|
|
|
for (int i = 0; i < nrels; i++) {
|
|
colFileNode = pFileNode + i;
|
|
|
|
if (IsValidPaxDfsForkNum(colFileNode->forknum) && dropDir) {
|
|
DropDfsDirectory(colFileNode, cfgFromMapper);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Brief : clear hdfs directory.
|
|
* Input : colFileNode, including table, db, tablespace oid
|
|
* : cfgFromMapper, true if call is from xlog redo;
|
|
* false if call from FinishPrepareTransactionPhase2()
|
|
* Output : None.
|
|
* Return Value : None.
|
|
* Notes : None.
|
|
*/
|
|
void ClearDfsDirectory(ColFileNode* colFileNode, bool cfgFromMapper)
|
|
{
|
|
Oid tblSpcOid;
|
|
|
|
MapperFileOptions options;
|
|
DfsSrvOptions* srvOptions = NULL;
|
|
dfs::DFSConnector* conn = NULL;
|
|
|
|
/* get configuration info from the mapper file. */
|
|
if (-1 == GetConnConfig(colFileNode->filenode, &options))
|
|
return;
|
|
|
|
ResetPendingDfsDelete();
|
|
|
|
if (cfgFromMapper) {
|
|
/* run here just by xlog redo */
|
|
srvOptions = (DfsSrvOptions*)palloc0(sizeof(DfsSrvOptions));
|
|
|
|
/* get connection information by mapper file */
|
|
srvOptions->filesystem = options.filesystem;
|
|
srvOptions->address = options.address;
|
|
srvOptions->cfgPath = options.cfgpath;
|
|
srvOptions->storePath = NULL;
|
|
|
|
bool err = false;
|
|
PG_TRY();
|
|
{
|
|
conn = dfs::createConnector(CurrentMemoryContext, srvOptions, colFileNode->filenode.spcNode);
|
|
}
|
|
PG_CATCH();
|
|
{
|
|
ereport(LOG,
|
|
(errmsg("Failed to connect to HDFS, address: %s, config path: %s", options.address, options.cfgpath)));
|
|
FlushErrorState();
|
|
err = true;
|
|
}
|
|
PG_END_TRY();
|
|
|
|
if (err)
|
|
return;
|
|
} else {
|
|
/* get connection information by tablespace oid */
|
|
tblSpcOid = colFileNode->filenode.spcNode;
|
|
srvOptions = GetDfsSrvOptions(tblSpcOid);
|
|
|
|
conn = dfs::createConnector(CurrentMemoryContext, srvOptions, tblSpcOid);
|
|
}
|
|
|
|
if (conn == NULL) {
|
|
ereport(LOG, (errmsg("Failed to connect to HDFS")));
|
|
return;
|
|
}
|
|
|
|
/* read the hdfs file list. */
|
|
if (-1 == ReadDfsFilelist(colFileNode->filenode, colFileNode->ownerid, &u_sess->catalog_cxt.pendingDfsDeletes))
|
|
return;
|
|
|
|
u_sess->catalog_cxt.delete_conn = conn;
|
|
|
|
u_sess->catalog_cxt.vf_store_root = makeStringInfo();
|
|
appendStringInfo(u_sess->catalog_cxt.vf_store_root, "%s", options.tblpath);
|
|
|
|
doPendingDfsDelete(true, NULL);
|
|
|
|
pfree(srvOptions);
|
|
}
|
|
|
|
/*
|
|
* Brief : drop hdfs directory.
|
|
* Input : colFileNode, including table, db, tablespace oid
|
|
* : cfgFromMapper, true if call is from xlog redo;
|
|
* false if call from FinishPrepareTransactionPhase2()
|
|
* Output : None.
|
|
* Return Value : None.
|
|
* Notes : None.
|
|
*/
|
|
void DropDfsDirectory(ColFileNode* colFileNode, bool cfgFromMapper)
|
|
{
|
|
Oid tblSpcOid;
|
|
|
|
MapperFileOptions options;
|
|
DfsSrvOptions* srvOptions = NULL;
|
|
dfs::DFSConnector* conn = NULL;
|
|
|
|
/* get configuration info from the mapper file. */
|
|
if (-1 == GetConnConfig(colFileNode->filenode, &options))
|
|
return;
|
|
|
|
if (cfgFromMapper) {
|
|
/* run here just by xlog redo */
|
|
srvOptions = (DfsSrvOptions*)palloc0(sizeof(DfsSrvOptions));
|
|
|
|
/* get connection information by mapper file */
|
|
srvOptions->filesystem = options.filesystem;
|
|
srvOptions->address = options.address;
|
|
srvOptions->cfgPath = options.cfgpath;
|
|
srvOptions->storePath = NULL;
|
|
|
|
bool err = false;
|
|
PG_TRY();
|
|
{
|
|
conn = dfs::createConnector(CurrentMemoryContext, srvOptions, colFileNode->filenode.spcNode);
|
|
}
|
|
PG_CATCH();
|
|
{
|
|
ereport(LOG,
|
|
(errmsg("Failed to connect to HDFS, address: %s, config path: %s", options.address, options.cfgpath)));
|
|
FlushErrorState();
|
|
err = true;
|
|
}
|
|
PG_END_TRY();
|
|
|
|
if (err)
|
|
return;
|
|
} else {
|
|
/* get connection information by tablespace oid */
|
|
tblSpcOid = colFileNode->filenode.spcNode;
|
|
srvOptions = GetDfsSrvOptions(tblSpcOid);
|
|
|
|
conn = dfs::createConnector(CurrentMemoryContext, srvOptions, tblSpcOid);
|
|
}
|
|
|
|
if (conn == NULL) {
|
|
ereport(LOG, (errmsg("Failed to connect to HDFS")));
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* if tblpath includes ':', then extract the timestamp and check it. If the
|
|
* timestamp is not the same, then we do not remove the directory and left it.
|
|
*/
|
|
char* timestr = strrchr(options.tblpath, ':');
|
|
if (timestr != NULL) {
|
|
int64 timestmap = 0;
|
|
char* endStr = NULL;
|
|
timestmap = strtoll(timestr + 1, &endStr, 10);
|
|
*timestr = '\0';
|
|
if (conn->getLastModifyTime(options.tblpath) != timestmap) {
|
|
ereport(LOG,
|
|
(errmodule(MOD_DFS),
|
|
errmsg("The directory of the relation to be dropped is changed "
|
|
"by others, so skip delete the hdfs directory %s.",
|
|
options.tblpath)));
|
|
delete (conn);
|
|
return;
|
|
}
|
|
}
|
|
|
|
/* drop relation directory on HDFS */
|
|
if (conn->pathExists(options.tblpath)) {
|
|
int retry_times = 2;
|
|
int ret = -1;
|
|
|
|
while ((retry_times > 0) && (ret != 0)) {
|
|
ret = conn->deleteFile(options.tblpath, 1);
|
|
--retry_times;
|
|
}
|
|
|
|
if (ret != 0) {
|
|
ereport(
|
|
WARNING, (errmodule(MOD_HDFS), errmsg("Failed to remove directory on HDFS, need to manually delete.")));
|
|
}
|
|
}
|
|
|
|
delete (conn);
|
|
}
|
|
|
|
/*
|
|
* Brief : drop the dfs file list.
|
|
* Input : fNode, relfilenode of the dfs table.
|
|
* Output : None.
|
|
* Return Value : None.
|
|
* Notes : None.
|
|
*/
|
|
void DropDfsFilelist(RelFileNode fNode)
|
|
{
|
|
int ret;
|
|
char mapper_path[MAXPGPATH];
|
|
errno_t rc = EOK;
|
|
rc = memset_s(mapper_path, MAXPGPATH, 0, MAXPGPATH);
|
|
securec_check(rc, "\0", "\0");
|
|
|
|
/*
|
|
* get the path of the dfs file list
|
|
*/
|
|
char* db_path = GetDatabasePath(fNode.dbNode, fNode.spcNode);
|
|
ret = snprintf_s(mapper_path,
|
|
sizeof(mapper_path),
|
|
sizeof(mapper_path) - 1,
|
|
"%s/%u_%u_%u_snapshot",
|
|
db_path,
|
|
fNode.spcNode,
|
|
fNode.dbNode,
|
|
fNode.relNode);
|
|
securec_check_ss(ret, "\0", "\0");
|
|
|
|
pfree(db_path);
|
|
|
|
/*
|
|
* drop the dfs file list. note we ignore any error
|
|
*/
|
|
if (unlink(mapper_path) < 0) {
|
|
ereport(LOG,
|
|
(errmodule(MOD_HDFS), errcode_for_file_access(), errmsg("could not unlink file \"%s\": %m", mapper_path)));
|
|
}
|
|
|
|
ereport(DEBUG1, (errmsg("Dropped the DfsFilelist:%s", mapper_path)));
|
|
}
|
|
|
|
/*
|
|
* Brief : drop the mapper file.
|
|
* Input : fNode, relfilenode of the mapper file.
|
|
* Output : None.
|
|
* Return Value : None.
|
|
* Notes : None.
|
|
*/
|
|
void DropMapperFile(RelFileNode fNode)
|
|
{
|
|
int ret;
|
|
char mapper_path[MAXPGPATH] = {0};
|
|
|
|
/*
|
|
* get the path of the mapper file
|
|
*/
|
|
char* db_path = GetDatabasePath(fNode.dbNode, fNode.spcNode);
|
|
ret = snprintf_s(mapper_path,
|
|
sizeof(mapper_path),
|
|
sizeof(mapper_path) - 1,
|
|
"%s/%u_%u_%u",
|
|
db_path,
|
|
fNode.spcNode,
|
|
fNode.dbNode,
|
|
fNode.relNode);
|
|
securec_check_ss(ret, "\0", "\0");
|
|
|
|
pfree(db_path);
|
|
|
|
/*
|
|
* drop the mapper file. note we ignore any error
|
|
*/
|
|
if (unlink(mapper_path) < 0) {
|
|
ereport(LOG,
|
|
(errmodule(MOD_HDFS), errcode_for_file_access(), errmsg("could not unlink file \"%s\": %m", mapper_path)));
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Brief : drop the mapper files for hdfs relations.
|
|
* Input : pColFileNode, thr array of relfilenode;
|
|
* : nrels, the number of relfilenode in pColFileNode;
|
|
* Output : None.
|
|
* Return Value : None.
|
|
* Notes : None.
|
|
*/
|
|
void DropMapperFiles(ColFileNode* pColFileNode, int nrels)
|
|
{
|
|
ColFileNode* colFileNode = NULL;
|
|
|
|
for (int i = 0; i < nrels; i++) {
|
|
colFileNode = pColFileNode + i;
|
|
|
|
if (IsValidPaxDfsForkNum(colFileNode->forknum)) {
|
|
DropMapperFile(colFileNode->filenode);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Brief : drop relation entry from global hash table.
|
|
* Input : pColFileNode, array of ColFileNode,
|
|
* : rels, number of relation in pColFileNode
|
|
* Output : None.
|
|
* Return Value : None.
|
|
* Notes : None.
|
|
*/
|
|
void UnregisterDfsSpace(ColFileNode* pColFileNode, int rels)
|
|
{
|
|
ColFileNode* colFileNode = NULL;
|
|
|
|
for (int i = 0; i < rels; i++) {
|
|
colFileNode = pColFileNode + i;
|
|
|
|
if (IsValidPaxDfsForkNum(colFileNode->forknum)) {
|
|
DfsInsert::InvalidSpaceAllocCache(colFileNode->filenode.relNode);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Brief : create hdfs directory.
|
|
* Input : rel, Relation structure.
|
|
* Output : None.
|
|
* Return Value : None.
|
|
* Notes : None.
|
|
*/
|
|
void CreateDfsStorage(Relation rel)
|
|
{
|
|
Oid tblSpcOid;
|
|
|
|
StringInfo storePath;
|
|
DfsSrvOptions* srvOptions = NULL;
|
|
dfs::DFSConnector* conn = NULL;
|
|
int64 timestamp = 0;
|
|
|
|
tblSpcOid = rel->rd_rel->reltablespace;
|
|
storePath = getDfsStorePath(rel);
|
|
srvOptions = GetDfsSrvOptions(tblSpcOid);
|
|
|
|
/* 1. create relation directory on HDFS */
|
|
conn = dfs::createConnector(CurrentMemoryContext, srvOptions, tblSpcOid);
|
|
|
|
/* 1. create relation directory on HDFS */
|
|
/* The create dfs directory's operator is only needed on current coordinate. */
|
|
if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) {
|
|
/* Here we first delete the old directory without check if it succeed. */
|
|
(void)conn->deleteFile(storePath->data, 1);
|
|
|
|
/* sleep 1 millisecond to make sure that the access time is different in millisecond. */
|
|
(void)usleep(1);
|
|
|
|
if (-1 == conn->createDirectory(storePath->data)) {
|
|
delete (conn);
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
(errmsg("Failed to create directory on HDFS."),
|
|
errdetail("Please check log information in %s.", g_instance.attr.attr_common.PGXCNodeName))));
|
|
}
|
|
|
|
/*
|
|
* Get the last modify time of the directory just created.
|
|
*/
|
|
timestamp = conn->getLastModifyTime(storePath->data);
|
|
}
|
|
|
|
/* 2. write config info to the mapper file */
|
|
(void)SetConnConfig(rel->rd_node, srvOptions, storePath, timestamp);
|
|
|
|
/* 3. log the mapper file to pendingDelete, drop hdfs directory on abort */
|
|
InsertStorageIntoPendingList(&rel->rd_node, DFS_STOR_FLAG, InvalidBackendId, rel->rd_rel->relowner, false);
|
|
|
|
delete (conn);
|
|
|
|
pfree(storePath->data);
|
|
pfree(storePath);
|
|
}
|
|
|
|
/*
|
|
* Brief : do NOT drop hdfs directory really, just log into pendingDeletes.
|
|
* Input : rel, Relation structure.
|
|
* Output : None.
|
|
* Return Value : None.
|
|
* Notes : Called by
|
|
* DROP CN DN
|
|
* TRUNCATE DN
|
|
* Notices : Call DropMapperFile to clean mapper file, otherwise will cause DROP TABLESPACE to fail.
|
|
*/
|
|
void DropDfsStorage(Relation rel, bool isDfsTruncate)
|
|
{
|
|
Oid tblSpcOid;
|
|
|
|
StringInfo storePath;
|
|
DfsSrvOptions* srvOptions = NULL;
|
|
dfs::DFSConnector* conn = NULL;
|
|
int64 timestamp = 0;
|
|
|
|
tblSpcOid = rel->rd_rel->reltablespace;
|
|
storePath = getDfsStorePath(rel);
|
|
srvOptions = GetDfsSrvOptions(tblSpcOid);
|
|
|
|
/* 1. just for getting hdfs server address which in srvOptions->address */
|
|
conn = dfs::createConnector(CurrentMemoryContext, srvOptions, tblSpcOid);
|
|
|
|
/*
|
|
* Get the last modify time of the directory to be dropped, which is not
|
|
* needed for truncate.
|
|
*/
|
|
if (!isDfsTruncate && IS_PGXC_COORDINATOR)
|
|
timestamp = conn->getLastModifyTime(storePath->data);
|
|
|
|
/* 2. write config info to the mapper file, call DropMapperFile to clean */
|
|
(void)SetConnConfig(rel->rd_node, srvOptions, storePath, timestamp);
|
|
|
|
if (isDfsTruncate) {
|
|
/* log the mapper file to pendingDelete, clear hdfs directory on commit */
|
|
InsertStorageIntoPendingList(&rel->rd_node, DFS_STOR_FLAG, rel->rd_backend, rel->rd_rel->relowner, true, true);
|
|
} else {
|
|
/* log the mapper file to pendingDelete, drop hdfs directory on commit */
|
|
InsertStorageIntoPendingList(&rel->rd_node, DFS_STOR_FLAG, rel->rd_backend, rel->rd_rel->relowner, true, false);
|
|
}
|
|
|
|
if (conn != NULL)
|
|
delete (conn);
|
|
|
|
pfree(storePath->data);
|
|
pfree(storePath);
|
|
}
|
|
|
|
/*
|
|
* Brief : get the content of mapper file.
|
|
* Input : fNode, relfilenode of the mapper file.
|
|
* : options, output argument, return config info.
|
|
* Output : None.
|
|
* Return Value : 0 - success, others - fail.
|
|
* Notes : None.
|
|
*/
|
|
static int GetConnConfig(RelFileNode fNode, MapperFileOptions* options)
|
|
{
|
|
Assert(options);
|
|
|
|
int ret;
|
|
char mapper_path[MAXPGPATH] = {0};
|
|
|
|
/*
|
|
* get the path of the mapper file
|
|
*/
|
|
char* db_path = GetDatabasePath(fNode.dbNode, fNode.spcNode);
|
|
ret = snprintf_s(mapper_path,
|
|
sizeof(mapper_path),
|
|
sizeof(mapper_path) - 1,
|
|
"%s/%u_%u_%u",
|
|
db_path,
|
|
fNode.spcNode,
|
|
fNode.dbNode,
|
|
fNode.relNode);
|
|
securec_check_ss(ret, "\0", "\0");
|
|
|
|
pfree(db_path);
|
|
|
|
/*
|
|
* open the mapper file and get the content of the file.
|
|
*/
|
|
int fd = open(mapper_path, O_RDONLY, S_IRUSR | S_IWUSR);
|
|
if (fd < 0) {
|
|
ereport(LOG, (errmsg("Failed to open the mapper file, error code: %d", errno)));
|
|
return -1;
|
|
}
|
|
|
|
if (read(fd, options, sizeof(MapperFileOptions)) != sizeof(MapperFileOptions)) {
|
|
close(fd);
|
|
ereport(LOG, (errmsg("Failed to read data from the mapper file, error code: %d", errno)));
|
|
return -1;
|
|
}
|
|
|
|
close(fd);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Brief : get the content of dfs file list.
|
|
* Input : fNode, relfilenode of the dfs table.
|
|
* Input : ownerid, owner id of the dfs table.
|
|
* Output : pendingDfsDeletes will be appended. the list cell will alloc mem in t_thrd.top_mem_cxt
|
|
* Return Value : 0 - success, others - fail.
|
|
* Notes : make sure free the pendingList after use
|
|
*/
|
|
int ReadDfsFilelist(RelFileNode fNode, Oid ownerid, List** pendingList)
|
|
{
|
|
Assert(pendingList);
|
|
int buffLen = 0;
|
|
int* buffLenPtr = &buffLen;
|
|
errno_t rc = EOK;
|
|
TransactionId currXid = GetCurrentTransactionIdIfAny();
|
|
|
|
char mapper_path[MAXPGPATH];
|
|
rc = memset_s(mapper_path, MAXPGPATH, 0, MAXPGPATH);
|
|
securec_check(rc, "\0", "\0");
|
|
|
|
int ret;
|
|
char* db_path = GetDatabasePath(fNode.dbNode, fNode.spcNode);
|
|
ret = snprintf_s(mapper_path,
|
|
sizeof(mapper_path),
|
|
sizeof(mapper_path) - 1,
|
|
"%s/%u_%u_%u_snapshot",
|
|
db_path,
|
|
fNode.spcNode,
|
|
fNode.dbNode,
|
|
fNode.relNode);
|
|
securec_check_ss(ret, "\0", "\0");
|
|
pfree(db_path);
|
|
|
|
int fd = open(mapper_path, O_RDONLY, S_IRUSR | S_IWUSR);
|
|
if (fd < 0) {
|
|
ereport(LOG, (errmsg("Failed to open the dfs file list, error code: %d", errno)));
|
|
return -1;
|
|
}
|
|
|
|
if (read(fd, buffLenPtr, sizeof(int)) != sizeof(int)) {
|
|
close(fd);
|
|
ereport(LOG, (errmsg("Failed to read data from the dfs file list, error code: %d", errno)));
|
|
return -1;
|
|
}
|
|
|
|
char* buff = (char*)palloc(buffLen + 1);
|
|
|
|
if (read(fd, buff, buffLen) != buffLen) {
|
|
close(fd);
|
|
ereport(LOG, (errmsg("Failed to read data from the dfs file list, error code: %d", errno)));
|
|
pfree(buff);
|
|
return -1;
|
|
}
|
|
buff[buffLen] = '\0';
|
|
|
|
close(fd);
|
|
|
|
/* add them into the dfsPendingDelete */
|
|
char* tempStr = NULL;
|
|
char* fileName = strtok_r(buff, ",", &tempStr);
|
|
StringInfo fName = makeStringInfo();
|
|
|
|
while (fileName != NULL) {
|
|
resetStringInfo(fName);
|
|
|
|
/* parser file name */
|
|
char* pos = strrchr(fileName, ':');
|
|
if (pos != NULL) {
|
|
*pos = '\0';
|
|
}
|
|
appendStringInfo(fName, "%s", fileName);
|
|
|
|
if (pos != NULL) {
|
|
*pos = ':';
|
|
}
|
|
|
|
/* parser file size */
|
|
uint64 filesize = 0;
|
|
if ((pos + 1) != NULL) {
|
|
int64 size = atol(pos + 1);
|
|
if (size > 0)
|
|
filesize = (uint64)size;
|
|
}
|
|
|
|
/* add to list */
|
|
do {
|
|
AutoContextSwitch newContext(THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE));
|
|
|
|
PendingDfsDelete* pending = (PendingDfsDelete*)MemoryContextAlloc(
|
|
THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), sizeof(PendingDfsDelete));
|
|
pending->filename = makeStringInfo();
|
|
appendStringInfoString(pending->filename, fName->data);
|
|
pending->atCommit = true;
|
|
pending->ownerid = ownerid;
|
|
pending->xid = currXid;
|
|
pending->filesize = filesize;
|
|
|
|
*pendingList = lappend(*pendingList, pending);
|
|
} while (0);
|
|
|
|
/* next token */
|
|
fileName = strtok_r(NULL, ",", &tempStr);
|
|
}
|
|
|
|
pfree(fName->data);
|
|
pfree(fName);
|
|
pfree(buff);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Brief : create the dfs file list.
|
|
* Input : rel, RelationData of the dfs table
|
|
* Output : dfs file list will be created on DN
|
|
* Return Value : None.
|
|
* Notes : None.
|
|
*/
|
|
void SaveDfsFilelist(Relation rel, DFSDescHandler* handler)
|
|
{
|
|
/* get the path of the mapper file */
|
|
RelFileNode fNode = rel->rd_node;
|
|
char mapper_path[MAXPGPATH];
|
|
errno_t rc = EOK;
|
|
rc = memset_s(mapper_path, MAXPGPATH, 0, MAXPGPATH);
|
|
securec_check(rc, "\0", "\0");
|
|
|
|
int ret;
|
|
char* db_path = GetDatabasePath(fNode.dbNode, fNode.spcNode);
|
|
ret = snprintf_s(mapper_path,
|
|
sizeof(mapper_path),
|
|
sizeof(mapper_path) - 1,
|
|
"%s/%u_%u_%u_snapshot",
|
|
db_path,
|
|
fNode.spcNode,
|
|
fNode.dbNode,
|
|
fNode.relNode);
|
|
securec_check_ss(ret, "\0", "\0");
|
|
pfree(db_path);
|
|
|
|
/* open the mapper file and save the dfs file list */
|
|
int fd = open(mapper_path, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
|
|
if (fd < 0) {
|
|
ereport(WARNING, (errmsg("Failed to create the dfs file list.")));
|
|
return;
|
|
}
|
|
|
|
List* descs = handler->GetAllDescs(SnapshotNow);
|
|
|
|
StringInfo fullpath = makeStringInfo();
|
|
|
|
ListCell* lc = NULL;
|
|
foreach (lc, descs) {
|
|
DFSDesc* desc = (DFSDesc*)lfirst(lc);
|
|
/* make sure not have any char ',' and ':' in desc */
|
|
appendStringInfo(fullpath, "%s:%ld,", desc->GetFileName(), desc->GetFileSize());
|
|
}
|
|
|
|
if (write(fd, &fullpath->len, sizeof(fullpath->len)) < 0) {
|
|
close(fd);
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(), errmsg("Failed to write data to the dfs file list, error code: %d", errno)));
|
|
}
|
|
|
|
if (write(fd, fullpath->data, fullpath->len) < 0) {
|
|
close(fd);
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(), errmsg("Failed to write data to the dfs file list, error code: %d", errno)));
|
|
}
|
|
|
|
close(fd);
|
|
ereport(DEBUG1, (errmsg("%s: %s", __FUNCTION__, fullpath->data)));
|
|
|
|
pfree(fullpath->data);
|
|
fullpath->data = NULL;
|
|
pfree(fullpath);
|
|
}
|
|
|
|
/*
|
|
* Brief : create the mapper file.
|
|
* Input : fNode, relfilenode of the mapper file.
|
|
* : srvOptions, connection information
|
|
* : storePath, data directory on HDFS for hdfs table.
|
|
* Output : None.
|
|
* Return Value : None.
|
|
* Notes : None.
|
|
*/
|
|
static int SetConnConfig(RelFileNode fNode, DfsSrvOptions* srvOptions, StringInfo storePath, int64 timestamp)
|
|
{
|
|
char mapper_path[MAXPGPATH] = {0};
|
|
errno_t rs;
|
|
int ret;
|
|
|
|
MapperFileOptions options;
|
|
|
|
rs = memset_s(&options, sizeof(MapperFileOptions), 0, sizeof(MapperFileOptions));
|
|
securec_check(rs, "\0", "\0");
|
|
|
|
/*
|
|
* get the path of the mapper file
|
|
*/
|
|
char* db_path = GetDatabasePath(fNode.dbNode, fNode.spcNode);
|
|
ret = snprintf_s(mapper_path,
|
|
sizeof(mapper_path),
|
|
sizeof(mapper_path) - 1,
|
|
"%s/%u_%u_%u",
|
|
db_path,
|
|
fNode.spcNode,
|
|
fNode.dbNode,
|
|
fNode.relNode);
|
|
securec_check_ss(ret, "\0", "\0");
|
|
|
|
pfree(db_path);
|
|
|
|
/*
|
|
* open the mapper file and write the configuration info.
|
|
*/
|
|
int fd = open(mapper_path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
|
|
if (fd < 0) {
|
|
ereport(LOG, (errmsg("Failed to create the mapper file, error code: %d", errno)));
|
|
return -1;
|
|
}
|
|
|
|
ret = snprintf_s(options.filesystem, NAMEDATALEN, NAMEDATALEN - 1, "%s", srvOptions->filesystem);
|
|
securec_check_ss(ret, "\0", "\0");
|
|
ret = snprintf_s(options.address, MAXPGPATH, MAXPGPATH - 1, "%s", srvOptions->address);
|
|
securec_check_ss(ret, "\0", "\0");
|
|
ret = snprintf_s(options.cfgpath, MAXPGPATH, MAXPGPATH - 1, "%s", srvOptions->cfgPath);
|
|
securec_check_ss(ret, "\0", "\0");
|
|
if (timestamp == 0) {
|
|
ret = snprintf_s(options.tblpath, MAXPGPATH, MAXPGPATH - 1, "%s", storePath->data);
|
|
} else {
|
|
ret = snprintf_s(options.tblpath, MAXPGPATH, MAXPGPATH - 1, "%s:%ld", storePath->data, timestamp);
|
|
}
|
|
securec_check_ss(ret, "\0", "\0");
|
|
if (write(fd, &options, sizeof(options)) < 0) {
|
|
ereport(LOG, (errmsg("Failed to write data to the mapper file, error code: %d", errno)));
|
|
close(fd);
|
|
return -1;
|
|
}
|
|
|
|
close(fd);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* release all resources in pendingDfsDeletes, and set NULL to all variables
|
|
*/
|
|
void ResetPendingDfsDelete()
|
|
{
|
|
/*
|
|
* reset pendingDfsDeletes
|
|
*/
|
|
List* files = u_sess->catalog_cxt.pendingDfsDeletes;
|
|
u_sess->catalog_cxt.pendingDfsDeletes = NIL;
|
|
|
|
ListCell* lc = NULL;
|
|
foreach (lc, files) {
|
|
PendingDfsDelete* del_file = (PendingDfsDelete*)lfirst(lc);
|
|
if (NULL != del_file) {
|
|
if (NULL != del_file->filename) {
|
|
pfree(del_file->filename->data);
|
|
pfree(del_file->filename);
|
|
}
|
|
pfree(del_file);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* reset connection obj used to delete files
|
|
*/
|
|
if (u_sess->catalog_cxt.delete_conn != NULL) {
|
|
dfs::DFSConnector* conn = u_sess->catalog_cxt.delete_conn;
|
|
u_sess->catalog_cxt.delete_conn = NULL;
|
|
delete (conn);
|
|
}
|
|
|
|
/*
|
|
* reset root store path of data file of one table
|
|
*/
|
|
if (u_sess->catalog_cxt.vf_store_root != NULL) {
|
|
StringInfo root = u_sess->catalog_cxt.vf_store_root;
|
|
u_sess->catalog_cxt.vf_store_root = NULL;
|
|
pfree(root->data);
|
|
pfree(root);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* append filename to pendingDfsDeletes
|
|
*/
|
|
void InsertIntoPendingDfsDelete(const char* filename, bool atCommit, Oid ownerid, uint64 filesize)
|
|
{
|
|
AutoContextSwitch newContext(THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE));
|
|
|
|
PendingDfsDelete* pending = (PendingDfsDelete*)MemoryContextAlloc(
|
|
THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), sizeof(PendingDfsDelete));
|
|
pending->filename = NULL;
|
|
pending->filename = makeStringInfo();
|
|
|
|
appendStringInfo(pending->filename, "%s", filename);
|
|
pending->atCommit = atCommit;
|
|
|
|
pending->ownerid = ownerid;
|
|
pending->xid = GetCurrentTransactionIdIfAny();
|
|
pending->filesize = filesize;
|
|
|
|
u_sess->catalog_cxt.pendingDfsDeletes = lappend(u_sess->catalog_cxt.pendingDfsDeletes, pending);
|
|
}
|
|
|
|
/*
|
|
* delete files on commit or abort
|
|
*/
|
|
void doPendingDfsDelete(bool isCommit, TransactionId* xid)
|
|
{
|
|
if (u_sess->catalog_cxt.pendingDfsDeletes == NULL || u_sess->catalog_cxt.delete_conn == NULL ||
|
|
u_sess->catalog_cxt.vf_store_root == NULL) {
|
|
ResetPendingDfsDelete();
|
|
return;
|
|
}
|
|
|
|
Assert(
|
|
u_sess->catalog_cxt.pendingDfsDeletes && u_sess->catalog_cxt.delete_conn && u_sess->catalog_cxt.vf_store_root);
|
|
|
|
/*
|
|
* make sure that pendingDfsDeletes will be set to NULL whatever cases.
|
|
*/
|
|
TransactionId currXid = (xid != NULL) ? *xid : GetCurrentTransactionIdIfAny();
|
|
List* files = u_sess->catalog_cxt.pendingDfsDeletes;
|
|
u_sess->catalog_cxt.pendingDfsDeletes = NIL;
|
|
|
|
dfs::DFSConnector* conn = u_sess->catalog_cxt.delete_conn;
|
|
u_sess->catalog_cxt.delete_conn = NULL;
|
|
|
|
StringInfo rootpath = makeStringInfo();
|
|
appendStringInfo(rootpath, "%s", u_sess->catalog_cxt.vf_store_root->data);
|
|
pfree(u_sess->catalog_cxt.vf_store_root->data);
|
|
u_sess->catalog_cxt.vf_store_root->data = NULL;
|
|
pfree(u_sess->catalog_cxt.vf_store_root);
|
|
u_sess->catalog_cxt.vf_store_root = NULL;
|
|
|
|
StringInfo fullpath = makeStringInfo();
|
|
|
|
ListCell* lc = NULL;
|
|
foreach (lc, files) {
|
|
PendingDfsDelete* del_file = (PendingDfsDelete*)lfirst(lc);
|
|
|
|
/* delete files according to status of transaction */
|
|
if (del_file->atCommit == isCommit && del_file->xid == currXid) {
|
|
resetStringInfo(fullpath);
|
|
appendStringInfo(fullpath, "%s/%s", rootpath->data, del_file->filename->data);
|
|
|
|
/* decrease the permanent space on users' record */
|
|
perm_space_decrease(del_file->ownerid, del_file->filesize, SP_PERM);
|
|
|
|
conn->deleteFile(fullpath->data, false);
|
|
ereport(
|
|
DEBUG1, (errmsg("Delete file %s by %s.", fullpath->data, g_instance.attr.attr_common.PGXCNodeName)));
|
|
}
|
|
|
|
/* release memory at commit or abort */
|
|
pfree(del_file->filename->data);
|
|
pfree(del_file->filename);
|
|
pfree(del_file);
|
|
}
|
|
|
|
pfree(fullpath->data);
|
|
pfree(fullpath);
|
|
|
|
pfree(rootpath->data);
|
|
pfree(rootpath);
|
|
|
|
delete (conn);
|
|
}
|
|
|
|
/* create Column Heap Main file list */
|
|
void ColMainFileNodesCreate(void)
|
|
{
|
|
if (u_sess->catalog_cxt.ColMainFileNodes == NULL) {
|
|
u_sess->catalog_cxt.ColMainFileNodes =
|
|
(RelFileNodeBackend*)MemoryContextAlloc(SESS_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE),
|
|
u_sess->catalog_cxt.ColMainFileNodesMaxNum * sizeof(RelFileNodeBackend));
|
|
} else {
|
|
/* Rollback maybe happens during rollback transaction.
|
|
* So reset Column Heap Main file list.
|
|
*/
|
|
u_sess->catalog_cxt.ColMainFileNodesCurNum = 0;
|
|
}
|
|
}
|
|
|
|
/* destroy Column Heap Main file list */
|
|
void ColMainFileNodesDestroy(void)
|
|
{
|
|
if (u_sess->catalog_cxt.ColMainFileNodes) {
|
|
pfree_ext(u_sess->catalog_cxt.ColMainFileNodes);
|
|
u_sess->catalog_cxt.ColMainFileNodesCurNum = 0;
|
|
u_sess->catalog_cxt.ColMainFileNodesMaxNum = ColMainFileNodesDefNum;
|
|
}
|
|
}
|
|
|
|
/* expand Column Heap Main relfilenode list */
|
|
static inline void ColMainFileNodesExpand(void)
|
|
{
|
|
if (u_sess->catalog_cxt.ColMainFileNodesCurNum == u_sess->catalog_cxt.ColMainFileNodesMaxNum) {
|
|
u_sess->catalog_cxt.ColMainFileNodesMaxNum *= 2;
|
|
u_sess->catalog_cxt.ColMainFileNodes = (RelFileNodeBackend*)repalloc(u_sess->catalog_cxt.ColMainFileNodes,
|
|
u_sess->catalog_cxt.ColMainFileNodesMaxNum * sizeof(RelFileNodeBackend));
|
|
} else {
|
|
Assert(u_sess->catalog_cxt.ColMainFileNodesCurNum < u_sess->catalog_cxt.ColMainFileNodesMaxNum);
|
|
}
|
|
}
|
|
|
|
/* append one filenode to Column Heap Main file list */
|
|
void ColMainFileNodesAppend(RelFileNode* bcmFileNode, BackendId backend)
|
|
{
|
|
ColMainFileNodesExpand();
|
|
u_sess->catalog_cxt.ColMainFileNodes[u_sess->catalog_cxt.ColMainFileNodesCurNum].node = *bcmFileNode;
|
|
u_sess->catalog_cxt.ColMainFileNodes[u_sess->catalog_cxt.ColMainFileNodesCurNum].backend = backend;
|
|
++u_sess->catalog_cxt.ColMainFileNodesCurNum;
|
|
|
|
ereport(DEBUG1,
|
|
(errmsg("Row[MAIN] relation dropped: %u/%u/%u backend(%d)",
|
|
bcmFileNode->spcNode,
|
|
bcmFileNode->dbNode,
|
|
bcmFileNode->relNode,
|
|
backend)));
|
|
}
|
|
|
|
/* search some one in Column Heap Main file list.
|
|
*
|
|
* If it's found, we will put it in the front of this list.
|
|
* We think the hit file is the same to the next expected file.
|
|
*/
|
|
static inline bool ColMainFileNodesSearch(RelFileNodeBackend* bcmFileNode)
|
|
{
|
|
RelFileNodeBackend* node = u_sess->catalog_cxt.ColMainFileNodes;
|
|
int idx = 0;
|
|
bool found = false;
|
|
|
|
for (idx = 0; idx < u_sess->catalog_cxt.ColMainFileNodesCurNum; ++idx) {
|
|
if (RelFileNodeEquals(bcmFileNode->node, node->node) && bcmFileNode->backend == node->backend) {
|
|
found = true;
|
|
break;
|
|
}
|
|
++node;
|
|
}
|
|
|
|
if (found && idx != 0) {
|
|
/* put the hit one in the front of file list */
|
|
RelFileNodeBackend tmpnode = u_sess->catalog_cxt.ColMainFileNodes[0];
|
|
u_sess->catalog_cxt.ColMainFileNodes[0] = u_sess->catalog_cxt.ColMainFileNodes[idx];
|
|
u_sess->catalog_cxt.ColMainFileNodes[idx] = tmpnode;
|
|
}
|
|
return found;
|
|
}
|
|
|
|
/* Delete all the physical files for column relation. */
|
|
void ColumnRelationDoDeleteFiles(RelFileNode* rnode, ForkNumber forknum, BackendId backend, Oid ownerid)
|
|
{
|
|
/* decrease the permanent space on users' record */
|
|
uint64 size = GetSMgrRelSize(rnode, backend, forknum);
|
|
perm_space_decrease(ownerid, size, find_tmptable_cache_key(rnode->relNode) ? SP_TEMP : SP_PERM);
|
|
|
|
RelFileNodeBackend mainfile = {*rnode, backend};
|
|
int whichColumn = ColForkNum2ColumnId(forknum);
|
|
|
|
if (ColMainFileNodesSearch(&mainfile)) {
|
|
/* BCM shared buffers have been invalided ahead within heap main relation.
|
|
* So here we can delete CU files and their BCM files safely.
|
|
*/
|
|
CStore::UnlinkColDataFile(*rnode, whichColumn, true);
|
|
} else {
|
|
/* Invalid BCM shared buffers and delete BCM files of this column */
|
|
SMgrRelation srel = smgropen(mainfile.node, backend, whichColumn);
|
|
smgrdounlinkfork(srel, forknum, false);
|
|
smgrclose(srel);
|
|
|
|
/* Then delete CU files of this column */
|
|
CStore::UnlinkColDataFile(*rnode, whichColumn, false);
|
|
|
|
ereport(DEBUG5,
|
|
(errmsg("Delete Column files[+BCM]: %u/%u/%u column(%d)",
|
|
rnode->spcNode,
|
|
rnode->dbNode,
|
|
rnode->relNode,
|
|
whichColumn)));
|
|
}
|
|
}
|
|
|
|
/* Delete all the physical files for row relation. */
|
|
void RowRelationDoDeleteFiles(RelFileNode rnode, BackendId backend, Oid ownerid, Oid relOid, bool isCommit)
|
|
{
|
|
/* decrease the permanent space on users' record */
|
|
uint64 size = GetSMgrRelSize(&rnode, backend, InvalidForkNumber);
|
|
perm_space_decrease(ownerid, size, find_tmptable_cache_key(rnode.relNode) ? SP_TEMP : SP_PERM);
|
|
SMgrRelation srel = smgropen(rnode, backend);
|
|
|
|
/* Before unlinking files, invalid all the shared buffers first. */
|
|
smgrdounlink(srel, false);
|
|
smgrclose(srel);
|
|
|
|
/* clean global temp table flags when transaction commit or rollback */
|
|
if (SmgrIsTemp(srel) && relOid != InvalidOid && gtt_storage_attached(relOid)) {
|
|
forget_gtt_storage_info(relOid, rnode, isCommit);
|
|
}
|
|
|
|
/*
|
|
* After files are deleted, append this filenode into BCM file list,
|
|
* so that we know all the BCM shared buffers of column relation has been
|
|
* invalided.
|
|
*/
|
|
ColMainFileNodesAppend(&rnode, backend);
|
|
|
|
/* do nothing for row table. or invalid space cache for column table. */
|
|
CStore::InvalidRelSpaceCache(&rnode);
|
|
}
|
|
|
|
/*
|
|
* @Description: get total files size for given relfilenode/backend /forknum
|
|
* @IN relfilenode: relation file node
|
|
* @IN backend: backend id
|
|
* @IN forkNum: fork number
|
|
* @Return: total files size
|
|
*/
|
|
uint64 GetSMgrRelSize(RelFileNode* relfilenode, BackendId backend, ForkNumber forkNum)
|
|
{
|
|
Assert(relfilenode);
|
|
|
|
uint64 size = 0;
|
|
|
|
if (BUCKET_ID_IS_DIR(relfilenode->bucketNode)) {
|
|
size = calculate_relation_bucket_dir_size(relfilenode, backend, forkNum);
|
|
} else if (forkNum == InvalidForkNumber) {
|
|
for (int fork = 0; fork <= MAX_FORKNUM; fork++) {
|
|
size += calculate_relation_size(relfilenode, backend, fork);
|
|
}
|
|
} else {
|
|
/* Column data 's BCM */
|
|
size = calculate_relation_size(relfilenode, backend, forkNum);
|
|
|
|
/* Column data */
|
|
CFileNode tmpNode(*relfilenode, ColForkNum2ColumnId(forkNum), MAIN_FORKNUM);
|
|
CUStorage custore(tmpNode);
|
|
char pathname[MAXPGPATH] = {'\0'};
|
|
unsigned int segcount = 0;
|
|
|
|
for (segcount = 0;; segcount++) {
|
|
struct stat fst;
|
|
|
|
CHECK_FOR_INTERRUPTS();
|
|
|
|
custore.GetFileName(pathname, MAXPGPATH, segcount);
|
|
|
|
if (stat(pathname, &fst) < 0) {
|
|
if (errno == ENOENT)
|
|
break;
|
|
else if (errno == EIO)
|
|
ereport(ERROR,
|
|
(errcode_for_file_access(),
|
|
errmsg("could not stat file \"%s\": %m", pathname),
|
|
errhint("I/O Error,Please Check If Your DISK Is Broken")));
|
|
else
|
|
ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", pathname)));
|
|
}
|
|
size += fst.st_size;
|
|
}
|
|
custore.Destroy();
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
/*
|
|
* @Description: calculate delete file size from dfsfilelist
|
|
* @IN dfsfilelist: dfs delete file list
|
|
* @IN isCommit: is commit
|
|
* @Return: delte file size
|
|
*/
|
|
uint64 GetDfsDelFileSize(List* dfsfilelist, bool isCommit)
|
|
{
|
|
uint64 size = 0;
|
|
ListCell* lc = NULL;
|
|
|
|
foreach (lc, dfsfilelist) {
|
|
PendingDfsDelete* del_file = (PendingDfsDelete*)lfirst(lc);
|
|
if (del_file->atCommit == isCommit)
|
|
size += del_file->filesize;
|
|
}
|
|
|
|
return size;
|
|
}
|
|
bool IsSmgrTruncate(const XLogReaderState* record)
|
|
{
|
|
return (XLogRecGetRmid(record) == RM_SMGR_ID && (XLogRecGetInfo(record) & (~XLR_INFO_MASK)) == XLOG_SMGR_TRUNCATE);
|
|
}
|
|
bool IsSmgrCreate(const XLogReaderState* record)
|
|
{
|
|
return (XLogRecGetRmid(record) == RM_SMGR_ID && (XLogRecGetInfo(record) & (~XLR_INFO_MASK)) == XLOG_SMGR_CREATE);
|
|
}
|