diff --git a/src/gausskernel/dbmind/kernel/hypopg_index.cpp b/src/gausskernel/dbmind/kernel/hypopg_index.cpp
index 621a08ab1..46f903582 100644
--- a/src/gausskernel/dbmind/kernel/hypopg_index.cpp
+++ b/src/gausskernel/dbmind/kernel/hypopg_index.cpp
@@ -91,7 +91,8 @@ static void hypo_set_indexname(hypoIndex *entry, const char *indexname);
 static void hypo_index_reset(void);
 static void hypo_injectHypotheticalIndex(PlannerInfo *root, Oid relationObjectId, bool inhparent, RelOptInfo *rel,
     Relation relation, hypoIndex *entry);
-
+static List *get_table_indexes(Oid oid);
+static List *get_index_attrnum(Oid index_oid);
 
 void InitHypopg()
 {
@@ -217,6 +218,39 @@ static void hypo_executorEnd_hook(QueryDesc *queryDesc)
         standard_ExecutorEnd(queryDesc);
     }
 }
+
+/*
+ * Return the OIDs of all indexes defined on the table.
+ * The list comes from RelationGetIndexList(); the caller should list_free() it.
+ */
+static List *get_table_indexes(Oid oid)
+{
+    Relation rel = heap_open(oid, NoLock);
+    List *indexes = RelationGetIndexList(rel);
+    heap_close(rel, NoLock);
+    return indexes;
+}
+
+/*
+ * Return the key attribute numbers (pg_index.indkey) of the index, in key order.
+ * Reports an ERROR when the index has no pg_index entry.
+ */
+static List *get_index_attrnum(Oid index_oid)
+{
+    HeapTuple index_tup = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(index_oid));
+    if (!HeapTupleIsValid(index_tup)) {
+        ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("cache lookup failed for index %u", index_oid)));
+    }
+    Form_pg_index index_form = (Form_pg_index)GETSTRUCT(index_tup);
+    int2vector *attnums = &(index_form->indkey);
+    /* Copy the key columns out of the syscache tuple before it is released. */
+    List *attrnum = NIL;
+    for (int i = 0; i < attnums->dim1; i++) {
+        attrnum = lappend_int(attrnum, attnums->values[i]);
+    }
+    ReleaseSysCache(index_tup);
+    return attrnum;
+}
 
 /*
  * This function will execute the "hypo_injectHypotheticalIndex" for every
@@ -243,7 +277,30 @@ static void hypo_get_relation_info_hook(PlannerInfo *root, Oid relationObjectId,
                      * hypothetical index found, add it to the relation's
                      * indextlist
                      */
-                    hypo_injectHypotheticalIndex(root, relationObjectId, inhparent, rel, relation, entry);
+                    /*
+                     * Skip the injection when a real index already covers the
+                     * hypothetical one, i.e. the hypothetical index's columns
+                     * are a prefix of the real index's key columns.
+                     */
+                    List *indexes = get_table_indexes(entry->relid);
+                    ListCell *index = NULL;
+                    bool match_flag = false;
+                    foreach (index, indexes) {
+                        List *attrnums = get_index_attrnum(lfirst_oid(index));
+                        match_flag = (attrnums != NIL && entry->ncolumns <= list_length(attrnums));
+                        for (int i = 0; match_flag && i < entry->ncolumns; i++) {
+                            match_flag = (entry->indexkeys[i] == list_nth_int(attrnums, i));
+                        }
+                        list_free(attrnums);
+                        /* the suggested index already exists */
+                        if (match_flag) {
+                            break;
+                        }
+                    }
+                    list_free(indexes);
+                    if (!match_flag) {
+                        hypo_injectHypotheticalIndex(root, relationObjectId, inhparent, rel, relation, entry);
+                    }
                 }
             }
 
diff --git a/src/gausskernel/dbmind/kernel/index_advisor.cpp b/src/gausskernel/dbmind/kernel/index_advisor.cpp
index 333b525fe..65465e02a 100644
--- a/src/gausskernel/dbmind/kernel/index_advisor.cpp
+++ b/src/gausskernel/dbmind/kernel/index_advisor.cpp
@@ -906,6 +906,11 @@ void parse_join_expr(JoinExpr *join_tree)
         return;
     }
 
+    /* Both sides are cast to RangeVar below, so ignore joins whose inputs are any other node type. */
+    if (!IsA(join_tree->larg, RangeVar) || !IsA(join_tree->rarg, RangeVar)) {
+        return;
+    }
+
     List *join_fields = join_tree->usingClause;
     char *l_table_name = ((RangeVar *)(join_tree->larg))->relname;
     char *r_table_name = ((RangeVar *)(join_tree->rarg))->relname;
@@ -1548,7 +1553,7 @@ void add_index_from_group_order(TableCell *table, List *clause, List *target_lis
     ListCell *prev = NULL;
     foreach (cur, table->index) {
         IndexCell *table_index = (IndexCell *)lfirst(cur);
-        if (strcasecmp(table_index->index_name, index->index_name) == 0) {
+        if (index->index_name == NULL || strcasecmp(table_index->index_name, index->index_name) == 0) {
             break;
         }
         if (table_index->op && strcasecmp(table_index->op, "=") != 0) {
diff --git a/src/gausskernel/dbmind/tools/index_advisor/extract_log.py b/src/gausskernel/dbmind/tools/index_advisor/extract_log.py
index 7b2e0b41b..c93cf6b8e 100644
--- a/src/gausskernel/dbmind/tools/index_advisor/extract_log.py
+++ b/src/gausskernel/dbmind/tools/index_advisor/extract_log.py
@@ -7,6 +7,9 @@ SQL_TYPE = ['select', 'delete', 'insert', 'update']
 
 
 def output_valid_sql(sql):
+    # Drop statements containing 'from pg_', i.e. reads of pg_* relations.
+    if 'from pg_' in sql.lower():
+        return ''
     if any(tp in sql.lower() for tp in SQL_TYPE[1:]):
         return sql if sql.endswith('; ') else sql + ';'
     elif SQL_TYPE[0] in sql.lower() and 'from ' in sql.lower():
@@ -17,50 +20,59 @@ def output_valid_sql(sql):
 def extract_sql_from_log(log_path):
     files = os.listdir(log_path)
     for file in files:
-        if not os.path.isdir(file) and '.swap' not in file:
-            with open(log_path + "/" + file, mode='r') as f:
+        file_path = os.path.join(log_path, file)
+        if os.path.isfile(file_path) and re.search(r'\.log$', file):
+            with open(file_path, mode='r') as f:
                 line = f.readline()
                 sql = ''
                 statement_flag = False
                 execute_flag = False
                 while line:
-                    # Identify statement scene
-                    if re.search('statement: ', line, re.IGNORECASE):
-                        statement_flag = True
-                        if output_valid_sql(sql):
-                            yield output_valid_sql(sql)
-                        sql = re.search(r'statement: (.*)', line.strip(),
-                                        re.IGNORECASE).group(1) + ' '
-                        line = f.readline()
+                    try:
+                        # Identify statement scene
+                        if re.search('statement: ', line, re.IGNORECASE):
+                            statement_flag = True
+                            if output_valid_sql(sql):
+                                yield output_valid_sql(sql)
+                            sql = re.search(r'statement: (.*)', line.strip(),
+                                            re.IGNORECASE).group(1) + ' '
+                            line = f.readline()
 
-                    # Identify execute statement scene
-                    elif re.search(r'execute .*:', line, re.IGNORECASE):
-                        if output_valid_sql(sql):
-                            yield output_valid_sql(sql)
-                        execute_flag = True
-                        sql = re.search(r'execute .*: (.*)', line.strip(), re.IGNORECASE).group(1)
-                        line = f.readline()
-                    else:
-                        if statement_flag:
-                            if re.match(r'^\t', line):
-                                sql += line.strip('\t\n')
-                            else:
-                                statement_flag = False
-                                if output_valid_sql(sql):
-                                    yield output_valid_sql(sql)
-                                sql = ''
-                        if execute_flag and re.search(r'parameters: ', line, re.IGNORECASE):
-                            execute_flag = False
-                            param_list = re.search(r'parameters: (.*)', line.strip(),
-                                                   re.IGNORECASE).group(1).split(', ')
-                            param_list = list(param.split('=', 1) for param in param_list)
-                            param_list.sort(key=lambda x: int(x[0].strip(' $')),
-                                            reverse=True)
-                            for item in param_list:
-                                if len(item[1].strip()) >= 256:
-                                    sql = sql.replace(item[0].strip(), "''")
+                        # Identify execute statement scene
+                        elif re.search(r'execute .*:', line, re.IGNORECASE):
+                            if output_valid_sql(sql):
+                                yield output_valid_sql(sql)
+                            execute_flag = True
+                            sql = re.search(r'execute .*: (.*)', line.strip(), re.IGNORECASE).group(1)
+                            line = f.readline()
+                        else:
+                            if statement_flag:
+                                if re.match(r'^\t', line):
+                                    sql += line.strip('\t\n')
                                 else:
-                                    sql = sql.replace(item[0].strip(), item[1].strip())
-                            yield output_valid_sql(sql)
-                            sql = ''
+                                    statement_flag = False
+                                    if output_valid_sql(sql):
+                                        yield output_valid_sql(sql)
+                                    sql = ''
+                            if execute_flag and re.search(r'parameters: ', line, re.IGNORECASE):
+                                execute_flag = False
+                                param_list = re.search(r'parameters: (.*)', line.strip(),
+                                                       re.IGNORECASE).group(1).split(', ')
+                                param_list = list(param.split('=', 1) for param in param_list)
+                                param_list.sort(key=lambda x: int(x[0].strip(' $')),
+                                                reverse=True)
+                                for item in param_list:
+                                    if len(item[1].strip()) >= 256:
+                                        sql = sql.replace(item[0].strip(), "''")
+                                    else:
+                                        sql = sql.replace(item[0].strip(), item[1].strip())
+                                yield output_valid_sql(sql)
+                                sql = ''
+                            line = f.readline()
+                    except Exception:
+                        # Best effort: skip a malformed record. Catching Exception (not a bare
+                        # except) lets GeneratorExit from the yields above still propagate.
+                        sql = ''
+                        execute_flag = False
+                        statement_flag = False
                         line = f.readline()