diff --git a/src/gausskernel/dbmind/tools/index_advisor/DAO/driver_execute.py b/src/gausskernel/dbmind/tools/index_advisor/DAO/driver_execute.py index e7de9b029..784a92723 100644 --- a/src/gausskernel/dbmind/tools/index_advisor/DAO/driver_execute.py +++ b/src/gausskernel/dbmind/tools/index_advisor/DAO/driver_execute.py @@ -158,7 +158,7 @@ class DriverExecute(ExecuteFactory): is_computed = False self.execute('SET current_schema = %s' % self.schema) if index_config: - if len(index_config) == 1 and index_config[0].positive_pos: + if len(index_config) == 1 and index_config[0].is_candidate: is_computed = True # create hypo-indexes self.execute('SET enable_hypo_index = on') @@ -184,6 +184,8 @@ class DriverExecute(ExecuteFactory): query_cost = DriverExecute.parse_explain_plan(res, index_config, ori_indexes_name) query_cost *= workload[ind].frequency workload[ind].cost_list.append(query_cost) + if index_config and len(index_config) == 1 and query_cost < workload[ind].cost_list[0]: + index_config[0].positive_pos.append(ind) total_cost += query_cost else: workload[ind].cost_list.append(0) diff --git a/src/gausskernel/dbmind/tools/index_advisor/DAO/execute_factory.py b/src/gausskernel/dbmind/tools/index_advisor/DAO/execute_factory.py index 04df534cd..1112264c0 100644 --- a/src/gausskernel/dbmind/tools/index_advisor/DAO/execute_factory.py +++ b/src/gausskernel/dbmind/tools/index_advisor/DAO/execute_factory.py @@ -72,19 +72,21 @@ class ExecuteFactory: def record_ineffective_negative_sql(candidate_index, obj, ind): cur_table = candidate_index.table if cur_table not in obj.statement.lower() and \ - not re.search(r'(\.%s\s)' % cur_table.split('.')[-1], obj.statement.lower()): + not re.search(r'((\A|[\s\(,])%s[\s\),])' % cur_table.split('.')[-1], obj.statement.lower()): return - if any(re.match(r'(insert\sinto\s%s\s)' % table, obj.statement.lower()) + + if any(re.match(r'(insert\s+into\s+%s\s)' % table, obj.statement.lower()) for table in [cur_table, cur_table.split('.')[-1]]): candidate_index.insert_sql_num += obj.frequency candidate_index.negative_pos.append(ind) candidate_index.total_sql_num += obj.frequency - elif any(re.match(r'(delete\sfrom\s%s\s)' % table, obj.statement.lower()) + elif any(re.match(r'(delete\s+from\s+%s\s)' % table, obj.statement.lower()) + or re.match(r'(delete\s+%s\s)' % table, obj.statement.lower()) for table in [cur_table, cur_table.split('.')[-1]]): candidate_index.delete_sql_num += obj.frequency candidate_index.negative_pos.append(ind) candidate_index.total_sql_num += obj.frequency - elif any(re.match(r'(update\s%s\s)' % table, obj.statement.lower()) + elif any(re.match(r'(update\s+%s\s)' % table, obj.statement.lower()) for table in [cur_table, cur_table.split('.')[-1]]): candidate_index.update_sql_num += obj.frequency # the index column appears in the UPDATE set condition, the statement is negative @@ -94,8 +96,7 @@ class ExecuteFactory: else: candidate_index.ineffective_pos.append(ind) candidate_index.total_sql_num += obj.frequency - elif cur_table in obj.statement.lower() or \ - re.search(r'(\s%s\s)' % cur_table.split('.')[-1], obj.statement.lower()): + else: candidate_index.select_sql_num += obj.frequency # SELECT scenes to filter out positive if ind not in candidate_index.positive_pos and \ diff --git a/src/gausskernel/dbmind/tools/index_advisor/DAO/gsql_execute.py b/src/gausskernel/dbmind/tools/index_advisor/DAO/gsql_execute.py index 4e8266a3b..a99e006b4 100644 --- a/src/gausskernel/dbmind/tools/index_advisor/DAO/gsql_execute.py +++ b/src/gausskernel/dbmind/tools/index_advisor/DAO/gsql_execute.py @@ -184,6 +184,8 @@ class GSqlExecute(ExecuteFactory): query_cost = GSqlExecute.parse_plan_cost(line) query_cost *= workload[select_sql_pos[i]].frequency workload[select_sql_pos[i]].cost_list.append(query_cost) + if index_config and len(index_config) == 1 and query_cost < workload[select_sql_pos[i]].cost_list[0]: + index_config[0].positive_pos.append(select_sql_pos[i]) total_cost += query_cost found_plan = False i += 1 @@ -212,7 +214,7 @@ class GSqlExecute(ExecuteFactory): if self.schema: file.write('SET current_schema = %s;\n' % self.schema) if index_config: - if len(index_config) == 1 and index_config[0].positive_pos: + if len(index_config) == 1 and index_config[0].is_candidate: is_computed = True # create hypo-indexes file.write('SET enable_hypo_index = on;\n') diff --git a/src/gausskernel/dbmind/tools/index_advisor/index_advisor_workload.py b/src/gausskernel/dbmind/tools/index_advisor/index_advisor_workload.py index 570e7e061..76793de36 100644 --- a/src/gausskernel/dbmind/tools/index_advisor/index_advisor_workload.py +++ b/src/gausskernel/dbmind/tools/index_advisor/index_advisor_workload.py @@ -93,13 +93,21 @@ class QueryItem: class IndexItem: - def __init__(self, tbl, cols, positive_pos=None): + instances = {} + + @classmethod + def get_index(cls, tbl, cols): + if not (tbl, cols) in cls.instances: + cls.instances[(tbl, cols)] = cls(tbl, cols) + return cls.instances[(tbl, cols)] + + def __init__(self, tbl, cols): self.table = tbl self.columns = cols self.atomic_pos = 0 self.benefit = 0 self.storage = 0 - self.positive_pos = positive_pos + self.positive_pos = [] self.ineffective_pos = [] self.negative_pos = [] self.total_sql_num = 0 @@ -107,6 +115,7 @@ class IndexItem: self.update_sql_num = 0 self.delete_sql_num = 0 self.select_sql_num = 0 + self.is_candidate = False def green(text): @@ -138,7 +147,9 @@ def filter_low_benefit(candidate_indexes, multi_iter_mode, workload): negative_ratio = ((index.insert_sql_num + index.delete_sql_num + index.update_sql_num) / index.total_sql_num) if index.total_sql_num else 0 # filter the candidate indexes that do not meet the conditions of optimization - if sql_optimzed / len(index.positive_pos) < 0.1: + if not index.positive_pos: + remove_list.append(key) + elif sql_optimzed / len(index.positive_pos) < 0.1: remove_list.append(key) elif sql_optimzed / len(index.positive_pos) < NEGATIVE_RATIO_THRESHOLD < negative_ratio: remove_list.append(key) @@ -338,7 +349,7 @@ def generate_candidate_indexes(workload, workload_table_name, db): for columns in valid_index_dict[table]: if len(workload[k].valid_index_list) >= FULL_ARRANGEMENT_THRESHOLD: break - workload[k].valid_index_list.append(IndexItem(table, columns)) + workload[k].valid_index_list.append(IndexItem.get_index(table, columns)) if columns in index_dict[table]: index_dict[table][columns].append(k) else: @@ -352,11 +363,13 @@ def generate_candidate_indexes(workload, workload_table_name, db): sorted_column_sqls[i+1][1].extend(sorted_column_sqls[i][1]) else: print("table: ", table, "columns: ", sorted_column_sqls[i][0]) - candidate_indexes.append(IndexItem(table, sorted_column_sqls[i][0], - sorted_column_sqls[i][1])) + candidate_indexes.append(IndexItem.get_index(table, sorted_column_sqls[i][0], + )) print("table: ", table, "columns: ", sorted_column_sqls[-1][0]) candidate_indexes.append( - IndexItem(table, sorted_column_sqls[-1][0], sorted_column_sqls[-1][1])) + IndexItem.get_index(table, sorted_column_sqls[-1][0])) + for index in candidate_indexes: + index.is_candidate = True if DRIVER: db.close_conn() return candidate_indexes