!1510 更新索引的收益query列表

Merge pull request !1510 from guoguozhenhaowan/positive_pos
2022-02-22 06:43:53 +00:00
parent 39ba99288f db52992505
commit 3fbed4ad61
4 changed files with 33 additions and 15 deletions
--- a/src/gausskernel/dbmind/tools/index_advisor/DAO/driver_execute.py
+++ b/src/gausskernel/dbmind/tools/index_advisor/DAO/driver_execute.py
@@ -158,7 +158,7 @@ class DriverExecute(ExecuteFactory):
        is_computed = False
        self.execute('SET current_schema = %s' % self.schema)
        if index_config:
-            if len(index_config) == 1 and index_config[0].positive_pos:
+            if len(index_config) == 1 and index_config[0].is_candidate:
                is_computed = True
            # create hypo-indexes
            self.execute('SET enable_hypo_index = on')
@@ -184,6 +184,8 @@ class DriverExecute(ExecuteFactory):
                    query_cost = DriverExecute.parse_explain_plan(res, index_config, ori_indexes_name)
                    query_cost *= workload[ind].frequency
                    workload[ind].cost_list.append(query_cost)
+                    if index_config and len(index_config) == 1 and query_cost < workload[ind].cost_list[0]:
+                        index_config[0].positive_pos.append(ind)
                    total_cost += query_cost
                else:
                    workload[ind].cost_list.append(0)
--- a/src/gausskernel/dbmind/tools/index_advisor/DAO/execute_factory.py
+++ b/src/gausskernel/dbmind/tools/index_advisor/DAO/execute_factory.py
@@ -72,19 +72,21 @@ class ExecuteFactory:
    def record_ineffective_negative_sql(candidate_index, obj, ind):
        cur_table = candidate_index.table
        if cur_table not in obj.statement.lower() and \
-                not re.search(r'(\.%s\s)' % cur_table.split('.')[-1], obj.statement.lower()):
+                not re.search(r'((\A|[\s\(,])%s[\s\),])' % cur_table.split('.')[-1], obj.statement.lower()):
            return
-        if any(re.match(r'(insert\sinto\s%s\s)' % table, obj.statement.lower())
+
+        if any(re.match(r'(insert\s+into\s+%s\s)' % table, obj.statement.lower())
               for table in [cur_table, cur_table.split('.')[-1]]):
            candidate_index.insert_sql_num += obj.frequency
            candidate_index.negative_pos.append(ind)
            candidate_index.total_sql_num += obj.frequency
-        elif any(re.match(r'(delete\sfrom\s%s\s)' % table, obj.statement.lower())
+        elif any(re.match(r'(delete\s+from\s+%s\s)' % table, obj.statement.lower())
+                 or re.match(r'(delete\s+%s\s)' % table, obj.statement.lower())
                 for table in [cur_table, cur_table.split('.')[-1]]):
            candidate_index.delete_sql_num += obj.frequency
            candidate_index.negative_pos.append(ind)
            candidate_index.total_sql_num += obj.frequency
-        elif any(re.match(r'(update\s%s\s)' % table, obj.statement.lower())
+        elif any(re.match(r'(update\s+%s\s)' % table, obj.statement.lower())
                 for table in [cur_table, cur_table.split('.')[-1]]):
            candidate_index.update_sql_num += obj.frequency
            # the index column appears in the UPDATE set condition, the statement is negative
@@ -94,8 +96,7 @@ class ExecuteFactory:
            else:
                candidate_index.ineffective_pos.append(ind)
            candidate_index.total_sql_num += obj.frequency
-        elif cur_table in obj.statement.lower() or \
-                re.search(r'(\s%s\s)' % cur_table.split('.')[-1], obj.statement.lower()):
+        else:
            candidate_index.select_sql_num += obj.frequency
            # SELECT scenes to filter out positive
            if ind not in candidate_index.positive_pos and \
--- a/src/gausskernel/dbmind/tools/index_advisor/DAO/gsql_execute.py
+++ b/src/gausskernel/dbmind/tools/index_advisor/DAO/gsql_execute.py
@@ -184,6 +184,8 @@ class GSqlExecute(ExecuteFactory):
                query_cost = GSqlExecute.parse_plan_cost(line)
                query_cost *= workload[select_sql_pos[i]].frequency
                workload[select_sql_pos[i]].cost_list.append(query_cost)
+                if index_config and len(index_config) == 1 and query_cost < workload[select_sql_pos[i]].cost_list[0]:
+                    index_config[0].positive_pos.append(select_sql_pos[i])
                total_cost += query_cost
                found_plan = False
                i += 1
@@ -212,7 +214,7 @@ class GSqlExecute(ExecuteFactory):
            if self.schema:
                file.write('SET current_schema = %s;\n' % self.schema)
            if index_config:
-                if len(index_config) == 1 and index_config[0].positive_pos:
+                if len(index_config) == 1 and index_config[0].is_candidate:
                    is_computed = True
                # create hypo-indexes
                file.write('SET enable_hypo_index = on;\n')
--- a/src/gausskernel/dbmind/tools/index_advisor/index_advisor_workload.py
+++ b/src/gausskernel/dbmind/tools/index_advisor/index_advisor_workload.py
@@ -93,13 +93,21 @@ class QueryItem:


 class IndexItem:
-    def __init__(self, tbl, cols, positive_pos=None):
+    instances = {}
+
+    @classmethod
+    def get_index(cls, tbl, cols):
+        if not (tbl, cols) in cls.instances:
+            cls.instances[(tbl, cols)] = cls(tbl, cols)
+        return cls.instances[(tbl, cols)]
+
+    def __init__(self, tbl, cols):
        self.table = tbl
        self.columns = cols
        self.atomic_pos = 0
        self.benefit = 0
        self.storage = 0
-        self.positive_pos = positive_pos
+        self.positive_pos = []
        self.ineffective_pos = []
        self.negative_pos = []
        self.total_sql_num = 0
@@ -107,6 +115,7 @@ class IndexItem:
        self.update_sql_num = 0
        self.delete_sql_num = 0
        self.select_sql_num = 0
+        self.is_candidate = False


 def green(text):
@@ -138,7 +147,9 @@ def filter_low_benefit(candidate_indexes, multi_iter_mode, workload):
        negative_ratio = ((index.insert_sql_num + index.delete_sql_num +
                          index.update_sql_num) / index.total_sql_num) if index.total_sql_num else 0
        # filter the candidate indexes that do not meet the conditions of optimization
-        if sql_optimzed / len(index.positive_pos) < 0.1:
+        if not index.positive_pos:
+            remove_list.append(key)
+        elif sql_optimzed / len(index.positive_pos) < 0.1:
            remove_list.append(key)
        elif sql_optimzed / len(index.positive_pos) < NEGATIVE_RATIO_THRESHOLD < negative_ratio:
            remove_list.append(key)
@@ -338,7 +349,7 @@ def generate_candidate_indexes(workload, workload_table_name, db):
            for columns in valid_index_dict[table]:
                if len(workload[k].valid_index_list) >= FULL_ARRANGEMENT_THRESHOLD:
                    break
-                workload[k].valid_index_list.append(IndexItem(table, columns))
+                workload[k].valid_index_list.append(IndexItem.get_index(table, columns))
                if columns in index_dict[table]:
                    index_dict[table][columns].append(k)
                else:
@@ -352,11 +363,13 @@ def generate_candidate_indexes(workload, workload_table_name, db):
                sorted_column_sqls[i+1][1].extend(sorted_column_sqls[i][1])
            else:
                print("table: ", table, "columns: ", sorted_column_sqls[i][0])
-                candidate_indexes.append(IndexItem(table, sorted_column_sqls[i][0],
-                                                   sorted_column_sqls[i][1]))
+                candidate_indexes.append(IndexItem.get_index(table, sorted_column_sqls[i][0],
+                                                   ))
        print("table: ", table, "columns: ", sorted_column_sqls[-1][0])
        candidate_indexes.append(
-            IndexItem(table, sorted_column_sqls[-1][0], sorted_column_sqls[-1][1]))
+            IndexItem.get_index(table, sorted_column_sqls[-1][0]))
+    for index in candidate_indexes:
+        index.is_candidate = True
    if DRIVER:
        db.close_conn()
    return candidate_indexes