!1510 更新索引的收益query列表

Merge pull request !1510 from guoguozhenhaowan/positive_pos
This commit is contained in:
opengauss-bot
2022-02-22 06:43:53 +00:00
committed by Gitee
4 changed files with 33 additions and 15 deletions

View File

@ -158,7 +158,7 @@ class DriverExecute(ExecuteFactory):
is_computed = False
self.execute('SET current_schema = %s' % self.schema)
if index_config:
if len(index_config) == 1 and index_config[0].positive_pos:
if len(index_config) == 1 and index_config[0].is_candidate:
is_computed = True
# create hypo-indexes
self.execute('SET enable_hypo_index = on')
@ -184,6 +184,8 @@ class DriverExecute(ExecuteFactory):
query_cost = DriverExecute.parse_explain_plan(res, index_config, ori_indexes_name)
query_cost *= workload[ind].frequency
workload[ind].cost_list.append(query_cost)
if index_config and len(index_config) == 1 and query_cost < workload[ind].cost_list[0]:
index_config[0].positive_pos.append(ind)
total_cost += query_cost
else:
workload[ind].cost_list.append(0)

View File

@ -72,19 +72,21 @@ class ExecuteFactory:
def record_ineffective_negative_sql(candidate_index, obj, ind):
cur_table = candidate_index.table
if cur_table not in obj.statement.lower() and \
not re.search(r'(\.%s\s)' % cur_table.split('.')[-1], obj.statement.lower()):
not re.search(r'((\A|[\s\(,])%s[\s\),])' % cur_table.split('.')[-1], obj.statement.lower()):
return
if any(re.match(r'(insert\sinto\s%s\s)' % table, obj.statement.lower())
if any(re.match(r'(insert\s+into\s+%s\s)' % table, obj.statement.lower())
for table in [cur_table, cur_table.split('.')[-1]]):
candidate_index.insert_sql_num += obj.frequency
candidate_index.negative_pos.append(ind)
candidate_index.total_sql_num += obj.frequency
elif any(re.match(r'(delete\sfrom\s%s\s)' % table, obj.statement.lower())
elif any(re.match(r'(delete\s+from\s+%s\s)' % table, obj.statement.lower())
or re.match(r'(delete\s+%s\s)' % table, obj.statement.lower())
for table in [cur_table, cur_table.split('.')[-1]]):
candidate_index.delete_sql_num += obj.frequency
candidate_index.negative_pos.append(ind)
candidate_index.total_sql_num += obj.frequency
elif any(re.match(r'(update\s%s\s)' % table, obj.statement.lower())
elif any(re.match(r'(update\s+%s\s)' % table, obj.statement.lower())
for table in [cur_table, cur_table.split('.')[-1]]):
candidate_index.update_sql_num += obj.frequency
# the index column appears in the UPDATE set condition, the statement is negative
@ -94,8 +96,7 @@ class ExecuteFactory:
else:
candidate_index.ineffective_pos.append(ind)
candidate_index.total_sql_num += obj.frequency
elif cur_table in obj.statement.lower() or \
re.search(r'(\s%s\s)' % cur_table.split('.')[-1], obj.statement.lower()):
else:
candidate_index.select_sql_num += obj.frequency
# SELECT scenes to filter out positive
if ind not in candidate_index.positive_pos and \

View File

@ -184,6 +184,8 @@ class GSqlExecute(ExecuteFactory):
query_cost = GSqlExecute.parse_plan_cost(line)
query_cost *= workload[select_sql_pos[i]].frequency
workload[select_sql_pos[i]].cost_list.append(query_cost)
if index_config and len(index_config) == 1 and query_cost < workload[select_sql_pos[i]].cost_list[0]:
index_config[0].positive_pos.append(select_sql_pos[i])
total_cost += query_cost
found_plan = False
i += 1
@ -212,7 +214,7 @@ class GSqlExecute(ExecuteFactory):
if self.schema:
file.write('SET current_schema = %s;\n' % self.schema)
if index_config:
if len(index_config) == 1 and index_config[0].positive_pos:
if len(index_config) == 1 and index_config[0].is_candidate:
is_computed = True
# create hypo-indexes
file.write('SET enable_hypo_index = on;\n')

View File

@ -93,13 +93,21 @@ class QueryItem:
class IndexItem:
def __init__(self, tbl, cols, positive_pos=None):
instances = {}
@classmethod
def get_index(cls, tbl, cols):
    """Return the shared instance for ``(tbl, cols)``, creating it on first use.

    Instances are cached in the class-level ``instances`` mapping, so
    repeated requests for the same table/columns pair return the very same
    object instead of building a new one.

    Args:
        tbl: Table identifier; forms the first half of the cache key.
        cols: Column specification; forms the second half of the cache key.
            Both values must be hashable because they are used as a dict key.

    Returns:
        The cached instance for the ``(tbl, cols)`` pair.
    """
    # Build the key once rather than re-creating the tuple for each lookup.
    key = (tbl, cols)
    if key not in cls.instances:
        cls.instances[key] = cls(tbl, cols)
    return cls.instances[key]
def __init__(self, tbl, cols):
self.table = tbl
self.columns = cols
self.atomic_pos = 0
self.benefit = 0
self.storage = 0
self.positive_pos = positive_pos
self.positive_pos = []
self.ineffective_pos = []
self.negative_pos = []
self.total_sql_num = 0
@ -107,6 +115,7 @@ class IndexItem:
self.update_sql_num = 0
self.delete_sql_num = 0
self.select_sql_num = 0
self.is_candidate = False
def green(text):
@ -138,7 +147,9 @@ def filter_low_benefit(candidate_indexes, multi_iter_mode, workload):
negative_ratio = ((index.insert_sql_num + index.delete_sql_num +
index.update_sql_num) / index.total_sql_num) if index.total_sql_num else 0
# filter the candidate indexes that do not meet the conditions of optimization
if sql_optimzed / len(index.positive_pos) < 0.1:
if not index.positive_pos:
remove_list.append(key)
elif sql_optimzed / len(index.positive_pos) < 0.1:
remove_list.append(key)
elif sql_optimzed / len(index.positive_pos) < NEGATIVE_RATIO_THRESHOLD < negative_ratio:
remove_list.append(key)
@ -338,7 +349,7 @@ def generate_candidate_indexes(workload, workload_table_name, db):
for columns in valid_index_dict[table]:
if len(workload[k].valid_index_list) >= FULL_ARRANGEMENT_THRESHOLD:
break
workload[k].valid_index_list.append(IndexItem(table, columns))
workload[k].valid_index_list.append(IndexItem.get_index(table, columns))
if columns in index_dict[table]:
index_dict[table][columns].append(k)
else:
@ -352,11 +363,13 @@ def generate_candidate_indexes(workload, workload_table_name, db):
sorted_column_sqls[i+1][1].extend(sorted_column_sqls[i][1])
else:
print("table: ", table, "columns: ", sorted_column_sqls[i][0])
candidate_indexes.append(IndexItem(table, sorted_column_sqls[i][0],
sorted_column_sqls[i][1]))
candidate_indexes.append(IndexItem.get_index(table, sorted_column_sqls[i][0],
))
print("table: ", table, "columns: ", sorted_column_sqls[-1][0])
candidate_indexes.append(
IndexItem(table, sorted_column_sqls[-1][0], sorted_column_sqls[-1][1]))
IndexItem.get_index(table, sorted_column_sqls[-1][0]))
for index in candidate_indexes:
index.is_candidate = True
if DRIVER:
db.close_conn()
return candidate_indexes