From 53f100a8432690ceaf08fc45b3f4ad92c10d9830 Mon Sep 17 00:00:00 2001
From: flyly
Date: Thu, 29 Jul 2021 19:26:16 +0800
Subject: [PATCH] Fix issue: Fix a bug on the index advisor: Repair log extraction

---
 .../dbmind/tools/index_advisor/extract_log.py       | 14 ++++++++------
 .../tools/index_advisor/index_advisor_workload.py   |  4 ++--
 .../index_advisor/index_advisor_workload_driver.py  |  4 ++--
 .../dbmind/tools/index_advisor/index_server.py      |  2 +-
 4 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/gausskernel/dbmind/tools/index_advisor/extract_log.py b/src/gausskernel/dbmind/tools/index_advisor/extract_log.py
index 3374ffcb3..1652d7cd8 100644
--- a/src/gausskernel/dbmind/tools/index_advisor/extract_log.py
+++ b/src/gausskernel/dbmind/tools/index_advisor/extract_log.py
@@ -6,7 +6,7 @@ import random
 import time
 from subprocess import Popen, PIPE
 
-SQL_TYPE = ['select', 'delete', 'insert', 'update']
+SQL_TYPE = ['select ', 'delete ', 'insert ', 'update ']
 SQL_AMOUNT = 0
 PLACEHOLDER = r'@@@'
 SAMPLE_NUM = 5
@@ -38,11 +38,13 @@ def output_valid_sql(sql):
     is_quotation_valid = sql.count("'") % 2
     if 'from pg_' in sql.lower() or ' join ' in sql.lower() or is_quotation_valid:
         return ''
-    if any(tp in sql.lower() for tp in SQL_TYPE[1:]):
+    if any(tp in sql.lower() for tp in SQL_TYPE[1:]) or \
+            (SQL_TYPE[0] in sql.lower() and 'from ' in sql.lower()):
+        if ' where ' in sql.lower() and \
+                len(re.search(r'\s+where\s+(.*)', sql, flags=re.I).group(1)) > 256:
+            return ''
         sql = re.sub(r'for\s+update[\s;]*$', '', sql, flags=re.I)
         return sql.strip() if sql.endswith('; ') else sql + ';'
-    elif SQL_TYPE[0] in sql.lower() and 'from ' in sql.lower():
-        return sql.strip() if sql.endswith('; ') else sql + ';'
     return ''
 
 
@@ -161,8 +163,8 @@ def extract_sql_from_log(args):
     files = os.listdir(args.l)
     files = sorted(files, key=lambda x: os.path.getctime(os.path.join(args.l, x)), reverse=True)
     valid_files = files
-    time_stamp = int(time.mktime(time.strptime(args.start_time, '%Y-%m-%d %H:%M:%S')))
     if args.start_time:
+        time_stamp = int(time.mktime(time.strptime(args.start_time, '%Y-%m-%d %H:%M:%S')))
         valid_files = []
         for file in files:
             if os.path.getmtime(os.path.join(args.l, file)) < time_stamp:
@@ -194,7 +196,7 @@ def main():
     args = arg_parser.parse_args()
     if args.start_time:
         time.strptime(args.start_time, '%Y-%m-%d %H:%M:%S')
-    if args.sql_amount and args.sql_amount <= 0:
+    if args.sql_amount is not None and args.sql_amount <= 0:
         raise argparse.ArgumentTypeError("%s is an invalid positive int value" % args.sql_amount)
     extract_sql_from_log(args)
 
diff --git a/src/gausskernel/dbmind/tools/index_advisor/index_advisor_workload.py b/src/gausskernel/dbmind/tools/index_advisor/index_advisor_workload.py
index cfb8e4976..c5f8a7373 100644
--- a/src/gausskernel/dbmind/tools/index_advisor/index_advisor_workload.py
+++ b/src/gausskernel/dbmind/tools/index_advisor/index_advisor_workload.py
@@ -275,7 +275,7 @@ def check_useless_index(tables):
           "pg_get_indexdef(i.oid) AS indexdef, p.contype AS pkey from " \
           "pg_index x JOIN pg_class c ON c.oid = x.indrelid JOIN " \
           "pg_class i ON i.oid = x.indexrelid LEFT JOIN pg_namespace n " \
-          "ON n.oid = c.relnamespace LEFT JOIN pg_constraint p ON i.oid = p.conindid" \
+          "ON n.oid = c.relnamespace LEFT JOIN pg_constraint p ON i.oid = p.conindid " \
           "WHERE (c.relkind = ANY (ARRAY['r'::\"char\", 'm'::\"char\"])) AND " \
           "(i.relkind = ANY (ARRAY['i'::\"char\", 'I'::\"char\"])) AND " \
           "n.nspname = '%s' AND c.relname in (%s) order by c.relname;" % (SCHEMA, tables_string)
@@ -336,7 +336,7 @@ def check_unused_index_workload(whole_indexes, redundant_indexes, workload_index
         if 'UNIQUE INDEX' not in index.indexdef:
             statement = "DROP INDEX %s;" % index.indexname
             print(statement)
-            useless_index = {"schemaName": index.schema, "tbName": index.table, "type": 1,
+            useless_index = {"schemaName": index.schema, "tbName": index.table, "type": 3,
                              "columns": index.columns, "statement": statement}
             detail_info['uselessIndexes'].append(useless_index)
     print_header_boundary(" Redundant indexes ")
diff --git a/src/gausskernel/dbmind/tools/index_advisor/index_advisor_workload_driver.py b/src/gausskernel/dbmind/tools/index_advisor/index_advisor_workload_driver.py
index 5d9bb565b..d3748dc86 100644
--- a/src/gausskernel/dbmind/tools/index_advisor/index_advisor_workload_driver.py
+++ b/src/gausskernel/dbmind/tools/index_advisor/index_advisor_workload_driver.py
@@ -265,7 +265,7 @@ def check_useless_index(tables, db):
           "pg_get_indexdef(i.oid) AS indexdef, p.contype AS pkey from " \
           "pg_index x JOIN pg_class c ON c.oid = x.indrelid JOIN " \
           "pg_class i ON i.oid = x.indexrelid LEFT JOIN pg_namespace n " \
-          "ON n.oid = c.relnamespace LEFT JOIN pg_constraint p ON i.oid = p.conindid" \
+          "ON n.oid = c.relnamespace LEFT JOIN pg_constraint p ON i.oid = p.conindid " \
           "WHERE (c.relkind = ANY (ARRAY['r'::\"char\", 'm'::\"char\"])) AND " \
           "(i.relkind = ANY (ARRAY['i'::\"char\", 'I'::\"char\"])) AND " \
           "n.nspname = '%s' AND c.relname in (%s) order by c.relname;" % (SCHEMA, tables_string)
@@ -323,7 +323,7 @@ def check_unused_index_workload(whole_indexes, redundant_indexes, workload_index
         if 'UNIQUE INDEX' not in index.indexdef:
             statement = "DROP INDEX %s;" % index.indexname
             print(statement)
-            useless_index = {"schemaName": index.schema, "tbName": index.table, "type": 1,
+            useless_index = {"schemaName": index.schema, "tbName": index.table, "type": 3,
                              "columns": index.columns, "statement": statement}
             detail_info['uselessIndexes'].append(useless_index)
     print_header_boundary(" Redundant indexes ")
diff --git a/src/gausskernel/dbmind/tools/index_advisor/index_server.py b/src/gausskernel/dbmind/tools/index_advisor/index_server.py
index 9c8834101..f621ed470 100644
--- a/src/gausskernel/dbmind/tools/index_advisor/index_server.py
+++ b/src/gausskernel/dbmind/tools/index_advisor/index_server.py
@@ -170,7 +170,7 @@ class IndexServer:
             self.logger.error(e)
 
     def extract_log(self, start_time):
-        extract_log_cmd = 'python3 %s %s %s --start_time "%s"' % \
+        extract_log_cmd = 'python3 %s %s %s --start_time "%s" --json ' % \
                           (os.path.join(current_dirname, 'extract_log.py'), self._kwargs['pg_log_path'],
                            self._kwargs['output_sql_file'], start_time)
 
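
For reference, the snippet below is a self-contained sketch of the patched output_valid_sql() filter from extract_log.py. It shows the behaviour the rewritten branch introduces: SELECT statements and other DML now go through one code path, and any statement whose WHERE clause is longer than 256 characters is discarded. The two sample calls at the end are illustrative inputs only; they are not taken from the patch or from any real pg_log workload.

import re

# Trailing spaces were added by this patch, presumably so that keywords embedded in
# longer identifiers (e.g. a column named "updated") are not mistaken for DML.
SQL_TYPE = ['select ', 'delete ', 'insert ', 'update ']


def output_valid_sql(sql):
    # Statements with unbalanced quotes, system-catalog access or joins are discarded.
    is_quotation_valid = sql.count("'") % 2
    if 'from pg_' in sql.lower() or ' join ' in sql.lower() or is_quotation_valid:
        return ''
    if any(tp in sql.lower() for tp in SQL_TYPE[1:]) or \
            (SQL_TYPE[0] in sql.lower() and 'from ' in sql.lower()):
        # New in this patch: drop statements whose WHERE clause exceeds 256 characters.
        if ' where ' in sql.lower() and \
                len(re.search(r'\s+where\s+(.*)', sql, flags=re.I).group(1)) > 256:
            return ''
        sql = re.sub(r'for\s+update[\s;]*$', '', sql, flags=re.I)
        return sql.strip() if sql.endswith('; ') else sql + ';'
    return ''


# Illustrative inputs (not from the patch): a short SELECT is kept and normalised
# with a trailing semicolon, while an oversized WHERE clause is filtered out.
print(output_valid_sql('select id from t1 where id = 1'))
print(output_valid_sql('select id from t1 where id in (%s)' % ', '.join(['1'] * 200)) == '')

Running the sketch prints the normalised SELECT statement followed by True, confirming that the oversized WHERE clause is rejected by the new length check.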