From 3c88f4df705c0216ddc314b9d3d5ce0b34e15a04 Mon Sep 17 00:00:00 2001 From: seawinde Date: Thu, 3 Jul 2025 08:56:16 +0800 Subject: [PATCH] [opt](nereids) Improve sql parse performance by avoiding parsing twice when collecting the hint map (#52627) cherry pick part of code from pr: https://github.com/apache/doris/pull/40202 commitId: 81f3c484 ### What problem does this PR solve? Issue Number: close #xxx Related PR: #xxx Problem Summary: ### Release note None ### Check List (For Author) - Test - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason - Behavior changed: - [ ] No. - [ ] Yes. - Does this need documentation? - [ ] No. - [ ] Yes. ### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label --- .../doris/nereids/parser/NereidsParser.java | 53 +++++++++++-------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java index 974da5d2aa..2dfb542708 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java @@ -56,6 +56,7 @@ import org.apache.logging.log4j.Logger; import java.lang.reflect.Method; import java.util.BitSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Optional; @@ -228,7 +229,7 @@ public class NereidsParser { } private List parseSQLWithDialect(String sql, - SessionVariable sessionVariable) { + SessionVariable sessionVariable) { @Nullable Dialect sqlDialect = 
Dialect.getByName(sessionVariable.getSqlDialect()); if (sqlDialect == null) { return parseSQL(sql); @@ -244,7 +245,7 @@ public class NereidsParser { } } catch (Throwable throwable) { LOG.warn("Parse sql with dialect {} failed, plugin: {}, sql: {}.", - sqlDialect, plugin.getClass().getSimpleName(), sql, throwable); + sqlDialect, plugin.getClass().getSimpleName(), sql, throwable); } } @@ -280,7 +281,7 @@ public class NereidsParser { } public List> parseMultiple(String sql, - @Nullable LogicalPlanBuilder logicalPlanBuilder) { + @Nullable LogicalPlanBuilder logicalPlanBuilder) { return parse(sql, logicalPlanBuilder, DorisParser::multiStatements); } @@ -325,30 +326,31 @@ public class NereidsParser { } private T parse(String sql, @Nullable LogicalPlanBuilder logicalPlanBuilder, - Function parseFunction) { - ParserRuleContext tree = toAst(sql, parseFunction); + Function parseFunction) { + CommonTokenStream tokenStream = parseAllTokens(sql); + ParserRuleContext tree = toAst(tokenStream, parseFunction); LogicalPlanBuilder realLogicalPlanBuilder = logicalPlanBuilder == null - ? new LogicalPlanBuilder(getHintMap(sql, DorisParser::selectHint)) : logicalPlanBuilder; + ? 
new LogicalPlanBuilder(getHintMap(sql, tokenStream, DorisParser::selectHint)) + : logicalPlanBuilder; return (T) realLogicalPlanBuilder.visit(tree); } public LogicalPlan parseForCreateView(String sql) { - ParserRuleContext tree = toAst(sql, DorisParser::singleStatement); + CommonTokenStream tokenStream = parseAllTokens(sql); + ParserRuleContext tree = toAst(tokenStream, DorisParser::singleStatement); LogicalPlanBuilder realLogicalPlanBuilder = new LogicalPlanBuilderForCreateView( - getHintMap(sql, DorisParser::selectHint)); + getHintMap(sql, tokenStream, DorisParser::selectHint)); return (LogicalPlan) realLogicalPlanBuilder.visit(tree); } /** get hint map */ - public static Map getHintMap(String sql, - Function parseFunction) { + public static Map getHintMap(String sql, CommonTokenStream hintTokenStream, + Function parseFunction) { // parse hint first round - DorisLexer hintLexer = new DorisLexer(new CaseInsensitiveStream(CharStreams.fromString(sql))); - CommonTokenStream hintTokenStream = new CommonTokenStream(hintLexer); - Map selectHintMap = Maps.newHashMap(); - Token hintToken = hintTokenStream.getTokenSource().nextToken(); + Iterator tokenIterator = hintTokenStream.getTokens().iterator(); + Token hintToken = tokenIterator.hasNext() ? tokenIterator.next() : null; while (hintToken != null && hintToken.getType() != DorisLexer.EOF) { if (hintToken.getChannel() == 2 && sql.charAt(hintToken.getStartIndex() + 2) == '+') { String hintSql = sql.substring(hintToken.getStartIndex() + 3, hintToken.getStopIndex() + 1); @@ -358,15 +360,19 @@ public class NereidsParser { ParserRuleContext hintContext = parseFunction.apply(hintParser); selectHintMap.put(hintToken.getStartIndex(), hintContext); } - hintToken = hintTokenStream.getTokenSource().nextToken(); + hintToken = tokenIterator.hasNext() ? 
tokenIterator.next() : null; } return selectHintMap; } + public static ParserRuleContext toAst( + String sql, Function parseFunction) { + return toAst(parseAllTokens(sql), parseFunction); + } + /** toAst */ - public static ParserRuleContext toAst(String sql, Function parseFunction) { - DorisLexer lexer = new DorisLexer(new CaseInsensitiveStream(CharStreams.fromString(sql))); - CommonTokenStream tokenStream = new CommonTokenStream(lexer); + public static ParserRuleContext toAst( + CommonTokenStream tokenStream, Function parseFunction) { DorisParser parser = new DorisParser(tokenStream); parser.addParseListener(POST_PROCESSOR); @@ -397,9 +403,7 @@ public class NereidsParser { * will be normalized to: select \/*+SET_VAR(key=value)*\/ * , a, b from table */ public static String removeCommentAndTrimBlank(String sql) { - DorisLexer lexer = new DorisLexer(new CaseInsensitiveStream(CharStreams.fromString(sql))); - CommonTokenStream tokenStream = new CommonTokenStream(lexer); - tokenStream.fill(); + CommonTokenStream tokenStream = parseAllTokens(sql); // maybe add more space char StringBuilder newSql = new StringBuilder((int) (sql.length() * 1.2)); @@ -426,4 +430,11 @@ public class NereidsParser { } return newSql.toString().trim(); } + + private static CommonTokenStream parseAllTokens(String sql) { + DorisLexer lexer = new DorisLexer(new CaseInsensitiveStream(CharStreams.fromString(sql))); + CommonTokenStream tokenStream = new CommonTokenStream(lexer); + tokenStream.fill(); + return tokenStream; + } }