From e507fcc3b3ba9c7ed53f74c1b2ef27eee12df005 Mon Sep 17 00:00:00 2001 From: xinghuayu007 <1450306854@qq.com> Date: Sun, 22 Nov 2020 20:35:12 +0800 Subject: [PATCH] [Enhancement] Improve list comparing performance (#4880) The function equalSets is not efficient enough currently, the time complexity is O(n^2). To improve the performance of comparing two lists, this patch tries to use hash map structure to make the time complexity to be O(n). --- .../java/org/apache/doris/analysis/Expr.java | 32 +++++++++++++++++-- .../org/apache/doris/analysis/ExprTest.java | 25 +++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java index 7f476be7dc..263c30dc98 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java @@ -50,6 +50,7 @@ import java.util.List; import java.util.ListIterator; import java.util.Map; import java.util.Set; +import java.util.HashMap; /** * Root of the expr node hierarchy. @@ -482,13 +483,40 @@ abstract public class Expr extends TreeNode implements ParseNode, Cloneabl /** * Return true if l1 equals l2 when both lists are interpreted as sets. - * TODO: come up with something better than O(n^2)? */ public static boolean equalSets(List l1, List l2) { if (l1.size() != l2.size()) { return false; } - return l1.containsAll(l2) && l2.containsAll(l1); + Map cMap1 = toCountMap(l1); + Map cMap2 = toCountMap(l2); + if (cMap1.size() != cMap2.size()) { + return false; + } + Iterator it = cMap1.keySet().iterator(); + while (it.hasNext()) { + C obj = (C) it.next(); + Integer count1 = (Integer) cMap1.get(obj); + Integer count2 = (Integer) cMap2.get(obj); + if (count2 == null || count1 != count2) { + return false; + } + } + return true; + } + + public static HashMap toCountMap(List list) { + HashMap countMap = new HashMap(); + for (int i = 0; i < list.size(); i++) { + C obj = list.get(i); + Integer count = (Integer) countMap.get(obj); + if (count == null) { + countMap.put(obj, 1); + } else { + countMap.put(obj, count+1); + } + } + return countMap; } public void analyzeNoThrow(Analyzer analyzer) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/ExprTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/ExprTest.java index 0b877bbfa2..360a5cfc45 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/ExprTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/ExprTest.java @@ -29,6 +29,8 @@ import org.junit.Test; import java.util.Map; import java.util.Set; +import java.util.List; +import java.util.ArrayList; import mockit.Expectations; import mockit.Injectable; @@ -159,4 +161,27 @@ public class ExprTest { StringLiteral castStringLiteral2 = (StringLiteral) stringLiteral.uncheckedCastTo(Type.VARCHAR); Assert.assertTrue(stringLiteral == castStringLiteral2); } + + @Test + public void testEqualSets() { + Expr r1 = new DateLiteral(2020, 10, 20); + Expr r2 = new DateLiteral(2020, 10, 21); + Expr r3 = new DateLiteral(2020, 10, 22); + Expr r4 = new DateLiteral(2020, 10, 23); + + //list1 equal list2 + List list1 = new ArrayList<>(); + List list2 = new ArrayList<>(); + list1.add(r1); + list1.add(r2); + list1.add(r3); + list2.add(r1); + list2.add(r2); + list2.add(r3); + Assert.assertTrue(Expr.equalSets(list1, list2)); + + //list3 not equal list4 + list2.add(r4); + Assert.assertFalse(Expr.equalSets(list1, list2)); + } }