[fix](Index)Make column unique ids in index dynamically computed (#48988) (#49300)

Backport of #48988
2025-03-21 19:51:00 +08:00
parent 3e894994d4
commit 71824569a9
16 changed files with 197 additions and 48 deletions
--- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java
@ -2015,7 +2015,6 @@ public class SchemaChangeHandler extends AlterHandler {
                            index.setIndexId(existedIdx.getIndexId());
                            index.setColumns(existedIdx.getColumns());
                            index.setProperties(existedIdx.getProperties());
-                            index.setColumnUniqueIds(existedIdx.getColumnUniqueIds());
                            if (indexDef.getPartitionNames().isEmpty()) {
                                invertedIndexOnPartitions.put(index.getIndexId(), olapTable.getPartitionNames());
                            } else {
@ -2594,7 +2593,6 @@ public class SchemaChangeHandler extends AlterHandler {
                indexDef.checkColumn(column, olapTable.getKeysType(),
                        olapTable.getTableProperty().getEnableUniqueKeyMergeOnWrite(),
                                                                        disableInvertedIndexV1ForVariant);
-                indexDef.getColumnUniqueIds().add(column.getUniqueId());
            } else {
                throw new DdlException("index column does not exist in table. invalid column: " + col);
            }
@ -2605,7 +2603,6 @@ public class SchemaChangeHandler extends AlterHandler {
        // so here update column name in CreateIndexClause after checkColumn for indexDef,
        // there will use the column name in olapTable instead of the column name in CreateIndexClause.
        alterIndex.setColumns(indexDef.getColumns());
-        alterIndex.setColumnUniqueIds(indexDef.getColumnUniqueIds());
        newIndexes.add(alterIndex);
        return false;
    }
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/BuildIndexClause.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/BuildIndexClause.java
@ -73,7 +73,7 @@ public class BuildIndexClause extends AlterTableClause {
        indexDef.analyze();
        this.index = new Index(Env.getCurrentEnv().getNextId(), indexDef.getIndexName(),
                indexDef.getColumns(), indexDef.getIndexType(),
-                indexDef.getProperties(), indexDef.getComment(), indexDef.getColumnUniqueIds());
+                indexDef.getProperties(), indexDef.getComment());
    }

    @Override
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateIndexClause.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateIndexClause.java
@ -73,7 +73,7 @@ public class CreateIndexClause extends AlterTableClause {
        indexDef.analyze();
        this.index = new Index(Env.getCurrentEnv().getNextId(), indexDef.getIndexName(),
                indexDef.getColumns(), indexDef.getIndexType(),
-                indexDef.getProperties(), indexDef.getComment(), indexDef.getColumnUniqueIds());
+                indexDef.getProperties(), indexDef.getComment());
    }

    @Override
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java
@ -581,8 +581,7 @@ public class CreateTableStmt extends DdlStmt {
                    }
                }
                indexes.add(new Index(Env.getCurrentEnv().getNextId(), indexDef.getIndexName(), indexDef.getColumns(),
-                        indexDef.getIndexType(), indexDef.getProperties(), indexDef.getComment(),
-                        indexDef.getColumnUniqueIds()));
+                        indexDef.getIndexType(), indexDef.getProperties(), indexDef.getComment()));
                distinct.add(indexDef.getIndexName());
                distinctCol.add(Pair.of(indexDef.getIndexType(),
                        indexDef.getColumns().stream().map(String::toUpperCase).collect(Collectors.toList())));
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
@ -43,7 +43,6 @@ public class IndexDef {
    private Map<String, String> properties;
    private boolean isBuildDeferred = false;
    private PartitionNames partitionNames;
-    private List<Integer> columnUniqueIds = Lists.newArrayList();
    public static final int MIN_NGRAM_SIZE = 1;
    public static final int MAX_NGRAM_SIZE = 255;
    public static final int MIN_BF_SIZE = 64;
@ -203,10 +202,6 @@ public class IndexDef {
        return partitionNames == null ? Lists.newArrayList() : partitionNames.getPartitionNames();
    }

-    public List<Integer> getColumnUniqueIds() {
-        return columnUniqueIds;
-    }
-
    public enum IndexType {
        BITMAP,
        INVERTED,
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java
@ -64,19 +64,15 @@ public class Index implements Writable {
    private Map<String, String> properties;
    @SerializedName(value = "comment")
    private String comment;
-    @SerializedName(value = "cui", alternate = {"columnUniqueIds"})
-    private List<Integer> columnUniqueIds;

    public Index(long indexId, String indexName, List<String> columns,
-            IndexDef.IndexType indexType, Map<String, String> properties, String comment,
-            List<Integer> columnUniqueIds) {
+            IndexDef.IndexType indexType, Map<String, String> properties, String comment) {
        this.indexId = indexId;
        this.indexName = indexName;
        this.columns = columns == null ? Lists.newArrayList() : Lists.newArrayList(columns);
        this.indexType = indexType;
        this.properties = properties == null ? Maps.newHashMap() : Maps.newHashMap(properties);
        this.comment = comment;
-        this.columnUniqueIds = columnUniqueIds == null ? Lists.newArrayList() : Lists.newArrayList(columnUniqueIds);
        if (indexType == IndexDef.IndexType.INVERTED) {
            if (this.properties != null && !this.properties.isEmpty()) {
                if (this.properties.containsKey(InvertedIndexUtil.INVERTED_INDEX_PARSER_KEY)) {
@ -100,7 +96,6 @@ public class Index implements Writable {
        this.indexType = null;
        this.properties = null;
        this.comment = null;
-        this.columnUniqueIds = null;
    }

    public long getIndexId() {
@ -190,14 +185,6 @@ public class Index implements Writable {
        this.comment = comment;
    }

-    public List<Integer> getColumnUniqueIds() {
-        return columnUniqueIds;
-    }
-
-    public void setColumnUniqueIds(List<Integer> columnUniqueIds) {
-        this.columnUniqueIds = columnUniqueIds;
-    }
-
    @Override
    public void write(DataOutput out) throws IOException {
        Text.writeString(out, GsonUtils.GSON.toJson(this));
@ -215,7 +202,7 @@ public class Index implements Writable {

    public Index clone() {
        return new Index(indexId, indexName, new ArrayList<>(columns),
-                indexType, new HashMap<>(properties), comment, columnUniqueIds);
+                indexType, new HashMap<>(properties), comment);
    }

    @Override
@ -250,7 +237,21 @@ public class Index implements Writable {
        return sb.toString();
    }

-    public TOlapTableIndex toThrift() {
+    /**
+     * Resolves the unique ids of this index's columns against the given schema.
+     *
+     * <p>Index column names are compared with schema column names ignoring case. The result
+     * follows the order of this index's column list. An index column that has no match in
+     * the schema contributes nothing, so the result may be shorter than the column list.
+     *
+     * @param schema columns to resolve against; {@code null} yields an empty result
+     * @return a newly created list of the matching columns' unique ids, possibly empty
+     */
+    public List<Integer> getColumnUniqueIds(List<Column> schema) {
+        List<Integer> columnUniqueIds = new ArrayList<>();
+        // NOTE(review): columns is presumably null on an instance built by the no-arg
+        // constructor before deserialization fills it in - guard instead of throwing an NPE.
+        if (schema == null || columns == null) {
+            return columnUniqueIds;
+        }
+        for (String columnName : columns) {
+            for (Column column : schema) {
+                if (columnName.equalsIgnoreCase(column.getName())) {
+                    columnUniqueIds.add(column.getUniqueId());
+                    // At most one id per index column: stop at the first match so that
+                    // the rest of the schema is not scanned and cannot add a second id.
+                    break;
+                }
+            }
+        }
+        return columnUniqueIds;
+    }
+
+    public TOlapTableIndex toThrift(List<Integer> indexColumnUniqueIds) {
        TOlapTableIndex tIndex = new TOlapTableIndex();
        tIndex.setIndexId(indexId);
        tIndex.setIndexName(indexName);
@ -259,7 +260,7 @@ public class Index implements Writable {
        if (properties != null) {
            tIndex.setProperties(properties);
        }
-        tIndex.setColumnUniqueIds(columnUniqueIds);
+        tIndex.setColumnUniqueIds(indexColumnUniqueIds);
        return tIndex;
    }

--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/MaterializedIndexMeta.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MaterializedIndexMeta.java
@ -387,13 +387,6 @@ public class MaterializedIndexMeta implements Writable, GsonPostProcessable {
        maxColUniqueId = Column.COLUMN_UNIQUE_ID_INIT_VALUE;
        this.schema.forEach(column -> {
            column.setUniqueId(incAndGetMaxColUniqueId());
-            this.indexes.forEach(index -> {
-                index.getColumns().forEach(col -> {
-                    if (col.equalsIgnoreCase(column.getName())) {
-                        index.getColumnUniqueIds().add(column.getUniqueId());
-                    }
-                });
-            });
            if (LOG.isDebugEnabled()) {
                LOG.debug("indexId: {},  column:{}, uniqueId:{}",
                        indexId, column, column.getUniqueId());
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
@ -213,6 +213,6 @@ public class IndexDefinition {

    public Index translateToCatalogStyle() {
        return new Index(Env.getCurrentEnv().getNextId(), name, cols, indexType, properties,
-                comment, null);
+                comment);
    }
 }
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
@ -1481,7 +1481,7 @@ public class OlapScanNode extends ScanNode {
        }

        for (Index index : olapTable.getIndexes()) {
-            TOlapTableIndex tIndex = index.toThrift();
+            TOlapTableIndex tIndex = index.toThrift(index.getColumnUniqueIds(olapTable.getBaseSchema()));
            indexDesc.add(tIndex);
        }

--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java
@ -305,7 +305,7 @@ public class OlapTableSink extends DataSink {
                indexes = table.getIndexes();
            }
            for (Index index : indexes) {
-                TOlapTableIndex tIndex = index.toThrift();
+                TOlapTableIndex tIndex = index.toThrift(index.getColumnUniqueIds(table.getBaseSchema()));
                indexDesc.add(tIndex);
            }
            TOlapTableIndexSchema indexSchema = new TOlapTableIndexSchema(pair.getKey(), columns,
--- a/fe/fe-core/src/main/java/org/apache/doris/task/AlterInvertedIndexTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/task/AlterInvertedIndexTask.java
@ -103,7 +103,7 @@ public class AlterInvertedIndexTask extends AgentTask {
        if (!alterInvertedIndexes.isEmpty()) {
            List<TOlapTableIndex> tIndexes = new ArrayList<>();
            for (Index index : alterInvertedIndexes) {
-                tIndexes.add(index.toThrift());
+                tIndexes.add(index.toThrift(index.getColumnUniqueIds(schemaColumns)));
            }
            req.setAlterInvertedIndexes(tIndexes);
        }
@ -111,7 +111,7 @@ public class AlterInvertedIndexTask extends AgentTask {
        if (existIndexes != null) {
            List<TOlapTableIndex> indexDesc = new ArrayList<TOlapTableIndex>();
            for (Index index : existIndexes) {
-                TOlapTableIndex tIndex = index.toThrift();
+                TOlapTableIndex tIndex = index.toThrift(index.getColumnUniqueIds(schemaColumns));
                indexDesc.add(tIndex);
            }
            req.setIndexesDesc(indexDesc);
--- a/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java
@ -340,7 +340,7 @@ public class CreateReplicaTask extends AgentTask {
            } else {
                tIndexes = new ArrayList<>();
                for (Index index : indexes) {
-                    tIndexes.add(index.toThrift());
+                    tIndexes.add(index.toThrift(index.getColumnUniqueIds(columns)));
                }
            }
            tSchema.setIndexes(tIndexes);
--- a/fe/fe-core/src/test/java/org/apache/doris/catalog/IndexTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/IndexTest.java
@ -0,0 +1,164 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.catalog;
+
+import org.apache.doris.analysis.IndexDef;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class IndexTest {
+
+    @Test
+    public void testGetColumnUniqueIds() {
+        // Build a five-column schema with unique ids 101-105; cases 1-9 and 11 resolve against it
+        List<Column> schema = new ArrayList<>();
+        Column col1 = new Column("col1", Type.INT);
+        col1.setUniqueId(101);
+        Column col2 = new Column("col2", Type.VARCHAR);
+        col2.setUniqueId(102);
+        Column col3 = new Column("col3", Type.DOUBLE);
+        col3.setUniqueId(103);
+        Column specialCol = new Column("special-name!@#", Type.STRING);
+        specialCol.setUniqueId(104);
+        Column mixedCaseCol = new Column("MiXeD_CaSe", Type.BIGINT);
+        mixedCaseCol.setUniqueId(105);
+
+        schema.add(col1);
+        schema.add(col2);
+        schema.add(col3);
+        schema.add(specialCol);
+        schema.add(mixedCaseCol);
+
+        // Test case 1: Basic column matching - ids come back in index-column order
+        List<String> indexColumns1 = new ArrayList<>();
+        indexColumns1.add("col1");
+        indexColumns1.add("col3");
+        Index index1 = new Index(1, "test_index1", indexColumns1, IndexDef.IndexType.BITMAP, null, null);
+
+        List<Integer> uniqueIds1 = index1.getColumnUniqueIds(schema);
+        Assert.assertEquals(2, uniqueIds1.size());
+        Assert.assertEquals(Integer.valueOf(101), uniqueIds1.get(0));
+        Assert.assertEquals(Integer.valueOf(103), uniqueIds1.get(1));
+
+        // Test case 2: Case-insensitive matching - "CoL1"/"COL3" resolve to col1/col3
+        List<String> indexColumns2 = new ArrayList<>();
+        indexColumns2.add("CoL1");
+        indexColumns2.add("COL3");
+        Index index2 = new Index(2, "test_index2", indexColumns2, IndexDef.IndexType.BITMAP, null, null);
+
+        List<Integer> uniqueIds2 = index2.getColumnUniqueIds(schema);
+        Assert.assertEquals(2, uniqueIds2.size());
+        Assert.assertEquals(Integer.valueOf(101), uniqueIds2.get(0));
+        Assert.assertEquals(Integer.valueOf(103), uniqueIds2.get(1));
+
+        // Test case 3: Non-existent column name - the unknown name is skipped, only col1's id remains
+        List<String> indexColumns3 = new ArrayList<>();
+        indexColumns3.add("col1");
+        indexColumns3.add("non_existent_column");
+        Index index3 = new Index(3, "test_index3", indexColumns3, IndexDef.IndexType.BITMAP, null, null);
+
+        List<Integer> uniqueIds3 = index3.getColumnUniqueIds(schema);
+        Assert.assertEquals(1, uniqueIds3.size());
+        Assert.assertEquals(Integer.valueOf(101), uniqueIds3.get(0));
+
+        // Test case 4: Null schema - an empty list is expected back
+        List<Integer> uniqueIds4 = index1.getColumnUniqueIds(null);
+        Assert.assertEquals(0, uniqueIds4.size());
+
+        // Test case 5: Empty column list - an index without columns resolves to no ids
+        Index emptyColIndex = new Index(5, "empty_col_index", new ArrayList<>(),
+                IndexDef.IndexType.BITMAP, null, null);
+        List<Integer> emptyColUniqueIds = emptyColIndex.getColumnUniqueIds(schema);
+        Assert.assertEquals(0, emptyColUniqueIds.size());
+
+        // Test case 6: Empty schema (non-null) - nothing to match against, so no ids
+        List<Integer> emptySchemaUniqueIds = index1.getColumnUniqueIds(new ArrayList<>());
+        Assert.assertEquals(0, emptySchemaUniqueIds.size());
+
+        // Test case 7: Duplicate column names - every occurrence in the index resolves, so ids repeat
+        List<String> dupColumns = new ArrayList<>();
+        dupColumns.add("col1");
+        dupColumns.add("col1");  // Duplicated
+        dupColumns.add("col2");
+        Index dupIndex = new Index(7, "dup_index", dupColumns, IndexDef.IndexType.BITMAP, null, null);
+
+        List<Integer> dupUniqueIds = dupIndex.getColumnUniqueIds(schema);
+        Assert.assertEquals(3, dupUniqueIds.size());
+        Assert.assertEquals(Integer.valueOf(101), dupUniqueIds.get(0));
+        Assert.assertEquals(Integer.valueOf(101), dupUniqueIds.get(1));
+        Assert.assertEquals(Integer.valueOf(102), dupUniqueIds.get(2));
+
+        // Test case 8: Special characters in column names - the name is matched verbatim
+        List<String> specialColList = new ArrayList<>();
+        specialColList.add("special-name!@#");
+        Index specialIndex = new Index(8, "special_index", specialColList, IndexDef.IndexType.BITMAP, null, null);
+
+        List<Integer> specialUniqueIds = specialIndex.getColumnUniqueIds(schema);
+        Assert.assertEquals(1, specialUniqueIds.size());
+        Assert.assertEquals(Integer.valueOf(104), specialUniqueIds.get(0));
+
+        // Test case 9: Mixed case column name - lower-case "mixed_case" must find "MiXeD_CaSe"
+        List<String> mixedCaseList = new ArrayList<>();
+        mixedCaseList.add("mixed_case");  // Testing case insensitivity with underscores
+        Index mixedCaseIndex = new Index(9, "mixed_case_index", mixedCaseList, IndexDef.IndexType.BITMAP, null, null);
+
+        List<Integer> mixedCaseUniqueIds = mixedCaseIndex.getColumnUniqueIds(schema);
+        Assert.assertEquals(1, mixedCaseUniqueIds.size());
+        Assert.assertEquals(Integer.valueOf(105), mixedCaseUniqueIds.get(0));
+
+        // Test case 10: Large number of columns - 1000-column schema (ids 1000-1999), 500 indexed
+        List<String> largeColumnList = new ArrayList<>();
+        List<Column> largeSchema = new ArrayList<>();
+        for (int i = 0; i < 1000; i++) {
+            Column tempCol = new Column("col" + i, Type.INT);
+            tempCol.setUniqueId(1000 + i);
+            largeSchema.add(tempCol);
+
+            // Add every other column to the index
+            if (i % 2 == 0) {
+                largeColumnList.add("col" + i);
+            }
+        }
+
+        Index largeIndex = new Index(10, "large_index", largeColumnList, IndexDef.IndexType.BITMAP, null, null);
+        List<Integer> largeUniqueIds = largeIndex.getColumnUniqueIds(largeSchema);
+
+        Assert.assertEquals(500, largeUniqueIds.size());
+        // Check first and last elements: col0 -> 1000 and col998 -> 1998
+        Assert.assertEquals(Integer.valueOf(1000), largeUniqueIds.get(0));
+        Assert.assertEquals(Integer.valueOf(1000 + 998), largeUniqueIds.get(499));
+
+        // Test case 11: Order preservation - ensure column order in index is preserved in IDs
+        List<String> reverseOrderColumns = new ArrayList<>();
+        reverseOrderColumns.add("col3");
+        reverseOrderColumns.add("col2");
+        reverseOrderColumns.add("col1");
+
+        Index reverseIndex = new Index(11, "reverse_index", reverseOrderColumns, IndexDef.IndexType.BITMAP, null, null);
+        List<Integer> reverseUniqueIds = reverseIndex.getColumnUniqueIds(schema);
+
+        Assert.assertEquals(3, reverseUniqueIds.size());
+        Assert.assertEquals(Integer.valueOf(103), reverseUniqueIds.get(0));
+        Assert.assertEquals(Integer.valueOf(102), reverseUniqueIds.get(1));
+        Assert.assertEquals(Integer.valueOf(101), reverseUniqueIds.get(2));
+    }
+}
--- a/fe/fe-core/src/test/java/org/apache/doris/catalog/OlapTableTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/OlapTableTest.java
@ -58,7 +58,7 @@ public class OlapTableTest {
            }
            OlapTable tbl = (OlapTable) table;
            tbl.setIndexes(Lists.newArrayList(new Index(0, "index", Lists.newArrayList("col"),
-                    IndexDef.IndexType.BITMAP, null, "xxxxxx", Lists.newArrayList(1))));
+                    IndexDef.IndexType.BITMAP, null, "xxxxxx")));
            System.out.println("orig table id: " + tbl.getId());

            FastByteArrayOutputStream byteArrayOutputStream = new FastByteArrayOutputStream();
--- a/fe/fe-core/src/test/java/org/apache/doris/common/proc/IndexesProcNodeTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/proc/IndexesProcNodeTest.java
@ -42,18 +42,18 @@ public class IndexesProcNodeTest {
    public void testFetchResult() throws AnalysisException {
        List<Index> indexes = new ArrayList<>();
        Index indexBitmap = new Index(1, "bitmap_index", Lists.newArrayList("col_1"),
-                IndexType.BITMAP, null, "bitmap index on col_1", Lists.newArrayList(1));
+                IndexType.BITMAP, null, "bitmap index on col_1");
        Map<String, String> invertedProperties = new HashMap<>();
        invertedProperties.put("parser", "unicode");
        Index indexInverted = new Index(2, "inverted_index", Lists.newArrayList("col_2"),
-                        IndexType.INVERTED, invertedProperties, "inverted index on col_2", Lists.newArrayList(2));
+                        IndexType.INVERTED, invertedProperties, "inverted index on col_2");
        Index indexBf = new Index(3, "bloomfilter_index", Lists.newArrayList("col_3"),
-                IndexType.BLOOMFILTER, null, "bloomfilter index on col_3", Lists.newArrayList(3));
+                IndexType.BLOOMFILTER, null, "bloomfilter index on col_3");
        Map<String, String> ngramProperties = new HashMap<>();
        ngramProperties.put("gram_size", "3");
        ngramProperties.put("bf_size", "256");
        Index indexNgramBf = new Index(4, "ngram_bf_index", Lists.newArrayList("col_4"),
-                        IndexType.NGRAM_BF, ngramProperties, "ngram_bf index on col_4", Lists.newArrayList(4));
+                        IndexType.NGRAM_BF, ngramProperties, "ngram_bf index on col_4");
        indexes.add(indexBitmap);
        indexes.add(indexInverted);
        indexes.add(indexBf);
--- a/fe/fe-core/src/test/java/org/apache/doris/persist/TableAddOrDropColumnsInfoTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/persist/TableAddOrDropColumnsInfoTest.java
@ -71,7 +71,7 @@ public class TableAddOrDropColumnsInfoTest {
        indexNameToId.put("index", 1L);

        List<Index> indexes = Lists.newArrayList(
-                new Index(0, "index", Lists.newArrayList("testCol1"), IndexDef.IndexType.INVERTED, null, "xxxxxx", Lists.newArrayList(1)));
+                new Index(0, "index", Lists.newArrayList("testCol1"), IndexDef.IndexType.INVERTED, null, "xxxxxx"));

        TableAddOrDropColumnsInfo tableAddOrDropColumnsInfo1 = new TableAddOrDropColumnsInfo(
                "", dbId, tableId, tableId,