[fix](Index)Make column unique ids in index dynamically computed (#48988) (#49300)

bp #48988
This commit is contained in:
qiye
2025-03-21 19:51:00 +08:00
committed by GitHub
parent 3e894994d4
commit 71824569a9
16 changed files with 197 additions and 48 deletions

View File

@ -2015,7 +2015,6 @@ public class SchemaChangeHandler extends AlterHandler {
index.setIndexId(existedIdx.getIndexId());
index.setColumns(existedIdx.getColumns());
index.setProperties(existedIdx.getProperties());
index.setColumnUniqueIds(existedIdx.getColumnUniqueIds());
if (indexDef.getPartitionNames().isEmpty()) {
invertedIndexOnPartitions.put(index.getIndexId(), olapTable.getPartitionNames());
} else {
@ -2594,7 +2593,6 @@ public class SchemaChangeHandler extends AlterHandler {
indexDef.checkColumn(column, olapTable.getKeysType(),
olapTable.getTableProperty().getEnableUniqueKeyMergeOnWrite(),
disableInvertedIndexV1ForVariant);
indexDef.getColumnUniqueIds().add(column.getUniqueId());
} else {
throw new DdlException("index column does not exist in table. invalid column: " + col);
}
@ -2605,7 +2603,6 @@ public class SchemaChangeHandler extends AlterHandler {
// so here update column name in CreateIndexClause after checkColumn for indexDef,
// there will use the column name in olapTable instead of the column name in CreateIndexClause.
alterIndex.setColumns(indexDef.getColumns());
alterIndex.setColumnUniqueIds(indexDef.getColumnUniqueIds());
newIndexes.add(alterIndex);
return false;
}

View File

@ -73,7 +73,7 @@ public class BuildIndexClause extends AlterTableClause {
indexDef.analyze();
this.index = new Index(Env.getCurrentEnv().getNextId(), indexDef.getIndexName(),
indexDef.getColumns(), indexDef.getIndexType(),
indexDef.getProperties(), indexDef.getComment(), indexDef.getColumnUniqueIds());
indexDef.getProperties(), indexDef.getComment());
}
@Override

View File

@ -73,7 +73,7 @@ public class CreateIndexClause extends AlterTableClause {
indexDef.analyze();
this.index = new Index(Env.getCurrentEnv().getNextId(), indexDef.getIndexName(),
indexDef.getColumns(), indexDef.getIndexType(),
indexDef.getProperties(), indexDef.getComment(), indexDef.getColumnUniqueIds());
indexDef.getProperties(), indexDef.getComment());
}
@Override

View File

@ -581,8 +581,7 @@ public class CreateTableStmt extends DdlStmt {
}
}
indexes.add(new Index(Env.getCurrentEnv().getNextId(), indexDef.getIndexName(), indexDef.getColumns(),
indexDef.getIndexType(), indexDef.getProperties(), indexDef.getComment(),
indexDef.getColumnUniqueIds()));
indexDef.getIndexType(), indexDef.getProperties(), indexDef.getComment()));
distinct.add(indexDef.getIndexName());
distinctCol.add(Pair.of(indexDef.getIndexType(),
indexDef.getColumns().stream().map(String::toUpperCase).collect(Collectors.toList())));

View File

@ -43,7 +43,6 @@ public class IndexDef {
private Map<String, String> properties;
private boolean isBuildDeferred = false;
private PartitionNames partitionNames;
private List<Integer> columnUniqueIds = Lists.newArrayList();
public static final int MIN_NGRAM_SIZE = 1;
public static final int MAX_NGRAM_SIZE = 255;
public static final int MIN_BF_SIZE = 64;
@ -203,10 +202,6 @@ public class IndexDef {
return partitionNames == null ? Lists.newArrayList() : partitionNames.getPartitionNames();
}
public List<Integer> getColumnUniqueIds() {
return columnUniqueIds;
}
public enum IndexType {
BITMAP,
INVERTED,

View File

@ -64,19 +64,15 @@ public class Index implements Writable {
private Map<String, String> properties;
@SerializedName(value = "comment")
private String comment;
@SerializedName(value = "cui", alternate = {"columnUniqueIds"})
private List<Integer> columnUniqueIds;
public Index(long indexId, String indexName, List<String> columns,
IndexDef.IndexType indexType, Map<String, String> properties, String comment,
List<Integer> columnUniqueIds) {
IndexDef.IndexType indexType, Map<String, String> properties, String comment) {
this.indexId = indexId;
this.indexName = indexName;
this.columns = columns == null ? Lists.newArrayList() : Lists.newArrayList(columns);
this.indexType = indexType;
this.properties = properties == null ? Maps.newHashMap() : Maps.newHashMap(properties);
this.comment = comment;
this.columnUniqueIds = columnUniqueIds == null ? Lists.newArrayList() : Lists.newArrayList(columnUniqueIds);
if (indexType == IndexDef.IndexType.INVERTED) {
if (this.properties != null && !this.properties.isEmpty()) {
if (this.properties.containsKey(InvertedIndexUtil.INVERTED_INDEX_PARSER_KEY)) {
@ -100,7 +96,6 @@ public class Index implements Writable {
this.indexType = null;
this.properties = null;
this.comment = null;
this.columnUniqueIds = null;
}
public long getIndexId() {
@ -190,14 +185,6 @@ public class Index implements Writable {
this.comment = comment;
}
public List<Integer> getColumnUniqueIds() {
return columnUniqueIds;
}
public void setColumnUniqueIds(List<Integer> columnUniqueIds) {
this.columnUniqueIds = columnUniqueIds;
}
@Override
public void write(DataOutput out) throws IOException {
Text.writeString(out, GsonUtils.GSON.toJson(this));
@ -215,7 +202,7 @@ public class Index implements Writable {
public Index clone() {
return new Index(indexId, indexName, new ArrayList<>(columns),
indexType, new HashMap<>(properties), comment, columnUniqueIds);
indexType, new HashMap<>(properties), comment);
}
@Override
@ -250,7 +237,21 @@ public class Index implements Writable {
return sb.toString();
}
public TOlapTableIndex toThrift() {
/**
 * Resolves the unique ids of this index's columns against the given schema.
 *
 * <p>Each name in {@code columns} is compared case-insensitively with the names of the
 * columns in {@code schema}. The unique id of the first matching schema column is appended
 * to the result, so ids are returned in index-column order. An index column that has no
 * match in the schema is skipped, which means the result can be shorter than
 * {@code columns}.
 *
 * @param schema columns in which the index column names are looked up; may be {@code null}
 * @return a new list holding the matched unique ids; empty when {@code schema} or this
 *         index's column list is {@code null} or empty, or when nothing matches
 */
public List<Integer> getColumnUniqueIds(List<Column> schema) {
    List<Integer> columnUniqueIds = new ArrayList<>();
    if (schema == null || columns == null) {
        return columnUniqueIds;
    }
    for (String columnName : columns) {
        for (Column column : schema) {
            if (columnName.equalsIgnoreCase(column.getName())) {
                columnUniqueIds.add(column.getUniqueId());
                // Emit at most one id per index column and stop scanning the schema
                // as soon as the column has been resolved.
                break;
            }
        }
    }
    return columnUniqueIds;
}
public TOlapTableIndex toThrift(List<Integer> indexColumnUniqueIds) {
TOlapTableIndex tIndex = new TOlapTableIndex();
tIndex.setIndexId(indexId);
tIndex.setIndexName(indexName);
@ -259,7 +260,7 @@ public class Index implements Writable {
if (properties != null) {
tIndex.setProperties(properties);
}
tIndex.setColumnUniqueIds(columnUniqueIds);
tIndex.setColumnUniqueIds(indexColumnUniqueIds);
return tIndex;
}

View File

@ -387,13 +387,6 @@ public class MaterializedIndexMeta implements Writable, GsonPostProcessable {
maxColUniqueId = Column.COLUMN_UNIQUE_ID_INIT_VALUE;
this.schema.forEach(column -> {
column.setUniqueId(incAndGetMaxColUniqueId());
this.indexes.forEach(index -> {
index.getColumns().forEach(col -> {
if (col.equalsIgnoreCase(column.getName())) {
index.getColumnUniqueIds().add(column.getUniqueId());
}
});
});
if (LOG.isDebugEnabled()) {
LOG.debug("indexId: {}, column:{}, uniqueId:{}",
indexId, column, column.getUniqueId());

View File

@ -213,6 +213,6 @@ public class IndexDefinition {
public Index translateToCatalogStyle() {
return new Index(Env.getCurrentEnv().getNextId(), name, cols, indexType, properties,
comment, null);
comment);
}
}

View File

@ -1481,7 +1481,7 @@ public class OlapScanNode extends ScanNode {
}
for (Index index : olapTable.getIndexes()) {
TOlapTableIndex tIndex = index.toThrift();
TOlapTableIndex tIndex = index.toThrift(index.getColumnUniqueIds(olapTable.getBaseSchema()));
indexDesc.add(tIndex);
}

View File

@ -305,7 +305,7 @@ public class OlapTableSink extends DataSink {
indexes = table.getIndexes();
}
for (Index index : indexes) {
TOlapTableIndex tIndex = index.toThrift();
TOlapTableIndex tIndex = index.toThrift(index.getColumnUniqueIds(table.getBaseSchema()));
indexDesc.add(tIndex);
}
TOlapTableIndexSchema indexSchema = new TOlapTableIndexSchema(pair.getKey(), columns,

View File

@ -103,7 +103,7 @@ public class AlterInvertedIndexTask extends AgentTask {
if (!alterInvertedIndexes.isEmpty()) {
List<TOlapTableIndex> tIndexes = new ArrayList<>();
for (Index index : alterInvertedIndexes) {
tIndexes.add(index.toThrift());
tIndexes.add(index.toThrift(index.getColumnUniqueIds(schemaColumns)));
}
req.setAlterInvertedIndexes(tIndexes);
}
@ -111,7 +111,7 @@ public class AlterInvertedIndexTask extends AgentTask {
if (existIndexes != null) {
List<TOlapTableIndex> indexDesc = new ArrayList<TOlapTableIndex>();
for (Index index : existIndexes) {
TOlapTableIndex tIndex = index.toThrift();
TOlapTableIndex tIndex = index.toThrift(index.getColumnUniqueIds(schemaColumns));
indexDesc.add(tIndex);
}
req.setIndexesDesc(indexDesc);

View File

@ -340,7 +340,7 @@ public class CreateReplicaTask extends AgentTask {
} else {
tIndexes = new ArrayList<>();
for (Index index : indexes) {
tIndexes.add(index.toThrift());
tIndexes.add(index.toThrift(index.getColumnUniqueIds(columns)));
}
}
tSchema.setIndexes(tIndexes);

View File

@ -0,0 +1,164 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.catalog;
import org.apache.doris.analysis.IndexDef;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
/**
 * Unit test for {@link Index#getColumnUniqueIds(List)}.
 */
public class IndexTest {

    /** Creates a column with the given name and type and assigns it the given unique id. */
    private static Column column(String name, Type type, int uniqueId) {
        Column created = new Column(name, type);
        created.setUniqueId(uniqueId);
        return created;
    }

    /** Creates a BITMAP index (null properties, null comment) over the given column names. */
    private static Index bitmapIndex(long indexId, String indexName, String... columnNames) {
        List<String> names = new ArrayList<>();
        for (String columnName : columnNames) {
            names.add(columnName);
        }
        return new Index(indexId, indexName, names, IndexDef.IndexType.BITMAP, null, null);
    }

    /** Asserts that {@code actual} has exactly the expected ids, in the expected order. */
    private static void assertIds(List<Integer> actual, int... expected) {
        Assert.assertEquals(expected.length, actual.size());
        for (int pos = 0; pos < expected.length; pos++) {
            Assert.assertEquals(Integer.valueOf(expected[pos]), actual.get(pos));
        }
    }

    @Test
    public void testGetColumnUniqueIds() {
        // Schema shared by most cases: five columns with unique ids 101..105.
        List<Column> schema = new ArrayList<>();
        schema.add(column("col1", Type.INT, 101));
        schema.add(column("col2", Type.VARCHAR, 102));
        schema.add(column("col3", Type.DOUBLE, 103));
        schema.add(column("special-name!@#", Type.STRING, 104));
        schema.add(column("MiXeD_CaSe", Type.BIGINT, 105));

        // Case 1: plain name match.
        Index basicIndex = bitmapIndex(1, "test_index1", "col1", "col3");
        assertIds(basicIndex.getColumnUniqueIds(schema), 101, 103);

        // Case 2: names differing only in letter case still match.
        Index caseInsensitiveIndex = bitmapIndex(2, "test_index2", "CoL1", "COL3");
        assertIds(caseInsensitiveIndex.getColumnUniqueIds(schema), 101, 103);

        // Case 3: a name that is absent from the schema contributes no id.
        Index partlyMissingIndex = bitmapIndex(3, "test_index3", "col1", "non_existent_column");
        assertIds(partlyMissingIndex.getColumnUniqueIds(schema), 101);

        // Case 4: null schema yields an empty result.
        assertIds(basicIndex.getColumnUniqueIds(null));

        // Case 5: index without columns yields an empty result.
        Index noColumnIndex = bitmapIndex(5, "empty_col_index");
        assertIds(noColumnIndex.getColumnUniqueIds(schema));

        // Case 6: empty (non-null) schema yields an empty result.
        assertIds(basicIndex.getColumnUniqueIds(new ArrayList<>()));

        // Case 7: a column listed twice in the index is resolved twice.
        Index duplicateIndex = bitmapIndex(7, "dup_index", "col1", "col1", "col2");
        assertIds(duplicateIndex.getColumnUniqueIds(schema), 101, 101, 102);

        // Case 8: special characters in the column name.
        Index specialCharIndex = bitmapIndex(8, "special_index", "special-name!@#");
        assertIds(specialCharIndex.getColumnUniqueIds(schema), 104);

        // Case 9: lower-case lookup of a mixed-case schema column containing underscores.
        Index mixedCaseIndex = bitmapIndex(9, "mixed_case_index", "mixed_case");
        assertIds(mixedCaseIndex.getColumnUniqueIds(schema), 105);

        // Case 10: 1000-column schema with every even-numbered column indexed.
        List<Column> wideSchema = new ArrayList<>();
        List<String> evenColumnNames = new ArrayList<>();
        for (int n = 0; n < 1000; n++) {
            String generatedName = "col" + n;
            wideSchema.add(column(generatedName, Type.INT, 1000 + n));
            if (n % 2 == 0) {
                evenColumnNames.add(generatedName);
            }
        }
        Index wideIndex = new Index(10, "large_index", evenColumnNames, IndexDef.IndexType.BITMAP, null, null);
        List<Integer> wideIds = wideIndex.getColumnUniqueIds(wideSchema);
        Assert.assertEquals(500, wideIds.size());
        // Spot-check the first and the last resolved id.
        Assert.assertEquals(Integer.valueOf(1000), wideIds.get(0));
        Assert.assertEquals(Integer.valueOf(1000 + 998), wideIds.get(499));

        // Case 11: ids follow the order of the index columns, not the schema order.
        Index reversedIndex = bitmapIndex(11, "reverse_index", "col3", "col2", "col1");
        assertIds(reversedIndex.getColumnUniqueIds(schema), 103, 102, 101);
    }
}

View File

@ -58,7 +58,7 @@ public class OlapTableTest {
}
OlapTable tbl = (OlapTable) table;
tbl.setIndexes(Lists.newArrayList(new Index(0, "index", Lists.newArrayList("col"),
IndexDef.IndexType.BITMAP, null, "xxxxxx", Lists.newArrayList(1))));
IndexDef.IndexType.BITMAP, null, "xxxxxx")));
System.out.println("orig table id: " + tbl.getId());
FastByteArrayOutputStream byteArrayOutputStream = new FastByteArrayOutputStream();

View File

@ -42,18 +42,18 @@ public class IndexesProcNodeTest {
public void testFetchResult() throws AnalysisException {
List<Index> indexes = new ArrayList<>();
Index indexBitmap = new Index(1, "bitmap_index", Lists.newArrayList("col_1"),
IndexType.BITMAP, null, "bitmap index on col_1", Lists.newArrayList(1));
IndexType.BITMAP, null, "bitmap index on col_1");
Map<String, String> invertedProperties = new HashMap<>();
invertedProperties.put("parser", "unicode");
Index indexInverted = new Index(2, "inverted_index", Lists.newArrayList("col_2"),
IndexType.INVERTED, invertedProperties, "inverted index on col_2", Lists.newArrayList(2));
IndexType.INVERTED, invertedProperties, "inverted index on col_2");
Index indexBf = new Index(3, "bloomfilter_index", Lists.newArrayList("col_3"),
IndexType.BLOOMFILTER, null, "bloomfilter index on col_3", Lists.newArrayList(3));
IndexType.BLOOMFILTER, null, "bloomfilter index on col_3");
Map<String, String> ngramProperties = new HashMap<>();
ngramProperties.put("gram_size", "3");
ngramProperties.put("bf_size", "256");
Index indexNgramBf = new Index(4, "ngram_bf_index", Lists.newArrayList("col_4"),
IndexType.NGRAM_BF, ngramProperties, "ngram_bf index on col_4", Lists.newArrayList(4));
IndexType.NGRAM_BF, ngramProperties, "ngram_bf index on col_4");
indexes.add(indexBitmap);
indexes.add(indexInverted);
indexes.add(indexBf);

View File

@ -71,7 +71,7 @@ public class TableAddOrDropColumnsInfoTest {
indexNameToId.put("index", 1L);
List<Index> indexes = Lists.newArrayList(
new Index(0, "index", Lists.newArrayList("testCol1"), IndexDef.IndexType.INVERTED, null, "xxxxxx", Lists.newArrayList(1)));
new Index(0, "index", Lists.newArrayList("testCol1"), IndexDef.IndexType.INVERTED, null, "xxxxxx"));
TableAddOrDropColumnsInfo tableAddOrDropColumnsInfo1 = new TableAddOrDropColumnsInfo(
"", dbId, tableId, tableId,