[SegmentV2] Change the default storage format to SegmentV2 (#4387)
Since Segment V2 has been released for a long time, we should make it the default storage format for newly created tables. This CL mainly changes: 1. For all newly created tables, the default storage format is Segment V2. 2. For all already existing tablets, the storage format remains unchanged. 3. Fix the bugs described in #4384 and #4385.
This commit is contained in:
@ -139,9 +139,10 @@ OLAPStatus DeltaWriter::init() {
|
||||
writer_context.tablet_id = _req.tablet_id;
|
||||
writer_context.partition_id = _req.partition_id;
|
||||
writer_context.tablet_schema_hash = _req.schema_hash;
|
||||
writer_context.rowset_type = _storage_engine->default_rowset_type();
|
||||
if (_tablet->tablet_meta()->preferred_rowset_type() == BETA_ROWSET) {
|
||||
writer_context.rowset_type = BETA_ROWSET;
|
||||
} else {
|
||||
writer_context.rowset_type = ALPHA_ROWSET;
|
||||
}
|
||||
writer_context.rowset_path_prefix = _tablet->tablet_path();
|
||||
writer_context.tablet_schema = &(_tablet->tablet_schema());
|
||||
|
||||
@ -282,6 +282,9 @@ OLAPStatus PushHandler::_convert_v2(TabletSharedPtr cur_tablet,
|
||||
context.partition_id = _request.partition_id;
|
||||
context.tablet_schema_hash = cur_tablet->schema_hash();
|
||||
context.rowset_type = StorageEngine::instance()->default_rowset_type();
|
||||
if (cur_tablet->tablet_meta()->preferred_rowset_type() == BETA_ROWSET) {
|
||||
context.rowset_type = BETA_ROWSET;
|
||||
}
|
||||
context.rowset_path_prefix = cur_tablet->tablet_path();
|
||||
context.tablet_schema = &(cur_tablet->tablet_schema());
|
||||
context.rowset_state = PREPARED;
|
||||
@ -468,6 +471,9 @@ OLAPStatus PushHandler::_convert(TabletSharedPtr cur_tablet,
|
||||
context.partition_id = _request.partition_id;
|
||||
context.tablet_schema_hash = cur_tablet->schema_hash();
|
||||
context.rowset_type = StorageEngine::instance()->default_rowset_type();
|
||||
if (cur_tablet->tablet_meta()->preferred_rowset_type() == BETA_ROWSET) {
|
||||
context.rowset_type = BETA_ROWSET;
|
||||
}
|
||||
context.rowset_path_prefix = cur_tablet->tablet_path();
|
||||
context.tablet_schema = &(cur_tablet->tablet_schema());
|
||||
context.rowset_state = PREPARED;
|
||||
|
||||
@ -35,7 +35,6 @@
|
||||
#include "olap/tablet.h"
|
||||
#include "olap/wrapper_field.h"
|
||||
#include "runtime/exec_env.h"
|
||||
#include "runtime/heartbeat_flags.h"
|
||||
#include "runtime/mem_pool.h"
|
||||
#include "runtime/mem_tracker.h"
|
||||
|
||||
|
||||
@ -90,7 +90,7 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id,
|
||||
tablet_meta_pb.set_cumulative_layer_point(-1);
|
||||
tablet_meta_pb.set_tablet_state(PB_RUNNING);
|
||||
*(tablet_meta_pb.mutable_tablet_uid()) = tablet_uid.to_proto();
|
||||
tablet_meta_pb.set_tablet_type(tabletType == TTabletType::TABLET_TYPE_MEMORY ?
|
||||
tablet_meta_pb.set_tablet_type(tabletType == TTabletType::TABLET_TYPE_DISK ?
|
||||
TabletTypePB::TABLET_TYPE_DISK : TabletTypePB::TABLET_TYPE_MEMORY);
|
||||
TabletSchemaPB* schema = tablet_meta_pb.mutable_schema();
|
||||
schema->set_num_short_key_columns(tablet_schema.short_key_column_count);
|
||||
|
||||
@ -77,7 +77,4 @@ Please refer to [Scheam Change](alter-table-schema-change.html)
|
||||
* `LARGEINT`
|
||||
* `DECIMAL`
|
||||
* `BOOL`
|
||||
* The bitmap index takes effect only in segmentV2. You need to add the following configuration to the configuration file of be
|
||||
```
|
||||
default_rowset_type=BETA
|
||||
```
|
||||
* The bitmap index takes effect only in Segment V2. The table's storage format will be converted to V2 automatically when an index is created.
|
||||
|
||||
@ -79,8 +79,4 @@ create/drop index 语法
|
||||
* `DECIMAL`
|
||||
* `BOOL`
|
||||
|
||||
* bitmap索引仅在 segmentV2 下生效,需要在be的配置文件中增加如下配置
|
||||
|
||||
```
|
||||
default_rowset_type=BETA
|
||||
```
|
||||
* bitmap索引仅在 Segment V2 下生效。当创建 index 时,表的存储格式将默认转换为 V2 格式。
|
||||
|
||||
@ -42,6 +42,8 @@ V2 格式的表可以支持以下新的特性:
|
||||
4. 字典压缩
|
||||
5. 延迟物化(Lazy Materialization)
|
||||
|
||||
**从 0.13 版本开始,新建表的默认存储格式将为 Segment V2**
|
||||
|
||||
## 集群升级
|
||||
|
||||
0.12 版本仅支持从 0.11 版本升级,不支持从 0.11 之前的版本升级。请先确保升级的前的 Doris 集群版本为 0.11。
|
||||
@ -129,9 +131,10 @@ PROPERTIES
|
||||
|
||||
在 `properties` 中指定 `"storage_format" = "v2"` 后,该表将使用 V2 格式创建。如果是分区表,则之后创建的分区也都是 V2 格式。
|
||||
|
||||
### 全量格式转换(试验功能)
|
||||
### 全量格式转换(试验功能,不推荐)
|
||||
|
||||
通过以下方式可以开启整个集群的全量数据格式转换(V1 -> V2)。全量数据转换是通过 BE 后台的数据 compaction 过程异步进行的。
|
||||
**该功能目前并没有很好的方式查看或控制转换进度,并且无法保证数据能够转换完成。可能导致同一张表长期处于同时包含两种数据格式的状态。因此建议使用 ALTER TABLE 针对性的转换。**
|
||||
|
||||
1. 从 BE 开启全量格式转换
|
||||
|
||||
|
||||
@ -341,6 +341,9 @@ public class MaterializedViewHandler extends AlterHandler {
|
||||
String newStorageFormatIndexName = NEW_STORAGE_FORMAT_INDEX_NAME_PREFIX + olapTable.getName();
|
||||
if (mvName.equals(newStorageFormatIndexName)) {
|
||||
mvJob.setStorageFormat(TStorageFormat.V2);
|
||||
} else {
|
||||
// use the base table's storage format as the mv's format
|
||||
mvJob.setStorageFormat(olapTable.getStorageFormat());
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@ -900,6 +900,10 @@ public class SchemaChangeHandler extends AlterHandler {
|
||||
// If StorageFormat is set to TStorageFormat.V2
|
||||
// which will create tablet with preferred_rowset_type set to BETA
|
||||
// for both base table and rollup index
|
||||
if (hasIndexChange) {
|
||||
// only V2 support index, so if there is index changed, storage format must be V2
|
||||
storageFormat = TStorageFormat.V2;
|
||||
}
|
||||
schemaChangeJob.setStorageFormat(storageFormat);
|
||||
|
||||
// begin checking each table
|
||||
|
||||
@ -3719,7 +3719,7 @@ public class Catalog {
|
||||
Preconditions.checkNotNull(versionInfo);
|
||||
|
||||
// get storage format
|
||||
TStorageFormat storageFormat = TStorageFormat.DEFAULT; // default means it's up to BE's config
|
||||
TStorageFormat storageFormat = TStorageFormat.V2; // default is segment v2
|
||||
try {
|
||||
storageFormat = PropertyAnalyzer.analyzeStorageFormat(properties);
|
||||
} catch (AnalysisException e) {
|
||||
|
||||
@ -1293,6 +1293,8 @@ public class OlapTable extends Table {
|
||||
for (MaterializedIndex mIndex : partition.getMaterializedIndices(IndexExtState.ALL)) {
|
||||
for (Tablet tablet : mIndex.getTablets()) {
|
||||
if (tabletScheduler.containsTablet(tablet.getId())) {
|
||||
LOG.info("table {} is not stable because tablet {} is in tablet scheduler. replicas: {}",
|
||||
id, tablet.getId(), tablet.getReplicas());
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -30,12 +30,12 @@ import org.apache.doris.common.Pair;
|
||||
import org.apache.doris.thrift.TStorageFormat;
|
||||
import org.apache.doris.thrift.TStorageMedium;
|
||||
import org.apache.doris.thrift.TStorageType;
|
||||
import org.apache.doris.thrift.TTabletType;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Strings;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import org.apache.doris.thrift.TTabletType;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
@ -400,7 +400,7 @@ public class PropertyAnalyzer {
|
||||
storageFormat = properties.get(PROPERTIES_STORAGE_FORMAT);
|
||||
properties.remove(PROPERTIES_STORAGE_FORMAT);
|
||||
} else {
|
||||
return TStorageFormat.DEFAULT;
|
||||
return TStorageFormat.V2;
|
||||
}
|
||||
|
||||
if (storageFormat.equalsIgnoreCase("v1")) {
|
||||
@ -408,7 +408,7 @@ public class PropertyAnalyzer {
|
||||
} else if (storageFormat.equalsIgnoreCase("v2")) {
|
||||
return TStorageFormat.V2;
|
||||
} else if (storageFormat.equalsIgnoreCase("default")) {
|
||||
return TStorageFormat.DEFAULT;
|
||||
return TStorageFormat.V2;
|
||||
} else {
|
||||
throw new AnalysisException("unknown storage format: " + storageFormat);
|
||||
}
|
||||
|
||||
@ -62,8 +62,8 @@ public class AlterJobV2Test {
|
||||
Catalog.getCurrentCatalog().createDb(createDbStmt);
|
||||
|
||||
createTable("CREATE TABLE test.schema_change_test(k1 int, k2 int, k3 int) distributed by hash(k1) buckets 3 properties('replication_num' = '1');");
|
||||
|
||||
createTable("CREATE TABLE test.segmentv2(k1 int, k2 int, v1 int sum) distributed by hash(k1) buckets 3 properties('replication_num' = '1');");
|
||||
|
||||
createTable("CREATE TABLE test.segmentv2(k1 int, k2 int, v1 int sum) distributed by hash(k1) buckets 3 properties('replication_num' = '1', 'storage_format' = 'v1');");
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
@ -133,7 +133,7 @@ public class AlterJobV2Test {
|
||||
Assert.assertNotNull(db);
|
||||
OlapTable tbl = (OlapTable) db.getTable("segmentv2");
|
||||
Assert.assertNotNull(tbl);
|
||||
Assert.assertEquals(TStorageFormat.DEFAULT, tbl.getTableProperty().getStorageFormat());
|
||||
Assert.assertEquals(TStorageFormat.V1, tbl.getTableProperty().getStorageFormat());
|
||||
|
||||
// 1. create a rollup r1
|
||||
String alterStmtStr = "alter table test.segmentv2 add rollup r1(k2, v1)";
|
||||
|
||||
Reference in New Issue
Block a user