[SegmentV2] Change the default storage format to SegmentV2 (#4387)

Since Segment V2 has been released for a long time, we should make it the default storage format for newly created tables.

This CL mainly changes:
1. For all newly created tables, their default storage format is Segment V2.
2. For all already existing tablets, their storage format remains unchanged.
3. Fix bugs described in Fix #4384 and Fix #4385
This commit is contained in:
Mingyu Chen
2020-08-24 21:51:17 +08:00
committed by GitHub
parent 5fc79561d7
commit 976820ba20
13 changed files with 31 additions and 20 deletions

View File

@ -139,9 +139,10 @@ OLAPStatus DeltaWriter::init() {
writer_context.tablet_id = _req.tablet_id;
writer_context.partition_id = _req.partition_id;
writer_context.tablet_schema_hash = _req.schema_hash;
writer_context.rowset_type = _storage_engine->default_rowset_type();
if (_tablet->tablet_meta()->preferred_rowset_type() == BETA_ROWSET) {
writer_context.rowset_type = BETA_ROWSET;
} else {
writer_context.rowset_type = ALPHA_ROWSET;
}
writer_context.rowset_path_prefix = _tablet->tablet_path();
writer_context.tablet_schema = &(_tablet->tablet_schema());

View File

@ -282,6 +282,9 @@ OLAPStatus PushHandler::_convert_v2(TabletSharedPtr cur_tablet,
context.partition_id = _request.partition_id;
context.tablet_schema_hash = cur_tablet->schema_hash();
context.rowset_type = StorageEngine::instance()->default_rowset_type();
if (cur_tablet->tablet_meta()->preferred_rowset_type() == BETA_ROWSET) {
context.rowset_type = BETA_ROWSET;
}
context.rowset_path_prefix = cur_tablet->tablet_path();
context.tablet_schema = &(cur_tablet->tablet_schema());
context.rowset_state = PREPARED;
@ -468,6 +471,9 @@ OLAPStatus PushHandler::_convert(TabletSharedPtr cur_tablet,
context.partition_id = _request.partition_id;
context.tablet_schema_hash = cur_tablet->schema_hash();
context.rowset_type = StorageEngine::instance()->default_rowset_type();
if (cur_tablet->tablet_meta()->preferred_rowset_type() == BETA_ROWSET) {
context.rowset_type = BETA_ROWSET;
}
context.rowset_path_prefix = cur_tablet->tablet_path();
context.tablet_schema = &(cur_tablet->tablet_schema());
context.rowset_state = PREPARED;

View File

@ -35,7 +35,6 @@
#include "olap/tablet.h"
#include "olap/wrapper_field.h"
#include "runtime/exec_env.h"
#include "runtime/heartbeat_flags.h"
#include "runtime/mem_pool.h"
#include "runtime/mem_tracker.h"

View File

@ -90,7 +90,7 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id,
tablet_meta_pb.set_cumulative_layer_point(-1);
tablet_meta_pb.set_tablet_state(PB_RUNNING);
*(tablet_meta_pb.mutable_tablet_uid()) = tablet_uid.to_proto();
tablet_meta_pb.set_tablet_type(tabletType == TTabletType::TABLET_TYPE_MEMORY ?
tablet_meta_pb.set_tablet_type(tabletType == TTabletType::TABLET_TYPE_DISK ?
TabletTypePB::TABLET_TYPE_DISK : TabletTypePB::TABLET_TYPE_MEMORY);
TabletSchemaPB* schema = tablet_meta_pb.mutable_schema();
schema->set_num_short_key_columns(tablet_schema.short_key_column_count);

View File

@ -77,7 +77,4 @@ Please refer to [Scheam Change](alter-table-schema-change.html)
* `LARGEINT`
* `DECIMAL`
* `BOOL`
* The bitmap index takes effect only in segmentV2. You need to add the following configuration to the configuration file of be
```
default_rowset_type=BETA
```
* The bitmap index takes effect only in Segment V2. The table's storage format will be converted to V2 automatically when an index is created.

View File

@ -79,8 +79,4 @@ create/drop index 语法
* `DECIMAL`
* `BOOL`
* bitmap索引仅在 segmentV2 下生效,需要在be的配置文件中增加如下配置
```
default_rowset_type=BETA
```
* bitmap索引仅在 Segment V2 下生效。当创建 index 时,表的存储格式将默认转换为 V2 格式。

View File

@ -42,6 +42,8 @@ V2 格式的表可以支持以下新的特性:
4. 字典压缩
5. 延迟物化(Lazy Materialization)
**从 0.13 版本开始,新建表的默认存储格式将为 Segment V2**
## 集群升级
0.12 版本仅支持从 0.11 版本升级,不支持从 0.11 之前的版本升级。请先确保升级前的 Doris 集群版本为 0.11。
@ -129,9 +131,10 @@ PROPERTIES
在 `properties` 中指定 `"storage_format" = "v2"` 后,该表将使用 V2 格式创建。如果是分区表,则之后创建的分区也都是 V2 格式。
### 全量格式转换(试验功能)
### 全量格式转换(试验功能,不推荐)
通过以下方式可以开启整个集群的全量数据格式转换(V1 -> V2)。全量数据转换是通过 BE 后台的数据 compaction 过程异步进行的。
**该功能目前并没有很好的方式查看或控制转换进度,并且无法保证数据能够转换完成。可能导致同一张表长期处于同时包含两种数据格式的状态。因此建议使用 ALTER TABLE 针对性的转换。**
1. 从 BE 开启全量格式转换

View File

@ -341,6 +341,9 @@ public class MaterializedViewHandler extends AlterHandler {
String newStorageFormatIndexName = NEW_STORAGE_FORMAT_INDEX_NAME_PREFIX + olapTable.getName();
if (mvName.equals(newStorageFormatIndexName)) {
mvJob.setStorageFormat(TStorageFormat.V2);
} else {
// use base table's storage format as the mv's format
mvJob.setStorageFormat(olapTable.getStorageFormat());
}
/*

View File

@ -900,6 +900,10 @@ public class SchemaChangeHandler extends AlterHandler {
// If StorageFormat is set to TStorageFormat.V2
// which will create tablet with preferred_rowset_type set to BETA
// for both base table and rollup index
if (hasIndexChange) {
// only V2 supports indexes, so if any index is changed, the storage format must be V2
storageFormat = TStorageFormat.V2;
}
schemaChangeJob.setStorageFormat(storageFormat);
// begin checking each table

View File

@ -3719,7 +3719,7 @@ public class Catalog {
Preconditions.checkNotNull(versionInfo);
// get storage format
TStorageFormat storageFormat = TStorageFormat.DEFAULT; // default means it's up to BE's config
TStorageFormat storageFormat = TStorageFormat.V2; // default is segment v2
try {
storageFormat = PropertyAnalyzer.analyzeStorageFormat(properties);
} catch (AnalysisException e) {

View File

@ -1293,6 +1293,8 @@ public class OlapTable extends Table {
for (MaterializedIndex mIndex : partition.getMaterializedIndices(IndexExtState.ALL)) {
for (Tablet tablet : mIndex.getTablets()) {
if (tabletScheduler.containsTablet(tablet.getId())) {
LOG.info("table {} is not stable because tablet {} is in tablet scheduler. replicas: {}",
id, tablet.getId(), tablet.getReplicas());
return false;
}

View File

@ -30,12 +30,12 @@ import org.apache.doris.common.Pair;
import org.apache.doris.thrift.TStorageFormat;
import org.apache.doris.thrift.TStorageMedium;
import org.apache.doris.thrift.TStorageType;
import org.apache.doris.thrift.TTabletType;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.Sets;
import org.apache.doris.thrift.TTabletType;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -400,7 +400,7 @@ public class PropertyAnalyzer {
storageFormat = properties.get(PROPERTIES_STORAGE_FORMAT);
properties.remove(PROPERTIES_STORAGE_FORMAT);
} else {
return TStorageFormat.DEFAULT;
return TStorageFormat.V2;
}
if (storageFormat.equalsIgnoreCase("v1")) {
@ -408,7 +408,7 @@ public class PropertyAnalyzer {
} else if (storageFormat.equalsIgnoreCase("v2")) {
return TStorageFormat.V2;
} else if (storageFormat.equalsIgnoreCase("default")) {
return TStorageFormat.DEFAULT;
return TStorageFormat.V2;
} else {
throw new AnalysisException("unknown storage format: " + storageFormat);
}

View File

@ -62,8 +62,8 @@ public class AlterJobV2Test {
Catalog.getCurrentCatalog().createDb(createDbStmt);
createTable("CREATE TABLE test.schema_change_test(k1 int, k2 int, k3 int) distributed by hash(k1) buckets 3 properties('replication_num' = '1');");
createTable("CREATE TABLE test.segmentv2(k1 int, k2 int, v1 int sum) distributed by hash(k1) buckets 3 properties('replication_num' = '1');");
createTable("CREATE TABLE test.segmentv2(k1 int, k2 int, v1 int sum) distributed by hash(k1) buckets 3 properties('replication_num' = '1', 'storage_format' = 'v1');");
}
@AfterClass
@ -133,7 +133,7 @@ public class AlterJobV2Test {
Assert.assertNotNull(db);
OlapTable tbl = (OlapTable) db.getTable("segmentv2");
Assert.assertNotNull(tbl);
Assert.assertEquals(TStorageFormat.DEFAULT, tbl.getTableProperty().getStorageFormat());
Assert.assertEquals(TStorageFormat.V1, tbl.getTableProperty().getStorageFormat());
// 1. create a rollup r1
String alterStmtStr = "alter table test.segmentv2 add rollup r1(k2, v1)";