Add scheduler routine load job for stream load (#313)

1. fetch need_scheduler routine load job
2. calculate current concurrent task number of job
3. divide kafka partition into tasks
This commit is contained in:
EmmyMiao87
2018-11-15 21:04:22 +08:00
committed by Mingyu Chen
parent 8ac9492b11
commit 44029937e4
14 changed files with 1090 additions and 157 deletions

View File

@ -0,0 +1,106 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.load.routineload;
import com.google.common.collect.Lists;
import mockit.Deencapsulation;
import mockit.Delegate;
import mockit.Expectations;
import mockit.Injectable;
import mockit.Mocked;
import org.apache.doris.catalog.Catalog;
import org.apache.doris.catalog.Database;
import org.apache.doris.common.MetaNotFoundException;
import org.apache.doris.system.SystemInfoService;
import org.apache.doris.thrift.TResourceInfo;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.PartitionInfo;
import org.junit.Assert;
import org.junit.Test;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Unit tests for {@link KafkaRoutineLoadJob}.
 *
 * <p>Covers the computation of the current concurrent task number and the
 * division of a job's Kafka partitions into routine load tasks.
 */
public class KafkaRoutineLoadJobTest {

    /**
     * Creates the job fixture used by every test in this class. All
     * constructor arguments are fixed test values.
     */
    private static KafkaRoutineLoadJob newKafkaJob() {
        return new KafkaRoutineLoadJob(1L, "kafka_routine_load_job", "miaoling", 1L,
                1L, "1L", "v1", "", "", 3,
                RoutineLoadJob.JobState.NEED_SCHEDULER, RoutineLoadJob.DataSourceType.KAFKA, 0, new TResourceInfo(),
                "", "");
    }

    /**
     * With the consumer reporting two partitions and the cluster reporting a
     * single backend id, the job is expected to compute a concurrent task
     * number of 1.
     *
     * NOTE(review): the name suggests the backend count is the limiting factor
     * (min of the candidates) -- confirm against KafkaRoutineLoadJob.
     */
    @Test
    public void testBeNumMin(@Mocked KafkaConsumer kafkaConsumer,
                             @Injectable PartitionInfo partitionInfo1,
                             @Injectable PartitionInfo partitionInfo2,
                             @Mocked Catalog catalog,
                             @Mocked SystemInfoService systemInfoService,
                             @Mocked Database database) throws MetaNotFoundException {
        // Two partitions handed back by the mocked consumer.
        List<PartitionInfo> mockedPartitions = Lists.newArrayList(partitionInfo1, partitionInfo2);

        // Exactly one backend id in the cluster.
        List<Long> backendIds = new ArrayList<>();
        backendIds.add(1L);

        String cluster = "clusterA";

        // Expectations are recorded in the same order as before.
        new Expectations() {
            {
                kafkaConsumer.partitionsFor(anyString, (Duration) any);
                result = mockedPartitions;

                Catalog.getCurrentSystemInfo();
                result = systemInfoService;

                Catalog.getCurrentCatalog();
                result = catalog;

                catalog.getDb(anyLong);
                result = database;

                database.getClusterName();
                result = cluster;

                systemInfoService.getClusterBackendIds(cluster, true);
                result = backendIds;
            }
        };

        KafkaRoutineLoadJob job = newKafkaJob();
        Assert.assertEquals(1, job.calculateCurrentConcurrentTaskNum());
    }

    /**
     * Divides a job whose partitions are {1, 4, 6} into two tasks and checks
     * the distribution: the task holding two partitions must contain 1 and 6,
     * the task holding one partition must contain 4.
     */
    @Test
    public void testDivideRoutineLoadJob() {
        KafkaRoutineLoadJob job = newKafkaJob();
        // Inject the partition list directly, bypassing any Kafka lookup.
        Deencapsulation.setField(job, "kafkaPartitions", Arrays.asList(1, 4, 6));

        List<RoutineLoadTask> tasks = job.divideRoutineLoadJob(2);
        Assert.assertEquals(2, tasks.size());

        for (int i = 0; i < tasks.size(); i++) {
            KafkaRoutineLoadTask task = (KafkaRoutineLoadTask) tasks.get(i);
            switch (task.getKafkaPartitions().size()) {
                case 2:
                    Assert.assertTrue(task.getKafkaPartitions().contains(1));
                    Assert.assertTrue(task.getKafkaPartitions().contains(6));
                    break;
                case 1:
                    Assert.assertTrue(task.getKafkaPartitions().contains(4));
                    break;
                default:
                    // Any other task size means the division is wrong.
                    Assert.fail();
            }
        }
    }
}

View File

@ -0,0 +1,86 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.load.routineload;
import org.apache.doris.catalog.Catalog;
import org.apache.doris.common.LoadException;
import org.apache.doris.common.MetaNotFoundException;
import org.apache.doris.persist.EditLog;
import org.apache.doris.system.SystemInfoService;
import org.easymock.EasyMock;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.powermock.api.easymock.PowerMock;
import org.powermock.core.classloader.annotations.PrepareForTest;
import org.powermock.modules.junit4.PowerMockRunner;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Unit test for {@link RoutineLoadScheduler}.
 *
 * <p>Runs under {@link PowerMockRunner} with {@link Catalog} prepared so that
 * its static accessors can be stubbed.
 */
@RunWith(PowerMockRunner.class)
@PrepareForTest({Catalog.class})
public class RoutineLoadSchedulerTest {

    /**
     * Registers one job in {@code NEED_SCHEDULER} state, runs a single
     * scheduler cycle and verifies the outcome:
     * <ul>
     *   <li>one routine load task is registered,</li>
     *   <li>one task is waiting to be scheduled,</li>
     *   <li>the job is now listed under {@code RUNNING} and no longer under
     *       {@code NEED_SCHEDULER}.</li>
     * </ul>
     *
     * @throws LoadException declared by the routine load API used here
     * @throws MetaNotFoundException declared by the routine load API used here
     */
    @Test
    public void testNormalRunOneCycle() throws LoadException, MetaNotFoundException {
        int taskNum = 1;

        // The single task the mocked job will be divided into.
        List<RoutineLoadTask> routineLoadTaskList = new ArrayList<>();
        KafkaRoutineLoadTask kafkaRoutineLoadTask = EasyMock.createNiceMock(KafkaRoutineLoadTask.class);
        EasyMock.expect(kafkaRoutineLoadTask.getSignature()).andReturn(1L).anyTimes();
        EasyMock.replay(kafkaRoutineLoadTask);
        routineLoadTaskList.add(kafkaRoutineLoadTask);

        // Job mock: reports a concurrency of taskNum and divides into the list above.
        KafkaRoutineLoadJob routineLoadJob = EasyMock.createNiceMock(KafkaRoutineLoadJob.class);
        EasyMock.expect(routineLoadJob.calculateCurrentConcurrentTaskNum()).andReturn(taskNum).anyTimes();
        EasyMock.expect(routineLoadJob.divideRoutineLoadJob(taskNum)).andReturn(routineLoadTaskList).anyTimes();
        EasyMock.expect(routineLoadJob.getState()).andReturn(RoutineLoadJob.JobState.NEED_SCHEDULER).anyTimes();
        EasyMock.replay(routineLoadJob);

        // System info mock reporting three backend ids.
        SystemInfoService systemInfoService = EasyMock.createNiceMock(SystemInfoService.class);
        List<Long> beIds = Arrays.asList(1L, 2L, 3L);
        EasyMock.expect(systemInfoService.getBackendIds(true)).andReturn(beIds).anyTimes();
        EasyMock.replay(systemInfoService);

        Catalog catalog = EasyMock.createNiceMock(Catalog.class);
        EditLog editLog = EasyMock.createNiceMock(EditLog.class);
        // Switch the edit log mock to replay state. Left in record state, a
        // non-void call on it followed by any further call would raise
        // IllegalStateException ("missing behavior definition") instead of
        // returning the nice-mock defaults.
        EasyMock.replay(editLog);

        // Stub the static Catalog accessors.
        PowerMock.mockStatic(Catalog.class);
        EasyMock.expect(Catalog.getCurrentSystemInfo()).andReturn(systemInfoService).anyTimes();
        EasyMock.expect(Catalog.getInstance()).andReturn(catalog).anyTimes();
        PowerMock.replay(Catalog.class);

        // A real RoutineLoad registry is used so that state transitions can be observed.
        RoutineLoad routineLoad = new RoutineLoad();
        EasyMock.expect(catalog.getEditLog()).andReturn(editLog).anyTimes();
        EasyMock.expect(catalog.getRoutineLoadInstance()).andReturn(routineLoad).anyTimes();
        EasyMock.replay(catalog);

        routineLoad.addRoutineLoadJob(routineLoadJob);
        routineLoad.updateRoutineLoadJobState(routineLoadJob, RoutineLoadJob.JobState.NEED_SCHEDULER);

        RoutineLoadScheduler routineLoadScheduler = new RoutineLoadScheduler();
        routineLoadScheduler.runOneCycle();

        Assert.assertEquals(1, routineLoad.getIdToRoutineLoadTask().size());
        Assert.assertEquals(1, routineLoad.getIdToNeedSchedulerRoutineLoadTasks().size());
        Assert.assertEquals(1, routineLoad.getRoutineLoadJobByState(RoutineLoadJob.JobState.RUNNING).size());
        Assert.assertEquals(0, routineLoad.getRoutineLoadJobByState(RoutineLoadJob.JobState.NEED_SCHEDULER).size());
    }
}