[Demo] Add flink mysql cdc to doris demo (#6352)

add flink mysql cdc to doris
This commit is contained in:
jiafeng.zhang
2021-08-05 14:34:52 +08:00
committed by GitHub
parent b5d8ee35f5
commit de7376062a
3 changed files with 285 additions and 23 deletions

View File

@ -0,0 +1,117 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
# Instructions for use
This series of sample codes mainly explain how to use Flink and Flink doris connector to read and write data to doris from the perspective of Flink framework and Flink doris connector, and give code examples based on actual usage scenarios.
## Flink MySQL CDC to Doris
Consume MySQL binlog change data with Flink CDC, then import the MySQL data into a Doris table in real time through the Flink Doris Connector SQL interface.
```java
org.apache.doris.demo.flink.FlinkConnectorMysqlCDCDemo
```
**Note:** Because the Flink Doris Connector jar package is not in the Maven Central repository, you need to compile it separately and add it to the classpath of your project. Refer to the compilation and usage instructions for the Flink Doris Connector:
[Flink Doris Connector](https://doris.apache.org/master/zh-CN/extending-doris/flink-doris-connector.html)
1. First, enable the MySQL binlog
For details on how to enable the binlog, refer to the official MySQL documentation.
2. Install Flink
Flink installation and use are not introduced here, but code examples are given in the development environment
3. Create mysql table
```sql
CREATE TABLE `test` (
`id` int NOT NULL AUTO_INCREMENT,
`name` varchar(255) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=19 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci
```
4. Create doris table
```sql
CREATE TABLE `doris_test` (
`id` int NULL COMMENT "",
`name` varchar(100) NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`id`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES (
"replication_num" = "3",
"in_memory" = "false",
"storage_format" = "V2"
);
```
5. Create Flink Mysql CDC
```sql
tEnv.executeSql(
"CREATE TABLE orders (\n" +
" id INT,\n" +
" name STRING\n" +
") WITH (\n" +
" 'connector' = 'mysql-cdc',\n" +
" 'hostname' = 'localhost',\n" +
" 'port' = '3306',\n" +
" 'username' = 'root',\n" +
" 'password' = 'zhangfeng',\n" +
" 'database-name' = 'demo',\n" +
" 'table-name' = 'test'\n" +
")");
```
6. Create flink doris table
```sql
tEnv.executeSql(
"CREATE TABLE doris_test_sink (" +
"id INT," +
"name STRING" +
") " +
"WITH (\n" +
" 'connector' = 'doris',\n" +
" 'fenodes' = '10.220.146.10:8030',\n" +
" 'table.identifier' = 'test_2.doris_test',\n" +
" 'sink.batch.size' = '2',\n" +
" 'username' = 'root',\n" +
" 'password' = ''\n" +
")");
```
7. Execute the INSERT INTO statement
```sql
tEnv.executeSql("INSERT INTO doris_test_sink select id,name from orders");
```

View File

@ -18,32 +18,101 @@ specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>flink-demo</artifactId>
<version>1.0-SNAPSHOT</version>
<groupId>org.example</groupId>
<artifactId>flink-demo</artifactId>
<version>1.0-SNAPSHOT</version>
<name>flink-demo</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>
<name>flink-demo</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>
<properties>
<scala.version>2.11.12</scala.version>
<scala.binary.version>2.11</scala.binary.version>
<java.version>1.8</java.version>
<flink.version>1.11.0</flink.version>
<scope.mode>compile</scope.mode>
</properties>
<properties>
<scala.version>2.12.10</scala.version>
<scala.binary.version>2.12</scala.binary.version>
<java.version>1.8</java.version>
<flink.version>1.12.2</flink.version>
<scope.mode>compile</scope.mode>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
</dependencies>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-scala-bridge_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.12</version>
</dependency>
<dependency>
<groupId>com.alibaba.ververica</groupId>
<artifactId>flink-connector-mysql-cdc</artifactId>
<version>1.3.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.11</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-jdbc_2.11</artifactId>
<version>${flink.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.2.1</version>
<executions>
<execution>
<id>scala-compile-first</id>
<phase>process-resources</phase>
<goals>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>scala-test-compile</id>
<phase>process-test-resources</phase>
<goals>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
<configuration>
<args>
<arg>-feature</arg>
</args>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>8</source>
<target>8</target>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,76 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.demo.flink;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
/**
 * Demo: consumes MySQL binlog change data via the Flink CDC source connector
 * and streams it into a Doris table through the Flink Doris Connector, using
 * the Flink Table/SQL API end to end.
 *
 * <p>NOTE(review): connection details (MySQL host/credentials, Doris FE
 * address, database and table names) are hard-coded below and must be adapted
 * before running.
 */
public class FlinkConnectorMysqlCDCDemo {
// Entry point: builds the CDC-to-Doris pipeline and submits it to Flink.
public static void main(String[] args) throws Exception {
// Command-line arguments are parsed but not otherwise used in this demo.
final ParameterTool params = ParameterTool.fromArgs(args);
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// The MySQL CDC source only supports a parallelism of 1.
env.setParallelism(1);
final StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
// Register the CDC source table `orders` in the catalog; the mysql-cdc
// connector reads the binlog of database `demo`, table `test`.
tEnv.executeSql(
"CREATE TABLE orders (\n" +
" id INT,\n" +
" name STRING\n" +
") WITH (\n" +
" 'connector' = 'mysql-cdc',\n" +
" 'hostname' = 'localhost',\n" +
" 'port' = '3306',\n" +
" 'username' = 'root',\n" +
" 'password' = 'zhangfeng',\n" +
" 'database-name' = 'demo',\n" +
" 'table-name' = 'test'\n" +
")");
// Register the Doris sink table backed by the Flink Doris Connector.
// NOTE(review): 'sink.batch.size' = '2' flushes every 2 rows — fine for a
// demo, far too small for production throughput.
tEnv.executeSql(
"CREATE TABLE doris_test_sink (" +
"id INT," +
"name STRING" +
") " +
"WITH (\n" +
" 'connector' = 'doris',\n" +
" 'fenodes' = '10.220.146.10:8030',\n" +
" 'table.identifier' = 'test_2.doris_test',\n" +
" 'sink.batch.size' = '2',\n" +
" 'username' = 'root',\n" +
" 'password' = ''\n" +
")");
// Query the CDC source so the change stream can be printed for inspection.
final Table result = tEnv.sqlQuery("SELECT id,name FROM orders");
// Print the retract stream (insert/delete change pairs) to the console.
tEnv.toRetractStream(result, Row.class).print();
// NOTE(review): result.execute() submits the SELECT as its own job, and the
// INSERT below is submitted separately by executeSql — presumably three
// independent job submissions in total including env.execute(); confirm this
// is intended rather than a single pipeline.
result.execute();
// Continuously insert the MySQL change stream into the Doris sink table.
tEnv.executeSql("INSERT INTO doris_test_sink select id,name from orders");
// Executes the DataStream part of the pipeline (the print() above).
env.execute();
}
}