diff --git a/docs/.vuepress/public/images/canal_store.png b/docs/.vuepress/public/images/canal_store.png
new file mode 100644
index 0000000000..51671584bc
Binary files /dev/null and b/docs/.vuepress/public/images/canal_store.png differ
diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js
index 4f7cdbb3fc..d5adc11770 100644
--- a/docs/.vuepress/sidebar/en.js
+++ b/docs/.vuepress/sidebar/en.js
@@ -55,6 +55,7 @@ module.exports = [
children: [
"load-manual",
"batch-delete-manual",
+ "binlog-load-manual",
"broker-load-manual",
"routine-load-manual",
"sequence-column-manual",
@@ -534,6 +535,7 @@ module.exports = [
"CANCEL DELETE",
"CANCEL LABEL",
"CANCEL LOAD",
+ "CREATE SYNC JOB",
"DELETE",
"EXPORT",
"GET LABEL STATE",
@@ -542,8 +544,10 @@ module.exports = [
"MINI LOAD",
"MULTI LOAD",
"PAUSE ROUTINE LOAD",
+ "PAUSE SYNC JOB",
"RESTORE TABLET",
"RESUME ROUTINE LOAD",
+ "RESUME SYNC JOB",
"ROUTINE LOAD",
"SHOW ALTER",
"SHOW BACKUP",
@@ -562,10 +566,12 @@ module.exports = [
"SHOW ROUTINE LOAD TASK",
"SHOW ROUTINE LOAD",
"SHOW SNAPSHOT",
+ "SHOW SYNC JOB",
"SHOW TABLES",
"SHOW TABLET",
"SHOW TRANSACTION",
"STOP ROUTINE LOAD",
+ "STOP SYNC JOB",
"STREAM LOAD",
"alter-routine-load",
"insert",
diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js
index c9d297d6f2..e1ba2b2e8a 100644
--- a/docs/.vuepress/sidebar/zh-CN.js
+++ b/docs/.vuepress/sidebar/zh-CN.js
@@ -54,6 +54,7 @@ module.exports = [
children: [
"load-manual",
"batch-delete-manual",
+ "binlog-load-manual",
"broker-load-manual",
"routine-load-manual",
"sequence-column-manual",
@@ -538,6 +539,7 @@ module.exports = [
children: [
"BROKER LOAD",
"CANCEL LOAD",
+ "CREATE SYNC JOB",
"DELETE",
"EXPORT",
"GROUP BY",
@@ -545,7 +547,9 @@ module.exports = [
"MINI LOAD",
"MULTI LOAD",
"PAUSE ROUTINE LOAD",
+ "PAUSE SYNC JOB",
"RESUME ROUTINE LOAD",
+ "RESUME SYNC JOB",
"ROUTINE LOAD",
"SHOW ALTER",
"SHOW BACKUP",
@@ -564,11 +568,13 @@ module.exports = [
"SHOW ROUTINE LOAD TASK",
"SHOW ROUTINE LOAD",
"SHOW SNAPSHOT",
+ "SHOW SYNC JOB",
"SHOW TABLES",
"SHOW TABLET",
"SHOW TRANSACTION",
"SPARK LOAD",
"STOP ROUTINE LOAD",
+ "STOP SYNC JOB",
"STREAM LOAD",
"alter-routine-load",
"insert",
diff --git a/docs/en/administrator-guide/load-data/binlog-load-manual.md b/docs/en/administrator-guide/load-data/binlog-load-manual.md
new file mode 100644
index 0000000000..90db6da5ae
--- /dev/null
+++ b/docs/en/administrator-guide/load-data/binlog-load-manual.md
@@ -0,0 +1,492 @@
+---
+{
+ "title": "Binlog Load",
+ "language": "en"
+}
+---
+
+
+
+# Binlog Load
+
+The Binlog Load feature enables Doris to incrementally synchronize update operations from MySQL, providing CDC (Change Data Capture) of MySQL data.
+
+## Scenarios
+* Supports INSERT / UPDATE / DELETE operations
+* Filters out query statements
+* Temporarily incompatible with DDL statements
+
+## Glossary
+* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access.
+* BE: Backend, the backend node of Doris. Responsible for query execution and data storage.
+* Canal: Alibaba's open-source MySQL binlog parsing tool. It supports incremental data subscription and consumption.
+* Batch: A batch of data sent by canal to the client, with a globally unique auto-incrementing ID.
+* SyncJob: A data synchronization job submitted by the user.
+* Receiver: Responsible for subscribing to and receiving data from canal.
+* Consumer: Responsible for distributing the data received by the Receiver to each channel.
+* Channel: The channel that receives the data distributed by the Consumer; it creates tasks for sending data and controls the beginning, committing, and aborting of the transaction on one table.
+* Task: A task created by a channel, which sends data to the BE when executed.
+
+## Principle
+In the phase-one design, Binlog Load relies on canal as an intermediate medium: canal pretends to be a slave node to fetch and parse the binlog on the MySQL master node, and Doris then fetches the parsed data from canal. This process mainly involves the MySQL side, the canal side, and the Doris side. The overall data flow is as follows:
+
+```
++---------------------------------------------+
+| Mysql |
++----------------------+----------------------+
+ | Binlog
++----------------------v----------------------+
+| Canal Server |
++-------------------+-----^-------------------+
+ Get | | Ack
++-------------------|-----|-------------------+
+| FE | | |
+| +-----------------|-----|----------------+ |
+| | Sync Job | | | |
+| | +------------v-----+-----------+ | |
+| | | Canal Client | | |
+| | | +-----------------------+ | | |
+| | | | Receiver | | | |
+| | | +-----------------------+ | | |
+| | | +-----------------------+ | | |
+| | | | Consumer | | | |
+| | | +-----------------------+ | | |
+| | +------------------------------+ | |
+| +----+---------------+--------------+----+ |
+| | | | |
+| +----v-----+ +-----v----+ +-----v----+ |
+| | Channel1 | | Channel2 | | Channel3 | |
+| | [Table1] | | [Table2] | | [Table3] | |
+| +----+-----+ +-----+----+ +-----+----+ |
+| | | | |
+| +--|-------+ +---|------+ +---|------+|
+| +---v------+| +----v-----+| +----v-----+||
+| +----------+|+ +----------+|+ +----------+|+|
+| | Task |+ | Task |+ | Task |+ |
+| +----------+ +----------+ +----------+ |
++----------------------+----------------------+
+ | | |
++----v-----------------v------------------v---+
+| Coordinator |
+| BE |
++----+-----------------+------------------+---+
+ | | |
++----v---+ +---v----+ +----v---+
+| BE | | BE | | BE |
++--------+ +--------+ +--------+
+
+```
+
+As shown in the figure above, the user first submits a SyncJob to the FE.
+
+Then, the FE will start a Canal Client for each SyncJob to subscribe to and get data from the Canal Server.
+
+The Receiver in the Canal Client receives data via GET requests. Every time a Batch is received, the Consumer distributes it to different Channels according to the corresponding target tables. Once a channel receives data distributed by the Consumer, it submits a send task for sending the data.
+
+A send task is a request from the Channel to the BE, which contains the data of the same Batch distributed to the current channel.
+
+A Channel controls the begin, commit, and abort of the transaction for a single table. Within one transaction, the Consumer may distribute multiple Batches of data to a channel, so multiple send tasks may be generated. These tasks will not actually take effect until the transaction is committed successfully.
+
+When certain conditions are met (for example, a certain period of time has passed, or an empty batch is received), the Consumer will block and notify each channel to try to commit the transaction.
+
+If and only if all channels commit successfully, the Canal Server will be notified by an ACK request and the Canal Client will continue to get and consume data.
+
+If any channel fails to commit, data will be retrieved again from the location of the last successful consumption and committed again (channels that have already committed successfully will not commit again, to ensure the idempotency of commits).
+
+During the whole life cycle of a SyncJob, the Canal Client continuously receives data from the Canal Server and sends it to the BE through the above process to complete data synchronization.
+
+## Configure MySQL Server
+
+In MySQL's master-slave replication, the binary log (binlog) records all data changes on the master node. Data synchronization and backup among the nodes of the cluster are carried out through the binlog, which improves the availability of the cluster.
+
+The architecture of master-slave synchronization is usually composed of a master node (responsible for writing) and one or more slave nodes (responsible for reading). All data changes on the master node will be copied to the slave node.
+
+**Note that: Currently, you must use MySQL version 5.7 or above to support Binlog Load**
+
+To enable the binlog of MySQL, you need to edit the my.cnf file and set it like:
+
+```
+[mysqld]
+log-bin = mysql-bin # enable binlog
+binlog-format=ROW # choose ROW mode
+```
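+
+After restarting MySQL, you can verify that the binlog is enabled and in ROW format with the following standard MySQL statements:
+
+```
+SHOW VARIABLES LIKE 'log_bin';        -- should return ON
+SHOW VARIABLES LIKE 'binlog_format';  -- should return ROW
+```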
+
+### Principle Description
+
+On MySQL, the binlog files are usually named mysql-bin.000001, mysql-bin.000002, and so on. MySQL automatically rotates the binlog file when certain conditions are met:
+
+1. MySQL is restarted
+2. The user enters the `flush logs` command
+3. The binlog file size exceeds 1G
+
+To locate the latest consumption position in the binlog, both the binlog file name and the position (offset) are needed.
+
+For instance, each slave node saves the binlog position it has consumed so far, so that it can disconnect, reconnect, and resume consumption at any time.
+
+```
+--------------------- ---------------------
+| Slave | read | Master |
+| FileName/Position | <<<--------------------------- | Binlog Files |
+--------------------- ---------------------
+```
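+
+On the master side, you can check the name and offset of the binlog currently being written with the following standard MySQL statement (the File and Position columns of the output correspond to the file name and offset above):
+
+```
+SHOW MASTER STATUS;
+```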
+
+The master node is only responsible for writing to the binlog. Multiple slave nodes can connect to one master node at the same time and consume different parts of the binlog without affecting each other.
+
+The binlog supports two main formats (in addition to the mixed-based mode):
+
+* Statement-based format:
+
+ Binlog only records the SQL statements executed on the master node, and the slave node copies them to the local node for re-execution.
+
+* Row-based format:
+
+ Binlog will record the data change information of each row and all columns of the master node, and the slave node will copy and execute the change of each row to the local node.
+
+The first format only writes the executed SQL statements. Although the log volume is small, it has the following disadvantages:
+
+1. The actual data of each row is not recorded
+2. UDFs and random or time functions executed on the master node may produce inconsistent results on the slave node
+3. The execution order of statements with LIMIT may be inconsistent
+
+Therefore, we need to choose the second format, from which each row of data can be parsed out of the binlog.
+
+In the row-based format, the binlog records the timestamp, server ID, offset, and other information of each binlog event. Consider, for instance, the following transaction containing two insert statements:
+
+```
+begin;
+insert into canal_test.test_tbl values (3, 300);
+insert into canal_test.test_tbl values (4, 400);
+commit;
+```
+
+There will be four binlog events, including one begin event, two insert events and one commit event:
+
+```
+SET TIMESTAMP=1538238301/*!*/;
+BEGIN
+/*!*/.
+# at 211935643
+# at 211935698
+#180930 0:25:01 server id 1 end_log_pos 211935698 Table_map: 'canal_test'.'test_tbl' mapped to number 25
+#180930 0:25:01 server id 1 end_log_pos 211935744 Write_rows: table-id 25 flags: STMT_END_F
+...
+'/*!*/;
+### INSERT INTO canal_test.test_tbl
+### SET
+### @1=1
+### @2=100
+# at 211935744
+#180930 0:25:01 server id 1 end_log_pos 211935771 Xid = 2681726641
+...
+'/*!*/;
+### INSERT INTO canal_test.test_tbl
+### SET
+### @1=2
+### @2=200
+# at 211935771
+#180930 0:25:01 server id 1 end_log_pos 211939510 Xid = 2681726641
+COMMIT/*!*/;
+```
+
+As shown above, each insert event contains the modified data. For delete/update operations, one event can even contain multiple rows of data, making the binlog more compact.
+
+### Open GTID mode (Optional)
+
+A GTID (global transaction identifier) identifies a transaction committed on the master node, and is globally unique and valid. After the binlog is enabled, GTIDs are written to the binlog file, one per transaction.
+
+To open the gtid mode of MySQL, you need to edit the my.cnf configuration file and set it like:
+
+```
+gtid-mode=on // Open gtid mode
+enforce-gtid-consistency=1 // Enforce consistency between gtid and transaction
+```
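+
+After the change takes effect, you can confirm that GTID mode is on with the following standard MySQL statement:
+
+```
+SHOW VARIABLES LIKE 'gtid_mode';  -- should return ON
+```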
+
+In GTID mode, the master server can conveniently track transactions, recover data, and create replicas without the binlog file name and offset.
+
+In GTID mode, thanks to the global validity of GTIDs, the slave node no longer needs to locate the binlog position on the master node by saving the file name and offset; the position can be located by the data itself. During synchronization, the slave node will skip any GTID transaction it has already executed.
+
+A GTID is expressed as a pair of coordinates: `source_id` identifies the master node, and `transaction_id` indicates the order in which the transaction was executed on the master node (at most 2^63 - 1).
+
+```
+GTID = source_id:transaction_id
+```
+
+For example, the gtid of the 23rd transaction executed on the same master node is:
+
+```
+3E11FA47-71CA-11E1-9E33-C80AA9429562:23
+```
+
+## Configure Canal Server
+
+Canal is a subproject of Alibaba's Otter project. Its main purpose is to provide incremental data subscription and consumption based on parsing the MySQL binlog, and it was originally built for cross-datacenter synchronization scenarios.
+
+Canal version 1.1.5 or above is recommended. [Download link](https://github.com/alibaba/canal/releases)
+
+After downloading, please follow the steps below to complete the deployment.
+
+1. Unzip the canal deployer
+2. Create a new directory under the conf folder to serve as the root directory of the instance; the directory name is the destination mentioned later
+3. Modify the instance configuration file (you can copy it from `conf/example/instance.properties`)
+
+ ```
+ vim conf/{your destination}/instance.properties
+ ```
+ ```
+ ## canal instance serverId
+ canal.instance.mysql.slaveId = 1234
+    ## mysql address
+ canal.instance.master.address = 127.0.0.1:3306
+ ## mysql username/password
+ canal.instance.dbUsername = canal
+ canal.instance.dbPassword = canal
+ ```
+4. Start up the canal server
+
+ ```
+ sh bin/startup.sh
+ ```
+
+5. Verify that startup succeeded
+
+ ```
+ cat logs/{your destination}/{your destination}.log
+ ```
+ ```
+ 2013-02-05 22:50:45.636 [main] INFO c.a.o.c.i.spring.support.PropertyPlaceholderConfigurer - Loading properties file from class path resource [canal.properties]
+ 2013-02-05 22:50:45.641 [main] INFO c.a.o.c.i.spring.support.PropertyPlaceholderConfigurer - Loading properties file from class path resource [xxx/instance.properties]
+ 2013-02-05 22:50:45.803 [main] INFO c.a.otter.canal.instance.spring.CanalInstanceWithSpring - start CannalInstance for 1-xxx
+ 2013-02-05 22:50:45.810 [main] INFO c.a.otter.canal.instance.spring.CanalInstanceWithSpring - start successful....
+ ```
+
+### Principle Description
+
+By simulating the MySQL dump protocol, canal disguises itself as a slave node, and gets and parses the binlog of the master node.
+
+Multiple instances can be started on the canal server. An instance can be regarded as a slave node. Each instance consists of the following parts:
+
+```
+-------------------------------------------------
+| Server |
+| -------------------------------------------- |
+| | Instance 1 | |
+| | ----------- ----------- ----------- | |
+| | | Parser | | Sink | | Store | | |
+| | ----------- ----------- ----------- | |
+| | ----------------------------------- | |
+| | | MetaManager | | |
+| | ----------------------------------- | |
+| -------------------------------------------- |
+-------------------------------------------------
+```
+
+* Parser: Access the data source, simulate the dump protocol, interact with the master, and analyze the protocol
+* Sink: Linker between parser and store, for data filtering, processing and distribution
+* Store: Data store
+* Meta Manager: Metadata management module
+
+Each instance has its own unique ID in the cluster, that is, server ID.
+
+In the canal server, the instance is identified by a unique string named destination. The canal client needs destination to connect to the corresponding instance.
+
+**Note that: the canal client and the canal instance must correspond one-to-one.**
+
+Binlog Load forbids multiple SyncJobs from connecting to the same destination.
+
+The data flow direction in instance is binlog -> Parser -> sink -> store.
+
+The instance parses binlog logs through the parser module, and the parsed data is cached in the store. When the user submits a SyncJob to the FE, it starts a Canal Client to subscribe to and get the data in the store of the corresponding instance.
+
+The store is actually a ring queue. Users can configure its length and storage space by themselves.
+
+![store](/images/canal_store.png)
+
+Store manages the data in the queue through three pointers:
+
+1. Get pointer: points to the last position fetched by the Canal Client.
+2. Ack pointer: points to the position of the last successful consumption.
+3. Put pointer: points to the position most recently written to the store by the sink module.
+
+```
+canal client asynchronously get data in the store
+
+ get 0 get 1 get 2 put
+ | | | ...... |
+ v v v v
+--------------------------------------------------------------------- store ring queue
+ ^ ^
+ | |
+ ack 0 ack 1
+```
+
+When the Canal Client calls the Get command, the Canal Server will generate data batches and send them to the Canal Client, and move the Get pointer to the right. The Canal Client can get multiple batches until the Get pointer catches up with the Put pointer.
+
+When the consumption of data is successful, the Canal Client will return Ack + Batch ID, notify that the consumption has been successful, and move the Ack pointer to the right. The store will delete the data of this batch from the ring queue, make room to get data from the upstream sink module, and then move the Put pointer to the right.
+
+When data consumption fails, the client returns a rollback notification of the failure, and the store moves the Get pointer back to the Ack pointer's position, so that the next get from the Canal Client starts again from the Ack pointer.
+
+Like a slave node in MySQL, the Canal Server also needs to save the client's latest consumption position. All metadata in the Canal Server (such as the GTID and the binlog position) is managed by the MetaManager. At present, this metadata is persisted by default in JSON format in the meta.dat file in the instance's root directory.
+
+## Basic Operation
+
+### Configure Target Table Properties
+
+The user first needs to create, on the Doris side, the target table corresponding to the MySQL side.
+
+Binlog Load currently only supports Unique-type target tables, and the batch delete feature of the target table must be enabled.
+
+Example:
+
+```
+-- create target table
+CREATE TABLE `test1` (
+ `a` int(11) NOT NULL COMMENT "",
+ `b` int(11) NOT NULL COMMENT ""
+) ENGINE=OLAP
+UNIQUE KEY(`a`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`a`) BUCKETS 8;
+
+-- enable batch delete
+ALTER TABLE canal_test.test1 ENABLE FEATURE "BATCH_DELETE";
+```
+
+### Create SyncJob
+
+The detailed syntax for creating a SyncJob can be viewed with the `HELP CREATE SYNC JOB;` command. Here we mainly introduce the precautions when creating a SyncJob.
+
+* job_name
+
+    `job_name` is the unique identifier of the SyncJob in the current database. Only one SyncJob with the same job name can be running at the same time.
+
+* channel_desc
+
+    `channel_desc` defines a data channel under the job, describing the mapping from a MySQL source table to a Doris target table.
+
+* column_mapping
+
+    `column_mapping` refers to the mapping relationship between the columns of the MySQL source table and the Doris target table.
+
+    If it is not specified, the columns of the source table and the target table are assumed to correspond one by one in order.
+
+    However, we still recommend explicitly specifying the column mapping, so that when the schema of the target table changes (for example, a nullable column is added), data synchronization can still proceed.
+
+    Otherwise, when such a schema change occurs, the column mapping is no longer one-to-one and the SyncJob will report an error.
+
+* binlog_desc
+
+    `binlog_desc` defines the information necessary for connecting to the remote binlog source.
+
+    At present, the only supported type is canal. For the canal type, all configuration items must be prefixed with `canal.` (a combined example follows this list):
+
+    1. canal.server.ip: the address of the canal server
+    2. canal.server.port: the port of the canal server
+    3. canal.destination: the identifier of the instance
+    4. canal.batchSize: the maximum batch size fetched from the canal server per batch. Default 8192
+    5. canal.username: the username of the instance
+    6. canal.password: the password of the instance
+    7. canal.debug: when set to true, detailed information about each batch and each row is printed, which may affect performance
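+
+Putting these together, a minimal job definition looks like the following sketch (the server address, port, and destination are placeholder values for your own deployment):
+
+```
+CREATE SYNC `test_db`.`job1`
+(
+    FROM `mysql_db1`.`tbl1` INTO `test1`
+)
+FROM BINLOG
+(
+    "type" = "canal",
+    "canal.server.ip" = "127.0.0.1",
+    "canal.server.port" = "11111",
+    "canal.destination" = "example",
+    "canal.username" = "",
+    "canal.password" = ""
+);
+```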
+
+### Show Job Status
+
+Specific commands and examples for showing job status can be found with the `HELP SHOW SYNC JOB;` command.
+
+The parameters in the result set have the following meanings:
+
+* State
+
+ The current stage of the job. The transition between job states is shown in the following figure:
+
+ ```
+ +-------------+
+ create job | PENDING | resume job
+ +-----------+ <-------------+
+ | +-------------+ |
+ +----v-------+ +-------+----+
+ | RUNNING | pause job | PAUSED |
+ | +-----------------------> |
+ +----+-------+ run error +-------+----+
+ | +-------------+ |
+ | | CANCELLED | |
+ +-----------> <-------------+
+ stop job +-------------+ stop job
+ system error
+ ```
+
+    After the SyncJob is submitted, its status is PENDING.
+
+    After the FE scheduler starts the canal client, the status becomes RUNNING.
+
+    Users can control the job with three commands: `stop/pause/resume`. After each operation, the job status becomes `CANCELLED/PAUSED/RUNNING` respectively.
+
+    The job has only one terminal state, CANCELLED. Once the job status changes to CANCELLED, it can never be resumed.
+
+    When an error occurs while the SyncJob is running, the status changes to CANCELLED if the error is unrecoverable, and to PAUSED otherwise.
+
+* Channel
+
+ The mapping relationship between all source tables and target tables of the job.
+
+* Status
+
+ The latest consumption location of the current binlog (if the gtid mode is on, the gtid will be displayed), and the delay time of the Doris side compared with the MySQL side.
+
+* JobConfig
+
+ The remote server information of the docking, such as the address of the Canal Server and the destination of the connected instance.
+
+### Control Operation
+
+Users can control the status of jobs through the `stop/pause/resume` commands.
+
+You can use the `HELP STOP SYNC JOB;`, `HELP PAUSE SYNC JOB;`, and `HELP RESUME SYNC JOB;` commands to view help and examples.
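+
+For example, assuming a job named `job1` in database `test_db` (illustrative names), the three operations look like this:
+
+```
+PAUSE SYNC JOB `test_db`.`job1`;
+RESUME SYNC JOB `test_db`.`job1`;
+STOP SYNC JOB `test_db`.`job1`;
+```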
+
+## Related Parameters
+
+### Fe configuration
+
+The following configurations belong to the system-level configuration of SyncJobs. They can be modified in the configuration file fe.conf; a sample fragment follows the list.
+
+* `enable_create_sync_job`
+
+    Turns the Binlog Load feature on or off. The default value is false, i.e., the feature is off.
+
+* `sync_commit_interval_second`
+
+    The maximum interval between transaction commits. If data in the channel has still not been committed after this interval, the consumer notifies the channel to commit the transaction.
+
+* `max_sync_task_threads_num`
+
+    The maximum number of threads in the SyncJobs' thread pool. There is only one such thread pool in the whole FE, and it is used to process the tasks created by all SyncJobs in the FE.
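+
+A minimal fe.conf fragment enabling the feature might look like this (the interval and thread-count values are illustrative, not documented defaults):
+
+```
+enable_create_sync_job = true
+sync_commit_interval_second = 10
+max_sync_task_threads_num = 10
+```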
+
+## FAQ
+
+1. Will modifying the table structure affect data synchronization?
+
+    Yes. A SyncJob cannot prohibit `alter table` operations. When the table's schema changes, if the column mapping no longer matches, the job may pause with an error. We recommend reducing such problems by explicitly specifying the column mapping in the SyncJob, or by only adding nullable columns or columns with default values.
+
+2. Will the SyncJob continue to run after the database is deleted?
+
+    No. In this case, the SyncJob will be detected by the FE's scheduler thread and stopped.
+
+3. Can multiple SyncJobs be configured with the same `IP:Port + destination`?
+
+    No. When creating a SyncJob, the FE checks whether its `IP:Port + destination` duplicates that of an existing job, to prevent multiple jobs from connecting to the same instance.
+
+4. Why is the precision of floating-point type different between MySQL and Doris during data synchronization?
+
+    The precision of Doris floating-point types differs from MySQL's. You can use the DECIMAL type instead.
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md b/docs/en/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md
new file mode 100644
index 0000000000..9a46486c87
--- /dev/null
+++ b/docs/en/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md
@@ -0,0 +1,165 @@
+---
+{
+ "title": "CREATE SYNC JOB",
+ "language": "en"
+}
+---
+
+
+
+# CREATE SYNC JOB
+
+## description
+
+The sync job feature lets users submit a resident SyncJob that, by reading the binlog from a specified remote address, performs CDC (Change Data Capture) of the user's update operations in a MySQL database.
+
+At present, data synchronization only supports connecting to canal, getting the parsed binlog from the canal server, and loading it into Doris.
+
+You can view the SyncJob's status with the `SHOW SYNC JOB` command.
+
+Syntax:
+
+```
+CREATE SYNC [db.]job_name
+ (
+ channel_desc,
+ channel_desc
+ ...
+ )
+binlog_desc
+```
+
+1. `job_name`
+
+    `job_name` is the unique identifier of the SyncJob in the current database. Only one SyncJob with the same job name can be running at the same time.
+
+2. `channel_desc`
+
+ The data channel under the job is used to describe the mapping relationship between the MySQL source table and the Doris target table.
+
+ Syntax:
+
+ ```
+ FROM mysql_db.src_tbl INTO des_tbl
+ [partitions]
+ [columns_mapping]
+ ```
+
+ 1. `mysql_db.src_tbl`
+
+ Specify the database and source table on the MySQL side.
+
+ 2. `des_tbl`
+
+        Specify the target table on the Doris side. Only Unique tables are supported, and the batch delete feature of the table must be enabled.
+
+ 3. `partitions`
+
+        Specify which partitions of the target table to load into. If not specified, data is automatically loaded into the corresponding partitions.
+
+ Example:
+
+ ```
+ PARTITION(p1, p2, p3)
+ ```
+
+ 4. `column_mapping`
+
+        Specify the mapping relationship between the columns of the MySQL source table and the Doris target table. If not specified, the FE assumes that the columns of the source table and the target table correspond one by one in order.
+
+        Columns in the `col_name = expr` form are not supported.
+
+ Example:
+
+ ```
+        Suppose the columns of the target table are (k1, k2, v1).
+
+        Change the order of columns k1 and k2:
+        COLUMNS(k2, k1, v1)
+
+        Ignore the fourth column of the source data:
+        COLUMNS(k2, k1, v1, dummy_column)
+ ```
+
+3. `binlog_desc`
+
+ It is used to describe remote data sources. Currently, only canal is supported.
+
+ Syntax:
+
+ ```
+ FROM BINLOG
+ (
+ "key1" = "value1",
+ "key2" = "value2"
+ )
+ ```
+
+ 1. The attribute related to the canal is prefixed with `canal.`
+
+        1. canal.server.ip: the address of the canal server
+        2. canal.server.port: the port of the canal server
+        3. canal.destination: the identifier of the instance
+        4. canal.batchSize: the maximum batch size. The default is 8192
+        5. canal.username: the username of the instance
+        6. canal.password: the password of the instance
+        7. canal.debug: optional. When set to true, detailed information about each batch and each row is printed
+
+## example
+
+1. Create a sync job named `job1` for the target table `test_tbl` in `test_db`, connecting to the local canal server and corresponding to the MySQL source table `mysql_db1.tbl1`.
+
+ CREATE SYNC `test_db`.`job1`
+ (
+        FROM `mysql_db1`.`tbl1` INTO `test_tbl`
+ )
+ FROM BINLOG
+ (
+ "type" = "canal",
+ "canal.server.ip" = "127.0.0.1",
+ "canal.server.port" = "11111",
+ "canal.destination" = "example",
+ "canal.username" = "",
+ "canal.password" = ""
+ );
+
+2. Create a sync job named `job1` for multiple target tables in `test_db`, corresponding to multiple MySQL source tables one by one, with explicitly specified column mappings.
+
+ CREATE SYNC `test_db`.`job1`
+ (
+ FROM `mysql_db`.`t1` INTO `test1` COLUMNS(k1, k2, v1) PARTITIONS (p1, p2),
+ FROM `mysql_db`.`t2` INTO `test2` COLUMNS(k3, k4, v2) PARTITION p1
+ )
+ FROM BINLOG
+ (
+ "type" = "canal",
+ "canal.server.ip" = "xx.xxx.xxx.xx",
+ "canal.server.port" = "12111",
+ "canal.destination" = "example",
+ "canal.username" = "username",
+ "canal.password" = "password"
+ );
+
+## keyword
+
+ CREATE,SYNC,JOB,BINLOG
+
+
+
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md b/docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md
new file mode 100644
index 0000000000..b685270649
--- /dev/null
+++ b/docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md
@@ -0,0 +1,48 @@
+---
+{
+ "title": "PAUSE SYNC JOB",
+ "language": "en"
+}
+---
+
+
+
+# PAUSE SYNC JOB
+
+## description
+
+Pauses a running SyncJob in the database.
+
+The paused job will stop synchronizing and keep the latest consumption location until it is resumed by the user.
+
+Syntax:
+
+ PAUSE SYNC JOB [db.]job_name
+
+## example
+
+1. Pause the SyncJob named `job_name`.
+
+ PAUSE SYNC JOB `job_name`;
+
+## keyword
+ PAUSE,SYNC,JOB,BINLOG
+
+
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/RESUME SYNC JOB.md b/docs/en/sql-reference/sql-statements/Data Manipulation/RESUME SYNC JOB.md
new file mode 100644
index 0000000000..ef5f7f851a
--- /dev/null
+++ b/docs/en/sql-reference/sql-statements/Data Manipulation/RESUME SYNC JOB.md
@@ -0,0 +1,46 @@
+---
+{
+ "title": "RESUME SYNC JOB",
+ "language": "en"
+}
+---
+
+
+
+# RESUME SYNC JOB
+
+## description
+
+Resumes a paused SyncJob in the database.
+
+The job will continue to synchronize data from the latest location before the last pause.
+
+Syntax:
+
+ RESUME SYNC JOB [db.]job_name
+
+## example
+
+1. Resume the SyncJob named `job_name`
+
+ RESUME SYNC JOB `job_name`;
+
+## keyword
+ RESUME,SYNC,JOB,BINLOG
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md
new file mode 100644
index 0000000000..f496daffa1
--- /dev/null
+++ b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md
@@ -0,0 +1,49 @@
+---
+{
+ "title": "SHOW SYNC JOB",
+ "language": "en"
+}
+---
+
+
+
+# SHOW SYNC JOB
+
+## description
+
+This command is used to display the status of all resident data synchronization jobs in the current database or in a specified database.
+
+Syntax:
+
+ SHOW SYNC JOB [FROM db_name]
+
+## example
+
+1. Show the status of all SyncJobs in the current database.
+
+ SHOW SYNC JOB;
+
+2. Show the status of all SyncJobs under the database `test_db`.
+
+ SHOW SYNC JOB FROM `test_db`;
+
+## keyword
+
+ SHOW,SYNC,JOB,BINLOG
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md b/docs/en/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md
new file mode 100644
index 0000000000..b9fc262d0c
--- /dev/null
+++ b/docs/en/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md
@@ -0,0 +1,44 @@
+---
+{
+ "title": "STOP SYNC JOB",
+ "language": "en"
+}
+---
+
+
+
+# STOP SYNC JOB
+
+## description
+
+Stops an uncancelled SyncJob in the database.
+
+Syntax:
+
+ STOP SYNC JOB [db.]job_name
+
+## example
+
+1. Stop the SyncJob named `job_name`
+
+ STOP SYNC JOB `job_name`;
+
+## keyword
+ STOP,SYNC,JOB,BINLOG
diff --git a/docs/zh-CN/administrator-guide/load-data/binlog-load-manual.md b/docs/zh-CN/administrator-guide/load-data/binlog-load-manual.md
new file mode 100644
index 0000000000..3ed946b85f
--- /dev/null
+++ b/docs/zh-CN/administrator-guide/load-data/binlog-load-manual.md
@@ -0,0 +1,470 @@
+---
+{
+ "title": "Binlog Load",
+ "language": "zh-CN"
+}
+---
+
+
+
+
+# Binlog Load
+Binlog Load提供了一种使Doris增量同步用户在Mysql数据库中对数据更新操作的CDC(Change Data Capture)功能。
+
+## 适用场景
+
+* INSERT/UPDATE/DELETE支持
+* 过滤Query
+* 暂不兼容DDL语句
+
+## 名词解释
+1. Frontend(FE):Doris 系统的元数据和调度节点。在导入流程中主要负责导入 plan 生成和导入任务的调度工作。
+2. Backend(BE):Doris 系统的计算和存储节点。在导入流程中主要负责数据的 ETL 和存储。
+3. Canal:阿里巴巴开源的Mysql Binlog日志解析工具。提供增量数据订阅&消费等功能。
+4. Batch:Canal发送到客户端的一批数据,具有全局唯一自增的ID。
+5. SyncJob:用户提交的一个数据同步作业。
+6. Receiver: 负责订阅并接收Canal的数据。
+7. Consumer: 负责分发Receiver接收的数据到各个Channel。
+8. Channel: 接收Consumer分发的数据的渠道,创建发送数据的子任务,控制单个表事务的开启、提交、终止。
+9. Task:Channel向Be发送数据的子任务。
+
+## 基本原理
+在第一期的设计中,Binlog Load需要依赖canal作为中间媒介,让canal伪造成一个从节点去获取Mysql主节点上的Binlog并解析,再由Doris去获取Canal上解析好的数据,主要涉及Mysql端、Canal端以及Doris端,总体数据流向如下:
+
+```
++---------------------------------------------+
+| Mysql |
++----------------------+----------------------+
+ | Binlog
++----------------------v----------------------+
+| Canal Server |
++-------------------+-----^-------------------+
+ Get | | Ack
++-------------------|-----|-------------------+
+| FE | | |
+| +-----------------|-----|----------------+ |
+| | Sync Job | | | |
+| | +------------v-----+-----------+ | |
+| | | Canal Client | | |
+| | | +-----------------------+ | | |
+| | | | Receiver | | | |
+| | | +-----------------------+ | | |
+| | | +-----------------------+ | | |
+| | | | Consumer | | | |
+| | | +-----------------------+ | | |
+| | +------------------------------+ | |
+| +----+---------------+--------------+----+ |
+| | | | |
+| +----v-----+ +-----v----+ +-----v----+ |
+| | Channel1 | | Channel2 | | Channel3 | |
+| | [Table1] | | [Table2] | | [Table3] | |
+| +----+-----+ +-----+----+ +-----+----+ |
+| | | | |
+| +--|-------+ +---|------+ +---|------+|
+| +---v------+| +----v-----+| +----v-----+||
+| +----------+|+ +----------+|+ +----------+|+|
+| | Task |+ | Task |+ | Task |+ |
+| +----------+ +----------+ +----------+ |
++----------------------+----------------------+
+ | | |
++----v-----------------v------------------v---+
+| Coordinator |
+| BE |
++----+-----------------+------------------+---+
+ | | |
++----v---+ +---v----+ +----v---+
+| BE | | BE | | BE |
++--------+ +--------+ +--------+
+
+```
+
+如上图,用户向FE提交一个数据同步作业。
+
+FE会为每个数据同步作业启动一个canal client,来向canal server端订阅并获取数据。
+
+client中的receiver将负责通过Get命令接收数据,每获取到一个数据batch,都会由consumer根据对应表分发到不同的channel,每个channel都会为此数据batch产生一个发送数据的子任务Task。
+
+在FE上,一个Task是channel向BE发送数据的子任务,里面包含分发到当前channel的同一个batch的数据。
+
+channel控制着单个表事务的开始、提交、终止。一个事务周期内,一般会从consumer获取到多个batch的数据,因此会产生多个向BE发送数据的子任务Task,在提交事务成功前,这些Task不会实际生效。
+
+满足一定条件时(比如超过一定时间、获取到了空的batch),consumer将会阻塞并通知各个channel提交事务。
+
+当且仅当所有channel都提交成功,才会通过Ack命令通知canal并继续获取并消费数据。
+
+如果有任意channel提交失败,将会重新从上一次消费成功的位置获取数据并再次提交(已提交成功的channel不会再次提交以保证幂等性)。
+
+整个数据同步作业中,FE通过以上流程不断的从canal获取数据并提交到BE,来完成数据同步。
+
+## 配置Mysql端
+
+在Mysql Cluster模式的主从同步中,二进制日志文件(Binlog)记录了主节点上的所有数据变化,数据在Cluster的多个节点间同步、备份都要通过Binlog日志进行,从而提高集群的可用性。架构通常由一个主节点(负责写)和一个或多个从节点(负责读)构成,所有在主节点上发生的数据变更将会复制给从节点。
+
+**注意:目前必须要使用Mysql 5.7及以上的版本才能支持Binlog Load功能。**
+
+要打开mysql的二进制binlog日志功能,则需要编辑my.cnf配置文件设置一下。
+
+```
+[mysqld]
+log-bin = mysql-bin # 开启 binlog
+binlog-format=ROW # 选择 ROW 模式
+```
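+
+重启Mysql后,可以通过下面的标准Mysql语句确认binlog已开启且为ROW格式:
+
+```
+SHOW VARIABLES LIKE 'log_bin';        -- 应返回 ON
+SHOW VARIABLES LIKE 'binlog_format';  -- 应返回 ROW
+```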
+
+### Mysql端说明
+
+在Mysql上,Binlog命名格式为mysql-bin.000001、mysql-bin.000002... ,满足一定条件时mysql会去自动切分Binlog日志:
+
+1. mysql重启了
+2. 客户端输入命令flush logs
+3. binlog文件大小超过1G
+
+要定位Binlog的最新的消费位置,可以通过binlog文件名和position(偏移量)。
+
+例如,各个从节点上会保存当前消费到的binlog位置,方便随时断开连接、重新连接和继续消费。
+
+```
+--------------------- ---------------------
+| Slave | read | Master |
+| FileName/Position | <<<--------------------------- | Binlog Files |
+--------------------- ---------------------
+```
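+
+例如,可以在主节点上通过下面的标准Mysql语句查看当前正在写入的binlog文件名和偏移量(输出中的File和Position列即对应上述的文件名和位置):
+
+```
+SHOW MASTER STATUS;
+```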
+
+对于主节点来说,它只负责写入Binlog,多个从节点可以同时连接到一台主节点上,消费Binlog日志的不同部分,互相之间不会影响。
+
+Binlog日志支持两种主要格式(此外还有混合模式mixed-based):
+
+```
+statement-based格式: Binlog只保存主节点上执行的sql语句,从节点将其复制到本地重新执行
+row-based格式: Binlog会记录主节点的每一行所有列的数据的变更信息,从节点会复制并执行每一行的变更到本地
+```
+
+第一种格式只写入了执行的sql语句,虽然日志量会很少,但是有下列缺点
+
+ 1. 没有保存每一行实际的数据
+ 2. 在主节点上执行的UDF、随机、时间函数会在从节点上结果不一致
+ 3. limit语句执行顺序可能不一致
+
+因此我们需要选择第二种格式,才能从Binlog日志中解析出每一行数据。
+
+在row-based格式下,Binlog会记录每一条binlog event的时间戳,server id,偏移量等信息,如下面一条带有两条insert语句的事务:
+
+```
+begin;
+insert into canal_test.test_tbl values (3, 300);
+insert into canal_test.test_tbl values (4, 400);
+commit;
+```
+
+对应将会有四条binlog event,其中一条begin event,两条insert event,一条commit event:
+
+```
+SET TIMESTAMP=1538238301/*!*/;
+BEGIN
+/*!*/.
+# at 211935643
+# at 211935698
+#180930 0:25:01 server id 1 end_log_pos 211935698 Table_map: 'canal_test'.'test_tbl' mapped to number 25
+#180930 0:25:01 server id 1 end_log_pos 211935744 Write_rows: table-id 25 flags: STMT_END_F
+...
+'/*!*/;
+### INSERT INTO canal_test.test_tbl
+### SET
+### @1=1
+### @2=100
+# at 211935744
+#180930 0:25:01 server id 1 end_log_pos 211935771 Xid = 2681726641
+...
+'/*!*/;
+### INSERT INTO canal_test.test_tbl
+### SET
+### @1=2
+### @2=200
+# at 211935771
+#180930 0:25:01 server id 1 end_log_pos 211939510 Xid = 2681726641
+COMMIT/*!*/;
+```
+
+如上图所示,每条Insert event中包含了修改的数据。在进行Delete/Update操作时,一条event还能包含多行数据,使得Binlog日志更加的紧密。
+
+
+
+### 开启GTID模式 [可选]
+一个全局事务Id(global transaction identifier)标识出了一个曾在主节点上提交过的事务,在全局都是唯一有效的。开启了Binlog后,GTID会被写入到Binlog文件中,与事务一一对应。
+
+要打开mysql的GTID模式,则需要编辑my.cnf配置文件设置一下
+
+```
+gtid-mode=on // 开启gtid模式
+enforce-gtid-consistency=1 // 强制gtid和事务的一致性
+```
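+
+配置生效后,可以通过下面的标准Mysql语句确认GTID模式已开启:
+
+```
+SHOW VARIABLES LIKE 'gtid_mode';  -- 应返回 ON
+```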
+
+在GTID模式下,主服务器可以不需要Binlog的文件名和偏移量,就能很方便的追踪事务、恢复数据、复制副本。
+
+在GTID模式下,由于GTID的全局有效性,从节点将不再需要通过保存文件名和偏移量来定位主节点上的Binlog位置,而通过数据本身就可以定位了。在进行数据同步中,从节点会跳过执行任意被识别为已执行的GTID事务。
+
+GTID的表现形式为一对坐标, `source_id`标识出主节点,`transaction_id`表示此事务在主节点上执行的顺序(最大2^63-1)。
+
+```
+GTID = source_id:transaction_id
+```
+
+例如,在同一主节点上执行的第23个事务的gtid为
+
+```
+3E11FA47-71CA-11E1-9E33-C80AA9429562:23
+```
+
+## 配置Canal端
+canal是属于阿里巴巴otter项目下的一个子项目,主要用途是基于 MySQL 数据库增量日志解析,提供增量数据订阅和消费,用于解决跨机房同步的业务场景,建议使用canal 1.1.5及以上版本,[下载地址](https://github.com/alibaba/canal/releases),下载完成后,请按以下步骤完成部署。
+
+1. 解压canal deployer
+2. 在conf文件夹下新建目录并重命名,作为instance的根目录,目录名即后文提到的destination
+3. 修改instance配置文件(可拷贝conf/example/instance.properties)
+
+ ```
+ vim conf/{your destination}/instance.properties
+ ```
+ ```
+ ## canal instance serverId
+ canal.instance.mysql.slaveId = 1234
+    ## mysql address
+ canal.instance.master.address = 127.0.0.1:3306
+ ## mysql username/password
+ canal.instance.dbUsername = canal
+ canal.instance.dbPassword = canal
+ ```
+
+4. 启动
+
+ ```
+ sh bin/startup.sh
+ ```
+
+5. 验证启动成功
+
+ ```
+ cat logs/{your destination}/{your destination}.log
+ ```
+ ```
+ 2013-02-05 22:50:45.636 [main] INFO c.a.o.c.i.spring.support.PropertyPlaceholderConfigurer - Loading properties file from class path resource [canal.properties]
+ 2013-02-05 22:50:45.641 [main] INFO c.a.o.c.i.spring.support.PropertyPlaceholderConfigurer - Loading properties file from class path resource [xxx/instance.properties]
+ 2013-02-05 22:50:45.803 [main] INFO c.a.otter.canal.instance.spring.CanalInstanceWithSpring - start CannalInstance for 1-xxx
+ 2013-02-05 22:50:45.810 [main] INFO c.a.otter.canal.instance.spring.CanalInstanceWithSpring - start successful....
+ ```
+
+### canal端说明
+
+canal通过伪造自己的mysql dump协议,去伪装成一个从节点,获取主节点的Binlog日志并解析。
+
+canal server上可启动多个instance,一个instance可看作一个从节点,每个instance由下面几个部分组成:
+
+```
+-------------------------------------------------
+| Server |
+| -------------------------------------------- |
+| | Instance 1 | |
+| | ----------- ----------- ----------- | |
+| | | Parser | | Sink | | Store | | |
+| | ----------- ----------- ----------- | |
+| | ----------------------------------- | |
+| | | MetaManager | | |
+| | ----------------------------------- | |
+| -------------------------------------------- |
+-------------------------------------------------
+```
+
+* parser:数据源接入,模拟slave协议和master进行交互,协议解析
+* sink:parser和store链接器,进行数据过滤,加工,分发的工作
+* store:数据存储
+* meta manager:元数据管理模块
+
+每个instance都有自己在cluster内的唯一标识,即server Id。
+
+在canal server内,instance用字符串表示,此唯一字符串被记为destination,canal client需要通过destination连接到对应的instance。
+
+**注意:canal client和canal instance是一一对应的**,Binlog Load已限制多个数据同步作业不能连接到同一个destination。
+
+数据在instance内的流向是binlog -> parser -> sink -> store。
+
+instance通过parser模块解析binlog日志,解析出来的数据缓存在store里面,当用户向FE提交一个数据同步作业时,会启动一个canal client订阅并获取对应instance中的store内的数据。
+
+store实际上是一个环形的队列,用户可以自行配置它的长度和存储空间。
+
+
+![store](/images/canal_store.png)
+
+store通过三个指针去管理队列内的数据:
+
+1. get指针:get指针代表客户端最后获取到的位置。
+2. ack指针:ack指针记录着最后消费成功的位置。
+3. put指针:代表sink模块最后写入store成功的位置。
+
+```
+canal client异步获取store中数据
+
+ get 0 get 1 get 2 put
+ | | | ...... |
+ v v v v
+--------------------------------------------------------------------- store环形队列
+ ^ ^
+ | |
+ ack 0 ack 1
+```
+
+canal client调用get命令时,canal server会产生数据batch发送给client,并右移get指针,client可以获取多个batch,直到get指针赶上put指针为止。
+
+当消费数据成功时,client会返回ack + batch Id通知已消费成功了,并右移ack指针,store会从队列中删除此batch的数据,腾出空间来从上游sink模块获取数据,并右移put指针。
+
+当数据消费失败时,client会返回rollback通知消费失败,store会将get指针重置左移到ack指针位置,使下一次client获取的数据能再次从ack指针处开始。
+
+和Mysql中的从节点一样,canal也需要去保存client最新消费到的位置。canal中所有元数据(如GTID、Binlog位置)都是由MetaManager去管理的,目前元数据默认以json格式持久化在instance根目录下的meta.dat文件内。
+
+## 基本操作
+
+### 配置目标表属性
+
+用户需要先在Doris端创建好与Mysql端对应的目标表。
+
+Binlog Load只能支持Unique类型的目标表,且必须激活目标表的Batch Delete功能。
+
+开启Batch Delete的方法可以参考`help alter table`中的批量删除功能。
+
+示例:
+
+```
+-- create target table
+CREATE TABLE `test1` (
+ `a` int(11) NOT NULL COMMENT "",
+ `b` int(11) NOT NULL COMMENT ""
+) ENGINE=OLAP
+UNIQUE KEY(`a`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`a`) BUCKETS 8;
+
+-- enable batch delete
+ALTER TABLE canal_test.test1 ENABLE FEATURE "BATCH_DELETE";
+```
+
+### 创建同步作业
+创建数据同步作业的详细语法可以在连接到 Doris 后,执行 `HELP CREATE SYNC JOB;` 查看语法帮助。这里主要详细介绍创建作业时的注意事项。
+
+* job_name
+
+ `job_name`是数据同步作业在当前数据库内的唯一标识,相同`job_name`的作业只能有一个在运行。
+
+* channel_desc
+
+    `channel_desc`用来定义任务下的数据通道,可表示mysql源表到doris目标表的映射关系。在设置此项时,如果存在多个映射关系,必须满足mysql源表与doris目标表是一一对应关系,其他的任何映射关系(如一对多关系),检查语法时都被视为不合法。
+
+* column_mapping
+
+ `column_mapping`主要指mysql源表和doris目标表的列之间的映射关系,如果不指定,FE会默认源表和目标表的列按顺序一一对应。但是我们依然建议显式的指定列的映射关系,这样当目标表的结构发生变化(比如增加一个 nullable 的列),数据同步作业依然可以进行。否则,当发生上述变动后,因为列映射关系不再一一对应,导入将报错。
+
+* binlog_desc
+
+    `binlog_desc`中的属性定义了对接远端Binlog地址的一些必要信息,目前可支持的对接类型只有canal方式,所有的配置项前都需要加上canal前缀(完整示例见下方列表之后):
+
+ 1. `canal.server.ip`: canal server的地址
+ 2. `canal.server.port`: canal server的端口
+ 3. `canal.destination`: 前文提到的instance的字符串标识
+ 4. `canal.batchSize`: 每批从canal server处获取的batch大小的最大值,默认8192
+ 5. `canal.username`: instance的用户名
+ 6. `canal.password`: instance的密码
+ 7. `canal.debug`: 设置为true时,会将batch和每一行数据的详细信息都打印出来,会影响性能。
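+
+将上述配置项组合起来,一个最简的作业定义如下(其中的地址、端口和destination均为示例值,需替换为实际部署环境的值):
+
+```
+CREATE SYNC `test_db`.`job1`
+(
+    FROM `mysql_db1`.`tbl1` INTO `test1`
+)
+FROM BINLOG
+(
+    "type" = "canal",
+    "canal.server.ip" = "127.0.0.1",
+    "canal.server.port" = "11111",
+    "canal.destination" = "example",
+    "canal.username" = "",
+    "canal.password" = ""
+);
+```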
+
+### 查看作业状态
+
+查看作业状态的具体命令和示例可以通过 `HELP SHOW SYNC JOB;` 命令查看。
+
+返回结果集的参数意义如下:
+
+* State
+
+ 作业当前所处的阶段。作业状态之间的转换如下图所示:
+
+ ```
+ +-------------+
+ create job | PENDING | resume job
+ +-----------+ <-------------+
+ | +-------------+ |
+ +----v-------+ +-------+----+
+ | RUNNING | pause job | PAUSED |
+ | +-----------------------> |
+ +----+-------+ run error +-------+----+
+ | +-------------+ |
+ | | CANCELLED | |
+ +-----------> <-------------+
+ stop job +-------------+ stop job
+ system error
+ ```
+ 作业提交之后状态为PENDING,由FE调度执行启动canal client后状态变成RUNNING,用户可以通过 STOP/PAUSE/RESUME 三个命令来控制作业的停止,暂停和恢复,操作后作业状态分别为CANCELLED/PAUSED/RUNNING。
+
+ 作业的最终阶段只有一个CANCELLED,当作业状态变为CANCELLED后,将无法再次恢复。当作业发生了错误时,若错误是不可恢复的,状态会变成CANCELLED,否则会变成PAUSED。
+
+* Channel
+
+ 作业所有源表到目标表的映射关系。
+
+* Status
+
+ 当前binlog的消费位置(若设置了GTID模式,会显示GTID),以及doris端执行时间相比mysql端的延迟时间。
+
+* JobConfig
+
+ 对接的远端服务器信息,如canal server的地址与连接instance的destination
+
+### 控制作业
+用户可以通过 STOP/PAUSE/RESUME 三个命令来控制作业的停止、暂停和恢复。可以通过 `HELP STOP SYNC JOB;`、`HELP PAUSE SYNC JOB;` 以及 `HELP RESUME SYNC JOB;` 三个命令查看帮助和示例。
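+
+例如,假设 `test_db` 库下有一个名为 `job1` 的作业(名称仅为示例),三种操作如下:
+
+```
+PAUSE SYNC JOB `test_db`.`job1`;
+RESUME SYNC JOB `test_db`.`job1`;
+STOP SYNC JOB `test_db`.`job1`;
+```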
+
+## 相关参数
+
+### FE配置
+
+下面配置属于数据同步作业的系统级别配置,主要通过修改 fe.conf 来调整配置值,列表后附有一个配置片段示例。
+
+* `enable_create_sync_job`
+
+ 开启数据同步作业功能。默认为 false,关闭此功能。
+
+* `sync_commit_interval_second`
+
+ 提交事务的最大时间间隔。若超过了这个时间channel中还有数据没有提交,consumer会通知channel提交事务。
+
+* `max_sync_task_threads_num`
+
+    数据同步作业线程池中的最大线程数量。此线程池整个FE中只有一个,用于处理FE中所有数据同步作业向BE发送数据的任务task,线程池的实现在`SyncTaskPool`类中。
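+
+一个开启此功能的最简 fe.conf 片段如下(其中的时间间隔和线程数仅为示例值,并非默认值):
+
+```
+enable_create_sync_job = true
+sync_commit_interval_second = 10
+max_sync_task_threads_num = 10
+```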
+
+## 常见问题
+
+1. 修改表结构是否会影响数据同步作业?
+
+ 会影响。数据同步作业并不能禁止`alter table`的操作,当表结构发生了变化,如果列的映射无法匹配,可能导致作业发生错误暂停,建议通过在数据同步作业中显式指定列映射关系,或者通过增加 Nullable 列或带 Default 值的列来减少这类问题。
+
+2. 删除了数据库后数据同步作业还会继续运行吗?
+
+ 不会。删除数据库后的几秒日志中可能会出现找不到元数据的错误,之后该数据同步作业会被FE的定时调度检查时停止。
+
+3. 多个数据同步作业可以配置相同的`ip:port + destination`吗?
+
+ 不能。创建数据同步作业时会检查`ip:port + destination`与已存在的作业是否重复,防止出现多个作业连接到同一个instance的情况。
+
+4. 为什么数据同步时浮点类型的数据精度在Mysql端和Doris端不一样?
+
+ Doris本身浮点类型的精度与Mysql不一样。可以选择用Decimal类型代替。
+
\ No newline at end of file
diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md
new file mode 100644
index 0000000000..67051735cb
--- /dev/null
+++ b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md
@@ -0,0 +1,166 @@
+---
+{
+ "title": "CREATE SYNC JOB",
+ "language": "zh-CN"
+}
+---
+
+
+
+# CREATE SYNC JOB
+
+## description
+
+数据同步(Sync Job)功能,支持用户提交一个常驻的数据同步作业,通过从指定的远端地址读取Binlog日志,增量同步用户在Mysql数据库中对数据的更新操作,实现CDC(Change Data Capture)功能。
+
+目前数据同步作业只支持对接Canal,从Canal Server上获取解析好的Binlog数据,导入到Doris内。
+
+用户可通过 `SHOW SYNC JOB` 查看数据同步作业状态。
+
+语法:
+
+```
+CREATE SYNC [db.]job_name
+ (
+ channel_desc,
+ channel_desc
+ ...
+ )
+binlog_desc
+```
+
+1. `job_name`
+
+ 同步作业名称,是作业在当前数据库内的唯一标识,相同`job_name`的作业只能有一个在运行。
+
+2. `channel_desc`
+
+ 作业下的数据通道,用来描述mysql源表到doris目标表的映射关系。
+
+ 语法:
+
+ ```
+ FROM mysql_db.src_tbl INTO des_tbl
+ [partitions]
+ [columns_mapping]
+ ```
+
+ 1. `mysql_db.src_tbl`
+
+ 指定mysql端的数据库和源表。
+
+ 2. `des_tbl`
+
+ 指定doris端的目标表,只支持Unique表,且需开启表的batch delete功能(开启方法请看help alter table的'批量删除功能')。
+
+ 3. `partitions`
+
+ 指定导入目的表的哪些 partition 中。如果不指定,则会自动导入到对应的 partition 中。
+
+ 示例:
+
+ ```
+ PARTITION(p1, p2, p3)
+ ```
+
+ 4. `column_mapping`
+
+ 指定mysql源表和doris目标表的列之间的映射关系。如果不指定,FE会默认源表和目标表的列按顺序一一对应。
+
+ 不支持 col_name = expr 的形式表示列。
+
+ 示例:
+
+ ```
+ 假设目标表列为(k1, k2, v1),
+
+ 改变列k1和k2的顺序
+ COLUMNS(k2, k1, v1)
+
+ 忽略源数据的第四列
+ COLUMNS(k2, k1, v1, dummy_column)
+ ```
+
+3. `binlog_desc`
+
+ 用来描述远端数据源,目前仅支持canal一种。
+
+ 语法:
+
+ ```
+ FROM BINLOG
+ (
+ "key1" = "value1",
+ "key2" = "value2"
+ )
+ ```
+
+ 1. Canal 数据源对应的属性,以`canal.`为前缀
+
+ 1. canal.server.ip: canal server的地址
+ 2. canal.server.port: canal server的端口
+ 3. canal.destination: instance的标识
+ 4. canal.batchSize: 获取的batch大小的最大值,默认8192
+ 5. canal.username: instance的用户名
+ 6. canal.password: instance的密码
+ 7. canal.debug: 可选,设置为true时,会将batch和每一行数据的详细信息都打印出来
+
+## example
+
+1. 简单为 `test_db` 的 `test_tbl` 创建一个名为 `job1` 的数据同步作业,连接本地的Canal服务器,对应Mysql源表 `mysql_db1.tbl1`。
+
+ CREATE SYNC `test_db`.`job1`
+ (
+        FROM `mysql_db1`.`tbl1` INTO `test_tbl`
+ )
+ FROM BINLOG
+ (
+ "type" = "canal",
+ "canal.server.ip" = "127.0.0.1",
+ "canal.server.port" = "11111",
+ "canal.destination" = "example",
+ "canal.username" = "",
+ "canal.password" = ""
+ );
+
+2. 为 `test_db` 的多张表创建一个名为 `job1` 的数据同步作业,一一对应多张Mysql源表,并显式的指定列映射。
+
+ CREATE SYNC `test_db`.`job1`
+ (
+ FROM `mysql_db`.`t1` INTO `test1` COLUMNS(k1, k2, v1) PARTITIONS (p1, p2),
+ FROM `mysql_db`.`t2` INTO `test2` COLUMNS(k3, k4, v2) PARTITION p1
+ )
+ FROM BINLOG
+ (
+ "type" = "canal",
+ "canal.server.ip" = "xx.xxx.xxx.xx",
+ "canal.server.port" = "12111",
+ "canal.destination" = "example",
+ "canal.username" = "username",
+ "canal.password" = "password"
+ );
+
+## keyword
+
+ CREATE,SYNC,JOB,BINLOG
+
+
+
+
\ No newline at end of file
diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md
new file mode 100644
index 0000000000..4612bebf91
--- /dev/null
+++ b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md
@@ -0,0 +1,43 @@
+---
+{
+ "title": "PAUSE SYNC JOB",
+ "language": "zh-CN"
+}
+---
+
+
+
+# PAUSE SYNC JOB
+
+## description
+
+通过 `job_name` 暂停一个数据库内正在运行的常驻数据同步作业,被暂停的作业将停止同步数据,保持消费的最新位置,直到被用户恢复。
+
+语法:
+
+ PAUSE SYNC JOB [db.]job_name
+
+## example
+1. 暂停名称为 `job_name` 的数据同步作业。
+
+ PAUSE SYNC JOB `job_name`;
+
+## keyword
+ PAUSE,SYNC,JOB,BINLOG
\ No newline at end of file
diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/RESUME SYNC JOB.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/RESUME SYNC JOB.md
new file mode 100644
index 0000000000..9237419443
--- /dev/null
+++ b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/RESUME SYNC JOB.md
@@ -0,0 +1,44 @@
+---
+{
+ "title": "RESUME SYNC JOB",
+ "language": "zh-CN"
+}
+---
+
+
+
+# RESUME SYNC JOB
+
+## description
+
+通过 `job_name`恢复一个当前数据库已被暂停的常驻数据同步作业,作业将从上一次被暂停前最新的位置继续同步数据。
+
+语法:
+
+ RESUME SYNC JOB [db.]job_name
+
+## example
+
+1. 恢复名称为 `job_name` 的数据同步作业
+
+ RESUME SYNC JOB `job_name`;
+
+## keyword
+ RESUME,SYNC,JOB,BINLOG
\ No newline at end of file
diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md
new file mode 100644
index 0000000000..8fbde6130e
--- /dev/null
+++ b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md
@@ -0,0 +1,49 @@
+---
+{
+ "title": "SHOW SYNC JOB",
+ "language": "zh-CN"
+}
+---
+
+
+
+# SHOW SYNC JOB
+
+## description
+
+此命令用于显示当前数据库或指定数据库内的常驻数据同步作业状态。
+
+语法:
+
+ SHOW SYNC JOB [FROM db_name]
+
+## example
+
+1. 展示当前数据库的所有数据同步作业状态。
+
+ SHOW SYNC JOB;
+
+2. 展示数据库 `test_db` 下的所有数据同步作业状态。
+
+ SHOW SYNC JOB FROM `test_db`;
+
+## keyword
+
+ SHOW,SYNC,JOB,BINLOG
\ No newline at end of file
diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md
new file mode 100644
index 0000000000..cf9cbca00f
--- /dev/null
+++ b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md
@@ -0,0 +1,44 @@
+---
+{
+ "title": "STOP SYNC JOB",
+ "language": "zh-CN"
+}
+---
+
+
+
+# STOP SYNC JOB
+
+## description
+
+通过 `job_name` 停止一个数据库内非停止状态的常驻数据同步作业。
+
+语法:
+
+ STOP SYNC JOB [db.]job_name
+
+## example
+
+1. 停止名称为 `job_name` 的数据同步作业
+
+ STOP SYNC JOB `job_name`;
+
+## keyword
+ STOP,SYNC,JOB,BINLOG
\ No newline at end of file