[feature](paimon/iceberg) Add a Docker Compose setup that launches the full Paimon/Iceberg/Doris environment in one step, for 2.1 (#38009) (#38588)
bp: #38009
samples/datalake/iceberg_and_paimon/README.md (new file, 279 lines)

@@ -0,0 +1,279 @@
# doris-iceberg-paimon-compose

This demo launches Doris, Iceberg, Paimon, Flink and Spark together with Docker Compose, so you can try Doris's data lake features locally.

## Launch Docker Compose

First, make sure the kernel parameters of the host meet the requirements (Doris needs a large `vm.max_map_count`):

```
sysctl -w vm.max_map_count=2000000
```
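
The setting above does not survive a reboot. If you want it to persist, you can optionally write it to `/etc/sysctl.conf` as well (the exact file and mechanism may differ between distributions):

```
echo "vm.max_map_count=2000000" | sudo tee -a /etc/sysctl.conf
sudo sysctl -p
```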

We can then start all the required containers via the script:

```
bash start_all.sh
```

The script starts a set of Docker containers; the environment includes:
- doris
- iceberg
- paimon
- flink
- spark

It also automatically creates an Iceberg table and a Paimon table, so we can use these tables directly to try out Doris.
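
To confirm that everything is up, you can list the containers. This assumes the services are defined by the compose file shipped in this sample directory; on older Docker installations you may need `docker-compose ps` instead of `docker compose ps`:

```
cd samples/datalake/iceberg_and_paimon
sudo docker compose ps    # every service should report a running/healthy state
```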

## Paimon table test

Enter the Flink SQL client:

```
bash start_flink_client.sh
```

A `customer` table has already been created:

```sql
Flink SQL> use paimon.db_paimon;
[INFO] Execute statement succeed.

Flink SQL> show tables;
+------------+
| table name |
+------------+
| customer |
+------------+
1 row in set

Flink SQL> show create table customer;
+------------------------------------------------------------------------+
| result |
+------------------------------------------------------------------------+
| CREATE TABLE `paimon`.`db_paimon`.`customer` (
`c_custkey` INT NOT NULL,
`c_name` VARCHAR(25),
`c_address` VARCHAR(40),
`c_nationkey` INT NOT NULL,
`c_phone` CHAR(15),
`c_acctbal` DECIMAL(12, 2),
`c_mktsegment` CHAR(10),
`c_comment` VARCHAR(117),
CONSTRAINT `PK_c_custkey_c_nationkey` PRIMARY KEY (`c_custkey`, `c_nationkey`) NOT ENFORCED
) PARTITIONED BY (`c_nationkey`)
WITH (
'bucket' = '1',
'path' = 's3://warehouse/wh/db_paimon.db/customer',
'deletion-vectors.enabled' = 'true'
)
 |
+------------------------------------------------------------------------+
1 row in set

Flink SQL> desc customer;
+--------------+----------------+-------+-----------------------------+--------+-----------+
| name | type | null | key | extras | watermark |
+--------------+----------------+-------+-----------------------------+--------+-----------+
| c_custkey | INT | FALSE | PRI(c_custkey, c_nationkey) | | |
| c_name | VARCHAR(25) | TRUE | | | |
| c_address | VARCHAR(40) | TRUE | | | |
| c_nationkey | INT | FALSE | PRI(c_custkey, c_nationkey) | | |
| c_phone | CHAR(15) | TRUE | | | |
| c_acctbal | DECIMAL(12, 2) | TRUE | | | |
| c_mktsegment | CHAR(10) | TRUE | | | |
| c_comment | VARCHAR(117) | TRUE | | | |
+--------------+----------------+-------+-----------------------------+--------+-----------+
8 rows in set
```
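
The `paimon` catalog used above is registered for you by the demo scripts. For reference, a filesystem-based Paimon catalog in Flink SQL is typically declared roughly as below. The warehouse path matches the table output above; the endpoint and credentials are assumptions that depend on the MinIO service defined in the compose file, and the exact S3 option keys may vary by Paimon version:

```sql
-- Illustrative sketch only; the environment already creates this catalog.
CREATE CATALOG paimon WITH (
    'type' = 'paimon',
    'warehouse' = 's3://warehouse/wh',
    's3.endpoint' = 'http://minio:9000',   -- assumption
    's3.access-key' = 'admin',             -- assumption
    's3.secret-key' = 'password'           -- assumption
);
USE CATALOG paimon;
```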

This table already has some data:

```sql
Flink SQL> select * from customer order by c_custkey limit 4;
+-----------+--------------------+--------------------------------+-------------+-----------------+-----------+--------------+--------------------------------+
| c_custkey | c_name | c_address | c_nationkey | c_phone | c_acctbal | c_mktsegment | c_comment |
+-----------+--------------------+--------------------------------+-------------+-----------------+-----------+--------------+--------------------------------+
| 1 | Customer#000000001 | IVhzIApeRb ot,c,E | 15 | 25-989-741-2988 | 711.56 | BUILDING | to the even, regular platel... |
| 2 | Customer#000000002 | XSTf4,NCwDVaWNe6tEgvwfmRchLXak | 13 | 23-768-687-3665 | 121.65 | AUTOMOBILE | l accounts. blithely ironic... |
| 3 | Customer#000000003 | MG9kdTD2WBHm | 1 | 11-719-748-3364 | 7498.12 | AUTOMOBILE | deposits eat slyly ironic,... |
| 32 | Customer#000000032 | jD2xZzi UmId,DCtNBLXKj9q0Tl... | 15 | 25-430-914-2194 | 3471.53 | BUILDING | cial ideas. final, furious ... |
+-----------+--------------------+--------------------------------+-------------+-----------------+-----------+--------------+--------------------------------+
4 rows in set

Flink SQL> select * from customer order by c_custkey desc limit 4;
+-----------+--------------------+--------------------------------+-------------+-----------------+-----------+--------------+--------------------------------+
| c_custkey | c_name | c_address | c_nationkey | c_phone | c_acctbal | c_mktsegment | c_comment |
+-----------+--------------------+--------------------------------+-------------+-----------------+-----------+--------------+--------------------------------+
| 149987 | Customer#000149987 | P6z8nSIgW55cSydfa1bZ | 8 | 18-187-349-6326 | 5338.96 | HOUSEHOLD | aggle carefully quickly reg... |
| 149986 | Customer#000149986 | HyEJpj2jvEqt,,pA50NOvuTP | 7 | 17-654-752-5642 | 1251.17 | BUILDING | enticingly carefully carefu... |
| 149985 | Customer#000149985 | y4m,kcxXX6ZtGTJGxavBTJf52OM... | 22 | 32-595-455-4078 | 6012.98 | MACHINERY | kages affix against the bli... |
| 149984 | Customer#000149984 | ZBEyUfjRsVtUNSIv9dnnyoPYeQw... | 12 | 22-283-613-7016 | 6567.62 | HOUSEHOLD | ges integrate along the bli... |
+-----------+--------------------+--------------------------------+-------------+-----------------+-----------+--------------+--------------------------------+
4 rows in set
```

Now we can query this table through Doris. Enter the Doris MySQL client:

```
bash start_doris_client.sh
```

After entering the Doris client, you will find that a Paimon catalog has already been created, so the data in the Paimon table can be queried directly (an illustrative sketch of such a catalog definition is shown below).

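The statement below is only a sketch of what a filesystem-based Paimon catalog definition in Doris typically looks like; the warehouse path matches the table shown earlier, while the endpoint and credentials are assumptions that depend on the MinIO service in the compose file:

```sql
-- Illustrative sketch only; the demo scripts already create this catalog.
CREATE CATALOG `paimon` PROPERTIES (
    "type" = "paimon",
    "paimon.catalog.type" = "filesystem",
    "warehouse" = "s3://warehouse/wh",
    "s3.endpoint" = "http://minio:9000",   -- assumption
    "s3.access_key" = "admin",             -- assumption
    "s3.secret_key" = "password"           -- assumption
);
```
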
```sql
mysql> use paimon.db_paimon;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A

Database changed
mysql> show tables;
+---------------------+
| Tables_in_db_paimon |
+---------------------+
| customer |
+---------------------+
1 row in set (0.00 sec)

mysql> select * from customer order by c_custkey limit 4;
+-----------+--------------------+---------------------------------------+-------------+-----------------+-----------+--------------+--------------------------------------------------------------------------------------------------------+
| c_custkey | c_name | c_address | c_nationkey | c_phone | c_acctbal | c_mktsegment | c_comment |
+-----------+--------------------+---------------------------------------+-------------+-----------------+-----------+--------------+--------------------------------------------------------------------------------------------------------+
| 1 | Customer#000000001 | IVhzIApeRb ot,c,E | 15 | 25-989-741-2988 | 711.56 | BUILDING | to the even, regular platelets. regular, ironic epitaphs nag e |
| 2 | Customer#000000002 | XSTf4,NCwDVaWNe6tEgvwfmRchLXak | 13 | 23-768-687-3665 | 121.65 | AUTOMOBILE | l accounts. blithely ironic theodolites integrate boldly: caref |
| 3 | Customer#000000003 | MG9kdTD2WBHm | 1 | 11-719-748-3364 | 7498.12 | AUTOMOBILE | deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abov |
| 32 | Customer#000000032 | jD2xZzi UmId,DCtNBLXKj9q0Tlp2iQ6ZcO3J | 15 | 25-430-914-2194 | 3471.53 | BUILDING | cial ideas. final, furious requests across the e |
+-----------+--------------------+---------------------------------------+-------------+-----------------+-----------+--------------+--------------------------------------------------------------------------------------------------------+
4 rows in set (1.89 sec)

mysql> select * from customer order by c_custkey desc limit 4;
+-----------+--------------------+---------------------------------------+-------------+-----------------+-----------+--------------+---------------------------------------------------------------------------------------------------------+
| c_custkey | c_name | c_address | c_nationkey | c_phone | c_acctbal | c_mktsegment | c_comment |
+-----------+--------------------+---------------------------------------+-------------+-----------------+-----------+--------------+---------------------------------------------------------------------------------------------------------+
| 149987 | Customer#000149987 | P6z8nSIgW55cSydfa1bZ | 8 | 18-187-349-6326 | 5338.96 | HOUSEHOLD | aggle carefully quickly regular ideas-- ironic, bold packages are. regular tithes cajole regular requ |
| 149986 | Customer#000149986 | HyEJpj2jvEqt,,pA50NOvuTP | 7 | 17-654-752-5642 | 1251.17 | BUILDING | enticingly carefully careful courts. furiously |
| 149985 | Customer#000149985 | y4m,kcxXX6ZtGTJGxavBTJf52OMqBK9z | 22 | 32-595-455-4078 | 6012.98 | MACHINERY | kages affix against the blithely pending foxes. slyly final packages boost |
| 149984 | Customer#000149984 | ZBEyUfjRsVtUNSIv9dnnyoPYeQwi7czgCeeeM | 12 | 22-283-613-7016 | 6567.62 | HOUSEHOLD | ges integrate along the blithely unusual |
+-----------+--------------------+---------------------------------------+-------------+-----------------+-----------+--------------+---------------------------------------------------------------------------------------------------------+
4 rows in set (0.35 sec)
```

Doris can perform partition pruning on Paimon tables and speed up queries by reading the data files natively. We can verify both with `explain verbose`:

```sql
mysql> explain verbose select * from customer where c_nationkey < 3;
+------------------------------------------------------------------------------------------------------------------------------------------------+
| Explain String(Nereids Planner) |
+------------------------------------------------------------------------------------------------------------------------------------------------+
| ............... |
| 0:VPAIMON_SCAN_NODE(68) |
| table: customer |
| predicates: (c_nationkey[#3] < 3) |
| inputSplitNum=3, totalFileSize=193823, scanRanges=3 |
| partition=3/0 |
| backends: |
| 10002 |
| s3://warehouse/wh/db_paimon.db/customer/c_nationkey=1/bucket-0/data-15cee5b7-1bd7-42ca-9314-56d92c62c03b-0.orc start: 0 length: 66600 |
| s3://warehouse/wh/db_paimon.db/customer/c_nationkey=2/bucket-0/data-e98fb7ef-ec2b-4ad5-a496-713cb9481d56-0.orc start: 0 length: 64059 |
| s3://warehouse/wh/db_paimon.db/customer/c_nationkey=0/bucket-0/data-431be05d-50fa-401f-9680-d646757d0f95-0.orc start: 0 length: 63164 |
| cardinality=18751, numNodes=1 |
| pushdown agg=NONE |
| paimonNativeReadSplits=3/3 |
| PaimonSplitStats: |
| SplitStat [type=NATIVE, rowCount=771, rawFileConvertable=true, hasDeletionVector=false] |
| SplitStat [type=NATIVE, rowCount=750, rawFileConvertable=true, hasDeletionVector=false] |
| SplitStat [type=NATIVE, rowCount=750, rawFileConvertable=true, hasDeletionVector=false] |
| tuple ids: 0 |
| |
| Tuples: |
| TupleDescriptor{id=0, tbl=customer} |
| SlotDescriptor{id=0, col=c_custkey, colUniqueId=0, type=INT, nullable=true, isAutoIncrement=false, subColPath=null} |
| SlotDescriptor{id=1, col=c_name, colUniqueId=1, type=TEXT, nullable=true, isAutoIncrement=false, subColPath=null} |
| SlotDescriptor{id=2, col=c_address, colUniqueId=2, type=TEXT, nullable=true, isAutoIncrement=false, subColPath=null} |
| SlotDescriptor{id=3, col=c_nationkey, colUniqueId=3, type=INT, nullable=true, isAutoIncrement=false, subColPath=null} |
| SlotDescriptor{id=4, col=c_phone, colUniqueId=4, type=TEXT, nullable=true, isAutoIncrement=false, subColPath=null} |
| SlotDescriptor{id=5, col=c_acctbal, colUniqueId=5, type=DECIMALV3(12, 2), nullable=true, isAutoIncrement=false, subColPath=null} |
| SlotDescriptor{id=6, col=c_mktsegment, colUniqueId=6, type=TEXT, nullable=true, isAutoIncrement=false, subColPath=null} |
| SlotDescriptor{id=7, col=c_comment, colUniqueId=7, type=TEXT, nullable=true, isAutoIncrement=false, subColPath=null} |
| |
| |
| |
| Statistics |
| planed with unknown column statistics |
+------------------------------------------------------------------------------------------------------------------------------------------------+
66 rows in set (0.17 sec)
```

The query plan shows that Doris reads only the three partition files whose `c_nationkey` is less than 3, and that the files are read with the native reader (`paimonNativeReadSplits=3/3`).

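If you want to compare the native reader with the JNI-based reader, recent Doris versions expose a session variable for this; treat the variable name and availability below as version-dependent assumptions:

```sql
-- Assumption: the force_jni_scanner session variable is available in this Doris build.
SET force_jni_scanner = true;    -- read Paimon through the JNI reader instead of the native reader
explain verbose select * from customer where c_nationkey < 3;
SET force_jni_scanner = false;   -- back to the default behaviour
```
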
In addition, Doris supports Paimon's deletion vectors.

First, we modify some data through Flink:

```sql
Flink SQL> update customer set c_address='c_address_update' where c_nationkey = 1;
[INFO] Submitting SQL update statement to the cluster...
[INFO] SQL update statement has been successfully submitted to the cluster:
Job ID: ff838b7b778a94396b332b0d93c8f7ac
```

After the Flink update job completes (one way to check is shown below), we can run the same `explain verbose` through Doris again:

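To confirm the job has finished, you can open the Flink web UI (port 8081 inside the JobManager; whether it is mapped to the host depends on the compose file) or list jobs from inside the JobManager container. The container name below is an assumption, so look it up first:

```
sudo docker ps --format '{{.Names}}' | grep -i jobmanager      # find the Flink JobManager container name
sudo docker exec -it <jobmanager-container> flink list -a      # replace with the name found above
```
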
```sql
mysql> explain verbose select * from customer where c_nationkey < 3;
+------------------------------------------------------------------------------------------------------------------------------------------------+
| Explain String(Nereids Planner) |
+------------------------------------------------------------------------------------------------------------------------------------------------+
| ............... |
| |
| 0:VPAIMON_SCAN_NODE(68) |
| table: customer |
| predicates: (c_nationkey[#3] < 3) |
| inputSplitNum=4, totalFileSize=238324, scanRanges=4 |
| partition=3/0 |
| backends: |
| 10002 |
| s3://warehouse/wh/db_paimon.db/customer/c_nationkey=1/bucket-0/data-15cee5b7-1bd7-42ca-9314-56d92c62c03b-0.orc start: 0 length: 66600 |
| s3://warehouse/wh/db_paimon.db/customer/c_nationkey=1/bucket-0/data-5d50255a-2215-4010-b976-d5dc656f3444-0.orc start: 0 length: 44501 |
| s3://warehouse/wh/db_paimon.db/customer/c_nationkey=2/bucket-0/data-e98fb7ef-ec2b-4ad5-a496-713cb9481d56-0.orc start: 0 length: 64059 |
| s3://warehouse/wh/db_paimon.db/customer/c_nationkey=0/bucket-0/data-431be05d-50fa-401f-9680-d646757d0f95-0.orc start: 0 length: 63164 |
| cardinality=18751, numNodes=1 |
| pushdown agg=NONE |
| paimonNativeReadSplits=4/4 |
| PaimonSplitStats: |
| SplitStat [type=NATIVE, rowCount=1542, rawFileConvertable=true, hasDeletionVector=true] |
| SplitStat [type=NATIVE, rowCount=750, rawFileConvertable=true, hasDeletionVector=false] |
| SplitStat [type=NATIVE, rowCount=750, rawFileConvertable=true, hasDeletionVector=false] |
| tuple ids: 0 |
| |
| Tuples: |
| TupleDescriptor{id=0, tbl=customer} |
| SlotDescriptor{id=0, col=c_custkey, colUniqueId=0, type=INT, nullable=true, isAutoIncrement=false, subColPath=null} |
| SlotDescriptor{id=1, col=c_name, colUniqueId=1, type=TEXT, nullable=true, isAutoIncrement=false, subColPath=null} |
| SlotDescriptor{id=2, col=c_address, colUniqueId=2, type=TEXT, nullable=true, isAutoIncrement=false, subColPath=null} |
| SlotDescriptor{id=3, col=c_nationkey, colUniqueId=3, type=INT, nullable=true, isAutoIncrement=false, subColPath=null} |
| SlotDescriptor{id=4, col=c_phone, colUniqueId=4, type=TEXT, nullable=true, isAutoIncrement=false, subColPath=null} |
| SlotDescriptor{id=5, col=c_acctbal, colUniqueId=5, type=DECIMALV3(12, 2), nullable=true, isAutoIncrement=false, subColPath=null} |
| SlotDescriptor{id=6, col=c_mktsegment, colUniqueId=6, type=TEXT, nullable=true, isAutoIncrement=false, subColPath=null} |
| SlotDescriptor{id=7, col=c_comment, colUniqueId=7, type=TEXT, nullable=true, isAutoIncrement=false, subColPath=null} |
| |
| |
| |
| Statistics |
| planed with unknown column statistics |
+------------------------------------------------------------------------------------------------------------------------------------------------+
67 rows in set (0.23 sec)
```

From the plan we can see that Doris reads 4 file splits with the native method; they correspond to 3 Paimon splits, one of which carries a deletion vector.

Finally, we can read the modified data through Doris:

```sql
mysql> select * from customer where c_nationkey=1 limit 2;
+-----------+--------------------+-----------------+-------------+-----------------+-----------+--------------+--------------------------------------------------------------------------------------------------------+
| c_custkey | c_name | c_address | c_nationkey | c_phone | c_acctbal | c_mktsegment | c_comment |
+-----------+--------------------+-----------------+-------------+-----------------+-----------+--------------+--------------------------------------------------------------------------------------------------------+
| 3 | Customer#000000003 | c_address_update | 1 | 11-719-748-3364 | 7498.12 | AUTOMOBILE | deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abov |
| 513 | Customer#000000513 | c_address_update | 1 | 11-861-303-6887 | 955.37 | HOUSEHOLD | press along the quickly regular instructions. regular requests against the carefully ironic s |
+-----------+--------------------+-----------------+-------------+-----------------+-----------+--------------+--------------------------------------------------------------------------------------------------------+
2 rows in set (0.19 sec)
```
@ -0,0 +1,312 @@
|
||||
################################################################################
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
################################################################################
|
||||
|
||||
# These parameters are required for Java 17 support.
|
||||
# They can be safely removed when using Java 8/11.
|
||||
env.java.opts.all: --add-exports=java.base/sun.net.util=ALL-UNNAMED --add-exports=java.rmi/sun.rmi.registry=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED --add-exports=java.security.jgss/sun.security.krb5=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.text=ALL-UNNAMED --add-opens=java.base/java.time=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED
|
||||
|
||||
#==============================================================================
|
||||
# Common
|
||||
#==============================================================================
|
||||
|
||||
# The external address of the host on which the JobManager runs and can be
|
||||
# reached by the TaskManagers and any clients which want to connect. This setting
|
||||
# is only used in Standalone mode and may be overwritten on the JobManager side
|
||||
# by specifying the --host <hostname> parameter of the bin/jobmanager.sh executable.
|
||||
# In high availability mode, if you use the bin/start-cluster.sh script and setup
|
||||
# the conf/masters file, this will be taken care of automatically. Yarn
|
||||
# automatically configure the host name based on the hostname of the node where the
|
||||
# JobManager runs.
|
||||
|
||||
jobmanager.rpc.address: jobmanager
|
||||
|
||||
# The RPC port where the JobManager is reachable.
|
||||
|
||||
jobmanager.rpc.port: 6123
|
||||
|
||||
# The host interface the JobManager will bind to. By default, this is localhost, and will prevent
|
||||
# the JobManager from communicating outside the machine/container it is running on.
|
||||
# On YARN this setting will be ignored if it is set to 'localhost', defaulting to 0.0.0.0.
|
||||
# On Kubernetes this setting will be ignored, defaulting to 0.0.0.0.
|
||||
#
|
||||
# To enable this, set the bind-host address to one that has access to an outside facing network
|
||||
# interface, such as 0.0.0.0.
|
||||
|
||||
jobmanager.bind-host: 0.0.0.0
|
||||
|
||||
|
||||
# The total process memory size for the JobManager.
|
||||
#
|
||||
# Note this accounts for all memory usage within the JobManager process, including JVM metaspace and other overhead.
|
||||
|
||||
jobmanager.memory.process.size: 1600m
|
||||
|
||||
# The host interface the TaskManager will bind to. By default, this is localhost, and will prevent
|
||||
# the TaskManager from communicating outside the machine/container it is running on.
|
||||
# On YARN this setting will be ignored if it is set to 'localhost', defaulting to 0.0.0.0.
|
||||
# On Kubernetes this setting will be ignored, defaulting to 0.0.0.0.
|
||||
#
|
||||
# To enable this, set the bind-host address to one that has access to an outside facing network
|
||||
# interface, such as 0.0.0.0.
|
||||
|
||||
taskmanager.bind-host: 0.0.0.0
|
||||
|
||||
# The address of the host on which the TaskManager runs and can be reached by the JobManager and
|
||||
# other TaskManagers. If not specified, the TaskManager will try different strategies to identify
|
||||
# the address.
|
||||
#
|
||||
# Note this address needs to be reachable by the JobManager and forward traffic to one of
|
||||
# the interfaces the TaskManager is bound to (see 'taskmanager.bind-host').
|
||||
#
|
||||
# Note also that unless all TaskManagers are running on the same machine, this address needs to be
|
||||
# configured separately for each TaskManager.
|
||||
|
||||
|
||||
# The total process memory size for the TaskManager.
|
||||
#
|
||||
# Note this accounts for all memory usage within the TaskManager process, including JVM metaspace and other overhead.
|
||||
|
||||
taskmanager.memory.process.size: 1728m
|
||||
|
||||
# To exclude JVM metaspace and overhead, please, use total Flink memory size instead of 'taskmanager.memory.process.size'.
|
||||
# It is not recommended to set both 'taskmanager.memory.process.size' and Flink memory.
|
||||
#
|
||||
# taskmanager.memory.flink.size: 1280m
|
||||
|
||||
# The number of task slots that each TaskManager offers. Each slot runs one parallel pipeline.
|
||||
|
||||
taskmanager.numberOfTaskSlots: 1
|
||||
|
||||
# The parallelism used for programs that did not specify and other parallelism.
|
||||
|
||||
parallelism.default: 1
|
||||
|
||||
# The default file system scheme and authority.
|
||||
#
|
||||
# By default file paths without scheme are interpreted relative to the local
|
||||
# root file system 'file:///'. Use this to override the default and interpret
|
||||
# relative paths relative to a different file system,
|
||||
# for example 'hdfs://mynamenode:12345'
|
||||
#
|
||||
# fs.default-scheme
|
||||
|
||||
#==============================================================================
|
||||
# High Availability
|
||||
#==============================================================================
|
||||
|
||||
# The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
|
||||
#
|
||||
# high-availability.type: zookeeper
|
||||
|
||||
# The path where metadata for master recovery is persisted. While ZooKeeper stores
|
||||
# the small ground truth for checkpoint and leader election, this location stores
|
||||
# the larger objects, like persisted dataflow graphs.
|
||||
#
|
||||
# Must be a durable file system that is accessible from all nodes
|
||||
# (like HDFS, S3, Ceph, nfs, ...)
|
||||
#
|
||||
# high-availability.storageDir: hdfs:///flink/ha/
|
||||
|
||||
# The list of ZooKeeper quorum peers that coordinate the high-availability
|
||||
# setup. This must be a list of the form:
|
||||
# "host1:clientPort,host2:clientPort,..." (default clientPort: 2181)
|
||||
#
|
||||
# high-availability.zookeeper.quorum: localhost:2181
|
||||
|
||||
|
||||
# ACL options are based on https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_BuiltinACLSchemes
|
||||
# It can be either "creator" (ZOO_CREATE_ALL_ACL) or "open" (ZOO_OPEN_ACL_UNSAFE)
|
||||
# The default value is "open" and it can be changed to "creator" if ZK security is enabled
|
||||
#
|
||||
# high-availability.zookeeper.client.acl: open
|
||||
|
||||
#==============================================================================
|
||||
# Fault tolerance and checkpointing
|
||||
#==============================================================================
|
||||
|
||||
# The backend that will be used to store operator state checkpoints if
|
||||
# checkpointing is enabled. Checkpointing is enabled when execution.checkpointing.interval > 0.
|
||||
#
|
||||
# Execution checkpointing related parameters. Please refer to CheckpointConfig and ExecutionCheckpointingOptions for more details.
|
||||
#
|
||||
# execution.checkpointing.interval: 3min
|
||||
# execution.checkpointing.externalized-checkpoint-retention: [DELETE_ON_CANCELLATION, RETAIN_ON_CANCELLATION]
|
||||
# execution.checkpointing.max-concurrent-checkpoints: 1
|
||||
# execution.checkpointing.min-pause: 0
|
||||
# execution.checkpointing.mode: [EXACTLY_ONCE, AT_LEAST_ONCE]
|
||||
# execution.checkpointing.timeout: 10min
|
||||
# execution.checkpointing.tolerable-failed-checkpoints: 0
|
||||
# execution.checkpointing.unaligned: false
|
||||
#
|
||||
# Supported backends are 'hashmap', 'rocksdb', or the
|
||||
# <class-name-of-factory>.
|
||||
#
|
||||
# state.backend.type: hashmap
|
||||
|
||||
# Directory for checkpoints filesystem, when using any of the default bundled
|
||||
# state backends.
|
||||
#
|
||||
# state.checkpoints.dir: hdfs://namenode-host:port/flink-checkpoints
|
||||
|
||||
# Default target directory for savepoints, optional.
|
||||
#
|
||||
# state.savepoints.dir: hdfs://namenode-host:port/flink-savepoints
|
||||
|
||||
# Flag to enable/disable incremental checkpoints for backends that
|
||||
# support incremental checkpoints (like the RocksDB state backend).
|
||||
#
|
||||
# state.backend.incremental: false
|
||||
|
||||
# The failover strategy, i.e., how the job computation recovers from task failures.
|
||||
# Only restart tasks that may have been affected by the task failure, which typically includes
|
||||
# downstream tasks and potentially upstream tasks if their produced data is no longer available for consumption.
|
||||
|
||||
jobmanager.execution.failover-strategy: region
|
||||
|
||||
#==============================================================================
|
||||
# Rest & web frontend
|
||||
#==============================================================================
|
||||
|
||||
# The port to which the REST client connects to. If rest.bind-port has
|
||||
# not been specified, then the server will bind to this port as well.
|
||||
#
|
||||
#rest.port: 8081
|
||||
|
||||
# The address to which the REST client will connect to
|
||||
#
|
||||
rest.address: 0.0.0.0
|
||||
|
||||
# Port range for the REST and web server to bind to.
|
||||
#
|
||||
#rest.bind-port: 8080-8090
|
||||
|
||||
# The address that the REST & web server binds to
|
||||
# By default, this is localhost, which prevents the REST & web server from
|
||||
# being able to communicate outside of the machine/container it is running on.
|
||||
#
|
||||
# To enable this, set the bind address to one that has access to outside-facing
|
||||
# network interface, such as 0.0.0.0.
|
||||
#
|
||||
rest.bind-address: 0.0.0.0
|
||||
|
||||
# Flag to specify whether job submission is enabled from the web-based
|
||||
# runtime monitor. Uncomment to disable.
|
||||
|
||||
#web.submit.enable: false
|
||||
|
||||
# Flag to specify whether job cancellation is enabled from the web-based
|
||||
# runtime monitor. Uncomment to disable.
|
||||
|
||||
#web.cancel.enable: false
|
||||
|
||||
#==============================================================================
|
||||
# Advanced
|
||||
#==============================================================================
|
||||
|
||||
# Override the directories for temporary files. If not specified, the
|
||||
# system-specific Java temporary directory (java.io.tmpdir property) is taken.
|
||||
#
|
||||
# For framework setups on Yarn, Flink will automatically pick up the
|
||||
# containers' temp directories without any need for configuration.
|
||||
#
|
||||
# Add a delimited list for multiple directories, using the system directory
|
||||
# delimiter (colon ':' on unix) or a comma, e.g.:
|
||||
# /data1/tmp:/data2/tmp:/data3/tmp
|
||||
#
|
||||
# Note: Each directory entry is read from and written to by a different I/O
|
||||
# thread. You can include the same directory multiple times in order to create
|
||||
# multiple I/O threads against that directory. This is for example relevant for
|
||||
# high-throughput RAIDs.
|
||||
#
|
||||
# io.tmp.dirs: /tmp
|
||||
|
||||
# The classloading resolve order. Possible values are 'child-first' (Flink's default)
|
||||
# and 'parent-first' (Java's default).
|
||||
#
|
||||
# Child first classloading allows users to use different dependency/library
|
||||
# versions in their application than those in the classpath. Switching back
|
||||
# to 'parent-first' may help with debugging dependency issues.
|
||||
#
|
||||
# classloader.resolve-order: child-first
|
||||
|
||||
# The amount of memory going to the network stack. These numbers usually need
|
||||
# no tuning. Adjusting them may be necessary in case of an "Insufficient number
|
||||
# of network buffers" error. The default min is 64MB, the default max is 1GB.
|
||||
#
|
||||
# taskmanager.memory.network.fraction: 0.1
|
||||
# taskmanager.memory.network.min: 64mb
|
||||
# taskmanager.memory.network.max: 1gb
|
||||
|
||||
#==============================================================================
|
||||
# Flink Cluster Security Configuration
|
||||
#==============================================================================
|
||||
|
||||
# Kerberos authentication for various components - Hadoop, ZooKeeper, and connectors -
|
||||
# may be enabled in four steps:
|
||||
# 1. configure the local krb5.conf file
|
||||
# 2. provide Kerberos credentials (either a keytab or a ticket cache w/ kinit)
|
||||
# 3. make the credentials available to various JAAS login contexts
|
||||
# 4. configure the connector to use JAAS/SASL
|
||||
|
||||
# The below configure how Kerberos credentials are provided. A keytab will be used instead of
|
||||
# a ticket cache if the keytab path and principal are set.
|
||||
|
||||
# security.kerberos.login.use-ticket-cache: true
|
||||
# security.kerberos.login.keytab: /path/to/kerberos/keytab
|
||||
# security.kerberos.login.principal: flink-user
|
||||
|
||||
# The configuration below defines which JAAS login contexts
|
||||
|
||||
# security.kerberos.login.contexts: Client,KafkaClient
|
||||
|
||||
#==============================================================================
|
||||
# ZK Security Configuration
|
||||
#==============================================================================
|
||||
|
||||
# Below configurations are applicable if ZK ensemble is configured for security
|
||||
|
||||
# Override below configuration to provide custom ZK service name if configured
|
||||
# zookeeper.sasl.service-name: zookeeper
|
||||
|
||||
# The configuration below must match one of the values set in "security.kerberos.login.contexts"
|
||||
# zookeeper.sasl.login-context-name: Client
|
||||
|
||||
#==============================================================================
|
||||
# HistoryServer
|
||||
#==============================================================================
|
||||
|
||||
# The HistoryServer is started and stopped via bin/historyserver.sh (start|stop)
|
||||
|
||||
# Directory to upload completed jobs to. Add this directory to the list of
|
||||
# monitored directories of the HistoryServer as well (see below).
|
||||
#jobmanager.archive.fs.dir: hdfs:///completed-jobs/
|
||||
|
||||
# The address under which the web-based HistoryServer listens.
|
||||
#historyserver.web.address: 0.0.0.0
|
||||
|
||||
# The port under which the web-based HistoryServer listens.
|
||||
#historyserver.web.port: 8082
|
||||
|
||||
# Comma separated list of directories to monitor for completed jobs.
|
||||
#historyserver.archive.fs.dir: hdfs:///completed-jobs/
|
||||
|
||||
# Interval in milliseconds for refreshing the monitored directories.
|
||||
#historyserver.archive.fs.refresh-interval: 10000
|
||||
|
||||
blob.server.port: 6124
|
||||
query.server.port: 6125
|
||||
@ -0,0 +1,67 @@
|
||||
################################################################################
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
################################################################################
|
||||
|
||||
# Allows this configuration to be modified at runtime. The file will be checked every 30 seconds.
|
||||
monitorInterval=30
|
||||
|
||||
rootLogger.level = INFO
|
||||
rootLogger.appenderRef.file.ref = FileAppender
|
||||
|
||||
# Log all infos in the given file
|
||||
appender.file.name = FileAppender
|
||||
appender.file.type = FILE
|
||||
appender.file.append = false
|
||||
appender.file.fileName = ${sys:log.file}
|
||||
appender.file.layout.type = PatternLayout
|
||||
appender.file.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
|
||||
|
||||
# Log output from org.apache.flink.yarn to the console. This is used by the
|
||||
# CliFrontend class when using a per-job YARN cluster.
|
||||
logger.yarn.name = org.apache.flink.yarn
|
||||
logger.yarn.level = INFO
|
||||
logger.yarn.appenderRef.console.ref = ConsoleAppender
|
||||
logger.yarncli.name = org.apache.flink.yarn.cli.FlinkYarnSessionCli
|
||||
logger.yarncli.level = INFO
|
||||
logger.yarncli.appenderRef.console.ref = ConsoleAppender
|
||||
logger.hadoop.name = org.apache.hadoop
|
||||
logger.hadoop.level = WARN
|
||||
logger.hadoop.appenderRef.console.ref = ConsoleAppender
|
||||
|
||||
# Make sure hive logs go to the file.
|
||||
logger.hive.name = org.apache.hadoop.hive
|
||||
logger.hive.level = INFO
|
||||
logger.hive.additivity = false
|
||||
logger.hive.appenderRef.file.ref = FileAppender
|
||||
|
||||
# Log output from org.apache.flink.kubernetes to the console.
|
||||
logger.kubernetes.name = org.apache.flink.kubernetes
|
||||
logger.kubernetes.level = INFO
|
||||
logger.kubernetes.appenderRef.console.ref = ConsoleAppender
|
||||
|
||||
appender.console.name = ConsoleAppender
|
||||
appender.console.type = CONSOLE
|
||||
appender.console.layout.type = PatternLayout
|
||||
appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
|
||||
|
||||
# suppress the warning that hadoop native libraries are not loaded (irrelevant for the client)
|
||||
logger.hadoopnative.name = org.apache.hadoop.util.NativeCodeLoader
|
||||
logger.hadoopnative.level = OFF
|
||||
|
||||
# Suppress the irrelevant (wrong) warnings from the Netty channel handler
|
||||
logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline
|
||||
logger.netty.level = OFF
|
||||
@ -0,0 +1,70 @@
|
||||
################################################################################
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
################################################################################
|
||||
|
||||
# Allows this configuration to be modified at runtime. The file will be checked every 30 seconds.
|
||||
monitorInterval=30
|
||||
|
||||
# This affects logging for both user code and Flink
|
||||
rootLogger.level = INFO
|
||||
rootLogger.appenderRef.console.ref = ConsoleAppender
|
||||
rootLogger.appenderRef.rolling.ref = RollingFileAppender
|
||||
|
||||
# Uncomment this if you want to _only_ change Flink's logging
|
||||
#logger.flink.name = org.apache.flink
|
||||
#logger.flink.level = INFO
|
||||
|
||||
# The following lines keep the log level of common libraries/connectors on
|
||||
# log level INFO. The root logger does not override this. You have to manually
|
||||
# change the log levels here.
|
||||
logger.pekko.name = org.apache.pekko
|
||||
logger.pekko.level = INFO
|
||||
logger.kafka.name= org.apache.kafka
|
||||
logger.kafka.level = INFO
|
||||
logger.hadoop.name = org.apache.hadoop
|
||||
logger.hadoop.level = WARN
|
||||
logger.zookeeper.name = org.apache.zookeeper
|
||||
logger.zookeeper.level = INFO
|
||||
logger.shaded_zookeeper.name = org.apache.flink.shaded.zookeeper3
|
||||
logger.shaded_zookeeper.level = INFO
|
||||
|
||||
# Log all infos to the console
|
||||
appender.console.name = ConsoleAppender
|
||||
appender.console.type = CONSOLE
|
||||
appender.console.layout.type = PatternLayout
|
||||
appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
|
||||
appender.console.filter.threshold.type = ThresholdFilter
|
||||
appender.console.filter.threshold.level = ${sys:console.log.level:-ALL}
|
||||
|
||||
# Log all infos in the given rolling file
|
||||
appender.rolling.name = RollingFileAppender
|
||||
appender.rolling.type = RollingFile
|
||||
appender.rolling.append = true
|
||||
appender.rolling.fileName = ${sys:log.file}
|
||||
appender.rolling.filePattern = ${sys:log.file}.%i
|
||||
appender.rolling.layout.type = PatternLayout
|
||||
appender.rolling.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
|
||||
appender.rolling.policies.type = Policies
|
||||
appender.rolling.policies.size.type = SizeBasedTriggeringPolicy
|
||||
appender.rolling.policies.size.size=100MB
|
||||
appender.rolling.policies.startup.type = OnStartupTriggeringPolicy
|
||||
appender.rolling.strategy.type = DefaultRolloverStrategy
|
||||
appender.rolling.strategy.max = ${env:MAX_LOG_FILE_NUMBER:-10}
|
||||
|
||||
# Suppress the irrelevant (wrong) warnings from the Netty channel handler
|
||||
logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline
|
||||
logger.netty.level = OFF
|
||||
@ -0,0 +1,42 @@
|
||||
################################################################################
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
################################################################################
|
||||
|
||||
# Allows this configuration to be modified at runtime. The file will be checked every 30 seconds.
|
||||
monitorInterval=30
|
||||
|
||||
rootLogger.level = INFO
|
||||
rootLogger.appenderRef.console.ref = ConsoleAppender
|
||||
|
||||
appender.console.name = ConsoleAppender
|
||||
appender.console.type = CONSOLE
|
||||
appender.console.layout.type = PatternLayout
|
||||
appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
|
||||
|
||||
# Suppress the irrelevant (wrong) warnings from the Netty channel handler
|
||||
logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline
|
||||
logger.netty.level = OFF
|
||||
logger.zookeeper.name = org.apache.zookeeper
|
||||
logger.zookeeper.level = WARN
|
||||
logger.shaded_zookeeper.name = org.apache.flink.shaded.zookeeper3
|
||||
logger.shaded_zookeeper.level = WARN
|
||||
logger.curator.name = org.apache.flink.shaded.org.apache.curator.framework
|
||||
logger.curator.level = WARN
|
||||
logger.runtimeutils.name= org.apache.flink.runtime.util.ZooKeeperUtils
|
||||
logger.runtimeutils.level = WARN
|
||||
logger.runtimeleader.name = org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalDriver
|
||||
logger.runtimeleader.level = WARN
|
||||
@ -0,0 +1,61 @@
|
||||
################################################################################
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
################################################################################
|
||||
|
||||
# Allows this configuration to be modified at runtime. The file will be checked every 30 seconds.
|
||||
monitorInterval=10
|
||||
|
||||
# This affects logging for both user code and Flink
|
||||
rootLogger.level = INFO
|
||||
rootLogger.appenderRef.file.ref = MainAppender
|
||||
|
||||
# Uncomment this if you want to _only_ change Flink's logging
|
||||
#logger.flink.name = org.apache.flink
|
||||
#logger.flink.level = INFO
|
||||
|
||||
# The following lines keep the log level of common libraries/connectors on
|
||||
# log level INFO. The root logger does not override this. You have to manually
|
||||
# change the log levels here.
|
||||
logger.pekko.name = org.apache.pekko
|
||||
logger.pekko.level = INFO
|
||||
logger.kafka.name= org.apache.kafka
|
||||
logger.kafka.level = INFO
|
||||
logger.hadoop.name = org.apache.hadoop
|
||||
logger.hadoop.level = WARN
|
||||
logger.zookeeper.name = org.apache.zookeeper
|
||||
logger.zookeeper.level = INFO
|
||||
logger.shaded_zookeeper.name = org.apache.flink.shaded.zookeeper3
|
||||
logger.shaded_zookeeper.level = INFO
|
||||
|
||||
# Log all infos in the given file
|
||||
appender.main.name = MainAppender
|
||||
appender.main.type = RollingFile
|
||||
appender.main.append = true
|
||||
appender.main.fileName = ${sys:log.file}
|
||||
appender.main.filePattern = ${sys:log.file}.%i
|
||||
appender.main.layout.type = PatternLayout
|
||||
appender.main.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
|
||||
appender.main.policies.type = Policies
|
||||
appender.main.policies.size.type = SizeBasedTriggeringPolicy
|
||||
appender.main.policies.size.size = 100MB
|
||||
appender.main.policies.startup.type = OnStartupTriggeringPolicy
|
||||
appender.main.strategy.type = DefaultRolloverStrategy
|
||||
appender.main.strategy.max = ${env:MAX_LOG_FILE_NUMBER:-10}
|
||||
|
||||
# Suppress the irrelevant (wrong) warnings from the Netty channel handler
|
||||
logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline
|
||||
logger.netty.level = OFF
|
||||
@ -0,0 +1,67 @@
|
||||
<!--
|
||||
~ Licensed to the Apache Software Foundation (ASF) under one
|
||||
~ or more contributor license agreements. See the NOTICE file
|
||||
~ distributed with this work for additional information
|
||||
~ regarding copyright ownership. The ASF licenses this file
|
||||
~ to you under the Apache License, Version 2.0 (the
|
||||
~ "License"); you may not use this file except in compliance
|
||||
~ with the License. You may obtain a copy of the License at
|
||||
~
|
||||
~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~
|
||||
~ Unless required by applicable law or agreed to in writing, software
|
||||
~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~ See the License for the specific language governing permissions and
|
||||
~ limitations under the License.
|
||||
-->
|
||||
|
||||
<configuration>
|
||||
<appender name="console" class="ch.qos.logback.core.ConsoleAppender">
|
||||
<encoder>
|
||||
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{60} %X{sourceThread} - %msg%n</pattern>
|
||||
</encoder>
|
||||
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
|
||||
<level>${console.log.level:-ALL}</level>
|
||||
</filter>
|
||||
</appender>
|
||||
|
||||
<appender name="rolling" class="ch.qos.logback.core.rolling.RollingFileAppender">
|
||||
<file>${log.file}</file>
|
||||
<append>false</append>
|
||||
|
||||
<rollingPolicy class="ch.qos.logback.core.rolling.FixedWindowRollingPolicy">
|
||||
<fileNamePattern>${log.file}.%i</fileNamePattern>
|
||||
<minIndex>1</minIndex>
|
||||
<maxIndex>10</maxIndex>
|
||||
</rollingPolicy>
|
||||
|
||||
<triggeringPolicy class="ch.qos.logback.core.rolling.SizeBasedTriggeringPolicy">
|
||||
<maxFileSize>100MB</maxFileSize>
|
||||
</triggeringPolicy>
|
||||
|
||||
<encoder>
|
||||
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{60} %X{sourceThread} - %msg%n</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<!-- This affects logging for both user code and Flink -->
|
||||
<root level="INFO">
|
||||
<appender-ref ref="console"/>
|
||||
<appender-ref ref="rolling"/>
|
||||
</root>
|
||||
|
||||
<!-- Uncomment this if you want to only change Flink's logging -->
|
||||
<!--<logger name="org.apache.flink" level="INFO"/>-->
|
||||
|
||||
<!-- The following lines keep the log level of common libraries/connectors on
|
||||
log level INFO. The root logger does not override this. You have to manually
|
||||
change the log levels here. -->
|
||||
<logger name="org.apache.pekko" level="INFO"/>
|
||||
<logger name="org.apache.kafka" level="INFO"/>
|
||||
<logger name="org.apache.hadoop" level="WARN"/>
|
||||
<logger name="org.apache.zookeeper" level="INFO"/>
|
||||
|
||||
<!-- Suppress the irrelevant (wrong) warnings from the Netty channel handler -->
|
||||
<logger name="org.jboss.netty.channel.DefaultChannelPipeline" level="ERROR"/>
|
||||
</configuration>
|
||||
@ -0,0 +1,39 @@
|
||||
<!--
|
||||
~ Licensed to the Apache Software Foundation (ASF) under one
|
||||
~ or more contributor license agreements. See the NOTICE file
|
||||
~ distributed with this work for additional information
|
||||
~ regarding copyright ownership. The ASF licenses this file
|
||||
~ to you under the Apache License, Version 2.0 (the
|
||||
~ "License"); you may not use this file except in compliance
|
||||
~ with the License. You may obtain a copy of the License at
|
||||
~
|
||||
~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~
|
||||
~ Unless required by applicable law or agreed to in writing, software
|
||||
~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~ See the License for the specific language governing permissions and
|
||||
~ limitations under the License.
|
||||
-->
|
||||
|
||||
<configuration>
|
||||
<appender name="file" class="ch.qos.logback.core.FileAppender">
|
||||
<file>${log.file}</file>
|
||||
<append>false</append>
|
||||
<encoder>
|
||||
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{60} %X{sourceThread} - %msg%n</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<appender name="console" class="ch.qos.logback.core.ConsoleAppender">
|
||||
<encoder>
|
||||
<pattern>%d{yyyy-MM-dd HH:mm:ss} %-5level %logger{60} %X{sourceThread} - %msg%n</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<logger name="ch.qos.logback" level="WARN" />
|
||||
<root level="INFO">
|
||||
<appender-ref ref="file"/>
|
||||
<appender-ref ref="console"/>
|
||||
</root>
|
||||
</configuration>
|
||||
@ -0,0 +1,58 @@
|
||||
<!--
|
||||
~ Licensed to the Apache Software Foundation (ASF) under one
|
||||
~ or more contributor license agreements. See the NOTICE file
|
||||
~ distributed with this work for additional information
|
||||
~ regarding copyright ownership. The ASF licenses this file
|
||||
~ to you under the Apache License, Version 2.0 (the
|
||||
~ "License"); you may not use this file except in compliance
|
||||
~ with the License. You may obtain a copy of the License at
|
||||
~
|
||||
~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~
|
||||
~ Unless required by applicable law or agreed to in writing, software
|
||||
~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~ See the License for the specific language governing permissions and
|
||||
~ limitations under the License.
|
||||
-->
|
||||
|
||||
<configuration>
|
||||
<appender name="file" class="ch.qos.logback.core.FileAppender">
|
||||
<file>${log.file}</file>
|
||||
<append>false</append>
|
||||
<encoder>
|
||||
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{60} %X{sourceThread} - %msg%n</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<!-- This affects logging for both user code and Flink -->
|
||||
<root level="INFO">
|
||||
<appender-ref ref="file"/>
|
||||
</root>
|
||||
|
||||
<!-- Uncomment this if you want to only change Flink's logging -->
|
||||
<!--<logger name="org.apache.flink" level="INFO">-->
|
||||
<!--<appender-ref ref="file"/>-->
|
||||
<!--</logger>-->
|
||||
|
||||
<!-- The following lines keep the log level of common libraries/connectors on
|
||||
log level INFO. The root logger does not override this. You have to manually
|
||||
change the log levels here. -->
|
||||
<logger name="org.apache.pekko" level="INFO">
|
||||
<appender-ref ref="file"/>
|
||||
</logger>
|
||||
<logger name="org.apache.kafka" level="INFO">
|
||||
<appender-ref ref="file"/>
|
||||
</logger>
|
||||
<logger name="org.apache.hadoop" level="WARN">
|
||||
<appender-ref ref="file"/>
|
||||
</logger>
|
||||
<logger name="org.apache.zookeeper" level="INFO">
|
||||
<appender-ref ref="file"/>
|
||||
</logger>
|
||||
|
||||
<!-- Suppress the irrelevant (wrong) warnings from the Netty channel handler -->
|
||||
<logger name="org.jboss.netty.channel.DefaultChannelPipeline" level="ERROR">
|
||||
<appender-ref ref="file"/>
|
||||
</logger>
|
||||
</configuration>
|
||||
samples/datalake/iceberg_and_paimon/data/flink-conf/masters (new file, 18 lines)

@@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

localhost:8081
samples/datalake/iceberg_and_paimon/data/flink-conf/workers (new file, 18 lines)

@@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

localhost
samples/datalake/iceberg_and_paimon/data/flink-conf/zoo.cfg (new file, 36 lines)

@@ -0,0 +1,36 @@
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

# The number of milliseconds of each tick
tickTime=2000

# The number of ticks that the initial synchronization phase can take
initLimit=10

# The number of ticks that can pass between sending a request and getting an acknowledgement
syncLimit=5

# The directory where the snapshot is stored.
# dataDir=/tmp/zookeeper

# The port at which the clients will connect
clientPort=2181

# ZooKeeper quorum peers
server.1=localhost:2888:3888
# server.2=host:peer-port:leader-port
@ -0,0 +1,31 @@
|
||||
<?xml version="1.0"?>

<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->

<allocations>
  <pool name="production">
    <schedulingMode>FAIR</schedulingMode>
    <weight>1</weight>
    <minShare>2</minShare>
  </pool>
  <pool name="test">
    <schedulingMode>FIFO</schedulingMode>
    <weight>2</weight>
    <minShare>3</minShare>
  </pool>
</allocations>
@ -0,0 +1,69 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Set everything to be logged to the console
rootLogger.level = info
rootLogger.appenderRef.stdout.ref = console

# In the pattern layout configuration below, we specify an explicit `%ex` conversion
# pattern for logging Throwables. If this was omitted, then (by default) Log4J would
# implicitly add an `%xEx` conversion pattern which logs stacktraces with additional
# class packaging information. That extra information can sometimes add a substantial
# performance overhead, so we disable it in our default logging config.
# For more information, see SPARK-39361.
appender.console.type = Console
appender.console.name = console
appender.console.target = SYSTEM_ERR
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex

# Set the default spark-shell/spark-sql log level to WARN. When running the
# spark-shell/spark-sql, the log level for these classes is used to overwrite
# the root logger's log level, so that the user can have different defaults
# for the shell and regular Spark apps.
logger.repl.name = org.apache.spark.repl.Main
logger.repl.level = warn

logger.thriftserver.name = org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver
logger.thriftserver.level = warn

# Settings to quiet third party logs that are too verbose
logger.jetty1.name = org.sparkproject.jetty
logger.jetty1.level = warn
logger.jetty2.name = org.sparkproject.jetty.util.component.AbstractLifeCycle
logger.jetty2.level = error
logger.replexprTyper.name = org.apache.spark.repl.SparkIMain$exprTyper
logger.replexprTyper.level = info
logger.replSparkILoopInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter
logger.replSparkILoopInterpreter.level = info
logger.parquet1.name = org.apache.parquet
logger.parquet1.level = error
logger.parquet2.name = parquet
logger.parquet2.level = error

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
logger.RetryingHMSHandler.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler
logger.RetryingHMSHandler.level = fatal
logger.FunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry
logger.FunctionRegistry.level = error

# For deploying Spark ThriftServer
# SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805
appender.console.filter.1.type = RegexFilter
appender.console.filter.1.regex = .*Thrift error occurred during processing of message.*
appender.console.filter.1.onMatch = deny
appender.console.filter.1.onMismatch = neutral
@ -0,0 +1,210 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# syntax: [instance].sink|source.[name].[options]=[value]

# This file configures Spark's internal metrics system. The metrics system is
# divided into instances which correspond to internal components.
# Each instance can be configured to report its metrics to one or more sinks.
# Accepted values for [instance] are "master", "worker", "executor", "driver",
# and "applications". A wildcard "*" can be used as an instance name, in
# which case all instances will inherit the supplied property.
#
# Within an instance, a "source" specifies a particular set of grouped metrics.
# there are two kinds of sources:
#   1. Spark internal sources, like MasterSource, WorkerSource, etc, which will
#      collect a Spark component's internal state. Each instance is paired with a
#      Spark source that is added automatically.
#   2. Common sources, like JvmSource, which will collect low level state.
#      These can be added through configuration options and are then loaded
#      using reflection.
#
# A "sink" specifies where metrics are delivered to. Each instance can be
# assigned one or more sinks.
#
# The sink|source field specifies whether the property relates to a sink or
# source.
#
# The [name] field specifies the name of source or sink.
#
# The [options] field is the specific property of this source or sink. The
# source or sink is responsible for parsing this property.
#
# Notes:
#   1. To add a new sink, set the "class" option to a fully qualified class
#      name (see examples below).
#   2. Some sinks involve a polling period. The minimum allowed polling period
#      is 1 second.
#   3. Wildcard properties can be overridden by more specific properties.
#      For example, master.sink.console.period takes precedence over
#      *.sink.console.period.
#   4. A metrics specific configuration
#      "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be
#      added to Java properties using -Dspark.metrics.conf=xxx if you want to
#      customize metrics system. You can also put the file in ${SPARK_HOME}/conf
#      and it will be loaded automatically.
#   5. The MetricsServlet sink is added by default as a sink in the master,
#      worker and driver, and you can send HTTP requests to the "/metrics/json"
#      endpoint to get a snapshot of all the registered metrics in JSON format.
#      For master, requests to the "/metrics/master/json" and
#      "/metrics/applications/json" endpoints can be sent separately to get
#      metrics snapshots of the master instance and applications. This
#      MetricsServlet does not have to be configured.
#   6. The metrics system can also be configured using Spark configuration
#      parameters. The relevant parameter names are formed by adding the
#      prefix "spark.metrics.conf." to the configuration entries detailed in
#      this file (see examples below).

## List of available common sources and their properties.

# org.apache.spark.metrics.source.JvmSource
#   Note: Currently, JvmSource is the only available common source.
#   It can be added to an instance by setting the "class" option to its
#   fully qualified class name (see examples below).

## List of available sinks and their properties.

# org.apache.spark.metrics.sink.ConsoleSink
#   Name:   Default:   Description:
#   period  10         Poll period
#   unit    seconds    Unit of the poll period

# org.apache.spark.metrics.sink.CSVSink
#   Name:      Default:   Description:
#   period     10         Poll period
#   unit       seconds    Unit of the poll period
#   directory  /tmp       Where to store CSV files

# org.apache.spark.metrics.sink.GangliaSink
#   Name:    Default:    Description:
#   host     NONE        Hostname or multicast group of the Ganglia server,
#                        must be set
#   port     NONE        Port of the Ganglia server(s), must be set
#   period   10          Poll period
#   unit     seconds     Unit of the poll period
#   ttl      1           TTL of messages sent by Ganglia
#   dmax     0           Lifetime in seconds of metrics (0 never expired)
#   mode     multicast   Ganglia network mode ('unicast' or 'multicast')

# org.apache.spark.metrics.sink.JmxSink

# org.apache.spark.metrics.sink.MetricsServlet
#   Name:    Default:   Description:
#   path     VARIES*    Path prefix from the web server root
#   sample   false      Whether to show entire set of samples for histograms
#                       ('false' or 'true')
#
# * Default path is /metrics/json for all instances except the master. The
#   master has two paths:
#     /metrics/applications/json # App information
#     /metrics/master/json       # Master information

# org.apache.spark.metrics.sink.PrometheusServlet
#   Name:    Default:   Description:
#   path     VARIES*    Path prefix from the web server root
#
# * Default path is /metrics/prometheus for all instances except the master. The
#   master has two paths:
#     /metrics/applications/prometheus # App information
#     /metrics/master/prometheus       # Master information

# org.apache.spark.metrics.sink.GraphiteSink
#   Name:      Default:       Description:
#   host       NONE           Hostname of the Graphite server, must be set
#   port       NONE           Port of the Graphite server, must be set
#   period     10             Poll period
#   unit       seconds        Unit of the poll period
#   prefix     EMPTY STRING   Prefix to prepend to every metric's name
#   protocol   tcp            Protocol ("tcp" or "udp") to use
#   regex      NONE           Optional filter to send only metrics matching this regex string

# org.apache.spark.metrics.sink.StatsdSink
#   Name:     Default:       Description:
#   host      127.0.0.1      Hostname or IP of StatsD server
#   port      8125           Port of StatsD server
#   period    10             Poll period
#   unit      seconds        Units of poll period
#   prefix    EMPTY STRING   Prefix to prepend to metric name

## Examples
# Enable JmxSink for all instances by class name
#*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink

# Enable ConsoleSink for all instances by class name
#*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink

# Enable StatsdSink for all instances by class name
#*.sink.statsd.class=org.apache.spark.metrics.sink.StatsdSink
#*.sink.statsd.prefix=spark

# Polling period for the ConsoleSink
#*.sink.console.period=10
# Unit of the polling period for the ConsoleSink
#*.sink.console.unit=seconds

# Polling period for the ConsoleSink specific for the master instance
#master.sink.console.period=15
# Unit of the polling period for the ConsoleSink specific for the master
# instance
#master.sink.console.unit=seconds

# Enable CsvSink for all instances by class name
#*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink

# Polling period for the CsvSink
#*.sink.csv.period=1
# Unit of the polling period for the CsvSink
#*.sink.csv.unit=minutes

# Polling directory for CsvSink
#*.sink.csv.directory=/tmp/

# Polling period for the CsvSink specific for the worker instance
#worker.sink.csv.period=10
# Unit of the polling period for the CsvSink specific for the worker instance
#worker.sink.csv.unit=minutes

# Enable Slf4jSink for all instances by class name
#*.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink

# Polling period for the Slf4JSink
#*.sink.slf4j.period=1
# Unit of the polling period for the Slf4jSink
#*.sink.slf4j.unit=minutes

# Example configuration for Graphite sink
#*.sink.graphite.class=org.apache.spark.metrics.sink.GraphiteSink
#*.sink.graphite.host=<graphiteEndPoint_hostName>
#*.sink.graphite.port=<listening_port>
#*.sink.graphite.period=10
#*.sink.graphite.unit=seconds
#*.sink.graphite.prefix=<optional_value>

# Enable JvmSource for instance master, worker, driver and executor
#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource

#worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource

#driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource

#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource

# Example configuration for PrometheusServlet
#*.sink.prometheusServlet.class=org.apache.spark.metrics.sink.PrometheusServlet
#*.sink.prometheusServlet.path=/metrics/prometheus
#master.sink.prometheusServlet.path=/metrics/master/prometheus
#applications.sink.prometheusServlet.path=/metrics/applications/prometheus
43
samples/datalake/iceberg_and_paimon/data/spark-conf/spark-defaults.conf
Executable file
@ -0,0 +1,43 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Default system properties included when running spark-submit.
# This is useful for setting default environmental settings.

# Example:
# spark.sql.extensions org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
spark.sql.catalog.demo            org.apache.iceberg.spark.SparkCatalog
spark.sql.catalog.demo.type       rest
spark.sql.catalog.demo.uri        http://rest:8181
spark.sql.catalog.demo.io-impl    org.apache.iceberg.aws.s3.S3FileIO
spark.sql.catalog.demo.warehouse  s3://warehouse/wh/
spark.sql.catalog.demo.s3.endpoint http://minio:9000

spark.sql.defaultCatalog          demo
spark.eventLog.enabled            true
spark.eventLog.dir                /home/iceberg/spark-events
spark.history.fs.logDirectory     /home/iceberg/spark-events
spark.sql.catalogImplementation   in-memory

# spark.sql.extensions org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions
spark.sql.catalog.paimon               org.apache.paimon.spark.SparkCatalog
spark.sql.catalog.paimon.warehouse     s3://warehouse/wh
spark.sql.catalog.paimon.s3.endpoint   http://minio:9000
spark.sql.catalog.paimon.s3.access-key admin
spark.sql.catalog.paimon.s3.secret-key password
spark.sql.catalog.paimon.s3.region     us-east-1
spark.sql.catalog.paimon.warehouse     s3://warehouse/wh
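For a quick sanity check of this configuration, a spark-sql session inside the spark container can list the configured catalogs. This is a minimal sketch, not part of the committed scripts; it assumes the compose stack is already running and that the installed Spark version supports `SHOW CATALOGS`:

```
sudo docker exec -it doris-iceberg-paimon-spark spark-sql -e "SHOW CATALOGS;"
```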
@ -0,0 +1,27 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Default system properties included when running spark-submit.
# This is useful for setting default environmental settings.

# Example:
# spark.master                     spark://master:7077
# spark.eventLog.enabled           true
# spark.eventLog.dir               hdfs://namenode:8021/directory
# spark.serializer                 org.apache.spark.serializer.KryoSerializer
# spark.driver.memory              5g
# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
81
samples/datalake/iceberg_and_paimon/data/spark-conf/spark-env.sh.template
Executable file
@ -0,0 +1,81 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This file is sourced when running various Spark programs.
# Copy it as spark-env.sh and edit that to configure Spark for your site.

# Options read when launching programs locally with
# ./bin/run-example or ./bin/spark-submit
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program

# Options read by executors and drivers running inside the cluster
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos

# Options read in any mode
# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf)
# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)

# Options read in any cluster manager using HDFS
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files

# Options read in YARN client/cluster mode
# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN

# Options for the daemons used in the standalone deploy mode
# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
# - SPARK_WORKER_DIR, to set the working directory of worker processes
# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons
# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers

# Options for launcher
# - SPARK_LAUNCHER_OPTS, to set config properties and Java options for the launcher (e.g. "-Dx=y")

# Generic options for the daemons used in the standalone deploy mode
# - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
# - SPARK_LOG_DIR       Where log files are stored. (Default: ${SPARK_HOME}/logs)
# - SPARK_LOG_MAX_FILES Max log files of Spark daemons can rotate to. Default is 5.
# - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
# - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
# - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)
# - SPARK_NO_DAEMONIZE  Run the proposed command in the foreground. It will not output a PID file.
# Options for native BLAS, like Intel MKL, OpenBLAS, and so on.
# You might get better performance to enable these options if using native BLAS (see SPARK-21305).
# - MKL_NUM_THREADS=1        Disable multi-threading of Intel MKL
# - OPENBLAS_NUM_THREADS=1   Disable multi-threading of OpenBLAS

# Options for beeline
# - SPARK_BEELINE_OPTS, to set config properties only for the beeline cli (e.g. "-Dx=y")
# - SPARK_BEELINE_MEMORY, Memory for beeline (e.g. 1000M, 2G) (Default: 1G)
@ -0,0 +1,19 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# A Spark Worker will be started on each of the machines listed below.
localhost
BIN
samples/datalake/iceberg_and_paimon/data/table/customer/000000_0
Normal file
Binary file not shown.
BIN
samples/datalake/iceberg_and_paimon/data/table/customer/000001_0
Normal file
Binary file not shown.
BIN
samples/datalake/iceberg_and_paimon/data/table/customer/000002_0
Normal file
Binary file not shown.
BIN
samples/datalake/iceberg_and_paimon/data/table/customer/000003_0
Normal file
Binary file not shown.
22
samples/datalake/iceberg_and_paimon/docker-compose.env
Normal file
@ -0,0 +1,22 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

REST_CATALOG_PORT=18181
MINIO_UI_PORT=19002
MINIO_API_PORT=19001
DORIS_QUERY_PORT=19031
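These variables only control the host-side port mappings declared in docker-compose.yml; inside the compose network the services still listen on their default ports. As a rough sketch (not part of the committed scripts, assuming the default values above and a MySQL client on the host), the Doris FE and the MinIO API would be reachable like this:

```
# Doris MySQL protocol: host port 19031 -> container port 9030
mysql -h 127.0.0.1 -P 19031 -u root

# MinIO S3 API: host port 19001 -> container port 9000
curl -s http://127.0.0.1:19001/minio/health/live
```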
173
samples/datalake/iceberg_and_paimon/docker-compose.yml
Normal file
@ -0,0 +1,173 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

version: "3"

services:
  spark:
    image: tabulario/spark-iceberg
    container_name: doris-iceberg-paimon-spark
    hostname: demo-spark-iceberg
    build: spark/
    volumes:
      - ./packages/jars/paimon-spark-3.5-0.8.0.jar:/opt/spark/jars/paimon-spark-3.5-0.8.0.jar
      - ./packages/jars/paimon-s3-0.8.0.jar:/opt/spark/jars/paimon-s3-0.8.0.jar
      - ./data/table:/opt/data
      - ./data/spark-conf:/opt/spark/conf
      - ./sql/prepare_data.sql:/opt/sql/prepare_data.sql
    depends_on:
      - rest
      - minio
    environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
    networks:
      - demo-iceberg

  rest:
    image: tabulario/iceberg-rest
    container_name: doris-iceberg-paimon-iceberg-rest
    ports:
      - ${REST_CATALOG_PORT}:8181
    environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
      - CATALOG_WAREHOUSE=s3://warehouse/wh/
      - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
      - CATALOG_S3_ENDPOINT=http://minio:9000
    networks:
      - demo-iceberg
    volumes:
      - './packages/jdk1.8.0_202:/opt/jdk1.8.0_202'
      - './packages/doris-bin:/opt/doris-bin'
      - './scripts:/opt/scripts'

  minio:
    image: minio/minio
    container_name: doris-iceberg-paimon-minio
    ports:
      - ${MINIO_API_PORT}:9000
      - ${MINIO_UI_PORT}:9001
    environment:
      - MINIO_ROOT_USER=admin
      - MINIO_ROOT_PASSWORD=password
      - MINIO_DOMAIN=minio
    networks:
      demo-iceberg:
        aliases:
          - warehouse.minio
    command: ["server", "/data", "--console-address", ":9001"]

  mc:
    depends_on:
      - minio
    image: minio/mc
    container_name: doris-iceberg-paimon-mc
    environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
    networks:
      - demo-iceberg
    entrypoint: >
      /bin/sh -c "
      until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
      /usr/bin/mc rm -r --force minio/warehouse;
      /usr/bin/mc mb minio/warehouse;
      /usr/bin/mc policy set public minio/warehouse;
      tail -f /dev/null
      "

  jobmanager:
    image: flink:1.18.0
    container_name: doris-iceberg-paimon-jobmanager
    environment:
      - JOB_MANAGER_RPC_ADDRESS=jobmanager
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
    ports:
      - "8082:8081"
    command: jobmanager
    depends_on:
      - rest
      - minio
    volumes:
      - ./packages/jars/flink-connector-jdbc-3.1.2-1.18.jar:/opt/flink/lib/flink-connector-jdbc-3.1.2-1.18.jar
      - ./packages/jars/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar:/opt/flink/lib/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
      - ./packages/jars/flink-s3-fs-hadoop-1.18.0.jar:/opt/flink/plugins/s3-fs-hadoop/flink-s3-fs-hadoop-1.18.0.jar
      - ./packages/jars/iceberg-flink-runtime-1.18-1.5.2.jar:/opt/flink/lib/iceberg-flink-runtime-1.18-1.5.2.jar
      - ./packages/jars/iceberg-aws-bundle-1.5.2.jar:/opt/flink/lib/iceberg-aws-bundle-1.5.2.jar
      - ./packages/jars/paimon-flink-1.18-0.8.0.jar:/opt/flink/lib/paimon-flink-1.18-0.8.0.jar
      - ./packages/jars/paimon-s3-0.8.0.jar:/opt/flink/lib/paimon-s3-0.8.0.jar
      - ./sql/init_tables.sql:/opt/flink/sql/init_tables.sql
      - ./data/flink-conf:/opt/flink/conf
    networks:
      - demo-iceberg
    deploy:
      replicas: 1

  taskmanager:
    image: flink:1.18.0
    environment:
      - JOB_MANAGER_RPC_ADDRESS=jobmanager
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
    depends_on:
      - jobmanager
    command: taskmanager
    volumes:
      - ./packages/jars/flink-connector-jdbc-3.1.2-1.18.jar:/opt/flink/lib/flink-connector-jdbc-3.1.2-1.18.jar
      - ./packages/jars/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar:/opt/flink/lib/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
      - ./packages/jars/flink-s3-fs-hadoop-1.18.0.jar:/opt/flink/plugins/s3-fs-hadoop/flink-s3-fs-hadoop-1.18.0.jar
      - ./packages/jars/iceberg-flink-runtime-1.18-1.5.2.jar:/opt/flink/lib/iceberg-flink-runtime-1.18-1.5.2.jar
      - ./packages/jars/iceberg-aws-bundle-1.5.2.jar:/opt/flink/lib/iceberg-aws-bundle-1.5.2.jar
      - ./packages/jars/paimon-flink-1.18-0.8.0.jar:/opt/flink/lib/paimon-flink-1.18-0.8.0.jar
      - ./packages/jars/paimon-s3-0.8.0.jar:/opt/flink/lib/paimon-s3-0.8.0.jar
    networks:
      - demo-iceberg
    deploy:
      replicas: 2

  doris:
    image: mysql:8.0.18
    container_name: doris-iceberg-paimon-doris
    networks:
      - demo-iceberg
    hostname: doris
    ports:
      - ${DORIS_QUERY_PORT}:9030
    environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
      - LD_LIBRARY_PATH=/opt/doris/be/lib
      - JAVA_HOME=/opt/jdk8
    volumes:
      - ./packages/jdk1.8.0_202:/opt/jdk8
      - ./packages/doris-bin:/opt/doris-bin
      - ./sql/init_doris.sql:/opt/doris-bin/init_doris.sql
      - ./scripts:/opt/scripts
    command: bin/bash /opt/scripts/start_doris.sh

networks:
  demo-iceberg:
    ipam:
      driver: default
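The compose file is normally driven by start_all.sh, but the stack can also be managed directly; a minimal sketch using the same commands the scripts rely on (it assumes the packages/ directory has already been prepared by start_all.sh):

```
# start the whole stack with the port mappings from docker-compose.env
sudo docker compose -f docker-compose.yml --env-file docker-compose.env up -d

# check that all services are up
sudo docker compose -f docker-compose.yml --env-file docker-compose.env ps

# follow Doris startup logs
sudo docker logs -f doris-iceberg-paimon-doris
```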
60
samples/datalake/iceberg_and_paimon/scripts/start_doris.sh
Normal file
@ -0,0 +1,60 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

export JAVA_HOME=/opt/jdk8
export PATH=${JAVA_HOME}/bin:${PATH}

echo 'copying doris binaries...'
cp -r /opt/doris-bin /opt/doris

echo 'start fe...'
rm -rf /opt/doris/fe/doris-meta/*
/opt/doris/fe/bin/start_fe.sh --daemon

echo 'start be...'
rm -rf /opt/doris/be/storage/*
/opt/doris/be/bin/start_be.sh --daemon

# wait until the FE log file exists
while [[ ! -f "/opt/doris/fe/log/fe.log" ]]; do
    echo "waiting for fe.log..."
    sleep 2
done

# wait until the FE reports that the query engine is up
QE=$(grep "QE service start." /opt/doris/fe/log/fe.log)
while [[ -z "${QE}" ]]; do
    echo "waiting for fe..."
    sleep 2
    QE=$(grep "QE service start." /opt/doris/fe/log/fe.log)
done

echo 'doris is started'

# retry the init SQL until the MySQL port of the FE accepts connections
MYSQL_ERROR=$(mysql -u root -P 9030 -h doris </opt/doris-bin/init_doris.sql 2>&1)
ERR=$(echo "${MYSQL_ERROR}" | grep "Can't connect to MySQL")
echo "${ERR}"
while [[ -n "${ERR}" ]]; do
    echo "waiting for mysql..."
    sleep 2
    MYSQL_ERROR=$(mysql -u root -P 9030 -h doris </opt/doris-bin/init_doris.sql 2>&1)
    ERR=$(echo "${MYSQL_ERROR}" | grep "Can't connect to MySQL")
done

echo 'doris is initialized'

tail -F /dev/null
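If the backend registration or catalog creation ever needs to be repeated (for example after wiping the FE meta directory), the same init statements can be replayed by hand from inside the Doris container; a sketch based on the commands above:

```
sudo docker exec -it doris-iceberg-paimon-doris \
    sh -c "mysql -u root -h doris -P 9030 < /opt/doris-bin/init_doris.sql"
```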
21
samples/datalake/iceberg_and_paimon/sql/init_doris.sql
Normal file
@ -0,0 +1,21 @@
ALTER SYSTEM ADD BACKEND '127.0.0.1:9050';

CREATE CATALOG `iceberg` PROPERTIES (
    "type" = "iceberg",
    "iceberg.catalog.type" = "rest",
    "uri" = "http://rest:8181",
    "warehouse" = "s3://warehouse/",
    "s3.endpoint" = "http://minio:9000",
    "s3.access_key" = "admin",
    "s3.secret_key" = "password",
    "s3.region" = "us-east-1"
);

CREATE CATALOG `paimon` PROPERTIES (
    "type" = "paimon",
    "warehouse" = "s3://warehouse/wh/",
    "s3.endpoint" = "http://minio:9000",
    "s3.access_key" = "admin",
    "s3.secret_key" = "password",
    "s3.region" = "us-east-1"
);
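Once these catalogs exist, the tables created by init_tables.sql below can be queried from the Doris MySQL client with fully qualified names. A hedged sketch (reusing the container from start_doris_client.sh, and assuming the Flink/Spark init steps have already finished):

```
sudo docker exec -it doris-iceberg-paimon-doris sh -c \
    "mysql -u root -h doris -P 9030 -e 'SHOW CATALOGS; SELECT count(*) FROM paimon.db_paimon.customer;'"
```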
53
samples/datalake/iceberg_and_paimon/sql/init_tables.sql
Normal file
@ -0,0 +1,53 @@
SET 'sql-client.execution.result-mode' = 'tableau';
SET 'execution.runtime-mode' = 'batch';

CREATE CATALOG iceberg WITH (
    'type'='iceberg',
    'catalog-type'='rest',
    'uri'='http://rest:8181/',
    's3.endpoint'='http://minio:9000',
    'warehouse'='s3://warehouse/wh/'
);

create database if not exists iceberg.db_iceberg;

CREATE TABLE if not exists iceberg.db_iceberg.tb_iceberg (
    id BIGINT,
    val string,
    PRIMARY KEY (id) NOT ENFORCED
) WITH (
    'write.upsert.enabled'='true',
    'upsert-enabled'='true',
    'write.delete.mode'='merge-on-read',
    'write.update.mode'='merge-on-read'
);

CREATE CATALOG `paimon` WITH (
    'type' = 'paimon',
    'warehouse' = 's3://warehouse/wh',
    's3.endpoint' = 'http://minio:9000',
    's3.access-key' = 'admin',
    's3.secret-key' = 'password',
    's3.region' = 'us-east-1'
);

create database if not exists paimon.db_paimon;

CREATE TABLE if not exists paimon.db_paimon.customer (
    `c_custkey` int,
    `c_name` varchar(25),
    `c_address` varchar(40),
    `c_nationkey` int,
    `c_phone` char(15),
    `c_acctbal` decimal(12,2),
    `c_mktsegment` char(10),
    `c_comment` varchar(117),
    PRIMARY KEY (c_custkey, c_nationkey) NOT ENFORCED
) PARTITIONED BY (c_nationkey) WITH (
    'deletion-vectors.enabled' = 'true',
    'bucket' = '1'
);
8
samples/datalake/iceberg_and_paimon/sql/prepare_data.sql
Normal file
@ -0,0 +1,8 @@
CREATE TEMPORARY VIEW customer_files
USING org.apache.spark.sql.parquet
OPTIONS (
    path "file:///opt/data/customer/"
);

insert overwrite paimon.db_paimon.customer select c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment from customer_files;
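After this load runs (start_all.sh invokes it through spark-sql), the contents of the Paimon table can be verified with the same client; a minimal sketch reusing the container and extension flag from the scripts in this commit:

```
sudo docker exec -it doris-iceberg-paimon-spark spark-sql \
    --conf spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions \
    -e "SELECT c_nationkey, count(*) FROM paimon.db_paimon.customer GROUP BY c_nationkey ORDER BY c_nationkey LIMIT 5;"
```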
121
samples/datalake/iceberg_and_paimon/start_all.sh
Normal file
@ -0,0 +1,121 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

set -e

DORIS_PACKAGE=apache-doris-2.1.4-bin-x64
DORIS_DOWNLOAD_URL=https://apache-doris-releases.oss-accelerate.aliyuncs.com

# Download a file if it is missing or its md5sum does not match the expected value.
download_source_file() {
    local FILE_PATH="$1"
    local EXPECTED_MD5="$2"
    local DOWNLOAD_URL="$3"

    echo "resolving ${FILE_PATH} ..."

    if [[ -f "${FILE_PATH}" ]]; then
        local FILE_MD5
        echo "compare md5sum ..."
        FILE_MD5=$(md5sum "${FILE_PATH}" | awk '{ print $1 }')

        if [[ "${FILE_MD5}" = "${EXPECTED_MD5}" ]]; then
            echo "${FILE_PATH} is ready!"
        else
            echo "${FILE_PATH} is broken, redownloading ..."
            rm "${FILE_PATH}"
            wget "${DOWNLOAD_URL}"/"${FILE_PATH}"
        fi
    else
        echo "downloading ${FILE_PATH} ..."
        wget "${DOWNLOAD_URL}"/"${FILE_PATH}"
    fi
}

curdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
cd "${curdir}" || exit

if [[ ! -d "packages" ]]; then
    mkdir packages
fi
cd packages || exit

download_source_file "${DORIS_PACKAGE}.tar.gz" "a4d8bc9730aca3a51294e87d7d5b3e8e" "${DORIS_DOWNLOAD_URL}"
download_source_file "jdk-8u202-linux-x64.tar.gz" "0029351f7a946f6c05b582100c7d45b7" "https://repo.huaweicloud.com/java/jdk/8u202-b08"
download_source_file "iceberg-aws-bundle-1.5.2.jar" "7087ac697254f8067d0f813521542263" "https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/1.5.2"
download_source_file "iceberg-flink-runtime-1.18-1.5.2.jar" "8e895288e6770eea69ea05ffbc918c1b" "https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-flink-runtime-1.18/1.5.2"
download_source_file "flink-connector-jdbc-3.1.2-1.18.jar" "5c99b637721dd339e10725b81ccedb60" "https://repo1.maven.org/maven2/org/apache/flink/flink-connector-jdbc/3.1.2-1.18"
download_source_file "paimon-s3-0.8.0.jar" "3e510c634a21cbcdca4fd3b85786a20c" "https://repo1.maven.org/maven2/org/apache/paimon/paimon-s3/0.8.0"
download_source_file "paimon-flink-1.18-0.8.0.jar" "f590d94af1b923a7c68152b558d5b25b" "https://repo1.maven.org/maven2/org/apache/paimon/paimon-flink-1.18/0.8.0"
download_source_file "paimon-spark-3.5-0.8.0.jar" "963d0c17d69034ecf77816f64863fc51" "https://repo1.maven.org/maven2/org/apache/paimon/paimon-spark-3.5/0.8.0"
download_source_file "flink-shaded-hadoop-2-uber-2.8.3-10.0.jar" "f6f0be5b9cbebfd43e38121b209f4ecc" "https://repo1.maven.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0"
download_source_file "flink-s3-fs-hadoop-1.18.0.jar" "60b75e0fdc5ed05f1213b593c4b66556" "https://repo1.maven.org/maven2/org/apache/flink/flink-s3-fs-hadoop/1.18.0"

if [[ ! -f "jdk1.8.0_202/SUCCESS" ]]; then
    echo "Prepare jdk8 environment"
    if [[ -d "jdk1.8.0_202" ]]; then
        echo "Remove broken jdk1.8.0_202"
        rm -rf jdk1.8.0_202
    fi
    echo "Unpackage jdk1.8.0_202"
    tar xzf jdk-8u202-linux-x64.tar.gz
    touch jdk1.8.0_202/SUCCESS
fi

if [[ ! -f "doris-bin/SUCCESS" ]]; then
    echo "Prepare ${DORIS_PACKAGE} environment"
    if [[ -d "doris-bin" ]]; then
        echo "Remove broken ${DORIS_PACKAGE}"
        rm -rf doris-bin
    fi
    echo "Unpackage ${DORIS_PACKAGE}"
    tar xzf "${DORIS_PACKAGE}".tar.gz
    mv "${DORIS_PACKAGE}" doris-bin
    touch doris-bin/SUCCESS
fi

if [[ ! -f "jars/SUCCESS" ]]; then
    echo "Prepare jars environment"
    if [[ -d "jars" ]]; then
        echo "Remove broken jars"
        rm -rf jars
    fi
    mkdir jars
    cp ./*.jar jars/
    touch jars/SUCCESS
fi

cd ../

echo "Starting docker compose..."
sudo docker compose -f docker-compose.yml --env-file docker-compose.env up -d

echo "Initializing iceberg and paimon tables..."
sudo docker exec -it doris-iceberg-paimon-jobmanager sql-client.sh -f /opt/flink/sql/init_tables.sql | tee -a init.log >/dev/null

echo "Preparing data for the tables..."
sudo docker exec -it doris-iceberg-paimon-spark spark-sql --conf spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions -f /opt/sql/prepare_data.sql | tee -a init.log >/dev/null

echo "============================================================================="
echo "Successfully launched the doris+iceberg+paimon+flink+spark+minio environment!"
echo "You can:"
echo "    'bash start_doris_client.sh' to log in to doris"
echo "    'bash start_flink_client.sh' to log in to flink"
echo "    'bash start_spark_paimon_client.sh' to log in to spark for paimon"
echo "    'bash start_spark_iceberg_client.sh' to log in to spark for iceberg"
echo "============================================================================="
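If either initialization step fails (for example because a download was still incomplete), the table setup and the data load can be re-run on their own without restarting the containers; a sketch of the two commands the script itself uses:

```
# re-create the Iceberg/Paimon catalogs and tables in Flink
sudo docker exec -it doris-iceberg-paimon-jobmanager sql-client.sh -f /opt/flink/sql/init_tables.sql

# re-load the customer data through Spark
sudo docker exec -it doris-iceberg-paimon-spark spark-sql \
    --conf spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions \
    -f /opt/sql/prepare_data.sql
```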
20
samples/datalake/iceberg_and_paimon/start_doris_client.sh
Normal file
@ -0,0 +1,20 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

sudo docker exec -it doris-iceberg-paimon-doris sh -c "mysql -u root -h doris -P 9030"
19
samples/datalake/iceberg_and_paimon/start_flink_client.sh
Normal file
@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

sudo docker exec -it doris-iceberg-paimon-jobmanager sql-client.sh -i /opt/flink/sql/init_tables.sql
@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

sudo docker exec -it doris-iceberg-paimon-spark spark-sql --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

sudo docker exec -it doris-iceberg-paimon-spark spark-sql --conf spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions
19
samples/datalake/iceberg_and_paimon/stop_all.sh
Normal file
@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

sudo docker compose -f docker-compose.yml --env-file docker-compose.env down