Files
doris/docker/thirdparties/docker-compose/iceberg/tools/gen_data.py
wuwenchi 10f1957379 [feature](docker)add docker-iceberg init tables (#25424)
Add some init tables for docker-iceberg.
2023-10-24 19:29:57 +08:00

107 lines
3.3 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import random
import string
# Fully qualified name of the table the generated INSERT statement targets.
table_name = "demo.format_v1.sample_parquet"

# Character pool for the char column: lowercase ASCII letters plus a few
# symbols. It contains no single quote, so sampled text can be wrapped in
# '...' without escaping.
alphabet = string.ascii_lowercase + '!@#$%^&*()'

# Pool of '1' and '0' characters sampled to build the binary column's text.
binary_alphabet = '11111111111110000000000000000000'

# SQL date expressions; one is picked at random per row.
data_choice = [
    "date('2000-12-31')",
    "date('1969-09-21')",
    "date('2969-02-03')",
]

# Timestamp literals with an explicit UTC offset, one per day from
# 1970-01-01 through 1970-01-04.
timestamp_choice = [
    "TIMESTAMP '1970-01-{:02d} 00:00:01.000001 UTC+00:00'".format(day)
    for day in range(1, 5)
]

# TIMESTAMP_NTZ literals, one per day from 2017-12-01 through 2017-12-04.
timestamp_ntz_choice = [
    "TIMESTAMP_NTZ '2017-12-{:02d} 10:12:55.038194 UTC'".format(day)
    for day in range(1, 5)
]

# City names, already wrapped in single quotes so they can be emitted as-is.
city_choice = ["'Shanghai'", "'Hefei'", "'Beijing'", "'Hangzhou'"]
def _random_chars(pool, max_len):
    """Return 1 to ``max_len`` characters sampled from ``pool``, joined.

    ``random.sample`` draws without replacement, so each position of
    ``pool`` is used at most once and ``max_len`` must not exceed
    ``len(pool)``.
    """
    return "".join(random.sample(pool, random.randint(1, max_len)))


def get_one_data():
    """Build one random row as a comma-separated list of SQL value expressions.

    The values appear in this order: id, boolean, short, byte, integer,
    long, float, double, date, timestamp, timestamp_ntz, char, varchar,
    string, binary, decimal, city. The char/varchar/string values are
    wrapped in single quotes and the binary value is emitted as
    ``CAST('...' AS BINARY)``; every other value is inserted verbatim.

    Returns:
        str: the row content without surrounding parentheses.
    """
    alnum = string.ascii_letters + string.digits

    row_id = random.randint(-100000000, 100000000)
    # An odd draw yields True, an even draw yields False.
    col_boolean = random.randint(-1000000, 1000000) % 2 != 0
    col_short = random.randint(-32700, 32700)
    col_byte = random.randint(-128, 127)
    col_integer = random.randint(-21474836, 2147483)
    col_long = random.randint(-92233720368547758, 92233720368547758)
    col_float = random.random() * 10
    col_double = random.random() * 10
    col_date = random.choice(data_choice)
    col_timestamp = random.choice(timestamp_choice)
    col_timestamp_ntz = random.choice(timestamp_ntz_choice)
    col_char = _random_chars(alphabet, 18)
    col_varchar = _random_chars(alnum, 20)
    col_string = _random_chars(alnum, 20)
    col_binary = _random_chars(binary_alphabet, 30)
    col_decimal = random.random() * 10000
    city = random.choice(city_choice)

    # Field order must line up with the placeholders in the template below.
    fields = (
        row_id, col_boolean, col_short, col_byte, col_integer, col_long,
        col_float, col_double, col_date, col_timestamp, col_timestamp_ntz,
        col_char, col_varchar, col_string, col_binary, col_decimal, city,
    )
    template = "{},{},{},{},{},{},{},{},{},{},{},'{}','{}','{}',CAST('{}' AS BINARY),{},{}"
    return template.format(*fields)
# Write one multi-row INSERT statement with 1000 randomly generated tuples to
# insert_table_values.sql in the current working directory.
with open('insert_table_values.sql', 'w') as sql_file:
    sql_file.write("INSERT INTO {} VALUES\n".format(table_name))
    for row_index in range(1000):
        # Only the first tuple is written without a leading comma.
        row_template = " ({})\n" if row_index == 0 else ", ({})\n"
        sql_file.write(row_template.format(get_one_data()))
    sql_file.write(";\n")