107 lines
3.3 KiB
Python
107 lines
3.3 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import random
|
|
import string
|
|
|
|
# Table named in the generated "INSERT INTO ... VALUES" statement.
table_name = "demo.format_v1.sample_parquet"

# Character pool sampled for the char column: lowercase letters plus a few
# punctuation marks (no single quote, so samples can sit inside '...').
alphabet = 'abcdefghijklmnopqrstuvwxyz!@#$%^&*()'

# Pool of '1'/'0' characters sampled to build the text that is later wrapped
# in CAST('...' AS BINARY).
binary_alphabet = '11111111111110000000000000000000'

# Candidate SQL date expressions; one is picked per generated row.
data_choice = [
    "date('2000-12-31')",
    "date('1969-09-21')",
    "date('2969-02-03')",
]

# Candidate TIMESTAMP literals (with explicit UTC offset); one per row.
timestamp_choice = [
    "TIMESTAMP '1970-01-01 00:00:01.000001 UTC+00:00'",
    "TIMESTAMP '1970-01-02 00:00:01.000001 UTC+00:00'",
    "TIMESTAMP '1970-01-03 00:00:01.000001 UTC+00:00'",
    "TIMESTAMP '1970-01-04 00:00:01.000001 UTC+00:00'",
]

# Candidate TIMESTAMP_NTZ literals; one per row.
timestamp_ntz_choice = [
    "TIMESTAMP_NTZ '2017-12-01 10:12:55.038194 UTC'",
    "TIMESTAMP_NTZ '2017-12-02 10:12:55.038194 UTC'",
    "TIMESTAMP_NTZ '2017-12-03 10:12:55.038194 UTC'",
    "TIMESTAMP_NTZ '2017-12-04 10:12:55.038194 UTC'",
]

# City names, already wrapped in single quotes so they can be embedded in the
# generated SQL as-is.
city_choice = [
    "'Shanghai'",
    "'Hefei'",
    "'Beijing'",
    "'Hangzhou'",
]
|
|
|
|
|
|
def get_one_data():
    """Build one random row as a comma-separated string of SQL values.

    The 17 values appear in this order: id, boolean, short, byte, integer,
    long, float, double, date, timestamp, timestamp_ntz, char, varchar,
    string, binary, decimal, city.  The char/varchar/string values are
    wrapped in single quotes and the binary value is wrapped in
    ``CAST('...' AS BINARY)``; everything else is emitted verbatim.

    Returns:
        str: the row values joined by commas, without enclosing parentheses.
    """

    def random_text(pool, max_length):
        # Pick between 1 and max_length distinct positions from the pool
        # (sampling is without replacement) and glue them together.
        return "".join(random.sample(pool, random.randint(1, max_length)))

    letters_and_digits = string.ascii_letters + string.digits

    # NOTE: the random calls below are made in a fixed order so that a seeded
    # generator always yields the same row.
    row_id = random.randint(-100000000, 100000000)
    # True when the drawn integer is odd, False when it is even.
    boolean_value = random.randint(-1000000, 1000000) % 2 != 0
    short_value = random.randint(-32700, 32700)
    byte_value = random.randint(-128, 127)
    integer_value = random.randint(-21474836, 2147483)
    long_value = random.randint(-92233720368547758, 92233720368547758)
    float_value = random.random() * 10
    double_value = random.random() * 10
    date_value = random.choice(data_choice)
    timestamp_value = random.choice(timestamp_choice)
    timestamp_ntz_value = random.choice(timestamp_ntz_choice)
    char_value = random_text(alphabet, 18)
    varchar_value = random_text(letters_and_digits, 20)
    string_value = random_text(letters_and_digits, 20)
    binary_value = random_text(binary_alphabet, 30)
    decimal_value = random.random() * 10000
    city_value = random.choice(city_choice)

    row_values = (
        row_id,
        boolean_value,
        short_value,
        byte_value,
        integer_value,
        long_value,
        float_value,
        double_value,
        date_value,
        timestamp_value,
        timestamp_ntz_value,
        char_value,
        varchar_value,
        string_value,
        binary_value,
        decimal_value,
        city_value,
    )
    row_template = "{},{},{},{},{},{},{},{},{},{},{},'{}','{}','{}',CAST('{}' AS BINARY),{},{}"
    return row_template.format(*row_values)
|
|
|
|
# Write a single INSERT statement with 1000 random rows to
# 'insert_table_values.sql' in the current working directory.  This runs at
# module top level, so it executes whenever the file is run or imported.
with open('insert_table_values.sql', 'w') as sql_file:
    sql_file.write("INSERT INTO {} VALUES\n".format(table_name))
    for row_number in range(1000):
        # Only the first row is written without a leading comma separator.
        if row_number == 0:
            row_template = " ({})\n"
        else:
            row_template = ", ({})\n"
        sql_file.write(row_template.format(get_one_data()))
    sql_file.write(";\n")
|
|
|