patch 4.0
This commit is contained in:
6
unittest/sql/optimizer/cost_model_utils/README
Normal file
6
unittest/sql/optimizer/cost_model_utils/README
Normal file
@ -0,0 +1,6 @@
|
||||
1、cost_model_util:根据输入参数采集实验数据。需要将被测目标在这里建立一个参数可控的最小运行环境,提供基本的数据生成、schema控制等功能。
|
||||
2、benchmaster_xxx:生成参数组合调用cost_model_util,需要根据被测目标的特点控制参数的种类、数量。例如对于join,需要控制左右表行数等。
|
||||
3、preprocess:对原始数据进行预处理。例如benchmaster对每组参数运行多次,在这进行去极值、取平均等工作,将同参数的几组数据合为一组。
|
||||
4、fit_xx:进行拟合
|
||||
5、plot:绘制图像
|
||||
|
0
unittest/sql/optimizer/cost_model_utils/__init__.py
Normal file
0
unittest/sql/optimizer/cost_model_utils/__init__.py
Normal file
88
unittest/sql/optimizer/cost_model_utils/apply_array_model.py
Executable file
88
unittest/sql/optimizer/cost_model_utils/apply_array_model.py
Executable file
@ -0,0 +1,88 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
import getopt
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
|
||||
def array_model_form(args,
                     params):
    """Evaluate the array cost model for a given element count.

    args: the element count itself (a single number, not a tuple).
    params: two-item sequence ``(Telem_ence, Telem_copy)`` -- the
        coefficient applied per element and the coefficient applied per
        copied element.

    Returns ``Telem_ence * Nelem + Telem_copy * copy_cnt`` where
    ``copy_cnt = ELEM_PER_PAGE * (2 ** extend_cnt - 1)`` and ``extend_cnt``
    is ``ceil(log2(Nelem / ELEM_PER_PAGE))``, never below 0.
    """
    Nelem = args
    (
        Telem_ence,
        Telem_copy
    ) = params

    ELEM_PER_PAGE = 1024
    # NOTE(review): the formula looks like a buffer that starts at one page
    # and doubles on every extension -- confirm against cost_model_util.
    if Nelem > ELEM_PER_PAGE:
        extend_cnt = math.ceil(math.log(float(Nelem) / ELEM_PER_PAGE, 2))
    else:
        # Up to one page there is no extension.  Taking this branch also
        # keeps math.log() away from counts <= 0, which it rejects.
        extend_cnt = 0
    copy_cnt = ELEM_PER_PAGE * (math.pow(2, extend_cnt) - 1)

    total_cost = Telem_ence * Nelem
    # A "Tmem_alloc * extend_cnt" term is left disabled in this model.
    total_cost += Telem_copy * copy_cnt

    return total_cost
|
||||
|
||||
|
||||
def extract_info_from_line(line):
    """Split a comma-separated data line and return its fields as floats.

    Raises ValueError if any field is not a valid float literal.
    """
    return [float(item) for item in line.split(",")]
|
||||
|
||||
|
||||
# Usage: -i <input data> -m <model parameter file> -o <output file>.
# Only -i has a default; -m and -o are required.
file_name = "get_total.data.prep"
output_fit_res = False
wrong_arg = False
model_file_name = None
out_file_name = None

try:
    opts, args = getopt.getopt(sys.argv[1:], "i:o:m:")
except getopt.GetoptError:
    # getopt raises on unknown options, so the "wrong arg" report has to be
    # triggered here; the else-branch in the loop below is never reached.
    opts, args = [], []
    wrong_arg = True

for op, value in opts:
    if "-i" == op:
        file_name = value
    elif "-o" == op:
        output_fit_res = True
        out_file_name = value
    elif "-m" == op:
        model_file_name = value
    else:
        wrong_arg = True

if model_file_name is None or out_file_name is None:
    wrong_arg = True

if wrong_arg:
    print("wrong arg")
    sys.exit(1)

# The first line of the model file holds the comma-separated parameters.
with open(model_file_name, "r") as model_file:
    line = model_file.readline()
model_params = [float(p) for p in line.split(",")]

with open(file_name, "r") as input_file, open(out_file_name, "w") as out_file:
    for line in input_file:
        # Comment lines are copied through untouched.
        if line.startswith('#'):
            out_file.write(line)
            continue
        case_param = extract_info_from_line(line)
        # Column 0 is the element count; the predicted cost is appended as
        # one extra column.
        cost_val = array_model_form(case_param[0], model_params)
        new_line = ",".join([line.strip(), str(cost_val)])
        new_line += "\n"
        out_file.write(new_line)
|
||||
|
||||
|
100
unittest/sql/optimizer/cost_model_utils/apply_hg_model.py
Executable file
100
unittest/sql/optimizer/cost_model_utils/apply_hg_model.py
Executable file
@ -0,0 +1,100 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
import getopt
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def mg_model_form(args,
                  params
                  ):
    """Evaluate the group-by cost model used by this script.

    args: 5-tuple ``(Nrow_input, Nrow_res, Ncol_input, Ncol_aggr,
        Ncol_group)``.
    params: 7-item sequence ``(Tstartup, Trow_once, Tres_once,
        Taggr_prepare_result, Taggr_process, Tgroup_cmp_col, Tcopy_col)``.

    Returns the summed cost.  Raises ValueError if either sequence has the
    wrong length.
    """
    (
        Nrow_input,
        Nrow_res,
        Ncol_input,
        Ncol_aggr,
        Ncol_group
    ) = args

    (
        Tstartup,
        Trow_once,
        Tres_once,
        Taggr_prepare_result,
        Taggr_process,
        Tgroup_cmp_col,
        Tcopy_col
    ) = params

    # Fixed start-up cost plus one term per result row and per input row.
    total_cost = Tstartup + Nrow_res * Tres_once + Nrow_input * Trow_once
    # Cost for judging the group: every input row, every group column.
    total_cost += Nrow_input * Ncol_group * Tgroup_cmp_col

    # Cost for group related operations, paid once per result row.
    total_cost += Nrow_res * (Ncol_input * Tcopy_col)
    total_cost += Nrow_res * (Ncol_aggr * Taggr_prepare_result)

    # Cost for processing each input row through every aggregate column.
    total_cost += Nrow_input * (Ncol_aggr * Taggr_process)

    return total_cost
|
||||
|
||||
|
||||
|
||||
|
||||
def extract_info_from_line(line):
    """Split a comma-separated data line and return its fields as floats.

    Raises ValueError if any field is not a valid float literal.
    """
    return [float(item) for item in line.split(",")]
|
||||
|
||||
|
||||
|
||||
# Usage: -i <input data> -m <model parameter file> -o <output file>.
# Only -i has a default; -m and -o are required.
file_name = "get_total.data.prep"
output_fit_res = False
wrong_arg = False
model_file_name = None
out_file_name = None

try:
    opts, args = getopt.getopt(sys.argv[1:], "i:o:m:")
except getopt.GetoptError:
    # getopt raises on unknown options, so the "wrong arg" report has to be
    # triggered here; the else-branch in the loop below is never reached.
    opts, args = [], []
    wrong_arg = True

for op, value in opts:
    if "-i" == op:
        file_name = value
    elif "-o" == op:
        output_fit_res = True
        out_file_name = value
    elif "-m" == op:
        model_file_name = value
    else:
        wrong_arg = True

if model_file_name is None or out_file_name is None:
    wrong_arg = True

if wrong_arg:
    print("wrong arg")
    sys.exit(1)

# The first line of the model file holds the comma-separated parameters.
with open(model_file_name, "r") as model_file:
    line = model_file.readline()
model_params = [float(p) for p in line.split(",")]

with open(file_name, "r") as input_file, open(out_file_name, "w") as out_file:
    for line in input_file:
        # Copy comment lines through, as the array/material/sort apply
        # scripts do, instead of failing in float().
        if line.startswith('#'):
            out_file.write(line)
            continue
        case_param = extract_info_from_line(line)
        args = (case_param[0],   # Nrow_input
                case_param[5],   # Nrow_res
                case_param[4],   # Ncol_input
                case_param[2],   # Ncol_aggr
                case_param[3])   # Ncol_group
        time = case_param[6]
        cost_val = mg_model_form(args, model_params)
        # Relative error of the prediction against the measured time.
        percent = (cost_val - time) / time

        new_line = ",".join([line.strip(),"\t" ,str(cost_val),"\t" , str(time),"\t\t" , str(percent * 100)])
        new_line += "\n"
        out_file.write(new_line)
|
||||
|
||||
|
||||
|
82
unittest/sql/optimizer/cost_model_utils/apply_material_model.py
Executable file
82
unittest/sql/optimizer/cost_model_utils/apply_material_model.py
Executable file
@ -0,0 +1,82 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
import getopt
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
|
||||
def material_model_form(args,
                        params):
    """Evaluate the material cost model.

    args: 2-tuple ``(Nrow, Ncol)``.
    params: 2-item sequence ``(Trow_once, Trow_col)``.

    Returns ``Nrow * (Trow_once + Ncol * Trow_col)``; the start-up term is
    disabled in this version of the model.
    """
    (
        Nrow,
        Ncol,
    ) = args

    (
        # Tstartup,
        Trow_once,
        Trow_col
    ) = params

    total_cost = 0  # Tstartup
    total_cost += Nrow * (Trow_once + Ncol * Trow_col)

    return total_cost
|
||||
|
||||
|
||||
def extract_info_from_line(line):
    """Split a comma-separated data line and return its fields as floats.

    Raises ValueError if any field is not a valid float literal.
    """
    return [float(item) for item in line.split(",")]
|
||||
|
||||
|
||||
# Usage: -i <input data> -m <model parameter file> -o <output file>.
# Only -i has a default; -m and -o are required.
file_name = "get_total.data.prep"
output_fit_res = False
wrong_arg = False
model_file_name = None
out_file_name = None

try:
    opts, args = getopt.getopt(sys.argv[1:], "i:o:m:")
except getopt.GetoptError:
    # getopt raises on unknown options, so the "wrong arg" report has to be
    # triggered here; the else-branch in the loop below is never reached.
    opts, args = [], []
    wrong_arg = True

for op, value in opts:
    if "-i" == op:
        file_name = value
    elif "-o" == op:
        output_fit_res = True
        out_file_name = value
    elif "-m" == op:
        model_file_name = value
    else:
        wrong_arg = True

if model_file_name is None or out_file_name is None:
    wrong_arg = True

if wrong_arg:
    print("wrong arg")
    sys.exit(1)

# The first line of the model file holds the comma-separated parameters.
with open(model_file_name, "r") as model_file:
    line = model_file.readline()
model_params = [float(p) for p in line.split(",")]

with open(file_name, "r") as input_file, open(out_file_name, "w") as out_file:
    for line in input_file:
        # Comment lines are copied through untouched.
        if line.startswith('#'):
            out_file.write(line)
            continue
        case_param = extract_info_from_line(line)
        args = (case_param[0],   # Nrow
                case_param[1])   # Ncol
        time = case_param[3]
        cost_val = material_model_form(args, model_params)
        # Relative error of the prediction against the measured time.
        percent = (cost_val - time) / time

        new_line = ",".join([line.strip(),"\t" ,str(cost_val),"\t" , str(time),"\t\t" , str(percent * 100)])
        new_line += "\n"
        out_file.write(new_line)
|
||||
|
||||
|
146
unittest/sql/optimizer/cost_model_utils/apply_merge_model.py
Executable file
146
unittest/sql/optimizer/cost_model_utils/apply_merge_model.py
Executable file
@ -0,0 +1,146 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
import getopt
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
|
||||
def merge_model_form(args,
                     params
                     ):
    """Evaluate the merge join cost model.

    args: 7-tuple ``(Nrow_res, Nrow_left, Nrow_right, Nright_cache_in,
        Nright_cache_out, Nright_cache_clear, Nequal_cond)``.
    params: 7-item sequence ``(Tstartup, Tres_right_op, Tres_right_cache,
        Tmatch_group, Tequal_fail, Trow_left, Trow_right)``.

    Returns the summed cost.  Raises ValueError if either sequence has the
    wrong length.
    """
    (
        Nrow_res,
        Nrow_left,
        Nrow_right,
        Nright_cache_in,
        Nright_cache_out,
        Nright_cache_clear,
        Nequal_cond
    ) = args

    (
        Tstartup,
        Tres_right_op,
        Tres_right_cache,
        Tmatch_group,
        # Tassemble_row,
        Tequal_fail,
        Trow_left,
        Trow_right
    ) = params

    total_cost = Tstartup
    total_cost += Nrow_left * Trow_left
    # Right rows that went into the cache are charged Tres_right_op instead
    # of Trow_right.
    total_cost += (Nrow_right - Nright_cache_in) * Trow_right
    total_cost += Nright_cache_in * Tres_right_op
    total_cost += Nright_cache_out * Tres_right_cache
    # A "Nrow_res * Tassemble_row" term is left disabled in this model.
    total_cost += Nright_cache_clear * Tmatch_group
    # NOTE(review): "2 * Tmatch_group" subtracts a time coefficient from
    # row counts; a count such as Nright_cache_clear may have been meant.
    # Kept as is because fitted parameters depend on this exact form.
    total_cost += (Nequal_cond - Nrow_res - 2 * Tmatch_group) * Tequal_fail

    return total_cost
|
||||
#
|
||||
# def merge_model_form(args,
|
||||
# params
|
||||
# ):
|
||||
# (
|
||||
# Nrow_res,
|
||||
# Nrow_left,
|
||||
# Nrow_right,
|
||||
# Nright_cache_in,
|
||||
# Nright_cache_out,
|
||||
# Nright_cache_clear,
|
||||
# Nequal_cond,
|
||||
# ) = args
|
||||
#
|
||||
# (
|
||||
# Tstartup,
|
||||
# Tright_cache_in,
|
||||
# Tright_cache_out,
|
||||
# Tright_cache_clear,
|
||||
# Tassemble_row,
|
||||
# Tequal_fail,
|
||||
# Trow_left,
|
||||
# #Trow_right
|
||||
# ) = params
|
||||
#
|
||||
# total_cost = Tstartup
|
||||
# total_cost += Nright_cache_in * Tright_cache_in
|
||||
# total_cost += (Nright_cache_out - Nright_cache_clear) * Tright_cache_out
|
||||
# total_cost += Nright_cache_clear * Tright_cache_clear
|
||||
# total_cost += Nrow_res * Tassemble_row
|
||||
# total_cost += (Nequal_cond - Nrow_res - 2 * Tright_cache_clear) * Tequal_fail
|
||||
# total_cost += Nrow_left * Trow_left
|
||||
# #total_cost += (Nrow_right - Nright_cache_in) * Trow_right
|
||||
#
|
||||
# return total_cost
|
||||
|
||||
|
||||
|
||||
def extract_info_from_line(line):
    """Split a comma-separated data line and return its fields as floats.

    Raises ValueError if any field is not a valid float literal.
    """
    return [float(item) for item in line.split(",")]
|
||||
|
||||
|
||||
# Usage: -i <input data> -m <model parameter file> -o <output file>.
# Only -i has a default; -m and -o are required.
file_name = "get_total.data.prep"
output_fit_res = False
wrong_arg = False
model_file_name = None
out_file_name = None

try:
    opts, args = getopt.getopt(sys.argv[1:], "i:o:m:")
except getopt.GetoptError:
    # getopt raises on unknown options, so the "wrong arg" report has to be
    # triggered here; the else-branch in the loop below is never reached.
    opts, args = [], []
    wrong_arg = True

for op, value in opts:
    if "-i" == op:
        file_name = value
    elif "-o" == op:
        output_fit_res = True
        out_file_name = value
    elif "-m" == op:
        model_file_name = value
    else:
        wrong_arg = True

if model_file_name is None or out_file_name is None:
    wrong_arg = True

if wrong_arg:
    print("wrong arg")
    sys.exit(1)

# The first line of the model file holds the comma-separated parameters.
with open(model_file_name, "r") as model_file:
    line = model_file.readline()
model_params = [float(p) for p in line.split(",")]

with open(file_name, "r") as input_file, open(out_file_name, "w") as out_file:
    for line in input_file:
        # Copy comment lines through, as the array/material/sort apply
        # scripts do, instead of failing in float().
        if line.startswith('#'):
            out_file.write(line)
            continue
        case_param = extract_info_from_line(line)
        args = (case_param[6],    # Nrow_res
                case_param[0],    # Nrow_left
                case_param[1],    # Nrow_right
                case_param[-3],   # Nright_cache_in
                case_param[-2],   # Nright_cache_out
                case_param[-1],   # Nright_cache_clear
                case_param[8])    # Nequal_cond
        time = case_param[7]
        cost_val = merge_model_form(args, model_params)
        # Relative error of the prediction against the measured time.
        percent = (cost_val - time) / time

        new_line = ",".join([line.strip(),"\t" ,str(cost_val),"\t" , str(time),"\t\t" , str(percent * 100)])
        new_line += "\n"
        out_file.write(new_line)
|
||||
|
||||
|
||||
|
101
unittest/sql/optimizer/cost_model_utils/apply_mg_model.py
Executable file
101
unittest/sql/optimizer/cost_model_utils/apply_mg_model.py
Executable file
@ -0,0 +1,101 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
import getopt
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def mg_model_form(args,
                  params
                  ):
    """Evaluate the merge group-by cost model.

    args: 5-tuple ``(Nrow_input, Nrow_res, Ncol_input, Ncol_aggr,
        Ncol_group)``.
    params: 6-item sequence ``(Trow_once, Tres_once, Taggr_prepare_result,
        Taggr_process, Tgroup_cmp_col, Tcopy_col)``; the start-up term is
        disabled in this version of the model.

    Returns the summed cost.  Raises ValueError if either sequence has the
    wrong length.
    """
    (
        Nrow_input,
        Nrow_res,
        Ncol_input,
        Ncol_aggr,
        Ncol_group
    ) = args

    (
        # Tstartup,
        Trow_once,
        Tres_once,
        Taggr_prepare_result,
        Taggr_process,
        Tgroup_cmp_col,
        Tcopy_col
    ) = params

    total_cost = Nrow_res * Tres_once + Nrow_input * Trow_once
    # Cost for judging the group: one column comparison per result row,
    # all group columns for every remaining input row.
    total_cost += Nrow_res * Tgroup_cmp_col
    total_cost += (Nrow_input - Nrow_res) * Ncol_group * Tgroup_cmp_col

    # Cost for group related operations, paid once per result row.
    total_cost += Nrow_res * (Ncol_input * Tcopy_col)
    total_cost += Nrow_res * (Ncol_aggr * Taggr_prepare_result)

    # Cost for processing each input row through every aggregate column.
    total_cost += Nrow_input * (Ncol_aggr * Taggr_process)

    return total_cost
|
||||
|
||||
|
||||
|
||||
|
||||
def extract_info_from_line(line):
    """Split a comma-separated data line and return its fields as floats.

    Raises ValueError if any field is not a valid float literal.
    """
    return [float(item) for item in line.split(",")]
|
||||
|
||||
|
||||
|
||||
# Usage: -i <input data> -m <model parameter file> -o <output file>.
# Only -i has a default; -m and -o are required.
file_name = "get_total.data.prep"
output_fit_res = False
wrong_arg = False
model_file_name = None
out_file_name = None

try:
    opts, args = getopt.getopt(sys.argv[1:], "i:o:m:")
except getopt.GetoptError:
    # getopt raises on unknown options, so the "wrong arg" report has to be
    # triggered here; the else-branch in the loop below is never reached.
    opts, args = [], []
    wrong_arg = True

for op, value in opts:
    if "-i" == op:
        file_name = value
    elif "-o" == op:
        output_fit_res = True
        out_file_name = value
    elif "-m" == op:
        model_file_name = value
    else:
        wrong_arg = True

if model_file_name is None or out_file_name is None:
    wrong_arg = True

if wrong_arg:
    print("wrong arg")
    sys.exit(1)

# The first line of the model file holds the comma-separated parameters.
with open(model_file_name, "r") as model_file:
    line = model_file.readline()
model_params = [float(p) for p in line.split(",")]

with open(file_name, "r") as input_file, open(out_file_name, "w") as out_file:
    for line in input_file:
        # Copy comment lines through, as the array/material/sort apply
        # scripts do, instead of failing in float().
        if line.startswith('#'):
            out_file.write(line)
            continue
        case_param = extract_info_from_line(line)
        args = (case_param[0],   # Nrow_input
                case_param[5],   # Nrow_res
                case_param[4],   # Ncol_input
                case_param[2],   # Ncol_aggr
                case_param[3])   # Ncol_group
        time = case_param[6]
        cost_val = mg_model_form(args, model_params)
        # Relative error of the prediction against the measured time.
        percent = (cost_val - time) / time

        new_line = ",".join([line.strip(),"\t" ,str(cost_val),"\t" , str(time),"\t\t" , str(percent * 100)])
        new_line += "\n"
        out_file.write(new_line)
|
||||
|
||||
|
||||
|
97
unittest/sql/optimizer/cost_model_utils/apply_nl_model.py
Executable file
97
unittest/sql/optimizer/cost_model_utils/apply_nl_model.py
Executable file
@ -0,0 +1,97 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
import getopt
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
|
||||
def nl_model_form(args,
                  params
                  ):
    """Evaluate the nested-loop join cost model.

    args: 7-tuple ``(Nrow_res, Nrow_left, Nrow_right, Nright_cache_in,
        Nright_cache_out, Nright_cache_clear, Nequal_cond)``.  The three
        cache counters are unpacked but not used by this model.
    params: 5-item sequence ``(Tstartup, Tres, Tfail, Tleft_row,
        Tright_row)``.

    Returns the summed cost.  Raises ValueError if either sequence has the
    wrong length.
    """
    (
        Nrow_res,
        Nrow_left,
        Nrow_right,
        Nright_cache_in,
        Nright_cache_out,
        Nright_cache_clear,
        Nequal_cond
    ) = args

    (
        Tstartup,
        # Tqual,
        Tres,
        Tfail,
        Tleft_row,
        Tright_row
    ) = params

    total_cost = Tstartup
    total_cost += Nrow_res * Tres
    # A "Nequal_cond * Tqual" term is left disabled in this model.
    # Condition evaluations that did not yield a result row.
    total_cost += (Nequal_cond - Nrow_res) * Tfail
    total_cost += Nrow_left * Tleft_row
    total_cost += Nrow_right * Tright_row

    return total_cost
|
||||
|
||||
|
||||
def extract_info_from_line(line):
    """Split a comma-separated data line and return its fields as floats.

    Raises ValueError if any field is not a valid float literal.
    """
    return [float(item) for item in line.split(",")]
|
||||
|
||||
|
||||
# Usage: -i <input data> -m <model parameter file> -o <output file>.
# Only -i has a default; -m and -o are required.
file_name = "get_total.data.prep"
output_fit_res = False
wrong_arg = False
model_file_name = None
out_file_name = None

try:
    opts, args = getopt.getopt(sys.argv[1:], "i:o:m:")
except getopt.GetoptError:
    # getopt raises on unknown options, so the "wrong arg" report has to be
    # triggered here; the else-branch in the loop below is never reached.
    opts, args = [], []
    wrong_arg = True

for op, value in opts:
    if "-i" == op:
        file_name = value
    elif "-o" == op:
        output_fit_res = True
        out_file_name = value
    elif "-m" == op:
        model_file_name = value
    else:
        wrong_arg = True

if model_file_name is None or out_file_name is None:
    wrong_arg = True

if wrong_arg:
    print("wrong arg")
    sys.exit(1)

# The first line of the model file holds the comma-separated parameters.
with open(model_file_name, "r") as model_file:
    line = model_file.readline()
model_params = [float(p) for p in line.split(",")]

with open(file_name, "r") as input_file, open(out_file_name, "w") as out_file:
    for line in input_file:
        # Copy comment lines through, as the array/material/sort apply
        # scripts do, instead of failing in float().
        if line.startswith('#'):
            out_file.write(line)
            continue
        case_param = extract_info_from_line(line)
        args = (case_param[6],    # Nrow_res
                case_param[0],    # Nrow_left
                case_param[1],    # Nrow_right
                case_param[-3],   # Nright_cache_in
                case_param[-2],   # Nright_cache_out
                case_param[-1],   # Nright_cache_clear
                case_param[8])    # Nequal_cond
        time = case_param[7]
        cost_val = nl_model_form(args, model_params)
        # Relative error of the prediction against the measured time.
        percent = (cost_val - time) / time

        new_line = ",".join([line.strip(),"\t" ,str(cost_val),"\t" , str(time),"\t\t" , str(percent * 100)])
        new_line += "\n"
        out_file.write(new_line)
|
||||
|
||||
|
||||
|
213
unittest/sql/optimizer/cost_model_utils/apply_sort_model.py
Executable file
213
unittest/sql/optimizer/cost_model_utils/apply_sort_model.py
Executable file
@ -0,0 +1,213 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
import getopt
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
|
||||
def material_model_form(args):
    """Evaluate the material cost model with hard-coded coefficients.

    args: 2-tuple ``(Nrow, Ncol)``.

    Returns ``Nrow * (Trow_once + Ncol * Trow_col)`` using the constants
    below.
    """
    (
        Nrow,
        Ncol,
    ) = args

    # Coefficients baked into this script (presumably an earlier fit of the
    # material model -- TODO confirm their origin).
    Trow_col = 0.02674675
    Trow_once = 0.07931677

    total_cost = 0  # Tstartup
    total_cost += Nrow * (Trow_once + Ncol * Trow_col)
    return total_cost
|
||||
|
||||
def array_model_form(args):
    """Evaluate the array cost model with hard-coded coefficients.

    args: the element count itself (a single number, not a tuple).

    Returns ``Telem_ence * Nelem + Telem_copy * copy_cnt`` where
    ``copy_cnt = ELEM_PER_PAGE * (2 ** extend_cnt - 1)`` and ``extend_cnt``
    is ``ceil(log2(Nelem / ELEM_PER_PAGE))``, never below 0.
    """
    # Coefficients baked into this script (presumably an earlier fit of the
    # array model -- TODO confirm their origin).
    Telem_ence = 0.00898860
    Telem_copy = 0.00631888

    Nelem = args

    ELEM_PER_PAGE = 1024
    if Nelem > ELEM_PER_PAGE:
        extend_cnt = math.ceil(math.log(float(Nelem) / ELEM_PER_PAGE, 2))
    else:
        # Up to one page there is no extension.  Taking this branch also
        # keeps math.log() away from counts <= 0, which it rejects.
        extend_cnt = 0
    copy_cnt = ELEM_PER_PAGE * (math.pow(2, extend_cnt) - 1)

    total_cost = Telem_ence * Nelem
    # A "Tmem_alloc * extend_cnt" term is left disabled in this model.
    total_cost += Telem_copy * copy_cnt

    return total_cost
|
||||
|
||||
def get_row_size(reserve, col):
    """Return the size figure the sort model uses for one row.

    reserve: count multiplied by 16 and added to the fixed 16.
    col: column count; it is divided by 8 before use.

    The result is ``16 + reserve * 16 + (col / 8) * 263 + col / 8`` where
    263 is the sum ``3 + 8 + 4 + 8 + 16 + 32 + 64 + 128``.
    NOTE(review): units and the meaning of the per-8-column constants are
    not visible here -- confirm against the benchmark schema.
    """
    size = 16
    size += reserve * 16
    # The script passes float column counts, so this has always been a true
    # division there; 8.0 keeps integer callers on Python 2 consistent.
    col = col / 8.0
    size += col * (3 + 8 + 4 + 8 + 16 + 32 + 64 + 128)
    size += col
    return size
|
||||
|
||||
def get_miss_prob(Nrow, Ncol, Nord, Turn):
    """Return the miss probability used by the sort model.

    The total size is ``Nrow * get_row_size(Nord, Ncol)``.  The hit ratio
    is ``Turn / total_size``, fixed at 0.9 once ``Turn`` reaches 90% of the
    total size; the function returns ``1 - hit``.
    NOTE(review): the variable name suggests Turn is the amount of data
    covered by the TLB -- confirm.
    """
    total_size = Nrow * get_row_size(Nord, Ncol)
    TLBcovered = Turn
    if TLBcovered >= 0.9 * total_size:
        hit = 0.9
    else:
        # float() prevents floor division when both values are integers
        # under Python 2.
        hit = float(TLBcovered) / total_size
    return 1 - hit
|
||||
|
||||
|
||||
def sort_model_form(args,
                    params
                    ):
    """Evaluate the sort cost model.

    args: 3-tuple ``(Nrow, Ncol, Nordering)``.
    params: 3-item sequence ``(Tcompare, Tmiss_K1, Turn)``.

    Returns ``Nrow * compare_cost * log2(Nrow)``, where ``compare_cost`` is
    ``Tcompare * min(Nordering, 1)`` plus ``Tmiss_K1`` times the miss
    probability from get_miss_prob().  Returns 0 when ``Nrow <= 0``.
    """
    (
        Nrow,
        Ncol,
        Nordering
    ) = args

    (
        Tcompare,
        Tmiss_K1,
        Turn
    ) = params

    # Start-up, row-store, push-array and get-row terms are all disabled in
    # this version of the model; only the sorting term remains.
    total_cost = 0  # Tstartup

    # math.log() rejects values <= 0, and the sorting term is the only
    # contribution, so there is no cost to report.
    if Nrow <= 0:
        return total_cost

    # Cost for sorting: ordering columns beyond the first are not charged.
    Nordering_cmp = Nordering
    if Nordering >= 1:
        Nordering_cmp = 1
    compare_cost = Tcompare * Nordering_cmp + Tmiss_K1 * get_miss_prob(Nrow, Ncol, Nordering, Turn)
    total_cost += Nrow * compare_cost * math.log(Nrow, 2)

    return total_cost
|
||||
|
||||
#
|
||||
# def sort_model_form(args,
|
||||
# params):
|
||||
# (
|
||||
# Nrow,
|
||||
# Nordering,
|
||||
# Ncol,
|
||||
# ) = args
|
||||
#
|
||||
# (
|
||||
# Tstartup,
|
||||
# Trowstore_once,
|
||||
# Trowstore_col,
|
||||
# # Tarray_once,
|
||||
# # Tarray_elem_copy,
|
||||
# Treserve_cell,
|
||||
# Tcompare
|
||||
# ) = params
|
||||
#
|
||||
#
|
||||
# total_cost = Tstartup
|
||||
#
|
||||
# #cost for row store
|
||||
# total_cost += Nrow * (Trowstore_once + Ncol * Trowstore_col)
|
||||
# total_cost += Treserve_cell * Nrow * Ncol * Nordering
|
||||
#
|
||||
# #cost for array
|
||||
# # ELEM_PER_PAGE = 1024
|
||||
# # extend_cnt = math.ceil(math.log(float(Nrow)/ELEM_PER_PAGE, 2))
|
||||
# # copy_cnt = ELEM_PER_PAGE * (math.pow(2, extend_cnt) - 1)
|
||||
# #total_cost += Tarray_once * Nrow + Tarray_elem_copy * copy_cnt
|
||||
#
|
||||
# #cost for sorting
|
||||
# if Nordering > 2:
|
||||
# Nordering_cmp = 2
|
||||
# else:
|
||||
# Nordering_cmp = Nordering
|
||||
# compare_cost = Tcompare * Nordering_cmp
|
||||
# total_cost += Nrow * compare_cost * math.log(Nrow, 2)
|
||||
#
|
||||
# return total_cost
|
||||
|
||||
|
||||
def extract_info_from_line(line):
    """Split a comma-separated data line and return its fields as floats.

    Raises ValueError if any field is not a valid float literal.
    """
    return [float(item) for item in line.split(",")]
|
||||
|
||||
|
||||
# sys.argv.extend('-i sort.prep.double -o sort.fit.double -m sort.model.double'.split())
|
||||
|
||||
# Usage: -i <input data> -m <model parameter file> -o <output file>.
# Only -i has a default; -m and -o are required.
file_name = "get_total.data.prep"
output_fit_res = False
wrong_arg = False
model_file_name = None
out_file_name = None

try:
    opts, args = getopt.getopt(sys.argv[1:], "i:o:m:")
except getopt.GetoptError:
    # getopt raises on unknown options, so the "wrong arg" report has to be
    # triggered here; the else-branch in the loop below is never reached.
    opts, args = [], []
    wrong_arg = True

for op, value in opts:
    if "-i" == op:
        file_name = value
    elif "-o" == op:
        output_fit_res = True
        out_file_name = value
    elif "-m" == op:
        model_file_name = value
    else:
        wrong_arg = True

if model_file_name is None or out_file_name is None:
    wrong_arg = True

if wrong_arg:
    print("wrong arg")
    sys.exit(1)

# The first line of the model file holds the comma-separated parameters.
with open(model_file_name, "r") as model_file:
    line = model_file.readline()
model_params = [float(p) for p in line.split(",")]

with open(file_name, "r") as input_file, open(out_file_name, "w") as out_file:
    for line in input_file:
        # Comment lines are copied through untouched.
        if line.startswith('#'):
            out_file.write(line)
            continue
        case_param = extract_info_from_line(line)
        args = (case_param[0],   # Nrow
                case_param[1],   # Ncol
                case_param[2])   # Nordering
        time = case_param[4]
        cost_val = sort_model_form(args, model_params)
        # Relative error of the prediction against the measured time.
        percent = (cost_val - time) / time

        new_line = ",".join([line.strip(),str(cost_val),str(percent * 100)])
        new_line += "\n"
        out_file.write(new_line)
|
||||
|
||||
|
||||
|
33
unittest/sql/optimizer/cost_model_utils/benchmaster_array.py
Executable file
33
unittest/sql/optimizer/cost_model_utils/benchmaster_array.py
Executable file
@ -0,0 +1,33 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
import subprocess as sp
|
||||
import os
|
||||
|
||||
# Start from a clean result file; every command below appends to it.
if os.path.exists("array_result"):
    os.remove("array_result")

#cmd_form = 'LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH ./cost_model_util -GB -s c10k1.schema -t array -r 1000000'
cmd_form = 'LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH ./cost_model_util -G -s c10k1.schema -t array -r 1000000'
cmd_elements = cmd_form.split(" ")

minrc = 1
maxrc = 1100001
step = 1000
case_run_time = 5

# maxrc is inclusive in the loop below, hence the "+ 1"; without it the
# progress output reports one case too few.
total_case_count = (maxrc - minrc) // step + 1
case_count = 0

print("Total case count %s ..." % (total_case_count))
for row_count in range(minrc, maxrc + 1, step):
    # The last command element is the value of -r.
    cmd_elements[-1] = str(row_count)

    case_count += 1
    prompt = "Running case %s / %s ... : %s " % (case_count, total_case_count, " ".join(cmd_elements))
    print(prompt)
    sp.check_call('echo "### %s" >> array_result' % prompt, shell=True)
    for time in range(case_run_time):
        # Prefix each measurement with its row count; "echo -n" keeps the
        # tool's own output on the same line.
        sp.check_call("echo -n '%s,' >> array_result"%(row_count), shell=True)
        sp.check_call(" ".join(cmd_elements) + ' >> array_result', shell=True)
|
||||
|
36
unittest/sql/optimizer/cost_model_utils/benchmaster_material.py
Executable file
36
unittest/sql/optimizer/cost_model_utils/benchmaster_material.py
Executable file
@ -0,0 +1,36 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
import subprocess as sp
|
||||
import os
|
||||
|
||||
# Start from clean result files; every command below appends.
if os.path.exists("material_result"):
    os.remove("material_result")

if os.path.exists("material_final_result"):
    os.remove("material_final_result")

# cmd_form = "./cost_model_util -B -t material -s c10k1.schema -r 1000 -p 1 >> material_result"
cmd_form = "./cost_model_util -G -t material -s c10k1.schema -r 1000 -p 1 >> material_result"
cmd_elements = cmd_form.split(" ")

row_count_max = 10001
row_count_step = 100

column_counts = [3, 5, 8]

case_run_time = 7

# "//" keeps this an integer on both Python 2 and Python 3.
total_case_count = (row_count_max // row_count_step + 1) * len(column_counts) * case_run_time
case_count = 0

print("Total case count %s ..." % (total_case_count))
for row_count in range(1, row_count_max + 1, row_count_step):
    for column_count in column_counts:
        for time in range(case_run_time):
            case_count += 1
            # Elements 7 and 9 are the values of -r and -p in cmd_form.
            cmd_elements[7] = str(row_count)
            cmd_elements[9] = str(column_count)
            # Prefix each measurement with its parameters; "echo -n" keeps
            # the tool's own output on the same line.
            sp.check_call("echo -n '%s,' >> material_result" % (row_count), shell=True)
            sp.check_call("echo -n '%s,' >> material_result" % (column_count), shell=True)
            print("Running case %s / %s ... : %s " % (case_count, total_case_count, " ".join(cmd_elements)))
            sp.check_call(" ".join(cmd_elements), shell=True)
|
102
unittest/sql/optimizer/cost_model_utils/benchmaster_merge.py
Executable file
102
unittest/sql/optimizer/cost_model_utils/benchmaster_merge.py
Executable file
@ -0,0 +1,102 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
import subprocess as sp
|
||||
import os
|
||||
import sys
|
||||
import getopt
|
||||
import time
|
||||
|
||||
ISOTIMEFORMAT='%Y-%m-%d %X'

#cmd_form = "./cost_model_util -B -s c10k1x2.schema -t merge -r 900 -r 900 -Z1 -Z1 -C 2 -C 2 -V 3 -V 3 >> res"
cmd_form = "./cost_model_util -G -s c10k1x2.schema -t merge -r 900 -r 900 -Z1 -Z1 -C 2 -C 2 -V 3 -V 3 >> res"
cmd_elements = cmd_form.split(" ")

# Not used by the active loop below.
row_count_max = 10001
row_count_step = 100

left_row_counts = [5000, 10000, 50000, 100000]
right_row_counts = [5000, 10000, 50000, 100000]

left_steps = [1, 3, 4, 5, 7, 10]
right_steps = [1, 3, 4, 5, 7, 10]

left_step_lengths = [1, 2, 4, 5, 10]
right_step_lengths = [1, 2, 4, 5, 10]

case_run_time = 7

# One case per parameter combination and repetition.
total_case_count = len(left_row_counts)
total_case_count *= len(right_row_counts)
total_case_count *= len(left_steps)
total_case_count *= len(right_steps)
total_case_count *= len(left_step_lengths)
total_case_count *= len(right_step_lengths)
total_case_count *= case_run_time

wrong_arg = False

out_file_name = "merge_result"
try:
    opts, args = getopt.getopt(sys.argv[1:], "o:")
except getopt.GetoptError:
    # getopt raises on unknown options, so the "wrong arg" report has to be
    # triggered here; the else-branch in the loop below is never reached.
    opts, args = [], []
    wrong_arg = True

for op, value in opts:
    if "-o" == op:
        out_file_name = value
    else:
        wrong_arg = True

if wrong_arg:
    print("wrong arg")
    sys.exit(1)

case_count = 0
# The last command element is the redirect target.
cmd_elements[-1] = out_file_name
if os.path.exists(out_file_name):
    os.remove(out_file_name)

print("Total case count %s ..." % (total_case_count))
for left_row_count in left_row_counts:
    for right_row_count in right_row_counts:
        for left_step in left_steps:
            for right_step in right_steps:
                for left_step_length in left_step_lengths:
                    for right_step_length in right_step_lengths:
                        for i in range(case_run_time):
                            case_count += 1
                            # Indexes are the values of the two -r, -C and
                            # -V options in cmd_form.
                            cmd_elements[7] = str(left_row_count)
                            cmd_elements[9] = str(right_row_count)
                            cmd_elements[13] = str(left_step)
                            cmd_elements[15] = str(right_step)
                            cmd_elements[17] = str(left_step_length)
                            cmd_elements[19] = str(right_step_length)

                            prompt = "%s Running case %s / %s ... : %s " % (time.strftime( ISOTIMEFORMAT, time.localtime()), case_count, total_case_count, " ".join(cmd_elements))
                            print(prompt)

                            params = [str(p) for p in [left_row_count, right_row_count, left_step, right_step, left_step_length, right_step_length]]
                            # A '#' header line, then the parameters as a
                            # prefix of the line the tool itself appends.
                            sp.check_call("echo '#%s' >> %s"%(prompt, out_file_name), shell=True)
                            sp.check_call("echo -n '%s,' >> %s"%(",".join(params), out_file_name), shell=True)
                            sp.check_call(" ".join(cmd_elements), shell=True)
|
||||
|
||||
|
||||
|
||||
|
||||
#
|
||||
# total_case_count = (row_count_max / row_count_step + 1) * len(column_counts) * case_run_time
|
||||
# case_count = 0
|
||||
#
|
||||
# print "Total case count %s ..." % (total_case_count)
|
||||
# for row_count in xrange(1, row_count_max + 1, row_count_step):
|
||||
# for column_count in column_counts:
|
||||
# for time in xrange(case_run_time):
|
||||
# case_count += 1
|
||||
# cmd_elements[7] = str(row_count)
|
||||
# cmd_elements[9] = str(column_count)
|
||||
# sp.check_call("echo -n '%s,' >> material_result"%(row_count), shell=True)
|
||||
# sp.check_call("echo -n '%s,' >> material_result"%(column_count), shell=True)
|
||||
# print "Running case %s / %s ... : %s " % (case_count, total_case_count, " ".join(cmd_elements))
|
||||
# sp.check_call(" ".join(cmd_elements), shell=True)
|
||||
#
|
87
unittest/sql/optimizer/cost_model_utils/benchmaster_mergegroupby.py
Executable file
87
unittest/sql/optimizer/cost_model_utils/benchmaster_mergegroupby.py
Executable file
@ -0,0 +1,87 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
import subprocess as sp
|
||||
import os
|
||||
import sys
|
||||
import getopt
|
||||
import time
|
||||
|
||||
ISOTIMEFORMAT = '%Y-%m-%d %X'

# cmd_form = "./cost_model_util -t mg -B -s c10k1.schema -r 10000 -Z 1 -V 10 -e 1 -o 10 -p 1 >> out_file"
cmd_form = "./cost_model_util -t mg -G -s c10k1.schema -r 10000 -Z 1 -V 10 -e 1 -o 10 -p 1 >> mergegroupby_result"
cmd_elements = cmd_form.split(" ")

row_counts = [10, 30, 50, 70, 100, 1000, 5000, 10000]
steps = [1, 3, 5, 10, 20]
aggr_funcs = [1, 4, 7, 10]
group_cols = [1, 4, 7, 10]
non_group_cols = [10]

case_run_time = 7

# One case per parameter combination and repetition.
total_case_count = len(row_counts)
total_case_count *= len(steps)
total_case_count *= len(aggr_funcs)
total_case_count *= len(group_cols)
total_case_count *= len(non_group_cols)
total_case_count *= case_run_time

print(total_case_count)
wrong_arg = False

out_file_name = "mergegroupby_result"
try:
    opts, args = getopt.getopt(sys.argv[1:], "o:")
except getopt.GetoptError:
    # getopt raises on unknown options, so the "wrong arg" report has to be
    # triggered here; the else-branch in the loop below is never reached.
    opts, args = [], []
    wrong_arg = True

for op, value in opts:
    if "-o" == op:
        out_file_name = value
    else:
        wrong_arg = True

if wrong_arg:
    print("wrong arg")
    sys.exit(1)

case_count = 0
# The last command element is the redirect target.
cmd_elements[-1] = out_file_name
# Only the file this run writes to is removed; an existing default result
# file is left alone when -o names a different file.
if os.path.exists(out_file_name):
    os.remove(out_file_name)

print("Total case count %s ..." % (total_case_count))

for row_count in row_counts:
    for step in steps:
        for aggr_func in aggr_funcs:
            for group_col in group_cols:
                for non_group_col in non_group_cols:
                    for run_time in range(case_run_time):
                        # Indexes are the values of -r, -V, -e, -o and -p
                        # in cmd_form.
                        cmd_elements[7] = str(row_count)
                        cmd_elements[11] = str(step)
                        cmd_elements[13] = str(aggr_func)
                        cmd_elements[15] = str(group_col)
                        cmd_elements[17] = str(non_group_col)

                        param = ",".join([cmd_elements[7],
                                          cmd_elements[11],
                                          cmd_elements[13],
                                          cmd_elements[15],
                                          cmd_elements[17]]) + ","

                        # Count first so progress reads 1..total, matching
                        # the other benchmaster scripts.
                        case_count += 1

                        prompt = "%s Running case %s / %s ... : %s " % (
                            time.strftime(ISOTIMEFORMAT, time.localtime()), case_count, total_case_count,
                            " ".join(cmd_elements))
                        print(prompt)

                        sp.check_call("echo '#%s' >> %s" % (prompt, out_file_name), shell=True)

                        if group_col <= non_group_col:
                            # Parameters as a prefix of the line the tool
                            # itself appends.
                            sp.check_call("echo -n '%s' >> %s" % (param, out_file_name), shell=True)
                            sp.check_call(" ".join(cmd_elements), shell=True)
                        else:
                            # Combinations with more group columns than
                            # non-group columns are recorded but not run.
                            sp.check_call("echo '#%s skipped' >> %s" % (param, out_file_name), shell=True)
|
101
unittest/sql/optimizer/cost_model_utils/benchmaster_miss.py
Normal file
101
unittest/sql/optimizer/cost_model_utils/benchmaster_miss.py
Normal file
@ -0,0 +1,101 @@
|
||||
#!/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
__author__ = 'dongyun.zdy'
|
||||
|
||||
import datetime
|
||||
import multiprocessing
|
||||
import MySQLdb
|
||||
import Queue
|
||||
import signal
|
||||
import re
|
||||
import argparse
|
||||
import time
|
||||
import sys
|
||||
import subprocess as sp
|
||||
import os
|
||||
|
||||
outfile = 'miss.result'
|
||||
schema_file = 'miss.schema'
|
||||
if os.path.exists(outfile):
|
||||
os.remove(outfile)
|
||||
|
||||
|
||||
def remove_schema():
    """Delete the file named by the module-level ``schema_file`` if it exists."""
    if not os.path.exists(schema_file):
        return
    os.remove(schema_file)
|
||||
|
||||
|
||||
def write_schema(s):
    """Write the text ``s`` to ``schema_file``, replacing any previous content.

    The file is handled by a ``with`` block so the handle is closed even
    when the write raises.
    """
    with open(schema_file, 'w') as of:
        of.write(s)
|
||||
|
||||
|
||||
def make_seq(t, cnt):
    """Return a new list that holds ``cnt`` copies of ``t``."""
    return [t] * cnt
|
||||
|
||||
|
||||
def make_schema(types):
    """Generate the ``create table t1`` statement for ``types`` and store it.

    Columns are named c1, c2, ... in the order of ``types`` and c1 is the
    primary key. The statement is appended to ``outfile`` as a ``#``
    comment line and then written to the schema file.
    """
    remove_schema()
    column_defs = "".join("c%d %s, " % (idx, col_type)
                          for idx, col_type in enumerate(types, 1))
    # [:-2] strips the ", " left behind by the last column definition.
    s = ("create table t1 (" + column_defs)[:-2] + ', primary key (c1))'
    run_cmd('echo "# %s" >> ' % s + outfile)
    write_schema(s)
|
||||
|
||||
|
||||
def run_cmd(cmd):
    """Run ``cmd`` through the shell and return everything it printed.

    stderr is redirected into stdout, so the returned text contains both
    streams. The exit status of the command is not checked.

    :param cmd: shell command line to execute.
    :return: the combined output of the command as a string.
    """
    p = sp.Popen(cmd, shell=True, stdout=sp.PIPE, stderr=sp.STDOUT)
    # communicate() drains the pipe in one pass and waits for the child,
    # instead of growing the result with ``res += line`` for every line.
    res, _ = p.communicate()
    if not isinstance(res, str):
        # Python 3 hands back bytes; callers concatenate the result with str.
        res = res.decode('utf-8', 'replace')
    sys.stdout.flush()
    return res
|
||||
|
||||
|
||||
#cmd_form1 = 'LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH ./cost_model_util -BGK -t material -s miss.schema -r 500000'.split()
cmd_form1 = 'LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH ./cost_model_util -GK -t material -s miss.schema -r 500000'.split()

types_to_test = {'bigint': 'bigint', 'double': 'double', 'float': 'float', 'timestamp': 'timestamp',
                 'number': 'number(20,3)', 'v32': 'varchar(32)', 'v64': 'varchar(64)', 'v128': 'varchar(128)'}
row_counts = [1000, 2000, 4000, 7000, 8000, 10000, 20000, 50000]
input_col_cnts = [1, 2, 3, 6]
case_run_time = 7

total_case_count = len(row_counts) * len(input_col_cnts)
case_count = 0

print("Total case count %s ..." % (total_case_count))
for col_count in input_col_cnts:
    # The schema repeats the sorted set of tested types col_count times.
    make_schema(sorted(types_to_test.values()) * col_count)
    for row_count in row_counts:
        # The last command element is the value of -r.
        cmd_form1[-1] = str(row_count)
        case_count += 1
        prompt = "Running case %s / %s ... : %s " % (case_count, total_case_count, " ".join(cmd_form1))
        print(prompt)
        sp.check_call('echo "### %s" >> ' % prompt + outfile, shell=True)
        caseinfo = '%d,%d,' % (row_count, col_count)
        # Every case is measured case_run_time times with -i3, then again with -i4.
        for info_flag in (" -i3", " -i4"):
            for t in range(case_run_time):
                print(t)
                res = caseinfo + run_cmd(" ".join(cmd_form1) + info_flag).strip()
                run_cmd('echo "%s" >> ' % (res) + outfile)
|
103
unittest/sql/optimizer/cost_model_utils/benchmaster_nl.py
Executable file
103
unittest/sql/optimizer/cost_model_utils/benchmaster_nl.py
Executable file
@ -0,0 +1,103 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
import getopt
import itertools
import os
import subprocess as sp
import sys
import time

ISOTIMEFORMAT='%Y-%m-%d %X'

# cmd_form = "./cost_model_util -B -s c10k1x2.schema -t nestloop -r 900 -r 900 -Z1 -Z1 -C 2 -C 2 -V 3 -V 3 >> res"
cmd_form = "./cost_model_util -G -s c10k1x2.schema -t nestloop -r 900 -r 900 -Z1 -Z1 -C 2 -C 2 -V 3 -V 3 >> nl_result"
cmd_elements = cmd_form.split(" ")

row_count_max = 10001
row_count_step = 100

left_row_counts = [10, 100, 500, 1000]
right_row_counts = [10, 100, 500, 1000]

left_steps = [1, 3, 4, 5, 7, 10]
right_steps = [1, 3, 4, 5, 7, 10]

left_step_lengths = [1, 2, 4, 5, 10]
right_step_lengths = [1, 2, 4, 5, 10]

case_run_time = 7

# One run per parameter combination per repetition.
total_case_count = case_run_time
for choices in (left_row_counts, right_row_counts, left_steps, right_steps,
                left_step_lengths, right_step_lengths):
    total_case_count *= len(choices)

wrong_arg = False

#out_file_name = "nestloop_result"
out_file_name = "nl_result"
opts,args = getopt.getopt(sys.argv[1:],"o:")
for op, value in opts:
    if op == "-o":
        out_file_name = value
    else:
        wrong_arg = True

if wrong_arg:
    print("wrong arg")
    sys.exit(1)

case_count = 0
# The last command element is the redirect target of ">>".
cmd_elements[-1] = out_file_name
if os.path.exists(out_file_name):
    os.remove(out_file_name)

print("Total case count %s ..." % (total_case_count))
# Positions inside cmd_elements of the two -r, two -C and two -V values.
value_slots = (7, 9, 13, 15, 17, 19)
# product() walks the combinations with the last list varying fastest,
# i.e. in the same order as six nested for-loops.
for combo in itertools.product(left_row_counts, right_row_counts,
                               left_steps, right_steps,
                               left_step_lengths, right_step_lengths):
    (left_row_count, right_row_count, left_step, right_step,
     left_step_length, right_step_length) = combo
    params = [str(value) for value in combo]
    for i in range(case_run_time):
        case_count += 1
        for slot, text in zip(value_slots, params):
            cmd_elements[slot] = text

        prompt = "%s Running case %s / %s ... : %s " % (time.strftime( ISOTIMEFORMAT, time.localtime()), case_count, total_case_count, " ".join(cmd_elements))
        print(prompt)

        # Each result line is "#<prompt>" followed by "<params>,<tool output>".
        sp.check_call("echo '#%s' >> %s"%(prompt, out_file_name), shell=True)
        sp.check_call("echo -n '%s,' >> %s"%(",".join(params), out_file_name), shell=True)
        sp.check_call(" ".join(cmd_elements), shell=True)
|
||||
|
||||
|
||||
|
||||
|
||||
#
|
||||
# total_case_count = (row_count_max / row_count_step + 1) * len(column_counts) * case_run_time
|
||||
# case_count = 0
|
||||
#
|
||||
# print "Total case count %s ..." % (total_case_count)
|
||||
# for row_count in xrange(1, row_count_max + 1, row_count_step):
|
||||
# for column_count in column_counts:
|
||||
# for time in xrange(case_run_time):
|
||||
# case_count += 1
|
||||
# cmd_elements[7] = str(row_count)
|
||||
# cmd_elements[9] = str(column_count)
|
||||
# sp.check_call("echo -n '%s,' >> material_result"%(row_count), shell=True)
|
||||
# sp.check_call("echo -n '%s,' >> material_result"%(column_count), shell=True)
|
||||
# print "Running case %s / %s ... : %s " % (case_count, total_case_count, " ".join(cmd_elements))
|
||||
# sp.check_call(" ".join(cmd_elements), shell=True)
|
||||
#
|
@ -0,0 +1,76 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
import subprocess as sp
|
||||
import os
|
||||
|
||||
schema_file = 'rowstore.schema'
|
||||
outfile = 'rowstore.result'
|
||||
|
||||
|
||||
def remove_schema():
    """Remove the file named by the module-level ``schema_file`` when present."""
    if not os.path.exists(schema_file):
        return
    os.remove(schema_file)
|
||||
|
||||
def write_schema(s):
    """Write the text ``s`` to ``schema_file``, replacing any previous content.

    A ``with`` block guarantees the handle is closed even if the write
    raises.
    """
    with open(schema_file, 'w') as of:
        of.write(s)
|
||||
|
||||
|
||||
def make_seq(t, cnt):
    """Return a new list made of ``cnt`` repetitions of ``t``."""
    return [t] * cnt
|
||||
|
||||
|
||||
def make_schema(types):
    """Generate the ``create table t1`` statement for ``types`` and store it.

    Columns are named c1, c2, ... in the order of ``types`` and c1 is the
    primary key. The statement is printed and written to the schema file.
    """
    remove_schema()
    column_defs = "".join("c%d %s, " % (idx, col_type)
                          for idx, col_type in enumerate(types, 1))
    # [:-2] strips the ", " left behind by the last column definition.
    s = ("create table t1 (" + column_defs)[:-2] + ', primary key (c1))'
    print(s)
    write_schema(s)
|
||||
|
||||
|
||||
cmdform = 'LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH ./cost_model_util -RBGK -t material -s rowstore.schema -r 10 -i1'.split()
|
||||
|
||||
|
||||
types_to_test = {'bigint':'bigint', 'double':'double', 'float':'float', 'timestamp':'timestamp', 'number':'number(20,3)', 'v32':'varchar(32)', 'v64':'varchar(64)', 'v128':'varchar(128)'}
|
||||
row_counts = [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 20000, 50000]
|
||||
col_nums = [1, 3, 20, 50]
|
||||
case_run_time = 7
|
||||
|
||||
total_case_count = len(types_to_test) * len(row_counts) * len(col_nums)
|
||||
case_count = 0
|
||||
|
||||
if os.path.exists(outfile):
|
||||
os.remove(outfile)
|
||||
|
||||
# for t in types_to_test:
|
||||
# outfile = 'rowstore.result.' + t
|
||||
# if os.path.exists(outfile):
|
||||
# os.remove(outfile)
|
||||
# for n in col_nums:
|
||||
# make_schema(make_seq(types_to_test[t], n))
|
||||
# for rc in row_counts:
|
||||
# cmdform[8] = str(rc)
|
||||
# case_count += 1
|
||||
# prompt = "# %d / %d %s col_cnt = %d rc = %d \n# %s" % (case_count, total_case_count, t, n, rc, ' '.join(cmdform))
|
||||
# print prompt
|
||||
# sp.check_call('echo "%s" >> ' % prompt + outfile, shell=True)
|
||||
# for times in xrange(0, case_run_time):
|
||||
# print times
|
||||
# sp.check_call("echo -n '%s,' >> " % str(rc) + outfile, shell=True)
|
||||
# sp.check_call("echo -n '%s,' >> " % str(n) + outfile, shell=True)
|
||||
# sp.check_call(" ".join(cmdform) + ' >> ' + outfile, shell=True)
|
||||
|
||||
make_schema(make_seq('bigint', 50))
|
||||
|
||||
|
44
unittest/sql/optimizer/cost_model_utils/benchmaster_sort.py
Executable file
44
unittest/sql/optimizer/cost_model_utils/benchmaster_sort.py
Executable file
@ -0,0 +1,44 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
import subprocess as sp
|
||||
import os
|
||||
|
||||
if os.path.exists("sort_result"):
    os.remove("sort_result")

#cmd_form = "LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH ./cost_model_util -GBR -t sort -s c20.schema -r 1000 -c 10 -p 10 >> sort_result"
cmd_form = "LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH ./cost_model_util -GR -t sort -s sort.schema -r 1000 -c 10 -p 10 >> sort_result"
cmd_elements = cmd_form.split(" ")

row_counts = [1, 100, 500, 800, 1000, 3000, 5000, 8000, 9000, 10000, 20000, 40000, 60000, 70000, 100000, 300000]
column_counts = [1, 2, 3, 4, 5]
#input_col_cnts = [15, 30, 45]
input_col_cnts = [3, 5, 9] #schema file related, col counts should not be less than projector count
case_run_time = 7

total_case_count = len(row_counts) * len(column_counts) * len(input_col_cnts)
case_count = 0

print("Total case count %s ..." % (total_case_count))
for row_count in row_counts:
    # Elements 8, 10 and 12 are the values that follow -r, -c and -p.
    cmd_elements[8] = str(row_count)
    for column_count in column_counts:
        cmd_elements[10] = str(column_count)
        for input_col in input_col_cnts:
            cmd_elements[12] = str(input_col)

            case_count += 1
            prompt = "Running case %s / %s ... : %s " % (case_count, total_case_count, " ".join(cmd_elements))
            print(prompt)
            sp.check_call('echo "### %s" >> sort_result' % prompt, shell=True)
            if column_count > input_col:
                # The -c value exceeds the -p value: record the case as skipped.
                print("### PASS")
                sp.check_call('echo "### PASS" >> sort_result', shell=True)
                continue
            for time in range(case_run_time):
                print("running the %d time" % time)
                # Prefix the tool output with the three case parameters.
                for field in (row_count, column_count, input_col):
                    sp.check_call("echo -n '%s,' >> sort_result"%(field), shell=True)
                sp.check_call(" ".join(cmd_elements), shell=True)
|
||||
|
97
unittest/sql/optimizer/cost_model_utils/benchmaster_sort_add.py
Executable file
97
unittest/sql/optimizer/cost_model_utils/benchmaster_sort_add.py
Executable file
@ -0,0 +1,97 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
import subprocess as sp
|
||||
import os
|
||||
from cost_test_conf import Config
|
||||
|
||||
schema_file = 'sort.schema'
|
||||
outfile = 'sort.result'
|
||||
|
||||
|
||||
def remove_schema():
    """Delete the file named by the module-level ``schema_file`` if it exists."""
    if not os.path.exists(schema_file):
        return
    os.remove(schema_file)
|
||||
|
||||
def write_schema(s):
    """Write the text ``s`` to ``schema_file``, replacing any previous content.

    The ``with`` block closes the handle even when the write raises.
    """
    with open(schema_file, 'w') as of:
        of.write(s)
|
||||
|
||||
|
||||
def make_seq(t, cnt):
    """Return a new list that holds ``cnt`` copies of ``t``."""
    return [t] * cnt
|
||||
|
||||
|
||||
def make_schema(types):
    """Generate the ``create table t1`` statement for ``types`` and store it.

    Columns are named c1, c2, ... in the order of ``types`` and c1 is the
    primary key. The statement is printed and written to the schema file.
    """
    remove_schema()
    column_defs = "".join("c%d %s, " % (idx, col_type)
                          for idx, col_type in enumerate(types, 1))
    # [:-2] strips the ", " left behind by the last column definition.
    s = ("create table t1 (" + column_defs)[:-2] + ', primary key (c1))'
    print(s)
    write_schema(s)
|
||||
|
||||
if os.path.exists("sort_result"):
|
||||
os.remove("sort_result")
|
||||
|
||||
#cmd_form = "LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH ./cost_model_util -GBR -t sort -s c20.schema -r 1000 -c 10 -i4".split()
|
||||
cmd_form = "LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH ./cost_model_util -GR -t sort -s c20.schema -r 1000 -c 10 -i4".split()
|
||||
|
||||
|
||||
types_to_test = {'bigint':'bigint', 'double':'double', 'float':'float', 'timestamp':'timestamp', 'number':'number(20,3)', 'v32':'varchar(32)', 'v64':'varchar(64)', 'v128':'varchar(128)'}
|
||||
row_counts = [1000, 2000, 4000, 8000, 10000, 20000, 50000]
|
||||
sort_column_counts = [1, 2, 3, 5]
|
||||
input_col_cnts = [1, 2, 6]
|
||||
case_run_time = 7
|
||||
|
||||
keys = sorted(types_to_test.keys())
|
||||
|
||||
total_case_count = len(row_counts) * len(sort_column_counts) * len(input_col_cnts) * len(keys)
|
||||
case_count = 0
|
||||
|
||||
cmd_form[6] = schema_file
|
||||
|
||||
|
||||
|
||||
|
||||
def make_headed_seq(head, arr):
    """Return the SQL types for the keys in ``arr`` with ``head`` moved first.

    The remaining keys keep their order from ``arr``; each key is mapped
    through ``types_to_test``. Raises ValueError when ``head`` is not in
    ``arr``.
    """
    pos = arr.index(head)
    ordered = [head] + arr[:pos] + arr[pos + 1:]
    return [types_to_test[key] for key in ordered]
|
||||
|
||||
#for t in keys:
# Config (cost_test_conf.py) only initialises ``u_to_test_type_c`` on its
# instances and has no class attribute ``u_to_test_type``, so a plain
# ``Config.u_to_test_type`` raises AttributeError. Look it up defensively:
# nothing is benchmarked unless the attribute was set to a type key.
to_test_type = getattr(Config, 'u_to_test_type', None)
if to_test_type is not None:
    #outfile = 'sort.result.' + t
    t = to_test_type
    outfile = 'sort_add_' + t + '_' + 'result'
    if os.path.exists(outfile):
        os.remove(outfile)
    for n in input_col_cnts:
        # The schema repeats the type list n times, with the tested type first.
        make_schema(make_headed_seq(t, keys) * n)
        total_cols = n * len(keys)
        for rc in row_counts:
            # cmd_form[8] is the value of -r, cmd_form[-2] the value of -c.
            cmd_form[8] = str(rc)
            for order_count in sort_column_counts:
                cmd_form[-2] = str(order_count)
                case_count+=1
                prompt = "# %d / %d %s col_cnt = %d rc = %d order_cnt = %d\n# %s" % (case_count, total_case_count, t, total_cols, rc, order_count, ' '.join(cmd_form))

                print(prompt)
                sp.check_call('echo "%s" >> ' % prompt + outfile, shell=True)
                if order_count > total_cols:
                    # More sort columns requested than the table has: skip.
                    print('PASS')
                    sp.check_call('echo "# PASS" >> ' + outfile, shell=True)
                    continue
                for times in range(0, case_run_time):
                    print(times)
                    # Prefix the tool output with the three case parameters.
                    for field in (rc, n, order_count):
                        sp.check_call("echo -n '%s,' >> " % str(field) + outfile, shell=True)
                    sp.check_call(" ".join(cmd_form) + ' >> ' + outfile, shell=True)
|
94
unittest/sql/optimizer/cost_model_utils/benchmaster_sort_with_type.py
Executable file
94
unittest/sql/optimizer/cost_model_utils/benchmaster_sort_with_type.py
Executable file
@ -0,0 +1,94 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
import subprocess as sp
|
||||
import os
|
||||
|
||||
schema_file = 'sort.schema'
|
||||
outfile = 'sort.result'
|
||||
|
||||
|
||||
def remove_schema():
    """Remove the file named by the module-level ``schema_file`` when present."""
    if not os.path.exists(schema_file):
        return
    os.remove(schema_file)
|
||||
|
||||
|
||||
def write_schema(s):
    """Write the text ``s`` to ``schema_file``, replacing any previous content.

    The ``with`` block closes the handle even when the write raises.
    """
    with open(schema_file, 'w') as of:
        of.write(s)
|
||||
|
||||
|
||||
def make_seq(t, cnt):
    """Return a new list made of ``cnt`` repetitions of ``t``."""
    return [t] * cnt
|
||||
|
||||
|
||||
def make_schema(types):
    """Generate the ``create table t1`` statement for ``types`` and store it.

    Columns are named c1, c2, ... in the order of ``types`` and c1 is the
    primary key. The statement is printed and written to the schema file.
    """
    remove_schema()
    column_defs = "".join("c%d %s, " % (idx, col_type)
                          for idx, col_type in enumerate(types, 1))
    # [:-2] strips the ", " left behind by the last column definition.
    s = ("create table t1 (" + column_defs)[:-2] + ', primary key (c1))'
    print(s)
    write_schema(s)
|
||||
|
||||
|
||||
if os.path.exists("sort_result"):
|
||||
os.remove("sort_result")
|
||||
|
||||
cmd_form = "LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH ./cost_model_util -GBR -t sort -s c20.schema -r 1000 -c 10 -i1".split()
|
||||
|
||||
types_to_test = {'bigint': 'bigint', 'double': 'double', 'float': 'float', 'timestamp': 'timestamp',
|
||||
'number': 'number(20,3)', 'v32': 'varchar(32)', 'v64': 'varchar(64)', 'v128': 'varchar(128)'}
|
||||
row_counts = [1000, 2000, 4000, 7000, 8000, 10000, 20000, 50000]
|
||||
sort_column_counts = [1, 2, 3]
|
||||
input_col_cnts = [1, 2, 3, 6]
|
||||
case_run_time = 7
|
||||
|
||||
keys = sorted(types_to_test.keys())
|
||||
|
||||
total_case_count = len(row_counts) * len(sort_column_counts) * len(input_col_cnts) * len(keys)
|
||||
case_count = 0
|
||||
|
||||
cmd_form[6] = schema_file
|
||||
|
||||
|
||||
def make_headed_seq(head, arr):
    """Return the SQL types for the keys in ``arr`` with ``head`` moved first.

    The remaining keys keep their order from ``arr``; each key is mapped
    through ``types_to_test``. Raises ValueError when ``head`` is not in
    ``arr``.
    """
    pos = arr.index(head)
    ordered = [head] + arr[:pos] + arr[pos + 1:]
    return [types_to_test[key] for key in ordered]
|
||||
|
||||
|
||||
for t in keys:
    # One result file per tested type.
    outfile = 'sort.result.' + t
    if os.path.exists(outfile):
        os.remove(outfile)
    for n in input_col_cnts:
        # The schema repeats the type list n times, with the tested type first.
        make_schema(make_headed_seq(t, keys) * n)
        total_cols = n * len(keys)
        for rc in row_counts:
            # cmd_form[8] is the value of -r, cmd_form[-2] the value of -c.
            cmd_form[8] = str(rc)
            for order_count in sort_column_counts:
                cmd_form[-2] = str(order_count)
                case_count += 1
                prompt = "# %d / %d %s col_cnt = %d rc = %d order_cnt = %d\n# %s" % (
                    case_count, total_case_count, t, total_cols, rc, order_count, ' '.join(cmd_form))

                print(prompt)
                sp.check_call('echo "%s" >> ' % prompt + outfile, shell=True)
                if order_count > total_cols:
                    # More sort columns requested than the table has: skip.
                    print('PASS')
                    sp.check_call('echo "# PASS" >> ' + outfile, shell=True)
                    continue
                for times in range(0, case_run_time):
                    print(times)
                    # Prefix the tool output with the three case parameters.
                    for field in (rc, n, order_count):
                        sp.check_call("echo -n '%s,' >> " % str(field) + outfile, shell=True)
                    sp.check_call(" ".join(cmd_form) + ' >> ' + outfile, shell=True)
|
1
unittest/sql/optimizer/cost_model_utils/c10k1.schema
Normal file
1
unittest/sql/optimizer/cost_model_utils/c10k1.schema
Normal file
@ -0,0 +1 @@
|
||||
create table t1 (c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, c8 bigint, c9 bigint, c10 bigint, primary key(c1))
|
2
unittest/sql/optimizer/cost_model_utils/c10k1x2.schema
Normal file
2
unittest/sql/optimizer/cost_model_utils/c10k1x2.schema
Normal file
@ -0,0 +1,2 @@
|
||||
create table t1(c1 bigint,c2 bigint,c3 bigint,c4 bigint,c5 bigint,c6 bigint,c7 bigint,c8 bigint,c9 bigint,c10 bigint,primary key(c1))
|
||||
create table t2(c1 bigint,c2 bigint,c3 bigint,c4 bigint,c5 bigint,c6 bigint,c7 bigint,c8 bigint,c9 bigint,c10 bigint,primary key(c1))
|
42
unittest/sql/optimizer/cost_model_utils/cost_main.py
Executable file
42
unittest/sql/optimizer/cost_model_utils/cost_main.py
Executable file
@ -0,0 +1,42 @@
|
||||
from mylog.mylog import MyLogger
|
||||
import subprocess as sp
|
||||
'''
|
||||
class Tester(object):
|
||||
bench_script = "python benchmaster_{0}.py"
|
||||
data_process_script = 'python preprocess.py -i {0} -o {1} -d'
|
||||
fit_script = 'python fit_{0}.py'
|
||||
|
||||
def __init__(self, conf):
|
||||
self.conf = conf
|
||||
|
||||
def do_all(self):
|
||||
# MyLogger.log('try to do all test fit plot')
|
||||
pass
|
||||
|
||||
def do_bench(self):
|
||||
# MyLogger.log('try to do bench')
|
||||
sp.check_call(Tester.bench_script.format(self.conf.u_to_test_op_c), shell=True)
|
||||
|
||||
def do_fit(self):
|
||||
# MyLogger.log('try to do fit')
|
||||
sp.check_call(Tester.fit_script.format(self.conf.u_to_test_op_c), shell=True)
|
||||
|
||||
def do_plot(self):
|
||||
# MyLogger.log('try to do plot')
|
||||
pass
|
||||
|
||||
def do_data_process(self):
|
||||
if self.conf.u_to_test_type_c is None:
|
||||
sp.check_call(Tester.data_process_script.format(self.conf.u_to_test_op_c + '_result',
|
||||
self.conf.u_to_test_op_c + '_result_final'), shell=True)
|
||||
else:
|
||||
sp.check_call(
|
||||
Tester.data_process_script.format(self.conf.u_to_test_op + '_' + self.conf.u_to_test_type + '_result',
|
||||
self.conf.u_to_test_op + '_' + self.conf.u_to_test_type + '_result_final'
|
||||
), shell=True)
|
||||
'''
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Entry point: log the start, then run material.py in a child interpreter.
    MyLogger.info("start to do cost model unittest")
    target_script = 'material.py'
    sp.check_call('python %s' % (target_script), shell=True)
|
118
unittest/sql/optimizer/cost_model_utils/cost_test_conf.py
Normal file
118
unittest/sql/optimizer/cost_model_utils/cost_test_conf.py
Normal file
@ -0,0 +1,118 @@
|
||||
class Config(object):
    '''
    user input info

    Every instance attribute whose name ends in ``_c`` stands for one
    option of cost_model_util; ``config_map_dict`` maps the attribute name
    to the option text that ``gen_params`` emits for it.
    '''
    ################
    operators = {
        'array': 'array',
        'material': 'material',
        'mergegroupby': 'mergegroupby',
        'merge': 'merge',
        'hash': 'hash',
        'miss': 'miss',
        'nl': 'nl',
        'rowstore': 'rowstore',
        'sort_add': 'sort_add',
        'sort': 'sort',
        'sort_with_type': 'sort_with_type'
    }
    types_to_test = {'bigint': 'bigint', 'double': 'double', 'float': 'float', 'timestamp': 'timestamp',
                     'number': 'number(20,3)', 'v32': 'varchar(32)', 'v64': 'varchar(64)', 'v128': 'varchar(128)'}
    config_map_dict = {
        'is_printing_help_c': ' -h ',
        'schema_file_c': ' -s ',
        'row_count_c': ' -r ',
        'left_row_count_c': ' -r ',
        'right_row_count_c': ' -r ',
        'sort_col_count_c': ' -c ',
        'input_projector_count_c': ' -p ',
        'left_pj_c': ' -p ',
        'right_pj_c': ' -p ',
        'is_printing_output_c': ' -O ',
        'equal_cond_count_c': ' -e ',
        'other_cond_count_c': ' -o ',
        'u_to_test_op_c': ' -t ',
        # special: the value is emitted without any option flag
        'u_to_test_type_c': '',
        'is_binding_cpu_c': ' -B ',
        'seed_min_c': ' -Z ',
        'left_min_c': ' -Z ',
        'right_min_c': ' -Z ',
        'seed_max_c': ' -X ',
        # padded like every other option (previously '-X' without spaces)
        'left_max_c': ' -X ',
        'right_max_c': ' -X ',
        'seed_step_c': ' -C ',
        'left_seed_step_c': ' -C ',
        'right_seed_step_c': ' -C ',
        'seed_step_len_c': ' -V ',
        'left_seed_step_len_c': ' -V ',
        'right_seed_step_len_c': ' -V ',
        'limit_c': ' -L ',
        'is_random_c': ' -R ',
        'is_experimental_c': ' -K ',
        'sleep_before_test_c': ' -S ',
        'add_sort_column_c': ' -T ',
        'info_type_c': ' -i ',
        'common_prefix_len_c': ' -l ',
        'is_not_running_as_unittest_c': ' -G '
    }

    def __init__(self):
        # config info based on cost_model_util.cpp
        # Boolean flags start out False, valued options start out None
        # (meaning "do not emit"). Every name must be a key of
        # config_map_dict, otherwise gen_params cannot translate it.
        self.is_printing_help_c = False
        self.schema_file_c = None
        self.row_count_c = None
        self.left_row_count_c = None
        self.right_row_count_c = None
        self.sort_col_count_c = None
        self.input_projector_count_c = None
        # was ``left_pj`` / ``right_pj``: without the ``_c`` suffix these
        # never matched config_map_dict and were ignored by gen_params
        self.left_pj_c = None
        self.right_pj_c = None
        self.is_printing_output_c = False
        self.equal_cond_count_c = None
        self.other_cond_count_c = None
        self.u_to_test_op_c = None
        self.u_to_test_type_c = None  # special
        self.is_binding_cpu_c = False
        self.seed_min_c = None
        self.left_min_c = None
        # was misspelled ``rigt_min_c``, which is not a config_map_dict key
        self.right_min_c = None
        self.seed_max_c = None
        self.left_max_c = None
        self.right_max_c = None
        self.seed_step_c = None
        self.left_seed_step_c = None
        self.right_seed_step_c = None
        self.seed_step_len_c = None
        self.left_seed_step_len_c = None
        self.right_seed_step_len_c = None
        self.limit_c = None
        self.is_random_c = False
        self.is_experimental_c = False
        self.sleep_before_test_c = None
        self.add_sort_column_c = None
        self.info_type_c = None
        self.common_prefix_len_c = None
        self.is_not_running_as_unittest_c = False

    def gen_params(self):
        """Render the options set on this object as an argument string.

        Returns ``" -h "`` alone when ``is_printing_help_c`` is set.
        Otherwise every ``*_c`` attribute is visited in the alphabetical
        order of ``dir()``: an ``is*`` flag contributes its option text
        when it is exactly True, any other attribute contributes its
        option text followed by the value when the value is not None.

        Raises KeyError if a ``*_c`` attribute that is set has no entry in
        ``config_map_dict``.
        """
        if self.is_printing_help_c:
            return " -h "

        args = " "
        option_names = [name for name in dir(self)
                        if name.endswith('_c') and name != 'is_printing_help_c']
        for key in option_names:
            val = getattr(self, key)
            # MyLogger.info("config object %s %s", key, val)
            if key.startswith('is'):
                if val is True:
                    args = args + Config.config_map_dict[key]
            elif val is not None:
                args = args + Config.config_map_dict[key]
                args = args + " " + str(val) + " "
        return args
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke check: with the help flag set, gen_params() yields only " -h ".
    conf = Config()
    conf.is_printing_help_c = True
    print(conf.gen_params())
|
148
unittest/sql/optimizer/cost_model_utils/fit_array.py
Normal file
148
unittest/sql/optimizer/cost_model_utils/fit_array.py
Normal file
@ -0,0 +1,148 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
from scipy.optimize import leastsq
|
||||
from scipy.optimize import curve_fit
|
||||
import sys
|
||||
from lmfit import Model
|
||||
import getopt
|
||||
import os
|
||||
#
|
||||
# def array_model_form(args):
|
||||
# # (
|
||||
# # Nelem,
|
||||
# # ) = args
|
||||
#
|
||||
# Telem_ence = 0.00898860
|
||||
# Telem_copy = 0.00631888
|
||||
#
|
||||
# Nelem = args
|
||||
#
|
||||
# ELEM_PER_PAGE = 1024
|
||||
# extend_cnt = math.ceil(math.log(float(Nelem)/ELEM_PER_PAGE, 2))
|
||||
# if extend_cnt < 0:
|
||||
# extend_cnt = 0
|
||||
# copy_cnt = ELEM_PER_PAGE * (math.pow(2, extend_cnt) - 1)
|
||||
#
|
||||
# total_cost = Telem_ence * Nelem
|
||||
# #total_cost += Tmem_alloc * extend_cnt
|
||||
# total_cost += Telem_copy * copy_cnt
|
||||
#
|
||||
# return total_cost
|
||||
|
||||
def array_model_form(args,
                     #Tstartup,
                     Telem_ence,
                     Telem_copy,
                     #Tmem_alloc
                     ):
    """Evaluate the array cost model for one element count.

    cost = Telem_ence * Nelem + Telem_copy * copy_cnt, where
    copy_cnt = ELEM_PER_PAGE * (2 ** extend_cnt - 1) and extend_cnt is the
    smallest non-negative integer with ELEM_PER_PAGE * 2 ** extend_cnt >= Nelem.

    :param args: the element count Nelem (a single number).
    :param Telem_ence: cost coefficient applied once per element.
    :param Telem_copy: cost coefficient applied per copied element.
    :return: the modelled total cost.
    """
    # (
    #     Nelem,
    # ) = args

    Nelem = args

    ELEM_PER_PAGE = 1024
    pages = float(Nelem) / ELEM_PER_PAGE
    if pages <= 1.0:
        # Fits in the first page. This also covers Nelem <= 0, for which
        # math.log() would raise a domain error.
        extend_cnt = 0
    else:
        # frexp gives pages == mantissa * 2 ** exponent with
        # 0.5 <= mantissa < 1, so ceil(log2(pages)) is obtained exactly;
        # ceil(math.log(pages, 2)) can overshoot by one at exact powers of
        # two because of floating-point rounding in the division of logs.
        mantissa, exponent = math.frexp(pages)
        extend_cnt = exponent - 1 if mantissa == 0.5 else exponent
    copy_cnt = ELEM_PER_PAGE * (math.pow(2, extend_cnt) - 1)

    total_cost = Telem_ence * Nelem
    #total_cost += Tmem_alloc * extend_cnt
    total_cost += Telem_copy * copy_cnt

    return total_cost
|
||||
|
||||
def material_model_arr(arg_sets,
                       # Tstartup,
                       Telem_ence,
                       Telem_copy,
                       #Tmem_alloc
                       ):
    """Apply ``array_model_form`` to every entry of ``arg_sets``.

    Returns the modelled costs as a numpy array, one value per entry and
    in the same order.
    """
    costs = [array_model_form(single_arg_set, Telem_ence, Telem_copy)
             for single_arg_set in arg_sets]
    return np.array(costs)
|
||||
|
||||
material_model = Model(material_model_arr)
|
||||
# material_model.set_param_hint("Tstartup", min=0.0)
|
||||
material_model.set_param_hint("Telem_ence", min=0.0)
|
||||
material_model.set_param_hint("Telem_copy", min=0.0)
|
||||
#material_model.set_param_hint("Tmem_alloc", min=0.0)
|
||||
|
||||
def extract_info_from_line(line):
    """Split a comma-separated line and return its fields as a list of floats."""
    return [float(field) for field in line.split(",")]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Fit Telem_ence / Telem_copy of the array cost model to measured data.
    # Options: -i <input data file>, -o <file receiving the fitted values>.
    #file_name = "scan_model.res.formal.prep"
    #out_file_name = "scan_model.fit"
    file_name = "array_result_final"
    out_file_name = "array_model"
    #sys.argv.extend("-i arr.prep -o arr.model".split(" "))

    output_fit_res = True
    wrong_arg = False
    opts,args = getopt.getopt(sys.argv[1:],"i:o:")
    for op, value in opts:
        if "-i" == op:
            file_name = value
        elif "-o" == op:
            output_fit_res = True
            out_file_name = value
        else:
            wrong_arg = True

    if wrong_arg:
        print("wrong arg")
        sys.exit(1)

    # Drop a stale result only after -o has been parsed, so the file that
    # is removed is the one this run is going to write (not the default
    # name when the output was redirected elsewhere).
    if os.path.exists(out_file_name):
        os.remove(out_file_name)

    arg_sets = []
    times = []
    case_params = []
    with open(file_name, "r") as data_file:
        for line in data_file:
            # Skip comment lines and blank lines (float('') would raise).
            if line.startswith('#') or not line.strip():
                continue
            case_param = extract_info_from_line(line)
            case_params.append(case_param)
            # Column 0 is the model input, column 1 the value fitted against.
            arg_sets.append((case_param[0]))
            times.append(case_param[1])
    arg_sets_np = np.array(arg_sets)
    times_np = np.array(times)
    #10, 0.20406430879623488, 0.016618100054245379, 14.0, 4.5, 37.0, -0.005, 0.5, -7.0
    result = material_model.fit(times_np, arg_sets=arg_sets_np,
                                # Tstartup=10.0,
                                Telem_ence=1.0,
                                Telem_copy=1.0,
                                #Tmem_alloc=1.0
                                )

    # res_line = str(result.best_values["Tstartup"]) + ","
    res_line = str(result.best_values["Telem_ence"]) + ","
    res_line += str(result.best_values["Telem_copy"])# + ","
    #res_line += str(result.best_values["Tmem_alloc"])

    print(result.fit_report())

    if output_fit_res:
        with open(out_file_name, "w") as out_file:
            out_file.write(res_line)
|
162
unittest/sql/optimizer/cost_model_utils/fit_hg.py
Executable file
162
unittest/sql/optimizer/cost_model_utils/fit_hg.py
Executable file
@ -0,0 +1,162 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
from scipy.optimize import leastsq
|
||||
from scipy.optimize import curve_fit
|
||||
import sys
|
||||
from lmfit import Model
|
||||
import getopt
|
||||
|
||||
|
||||
|
||||
def mg_model_form(args,
                  Tstartup,
                  Trow_once,
                  Tres_once,
                  Taggr_prepare_result,
                  Taggr_process,
                  Tgroup_hash_col,
                  Tcopy_col
                  ):
    """Evaluate the group-by cost model for one case.

    ``args`` unpacks to (Nrow_input, Nrow_res, Ncol_input, Ncol_aggr,
    Ncol_group); the remaining parameters are the cost coefficients.
    The terms are summed in a fixed left-to-right order.
    """
    Nrow_input, Nrow_res, Ncol_input, Ncol_aggr, Ncol_group = args

    return (Tstartup + Nrow_res * Tres_once + Nrow_input * Trow_once
            # cost for judge group
            + Nrow_input * Ncol_group * Tgroup_hash_col
            # cost for group related operation
            + Nrow_res * (Ncol_input * Tcopy_col)
            + Nrow_res * (Ncol_aggr * Taggr_prepare_result)
            # cost for input row process
            + Nrow_input * (Ncol_aggr * Taggr_process))
|
||||
|
||||
|
||||
|
||||
|
||||
eval_count = 0
|
||||
|
||||
|
||||
def mg_model_arr(arg_sets,
                 Tstartup,
                 Trow_once,
                 Tres_once,
                 Taggr_prepare_result,
                 Taggr_process,
                 Tgroup_hash_col,
                 Tcopy_col
                 ) :
    """Apply ``mg_model_form`` to every entry of ``arg_sets``.

    Returns the modelled costs as a numpy array. Each call also increments
    the module-level ``eval_count`` and prints it as a progress line.
    """
    global eval_count

    coeffs = (Tstartup, Trow_once, Tres_once, Taggr_prepare_result,
              Taggr_process, Tgroup_hash_col, Tcopy_col)
    res = [mg_model_form(single_arg_set, *coeffs) for single_arg_set in arg_sets]
    eval_count += 1
    print("eval "+ str(eval_count))
    return np.array(res)
|
||||
|
||||
mg_model = Model(mg_model_arr)
|
||||
mg_model.set_param_hint("Tstartup", min=0.0)
|
||||
mg_model.set_param_hint("Trow_once", min=0.0)
|
||||
mg_model.set_param_hint("Tres_once", min=0.0)
|
||||
mg_model.set_param_hint("Taggr_prepare_result", min=0.0)
|
||||
mg_model.set_param_hint("Taggr_process", min=0.0)
|
||||
mg_model.set_param_hint("Tgroup_hash_col", min=0.0)
|
||||
mg_model.set_param_hint("Tcopy_col", min=0.0)
|
||||
def extract_info_from_line(line):
    """Split a comma-separated line and return its fields as a list of floats."""
    return [float(field) for field in line.split(",")]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Usage: [-i input_file] [-o output_file]
    # Reads comma-separated measurements, fits mg_model to them, prints the
    # lmfit report and, when -o is given, writes the fitted coefficients as
    # one comma-separated line.

    # Coefficients of mg_model_arr, in the order they are written out.
    param_names = ("Tstartup",
                   "Trow_once",
                   "Tres_once",
                   "Taggr_prepare_result",
                   "Taggr_process",
                   "Tgroup_hash_col",
                   "Tcopy_col")

    file_name = "scan_model.res.formal.prep"
    out_file_name = "scan_model.fit"
    output_fit_res = False

    # getopt raises GetoptError for an unknown or malformed option, so the
    # "wrong arg" diagnostic has to be produced here.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "i:o:")
    except getopt.GetoptError:
        print("wrong arg")
        sys.exit(1)
    for op, value in opts:
        if op == "-i":
            file_name = value
        elif op == "-o":
            output_fit_res = True
            out_file_name = value

    arg_sets = []
    times = []
    with open(file_name, "r") as data_file:
        for line in data_file:
            if not line.strip():
                continue  # tolerate blank lines such as a trailing newline
            case_param = extract_info_from_line(line)
            arg_sets.append((case_param[0],    # Nrow_input
                             case_param[5],    # Nrow_res
                             case_param[4],    # Ncol_input
                             case_param[2],    # Ncol_aggr
                             case_param[3]))   # Ncol_group
            times.append(case_param[6])        # measured value being fitted

    # Every coefficient starts the fit at 0.1.
    result = mg_model.fit(np.array(times), arg_sets=np.array(arg_sets),
                          **dict.fromkeys(param_names, 0.1))

    res_line = ",".join(str(result.best_values[name]) for name in param_names)

    print(result.fit_report())

    if output_fit_res:
        with open(out_file_name, "w") as out_file:
            out_file.write(res_line)
|
111
unittest/sql/optimizer/cost_model_utils/fit_material.py
Executable file
111
unittest/sql/optimizer/cost_model_utils/fit_material.py
Executable file
@ -0,0 +1,111 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
from scipy.optimize import leastsq
|
||||
from scipy.optimize import curve_fit
|
||||
import sys
|
||||
from lmfit import Model
|
||||
import getopt
|
||||
import os
|
||||
|
||||
|
||||
def material_model_form(args,
                        # Tstartup,
                        Trow_once,
                        Trow_col):
    """Return the modelled cost for one (Nrow, Ncol) case.

    The cost is Nrow * (Trow_once + Ncol * Trow_col).  No startup term is
    included: the Tstartup parameter is commented out of the signature.
    """
    Nrow, Ncol = args
    per_row_cost = Trow_once + Ncol * Trow_col

    cost = 0  # stands in for the disabled startup term
    cost += Nrow * per_row_cost
    return cost
|
||||
|
||||
|
||||
def material_model_arr(arg_sets,
                       # Tstartup,
                       Trow_once,
                       Trow_col):
    """Apply material_model_form to every row of arg_sets.

    Returns a numpy array with one cost per row.  The explicit parameter
    list is what gets wrapped by Model() below the function.
    """
    return np.array([material_model_form(case_args, Trow_once, Trow_col)
                     for case_args in arg_sets])
|
||||
|
||||
|
||||
# Wrap the array-valued cost function in an lmfit Model; both fitted
# coefficients are kept non-negative.  (Tstartup is not a fit parameter.)
material_model = Model(material_model_arr)
for _hinted_param in ("Trow_once", "Trow_col"):
    material_model.set_param_hint(_hinted_param, min=0.0)
|
||||
|
||||
|
||||
def extract_info_from_line(line):
    """Split a comma-separated line and return its fields as a list of floats."""
    return [float(field) for field in line.split(",")]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Usage: [-i input_file] [-o output_file]
    # Fits material_model to the measurements in the input file, prints the
    # lmfit report and writes the fitted coefficients as one comma-separated
    # line.  The result is written even without -o (output_fit_res starts
    # out True).

    # Coefficients and their starting values, in output order.
    initial_guess = (("Trow_once", 10.0),
                     ("Trow_col", 1.0))

    file_name = "material_result_final"
    out_file_name = "material_model"

    # A stale result under the default output name is removed up front,
    # before the command line is looked at.
    if os.path.exists(out_file_name):
        os.remove(out_file_name)

    output_fit_res = True

    # getopt raises GetoptError for an unknown or malformed option, so the
    # "wrong arg" diagnostic has to be produced here.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "i:o:")
    except getopt.GetoptError:
        print("wrong arg")
        sys.exit(1)
    for op, value in opts:
        if op == "-i":
            file_name = value
        elif op == "-o":
            output_fit_res = True
            out_file_name = value

    arg_sets = []
    times = []
    with open(file_name, "r") as data_file:
        for line in data_file:
            if line.startswith('#') or not line.strip():
                continue  # skip comment lines and blank lines
            case_param = extract_info_from_line(line)
            arg_sets.append((case_param[0], case_param[1]))  # Nrow, Ncol
            times.append(case_param[3])                      # measured value

    result = material_model.fit(np.array(times), arg_sets=np.array(arg_sets),
                                **dict(initial_guess))

    res_line = ",".join(str(result.best_values[name])
                        for name, _ in initial_guess)

    print(result.fit_report())

    if output_fit_res:
        with open(out_file_name, "w") as out_file:
            out_file.write(res_line)
|
167
unittest/sql/optimizer/cost_model_utils/fit_merge.py
Normal file
167
unittest/sql/optimizer/cost_model_utils/fit_merge.py
Normal file
@ -0,0 +1,167 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
from scipy.optimize import leastsq
|
||||
from scipy.optimize import curve_fit
|
||||
import sys
|
||||
from lmfit import Model
|
||||
import getopt
|
||||
|
||||
def merge_model_form(args,
                     Tstartup,
                     Tres_right_op,
                     Tres_right_cache,
                     Tmatch_group,
                     #Tassemble_row,
                     Tequal_fail,
                     Trow_left,
                     Trow_right
                     ):
    """Evaluate the merge cost formula for one measurement case.

    args is a 7-element sequence (Nrow_res, Nrow_left, Nrow_right,
    Nright_cache_in, Nright_cache_out, Nright_cache_clear, Nequal_cond);
    the other parameters are the coefficients being fitted.  The
    Tassemble_row term is disabled.  Returns the summed cost.
    """
    (Nrow_res, Nrow_left, Nrow_right,
     Nright_cache_in, Nright_cache_out, Nright_cache_clear,
     Nequal_cond) = args

    # NOTE(review): the last term subtracts 2 * Tmatch_group -- a fitted
    # coefficient -- from two counts.  Possibly a count such as
    # Nright_cache_clear was intended; confirm before changing, since it
    # alters the fitted model.
    failed_compares = Nequal_cond - Nrow_res - 2 * Tmatch_group

    # Terms are added left to right in the established accumulation order.
    return (Tstartup
            + Nrow_left * Trow_left
            + (Nrow_right - Nright_cache_in) * Trow_right
            + Nright_cache_in * Tres_right_op
            + Nright_cache_out * Tres_right_cache
            + Nright_cache_clear * Tmatch_group
            + failed_compares * Tequal_fail)
|
||||
|
||||
# Number of times the model function has been evaluated so far.
eval_count = 0


def merge_model_arr(arg_sets,
                    Tstartup,
                    Tres_right_op,
                    Tres_right_cache,
                    Tmatch_group,
                    #Tassemble_row,
                    Tequal_fail,
                    Trow_left,
                    Trow_right
                    ):
    """Apply merge_model_form to every row of arg_sets.

    Returns a numpy array with one cost per row and increments the
    module-level eval_count once per call.  The explicit parameter list is
    what gets wrapped by Model() below the function.
    """
    global eval_count
    coefficients = (Tstartup,
                    Tres_right_op,
                    Tres_right_cache,
                    Tmatch_group,
                    Tequal_fail,
                    Trow_left,
                    Trow_right)
    costs = []
    for case_args in arg_sets:
        costs.append(merge_model_form(case_args, *coefficients))
    eval_count += 1
    return np.array(costs)
|
||||
|
||||
|
||||
# Wrap the array-valued cost function in an lmfit Model and keep every
# coefficient non-negative.  (Tassemble_row is not a fit parameter.)
merge_model = Model(merge_model_arr)
for _hinted_param in ("Tstartup",
                      "Tres_right_op",
                      "Tres_right_cache",
                      "Tmatch_group",
                      "Tequal_fail",
                      "Trow_left",
                      "Trow_right"):
    merge_model.set_param_hint(_hinted_param, min=0.0)
|
||||
|
||||
def extract_info_from_line(line):
    """Split a comma-separated line and return its fields as a list of floats."""
    return [float(field) for field in line.split(",")]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Usage: [-i input_file] [-o output_file]
    # Fits merge_model to the measurements in the input file, prints the
    # lmfit report and writes the fitted coefficients as one comma-separated
    # line.

    # Coefficients and their starting values, in output order.
    initial_guess = (("Tstartup", 0.1),
                     ("Tres_right_op", 0.1),
                     ("Tres_right_cache", 0.1),
                     ("Tmatch_group", 1.0),
                     ("Tequal_fail", 1.0),
                     ("Trow_left", 0.05),
                     ("Trow_right", 0.05))

    # These used to be forced by appending "-i merge.prep.1 -o merge.model"
    # to sys.argv, which also overrode anything given on the command line.
    # They are plain defaults now: a run without options behaves the same,
    # and explicit -i / -o values are honoured.
    file_name = "merge.prep.1"
    out_file_name = "merge.model"
    output_fit_res = True

    # getopt raises GetoptError for an unknown or malformed option, so the
    # "wrong arg" diagnostic has to be produced here.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "i:o:")
    except getopt.GetoptError:
        print("wrong arg")
        sys.exit(1)
    for op, value in opts:
        if op == "-i":
            file_name = value
        elif op == "-o":
            output_fit_res = True
            out_file_name = value

    arg_sets = []
    times = []
    with open(file_name, "r") as data_file:
        for line in data_file:
            if not line.strip():
                continue  # tolerate blank lines such as a trailing newline
            case_param = extract_info_from_line(line)
            arg_sets.append((case_param[6],     # Nrow_res
                             case_param[0],     # Nrow_left
                             case_param[1],     # Nrow_right
                             case_param[-3],    # Nright_cache_in
                             case_param[-2],    # Nright_cache_out
                             case_param[-1],    # Nright_cache_clear
                             case_param[8]))    # Nequal_cond
            times.append(case_param[7])         # measured value being fitted

    result = merge_model.fit(np.array(times), arg_sets=np.array(arg_sets),
                             **dict(initial_guess))

    res_line = ",".join(str(result.best_values[name])
                        for name, _ in initial_guess)

    print(result.fit_report())

    if output_fit_res:
        with open(out_file_name, "w") as out_file:
            out_file.write(res_line)
|
166
unittest/sql/optimizer/cost_model_utils/fit_mergegroupby.py
Executable file
166
unittest/sql/optimizer/cost_model_utils/fit_mergegroupby.py
Executable file
@ -0,0 +1,166 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
from scipy.optimize import leastsq
|
||||
from scipy.optimize import curve_fit
|
||||
import sys
|
||||
from lmfit import Model
|
||||
import getopt
|
||||
|
||||
|
||||
|
||||
def mg_model_form(args,
                  #Tstartup,
                  Trow_once,
                  Tres_once,
                  Taggr_prepare_result,
                  Taggr_process,
                  Tgroup_cmp_col,
                  Tcopy_col
                  ):
    """Evaluate the merge group-by cost formula for one measurement case.

    args is a 5-element sequence
    (Nrow_input, Nrow_res, Ncol_input, Ncol_aggr, Ncol_group); the other
    parameters are the coefficients being fitted.  There is no startup
    term (Tstartup is commented out).  Returns the summed cost.
    """
    Nrow_input, Nrow_res, Ncol_input, Ncol_aggr, Ncol_group = args

    # Terms are added left to right in the established accumulation order.
    return (Nrow_res * Tres_once
            + Nrow_input * Trow_once
            # deciding the group: one comparison term per result row, and
            # Ncol_group comparison terms for each remaining input row
            + Nrow_res * Tgroup_cmp_col
            + (Nrow_input - Nrow_res) * Ncol_group * Tgroup_cmp_col
            # work done once per result row
            + Nrow_res * (Ncol_input * Tcopy_col)
            + Nrow_res * (Ncol_aggr * Taggr_prepare_result)
            # aggregation work done for every input row
            + Nrow_input * (Ncol_aggr * Taggr_process))
|
||||
|
||||
|
||||
|
||||
|
||||
# Number of times the model function has been evaluated so far.
eval_count = 0


def mg_model_arr(arg_sets,
                 #Tstartup,
                 Trow_once,
                 Tres_once,
                 Taggr_prepare_result,
                 Taggr_process,
                 Tgroup_cmp_col,
                 Tcopy_col
                 ):
    """Apply mg_model_form to every row of arg_sets.

    Returns a numpy array holding one cost per row.  Each call also bumps
    the module-level eval_count and prints it as a progress trace.
    The explicit parameter list is what gets wrapped by Model() below.
    """
    global eval_count
    coefficients = (Trow_once,
                    Tres_once,
                    Taggr_prepare_result,
                    Taggr_process,
                    Tgroup_cmp_col,
                    Tcopy_col)
    costs = []
    for case_args in arg_sets:
        costs.append(mg_model_form(case_args, *coefficients))
    eval_count += 1
    print("eval " + str(eval_count))
    return np.array(costs)
|
||||
|
||||
# Wrap the array-valued cost function in an lmfit Model and keep every
# coefficient non-negative.  (Tstartup is not a fit parameter.)
mg_model = Model(mg_model_arr)
for _hinted_param in ("Trow_once",
                      "Tres_once",
                      "Taggr_prepare_result",
                      "Taggr_process",
                      "Tgroup_cmp_col",
                      "Tcopy_col"):
    mg_model.set_param_hint(_hinted_param, min=0.0)
|
||||
def extract_info_from_line(line):
    """Split a comma-separated line and return its fields as a list of floats."""
    return [float(field) for field in line.split(",")]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Usage: [-i input_file] [-o output_file]
    # Fits mg_model to the measurements in the input file, prints the lmfit
    # report and writes the fitted coefficients as one comma-separated line.
    # The result is written even without -o (output_fit_res starts out True).

    # Coefficients of mg_model_arr, in the order they are written out.
    param_names = ("Trow_once",
                   "Tres_once",
                   "Taggr_prepare_result",
                   "Taggr_process",
                   "Tgroup_cmp_col",
                   "Tcopy_col")

    file_name = "mergegroupby_result_final"
    out_file_name = "mergegroupby_model"
    output_fit_res = True

    # getopt raises GetoptError for an unknown or malformed option, so the
    # "wrong arg" diagnostic has to be produced here.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "i:o:")
    except getopt.GetoptError:
        print("wrong arg")
        sys.exit(1)
    for op, value in opts:
        if op == "-i":
            file_name = value
        elif op == "-o":
            output_fit_res = True
            out_file_name = value

    arg_sets = []
    times = []
    with open(file_name, "r") as data_file:
        for line in data_file:
            if line.startswith('#') or not line.strip():
                continue  # skip comment lines and blank lines
            case_param = extract_info_from_line(line)
            arg_sets.append((case_param[0],    # Nrow_input
                             case_param[5],    # Nrow_res
                             case_param[4],    # Ncol_input
                             case_param[2],    # Ncol_aggr
                             case_param[3]))   # Ncol_group
            times.append(case_param[6])        # measured value being fitted

    # Every coefficient starts the fit at 0.1.
    result = mg_model.fit(np.array(times), arg_sets=np.array(arg_sets),
                          **dict.fromkeys(param_names, 0.1))

    res_line = ",".join(str(result.best_values[name]) for name in param_names)

    print(result.fit_report())

    if output_fit_res:
        with open(out_file_name, "w") as out_file:
            out_file.write(res_line)
|
164
unittest/sql/optimizer/cost_model_utils/fit_mg.py
Executable file
164
unittest/sql/optimizer/cost_model_utils/fit_mg.py
Executable file
@ -0,0 +1,164 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
from scipy.optimize import leastsq
|
||||
from scipy.optimize import curve_fit
|
||||
import sys
|
||||
from lmfit import Model
|
||||
import getopt
|
||||
|
||||
|
||||
|
||||
def mg_model_form(args,
                  #Tstartup,
                  Trow_once,
                  Tres_once,
                  Taggr_prepare_result,
                  Taggr_process,
                  Tgroup_cmp_col,
                  Tcopy_col
                  ):
    """Evaluate the merge group-by cost formula for one measurement case.

    args is a 5-element sequence
    (Nrow_input, Nrow_res, Ncol_input, Ncol_aggr, Ncol_group); the other
    parameters are the coefficients being fitted.  There is no startup
    term (Tstartup is commented out).  Returns the summed cost.
    """
    Nrow_input, Nrow_res, Ncol_input, Ncol_aggr, Ncol_group = args

    # Terms are added left to right in the established accumulation order.
    return (Nrow_res * Tres_once
            + Nrow_input * Trow_once
            # deciding the group: one comparison term per result row, and
            # Ncol_group comparison terms for each remaining input row
            + Nrow_res * Tgroup_cmp_col
            + (Nrow_input - Nrow_res) * Ncol_group * Tgroup_cmp_col
            # work done once per result row
            + Nrow_res * (Ncol_input * Tcopy_col)
            + Nrow_res * (Ncol_aggr * Taggr_prepare_result)
            # aggregation work done for every input row
            + Nrow_input * (Ncol_aggr * Taggr_process))
|
||||
|
||||
|
||||
|
||||
|
||||
# Number of times the model function has been evaluated so far.
eval_count = 0


def mg_model_arr(arg_sets,
                 #Tstartup,
                 Trow_once,
                 Tres_once,
                 Taggr_prepare_result,
                 Taggr_process,
                 Tgroup_cmp_col,
                 Tcopy_col
                 ):
    """Apply mg_model_form to every row of arg_sets.

    Returns a numpy array holding one cost per row.  Each call also bumps
    the module-level eval_count and prints it as a progress trace.
    The explicit parameter list is what gets wrapped by Model() below.
    """
    global eval_count
    coefficients = (Trow_once,
                    Tres_once,
                    Taggr_prepare_result,
                    Taggr_process,
                    Tgroup_cmp_col,
                    Tcopy_col)
    costs = []
    for case_args in arg_sets:
        costs.append(mg_model_form(case_args, *coefficients))
    eval_count += 1
    print("eval " + str(eval_count))
    return np.array(costs)
|
||||
|
||||
# Wrap the array-valued cost function in an lmfit Model and keep every
# coefficient non-negative.  (Tstartup is not a fit parameter.)
mg_model = Model(mg_model_arr)
for _hinted_param in ("Trow_once",
                      "Tres_once",
                      "Taggr_prepare_result",
                      "Taggr_process",
                      "Tgroup_cmp_col",
                      "Tcopy_col"):
    mg_model.set_param_hint(_hinted_param, min=0.0)
|
||||
def extract_info_from_line(line):
    """Split a comma-separated line and return its fields as a list of floats."""
    return [float(field) for field in line.split(",")]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Usage: [-i input_file] [-o output_file]
    # Fits mg_model to the measurements in the input file, prints the lmfit
    # report and writes the fitted coefficients as one comma-separated line.
    # The result is written even without -o (output_fit_res starts out True).

    # Coefficients of mg_model_arr, in the order they are written out.
    param_names = ("Trow_once",
                   "Tres_once",
                   "Taggr_prepare_result",
                   "Taggr_process",
                   "Tgroup_cmp_col",
                   "Tcopy_col")

    file_name = "mergegroupby_result_final"
    out_file_name = "mergegroupby_model"
    output_fit_res = True

    # getopt raises GetoptError for an unknown or malformed option, so the
    # "wrong arg" diagnostic has to be produced here.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "i:o:")
    except getopt.GetoptError:
        print("wrong arg")
        sys.exit(1)
    for op, value in opts:
        if op == "-i":
            file_name = value
        elif op == "-o":
            output_fit_res = True
            out_file_name = value

    arg_sets = []
    times = []
    with open(file_name, "r") as data_file:
        for line in data_file:
            if not line.strip():
                continue  # tolerate blank lines such as a trailing newline
            case_param = extract_info_from_line(line)
            arg_sets.append((case_param[0],    # Nrow_input
                             case_param[5],    # Nrow_res
                             case_param[4],    # Ncol_input
                             case_param[2],    # Ncol_aggr
                             case_param[3]))   # Ncol_group
            times.append(case_param[6])        # measured value being fitted

    # Every coefficient starts the fit at 0.1.
    result = mg_model.fit(np.array(times), arg_sets=np.array(arg_sets),
                          **dict.fromkeys(param_names, 0.1))

    res_line = ",".join(str(result.best_values[name]) for name in param_names)

    print(result.fit_report())

    if output_fit_res:
        with open(out_file_name, "w") as out_file:
            out_file.write(res_line)
|
140
unittest/sql/optimizer/cost_model_utils/fit_miss.py
Executable file
140
unittest/sql/optimizer/cost_model_utils/fit_miss.py
Executable file
@ -0,0 +1,140 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
from scipy.optimize import leastsq
|
||||
from scipy.optimize import curve_fit
|
||||
import sys
|
||||
from lmfit import Model
|
||||
import getopt
|
||||
|
||||
|
||||
def get_row_size(col):
    """Return the row-size estimate used by the miss model for `col` columns.

    The estimate is 16 + col * (3 + 8 + 4 + 8 + 16 + 32 + 64 + 128) + col.
    """
    per_col = 3 + 8 + 4 + 8 + 16 + 32 + 64 + 128
    row_size = 16
    row_size += col * per_col
    row_size += col
    return row_size
|
||||
|
||||
def round_wasted_spave(rsize, psize):
    """Return rsize plus an even share of the space left over in psize.

    The number of whole rsize units that fit into psize is computed, and
    the remainder is spread evenly across them.  When rsize is larger than
    psize that count is zero and the final division raises
    ZeroDivisionError.
    """
    rows_that_fit = math.floor(float(psize / rsize))
    leftover = psize - rows_that_fit * rsize
    return rsize + leftover / rows_that_fit
|
||||
|
||||
|
||||
|
||||
def get_miss_prob(Nrow, Ncol, Turn):
    """Return the modelled miss probability for Nrow rows of Ncol columns.

    Turn is compared against the total size Nrow * get_row_size(Ncol).
    The hit ratio is Turn / total_size, capped at 0.9 once Turn reaches
    90% of the total size; the result is 1 - hit ratio.
    """
    total_size = Nrow * get_row_size(Ncol)
    if Turn >= 0.9 * total_size:
        return 1 - 0.9
    return 1 - Turn / total_size
|
||||
|
||||
def sort_model_form(args,
                    Tmiss,
                    Turn
                    ):
    """Evaluate the miss cost formula for one (Nrow, Ncol) case.

    The cost is Nrow * Tmiss * Ncol scaled by the miss probability from
    get_miss_prob(Nrow, Ncol, Turn).
    """
    Nrow, Ncol = args
    miss_prob = get_miss_prob(Nrow, Ncol, Turn)

    cost = 0
    cost += Nrow * Tmiss * Ncol * miss_prob
    return cost
|
||||
|
||||
def sort_model_arr(arg_sets,
                   Tmiss,
                   Turn,
                   ):
    """Apply sort_model_form to every row of arg_sets.

    Returns a numpy array with one cost per row.  The explicit parameter
    list is what gets wrapped by Model() below the function.
    """
    return np.array([sort_model_form(case_args, Tmiss, Turn)
                     for case_args in arg_sets])
|
||||
|
||||
# Wrap the array-valued cost function in an lmfit Model.  Tmiss only has a
# lower bound; Turn is boxed into the narrow range [2097152, 2097153].
sort_model = Model(sort_model_arr)
for _hinted_param, _bounds in (("Tmiss", {"min": 0.0}),
                               ("Turn", {"min": 2097152.0, "max": 2097153.0})):
    sort_model.set_param_hint(_hinted_param, **_bounds)
|
||||
|
||||
# sort_model.set_param_hint("Tmiss_K2", min=0.0)
|
||||
|
||||
def extract_info_from_line(line):
    """Split a comma-separated line and return its fields as a list of floats."""
    return [float(field) for field in line.split(",")]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Usage: [-i input_file] [-o output_file] [-R value] [-C value]
    # Fits sort_model to the measurements in the input file, prints the
    # lmfit report, optionally writes "Tmiss,Turn" to the output file, and
    # finally prints measured versus modelled values for every case.
    file_name = "miss.prep.1"
    out_file_name = "miss.model"
    output_fit_res = False

    # getopt raises GetoptError for an unknown or malformed option, so the
    # "wrong arg" diagnostic has to be produced here.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "i:o:R:C:")
    except getopt.GetoptError:
        print("wrong arg")
        sys.exit(1)
    for op, value in opts:
        if op == "-i":
            file_name = value
        elif op == "-o":
            output_fit_res = True
            out_file_name = value
        elif op == "-R":
            # Accepted and stored; nothing visible in this script reads it.
            MATERIAL_ROW_ONCE = float(value)
        elif op == "-C":
            # Accepted and stored; nothing visible in this script reads it.
            MATERIAL_ROW_COL = float(value)

    arg_sets = []
    times = []
    with open(file_name, "r") as data_file:
        for line in data_file:
            if line.startswith('#') or not line.strip():
                continue  # skip comment lines and blank lines
            case_param = extract_info_from_line(line)
            arg_sets.append((case_param[0], case_param[1]))  # Nrow, Ncol
            times.append(case_param[3])                      # measured value

    result = sort_model.fit(np.array(times), arg_sets=np.array(arg_sets),
                            Tmiss=1.0,
                            Turn=2097152,
                            )

    Tmiss = result.best_values["Tmiss"]
    Turn = result.best_values["Turn"]
    res_line = ",".join([str(Tmiss), str(Turn)])

    print(result.fit_report())

    if output_fit_res:
        with open(out_file_name, "w") as out_file:
            out_file.write(res_line)

    # One tab-separated line per case: inputs, measured value, modelled cost.
    for case_args, measured in zip(arg_sets, times):
        cost = sort_model_form(case_args, Tmiss, Turn)
        print("\t".join([str(case_args), str(measured), str(cost)]))
|
147
unittest/sql/optimizer/cost_model_utils/fit_nl.py
Normal file
147
unittest/sql/optimizer/cost_model_utils/fit_nl.py
Normal file
@ -0,0 +1,147 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
from scipy.optimize import leastsq
|
||||
from scipy.optimize import curve_fit
|
||||
import sys
|
||||
from lmfit import Model
|
||||
import getopt
|
||||
|
||||
|
||||
|
||||
def nl_model_form(args,
                  Tstartup,
                  #Tqual,
                  Tres,
                  Tfail,
                  Tleft_row,
                  Tright_row
                  ):
    """Evaluate the nested-loop cost formula for one measurement case.

    args is a 7-element sequence (Nrow_res, Nrow_left, Nrow_right,
    Nright_cache_in, Nright_cache_out, Nright_cache_clear, Nequal_cond).
    Only Nrow_res, Nrow_left, Nrow_right and Nequal_cond enter the formula;
    the three cache counters are unpacked but unused.  The Tqual term is
    disabled.  Returns the summed cost.
    """
    (Nrow_res, Nrow_left, Nrow_right,
     Nright_cache_in, Nright_cache_out, Nright_cache_clear,
     Nequal_cond) = args

    # Terms are added left to right in the established accumulation order.
    return (Tstartup
            + Nrow_res * Tres
            + (Nequal_cond - Nrow_res) * Tfail
            + Nrow_left * Tleft_row
            + Nrow_right * Tright_row)
|
||||
|
||||
# Number of times the model function has been evaluated so far.
eval_count = 0


def nl_model_arr(arg_sets,
                 Tstartup,
                 #Tqual,
                 Tres,
                 Tfail,
                 Tleft_row,
                 Tright_row
                 ):
    """Apply nl_model_form to every row of arg_sets.

    Returns a numpy array with one cost per row and increments the
    module-level eval_count once per call.  The explicit parameter list is
    what gets wrapped by Model() below the function.
    """
    global eval_count
    coefficients = (Tstartup, Tres, Tfail, Tleft_row, Tright_row)
    costs = []
    for case_args in arg_sets:
        costs.append(nl_model_form(case_args, *coefficients))
    eval_count += 1
    return np.array(costs)
|
||||
|
||||
|
||||
# Wrap the array-valued cost function in an lmfit Model.  Tstartup is
# confined to [0, 50]; the other coefficients only have a lower bound of 0.
# (Tqual is not a fit parameter.)
nl_model = Model(nl_model_arr)
nl_model.set_param_hint("Tstartup", min=0.0, max=50)
for _hinted_param in ("Tres", "Tfail", "Tleft_row", "Tright_row"):
    nl_model.set_param_hint(_hinted_param, min=0.0)
|
||||
|
||||
|
||||
def extract_info_from_line(line):
    """Split a comma-separated line and return its fields as a list of floats."""
    return [float(field) for field in line.split(",")]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Usage: [-i input_file] [-o output_file]
    # Fits nl_model to the measurements in the input file, prints the lmfit
    # report and writes the fitted coefficients as one comma-separated line.

    # Coefficients and their starting values, in output order.
    initial_guess = (("Tstartup", 50.0),
                     ("Tres", 0.3),
                     ("Tfail", 0.3),
                     ("Tleft_row", 0.3),
                     ("Tright_row", 0.3))

    # These used to be forced by appending "-i nl.prep -o nl.model" to
    # sys.argv, which also overrode anything given on the command line.
    # They are plain defaults now: a run without options behaves the same,
    # and explicit -i / -o values are honoured.
    file_name = "nl.prep"
    out_file_name = "nl.model"
    output_fit_res = True

    # getopt raises GetoptError for an unknown or malformed option, so the
    # "wrong arg" diagnostic has to be produced here.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "i:o:")
    except getopt.GetoptError:
        print("wrong arg")
        sys.exit(1)
    for op, value in opts:
        if op == "-i":
            file_name = value
        elif op == "-o":
            output_fit_res = True
            out_file_name = value

    arg_sets = []
    times = []
    with open(file_name, "r") as data_file:
        for line in data_file:
            if not line.strip():
                continue  # tolerate blank lines such as a trailing newline
            case_param = extract_info_from_line(line)
            arg_sets.append((case_param[6],     # Nrow_res
                             case_param[0],     # Nrow_left
                             case_param[1],     # Nrow_right
                             case_param[-3],    # Nright_cache_in
                             case_param[-2],    # Nright_cache_out
                             case_param[-1],    # Nright_cache_clear
                             case_param[8]))    # Nequal_cond
            times.append(case_param[7])         # measured value being fitted

    result = nl_model.fit(np.array(times), arg_sets=np.array(arg_sets),
                          **dict(initial_guess))

    res_line = ",".join(str(result.best_values[name])
                        for name, _ in initial_guess)

    print(result.fit_report())

    if output_fit_res:
        with open(out_file_name, "w") as out_file:
            out_file.write(res_line)
|
@ -0,0 +1,60 @@
|
||||
import math
|
||||
import numpy as np
|
||||
from scipy.optimize import leastsq
|
||||
from scipy.optimize import curve_fit
|
||||
import sys
|
||||
from lmfit import Model
|
||||
import getopt
|
||||
import subprocess
|
||||
import os
|
||||
import re
|
||||
|
||||
|
||||
# Short tag (used as a file-name suffix by the driver loop below) -> the
# type spelling printed next to its fitted coefficients.
types_to_test = dict([
    ('bigint', 'bigint'),
    ('double', 'double'),
    ('float', 'float'),
    ('timestamp', 'timestamp'),
    ('number', 'number(20,3)'),
    ('v1', 'varchar(1)'),
    ('v32', 'varchar(32)'),
    ('v64', 'varchar(64)'),
    ('v128', 'varchar(128)'),
])
|
||||
|
||||
def run_cmd(cmd):
    """Run `cmd` through the shell and return its combined output, stripped.

    stderr is merged into stdout.  The output is read line by line until
    the pipe closes, then the process is waited for.  The result is always
    a text string: on interpreters where the pipe yields bytes, the bytes
    are decoded (UTF-8, undecodable bytes replaced) instead of being
    concatenated onto a str, which raised TypeError.

    NOTE: cmd is executed with shell=True, so it must only ever be built
    from trusted values.
    """
    proc = subprocess.Popen(cmd, shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    chunks = []
    # readline() returns an empty string/bytes object at end of stream.
    for line in iter(proc.stdout.readline, b''):
        chunks.append(line)
        sys.stdout.flush()
    proc.wait()

    output = b''.join(chunks)
    if not isinstance(output, str):
        output = output.decode('utf-8', 'replace')
    return output.strip()
|
||||
|
||||
def rm_if_exist(filename):
    """Delete `filename` when it exists; do nothing otherwise."""
    if not os.path.exists(filename):
        return
    os.remove(filename)
|
||||
|
||||
def extract_kv(k, src):
    """Return the number that follows the first "<k>:" entry in `src`.

    `k` is inserted into the regular expression as-is.  The value is the
    run of digits, '.', 'e', '+' and '-' after the colon, with or without
    whitespace in between (the earlier split()-based extraction failed when
    there was no whitespace).

    Raises IndexError when no such entry is found, and ValueError when the
    matched text is not a valid float.
    """
    pattern = re.compile(k + r':\s*(?P<value>[\d\.e\-\+]+)')
    found = pattern.search(src)
    if found is None:
        raise IndexError("no '%s: <number>' entry found" % k)
    return float(found.group('value'))
|
||||
|
||||
# For every type tag, in sorted order: preprocess its raw result file, fit
# the material model, apply the fitted model, then print the two fitted
# coefficients parsed out of the fit script's output.
for t in sorted(types_to_test):
    result_file_name = 'rowstore.result.' + t
    prep_file_name = 'rowstore.prep.' + t
    model_file = 'rowstore.model.' + t
    fit_file = 'rowstore.fit.' + t

    # start from a clean preprocessed file
    rm_if_exist(prep_file_name)
    run_cmd("./preprocess.py -i %s -o %s -t 7 -C 3 -d" % (result_file_name, prep_file_name))

    # the fit script's output is kept so the coefficients can be read from it
    fitres = run_cmd("./fit_material.py -i " + prep_file_name + " -o " + model_file)
    run_cmd("./apply_material_model.py -i %s -o %s -m %s" % (prep_file_name, fit_file, model_file))

    Trow_col = extract_kv('Trow_col', fitres)
    Trow_once = extract_kv('Trow_once', fitres)

    print(types_to_test[t] + ":")
    print(" " + str(Trow_col))
    print(" " + str(Trow_once))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
268
unittest/sql/optimizer/cost_model_utils/fit_sort.py
Executable file
268
unittest/sql/optimizer/cost_model_utils/fit_sort.py
Executable file
@ -0,0 +1,268 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
from scipy.optimize import leastsq
|
||||
from scipy.optimize import curve_fit
|
||||
import sys
|
||||
from lmfit import Model
|
||||
import getopt
|
||||
import os
|
||||
|
||||
|
||||
# Fixed coefficients used by the cost formulas in this script.
MATERIAL_ROW_COL = 0.02674675
MATERIAL_ROW_ONCE = 0.07931677
RESERVE_CELL = 0.0044


def material_model_form(args):
    """Return the material cost for one (Nrow, Ncol) case.

    The cost is Nrow * (MATERIAL_ROW_ONCE + Ncol * MATERIAL_ROW_COL); the
    two module-level constants are read on every call.  No startup term is
    included.
    """
    Nrow, Ncol = args
    per_row_cost = MATERIAL_ROW_ONCE + Ncol * MATERIAL_ROW_COL

    cost = 0  # stands in for the disabled startup term
    cost += Nrow * per_row_cost
    return cost
|
||||
|
||||
def array_model_form(args):
    """Return the modelled cost of pushing `args` elements into an array.

    `args` is the element count itself (not a tuple).  The cost is a
    per-element term plus a copy term: the array is modelled as starting
    with ELEM_PER_PAGE slots and doubling extend_cnt times, which copies
    ELEM_PER_PAGE * (2 ** extend_cnt - 1) elements in total.

    A count of 0 now costs 0 instead of failing inside math.log with a
    domain error.  Negative counts still raise ValueError.
    """
    Telem_ence = 0.00898860
    Telem_copy = 0.00631888

    Nelem = args
    if Nelem < 0:
        raise ValueError("element count must not be negative")

    ELEM_PER_PAGE = 1024
    if Nelem > ELEM_PER_PAGE:
        # NOTE(review): math.log(x, 2) is not exact for every power of two,
        # so the ceiling can come out one too high right at a boundary.
        extend_cnt = math.ceil(math.log(float(Nelem) / ELEM_PER_PAGE, 2))
    else:
        # Everything fits into the first page: nothing is ever copied.
        extend_cnt = 0
    copy_cnt = ELEM_PER_PAGE * (math.pow(2, extend_cnt) - 1)

    total_cost = Telem_ence * Nelem
    total_cost += Telem_copy * copy_cnt
    return total_cost
|
||||
|
||||
def get_row_size(reserve, col):
    """Return the row-size estimate used by the sort model.

    The estimate is 16 + reserve * 16, plus
    (3 + 8 + 4 + 8 + 16 + 32 + 64 + 128) + 1 for every eighth of `col`.
    """
    col_groups = col / 8
    per_group = 3 + 8 + 4 + 8 + 16 + 32 + 64 + 128

    row_size = 16
    row_size += reserve * 16
    row_size += col_groups * per_group
    row_size += col_groups
    return row_size
|
||||
|
||||
def round_wasted_spave(rsize, psize):
    """Return ``rsize`` plus an even share of the space wasted per page.

    A page of ``psize`` holds ``floor(psize / rsize)`` whole rows; the
    leftover space is spread evenly over those rows.
    """
    rows_per_page = math.floor(float(psize / rsize))
    leftover = psize - rows_per_page * rsize
    return rsize + leftover / rows_per_page
|
||||
|
||||
|
||||
|
||||
def get_miss_prob(Nrow, Ncol, Nord, Turn):
    """Return the modelled miss probability for a data set of ``Nrow`` rows.

    The total size is ``Nrow * get_row_size(Nord, Ncol)``.  ``Turn`` is the
    size treated as covered: the hit ratio is ``Turn / total_size`` and is
    capped at 0.9 once ``Turn`` reaches 90% of the total size.
    """
    total_size = Nrow * get_row_size(Nord, Ncol)
    if Turn >= 0.9 * total_size:
        # Hit ratio never exceeds 0.9.
        return 1 - 0.9
    return 1 - Turn / total_size
|
||||
|
||||
|
||||
|
||||
def sort_model_form(args,
                    Tcompare,
                    Tmiss_K1,
                    Turn
                    ):
    """Return the modelled sort cost for one benchmark case.

    ``args`` is ``(Nrow, Ncol, Nordering)``.  Only the comparison phase is
    modelled: ``Nrow * log2(Nrow)`` comparisons, each costing ``Tcompare``
    (charged for at most one ordering column) plus ``Tmiss_K1`` weighted by
    the miss probability from ``get_miss_prob``.  The row-store, array-push
    and get-row terms that were tried earlier are disabled.
    """
    Nrow, Ncol, Nordering = args

    # A comparison is charged for at most one ordering column.
    cmp_columns = 1 if Nordering >= 1 else Nordering
    miss_prob = get_miss_prob(Nrow, Ncol, Nordering, Turn)
    per_compare = Tcompare * cmp_columns + Tmiss_K1 * miss_prob

    return Nrow * per_compare * math.log(Nrow, 2)
|
||||
|
||||
def sort_model_arr(arg_sets,
                   Tcompare,
                   Tmiss_K1,
                   Turn,
                   ):
    """Evaluate ``sort_model_form`` for every case in ``arg_sets``.

    Returns a numpy array with one modelled cost per case, in input order.
    This is the function wrapped by the lmfit ``Model``.
    """
    return np.array([sort_model_form(case, Tcompare, Tmiss_K1, Turn)
                     for case in arg_sets])
|
||||
|
||||
# Wrap the vectorised cost function in an lmfit Model; its keyword
# parameters (Tcompare, Tmiss_K1, Turn) become the fit parameters.
sort_model = Model(sort_model_arr)

# Both cost coefficients are constrained to be non-negative.
for _coef_name in ("Tcompare", "Tmiss_K1"):
    sort_model.set_param_hint(_coef_name, min=0.0)

# Turn is boxed into [2097152, 2097153] (2097152 = 2 * 1024 * 1024), which
# leaves the fit almost no room to move it.
sort_model.set_param_hint("Turn", min=2097152.0, max=2097153.0)

# Hints for the other candidate terms (Tstartup, Trowstore_*, Tarray_*,
# Tordercol, Treserve_cell, Trow_once, Tmiss_K2) are disabled together
# with those terms in sort_model_form.
|
||||
|
||||
def extract_info_from_line(line):
    """Parse one comma-separated line into a list of floats."""
    return [float(field) for field in line.split(",")]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Usage: fit_sort.py [-i data_file] [-o model_file] [-R row_once] [-C row_col]
    # Fits the sort cost model to preprocessed benchmark data and writes the
    # fitted coefficients as one comma-separated line.
    file_name = "sort_result_final"
    out_file_name = "sort_model"

    # Drop a model left at the default location by a previous run.
    if os.path.exists(out_file_name):
        os.remove(out_file_name)

    output_fit_res = True
    wrong_arg = False
    opts, args = getopt.getopt(sys.argv[1:], "i:o:R:C:")
    for op, value in opts:
        if "-i" == op:
            file_name = value
        elif "-o" == op:
            output_fit_res = True
            out_file_name = value
        elif "-R" == op:
            MATERIAL_ROW_ONCE = float(value)
        elif "-C" == op:
            MATERIAL_ROW_COL = float(value)
        else:
            wrong_arg = True

    if wrong_arg:
        print("wrong arg")
        sys.exit(1)

    arg_sets = []
    times = []
    case_params = []
    # The context manager closes the file even if a line fails to parse.
    with open(file_name, "r") as data_file:
        for line in data_file:
            # Skip comment lines and blank lines (e.g. a trailing newline).
            if line.startswith('#') or not line.strip():
                continue
            case_param = extract_info_from_line(line)
            case_params.append(case_param)
            # Columns 0-2 are (Nrow, Ncol, Nordering); column 4 is the time.
            arg_sets.append((case_param[0], case_param[1], case_param[2]))
            times.append(case_param[4])

    arg_sets_np = np.array(arg_sets)
    times_np = np.array(times)

    result = sort_model.fit(times_np, arg_sets=arg_sets_np,
                            Tcompare=1.0,
                            Tmiss_K1=1.0,
                            Turn=2097152,
                            )

    res_line = ",".join(str(result.best_values[key])
                        for key in ("Tcompare", "Tmiss_K1", "Turn"))

    print(result.fit_report())

    if output_fit_res:
        with open(out_file_name, "w") as out_file:
            out_file.write(res_line)
|
271
unittest/sql/optimizer/cost_model_utils/fit_sort_add.py
Executable file
271
unittest/sql/optimizer/cost_model_utils/fit_sort_add.py
Executable file
@ -0,0 +1,271 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
from scipy.optimize import leastsq
|
||||
from scipy.optimize import curve_fit
|
||||
import sys
|
||||
from lmfit import Model
|
||||
import getopt
|
||||
import os
|
||||
from cost_test_conf import Config
|
||||
|
||||
MATERIAL_ROW_COL = 0.02674675
|
||||
MATERIAL_ROW_ONCE = 0.07931677
|
||||
RESERVE_CELL = 0.0044
|
||||
|
||||
def material_model_form(args):
    """Return the modelled cost of materializing ``Nrow`` rows of ``Ncol`` columns.

    ``args`` is a ``(Nrow, Ncol)`` pair.  The per-row and per-row-per-column
    coefficients are read from the module-level ``MATERIAL_ROW_ONCE`` and
    ``MATERIAL_ROW_COL`` values (which the ``-R`` / ``-C`` options overwrite).
    No startup term is included.
    """
    Nrow, Ncol = args
    # Fixed cost per row plus a cost proportional to its column count.
    per_row_cost = MATERIAL_ROW_ONCE + Ncol * MATERIAL_ROW_COL
    return Nrow * per_row_cost
|
||||
|
||||
def array_model_form(args):
    """Return the modelled cost of pushing ``args`` elements into a growing array.

    ``args`` is the element count itself (not a tuple).  The model charges a
    fixed cost per element plus a copy cost for every doubling needed to grow
    past one page of 1024 elements.
    """
    Telem_ence = 0.00898860
    Telem_copy = 0.00631888
    ELEM_PER_PAGE = 1024

    Nelem = args

    # Number of doublings needed; never negative (inputs below one page).
    doublings = max(0, math.ceil(math.log(float(Nelem) / ELEM_PER_PAGE, 2)))
    # Elements copied over all doublings: PAGE * (2**doublings - 1).
    copied = ELEM_PER_PAGE * (math.pow(2, doublings) - 1)

    return Telem_ence * Nelem + Telem_copy * copied
|
||||
|
||||
def get_row_size(reserve, col):
    """Estimate the size of one row.

    The estimate is a 16-unit header, 16 units per reserved cell, and for
    every group of 8 columns a fixed payload (the sum below) plus one extra
    unit.  ``col`` is divided by 8 with the ``/`` operator, so integer
    inputs follow the interpreter's integer-division rules.
    """
    col_groups = col / 8
    payload_per_group = 3 + 8 + 4 + 8 + 16 + 32 + 64 + 128
    return 16 + reserve * 16 + col_groups * payload_per_group + col_groups
|
||||
|
||||
def round_wasted_spave(rsize, psize):
    """Return ``rsize`` plus an even share of the space wasted per page.

    A page of ``psize`` holds ``floor(psize / rsize)`` whole rows; the
    leftover space is spread evenly over those rows.
    """
    rows_per_page = math.floor(float(psize / rsize))
    leftover = psize - rows_per_page * rsize
    return rsize + leftover / rows_per_page
|
||||
|
||||
|
||||
|
||||
def get_miss_prob(Nrow, Ncol, Nord, Turn):
    """Return the modelled miss probability for a data set of ``Nrow`` rows.

    The total size is ``Nrow * get_row_size(Nord, Ncol)``.  ``Turn`` is the
    size treated as covered: the hit ratio is ``Turn / total_size`` and is
    capped at 0.9 once ``Turn`` reaches 90% of the total size.
    """
    total_size = Nrow * get_row_size(Nord, Ncol)
    if Turn >= 0.9 * total_size:
        # Hit ratio never exceeds 0.9.
        return 1 - 0.9
    return 1 - Turn / total_size
|
||||
|
||||
|
||||
|
||||
def sort_model_form(args,
                    Tcompare,
                    Tmiss_K1,
                    Turn
                    ):
    """Return the modelled sort cost for one benchmark case.

    ``args`` is ``(Nrow, Ncol, Nordering)``.  Only the comparison phase is
    modelled: ``Nrow * log2(Nrow)`` comparisons, each costing ``Tcompare``
    (charged for at most one ordering column) plus ``Tmiss_K1`` weighted by
    the miss probability from ``get_miss_prob``.  The row-store, array-push
    and get-row terms that were tried earlier are disabled.
    """
    Nrow, Ncol, Nordering = args

    # A comparison is charged for at most one ordering column.
    cmp_columns = 1 if Nordering >= 1 else Nordering
    miss_prob = get_miss_prob(Nrow, Ncol, Nordering, Turn)
    per_compare = Tcompare * cmp_columns + Tmiss_K1 * miss_prob

    return Nrow * per_compare * math.log(Nrow, 2)
|
||||
|
||||
def sort_model_arr(arg_sets,
                   Tcompare,
                   Tmiss_K1,
                   Turn,
                   ):
    """Evaluate ``sort_model_form`` for every case in ``arg_sets``.

    Returns a numpy array with one modelled cost per case, in input order.
    This is the function wrapped by the lmfit ``Model``.
    """
    return np.array([sort_model_form(case, Tcompare, Tmiss_K1, Turn)
                     for case in arg_sets])
|
||||
|
||||
# Wrap the vectorised cost function in an lmfit Model; its keyword
# parameters (Tcompare, Tmiss_K1, Turn) become the fit parameters.
sort_model = Model(sort_model_arr)

# Both cost coefficients are constrained to be non-negative.
for _coef_name in ("Tcompare", "Tmiss_K1"):
    sort_model.set_param_hint(_coef_name, min=0.0)

# Turn is boxed into [2097152, 2097153] (2097152 = 2 * 1024 * 1024), which
# leaves the fit almost no room to move it.
sort_model.set_param_hint("Turn", min=2097152.0, max=2097153.0)

# Hints for the other candidate terms (Tstartup, Trowstore_*, Tarray_*,
# Tordercol, Treserve_cell, Trow_once, Tmiss_K2) are disabled together
# with those terms in sort_model_form.
|
||||
|
||||
def extract_info_from_line(line):
    """Parse one comma-separated line into a list of floats."""
    return [float(field) for field in line.split(",")]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Usage: fit_sort_add.py [-i data_file] [-o model_file] [-R row_once] [-C row_col]
    # Fits the sort cost model to preprocessed benchmark data.  The fitted
    # coefficients are only written to a file when -o is given.
    file_name = "sort_add_" + Config.u_to_test_type + "_result_final"
    out_file_name = "sort_add_" + Config.u_to_test_type + "_model"

    # Drop a model left at the default location by a previous run.
    if os.path.exists(out_file_name):
        os.remove(out_file_name)

    output_fit_res = False
    wrong_arg = False
    opts, args = getopt.getopt(sys.argv[1:], "i:o:R:C:")
    for op, value in opts:
        if "-i" == op:
            file_name = value
        elif "-o" == op:
            output_fit_res = True
            out_file_name = value
        elif "-R" == op:
            MATERIAL_ROW_ONCE = float(value)
        elif "-C" == op:
            MATERIAL_ROW_COL = float(value)
        else:
            wrong_arg = True

    if wrong_arg:
        print("wrong arg")
        sys.exit(1)

    arg_sets = []
    times = []
    case_params = []
    # The context manager closes the file even if a line fails to parse.
    with open(file_name, "r") as data_file:
        for line in data_file:
            # Skip comment lines and blank lines (e.g. a trailing newline).
            if line.startswith('#') or not line.strip():
                continue
            case_param = extract_info_from_line(line)
            case_params.append(case_param)
            # Columns 0-2 are (Nrow, Ncol, Nordering); column 4 is the time.
            arg_sets.append((case_param[0], case_param[1], case_param[2]))
            times.append(case_param[4])

    arg_sets_np = np.array(arg_sets)
    times_np = np.array(times)

    result = sort_model.fit(times_np, arg_sets=arg_sets_np,
                            Tcompare=1.0,
                            Tmiss_K1=1.0,
                            Turn=2097152,
                            )

    res_line = ",".join(str(result.best_values[key])
                        for key in ("Tcompare", "Tmiss_K1", "Turn"))

    print(result.fit_report())

    if output_fit_res:
        with open(out_file_name, "w") as out_file:
            out_file.write(res_line)
|
75
unittest/sql/optimizer/cost_model_utils/fit_sort_master.py
Normal file
75
unittest/sql/optimizer/cost_model_utils/fit_sort_master.py
Normal file
@ -0,0 +1,75 @@
|
||||
import math
|
||||
import numpy as np
|
||||
from scipy.optimize import leastsq
|
||||
from scipy.optimize import curve_fit
|
||||
import sys
|
||||
from lmfit import Model
|
||||
import getopt
|
||||
import subprocess
|
||||
import os
|
||||
import re
|
||||
|
||||
|
||||
types_to_test = {'bigint':['bigint', 0.0266846, 0.07364082], 'double': ['double', 0.02970336, 0.07228732], 'float':['float', 0.02512819, 0.07295116], 'timestamp':['timestamp', 0.02998249, 0.07265038],
|
||||
'number':['number(20,3)', 0.08238981, 0.15730252], 'v32':['varchar(32)', 0.08476897, 0.07518651], 'v64':['varchar(64)', 0.13678196, 0.05033624], 'v128':['varchar(128)', 0.22601192, 2.2963e-08]}
|
||||
|
||||
def run_cmd(cmd):
    """Echo ``cmd``, run it through the shell and return its output.

    stderr is merged into stdout.  The combined output is read line by line
    until end of stream and returned with surrounding whitespace stripped.
    """
    print(cmd)
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    captured = []
    while True:
        line = proc.stdout.readline()
        if not line:
            # Empty read means the child closed its output.
            break
        captured.append(line)
        sys.stdout.flush()
    proc.wait()
    return ''.join(captured).strip()
|
||||
|
||||
def rm_if_exist(filename):
    """Delete ``filename`` when it exists; do nothing otherwise."""
    if not os.path.exists(filename):
        return
    os.remove(filename)
|
||||
|
||||
def extract_kv(k, src):
    """Return the first numeric value that follows ``k:`` in ``src``.

    ``k`` is inserted into the regular expression as-is.  The value may be
    separated from the colon by any amount of whitespace, including none.

    Raises:
        IndexError: if ``src`` contains no ``k: <number>`` entry.
        ValueError: if the matched text is not a valid float.
    """
    # Raw string avoids invalid-escape warnings; the capture group picks the
    # number directly, so "key:1.5" works as well as "key:   1.5".
    pattern = re.compile(k + r':\s*([\d\.e\-\+]+)')
    return float(pattern.findall(src)[0])
|
||||
|
||||
# For every column type that has raw benchmark output, run the
# preprocess -> fit -> apply pipeline and print what the apply step reports.
for t in sorted(types_to_test):
    result_file_name = 'sort.result.' + t
    prep_file_name = 'sort.prep.' + t
    model_file = 'sort.model.' + t
    fit_file = 'sort.fit.' + t

    # Types without benchmark data are skipped.
    if not os.path.exists(result_file_name):
        continue

    # Start from a clean slate so stale outputs are never picked up.
    for stale_output in (prep_file_name, model_file, fit_file):
        rm_if_exist(stale_output)

    run_cmd("./preprocess.py -i %s -o %s -t 7 -C 4 -d" % (result_file_name, prep_file_name))

    # Entry [2] of the type record is passed as -R and entry [1] as -C.
    type_record = types_to_test[t]
    cmd = "./fit_sort.py -i %s -R %s -C %s -o %s" % (prep_file_name, str(type_record[2]), str(type_record[1]), model_file)
    print(cmd)
    fitres = run_cmd(cmd)

    appres = run_cmd("./apply_sort_model.py -i %s -o %s -m %s" % (prep_file_name, fit_file, model_file))
    print(appres)
    # Per-parameter reporting via extract_kv(..., fitres) is currently disabled.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
155
unittest/sql/optimizer/cost_model_utils/hash_join.py
Executable file
155
unittest/sql/optimizer/cost_model_utils/hash_join.py
Executable file
@ -0,0 +1,155 @@
|
||||
from mylog.mylog import MyLogger
|
||||
from op_generator import op_generator
|
||||
from cost_test_conf import Config
|
||||
import subprocess as sp
|
||||
import os
|
||||
from lmfit import Model
|
||||
import numpy as np
|
||||
|
||||
# step 1 gen op and conf
hash_cls = op_generator.gen_operator("hash_join")
conf = Config()
conf.u_to_test_op_c = 'hash'
conf.is_not_running_as_unittest_c = True
conf.schema_file_c = 'c10k1x2.schema'
conf.left_row_count_c = 1000
conf.right_row_count_c = 1000
conf.left_min_c = 1
conf.right_min_c = 1
conf.is_random_c = True
conf.left_pj_c = 10
conf.right_pj_c = 10
hash_op = hash_cls(conf)

# Results are appended below, so start from an empty file.
result_file_name = "hash_join_result"
if os.path.exists(result_file_name):
    os.remove(result_file_name)

# step 2 do bench and gen data
case_run_time = 7
case_count = 0
row_count_max = 100000
row_count_step = 2000
# Both tables sweep the same range, so the grid is square.
total_case_count = row_count_max / row_count_step
total_case_count = total_case_count * total_case_count

print("Total case count %s ..." % (total_case_count))
for left_row_count in xrange(1000, row_count_max + 1, row_count_step):
    for right_row_count in xrange(1000, row_count_max + 1, row_count_step):
        case_count += 1
        bench_conf = hash_op.conf
        bench_conf.left_row_count_c = left_row_count
        bench_conf.right_row_count_c = right_row_count
        bench_conf.left_max_c = max(left_row_count, right_row_count) * 3
        bench_conf.right_max_c = bench_conf.left_max_c
        # Prefix the result line with the case parameters; the benchmark
        # command appends the rest of the line.
        sp.check_call("echo -n '%s,%s,' >> %s" % (left_row_count, right_row_count, result_file_name), shell=True)
        print("Running case %s / %s ... : %s " % (case_count, total_case_count, hash_op.get_bench_cmd()))
        print("%s >> %s" % (hash_op.get_bench_cmd(), result_file_name))
        sp.check_call("%s >> %s" % (hash_op.get_bench_cmd(), result_file_name), shell=True)

# step 3 process data
final_file_name = "hash_join_result_final"
if os.path.exists(final_file_name):
    os.remove(final_file_name)

data_cmd = hash_op.get_data_preprocess_cmd()
sp.check_call(data_cmd, shell=True)

# step 4 fit and output
out_model_file_name = "hash_model"
if os.path.exists(out_model_file_name):
    os.remove(out_model_file_name)
|
||||
|
||||
|
||||
def hash_model_form(args,
                    Tstart_up,
                    Tbuild_htable,
                    Tright_row_once,
                    Tconvert_tuple,
                    Tjoin_row
                    ):
    """Return the modelled hash-join cost for one benchmark case.

    ``args`` is ``(Nres_row, Nleft_row, Nright_row, Nequal_cond)``.  The cost
    is a startup term plus one linear term per count: left rows pay
    ``Tbuild_htable``, right rows ``Tright_row_once``, equal conditions
    ``Tconvert_tuple`` and result rows ``Tjoin_row``.
    """
    Nres_row, Nleft_row, Nright_row, Nequal_cond = args

    counts = (Nleft_row, Nright_row, Nequal_cond, Nres_row)
    unit_costs = (Tbuild_htable, Tright_row_once, Tconvert_tuple, Tjoin_row)

    total_cost = Tstart_up
    for count, unit_cost in zip(counts, unit_costs):
        total_cost += count * unit_cost
    return total_cost
|
||||
|
||||
|
||||
def hash_model_arr(arg_sets,
                   Tstart_up,
                   Tbuild_htable,
                   Tright_row_once,
                   Tconvert_tuple,
                   Tjoin_row):
    """Evaluate ``hash_model_form`` for every case in ``arg_sets``.

    Returns a numpy array with one modelled cost per case, in input order.
    """
    return np.array([hash_model_form(case,
                                     Tstart_up,
                                     Tbuild_htable,
                                     Tright_row_once,
                                     Tconvert_tuple,
                                     Tjoin_row)
                     for case in arg_sets])
|
||||
|
||||
|
||||
def extract_info_from_line(line):
    """Parse one comma-separated line into a list of floats."""
    return [float(field) for field in line.split(",")]
|
||||
|
||||
# Fit the hash-join cost model to the preprocessed benchmark data and write
# the fitted coefficients as one comma-separated line.
hash_model = Model(hash_model_arr)
# Every coefficient is constrained to be non-negative.
hash_model.set_param_hint("Tstart_up", min=0.0)
hash_model.set_param_hint("Tbuild_htable", min=0.0)
hash_model.set_param_hint("Tright_row_once", min=0.0)
hash_model.set_param_hint("Tconvert_tuple", min=0.0)
hash_model.set_param_hint("Tjoin_row", min=0.0)

arg_sets = []
times = []
case_params = []
# The context manager closes the file even if a line fails to parse.
with open(final_file_name, "r") as data_file:
    for line in data_file:
        # Skip comment lines and blank lines (e.g. a trailing newline).
        if line.startswith('#') or not line.strip():
            continue
        case_param = extract_info_from_line(line)
        case_params.append(case_param)
        # Reorder the columns into (Nres_row, Nleft_row, Nright_row,
        # Nequal_cond), the order hash_model_form unpacks; column 4 is the
        # measured time.
        arg_sets.append((case_param[2], case_param[0], case_param[1], case_param[3]))
        times.append(case_param[4])

arg_sets_np = np.array(arg_sets)
times_np = np.array(times)

# The keyword must match the model parameter name "Tstart_up"; the former
# "Tstartup" spelling left that parameter without an initial value.
result = hash_model.fit(times_np, arg_sets=arg_sets_np,
                        Tstart_up=0.0,
                        Tbuild_htable=0.0,
                        Tright_row_once=0.0,
                        Tconvert_tuple=0.0,
                        Tjoin_row=0.0)

res_line = ",".join(str(result.best_values[key])
                    for key in ("Tstart_up",
                                "Tbuild_htable",
                                "Tright_row_once",
                                "Tconvert_tuple",
                                "Tjoin_row"))
print(result.fit_report())

if out_model_file_name:
    with open(out_model_file_name, "w") as out_file:
        out_file.write(res_line)
|
132
unittest/sql/optimizer/cost_model_utils/material.py
Executable file
132
unittest/sql/optimizer/cost_model_utils/material.py
Executable file
@ -0,0 +1,132 @@
|
||||
from mylog.mylog import MyLogger
|
||||
from op_generator import op_generator
|
||||
from cost_test_conf import Config
|
||||
import subprocess as sp
|
||||
import os
|
||||
from lmfit import Model
|
||||
import numpy as np
|
||||
|
||||
# step 1 gen op and conf
material_cls = op_generator.gen_operator("material")
conf = Config()
conf.u_to_test_op_c = 'material'
conf.is_not_running_as_unittest_c = True
conf.schema_file_c = 'c10k1.schema'
conf.row_count_c = 1000
conf.input_projector_count_c = 1

material_op = material_cls(conf)
# Results are appended below, so start from an empty file.
result_file_name = 'material_result'
if os.path.exists(result_file_name):
    os.remove(result_file_name)

# step 2 do_bench and gen data
row_count_max = 1001
row_count_step = 100

column_counts = [3, 5, 8]

case_run_time = 7

# Number of row-count steps x column counts x repetitions.
total_case_count = (row_count_max // row_count_step + 1) * len(column_counts) * case_run_time
case_count = 0

print("Total case count %s ..." % (total_case_count))
for row_count in xrange(1, row_count_max + 1, row_count_step):
    for column_count in column_counts:
        for run_index in xrange(case_run_time):
            case_count += 1
            material_op.conf.row_count_c = row_count
            material_op.conf.input_projector_count_c = column_count
            # Prefix the result line with the case parameters; the benchmark
            # command appends the rest of the line.
            sp.check_call("echo -n '%s,' >> %s" % (row_count, result_file_name), shell=True)
            sp.check_call("echo -n '%s,' >> %s" % (column_count, result_file_name), shell=True)
            print("Running case %s / %s ... : %s " % (case_count, total_case_count, material_op.get_bench_cmd()))
            print("%s >> %s" % (material_op.get_bench_cmd(), result_file_name))
            sp.check_call("%s >> %s" % (material_op.get_bench_cmd(), result_file_name), shell=True)

# step 3 preprocess data
final_file_name = "material_result_final"
# Remove the file that is actually read back during fitting; the cleanup
# used to target the misspelled name "material_final_result".
if os.path.exists(final_file_name):
    os.remove(final_file_name)
data_cmd = material_op.get_data_preprocess_cmd()
sp.check_call(data_cmd, shell=True)

# step 4 fit and output
# given model form, do fit using previous result data
# case param should be considered with cost_model_util.cpp output format
# eg: material_test() in cost_model_util.cpp
# output row_count, cost_time
out_model_file_name = "material_model"
if os.path.exists(out_model_file_name):
    os.remove(out_model_file_name)
|
||||
|
||||
|
||||
def material_model_form(args,
                        Trow_once,
                        Trow_col):
    """Return the modelled cost of materializing ``Nrow`` rows of ``Ncol`` columns.

    ``args`` is a ``(Nrow, Ncol)`` pair.  Each row costs ``Trow_once`` plus
    ``Trow_col`` per column; no startup term is included.
    """
    Nrow, Ncol = args
    per_row_cost = Trow_once + Ncol * Trow_col
    return Nrow * per_row_cost
|
||||
|
||||
|
||||
def material_model_arr(arg_sets,
                       Trow_once,
                       Trow_col):
    """Evaluate ``material_model_form`` for every case in ``arg_sets``.

    Returns a numpy array with one modelled cost per case, in input order.
    """
    return np.array([material_model_form(case, Trow_once, Trow_col)
                     for case in arg_sets])
|
||||
|
||||
|
||||
def extract_info_from_line(line):
    """Parse one comma-separated line into a list of floats."""
    return [float(field) for field in line.split(",")]
|
||||
|
||||
|
||||
# Fit the material cost model to the preprocessed benchmark data and write
# the fitted coefficients as one comma-separated line.
material_model = Model(material_model_arr)
# Both coefficients are constrained to be non-negative.
material_model.set_param_hint("Trow_once", min=0.0)
material_model.set_param_hint("Trow_col", min=0.0)

arg_sets = []
times = []
case_params = []
# The context manager closes the file even if a line fails to parse.
with open(final_file_name, "r") as data_file:
    for line in data_file:
        # Skip comment lines and blank lines (e.g. a trailing newline).
        if line.startswith('#') or not line.strip():
            continue
        case_param = extract_info_from_line(line)
        case_params.append(case_param)
        # Columns 0-1 are (Nrow, Ncol); column 3 is the measured time.
        arg_sets.append((case_param[0], case_param[1]))
        times.append(case_param[3])

arg_sets_np = np.array(arg_sets)
times_np = np.array(times)

# result is the fitting result model
result = material_model.fit(times_np, arg_sets=arg_sets_np,
                            Trow_once=10.0,
                            Trow_col=1.0
                            )

res_line = ",".join(str(result.best_values[key])
                    for key in ("Trow_once", "Trow_col"))

print(result.fit_report())

if out_model_file_name:
    with open(out_model_file_name, "w") as out_file:
        out_file.write(res_line)
|
43
unittest/sql/optimizer/cost_model_utils/mylog/mylog.py
Normal file
43
unittest/sql/optimizer/cost_model_utils/mylog/mylog.py
Normal file
@ -0,0 +1,43 @@
|
||||
import logging
|
||||
import sys
|
||||
|
||||
class Singleton(object):
    """Base class whose subclasses each have at most one instance.

    The first instantiation of a class creates and caches the instance on
    that class; later instantiations return the cached object.  ``__init__``
    still runs on every call, as usual for ``__new__``-based singletons.
    """

    def __new__(cls, *args, **kw):
        # Check this class's own namespace only, so a subclass never reuses
        # the instance cached on its parent.
        if '_instance' not in cls.__dict__:
            # object.__new__ rejects extra arguments; they are meant for
            # __init__ and must not be forwarded.
            cls._instance = super(Singleton, cls).__new__(cls)
        return cls._instance
|
||||
|
||||
|
||||
class MyLogger(Singleton):
    """Process-wide logger that writes INFO-and-above records to stdout.

    The logger and its handler are configured once, when the class body is
    executed; all access goes through the static helpers below.
    """

    log = logging.getLogger(__name__)
    # Route records to stdout with a timestamped, source-located format.
    fmt = '%(asctime)s - %(levelname)s - %(filename)s:%(lineno)s - %(name)s - %(message)s'
    formatter = logging.Formatter(fmt)
    out_hdlr = logging.StreamHandler(sys.stdout)
    out_hdlr.setFormatter(formatter)
    out_hdlr.setLevel(logging.INFO)
    log.addHandler(out_hdlr)
    log.setLevel(logging.INFO)

    @staticmethod
    def get_logger():
        """Return the underlying ``logging.Logger``."""
        return MyLogger.log

    @staticmethod
    def info(str, *args, **kargs):
        """Log ``str % args`` at INFO level."""
        MyLogger.log.info(str, *args, **kargs)

    @staticmethod
    def warn(str, *args, **kargs):
        """Log ``str % args`` at WARNING level."""
        # Logger.warn is a deprecated alias; warning() is the supported name.
        MyLogger.log.warning(str, *args, **kargs)

    @staticmethod
    def error(str, *args, **kargs):
        """Log ``str % args`` at ERROR level."""
        MyLogger.log.error(str, *args, **kargs)
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: emit one record per level through both access paths.
    MyLogger.get_logger().info("test")
    # Logger.warn is a deprecated alias; use warning() instead.
    MyLogger.get_logger().warning("test warn %s", 'test')
    MyLogger.error("test error")
|
59
unittest/sql/optimizer/cost_model_utils/op_generator.py
Normal file
59
unittest/sql/optimizer/cost_model_utils/op_generator.py
Normal file
@ -0,0 +1,59 @@
|
||||
from cost_test_conf import Config
|
||||
from mylog.mylog import MyLogger
|
||||
import subprocess as sp
|
||||
|
||||
|
||||
def init_func(self, conf):
    """Serve as ``__init__`` for generated operator classes: keep ``conf``."""
    self.conf = conf
|
||||
|
||||
|
||||
def get_bench_cmd(self):
    """Return the benchmark command line built from ``self.conf.gen_params()``."""
    return './cost_model_util ' + self.conf.gen_params()
|
||||
|
||||
def get_data_preprocess_cmd(self):
    """Return the preprocess command for this operator's result files.

    File names are derived from the class name: ``<name>_result`` is the
    input and ``<name>_result_final`` the output.
    """
    op_name = self.__class__.__name__
    raw_file = op_name + '_result'
    final_file = op_name + '_result_final'
    return 'python preprocess.py -i {0} -o {1} -d'.format(raw_file, final_file)
|
||||
|
||||
def do_bench(self):
    """Run the benchmark command, then the preprocess command.

    The configuration and the benchmark command are logged first.  Both
    commands run through the shell; a non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    MyLogger.info(self.conf)
    bench_cmd = self.get_bench_cmd()
    MyLogger.info(bench_cmd)
    sp.check_call(bench_cmd, shell=True)
    sp.check_call(self.get_data_preprocess_cmd(), shell=True)
|
||||
|
||||
|
||||
class op_generator(object):
    """Factory that builds one operator class per name and caches it."""

    # Cache of generated classes, keyed by operator name.
    # Original note: name if type is not None name = operatorname + test_type_name
    op_dict = {}

    @staticmethod
    def gen_operator(name):
        """Return the operator class called ``name``, creating it on first use.

        The generated class derives from ``object`` and is wired up with
        ``init_func`` as ``__init__`` plus ``do_bench``, ``get_bench_cmd``
        and ``get_data_preprocess_cmd`` as methods.
        """
        # "in" replaces dict.has_key, which exists only on Python 2.
        if name in op_generator.op_dict:
            return op_generator.op_dict[name]

        cls = type(name, (object,), {'__init__': init_func,
                                     'do_bench': do_bench,
                                     'get_bench_cmd': get_bench_cmd,
                                     'get_data_preprocess_cmd': get_data_preprocess_cmd})
        op_generator.op_dict[name] = cls
        return cls
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Demo run: configure a "material" operator and benchmark it once.
    material_cls = op_generator.gen_operator('material')

    conf = Config()
    conf.u_to_test_op_c = 'material'
    conf.is_not_running_as_unittest_c = True
    conf.schema_file_c = 'c10k1.schema'
    conf.row_count_c = 1000
    conf.input_projector_count_c = 1

    material_op = material_cls(conf)
    material_op.do_bench()
|
81
unittest/sql/optimizer/cost_model_utils/plot_demension.py
Executable file
81
unittest/sql/optimizer/cost_model_utils/plot_demension.py
Executable file
@ -0,0 +1,81 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
|
||||
|
||||
import sys
|
||||
import numpy as np
|
||||
import matplotlib as mpl
|
||||
from matplotlib import cm
|
||||
import matplotlib.pyplot as plt
|
||||
from mpl_toolkits.mplot3d import Axes3D
|
||||
import math
|
||||
import getopt
|
||||
|
||||
def extract_int_info_from_line(line):
    """Parse one comma-separated line into ints, truncating fractional parts."""
    return [int(float(field)) for field in line.split(",")]
|
||||
|
||||
def case_cmp(a,b,c):
    """Three-way compare rows ``a`` and ``b`` on column ``c``.

    Returns -1, 1 or 0 when a[c] is smaller than, larger than, or neither
    smaller nor larger than b[c].
    """
    return (a[c] > b[c]) - (a[c] < b[c])


# cmp_n[k] compares two rows on column k. The column is bound through the
# default argument `z` so that each lambda keeps its own column index.
cmp_n = []
for count in range(10):
    cmp_n.append(lambda x, y, z=count: case_cmp(x, y, z))

# One line colour per plotted input file.
colors = ["red", "green", "blue", "yellow", "purple", "black", "pink" , "brown", "cyan" ,"orange"]
|
||||
|
||||
def do_plot(file_cases):
    """Draw one line per input file on a shared axes and show the figure.

    file_cases -- sequence of [x_values, y_values] pairs, one per file.

    Colours are taken from the module-level ``colors`` list and reused
    cyclically, so more series than colours no longer raises IndexError.
    """
    fig = plt.figure()
    fig.set_size_inches((20,10))
    ax1 = fig.add_subplot(111)
    for i, series in enumerate(file_cases):
        ax1.plot(series[0], series[1], color=colors[i % len(colors)])
    plt.show()
|
||||
|
||||
if __name__ == '__main__':
    # Usage: -f FILE [-f FILE ...] -h X_COLUMN -d Y_COLUMN
    # Plots column Y against column X for every given file.
    file_names = []
    horizen = 0
    demension = 0
    wrong_arg = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], "f:h:d:")
    except getopt.GetoptError:
        # getopt raises on unknown options; report it like any other bad argument.
        opts, args = [], []
        wrong_arg = True

    for op, value in opts:
        if "-f" == op:
            file_names.append(value)
        elif "-h" == op:
            horizen = int(value)
        elif "-d" == op:
            demension = int(value)

    if horizen == demension or len(file_names) == 0 or wrong_arg:
        print("wrong arg")
        sys.exit()

    file_cases = []
    for name in file_names:
        cases = []
        # `with` guarantees the input file is closed, also on a parse error.
        with open(name) as data_file:
            for line in data_file:
                # Skip bracketed lines, comments and blank lines.
                if line[0] == '[' or line.startswith('#') or not line.strip():
                    continue
                cases.append(extract_int_info_from_line(line))

        # Sort rows by the x column; a key function works on Python 2 and 3
        # and is not limited to the first ten columns.
        cases.sort(key=lambda case: case[horizen])
        horizens = [case[horizen] for case in cases]
        demensions = [case[demension] for case in cases]
        file_cases.append([np.array(horizens), np.array(demensions)])

    do_plot(file_cases)
|
81
unittest/sql/optimizer/cost_model_utils/plot_multi.py
Executable file
81
unittest/sql/optimizer/cost_model_utils/plot_multi.py
Executable file
@ -0,0 +1,81 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
|
||||
|
||||
import sys
|
||||
import numpy as np
|
||||
import matplotlib as mpl
|
||||
from matplotlib import cm
|
||||
import matplotlib.pyplot as plt
|
||||
from mpl_toolkits.mplot3d import Axes3D
|
||||
import math
|
||||
|
||||
def extract_int_info_from_line(line):
    """Turn a comma-separated line of numbers into a list of ints.

    Fields go through ``float()`` and are then truncated by ``int()``;
    a non-numeric field raises ``ValueError``.
    """
    fields = line.split(",")
    return [int(float(text)) for text in fields]
|
||||
|
||||
def case_cmp(a,b,c):
    """Three-way compare rows ``a`` and ``b`` on column ``c``.

    Returns -1 if a[c] < b[c], 1 if a[c] > b[c], otherwise 0.
    The leftover debug output for large column indices has been removed.
    """
    left, right = a[c], b[c]
    if left < right:
        return -1
    if left > right:
        return 1
    return 0


# cmp_n[k] compares two rows on column k; the default argument `z` pins the
# column index for each lambda at creation time.
cmp_n = [lambda x, y, z=count: case_cmp(x, y, z) for count in range(10)]

# Line colours, used in order for the plotted columns.
colors = ["red", "green", "blue", "yellow", "purple", "black", "pink", "cyan", "brown", "gray"]
|
||||
|
||||
def do_plot(arg, horizen, need_columns_id,label):
    """Plot selected columns of ``arg`` against column ``horizen`` and show it.

    arg             -- list of rows, each row a sequence of numbers
    horizen         -- index of the column used as the x axis
    need_columns_id -- container of column indices to draw as y series
    label           -- passed to the axes' ``set_label``

    Nothing is drawn when ``arg`` is empty. Colours come from the
    module-level ``colors`` list and wrap around when it is exhausted.
    """
    if not arg:
        # Without rows there are no columns to transpose.
        return

    # Transpose rows into per-column lists, sized from the first row.
    arrs = [[] for _ in arg[0]]
    for case in arg:
        for i, value in enumerate(case):
            arrs[i].append(value)

    np_arrs = [np.array(a) for a in arrs]
    fig = plt.figure()

    fig.set_size_inches((20,10))
    ax1 = fig.add_subplot(111)
    ax1.set_label(label)
    color_id = 0

    for i, column in enumerate(np_arrs):
        if i == horizen or i not in need_columns_id:
            continue
        ax1.plot(np_arrs[horizen], column, color=colors[color_id % len(colors)])
        color_id = color_id + 1
    plt.show()
|
||||
|
||||
if __name__ == '__main__':
    # Usage: FILE X_COLUMN COLUMNS
    #   FILE      data file with comma-separated numeric rows
    #   X_COLUMN  index of the column used as the x axis
    #   COLUMNS   "all" or a comma-separated list of column indices to plot
    if len(sys.argv) < 4:
        print("wrong arg")
    else:
        file_name = sys.argv[1]
        horizen = int(sys.argv[2])
        need_columns = sys.argv[3]
        if need_columns == "all":
            need_columns_id = list(range(100))
        else:
            need_columns_id = [int(i) for i in need_columns.split(",")]

        cases = []
        # `with` closes the input file even if a line fails to parse.
        with open(file_name, "r") as data_file:
            for line in data_file:
                # Skip bracketed lines, comments and blank lines.
                if line[0] == '[' or line.startswith('#') or not line.strip():
                    continue
                cases.append(extract_int_info_from_line(line))

        # A key function sorts by the x column on both Python 2 and 3.
        cases.sort(key=lambda case: case[horizen])
        # Label the axes with the file name rather than the file object.
        do_plot(cases, horizen, need_columns_id, file_name)
|
175
unittest/sql/optimizer/cost_model_utils/preprocess.py
Executable file
175
unittest/sql/optimizer/cost_model_utils/preprocess.py
Executable file
@ -0,0 +1,175 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
|
||||
import sys
|
||||
import os
|
||||
import numpy as np
|
||||
import getopt
|
||||
|
||||
|
||||
# ---- command line handling -------------------------------------------------
# Options:
#   -i FILE          input file (a leading positional argument also works)
#   -o FILE          output file (default: <input>.prep)
#   -t N / -a N      number of consecutive rows averaged into one output row
#   -f COL,OP,VALUE  keep only rows where column COL satisfies OP VALUE;
#                    OP is one of g, l, ge, le, e, ne; may be repeated
#   -d               drop extreme samples before averaging (needs N >= 5)
#   -C COL           column inspected when dropping extreme samples
#   -c COLS          comma-separated output columns; may be repeated
file_name = "scan_model.res.formal"
# Only a real positional argument is a file name; an option flag such as
# "-i" must not be mistaken for one.
if len(sys.argv) >= 2 and not sys.argv[1].startswith("-"):
    file_name = sys.argv[1]
out_file_name = None
time_per_case = 2
use_delete_min_max = False
filters = []
out_columns = list(range(100))
cols_supplied = False
wrong_arg = False
target_column_id = 0

try:
    opts, args = getopt.getopt(sys.argv[1:], "i:o:t:f:a:dc:C:")
except getopt.GetoptError:
    # Unknown option or missing option argument.
    opts, args = [], []
    wrong_arg = True

for op, value in opts:
    if "-i" == op:
        file_name = value
    elif "-o" == op:
        out_file_name = value
    elif op in ("-t", "-a"):
        time_per_case = int(value)
    elif "-f" == op:
        filter_elements = value.split(",")
        if len(filter_elements) != 3 or \
                filter_elements[1] not in ["g", "l", "ge", "le", "e", "ne"]:
            print("invalid filter type")
            sys.exit(1)
        filters.append(filter_elements)
    elif "-d" == op:
        use_delete_min_max = True
    elif "-C" == op:
        target_column_id = int(value)
    elif "-c" == op:
        if not cols_supplied:
            # The first -c replaces the default "all columns" selection.
            cols_supplied = True
            out_columns = []
        out_columns.extend([int(c) for c in value.split(",")])

if wrong_arg:
    print("wrong arg")
    sys.exit(1)

# Derive the default output name from the final input name, so that
# "-i data" writes "data.prep".
if out_file_name is None:
    out_file_name = file_name + ".prep"

# Dropping samples only happens with at least five samples per case.
if time_per_case < 5:
    use_delete_min_max = False

if os.path.exists(out_file_name):
    os.remove(out_file_name)

origin_file = open(file_name, "r")
out_file = open(out_file_name, "w")

# State of the averaging loop: position inside the current group, collected
# samples per column, and the averages of the last finished group.
i = 0
column_nums = []
avgs = []
avg_strs = []
|
||||
|
||||
def delete(li, index):
    """Return a copy of ``li`` without the element at position ``index``.

    ``li`` itself is not modified. An index outside ``0 .. len(li) - 1``
    (including the -1 "not found" value of find_max_index/find_min_index)
    yields an unchanged copy; previously a negative index produced a list
    with duplicated elements.
    """
    if index < 0 or index >= len(li):
        return li[:]
    return li[:index] + li[index + 1:]
|
||||
|
||||
def find_max_index(l):
    """Return the index of the first largest element of ``l``, or -1 if empty.

    The running maximum starts from the first element instead of a magic
    sentinel, so arbitrarily small values are handled and the builtin
    ``max`` is no longer shadowed.
    """
    max_i = -1
    for i, value in enumerate(l):
        if max_i < 0 or value > l[max_i]:
            max_i = i
    return max_i
|
||||
|
||||
def find_min_index(l):
    """Return the index of the first smallest element of ``l``, or -1 if empty.

    The running minimum starts from the first element instead of a magic
    sentinel, so arbitrarily large values are handled and the builtin
    ``min`` is no longer shadowed.
    """
    min_i = -1
    for i, value in enumerate(l):
        if min_i < 0 or value < l[min_i]:
            min_i = i
    return min_i
|
||||
|
||||
def delete_max_min_case(column_nums, column_id):
    """Remove, in place, the two samples that are largest in ``column_id``.

    column_nums -- list of per-column sample lists, all indexed alike
    column_id   -- column whose values decide which samples are dropped

    Despite the name only maxima are removed; the sample position found in
    the target column is deleted from every column so rows stay aligned.
    """
    for _ in range(2):
        drop_at = find_max_index(column_nums[column_id])
        for col, samples in enumerate(column_nums):
            column_nums[col] = delete(samples, drop_at)
|
||||
|
||||
|
||||
import operator

# For each filter kind: the comparison (value, threshold) that REJECTS a row.
# E.g. kind "g" keeps rows with value > threshold, so it rejects value <= threshold.
_FILTER_REJECTS = {
    "g": operator.le,
    "l": operator.ge,
    "ge": operator.lt,
    "le": operator.gt,
    "e": operator.ne,
    "ne": operator.eq,
}


def do_filter(column_strs):
    """Return True when the row must be dropped according to ``filters``.

    column_strs -- the row as a list of field strings

    Each entry of the module-level ``filters`` list is
    ``[column_index, kind, threshold]`` (all strings). A row is dropped as
    soon as one filter rejects it; entries with an unknown kind are ignored.
    Thresholds are parsed with ``float`` so fractional values such as "1.5"
    are accepted as well as integers.
    """
    for f in filters:
        rejects = _FILTER_REJECTS.get(f[1])
        if rejects is None:
            continue
        if rejects(float(column_strs[int(f[0])]), float(f[2])):
            return True
    return False
|
||||
|
||||
|
||||
# ---- averaging loop --------------------------------------------------------
# Every `time_per_case` consecutive data rows (after filtering) form one
# group; one row with the per-column averages is written per complete group.
# A trailing incomplete group produces no output.
try:
    for line in origin_file:
        if line.startswith("#"):
            out_file.write(line)
            continue  # comment lines are copied through unchanged
        if not line.strip():
            continue  # blank lines carry no samples
        column_strs_raw = line.split(",")
        if do_filter(column_strs_raw):
            continue
        column_count = len(column_strs_raw)

        if i == 0:
            # First row of a group: start with fresh accumulators.
            avg_strs = []
            avgs = []
            column_nums = [[] for _ in range(column_count)]

        # Split line and cast to float.
        for n in range(column_count):
            column_nums[n].append(float(column_strs_raw[n]))

        if i == time_per_case - 1:
            if use_delete_min_max:
                delete_max_min_case(column_nums, target_column_id)
            # Average per column.
            for n in range(column_count):
                avgs.append(np.mean(column_nums[n]))
            avg_strs = [str(a) for a in avgs]
            # Keep only the requested output columns.
            real_avg_strs = [avg_strs[cid] for cid in range(len(avg_strs))
                             if cid in out_columns]
            out_file.write(",".join(real_avg_strs) + "\n")

        i = (i + 1) % time_per_case
finally:
    # Close both files even when a row fails to parse.
    origin_file.close()
    out_file.close()
|
||||
|
||||
|
||||
|
||||
|
||||
|
114
unittest/sql/optimizer/cost_model_utils/pro_hash.py
Normal file
114
unittest/sql/optimizer/cost_model_utils/pro_hash.py
Normal file
@ -0,0 +1,114 @@
|
||||
__author__ = 'canfang.scf'
|
||||
from op_generator import op_generator
|
||||
from cost_test_conf import Config
|
||||
import subprocess as sp
|
||||
import os
|
||||
from lmfit import Model
|
||||
import numpy as np
|
||||
|
||||
# Build the hash join operator wrapper with a fixed configuration.
hash_cls = op_generator.gen_operator("hash_join")
conf = Config()
for _option_name, _option_value in (
        ('u_to_test_op_c', 'hash'),
        ('is_not_running_as_unittest_c', True),
        ('schema_file_c', 'c10k1x2.schema'),
        ('left_row_count_c', 1000),
        ('right_row_count_c', 1000),
        ('left_min_c', 1),
        ('right_min_c', 1),
        ('is_random_c', True)):
    setattr(conf, _option_name, _option_value)
hash_op = hash_cls(conf)

# step 3 process data
# Remove a stale result first, then run the operator's preprocessing command.
final_file_name = "hash_join_result_final"
if os.path.exists(final_file_name):
    os.remove(final_file_name)

data_cmd = hash_op.get_data_preprocess_cmd()
sp.check_call(data_cmd, shell=True)

# step 4 fit and output
# Start from a clean model output file.
out_model_file_name = "hash_model"
if os.path.exists(out_model_file_name):
    os.remove(out_model_file_name)
|
||||
|
||||
|
||||
def hash_model_form(args,
                    Tstart_up,
                    Tright_outer_once,
                    Tleft_outer_once,
                    ):
    """Evaluate the hash join cost formula for one case.

    args -- 6-sequence ``(Nres_row, Nleft_row, Nright_row, Nequal_cond,
            Nno_matched_right, Nno_matched_left)``
    Tstart_up, Tright_outer_once, Tleft_outer_once -- model parameters

    The per-row coefficients for left rows, right rows, equal conditions
    and result rows are fixed constants (presumably taken from an earlier
    fit -- TODO confirm); only the three T* parameters vary.
    """
    (Nres_row, Nleft_row, Nright_row,
     Nequal_cond, Nno_matched_right, Nno_matched_left) = args

    return (Tstart_up
            + Nleft_row * 0.74497774
            + Nright_row * 0.26678144
            + Nequal_cond * 0.86340381
            + Nres_row * 0.28939532
            + Nno_matched_left * Tright_outer_once
            + Nno_matched_right * Tleft_outer_once)
|
||||
|
||||
|
||||
def hash_model_arr(arg_sets,
                   Tstart_up,
                   Tright_outer_once,
                   Tleft_outer_once):
    """Evaluate ``hash_model_form`` for every case and return a numpy array.

    arg_sets -- iterable of argument tuples as accepted by hash_model_form
    The three T* parameters are passed through unchanged to each call.
    """
    costs = [
        hash_model_form(case_args, Tstart_up, Tright_outer_once, Tleft_outer_once)
        for case_args in arg_sets
    ]
    return np.array(costs)
|
||||
|
||||
|
||||
def extract_info_from_line(line):
    """Parse one comma-separated data line into a list of floats.

    A field that is not numeric raises ``ValueError``.
    """
    return [float(field) for field in line.split(",")]
|
||||
|
||||
# Fit the three free parameters of the hash join cost model to the measured
# times; every parameter is constrained to be non-negative.
hash_model = Model(hash_model_arr)
for _param_name in ("Tstart_up", "Tright_outer_once", "Tleft_outer_once"):
    hash_model.set_param_hint(_param_name, min=0.0)

arg_sets = []
times = []
case_params = []
# `with` closes the data file even when a line fails to parse.
with open(final_file_name, "r") as data_file:
    for line in data_file:
        if line.startswith('#'):
            continue
        case_param = extract_info_from_line(line)
        case_params.append(case_param)
        # Reorder columns 0..5 into the order hash_model_form unpacks
        # (column 2 first, then 0 and 1); column 6 is the measured time.
        arg_sets.append((case_param[2], case_param[0], case_param[1],
                         case_param[3], case_param[4], case_param[5]))
        times.append(case_param[6])

arg_sets_np = np.array(arg_sets)
times_np = np.array(times)

# The initial-value keywords must match the parameter names of
# hash_model_arr exactly; the former "Tstartup" did not name any parameter.
result = hash_model.fit(times_np, arg_sets=arg_sets_np,
                        Tstart_up=0.0,
                        Tright_outer_once=0.0,
                        Tleft_outer_once=0.0)

res_line = str(result.best_values["Tstart_up"]) + ","
res_line += str(result.best_values["Tright_outer_once"]) + ","
res_line += str(result.best_values["Tleft_outer_once"])
print(result.fit_report())

if out_model_file_name:
    with open(out_model_file_name, "w") as out_file:
        out_file.write(res_line)
|
172
unittest/sql/optimizer/cost_model_utils/process_nestloop.py
Normal file
172
unittest/sql/optimizer/cost_model_utils/process_nestloop.py
Normal file
@ -0,0 +1,172 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
|
||||
import sys
|
||||
import os
|
||||
import numpy as np
|
||||
import getopt
|
||||
|
||||
|
||||
# ---- command line handling -------------------------------------------------
# Without arguments the script reads "nestloop_result" and writes
# "nl_result". These used to be appended to sys.argv unconditionally, which
# silently overrode any -i/-o given by the user; they are now plain defaults.
# A leading positional argument is taken as the input file, with
# "<input>.prep" as the output file.
file_name = "nestloop_result"
out_file_name = "nl_result"
if len(sys.argv) >= 2 and not sys.argv[1].startswith("-"):
    file_name = sys.argv[1]
    out_file_name = file_name + ".prep"
time_per_case = 5
use_delete_min_max = False
filters = []
out_columns = list(range(100))
cols_supplied = False
wrong_arg = False
target_column_id = 0

try:
    opts, args = getopt.getopt(sys.argv[1:], "i:o:t:f:a:dc:C:")
except getopt.GetoptError:
    # Unknown option or missing option argument.
    opts, args = [], []
    wrong_arg = True

for op, value in opts:
    if "-i" == op:
        file_name = value
    elif "-o" == op:
        out_file_name = value
    elif op in ("-t", "-a"):
        time_per_case = int(value)
    elif "-f" == op:
        filter_elements = value.split(",")
        if len(filter_elements) != 3 or \
                filter_elements[1] not in ["g", "l", "ge", "le", "e", "ne"]:
            print("invalid filter type")
            sys.exit(1)
        filters.append(filter_elements)
    elif "-d" == op:
        use_delete_min_max = True
    elif "-C" == op:
        target_column_id = int(value)
    elif "-c" == op:
        if not cols_supplied:
            # The first -c replaces the default "all columns" selection.
            cols_supplied = True
            out_columns = []
        out_columns.extend([int(c) for c in value.split(",")])

if wrong_arg:
    print("wrong arg")
    sys.exit(1)

if time_per_case < 5:
    use_delete_min_max = False

if os.path.exists(out_file_name):
    os.remove(out_file_name)

origin_file = open(file_name, "r")
out_file = open(out_file_name, "w")

i = 0
column_nums = []
avgs = []
avg_strs = []
|
||||
|
||||
def delete(li, index):
    """Return a copy of ``li`` without the element at position ``index``.

    ``li`` itself is not modified. An index outside ``0 .. len(li) - 1``
    (including the -1 "not found" value of find_max_index/find_min_index)
    yields an unchanged copy; previously a negative index produced a list
    with duplicated elements.
    """
    if index < 0 or index >= len(li):
        return li[:]
    return li[:index] + li[index + 1:]
|
||||
|
||||
def find_max_index(l):
    """Return the index of the first largest element of ``l``, or -1 if empty.

    The running maximum starts from the first element instead of a magic
    sentinel, so arbitrarily small values are handled and the builtin
    ``max`` is no longer shadowed.
    """
    max_i = -1
    for i, value in enumerate(l):
        if max_i < 0 or value > l[max_i]:
            max_i = i
    return max_i
|
||||
|
||||
def find_min_index(l):
    """Return the index of the first smallest element of ``l``, or -1 if empty.

    The running minimum starts from the first element instead of a magic
    sentinel, so arbitrarily large values are handled and the builtin
    ``min`` is no longer shadowed.
    """
    min_i = -1
    for i, value in enumerate(l):
        if min_i < 0 or value < l[min_i]:
            min_i = i
    return min_i
|
||||
|
||||
def delete_max_min_case(column_nums, column_id):
    """Remove, in place, the two samples that are largest in ``column_id``.

    column_nums -- list of per-column sample lists, all indexed alike
    column_id   -- column whose values decide which samples are dropped

    Despite the name only maxima are removed; the sample position found in
    the target column is deleted from every column so rows stay aligned.
    """
    for _ in range(2):
        drop_at = find_max_index(column_nums[column_id])
        for col, samples in enumerate(column_nums):
            column_nums[col] = delete(samples, drop_at)
|
||||
|
||||
|
||||
import operator

# For each filter kind: the comparison (value, threshold) that REJECTS a row.
# E.g. kind "g" keeps rows with value > threshold, so it rejects value <= threshold.
_FILTER_REJECTS = {
    "g": operator.le,
    "l": operator.ge,
    "ge": operator.lt,
    "le": operator.gt,
    "e": operator.ne,
    "ne": operator.eq,
}


def do_filter(column_strs):
    """Return True when the row must be dropped according to ``filters``.

    column_strs -- the row as a list of field strings

    Each entry of the module-level ``filters`` list is
    ``[column_index, kind, threshold]`` (all strings). A row is dropped as
    soon as one filter rejects it; entries with an unknown kind are ignored.
    Thresholds are parsed with ``float`` so fractional values such as "1.5"
    are accepted as well as integers.
    """
    for f in filters:
        rejects = _FILTER_REJECTS.get(f[1])
        if rejects is None:
            continue
        if rejects(float(column_strs[int(f[0])]), float(f[2])):
            return True
    return False
|
||||
|
||||
|
||||
|
||||
# ---- record extraction -----------------------------------------------------
# The input is read as records of 12 consecutive lines; `state` is the
# position of the current line inside its record:
#   0      copied to the output as is (after stripping whitespace)
#   1      split on ",row_count : " to start the list of output fields
#   4..11  lines listed in _FIELD_MARKERS contribute the text after their marker
#   other  ignored
# After line 11 the collected fields are written as one comma-separated row.
_RECORD_LINE_COUNT = 12
_FIELD_MARKERS = {
    4: "join_time except conds : ",
    5: "equal_eval : ",
    7: "other_eval : ",
    9: "right_cache_put : ",
    10: "right_cache_acc : ",
    11: "match_group_count : ",
}

state = 0
elements = []

try:
    for line in origin_file:
        line = line.strip()
        if state == 0:
            out_file.write(line + "\n")
        elif state == 1:
            elements = line.split(",row_count : ")
        elif state in _FIELD_MARKERS:
            elements.append(line.split(_FIELD_MARKERS[state])[1])
            if state == _RECORD_LINE_COUNT - 1:
                out_file.write(",".join(elements) + "\n")
        state = (state + 1) % _RECORD_LINE_COUNT
finally:
    # Close both files even when a line does not contain its marker.
    origin_file.close()
    out_file.close()
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1 @@
|
||||
create table t1 (a varchar(100) primary key);
|
@ -0,0 +1 @@
|
||||
create table t1 (a varchar(200) primary key);
|
Reference in New Issue
Block a user