patch 4.0
This commit is contained in:
166
unittest/sql/optimizer/cost_model_utils/fit_mergegroupby.py
Executable file
166
unittest/sql/optimizer/cost_model_utils/fit_mergegroupby.py
Executable file
@ -0,0 +1,166 @@
|
||||
#!/bin/env python
|
||||
__author__ = 'dongyun.zdy'
|
||||
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
from scipy.optimize import leastsq
|
||||
from scipy.optimize import curve_fit
|
||||
import sys
|
||||
from lmfit import Model
|
||||
import getopt
|
||||
|
||||
|
||||
|
||||
def mg_model_form(args,
                  Trow_once,
                  Tres_once,
                  Taggr_prepare_result,
                  Taggr_process,
                  Tgroup_cmp_col,
                  Tcopy_col):
    """Evaluate the merge group-by cost formula for a single case.

    ``args`` is a 5-item sequence, unpacked in this order:
    ``(Nrow_input, Nrow_res, Ncol_input, Ncol_aggr, Ncol_group)``.
    The remaining parameters are the coefficients that weight each term.

    Returns the sum of all terms.  The formula has no constant term.
    """
    n_in, n_out, n_in_cols, n_aggr_cols, n_group_cols = args

    # Flat cost per result row and per input row.
    per_result_row = n_out * Tres_once
    per_input_row = n_in * Trow_once

    # Group judgement: one comparison unit per result row, and
    # Ncol_group comparison units for each of the remaining input rows.
    boundary_cmp = n_out * Tgroup_cmp_col
    in_group_cmp = (n_in - n_out) * n_group_cols * Tgroup_cmp_col

    # Per-group work: copy the input columns and prepare the aggregate
    # results once for every result row.
    group_copy = n_out * (n_in_cols * Tcopy_col)
    group_prepare = n_out * (n_aggr_cols * Taggr_prepare_result)

    # Per-input-row aggregate processing.
    row_aggr = n_in * (n_aggr_cols * Taggr_process)

    # Summed left to right, i.e. in the same order the terms are listed.
    return (per_result_row + per_input_row + boundary_cmp + in_group_cmp
            + group_copy + group_prepare + row_aggr)
|
||||
|
||||
|
||||
|
||||
|
||||
# Number of times mg_model_arr has been evaluated; used for progress output.
eval_count = 0


def mg_model_arr(arg_sets,
                 Trow_once,
                 Tres_once,
                 Taggr_prepare_result,
                 Taggr_process,
                 Tgroup_cmp_col,
                 Tcopy_col):
    """Evaluate the cost formula for every case in ``arg_sets``.

    ``arg_sets`` is an iterable of per-case argument sequences, each in the
    form accepted by ``mg_model_form``.  The coefficient parameters are
    forwarded unchanged to ``mg_model_form`` for every case.

    Each call increments the module-level ``eval_count`` and prints a
    progress line ``eval <count>``.

    Returns a numpy array with one cost per case, in input order.

    NOTE: this function is wrapped by ``Model(...)`` and its coefficients
    are referred to by name elsewhere in this file (param hints and initial
    values), so the parameter names must not be changed.
    """
    global eval_count

    costs = []
    for case_args in arg_sets:
        costs.append(mg_model_form(case_args,
                                   Trow_once,
                                   Tres_once,
                                   Taggr_prepare_result,
                                   Taggr_process,
                                   Tgroup_cmp_col,
                                   Tcopy_col))

    eval_count += 1
    # Parenthesised single-argument form prints the same text under both
    # Python 2 (print statement) and Python 3 (print function).
    print("eval " + str(eval_count))
    return np.array(costs)
|
||||
|
||||
# Wrap the array-valued cost function in an lmfit Model and give every
# coefficient a lower bound of 0.0.
mg_model = Model(mg_model_arr)
for _coef_name in ("Trow_once",
                   "Tres_once",
                   "Taggr_prepare_result",
                   "Taggr_process",
                   "Tgroup_cmp_col",
                   "Tcopy_col"):
    mg_model.set_param_hint(_coef_name, min=0.0)
|
||||
def extract_info_from_line(line):
    """Parse one comma-separated line into a list of floats.

    The line is split on ``","`` and every field is converted with
    ``float()``, so a field that is not a valid float literal (including
    an empty field) raises ``ValueError``.
    """
    return [float(field) for field in line.split(",")]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: read measured cases, fit the model coefficients
    # and write them out as a single comma-separated line.
    #
    # Options:
    #   -i <path>  input file  (default "mergegroupby_result_final")
    #   -o <path>  output file (default "mergegroupby_model")
    file_name = "mergegroupby_result_final"
    out_file_name = "mergegroupby_model"

    # getopt raises GetoptError for an unknown option or a missing option
    # argument; report it as a usage error instead of a traceback.
    try:
        opts, _ = getopt.getopt(sys.argv[1:], "i:o:")
    except getopt.GetoptError:
        print("wrong arg")
        sys.exit(1)

    for op, value in opts:
        if op == "-i":
            file_name = value
        elif op == "-o":
            out_file_name = value

    arg_sets = []
    times = []
    with open(file_name, "r") as in_file:
        for line in in_file:
            # Skip comment lines, and blank lines which would otherwise
            # fail in float('').
            if line.startswith('#') or not line.strip():
                continue
            case_param = extract_info_from_line(line)

            # Pick the columns in the order mg_model_form unpacks them:
            # Nrow_input, Nrow_res, Ncol_input, Ncol_aggr, Ncol_group.
            arg_sets.append((case_param[0],
                             case_param[5],
                             case_param[4],
                             case_param[2],
                             case_param[3]))
            # Column 6 holds the value the model is fitted against.
            times.append(case_param[6])

    arg_sets_np = np.array(arg_sets)
    times_np = np.array(times)

    # Every coefficient starts from the same initial guess of 0.1.
    result = mg_model.fit(times_np, arg_sets=arg_sets_np,
                          Trow_once=0.1,
                          Tres_once=0.1,
                          Taggr_prepare_result=0.1,
                          Taggr_process=0.1,
                          Tgroup_cmp_col=0.1,
                          Tcopy_col=0.1)

    # Output order of the fitted coefficients in the result line.
    coef_names = ("Trow_once",
                  "Tres_once",
                  "Taggr_prepare_result",
                  "Taggr_process",
                  "Tgroup_cmp_col",
                  "Tcopy_col")
    res_line = ",".join(str(result.best_values[name]) for name in coef_names)

    print(result.fit_report())

    # The result is always written; -o only changes the destination.
    # No trailing newline is written.
    with open(out_file_name, "w") as out_file:
        out_file.write(res_line)
|
Reference in New Issue
Block a user