176 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			176 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/bin/env python
 | |
| __author__ = 'dongyun.zdy'
 | |
| 
 | |
| import sys
 | |
| import os
 | |
| import numpy as np
 | |
| import getopt
 | |
| 
 | |
| 
 | |
| file_name = "scan_model.res.formal"
 | |
| if len(sys.argv) >= 2:
 | |
|     file_name = sys.argv[1]
 | |
| out_file_name = file_name + ".prep"
 | |
| time_per_case = 2
 | |
| use_delete_min_max = False
 | |
| filters = []
 | |
| out_columns = [c for c in xrange(100)]
 | |
| cols_supplied = False
 | |
| wrong_arg = False
 | |
| target_column_id = 0
 | |
| 
 | |
| #sys.argv.extend("-i sort_result -o sort.prep -t 5 -C 4 -f 0,g,1 -f 0,le,100000".split(" "))
 | |
| 
 | |
| opts,args = getopt.getopt(sys.argv[1:],"i:o:t:f:a:dc:C:")
 | |
| for op, value in opts:
 | |
|     if "-i" == op:
 | |
|         file_name = value
 | |
|     elif "-o" == op:
 | |
|         out_file_name = value
 | |
|     elif "-t" == op:
 | |
|         time_per_case = int(value)
 | |
|     elif "-f" == op:
 | |
|         filter_str = value
 | |
|         filter_elements = filter_str.split(",")
 | |
|         if not filter_elements[1] in ["g","l","ge","le","e","ne"]:
 | |
|             print "invalid filter type"
 | |
|             sys.exit(1)
 | |
|         filters.append(filter_str.split(","))
 | |
|     elif "-a" == op:
 | |
|         time_per_case = int(value)
 | |
|     elif "-d" == op:
 | |
|         use_delete_min_max = True
 | |
|     elif "-C" == op:
 | |
|         target_column_id = int(value)
 | |
|     elif "-c" == op:
 | |
|         if not cols_supplied:
 | |
|             cols_supplied = True
 | |
|             out_columns = []
 | |
|         out_columns.extend([int(c) for c in value.split(",")])
 | |
|     else:
 | |
|         wrong_arg = True
 | |
| 
 | |
| if wrong_arg:
 | |
|     print "wrong arg"
 | |
|     sys.exit(1)
 | |
| 
 | |
| if time_per_case < 5:
 | |
|     use_delete_min_max = False
 | |
| 
 | |
| if os.path.exists(out_file_name):
 | |
|     os.remove(out_file_name)
 | |
| 
 | |
| origin_file = open(file_name, "r")
 | |
| out_file = open(out_file_name,"w")
 | |
| 
 | |
| i = 0
 | |
| column_nums = []
 | |
| avgs = []
 | |
| avg_strs = []
 | |
| 
 | |
| def delete(li, index):
 | |
|     li = li[:index] + li[index+1:]
 | |
|     return li
 | |
| 
 | |
| def find_max_index(l):
 | |
|     max = -9999999999999999999999
 | |
|     max_i = -1
 | |
|     for i in xrange(len(l)):
 | |
|         if l[i] > max:
 | |
|             max = l[i]
 | |
|             max_i = i
 | |
|     return max_i
 | |
| 
 | |
| def find_min_index(l):
 | |
|     min = 999999999999999999999999
 | |
|     min_i = -1
 | |
|     for i in xrange(len(l)):
 | |
|         if l[i] < min:
 | |
|             min = l[i]
 | |
|             min_i = i
 | |
|     return min_i
 | |
| 
 | |
| def delete_max_min_case(column_nums, column_id):
 | |
|     # min_i = find_min_index(column_nums[len(column_nums) - 1])
 | |
|     # for j in xrange(len(column_nums)):
 | |
|     #     column_nums[j] = delete(column_nums[j], min_i)
 | |
|     max_i = find_max_index(column_nums[column_id])
 | |
|     for j in xrange(len(column_nums)):
 | |
|        column_nums[j] = delete(column_nums[j], max_i)
 | |
|     max_i = find_max_index(column_nums[column_id])
 | |
|     for j in xrange(len(column_nums)):
 | |
|        column_nums[j] = delete(column_nums[j], max_i)
 | |
|     # max_i = find_max_index(column_nums[column_id])
 | |
|     # for j in xrange(len(column_nums)):
 | |
|     #    column_nums[j] = delete(column_nums[j], max_i)
 | |
|     # max_i = find_max_index(column_nums[column_id])
 | |
|     # for j in xrange(len(column_nums)):
 | |
|     #    column_nums[j] = delete(column_nums[j], max_i)
 | |
| 
 | |
| 
 | |
| def do_filter(column_strs):
 | |
|     filtered = False
 | |
|     for f in filters:
 | |
|         if f[1] == "g" and float(column_strs[int(f[0])]) <= int(f[2]) :
 | |
|             filtered = True
 | |
|             break
 | |
|         elif f[1] == "l" and float(column_strs[int(f[0])]) >= int(f[2]) :
 | |
|             filtered = True
 | |
|             break
 | |
|         elif f[1] == "ge" and float(column_strs[int(f[0])]) < int(f[2]) :
 | |
|             filtered = True
 | |
|             break
 | |
|         elif f[1] == "le" and float(column_strs[int(f[0])]) > int(f[2]) :
 | |
|             filtered = True
 | |
|             break
 | |
|         elif f[1] == "e" and float(column_strs[int(f[0])]) != int(f[2]) :
 | |
|             filtered = True
 | |
|             break
 | |
|         elif f[1] == "ne" and float(column_strs[int(f[0])]) == int(f[2]) :
 | |
|             filtered = True
 | |
|             break
 | |
|     return filtered
 | |
| 
 | |
| 
 | |
| for line in origin_file:
 | |
|     if line.startswith("#"):
 | |
|         out_file.write(line)
 | |
|         continue #skip comment
 | |
|     column_strs_raw = line.split(",")
 | |
|     if do_filter(column_strs_raw):
 | |
|         continue
 | |
|     column_count = len(column_strs_raw)
 | |
|     if i == 0:
 | |
|         avg_strs = []
 | |
|         avgs = []
 | |
|         column_nums = []
 | |
|         for n in xrange(column_count):
 | |
|             column_nums.append([])
 | |
|     #split line and cast to float
 | |
|     for n in xrange(column_count):
 | |
|         column_nums[n].append(float(column_strs_raw[n]))
 | |
|     if i == time_per_case - 1:
 | |
|         if use_delete_min_max:
 | |
|             delete_max_min_case(column_nums, target_column_id)
 | |
|         #calc avg per column
 | |
|         for n in xrange(column_count):
 | |
|             avgs.append(np.mean(column_nums[n]))
 | |
|         #cast to str
 | |
|         avg_strs = [str(a) for a in avgs]
 | |
|         real_avg_strs = []
 | |
|         #out_columns filter
 | |
|         for cid in xrange(len(avg_strs)):
 | |
|             if cid in out_columns:
 | |
|                 real_avg_strs.append(avg_strs[cid])
 | |
| 
 | |
|         out_file.write(",".join(real_avg_strs) + "\n")
 | |
|     i = (i + 1) % time_per_case
 | |
| 
 | |
| origin_file.close()
 | |
| out_file.close()
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | 
