git-svn-id: http://webrtc.googlecode.com/svn/trunk@8 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
303
tools/refactoring/stringmanipulation.py
Normal file
303
tools/refactoring/stringmanipulation.py
Normal file
@ -0,0 +1,303 @@
|
||||
import string
|
||||
|
||||
# returns tuple, [success,updated_string] where the updated string has
|
||||
# one less (the first) occurrence of match string
|
||||
def removefirstoccurance(remove_string, match_string):
    """Remove the first case-insensitive occurrence of match_string.

    Both strings are lower-cased for the search only; the characters that
    are kept retain their original case.

    Returns a two-element list [found, result]. When match_string does not
    occur, found is False and result is remove_string unchanged.
    """
    needle = match_string.lower()
    start = remove_string.lower().find(needle)
    if start < 0:
        return [False, remove_string]
    # Splice out the matched span, keeping everything before and after it.
    stop = start + len(needle)
    return [True, remove_string[:start] + remove_string[stop:]]
|
||||
|
||||
# returns a string with all occurances of match_string removed
|
||||
def removealloccurances(remove_string, match_string):
    """Return remove_string with every occurrence of match_string removed.

    Matching is case-insensitive. Removal is repeated until no occurrence is
    left, so occurrences that only appear after an earlier removal (e.g.
    removing "ab" from "aabb") are removed as well.

    An empty match_string matches everywhere without removing anything, so
    it is treated as "nothing to remove" instead of looping forever.
    """
    if not match_string:
        return remove_string
    needle = match_string.lower()
    while True:
        # Re-lowercase on every pass because remove_string shrinks.
        start = remove_string.lower().find(needle)
        if start == -1:
            return remove_string
        remove_string = (remove_string[:start] +
                         remove_string[start + len(needle):])
|
||||
|
||||
# removes an occurance of match_string only if it's first in the string
|
||||
# returns tuple [success, new_string]
|
||||
def removeprefix(remove_string, match_string):
    """Strip match_string from the front of remove_string, ignoring case.

    Returns a two-element list [removed, result]. removed is True only when
    remove_string starts with match_string (compared case-insensitively);
    otherwise result is remove_string unchanged.
    """
    prefix = match_string.lower()
    if not remove_string.lower().startswith(prefix):
        return [False, remove_string]
    return [True, remove_string[len(prefix):]]
|
||||
|
||||
# removes multiple occurances of match string as long as they are first in
|
||||
# the string
|
||||
def removeallprefix(remove_string, match_string):
    """Strip every leading repetition of match_string from remove_string.

    Matching is case-insensitive. Occurrences that are not at the very start
    of the string are left alone.

    An empty match_string is a prefix of every string and removing it
    changes nothing, so it is treated as "nothing to remove" instead of
    looping forever.
    """
    if not match_string:
        return remove_string
    prefix = match_string.lower()
    while remove_string.lower().startswith(prefix):
        remove_string = remove_string[len(prefix):]
    return remove_string
|
||||
|
||||
# returns true if extensionstring is a correct extension
|
||||
def isextension(extensionstring):
    """Return True if extensionstring looks like a file extension.

    A valid extension is at least two characters long, starts with '.' and
    contains no further '.' characters (e.g. ".cc", but not "cc", ".",
    ".." or ".tar.gz").
    """
    if len(extensionstring) < 2:
        return False
    if extensionstring[0] != '.':
        return False
    # Check everything after the leading dot, including the last character.
    return '.' not in extensionstring[1:]
|
||||
|
||||
# returns the index of start of the last occurance of match_string
|
||||
def findlastoccurance(original_string, match_string):
    """Return the start index of the last occurrence of match_string.

    The search is case-sensitive. Returns -1 when match_string does not
    occur in original_string.
    """
    # str.rfind reports the index relative to the whole string and also
    # considers a match that ends on the final character.
    return original_string.rfind(match_string)
|
||||
|
||||
# changes extension from original_extension to new_extension
|
||||
def changeextension(original_string, original_extension, new_extension):
    """Replace a trailing original_extension with new_extension.

    original_string is returned unchanged when either extension is rejected
    by isextension(), or when original_string does not end with
    original_extension. The comparison is case-sensitive.
    """
    if not (isextension(original_extension) and isextension(new_extension)):
        return original_string
    # Only a real suffix counts: ".c" must not match inside "foo.cpp", and
    # text following an earlier occurrence must not be silently dropped.
    if not original_string.endswith(original_extension):
        return original_string
    stem = original_string[:len(original_string) - len(original_extension)]
    return stem + new_extension
|
||||
|
||||
# wanted to do this with str.find however didnt seem to work so do it manually
|
||||
# returns the index of the first capital letter
|
||||
def findfirstcapitalletter(original_string):
    """Return the index of the first character that changes when lower-cased.

    Returns -1 when no such character exists (including for an empty
    string).
    """
    for position, char in enumerate(original_string):
        if char != char.lower():
            return position
    return -1
|
||||
|
||||
|
||||
# replaces capital letters with underscore and lower case letter (except very
|
||||
# first
|
||||
def lowercasewithunderscore(original_string):
    """Convert CamelCase to lower_case_with_underscores.

    The first character is lower-cased without a leading underscore. Every
    later character that changes when lower-cased is replaced by '_'
    followed by its lower-case form. An empty string is returned as is.

    Example: "FooBarBaz" -> "foo_bar_baz".
    """
    if not original_string:
        return original_string
    # The first letter never gets an underscore in front of it.
    pieces = [original_string[0].lower()]
    for char in original_string[1:]:
        lowered = char.lower()
        if lowered != char:
            pieces.append('_' + lowered)
        else:
            pieces.append(char)
    return ''.join(pieces)
|
||||
|
||||
# my_table is a list of [old_string, new_string] pairs
|
||||
def removeduplicates(my_table):
    """Return a copy of my_table with repeated pairs dropped.

    my_table is iterated as [old_string, new_string] pairs. A pair is
    skipped when an identical pair has already been kept. The pairs are
    considered inconsistent, and the program is terminated after printing
    the offending pairs, when

      * the same old_string maps to two different new_strings, or
      * two different old_strings map to the same non-empty new_string.

    Several old_strings may map to the empty string.

    Raises SystemExit on an inconsistent table.
    """
    new_table = []
    for old_string1, new_string1 in my_table:
        duplicate = False
        for old_string2, new_string2 in new_table:
            same_old = (old_string1 == old_string2)
            same_new = (new_string1 == new_string2)
            if same_old and same_new:
                duplicate = True
                break
            if same_old or (same_new and new_string1 != ''):
                # Single-argument print(...) behaves the same on Python 2
                # and Python 3.
                print('missmatching set, terminating program')
                print(old_string1)
                print(new_string1)
                print(old_string2)
                print(new_string2)
                # Same exception as quit(), without depending on the name
                # the site module injects for interactive use.
                raise SystemExit()
        if not duplicate:
            new_table.append([old_string1, new_string1])
    return new_table
|
||||
|
||||
def removenochange(my_table):
    """Return the pairs of my_table whose two strings differ.

    my_table is iterated as [old_string, new_string] pairs; each kept pair
    is returned as a new two-element list.
    """
    return [[before, after] for before, after in my_table if before != after]
|
||||
|
||||
# order table after size of the string (can be used to replace bigger strings
|
||||
# first which is useful since smaller strings can be inside the bigger string)
|
||||
# E.g. GIPS is a sub string of GIPSVE if we remove GIPS first GIPSVE will never
|
||||
# be removed. N is small so no need for fancy sort algorithm. Use selection sort
|
||||
def ordertablesizefirst(my_table):
    """Sort my_table in place so entries with the longest first string lead.

    Each entry is indexed as entry[0] to obtain the string whose length is
    the sort key. The list is reordered in place and also returned.
    Entries whose first string is empty are valid and sort last.
    """
    my_table.sort(key=lambda entry: len(entry[0]), reverse=True)
    return my_table
|
||||
|
||||
# returns true if string 1 or 2 is a substring of the other, assuming neither
|
||||
# has whitespaces
|
||||
def issubstring(string1, string2):
    """Locate the shorter of the two strings inside the longer one.

    The comparison is case-insensitive. When the strings have equal length,
    string2 is searched for inside string1.

    Returns the index in the longer string at which the shorter one starts,
    or -1 when it does not occur or when either string is empty. Note that
    a match at the very start yields 0, so compare the result against -1
    rather than testing its truth value.
    """
    if not string1 or not string2:
        return -1
    if len(string1) < len(string2):
        large_string, small_string = string2, string1
    else:
        large_string, small_string = string1, string2
    return large_string.lower().find(small_string.lower())
|
||||
|
||||
#not_part_of_word_table = [' ','(',')','{','}',':','\t','*','&','/','[',']','.',',','\n']
|
||||
#def ispartofword( char ):
|
||||
# for item in not_part_of_word_table:
|
||||
# if(char == item):
|
||||
# return False
|
||||
# return True
|
||||
|
||||
# must be a digit, an underscore or a letter
|
||||
def ispartofword(char):
    """Return True if char is alphanumeric or an underscore."""
    # isalnum() already covers everything isalpha() accepts.
    return bool(char.isalnum() or char == '_')
|
||||
|
||||
# returns the index of the first letter in the word that the current_index
|
||||
# is pointing to and the size of the word
|
||||
def getword(line, current_index):
    """Find the word in line that contains position current_index.

    A word is a maximal run of characters accepted by ispartofword().
    Trailing whitespace of line is ignored.

    Returns [start, length] for the word, or an empty list when
    current_index is negative, lies beyond the stripped line, or does not
    point at a word character.
    """
    if current_index < 0:
        return []
    line = line.rstrip()
    if current_index >= len(line):
        return []
    # Reject every non-word character, not only ' '; otherwise the scan
    # below would report a word of length -1.
    if not ispartofword(line[current_index]):
        return []

    # Walk left to the first character of the word.
    start_pos = current_index
    while start_pos > 0 and ispartofword(line[start_pos - 1]):
        start_pos -= 1

    # Walk right to one past the last character of the word.
    end_pos = current_index + 1
    while end_pos < len(line) and ispartofword(line[end_pos]):
        end_pos += 1

    return [start_pos, end_pos - start_pos]
|
||||
|
||||
# my table is a tuple [string1,string2] complement_to_table is just a list
|
||||
# of strings to compare to string1
|
||||
def complement(my_table, complement_to_table):
    """Return the entries of my_table not listed in complement_to_table.

    The first element of each my_table entry is compared case-insensitively
    against every string in complement_to_table; entries with no match are
    kept, in their original order.
    """
    remaining = []
    for entry in my_table:
        hits = [candidate for candidate in complement_to_table
                if entry[0].lower() == candidate.lower()]
        if not hits:
            remaining.append(entry)
    return remaining
|
||||
|
||||
def removestringfromhead(line, remove_string):
    """Strip every leading repetition of remove_string from line.

    The comparison is case-sensitive. Returns '' when line consists only of
    repetitions of remove_string. An empty remove_string leaves line
    unchanged.
    """
    if not remove_string:
        return line
    step = len(remove_string)
    # Advance by whole matches so multi-character strings are removed
    # completely rather than one character at a time.
    while line.startswith(remove_string):
        line = line[step:]
    return line
|
||||
|
||||
def removeccomment(line):
    """Return line truncated just before its first '//'.

    The line is returned unchanged when it contains no '//'.
    """
    comment_start = line.find('//')
    if comment_start == -1:
        return line
    return line[:comment_start]
|
||||
|
||||
def whitespacestoonespace(line):
    """Collapse each run of whitespace in line to a single space.

    Leading and trailing whitespace is removed entirely.
    """
    words = line.split()
    return ' '.join(words)
|
||||
|
||||
def fixabbreviations(original_string):
    """Turn runs of capitals into a single capitalised word.

    The first character is copied unchanged. After that, a capital that
    directly follows another capital is lower-cased, except that the last
    capital of a run is kept upper-case when a lower-case character follows
    it, since it starts the next word. '_' and digits are copied unchanged
    and end the current run.

    Example: "GIPSVoiceEngine" -> "GipsVoiceEngine".

    An empty string is returned as is.
    """
    if not original_string:
        return original_string

    first = original_string[0]
    previous_was_capital = (first.upper() == first)
    pieces = [first]
    for char in original_string[1:]:
        if char == '_' or char.isdigit():
            pieces.append(char)
            previous_was_capital = False
            continue

        current_is_capital = (char.upper() == char)
        if previous_was_capital:
            if current_is_capital:
                # Still inside a run of capitals: flatten to lower case.
                char = char.lower()
            else:
                # The run just ended, so the previous character started
                # this word; restore its upper case.
                last = pieces[-1]
                pieces[-1] = last[:-1] + last[-1].upper()
        previous_was_capital = current_is_capital
        pieces.append(char)
    return ''.join(pieces)
|
||||
|
||||
def replaceoccurances(old_string, replace_string, replace_with_string):
    """Replace every occurrence of replace_string in old_string.

    Occurrences are matched case-sensitively, left to right and without
    overlap, and each is replaced by replace_with_string. old_string is
    returned unchanged when replace_string is empty or does not occur.
    """
    if not replace_string:
        # str.replace would insert replace_with_string between every
        # character for an empty pattern; keep the "no change" contract.
        return old_string
    return old_string.replace(replace_string, replace_with_string)
|
Reference in New Issue
Block a user