Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix/numeric to word #360

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 35 additions & 19 deletions transformations/numeric_to_word/numeric2word.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,12 @@ def recognized_as_range_not_sticky(word, next_word):
first_part = word[begin_digit_index:end_digit_index]
last_part = word[end_digit_index:]

return bool(re.search(r'^\d*[-]?\d*$',first_part)) and len(last_part) == 0 and word[0].isnumeric()
return bool(re.search(r'^\d*[-]?\d*$',first_part)) and len(last_part) == 0 and word[0].isdigit()
else:
return False

def recognized_as_date_word(word, prev_word, next_word):
return (prev_word.lower() in month_words or next_word.lower() in month_words) and word.isnumeric() and int(word) <= 31
return (prev_word.lower() in month_words or next_word.lower() in month_words) and word.isdigit() and int(word) <= 31

def recognized_as_datestring(x):
"""
Expand Down Expand Up @@ -228,7 +228,7 @@ def recognized_as_year(x):

checker = min([character in string.punctuation for character in after_assumed_year]+[True]) and \
min([character in string.punctuation for character in before_assumed_year]+[True]) and \
year in possible_year_list and (len(year) <= 4) and year.isnumeric()
year in possible_year_list and (len(year) <= 4) and year.isdigit()

if checker:
return bool(re.compile(r'.*([1-3][0-9]{3})').match(x)) and len(set(x) - {'0'}) >= 3
Expand Down Expand Up @@ -292,8 +292,10 @@ def recognized_as_currency_symbols(x):

if len(front_checker)>0:
front_checker = front_checker[:-1] if (front_checker[-1] in ['.', ',']) else front_checker
else:
elif len(back_checker)>0:
back_checker = back_checker[:-1] if (back_checker[-1] in ['.', ',']) else back_checker
else:
return x

if front_checker in currency_symbols:
other_end_non_numeric = x[begin_digit_index:][end_digit_index-(len(x[:begin_digit_index])):]
Expand All @@ -311,17 +313,17 @@ def recognized_as_currency_symbols(x):
return False

def recognized_as_cents(x, prev_word, next_word):
return ('¢' in x or x[-1] == 'c') and x[-2].isnumeric() and re.sub('[¢c,.]', "", x).isnumeric() and prev_word != '(' and next_word != ')'
return ('¢' in x or x[-1] == 'c') and x[-2].isdigit() and re.sub('[¢c,.]', "", x).isdigit() and prev_word != '(' and next_word != ')'

def recognized_as_long_number(x):
if x[0] == '+':
x = x[1:]

threshold = 7
return len(x) >= threshold and x.isnumeric()
return len(x) >= threshold and x.isdigit()

def recognized_as_additional_number(x):
return x[0] == '+' and len(x[1:]) <= 3 and x[1:].isnumeric()
return x[0] == '+' and len(x[1:]) <= 3 and x[1:].isdigit()

def recognized_as_long_number_with_stripes(x):
return len(re.sub('[0-9-]','',x)) == 0 and len(x) > 8
Expand All @@ -332,15 +334,15 @@ def recognized_as_sticky_numbers(x):

first_part = x[begin_digit_index:end_digit_index]
last_part = x[end_digit_index:]
return bool(re.search(r'^\d*[.,]?\d*$',first_part)) and (last_part in ['st', 'nd', 'rd','th', '%'] or not re.search(r'\d', last_part)) and x[0].isnumeric()
return bool(re.search(r'^\d*[.,]?\d*$',first_part)) and (last_part in ['st', 'nd', 'rd','th', '%'] or not re.search(r'\d', last_part)) and x[0].isdigit()

def recognized_as_sticky_range(x):
begin_digit_index = re.search(r"\d", x).start()
end_digit_index = len(x) - re.search(r"\d", x[::-1]).start()

first_part = x[begin_digit_index:end_digit_index]
last_part = x[end_digit_index:]
return bool(re.search(r'^\d*[-]?\d*$',first_part)) and len(last_part) > 0 and not re.search(r'\d', last_part) and x[0].isnumeric()
return bool(re.search(r'^\d*[-]?\d*$',first_part)) and len(last_part) > 0 and not re.search(r'\d', last_part) and x[0].isdigit()

def recognized_as_math_formula_equality(x):
matches = []
Expand Down Expand Up @@ -373,7 +375,7 @@ def recognized_as_special_phone_number(x):
return x[0] in '*#' and x[-1] in '*#'

def recognized_as_general_numbers(x):
return x.replace(',','').replace('.','').isnumeric() and x[-1].isnumeric()
return x.replace(',','').replace('.','').isdigit() and x[-1].isdigit()

def recognized_as_negatives(x):
return x[0] == '-'
Expand Down Expand Up @@ -460,6 +462,7 @@ def currency_to_words(x):
front_checker = re.sub("[.]", "", x[:begin_digit_index])
back_checker = x[end_digit_index:]

words = x
if front_checker in currency_symbols: # $300
if x.find('.') > -1:
number = re.sub("[^.0-9]", "", x[begin_digit_index-1:])
Expand Down Expand Up @@ -539,7 +542,7 @@ def long_number_to_words(x):
def long_number_with_stripes_to_words(x):
words = ''
for i, char in enumerate(x):
if char.isnumeric():
if char.isdigit():
if i == len(x)-1:
words = words + num2words(char)
else:
Expand All @@ -563,10 +566,12 @@ def sticky_numbers_to_words(x):
first_part = x[begin_digit_index:end_digit_index]
last_part = x[end_digit_index:]

words = x
if last_part in ['st', 'nd', 'rd', 'th']:
words = num2words(first_part, to='ordinal')
else:
elif first_part.isdigit():
words = num2words(first_part, to='cardinal') + ' ' + last_part if len(last_part) > 0 else num2words(first_part, to='cardinal')

return words

def sticky_range_to_words(x):
Expand All @@ -587,10 +592,12 @@ def math_formula_equality_to_words(x):
equality_sign_index_numpy = np.array(equality_sign_index_list)
count_match = sum(equality_sign_index_numpy > 0)
if count_match > 1:
equality_sign_index = max(equality_sign_index_numpy)
equality_sign_index = equality_sign_index_numpy.argmax()
elif count_match == 1:
equality_sign_index = list(equality_sign_index_numpy > 0).index(True)

else:
return x

equality_sign = math_sign[equality_sign_index]

begin_equality_sign_index_in_word = x.index(equality_sign)
Expand All @@ -599,7 +606,11 @@ def math_formula_equality_to_words(x):
before_equal = x[:begin_equality_sign_index_in_word]
after_equal = x[end_equality_sign_index_in_word:]

begin_digit_index = re.search(r"\d", after_equal).start()
begin_digit_index = re.search(r"\d", after_equal)
if not begin_digit_index:
return x

begin_digit_index = begin_digit_index.start()
end_digit_index = len(after_equal) - re.search(r"\d", after_equal[::-1]).start()

first_part = after_equal[begin_digit_index:end_digit_index]
Expand All @@ -622,7 +633,7 @@ def general_numbers_to_words(x):

count = 0
for i in np.arange(last_comma_index+1, len(x), 1):
if x[i].isnumeric():
if x[i].isdigit():
count = count + 1
else:
break
Expand All @@ -635,8 +646,11 @@ def general_numbers_to_words(x):
# the last comma is actually a decimal point; there will be only 1 comma in this case
x = x[:last_comma_index] + '.' + x[last_comma_index+1:]

words = ''.join(num2words(x).split(","))
return words
if x.isdigit():
words = ''.join(num2words(x).split(","))
return words
else:
return x

def numeric_beside_end_bracket_to_words(x):
end_digit_index = len(x) - re.search(r"\d", x[::-1]).start()
Expand Down Expand Up @@ -666,8 +680,10 @@ def fraction_to_words(x):
words = 'two quarter'
elif numerator == '3' and denominator == '4':
words = 'three quarter'
else:
elif numerator.isdigit() and denominator.isdigit():
words = num2words(numerator) + ' over ' + num2words(denominator)
else:
words = x
return words

### Supplements
Expand Down
32 changes: 0 additions & 32 deletions transformations/numeric_to_word/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,35 +77,3 @@ def generate(self, sentence: str):
perturbed += " "
perturbed += recognize_transform(word, prev_word, next_word)
return [perturbed]

# if __name__ == '__main__':
# import json
# # from TestRunner import convert_to_snake_case
# tf = NumericToWord()
# sentence = "Please buy me 20 apples"
# test_cases = []
# for sentence in ["Please buy me 20 apples",
# "The deadline is in 2020/01/02",
# "The deadline is in 2020/01",
# "The deadline is in Jan 2020",
# "Slow down, it\'s still 5:00",
# "Quick!, it\'s already 23:00",
# "This is 2020!",
# "My phone number is +1371893178",
# "My phone number is +6287822216501",
# "My phone number is 6287822216501",
# "The price is $300",
# "The price is 300$",
# "The price is USD300",
# "The price is 300USD",
# "The price is USD300!@#!"]:
# test_cases.append({
# "class": tf.name(),
# "inputs": {"sentence": sentence}, "outputs": {"sentence": tf.generate(sentence)}}
# )
# json_file = {"type": "numeric_to_word", "test_cases": test_cases}
# # json_file = {"type": convert_to_snake_case(tf.name()), "test_cases": test_cases}
# print(json.dumps(json_file))

# with open("test.json", "w") as out_file:
# json.dump(json_file, out_file, indent=2, ensure_ascii=True)