GEM-benchmark · SamuelCahyawijaya · Oct 30, 2021 · Oct 30, 2021 · Oct 30, 2021 · Oct 30, 2021
diff --git a/transformations/numeric_to_word/numeric2word.py b/transformations/numeric_to_word/numeric2word.py
@@ -137,12 +137,12 @@ def recognized_as_range_not_sticky(word, next_word):
         first_part = word[begin_digit_index:end_digit_index]
         last_part = word[end_digit_index:]
 
-        return bool(re.search(r'^\d*[-]?\d*$',first_part)) and len(last_part) == 0 and word[0].isnumeric()
+        return bool(re.search(r'^\d*[-]?\d*$',first_part)) and len(last_part) == 0 and word[0].isdigit()
     else:
         return False
 
 def recognized_as_date_word(word, prev_word, next_word):
-    return (prev_word.lower() in month_words or next_word.lower() in month_words) and word.isnumeric() and int(word) <= 31
+    return (prev_word.lower() in month_words or next_word.lower() in month_words) and word.isdecimal() and int(word) <= 31  # isdecimal(), not isdigit(): isdigit() also accepts '²'/'①', which int() rejects with ValueError
 
 def recognized_as_datestring(x):
     """
@@ -228,7 +228,7 @@ def recognized_as_year(x):
 
     checker = min([character in string.punctuation for character in after_assumed_year]+[True]) and \
               min([character in string.punctuation for character in before_assumed_year]+[True]) and \
-              year in possible_year_list and (len(year) <= 4) and year.isnumeric()
+              year in possible_year_list and (len(year) <= 4) and year.isdigit()
 
     if checker:
         return bool(re.compile(r'.*([1-3][0-9]{3})').match(x)) and len(set(x) - {'0'}) >= 3
@@ -292,8 +292,10 @@ def recognized_as_currency_symbols(x):
 
         if len(front_checker)>0:
             front_checker = front_checker[:-1] if (front_checker[-1] in ['.', ',']) else front_checker
-        else:
+        elif len(back_checker)>0:
             back_checker = back_checker[:-1] if (back_checker[-1] in ['.', ',']) else back_checker
+        else:
+            return x
 
     if front_checker in currency_symbols:
         other_end_non_numeric = x[begin_digit_index:][end_digit_index-(len(x[:begin_digit_index])):]
@@ -311,17 +313,17 @@ def recognized_as_currency_symbols(x):
         return False
 
 def recognized_as_cents(x, prev_word, next_word):
-    return ('¢' in x or x[-1] == 'c') and x[-2].isnumeric() and re.sub('[¢c,.]', "", x).isnumeric() and prev_word != '(' and next_word != ')'
+    return len(x) > 1 and ('¢' in x or x[-1] == 'c') and x[-2].isdecimal() and re.sub('[¢c,.]', "", x).isdecimal() and prev_word != '(' and next_word != ')'  # len guard: x[-2] raised IndexError on 1-char input; isdecimal() rejects '²'-style digits that int() cannot parse
 
 def recognized_as_long_number(x):
     if x[0] == '+':
         x = x[1:]
 
     threshold = 7
-    return len(x) >= threshold and x.isnumeric()
+    return len(x) >= threshold and x.isdecimal()  # isdecimal(), not isdigit(): superscript/circled digits pass isdigit() but int() cannot parse them
 
 def recognized_as_additional_number(x):
-    return x[0] == '+' and len(x[1:]) <= 3 and x[1:].isnumeric()
+    return x.startswith('+') and len(x[1:]) <= 3 and x[1:].isdecimal()  # startswith() is safe on '' (x[0] raised IndexError); isdecimal() rejects '²'-style digits that int() cannot parse
 
 def recognized_as_long_number_with_stripes(x):
     return len(re.sub('[0-9-]','',x)) == 0 and len(x) > 8
@@ -332,15 +334,15 @@ def recognized_as_sticky_numbers(x):
 
     first_part = x[begin_digit_index:end_digit_index]
     last_part = x[end_digit_index:]
-    return bool(re.search(r'^\d*[.,]?\d*$',first_part)) and (last_part in ['st', 'nd', 'rd','th', '%'] or not re.search(r'\d', last_part)) and x[0].isnumeric()
+    return bool(re.search(r'^\d*[.,]?\d*$',first_part)) and (last_part in ['st', 'nd', 'rd','th', '%'] or not re.search(r'\d', last_part)) and x[0].isdigit()
 
 def recognized_as_sticky_range(x):
     begin_digit_index = re.search(r"\d", x).start()
     end_digit_index = len(x) - re.search(r"\d", x[::-1]).start()
 
     first_part = x[begin_digit_index:end_digit_index]
     last_part = x[end_digit_index:]
-    return bool(re.search(r'^\d*[-]?\d*$',first_part)) and len(last_part) > 0 and not re.search(r'\d', last_part) and x[0].isnumeric()
+    return bool(re.search(r'^\d*[-]?\d*$',first_part)) and len(last_part) > 0 and not re.search(r'\d', last_part) and x[0].isdigit()
 
 def recognized_as_math_formula_equality(x):
     matches = []
@@ -373,7 +375,7 @@ def recognized_as_special_phone_number(x):
     return x[0] in '*#' and x[-1] in '*#'
 
 def recognized_as_general_numbers(x):
-    return x.replace(',','').replace('.','').isnumeric() and x[-1].isnumeric()
+    return x.replace(',','').replace('.','').isdecimal() and x[-1].isdecimal()  # isdecimal(), not isdigit(): superscript/circled digits pass isdigit() but int() cannot parse them
 
 def recognized_as_negatives(x):
     return x[0] == '-'
@@ -460,6 +462,7 @@ def currency_to_words(x):
     front_checker = re.sub("[.]", "", x[:begin_digit_index])
     back_checker = x[end_digit_index:]
 
+    words = x
     if front_checker in currency_symbols: # $300
         if x.find('.') > -1:
             number = re.sub("[^.0-9]", "", x[begin_digit_index-1:])
@@ -539,7 +542,7 @@ def long_number_to_words(x):
 def long_number_with_stripes_to_words(x):
     words = ''
     for i, char in enumerate(x):
-        if char.isnumeric():
+        if char.isdigit():
             if i == len(x)-1:
                 words = words + num2words(char) 
             else:
@@ -563,10 +566,12 @@ def sticky_numbers_to_words(x):
     first_part = x[begin_digit_index:end_digit_index]
     last_part = x[end_digit_index:]
 
+    words = x
     if last_part in ['st', 'nd', 'rd', 'th']:
         words = num2words(first_part, to='ordinal')
-    else:
+    elif first_part.isdigit():
         words = num2words(first_part, to='cardinal') + ' ' +  last_part if len(last_part) > 0 else num2words(first_part, to='cardinal')
+
     return words
 
 def sticky_range_to_words(x):
@@ -587,10 +592,12 @@ def math_formula_equality_to_words(x):
     equality_sign_index_numpy = np.array(equality_sign_index_list)
     count_match = sum(equality_sign_index_numpy > 0)
     if count_match > 1:
-        equality_sign_index = max(equality_sign_index_numpy)
+        equality_sign_index = equality_sign_index_numpy.argmax()
     elif count_match == 1:
         equality_sign_index = list(equality_sign_index_numpy > 0).index(True)
-
+    else:
+        return x
+
     equality_sign = math_sign[equality_sign_index]
 
     begin_equality_sign_index_in_word = x.index(equality_sign)
@@ -599,7 +606,11 @@ def math_formula_equality_to_words(x):
     before_equal = x[:begin_equality_sign_index_in_word]
     after_equal = x[end_equality_sign_index_in_word:]
 
-    begin_digit_index = re.search(r"\d", after_equal).start()
+    begin_digit_index = re.search(r"\d", after_equal)
+    if not begin_digit_index:
+        return x
+
+    begin_digit_index = begin_digit_index.start()
     end_digit_index = len(after_equal) - re.search(r"\d", after_equal[::-1]).start()
 
     first_part = after_equal[begin_digit_index:end_digit_index]
@@ -622,7 +633,7 @@ def general_numbers_to_words(x):
 
         count = 0
         for i in np.arange(last_comma_index+1, len(x), 1):
-            if x[i].isnumeric():
+            if x[i].isdigit():
                 count = count + 1
             else:
                 break
@@ -635,8 +646,11 @@ def general_numbers_to_words(x):
             # last comma is actually dot, there'll only 1 comma in this case
             x = x[:last_comma_index] + '.' + x[last_comma_index+1:]
 
-    words = ''.join(num2words(x).split(","))
-    return words
+    if x.isdigit():
+        words = ''.join(num2words(x).split(","))
+        return words
+    else:
+        return x
 
 def numeric_beside_end_bracket_to_words(x):
     end_digit_index = len(x) - re.search(r"\d", x[::-1]).start()
@@ -666,8 +680,10 @@ def fraction_to_words(x):
         words = 'two quarter'
     elif numerator == '3' and denominator == '4':
         words = 'three quarter'
-    else:
+    elif numerator.isdigit() and denominator.isdigit():
         words = num2words(numerator) + ' over ' + num2words(denominator)
+    else:
+        words = x
     return words
 
 ### Supplements

diff --git a/transformations/numeric_to_word/transformation.py b/transformations/numeric_to_word/transformation.py
@@ -77,35 +77,3 @@ def generate(self, sentence: str):
                 perturbed += " "
             perturbed += recognize_transform(word, prev_word, next_word)
         return [perturbed]
-
-# if __name__ == '__main__':
-#     import json
-#     # from TestRunner import convert_to_snake_case
-#     tf = NumericToWord()
-#     sentence = "Please buy me 20 apples"
-#     test_cases = []
-#     for sentence in ["Please buy me 20 apples",
-#                     "The deadline is in 2020/01/02",
-#                     "The deadline is in 2020/01",
-#                     "The deadline is in Jan 2020",
-#                     "Slow down, it\'s still 5:00",
-#                     "Quick!, it\'s already 23:00",
-#                     "This is 2020!",
-#                     "My phone number is +1371893178",
-#                     "My phone number is +6287822216501",
-#                     "My phone number is 6287822216501",
-#                     "The price is $300",
-#                     "The price is 300$",
-#                     "The price is USD300",
-#                     "The price is 300USD",
-#                     "The price is USD300!@#!"]:
-#         test_cases.append({
-#             "class": tf.name(),
-#             "inputs": {"sentence": sentence}, "outputs": {"sentence": tf.generate(sentence)}}
-#         )
-#     json_file = {"type": "numeric_to_word", "test_cases": test_cases}
-#     # json_file = {"type": convert_to_snake_case(tf.name()), "test_cases": test_cases}
-#     print(json.dumps(json_file))
-
-#     with open("test.json", "w") as out_file:
-#         json.dump(json_file, out_file, indent=2, ensure_ascii=True)