diff --git a/numbers/src/main/java/org/dicio/numbers/lang/de/GermanFormatter.kt b/numbers/src/main/java/org/dicio/numbers/lang/de/GermanFormatter.kt new file mode 100644 index 00000000..f5655e69 --- /dev/null +++ b/numbers/src/main/java/org/dicio/numbers/lang/de/GermanFormatter.kt @@ -0,0 +1,463 @@ +package org.dicio.numbers.lang.de + +import org.dicio.numbers.formatter.Formatter +import org.dicio.numbers.lang.en.EnglishFormatter +import org.dicio.numbers.unit.MixedFraction +import org.dicio.numbers.util.Utils.decimalPlacesNoFinalZeros +import org.dicio.numbers.util.Utils.longPow +import org.dicio.numbers.util.Utils.splitByModulus +import java.time.LocalTime +import java.time.format.DateTimeFormatter +import java.util.Locale +import kotlin.math.abs + +class GermanFormatter : Formatter("config/de-de") { + override fun niceNumber(mixedFraction: MixedFraction, speech: Boolean): String { + if (speech) { + val sign = if (mixedFraction.negative) "minus " else "" + if (mixedFraction.numerator == 0) { + return sign + pronounceNumber(mixedFraction.whole.toDouble(), 0, shortScale = true, + scientific = false, + ordinal = false + ) + } + + val denominatorString: String? 
+ if (mixedFraction.denominator == 1) { + denominatorString = "Eintel" + } else if (mixedFraction.denominator == 2) { + denominatorString = "Halbe" + } else if (mixedFraction.denominator == 3) { + denominatorString = "Drittel" + } else if (mixedFraction.denominator == 7) { + denominatorString = "Siebtel" + } else if (mixedFraction.denominator < 20) { + // below 20 use number name + suffix "tel" + denominatorString = pronounceNumber( + mixedFraction.denominator.toDouble(), + 0, + shortScale = true, + scientific = false, + ordinal = true + ) + "tel" + } else { + // for 20+ use number name + suffix "stel" + denominatorString = pronounceNumber( + mixedFraction.denominator.toDouble(), + 0, + shortScale = true, + scientific = false, + ordinal = true + ) + "stel" + } + val numeratorString = pronounceNumber(mixedFraction.numerator.toDouble(), 0, + shortScale = true, + scientific = false, + ordinal = false + ) + + return if (mixedFraction.whole == 0L) { + "$sign$numeratorString $denominatorString" + } else { + (sign + pronounceNumber( + mixedFraction.whole.toDouble(), + 0, + shortScale = true, + scientific = false, + ordinal = false + ) + " und " + numeratorString + " " + denominatorString) + } + } else { + return niceNumberNotSpeech(mixedFraction) + } + } + + override fun pronounceNumber( + number: Double, + places: Int, + shortScale: Boolean, + scientific: Boolean, + ordinal: Boolean + ): String { + var number = number + if (number == Double.POSITIVE_INFINITY) { + return "unendlich" + } else if (number == Double.Companion.NEGATIVE_INFINITY) { + return "minus unendlich" + } else if (number.isNaN()) { + return "keine Zahl" + } + + // also using scientific mode if the number is too big to be spoken fully. Checking against + // the biggest double smaller than 10^21 = 1000 * 10^18, which is the biggest pronounceable + // number, since e.g. 999.99 * 10^18 can be pronounced correctly. 
+ if (scientific || abs(number) > 999999999999999934463.0) { + val scientificFormatted = String.format(Locale.ENGLISH, "%E", number) + val parts: Array = + scientificFormatted.split("E".toRegex(), limit = 2).toTypedArray() + val power = parts[1]!!.toInt().toDouble() + + if (power != 0.0) { + // This handles negatives of powers separately from the normal + // handling since each call disables the scientific flag + val n = parts[0]!!.toDouble() + return String.format( + "%s mal zehn hoch %s", + pronounceNumber(abs(n), places, shortScale, + scientific = false, + ordinal = false + ), + pronounceNumber(abs(power), places, shortScale, + scientific = false, + ordinal = false + ) + ) + } + } + + val result = StringBuilder() + if (number < 0) { + number = -number + // from here on number is always positive + if (places != 0 || number >= 0.5) { + // do not add minus if number will be rounded to 0 + result.append(if (scientific) "negative " else "minus ") + } + } + + val realPlaces = decimalPlacesNoFinalZeros(number, places) + val numberIsWhole = realPlaces == 0 + // if no decimal places to be printed, numberLong should be the rounded number + val numberLong = number.toLong() + (if (number % 1 >= 0.5 && numberIsWhole) 1 else 0) + + if (!ordinal && numberIsWhole && numberLong > 1000 && numberLong < 2000) { + // deal with 4 digits that can be said like a date, i.e. 
1972 => nineteen seventy two + + result.append(NUMBER_NAMES[numberLong / 100]) + result.append(" ") + if (numberLong % 100 == 0L) { + // 1900 => nineteen hundred + result.append(NUMBER_NAMES[100L]) + } else if (numberLong % 100 < 10 && numberLong % 100 != 0L) { + // 1906 => nineteen oh six + result.append("oh ") + result.append(NUMBER_NAMES[numberLong % 10]) + } else if (numberLong % 10 == 0L || numberLong % 100 < 20) { + // 1960 => nineteen sixty; 1911 => nineteen eleven + result.append(NUMBER_NAMES[numberLong % 100]) + } else { + // 1961 => nineteen sixty one + result.append(NUMBER_NAMES[numberLong % 100 - numberLong % 10]) + result.append(" ") + result.append(NUMBER_NAMES[numberLong % 10]) + } + + return result.toString() + } + + if (!ordinal && NUMBER_NAMES.containsKey(numberLong)) { + if (number > 90) { + result.append("one ") + } + result.append(NUMBER_NAMES[numberLong]) + } else if (shortScale) { + var ordi = ordinal && numberIsWhole // not ordinal if not whole + val groups = splitByModulus(numberLong, 1000) + val groupNames: MutableList = ArrayList() + for (i in groups.indices) { + val z: Long = groups[i] + if (z == 0L) { + continue // skip 000 groups + } + var groupName = subThousand(z, i == 0 && ordi) + + if (i != 0) { + val magnitude = longPow(1000, i) + if (ordi) { + // ordi can be true only for the first group (i.e. at the end of the number) + if (z == 1L) { + // remove "one" from first group (e.g. 
"one billion, millionth") + groupName = + EnglishFormatter.Companion.ORDINAL_NAMES_SHORT_SCALE[magnitude] + } else { + groupName += " " + EnglishFormatter.Companion.ORDINAL_NAMES_SHORT_SCALE[magnitude] + } + } else { + groupName += " " + EnglishFormatter.Companion.NUMBER_NAMES_SHORT_SCALE[magnitude] + } + } + + groupNames.add(groupName) + ordi = false + } + + appendSplitGroups(result, groupNames) + } else { + var ordi = ordinal && numberIsWhole // not ordinal if not whole + val groups = splitByModulus(numberLong, 1000000) + val groupNames: MutableList = ArrayList() + for (i in groups.indices) { + val z: Long = groups[i] + if (z == 0L) { + continue // skip 000000 groups + } + + var groupName: String? + if (z < 1000) { + groupName = subThousand(z, i == 0 && ordi) + } else { + groupName = subThousand(z / 1000, false) + " thousand" + if (z % 1000 != 0L) { + groupName += (if (i == 0) ", " else " ") + subThousand( + z % 1000, + i == 0 && ordi + ) + } else if (i == 0 && ordi) { + if (z / 1000 == 1L) { + groupName = "thousandth" // remove "one" from "one thousandth" + } else { + groupName += "th" + } + } + } + + if (i != 0) { + val magnitude = longPow(1000000, i) + if (ordi) { + // ordi can be true only for the first group (i.e. at the end of the number) + if (z == 1L) { + // remove "one" from first group (e.g. "one billion, millionth") + groupName = + EnglishFormatter.Companion.ORDINAL_NAMES_LONG_SCALE[magnitude] + } else { + groupName += " " + EnglishFormatter.Companion.ORDINAL_NAMES_LONG_SCALE[magnitude] + } + } else { + groupName += " " + EnglishFormatter.Companion.NUMBER_NAMES_LONG_SCALE[magnitude] + } + } + + groupNames.add(groupName) + ordi = false + } + + appendSplitGroups(result, groupNames) + } + + if (realPlaces > 0) { + if (number < 1.0 && (result.isEmpty() || "minus ".contentEquals(result))) { + result.append("zero") // nothing was written before + } + result.append(" point") + + val fractionalPart = String.format("%." 
+ realPlaces + "f", number % 1) + for (i in 2..= 12) " p.m." else " a.m.") + } + return result.toString() + } + } else { + return if (use24Hour) { + time.format(DateTimeFormatter.ofPattern("HH:mm", Locale.ENGLISH)) + } else { + val result = time.format( + DateTimeFormatter.ofPattern( + if (showAmPm) "K:mm a" else "K:mm", Locale.ENGLISH + ) + ) + if (result.startsWith("0:")) { + "12:" + result.substring(2) + } else { + result + } + } + } + } + + + /** + * @param n must be 0 <= n <= 999 + * @param ordinal whether to return an ordinal number (usually with -th) + * @return the string representation of a number smaller than 1000 + */ + private fun subThousand(n: Long, ordinal: Boolean): String? { + // this function calls itself inside if branches to make sure `ordinal` is respected + if (ordinal && ORDINAL_NAMES.containsKey(n)) { + return ORDINAL_NAMES[n] + } else if (n < 100) { + if (!ordinal && NUMBER_NAMES.containsKey(n)) { + return NUMBER_NAMES[n] + } + + // n is surely => 20 from here on, since all n < 20 are in (ORDINAL|NUMBER)_NAMES + return (NUMBER_NAMES[n - n % 10] + + (if (n % 10 > 0) " " + subThousand(n % 10, ordinal) else "")) + } else { + return (NUMBER_NAMES[n / 100] + " hundred" + + (if (n % 100 > 0) + " and " + subThousand(n % 100, ordinal) + else + (if (ordinal) "th" else ""))) + } + } + + /** + * @param result the string builder to append the comma-separated group names to + * @param groupNames the group names + */ + private fun appendSplitGroups(result: StringBuilder, groupNames: MutableList) { + if (!groupNames.isEmpty()) { + result.append(groupNames[groupNames.size - 1]) + } + + for (i in groupNames.size - 2 downTo 0) { + result.append(", ") + result.append(groupNames[i]) + } + } + + companion object { + val NUMBER_NAMES = mapOf( + 0L to "null", + 1L to "eins", + 2L to "zwei", + 3L to "drei", + 4L to "vier", + 5L to "fünf", + 6L to "sechs", + 7L to "sieben", + 8L to "acht", + 9L to "neun", + 10L to "zehn", + 11L to "elf", + 12L to "zwölf", + 13L to 
"dreizehn", + 14L to "vierzehn", + 15L to "fünfzehn", + 16L to "sechzehn", + 17L to "siebzehn", + 18L to "achtzehn", + 19L to "neunzehn", + 20L to "zwanzig", + 30L to "dreißig", + 40L to "vierzig", + 50L to "fünfzig", + 60L to "sechzig", + 70L to "siebzig", + 80L to "achtzig", + 90L to "neunzig", + 100L to "hundert", + 1000L to "tausend", + 1000000L to "million", + 1000000000L to "milliarde", + 1000000000000L to "billion", + 1000000000000000L to "billiarde", + 1000000000000000000L to "trillion", + ) + + val ORDINAL_NAMES = mapOf( + 1L to "erste", + 2L to "zweite", + 3L to "dritte", + 4L to "vierte", + 5L to "fünfte", + 6L to "sechste", + 7L to "siebte", + 8L to "achte", + 9L to "neunte", + 10L to "zehnte", + 11L to "elfte", + 12L to "zwölfte", + 13L to "dreizehnte", + 14L to "vierzehnte", + 15L to "fünfzehnte", + 16L to "sechzehnte", + 17L to "siebzehnte", + 18L to "achtzehnte", + 19L to "neunzehnte", + 20L to "zwanzigste", + 30L to "dreißigste", + 40L to "vierzigste", + 50L to "fünfzigste", + 60L to "sechzigste", + 70L to "siebzigste", + 80L to "achtzigste", + 90L to "neunzigste", + 100L to "hundertste", + 1000L to "tausendste", + 1000000L to "millionste", + 1000000000L to "milliardste", + 1000000000000L to "billionste", + 1000000000000000L to "billiardste", + 1000000000000000000L to "trilliardste", + ) + } +} diff --git a/numbers/src/main/resources/config/de-de/date_time.json b/numbers/src/main/resources/config/de-de/date_time.json new file mode 100644 index 00000000..8a5aace2 --- /dev/null +++ b/numbers/src/main/resources/config/de-de/date_time.json @@ -0,0 +1,136 @@ +{ + "decade_format": { + "1": {"match": "^\\d$", "format": "{x}"}, + "2": {"match": "^1\\d$", "format": "{xx}"}, + "3": {"match": "^\\d0$", "format": "{x0}"}, + "4": {"match": "^[2-9]\\d$", "format": "{x} und {x0}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^1\\d{2}$", "format": "hundert"}, + "2": {"match": "^\\d{3}$", "format": "{x_in_x00} hundert"}, + "default": 
"{number}" + }, + "thousand_format": { + "1": {"match": "^10\\d\\d$", "format": "tausend"}, + "2": {"match": "^\\d0\\d{2}$", "format": "{x_in_x000} tausend"}, + "3": {"match": "^1\\d00$", "format": "{xx_in_xx00} hundert"}, + "4": {"match": "^\\d{2}00$", "format": "{x_in_x000} tausend {x_in_x00} hundert"}, + "5": {"match": "^\\d0\\d\\d$", "format": "{x_in_x000} tausend"}, + "6": {"match": "^1\\d{3}$", "format": "{xx_in_xx00}"}, + "7": {"match": "^\\d{4}$", "format": "{x_in_x000} tausend {x_in_x00} hundert"}, + "default": "{number}" + }, + "year_format": { + "1": {"match": "^1$", "format": "eins {bc}"}, + "2": {"match": "^\\d{1}?$", "format": "{formatted_decade} {bc}"}, + "3": {"match": "^\\d{2}?$", "format": "{formatted_decade} {bc}"}, + "4": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"}, + "5": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"}, + "6": {"match": "^\\d{2}00$", "format": "{formatted_thousand} {bc}"}, + "7": {"match": "^\\d00\\d$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "8": {"match": "^\\d{2}0\\d$", "format": "{formatted_thousand} hundert {formatted_decade} {bc}"}, + "9": {"match": "^1[2-9]\\d{2}$", "format": "{formatted_thousand} hundert {formatted_decade} {bc}"}, + "10": {"match": "^1\\d{3}$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "11": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "default": "{year} {bc}", + "bc": "v.d.Z." 
+ }, + "date_format": { + "date_full": "{weekday}, {day} {month}, {formatted_year}", + "date_full_no_year": "{weekday}, {day} {month}", + "date_full_no_year_month": "{weekday}, {day}", + "today": "heute", + "tomorrow": "morgen", + "yesterday": "gestern" + }, + "date_time_format": { + "date_time": "{formatted_date} um {formatted_time}" + }, + "weekday": { + "0": "Montag", + "1": "Dienstag", + "2": "Mittwoch", + "3": "Donnerstag", + "4": "Freitag", + "5": "Samstag", + "6": "Sonntag" + }, + "date": { + "1": "erster", + "2": "zweiter", + "3": "dritter", + "4": "vierter", + "5": "fünfter", + "6": "sechster", + "7": "siebter", + "8": "achter", + "9": "neunter", + "10": "zehnter", + "11": "elfter", + "12": "zwölfter", + "13": "dreizehnter", + "14": "vierzehnter", + "15": "fünfzehnter", + "16": "sechzehnter", + "17": "siebzehnter", + "18": "achtzehnter", + "19": "neunzehnter", + "20": "zwanzigster", + "21": "einundzwanzigster", + "22": "zweiundzwanzigster", + "23": "dreiundzwanzigster", + "24": "vierundzwanzigster", + "25": "fünfundzwanzigster", + "26": "sechsundzwanzigster", + "27": "siebenundzwanzigster", + "28": "achtundzwanzigster", + "29": "neunundzwanzigster", + "30": "dreißigster", + "31": "einunddreißigster" + }, + "month": { + "1": "Januar", + "2": "Februar", + "3": "März", + "4": "April", + "5": "Mai", + "6": "Juni", + "7": "Juli", + "8": "August", + "9": "September", + "10": "Oktober", + "11": "November", + "12": "Dezember" + }, + "number": { + "0": "null", + "1": "ein", + "2": "zwei", + "3": "drei", + "4": "vier", + "5": "fünf", + "6": "sechs", + "7": "sieben", + "8": "acht", + "9": "neun", + "10": "zehn", + "11": "elf", + "12": "zwölf", + "13": "dreizehn", + "14": "vierzehn", + "15": "fünfzehn", + "16": "sechzehn", + "17": "siebzehn", + "18": "achtzehn", + "19": "neunzehn", + "20": "zwanzig", + "30": "dreißig", + "40": "vierzig", + "50": "fünfzig", + "60": "sechzig", + "70": "siebzig", + "80": "achtzig", + "90": "neunzig" + } +} diff --git 
a/numbers/src/main/resources/config/de-de/day.word b/numbers/src/main/resources/config/de-de/day.word new file mode 100644 index 00000000..1aa7c259 --- /dev/null +++ b/numbers/src/main/resources/config/de-de/day.word @@ -0,0 +1 @@ +Tag diff --git a/numbers/src/main/resources/config/de-de/days.word b/numbers/src/main/resources/config/de-de/days.word new file mode 100644 index 00000000..4c2fd37c --- /dev/null +++ b/numbers/src/main/resources/config/de-de/days.word @@ -0,0 +1 @@ +Tage diff --git a/numbers/src/main/resources/config/de-de/hour.word b/numbers/src/main/resources/config/de-de/hour.word new file mode 100644 index 00000000..7e69c570 --- /dev/null +++ b/numbers/src/main/resources/config/de-de/hour.word @@ -0,0 +1 @@ +Stunde diff --git a/numbers/src/main/resources/config/de-de/hours.word b/numbers/src/main/resources/config/de-de/hours.word new file mode 100644 index 00000000..3c728ba8 --- /dev/null +++ b/numbers/src/main/resources/config/de-de/hours.word @@ -0,0 +1 @@ +Stunden diff --git a/numbers/src/main/resources/config/de-de/minute.word b/numbers/src/main/resources/config/de-de/minute.word new file mode 100644 index 00000000..de476669 --- /dev/null +++ b/numbers/src/main/resources/config/de-de/minute.word @@ -0,0 +1 @@ +Minute diff --git a/numbers/src/main/resources/config/de-de/minutes.word b/numbers/src/main/resources/config/de-de/minutes.word new file mode 100644 index 00000000..bdc262ec --- /dev/null +++ b/numbers/src/main/resources/config/de-de/minutes.word @@ -0,0 +1 @@ +Minuten diff --git a/numbers/src/main/resources/config/de-de/second.word b/numbers/src/main/resources/config/de-de/second.word new file mode 100644 index 00000000..e658c219 --- /dev/null +++ b/numbers/src/main/resources/config/de-de/second.word @@ -0,0 +1 @@ +Sekunde diff --git a/numbers/src/main/resources/config/de-de/seconds.word b/numbers/src/main/resources/config/de-de/seconds.word new file mode 100644 index 00000000..2c54f29c --- /dev/null +++ 
b/numbers/src/main/resources/config/de-de/seconds.word @@ -0,0 +1 @@ +Sekunden diff --git a/numbers/src/main/resources/config/de-de/tokenizer.json b/numbers/src/main/resources/config/de-de/tokenizer.json new file mode 100644 index 00000000..e475322e --- /dev/null +++ b/numbers/src/main/resources/config/de-de/tokenizer.json @@ -0,0 +1,461 @@ +{ + "spaces": " \t\n\f\r:;_!?<>|=()[]{}»«*~^`'\"", + "characters_as_word": "%‰#-+.,/", + "raw_number_categories": [ + "number", + "raw" + ], + "plural_endings": [ + "er" + ], + "word_matches": [ + { + "categories": [ + "ignore" + ], + "values": [ + "und" + ] + }, + { + "categories": [ + "ignore", + "thousand_separator" + ], + "values": [ + "." + ] + }, + { + "categories": [ + "ordinal_suffix" + ], + "values": [ + "ter", + "te", + "tes", + "." + ] + }, + { + "categories": [ + "point" + ], + "values": [ + "punkt", + "komma", + ".", + "," + ] + }, + { + "categories": [ + "point", + "ignore" + ], + "values": [ + ",", + "." + ] + }, + { + "categories": [ + "fraction_separator" + ], + "values": [ + "geteilt", + "/" + ] + }, + { + "categories": [ + "fraction_separator_secondary" + ], + "values": [ + "durch" + ] + }, + { + "categories": [ + "sign", + "positive" + ], + "values": [ + "plus", + "+" + ] + }, + { + "categories": [ + "sign", + "negative" + ], + "values": [ + "minus", + "-" + ] + }, + { + "categories": [ + "duration_separator" + ], + "values": [ + "von" + ] + } + ], + "number_mappings": [ + { + "categories": [ + "number", + "digit", + "digit_after_point" + ], + "values": { + "null": 0, + "eins": 1, + "zwei": 2, + "drei": 3, + "vier": 4, + "fünf": 5, + "sechs": 6, + "sieben": 7, + "acht": 8, + "neun": 9 + } + }, + { + "categories": [ + "number", + "digit_after_point" + ], + "values": { + "null": 0 + } + }, + { + "categories": [ + "number", + "teen" + ], + "values": { + "zehn": 10, + "elf": 11, + "zwölf": 12, + "dreizehn": 13, + "vierzehn": 14, + "fünfzehn": 15, + "sechzehn": 16, + "siebzehn": 17, + "achtzehn": 18, + 
"neunzehn": 19 + } + }, + { + "categories": [ + "number", + "tens" + ], + "values": { + "zwanzig": 20, + "dreißig": 30, + "vierzig": 40, + "fünfzig": 50, + "sechzig": 60, + "siebzig": 70, + "achtzig": 80, + "neunzig": 90 + } + }, + { + "categories": [ + "number", + "hundred" + ], + "values": { + "hundert": 100 + } + }, + { + "categories": [ + "number", + "multiplier" + ], + "values": { + "tausend": 1000, + "million": 1000000, + "milliarde": 1000000000, + "billion": 1000000000000, + "billiarde": 1000000000000000, + "trillion": 1000000000000000000 + } + }, + { + "categories": [ + "number", + "ordinal", + "digit" + ], + "values": { + "erster": 1, + "erste": 1, + "erstes": 1, + "zweiter": 2, + "zweite": 2, + "zweites": 2, + "dritter": 3, + "dritte": 3, + "drittes": 3, + "vierter": 4, + "vierte": 4, + "viertes": 4, + "fünfter": 5, + "fünfte": 5, + "fünftes": 5, + "sechster": 6, + "sechste": 6, + "sechstes": 6, + "siebter": 7, + "siebte": 7, + "siebtes": 7, + "achter": 8, + "achte": 8, + "achtes": 8, + "neunter": 9 + "neunte": 9 + "neuntes": 9 + } + }, + { + "categories": [ + "number", + "ordinal", + "teen" + ], + "values": { + "zehnter": 10, + "zehnte": 10, + "zehntes": 10, + "elfter": 11, + "elfte": 11, + "elftes": 11, + "zwölfter": 12, + "zwölfte": 12, + "zwölftes": 12, + "dreizehnter": 13, + "dreizehnte": 13, + "dreizehntes": 13, + "vierzehnter": 14, + "vierzehnte": 14, + "vierzehntes": 14, + "fünfzehnter": 15, + "fünfzehnte": 15, + "fünfzehntes": 15, + "sechzehnter": 16, + "sechzehnte": 16, + "sechzehntes": 16, + "siebzehnter": 17, + "siebzehnte": 17, + "siebzehntes": 17, + "achtzehnter": 18, + "achtzehnte": 18, + "achtzehntes": 18, + "neunzehnter": 19, + "neunzehnte": 19, + "neunzehntes": 19 + } + }, + { + "categories": [ + "number", + "ordinal", + "tens" + ], + "values": { + "zwanzigster": 20, + "zwanzigste": 20, + "zwanzigstes": 20, + "dreißigster": 30, + "dreißigste": 30, + "dreißigstes": 30, + "vierzigster": 40, + "vierzigste": 40, + "vierzigstes": 40, + 
"fünfzigster": 50, + "fünfzigste": 50, + "fünfzigstes": 50, + "sechzigster": 60, + "sechzigste": 60, + "sechzigstes": 60, + "siebzigster": 70, + "siebzigste": 70, + "siebzigstes": 70, + "achtzigster": 80, + "achtzigste": 80, + "achtzigstes": 80, + "neunzigster": 90, + "neunzigste": 90, + "neunzigstes": 90 + } + }, + { + "categories": [ + "number", + "ordinal", + "hundred" + ], + "values": { + "hundertster": 100, + "hundertste": 100, + "hundertstes": 100 + } + }, + { + "categories": [ + "number", + "ordinal", + "multiplier" + ], + "values": { + "tausendster": 1000, + "tausendste": 1000, + "tausendstes": 1000, + "millionster": 1000000, + "millionste": 1000000, + "millionstes": 1000000, + "milliardster": 1000000000, + "milliardste": 1000000000, + "milliardstes": 1000000000, + "billionster": 1000000000000, + "billionste": 1000000000000, + "billionster": 1000000000000, + "billiardster": 1000000000000000, + "billiardste": 1000000000000000, + "billiardstes": 1000000000000000, + "trillionster": 1000000000000000000, + "trillionste": 1000000000000000000, + "trillionstes": 1000000000000000000 + } + }, + { + "categories": [ + "number", + "suffix_multiplier" + ], + "values": { + "halb": 0.5, + "halbe": 0.5, + "viertel": 0.25, + "paar": 2, + "dutzend": 12, + "prozent": 0.01, + "%": 0.01, + "promille": 0.001, + "promill": 0.001, + "‰": 0.001 + } + } + ], + "duration_words": { + "1 NANOS": [ + "Nanosekunde", + "Nanosekunden", + "ns" + ], + "1 MICROS": [ + "Mikrosekunde", + "Mikrosekunden", + "μs" + ], + "1 MILLIS": [ + "Millisekunden", + "Millisekunden", + "ms" + ], + "1 SECONDS": [ + "Sekunde", + "Sekunden", + "s", + "Sek.", + "sec" + ], + "1 MINUTES": [ + "Minute", + "Minuten", + "min", + "Min.", + "m" + ], + "1 HOURS": [ + "Stunde", + "Stunden", + "h", + "Std." + ], + "1 DAYS": [ + "Tag", + "Tage" + "T.", + "d", + "Tg." + ], + "1 WEEKS": [ + "Woche", + "Wochen", + "W.", + "Wo." + ], + "1 MONTHS": [ + "Monat", + "Monate", + "Mon.", + "M.", + "Mt.", + "Mo." 
+ ], + "1 YEARS": [ + "Jahr", + "Jahre", + "J.", + "Jr.", + "y", + "a" + ], + "1 DECADES": [ + "Jahrzehnt", + "Jahrzehnte", + "Dekade", + "Dekaden" + ], + "1 CENTURIES": [ + "Jahrhundert", + "Jahrhunderte", + "Jh.", + "Jhdt.", + "Jahrh." + ], + "1 MILLENNIA": [ + "Jahrtausend", + "Jahrtausende", + "Millennium", + "Millennien", + "Jt.", + "Jtsd.", + "Jahrt.", + "Jhtsd.", + "Jhtsde." + ] + }, + "duration_restrict_after_number": [ + "ns", + "μs", + "ms", + "s", + "m", + "h", + "T", + "W", + "M", + "J" + ] +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/de/DateTimeConfigTest.java b/numbers/src/test/java/org/dicio/numbers/lang/de/DateTimeConfigTest.java new file mode 100644 index 00000000..e5f99662 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/de/DateTimeConfigTest.java @@ -0,0 +1,10 @@ +package org.dicio.numbers.lang.de; + +import org.dicio.numbers.test.DateTimeConfigTestBase; + +public class DateTimeConfigTest extends DateTimeConfigTestBase { + @Override + public String configFolder() { + return "config/de-de"; + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/de/DateTimeTest.java b/numbers/src/test/java/org/dicio/numbers/lang/de/DateTimeTest.java new file mode 100644 index 00000000..4f73e6a0 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/de/DateTimeTest.java @@ -0,0 +1,46 @@ +package org.dicio.numbers.lang.de; + +import org.dicio.numbers.formatter.Formatter; +import org.dicio.numbers.test.DateTimeTestBase; +import org.junit.Test; + +import java.time.LocalDate; +import java.time.LocalDateTime; + +import static org.junit.Assert.assertEquals; + +public class DateTimeTest extends DateTimeTestBase { + + @Override + public String configFolder() { + return "config/de-de"; + } + + @Override + public Formatter buildNumberFormatter() { + return new GermanFormatter(); + } + + @Test + public void testNiceDate() { + // just check that the NumberParserFormatter functions do their job + assertEquals("Mittwoch, der 
achtundzwanzigste April zweitausendeinundzwanzig", + pf.niceDate(LocalDate.of(2021, 4, 28)).get()); + assertEquals("Sonntag, der dreizehnte August", + pf.niceDate(LocalDate.of(-84, 8, 13)).now(LocalDate.of(-84, 8, 23)).get()); + } + + @Test + public void testNiceYear() { + // just check that the NumberParserFormatter functions do their job + assertEquals("neunzehnhundertvierundachtzig", pf.niceYear(LocalDate.of(1984, 4, 28)).get()); + assertEquals("achthundertzehn v.d.Z.", pf.niceYear(LocalDate.of(-810, 8, 13)).get()); + } + + @Test + public void testNiceDateTime() { + // just check that the NumberParserFormatter functions do their job + assertEquals("Mittwoch, zwölfter September siebzehnhundertvierundsechzig, um zwölf Uhr mittags", pf.niceDateTime(LocalDateTime.of(1764, 9, 12, 12, 0)).get()); + assertEquals("Donnerstag, dritter November dreihundertachtundzwanzig v.d.Z. um fünf Uhr sieben", pf.niceDateTime(LocalDateTime.of(-328, 11, 3, 5, 7)).get()); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/de/ExtractDurationTest.java b/numbers/src/test/java/org/dicio/numbers/lang/de/ExtractDurationTest.java new file mode 100644 index 00000000..ea70e177 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/de/ExtractDurationTest.java @@ -0,0 +1,28 @@ +package org.dicio.numbers.lang.de; + +import static org.dicio.numbers.test.TestUtils.DAY; +import static org.dicio.numbers.test.TestUtils.t; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import org.dicio.numbers.ParserFormatter; +import org.dicio.numbers.test.WithTokenizerTestBase; +import org.junit.Test; + +public class ExtractDurationTest extends WithTokenizerTestBase { + @Override + public String configFolder() { + return "config/de-de"; + } + + @Test + public void testNumberParserExtractDuration() { + final ParserFormatter npf + = new ParserFormatter(null, null); + assertNull(npf.extractDuration("hallo wie geht's").getFirst()); + 
assertNull(npf.extractDuration("eine Milliarde Euro").shortScale(true).getFirst()); + assertNull(npf.extractDuration("eine Million").shortScale(false).getFirst()); + assertEquals(t(DAY), npf.extractDuration("vierundzwanzig Stunden sind nicht zwei Tage").getFirst().toJavaDuration()); + assertEquals(t(2 * DAY), npf.extractDuration("zwei Tage sind nicht vierundzwanzig Stunden").getFirst().toJavaDuration()); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/de/NiceDurationTest.java b/numbers/src/test/java/org/dicio/numbers/lang/de/NiceDurationTest.java new file mode 100644 index 00000000..e8ea62ff --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/de/NiceDurationTest.java @@ -0,0 +1,72 @@ +package org.dicio.numbers.lang.de; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.dicio.numbers.test.TestUtils.T; + +import org.dicio.numbers.formatter.Formatter; +import org.dicio.numbers.test.NiceDurationTestBase; +import org.junit.Test; + +public class NiceDurationTest extends NiceDurationTestBase { + + @Override + public Formatter buildNumberFormatter() { + return new GermanFormatter(); + } + + @Test + public void zero() { + assertDuration("null Sekunden", T, 0, 0, 0, 0); + assertDuration("0:00", F, 0, 0, 0, 0); + } + + @Test + public void speechOne() { + assertDuration("eine Sekunde", T, 0, 0, 0, 1); + assertDuration("eine Minute", T, 0, 0, 1, 0); + assertDuration("eine Stunde", T, 0, 1, 0, 0); + assertDuration("ein Tag", T, 1, 0, 0, 0); + } + + @Test + public void speechMany() { + assertDuration("fünf Sekunden", T, 0, 0, 0, 5); + assertDuration("zwei Minuten", T, 0, 0, 2, 0); + assertDuration("siebzehn Stunden", T, 0, 17, 0, 0); + assertDuration("vierundachtzig Tage", T, 84, 0, 0, 0); + } + + @Test + public void speech() { + assertDuration("sechs Tage dreiundzwanzig Stunden neunundfünfzig Minuten zweiunddreißig Sekunden", T, 6, 23, 59, 32); + assertDuration("neunzehn Tage zweiundfünfzig Minuten", T, 19, 0, 52, 0); + 
assertDuration("eine Stunde sechs Sekunden", T, 0, 1, 0, 6); + assertDuration("dreiundsechzig Tage vierundvierzig Sekunden", T, 63, 0, 0, 44); + assertDuration("ein Tag eine Stunde eine Minute eine Sekunde", T, 1, 1, 1, 1); + } + + @Test + public void noSpeechOne() { + assertDuration("0:01", F, 0, 0, 0, 1); + assertDuration("1:00", F, 0, 0, 1, 0); + assertDuration("1:00:00", F, 0, 1, 0, 0); + assertDuration("1d 0:00:00", F, 1, 0, 0, 0); + } + + @Test + public void noSpeechMany() { + assertDuration("0:39", F, 0, 0, 0, 39); + assertDuration("24:00", F, 0, 0, 24, 0); + assertDuration("3:00:00", F, 0, 3, 0, 0); + assertDuration("76d 0:00:00", F, 76, 0, 0, 0); + } + + @Test + public void noSpeech() { + assertDuration("6d 23:59:32", F, 6, 23, 59, 32); + assertDuration("19d 0:52:00", F, 19, 0, 52, 0); + assertDuration("1:00:06", F, 0, 1, 0, 6); + assertDuration("63d 0:00:44", F, 63, 0, 0, 44); + assertDuration("1d 1:01:01", F , 1, 1, 1, 1); + } +} diff --git a/numbers/src/test/java/org/dicio/numbers/lang/de/NiceNumberTest.java b/numbers/src/test/java/org/dicio/numbers/lang/de/NiceNumberTest.java new file mode 100644 index 00000000..89e6d607 --- /dev/null +++ b/numbers/src/test/java/org/dicio/numbers/lang/de/NiceNumberTest.java @@ -0,0 +1,65 @@ +package org.dicio.numbers.lang.de; + +import org.dicio.numbers.ParserFormatter; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Collections; + +import static org.dicio.numbers.test.TestUtils.F; +import static org.junit.Assert.assertEquals; + +public class NiceNumberTest { + + private static ParserFormatter pf; + + @BeforeClass + public static void setup() { + pf = new ParserFormatter(new GermanFormatter(), null); + } + + @Test + public void speech() { + assertEquals("vierunddreißig und einhalb", pf.niceNumber(34.5).get()); + assertEquals("minus achtzehn und drei Fünftel", pf.niceNumber(-18.6).get()); + assertEquals("achtundneunzig und achtzehn Neunzehntel", 
pf.niceNumber(98.947368421).get()); + assertEquals("minus fünf und sechs Elftel", pf.niceNumber(-5.5454545).get()); + assertEquals("sieben Neuntel", pf.niceNumber(7.0 / 9).get()); + assertEquals("minus zwei Siebzehntel", pf.niceNumber(-2.0 / 17).get()); + assertEquals("vierhundertfünfundsechzig", pf.niceNumber(465).get()); + assertEquals("minus einundneunzig", pf.niceNumber(-91).get()); + assertEquals("null", pf.niceNumber(0).get()); + } + + @Test + public void noSpeech() { + assertEquals("34 1/2", pf.niceNumber(34.5).speech(F).get()); + assertEquals("-18 3/5", pf.niceNumber(-18.6).speech(F).get()); + assertEquals("98 18/19", pf.niceNumber(98.947368421).speech(F).get()); + assertEquals("-5 6/11", pf.niceNumber(-5.5454545).speech(F).get()); + assertEquals("7/9", pf.niceNumber(7.0 / 9).speech(F).get()); + assertEquals("-2/17", pf.niceNumber(-2.0 / 17).speech(F).get()); + assertEquals("465", pf.niceNumber(465).speech(F).get()); + assertEquals("-91", pf.niceNumber(-91).speech(F).get()); + assertEquals("0", pf.niceNumber(0).speech(F).get()); + } + + @Test + public void customDenominators() { + assertEquals("minus vier und vier Zehntel", pf.niceNumber(-4.4).denominators(Arrays.asList(2, 3, 4, 6, 7, 8, 9, 10, 11)).get()); + assertEquals("-64 6/12", pf.niceNumber(-64.5).speech(F).denominators(Collections.singletonList(12)).get()); + assertEquals("minus drei und fünfhunderttausend Millionstel", pf.niceNumber(-3.5).denominators(Arrays.asList(1000000, 2000000)).get()); + assertEquals("9 1000000/2000000", pf.niceNumber(9.5).speech(F).denominators(Arrays.asList(2000000, 1000000)).get()); + assertEquals("null komma acht", pf.niceNumber(4.0 / 5).denominators(Arrays.asList(2, 3, 4)).get()); + } + + @Test + public void invalidFraction() { + assertEquals("eins komma acht vier", pf.niceNumber(1.837).get()); + assertEquals("minus achtunddreißig komma eins neun", pf.niceNumber(-38.192).get()); + assertEquals("3829.48", pf.niceNumber(3829.47832).speech(F).get()); + assertEquals("-7.19", 
pf.niceNumber(-7.1928).speech(F).get()); + assertEquals("-9322.38", pf.niceNumber(-9322 - 8.0 / 21).speech(F).get()); + } +} diff --git a/numbers/src/test/resources/config/de-de/date_time_test.json b/numbers/src/test/resources/config/de-de/date_time_test.json new file mode 100644 index 00000000..1bea06f6 --- /dev/null +++ b/numbers/src/test/resources/config/de-de/date_time_test.json @@ -0,0 +1,43 @@ +{ + "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "eins v.d.Z." }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "zehn v.d.Z." }, + "3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "zwei und neunzig v.d.Z." }, + "4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "acht hundert drei" }, + "5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "acht hundert elf" }, + "6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "vier hundert vier und fünfzig" }, + "7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tausend fünf" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tausend zwölf" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tausend sechs und vierzig" }, + "10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "achtzehn hundert sieben" }, + "11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "siebzehn hundert siebzehn" }, + "12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "neunzehn hundert acht und achtzig"}, + "13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend neun"}, + "14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend achtzehn"}, + "15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": 
"zwei tausend ein und zwanzig"}, + "16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend dreißig"}, + "17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "zwei tausend ein hundert" }, + "18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "tausend" }, + "19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend" }, + "20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "drei tausend ein hundert zwanzig v.d.Z." }, + "21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "drei tausend zwei hundert ein und vierzig v.d.Z." }, + "22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "fünf tausend zwei hundert" }, + "23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "elf hundert" }, + "24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "zwei tausend ein hundert" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "Dienstag, einunddreißigster Januar, zwei tausend siebzehn"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "Sonntag, vierter Februar, zwei tausend achtzehn"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "Sonntag, vierter Februar"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "Sonntag, vierter"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "morgen"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "heute"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "gestern"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": 
"Sonntag, vierter Februar"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "Sonntag, vierter Februar, zwei tausend achtzehn"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "Dienstag, einunddreißigster Januar, zwei tausend siebzehn um ein Uhr zweiundzwanzig nachmittags"}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "Dienstag, einunddreißigster Januar, zwei tausend siebzehn um dreizehn Uhr zweiundzwanzig"} + } +}