From 06982151af22bd119a0cd30bffa87628753b1c56 Mon Sep 17 00:00:00 2001
From: Koko
Date: Thu, 27 Feb 2025 09:48:00 +0800
Subject: [PATCH 001/199] update changelog

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 22246afb..eafe066a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,8 @@ On this on-development version, things can be broken. ## 0.4.1 +Released 27 Feb 2025 + ilo Token is now a lot faster. - Fix custom dictionary error messages.
@@ -24,6 +26,8 @@ ilo Token is now a lot faster. ## 0.4.0 +Released 19 Feb 2025 + ilo Token can now translate into verbs! This means it can translate sentences! It still can't do the following however:

From 0a31730a8d70c07916006886a3362a4fb35a254c Mon Sep 17 00:00:00 2001
From: Koko
Date: Thu, 27 Feb 2025 09:55:11 +0800
Subject: [PATCH 002/199] update changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index eafe066a..e40dac68 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,7 +19,7 @@ Released 27 Feb 2025 -ilo Token is now a lot faster. +ilo Token is now a lot faster. In exchange, it now uses more memory. - Fix custom dictionary error messages. - Fix custom dictionary not loading when ilo Token is newly loaded.

From ff1365762e92aa5cad378dcc608e23a61dd7ff52 Mon Sep 17 00:00:00 2001
From: Koko
Date: Thu, 27 Feb 2025 10:50:23 +0800
Subject: [PATCH 003/199] fix bundle code

---
 bundle.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bundle.ts b/bundle.ts
index e48cdfb5..e3e1a779 100644
--- a/bundle.ts
+++ b/bundle.ts
@@ -62,7 +62,7 @@ if (import.meta.main) { const buildDebounced = debounce((buildDictionary: boolean) => { task = task.then(async () => { await buildAll({ - minify: true, + minify: false, buildDictionary, checkDictionary: false, });

From 2294d951c616afa57482730eb99807931f320a17 Mon Sep 17 00:00:00 2001
From: Koko
Date: Thu, 27 Feb 2025 11:11:45 +0800
Subject: [PATCH 004/199] remove documentation comments

---
 src/array-result.ts            | 27 ----------
 src/dictionary.ts              |  2 -
 src/main.ts                    |  2 -
 src/mod.ts                     |  1 -
 src/parser/ast.ts              | 17 -------
 src/parser/filter.ts           | 18 -------
 src/parser/lexer.ts            | 52 -------------------
 src/parser/parser-lib.ts       | 91 ----------------------------------
 src/parser/parser.ts           | 43 ----------------
 src/parser/token.ts            |  4 --
 src/parser/ucsur.ts            |  3 --
 src/settings.ts                |  2 -
 src/translator/ast.ts          |  2 -
 telo-misikeke/telo-misikeke.js |  3 --
 telo-misikeke/update.ts        |  2 -
 15 files changed, 269 deletions(-)

diff --git a/src/array-result.ts b/src/array-result.ts
index d86154c5..4eeac0ef 100644
--- a/src/array-result.ts
+++ b/src/array-result.ts
@@ -1,5 +1,3 @@ -/** Module containing the Array Result data type. */ - import { distinctBy } from "@std/collections/distinct-by"; import { flattenError } from "./misc.ts";
@@ -7,9 +5,7 @@ export type ArrayResultOptions = { cause: unknown; isHtml: boolean; }; -/** Represents Error used by Array Result. */ export class ArrayResultError extends Error { - /** Determines whether the error message contains HTML. */ isHtml: boolean; constructor(message: string, options: Partial = {}) { super(message, { cause: options.cause }); this.isHtml = options.isHtml ?? false; this.name = "ArrayResultError"; } } -/** Represents Error due to things not implemented yet.
*/ export class TodoError extends ArrayResultError { constructor(functionality: string) { super(`${functionality} is not yet implemented`); this.name = "TodoError"; } } -/** Represents possibilities and error. */ export class ArrayResult { - /** Represents possibilities, considered error when the array is empty. */ readonly array: ReadonlyArray; - /** A list of all aggregated errors. */ readonly errors: ReadonlyArray; constructor(array?: ReadonlyArray | ArrayResultError); constructor(array: undefined, errors: ReadonlyArray); @@ -54,13 +46,9 @@ export class ArrayResult { static errors(errors: ReadonlyArray): ArrayResult { return new ArrayResult(undefined, errors); } - /** Returns true when the array is empty */ isError(): boolean { return this.array.length === 0; } - /** Filters array. For convenience, the mapper function can throw - * ArrayResultError; Other kinds of errors will be ignored. - */ filter(mapper: (value: T) => boolean): ArrayResult { return this.flatMap((value) => { if (mapper(value)) { @@ -70,19 +58,9 @@ export class ArrayResult { } }); } - /** - * Maps all values and returns new ArrayResult. For convenience, the mapper - * function can throw ArrayResultError; Other kinds of errors will be ignored. - */ map(mapper: (value: T) => U): ArrayResult { return this.flatMap((value) => new ArrayResult([mapper(value)])); } - /** - * Accepts mapper function that returns another ArrayResult. flatMap takes all - * values and flattens them into single array for ArrayResult. For convenience, - * the mapper function can throw ArrayResultError; Other kinds of errors will be - * ignored. - */ flatMap(mapper: (value: T) => ArrayResult): ArrayResult { if (this.isError()) { return this as unknown as ArrayResult; @@ -130,7 +108,6 @@ export class ArrayResult { return this; } } - /** Combines all ArrayResult. */ static concat(...arrayResults: Array>): ArrayResult { return arrayResults.reduce( (left, right) => { @@ -143,10 +120,6 @@ export class ArrayResult { new ArrayResult(), ); } - /** - * Combines all permutations of all ArrayResult into an ArrayResult of a single tuple - * or array. If some of the ArrayResult is an error, all errors are aggregated. - */ static combine>( ...arrayResults: { [I in keyof T]: ArrayResult } & { length: T["length"]; diff --git a/src/dictionary.ts b/src/dictionary.ts index 76203ddc..42b28af4 100644 --- a/src/dictionary.ts +++ b/src/dictionary.ts @@ -13,14 +13,12 @@ export const tokiPonaWordSet: Set = new Set(); update(); -/** Represents Error due to missing dictionary entry */ export class MissingEntryError extends ArrayResultError { constructor(kind: string, word: string) { super(`${kind} definition for the word "${word}" is missing`); this.name = "MissingEntryError"; } } -/** Updates custom dictionary. */ export function loadCustomDictionary(dictionaryText: string): void { const dictionary = parseDictionary(dictionaryText); customDictionary.clear(); diff --git a/src/main.ts b/src/main.ts index bf7bfd48..3fad7d75 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,5 +1,3 @@ -/** Module for main execution in the browser. 
*/ - import { dictionary } from "../dictionary/dictionary.ts"; import { asComment } from "../dictionary/misc.ts"; import PROJECT_DATA from "../project-data.json" with { type: "json" }; diff --git a/src/mod.ts b/src/mod.ts index 60402ce4..d4231207 100644 --- a/src/mod.ts +++ b/src/mod.ts @@ -12,7 +12,6 @@ export { clearCache } from "./parser/cache.ts"; export { defaultSettings, settings } from "./settings.ts"; export type { RedundancySettings, Settings } from "./settings.ts"; -/** Translates Toki Pona text into multiple English translations. */ export function translate(tokiPona: string): Array { const arrayResult = rawTranslate(tokiPona); if (!arrayResult.isError()) { diff --git a/src/parser/ast.ts b/src/parser/ast.ts index 6088c238..92cda87a 100644 --- a/src/parser/ast.ts +++ b/src/parser/ast.ts @@ -1,6 +1,3 @@ -/** Module for describing Toki Pona AST. */ - -/** Represents an emphasis particle. */ export type Emphasis = | { type: "word"; word: string } | { type: "long word"; word: string; length: number } @@ -15,21 +12,15 @@ export type SimpleWordUnit = export type HeadedWordUnit = & SimpleHeadedWordUnit & { emphasis: null | Emphasis }; -/** Represents a word unit. */ export type WordUnit = & SimpleWordUnit & { emphasis: null | Emphasis }; -/** Represents a single modifier. */ export type Modifier = | { type: "default"; word: WordUnit } | { type: "proper words"; words: string } | { type: "pi"; phrase: Phrase } | { type: "nanpa"; nanpa: WordUnit; phrase: Phrase } | ({ type: "quotation" } & Quotation); -/** - * Represents a phrase including preverbial phrases, quotations, and - * prepositional phrases intended for predicate. - */ export type Phrase = | { type: "default"; @@ -46,19 +37,16 @@ export type Phrase = } | ({ type: "preposition" } & Preposition) | ({ type: "quotation" } & Quotation); -/** Represents multiple phrases separated by repeated particle or "anu". */ export type MultiplePhrases = | { type: "single"; phrase: Phrase } | { type: "and conjunction"; phrases: Array } | { type: "anu"; phrases: Array }; -/** Represents a single prepositional phrase. */ export type Preposition = { preposition: HeadedWordUnit; modifiers: Array; phrases: MultiplePhrases & { type: "single" | "anu" }; emphasis: null | Emphasis; }; -/** Represents multiple predicates. */ export type Predicate = | { type: "single"; predicate: Phrase } | { @@ -69,7 +57,6 @@ export type Predicate = } | { type: "and conjunction"; predicates: Array } | { type: "anu"; predicates: Array }; -/** Represents a simple clause. */ export type Clause = | { type: "phrases"; phrases: MultiplePhrases } | { type: "o vocative"; phrases: MultiplePhrases } @@ -86,7 +73,6 @@ export type Clause = } | { type: "prepositions"; prepositions: Array } | ({ type: "quotation" } & Quotation); -/** Represents a clause including preclauses and postclauses. */ export type FullClause = | { type: "default"; @@ -97,20 +83,17 @@ export type FullClause = endingParticle: null | Emphasis; } | { type: "filler"; emphasis: Emphasis }; -/** Represents a single full sentence. */ export type Sentence = { laClauses: Array; finalClause: FullClause; interrogative: null | "seme" | "x ala x"; punctuation: string; }; -/** Represents quotation. */ export type Quotation = { sentences: Array; leftMark: string; rightMark: string; }; -/** The final representation of whole Toki Pona input text. 
*/ export type MultipleSentences = | { type: "single word"; word: string } | { type: "sentences"; sentences: Array }; diff --git a/src/parser/filter.ts b/src/parser/filter.ts index 1ac273bb..f8777e7f 100644 --- a/src/parser/filter.ts +++ b/src/parser/filter.ts @@ -1,5 +1,3 @@ -/** Module describing filter rules integrated within AST Parser. */ - import { settings } from "../settings.ts"; import { Clause, @@ -22,7 +20,6 @@ import { import { UnrecognizedError } from "./parser-lib.ts"; import { describe } from "./token.ts"; -/** Array of filter rules for a word unit. */ export const WORD_UNIT_RULES: Array<(wordUnit: WordUnit) => boolean> = [ // avoid "seme ala seme" (wordUnit) => { @@ -41,7 +38,6 @@ export const WORD_UNIT_RULES: Array<(wordUnit: WordUnit) => boolean> = [ return true; }, ]; -/** Array of filter rules for a single modifier. */ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ // quotation modifier cannot exist (modifier) => { @@ -173,7 +169,6 @@ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ return true; }, ]; -/** Array of filter rules for multiple modifiers. */ export const MULTIPLE_MODIFIERS_RULES: Array< (modifier: Array) => boolean > = [ @@ -248,7 +243,6 @@ export const MULTIPLE_MODIFIERS_RULES: Array< return true; }, ]; -/** Array of filter rules for a single phrase. */ export const PHRASE_RULE: Array<(phrase: Phrase) => boolean> = [ // Disallow quotation (phrase) => { @@ -310,7 +304,6 @@ export const PHRASE_RULE: Array<(phrase: Phrase) => boolean> = [ return true; }, ]; -/** Array of filter rules for preposition. */ export const PREPOSITION_RULE: Array<(phrase: Preposition) => boolean> = [ // Disallow preverb modifiers other than "ala" (preposition) => { @@ -357,7 +350,6 @@ export const PREPOSITION_RULE: Array<(phrase: Preposition) => boolean> = [ return true; }, ]; -/** Array of filter rules for clauses. */ export const CLAUSE_RULE: Array<(clause: Clause) => boolean> = [ // disallow preposition in subject (clause) => { @@ -508,7 +500,6 @@ export const SENTENCE_RULE: Array<(sentence: Sentence) => boolean> = [ return true; }, ]; -/** Array of filter rules for multiple sentences. */ export const MULTIPLE_SENTENCES_RULE: Array< (sentences: Array) => boolean > = [ @@ -520,20 +511,14 @@ export const MULTIPLE_SENTENCES_RULE: Array< return true; }, ]; -/** Helper function for generating filter function. */ export function filter( rules: Array<(value: T) => boolean>, ): (value: T) => boolean { return (value) => rules.every((rule) => rule(value)); } -/** Helper function for checking whether a modifier is numeric. */ function modifierIsNumeric(modifier: Modifier): boolean { return modifier.type === "default" && modifier.word.type === "number"; } -/** - * Helper function for checking if the modifiers is exactly just "ala" or - * nothing. - */ function modifiersIsAlaOrNone(modifiers: Array): boolean { switch (modifiers.length) { case 0: @@ -547,9 +532,6 @@ function modifiersIsAlaOrNone(modifiers: Array): boolean { return false; } } -/** - * Helper function for determining whether the phrase has a preposition inside. - */ function hasPrepositionInPhrase(phrase: Phrase): boolean { switch (phrase.type) { case "default": diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index 464c2fef..d01c7746 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -1,11 +1,3 @@ -/** - * Module for lexer. It is responsible for turning string into array of token - * trees. It also latinizes UCSUR characters. 
- * - * Note: the words lexer and parser are used interchangeably since they both - * have the same capabilities. - */ - import { settings } from "../settings.ts"; import { cache } from "./cache.ts"; import { @@ -42,53 +34,33 @@ import { const spacesWithoutNewline = match(/[^\S\n\r]*/, "spaces"); const newline = match(/[\n\r]\s*/, "newline"); -/** parses space. */ const spaces = sourceOnly( sequence(spacesWithoutNewline, choice(nothing, newline)), ); -/** Parses lowercase latin word. */ const latinWord = match(/[a-z][a-zA-Z]*/, "word").skip(spaces); -/** Parses variation selector. */ const variationSelector = match(/[\uFE00-\uFE0F]/, "variation selector"); -/** - * Parses UCSUR word, this doesn't parse space and so must be manually added if - * needed - */ const ucsur = match(UCSUR_CHARACTER_REGEX, "UCSUR glyph") .map((ucsur) => UCSUR_TO_LATIN.get(ucsur)!); -/** - * Parses special UCSUR character, this doesn't parse space and so must be - * manually added if needed - */ function specificSpecialUcsur(specialUcsur: string): Parser { return matchString( specialUcsur, SPECIAL_UCSUR_DESCRIPTIONS.get(specialUcsur)!, ); } -/** Parses a single UCSUR word. */ const singleUcsurWord = ucsur.skip(optionalAll(variationSelector)).skip(spaces); -/** Parses a joiner. */ const joiner = choiceOnlyOne( matchString("\u200D", "zero width joiner"), specificSpecialUcsur(STACKING_JOINER), specificSpecialUcsur(SCALING_JOINER), ); -/** - * Parses combined glyphs. The spaces after aren't parsed and so must be - * manually added by the caller. - */ const combinedGlyphs = sequence(ucsur, allAtLeastOnce(joiner.with(ucsur))) .map(([first, rest]) => [first, ...rest]); -/** Parses a word, either UCSUR or latin. */ const word = choiceOnlyOne(latinWord, singleUcsurWord); -/** Parses proper words spanning multiple words. */ const properWords = allAtLeastOnce( match(/[A-Z][a-zA-Z]*/, "proper word").skip(spaces), ) .map((array) => array.join(" ")) .map((words) => ({ type: "proper word", words, kind: "latin" })); -/** Parses a specific word, either UCSUR or latin. */ function specificWord(thatWord: string): Parser { return word.filter((thisWord) => { if (thatWord === thisWord) { @@ -98,13 +70,11 @@ function specificWord(thatWord: string): Parser { } }); } -/** Parses multiple a. */ const multipleA = sequence( specificWord("a"), count(allAtLeastOnce(specificWord("a"))), ) .map(([_, count]) => ({ type: "multiple a", count: count + 1 })); -/** Parses lengthened words. */ const longWord = choiceOnlyOne(matchString("a"), matchString("n")) .then((word) => count(allAtLeastOnce(matchString(word))) @@ -116,7 +86,6 @@ const longWord = choiceOnlyOne(matchString("a"), matchString("n")) ) .skip(spaces); -/** Parses X ala X constructions if allowed by the settings. */ const xAlaX = lazy(() => { if (settings.xAlaXPartialParsing) { return empty; @@ -129,7 +98,6 @@ const xAlaX = lazy(() => { }) .map((word) => ({ type: "x ala x", word })); -/** Parses a punctuation. */ const punctuation = choiceOnlyOne( match(/[.,:;?!…·。。︒\u{F199C}\u{F199D}]+/u, "punctuation") .map((punctuation) => @@ -142,9 +110,6 @@ const punctuation = choiceOnlyOne( newline.map(() => "."), ) .map((punctuation) => ({ type: "punctuation", punctuation })); -/** - * Parses cartouche element and returns the phonemes or letters it represents. 
- */ const cartoucheElement = choiceOnlyOne( singleUcsurWord .skip(match(/[\uFF1A\u{F199D}]/u, "full width colon").skip(spaces)), @@ -172,7 +137,6 @@ const cartoucheElement = choiceOnlyOne( .map((letter) => letter.toLowerCase()) .skip(spaces), ); -/** Parses a single cartouche. */ const cartouche = sequence( specificSpecialUcsur(START_OF_CARTOUCHE).skip(spaces), allAtLeastOnce(cartoucheElement), @@ -182,7 +146,6 @@ const cartouche = sequence( const word = words.join(""); return `${word[0].toUpperCase()}${word.slice(1)}`; }); -/** Parses multiple cartouches. */ const cartouches = allAtLeastOnce(cartouche) .map((words) => words.join(" ")) .map((words) => ({ @@ -190,12 +153,6 @@ const cartouches = allAtLeastOnce(cartouche) words, kind: "cartouche", })); -/** - * Parses long glyph container. - * - * spaces after the first glyph and the last glyph aren't parsed and so must be - * manually added by the caller if needed. - */ function longContainer( left: string, right: string, @@ -208,24 +165,16 @@ function longContainer( ) .map(([_, inside]) => inside); } -/** Parses long glyph container containing just spaces. */ const longSpaceContainer = longContainer( START_OF_LONG_GLYPH, END_OF_LONG_GLYPH, spacesWithoutNewline.map((space) => space.length), ) .skip(spaces); -/** - * Parses long glyph head. - * - * This doesn't parses space on the right and so must be manually added by the - * caller if needed. - */ const longGlyphHead = choiceOnlyOne( combinedGlyphs, ucsur.map((word) => [word]), ); -/** Parses long glyph that only contains spaces. */ const spaceLongGlyph = sequence(longGlyphHead, longSpaceContainer) .map(([words, spaceLength]) => ({ type: "space long glyph", @@ -258,7 +207,6 @@ const wordToken = word.map((word) => ({ type: "word", word })); Parser.startCache(cache); -/** Parses a token. */ export const token = choiceOnlyOne( longWord, xAlaX, diff --git a/src/parser/parser-lib.ts b/src/parser/parser-lib.ts index 350799d3..9d705acb 100644 --- a/src/parser/parser-lib.ts +++ b/src/parser/parser-lib.ts @@ -1,18 +1,10 @@ -/** - * A generic module for parser and parser combinator. It is used by both lexer - * and AST parser. - */ - import { memoize } from "@std/cache/memoize"; import { ArrayResult, ArrayResultError } from "../array-result.ts"; import { Cache, Clearable, Lazy } from "../cache.ts"; -/** A single parsing result. */ export type ValueRest = Readonly<{ rest: string; value: T }>; -/** A special kind of ArrayResult that parsers returns. */ export type ParserResult = ArrayResult>; -/** Wrapper of parser function with added methods for convenience. */ export class Parser { readonly #parser: (src: string) => ParserResult; static cache: null | Cache = null; @@ -28,10 +20,6 @@ export class Parser { parser(src: string): ParserResult { return ArrayResult.from(() => this.#parser(src)); } - /** - * Maps the parsing result. For convenience, the mapper function can throw - * an ArrayResultError; Other kinds of error are ignored. - */ map(mapper: (value: T) => U): Parser { return new Parser((src) => this @@ -39,19 +27,11 @@ export class Parser { .map(({ value, rest }) => ({ value: mapper(value), rest })) ); } - /** - * Filters ArrayResults. Instead of returning false, ArrayResultError must be thrown - * instead. - */ filter(mapper: (value: T) => boolean): Parser { return new Parser((src) => this.parser(src).filter(({ value }) => mapper(value)) ); } - /** - * Parses `this` then passes the parsing result in the mapper. The resulting - * parser is then also parsed. 
- */ then(mapper: (value: T) => Parser): Parser { const { cache } = Parser; return new Parser((src) => { @@ -67,11 +47,9 @@ export class Parser { sortBy(mapper: (value: T) => number): Parser { return this.sort((left, right) => mapper(left) - mapper(right)); } - /** Takes another parser and discards the parsing result of `this`. */ with(parser: Parser): Parser { return sequence(this, parser).map(([_, arrayResult]) => arrayResult); } - /** Takes another parser and discards its parsing result. */ skip(parser: Parser): Parser { return sequence(this, parser).map(([arrayResult]) => arrayResult); } @@ -98,50 +76,33 @@ export class Parser { return value; } } -/** Represents Error with unexpected and expected elements. */ export class UnexpectedError extends ArrayResultError { constructor(unexpected: string, expected: string) { super(`unexpected ${unexpected}. ${expected} were expected instead`); this.name = "UnexpectedError"; } } -/** Represents Error caused by unrecognized elements. */ export class UnrecognizedError extends ArrayResultError { constructor(element: string) { super(`${element} is unrecognized`); this.name = "UnrecognizedError"; } } -/** Parser that always outputs an error. */ export function error(error: ArrayResultError): Parser { return new Parser(() => { throw error; }); } -/** Parser that always outputs an empty ArrayResult. */ export const empty = new Parser(() => new ArrayResult()); -/** Parses nothing and leaves the source string intact. */ export const nothing = new Parser((src) => new ArrayResult([{ value: null, rest: src }]) ); export const emptyArray = nothing.map(() => []); -/** Parses without consuming the source string */ export function lookAhead(parser: Parser): Parser { return new Parser((src) => parser.parser(src).map(({ value }) => ({ value, rest: src })) ); } -/** - * Lazily evaluates the parser function only when needed. Useful for recursive - * parsers. - * - * # Notes - * - * This combinator contains memoization, for it to be effective: - * - * - Don't use it for combinators, use `variable` instead. - * - Declare the parser as global constant. - */ export function lazy(parser: () => Parser): Parser { const { cache } = Parser; if (Parser.cache != null) { @@ -152,19 +113,11 @@ export function lazy(parser: () => Parser): Parser { return new Parser((src) => Parser.inContext(parser, cache).parser(src)); } } -/** - * Evaluates all parsers on the same source string and sums it all on a single - * ArrayResult. - */ export function choice(...choices: Array>): Parser { return new Parser((src) => new ArrayResult(choices).flatMap((parser) => parser.parser(src)) ); } -/** - * Tries to evaluate each parsers one at a time and only only use the ArrayResult of - * the first parser that is successful. - */ export function choiceOnlyOne( ...choices: Array> ): Parser { @@ -181,18 +134,12 @@ export function choiceOnlyOne( empty, ); } -/** Combines `parser` and the `nothing` parser, and output `null | T`. */ export function optional(parser: Parser): Parser { return choice(parser, nothing); } -/** - * Like `optional` but when the parser is successful, it doesn't consider - * parsing nothing. - */ export function optionalAll(parser: Parser): Parser { return choiceOnlyOne(parser, nothing); } -/** Takes all parsers and applies them one after another. 
*/ export function sequence>( ...sequence: { [I in keyof T]: Parser } & { length: T["length"] } ): Parser { @@ -203,15 +150,6 @@ export function sequence>( nothing.map(() => []), ) as Parser; } -/** - * Parses `parser` multiple times and returns an `Array`. The resulting - * ArrayResult includes all ArrayResult from parsing nothing to parsing as many as - * possible. - * - * ## ⚠️ Warning - * - * Will cause infinite recursion if the parser can parse nothing. - */ export const many = memoize((parser: Parser): Parser> => choice( sequence(parser, lazy(() => many(parser))) @@ -219,25 +157,10 @@ export const many = memoize((parser: Parser): Parser> => emptyArray, ) ); -/** - * Like `many` but parses at least once. - * - * ## ⚠️ Warning - * - * Will cause infinite recursion if the parser can parse nothing. - */ export function manyAtLeastOnce(parser: Parser): Parser> { return sequence(parser, many(parser)) .map(([first, rest]) => [first, ...rest]); } -/** - * Parses `parser` multiple times and returns an `Array`. This function is - * exhaustive unlike `many`. - * - * ## ⚠️ Warning - * - * Will cause infinite recursion if the parser can parse nothing. - */ export const all = memoize((parser: Parser): Parser> => choiceOnlyOne( sequence(parser, lazy(() => all(parser))) @@ -245,13 +168,6 @@ export const all = memoize((parser: Parser): Parser> => emptyArray, ) ); -/** - * Like `all` but parses at least once. - * - * ## ⚠️ Warning - * - * Will cause infinite recursion if the parser can parse nothing. - */ export function allAtLeastOnce(parser: Parser): Parser> { return sequence(parser, all(parser)) .map(([first, rest]) => [first, ...rest]); @@ -275,10 +191,6 @@ function describeSource(src: string): string { } } } -/** - * Uses Regular Expression to create parser. The parser outputs - * RegExpMatchArray, which is what `string.match( ... )` returns. - */ export function matchCapture( regex: RegExp, description: string, @@ -298,7 +210,6 @@ export function matchCapture( export function match(regex: RegExp, description: string): Parser { return matchCapture(regex, description).map(([matched]) => matched); } -/** parses a string of consistent length. */ export function slice(length: number, description: string): Parser { return new Parser((src) => { if (src.length >= length) { @@ -310,7 +221,6 @@ export function slice(length: number, description: string): Parser { throw new UnexpectedError(describeSource(src), description); }); } -/** Parses a string that exactly matches the given string. */ export function matchString( match: string, description: string = `"${match}"`, @@ -323,7 +233,6 @@ export function matchString( }); } export const character = match(/./us, "character"); -/** Parses the end of text */ export const end = new Parser((src) => { if (src === "") { return new ArrayResult([{ value: null, rest: "" }]); diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 5c06dced..fa32ece9 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -1,8 +1,3 @@ -/** - * Module for AST Parser. It is responsible for turning an array of token tree - * into AST. - */ - import { ArrayResult } from "../array-result.ts"; import { contentWordSet, @@ -63,7 +58,6 @@ const spaces = match(/\s*/, "spaces"); Parser.startCache(cache); -/** Parses a specific type of token. */ function specificToken( type: T, ): Parser { @@ -75,21 +69,15 @@ function specificToken( } }); } -/** Parses comma. 
*/ const comma = specificToken("punctuation") .map(({ punctuation }) => punctuation) .filter((punctuation) => punctuation === ","); -/** Parses an optional comma. */ const optionalComma = optional(comma); -/** Parses a toki pona word. */ const word = specificToken("word").map(({ word }) => word); -/** Parses proper words spanning multiple words. */ const properWords = specificToken("proper word").map(({ words }) => words); -/** Parses a toki pona */ const punctuation = specificToken("punctuation").map(({ punctuation }) => punctuation ); -/** Parses word only from `set`. */ function wordFrom(set: Set, description: string): Parser { return word.filter((word) => { if (set.has(word)) { @@ -99,7 +87,6 @@ function wordFrom(set: Set, description: string): Parser { } }); } -/** Parses a specific word. */ function specificWord(thatWord: string): Parser { return word.filter((thisWord) => { if (thatWord === thisWord) { @@ -109,7 +96,6 @@ function specificWord(thatWord: string): Parser { } }); } -/** Parses an emphasis particle. */ const emphasis = choice( specificToken("space long glyph") .map((longGlyph) => { @@ -137,7 +123,6 @@ const emphasis = choice( .map((word) => ({ type: "word", word })), ); const optionalEmphasis = optional(emphasis); -/** Parses an X ala X construction. */ function xAlaX( useWord: Set, description: string, @@ -196,7 +181,6 @@ function simpleWordUnit( .map((word) => ({ type: "default", word })), ); } -/** Parses word unit except numbers. */ function wordUnit( word: Set, description: string, @@ -211,7 +195,6 @@ function wordUnit( })) .filter(filter(WORD_UNIT_RULES)); } -/** Parses a binary combined glyphs. */ function binaryWords( word: Set, description: string, @@ -230,7 +213,6 @@ function binaryWords( } }); } -/** Parses a word unit or a combined glyphs. */ function optionalCombined( word: Set, description: string, @@ -258,9 +240,6 @@ function wordToNumber(word: string): number { } return num; } -/** Parses number words in order other than "ale" and "ala". This can parse - * nothing and return 0. - */ const subAleNumber = sequence( many(specificWord("mute")), many(specificWord("luka")), @@ -280,9 +259,7 @@ const properSubAleNumber = subAleNumber.filter((number) => { return true; } }); -/** Parses "ale" or "ali". */ const ale = choice(specificWord("ale"), specificWord("ali")); -/** Parses number words including "nasin nanpa pona". */ const number = choice( specificWord("ala").map(() => 0), sequence( @@ -325,7 +302,6 @@ const number = choice( .map(([ale, sub]) => ale * 100 + sub) .filter((number) => number !== 0), ); -/** Parses phrases. */ const phrase: Parser = lazy(() => choice( sequence( @@ -381,7 +357,6 @@ const phrase: Parser = lazy(() => ) .filter(filter(PHRASE_RULE)) ); -/** Parses a "pi" construction. */ const pi = choice( sequence( specificToken("headed long glyph start") @@ -403,7 +378,6 @@ const pi = choice( .map(([_, phrase]) => phrase), specificWord("pi").with(phrase), ); -/** Parses multiple modifiers. */ const modifiers = sequence( many( choice( @@ -439,10 +413,6 @@ const modifiers = sequence( ...piModifiers, ]) .filter(filter(MULTIPLE_MODIFIERS_RULES)); -/** - * Parses nested phrases with given nesting rule, only accepting the top level - * operation. - */ function nestedPhrasesOnly( nestingRule: Array<"en" | "li" | "o" | "e" | "anu">, ): Parser { @@ -471,7 +441,6 @@ function nestedPhrasesOnly( })); } } -/** Parses nested phrases with given nesting rule. 
*/ function nestedPhrases( nestingRule: Array<"en" | "li" | "o" | "e" | "anu">, ): Parser { @@ -485,13 +454,11 @@ function nestedPhrases( ); } } -/** Parses phrases separated by "en" or "anu". */ const subjectPhrases = choice( nestedPhrasesOnly(["en", "anu"]), nestedPhrasesOnly(["anu", "en"]), phrase.map((phrase) => ({ type: "single", phrase })), ); -/** Parses prepositional phrase. */ const preposition = choice( sequence( specificToken("headless long glyph start"), @@ -580,9 +547,6 @@ const preposition = choice( })), ) .filter(filter(PREPOSITION_RULE)); -/** - * Parses associated predicates whose predicates only uses top level operator. - */ function associatedPredicates( nestingRule: Array<"li" | "o" | "anu">, ): Parser { @@ -606,7 +570,6 @@ function associatedPredicates( prepositions, })); } -/** Parses multiple predicates without "li" nor "o" at the beginning. */ function multiplePredicates( nestingRule: Array<"li" | "o" | "anu">, ): Parser { @@ -649,7 +612,6 @@ function multiplePredicates( ); } } -/** Parses a single clause. */ const clause = choice( sequence( wordFrom(new Set(["mi", "sina"]), "mi/sina subject"), @@ -722,7 +684,6 @@ const clause = choice( })), ) .filter(filter(CLAUSE_RULE)); -/** Parses a single clause including preclause and postclause. */ const fullClause = choice( sequence( optional(emphasis.skip(optionalComma)), @@ -758,13 +719,11 @@ const fullClause = choice( .map((emphasis) => ({ type: "filler", emphasis })), ) .filter(filter(FULL_CLAUSE_RULE)); -/** parses "la" with optional comma around. */ const la = choice( comma.with(specificWord("la")), specificWord("la").skip(comma), specificWord("la"), ); -/** Parses a single full sentence with optional punctuations. */ const sentence = sequence( many(fullClause.skip(la)), fullClause, @@ -795,7 +754,6 @@ const sentence = sequence( }; }) .filter(filter(SENTENCE_RULE)); -/** A multiple sentence parser for final parser. */ const FULL_PARSER = spaces .with(choiceOnlyOne( wordFrom(tokiPonaWordSet, "Toki Pona word") @@ -806,7 +764,6 @@ const FULL_PARSER = spaces .filter(filter(MULTIPLE_SENTENCES_RULE)) .map((sentences) => ({ type: "sentences", sentences })), )); -/** Turns string into Toki Pona AST. */ export function parse(src: string): ArrayResult { return ArrayResult.from(() => { if (src.trim().length > 500) { diff --git a/src/parser/token.ts b/src/parser/token.ts index 20694b32..12c07442 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -1,8 +1,5 @@ -/** Module describing token. */ - import { repeatWithSpace } from "../misc.ts"; -/** Represents token. */ export type Token = | { type: "word"; word: string } | { @@ -37,7 +34,6 @@ export type Token = | { type: "x ala x"; word: string } | { type: "proper word"; words: string; kind: "cartouche" | "latin" } | { type: "punctuation"; punctuation: string }; -/** Describes a token. Useful for error messages. */ export function describe(token: Token): string { switch (token.type) { case "word": diff --git a/src/parser/ucsur.ts b/src/parser/ucsur.ts index fd848196..d6c2cfbc 100644 --- a/src/parser/ucsur.ts +++ b/src/parser/ucsur.ts @@ -1,8 +1,5 @@ -/** Module for constants and other helper items for UCSUR. 
*/ - // https://www.kreativekorp.com/ucsur/charts/sitelen.html -/** */ export const START_OF_CARTOUCHE = "\u{F1990}"; export const END_OF_CARTOUCHE = "\u{F1991}"; export const COMBINING_CARTOUCHE_EXTENSION = "\u{F1992}"; diff --git a/src/settings.ts b/src/settings.ts index 772bf4d5..e045454d 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -1,5 +1,3 @@ -/** Module for translation settings stored as a global state */ - /** * Options for determining how to show different forms or conjugations of nouns * or verbs. See diff --git a/src/translator/ast.ts b/src/translator/ast.ts index 43023d60..0c4fa8a4 100644 --- a/src/translator/ast.ts +++ b/src/translator/ast.ts @@ -1,5 +1,3 @@ -/** Module for describing English AST. */ - import * as Dictionary from "../../dictionary/type.ts"; export type Word = { diff --git a/telo-misikeke/telo-misikeke.js b/telo-misikeke/telo-misikeke.js index fa70e7f5..e7e7a356 100644 --- a/telo-misikeke/telo-misikeke.js +++ b/telo-misikeke/telo-misikeke.js @@ -1,5 +1,3 @@ -/** Glue code for telo misikeke */ - // @ts-self-types="./telo-misikeke.d.ts" import { @@ -12,7 +10,6 @@ import { build_rules, getMessage } from "./rules.js"; const RULES = build_rules(LINKU); -/** Gets all telo misikeke error messages. */ export function errors(text) { return new ParserWithCallbacks(RULES, false) .tokenize(text) diff --git a/telo-misikeke/update.ts b/telo-misikeke/update.ts index b5788941..8441d0db 100644 --- a/telo-misikeke/update.ts +++ b/telo-misikeke/update.ts @@ -1,5 +1,3 @@ -/** Codes for updating telo misikeke and Linku data. */ - import { retry } from "@std/async/retry"; import { assertOk } from "../src/misc.ts"; From cb3c888d5591369d401c3045d81abc3ff250b165 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 11:14:07 +0800 Subject: [PATCH 005/199] update project data --- project-data.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project-data.json b/project-data.json index 8300d6b0..a0dc1700 100644 --- a/project-data.json +++ b/project-data.json @@ -1,5 +1,5 @@ { - "version": "0.4.1", - "onDevelopment": false, + "version": "0.4.2", + "onDevelopment": true, "releaseDate": "2025-2-27" } From 176091743444464365711ebaa72d20475b413aad Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 11:42:14 +0800 Subject: [PATCH 006/199] improve sentence capitalization --- src/translator/composer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/translator/composer.ts b/src/translator/composer.ts index c8005008..5488ebc9 100644 --- a/src/translator/composer.ts +++ b/src/translator/composer.ts @@ -158,7 +158,7 @@ function clause(ast: English.Clause): string { function sentence(sentence: English.Sentence): string { return `${sentence.clauses.map(clause).join(", ")}${sentence.punctuation}` .replace( - /(? 
character.toUpperCase(), ); }

From 7e2733d297e51a14401139bd7aa2c6cb266b0daf Mon Sep 17 00:00:00 2001
From: Koko
Date: Thu, 27 Feb 2025 11:43:24 +0800
Subject: [PATCH 007/199] update changelog

---
 CHANGELOG.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e40dac68..5db540f6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,13 +7,19 @@ NOTE: Before publishing: - Remove details --> - ## 0.4.1

From 394fcd6b8740f4b29f1d0454feca17f8ca3cc9c3 Mon Sep 17 00:00:00 2001
From: Koko
Date: Thu, 27 Feb 2025 11:51:08 +0800
Subject: [PATCH 008/199] update task name

---
 README.md | 2 +-
 deno.json | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index cd470b87..f7c4969d 100644
--- a/README.md
+++ b/README.md
@@ -52,5 +52,5 @@ This will run a REPL that you can use to test ilo Token. To stop this command, simply press Ctrl + D or Ctrl + C. ``` -deno task run +deno task repl ```

diff --git a/deno.json b/deno.json
index 899e5aea..ecbfef66 100644
--- a/deno.json
+++ b/deno.json
@@ -4,7 +4,7 @@ }, "tasks": { "build": "deno run --allow-read --allow-write --allow-env --allow-net --allow-run --no-prompt ./bundle.ts build", - "run": { + "repl": { "command": "deno run --allow-env --no-prompt ./src/repl.ts", "dependencies": ["build-dictionary"] },

From 6131e9d4095a482bd1c4a465d1c173dca0b1ed4f Mon Sep 17 00:00:00 2001
From: Koko
Date: Thu, 27 Feb 2025 11:57:45 +0800
Subject: [PATCH 009/199] improve repl

---
 src/repl.ts | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/repl.ts b/src/repl.ts
index e66f8a58..8e876fb4 100644
--- a/src/repl.ts
+++ b/src/repl.ts
@@ -1,3 +1,5 @@ +import { unescape } from "@std/html/entities"; +import entityList from "@std/html/named-entity-list.json" with { type: "json" }; import { repeatArray } from "./misc.ts"; import { translate } from "./mod.ts";
@@ -14,8 +16,11 @@ if (import.meta.main) { const arrayResult = translate(input); for (const translation of arrayResult) { const count = translation.match(//g)?.length ??
0; + const text = unescape(translation.replaceAll(/<\/?strong>/g, "%c"), { + entityList, + }); console.log( - ` - ${translation.replaceAll(/<\/?strong>/g, "%c")}`, + ` - ${text}`, ...repeatArray(["font-weight: bold", ""], count).flat(), ); } From 27d7e95b19246f753b6a99cfbf9f00c7b23e71df Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 12:07:22 +0800 Subject: [PATCH 010/199] add target es version --- bundle.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/bundle.ts b/bundle.ts index e3e1a779..3b2ec8aa 100644 --- a/bundle.ts +++ b/bundle.ts @@ -25,6 +25,7 @@ function buildOptions(minify: boolean): ESBuild.BuildOptions { bundle: true, minify, sourcemap: "linked", + target: [`es${new Date().getFullYear() - 3}`], plugins: [...denoPlugins()], }; } From c6dfddbb367aba38c8e0d5f61db2162ab9c5b607 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 15:45:35 +0800 Subject: [PATCH 011/199] include errors in memoization --- src/parser/parser-lib.ts | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/parser/parser-lib.ts b/src/parser/parser-lib.ts index 9d705acb..20e38dba 100644 --- a/src/parser/parser-lib.ts +++ b/src/parser/parser-lib.ts @@ -6,20 +6,18 @@ export type ValueRest = Readonly<{ rest: string; value: T }>; export type ParserResult = ArrayResult>; export class Parser { - readonly #parser: (src: string) => ParserResult; + readonly parser: (src: string) => ParserResult; static cache: null | Cache = null; constructor(parser: (src: string) => ParserResult) { + const useParser = (src: string) => ArrayResult.from(() => parser(src)); if (Parser.cache != null) { const cache = new Map>(); Parser.addToCache(cache); - this.#parser = memoize(parser, { cache }); + this.parser = memoize(useParser, { cache }); } else { - this.#parser = parser; + this.parser = useParser; } } - parser(src: string): ParserResult { - return ArrayResult.from(() => this.#parser(src)); - } map(mapper: (value: T) => U): Parser { return new Parser((src) => this From 105fc2a68032f39cd1d1647892abb58da366bcda Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 15:52:54 +0800 Subject: [PATCH 012/199] organize imports --- src/translator/adjective.ts | 2 +- src/translator/clause.ts | 2 +- src/translator/composer.ts | 2 +- src/translator/determiner.ts | 2 +- src/translator/modifier.ts | 2 +- src/translator/noun.ts | 2 +- src/translator/phrase.ts | 2 +- src/translator/predicate.ts | 12 ++++++------ src/translator/sentence.ts | 2 +- src/translator/word-unit.ts | 2 +- 10 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/translator/adjective.ts b/src/translator/adjective.ts index fa47373d..0f238171 100644 --- a/src/translator/adjective.ts +++ b/src/translator/adjective.ts @@ -1,6 +1,6 @@ import * as Dictionary from "../../dictionary/type.ts"; -import { nullableAsArray, repeatWithSpace } from "../misc.ts"; import { ArrayResult } from "../array-result.ts"; +import { nullableAsArray, repeatWithSpace } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import * as English from "./ast.ts"; import { UntranslatableError } from "./error.ts"; diff --git a/src/translator/clause.ts b/src/translator/clause.ts index cc2b529c..a18a6735 100644 --- a/src/translator/clause.ts +++ b/src/translator/clause.ts @@ -1,6 +1,6 @@ import * as Dictionary from "../../dictionary/type.ts"; -import { nullableAsArray } from "../misc.ts"; import { ArrayResult } from "../array-result.ts"; +import { nullableAsArray } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import * as 
English from "./ast.ts"; import { FilteredOutError, TranslationTodoError } from "./error.ts"; diff --git a/src/translator/composer.ts b/src/translator/composer.ts index 5488ebc9..2c9ea70c 100644 --- a/src/translator/composer.ts +++ b/src/translator/composer.ts @@ -1,5 +1,5 @@ -import { nullableAsArray } from "../misc.ts"; import { ArrayResult } from "../array-result.ts"; +import { nullableAsArray } from "../misc.ts"; import { parse } from "../parser/parser.ts"; import * as English from "./ast.ts"; import { multipleSentences } from "./sentence.ts"; diff --git a/src/translator/determiner.ts b/src/translator/determiner.ts index 59f9f35d..674115f9 100644 --- a/src/translator/determiner.ts +++ b/src/translator/determiner.ts @@ -1,6 +1,6 @@ import * as Dictionary from "../../dictionary/type.ts"; -import { filterSet } from "../misc.ts"; import { ArrayResult } from "../array-result.ts"; +import { filterSet } from "../misc.ts"; import * as English from "./ast.ts"; import { FilteredOutError } from "./error.ts"; import { simpleNounForms } from "./noun.ts"; diff --git a/src/translator/modifier.ts b/src/translator/modifier.ts index 2c4f1c56..e1b7c565 100644 --- a/src/translator/modifier.ts +++ b/src/translator/modifier.ts @@ -1,5 +1,5 @@ -import { dictionary } from "../dictionary.ts"; import { ArrayResult } from "../array-result.ts"; +import { dictionary } from "../dictionary.ts"; import * as TokiPona from "../parser/ast.ts"; import * as Composer from "../parser/composer.ts"; import { adjective, compoundAdjective } from "./adjective.ts"; diff --git a/src/translator/noun.ts b/src/translator/noun.ts index 2b16f897..26f4a633 100644 --- a/src/translator/noun.ts +++ b/src/translator/noun.ts @@ -1,6 +1,6 @@ import * as Dictionary from "../../dictionary/type.ts"; -import { nullableAsArray } from "../misc.ts"; import { ArrayResult } from "../array-result.ts"; +import { nullableAsArray } from "../misc.ts"; import { settings } from "../settings.ts"; import { adjective } from "./adjective.ts"; import * as English from "./ast.ts"; diff --git a/src/translator/phrase.ts b/src/translator/phrase.ts index 72b4f326..ed647c16 100644 --- a/src/translator/phrase.ts +++ b/src/translator/phrase.ts @@ -1,5 +1,5 @@ -import { nullableAsArray } from "../misc.ts"; import { ArrayResult } from "../array-result.ts"; +import { nullableAsArray } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import * as Composer from "../parser/composer.ts"; import { AdjectiveWithInWay, fixAdjective } from "./adjective.ts"; diff --git a/src/translator/predicate.ts b/src/translator/predicate.ts index 92ec16a3..ab2e30ea 100644 --- a/src/translator/predicate.ts +++ b/src/translator/predicate.ts @@ -1,18 +1,18 @@ -import { forObject, PartialCompoundVerb } from "./verb.ts"; -import * as English from "./ast.ts"; -import { AdjectiveWithInWay } from "./adjective.ts"; +import { ArrayResult } from "../array-result.ts"; import { nullableAsArray } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; +import { AdjectiveWithInWay } from "./adjective.ts"; +import * as English from "./ast.ts"; +import { FilteredOutError, UntranslatableError } from "./error.ts"; +import { CONJUNCTION } from "./misc.ts"; import { multiplePhrases, phrase, phraseAsVerb, PhraseTranslation, } from "./phrase.ts"; -import { ArrayResult } from "../array-result.ts"; -import { FilteredOutError, UntranslatableError } from "./error.ts"; import { nounAsPreposition, preposition } from "./preposition.ts"; -import { CONJUNCTION } from "./misc.ts"; +import { forObject, 
PartialCompoundVerb } from "./verb.ts"; function verbObject( verb: PartialCompoundVerb, diff --git a/src/translator/sentence.ts b/src/translator/sentence.ts index 293acfa3..2636d2b0 100644 --- a/src/translator/sentence.ts +++ b/src/translator/sentence.ts @@ -1,6 +1,6 @@ +import { ArrayResult } from "../array-result.ts"; import { dictionary, MissingEntryError } from "../dictionary.ts"; import { nullableAsArray, repeatWithSpace } from "../misc.ts"; -import { ArrayResult } from "../array-result.ts"; import * as TokiPona from "../parser/ast.ts"; import { definitionAsPlainString } from "./as-string.ts"; import * as English from "./ast.ts"; diff --git a/src/translator/word-unit.ts b/src/translator/word-unit.ts index cafaacb3..cc0b1da1 100644 --- a/src/translator/word-unit.ts +++ b/src/translator/word-unit.ts @@ -1,5 +1,5 @@ -import { dictionary } from "../dictionary.ts"; import { ArrayResult } from "../array-result.ts"; +import { dictionary } from "../dictionary.ts"; import * as TokiPona from "../parser/ast.ts"; import { adjective, compoundAdjective } from "./adjective.ts"; import * as English from "./ast.ts"; From b00b16f335ac3c2cbecb614e062dcd4e8846cbfc Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 17:49:53 +0800 Subject: [PATCH 013/199] refactor ast --- src/parser/ast.ts | 29 +++--- src/parser/composer.ts | 46 +++++---- src/parser/extract.ts | 33 ++++-- src/parser/filter.ts | 127 ++++++++--------------- src/parser/parser.ts | 135 ++++++++++++++----------- src/translator/clause.ts | 10 ++ src/translator/sentence.ts | 202 +++++++++++++++---------------------- 7 files changed, 276 insertions(+), 306 deletions(-) diff --git a/src/parser/ast.ts b/src/parser/ast.ts index 92cda87a..da64000d 100644 --- a/src/parser/ast.ts +++ b/src/parser/ast.ts @@ -15,11 +15,12 @@ export type HeadedWordUnit = export type WordUnit = & SimpleWordUnit & { emphasis: null | Emphasis }; +export type Nanpa = { nanpa: WordUnit; phrase: Phrase }; export type Modifier = | { type: "default"; word: WordUnit } | { type: "proper words"; words: string } | { type: "pi"; phrase: Phrase } - | { type: "nanpa"; nanpa: WordUnit; phrase: Phrase } + | ({ type: "nanpa" } & Nanpa) | ({ type: "quotation" } & Quotation); export type Phrase = | { @@ -73,22 +74,26 @@ export type Clause = } | { type: "prepositions"; prepositions: Array } | ({ type: "quotation" } & Quotation); -export type FullClause = +export type ContextClause = + | Clause + | ({ type: "nanpa" } & Nanpa); +export type Sentence = | { type: "default"; - startingParticle: null | Emphasis; kinOrTaso: null | HeadedWordUnit; - clause: Clause; + laClauses: Array; + finalClause: Clause; anuSeme: null | HeadedWordUnit; - endingParticle: null | Emphasis; + emphasis: null | Emphasis; + punctuation: string; + interrogative: null | "seme" | "x ala x"; } - | { type: "filler"; emphasis: Emphasis }; -export type Sentence = { - laClauses: Array; - finalClause: FullClause; - interrogative: null | "seme" | "x ala x"; - punctuation: string; -}; + | { + type: "filler"; + emphasis: Emphasis; + punctuation: string; + interrogative: null | "seme" | "x ala x"; + }; export type Quotation = { sentences: Array; leftMark: string; diff --git a/src/parser/composer.ts b/src/parser/composer.ts index b98ce1ad..d4707b15 100644 --- a/src/parser/composer.ts +++ b/src/parser/composer.ts @@ -1,11 +1,12 @@ import { nullableAsArray, repeatWithSpace } from "../misc.ts"; import { Clause, + ContextClause, Emphasis, - FullClause, Modifier, MultiplePhrases, MultipleSentences, + Nanpa, Phrase, Predicate, 
Preposition, @@ -47,6 +48,9 @@ export function wordUnit(wordUnit: WordUnit): string { ] .join(" "); } +export function nanpa(nanpa: Nanpa): string { + return `${wordUnit(nanpa.nanpa)} ${phrase(nanpa.phrase)}`; +} export function modifier(modifier: Modifier): string { switch (modifier.type) { case "default": @@ -56,7 +60,7 @@ export function modifier(modifier: Modifier): string { case "pi": return `pi ${phrase(modifier.phrase)}`; case "nanpa": - return `${wordUnit(modifier.nanpa)} ${phrase(modifier.phrase)}`; + return nanpa(modifier); case "quotation": return quotation(modifier); } @@ -173,29 +177,35 @@ export function clause(clause: Clause): string { throw new Error(); } } -export function fullClause(value: FullClause): string { - switch (value.type) { +export function contextClause(contextClause: ContextClause): string { + switch (contextClause.type) { + case "nanpa": + return nanpa(contextClause); + default: + return clause(contextClause); + } +} +export function sentence(sentence: Sentence): string { + let text: string; + switch (sentence.type) { case "default": - return [ - ...emphasisAsArray(value.startingParticle), - ...nullableAsArray(value.kinOrTaso).map(wordUnit), - clause(value.clause), - ...nullableAsArray(value.anuSeme) + text = [ + ...nullableAsArray(sentence.kinOrTaso).map(wordUnit), + ...sentence.laClauses + .map(contextClause) + .map((clause) => `${clause} la`), + clause(sentence.finalClause), + ...nullableAsArray(sentence.anuSeme) .map(wordUnit) .map((word) => `anu ${word}`), - ...emphasisAsArray(value.endingParticle), + ...emphasisAsArray(sentence.emphasis), ] .join(" "); + break; case "filler": - return emphasis(value.emphasis); + text = emphasis(sentence.emphasis); + break; } -} -export function sentence(sentence: Sentence): string { - const text = [ - ...sentence.laClauses.map(fullClause).map((clause) => `${clause} la`), - fullClause(sentence.finalClause), - ] - .join(" "); return `${text}${sentence.punctuation}`; } export function quotation(quotation: Quotation): string { diff --git a/src/parser/extract.ts b/src/parser/extract.ts index c6e0e5bb..4fafb4e3 100644 --- a/src/parser/extract.ts +++ b/src/parser/extract.ts @@ -1,9 +1,10 @@ import { nullableAsArray } from "../misc.ts"; import { Clause, - FullClause, + ContextClause, Modifier, MultiplePhrases, + Nanpa, Phrase, Predicate, Preposition, @@ -11,6 +12,9 @@ import { WordUnit, } from "./ast.ts"; +export function everyWordUnitInNanpa(nanpa: Nanpa): Array { + return [nanpa.nanpa, ...everyWordUnitInPhrase(nanpa.phrase)]; +} export function everyWordUnitInModifier(modifier: Modifier): Array { switch (modifier.type) { case "default": @@ -18,7 +22,7 @@ export function everyWordUnitInModifier(modifier: Modifier): Array { case "pi": return everyWordUnitInPhrase(modifier.phrase); case "nanpa": - return [modifier.nanpa, ...everyWordUnitInPhrase(modifier.phrase)]; + return everyWordUnitInNanpa(modifier); case "quotation": case "proper words": return []; @@ -97,22 +101,29 @@ export function everyWordUnitInClause(clause: Clause): Array { return []; } } -export function everyWordUnitInFullClause(clause: FullClause): Array { - switch (clause.type) { +export function everyWordUnitInContextClause( + contextClause: ContextClause, +): Array { + switch (contextClause.type) { + case "nanpa": + return everyWordUnitInNanpa(contextClause); + default: + return everyWordUnitInClause(contextClause); + } +} +export function everyWordUnitInSentence(sentence: Sentence): Array { + switch (sentence.type) { case "default": return [ - 
...nullableAsArray(clause.kinOrTaso), - ...everyWordUnitInClause(clause.clause), - ...nullableAsArray(clause.anuSeme), + ...nullableAsArray(sentence.kinOrTaso), + ...sentence.laClauses.flatMap(everyWordUnitInContextClause), + ...everyWordUnitInClause(sentence.finalClause), + ...nullableAsArray(sentence.anuSeme), ]; case "filler": return []; } } -export function everyWordUnitInSentence(sentence: Sentence): Array { - return [...sentence.laClauses, sentence.finalClause] - .flatMap(everyWordUnitInFullClause); -} export function everyModifierInPhrase(phrase: Phrase): Array { switch (phrase.type) { case "default": diff --git a/src/parser/filter.ts b/src/parser/filter.ts index f8777e7f..e37f32a8 100644 --- a/src/parser/filter.ts +++ b/src/parser/filter.ts @@ -2,9 +2,9 @@ import { settings } from "../settings.ts"; import { Clause, Emphasis, - FullClause, Modifier, MultiplePhrases, + Nanpa, Phrase, Preposition, Sentence, @@ -38,38 +38,31 @@ export const WORD_UNIT_RULES: Array<(wordUnit: WordUnit) => boolean> = [ return true; }, ]; -export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ - // quotation modifier cannot exist - (modifier) => { - if (modifier.type === "quotation") { - throw new UnrecognizedError("quotation as modifier"); - } - return true; - }, +export const NANPA_RULES: Array<(nanpa: Nanpa) => boolean> = [ // disallow _nanpa ala nanpa_ (modifier) => { - if (modifier.type === "nanpa" && modifier.nanpa.type === "x ala x") { + if (modifier.nanpa.type === "x ala x") { throw new UnrecognizedError('"nanpa ala nanpa"'); } return true; }, // nanpa construction cannot contain preposition (modifier) => { - if (modifier.type === "nanpa" && modifier.phrase.type === "preposition") { + if (modifier.phrase.type === "preposition") { throw new UnrecognizedError("preposition inside nanpa"); } return true; }, // nanpa construction cannot contain preverb (modifier) => { - if (modifier.type === "nanpa" && modifier.phrase.type === "preverb") { + if (modifier.phrase.type === "preverb") { throw new UnrecognizedError("preverb inside nanpa"); } return true; }, // nanpa construction cannot contain quotation (modifier) => { - if (modifier.type === "nanpa" && modifier.phrase.type === "quotation") { + if (modifier.phrase.type === "quotation") { throw new UnrecognizedError("quotation inside nanpa"); } return true; @@ -77,7 +70,6 @@ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ // nanpa construction cannot contain pi (modifier) => { if ( - modifier.type === "nanpa" && modifier.phrase.type === "default" && modifier.phrase.modifiers.some((modifier) => modifier.type === "pi") ) { @@ -88,7 +80,6 @@ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ // nanpa construction cannot contain nanpa (modifier) => { if ( - modifier.type === "nanpa" && modifier.phrase.type === "default" && modifier.phrase.modifiers.some((modifier) => modifier.type === "nanpa") ) { @@ -96,6 +87,30 @@ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ } return true; }, + // nanpa cannot have emphasis particle + (modifier) => { + const { phrase } = modifier; + if ( + ( + phrase.type === "default" || + phrase.type === "preverb" || + phrase.type === "preposition" + ) && + phrase.emphasis != null + ) { + return false; + } + return true; + }, +]; +export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ + // quotation modifier cannot exist + (modifier) => { + if (modifier.type === "quotation") { + throw new UnrecognizedError("quotation as modifier"); + } + 
return true; + }, // pi cannot contain preposition (modifier) => { if (modifier.type === "pi" && modifier.phrase.type === "preposition") { @@ -151,23 +166,6 @@ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ } return true; }, - // nanpa cannot have emphasis particle - (modifier) => { - if (modifier.type === "nanpa") { - const { phrase } = modifier; - if ( - ( - phrase.type === "default" || - phrase.type === "preverb" || - phrase.type === "preposition" - ) && - phrase.emphasis != null - ) { - return false; - } - } - return true; - }, ]; export const MULTIPLE_MODIFIERS_RULES: Array< (modifier: Array) => boolean @@ -413,71 +411,28 @@ export const CLAUSE_RULE: Array<(clause: Clause) => boolean> = [ return true; }, ]; -export const FULL_CLAUSE_RULE: Array<(fullClase: FullClause) => boolean> = [ +export const SENTENCE_RULE: Array<(sentence: Sentence) => boolean> = [ // Prevent "taso ala taso" or "kin ala kin" - (fullClause) => { - if (fullClause.type === "default") { + (sentence) => { + if (sentence.type === "default") { if ( - fullClause.kinOrTaso != null && fullClause.kinOrTaso.type === "x ala x" + sentence.kinOrTaso != null && sentence.kinOrTaso.type === "x ala x" ) { - const { word } = fullClause.kinOrTaso; + const { word } = sentence.kinOrTaso; throw new UnrecognizedError(`"${word} ala ${word}"`); } } return true; }, -]; -export const SENTENCE_RULE: Array<(sentence: Sentence) => boolean> = [ - // If there is "la", there must be no filler - (sentence) => { - if (sentence.laClauses.length > 0) { - for (const clause of [...sentence.laClauses, sentence.finalClause]) { - if (clause.type === "filler") { - throw new UnrecognizedError('filler with "la"'); - } - } - } - return true; - }, // If there is "la", there can't be "taso" or "kin" (sentence) => { - if (sentence.laClauses.length > 0) { - for (const clause of [...sentence.laClauses, sentence.finalClause]) { - if (clause.type === "default" && clause.kinOrTaso != null) { - throw new UnrecognizedError( - `${clause.kinOrTaso.word} particle with "la"`, - ); - } - } - } - return true; - }, - // Only the last clause can have anu seme - (sentence) => { - for (const clause of sentence.laClauses) { - if (clause.type === "default" && clause.anuSeme != null) { - throw new UnrecognizedError("anu seme inside sentence"); - } - } - return true; - }, - // Only the first clause can have starting particle - (sentence) => { - for ( - const clause of [...sentence.laClauses, sentence.finalClause].slice(1) + if ( + sentence.type === "default" && sentence.laClauses.length > 0 && + sentence.kinOrTaso != null ) { - if (clause.type === "default" && clause.startingParticle != null) { - throw new UnrecognizedError("emphasis phrase inside sentence"); - } - } - return true; - }, - // Only the last clause can have ending particle - (sentence) => { - for (const clause of sentence.laClauses) { - if (clause.type === "default" && clause.endingParticle != null) { - throw new UnrecognizedError("emphasis phrase inside sentence"); - } + throw new UnrecognizedError( + `${sentence.kinOrTaso.word} particle with "la"`, + ); } return true; }, diff --git a/src/parser/parser.ts b/src/parser/parser.ts index fa32ece9..1ede0073 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -10,27 +10,29 @@ import { import { nullableAsArray } from "../misc.ts"; import { Clause, + ContextClause, Emphasis, - FullClause, HeadedWordUnit, Modifier, MultiplePhrases, MultipleSentences, + Nanpa, Phrase, Predicate, Preposition, + Sentence, SimpleHeadedWordUnit, SimpleWordUnit, 
} from "./ast.ts"; import { cache } from "./cache.ts"; -import { everyWordUnitInFullClause } from "./extract.ts"; +import { everyWordUnitInSentence } from "./extract.ts"; import { CLAUSE_RULE, filter, - FULL_CLAUSE_RULE, MODIFIER_RULES, MULTIPLE_MODIFIERS_RULES, MULTIPLE_SENTENCES_RULE, + NANPA_RULES, PHRASE_RULE, PREPOSITION_RULE, SENTENCE_RULE, @@ -43,6 +45,7 @@ import { count, end, lazy, + lookAhead, many, manyAtLeastOnce, match, @@ -357,6 +360,9 @@ const phrase: Parser = lazy(() => ) .filter(filter(PHRASE_RULE)) ); +const nanpa = sequence(wordUnit(new Set(["nanpa"]), '"nanpa"'), phrase) + .map(([nanpa, phrase]) => ({ nanpa, phrase })) + .filter(filter(NANPA_RULES)); const pi = choice( sequence( specificToken("headed long glyph start") @@ -395,11 +401,7 @@ const modifiers = sequence( .filter(filter(MODIFIER_RULES)), ), ), - many( - sequence(wordUnit(new Set(["nanpa"]), '"nanpa"'), phrase) - .map(([nanpa, phrase]) => ({ type: "nanpa", nanpa, phrase })) - .filter(filter(MODIFIER_RULES)), - ), + many(nanpa.map((nanpa) => ({ ...nanpa, type: "nanpa" }))), many( pi .map((phrase) => ({ type: "pi", phrase })) @@ -684,75 +686,86 @@ const clause = choice( })), ) .filter(filter(CLAUSE_RULE)); -const fullClause = choice( +const contextClause = choice( + nanpa.map((nanpa) => ({ ...nanpa, type: "nanpa" })), + clause, +); +const la = choice( + comma.with(specificWord("la")), + specificWord("la").skip(comma), + specificWord("la"), +); +const sentence = choice( sequence( - optional(emphasis.skip(optionalComma)), optional( wordUnit(new Set(["kin", "taso"]), "taso/kin").skip(optionalComma), ), + many(contextClause.skip(la)), clause, optional( optionalComma .with(specificWord("anu")) .with(wordUnit(new Set(["seme"]), '"seme"')), ), - optional(optionalComma.with(emphasis)), + optionalEmphasis, + choice( + punctuation, + end.map(() => ""), + lookAhead(sequence(emphasis, choice(punctuation, end))).map(() => ""), + ), ) - .map( - ([startingParticle, kinOrTaso, clause, anuSeme, endingParticle]) => ({ - type: "default", - startingParticle, - kinOrTaso, - clause, - anuSeme, - endingParticle, - }), - ) - .sortBy((clause) => { - if (clause.anuSeme == null) { + .sortBy(([_, _1, _2, anuSeme]) => { + if (anuSeme == null) { return 1; } else { return 0; } - }), - emphasis - .map((emphasis) => ({ type: "filler", emphasis })), -) - .filter(filter(FULL_CLAUSE_RULE)); -const la = choice( - comma.with(specificWord("la")), - specificWord("la").skip(comma), - specificWord("la"), -); -const sentence = sequence( - many(fullClause.skip(la)), - fullClause, - choice( - end.map(() => ""), - punctuation, - ), + }) + .map( + ( + [ + kinOrTaso, + laClauses, + finalClause, + anuSeme, + emphasis, + punctuation, + ], + ) => { + const sentence = { + type: "default" as const, + kinOrTaso, + laClauses, + finalClause, + anuSeme, + emphasis, + punctuation, + interrogative: null, + }; + const wordUnits = everyWordUnitInSentence(sentence); + let interrogative: null | "x ala x" | "seme" = null; + if (wordUnits.some((wordUnit) => wordUnit.type === "x ala x")) { + interrogative = "x ala x"; + } else if ( + wordUnits.some((wordUnit) => + (wordUnit.type === "default" || + wordUnit.type === "reduplication") && + wordUnit.word === "seme" + ) + ) { + interrogative = "seme"; + } + return { ...sentence, interrogative }; + }, + ), + sequence(emphasis, optional(punctuation)) + .map(([emphasis, punctuation]) => ({ + type: "filler", + emphasis, + punctuation: punctuation ?? 
"", + interrogative: null, + })), ) - .map(([laClauses, finalClause, punctuation]) => { - const wordUnits = [...laClauses, finalClause] - .flatMap(everyWordUnitInFullClause); - let interrogative: null | "x ala x" | "seme" = null; - if (wordUnits.some((wordUnit) => wordUnit.type === "x ala x")) { - interrogative = "x ala x"; - } else if ( - wordUnits.some((wordUnit) => - (wordUnit.type === "default" || wordUnit.type === "reduplication") && - wordUnit.word === "seme" - ) - ) { - interrogative = "seme"; - } - return { - laClauses, - finalClause, - interrogative, - punctuation, - }; - }) .filter(filter(SENTENCE_RULE)); const FULL_PARSER = spaces .with(choiceOnlyOne( diff --git a/src/translator/clause.ts b/src/translator/clause.ts index a18a6735..74838fec 100644 --- a/src/translator/clause.ts +++ b/src/translator/clause.ts @@ -110,3 +110,13 @@ export function clause(clause: TokiPona.Clause): ArrayResult { return new ArrayResult(new TranslationTodoError(clause.type)); } } +export function contextClause( + contextClause: TokiPona.ContextClause, +): ArrayResult { + switch (contextClause.type) { + case "nanpa": + return new ArrayResult(new TranslationTodoError("nanpa context clause")); + default: + return clause(contextClause); + } +} diff --git a/src/translator/sentence.ts b/src/translator/sentence.ts index 2636d2b0..d2b9663e 100644 --- a/src/translator/sentence.ts +++ b/src/translator/sentence.ts @@ -4,7 +4,7 @@ import { nullableAsArray, repeatWithSpace } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import { definitionAsPlainString } from "./as-string.ts"; import * as English from "./ast.ts"; -import { clause } from "./clause.ts"; +import { clause, contextClause } from "./clause.ts"; import { TranslationTodoError, UntranslatableError } from "./error.ts"; import { unemphasized } from "./word.ts"; @@ -40,9 +40,14 @@ function filler(filler: TokiPona.Emphasis): ArrayResult { function emphasisAsPunctuation( emphasis: null | TokiPona.Emphasis, interrogative: boolean, + originalPunctuation: string, ): string { if (emphasis == null) { - throw new UntranslatableError("missing emphasis", "punctuation"); + if (interrogative) { + return "?"; + } else { + return originalPunctuation; + } } if ( (emphasis.type === "word" || emphasis.type === "long word") && @@ -126,128 +131,85 @@ function anuSeme(seme: TokiPona.HeadedWordUnit): English.Clause { } function sentence( sentence: TokiPona.Sentence, + isFinal: boolean, ): ArrayResult { - // This relies on sentence filter, if some of those filters were disabled, - // this function might break. - if (sentence.interrogative === "x ala x") { - return new ArrayResult(new TranslationTodoError("x ala x")); - } - if (sentence.finalClause.type === "filler") { - return filler(sentence.finalClause.emphasis) - .map((interjection) => ({ - clauses: [{ - type: "interjection", - interjection: unemphasized(interjection), - }], - punctuation: sentence.punctuation, - })); - } else { - const startingParticle = ((sentence.laClauses[0] ?? 
sentence.finalClause) as - & TokiPona.FullClause - & { type: "default" }) - .startingParticle; - let startingFiller: ArrayResult; - if (startingParticle == null) { - startingFiller = new ArrayResult([null]); - } else { - startingFiller = filler(startingParticle) - .map((interjection) => ({ - type: "interjection", - interjection: { - word: interjection, - emphasis: false, - }, - })); - } - const laClauses = - (sentence.laClauses as Array) - .map(({ clause }) => clause); - const givenClauses = ArrayResult.combine(...laClauses.map(clause)) - .map((clauses) => - clauses.map((clause) => ({ - type: "dependent", - conjunction: { - word: "given", - emphasis: false, - }, - clause, - })) - ); - const { - kinOrTaso, - clause: lastTpClause, - anuSeme: tpAnuSeme, - endingParticle, - } = sentence.finalClause; - if (kinOrTaso != null) { - return new ArrayResult( - new TranslationTodoError(`"${kinOrTaso.word}" preclause`), - ); - } - const lastEngClause = clause(lastTpClause); - let right: Array; - if (tpAnuSeme == null) { - right = []; - } else { - right = [anuSeme(tpAnuSeme)]; - } - let interjectionClause: ArrayResult; - if ( - sentence.laClauses.length === 0 && kinOrTaso == null && - tpAnuSeme == null - ) { - interjectionClause = interjection(lastTpClause); - } else { - interjectionClause = new ArrayResult(); - } - const engClauses = ArrayResult.combine( - startingFiller, - givenClauses, - ArrayResult.concat(interjectionClause, lastEngClause), - ) - .map(([filler, givenClauses, lastClause]) => [ - ...nullableAsArray(filler), - ...givenClauses, - lastClause, - ...right, - ]); - let endingFiller: ArrayResult; - if (endingParticle == null) { - endingFiller = new ArrayResult([null]); - } else { - endingFiller = filler(endingParticle) - .map((interjection) => ({ - type: "interjection", - interjection: { - word: interjection, - emphasis: false, - }, - })); + return ArrayResult.from(() => { + if (sentence.interrogative === "x ala x") { + return new ArrayResult(new TranslationTodoError("x ala x")); } let punctuation: string; - if (sentence.interrogative) { - punctuation = "?"; + if (!isFinal && sentence.punctuation === "") { + punctuation = ","; } else { punctuation = sentence.punctuation; } - return ArrayResult.concat( - ArrayResult.combine( - engClauses, - ArrayResult.from(() => - new ArrayResult([emphasisAsPunctuation( - endingParticle, - sentence.interrogative != null, - )]) - ), - ) - .map(([clauses, punctuation]) => ({ clauses, punctuation })), - ArrayResult.combine(engClauses, endingFiller) - .map(([clauses, filler]) => ({ - clauses: [...clauses, ...nullableAsArray(filler)], + switch (sentence.type) { + case "default": { + const laClauses = sentence.laClauses; + const givenClauses = ArrayResult.combine( + ...laClauses.map(contextClause), + ) + .map((clauses) => + clauses.map((clause) => ({ + type: "dependent", + conjunction: { + word: "given", + emphasis: false, + }, + clause, + })) + ); + if (sentence.kinOrTaso != null) { + return new ArrayResult( + new TranslationTodoError(`"${sentence.kinOrTaso.word}" preclause`), + ); + } + const lastEngClause = clause(sentence.finalClause); + let right: Array; + if (sentence.anuSeme == null) { + right = []; + } else { + right = [anuSeme(sentence.anuSeme)]; + } + let interjectionClause: ArrayResult; + if ( + sentence.laClauses.length === 0 && sentence.kinOrTaso == null && + sentence.kinOrTaso == null + ) { + interjectionClause = interjection(sentence.finalClause); + } else { + interjectionClause = new ArrayResult(); + } + const engClauses = ArrayResult.combine( + 
givenClauses, + ArrayResult.concat(interjectionClause, lastEngClause), + ) + .map(([givenClauses, lastClause]) => [ + ...givenClauses, + lastClause, + ...right, + ]); + const usePunctuation = emphasisAsPunctuation( + sentence.emphasis, + sentence.interrogative != null, punctuation, - })), - ); - } + ); + return engClauses.map((clauses) => ({ + clauses, + punctuation: usePunctuation, + })); + } + case "filler": + return filler(sentence.emphasis) + .map((interjection) => ({ + clauses: [{ + type: "interjection", + interjection: unemphasized(interjection), + }], + punctuation, + })); + } + }); } export function multipleSentences( sentences: TokiPona.MultipleSentences, @@ -264,6 +226,10 @@ export function multipleSentences( .map((definition) => [definition]); } case "sentences": - return ArrayResult.combine(...sentences.sentences.map(sentence)); + return ArrayResult.combine( + ...sentences.sentences.map((value, i) => + sentence(value, i === sentences.sentences.length - 1) + ), + ); } } From 721b4d968015737e24eab865d729d99d9e6a0c51 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 18:27:37 +0800 Subject: [PATCH 014/199] separate emphasis and filler --- src/dictionary.ts | 2 + src/parser/ast.ts | 6 +- src/parser/composer.ts | 11 ++- src/parser/filter.ts | 39 --------- src/parser/lexer.ts | 29 ++++--- src/parser/parser.ts | 42 ++++++++-- src/translator/adjective.ts | 7 +- src/translator/sentence.ts | 162 ++++++++++++++++-------------------- 8 files changed, 140 insertions(+), 158 deletions(-) diff --git a/src/dictionary.ts b/src/dictionary.ts index 42b28af4..79730564 100644 --- a/src/dictionary.ts +++ b/src/dictionary.ts @@ -9,6 +9,7 @@ export const dictionary: Dictionary = new Map(); export const contentWordSet: Set = new Set(); export const prepositionSet: Set = new Set(); export const preverbSet: Set = new Set(); +export const fillerSet: Set = new Set(); export const tokiPonaWordSet: Set = new Set(); update(); @@ -58,6 +59,7 @@ function update(): void { (definition.type === "verb" && definition.predicateType != null) || definition.type === "modal verb", ); + addSet(fillerSet, (definition) => definition.type === "filler"); addSet(tokiPonaWordSet, () => true); } function addSet( diff --git a/src/parser/ast.ts b/src/parser/ast.ts index da64000d..8705b5c9 100644 --- a/src/parser/ast.ts +++ b/src/parser/ast.ts @@ -1,6 +1,8 @@ export type Emphasis = | { type: "word"; word: string } - | { type: "long word"; word: string; length: number } + | { type: "long word"; word: string; length: number }; +export type Filler = + | Emphasis | { type: "multiple a"; count: number }; export type SimpleHeadedWordUnit = | { type: "default"; word: string } @@ -90,7 +92,7 @@ export type Sentence = } | { type: "filler"; - emphasis: Emphasis; + filler: Filler; punctuation: string; interrogative: null | "seme" | "x ala x"; }; diff --git a/src/parser/composer.ts b/src/parser/composer.ts index d4707b15..a998e8f7 100644 --- a/src/parser/composer.ts +++ b/src/parser/composer.ts @@ -3,6 +3,7 @@ import { Clause, ContextClause, Emphasis, + Filler, Modifier, MultiplePhrases, MultipleSentences, @@ -22,8 +23,14 @@ export function emphasis(emphasis: Emphasis): string { return emphasis.word; case "long word": return emphasis.word.repeat(emphasis.length); + } +} +export function filler(filler: Filler): string { + switch (filler.type) { case "multiple a": - return repeatWithSpace("a", emphasis.count); + return repeatWithSpace("a", filler.count); + default: + return emphasis(filler); } } function emphasisAsArray(value: null | 
Emphasis): Array { @@ -203,7 +210,7 @@ export function sentence(sentence: Sentence): string { .join(" "); break; case "filler": - text = emphasis(sentence.emphasis); + text = filler(sentence.filler); break; } return `${text}${sentence.punctuation}`; diff --git a/src/parser/filter.ts b/src/parser/filter.ts index e37f32a8..a9f3b851 100644 --- a/src/parser/filter.ts +++ b/src/parser/filter.ts @@ -1,7 +1,6 @@ import { settings } from "../settings.ts"; import { Clause, - Emphasis, Modifier, MultiplePhrases, Nanpa, @@ -18,7 +17,6 @@ import { everyWordUnitInSentence, } from "./extract.ts"; import { UnrecognizedError } from "./parser-lib.ts"; -import { describe } from "./token.ts"; export const WORD_UNIT_RULES: Array<(wordUnit: WordUnit) => boolean> = [ // avoid "seme ala seme" @@ -28,15 +26,6 @@ export const WORD_UNIT_RULES: Array<(wordUnit: WordUnit) => boolean> = [ } return true; }, - // "n" and multiple "a" cannot modify a word - (wordUnit) => { - if (isMultipleAOrN(wordUnit.emphasis)) { - throw new UnrecognizedError( - `${describe(wordUnit.emphasis!)} modifying a word`, - ); - } - return true; - }, ]; export const NANPA_RULES: Array<(nanpa: Nanpa) => boolean> = [ // disallow _nanpa ala nanpa_ @@ -272,18 +261,6 @@ export const PHRASE_RULE: Array<(phrase: Phrase) => boolean> = [ phrase.type !== "default" || phrase.emphasis == null || phrase.modifiers.length > 0, - // "n" and multiple "a" cannot modify a phrase - (wordUnit) => { - if ( - (wordUnit.type === "default" || wordUnit.type === "preverb") && - isMultipleAOrN(wordUnit.emphasis) - ) { - throw new UnrecognizedError( - `${describe(wordUnit.emphasis!)} modifying a word`, - ); - } - return true; - }, // For preverbs, inner phrase must not have emphasis particle (phrase) => phrase.type !== "preverb" || @@ -320,15 +297,6 @@ export const PREPOSITION_RULE: Array<(phrase: Preposition) => boolean> = [ } return true; }, - // "n" and multiple "a" cannot modify a preposition - (wordUnit) => { - if (isMultipleAOrN(wordUnit.emphasis)) { - throw new UnrecognizedError( - `${describe(wordUnit.emphasis!)} modifying a word`, - ); - } - return true; - }, // Preposition with "anu" must not have emphasis particle (preposition) => preposition.emphasis == null || preposition.phrases.type !== "anu", @@ -499,13 +467,6 @@ function hasPrepositionInPhrase(phrase: Phrase): boolean { return false; } } -function isMultipleAOrN(emphasis: null | Emphasis): boolean { - return emphasis != null && - (emphasis.type === "multiple a" || - ((emphasis.type === "word" || - emphasis.type === "long word") && - emphasis.word === "n")); -} function phraseHasTopLevelEmphasis(phrase: Phrase): boolean { switch (phrase.type) { case "default": diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index d01c7746..358c698a 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -75,16 +75,22 @@ const multipleA = sequence( count(allAtLeastOnce(specificWord("a"))), ) .map(([_, count]) => ({ type: "multiple a", count: count + 1 })); -const longWord = choiceOnlyOne(matchString("a"), matchString("n")) - .then((word) => - count(allAtLeastOnce(matchString(word))) - .map((count) => ({ - type: "long word", - word, - length: count + 1, - })) - ) - .skip(spaces); + +const repeatingLetter = match(/[a-zA-Z]/, "latin letter") + .then((letter) => + count(allAtLeastOnce(matchString(letter))).map<[string, number]>( + (count) => [letter, count + 1], + ) + ); +const longWord = allAtLeastOnce(repeatingLetter) + .skip(spaces) + .map((letters) => { + const word = letters.map(([letter]) => letter).join(""); 
+ const length = letters.reduce((rest, [_, count]) => rest + count, 0) - + word.length + 1; + return { type: "long word", word, length }; + }) + .filter(({ length }) => length > 1); const xAlaX = lazy(() => { if (settings.xAlaXPartialParsing) { @@ -208,10 +214,9 @@ const wordToken = word.map((word) => ({ type: "word", word })); Parser.startCache(cache); export const token = choiceOnlyOne( - longWord, xAlaX, multipleA, - wordToken, + choice(longWord, wordToken), properWords, // UCSUR only spaceLongGlyph, diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 1ede0073..2cd1deb7 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -2,6 +2,7 @@ import { ArrayResult } from "../array-result.ts"; import { contentWordSet, dictionary, + fillerSet, MissingEntryError, prepositionSet, preverbSet, @@ -12,6 +13,7 @@ import { Clause, ContextClause, Emphasis, + Filler, HeadedWordUnit, Modifier, MultiplePhrases, @@ -105,12 +107,12 @@ const emphasis = choice( if (longGlyph.words.length !== 1) { throw new UnexpectedError( describe({ type: "combined glyphs", words: longGlyph.words }), - '"ala"', + '"a"', ); } const word = longGlyph.words[0]; - if (word !== "n" && word !== "a") { - throw new UnexpectedError(`"${word}"`, '"a" or "n"'); + if (word !== "a") { + throw new UnexpectedError(`"${word}"`, '"a"'); } return { type: "long word", @@ -118,11 +120,11 @@ const emphasis = choice( length: longGlyph.spaceLength, }; }), - specificToken("multiple a") - .map(({ count }) => ({ type: "multiple a", count })), specificToken("long word") + // TODO: error message + .filter(({ word }) => word === "a") .map(({ word, length }) => ({ type: "long word", word, length })), - wordFrom(new Set(["a", "n"]), "a/n") + wordFrom(new Set(["a"]), '"a"') .map((word) => ({ type: "word", word })), ); const optionalEmphasis = optional(emphasis); @@ -695,6 +697,28 @@ const la = choice( specificWord("la").skip(comma), specificWord("la"), ); +const filler = choice( + specificToken("space long glyph") + .map((longGlyph) => { + if (longGlyph.words.length !== 1) { + throw new UnexpectedError( + describe({ type: "combined glyphs", words: longGlyph.words }), + '"a"', + ); + } + return { + type: "long word", + word: longGlyph.words[0], + length: longGlyph.spaceLength, + }; + }), + specificToken("multiple a") + .map(({ count }) => ({ type: "multiple a", count })), + specificToken("long word") + .map(({ word, length }) => ({ type: "long word", word, length })), + wordFrom(fillerSet, "filler") + .map((word) => ({ type: "word", word })), +); const sentence = choice( sequence( optional( @@ -758,10 +782,10 @@ const sentence = choice( return { ...sentence, interrogative }; }, ), - sequence(emphasis, optional(punctuation)) - .map(([emphasis, punctuation]) => ({ + sequence(filler, optional(punctuation)) + .map(([filler, punctuation]) => ({ type: "filler", - emphasis, + filler, punctuation: punctuation ?? 
"", interrogative: null, })), diff --git a/src/translator/adjective.ts b/src/translator/adjective.ts index 0f238171..76a5aace 100644 --- a/src/translator/adjective.ts +++ b/src/translator/adjective.ts @@ -1,6 +1,6 @@ import * as Dictionary from "../../dictionary/type.ts"; import { ArrayResult } from "../array-result.ts"; -import { nullableAsArray, repeatWithSpace } from "../misc.ts"; +import { nullableAsArray } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import * as English from "./ast.ts"; import { UntranslatableError } from "./error.ts"; @@ -19,11 +19,6 @@ function so(emphasis: null | TokiPona.Emphasis): string { return "so"; case "long word": return `s${"o".repeat(emphasis.length)}`; - case "multiple a": - throw new UntranslatableError( - `"${repeatWithSpace("a", emphasis.count)}"`, - "adverb", - ); } } } diff --git a/src/translator/sentence.ts b/src/translator/sentence.ts index d2b9663e..7eff3787 100644 --- a/src/translator/sentence.ts +++ b/src/translator/sentence.ts @@ -5,10 +5,10 @@ import * as TokiPona from "../parser/ast.ts"; import { definitionAsPlainString } from "./as-string.ts"; import * as English from "./ast.ts"; import { clause, contextClause } from "./clause.ts"; -import { TranslationTodoError, UntranslatableError } from "./error.ts"; +import { TranslationTodoError } from "./error.ts"; import { unemphasized } from "./word.ts"; -function filler(filler: TokiPona.Emphasis): ArrayResult { +function filler(filler: TokiPona.Filler): ArrayResult { switch (filler.type) { case "word": case "long word": { @@ -49,12 +49,6 @@ function emphasisAsPunctuation( return originalPunctuation; } } - if ( - (emphasis.type === "word" || emphasis.type === "long word") && - emphasis.word === "n" - ) { - throw new UntranslatableError('"n"', "punctuation"); - } let questionMark: string; if (interrogative) { questionMark = "?"; @@ -69,13 +63,7 @@ function emphasisAsPunctuation( case "long word": exclamationMark = "!".repeat(emphasis.length); break; - case "multiple a": - throw new UntranslatableError( - `"${repeatWithSpace("a", emphasis.count)}"`, - "punctuation", - ); } - return `${questionMark}${exclamationMark}`; } function interjection(clause: TokiPona.Clause): ArrayResult { @@ -133,83 +121,81 @@ function sentence( sentence: TokiPona.Sentence, isFinal: boolean, ): ArrayResult { - return ArrayResult.from(() => { - if (sentence.interrogative === "x ala x") { - return new ArrayResult(new TranslationTodoError("x ala x")); - } - let punctuation: string; - if (!isFinal && sentence.punctuation === "") { - punctuation = ","; - } else { - punctuation = sentence.punctuation; - } - switch (sentence.type) { - case "default": { - const laClauses = sentence.laClauses; - const givenClauses = ArrayResult.combine( - ...laClauses.map(contextClause), - ) - .map((clauses) => - clauses.map((clause) => ({ - type: "dependent", - conjunction: { - word: "given", - emphasis: false, - }, - clause, - })) - ); - if (sentence.kinOrTaso != null) { - return new ArrayResult( - new TranslationTodoError(`"${sentence.kinOrTaso.word}" preclause`), - ); - } - const lastEngClause = clause(sentence.finalClause); - let right: Array; - if (sentence.anuSeme == null) { - right = []; - } else { - right = [anuSeme(sentence.anuSeme)]; - } - let interjectionClause: ArrayResult; - if ( - sentence.laClauses.length === 0 && sentence.kinOrTaso == null && - sentence.kinOrTaso == null - ) { - interjectionClause = interjection(sentence.finalClause); - } else { - interjectionClause = new ArrayResult(); - } - const engClauses = 
ArrayResult.combine( - givenClauses, - ArrayResult.concat(interjectionClause, lastEngClause), - ) - .map(([givenClauses, lastClause]) => [ - ...givenClauses, - lastClause, - ...right, - ]); - const usePunctuation = emphasisAsPunctuation( - sentence.emphasis, - sentence.interrogative != null, - punctuation, + if (sentence.interrogative === "x ala x") { + return new ArrayResult(new TranslationTodoError("x ala x")); + } + let punctuation: string; + if (!isFinal && sentence.punctuation === "") { + punctuation = ","; + } else { + punctuation = sentence.punctuation; + } + switch (sentence.type) { + case "default": { + const laClauses = sentence.laClauses; + const givenClauses = ArrayResult.combine( + ...laClauses.map(contextClause), + ) + .map((clauses) => + clauses.map((clause) => ({ + type: "dependent", + conjunction: { + word: "given", + emphasis: false, + }, + clause, + })) + ); + if (sentence.kinOrTaso != null) { + return new ArrayResult( + new TranslationTodoError(`"${sentence.kinOrTaso.word}" preclause`), ); - return engClauses.map((clauses) => ({ - clauses, - punctuation: usePunctuation, - })); } - case "filler": - return filler(sentence.emphasis) - .map((interjection) => ({ - clauses: [{ - type: "interjection", - interjection: unemphasized(interjection), - }], - punctuation, - })); + const lastEngClause = clause(sentence.finalClause); + let right: Array; + if (sentence.anuSeme == null) { + right = []; + } else { + right = [anuSeme(sentence.anuSeme)]; + } + let interjectionClause: ArrayResult; + if ( + sentence.laClauses.length === 0 && sentence.kinOrTaso == null && + sentence.kinOrTaso == null + ) { + interjectionClause = interjection(sentence.finalClause); + } else { + interjectionClause = new ArrayResult(); + } + const engClauses = ArrayResult.combine( + givenClauses, + ArrayResult.concat(interjectionClause, lastEngClause), + ) + .map(([givenClauses, lastClause]) => [ + ...givenClauses, + lastClause, + ...right, + ]); + const usePunctuation = emphasisAsPunctuation( + sentence.emphasis, + sentence.interrogative != null, + punctuation, + ); + return engClauses.map((clauses) => ({ + clauses, + punctuation: usePunctuation, + })); } - }); + case "filler": + return filler(sentence.filler) + .map((interjection) => ({ + clauses: [{ + type: "interjection", + interjection: unemphasized(interjection), + }], + punctuation, + })); + } } export function multipleSentences( sentences: TokiPona.MultipleSentences, From b4fec8f171fd65f0da86e032fd3f39605ba3c378 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 18:32:57 +0800 Subject: [PATCH 015/199] small refactor --- src/dictionary.ts | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/dictionary.ts b/src/dictionary.ts index 79730564..40903faf 100644 --- a/src/dictionary.ts +++ b/src/dictionary.ts @@ -41,31 +41,30 @@ function update(): void { dictionary.set(word, entry); } } - for ( - const set of [contentWordSet, prepositionSet, preverbSet, tokiPonaWordSet] - ) { - set.clear(); - } - addSet( + redefineSet( contentWordSet, (definition) => definition.type !== "filler" && definition.type !== "particle definition", ); - addSet(prepositionSet, (definition) => definition.type === "preposition"); - addSet( + redefineSet( + prepositionSet, + (definition) => definition.type === "preposition", + ); + redefineSet( preverbSet, (definition) => (definition.type === "verb" && definition.predicateType != null) || definition.type === "modal verb", ); - addSet(fillerSet, (definition) => definition.type === "filler"); - 
addSet(tokiPonaWordSet, () => true); + redefineSet(fillerSet, (definition) => definition.type === "filler"); + redefineSet(tokiPonaWordSet, () => true); } -function addSet( +function redefineSet( set: Set, filter: (definition: Definition) => boolean, ): void { + set.clear(); for (const [word, entry] of dictionary) { if (entry.definitions.some(filter)) { set.add(word); From 35b597d71e6f1dfc52bcd133adbeedabf2a230f0 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 18:36:02 +0800 Subject: [PATCH 016/199] allow multiple fillers --- src/parser/filter.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/filter.ts b/src/parser/filter.ts index a9f3b851..b4383d6e 100644 --- a/src/parser/filter.ts +++ b/src/parser/filter.ts @@ -428,7 +428,7 @@ export const MULTIPLE_SENTENCES_RULE: Array< > = [ // Only allow at most 2 sentences (sentences) => { - if (sentences.length > 2) { + if (sentences.filter((sentence) => sentence.type !== "filler").length > 2) { throw new UnrecognizedError("multiple sentences"); } return true; From e70f5385ba9ced0f2e3db4334b6a214de2731d4e Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 18:37:34 +0800 Subject: [PATCH 017/199] fix --- src/parser/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 2cd1deb7..f6c70418 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -735,7 +735,7 @@ const sentence = choice( choice( punctuation, end.map(() => ""), - lookAhead(sequence(emphasis, choice(punctuation, end))).map(() => ""), + lookAhead(sequence(filler, choice(punctuation, end))).map(() => ""), ), ) .sortBy(([_, _1, _2, anuSeme]) => { From aefccd43635f6e09a325c75d9ea83367d56e0ae6 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 18:48:08 +0800 Subject: [PATCH 018/199] update changelog --- CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5db540f6..f6f957ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,17 @@ NOTE: Before publishing: The latest on-development version can be accessed by building the source code. On this on-development version, things can be broken. +You can now have custom fillers in custom dictionary: + +``` +wa: + wow/woow/wooow(f); +``` + +With this definition, ilo Token can translate "waaaa" into "woooow". There is no +repetition pattern to follow for toki pona words. "wwaaa" is just as valid. + +- Allow custom fillers. - Fix sentence capitalization: If the sentence starts with number, no capitalization will occur. 
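The custom-filler behaviour documented in the changelog hunk above can be pictured with a short TypeScript sketch. This is an illustration only: `fillers`, `baseWordAndLength`, and `translateFiller` are made-up names, and the `before`/`repeat`/`after` split of `wow/woow/wooow(f)` is an assumption inferred from the filler definitions used elsewhere in this series, not the project's actual code.

```ts
// Hypothetical sketch of the behaviour described above — not ilo Token's code.
type FillerDefinition = { before: string; repeat: string; after: string };

const fillers: Record<string, FillerDefinition> = {
  // Assumed split of "wow/woow/wooow(f)": a fixed head, a letter that
  // stretches, and a fixed tail.
  wa: { before: "w", repeat: "o", after: "w" },
};

function baseWordAndLength(raw: string): { word: string; length: number } {
  // Collapse consecutive repeats: "waaaa" and "wwaaa" both reduce to "wa".
  const word = raw.replace(/(.)\1*/g, "$1");
  // Every extra repeated letter lengthens the filler by one.
  return { word, length: raw.length - word.length + 1 };
}

function translateFiller(raw: string): string | undefined {
  const { word, length } = baseWordAndLength(raw);
  const definition = fillers[word];
  if (definition == null) return undefined;
  const { before, repeat, after } = definition;
  return `${before}${repeat.repeat(length)}${after}`;
}

translateFiller("waaaa"); // "woooow"
translateFiller("wwaaa"); // also "woooow" — no fixed repetition pattern
```

The patches above compute the stretch length in the `longWord` lexer (total letters minus the collapsed word's length, plus one) and destructure `before`/`repeat`/`after` in `src/translator/sentence.ts`; the sketch only imitates that observable behaviour.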
From 6c91a8da738ed064da8fcbfc37f4629f83f32a3c Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 19:03:54 +0800 Subject: [PATCH 019/199] more std --- src/parser/lexer.ts | 4 ++-- src/parser/parser.ts | 9 +++------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index 358c698a..a41e3c8b 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -1,3 +1,4 @@ +import { sumOf } from "@std/collections/sum-of"; import { settings } from "../settings.ts"; import { cache } from "./cache.ts"; import { @@ -86,8 +87,7 @@ const longWord = allAtLeastOnce(repeatingLetter) .skip(spaces) .map((letters) => { const word = letters.map(([letter]) => letter).join(""); - const length = letters.reduce((rest, [_, count]) => rest + count, 0) - - word.length + 1; + const length = sumOf(letters, ([_, count]) => count) - word.length + 1; return { type: "long word", word, length }; }) .filter(({ length }) => length > 1); diff --git a/src/parser/parser.ts b/src/parser/parser.ts index f6c70418..041733b7 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -1,3 +1,4 @@ +import { sumOf } from "@std/collections/sum-of"; import { ArrayResult } from "../array-result.ts"; import { contentWordSet, @@ -252,9 +253,7 @@ const subAleNumber = sequence( many(specificWord("wan")), ) .map((array) => array.flat()) - .map((array) => - array.reduce((number, word) => number + wordToNumber(word), 0) - ); + .map((array) => sumOf(array, wordToNumber)); const properSubAleNumber = subAleNumber.filter((number) => { if (number > 100) { throw new UnrecognizedError( @@ -297,9 +296,7 @@ const number = choice( ); } }) - .map((numbers) => - numbers.reduce((result, [sub, ale]) => result + sub * 100 ** ale, 0) - ), + .map((numbers) => sumOf(numbers, ([sub, ale]) => sub * 100 ** ale)), sequence( count(many(ale)), subAleNumber, From becbb5f889726af4a513ca4d56ba2f83f39638b4 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 19:07:28 +0800 Subject: [PATCH 020/199] ensure long word starts with small letter --- src/parser/lexer.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index a41e3c8b..2a9a8478 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -90,6 +90,7 @@ const longWord = allAtLeastOnce(repeatingLetter) const length = sumOf(letters, ([_, count]) => count) - word.length + 1; return { type: "long word", word, length }; }) + .filter(({ word }) => /^[a-z]/.test(word)) .filter(({ length }) => length > 1); const xAlaX = lazy(() => { From 872c791746dc18b52ff2a08aefb6537943c67e62 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 19:09:30 +0800 Subject: [PATCH 021/199] fix --- src/parser/lexer.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index 2a9a8478..f6edaea4 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -2,6 +2,7 @@ import { sumOf } from "@std/collections/sum-of"; import { settings } from "../settings.ts"; import { cache } from "./cache.ts"; import { + all, allAtLeastOnce, choice, choiceOnlyOne, @@ -79,7 +80,7 @@ const multipleA = sequence( const repeatingLetter = match(/[a-zA-Z]/, "latin letter") .then((letter) => - count(allAtLeastOnce(matchString(letter))).map<[string, number]>( + count(all(matchString(letter))).map<[string, number]>( (count) => [letter, count + 1], ) ); From ded7ce1c7a17e6068aa0d70aeeba702c64a0d32e Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 19:12:38 +0800 Subject: [PATCH 022/199] small 
improvement --- src/parser/parser.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 041733b7..7f837ef4 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -125,8 +125,7 @@ const emphasis = choice( // TODO: error message .filter(({ word }) => word === "a") .map(({ word, length }) => ({ type: "long word", word, length })), - wordFrom(new Set(["a"]), '"a"') - .map((word) => ({ type: "word", word })), + specificWord("a").map((word) => ({ type: "word", word })), ); const optionalEmphasis = optional(emphasis); function xAlaX( From c514e9f3d0e3971cde1aeb1944e6e2644b1c2997 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 19:17:48 +0800 Subject: [PATCH 023/199] small improvement --- src/parser/lexer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index f6edaea4..c8533700 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -226,11 +226,11 @@ export const token = choiceOnlyOne( combinedGlyphsToken, // starting with non-words: punctuation, + cartouches, headlessLongGlyphEnd, headedLongGlyphEnd, headlessLongGlyphStart, insideLongGlyph, - cartouches, ); Parser.endCache(); From 787ce4ac6b5dd89a93809d301ac76c8a8f132de7 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 27 Feb 2025 19:29:50 +0800 Subject: [PATCH 024/199] fix filler parser --- src/parser/parser.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 7f837ef4..8c502d72 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -711,6 +711,8 @@ const filler = choice( specificToken("multiple a") .map(({ count }) => ({ type: "multiple a", count })), specificToken("long word") + // TODO: error message + .filter(({ word }) => fillerSet.has(word)) .map(({ word, length }) => ({ type: "long word", word, length })), wordFrom(fillerSet, "filler") .map((word) => ({ type: "word", word })), From e5974c0281edfc15c3612f2b2b05e7631f4a0c17 Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 28 Feb 2025 14:57:56 +0800 Subject: [PATCH 025/199] this is unnecessary --- src/translator/sentence.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/translator/sentence.ts b/src/translator/sentence.ts index 7eff3787..9fcd5501 100644 --- a/src/translator/sentence.ts +++ b/src/translator/sentence.ts @@ -30,8 +30,7 @@ function filler(filler: TokiPona.Filler): ArrayResult { } else { return null; } - }) - .addErrorWhenNone(() => new MissingEntryError("filler", filler.word)); + }); } case "multiple a": return new ArrayResult(["ha".repeat(filler.count)]); From 230ce3b43e3aa5c7221adf16d737c56c4db19831 Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 28 Feb 2025 15:00:59 +0800 Subject: [PATCH 026/199] remove unused imports --- src/translator/sentence.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/translator/sentence.ts b/src/translator/sentence.ts index 9fcd5501..e2e6e022 100644 --- a/src/translator/sentence.ts +++ b/src/translator/sentence.ts @@ -1,5 +1,5 @@ import { ArrayResult } from "../array-result.ts"; -import { dictionary, MissingEntryError } from "../dictionary.ts"; +import { dictionary } from "../dictionary.ts"; import { nullableAsArray, repeatWithSpace } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import { definitionAsPlainString } from "./as-string.ts"; From 11cc9e47828d2129f2f70460af19ef15b768b6d0 Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 28 Feb 2025 15:02:52 +0800 Subject: [PATCH 
027/199] remove unnecessary check --- src/translator/sentence.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/translator/sentence.ts b/src/translator/sentence.ts index e2e6e022..9addbbbd 100644 --- a/src/translator/sentence.ts +++ b/src/translator/sentence.ts @@ -21,8 +21,7 @@ function filler(filler: TokiPona.Filler): ArrayResult { length = filler.length; break; } - return new ArrayResult(nullableAsArray(dictionary.get(filler.word)!)) - .flatMap((entry) => new ArrayResult(entry.definitions)) + return new ArrayResult(dictionary.get(filler.word)!.definitions) .filterMap((definition) => { if (definition.type === "filler") { const { before, repeat, after } = definition; From f38f12adef3d44a5792202550dea69eaba505bab Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 28 Feb 2025 15:06:55 +0800 Subject: [PATCH 028/199] remove unused import --- src/translator/sentence.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/translator/sentence.ts b/src/translator/sentence.ts index 9addbbbd..4a9314d3 100644 --- a/src/translator/sentence.ts +++ b/src/translator/sentence.ts @@ -1,6 +1,6 @@ import { ArrayResult } from "../array-result.ts"; import { dictionary } from "../dictionary.ts"; -import { nullableAsArray, repeatWithSpace } from "../misc.ts"; +import { repeatWithSpace } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import { definitionAsPlainString } from "./as-string.ts"; import * as English from "./ast.ts"; From 30e5f5b50ae58271f88762651e25992bc00efd23 Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 28 Feb 2025 15:47:59 +0800 Subject: [PATCH 029/199] update long space lexer --- src/parser/lexer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index c8533700..cd748c0c 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -176,7 +176,7 @@ function longContainer( const longSpaceContainer = longContainer( START_OF_LONG_GLYPH, END_OF_LONG_GLYPH, - spacesWithoutNewline.map((space) => space.length), + count(spacesWithoutNewline).filter((length) => length > 0), ) .skip(spaces); const longGlyphHead = choiceOnlyOne( From 81fc9bc1a6f26f425a245823e00172328632f1db Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 28 Feb 2025 17:19:51 +0800 Subject: [PATCH 030/199] remove unnecessary nesting --- src/main.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.ts b/src/main.ts index 3fad7d75..4782d5a6 100644 --- a/src/main.ts +++ b/src/main.ts @@ -169,7 +169,7 @@ function main(): void { resizeTextarea(); function resizeTextarea(): void { inputTextBox.style.height = "auto"; - inputTextBox.style.height = `${`${inputTextBox.scrollHeight + 14}`}px`; + inputTextBox.style.height = `${inputTextBox.scrollHeight + 14}px`; } // initialize button label From 11880cffeb0c504b938ee0a406a8639f52c42896 Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 28 Feb 2025 17:33:10 +0800 Subject: [PATCH 031/199] refactor long strings --- src/misc.ts | 3 ++- src/translator/composer.ts | 3 ++- src/translator/verb.ts | 3 +-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/misc.ts b/src/misc.ts index e1f21af7..72070446 100644 --- a/src/misc.ts +++ b/src/misc.ts @@ -53,8 +53,9 @@ export function setIgnoreError(key: string, value: string): void { } export function assertOk(response: Response): Response { if (!response.ok) { + const { url, status, statusText } = response; throw new Error( - `unable to fetch ${response.url} (${response.status} ${response.statusText})`, + `unable to 
fetch ${url} (${status} ${statusText})`, ); } return response; diff --git a/src/translator/composer.ts b/src/translator/composer.ts index 2c9ea70c..714ef4e0 100644 --- a/src/translator/composer.ts +++ b/src/translator/composer.ts @@ -31,7 +31,8 @@ function compound( const lastIndex = elements.length - 1; const init = elements.slice(0, lastIndex); const last = elements[lastIndex]; - return `${init.map((item) => `${item},`).join(" ")} ${conjunction} ${last}`; + const initText = init.map((item) => `${item},`).join(" "); + return `${initText} ${conjunction} ${last}`; } } export function noun(phrases: English.NounPhrase, depth: number): string { diff --git a/src/translator/verb.ts b/src/translator/verb.ts index dcefc81d..99d67773 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -130,8 +130,7 @@ export function fromVerbForms( if (quantity === "condensed") { verb = new ArrayResult([{ modal: null, - infinite: - `${presentSingular}/${verbForms.presentPlural}/${pastSingular}/${pastPlural}/will be`, + infinite: `is/are/was/were/will be`, }]); } else { verb = new ArrayResult([{ From 79440ff2b1f4d9381b3bb533fed31556f6fb8da6 Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 28 Feb 2025 17:33:31 +0800 Subject: [PATCH 032/199] this can be regular string --- src/translator/verb.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/translator/verb.ts b/src/translator/verb.ts index 99d67773..c4251544 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -130,7 +130,7 @@ export function fromVerbForms( if (quantity === "condensed") { verb = new ArrayResult([{ modal: null, - infinite: `is/are/was/were/will be`, + infinite: "is/are/was/were/will be", }]); } else { verb = new ArrayResult([{ From 2d11d2c7a9c03f61adc47790e657ddb339ed28cf Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 28 Feb 2025 17:38:32 +0800 Subject: [PATCH 033/199] remove these --- src/settings.ts | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/settings.ts b/src/settings.ts index e045454d..987d9dbd 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -1,15 +1,5 @@ -/** - * Options for determining how to show different forms or conjugations of nouns - * or verbs. See - * https://github.com/ilo-token/ilo-token.github.io/wiki/Settings-Help#singular-and-plural-forms--verb-tenses - * for more info - */ export type RedundancySettings = "both" | "condensed" | "default only"; -/** - * Interface for configuring translation. See - * https://github.com/ilo-token/ilo-token.github.io/wiki/Settings-Help for more - * info. 
- */ + // may be extended but existing properties must stay unchanged export type Settings = { teloMisikeke: boolean; From 75bffb21422175cd507feb2dce0c732eedbfcc36 Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 1 Mar 2025 09:01:18 +0800 Subject: [PATCH 034/199] reduce memoization --- src/parser/parser-lib.ts | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/parser/parser-lib.ts b/src/parser/parser-lib.ts index 20e38dba..5f5a028f 100644 --- a/src/parser/parser-lib.ts +++ b/src/parser/parser-lib.ts @@ -6,40 +6,44 @@ export type ValueRest = Readonly<{ rest: string; value: T }>; export type ParserResult = ArrayResult>; export class Parser { + readonly unmemoizedParser: (src: string) => ParserResult; readonly parser: (src: string) => ParserResult; static cache: null | Cache = null; constructor(parser: (src: string) => ParserResult) { - const useParser = (src: string) => ArrayResult.from(() => parser(src)); + this.unmemoizedParser = (src: string) => + ArrayResult.from(() => parser(src)); if (Parser.cache != null) { const cache = new Map>(); Parser.addToCache(cache); - this.parser = memoize(useParser, { cache }); + this.parser = memoize(this.unmemoizedParser, { cache }); } else { - this.parser = useParser; + this.parser = this.unmemoizedParser; } } map(mapper: (value: T) => U): Parser { return new Parser((src) => this - .parser(src) + .unmemoizedParser(src) .map(({ value, rest }) => ({ value: mapper(value), rest })) ); } filter(mapper: (value: T) => boolean): Parser { return new Parser((src) => - this.parser(src).filter(({ value }) => mapper(value)) + this.unmemoizedParser(src).filter(({ value }) => mapper(value)) ); } then(mapper: (value: T) => Parser): Parser { const { cache } = Parser; return new Parser((src) => { - const parser = Parser.inContext(() => this.parser(src), cache); + const parser = Parser.inContext(() => this.unmemoizedParser(src), cache); return parser.flatMap(({ value, rest }) => mapper(value).parser(rest)); }); } sort(comparer: (left: T, right: T) => number): Parser { return new Parser((src) => - this.parser(src).sort((left, right) => comparer(left.value, right.value)) + this.unmemoizedParser(src).sort((left, right) => + comparer(left.value, right.value) + ) ); } sortBy(mapper: (value: T) => number): Parser { @@ -98,7 +102,7 @@ export const nothing = new Parser((src) => export const emptyArray = nothing.map(() => []); export function lookAhead(parser: Parser): Parser { return new Parser((src) => - parser.parser(src).map(({ value }) => ({ value, rest: src })) + parser.unmemoizedParser(src).map(({ value }) => ({ value, rest: src })) ); } export function lazy(parser: () => Parser): Parser { @@ -106,9 +110,11 @@ export function lazy(parser: () => Parser): Parser { if (Parser.cache != null) { const cachedParser = new Lazy(() => Parser.inContext(parser, cache)); Parser.addToCache(cachedParser); - return new Parser((src) => cachedParser.getValue().parser(src)); + return new Parser((src) => cachedParser.getValue().unmemoizedParser(src)); } else { - return new Parser((src) => Parser.inContext(parser, cache).parser(src)); + return new Parser((src) => + Parser.inContext(parser, cache).unmemoizedParser(src) + ); } } export function choice(...choices: Array>): Parser { @@ -241,7 +247,7 @@ export function withSource( parser: Parser, ): Parser<[value: T, source: string]> { return new Parser((src) => - parser.parser(src).map((value) => ({ + parser.unmemoizedParser(src).map((value) => ({ value: [value.value, src.slice(0, src.length - 
value.rest.length)], rest: value.rest, })) From 02d25e9933f0592a4ac4349c8d5f165d76f929d3 Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 1 Mar 2025 09:18:14 +0800 Subject: [PATCH 035/199] rename --- src/parser/parser.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 8c502d72..b8d89dca 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -789,7 +789,7 @@ const sentence = choice( })), ) .filter(filter(SENTENCE_RULE)); -const FULL_PARSER = spaces +const fullParser = spaces .with(choiceOnlyOne( wordFrom(tokiPonaWordSet, "Toki Pona word") .skip(end) @@ -804,7 +804,7 @@ export function parse(src: string): ArrayResult { if (src.trim().length > 500) { throw new UnrecognizedError("long text"); } else { - return FULL_PARSER.parse(src); + return fullParser.parse(src); } }); } From 0b2a1a0395e354147ba6eb40910463b462c5947a Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 1 Mar 2025 09:34:42 +0800 Subject: [PATCH 036/199] enable memoization to everything --- dictionary/parser.ts | 4 ++++ src/parser/lexer.ts | 4 ++-- src/parser/parser-lib.ts | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index 42edee01..55aaa469 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -26,6 +26,8 @@ import { VerbForms, } from "./type.ts"; +Parser.startCache(); + const comment = match(/#[^\n\r]*/, "comment"); const spaces = sourceOnly(all(choiceOnlyOne(match(/\s/, "space"), comment))); function lex(parser: Parser): Parser { @@ -505,3 +507,5 @@ export function parseDictionary(sourceText: string): Dictionary { throw new AggregateError(errors.deduplicateErrors().errors); } } + +Parser.endCache(); diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index cd748c0c..c6460a90 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -34,6 +34,8 @@ import { UCSUR_TO_LATIN, } from "./ucsur.ts"; +Parser.startCache(cache); + const spacesWithoutNewline = match(/[^\S\n\r]*/, "spaces"); const newline = match(/[\n\r]\s*/, "newline"); const spaces = sourceOnly( @@ -213,8 +215,6 @@ const combinedGlyphsToken = combinedGlyphs .map((words) => ({ type: "combined glyphs", words })); const wordToken = word.map((word) => ({ type: "word", word })); -Parser.startCache(cache); - export const token = choiceOnlyOne( xAlaX, multipleA, diff --git a/src/parser/parser-lib.ts b/src/parser/parser-lib.ts index 5f5a028f..0cd7edd2 100644 --- a/src/parser/parser-lib.ts +++ b/src/parser/parser-lib.ts @@ -61,7 +61,7 @@ export class Parser { static addToCache(cache: Clearable): void { Parser.cache?.add(cache); } - static startCache(cache: Cache): void { + static startCache(cache: Cache = new Cache()): void { Parser.cache = cache; } static endCache(): void { From 97d91ffd9efbce7cac9c5913293751bbd59340c3 Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 1 Mar 2025 09:41:31 +0800 Subject: [PATCH 037/199] revert to previous setup --- dictionary/parser.ts | 4 ---- src/parser/lexer.ts | 8 ++++++-- src/parser/parser-lib.ts | 10 +++------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index 55aaa469..42edee01 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -26,8 +26,6 @@ import { VerbForms, } from "./type.ts"; -Parser.startCache(); - const comment = match(/#[^\n\r]*/, "comment"); const spaces = sourceOnly(all(choiceOnlyOne(match(/\s/, "space"), comment))); function lex(parser: Parser): Parser { @@ -507,5 +505,3 @@ export function 
parseDictionary(sourceText: string): Dictionary { throw new AggregateError(errors.deduplicateErrors().errors); } } - -Parser.endCache(); diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index c6460a90..02bd49ae 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -34,8 +34,6 @@ import { UCSUR_TO_LATIN, } from "./ucsur.ts"; -Parser.startCache(cache); - const spacesWithoutNewline = match(/[^\S\n\r]*/, "spaces"); const newline = match(/[\n\r]\s*/, "newline"); const spaces = sourceOnly( @@ -96,6 +94,8 @@ const longWord = allAtLeastOnce(repeatingLetter) .filter(({ word }) => /^[a-z]/.test(word)) .filter(({ length }) => length > 1); +Parser.startCache(cache); + const xAlaX = lazy(() => { if (settings.xAlaXPartialParsing) { return empty; @@ -108,6 +108,8 @@ const xAlaX = lazy(() => { }) .map((word) => ({ type: "x ala x", word })); +Parser.endCache(); + const punctuation = choiceOnlyOne( match(/[.,:;?!…·。。︒\u{F199C}\u{F199D}]+/u, "punctuation") .map((punctuation) => @@ -215,6 +217,8 @@ const combinedGlyphsToken = combinedGlyphs .map((words) => ({ type: "combined glyphs", words })); const wordToken = word.map((word) => ({ type: "word", word })); +Parser.startCache(cache); + export const token = choiceOnlyOne( xAlaX, multipleA, diff --git a/src/parser/parser-lib.ts b/src/parser/parser-lib.ts index 0cd7edd2..85756d4f 100644 --- a/src/parser/parser-lib.ts +++ b/src/parser/parser-lib.ts @@ -12,13 +12,9 @@ export class Parser { constructor(parser: (src: string) => ParserResult) { this.unmemoizedParser = (src: string) => ArrayResult.from(() => parser(src)); - if (Parser.cache != null) { - const cache = new Map>(); - Parser.addToCache(cache); - this.parser = memoize(this.unmemoizedParser, { cache }); - } else { - this.parser = this.unmemoizedParser; - } + const cache = new Map>(); + Parser.addToCache(cache); + this.parser = memoize(this.unmemoizedParser, { cache }); } map(mapper: (value: T) => U): Parser { return new Parser((src) => From a63403cabc7def73c1ae7e7b6741d6ec65a78fa5 Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 1 Mar 2025 09:49:56 +0800 Subject: [PATCH 038/199] revert --- src/parser/lexer.ts | 6 ------ src/parser/parser-lib.ts | 10 +++++++--- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index 02bd49ae..4024dd7c 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -93,9 +93,6 @@ const longWord = allAtLeastOnce(repeatingLetter) }) .filter(({ word }) => /^[a-z]/.test(word)) .filter(({ length }) => length > 1); - -Parser.startCache(cache); - const xAlaX = lazy(() => { if (settings.xAlaXPartialParsing) { return empty; @@ -107,9 +104,6 @@ const xAlaX = lazy(() => { } }) .map((word) => ({ type: "x ala x", word })); - -Parser.endCache(); - const punctuation = choiceOnlyOne( match(/[.,:;?!…·。。︒\u{F199C}\u{F199D}]+/u, "punctuation") .map((punctuation) => diff --git a/src/parser/parser-lib.ts b/src/parser/parser-lib.ts index 85756d4f..0cd7edd2 100644 --- a/src/parser/parser-lib.ts +++ b/src/parser/parser-lib.ts @@ -12,9 +12,13 @@ export class Parser { constructor(parser: (src: string) => ParserResult) { this.unmemoizedParser = (src: string) => ArrayResult.from(() => parser(src)); - const cache = new Map>(); - Parser.addToCache(cache); - this.parser = memoize(this.unmemoizedParser, { cache }); + if (Parser.cache != null) { + const cache = new Map>(); + Parser.addToCache(cache); + this.parser = memoize(this.unmemoizedParser, { cache }); + } else { + this.parser = this.unmemoizedParser; + } } map(mapper: (value: T) => U): 
Parser { return new Parser((src) => From 828b01accef3e11dd1b53d696ee213e9b871f385 Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 1 Mar 2025 16:13:30 +0800 Subject: [PATCH 039/199] small update --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6f957ab..71df6b1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ NOTE: Before publishing: On development changelog -# 0.4.2 +## 0.4.2 The latest on-development version can be accessed by building the source code. On this on-development version, things can be broken. From 174ffbb64f304265cb2a445b43be11b9fc15b559 Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 1 Mar 2025 16:23:21 +0800 Subject: [PATCH 040/199] update how comments look --- telo-misikeke/Parser.js | 3 +-- telo-misikeke/rules.js | 3 +-- telo-misikeke/update.ts | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/telo-misikeke/Parser.js b/telo-misikeke/Parser.js index 161ab4e4..df538361 100644 --- a/telo-misikeke/Parser.js +++ b/telo-misikeke/Parser.js @@ -1,5 +1,4 @@ -// This code is from -// https://gitlab.com/telo-misikeke/telo-misikeke.gitlab.io/-/raw/main/public/Parser.js +// This code is from https://gitlab.com/telo-misikeke/telo-misikeke.gitlab.io/-/raw/main/public/Parser.js // // Repository: https://gitlab.com/telo-misikeke/telo-misikeke.gitlab.io/ // Copyright (c) 2023 Nicolas Hurtubise diff --git a/telo-misikeke/rules.js b/telo-misikeke/rules.js index 00adbd29..4509102b 100644 --- a/telo-misikeke/rules.js +++ b/telo-misikeke/rules.js @@ -1,5 +1,4 @@ -// This code is from -// https://gitlab.com/telo-misikeke/telo-misikeke.gitlab.io/-/raw/main/public/rules.js +// This code is from https://gitlab.com/telo-misikeke/telo-misikeke.gitlab.io/-/raw/main/public/rules.js // // Repository: https://gitlab.com/telo-misikeke/telo-misikeke.gitlab.io/ // Copyright (c) 2023 Nicolas Hurtubise diff --git a/telo-misikeke/update.ts b/telo-misikeke/update.ts index 8441d0db..bf399f06 100644 --- a/telo-misikeke/update.ts +++ b/telo-misikeke/update.ts @@ -38,8 +38,7 @@ async function buildCode( } const exports = exportItems.join(", "); const code = `\ -// This code is from -// ${source} +// This code is from ${source} // // Repository: https://gitlab.com/telo-misikeke/telo-misikeke.gitlab.io/ // Copyright (c) 2023 Nicolas Hurtubise From 0c3313bce03c3839c96b4e6f9f3bd52094dc4785 Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 1 Mar 2025 16:26:43 +0800 Subject: [PATCH 041/199] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71df6b1c..2395a801 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ NOTE: Before publishing: On development changelog -## 0.4.2 +## 0.4.2 (On development) The latest on-development version can be accessed by building the source code. On this on-development version, things can be broken. 
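Patches 034–038 above shuffle where memoization happens in `Parser`: the memoized `parser` wrapper stays the public entry point, while combinators such as `map` and `filter` call `unmemoizedParser` directly. A minimal sketch of that idea follows; all names are illustrative, and the tiny `memoize` stand-in only assumes the behaviour of a cache-backed lookup, not the exact helper the project imports.

```ts
// Illustrative sketch only — simplified from the Parser changes above.
type ParseResult<T> = ReadonlyArray<{ value: T; rest: string }>;

// Stand-in for a memoize helper that accepts an explicit cache.
function memoize<T>(
  fn: (src: string) => T,
  cache: Map<string, T>,
): (src: string) => T {
  return (src) => {
    if (!cache.has(src)) {
      cache.set(src, fn(src));
    }
    return cache.get(src)!;
  };
}

class MiniParser<T> {
  readonly unmemoized: (src: string) => ParseResult<T>;
  readonly parse: (src: string) => ParseResult<T>;
  constructor(body: (src: string) => ParseResult<T>) {
    this.unmemoized = body;
    // Only the outer entry point gets a cache...
    this.parse = memoize(body, new Map());
  }
  map<U>(mapper: (value: T) => U): MiniParser<U> {
    // ...so a combinator reuses the raw body; the MiniParser it returns
    // already adds one cache layer, and stacking a second lookup per `map`
    // would mostly cost memory without saving work.
    return new MiniParser((src) =>
      this.unmemoized(src).map(({ value, rest }) => ({
        value: mapper(value),
        rest,
      }))
    );
  }
}
```

Whether this trade-off pays for itself depends on how often the same source substring reaches the same parser, which is why the series flips between "reduce memoization" and "enable memoization to everything" before reverting to the previous setup.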
From eb93b0cda323156305fa18b9c20f2a7cea69cba9 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 08:07:25 +0800 Subject: [PATCH 042/199] improve error messages --- src/parser/parser.ts | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index b8d89dca..10c2a26a 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -76,8 +76,13 @@ function specificToken( }); } const comma = specificToken("punctuation") - .map(({ punctuation }) => punctuation) - .filter((punctuation) => punctuation === ","); + .map(({ punctuation }) => { + if (punctuation === ",") { + return ","; + } else { + throw new UnexpectedError(`"${punctuation}"`, "comma"); + } + }); const optionalComma = optional(comma); const word = specificToken("word").map(({ word }) => word); const properWords = specificToken("proper word").map(({ words }) => words); @@ -122,9 +127,12 @@ const emphasis = choice( }; }), specificToken("long word") - // TODO: error message - .filter(({ word }) => word === "a") - .map(({ word, length }) => ({ type: "long word", word, length })), + .map(({ word, length }) => { + if (word !== "a") { + throw new UnexpectedError(`"${word}"`, '"a"'); + } + return { type: "long word", word, length }; + }), specificWord("a").map((word) => ({ type: "word", word })), ); const optionalEmphasis = optional(emphasis); @@ -711,9 +719,12 @@ const filler = choice( specificToken("multiple a") .map(({ count }) => ({ type: "multiple a", count })), specificToken("long word") - // TODO: error message - .filter(({ word }) => fillerSet.has(word)) - .map(({ word, length }) => ({ type: "long word", word, length })), + .map(({ word, length }) => { + if (!fillerSet.has(word)) { + throw new UnrecognizedError(`"${word}" as filler`); + } + return { type: "long word", word, length }; + }), wordFrom(fillerSet, "filler") .map((word) => ({ type: "word", word })), ); From 748af265b15d3beac910da24b1790e99806d56aa Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 08:12:08 +0800 Subject: [PATCH 043/199] small improvement --- src/parser/parser.ts | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 10c2a26a..9e396add 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -75,10 +75,12 @@ function specificToken( } }); } -const comma = specificToken("punctuation") - .map(({ punctuation }) => { +const punctuation = specificToken("punctuation") + .map(({ punctuation }) => punctuation); +const comma = punctuation + .filter((punctuation) => { if (punctuation === ",") { - return ","; + return true; } else { throw new UnexpectedError(`"${punctuation}"`, "comma"); } @@ -86,9 +88,6 @@ const comma = specificToken("punctuation") const optionalComma = optional(comma); const word = specificToken("word").map(({ word }) => word); const properWords = specificToken("proper word").map(({ words }) => words); -const punctuation = specificToken("punctuation").map(({ punctuation }) => - punctuation -); function wordFrom(set: Set, description: string): Parser { return word.filter((word) => { if (set.has(word)) { From 6eade5dad4cc6c2e0502c94d1cae4474d663650f Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 08:27:24 +0800 Subject: [PATCH 044/199] reduce duplicate parser --- src/parser/parser.ts | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 9e396add..be479a88 100644 --- a/src/parser/parser.ts +++ 
b/src/parser/parser.ts @@ -420,12 +420,15 @@ const modifiers = sequence( ...piModifiers, ]) .filter(filter(MULTIPLE_MODIFIERS_RULES)); +const singlePhrase = phrase.map((phrase) => ({ + type: "single", + phrase, +})); function nestedPhrasesOnly( nestingRule: Array<"en" | "li" | "o" | "e" | "anu">, ): Parser { if (nestingRule.length === 0) { - return phrase - .map((phrase) => ({ type: "single", phrase })); + return singlePhrase; } else { const [first, ...rest] = nestingRule; let type: "and conjunction" | "anu"; @@ -452,8 +455,7 @@ function nestedPhrases( nestingRule: Array<"en" | "li" | "o" | "e" | "anu">, ): Parser { if (nestingRule.length === 0) { - return phrase - .map((phrase) => ({ type: "single", phrase })); + return singlePhrase; } else { return choice( nestedPhrasesOnly(nestingRule), @@ -464,7 +466,7 @@ function nestedPhrases( const subjectPhrases = choice( nestedPhrasesOnly(["en", "anu"]), nestedPhrasesOnly(["anu", "en"]), - phrase.map((phrase) => ({ type: "single", phrase })), + singlePhrase, ); const preposition = choice( sequence( From 25b72a1875e9d3d93e11a3e5170319b03a1d77f1 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 08:30:32 +0800 Subject: [PATCH 045/199] small formatting --- src/parser/parser.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index be479a88..96626f8b 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -420,10 +420,8 @@ const modifiers = sequence( ...piModifiers, ]) .filter(filter(MULTIPLE_MODIFIERS_RULES)); -const singlePhrase = phrase.map((phrase) => ({ - type: "single", - phrase, -})); +const singlePhrase = phrase + .map((phrase) => ({ type: "single", phrase })); function nestedPhrasesOnly( nestingRule: Array<"en" | "li" | "o" | "e" | "anu">, ): Parser { From f630e939ed98fd4200171452d232b4086c6f9206 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 09:15:37 +0800 Subject: [PATCH 046/199] implement long anu parser --- src/parser/parser.ts | 66 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 96626f8b..3d4efb1f 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -46,6 +46,7 @@ import { choice, choiceOnlyOne, count, + empty, end, lazy, lookAhead, @@ -422,6 +423,27 @@ const modifiers = sequence( .filter(filter(MULTIPLE_MODIFIERS_RULES)); const singlePhrase = phrase .map((phrase) => ({ type: "single", phrase })); +const longAnu = sequence( + specificToken("headless long glyph start").with(phrase), + manyAtLeastOnce( + specificToken("inside long glyph") + .filter((words) => { + if (words.words.length !== 1) { + throw new UnexpectedError( + describe({ type: "combined glyphs", words: words.words }), + "pi", + ); + } + if (words.words[0] !== "anu") { + throw new UnexpectedError(`"${words.words[0]}"`, "anu"); + } + return true; + }) + .with(phrase), + ), +) + .skip(specificToken("headless long glyph end")) + .map(([phrase, morePhrase]) => [phrase, ...morePhrase]); function nestedPhrasesOnly( nestingRule: Array<"en" | "li" | "o" | "e" | "anu">, ): Parser { @@ -435,18 +457,30 @@ function nestedPhrasesOnly( } else { type = "anu"; } - return sequence( - nestedPhrases(rest), - manyAtLeastOnce( - optionalComma - .with(specificWord(first)) - .with(nestedPhrases(rest)), - ), - ) - .map(([group, moreGroups]) => ({ - type, - phrases: [group, ...moreGroups], + let longAnuParser: Parser; + if (first === "anu") { + longAnuParser = longAnu.map((phrases) => ({ + 
type: "anu", + phrases: phrases.map((phrase) => ({ type: "single", phrase })), })); + } else { + longAnuParser = empty; + } + return choice( + longAnuParser, + sequence( + nestedPhrases(rest), + manyAtLeastOnce( + optionalComma + .with(specificWord(first)) + .with(nestedPhrases(rest)), + ), + ) + .map(([group, moreGroups]) => ({ + type, + phrases: [group, ...moreGroups], + })), + ); } } function nestedPhrases( @@ -593,7 +627,17 @@ function multiplePredicates( } else { type = "anu"; } + let longAnuParser: Parser; + if (first === "anu") { + longAnuParser = longAnu.map((phrases) => ({ + type: "anu", + predicates: phrases.map((predicate) => ({ type: "single", predicate })), + })); + } else { + longAnuParser = empty; + } return choice( + longAnuParser, associatedPredicates(nestingRule), sequence( choice( From 93d808378bcb219979785cc83e7115e720c42e09 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 09:39:35 +0800 Subject: [PATCH 047/199] refactor parsers --- dictionary/parser.ts | 11 ++-- src/parser/lexer.ts | 31 +++++------- src/parser/parser.ts | 116 +++++++++++++++++++++---------------------- 3 files changed, 77 insertions(+), 81 deletions(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index 42edee01..e84d7fea 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -35,8 +35,10 @@ const backtick = matchString("`", "backtick"); const word = allAtLeastOnce( choiceOnlyOne( match(/[^():;#/`]/, "word"), - sequence(backtick, character, backtick) - .map(([_, character]) => character), + backtick + .with(character) + .skip(backtick) + .map((character) => character), comment.map(() => ""), ), ) @@ -192,7 +194,7 @@ const adjectiveKind = choiceOnlyOne( keyword("opinion"), keyword("size"), sequence(keyword("physical"), keyword("quality")) - .map(() => "physical quality" as const), + .map<"physical quality">(() => "physical quality"), keyword("age"), keyword("color"), keyword("origin"), @@ -234,8 +236,9 @@ function verbOnly(tagInside: Parser): Parser { sequence( word.skip(slash), word.skip(slash), - word.skip(tag(tagInside)), + word, ) + .skip(tag(tagInside)) .filter(([presentPlural, presentSingular, past]) => { const [_, ...pluralParticles] = presentPlural.split(" "); const [_1, ...singularParticles] = presentSingular.split(" "); diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index 4024dd7c..2be53b46 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -72,12 +72,9 @@ function specificWord(thatWord: string): Parser { } }); } -const multipleA = sequence( - specificWord("a"), - count(allAtLeastOnce(specificWord("a"))), -) - .map(([_, count]) => ({ type: "multiple a", count: count + 1 })); - +const multipleA = specificWord("a") + .with(count(allAtLeastOnce(specificWord("a")))) + .map((count) => ({ type: "multiple a", count: count + 1 })); const repeatingLetter = match(/[a-zA-Z]/, "latin letter") .then((letter) => count(all(matchString(letter))).map<[string, number]>( @@ -143,12 +140,12 @@ const cartoucheElement = choiceOnlyOne( .map((letter) => letter.toLowerCase()) .skip(spaces), ); -const cartouche = sequence( - specificSpecialUcsur(START_OF_CARTOUCHE).skip(spaces), - allAtLeastOnce(cartoucheElement), - specificSpecialUcsur(END_OF_CARTOUCHE).skip(spaces), -) - .map(([_, words]) => { +const cartouche = specificSpecialUcsur(START_OF_CARTOUCHE) + .skip(spaces) + .with(allAtLeastOnce(cartoucheElement)) + .skip(specificSpecialUcsur(END_OF_CARTOUCHE)) + .skip(spaces) + .map((words) => { const word = words.join(""); return `${word[0].toUpperCase()}${word.slice(1)}`; 
}); @@ -164,12 +161,10 @@ function longContainer( right: string, inside: Parser, ): Parser { - return sequence( - specificSpecialUcsur(left), - inside, - specificSpecialUcsur(right), - ) - .map(([_, inside]) => inside); + return specificSpecialUcsur(left) + .with(inside) + .skip(specificSpecialUcsur(right)) + .map((inside) => inside); } const longSpaceContainer = longContainer( START_OF_LONG_GLYPH, diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 3d4efb1f..8284b272 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -141,24 +141,24 @@ function xAlaX( description: string, ): Parser { return choice( - sequence( - specificToken("headless long glyph start"), - wordFrom(useWord, description), - specificToken("inside long glyph") - .filter((words) => { - if (words.words.length !== 1) { - throw new UnexpectedError( - describe({ type: "combined glyphs", words: words.words }), - '"ala"', - ); - } - if (words.words[0] !== "ala") { - throw new UnexpectedError(`"${words.words[0]}"`, '"ala"'); - } - return true; - }), - ) - .then(([_, word]) => + specificToken("headless long glyph start") + .with(wordFrom(useWord, description)) + .skip( + specificToken("inside long glyph") + .filter((words) => { + if (words.words.length !== 1) { + throw new UnexpectedError( + describe({ type: "combined glyphs", words: words.words }), + '"ala"', + ); + } + if (words.words[0] !== "ala") { + throw new UnexpectedError(`"${words.words[0]}"`, '"ala"'); + } + return true; + }), + ) + .then((word) => specificWord(word) .skip(specificToken("headless long glyph end")) .map(() => ({ type: "x ala x", word })) @@ -370,24 +370,22 @@ const nanpa = sequence(wordUnit(new Set(["nanpa"]), '"nanpa"'), phrase) .map(([nanpa, phrase]) => ({ nanpa, phrase })) .filter(filter(NANPA_RULES)); const pi = choice( - sequence( - specificToken("headed long glyph start") - .filter((words) => { - if (words.words.length !== 1) { - throw new UnexpectedError( - describe({ type: "combined glyphs", words: words.words }), - "pi", - ); - } - if (words.words[0] !== "pi") { - throw new UnexpectedError(`"${words.words[0]}"`, "pi"); - } - return true; - }), - phrase, - specificToken("headless long glyph end"), - ) - .map(([_, phrase]) => phrase), + specificToken("headed long glyph start") + .filter((words) => { + if (words.words.length !== 1) { + throw new UnexpectedError( + describe({ type: "combined glyphs", words: words.words }), + "pi", + ); + } + if (words.words[0] !== "pi") { + throw new UnexpectedError(`"${words.words[0]}"`, "pi"); + } + return true; + }) + .with(phrase) + .skip(specificToken("headless long glyph end")) + .map((phrase) => phrase), specificWord("pi").with(phrase), ); const modifiers = sequence( @@ -501,12 +499,10 @@ const subjectPhrases = choice( singlePhrase, ); const preposition = choice( - sequence( - specificToken("headless long glyph start"), - phrase, - specificToken("headless long glyph end"), - ) - .map(([_, phrase]) => ({ + specificToken("headless long glyph start") + .with(phrase) + .skip(specificToken("headless long glyph end")) + .map((phrase) => ({ preposition: { type: "default", word: "lon", @@ -600,16 +596,18 @@ function associatedPredicates( ), many(optionalComma.with(preposition)), ) - .filter(([_, objects, prepositions]) => - objects != null || prepositions.length > 0 - ) - .sortBy(([_, _1, prepositions]) => -prepositions.length) - .map(([predicates, objects, prepositions]) => ({ + .map(( + [predicates, objects, prepositions], + ) => ({ type: "associated", predicates, objects, prepositions, - })); 
+ })) + .filter(({ objects, prepositions }) => + objects != null || prepositions.length > 0 + ) + .sortBy(({ prepositions }) => -prepositions.length); } function multiplePredicates( nestingRule: Array<"li" | "o" | "anu">, @@ -790,14 +788,7 @@ const sentence = choice( lookAhead(sequence(filler, choice(punctuation, end))).map(() => ""), ), ) - .sortBy(([_, _1, _2, anuSeme]) => { - if (anuSeme == null) { - return 1; - } else { - return 0; - } - }) - .map( + .map( ( [ kinOrTaso, @@ -808,8 +799,8 @@ const sentence = choice( punctuation, ], ) => { - const sentence = { - type: "default" as const, + const sentence: Sentence & { type: "default" } = { + type: "default", kinOrTaso, laClauses, finalClause, @@ -833,7 +824,14 @@ const sentence = choice( } return { ...sentence, interrogative }; }, - ), + ) + .sortBy(({ anuSeme }) => { + if (anuSeme == null) { + return 1; + } else { + return 0; + } + }), sequence(filler, optional(punctuation)) .map(([filler, punctuation]) => ({ type: "filler", From 2d04f0c2853e9d85529f34d5b48f9a26de36a024 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 09:41:38 +0800 Subject: [PATCH 048/199] allow sentence to be followed by multiple fillers --- src/parser/parser.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 8284b272..b5ed6476 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -785,7 +785,8 @@ const sentence = choice( choice( punctuation, end.map(() => ""), - lookAhead(sequence(filler, choice(punctuation, end))).map(() => ""), + lookAhead(sequence(manyAtLeastOnce(filler), choice(punctuation, end))) + .map(() => ""), ), ) .map( From 050e1fa02a8cc3a22c69947b885a9e7c6cefeea0 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 09:47:44 +0800 Subject: [PATCH 049/199] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2395a801..216c52b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ With this definition, ilo Token can translate "waaaa" into "woooow". There is no repetition pattern to follow for toki pona words. "wwaaa" is just as valid. - Allow custom fillers. +- Support for long "anu" glyph. - Fix sentence capitalization: If the sentence starts with number, no capitalization will occur. 
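The parser changes above ([PATCH 046/199] and [PATCH 047/199]) lean heavily on the `with`/`skip` combinators: `a.with(b)` runs both parsers but keeps only b's value, while `a.skip(b)` keeps a's, which is why chains like `specificWord("pi").with(phrase)` can replace `sequence(...).map(...)`. Below is a minimal, self-contained sketch of that semantics under simplified assumptions — a single-result parser type rather than the project's ArrayResult-based parser-lib, and made-up names (`P`, `literal`, `quoted`).

// Minimal sketch, not the project's parser-lib: a single-result parser,
// just enough to show what `with` and `skip` keep or drop.
type Parsed<T> = null | { value: T; rest: string };

class P<T> {
  constructor(readonly run: (src: string) => Parsed<T>) {}
  then<U>(mapper: (value: T) => P<U>): P<U> {
    return new P((src) => {
      const result = this.run(src);
      return result == null ? null : mapper(result.value).run(result.rest);
    });
  }
  map<U>(mapper: (value: T) => U): P<U> {
    // consume nothing extra, just transform the value
    return this.then((value) =>
      new P((src) => ({ value: mapper(value), rest: src }))
    );
  }
  // keep the right value, e.g. specificWord("pi").with(phrase)
  with<U>(other: P<U>): P<U> {
    return this.then(() => other);
  }
  // keep the left value, e.g. phrase.skip(end)
  skip<U>(other: P<U>): P<T> {
    return this.then((value) => other.map(() => value));
  }
}

function literal(text: string): P<string> {
  return new P((src) =>
    src.startsWith(text)
      ? { value: text, rest: src.slice(text.length) }
      : null
  );
}

// sequence(open, inner, close).map(([_, inner]) => inner) becomes:
const quoted = literal("[").with(literal("toki")).skip(literal("]"));
console.log(quoted.run("[toki]")); // { value: "toki", rest: "" }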
From 466942fa43f3a22db40d820f0cd4cf0359e02f8d Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 10:01:21 +0800 Subject: [PATCH 050/199] refactor --- src/parser/parser.ts | 77 +++++++++++++------------------------------- 1 file changed, 23 insertions(+), 54 deletions(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index b5ed6476..1b12712b 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -107,25 +107,27 @@ function specificWord(thatWord: string): Parser { } }); } +function filterCombinedGlyphs(words: Array, expected: string): boolean { + const description = `"${expected}"`; + if (words.length !== 1) { + throw new UnexpectedError( + describe({ type: "combined glyphs", words }), + description, + ); + } else if (words[0] !== "a") { + throw new UnexpectedError(`"${word}"`, description); + } else { + return true; + } +} const emphasis = choice( specificToken("space long glyph") - .map((longGlyph) => { - if (longGlyph.words.length !== 1) { - throw new UnexpectedError( - describe({ type: "combined glyphs", words: longGlyph.words }), - '"a"', - ); - } - const word = longGlyph.words[0]; - if (word !== "a") { - throw new UnexpectedError(`"${word}"`, '"a"'); - } - return { - type: "long word", - word, - length: longGlyph.spaceLength, - }; - }), + .filter(({ words }) => filterCombinedGlyphs(words, "a")) + .map(({ spaceLength }) => ({ + type: "long word", + word: "a", + length: spaceLength, + })), specificToken("long word") .map(({ word, length }) => { if (word !== "a") { @@ -145,18 +147,7 @@ function xAlaX( .with(wordFrom(useWord, description)) .skip( specificToken("inside long glyph") - .filter((words) => { - if (words.words.length !== 1) { - throw new UnexpectedError( - describe({ type: "combined glyphs", words: words.words }), - '"ala"', - ); - } - if (words.words[0] !== "ala") { - throw new UnexpectedError(`"${words.words[0]}"`, '"ala"'); - } - return true; - }), + .filter(({ words }) => filterCombinedGlyphs(words, "ala")), ) .then((word) => specificWord(word) @@ -371,18 +362,7 @@ const nanpa = sequence(wordUnit(new Set(["nanpa"]), '"nanpa"'), phrase) .filter(filter(NANPA_RULES)); const pi = choice( specificToken("headed long glyph start") - .filter((words) => { - if (words.words.length !== 1) { - throw new UnexpectedError( - describe({ type: "combined glyphs", words: words.words }), - "pi", - ); - } - if (words.words[0] !== "pi") { - throw new UnexpectedError(`"${words.words[0]}"`, "pi"); - } - return true; - }) + .filter(({ words }) => filterCombinedGlyphs(words, "pi")) .with(phrase) .skip(specificToken("headless long glyph end")) .map((phrase) => phrase), @@ -425,18 +405,7 @@ const longAnu = sequence( specificToken("headless long glyph start").with(phrase), manyAtLeastOnce( specificToken("inside long glyph") - .filter((words) => { - if (words.words.length !== 1) { - throw new UnexpectedError( - describe({ type: "combined glyphs", words: words.words }), - "pi", - ); - } - if (words.words[0] !== "anu") { - throw new UnexpectedError(`"${words.words[0]}"`, "anu"); - } - return true; - }) + .filter(({ words }) => filterCombinedGlyphs(words, "anu")) .with(phrase), ), ) @@ -748,7 +717,7 @@ const filler = choice( if (longGlyph.words.length !== 1) { throw new UnexpectedError( describe({ type: "combined glyphs", words: longGlyph.words }), - '"a"', + "simple glyph", ); } return { From 0a278d00746cbdb30c0c29de5da7ea5d2d5793f7 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 10:12:52 +0800 Subject: [PATCH 051/199] implement long anu seme --- src/parser/parser.ts | 
16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 1b12712b..43ca5b5e 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -738,6 +738,16 @@ const filler = choice( wordFrom(fillerSet, "filler") .map((word) => ({ type: "word", word })), ); +const seme = wordUnit(new Set(["seme"]), '"seme"'); +const anuSeme = choice( + specificToken("headed long glyph start") + .filter(({ words }) => filterCombinedGlyphs(words, "anu")) + .with(seme) + .skip(specificToken("headless long glyph end")), + optionalComma + .with(specificWord("anu")) + .with(seme), +); const sentence = choice( sequence( optional( @@ -745,11 +755,7 @@ const sentence = choice( ), many(contextClause.skip(la)), clause, - optional( - optionalComma - .with(specificWord("anu")) - .with(wordUnit(new Set(["seme"]), '"seme"')), - ), + optional(anuSeme), optionalEmphasis, choice( punctuation, From 16d00336c0311898c1cbbbe87cc73a87a452b7d3 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 10:15:46 +0800 Subject: [PATCH 052/199] memoize specific token parser --- src/parser/parser.ts | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 43ca5b5e..60782aea 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -60,22 +60,23 @@ import { UnrecognizedError, } from "./parser-lib.ts"; import { describe, Token } from "./token.ts"; +import { memoize } from "@std/cache/memoize"; const spaces = match(/\s*/, "spaces"); Parser.startCache(cache); -function specificToken( - type: T, -): Parser { - return token.map((token) => { - if (token.type === type) { - return token as Token & { type: T }; - } else { - throw new UnexpectedError(describe(token), type); - } - }); -} +const specificToken = memoize( + (type: T): Parser => { + return token.map((token) => { + if (token.type === type) { + return token as Token & { type: T }; + } else { + throw new UnexpectedError(describe(token), type); + } + }); + }, +); const punctuation = specificToken("punctuation") .map(({ punctuation }) => punctuation); const comma = punctuation From 43a82d26b6506dae8a383387d29d6c77dce01c8b Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 10:16:08 +0800 Subject: [PATCH 053/199] organize imports --- src/parser/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 60782aea..72bccf71 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -1,3 +1,4 @@ +import { memoize } from "@std/cache/memoize"; import { sumOf } from "@std/collections/sum-of"; import { ArrayResult } from "../array-result.ts"; import { @@ -60,7 +61,6 @@ import { UnrecognizedError, } from "./parser-lib.ts"; import { describe, Token } from "./token.ts"; -import { memoize } from "@std/cache/memoize"; const spaces = match(/\s*/, "spaces"); From b936333d476d45a331dd7e2110226bb386e5400e Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 10:23:12 +0800 Subject: [PATCH 054/199] memoize specific word parser --- src/parser/parser.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 72bccf71..6b387628 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -99,15 +99,15 @@ function wordFrom(set: Set, description: string): Parser { } }); } -function specificWord(thatWord: string): Parser { - return word.filter((thisWord) => { +const specificWord = memoize((thatWord: 
string) => + word.filter((thisWord) => { if (thatWord === thisWord) { return true; } else { throw new UnexpectedError(`"${thisWord}"`, `"${thatWord}"`); } - }); -} + }) +); function filterCombinedGlyphs(words: Array, expected: string): boolean { const description = `"${expected}"`; if (words.length !== 1) { From 0882bfd657c60edfd4f12eea57338ad5f8e9d143 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 10:28:21 +0800 Subject: [PATCH 055/199] reduce duplicate definitions --- src/parser/parser.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 6b387628..f41f0880 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -631,10 +631,12 @@ function multiplePredicates( ); } } +const liPredicates = multiplePredicates(["li", "anu"]); +const oPredicates = multiplePredicates(["o", "anu"]); const clause = choice( sequence( wordFrom(new Set(["mi", "sina"]), "mi/sina subject"), - multiplePredicates(["li", "anu"]), + liPredicates, ) .map(([subject, predicates]) => ({ type: "li clause", @@ -679,7 +681,7 @@ const clause = choice( subjectPhrases, optionalComma .with(specificWord("li")) - .with(multiplePredicates(["li", "anu"])), + .with(liPredicates), ) .map(([subjects, predicates]) => ({ type: "li clause", @@ -688,13 +690,13 @@ const clause = choice( explicitLi: true, })), specificWord("o") - .with(multiplePredicates(["o", "anu"])) + .with(oPredicates) .map((predicates) => ({ type: "o clause", subjects: null, predicates })), sequence( subjectPhrases, optionalComma .with(specificWord("o")) - .with(multiplePredicates(["o", "anu"])), + .with(oPredicates), ) .map(([subjects, predicates]) => ({ type: "o clause", From 1ffc4d88583ede771beb0ec4ba17eca7b70f6e9a Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 14:11:52 +0800 Subject: [PATCH 056/199] simplify parsers --- src/parser/parser-lib.ts | 3 +++ src/parser/parser.ts | 21 ++++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/parser/parser-lib.ts b/src/parser/parser-lib.ts index 0cd7edd2..04a54cd9 100644 --- a/src/parser/parser-lib.ts +++ b/src/parser/parser-lib.ts @@ -236,6 +236,9 @@ export function matchString( throw new UnexpectedError(describeSource(src), description); }); } +export const everything = new Parser((src) => + new ArrayResult([{ value: src, rest: "" }]) +); export const character = match(/./us, "character"); export const end = new Parser((src) => { if (src === "") { diff --git a/src/parser/parser.ts b/src/parser/parser.ts index f41f0880..cb7d47cb 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -49,6 +49,7 @@ import { count, empty, end, + everything, lazy, lookAhead, many, @@ -821,6 +822,13 @@ const sentence = choice( ) .filter(filter(SENTENCE_RULE)); const fullParser = spaces + .with(lookAhead(everything.filter((src) => { + if (src.trimEnd().length > 500) { + throw new UnrecognizedError("long text"); + } else { + return true; + } + }))) .with(choiceOnlyOne( wordFrom(tokiPonaWordSet, "Toki Pona word") .skip(end) @@ -830,14 +838,9 @@ const fullParser = spaces .filter(filter(MULTIPLE_SENTENCES_RULE)) .map((sentences) => ({ type: "sentences", sentences })), )); -export function parse(src: string): ArrayResult { - return ArrayResult.from(() => { - if (src.trim().length > 500) { - throw new UnrecognizedError("long text"); - } else { - return fullParser.parse(src); - } - }); -} Parser.endCache(); + +export function parse(src: string): ArrayResult { + return fullParser.parse(src); +} From 
b78fdadf3844c259887acb101006f5d6c98c047a Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 14:34:32 +0800 Subject: [PATCH 057/199] fix --- src/parser/parser-lib.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/parser-lib.ts b/src/parser/parser-lib.ts index 04a54cd9..e2814a4a 100644 --- a/src/parser/parser-lib.ts +++ b/src/parser/parser-lib.ts @@ -151,7 +151,7 @@ export function sequence>( return sequence.reduceRight( (right: Parser, left) => left.then((value) => right.map((newValue) => [value, ...newValue])), - nothing.map(() => []), + emptyArray, ) as Parser; } export const many = memoize((parser: Parser): Parser> => From 515a85752c63ac9da280a04eb2bd9be148ef6189 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 14:45:35 +0800 Subject: [PATCH 058/199] small formatting --- src/translator/determiner.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/translator/determiner.ts b/src/translator/determiner.ts index 674115f9..cf750047 100644 --- a/src/translator/determiner.ts +++ b/src/translator/determiner.ts @@ -93,7 +93,6 @@ export function fixDeterminer( [article.length > 1, encodeDeterminer`multiple articles ${article}`], [ demonstrative.length > 1, - encodeDeterminer`multiple demonstrative determiners ${demonstrative}`, ], [ From 0eaa3713a41b93da85e853cce82e8a3c85f1b4d0 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 14:51:11 +0800 Subject: [PATCH 059/199] simplify --- src/translator/determiner.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/translator/determiner.ts b/src/translator/determiner.ts index cf750047..e44e4f30 100644 --- a/src/translator/determiner.ts +++ b/src/translator/determiner.ts @@ -1,3 +1,4 @@ +import { zip } from "@std/collections/zip"; import * as Dictionary from "../../dictionary/type.ts"; import { ArrayResult } from "../array-result.ts"; import { filterSet } from "../misc.ts"; @@ -146,10 +147,8 @@ function encodeDeterminer( strings: TemplateStringsArray, ...determiners: Array> ): () => string { - return () => { - const determinerStrings = determiners.map(prettyPrintDeterminers); - return strings - .map((string, i) => `${string}${determinerStrings[i] ?? 
""}`) + return () => + zip(strings, [...determiners.map(prettyPrintDeterminers), ""]) + .flat() .join(""); - }; } From 7441d09d09d5b758ab2a4e47de7a35193f675092 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 15:42:10 +0800 Subject: [PATCH 060/199] this can probably help --- src/parser/parser-lib.ts | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/parser/parser-lib.ts b/src/parser/parser-lib.ts index e2814a4a..4f952d27 100644 --- a/src/parser/parser-lib.ts +++ b/src/parser/parser-lib.ts @@ -21,27 +21,30 @@ export class Parser { } } map(mapper: (value: T) => U): Parser { + const { unmemoizedParser } = this; return new Parser((src) => - this - .unmemoizedParser(src) + unmemoizedParser(src) .map(({ value, rest }) => ({ value: mapper(value), rest })) ); } filter(mapper: (value: T) => boolean): Parser { + const { unmemoizedParser } = this; return new Parser((src) => - this.unmemoizedParser(src).filter(({ value }) => mapper(value)) + unmemoizedParser(src).filter(({ value }) => mapper(value)) ); } then(mapper: (value: T) => Parser): Parser { const { cache } = Parser; + const { unmemoizedParser } = this; return new Parser((src) => { - const parser = Parser.inContext(() => this.unmemoizedParser(src), cache); + const parser = Parser.inContext(() => unmemoizedParser(src), cache); return parser.flatMap(({ value, rest }) => mapper(value).parser(rest)); }); } sort(comparer: (left: T, right: T) => number): Parser { + const { unmemoizedParser } = this; return new Parser((src) => - this.unmemoizedParser(src).sort((left, right) => + unmemoizedParser(src).sort((left, right) => comparer(left.value, right.value) ) ); From 254f6d076c134379f52778c3f094551c638c9d6f Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 15:45:43 +0800 Subject: [PATCH 061/199] small improvement --- src/parser/parser-lib.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/parser/parser-lib.ts b/src/parser/parser-lib.ts index 4f952d27..b099a62a 100644 --- a/src/parser/parser-lib.ts +++ b/src/parser/parser-lib.ts @@ -20,6 +20,10 @@ export class Parser { this.parser = this.unmemoizedParser; } } + get parse(): (src: string) => ArrayResult { + const { parser } = this; + return (src) => parser(src).map(({ value }) => value); + } map(mapper: (value: T) => U): Parser { const { unmemoizedParser } = this; return new Parser((src) => @@ -58,9 +62,6 @@ export class Parser { skip(parser: Parser): Parser { return sequence(this, parser).map(([arrayResult]) => arrayResult); } - parse(src: string): ArrayResult { - return this.parser(src).map(({ value }) => value); - } static addToCache(cache: Clearable): void { Parser.cache?.add(cache); } From 408ea4787e31f7cfbcc27550305e3ca364c9423b Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 15:50:34 +0800 Subject: [PATCH 062/199] small improvement --- src/parser/parser.ts | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index cb7d47cb..dea625bd 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -1,6 +1,5 @@ import { memoize } from "@std/cache/memoize"; import { sumOf } from "@std/collections/sum-of"; -import { ArrayResult } from "../array-result.ts"; import { contentWordSet, dictionary, @@ -821,7 +820,7 @@ const sentence = choice( })), ) .filter(filter(SENTENCE_RULE)); -const fullParser = spaces +export const parser = spaces .with(lookAhead(everything.filter((src) => { if (src.trimEnd().length > 500) { throw new UnrecognizedError("long text"); 
@@ -837,10 +836,7 @@ const fullParser = spaces .skip(end) .filter(filter(MULTIPLE_SENTENCES_RULE)) .map((sentences) => ({ type: "sentences", sentences })), - )); + )) + .parse; Parser.endCache(); - -export function parse(src: string): ArrayResult { - return fullParser.parse(src); -} From 1a512135109d66b8395686b18f6d39f59c5e833c Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 15:54:37 +0800 Subject: [PATCH 063/199] more improvement --- dictionary/parser.ts | 18 ++++++++++-------- src/parser/parser-lib.ts | 20 ++++++++++---------- src/parser/parser.ts | 4 ++-- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index e84d7fea..f003bb57 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -469,7 +469,7 @@ const head = sequence( .map(([init, last]) => [...init, last]); const entry = withSource(spaces.with(all(definition))) .map(([definitions, src]) => ({ definitions, src: src.trimEnd() })); -const dictionary = spaces +const dictionaryParser = spaces .with(all(sequence(head, entry))) .skip(end) .map((entries) => @@ -478,24 +478,26 @@ const dictionary = spaces words.map((word) => [word, definition]) ), ) - ); -const definitionExtract = spaces + ) + .parser; +const definitionExtractor = spaces .with(all(optionalAll(lex(head)).with(lex(match(/[^;]*;/, "definition"))))) - .skip(end); -const definitionAlone = spaces.with(definition).skip(end); + .skip(end) + .parser; +const definitionParser = spaces.with(definition).skip(end).parser; export function parseDictionary(sourceText: string): Dictionary { - const arrayResult = dictionary.parse(sourceText); + const arrayResult = dictionaryParser(sourceText); if (!arrayResult.isError()) { return arrayResult.array[0]; } else { - const definitions = definitionExtract.parse(sourceText); + const definitions = definitionExtractor(sourceText); let errors: ArrayResult; if (!definitions.isError()) { errors = ArrayResult.errors( definitions.array[0] .flatMap((definition) => - definitionAlone.parse(definition).errors.map((error) => + definitionParser(definition).errors.map((error) => new ArrayResultError(`${error.message} at ${definition.trim()}`, { cause: error, }) diff --git a/src/parser/parser-lib.ts b/src/parser/parser-lib.ts index b099a62a..73f38e52 100644 --- a/src/parser/parser-lib.ts +++ b/src/parser/parser-lib.ts @@ -7,7 +7,7 @@ export type ParserResult = ArrayResult>; export class Parser { readonly unmemoizedParser: (src: string) => ParserResult; - readonly parser: (src: string) => ParserResult; + readonly rawParser: (src: string) => ParserResult; static cache: null | Cache = null; constructor(parser: (src: string) => ParserResult) { this.unmemoizedParser = (src: string) => @@ -15,14 +15,14 @@ export class Parser { if (Parser.cache != null) { const cache = new Map>(); Parser.addToCache(cache); - this.parser = memoize(this.unmemoizedParser, { cache }); + this.rawParser = memoize(this.unmemoizedParser, { cache }); } else { - this.parser = this.unmemoizedParser; + this.rawParser = this.unmemoizedParser; } } - get parse(): (src: string) => ArrayResult { - const { parser } = this; - return (src) => parser(src).map(({ value }) => value); + get parser(): (src: string) => ArrayResult { + const { rawParser } = this; + return (src) => rawParser(src).map(({ value }) => value); } map(mapper: (value: T) => U): Parser { const { unmemoizedParser } = this; @@ -42,7 +42,7 @@ export class Parser { const { unmemoizedParser } = this; return new Parser((src) => { const parser = Parser.inContext(() => 
unmemoizedParser(src), cache); - return parser.flatMap(({ value, rest }) => mapper(value).parser(rest)); + return parser.flatMap(({ value, rest }) => mapper(value).rawParser(rest)); }); } sort(comparer: (left: T, right: T) => number): Parser { @@ -123,7 +123,7 @@ export function lazy(parser: () => Parser): Parser { } export function choice(...choices: Array>): Parser { return new Parser((src) => - new ArrayResult(choices).flatMap((parser) => parser.parser(src)) + new ArrayResult(choices).flatMap((parser) => parser.rawParser(src)) ); } export function choiceOnlyOne( @@ -132,9 +132,9 @@ export function choiceOnlyOne( return choices.reduceRight( (right, left) => new Parser((src) => { - const arrayResult = left.parser(src); + const arrayResult = left.rawParser(src); if (arrayResult.isError()) { - return ArrayResult.concat(arrayResult, right.parser(src)); + return ArrayResult.concat(arrayResult, right.rawParser(src)); } else { return arrayResult; } diff --git a/src/parser/parser.ts b/src/parser/parser.ts index dea625bd..4ae56544 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -820,7 +820,7 @@ const sentence = choice( })), ) .filter(filter(SENTENCE_RULE)); -export const parser = spaces +export const parse = spaces .with(lookAhead(everything.filter((src) => { if (src.trimEnd().length > 500) { throw new UnrecognizedError("long text"); @@ -837,6 +837,6 @@ export const parser = spaces .filter(filter(MULTIPLE_SENTENCES_RULE)) .map((sentences) => ({ type: "sentences", sentences })), )) - .parse; + .parser; Parser.endCache(); From 669b1bf21a9be917d9685ca54e58872fb1008389 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 16:01:08 +0800 Subject: [PATCH 064/199] aggregate errors from filter --- src/parser/filter.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/parser/filter.ts b/src/parser/filter.ts index b4383d6e..4858b810 100644 --- a/src/parser/filter.ts +++ b/src/parser/filter.ts @@ -1,3 +1,4 @@ +import { ArrayResult } from "../array-result.ts"; import { settings } from "../settings.ts"; import { Clause, @@ -437,7 +438,14 @@ export const MULTIPLE_SENTENCES_RULE: Array< export function filter( rules: Array<(value: T) => boolean>, ): (value: T) => boolean { - return (value) => rules.every((rule) => rule(value)); + return (value) => { + const result = new ArrayResult(rules).map((rule) => rule(value)); + if (result.isError()) { + throw new AggregateError(result.errors); + } else { + return result.array.every((result) => result); + } + }; } function modifierIsNumeric(modifier: Modifier): boolean { return modifier.type === "default" && modifier.word.type === "number"; From 2686e0736598505099a0359c34f8dc0b2b5a3dc9 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 16:17:20 +0800 Subject: [PATCH 065/199] refactor errors --- dictionary/parser.ts | 26 ++++++++++++-------------- src/array-result.ts | 17 +++++++---------- src/misc.ts | 6 ++++++ src/mod.ts | 3 ++- src/parser/filter.ts | 13 +++++-------- 5 files changed, 32 insertions(+), 33 deletions(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index f003bb57..e7af84f1 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -1,7 +1,7 @@ import { escape } from "@std/html/entities"; import nlp from "compromise/three"; -import { ArrayResult, ArrayResultError } from "../src/array-result.ts"; -import { nullableAsArray } from "../src/misc.ts"; +import { ArrayResultError } from "../src/array-result.ts"; +import { deduplicateErrors, nullableAsArray } from "../src/misc.ts"; import { 
all, allAtLeastOnce, @@ -492,21 +492,19 @@ export function parseDictionary(sourceText: string): Dictionary { return arrayResult.array[0]; } else { const definitions = definitionExtractor(sourceText); - let errors: ArrayResult; + let errors: ReadonlyArray; if (!definitions.isError()) { - errors = ArrayResult.errors( - definitions.array[0] - .flatMap((definition) => - definitionParser(definition).errors.map((error) => - new ArrayResultError(`${error.message} at ${definition.trim()}`, { - cause: error, - }) - ) - ), + errors = definitions.array[0].flatMap((definition) => + definitionParser(definition).errors.map((error) => + new ArrayResultError( + `${error.message} at ${definition.trim()}`, + { cause: error }, + ) + ) ); } else { - errors = ArrayResult.errors(arrayResult.errors); + errors = arrayResult.errors; } - throw new AggregateError(errors.deduplicateErrors().errors); + throw new AggregateError(deduplicateErrors(errors)); } } diff --git a/src/array-result.ts b/src/array-result.ts index 4eeac0ef..2c411ad6 100644 --- a/src/array-result.ts +++ b/src/array-result.ts @@ -1,4 +1,3 @@ -import { distinctBy } from "@std/collections/distinct-by"; import { flattenError } from "./misc.ts"; export type ArrayResultOptions = { @@ -49,6 +48,13 @@ export class ArrayResult { isError(): boolean { return this.array.length === 0; } + unwrap(): ReadonlyArray { + if (this.isError()) { + throw new AggregateError(this.errors); + } else { + return this.array; + } + } filter(mapper: (value: T) => boolean): ArrayResult { return this.flatMap((value) => { if (mapper(value)) { @@ -92,15 +98,6 @@ export class ArrayResult { sortBy(mapper: (value: T) => number): ArrayResult { return this.sort((left, right) => mapper(left) - mapper(right)); } - deduplicateErrors(): ArrayResult { - if (this.isError()) { - return ArrayResult.errors( - distinctBy(this.errors, ({ message }) => message), - ); - } else { - return this; - } - } addErrorWhenNone(error: () => ArrayResultError): ArrayResult { if (this.isError() && this.errors.length === 0) { return new ArrayResult(error()); diff --git a/src/misc.ts b/src/misc.ts index 72070446..aa390fc6 100644 --- a/src/misc.ts +++ b/src/misc.ts @@ -1,3 +1,4 @@ +import { distinctBy } from "@std/collections/distinct-by"; import { escape } from "@std/html/entities"; import { Lazy } from "./cache.ts"; @@ -83,3 +84,8 @@ export function lazy(fn: () => T): () => T { const cache = new Lazy(fn); return () => cache.getValue(); } +export function deduplicateErrors( + errors: ReadonlyArray, +): ReadonlyArray { + return distinctBy(errors, ({ message }) => message); +} diff --git a/src/mod.ts b/src/mod.ts index d4231207..6f2fd0c8 100644 --- a/src/mod.ts +++ b/src/mod.ts @@ -2,6 +2,7 @@ import { distinct } from "@std/collections/distinct"; import { shuffle } from "@std/random/shuffle"; import { errors } from "../telo-misikeke/telo-misikeke.js"; import { ArrayResultError } from "./array-result.ts"; +import { deduplicateErrors } from "./misc.ts"; import { settings } from "./settings.ts"; import { translate as rawTranslate } from "./translator/composer.ts"; @@ -28,7 +29,7 @@ export function translate(tokiPona: string): Array { .map((message) => new ArrayResultError(message, { isHtml: true })); } if (error.length == 0) { - error = arrayResult.deduplicateErrors().errors; + error = deduplicateErrors(arrayResult.errors); } throw new AggregateError(error); } diff --git a/src/parser/filter.ts b/src/parser/filter.ts index 4858b810..40642d38 100644 --- a/src/parser/filter.ts +++ b/src/parser/filter.ts @@ -438,14 +438,11 
@@ export const MULTIPLE_SENTENCES_RULE: Array< export function filter( rules: Array<(value: T) => boolean>, ): (value: T) => boolean { - return (value) => { - const result = new ArrayResult(rules).map((rule) => rule(value)); - if (result.isError()) { - throw new AggregateError(result.errors); - } else { - return result.array.every((result) => result); - } - }; + return (value) => + new ArrayResult(rules) + .map((rule) => rule(value)) + .unwrap() + .every((result) => result); } function modifierIsNumeric(modifier: Modifier): boolean { return modifier.type === "default" && modifier.word.type === "number"; From 6def6a0f09b655196056dd611c07903889624252 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 16:17:53 +0800 Subject: [PATCH 066/199] small formatting --- src/repl.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/repl.ts b/src/repl.ts index 8e876fb4..0d602f0c 100644 --- a/src/repl.ts +++ b/src/repl.ts @@ -16,9 +16,10 @@ if (import.meta.main) { const arrayResult = translate(input); for (const translation of arrayResult) { const count = translation.match(//g)?.length ?? 0; - const text = unescape(translation.replaceAll(/<\/?strong>/g, "%c"), { - entityList, - }); + const text = unescape( + translation.replaceAll(/<\/?strong>/g, "%c"), + { entityList }, + ); console.log( ` - ${text}`, ...repeatArray(["font-weight: bold", ""], count).flat(), From 0b587cebf85bda6f00893c6953a87f760e7709d2 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 16:20:05 +0800 Subject: [PATCH 067/199] reduce getter --- dictionary/parser.ts | 6 +++--- src/parser/parser-lib.ts | 2 +- src/parser/parser.ts | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index e7af84f1..958540af 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -479,12 +479,12 @@ const dictionaryParser = spaces ), ) ) - .parser; + .parser(); const definitionExtractor = spaces .with(all(optionalAll(lex(head)).with(lex(match(/[^;]*;/, "definition"))))) .skip(end) - .parser; -const definitionParser = spaces.with(definition).skip(end).parser; + .parser(); +const definitionParser = spaces.with(definition).skip(end).parser(); export function parseDictionary(sourceText: string): Dictionary { const arrayResult = dictionaryParser(sourceText); diff --git a/src/parser/parser-lib.ts b/src/parser/parser-lib.ts index 73f38e52..499f6b06 100644 --- a/src/parser/parser-lib.ts +++ b/src/parser/parser-lib.ts @@ -20,7 +20,7 @@ export class Parser { this.rawParser = this.unmemoizedParser; } } - get parser(): (src: string) => ArrayResult { + parser(): (src: string) => ArrayResult { const { rawParser } = this; return (src) => rawParser(src).map(({ value }) => value); } diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 4ae56544..8da12942 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -837,6 +837,6 @@ export const parse = spaces .filter(filter(MULTIPLE_SENTENCES_RULE)) .map((sentences) => ({ type: "sentences", sentences })), )) - .parser; + .parser(); Parser.endCache(); From b18c2c356a32ec4c3779e42a87709dda551258af Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 16:47:05 +0800 Subject: [PATCH 068/199] small refactor --- src/parser/token.ts | 6 ++---- src/translator/noun.ts | 12 ++++-------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/parser/token.ts b/src/parser/token.ts index 12c07442..27f1bca4 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -55,6 +55,8 @@ export function 
describe(token: Token): string { return `"${token.word.repeat(token.length)}"`; case "x ala x": return `"${token.word} ala ${token.word}"`; + case "punctuation": + return `punctuation mark "${token.punctuation}"`; case "proper word": switch (token.kind) { case "cartouche": @@ -62,9 +64,5 @@ export function describe(token: Token): string { case "latin": return `proper word "${token.words}"`; } - // this is unreachable - // fallthrough - case "punctuation": - return `punctuation mark "${token.punctuation}"`; } } diff --git a/src/translator/noun.ts b/src/translator/noun.ts index 26f4a633..3dae8669 100644 --- a/src/translator/noun.ts +++ b/src/translator/noun.ts @@ -48,6 +48,10 @@ export function fromNounForms( ): ArrayResult<{ noun: string; quantity: English.Quantity }> { const { singular, plural } = nounForms; switch (determinerNumber) { + case "singular": + case "plural": + return new ArrayResult(nullableAsArray(singular)) + .map((noun) => ({ noun, quantity: determinerNumber })); case "both": switch (settings.quantity) { case "both": @@ -72,14 +76,6 @@ export function fromNounForms( return new ArrayResult([{ noun: plural!, quantity: "plural" }]); } } - // unreachable - // fallthrough - case "singular": - return new ArrayResult(nullableAsArray(singular)) - .map((noun) => ({ noun, quantity: "singular" as const })); - case "plural": - return new ArrayResult(nullableAsArray(plural)) - .map((noun) => ({ noun, quantity: "plural" as const })); } } export function simpleNounForms( From c60ea9e15f610c6a74c0e3882f4d5e8d27e59b34 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 19:12:17 +0800 Subject: [PATCH 069/199] add tests --- deno.json | 1 + deno.lock | 12 ++++++++++++ src/parser/parser-test.ts | 12 ++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 src/parser/parser-test.ts diff --git a/deno.json b/deno.json index ecbfef66..f8b8b659 100644 --- a/deno.json +++ b/deno.json @@ -35,6 +35,7 @@ }, "imports": { "@luca/esbuild-deno-loader": "jsr:@luca/esbuild-deno-loader@^0.11.1", + "@std/assert": "jsr:@std/assert@^1.0.11", "@std/async": "jsr:@std/async@^1.0.10", "@std/cache": "jsr:@std/cache@^0.1.3", "@std/collections": "jsr:@std/collections@^1.0.10", diff --git a/deno.lock b/deno.lock index e9eacf0b..b3051caa 100644 --- a/deno.lock +++ b/deno.lock @@ -2,12 +2,14 @@ "version": "4", "specifiers": { "jsr:@luca/esbuild-deno-loader@~0.11.1": "0.11.1", + "jsr:@std/assert@^1.0.11": "1.0.11", "jsr:@std/async@^1.0.10": "1.0.10", "jsr:@std/bytes@^1.0.2": "1.0.4", "jsr:@std/cache@~0.1.3": "0.1.3", "jsr:@std/collections@^1.0.10": "1.0.10", "jsr:@std/encoding@^1.0.5": "1.0.6", "jsr:@std/html@^1.0.3": "1.0.3", + "jsr:@std/internal@^1.0.5": "1.0.5", "jsr:@std/path@^1.0.6": "1.0.8", "jsr:@std/random@0.1": "0.1.0", "jsr:@std/text@^1.0.10": "1.0.10", @@ -23,6 +25,12 @@ "jsr:@std/path" ] }, + "@std/assert@1.0.11": { + "integrity": "2461ef3c368fe88bc60e186e7744a93112f16fd110022e113a0849e94d1c83c1", + "dependencies": [ + "jsr:@std/internal" + ] + }, "@std/async@1.0.10": { "integrity": "2ff1b1c7d33d1416159989b0f69e59ec7ee8cb58510df01e454def2108b3dbec" }, @@ -41,6 +49,9 @@ "@std/html@1.0.3": { "integrity": "7a0ac35e050431fb49d44e61c8b8aac1ebd55937e0dc9ec6409aa4bab39a7988" }, + "@std/internal@1.0.5": { + "integrity": "54a546004f769c1ac9e025abd15a76b6671ddc9687e2313b67376125650dc7ba" + }, "@std/path@1.0.8": { "integrity": "548fa456bb6a04d3c1a1e7477986b6cffbce95102d0bb447c67c4ee70e0364be" }, @@ -178,6 +189,7 @@ "workspace": { "dependencies": [ "jsr:@luca/esbuild-deno-loader@~0.11.1", + 
"jsr:@std/assert@^1.0.11", "jsr:@std/async@^1.0.10", "jsr:@std/cache@~0.1.3", "jsr:@std/collections@^1.0.10", diff --git a/src/parser/parser-test.ts b/src/parser/parser-test.ts new file mode 100644 index 00000000..506d2b9a --- /dev/null +++ b/src/parser/parser-test.ts @@ -0,0 +1,12 @@ +import { assertNotEquals } from "@std/assert"; +import { parse } from "./parser.ts"; + +Deno.test("AST all distinct", () => { + const ast = parse("sina ken ala toki pona e ijo la, sina sona ala e ijo.") + .unwrap(); + for (const [i, a] of ast.entries()) { + for (const b of ast.slice(i + 1)) { + assertNotEquals(a, b); + } + } +}); From bdfc501f63426db25c73024c2b65b7256fc77b5b Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 2 Mar 2025 19:18:36 +0800 Subject: [PATCH 070/199] remove this --- src/parser/parser.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 8da12942..139a8044 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -649,7 +649,6 @@ const clause = choice( word: subject, emphasis: null, }, - alaQuestion: false, modifiers: [], emphasis: null, }, From e1e49fb9a31ec635789a89ebc856634ea2ab97da Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 08:50:42 +0800 Subject: [PATCH 071/199] remove unnecessary permission --- deno.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deno.json b/deno.json index f8b8b659..57693795 100644 --- a/deno.json +++ b/deno.json @@ -5,7 +5,7 @@ "tasks": { "build": "deno run --allow-read --allow-write --allow-env --allow-net --allow-run --no-prompt ./bundle.ts build", "repl": { - "command": "deno run --allow-env --no-prompt ./src/repl.ts", + "command": "deno run --no-prompt ./src/repl.ts", "dependencies": ["build-dictionary"] }, "start": "deno run --allow-net --allow-read --no-prompt jsr:@std/http/file-server ./dist/", From d423a34c3afd4d69e065d29c27a59dfe0167a735 Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 08:56:10 +0800 Subject: [PATCH 072/199] fix --- src/parser/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 139a8044..8bd6bea9 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -273,7 +273,7 @@ const number = choice( count(manyAtLeastOnce(ale)), ), ), - properSubAleNumber, + properSubAleNumber.filter((number) => number !== 0), ) .map>(([rest, last]) => [...rest, [last, 0]]) // Ensure the ale is in decreasing order From 6fc7f6eededca75d43923ceefcfb96760cb4d047 Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 08:57:29 +0800 Subject: [PATCH 073/199] actually, this is not the problem --- src/parser/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 8bd6bea9..6a39294e 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -273,7 +273,7 @@ const number = choice( count(manyAtLeastOnce(ale)), ), ), - properSubAleNumber.filter((number) => number !== 0), + properSubAleNumber ) .map>(([rest, last]) => [...rest, [last, 0]]) // Ensure the ale is in decreasing order From 3b0ea91c6c070480a99823471e7ddc8b26572ed3 Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 09:32:20 +0800 Subject: [PATCH 074/199] fix filter error aggregation --- src/array-result.ts | 18 +++--- src/parser/filter.ts | 137 ++++++++++++++++++++++++++----------------- 2 files changed, 92 insertions(+), 63 deletions(-) diff --git a/src/array-result.ts b/src/array-result.ts index 2c411ad6..862e3f9a 100644 --- a/src/array-result.ts +++ 
b/src/array-result.ts @@ -143,15 +143,17 @@ export class ArrayResult { try { return arrayResult(); } catch (error) { - return ArrayResult.errors(extractArrayResultError(error)); + return ArrayResult.errors(extractArrayResultError(flattenError(error))); } } } type Errors = | { type: "array result"; errors: Array } | { type: "outside"; errors: Array }; -function extractArrayResultError(error: unknown): Array { - const errors = flattenError(error).reduce( +export function extractArrayResultError( + errors: ReadonlyArray, +): ReadonlyArray { + const aggregate = errors.reduce( (errors, error) => { switch (errors.type) { case "array result": @@ -176,14 +178,14 @@ function extractArrayResultError(error: unknown): Array { errors: [], }, ); - switch (errors.type) { + switch (aggregate.type) { case "array result": - return errors.errors; + return aggregate.errors; case "outside": - if (errors.errors.length === 1) { - throw errors.errors[0]; + if (aggregate.errors.length === 1) { + throw aggregate.errors[0]; } else { - throw new AggregateError(errors.errors); + throw new AggregateError(aggregate.errors); } } } diff --git a/src/parser/filter.ts b/src/parser/filter.ts index 40642d38..6fc0a219 100644 --- a/src/parser/filter.ts +++ b/src/parser/filter.ts @@ -1,4 +1,5 @@ -import { ArrayResult } from "../array-result.ts"; +import { extractArrayResultError } from "../array-result.ts"; +import { flattenError } from "../misc.ts"; import { settings } from "../settings.ts"; import { Clause, @@ -19,7 +20,7 @@ import { } from "./extract.ts"; import { UnrecognizedError } from "./parser-lib.ts"; -export const WORD_UNIT_RULES: Array<(wordUnit: WordUnit) => boolean> = [ +export const WORD_UNIT_RULES: ReadonlyArray<(wordUnit: WordUnit) => boolean> = [ // avoid "seme ala seme" (wordUnit) => { if (wordUnit.type === "x ala x" && wordUnit.word === "seme") { @@ -28,7 +29,7 @@ export const WORD_UNIT_RULES: Array<(wordUnit: WordUnit) => boolean> = [ return true; }, ]; -export const NANPA_RULES: Array<(nanpa: Nanpa) => boolean> = [ +export const NANPA_RULES: ReadonlyArray<(nanpa: Nanpa) => boolean> = [ // disallow _nanpa ala nanpa_ (modifier) => { if (modifier.nanpa.type === "x ala x") { @@ -93,7 +94,7 @@ export const NANPA_RULES: Array<(nanpa: Nanpa) => boolean> = [ return true; }, ]; -export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ +export const MODIFIER_RULES: ReadonlyArray<(modifier: Modifier) => boolean> = [ // quotation modifier cannot exist (modifier) => { if (modifier.type === "quotation") { @@ -157,8 +158,8 @@ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ return true; }, ]; -export const MULTIPLE_MODIFIERS_RULES: Array< - (modifier: Array) => boolean +export const MULTIPLE_MODIFIERS_RULES: ReadonlyArray< + (modifier: ReadonlyArray) => boolean > = [ // // no multiple pi // (modifiers) => { @@ -231,7 +232,7 @@ export const MULTIPLE_MODIFIERS_RULES: Array< return true; }, ]; -export const PHRASE_RULE: Array<(phrase: Phrase) => boolean> = [ +export const PHRASE_RULE: ReadonlyArray<(phrase: Phrase) => boolean> = [ // Disallow quotation (phrase) => { if (phrase.type === "quotation") { @@ -280,44 +281,45 @@ export const PHRASE_RULE: Array<(phrase: Phrase) => boolean> = [ return true; }, ]; -export const PREPOSITION_RULE: Array<(phrase: Preposition) => boolean> = [ - // Disallow preverb modifiers other than "ala" - (preposition) => { - if (!modifiersIsAlaOrNone(preposition.modifiers)) { - throw new UnrecognizedError('preverb with modifiers other than "ala"'); - } - return 
true; - }, - // Disallow nested preposition - (preposition) => { - if ( - everyPhraseInMultiplePhrases(preposition.phrases) - .some(hasPrepositionInPhrase) - ) { - throw new UnrecognizedError("preposition inside preposition"); - } - return true; - }, - // Preposition with "anu" must not have emphasis particle - (preposition) => - preposition.emphasis == null || preposition.phrases.type !== "anu", - // Inner phrase must not have emphasis particle - (preposition) => - preposition.phrases.type !== "single" || - !phraseHasTopLevelEmphasis(preposition.phrases.phrase), - // Emphasis must not be nested - (preposition) => { - if ( - preposition.emphasis != null && - everyWordUnitInPreposition(preposition) - .some((wordUnit) => wordUnit.emphasis != null) - ) { - throw new UnrecognizedError("nested emphasis"); - } - return true; - }, -]; -export const CLAUSE_RULE: Array<(clause: Clause) => boolean> = [ +export const PREPOSITION_RULE: ReadonlyArray<(phrase: Preposition) => boolean> = + [ + // Disallow preverb modifiers other than "ala" + (preposition) => { + if (!modifiersIsAlaOrNone(preposition.modifiers)) { + throw new UnrecognizedError('preverb with modifiers other than "ala"'); + } + return true; + }, + // Disallow nested preposition + (preposition) => { + if ( + everyPhraseInMultiplePhrases(preposition.phrases) + .some(hasPrepositionInPhrase) + ) { + throw new UnrecognizedError("preposition inside preposition"); + } + return true; + }, + // Preposition with "anu" must not have emphasis particle + (preposition) => + preposition.emphasis == null || preposition.phrases.type !== "anu", + // Inner phrase must not have emphasis particle + (preposition) => + preposition.phrases.type !== "single" || + !phraseHasTopLevelEmphasis(preposition.phrases.phrase), + // Emphasis must not be nested + (preposition) => { + if ( + preposition.emphasis != null && + everyWordUnitInPreposition(preposition) + .some((wordUnit) => wordUnit.emphasis != null) + ) { + throw new UnrecognizedError("nested emphasis"); + } + return true; + }, + ]; +export const CLAUSE_RULE: ReadonlyArray<(clause: Clause) => boolean> = [ // disallow preposition in subject (clause) => { let phrases: MultiplePhrases; @@ -380,7 +382,7 @@ export const CLAUSE_RULE: Array<(clause: Clause) => boolean> = [ return true; }, ]; -export const SENTENCE_RULE: Array<(sentence: Sentence) => boolean> = [ +export const SENTENCE_RULE: ReadonlyArray<(sentence: Sentence) => boolean> = [ // Prevent "taso ala taso" or "kin ala kin" (sentence) => { if (sentence.type === "default") { @@ -424,8 +426,8 @@ export const SENTENCE_RULE: Array<(sentence: Sentence) => boolean> = [ return true; }, ]; -export const MULTIPLE_SENTENCES_RULE: Array< - (sentences: Array) => boolean +export const MULTIPLE_SENTENCES_RULE: ReadonlyArray< + (sentences: ReadonlyArray) => boolean > = [ // Only allow at most 2 sentences (sentences) => { @@ -436,18 +438,43 @@ export const MULTIPLE_SENTENCES_RULE: Array< }, ]; export function filter( - rules: Array<(value: T) => boolean>, + rules: ReadonlyArray<(value: T) => boolean>, ): (value: T) => boolean { - return (value) => - new ArrayResult(rules) - .map((rule) => rule(value)) - .unwrap() - .every((result) => result); + return (value) => { + const result: ReadonlyArray> = rules.map( + (rule) => { + try { + if (rule(value)) { + return null; + } else { + return []; + } + } catch (error) { + return flattenError(error); + } + }, + ); + if (result.every((result) => result == null)) { + return true; + } else { + const errors = extractArrayResultError( + 
result.flatMap((result) => result ?? []), + ); + switch (errors.length) { + case 0: + return false; + case 1: + throw errors[0]; + default: + throw new AggregateError(errors); + } + } + }; } function modifierIsNumeric(modifier: Modifier): boolean { return modifier.type === "default" && modifier.word.type === "number"; } -function modifiersIsAlaOrNone(modifiers: Array): boolean { +function modifiersIsAlaOrNone(modifiers: ReadonlyArray): boolean { switch (modifiers.length) { case 0: return true; From 457708f00a1b1cce18ec5536e2621e9cb95e2b40 Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 09:37:14 +0800 Subject: [PATCH 075/199] new test --- src/parser/parser-test.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/parser/parser-test.ts b/src/parser/parser-test.ts index 506d2b9a..8fc4cc59 100644 --- a/src/parser/parser-test.ts +++ b/src/parser/parser-test.ts @@ -1,5 +1,6 @@ import { assertNotEquals } from "@std/assert"; import { parse } from "./parser.ts"; +import { assertEquals } from "@std/assert/equals"; Deno.test("AST all distinct", () => { const ast = parse("sina ken ala toki pona e ijo la, sina sona ala e ijo.") @@ -10,3 +11,11 @@ Deno.test("AST all distinct", () => { } } }); +Deno.test("just 6", () => { + assertEquals( + parse("sina ken ala toki pona e ijo la, sina sona ala e ijo.") + .unwrap() + .length, + 6, + ); +}); From b7bc317d34f42986301c2ba6dc20b3e2b3f80113 Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 09:41:14 +0800 Subject: [PATCH 076/199] revert test --- src/parser/parser-test.ts | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/parser/parser-test.ts b/src/parser/parser-test.ts index 8fc4cc59..506d2b9a 100644 --- a/src/parser/parser-test.ts +++ b/src/parser/parser-test.ts @@ -1,6 +1,5 @@ import { assertNotEquals } from "@std/assert"; import { parse } from "./parser.ts"; -import { assertEquals } from "@std/assert/equals"; Deno.test("AST all distinct", () => { const ast = parse("sina ken ala toki pona e ijo la, sina sona ala e ijo.") @@ -11,11 +10,3 @@ Deno.test("AST all distinct", () => { } } }); -Deno.test("just 6", () => { - assertEquals( - parse("sina ken ala toki pona e ijo la, sina sona ala e ijo.") - .unwrap() - .length, - 6, - ); -}); From ea85f71cff0fd4f7c5aed1375f35377c917ad00f Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 13:35:21 +0800 Subject: [PATCH 077/199] remove this note --- dist/index.html | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dist/index.html b/dist/index.html index b1d13f3d..48719ccc 100644 --- a/dist/index.html +++ b/dist/index.html @@ -35,8 +35,7 @@

ilo Token
properly.
- An open-source rule-based Toki Pona to English translator. No data are - collected. + An open-source rule-based Toki Pona to English translator. Limitations. From 1461649a83a7912bb809fb8c6018d6ae58154ffa Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 14:13:38 +0800 Subject: [PATCH 078/199] remove numeral translation from parser --- src/dictionary.ts | 2 + src/parser/ast.ts | 2 +- src/parser/composer.ts | 2 +- src/parser/parser.ts | 81 +++---------------------------------- src/translator/modifier.ts | 21 +--------- src/translator/word-unit.ts | 20 +-------- 6 files changed, 12 insertions(+), 116 deletions(-) diff --git a/src/dictionary.ts b/src/dictionary.ts index 40903faf..1fc8a84e 100644 --- a/src/dictionary.ts +++ b/src/dictionary.ts @@ -10,6 +10,7 @@ export const contentWordSet: Set = new Set(); export const prepositionSet: Set = new Set(); export const preverbSet: Set = new Set(); export const fillerSet: Set = new Set(); +export const numeralSet: Set = new Set(); export const tokiPonaWordSet: Set = new Set(); update(); @@ -58,6 +59,7 @@ function update(): void { definition.type === "modal verb", ); redefineSet(fillerSet, (definition) => definition.type === "filler"); + redefineSet(numeralSet, (definition) => definition.type === "numeral"); redefineSet(tokiPonaWordSet, () => true); } function redefineSet( diff --git a/src/parser/ast.ts b/src/parser/ast.ts index 8705b5c9..04ac3a49 100644 --- a/src/parser/ast.ts +++ b/src/parser/ast.ts @@ -10,7 +10,7 @@ export type SimpleHeadedWordUnit = | { type: "reduplication"; word: string; count: number }; export type SimpleWordUnit = | SimpleHeadedWordUnit - | { type: "number"; number: number }; + | { type: "number"; words: Array }; export type HeadedWordUnit = & SimpleHeadedWordUnit & { emphasis: null | Emphasis }; diff --git a/src/parser/composer.ts b/src/parser/composer.ts index a998e8f7..c7f22167 100644 --- a/src/parser/composer.ts +++ b/src/parser/composer.ts @@ -39,7 +39,7 @@ function emphasisAsArray(value: null | Emphasis): Array { export function simpleWordUnit(wordUnit: SimpleWordUnit): string { switch (wordUnit.type) { case "number": - return `${wordUnit.number}`; + return wordUnit.words.join(" "); case "default": return wordUnit.word; case "x ala x": diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 6a39294e..8c4e31ed 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -1,10 +1,8 @@ import { memoize } from "@std/cache/memoize"; -import { sumOf } from "@std/collections/sum-of"; import { contentWordSet, - dictionary, fillerSet, - MissingEntryError, + numeralSet, prepositionSet, preverbSet, tokiPonaWordSet, @@ -235,74 +233,7 @@ function optionalCombined( ]), ); } -function wordToNumber(word: string): number { - const num = dictionary.get(word) - ?.definitions - .filter((definition) => definition.type === "numeral")[0] - ?.numeral; - if (num == null) { - throw new MissingEntryError("numeral", word); - } - return num; -} -const subAleNumber = sequence( - many(specificWord("mute")), - many(specificWord("luka")), - many(specificWord("tu")), - many(specificWord("wan")), -) - .map((array) => array.flat()) - .map((array) => sumOf(array, wordToNumber)); -const properSubAleNumber = subAleNumber.filter((number) => { - if (number > 100) { - throw new UnrecognizedError( - 'numbers after "ale" exceeding 100 in nasin nanpa pona', - ); - } else { - return true; - } -}); -const ale = choice(specificWord("ale"), specificWord("ali")); -const number = choice( - specificWord("ala").map(() => 0), - sequence( - 
manyAtLeastOnce( - sequence( - properSubAleNumber - .filter((number) => number !== 0), - count(manyAtLeastOnce(ale)), - ), - ), - properSubAleNumber - ) - .map>(([rest, last]) => [...rest, [last, 0]]) - // Ensure the ale is in decreasing order - .filter((numbers) => { - const sorted = numbers.every((number, i) => { - if (i === numbers.length - 1) { - return true; - } else { - const [_, firstAle] = number; - const [_1, secondAle] = numbers[i + 1]; - return firstAle > secondAle; - } - }); - if (sorted) { - return true; - } else { - throw new UnrecognizedError( - 'unordered "ale" places in nasin nanpa pona', - ); - } - }) - .map((numbers) => sumOf(numbers, ([sub, ale]) => sub * 100 ** ale)), - sequence( - count(many(ale)), - subAleNumber, - ) - .map(([ale, sub]) => ale * 100 + sub) - .filter((number) => number !== 0), -); +const number = manyAtLeastOnce(wordFrom(numeralSet, "numeral")); const phrase: Parser = lazy(() => choice( sequence( @@ -311,9 +242,9 @@ const phrase: Parser = lazy(() => modifiers, optionalEmphasis, ) - .map(([number, wordModifier, modifiers, phraseModifier]) => ({ + .map(([words, wordModifier, modifiers, phraseModifier]) => ({ type: "default", - headWord: { type: "number", number, emphasis: wordModifier }, + headWord: { type: "number", words, emphasis: wordModifier }, modifiers, emphasis: phraseModifier, })), @@ -373,9 +304,9 @@ const modifiers = sequence( many( choice( sequence(number, optionalEmphasis) - .map(([number, emphasis]) => ({ + .map(([words, emphasis]) => ({ type: "default", - word: { type: "number", number, emphasis }, + word: { type: "number", words, emphasis }, })) .filter(filter(MODIFIER_RULES)), wordUnit(contentWordSet, "modifier") diff --git a/src/translator/modifier.ts b/src/translator/modifier.ts index e1b7c565..2588733e 100644 --- a/src/translator/modifier.ts +++ b/src/translator/modifier.ts @@ -38,32 +38,13 @@ export type AdverbialModifier = { export type MultipleModifierTranslation = | ({ type: "adjectival" } & AdjectivalModifier) | ({ type: "adverbial" } & AdverbialModifier); -function numberModifier( - word: number, - emphasis: boolean, -): ModifierTranslation { - let quantity: English.Quantity; - if (word === 1) { - quantity = "singular"; - } else { - quantity = "plural"; - } - return { - type: "determiner", - determiner: { - determiner: { word: `${word}`, emphasis }, - kind: "numeral", - quantity, - }, - }; -} export function defaultModifier( wordUnit: TokiPona.WordUnit, ): ArrayResult { const emphasis = wordUnit.emphasis != null; switch (wordUnit.type) { case "number": - return new ArrayResult([numberModifier(wordUnit.number, emphasis)]); + return new ArrayResult(new TranslationTodoError("numeral")); case "x ala x": return new ArrayResult(new TranslationTodoError("x ala x")); case "default": diff --git a/src/translator/word-unit.ts b/src/translator/word-unit.ts index cc0b1da1..2624052d 100644 --- a/src/translator/word-unit.ts +++ b/src/translator/word-unit.ts @@ -12,22 +12,6 @@ export type WordUnitTranslation = | ({ type: "noun" } & PartialNoun) | { type: "adjective"; adjective: English.AdjectivePhrase } | ({ type: "verb" } & PartialVerb); -function numberWordUnit( - word: number, - emphasis: boolean, -): WordUnitTranslation { - return { - type: "noun", - determiner: [], - adjective: [], - singular: `${word}`, - plural: null, - emphasis, - reduplicationCount: 1, - perspective: "third", - postAdjective: null, - }; -} function defaultWordUnit( word: string, reduplicationCount: number, @@ -90,9 +74,7 @@ export function wordUnit( ): ArrayResult { 
switch (wordUnit.type) { case "number": - return new ArrayResult([ - numberWordUnit(wordUnit.number, wordUnit.emphasis != null), - ]); + return new ArrayResult(new TranslationTodoError("numeral")); case "x ala x": return new ArrayResult(new TranslationTodoError("x ala x")); case "default": From fa021816dc50274271fcc6e570c2eb09c3ac7cde Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 14:50:41 +0800 Subject: [PATCH 079/199] more robust test --- src/parser/parser-test.ts | 208 +++++++++++++++++++++++++++++++++++++- 1 file changed, 203 insertions(+), 5 deletions(-) diff --git a/src/parser/parser-test.ts b/src/parser/parser-test.ts index 506d2b9a..dcf47888 100644 --- a/src/parser/parser-test.ts +++ b/src/parser/parser-test.ts @@ -1,12 +1,210 @@ import { assertNotEquals } from "@std/assert"; import { parse } from "./parser.ts"; +// Examples gathered from https://github.com/kilipan/nasin-toki +const EXAMPLE_SENTENCES = [ + // "anu seme", + // "toki [ni] li pona", + "toki li pona", + "toki ni li pona", + "mi lukin e jan ni: ona li tawa tomo", + "mi lukin e ona", + "tomo mi li lili", + "soweli li moku e kili", + "soweli li pali e tomo", + "soweli li lukin e kili", + "tomo waso", + "mi pali ala", + "jan ala li lon tomo ni", + "sina utala ala e waso suli", + "ilo li tawa e jan ala", + "soweli suli pimeja", + "soweli pimeja suli", + "jan lawa pona", + "jan pona lawa", + "tomo ala mute", + "tomo mute ala", + "poki pi telo wawa", + "poki telo wawa", + "kasi li suli tan wawa suno", + "mi sama sina", + "ona li kepeken ilo", + "mi lon ala tomo", + "soweli li tawa ala kasi", + "mi wile ala toki", + "waso li kama ala suli", + "waso li musi kepeken wawa kepeken kon", + "jan li pali li pakala e tomo e ilo lon ma kepeken luka", + "jan li pali e tomo lon ma li pakala e ilo kepeken luka", + "jan li pali e tomo e ilo kepeken luka li pakala e ona lon ma", + "soweli loje li moku ala moku e kili", + "moku ala", + "kili li kama ala kama suli", + "jan li pana ala pana pi wawa mute e sike", + "soweli ala soweli li nasin e sina", + // "anu seme", + "ma sina li lete anu seme", + "lete. taso suno pini li seli a", + // "anu seme", + "jan seme li toki", + "mi toki.", + "ona li seme", + "ona li tawa tomo ona", + "sina lukin e seme", + "mi lukin e waso", + "jan ni li pona tawa mi: ona li mama e kasi", + "soweli li lukin e waso ni: ona li tawa lon ma kasi", + "jan li mama e kasi. jan ni li pona tawa mi", + "jan li mama e kasi. ona li pona tawa mi", + "waso li tawa lon ma kasi. soweli li lukin e waso ni", + "waso li tawa lon ma kasi. soweli li lukin e ona", + "sina jan nanpa wan lon tomo ni", + "ilo nanpa wan li pona. ilo nanpa tu tu li pona ala", + "tomo mi la tomo sina li loje mute", + "nanpa wan", + "pona la toki ni li nanpa wan", + "toki ni li pona nanpa wan", + "mi tomo e waso", + "mi pona e tomo", + "mi tomo e waso", + "mi luka e soweli len", + "mi tomo e waso", + "sina telo e sina", + "pona a", + "waso suwi", + "wawa tawa sina", + "tan seme a", + "jan pi toki pona", + "mi o tawa", + "jan en soweli li sama mute", + "sina en mi li toki", + "tenpo mute la kon en telo li wawa", + "mi pali", + "pali mi li pona", + "pali mi", + "mi taso li lon", + "tomo", + "mi taso", + "sina en mi li toki", + "sina en mi", + "soweli li suwi", + // "mi kama. mi oko. 
mi anpa", + "ona li pali mute li lape lili", + "soweli li pakala e kasi", + "mi lukin e mun", + "mi pali lon tomo", + "mi pali e tomo", + "ona li tawa tomo", + "ona li tawa e tomo", + "waso li mama e waso lili", + "tenpo lon la mi sitelen e lipu sona", + "mi la ni li pona", + "supa tomo li jaki la jan li telo e ona", + "kepeken ilo telo wawa la mi weka e jaki tan supa", + "jan lawa mute", + "jan pi lawa mute", + "ilo tawa lili mute", + "ilo pi tawa lili mute", + "ilo tawa pi lili mute", + // "[sina] o tawa pona", + "o tawa pona", + "sina o tawa pona", + "ona o lape", + "mi o toki pona", + "waso anu kala li tawa", + "waso li pali anu pakala e tomo", + "soweli li pali e tomo anu lupa", + "pona a", + "ike a", + "o lukin e pali mi a", + "ni li musi a tawa mi", + "ni li pona", + "ona li oko e ni", + "tomo li suwi. jan li ni kin", + "soweli li len e kili lon ma. ona li ni tan tenpo lete", + "ilo pi akesi suwi nanpa wan", + "ilo nanpa wan pi akesi suwi", + "nanpa tu li nanpa pona tawa mi", + "len pi nanpa wan li loje", + "sitelen tawa nanpa pini li musi a", + "mi toki lon kalama pana nanpa kama", + // "kin la", + // "sama la", + // "ante la", + "jan li ken pona kin", + "kin la ma li sike e suno", + "ona kin li pali e lipu", + "ona li pali kin e lipu", + "ona li pali e lipu kin", + "mi sona ala pali e tomo", + "kala li moku kepeken ala ilo", + "soweli li ken pona taso", + "taso ma li sike e suno", + "ona taso li pali e lipu", + "ona li pali taso e lipu", + "ona li pali e lipu taso", + "ona li taso", + "mi pilin ike ala tan taso mi", + "mi pana e ijo tawa ona", + "mi awen lon tomo", + "mi pali sama ona", + "mi suli tan moku", + "mi toki kepeken kalama", + "mi tawa tomo", + "mi lon telo", + "mi sama ona", + "mi kepeken ilo", + "mi tawa e soweli", + "mi lon e kala", + "mi sama e akesi", + "mi tan e ona", + "mi kepeken e ona", + "mi tawa supa e soweli", + "mi lon telo e kala", + "mi sama jan e akesi", + "mi tan utala e ona", + "mi kepeken ilo e ona", + "ona li wile pana e kili", + "jan lili li wile suli", + "mi wile tawa lon nasin noka", + "soweli suli li wile lape lon tenpo lete", + "mi sona toki pona", + "waso li sona pali e tomo", + "o awen pali e ijo", + "mi awen wile e ni", + "ona li awen weka", + "mi kama sona e toki pona", + "akesi li kama lon nasin telo", + "tomo li kama suli", + "sina ken toki tawa mi", + "jan li ken lape lon ma kasi ni", + "kala li ken soweli", + "mi lukin pini e lipu ni", + "ona li lukin tawa waso", + "mi oko jo e tomo lili lon ma kasi", + "ona li open pakala e lupa tomo", + "ona li open e pakala pi lupa tomo", + "ona li lukin pakala e lupa tomo", + "jan li open pona e ma", + "pona ma la jan li open", + "jan li kama pona e ma", + "mi pini tawa tomo sona", + "mi awen ala tawa tomo sona", + "mi kama lon tomo sona", + "soweli li pini jaki e tomo", + "tomo li kama jaki tan soweli", + "soweli li jaki e tomo", + "mi alasa sitelen e lipu pona", + "jan sona li alasa sona e ijo mute", +]; + Deno.test("AST all distinct", () => { - const ast = parse("sina ken ala toki pona e ijo la, sina sona ala e ijo.") - .unwrap(); - for (const [i, a] of ast.entries()) { - for (const b of ast.slice(i + 1)) { - assertNotEquals(a, b); + for (const sentence of EXAMPLE_SENTENCES) { + const ast = parse(sentence).unwrap(); + for (const [i, a] of ast.entries()) { + for (const b of ast.slice(i + 1)) { + assertNotEquals(a, b); + } } } }); From c650af68af4360883dff6e43b920ad7360832492 Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 14:55:56 +0800 Subject: [PATCH 080/199] rename to snake case --- 
dictionary/parser.ts | 4 ++-- dist/{nasin-nanpa.otf => nasin_nanpa.otf} | Bin dist/style.css | 2 +- project-data.json => project_data.json | 0 src/{array-result.ts => array_result.ts} | 0 src/dictionary.ts | 2 +- src/main.ts | 6 +++--- src/mod.ts | 8 ++++---- src/parser/filter.ts | 4 ++-- src/parser/lexer.ts | 2 +- src/parser/parser.ts | 2 +- src/parser/{parser-lib.ts => parser_lib.ts} | 2 +- src/parser/{parser-test.ts => parser_test.ts} | 0 src/{settings-frontend.ts => settings_frontend.ts} | 0 src/translator/adjective.ts | 2 +- src/translator/{as-string.ts => as_string.ts} | 2 +- src/translator/clause.ts | 2 +- src/translator/composer.ts | 2 +- src/translator/determiner.ts | 2 +- src/translator/error.ts | 2 +- src/translator/modifier.ts | 2 +- src/translator/noun.ts | 2 +- src/translator/phrase.ts | 4 ++-- src/translator/predicate.ts | 2 +- src/translator/preposition.ts | 2 +- src/translator/pronoun.ts | 2 +- src/translator/sentence.ts | 4 ++-- src/translator/verb.ts | 2 +- src/translator/{word-unit.ts => word_unit.ts} | 2 +- telo-misikeke/{linku-data.json => linku_data.json} | 0 .../{telo-misikeke.d.ts => telo_misikeke.d.ts} | 0 .../{telo-misikeke.js => telo_misikeke.js} | 4 ++-- 32 files changed, 35 insertions(+), 35 deletions(-) rename dist/{nasin-nanpa.otf => nasin_nanpa.otf} (100%) rename project-data.json => project_data.json (100%) rename src/{array-result.ts => array_result.ts} (100%) rename src/parser/{parser-lib.ts => parser_lib.ts} (99%) rename src/parser/{parser-test.ts => parser_test.ts} (100%) rename src/{settings-frontend.ts => settings_frontend.ts} (100%) rename src/translator/{as-string.ts => as_string.ts} (97%) rename src/translator/{word-unit.ts => word_unit.ts} (98%) rename telo-misikeke/{linku-data.json => linku_data.json} (100%) rename telo-misikeke/{telo-misikeke.d.ts => telo_misikeke.d.ts} (100%) rename telo-misikeke/{telo-misikeke.js => telo_misikeke.js} (85%) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index 958540af..0d9f4773 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -1,6 +1,6 @@ import { escape } from "@std/html/entities"; import nlp from "compromise/three"; -import { ArrayResultError } from "../src/array-result.ts"; +import { ArrayResultError } from "../src/array_result.ts"; import { deduplicateErrors, nullableAsArray } from "../src/misc.ts"; import { all, @@ -16,7 +16,7 @@ import { sourceOnly, UnexpectedError, withSource, -} from "../src/parser/parser-lib.ts"; +} from "../src/parser/parser_lib.ts"; import { Definition, Determiner, diff --git a/dist/nasin-nanpa.otf b/dist/nasin_nanpa.otf similarity index 100% rename from dist/nasin-nanpa.otf rename to dist/nasin_nanpa.otf diff --git a/dist/style.css b/dist/style.css index 0e883c2b..84c95959 100644 --- a/dist/style.css +++ b/dist/style.css @@ -1,6 +1,6 @@ @font-face { font-family: nasin-nanpa; - src: url("./nasin-nanpa.otf"); + src: url("./nasin_nanpa.otf"); } body { margin: 10px; diff --git a/project-data.json b/project_data.json similarity index 100% rename from project-data.json rename to project_data.json diff --git a/src/array-result.ts b/src/array_result.ts similarity index 100% rename from src/array-result.ts rename to src/array_result.ts diff --git a/src/dictionary.ts b/src/dictionary.ts index 1fc8a84e..5a86d564 100644 --- a/src/dictionary.ts +++ b/src/dictionary.ts @@ -1,7 +1,7 @@ import { dictionary as globalDictionary } from "../dictionary/dictionary.ts"; import { parseDictionary } from "../dictionary/parser.ts"; import { Definition, Dictionary } from 
"../dictionary/type.ts"; -import { ArrayResultError } from "./array-result.ts"; +import { ArrayResultError } from "./array_result.ts"; const customDictionary: Dictionary = new Map(); export const dictionary: Dictionary = new Map(); diff --git a/src/main.ts b/src/main.ts index 4782d5a6..e9c59dfa 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,7 +1,7 @@ import { dictionary } from "../dictionary/dictionary.ts"; import { asComment } from "../dictionary/misc.ts"; -import PROJECT_DATA from "../project-data.json" with { type: "json" }; -import { ArrayResultError } from "./array-result.ts"; +import PROJECT_DATA from "../project_data.json" with { type: "json" }; +import { ArrayResultError } from "./array_result.ts"; import { loadCustomDictionary } from "./dictionary.ts"; import { checkLocalStorage, @@ -18,7 +18,7 @@ import { loadFromLocalStorage, resetElementsToCurrent, resetElementsToDefault, -} from "./settings-frontend.ts"; +} from "./settings_frontend.ts"; import { settings } from "./settings.ts"; const TRANSLATE_LABEL = "Translate"; diff --git a/src/mod.ts b/src/mod.ts index 6f2fd0c8..0de837f0 100644 --- a/src/mod.ts +++ b/src/mod.ts @@ -1,13 +1,13 @@ import { distinct } from "@std/collections/distinct"; import { shuffle } from "@std/random/shuffle"; -import { errors } from "../telo-misikeke/telo-misikeke.js"; -import { ArrayResultError } from "./array-result.ts"; +import { errors } from "../telo-misikeke/telo_misikeke.js"; +import { ArrayResultError } from "./array_result.ts"; import { deduplicateErrors } from "./misc.ts"; import { settings } from "./settings.ts"; import { translate as rawTranslate } from "./translator/composer.ts"; -export { ArrayResultError } from "./array-result.ts"; -export type { ArrayResultOptions } from "./array-result.ts"; +export { ArrayResultError } from "./array_result.ts"; +export type { ArrayResultOptions } from "./array_result.ts"; export { loadCustomDictionary } from "./dictionary.ts"; export { clearCache } from "./parser/cache.ts"; export { defaultSettings, settings } from "./settings.ts"; diff --git a/src/parser/filter.ts b/src/parser/filter.ts index 6fc0a219..4d310083 100644 --- a/src/parser/filter.ts +++ b/src/parser/filter.ts @@ -1,4 +1,4 @@ -import { extractArrayResultError } from "../array-result.ts"; +import { extractArrayResultError } from "../array_result.ts"; import { flattenError } from "../misc.ts"; import { settings } from "../settings.ts"; import { @@ -18,7 +18,7 @@ import { everyWordUnitInPreposition, everyWordUnitInSentence, } from "./extract.ts"; -import { UnrecognizedError } from "./parser-lib.ts"; +import { UnrecognizedError } from "./parser_lib.ts"; export const WORD_UNIT_RULES: ReadonlyArray<(wordUnit: WordUnit) => boolean> = [ // avoid "seme ala seme" diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index 2be53b46..3f6d957a 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -18,7 +18,7 @@ import { sourceOnly, UnexpectedError, UnrecognizedError, -} from "./parser-lib.ts"; +} from "./parser_lib.ts"; import { Token } from "./token.ts"; import { END_OF_CARTOUCHE, diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 8c4e31ed..bca6ca48 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -57,7 +57,7 @@ import { sequence, UnexpectedError, UnrecognizedError, -} from "./parser-lib.ts"; +} from "./parser_lib.ts"; import { describe, Token } from "./token.ts"; const spaces = match(/\s*/, "spaces"); diff --git a/src/parser/parser-lib.ts b/src/parser/parser_lib.ts similarity index 99% rename from 
src/parser/parser-lib.ts rename to src/parser/parser_lib.ts index 499f6b06..477eb351 100644 --- a/src/parser/parser-lib.ts +++ b/src/parser/parser_lib.ts @@ -1,5 +1,5 @@ import { memoize } from "@std/cache/memoize"; -import { ArrayResult, ArrayResultError } from "../array-result.ts"; +import { ArrayResult, ArrayResultError } from "../array_result.ts"; import { Cache, Clearable, Lazy } from "../cache.ts"; export type ValueRest = Readonly<{ rest: string; value: T }>; diff --git a/src/parser/parser-test.ts b/src/parser/parser_test.ts similarity index 100% rename from src/parser/parser-test.ts rename to src/parser/parser_test.ts diff --git a/src/settings-frontend.ts b/src/settings_frontend.ts similarity index 100% rename from src/settings-frontend.ts rename to src/settings_frontend.ts diff --git a/src/translator/adjective.ts b/src/translator/adjective.ts index 76a5aace..1d2478ba 100644 --- a/src/translator/adjective.ts +++ b/src/translator/adjective.ts @@ -1,5 +1,5 @@ import * as Dictionary from "../../dictionary/type.ts"; -import { ArrayResult } from "../array-result.ts"; +import { ArrayResult } from "../array_result.ts"; import { nullableAsArray } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import * as English from "./ast.ts"; diff --git a/src/translator/as-string.ts b/src/translator/as_string.ts similarity index 97% rename from src/translator/as-string.ts rename to src/translator/as_string.ts index c2fcb2d4..1c8aeda9 100644 --- a/src/translator/as-string.ts +++ b/src/translator/as_string.ts @@ -1,5 +1,5 @@ import * as Dictionary from "../../dictionary/type.ts"; -import { ArrayResult } from "../array-result.ts"; +import { ArrayResult } from "../array_result.ts"; import { adjective, compoundAdjective } from "./adjective.ts"; import * as EnglishComposer from "./composer.ts"; import { nounAsPlainString, simpleNounForms } from "./noun.ts"; diff --git a/src/translator/clause.ts b/src/translator/clause.ts index 74838fec..6dd470d0 100644 --- a/src/translator/clause.ts +++ b/src/translator/clause.ts @@ -1,5 +1,5 @@ import * as Dictionary from "../../dictionary/type.ts"; -import { ArrayResult } from "../array-result.ts"; +import { ArrayResult } from "../array_result.ts"; import { nullableAsArray } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import * as English from "./ast.ts"; diff --git a/src/translator/composer.ts b/src/translator/composer.ts index 714ef4e0..756d9735 100644 --- a/src/translator/composer.ts +++ b/src/translator/composer.ts @@ -1,4 +1,4 @@ -import { ArrayResult } from "../array-result.ts"; +import { ArrayResult } from "../array_result.ts"; import { nullableAsArray } from "../misc.ts"; import { parse } from "../parser/parser.ts"; import * as English from "./ast.ts"; diff --git a/src/translator/determiner.ts b/src/translator/determiner.ts index e44e4f30..662db71b 100644 --- a/src/translator/determiner.ts +++ b/src/translator/determiner.ts @@ -1,6 +1,6 @@ import { zip } from "@std/collections/zip"; import * as Dictionary from "../../dictionary/type.ts"; -import { ArrayResult } from "../array-result.ts"; +import { ArrayResult } from "../array_result.ts"; import { filterSet } from "../misc.ts"; import * as English from "./ast.ts"; import { FilteredOutError } from "./error.ts"; diff --git a/src/translator/error.ts b/src/translator/error.ts index 9af7ba07..77db7015 100644 --- a/src/translator/error.ts +++ b/src/translator/error.ts @@ -1,4 +1,4 @@ -import { ArrayResultError, TodoError } from "../array-result.ts"; +import { ArrayResultError, TodoError 
} from "../array_result.ts"; export class TranslationTodoError extends TodoError { constructor(type: string) { diff --git a/src/translator/modifier.ts b/src/translator/modifier.ts index 2588733e..8e968a3d 100644 --- a/src/translator/modifier.ts +++ b/src/translator/modifier.ts @@ -1,4 +1,4 @@ -import { ArrayResult } from "../array-result.ts"; +import { ArrayResult } from "../array_result.ts"; import { dictionary } from "../dictionary.ts"; import * as TokiPona from "../parser/ast.ts"; import * as Composer from "../parser/composer.ts"; diff --git a/src/translator/noun.ts b/src/translator/noun.ts index 3dae8669..8a9bd68f 100644 --- a/src/translator/noun.ts +++ b/src/translator/noun.ts @@ -1,5 +1,5 @@ import * as Dictionary from "../../dictionary/type.ts"; -import { ArrayResult } from "../array-result.ts"; +import { ArrayResult } from "../array_result.ts"; import { nullableAsArray } from "../misc.ts"; import { settings } from "../settings.ts"; import { adjective } from "./adjective.ts"; diff --git a/src/translator/phrase.ts b/src/translator/phrase.ts index ed647c16..d57e4fd3 100644 --- a/src/translator/phrase.ts +++ b/src/translator/phrase.ts @@ -1,4 +1,4 @@ -import { ArrayResult } from "../array-result.ts"; +import { ArrayResult } from "../array_result.ts"; import { nullableAsArray } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import * as Composer from "../parser/composer.ts"; @@ -21,7 +21,7 @@ import { fromNounForms, PartialNoun } from "./noun.ts"; import { nounAsPreposition } from "./preposition.ts"; import { Place } from "./pronoun.ts"; import { PartialCompoundVerb, PartialVerb } from "./verb.ts"; -import { wordUnit } from "./word-unit.ts"; +import { wordUnit } from "./word_unit.ts"; import { word } from "./word.ts"; export type PhraseTranslation = diff --git a/src/translator/predicate.ts b/src/translator/predicate.ts index ab2e30ea..c03b029c 100644 --- a/src/translator/predicate.ts +++ b/src/translator/predicate.ts @@ -1,4 +1,4 @@ -import { ArrayResult } from "../array-result.ts"; +import { ArrayResult } from "../array_result.ts"; import { nullableAsArray } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import { AdjectiveWithInWay } from "./adjective.ts"; diff --git a/src/translator/preposition.ts b/src/translator/preposition.ts index 4cba90cf..a5ac6010 100644 --- a/src/translator/preposition.ts +++ b/src/translator/preposition.ts @@ -1,4 +1,4 @@ -import { ArrayResult } from "../array-result.ts"; +import { ArrayResult } from "../array_result.ts"; import * as TokiPona from "../parser/ast.ts"; import * as English from "./ast.ts"; import { TranslationTodoError } from "./error.ts"; diff --git a/src/translator/pronoun.ts b/src/translator/pronoun.ts index c29b2dee..f38ba2f2 100644 --- a/src/translator/pronoun.ts +++ b/src/translator/pronoun.ts @@ -1,5 +1,5 @@ import * as Dictionary from "../../dictionary/type.ts"; -import { ArrayResult } from "../array-result.ts"; +import { ArrayResult } from "../array_result.ts"; import * as English from "./ast.ts"; import { fromNounForms, PartialNoun } from "./noun.ts"; import { word } from "./word.ts"; diff --git a/src/translator/sentence.ts b/src/translator/sentence.ts index 4a9314d3..b0c2cf47 100644 --- a/src/translator/sentence.ts +++ b/src/translator/sentence.ts @@ -1,8 +1,8 @@ -import { ArrayResult } from "../array-result.ts"; +import { ArrayResult } from "../array_result.ts"; import { dictionary } from "../dictionary.ts"; import { repeatWithSpace } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; 
-import { definitionAsPlainString } from "./as-string.ts"; +import { definitionAsPlainString } from "./as_string.ts"; import * as English from "./ast.ts"; import { clause, contextClause } from "./clause.ts"; import { TranslationTodoError } from "./error.ts"; diff --git a/src/translator/verb.ts b/src/translator/verb.ts index c4251544..0e77bce6 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -1,5 +1,5 @@ import * as Dictionary from "../../dictionary/type.ts"; -import { ArrayResult } from "../array-result.ts"; +import { ArrayResult } from "../array_result.ts"; import { settings } from "../settings.ts"; import * as English from "./ast.ts"; import { Word } from "./ast.ts"; diff --git a/src/translator/word-unit.ts b/src/translator/word_unit.ts similarity index 98% rename from src/translator/word-unit.ts rename to src/translator/word_unit.ts index 2624052d..92bb7265 100644 --- a/src/translator/word-unit.ts +++ b/src/translator/word_unit.ts @@ -1,4 +1,4 @@ -import { ArrayResult } from "../array-result.ts"; +import { ArrayResult } from "../array_result.ts"; import { dictionary } from "../dictionary.ts"; import * as TokiPona from "../parser/ast.ts"; import { adjective, compoundAdjective } from "./adjective.ts"; diff --git a/telo-misikeke/linku-data.json b/telo-misikeke/linku_data.json similarity index 100% rename from telo-misikeke/linku-data.json rename to telo-misikeke/linku_data.json diff --git a/telo-misikeke/telo-misikeke.d.ts b/telo-misikeke/telo_misikeke.d.ts similarity index 100% rename from telo-misikeke/telo-misikeke.d.ts rename to telo-misikeke/telo_misikeke.d.ts diff --git a/telo-misikeke/telo-misikeke.js b/telo-misikeke/telo_misikeke.js similarity index 85% rename from telo-misikeke/telo-misikeke.js rename to telo-misikeke/telo_misikeke.js index e7e7a356..18169b14 100644 --- a/telo-misikeke/telo-misikeke.js +++ b/telo-misikeke/telo_misikeke.js @@ -1,10 +1,10 @@ -// @ts-self-types="./telo-misikeke.d.ts" +// @ts-self-types="./telo_misikeke.d.ts" import { escapeHtmlWithLineBreak, newlineAsHtmlLineBreak, } from "../src/misc.ts"; -import LINKU from "./linku-data.json" with { type: "json" }; +import LINKU from "./linku_data.json" with { type: "json" }; import { ParserWithCallbacks } from "./Parser.js"; import { build_rules, getMessage } from "./rules.js"; From f529fb9ff8ebf73afdde17ff7ea74f1ac3f5e05d Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 15:00:33 +0800 Subject: [PATCH 081/199] remove duplicate example sentences --- src/parser/parser_test.ts | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/parser/parser_test.ts b/src/parser/parser_test.ts index dcf47888..7887202e 100644 --- a/src/parser/parser_test.ts +++ b/src/parser/parser_test.ts @@ -42,10 +42,8 @@ const EXAMPLE_SENTENCES = [ "kili li kama ala kama suli", "jan li pana ala pana pi wawa mute e sike", "soweli ala soweli li nasin e sina", - // "anu seme", "ma sina li lete anu seme", "lete. taso suno pini li seli a", - // "anu seme", "jan seme li toki", "mi toki.", "ona li seme", @@ -66,9 +64,7 @@ const EXAMPLE_SENTENCES = [ "toki ni li pona nanpa wan", "mi tomo e waso", "mi pona e tomo", - "mi tomo e waso", "mi luka e soweli len", - "mi tomo e waso", "sina telo e sina", "pona a", "waso suwi", @@ -85,7 +81,6 @@ const EXAMPLE_SENTENCES = [ "mi taso li lon", "tomo", "mi taso", - "sina en mi li toki", "sina en mi", "soweli li suwi", // "mi kama. mi oko. 
mi anpa", @@ -114,7 +109,6 @@ const EXAMPLE_SENTENCES = [ "waso anu kala li tawa", "waso li pali anu pakala e tomo", "soweli li pali e tomo anu lupa", - "pona a", "ike a", "o lukin e pali mi a", "ni li musi a tawa mi", From 47ee4dea707161e9aee7d841c5c76d1791cefc24 Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 15:07:01 +0800 Subject: [PATCH 082/199] add mention to license --- src/parser/parser_test.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser/parser_test.ts b/src/parser/parser_test.ts index 7887202e..dd1c73ae 100644 --- a/src/parser/parser_test.ts +++ b/src/parser/parser_test.ts @@ -2,6 +2,7 @@ import { assertNotEquals } from "@std/assert"; import { parse } from "./parser.ts"; // Examples gathered from https://github.com/kilipan/nasin-toki +// CC-BY-SA 4.0 https://github.com/kilipan/nasin-toki/blob/main/LICENSE.txt const EXAMPLE_SENTENCES = [ // "anu seme", // "toki [ni] li pona", From 36256d36c0becb77568b27a43fbebeece455a9c9 Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 18:23:01 +0800 Subject: [PATCH 083/199] reimplement number translator --- src/translator/modifier.ts | 18 +++++++- src/translator/number.ts | 87 +++++++++++++++++++++++++++++++++++++ src/translator/word_unit.ts | 14 +++++- 3 files changed, 117 insertions(+), 2 deletions(-) create mode 100644 src/translator/number.ts diff --git a/src/translator/modifier.ts b/src/translator/modifier.ts index 8e968a3d..d42fdd69 100644 --- a/src/translator/modifier.ts +++ b/src/translator/modifier.ts @@ -11,6 +11,7 @@ import { TranslationTodoError, } from "./error.ts"; import { noun } from "./noun.ts"; +import { number } from "./number.ts"; import { phrase } from "./phrase.ts"; import { pronoun } from "./pronoun.ts"; import { unemphasized, word } from "./word.ts"; @@ -44,7 +45,22 @@ export function defaultModifier( const emphasis = wordUnit.emphasis != null; switch (wordUnit.type) { case "number": - return new ArrayResult(new TranslationTodoError("numeral")); + return number(wordUnit.words).map((number) => { + let quantity: English.Quantity; + if (number === 1) { + quantity = "singular"; + } else { + quantity = "plural"; + } + return { + type: "determiner" as const, + determiner: { + determiner: word(`${number}`, 1, emphasis), + kind: "numeral", + quantity, + }, + }; + }); case "x ala x": return new ArrayResult(new TranslationTodoError("x ala x")); case "default": diff --git a/src/translator/number.ts b/src/translator/number.ts new file mode 100644 index 00000000..4a269724 --- /dev/null +++ b/src/translator/number.ts @@ -0,0 +1,87 @@ +import { sumOf } from "@std/collections/sum-of"; +import { ArrayResult } from "../array_result.ts"; +import { dictionary } from "../dictionary.ts"; +import { nullableAsArray } from "../misc.ts"; +import { FilteredOutError } from "./error.ts"; + +function singleNumber(word: string): ArrayResult { + return new ArrayResult(dictionary.get(word)!.definitions) + .filterMap((definition) => { + if (definition.type === "numeral") { + return definition.numeral; + } else { + return null; + } + }); +} +function regularNumber(number: Array): number { + const duplicate = number.some((a, i) => + i < number.length - 1 && number[i + 1] !== a && + number.slice(i + 2).some((b) => a === b) + ); + if (duplicate) { + throw new FilteredOutError("separate repeated numeral"); + } else { + return sumOf(number, (number) => number); + } +} +function subHundred(number: Array): number { + const total = regularNumber(number); + if (total > 100) { + throw new FilteredOutError("ale position exceeding 
100"); + } else { + return total; + } +} +function unfilteredNasinNanpaPona( + number: Array, + previousHundredCount: number, +): number { + if (number.length === 0) { + return 0; + } else { + const aleStart = number.indexOf(100); + if (aleStart === -1) { + return subHundred(number); + } else { + let hundredCount = number + .slice(aleStart) + .findIndex((number) => number !== 100); + if (hundredCount === -1) { + hundredCount = number.length - aleStart; + } + if (previousHundredCount <= hundredCount) { + throw new FilteredOutError("unsorted ale"); + } + return subHundred(number.slice(0, aleStart)) * 100 ** hundredCount + + unfilteredNasinNanpaPona( + number.slice(aleStart + hundredCount), + hundredCount, + ); + } + } +} +function nasinNanpaPona(number: Array): null | number { + if (number.includes(0) || !number.includes(100) || number[0] === 100) { + return null; + } else { + return unfilteredNasinNanpaPona(number, Infinity); + } +} +function combineNumbers(numbers: Array): ArrayResult { + return ArrayResult.from(() => { + if (numbers.length !== 1 && numbers.includes(0)) { + throw new FilteredOutError('"ala" along with other numeral'); + } + return ArrayResult.concat( + ArrayResult.from(() => + new ArrayResult(nullableAsArray(nasinNanpaPona(numbers))) + ), + ArrayResult.from(() => new ArrayResult([regularNumber(numbers)])), + ); + }); +} +export function number(number: Array): ArrayResult { + return ArrayResult.combine(...number.map(singleNumber)) + .flatMap(combineNumbers); +} diff --git a/src/translator/word_unit.ts b/src/translator/word_unit.ts index 92bb7265..ca2460c4 100644 --- a/src/translator/word_unit.ts +++ b/src/translator/word_unit.ts @@ -5,6 +5,7 @@ import { adjective, compoundAdjective } from "./adjective.ts"; import * as English from "./ast.ts"; import { TranslationTodoError } from "./error.ts"; import { PartialNoun, partialNoun } from "./noun.ts"; +import { number } from "./number.ts"; import { partialPronoun, Place } from "./pronoun.ts"; import { PartialVerb, partialVerb } from "./verb.ts"; @@ -74,7 +75,18 @@ export function wordUnit( ): ArrayResult { switch (wordUnit.type) { case "number": - return new ArrayResult(new TranslationTodoError("numeral")); + return number(wordUnit.words) + .map((number) => ({ + type: "noun", + determiner: [], + adjective: [], + singular: `${number}`, + plural: null, + reduplicationCount: 1, + emphasis: wordUnit.emphasis != null, + perspective: "third", + postAdjective: null, + })); case "x ala x": return new ArrayResult(new TranslationTodoError("x ala x")); case "default": From dcb27601a25b5a66f956e6f1a070200dbf95a398 Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 18:29:26 +0800 Subject: [PATCH 084/199] fix --- src/translator/number.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/translator/number.ts b/src/translator/number.ts index 4a269724..1bb0ec05 100644 --- a/src/translator/number.ts +++ b/src/translator/number.ts @@ -27,8 +27,8 @@ function regularNumber(number: Array): number { } function subHundred(number: Array): number { const total = regularNumber(number); - if (total > 100) { - throw new FilteredOutError("ale position exceeding 100"); + if (total >= 100) { + throw new FilteredOutError("ale position exceeding 99"); } else { return total; } From 602792f0fdc625783ea205d8a16ba15afa946049 Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 18:31:42 +0800 Subject: [PATCH 085/199] update changelog --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 216c52b3..ac359b56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,7 +26,16 @@ wa: With this definition, ilo Token can translate "waaaa" into "woooow". There is no repetition pattern to follow for toki pona words. "wwaaa" is just as valid. +You can also now have custom numerals: + +``` +san: + 3(num); +``` + - Allow custom fillers. +- Allow custom numerals. +- Numerals are now very permissive. Something like "wan tu" is now allowed. - Support for long "anu" glyph. - Fix sentence capitalization: If the sentence starts with number, no capitalization will occur. From b13792971cc194caef8c126787c6e0ad4fa5cd79 Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 18:39:55 +0800 Subject: [PATCH 086/199] fix error messages --- src/translator/number.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/translator/number.ts b/src/translator/number.ts index 1bb0ec05..9d0ae17f 100644 --- a/src/translator/number.ts +++ b/src/translator/number.ts @@ -28,7 +28,7 @@ function regularNumber(number: Array): number { function subHundred(number: Array): number { const total = regularNumber(number); if (total >= 100) { - throw new FilteredOutError("ale position exceeding 99"); + throw new FilteredOutError('"ale" position exceeding 99'); } else { return total; } @@ -51,7 +51,7 @@ function unfilteredNasinNanpaPona( hundredCount = number.length - aleStart; } if (previousHundredCount <= hundredCount) { - throw new FilteredOutError("unsorted ale"); + throw new FilteredOutError('unsorted "ale"'); } return subHundred(number.slice(0, aleStart)) * 100 ** hundredCount + unfilteredNasinNanpaPona( From af08522e2eb6a9070772c45b1dbcda09596fc9f9 Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 19:05:24 +0800 Subject: [PATCH 087/199] update on development version --- CHANGELOG.md | 2 +- project_data.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ac359b56..10c82130 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ NOTE: Before publishing:

On development changelog -## 0.4.2 (On development) +## 0.5.0 (On development) The latest on-development version can be accessed by building the source code. On this on-development version, things can be broken. diff --git a/project_data.json b/project_data.json index a0dc1700..211a72bb 100644 --- a/project_data.json +++ b/project_data.json @@ -1,5 +1,5 @@ { - "version": "0.4.2", + "version": "0.5.0", "onDevelopment": true, "releaseDate": "2025-2-27" } From 3558aeb3e84381f2fc77759014c4cff09aaa2e57 Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 19:17:03 +0800 Subject: [PATCH 088/199] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10c82130..e9aaa3d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,7 +35,7 @@ san: - Allow custom fillers. - Allow custom numerals. -- Numerals are now very permissive. Something like "wan tu" is now allowed. +- Numerals are now very permissive. Something like "wan tu" is now allowed. It is also possible for ilo Token to output both numbers using pu extended system and nasin nanpa pona: "tu ale wan" will be translated into 103 (pu extended system) _and_ 201 (nasin nanpa pona). - Support for long "anu" glyph. - Fix sentence capitalization: If the sentence starts with number, no capitalization will occur. From 4e9f67fb2ec81f595fe92b8215952b4267a18758 Mon Sep 17 00:00:00 2001 From: Koko Date: Mon, 3 Mar 2025 19:19:03 +0800 Subject: [PATCH 089/199] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e9aaa3d6..da2259f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,7 +35,7 @@ san: - Allow custom fillers. - Allow custom numerals. -- Numerals are now very permissive. Something like "wan tu" is now allowed. It is also possible for ilo Token to output both numbers using pu extended system and nasin nanpa pona: "tu ale wan" will be translated into 103 (pu extended system) _and_ 201 (nasin nanpa pona). +- Numerals are now very permissive. Something like "wan tu" is now allowed. It is also possible for ilo Token to output both numbers using pu system and nasin nanpa pona: "tu ale wan" will be translated into 103 (pu system) _and_ 201 (nasin nanpa pona). - Support for long "anu" glyph. - Fix sentence capitalization: If the sentence starts with number, no capitalization will occur. 
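The changelog entry above states that the permissive numerals can yield two readings at once: "tu ale wan" as 103 in the pu system and 201 in nasin nanpa pona. A minimal sketch of how that behaviour can be exercised against the `number` translator from `src/translator/number.ts` (PATCH 083), written in the same style as the test file added in the next patch; the test name and the assumption that the file sits next to `number.ts` are illustrative, not part of the patch series:

```
// Sketch only: assumes placement beside src/translator/number.ts,
// like the number_test.ts introduced in the following patch.
import { assert } from "@std/assert/assert";
import { number } from "./number.ts";

Deno.test("tu ale wan has two readings", () => {
  // number() returns an ArrayResult; unwrap() yields all valid readings.
  const readings = number("tu ale wan".split(" ")).unwrap();
  assert(readings.includes(103)); // pu system: 2 + 100 + 1
  assert(readings.includes(201)); // nasin nanpa pona: 2 * 100 + 1
});
```

Only the presence of each reading is asserted, not their order, matching the `.includes` style used by the tests in the next patch.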
From f92f89de3d0b29c1c8309236c1ecd3cd6cdbef65 Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 09:37:28 +0800 Subject: [PATCH 090/199] add tests for numbers --- src/translator/number_test.ts | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 src/translator/number_test.ts diff --git a/src/translator/number_test.ts b/src/translator/number_test.ts new file mode 100644 index 00000000..12f48242 --- /dev/null +++ b/src/translator/number_test.ts @@ -0,0 +1,31 @@ +import { assert } from "@std/assert/assert"; +import { number } from "./number.ts"; + +const TESTS = new Map(Object.entries({ + "tu tu tu wan": 7, + "luka tu": 7, + "mute mute mute luka luka luka tu wan": 78, + "wan": 1, + "tu": 2, + "tu wan": 3, + "tu tu": 4, + "luka": 5, + "tu tu wan": 5, + "luka wan": 6, + "mute": 20, + "luka luka luka luka": 20, + "mute luka luka luka wan": 36, + "ale": 100, + "mute mute mute mute mute": 100, + "ale ale ale": 300, + "wan ale": 100, + "tu wan ale": 300, + "luka luka ale": 1000, + "wan ale ale": 10000, +})); +Deno.test("numeral translation", () => { + for (const [tokiPona, expected] of TESTS) { + const numbers = number(tokiPona.trim().split(" ")).unwrap(); + assert(numbers.includes(expected), `Error at ${tokiPona}`); + } +}); From 0485ef73ad8bb0d7869d40c0f72abf84fc9388b5 Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 09:41:01 +0800 Subject: [PATCH 091/199] more tests --- src/translator/number_test.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/translator/number_test.ts b/src/translator/number_test.ts index 12f48242..d6808576 100644 --- a/src/translator/number_test.ts +++ b/src/translator/number_test.ts @@ -22,6 +22,7 @@ const TESTS = new Map(Object.entries({ "tu wan ale": 300, "luka luka ale": 1000, "wan ale ale": 10000, + "mute ale mute tu tu": 2024, })); Deno.test("numeral translation", () => { for (const [tokiPona, expected] of TESTS) { From 0bc0d54af619c299279eaeec8c81cd495bc2ebb3 Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 09:41:59 +0800 Subject: [PATCH 092/199] improve error message --- src/parser/parser_test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/parser_test.ts b/src/parser/parser_test.ts index dd1c73ae..d4602148 100644 --- a/src/parser/parser_test.ts +++ b/src/parser/parser_test.ts @@ -198,7 +198,7 @@ Deno.test("AST all distinct", () => { const ast = parse(sentence).unwrap(); for (const [i, a] of ast.entries()) { for (const b of ast.slice(i + 1)) { - assertNotEquals(a, b); + assertNotEquals(a, b, `Error at ${sentence}`); } } } From 81090d7a6b6783945513feae15cfd372b00d61d5 Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 09:42:46 +0800 Subject: [PATCH 093/199] even better error message --- src/parser/parser_test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/parser_test.ts b/src/parser/parser_test.ts index d4602148..19a8d7c7 100644 --- a/src/parser/parser_test.ts +++ b/src/parser/parser_test.ts @@ -198,7 +198,7 @@ Deno.test("AST all distinct", () => { const ast = parse(sentence).unwrap(); for (const [i, a] of ast.entries()) { for (const b of ast.slice(i + 1)) { - assertNotEquals(a, b, `Error at ${sentence}`); + assertNotEquals(a, b, `Error at "${a}" and "${b}"`); } } } From 5635bccf9f2e80ebad7c696c33ab7699a836158f Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 09:43:10 +0800 Subject: [PATCH 094/199] better error message --- src/translator/number_test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/translator/number_test.ts b/src/translator/number_test.ts index d6808576..edd52e69 100644 --- a/src/translator/number_test.ts +++ b/src/translator/number_test.ts @@ -27,6 +27,6 @@ const TESTS = new Map(Object.entries({ Deno.test("numeral translation", () => { for (const [tokiPona, expected] of TESTS) { const numbers = number(tokiPona.trim().split(" ")).unwrap(); - assert(numbers.includes(expected), `Error at ${tokiPona}`); + assert(numbers.includes(expected), `Error at "${tokiPona}"`); } }); From 436aef54753a8ec1f274c4a2726a5b4b0b1eb4c7 Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 09:45:50 +0800 Subject: [PATCH 095/199] revert --- src/parser/parser_test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/parser_test.ts b/src/parser/parser_test.ts index 19a8d7c7..b30f54c3 100644 --- a/src/parser/parser_test.ts +++ b/src/parser/parser_test.ts @@ -198,7 +198,7 @@ Deno.test("AST all distinct", () => { const ast = parse(sentence).unwrap(); for (const [i, a] of ast.entries()) { for (const b of ast.slice(i + 1)) { - assertNotEquals(a, b, `Error at "${a}" and "${b}"`); + assertNotEquals(a, b, `Error at "${sentence}"`); } } } From 0f25e23f170e629ca7bea5c215231f3bf4a13761 Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 09:53:55 +0800 Subject: [PATCH 096/199] enforce immutability on arrays --- dictionary/parser.ts | 2 +- dictionary/type.ts | 12 ++++++------ src/array_result.ts | 12 +++++++----- src/main.ts | 2 +- src/misc.ts | 10 +++++----- src/mod.ts | 2 +- src/parser/ast.ts | 26 +++++++++++++------------- src/parser/composer.ts | 4 ++-- src/parser/extract.ts | 30 +++++++++++++++++------------- src/parser/lexer.ts | 2 +- src/parser/parser.ts | 23 +++++++++++++---------- src/parser/parser_lib.ts | 18 ++++++++++-------- src/parser/token.ts | 10 +++++----- src/settings_frontend.ts | 2 +- src/translator/adjective.ts | 8 ++++---- src/translator/adverb.ts | 4 +++- src/translator/ast.ts | 26 +++++++++++++------------- src/translator/composer.ts | 6 +++--- src/translator/determiner.ts | 22 +++++++++++----------- src/translator/modifier.ts | 8 ++++---- src/translator/noun.ts | 4 ++-- src/translator/number.ts | 12 ++++++------ src/translator/phrase.ts | 4 ++-- src/translator/predicate.ts | 4 ++-- src/translator/sentence.ts | 4 ++-- src/translator/verb.ts | 8 ++++---- telo-misikeke/telo_misikeke.d.ts | 2 +- telo-misikeke/update.ts | 8 +++++--- 28 files changed, 145 insertions(+), 130 deletions(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index 0d9f4773..294864fa 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -85,7 +85,7 @@ function simpleUnit(kind: string): Parser { return word.skip(tag(keyword(kind))); } function detectRepetition( - source: Array, + source: ReadonlyArray, ): { before: string; repeat: string; after: string } { if (source.length === 1) { return { before: source[0], repeat: "", after: "" }; diff --git a/dictionary/type.ts b/dictionary/type.ts index 018230b8..e3980cf0 100644 --- a/dictionary/type.ts +++ b/dictionary/type.ts @@ -3,8 +3,8 @@ export type NounForms = { plural: null | string; }; export type Noun = NounForms & { - determiner: Array; - adjective: Array; + determiner: ReadonlyArray; + adjective: ReadonlyArray; gerund: boolean; postAdjective: null | { adjective: string; @@ -43,7 +43,7 @@ export type AdjectiveType = | "material" | "qualifier"; export type Adjective = { - adverb: Array; + adverb: ReadonlyArray; adjective: string; kind: AdjectiveType; }; @@ -54,7 +54,7 @@ export type VerbForms = { }; 
export type Verb = VerbForms & { directObject: null | Noun; - indirectObject: Array<{ + indirectObject: ReadonlyArray<{ preposition: string; object: Noun; }>; @@ -74,11 +74,11 @@ export type Definition = | ({ type: "determiner" } & Determiner) | { type: "numeral"; numeral: number } | ({ type: "adjective" } & Adjective) - | { type: "compound adjective"; adjective: Array } + | { type: "compound adjective"; adjective: ReadonlyArray } | { type: "adverb"; adverb: string } | ({ type: "verb" } & Verb) | { type: "modal verb"; verb: string } | { type: "preposition"; preposition: string } | { type: "interjection"; interjection: string }; -export type Entry = { definitions: Array; src: string }; +export type Entry = { definitions: ReadonlyArray; src: string }; export type Dictionary = Map; diff --git a/src/array_result.ts b/src/array_result.ts index 862e3f9a..09ab1ade 100644 --- a/src/array_result.ts +++ b/src/array_result.ts @@ -105,7 +105,9 @@ export class ArrayResult { return this; } } - static concat(...arrayResults: Array>): ArrayResult { + static concat( + ...arrayResults: ReadonlyArray> + ): ArrayResult { return arrayResults.reduce( (left, right) => { if (left.isError() && right.isError()) { @@ -117,7 +119,7 @@ export class ArrayResult { new ArrayResult(), ); } - static combine>( + static combine>( ...arrayResults: { [I in keyof T]: ArrayResult } & { length: T["length"]; } @@ -148,8 +150,8 @@ export class ArrayResult { } } type Errors = - | { type: "array result"; errors: Array } - | { type: "outside"; errors: Array }; + | { type: "array result"; errors: ReadonlyArray } + | { type: "outside"; errors: ReadonlyArray }; export function extractArrayResultError( errors: ReadonlyArray, ): ReadonlyArray { @@ -163,7 +165,7 @@ export function extractArrayResultError( return { type: "outside", errors: [error] }; } case "outside": { - let moreError: Array; + let moreError: ReadonlyArray; if (error instanceof ArrayResultError) { moreError = []; } else { diff --git a/src/main.ts b/src/main.ts index e9c59dfa..cfa0641e 100644 --- a/src/main.ts +++ b/src/main.ts @@ -320,7 +320,7 @@ function main(): void { } }); } -function errorsFixable(errors: Array): boolean { +function errorsFixable(errors: ReadonlyArray): boolean { return errors.length > 0 && errors.every((error) => error instanceof ArrayResultError); } diff --git a/src/misc.ts b/src/misc.ts index aa390fc6..5d011eab 100644 --- a/src/misc.ts +++ b/src/misc.ts @@ -4,14 +4,14 @@ import { Lazy } from "./cache.ts"; export const NEWLINES = /\r\n|\n|\r/g; -export function nullableAsArray(value?: T): Array> { +export function nullableAsArray(value?: T): ReadonlyArray> { if (value == null) { return []; } else { return [value]; } } -export function repeatArray(element: T, count: number): Array { +export function repeatArray(element: T, count: number): ReadonlyArray { return new Array(count).fill(element); } export function repeatWithSpace(text: string, count: number): string { @@ -69,11 +69,11 @@ export function extractErrorMessage(error: unknown): string { } } export function filterSet( - set: Array<[condition: boolean, value: T]>, -): Array { + set: ReadonlyArray, +): ReadonlyArray { return set.filter(([condition]) => condition).map(([_, value]) => value); } -export function flattenError(error: unknown): Array { +export function flattenError(error: unknown): ReadonlyArray { if (error instanceof AggregateError) { return error.errors.flatMap(flattenError); } else { diff --git a/src/mod.ts b/src/mod.ts index 0de837f0..4a397026 100644 --- a/src/mod.ts +++ 
b/src/mod.ts @@ -13,7 +13,7 @@ export { clearCache } from "./parser/cache.ts"; export { defaultSettings, settings } from "./settings.ts"; export type { RedundancySettings, Settings } from "./settings.ts"; -export function translate(tokiPona: string): Array { +export function translate(tokiPona: string): ReadonlyArray { const arrayResult = rawTranslate(tokiPona); if (!arrayResult.isError()) { const values = distinct(arrayResult.array); diff --git a/src/parser/ast.ts b/src/parser/ast.ts index 04ac3a49..cc3da740 100644 --- a/src/parser/ast.ts +++ b/src/parser/ast.ts @@ -10,7 +10,7 @@ export type SimpleHeadedWordUnit = | { type: "reduplication"; word: string; count: number }; export type SimpleWordUnit = | SimpleHeadedWordUnit - | { type: "number"; words: Array }; + | { type: "number"; words: ReadonlyArray }; export type HeadedWordUnit = & SimpleHeadedWordUnit & { emphasis: null | Emphasis }; @@ -28,13 +28,13 @@ export type Phrase = | { type: "default"; headWord: WordUnit; - modifiers: Array; + modifiers: ReadonlyArray; emphasis: null | Emphasis; } | { type: "preverb"; preverb: HeadedWordUnit; - modifiers: Array; + modifiers: ReadonlyArray; phrase: Phrase; emphasis: null | Emphasis; } @@ -42,11 +42,11 @@ export type Phrase = | ({ type: "quotation" } & Quotation); export type MultiplePhrases = | { type: "single"; phrase: Phrase } - | { type: "and conjunction"; phrases: Array } - | { type: "anu"; phrases: Array }; + | { type: "and conjunction"; phrases: ReadonlyArray } + | { type: "anu"; phrases: ReadonlyArray }; export type Preposition = { preposition: HeadedWordUnit; - modifiers: Array; + modifiers: ReadonlyArray; phrases: MultiplePhrases & { type: "single" | "anu" }; emphasis: null | Emphasis; }; @@ -56,10 +56,10 @@ export type Predicate = type: "associated"; predicates: MultiplePhrases; objects: null | MultiplePhrases; - prepositions: Array; + prepositions: ReadonlyArray; } - | { type: "and conjunction"; predicates: Array } - | { type: "anu"; predicates: Array }; + | { type: "and conjunction"; predicates: ReadonlyArray } + | { type: "anu"; predicates: ReadonlyArray }; export type Clause = | { type: "phrases"; phrases: MultiplePhrases } | { type: "o vocative"; phrases: MultiplePhrases } @@ -74,7 +74,7 @@ export type Clause = subjects: null | MultiplePhrases; predicates: Predicate; } - | { type: "prepositions"; prepositions: Array } + | { type: "prepositions"; prepositions: ReadonlyArray } | ({ type: "quotation" } & Quotation); export type ContextClause = | Clause @@ -83,7 +83,7 @@ export type Sentence = | { type: "default"; kinOrTaso: null | HeadedWordUnit; - laClauses: Array; + laClauses: ReadonlyArray; finalClause: Clause; anuSeme: null | HeadedWordUnit; emphasis: null | Emphasis; @@ -97,10 +97,10 @@ export type Sentence = interrogative: null | "seme" | "x ala x"; }; export type Quotation = { - sentences: Array; + sentences: ReadonlyArray; leftMark: string; rightMark: string; }; export type MultipleSentences = | { type: "single word"; word: string } - | { type: "sentences"; sentences: Array }; + | { type: "sentences"; sentences: ReadonlyArray }; diff --git a/src/parser/composer.ts b/src/parser/composer.ts index c7f22167..540d4cef 100644 --- a/src/parser/composer.ts +++ b/src/parser/composer.ts @@ -33,7 +33,7 @@ export function filler(filler: Filler): string { return emphasis(filler); } } -function emphasisAsArray(value: null | Emphasis): Array { +function emphasisAsArray(value: null | Emphasis): ReadonlyArray { return nullableAsArray(value).map(emphasis); } export function 
simpleWordUnit(wordUnit: SimpleWordUnit): string { @@ -157,7 +157,7 @@ export function clause(clause: Clause): string { case "o vocative": return `${multiplePhrases(clause.phrases, "en")} o`; case "li clause": { - let li: Array; + let li: ReadonlyArray; if (clause.explicitLi) { li = ["li"]; } else { diff --git a/src/parser/extract.ts b/src/parser/extract.ts index 4fafb4e3..c2b768bb 100644 --- a/src/parser/extract.ts +++ b/src/parser/extract.ts @@ -12,10 +12,12 @@ import { WordUnit, } from "./ast.ts"; -export function everyWordUnitInNanpa(nanpa: Nanpa): Array { +export function everyWordUnitInNanpa(nanpa: Nanpa): ReadonlyArray { return [nanpa.nanpa, ...everyWordUnitInPhrase(nanpa.phrase)]; } -export function everyWordUnitInModifier(modifier: Modifier): Array { +export function everyWordUnitInModifier( + modifier: Modifier, +): ReadonlyArray { switch (modifier.type) { case "default": return [modifier.word]; @@ -28,7 +30,7 @@ export function everyWordUnitInModifier(modifier: Modifier): Array { return []; } } -export function everyWordUnitInPhrase(phrase: Phrase): Array { +export function everyWordUnitInPhrase(phrase: Phrase): ReadonlyArray { switch (phrase.type) { case "default": return [ @@ -49,12 +51,12 @@ export function everyWordUnitInPhrase(phrase: Phrase): Array { } export function everyWordUnitInMultiplePhrases( phrase: MultiplePhrases, -): Array { +): ReadonlyArray { return everyPhraseInMultiplePhrases(phrase).flatMap(everyWordUnitInPhrase); } export function everyWordUnitInPreposition( preposition: Preposition, -): Array { +): ReadonlyArray { return [ preposition.preposition, ...preposition.modifiers.flatMap(everyWordUnitInModifier), @@ -63,7 +65,7 @@ export function everyWordUnitInPreposition( } export function everyWordUnitInMultiplePredicates( predicate: Predicate, -): Array { +): ReadonlyArray { switch (predicate.type) { case "single": return everyWordUnitInPhrase(predicate.predicate); @@ -79,7 +81,7 @@ export function everyWordUnitInMultiplePredicates( return predicate.predicates.flatMap(everyWordUnitInMultiplePredicates); } } -export function everyWordUnitInClause(clause: Clause): Array { +export function everyWordUnitInClause(clause: Clause): ReadonlyArray { switch (clause.type) { case "phrases": case "o vocative": @@ -103,7 +105,7 @@ export function everyWordUnitInClause(clause: Clause): Array { } export function everyWordUnitInContextClause( contextClause: ContextClause, -): Array { +): ReadonlyArray { switch (contextClause.type) { case "nanpa": return everyWordUnitInNanpa(contextClause); @@ -111,7 +113,9 @@ export function everyWordUnitInContextClause( return everyWordUnitInClause(contextClause); } } -export function everyWordUnitInSentence(sentence: Sentence): Array { +export function everyWordUnitInSentence( + sentence: Sentence, +): ReadonlyArray { switch (sentence.type) { case "default": return [ @@ -124,7 +128,7 @@ export function everyWordUnitInSentence(sentence: Sentence): Array { return []; } } -export function everyModifierInPhrase(phrase: Phrase): Array { +export function everyModifierInPhrase(phrase: Phrase): ReadonlyArray { switch (phrase.type) { case "default": return phrase.modifiers; @@ -144,12 +148,12 @@ export function everyModifierInPhrase(phrase: Phrase): Array { } export function everyModifierInMultiplePhrases( phrases: MultiplePhrases, -): Array { +): ReadonlyArray { return everyPhraseInMultiplePhrases(phrases).flatMap(everyModifierInPhrase); } export function everyPhraseInMultiplePhrases( phrases: MultiplePhrases, -): Array { +): ReadonlyArray { switch 
(phrases.type) { case "single": return [phrases.phrase]; @@ -160,7 +164,7 @@ export function everyPhraseInMultiplePhrases( } export function everyObjectInMultiplePredicates( predicates: Predicate, -): Array { +): ReadonlyArray { switch (predicates.type) { case "single": return []; diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index 3f6d957a..c904a91f 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -77,7 +77,7 @@ const multipleA = specificWord("a") .map((count) => ({ type: "multiple a", count: count + 1 })); const repeatingLetter = match(/[a-zA-Z]/, "latin letter") .then((letter) => - count(all(matchString(letter))).map<[string, number]>( + count(all(matchString(letter))).map( (count) => [letter, count + 1], ) ); diff --git a/src/parser/parser.ts b/src/parser/parser.ts index bca6ca48..5509176f 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -106,7 +106,10 @@ const specificWord = memoize((thatWord: string) => } }) ); -function filterCombinedGlyphs(words: Array, expected: string): boolean { +function filterCombinedGlyphs( + words: ReadonlyArray, + expected: string, +): boolean { const description = `"${expected}"`; if (words.length !== 1) { throw new UnexpectedError( @@ -171,7 +174,7 @@ function simpleWordUnit( wordFrom(word, description) .then((word) => count(manyAtLeastOnce(specificWord(word))) - .map<[string, number]>((count) => [word, count + 1]) + .map((count) => [word, count + 1]) ), ) .map(([[word, count]]) => ({ @@ -201,7 +204,7 @@ function wordUnit( function binaryWords( word: Set, description: string, -): Parser<[string, string]> { +): Parser { return specificToken("combined glyphs").map(({ words }) => { if (words.length > 2) { throw new UnrecognizedError( @@ -219,12 +222,12 @@ function binaryWords( function optionalCombined( word: Set, description: string, -): Parser<[HeadedWordUnit, null | Modifier]> { - return choice<[HeadedWordUnit, null | Modifier]>( +): Parser { + return choice( wordUnit(word, description) .map((wordUnit) => [wordUnit, null]), binaryWords(word, description) - .map<[HeadedWordUnit, null | Modifier]>(([first, second]) => [ + .map(([first, second]) => [ { type: "default", word: first, emphasis: null }, { type: "default", @@ -344,7 +347,7 @@ const longAnu = sequence( .skip(specificToken("headless long glyph end")) .map(([phrase, morePhrase]) => [phrase, ...morePhrase]); function nestedPhrasesOnly( - nestingRule: Array<"en" | "li" | "o" | "e" | "anu">, + nestingRule: ReadonlyArray<"en" | "li" | "o" | "e" | "anu">, ): Parser { if (nestingRule.length === 0) { return singlePhrase; @@ -383,7 +386,7 @@ function nestedPhrasesOnly( } } function nestedPhrases( - nestingRule: Array<"en" | "li" | "o" | "e" | "anu">, + nestingRule: ReadonlyArray<"en" | "li" | "o" | "e" | "anu">, ): Parser { if (nestingRule.length === 0) { return singlePhrase; @@ -486,7 +489,7 @@ const preposition = choice( ) .filter(filter(PREPOSITION_RULE)); function associatedPredicates( - nestingRule: Array<"li" | "o" | "anu">, + nestingRule: ReadonlyArray<"li" | "o" | "anu">, ): Parser { return sequence( nestedPhrasesOnly(nestingRule), @@ -511,7 +514,7 @@ function associatedPredicates( .sortBy(({ prepositions }) => -prepositions.length); } function multiplePredicates( - nestingRule: Array<"li" | "o" | "anu">, + nestingRule: ReadonlyArray<"li" | "o" | "anu">, ): Parser { if (nestingRule.length === 0) { return choice( diff --git a/src/parser/parser_lib.ts b/src/parser/parser_lib.ts index 477eb351..3a6936ee 100644 --- a/src/parser/parser_lib.ts +++ 
b/src/parser/parser_lib.ts @@ -121,13 +121,13 @@ export function lazy(parser: () => Parser): Parser { ); } } -export function choice(...choices: Array>): Parser { +export function choice(...choices: ReadonlyArray>): Parser { return new Parser((src) => new ArrayResult(choices).flatMap((parser) => parser.rawParser(src)) ); } export function choiceOnlyOne( - ...choices: Array> + ...choices: ReadonlyArray> ): Parser { return choices.reduceRight( (right, left) => @@ -148,7 +148,7 @@ export function optional(parser: Parser): Parser { export function optionalAll(parser: Parser): Parser { return choiceOnlyOne(parser, nothing); } -export function sequence>( +export function sequence>( ...sequence: { [I in keyof T]: Parser } & { length: T["length"] } ): Parser { // We resorted to using `any` types here, make sure it works properly @@ -158,25 +158,27 @@ export function sequence>( emptyArray, ) as Parser; } -export const many = memoize((parser: Parser): Parser> => +export const many = memoize((parser: Parser): Parser> => choice( sequence(parser, lazy(() => many(parser))) .map(([first, rest]) => [first, ...rest]), emptyArray, ) ); -export function manyAtLeastOnce(parser: Parser): Parser> { +export function manyAtLeastOnce( + parser: Parser, +): Parser> { return sequence(parser, many(parser)) .map(([first, rest]) => [first, ...rest]); } -export const all = memoize((parser: Parser): Parser> => +export const all = memoize((parser: Parser): Parser> => choiceOnlyOne( sequence(parser, lazy(() => all(parser))) .map(([first, rest]) => [first, ...rest]), emptyArray, ) ); -export function allAtLeastOnce(parser: Parser): Parser> { +export function allAtLeastOnce(parser: Parser): Parser> { return sequence(parser, all(parser)) .map(([first, rest]) => [first, ...rest]); } @@ -252,7 +254,7 @@ export const end = new Parser((src) => { }); export function withSource( parser: Parser, -): Parser<[value: T, source: string]> { +): Parser { return new Parser((src) => parser.unmemoizedParser(src).map((value) => ({ value: [value.value, src.slice(0, src.length - value.rest.length)], diff --git a/src/parser/token.ts b/src/parser/token.ts index 27f1bca4..aed7fa83 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -4,16 +4,16 @@ export type Token = | { type: "word"; word: string } | { type: "combined glyphs"; - words: Array; + words: ReadonlyArray; } | { type: "space long glyph"; - words: Array; + words: ReadonlyArray; spaceLength: number; } | { type: "headed long glyph start"; - words: Array; + words: ReadonlyArray; } | { type: "headless long glyph end"; @@ -23,11 +23,11 @@ export type Token = } | { type: "headed long glyph end"; - words: Array; + words: ReadonlyArray; } | { type: "inside long glyph"; - words: Array; + words: ReadonlyArray; } | { type: "multiple a"; count: number } | { type: "long word"; word: string; length: number } diff --git a/src/settings_frontend.ts b/src/settings_frontend.ts index cc5daf53..921d74f0 100644 --- a/src/settings_frontend.ts +++ b/src/settings_frontend.ts @@ -59,7 +59,7 @@ const UPDATERS: Readonly<{ [K in keyof Settings]: Updater }> = { xAlaXPartialParsing: BOOL_UPDATER, separateRepeatedModifiers: BOOL_UPDATER, }; -const KEYS = Object.keys(UPDATERS) as Array; +const KEYS = Object.keys(UPDATERS) as ReadonlyArray; function loadOneFromLocalStorage(key: T): void { const src = localStorage.getItem(key); if (src != null) { diff --git a/src/translator/adjective.ts b/src/translator/adjective.ts index 1d2478ba..0b08d96e 100644 --- a/src/translator/adjective.ts +++ 
b/src/translator/adjective.ts @@ -41,7 +41,7 @@ export function adjective( })); } export function compoundAdjective( - adjectives: Array, + adjectives: ReadonlyArray, reduplicationCount: number, emphasis: null | TokiPona.Emphasis, ): ArrayResult { @@ -79,15 +79,15 @@ export function rankAdjective(kind: Dictionary.AdjectiveType): number { .indexOf(kind); } export function fixAdjective( - adjective: Array, -): Array { + adjective: ReadonlyArray, +): ReadonlyArray { return adjective .flatMap((adjective) => { switch (adjective.type) { case "simple": return [adjective]; case "compound": - return adjective.adjective as Array< + return adjective.adjective as ReadonlyArray< English.AdjectivePhrase & { type: "simple" } >; } diff --git a/src/translator/adverb.ts b/src/translator/adverb.ts index fc44cf80..ee70a230 100644 --- a/src/translator/adverb.ts +++ b/src/translator/adverb.ts @@ -1,7 +1,9 @@ import * as English from "./ast.ts"; import { FilteredOutError } from "./error.ts"; -export function fixAdverb(adverb: Array): Array { +export function fixAdverb( + adverb: ReadonlyArray, +): ReadonlyArray { if (adverb.length > 1) { throw new FilteredOutError("multiple adverbs"); } else { diff --git a/src/translator/ast.ts b/src/translator/ast.ts index 0c4fa8a4..762710c8 100644 --- a/src/translator/ast.ts +++ b/src/translator/ast.ts @@ -8,19 +8,19 @@ export type Quantity = "singular" | "plural" | "condensed"; export type NounPhrase = | { type: "simple"; - determiner: Array; - adjective: Array; + determiner: ReadonlyArray; + adjective: ReadonlyArray; noun: Word; quantity: Quantity; perspective: Dictionary.Perspective; postAdjective: null | { adjective: string; name: string }; - preposition: Array; + preposition: ReadonlyArray; emphasis: boolean; } | { type: "compound"; conjunction: string; - nouns: Array; + nouns: ReadonlyArray; quantity: Quantity; }; export type Determiner = { @@ -32,14 +32,14 @@ export type AdjectivePhrase = | { type: "simple"; kind: Dictionary.AdjectiveType; - adverb: Array; + adverb: ReadonlyArray; adjective: Word; emphasis: boolean; } | { type: "compound"; conjunction: string; - adjective: Array; + adjective: ReadonlyArray; emphasis: boolean; }; export type Complement = @@ -47,27 +47,27 @@ export type Complement = | { type: "adjective"; adjective: AdjectivePhrase }; export type Verb = { modal: null | Word; - finite: Array; + finite: ReadonlyArray; infinite: Word; }; export type VerbPhrase = | { type: "default"; - adverb: Array; + adverb: ReadonlyArray; verb: Verb; subjectComplement: null | Complement; object: null | NounPhrase; objectComplement: null | Complement; - preposition: Array; + preposition: ReadonlyArray; hideVerb: boolean; } | { type: "compound"; conjunction: string; - verbs: Array; + verbs: ReadonlyArray; object: null | NounPhrase; objectComplement: null | Complement; - preposition: Array; + preposition: ReadonlyArray; }; export type Clause = | { type: "free form"; text: string } @@ -82,11 +82,11 @@ export type Clause = | { type: "vocative"; call: string; addressee: NounPhrase } | { type: "dependent"; conjunction: Word; clause: Clause }; export type Preposition = { - adverb: Array; + adverb: ReadonlyArray; preposition: Word; object: NounPhrase; }; export type Sentence = { - clauses: Array; + clauses: ReadonlyArray; punctuation: string; }; diff --git a/src/translator/composer.ts b/src/translator/composer.ts index 756d9735..b349f9d7 100644 --- a/src/translator/composer.ts +++ b/src/translator/composer.ts @@ -21,7 +21,7 @@ function word(word: English.Word): string { } } 
function compound( - elements: Array, + elements: ReadonlyArray, conjunction: string, depth: number, ): string { @@ -93,7 +93,7 @@ export function verb(phrase: English.VerbPhrase, depth: number): string { let text: string; switch (phrase.type) { case "default": { - let verbText: Array; + let verbText: ReadonlyArray; if (phrase.hideVerb) { verbText = []; } else { @@ -128,7 +128,7 @@ export function verb(phrase: English.VerbPhrase, depth: number): string { .join(" "); } function defaultClause(clause: English.Clause & { type: "default" }): string { - let subject: Array; + let subject: ReadonlyArray; if (clause.hideSubject) { subject = []; } else { diff --git a/src/translator/determiner.ts b/src/translator/determiner.ts index 662db71b..6dafe995 100644 --- a/src/translator/determiner.ts +++ b/src/translator/determiner.ts @@ -8,26 +8,26 @@ import { simpleNounForms } from "./noun.ts"; import { word } from "./word.ts"; function prettyPrintDeterminers( - determiners: Array, + determiners: ReadonlyArray, ): string { return `(${ determiners.map((determiner) => determiner.determiner).join(` `) })`; } function filterKind( - determiners: Array, - kinds: Array, -): Array { + determiners: ReadonlyArray, + kinds: ReadonlyArray, +): ReadonlyArray { return determiners.filter((determiner) => kinds.includes(determiner.kind)); } function filterQuantity( - determiners: Array, + determiners: ReadonlyArray, quantity: Dictionary.Quantity, -): Array { +): ReadonlyArray { return determiners.filter((determiner) => determiner.quantity === quantity); } function check( - quantities: Array, + quantities: ReadonlyArray, some: Dictionary.Quantity, not: Dictionary.Quantity, ): boolean { @@ -35,7 +35,7 @@ function check( quantities.every((quantity) => quantity !== not); } export function findNumber( - determiners: Array, + determiners: ReadonlyArray, ): Dictionary.Quantity { const quantities = determiners.map((determiner) => determiner.quantity); if (quantities.every((quantity) => quantity === `both`)) { @@ -72,8 +72,8 @@ export function determiner( })); } export function fixDeterminer( - determiner: Array, -): Array { + determiner: ReadonlyArray, +): ReadonlyArray { const negative = filterKind(determiner, [`negative`]); const first = filterKind(determiner, [ `article`, @@ -145,7 +145,7 @@ export function fixDeterminer( } function encodeDeterminer( strings: TemplateStringsArray, - ...determiners: Array> + ...determiners: ReadonlyArray> ): () => string { return () => zip(strings, [...determiners.map(prettyPrintDeterminers), ""]) diff --git a/src/translator/modifier.ts b/src/translator/modifier.ts index d42fdd69..5163a9e1 100644 --- a/src/translator/modifier.ts +++ b/src/translator/modifier.ts @@ -26,14 +26,14 @@ export type ModifierTranslation = | { type: "in position phrase"; noun: English.NounPhrase }; export type AdjectivalModifier = { nounPreposition: null | { noun: English.NounPhrase; preposition: string }; - determiner: Array; - adjective: Array; + determiner: ReadonlyArray; + adjective: ReadonlyArray; name: null | string; ofPhrase: null | English.NounPhrase; inPositionPhrase: null | English.NounPhrase; }; export type AdverbialModifier = { - adverb: Array; + adverb: ReadonlyArray; inWayPhrase: null | English.NounPhrase; }; export type MultipleModifierTranslation = @@ -197,7 +197,7 @@ function modifier( } } export function multipleModifiers( - modifiers: Array, + modifiers: ReadonlyArray, ): ArrayResult { return ArrayResult.combine(...modifiers.map(modifier)) .flatMap((modifiers) => { diff --git a/src/translator/noun.ts 
b/src/translator/noun.ts index 8a9bd68f..c177d77f 100644 --- a/src/translator/noun.ts +++ b/src/translator/noun.ts @@ -10,8 +10,8 @@ import { condense } from "./misc.ts"; import { word } from "./word.ts"; export type PartialNoun = Dictionary.NounForms & { - determiner: Array; - adjective: Array; + determiner: ReadonlyArray; + adjective: ReadonlyArray; reduplicationCount: number; emphasis: boolean; perspective: Dictionary.Perspective; diff --git a/src/translator/number.ts b/src/translator/number.ts index 9d0ae17f..c2cb199a 100644 --- a/src/translator/number.ts +++ b/src/translator/number.ts @@ -14,7 +14,7 @@ function singleNumber(word: string): ArrayResult { } }); } -function regularNumber(number: Array): number { +function regularNumber(number: ReadonlyArray): number { const duplicate = number.some((a, i) => i < number.length - 1 && number[i + 1] !== a && number.slice(i + 2).some((b) => a === b) @@ -25,7 +25,7 @@ function regularNumber(number: Array): number { return sumOf(number, (number) => number); } } -function subHundred(number: Array): number { +function subHundred(number: ReadonlyArray): number { const total = regularNumber(number); if (total >= 100) { throw new FilteredOutError('"ale" position exceeding 99'); @@ -34,7 +34,7 @@ function subHundred(number: Array): number { } } function unfilteredNasinNanpaPona( - number: Array, + number: ReadonlyArray, previousHundredCount: number, ): number { if (number.length === 0) { @@ -61,14 +61,14 @@ function unfilteredNasinNanpaPona( } } } -function nasinNanpaPona(number: Array): null | number { +function nasinNanpaPona(number: ReadonlyArray): null | number { if (number.includes(0) || !number.includes(100) || number[0] === 100) { return null; } else { return unfilteredNasinNanpaPona(number, Infinity); } } -function combineNumbers(numbers: Array): ArrayResult { +function combineNumbers(numbers: ReadonlyArray): ArrayResult { return ArrayResult.from(() => { if (numbers.length !== 1 && numbers.includes(0)) { throw new FilteredOutError('"ala" along with other numeral'); @@ -81,7 +81,7 @@ function combineNumbers(numbers: Array): ArrayResult { ); }); } -export function number(number: Array): ArrayResult { +export function number(number: ReadonlyArray): ArrayResult { return ArrayResult.combine(...number.map(singleNumber)) .flatMap(combineNumbers); } diff --git a/src/translator/phrase.ts b/src/translator/phrase.ts index d57e4fd3..f225aea9 100644 --- a/src/translator/phrase.ts +++ b/src/translator/phrase.ts @@ -207,7 +207,7 @@ export function phrase( } function compoundNoun( conjunction: "and" | "or", - phrase: Array, + phrase: ReadonlyArray, ): English.NounPhrase { const nouns = phrase .flatMap((noun) => { @@ -238,7 +238,7 @@ function compoundNoun( } function compoundAdjective( conjunction: "and" | "or", - phrase: Array, + phrase: ReadonlyArray, ): English.AdjectivePhrase { return { type: "compound", diff --git a/src/translator/predicate.ts b/src/translator/predicate.ts index c03b029c..7bd27cf0 100644 --- a/src/translator/predicate.ts +++ b/src/translator/predicate.ts @@ -23,7 +23,7 @@ function verbObject( throw new FilteredOutError("intransitive verb with object"); } else { let englishObject: null | English.NounPhrase; - let preposition: Array; + let preposition: ReadonlyArray; if (useForObject === true) { englishObject = object; preposition = []; @@ -119,7 +119,7 @@ function predicateVerb( function associatedPredicate( predicate: PhraseTranslation, object: null | PhraseTranslation, - preposition: Array, + preposition: ReadonlyArray, ): ArrayResult { 
return ArrayResult.from(() => { let verbObject: ArrayResult; diff --git a/src/translator/sentence.ts b/src/translator/sentence.ts index b0c2cf47..77a8efad 100644 --- a/src/translator/sentence.ts +++ b/src/translator/sentence.ts @@ -150,7 +150,7 @@ function sentence( ); } const lastEngClause = clause(sentence.finalClause); - let right: Array; + let right: ReadonlyArray; if (sentence.anuSeme == null) { right = []; } else { @@ -197,7 +197,7 @@ function sentence( } export function multipleSentences( sentences: TokiPona.MultipleSentences, -): ArrayResult> { +): ArrayResult> { switch (sentences.type) { case "single word": { const { word } = sentences; diff --git a/src/translator/verb.ts b/src/translator/verb.ts index 0e77bce6..1fead5c8 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -11,10 +11,10 @@ import { unemphasized, word } from "./word.ts"; export type VerbObjects = { object: null | English.NounPhrase; objectComplement: null | English.Complement; - preposition: Array; + preposition: ReadonlyArray; }; export type PartialVerb = Dictionary.VerbForms & VerbObjects & { - adverb: Array; + adverb: ReadonlyArray; reduplicationCount: number; wordEmphasis: boolean; subjectComplement: null | English.Complement; @@ -28,7 +28,7 @@ export type PartialCompoundVerb = & { type: "compound"; conjunction: string; - verb: Array; + verb: ReadonlyArray; } & VerbObjects ); @@ -74,7 +74,7 @@ export function partialVerb( } export function everyPartialVerb( verb: PartialCompoundVerb, -): Array { +): ReadonlyArray { switch (verb.type) { case "simple": return [verb]; diff --git a/telo-misikeke/telo_misikeke.d.ts b/telo-misikeke/telo_misikeke.d.ts index 21a0c484..bd43bb2e 100644 --- a/telo-misikeke/telo_misikeke.d.ts +++ b/telo-misikeke/telo_misikeke.d.ts @@ -1 +1 @@ -export function errors(text: string): Array; +export function errors(text: string): ReadonlyArray; diff --git a/telo-misikeke/update.ts b/telo-misikeke/update.ts index bf399f06..700e63c8 100644 --- a/telo-misikeke/update.ts +++ b/telo-misikeke/update.ts @@ -28,7 +28,7 @@ const COMMONJS_EXPORT = async function buildCode( source: URL, destination: URL, - exportItems: Array, + exportItems: ReadonlyArray, ): Promise { const response = assertOk(await retry(() => fetch(source))); const rawCode = await response.text(); @@ -62,9 +62,11 @@ async function buildSonaLinku(): Promise { } function parseLipuLinku( data: { [word: string]: { usage_category: string } }, -): [string, string][] { +): ReadonlyArray { return Object.entries(data) - .map<[string, string]>(([word, data]) => [word, data.usage_category]) + .map( + ([word, data]) => [word, data.usage_category], + ) .filter(([_, category]) => category !== "sandbox"); } if (import.meta.main) { From cd6994b2169e04ae108472d5b358eb9772493c21 Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 09:54:02 +0800 Subject: [PATCH 097/199] format --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da2259f9..b2810626 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,7 +35,10 @@ san: - Allow custom fillers. - Allow custom numerals. -- Numerals are now very permissive. Something like "wan tu" is now allowed. It is also possible for ilo Token to output both numbers using pu system and nasin nanpa pona: "tu ale wan" will be translated into 103 (pu system) _and_ 201 (nasin nanpa pona). +- Numerals are now very permissive. Something like "wan tu" is now allowed. 
It + is also possible for ilo Token to output both numbers using pu system and + nasin nanpa pona: "tu ale wan" will be translated into 103 (pu system) _and_ + 201 (nasin nanpa pona). - Support for long "anu" glyph. - Fix sentence capitalization: If the sentence starts with number, no capitalization will occur. From ed66762b67f900694035f6bc98a59f538cf1062c Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 10:04:52 +0800 Subject: [PATCH 098/199] enforce immutability on objects --- dictionary/type.ts | 106 ++++++++++++++++++++---------------- src/array_result.ts | 4 +- src/parser/ast.ts | 101 +++++++++++++++++----------------- src/parser/token.ts | 44 ++++++++------- src/settings_frontend.ts | 12 ++-- src/translator/adjective.ts | 4 +- src/translator/ast.ts | 64 +++++++++++----------- src/translator/modifier.ts | 34 +++++++----- src/translator/noun.ts | 18 +++--- src/translator/phrase.ts | 6 +- src/translator/verb.ts | 31 ++++++----- src/translator/word_unit.ts | 6 +- 12 files changed, 231 insertions(+), 199 deletions(-) diff --git a/dictionary/type.ts b/dictionary/type.ts index e3980cf0..044c6fdf 100644 --- a/dictionary/type.ts +++ b/dictionary/type.ts @@ -1,22 +1,26 @@ -export type NounForms = { +export type NounForms = Readonly<{ singular: null | string; plural: null | string; -}; -export type Noun = NounForms & { - determiner: ReadonlyArray; - adjective: ReadonlyArray; - gerund: boolean; - postAdjective: null | { - adjective: string; - name: string; - }; -}; -export type PronounForms = { - singular: null | { subject: string; object: string }; - plural: null | { subject: string; object: string }; -}; +}>; +export type Noun = + & NounForms + & Readonly<{ + determiner: ReadonlyArray; + adjective: ReadonlyArray; + gerund: boolean; + postAdjective: + | null + | Readonly<{ + adjective: string; + name: string; + }>; + }>; +export type PronounForms = Readonly<{ + singular: null | Readonly<{ subject: string; object: string }>; + plural: null | Readonly<{ subject: string; object: string }>; +}>; export type Perspective = "first" | "second" | "third"; -export type Pronoun = PronounForms & { perspective: Perspective }; +export type Pronoun = PronounForms & Readonly<{ perspective: Perspective }>; export type DeterminerType = | "article" | "demonstrative" @@ -27,12 +31,12 @@ export type DeterminerType = | "negative" | "numeral"; export type Quantity = "singular" | "plural" | "both"; -export type Determiner = { +export type Determiner = Readonly<{ determiner: string; plural: null | string; kind: DeterminerType; quantity: Quantity; -}; +}>; export type AdjectiveType = | "opinion" | "size" @@ -42,43 +46,53 @@ export type AdjectiveType = | "origin" | "material" | "qualifier"; -export type Adjective = { +export type Adjective = Readonly<{ adverb: ReadonlyArray; adjective: string; kind: AdjectiveType; -}; -export type VerbForms = { +}>; +export type VerbForms = Readonly<{ presentPlural: string; presentSingular: string; past: string; -}; -export type Verb = VerbForms & { - directObject: null | Noun; - indirectObject: ReadonlyArray<{ - preposition: string; - object: Noun; +}>; +export type Verb = + & VerbForms + & Readonly<{ + directObject: null | Noun; + indirectObject: ReadonlyArray< + Readonly<{ + preposition: string; + object: Noun; + }> + >; + forObject: boolean | string; + predicateType: null | "verb" | "noun adjective"; }>; - forObject: boolean | string; - predicateType: null | "verb" | "noun adjective"; -}; export type Definition = - | { type: "filler"; before: string; repeat: string; after: 
string } - | { type: "particle definition"; definition: string } - | ({ type: "noun" } & Noun) - | { + | Readonly<{ type: "filler"; before: string; repeat: string; after: string }> + | Readonly<{ type: "particle definition"; definition: string }> + | (Readonly<{ type: "noun" }> & Noun) + | Readonly<{ type: "noun preposition"; noun: Noun; preposition: string; - } - | ({ type: "personal pronoun" } & Pronoun) - | ({ type: "determiner" } & Determiner) - | { type: "numeral"; numeral: number } - | ({ type: "adjective" } & Adjective) - | { type: "compound adjective"; adjective: ReadonlyArray } - | { type: "adverb"; adverb: string } - | ({ type: "verb" } & Verb) - | { type: "modal verb"; verb: string } - | { type: "preposition"; preposition: string } - | { type: "interjection"; interjection: string }; -export type Entry = { definitions: ReadonlyArray; src: string }; + }> + | (Readonly<{ type: "personal pronoun" }> & Pronoun) + | (Readonly<{ type: "determiner" }> & Determiner) + | Readonly<{ type: "numeral"; numeral: number }> + | (Readonly<{ type: "adjective" }> & Adjective) + | Readonly<{ + type: "compound adjective"; + adjective: ReadonlyArray; + }> + | Readonly<{ type: "adverb"; adverb: string }> + | (Readonly<{ type: "verb" }> & Verb) + | Readonly<{ type: "modal verb"; verb: string }> + | Readonly<{ type: "preposition"; preposition: string }> + | Readonly<{ type: "interjection"; interjection: string }>; +export type Entry = Readonly<{ + definitions: ReadonlyArray; + src: string; +}>; export type Dictionary = Map; diff --git a/src/array_result.ts b/src/array_result.ts index 09ab1ade..8a2d8308 100644 --- a/src/array_result.ts +++ b/src/array_result.ts @@ -150,8 +150,8 @@ export class ArrayResult { } } type Errors = - | { type: "array result"; errors: ReadonlyArray } - | { type: "outside"; errors: ReadonlyArray }; + | Readonly<{ type: "array result"; errors: ReadonlyArray }> + | Readonly<{ type: "outside"; errors: ReadonlyArray }>; export function extractArrayResultError( errors: ReadonlyArray, ): ReadonlyArray { diff --git a/src/parser/ast.ts b/src/parser/ast.ts index cc3da740..cdcbc1b7 100644 --- a/src/parser/ast.ts +++ b/src/parser/ast.ts @@ -1,86 +1,89 @@ export type Emphasis = - | { type: "word"; word: string } - | { type: "long word"; word: string; length: number }; + | Readonly<{ type: "word"; word: string }> + | Readonly<{ type: "long word"; word: string; length: number }>; export type Filler = | Emphasis - | { type: "multiple a"; count: number }; + | Readonly<{ type: "multiple a"; count: number }>; export type SimpleHeadedWordUnit = - | { type: "default"; word: string } - | { type: "x ala x"; word: string } - | { type: "reduplication"; word: string; count: number }; + | Readonly<{ type: "default"; word: string }> + | Readonly<{ type: "x ala x"; word: string }> + | Readonly<{ type: "reduplication"; word: string; count: number }>; export type SimpleWordUnit = | SimpleHeadedWordUnit - | { type: "number"; words: ReadonlyArray }; + | Readonly<{ type: "number"; words: ReadonlyArray }>; export type HeadedWordUnit = & SimpleHeadedWordUnit - & { emphasis: null | Emphasis }; + & Readonly<{ emphasis: null | Emphasis }>; export type WordUnit = & SimpleWordUnit - & { emphasis: null | Emphasis }; -export type Nanpa = { nanpa: WordUnit; phrase: Phrase }; + & Readonly<{ emphasis: null | Emphasis }>; +export type Nanpa = Readonly<{ nanpa: WordUnit; phrase: Phrase }>; export type Modifier = - | { type: "default"; word: WordUnit } - | { type: "proper words"; words: string } - | { type: "pi"; phrase: Phrase 
} - | ({ type: "nanpa" } & Nanpa) - | ({ type: "quotation" } & Quotation); + | Readonly<{ type: "default"; word: WordUnit }> + | Readonly<{ type: "proper words"; words: string }> + | Readonly<{ type: "pi"; phrase: Phrase }> + | (Readonly<{ type: "nanpa" }> & Nanpa) + | (Readonly<{ type: "quotation" }> & Quotation); export type Phrase = - | { + | Readonly<{ type: "default"; headWord: WordUnit; modifiers: ReadonlyArray; emphasis: null | Emphasis; - } - | { + }> + | Readonly<{ type: "preverb"; preverb: HeadedWordUnit; modifiers: ReadonlyArray; phrase: Phrase; emphasis: null | Emphasis; - } - | ({ type: "preposition" } & Preposition) - | ({ type: "quotation" } & Quotation); + }> + | (Readonly<{ type: "preposition" }> & Preposition) + | (Readonly<{ type: "quotation" }> & Quotation); export type MultiplePhrases = - | { type: "single"; phrase: Phrase } - | { type: "and conjunction"; phrases: ReadonlyArray } - | { type: "anu"; phrases: ReadonlyArray }; -export type Preposition = { + | Readonly<{ type: "single"; phrase: Phrase }> + | Readonly<{ + type: "and conjunction"; + phrases: ReadonlyArray; + }> + | Readonly<{ type: "anu"; phrases: ReadonlyArray }>; +export type Preposition = Readonly<{ preposition: HeadedWordUnit; modifiers: ReadonlyArray; - phrases: MultiplePhrases & { type: "single" | "anu" }; + phrases: MultiplePhrases & Readonly<{ type: "single" | "anu" }>; emphasis: null | Emphasis; -}; +}>; export type Predicate = - | { type: "single"; predicate: Phrase } - | { + | Readonly<{ type: "single"; predicate: Phrase }> + | Readonly<{ type: "associated"; predicates: MultiplePhrases; objects: null | MultiplePhrases; prepositions: ReadonlyArray; - } - | { type: "and conjunction"; predicates: ReadonlyArray } - | { type: "anu"; predicates: ReadonlyArray }; + }> + | Readonly<{ type: "and conjunction"; predicates: ReadonlyArray }> + | Readonly<{ type: "anu"; predicates: ReadonlyArray }>; export type Clause = - | { type: "phrases"; phrases: MultiplePhrases } - | { type: "o vocative"; phrases: MultiplePhrases } - | { + | Readonly<{ type: "phrases"; phrases: MultiplePhrases }> + | Readonly<{ type: "o vocative"; phrases: MultiplePhrases }> + | Readonly<{ type: "li clause"; subjects: MultiplePhrases; predicates: Predicate; explicitLi: boolean; - } - | { + }> + | Readonly<{ type: "o clause"; subjects: null | MultiplePhrases; predicates: Predicate; - } - | { type: "prepositions"; prepositions: ReadonlyArray } - | ({ type: "quotation" } & Quotation); + }> + | Readonly<{ type: "prepositions"; prepositions: ReadonlyArray }> + | (Readonly<{ type: "quotation" }> & Quotation); export type ContextClause = | Clause - | ({ type: "nanpa" } & Nanpa); + | (Readonly<{ type: "nanpa" }> & Nanpa); export type Sentence = - | { + | Readonly<{ type: "default"; kinOrTaso: null | HeadedWordUnit; laClauses: ReadonlyArray; @@ -89,18 +92,18 @@ export type Sentence = emphasis: null | Emphasis; punctuation: string; interrogative: null | "seme" | "x ala x"; - } - | { + }> + | Readonly<{ type: "filler"; filler: Filler; punctuation: string; interrogative: null | "seme" | "x ala x"; - }; -export type Quotation = { + }>; +export type Quotation = Readonly<{ sentences: ReadonlyArray; leftMark: string; rightMark: string; -}; +}>; export type MultipleSentences = - | { type: "single word"; word: string } - | { type: "sentences"; sentences: ReadonlyArray }; + | Readonly<{ type: "single word"; word: string }> + | Readonly<{ type: "sentences"; sentences: ReadonlyArray }>; diff --git a/src/parser/token.ts b/src/parser/token.ts index 
aed7fa83..adc9eb47 100644 --- a/src/parser/token.ts +++ b/src/parser/token.ts @@ -1,39 +1,43 @@ import { repeatWithSpace } from "../misc.ts"; export type Token = - | { type: "word"; word: string } - | { + | Readonly<{ type: "word"; word: string }> + | Readonly<{ type: "combined glyphs"; words: ReadonlyArray; - } - | { + }> + | Readonly<{ type: "space long glyph"; words: ReadonlyArray; spaceLength: number; - } - | { + }> + | Readonly<{ type: "headed long glyph start"; words: ReadonlyArray; - } - | { + }> + | Readonly<{ type: "headless long glyph end"; - } - | { + }> + | Readonly<{ type: "headless long glyph start"; - } - | { + }> + | Readonly<{ type: "headed long glyph end"; words: ReadonlyArray; - } - | { + }> + | Readonly<{ type: "inside long glyph"; words: ReadonlyArray; - } - | { type: "multiple a"; count: number } - | { type: "long word"; word: string; length: number } - | { type: "x ala x"; word: string } - | { type: "proper word"; words: string; kind: "cartouche" | "latin" } - | { type: "punctuation"; punctuation: string }; + }> + | Readonly<{ type: "multiple a"; count: number }> + | Readonly<{ type: "long word"; word: string; length: number }> + | Readonly<{ type: "x ala x"; word: string }> + | Readonly<{ + type: "proper word"; + words: string; + kind: "cartouche" | "latin"; + }> + | Readonly<{ type: "punctuation"; punctuation: string }>; export function describe(token: Token): string { switch (token.type) { case "word": diff --git a/src/settings_frontend.ts b/src/settings_frontend.ts index 921d74f0..8c35ae49 100644 --- a/src/settings_frontend.ts +++ b/src/settings_frontend.ts @@ -7,12 +7,12 @@ import { settings, } from "./settings.ts"; -type Updater = { - readonly parse: (value: string) => null | T; - readonly stringify: (value: T) => string; - readonly load: (input: HTMLInputElement | HTMLSelectElement) => T; - readonly set: (input: HTMLInputElement | HTMLSelectElement, value: T) => void; -}; +type Updater = Readonly<{ + parse: (value: string) => null | T; + stringify: (value: T) => string; + load: (input: HTMLInputElement | HTMLSelectElement) => T; + set: (input: HTMLInputElement | HTMLSelectElement, value: T) => void; +}>; const BOOL_UPDATER: Updater = { parse: (value) => { switch (value) { diff --git a/src/translator/adjective.ts b/src/translator/adjective.ts index 0b08d96e..de2dfb76 100644 --- a/src/translator/adjective.ts +++ b/src/translator/adjective.ts @@ -6,10 +6,10 @@ import * as English from "./ast.ts"; import { UntranslatableError } from "./error.ts"; import { unemphasized, word } from "./word.ts"; -export type AdjectiveWithInWay = { +export type AdjectiveWithInWay = Readonly<{ adjective: English.AdjectivePhrase; inWayPhrase: null | English.NounPhrase; -}; +}>; function so(emphasis: null | TokiPona.Emphasis): string { if (emphasis == null) { throw new UntranslatableError("missing emphasis", "adverb"); diff --git a/src/translator/ast.ts b/src/translator/ast.ts index 762710c8..a09e8e73 100644 --- a/src/translator/ast.ts +++ b/src/translator/ast.ts @@ -1,57 +1,57 @@ import * as Dictionary from "../../dictionary/type.ts"; -export type Word = { +export type Word = Readonly<{ word: string; emphasis: boolean; -}; +}>; export type Quantity = "singular" | "plural" | "condensed"; export type NounPhrase = - | { + | Readonly<{ type: "simple"; determiner: ReadonlyArray; adjective: ReadonlyArray; noun: Word; quantity: Quantity; perspective: Dictionary.Perspective; - postAdjective: null | { adjective: string; name: string }; + postAdjective: null | Readonly<{ adjective: string; name: 
string }>; preposition: ReadonlyArray; emphasis: boolean; - } - | { + }> + | Readonly<{ type: "compound"; conjunction: string; nouns: ReadonlyArray; quantity: Quantity; - }; -export type Determiner = { + }>; +export type Determiner = Readonly<{ kind: Dictionary.DeterminerType; determiner: Word; quantity: Dictionary.Quantity; -}; +}>; export type AdjectivePhrase = - | { + | Readonly<{ type: "simple"; kind: Dictionary.AdjectiveType; adverb: ReadonlyArray; adjective: Word; emphasis: boolean; - } - | { + }> + | Readonly<{ type: "compound"; conjunction: string; adjective: ReadonlyArray; emphasis: boolean; - }; + }>; export type Complement = - | { type: "noun"; noun: NounPhrase } - | { type: "adjective"; adjective: AdjectivePhrase }; -export type Verb = { + | Readonly<{ type: "noun"; noun: NounPhrase }> + | Readonly<{ type: "adjective"; adjective: AdjectivePhrase }>; +export type Verb = Readonly<{ modal: null | Word; finite: ReadonlyArray; infinite: Word; -}; +}>; export type VerbPhrase = - | { + | Readonly<{ type: "default"; adverb: ReadonlyArray; verb: Verb; @@ -60,33 +60,33 @@ export type VerbPhrase = objectComplement: null | Complement; preposition: ReadonlyArray; hideVerb: boolean; - } - | { + }> + | Readonly<{ type: "compound"; conjunction: string; verbs: ReadonlyArray; object: null | NounPhrase; objectComplement: null | Complement; preposition: ReadonlyArray; - }; + }>; export type Clause = - | { type: "free form"; text: string } - | { + | Readonly<{ type: "free form"; text: string }> + | Readonly<{ type: "default"; subject: NounPhrase; verb: VerbPhrase; hideSubject: boolean; - } - | { type: "subject phrase"; subject: NounPhrase } - | { type: "interjection"; interjection: Word } - | { type: "vocative"; call: string; addressee: NounPhrase } - | { type: "dependent"; conjunction: Word; clause: Clause }; -export type Preposition = { + }> + | Readonly<{ type: "subject phrase"; subject: NounPhrase }> + | Readonly<{ type: "interjection"; interjection: Word }> + | Readonly<{ type: "vocative"; call: string; addressee: NounPhrase }> + | Readonly<{ type: "dependent"; conjunction: Word; clause: Clause }>; +export type Preposition = Readonly<{ adverb: ReadonlyArray; preposition: Word; object: NounPhrase; -}; -export type Sentence = { +}>; +export type Sentence = Readonly<{ clauses: ReadonlyArray; punctuation: string; -}; +}>; diff --git a/src/translator/modifier.ts b/src/translator/modifier.ts index 5163a9e1..7ac6d2fb 100644 --- a/src/translator/modifier.ts +++ b/src/translator/modifier.ts @@ -17,28 +17,34 @@ import { pronoun } from "./pronoun.ts"; import { unemphasized, word } from "./word.ts"; export type ModifierTranslation = - | { type: "noun"; noun: English.NounPhrase } - | { type: "noun preposition"; noun: English.NounPhrase; preposition: string } - | { type: "adjective"; adjective: English.AdjectivePhrase } - | { type: "determiner"; determiner: English.Determiner } - | { type: "adverb"; adverb: English.Word } - | { type: "name"; name: string } - | { type: "in position phrase"; noun: English.NounPhrase }; -export type AdjectivalModifier = { - nounPreposition: null | { noun: English.NounPhrase; preposition: string }; + | Readonly<{ type: "noun"; noun: English.NounPhrase }> + | Readonly<{ + type: "noun preposition"; + noun: English.NounPhrase; + preposition: string; + }> + | Readonly<{ type: "adjective"; adjective: English.AdjectivePhrase }> + | Readonly<{ type: "determiner"; determiner: English.Determiner }> + | Readonly<{ type: "adverb"; adverb: English.Word }> + | Readonly<{ type: "name"; name: 
string }> + | Readonly<{ type: "in position phrase"; noun: English.NounPhrase }>; +export type AdjectivalModifier = Readonly<{ + nounPreposition: + | null + | Readonly<{ noun: English.NounPhrase; preposition: string }>; determiner: ReadonlyArray; adjective: ReadonlyArray; name: null | string; ofPhrase: null | English.NounPhrase; inPositionPhrase: null | English.NounPhrase; -}; -export type AdverbialModifier = { +}>; +export type AdverbialModifier = Readonly<{ adverb: ReadonlyArray; inWayPhrase: null | English.NounPhrase; -}; +}>; export type MultipleModifierTranslation = - | ({ type: "adjectival" } & AdjectivalModifier) - | ({ type: "adverbial" } & AdverbialModifier); + | (Readonly<{ type: "adjectival" }> & AdjectivalModifier) + | (Readonly<{ type: "adverbial" }> & AdverbialModifier); export function defaultModifier( wordUnit: TokiPona.WordUnit, ): ArrayResult { diff --git a/src/translator/noun.ts b/src/translator/noun.ts index c177d77f..58b08978 100644 --- a/src/translator/noun.ts +++ b/src/translator/noun.ts @@ -9,14 +9,16 @@ import { determiner } from "./determiner.ts"; import { condense } from "./misc.ts"; import { word } from "./word.ts"; -export type PartialNoun = Dictionary.NounForms & { - determiner: ReadonlyArray; - adjective: ReadonlyArray; - reduplicationCount: number; - emphasis: boolean; - perspective: Dictionary.Perspective; - postAdjective: null | { adjective: string; name: string }; -}; +export type PartialNoun = + & Dictionary.NounForms + & Readonly<{ + determiner: ReadonlyArray; + adjective: ReadonlyArray; + reduplicationCount: number; + emphasis: boolean; + perspective: Dictionary.Perspective; + postAdjective: null | { adjective: string; name: string }; + }>; export function partialNoun( definition: Dictionary.Noun, reduplicationCount: number, diff --git a/src/translator/phrase.ts b/src/translator/phrase.ts index f225aea9..b3cf937e 100644 --- a/src/translator/phrase.ts +++ b/src/translator/phrase.ts @@ -25,9 +25,9 @@ import { wordUnit } from "./word_unit.ts"; import { word } from "./word.ts"; export type PhraseTranslation = - | { type: "noun"; noun: English.NounPhrase } - | ({ type: "adjective" } & AdjectiveWithInWay) - | { type: "verb"; verb: PartialCompoundVerb }; + | Readonly<{ type: "noun"; noun: English.NounPhrase }> + | (Readonly<{ type: "adjective" }> & AdjectiveWithInWay) + | Readonly<{ type: "verb"; verb: PartialCompoundVerb }>; function nounPhrase( emphasis: boolean, partialNoun: PartialNoun, diff --git a/src/translator/verb.ts b/src/translator/verb.ts index 1fead5c8..7fd70d7d 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -8,28 +8,31 @@ import { noun } from "./noun.ts"; import { nounAsPreposition } from "./preposition.ts"; import { unemphasized, word } from "./word.ts"; -export type VerbObjects = { +export type VerbObjects = Readonly<{ object: null | English.NounPhrase; objectComplement: null | English.Complement; preposition: ReadonlyArray; -}; -export type PartialVerb = Dictionary.VerbForms & VerbObjects & { - adverb: ReadonlyArray; - reduplicationCount: number; - wordEmphasis: boolean; - subjectComplement: null | English.Complement; - forObject: boolean | string; - predicateType: null | "verb" | "noun adjective"; - phraseEmphasis: boolean; -}; +}>; +export type PartialVerb = + & Dictionary.VerbForms + & VerbObjects + & Readonly<{ + adverb: ReadonlyArray; + reduplicationCount: number; + wordEmphasis: boolean; + subjectComplement: null | English.Complement; + forObject: boolean | string; + predicateType: null | "verb" | "noun adjective"; 
+ phraseEmphasis: boolean; + }>; export type PartialCompoundVerb = - | ({ type: "simple" } & PartialVerb) + | (Readonly<{ type: "simple" }> & PartialVerb) | ( - & { + & Readonly<{ type: "compound"; conjunction: string; verb: ReadonlyArray; - } + }> & VerbObjects ); export function condenseVerb(present: string, past: string): string { diff --git a/src/translator/word_unit.ts b/src/translator/word_unit.ts index ca2460c4..74c0818c 100644 --- a/src/translator/word_unit.ts +++ b/src/translator/word_unit.ts @@ -10,9 +10,9 @@ import { partialPronoun, Place } from "./pronoun.ts"; import { PartialVerb, partialVerb } from "./verb.ts"; export type WordUnitTranslation = - | ({ type: "noun" } & PartialNoun) - | { type: "adjective"; adjective: English.AdjectivePhrase } - | ({ type: "verb" } & PartialVerb); + | (Readonly<{ type: "noun" }> & PartialNoun) + | Readonly<{ type: "adjective"; adjective: English.AdjectivePhrase }> + | (Readonly<{ type: "verb" }> & PartialVerb); function defaultWordUnit( word: string, reduplicationCount: number, From 391e02e92a67a179d5cac926ce1af9e90b208bc5 Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 10:05:20 +0800 Subject: [PATCH 099/199] fix --- deno.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deno.json b/deno.json index 57693795..0a611495 100644 --- a/deno.json +++ b/deno.json @@ -17,7 +17,7 @@ "exclude": [ "./dictionary/dictionary.ts", "./dist/main.js", - "./telo-misikeke/linku-data.json", + "./telo-misikeke/linku_data.json", "./telo-misikeke/rules.js", "./telo-misikeke/Parser.js" ] From cd4b2943949c21e3def23fce69b5b5ad426d288e Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 10:07:56 +0800 Subject: [PATCH 100/199] more immutability --- src/cache.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cache.ts b/src/cache.ts index 83e27197..6e1c0ae1 100644 --- a/src/cache.ts +++ b/src/cache.ts @@ -2,7 +2,7 @@ export interface Clearable { clear(): void; } export class Cache { - #caches: Set> = new Set(); + readonly #caches: Set> = new Set(); add(cache: Clearable): void { this.#caches.add(new WeakRef(cache)); } @@ -20,7 +20,7 @@ export class Cache { export class Lazy implements Clearable { #evaluated = false; #value: undefined | T; - #fn: () => T; + readonly #fn: () => T; constructor(fn: () => T) { this.#fn = fn; } From e75c6109ccd3f53251ae5bfb23b3141bc18ad9ec Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 10:16:46 +0800 Subject: [PATCH 101/199] improve error message --- src/translator/number.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/translator/number.ts b/src/translator/number.ts index c2cb199a..81a0a435 100644 --- a/src/translator/number.ts +++ b/src/translator/number.ts @@ -28,7 +28,7 @@ function regularNumber(number: ReadonlyArray): number { function subHundred(number: ReadonlyArray): number { const total = regularNumber(number); if (total >= 100) { - throw new FilteredOutError('"ale" position exceeding 99'); + throw new FilteredOutError("nasin nanpa pona position exceeding 99"); } else { return total; } From 941b462b6be84edf2141a45d77ddd611a9a1ccf6 Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 10:44:56 +0800 Subject: [PATCH 102/199] simplify version display --- src/main.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main.ts b/src/main.ts index cfa0641e..bebb06ad 100644 --- a/src/main.ts +++ b/src/main.ts @@ -122,13 +122,14 @@ function main(): void { ) as HTMLAnchorElement; // set version + let displayDate: string; 
if (PROJECT_DATA.onDevelopment) { - versionDisplay.innerText = `${PROJECT_DATA.version} (On development)`; + displayDate = "On development"; } else { - const date = new Date(PROJECT_DATA.releaseDate) - .toLocaleDateString(undefined); - versionDisplay.innerText = `${PROJECT_DATA.version} - Released ${date}`; + const date = new Date(PROJECT_DATA.releaseDate).toLocaleDateString(); + displayDate = `- Released ${date}`; } + versionDisplay.innerText = `${PROJECT_DATA.version} ${displayDate}`; // load settings loadFromLocalStorage(); From 23a073c9c3f6a2f680232ce545504d21dad65349 Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 10:50:03 +0800 Subject: [PATCH 103/199] this turns out to be unnecessary --- src/main.ts | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/main.ts b/src/main.ts index bebb06ad..3665a2a5 100644 --- a/src/main.ts +++ b/src/main.ts @@ -5,7 +5,6 @@ import { ArrayResultError } from "./array_result.ts"; import { loadCustomDictionary } from "./dictionary.ts"; import { checkLocalStorage, - escapeHtmlWithLineBreak, extractErrorMessage, flattenError, NEWLINES, @@ -145,13 +144,11 @@ function main(): void { try { loadCustomDictionary(customDictionary); } catch (error) { - let message: string; if (errorsFixable(flattenError(error))) { - message = DICTIONARY_LOADING_FAILED_FIXABLE_MESSAGE; + errorDisplay.innerText = DICTIONARY_LOADING_FAILED_FIXABLE_MESSAGE; } else { - message = DICTIONARY_LOADING_FAILED_UNFIXABLE_MESSAGE; + errorDisplay.innerText = DICTIONARY_LOADING_FAILED_UNFIXABLE_MESSAGE; } - errorDisplay.innerHTML = escapeHtmlWithLineBreak(message); console.error(error); } } @@ -207,19 +204,17 @@ function main(): void { } } catch (error) { const errors = flattenError(error); - let message: string; switch (errors.length) { case 0: - message = UNKNOWN_ERROR_MESSAGE; + errorDisplay.innerText = UNKNOWN_ERROR_MESSAGE; break; case 1: - message = SINGULAR_ERROR_MESSAGE; + errorDisplay.innerText = SINGULAR_ERROR_MESSAGE; break; default: - message = MULTIPLE_ERROR_MESSAGE; + errorDisplay.innerText = MULTIPLE_ERROR_MESSAGE; break; } - errorDisplay.innerHTML = escapeHtmlWithLineBreak(message); for (const item of errors) { let property: "innerHTML" | "innerText"; if (item instanceof ArrayResultError && item.isHtml) { From 9ff3730c18c2fbc959f9e34922965817ed95ed08 Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 10:50:49 +0800 Subject: [PATCH 104/199] rename --- src/main.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main.ts b/src/main.ts index 3665a2a5..df1660bf 100644 --- a/src/main.ts +++ b/src/main.ts @@ -66,7 +66,7 @@ function main(): void { "input", ) as HTMLTextAreaElement; - const outputDisplay = document.getElementById("output") as HTMLUListElement; + const outputList = document.getElementById("output") as HTMLUListElement; const errorDisplay = document.getElementById( "error", ) as HTMLParagraphElement; @@ -193,14 +193,14 @@ function main(): void { } }); function updateOutput(): void { - outputDisplay.innerHTML = ""; + outputList.innerHTML = ""; errorList.innerHTML = ""; errorDisplay.innerText = ""; try { for (const translation of translate(inputTextBox.value)) { const list = document.createElement("li"); list.innerHTML = translation; - outputDisplay.appendChild(list); + outputList.appendChild(list); } } catch (error) { const errors = flattenError(error); From 2e8258546e8918928cfb3288266933247b709934 Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 11:06:19 +0800 Subject: [PATCH 105/199] 
condense objects --- src/translator/determiner.ts | 3 +-- src/translator/noun.ts | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/translator/determiner.ts b/src/translator/determiner.ts index 6dafe995..ae6d422d 100644 --- a/src/translator/determiner.ts +++ b/src/translator/determiner.ts @@ -66,9 +66,8 @@ export function determiner( plural: definition.plural, }) .map((determiner) => ({ - kind: definition.kind, + ...definition, determiner: word(determiner, reduplicationCount, emphasis), - quantity: definition.quantity, })); } export function fixDeterminer( diff --git a/src/translator/noun.ts b/src/translator/noun.ts index 58b08978..07057b62 100644 --- a/src/translator/noun.ts +++ b/src/translator/noun.ts @@ -34,13 +34,11 @@ export function partialNoun( ); return ArrayResult.combine(engDeterminer, engAdjective) .map(([determiner, adjective]) => ({ + ...definition, determiner, adjective, - singular: definition.singular, - plural: definition.plural, perspective: "third", reduplicationCount, - postAdjective: definition.postAdjective, emphasis, })); } From 1635d783ec1f08db4a601341a2348774e98670c2 Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 11:34:48 +0800 Subject: [PATCH 106/199] better import --- src/parser/parser_test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/parser_test.ts b/src/parser/parser_test.ts index b30f54c3..9e09442f 100644 --- a/src/parser/parser_test.ts +++ b/src/parser/parser_test.ts @@ -1,4 +1,4 @@ -import { assertNotEquals } from "@std/assert"; +import { assertNotEquals } from "@std/assert/not-equals"; import { parse } from "./parser.ts"; // Examples gathered from https://github.com/kilipan/nasin-toki From b311171ab1ed3b3f028e17f5b80ff6d01e962c52 Mon Sep 17 00:00:00 2001 From: Koko Date: Tue, 4 Mar 2025 18:44:09 +0800 Subject: [PATCH 107/199] more immutability --- src/array_result.ts | 6 +++--- src/parser/parser_lib.ts | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/array_result.ts b/src/array_result.ts index 8a2d8308..1c1ae8f6 100644 --- a/src/array_result.ts +++ b/src/array_result.ts @@ -120,9 +120,9 @@ export class ArrayResult { ); } static combine>( - ...arrayResults: { [I in keyof T]: ArrayResult } & { - length: T["length"]; - } + ...arrayResults: + & Readonly<{ [I in keyof T]: ArrayResult }> + & Readonly<{ length: T["length"] }> ): ArrayResult { // We resorted to using `any` types here, make sure it works properly return arrayResults.reduce( diff --git a/src/parser/parser_lib.ts b/src/parser/parser_lib.ts index 3a6936ee..2e2e2250 100644 --- a/src/parser/parser_lib.ts +++ b/src/parser/parser_lib.ts @@ -149,7 +149,9 @@ export function optionalAll(parser: Parser): Parser { return choiceOnlyOne(parser, nothing); } export function sequence>( - ...sequence: { [I in keyof T]: Parser } & { length: T["length"] } + ...sequence: + & Readonly<{ [I in keyof T]: Parser }> + & Readonly<{ length: T["length"] }> ): Parser { // We resorted to using `any` types here, make sure it works properly return sequence.reduceRight( From 156e4c13c5254053856335a9a65ee78da8769f08 Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 08:03:48 +0800 Subject: [PATCH 108/199] fix equality --- src/mod.ts | 2 +- src/translator/modifier.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mod.ts b/src/mod.ts index 4a397026..a08aded4 100644 --- a/src/mod.ts +++ b/src/mod.ts @@ -28,7 +28,7 @@ export function translate(tokiPona: string): ReadonlyArray { error = errors(tokiPona) 
.map((message) => new ArrayResultError(message, { isHtml: true })); } - if (error.length == 0) { + if (error.length === 0) { error = deduplicateErrors(arrayResult.errors); } throw new AggregateError(error); diff --git a/src/translator/modifier.ts b/src/translator/modifier.ts index 7ac6d2fb..9f64afaf 100644 --- a/src/translator/modifier.ts +++ b/src/translator/modifier.ts @@ -150,7 +150,7 @@ export function piModifier( modifier.noun.preposition.length === 0 ) .filter((modifier) => - modifier.type != "adjective" || modifier.inWayPhrase == null + modifier.type !== "adjective" || modifier.inWayPhrase == null ) as ArrayResult; } function nanpaModifier( From b0ab632b27c33cdb545d0e456c4f4d56463eee55 Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 09:32:15 +0800 Subject: [PATCH 109/199] refactor with new lints --- bundle.ts | 12 ++-- deno.json | 16 +++++ dictionary/build.ts | 3 +- src/parser/parser_lib.ts | 2 +- src/translator/adjective.ts | 28 ++++++--- src/translator/as_string.ts | 24 ++++++-- src/translator/clause.ts | 23 ++++++- src/translator/determiner.ts | 11 ++-- src/translator/modifier.ts | 116 ++++++++++++++++++++++------------- src/translator/noun.ts | 38 +++++++----- src/translator/phrase.ts | 109 +++++++++++++++++++------------- src/translator/predicate.ts | 27 +++++--- src/translator/pronoun.ts | 27 ++++---- src/translator/sentence.ts | 19 +++--- src/translator/verb.ts | 46 +++++++++----- src/translator/word.ts | 9 ++- src/translator/word_unit.ts | 71 +++++++++++---------- 17 files changed, 375 insertions(+), 206 deletions(-) diff --git a/bundle.ts b/bundle.ts index 3b2ec8aa..07e276db 100644 --- a/bundle.ts +++ b/bundle.ts @@ -29,11 +29,13 @@ function buildOptions(minify: boolean): ESBuild.BuildOptions { plugins: [...denoPlugins()], }; } -async function buildAll(options: { - minify: boolean; - buildDictionary: boolean; - checkDictionary?: boolean; -}): Promise { +async function buildAll( + options: Readonly<{ + minify: boolean; + buildDictionary: boolean; + checkDictionary?: boolean; + }>, +): Promise { const { minify, buildDictionary, checkDictionary } = options; try { if (buildDictionary) { diff --git a/deno.json b/deno.json index 0a611495..7d893e49 100644 --- a/deno.json +++ b/deno.json @@ -30,6 +30,22 @@ "./telo-misikeke/Parser.js" ], "rules": { + "tags": ["recommended"], + "include": [ + "camelcase", + "explicit-function-return-type", + "explicit-module-boundary-types", + "no-boolean-literal-for-arguments", + "no-eval", + "no-inferrable-types", + "no-non-null-asserted-optional-chain", + "no-self-compare", + "no-sparse-arrays", + "no-sync-fn-in-async-fn", + "no-throw-literal", + "no-useless-rename", + "single-var-declarator" + ], "exclude": ["no-explicit-any"] } }, diff --git a/dictionary/build.ts b/dictionary/build.ts index 8b834f96..5825c326 100644 --- a/dictionary/build.ts +++ b/dictionary/build.ts @@ -55,5 +55,6 @@ export const original = ${original}; await Deno.writeTextFile(DESTINATION, code); } if (import.meta.main) { - await build(true); + const checkFile = true; + await build(checkFile); } diff --git a/src/parser/parser_lib.ts b/src/parser/parser_lib.ts index 2e2e2250..0cb1c20c 100644 --- a/src/parser/parser_lib.ts +++ b/src/parser/parser_lib.ts @@ -235,7 +235,7 @@ export function slice(length: number, description: string): Parser { } export function matchString( match: string, - description: string = `"${match}"`, + description = `"${match}"`, ): Parser { return new Parser((src) => { if (src.length >= match.length && src.slice(0, match.length) === match) { 
diff --git a/src/translator/adjective.ts b/src/translator/adjective.ts index de2dfb76..898b8fcd 100644 --- a/src/translator/adjective.ts +++ b/src/translator/adjective.ts @@ -23,10 +23,13 @@ function so(emphasis: null | TokiPona.Emphasis): string { } } export function adjective( - definition: Dictionary.Adjective, - reduplicationCount: number, - emphasis: null | TokiPona.Emphasis, + options: Readonly<{ + definition: Dictionary.Adjective; + reduplicationCount: number; + emphasis: null | TokiPona.Emphasis; + }>, ): ArrayResult { + const { definition, reduplicationCount, emphasis } = options; return ArrayResult.concat<{ emphasis: boolean; so: null | string }>( ArrayResult.from(() => new ArrayResult([so(emphasis)])) .map((so) => ({ emphasis: false, so })), @@ -36,20 +39,29 @@ export function adjective( type: "simple", kind: definition.kind, adverb: [...definition.adverb, ...nullableAsArray(so)].map(unemphasized), - adjective: word(definition.adjective, reduplicationCount, emphasis), + adjective: word({ + word: definition.adjective, + reduplicationCount, + emphasis, + }), emphasis: false, })); } export function compoundAdjective( - adjectives: ReadonlyArray, - reduplicationCount: number, - emphasis: null | TokiPona.Emphasis, + options: Readonly<{ + adjectives: ReadonlyArray; + reduplicationCount: number; + emphasis: null | TokiPona.Emphasis; + }>, ): ArrayResult { + const { adjectives, reduplicationCount, emphasis } = options; return ArrayResult.from(() => { if (reduplicationCount === 1) { return ArrayResult.combine( ...adjectives - .map((definition) => adjective(definition, 1, emphasis)), + .map((definition) => + adjective({ definition, reduplicationCount: 1, emphasis }) + ), ) .map((adjective) => ({ type: "compound", diff --git a/src/translator/as_string.ts b/src/translator/as_string.ts index 1c8aeda9..436d6364 100644 --- a/src/translator/as_string.ts +++ b/src/translator/as_string.ts @@ -14,15 +14,29 @@ export function definitionAsPlainString( return nounAsPlainString(definition); case "personal pronoun": return ArrayResult.concat( - pronoun(definition, 1, false, "subject"), - pronoun(definition, 1, false, "object"), + pronoun({ + definition, + reduplicationCount: 1, + emphasis: false, + place: "subject", + }), + pronoun({ + definition, + reduplicationCount: 1, + emphasis: false, + place: "object", + }), ) .map((noun) => EnglishComposer.noun(noun, 0)); case "adjective": - return adjective(definition, 1, null) + return adjective({ definition, reduplicationCount: 1, emphasis: null }) .map((adjective) => EnglishComposer.adjective(adjective, 0)); case "compound adjective": { - return compoundAdjective(definition.adjective, 1, null) + return compoundAdjective({ + adjectives: definition.adjective, + reduplicationCount: 1, + emphasis: null, + }) .map((adjective) => EnglishComposer.adjective(adjective, 0)); } case "determiner": @@ -35,7 +49,7 @@ export function definitionAsPlainString( case "interjection": return new ArrayResult([definition.interjection]); case "verb": { - return partialVerb(definition, 1, false) + return partialVerb({ definition, reduplicationCount: 1, emphasis: false }) .flatMap((partialVerb) => verb({ ...partialVerb, type: "simple" }, "third", "plural") ) diff --git a/src/translator/clause.ts b/src/translator/clause.ts index 6dd470d0..2177af83 100644 --- a/src/translator/clause.ts +++ b/src/translator/clause.ts @@ -13,7 +13,13 @@ import { unemphasized } from "./word.ts"; function phraseClause( phrases: TokiPona.MultiplePhrases, ): ArrayResult { - return multiplePhrases(phrases, 
"object", true, "en", false) + return multiplePhrases({ + phrases, + place: "object", + includeGerund: true, + andParticle: "en", + includeVerb: false, + }) .map( (phrase) => { switch (phrase.type) { @@ -70,7 +76,12 @@ function liClause( clause: TokiPona.Clause & { type: "li clause" }, ): ArrayResult { return ArrayResult.combine( - multiplePhrasesAsNoun(clause.subjects, "subject", true, "en"), + multiplePhrasesAsNoun({ + phrases: clause.subjects, + place: "subject", + includeGerund: true, + andParticle: "en", + }), predicate(clause.predicates, "li"), ) .flatMap(([subject, predicate]) => { @@ -94,7 +105,13 @@ export function clause(clause: TokiPona.Clause): ArrayResult { case "phrases": return phraseClause(clause.phrases); case "o vocative": - return multiplePhrases(clause.phrases, "object", true, "en", false) + return multiplePhrases({ + phrases: clause.phrases, + place: "object", + includeGerund: true, + andParticle: "en", + includeVerb: false, + }) .map((phrase) => { if (phrase.type === "noun") { return { type: "vocative", call: "hey", addressee: phrase.noun }; diff --git a/src/translator/determiner.ts b/src/translator/determiner.ts index ae6d422d..3a5fd5ad 100644 --- a/src/translator/determiner.ts +++ b/src/translator/determiner.ts @@ -57,17 +57,20 @@ export function findNumber( } } export function determiner( - definition: Dictionary.Determiner, - reduplicationCount: number, - emphasis: boolean, + options: Readonly<{ + definition: Dictionary.Determiner; + reduplicationCount: number; + emphasis: boolean; + }>, ): ArrayResult { + const { definition } = options; return simpleNounForms({ singular: definition.determiner, plural: definition.plural, }) .map((determiner) => ({ ...definition, - determiner: word(determiner, reduplicationCount, emphasis), + determiner: word({ ...options, word: determiner }), })); } export function fixDeterminer( diff --git a/src/translator/modifier.ts b/src/translator/modifier.ts index 9f64afaf..b50194e9 100644 --- a/src/translator/modifier.ts +++ b/src/translator/modifier.ts @@ -61,7 +61,11 @@ export function defaultModifier( return { type: "determiner" as const, determiner: { - determiner: word(`${number}`, 1, emphasis), + determiner: word({ + word: `${number}`, + reduplicationCount: 1, + emphasis, + }), kind: "numeral", quantity, }, @@ -84,47 +88,56 @@ export function defaultModifier( .flatMap((definition) => { switch (definition.type) { case "noun": - return noun(definition, reduplicationCount, emphasis) + return noun({ definition, reduplicationCount, emphasis }) .map((noun) => ({ type: "noun", noun, })); case "noun preposition": - return noun(definition.noun, reduplicationCount, emphasis) + return noun({ + definition: definition.noun, + reduplicationCount, + emphasis, + }) .map((noun) => ({ type: "noun preposition", noun, preposition: definition.preposition, })); case "personal pronoun": - return pronoun(definition, reduplicationCount, emphasis, "object") + return pronoun({ + definition, + reduplicationCount, + emphasis, + place: "object", + }) .map((noun) => ({ type: "noun", noun })); case "determiner": - return determiner( + return determiner({ definition, reduplicationCount, - wordUnit.emphasis != null, - ) + emphasis: wordUnit.emphasis != null, + }) .map((determiner) => ({ type: "determiner", determiner, })); case "adjective": - return adjective( + return adjective({ definition, reduplicationCount, - wordUnit.emphasis, - ) + emphasis: wordUnit.emphasis, + }) .map((adjective) => ({ type: "adjective", adjective, })); case "compound adjective": - return 
compoundAdjective( - definition.adjective, + return compoundAdjective({ + adjectives: definition.adjective, reduplicationCount, - wordUnit.emphasis, - ) + emphasis: wordUnit.emphasis, + }) .map((adjective) => ({ type: "adjective", adjective, @@ -132,7 +145,11 @@ export function defaultModifier( case "adverb": return new ArrayResult([{ type: "adverb", - adverb: word(definition.adverb, reduplicationCount, emphasis), + adverb: word({ + word: definition.adverb, + reduplicationCount, + emphasis, + }), }]); default: return new ArrayResult(); @@ -144,7 +161,12 @@ export function defaultModifier( export function piModifier( insidePhrase: TokiPona.Phrase, ): ArrayResult { - return phrase(insidePhrase, "object", true, false) + return phrase({ + phrase: insidePhrase, + place: "object", + includeGerund: true, + includeVerb: false, + }) .filter((modifier) => modifier.type !== "noun" || modifier.noun.type !== "simple" || modifier.noun.preposition.length === 0 @@ -156,35 +178,43 @@ export function piModifier( function nanpaModifier( nanpa: TokiPona.Modifier & { type: "nanpa" }, ): ArrayResult { - return phrase(nanpa.phrase, "object", true, false).map((phrase) => { - if (phrase.type !== "noun") { - throw new FilteredOutError(`${phrase.type} within "in position" phrase`); - } else if ( - (phrase.noun as English.NounPhrase & { type: "simple" }) - .preposition.length > 0 - ) { - throw new FilteredOutError('preposition within "in position" phrase'); - } else { - return { - type: "in position phrase", - noun: { - type: "simple", - determiner: [], - adjective: [], + return phrase({ + phrase: nanpa.phrase, + place: "object", + includeGerund: true, + includeVerb: false, + }) + .map((phrase) => { + if (phrase.type !== "noun") { + throw new FilteredOutError( + `${phrase.type} within "in position" phrase`, + ); + } else if ( + (phrase.noun as English.NounPhrase & { type: "simple" }) + .preposition.length > 0 + ) { + throw new FilteredOutError('preposition within "in position" phrase'); + } else { + return { + type: "in position phrase", noun: { - word: "position", - emphasis: nanpa.nanpa.emphasis != null, + type: "simple", + determiner: [], + adjective: [], + noun: { + word: "position", + emphasis: nanpa.nanpa.emphasis != null, + }, + quantity: "singular", + perspective: "third", + postCompound: phrase.noun, + postAdjective: null, + preposition: [], + emphasis: false, }, - quantity: "singular", - perspective: "third", - postCompound: phrase.noun, - postAdjective: null, - preposition: [], - emphasis: false, - }, - }; - } - }); + }; + } + }); } function modifier( modifier: TokiPona.Modifier, diff --git a/src/translator/noun.ts b/src/translator/noun.ts index 07057b62..f460e060 100644 --- a/src/translator/noun.ts +++ b/src/translator/noun.ts @@ -20,26 +20,32 @@ export type PartialNoun = postAdjective: null | { adjective: string; name: string }; }>; export function partialNoun( - definition: Dictionary.Noun, - reduplicationCount: number, - emphasis: boolean, + options: Readonly<{ + definition: Dictionary.Noun; + reduplicationCount: number; + emphasis: boolean; + }>, ): ArrayResult { + const { definition } = options; const engDeterminer = ArrayResult.combine( ...definition.determiner - .map((definition) => determiner(definition, 1, false)), + .map((definition) => + determiner({ definition, reduplicationCount: 1, emphasis: false }) + ), ); const engAdjective = ArrayResult.combine( ...definition.adjective - .map((definition) => adjective(definition, 1, null)), + .map((definition) => + adjective({ definition, 
reduplicationCount: 1, emphasis: null }) + ), ); return ArrayResult.combine(engDeterminer, engAdjective) .map(([determiner, adjective]) => ({ + ...options, ...definition, determiner, adjective, perspective: "third", - reduplicationCount, - emphasis, })); } export function fromNounForms( @@ -84,18 +90,21 @@ export function simpleNounForms( return fromNounForms(nounForms, "both").map((noun) => noun.noun); } export function noun( - definition: Dictionary.Noun, - reduplicationCount: number, - emphasis: boolean, + options: Readonly<{ + definition: Dictionary.Noun; + reduplicationCount: number; + emphasis: boolean; + }>, ): ArrayResult { + const { definition } = options; return ArrayResult.combine( fromNounForms(definition, "both"), - partialNoun(definition, reduplicationCount, emphasis), + partialNoun(options), ) .map(([{ noun, quantity }, partialNoun]) => ({ ...partialNoun, type: "simple", - noun: word(noun, reduplicationCount, emphasis), + noun: word({ ...options, word: noun }), quantity, preposition: [], emphasis: false, @@ -104,7 +113,6 @@ export function noun( export function nounAsPlainString( definition: Dictionary.Noun, ): ArrayResult { - return noun(definition, 1, false).map((noun) => - EnglishComposer.noun(noun, 0) - ); + return noun({ definition, reduplicationCount: 1, emphasis: false }) + .map((noun) => EnglishComposer.noun(noun, 0)); } diff --git a/src/translator/phrase.ts b/src/translator/phrase.ts index b3cf937e..411d4b07 100644 --- a/src/translator/phrase.ts +++ b/src/translator/phrase.ts @@ -29,10 +29,13 @@ export type PhraseTranslation = | (Readonly<{ type: "adjective" }> & AdjectiveWithInWay) | Readonly<{ type: "verb"; verb: PartialCompoundVerb }>; function nounPhrase( - emphasis: boolean, - partialNoun: PartialNoun, - modifier: AdjectivalModifier, + options: Readonly<{ + emphasis: boolean; + partialNoun: PartialNoun; + modifier: AdjectivalModifier; + }>, ): ArrayResult { + const { emphasis, partialNoun, modifier } = options; return ArrayResult.from(() => { const determiner = fixDeterminer([ ...[...modifier.determiner].reverse(), @@ -73,7 +76,11 @@ function nounPhrase( type: "simple" as const, determiner, adjective, - noun: word(noun, partialNoun.reduplicationCount, partialNoun.emphasis), + noun: word({ + word: noun, + reduplicationCount: partialNoun.reduplicationCount, + emphasis: partialNoun.emphasis, + }), quantity, perspective: partialNoun.perspective, postCompound: null, @@ -104,10 +111,13 @@ function nounPhrase( }); } function adjectivePhrase( - emphasis: boolean, - adjective: English.AdjectivePhrase, - modifier: AdverbialModifier, + options: Readonly<{ + emphasis: boolean; + adjective: English.AdjectivePhrase; + modifier: AdverbialModifier; + }>, ): AdjectiveWithInWay { + const { emphasis, adjective, modifier } = options; switch (adjective.type) { case "simple": { const adverb = fixAdverb([ @@ -135,10 +145,13 @@ function adjectivePhrase( } } function verbPhrase( - emphasis: boolean, - verb: PartialVerb, - modifier: AdverbialModifier, + options: Readonly<{ + emphasis: boolean; + verb: PartialVerb; + modifier: AdverbialModifier; + }>, ): PartialVerb { + const { emphasis, verb, modifier } = options; const adverb = fixAdverb([ ...[...modifier.adverb].reverse(), ...verb.adverb, @@ -156,25 +169,32 @@ function verbPhrase( }; } function defaultPhrase( - phrase: TokiPona.Phrase & { type: "default" }, - place: Place, - includeGerund: boolean, - includeVerb: boolean, + options: Readonly<{ + phrase: TokiPona.Phrase & { type: "default" }; + place: Place; + includeGerund: boolean; + 
includeVerb: boolean; + }>, ): ArrayResult { + const { phrase, includeVerb } = options; const emphasis = phrase.emphasis != null; return ArrayResult.combine( - wordUnit(phrase.headWord, place, includeGerund), + wordUnit({ ...options, wordUnit: phrase.headWord }), multipleModifiers(phrase.modifiers), ) .flatMap(([headWord, modifier]) => { if (headWord.type === "noun" && modifier.type === "adjectival") { - return nounPhrase(emphasis, headWord, modifier) + return nounPhrase({ emphasis, partialNoun: headWord, modifier }) .map((noun) => ({ type: "noun", noun })); } else if ( headWord.type === "adjective" && modifier.type === "adverbial" ) { return new ArrayResult([{ - ...adjectivePhrase(emphasis, headWord.adjective, modifier), + ...adjectivePhrase({ + emphasis, + adjective: headWord.adjective, + modifier, + }), type: "adjective", }]); } else if ( @@ -182,7 +202,10 @@ function defaultPhrase( ) { return new ArrayResult([{ type: "verb", - verb: { ...verbPhrase(emphasis, headWord, modifier), type: "simple" }, + verb: { + ...verbPhrase({ emphasis, verb: headWord, modifier }), + type: "simple", + }, }]); } else { return new ArrayResult(); @@ -191,14 +214,17 @@ function defaultPhrase( .addErrorWhenNone(() => new ExhaustedError(Composer.phrase(phrase))); } export function phrase( - phrase: TokiPona.Phrase, - place: Place, - includeGerund: boolean, - includeVerb: boolean, + options: Readonly<{ + phrase: TokiPona.Phrase; + place: Place; + includeGerund: boolean; + includeVerb: boolean; + }>, ): ArrayResult { + const { phrase } = options; switch (phrase.type) { case "default": - return defaultPhrase(phrase, place, includeGerund, includeVerb); + return defaultPhrase({ ...options, phrase }); case "preverb": case "preposition": case "quotation": @@ -300,29 +326,24 @@ export function phraseAsVerb( } } export function multiplePhrases( - phrases: TokiPona.MultiplePhrases, - place: Place, - includeGerund: boolean, - andParticle: string, - includeVerb: boolean, + options: Readonly<{ + phrases: TokiPona.MultiplePhrases; + place: Place; + includeGerund: boolean; + andParticle: string; + includeVerb: boolean; + }>, ): ArrayResult { + const { phrases, andParticle, includeVerb } = options; switch (phrases.type) { case "single": - return phrase(phrases.phrase, place, includeGerund, includeVerb); + return phrase({ ...options, phrase: phrases.phrase }); case "and conjunction": case "anu": { const conjunction = CONJUNCTION[phrases.type]; return ArrayResult.combine( ...phrases.phrases - .map((phrases) => - multiplePhrases( - phrases, - place, - includeGerund, - andParticle, - includeVerb, - ) - ), + .map((phrases) => multiplePhrases({ ...options, phrases })), ) .filterMap((phrase) => { if ( @@ -376,12 +397,14 @@ export function multiplePhrases( } } export function multiplePhrasesAsNoun( - phrases: TokiPona.MultiplePhrases, - place: Place, - includeGerund: boolean, - andParticle: string, + options: Readonly<{ + phrases: TokiPona.MultiplePhrases; + place: Place; + includeGerund: boolean; + andParticle: string; + }>, ): ArrayResult { - return multiplePhrases(phrases, place, includeGerund, andParticle, false) + return multiplePhrases({ ...options, includeVerb: false }) .filterMap((phrase) => { if (phrase.type === "noun") { return phrase.noun; diff --git a/src/translator/predicate.ts b/src/translator/predicate.ts index 7bd27cf0..483a734a 100644 --- a/src/translator/predicate.ts +++ b/src/translator/predicate.ts @@ -142,20 +142,31 @@ export function predicate( ): ArrayResult { switch (tokiPonaPredicate.type) { case "single": - 
return phrase(tokiPonaPredicate.predicate, "object", false, true) + return phrase({ + phrase: tokiPonaPredicate.predicate, + place: "object", + includeGerund: false, + includeVerb: true, + }) .map(phraseAsVerb); case "associated": { - const predicatePhrase = multiplePhrases( - tokiPonaPredicate.predicates, - "object", - false, + const predicatePhrase = multiplePhrases({ + phrases: tokiPonaPredicate.predicates, + place: "object", + includeGerund: false, andParticle, - true, - ); + includeVerb: true, + }); const object = new ArrayResult([tokiPonaPredicate.objects]).flatMap( (object) => { if (object != null) { - return multiplePhrases(object, "object", true, "e", false); + return multiplePhrases({ + phrases: object, + place: "object", + includeGerund: true, + andParticle: "e", + includeVerb: false, + }); } else { return new ArrayResult([null]); } diff --git a/src/translator/pronoun.ts b/src/translator/pronoun.ts index f38ba2f2..1f87d5f7 100644 --- a/src/translator/pronoun.ts +++ b/src/translator/pronoun.ts @@ -24,33 +24,38 @@ function pronounForms( } } export function partialPronoun( - pronoun: Dictionary.Pronoun, - reduplicationCount: number, - emphasis: boolean, - place: Place, + options: Readonly<{ + pronoun: Dictionary.Pronoun; + reduplicationCount: number; + emphasis: boolean; + place: Place; + }>, ): PartialNoun { + const { pronoun, place } = options; return { + ...options, ...pronounForms(pronoun, place), determiner: [], adjective: [], - reduplicationCount, perspective: pronoun.perspective, postAdjective: null, - emphasis, }; } export function pronoun( - definition: Dictionary.Pronoun, - reduplicationCount: number, - emphasis: boolean, - place: Place, + options: Readonly<{ + definition: Dictionary.Pronoun; + reduplicationCount: number; + emphasis: boolean; + place: Place; + }>, ): ArrayResult { + const { definition, place } = options; return fromNounForms(pronounForms(definition, place), "both") .map(({ noun, quantity }) => ({ type: "simple", determiner: [], adjective: [], - noun: word(noun, reduplicationCount, emphasis), + noun: word({ ...options, word: noun }), quantity, perspective: definition.perspective, postCompound: null, diff --git a/src/translator/sentence.ts b/src/translator/sentence.ts index 77a8efad..81a38273 100644 --- a/src/translator/sentence.ts +++ b/src/translator/sentence.ts @@ -36,10 +36,13 @@ function filler(filler: TokiPona.Filler): ArrayResult { } } function emphasisAsPunctuation( - emphasis: null | TokiPona.Emphasis, - interrogative: boolean, - originalPunctuation: string, + options: Readonly<{ + emphasis: null | TokiPona.Emphasis; + interrogative: boolean; + originalPunctuation: string; + }>, ): string { + const { emphasis, interrogative, originalPunctuation } = options; if (emphasis == null) { if (interrogative) { return "?"; @@ -174,11 +177,11 @@ function sentence( lastClause, ...right, ]); - const usePunctuation = emphasisAsPunctuation( - sentence.emphasis, - sentence.interrogative != null, - punctuation, - ); + const usePunctuation = emphasisAsPunctuation({ + emphasis: sentence.emphasis, + interrogative: sentence.interrogative != null, + originalPunctuation: punctuation, + }); return engClauses.map((clauses) => ({ clauses, punctuation: usePunctuation, diff --git a/src/translator/verb.ts b/src/translator/verb.ts index 7fd70d7d..830b011f 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -41,14 +41,21 @@ export function condenseVerb(present: string, past: string): string { return [condense(first, second), ...rest].join(" "); } export function 
partialVerb( - definition: Dictionary.Verb, - reduplicationCount: number, - emphasis: boolean, + options: Readonly<{ + definition: Dictionary.Verb; + reduplicationCount: number; + emphasis: boolean; + }>, ): ArrayResult { + const { definition, reduplicationCount, emphasis } = options; const object = new ArrayResult([definition.directObject]) .flatMap((object) => { if (object != null) { - return noun(object, 1, false); + return noun({ + definition: object, + reduplicationCount: 1, + emphasis: false, + }); } else { return new ArrayResult([null]); } @@ -56,7 +63,11 @@ export function partialVerb( const preposition = ArrayResult.combine( ...definition.indirectObject .flatMap((indirectObject) => - noun(indirectObject.object, 1, false) + noun({ + definition: indirectObject.object, + reduplicationCount: 1, + emphasis: false, + }) .map((object) => nounAsPreposition(object, indirectObject.preposition) ) @@ -96,12 +107,15 @@ export function forObject(verb: PartialCompoundVerb): boolean | string { return forObject; } export function fromVerbForms( - verbForms: Dictionary.VerbForms, - perspective: Dictionary.Perspective, - quantity: English.Quantity, - reduplicationCount: number, - emphasis: boolean, + options: Readonly<{ + verbForms: Dictionary.VerbForms; + perspective: Dictionary.Perspective; + quantity: English.Quantity; + reduplicationCount: number; + emphasis: boolean; + }>, ): ArrayResult { + const { verbForms, perspective, quantity } = options; const is = verbForms.presentSingular === "is"; let presentSingular: string; if (is && perspective === "first") { @@ -174,7 +188,7 @@ export function fromVerbForms( return { modal, finite: [], - infinite: word(verb.infinite, reduplicationCount, emphasis), + infinite: word({ ...options, word: verb.infinite }), }; }); } @@ -185,13 +199,13 @@ export function verb( ): ArrayResult { switch (partialVerb.type) { case "simple": { - return fromVerbForms( - partialVerb, + return fromVerbForms({ + verbForms: partialVerb, perspective, quantity, - partialVerb.reduplicationCount, - partialVerb.wordEmphasis, - ) + reduplicationCount: partialVerb.reduplicationCount, + emphasis: partialVerb.wordEmphasis, + }) .map((verb) => ({ ...partialVerb, type: "default", diff --git a/src/translator/word.ts b/src/translator/word.ts index 91332316..4d0d5481 100644 --- a/src/translator/word.ts +++ b/src/translator/word.ts @@ -5,9 +5,12 @@ export function unemphasized(word: string): English.Word { return { word, emphasis: false }; } export function word( - word: string, - reduplicationCount: number, - emphasis: boolean, + options: Readonly<{ + word: string; + reduplicationCount: number; + emphasis: boolean; + }>, ): English.Word { + const { word, reduplicationCount, emphasis } = options; return { word: repeatWithSpace(word, reduplicationCount), emphasis }; } diff --git a/src/translator/word_unit.ts b/src/translator/word_unit.ts index 74c0818c..6abfbb71 100644 --- a/src/translator/word_unit.ts +++ b/src/translator/word_unit.ts @@ -14,12 +14,15 @@ export type WordUnitTranslation = | Readonly<{ type: "adjective"; adjective: English.AdjectivePhrase }> | (Readonly<{ type: "verb" }> & PartialVerb); function defaultWordUnit( - word: string, - reduplicationCount: number, - emphasis: null | TokiPona.Emphasis, - place: Place, - includeGerund: boolean, + options: Readonly<{ + word: string; + reduplicationCount: number; + emphasis: null | TokiPona.Emphasis; + place: Place; + includeGerund: boolean; + }>, ): ArrayResult { + const { word, emphasis, includeGerund } = options; return new 
ArrayResult(dictionary.get(word)!.definitions) .flatMap((definition) => { switch (definition.type) { @@ -27,41 +30,43 @@ function defaultWordUnit( if (!includeGerund && definition.gerund) { return new ArrayResult(); } else { - return partialNoun(definition, reduplicationCount, emphasis != null) + return partialNoun({ + ...options, + definition, + emphasis: emphasis != null, + }) .map((noun) => ({ ...noun, type: "noun" })); } case "personal pronoun": return new ArrayResult([{ - ...partialPronoun( - definition, - reduplicationCount, - emphasis != null, - place, - ), + ...partialPronoun({ + ...options, + pronoun: definition, + emphasis: emphasis != null, + }), type: "noun", }]); case "adjective": - return adjective( - definition, - reduplicationCount, - emphasis, - ) + return adjective({ ...options, definition }) .map((adjective) => ({ type: "adjective", adjective, })); case "compound adjective": - return compoundAdjective( - definition.adjective, - reduplicationCount, - emphasis, - ) + return compoundAdjective({ + ...options, + adjectives: definition.adjective, + }) .map((adjective) => ({ type: "adjective", adjective, })); case "verb": - return partialVerb(definition, reduplicationCount, emphasis != null) + return partialVerb({ + ...options, + definition, + emphasis: emphasis != null, + }) .map((verb) => ({ ...verb, type: "verb" })); default: return new ArrayResult(); @@ -69,10 +74,13 @@ function defaultWordUnit( }); } export function wordUnit( - wordUnit: TokiPona.WordUnit, - place: Place, - includeGerund: boolean, + options: Readonly<{ + wordUnit: TokiPona.WordUnit; + place: Place; + includeGerund: boolean; + }>, ): ArrayResult { + const { wordUnit } = options; switch (wordUnit.type) { case "number": return number(wordUnit.words) @@ -100,13 +108,12 @@ export function wordUnit( reduplicationCount = wordUnit.count; break; } - return defaultWordUnit( - wordUnit.word, + return defaultWordUnit({ + ...options, + word: wordUnit.word, reduplicationCount, - wordUnit.emphasis, - place, - includeGerund, - ); + emphasis: wordUnit.emphasis, + }); } } } From a38de5d5ae82c839304a13df146c0042042ea95b Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 10:17:03 +0800 Subject: [PATCH 110/199] improve punctuation parsing --- deno.json | 2 ++ deno.lock | 5 +++++ src/misc.ts | 8 ++++++-- src/parser/lexer.ts | 24 ++++++++++++++---------- src/parser/punctuation.ts | 31 +++++++++++++++++++++++++++++++ src/parser/ucsur.ts | 8 ++++---- 6 files changed, 62 insertions(+), 16 deletions(-) create mode 100644 src/parser/punctuation.ts diff --git a/deno.json b/deno.json index 7d893e49..34f75a94 100644 --- a/deno.json +++ b/deno.json @@ -44,6 +44,7 @@ "no-sync-fn-in-async-fn", "no-throw-literal", "no-useless-rename", + "prefer-ascii", "single-var-declarator" ], "exclude": ["no-explicit-any"] @@ -57,6 +58,7 @@ "@std/collections": "jsr:@std/collections@^1.0.10", "@std/html": "jsr:@std/html@^1.0.3", "@std/random": "jsr:@std/random@^0.1.0", + "@std/regexp": "jsr:@std/regexp@^1.0.1", "@std/text": "jsr:@std/text@^1.0.10", "compromise": "npm:compromise@^14.14.3", "esbuild": "npm:esbuild@^0.25.0" diff --git a/deno.lock b/deno.lock index b3051caa..028f4f6b 100644 --- a/deno.lock +++ b/deno.lock @@ -12,6 +12,7 @@ "jsr:@std/internal@^1.0.5": "1.0.5", "jsr:@std/path@^1.0.6": "1.0.8", "jsr:@std/random@0.1": "0.1.0", + "jsr:@std/regexp@^1.0.1": "1.0.1", "jsr:@std/text@^1.0.10": "1.0.10", "npm:compromise@^14.14.3": "14.14.4", "npm:esbuild@0.25": "0.25.0" @@ -58,6 +59,9 @@ "@std/random@0.1.0": { "integrity": 
"70a006be0ffb77d036bab54aa8ae6bd0119ba77ace0f2f56f63273d4262a5667" }, + "@std/regexp@1.0.1": { + "integrity": "5179d823465085c5480dafb44438466e83c424fadc61ba31f744050ecc0f596d" + }, "@std/text@1.0.10": { "integrity": "9dcab377450253c0efa9a9a0c731040bfd4e1c03f8303b5934381467b7954338" } @@ -195,6 +199,7 @@ "jsr:@std/collections@^1.0.10", "jsr:@std/html@^1.0.3", "jsr:@std/random@0.1", + "jsr:@std/regexp@^1.0.1", "jsr:@std/text@^1.0.10", "npm:compromise@^14.14.3", "npm:esbuild@0.25" diff --git a/src/misc.ts b/src/misc.ts index 5d011eab..56e0c7ec 100644 --- a/src/misc.ts +++ b/src/misc.ts @@ -1,5 +1,6 @@ import { distinctBy } from "@std/collections/distinct-by"; -import { escape } from "@std/html/entities"; +import { escape as escapeHtml } from "@std/html/entities"; +import { escape as escapeRegex } from "@std/regexp/escape"; import { Lazy } from "./cache.ts"; export const NEWLINES = /\r\n|\n|\r/g; @@ -36,7 +37,7 @@ export function newlineAsHtmlLineBreak(text: string): string { return text.replaceAll(NEWLINES, "
"); } export function escapeHtmlWithLineBreak(text: string): string { - return newlineAsHtmlLineBreak(escape(text)); + return newlineAsHtmlLineBreak(escapeHtml(text)); } export function setIgnoreError(key: string, value: string): void { if (!checkLocalStorage()) { @@ -89,3 +90,6 @@ export function deduplicateErrors( ): ReadonlyArray { return distinctBy(errors, ({ message }) => message); } +export function characterClass(characters: Iterable): RegExp { + return new RegExp(`[${[...characters].map(escapeRegex).join("")}]`, "u"); +} diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index c904a91f..52191dd3 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -19,6 +19,13 @@ import { UnexpectedError, UnrecognizedError, } from "./parser_lib.ts"; +import { + ELLIPSIS, + NSK_COLON, + NSK_PERIOD, + SENTENCE_TERMINATOR, + SENTENCE_TERMINATOR_TO_ASCII, +} from "./punctuation.ts"; import { Token } from "./token.ts"; import { END_OF_CARTOUCHE, @@ -102,25 +109,22 @@ const xAlaX = lazy(() => { }) .map((word) => ({ type: "x ala x", word })); const punctuation = choiceOnlyOne( - match(/[.,:;?!…·。。︒\u{F199C}\u{F199D}]+/u, "punctuation") - .map((punctuation) => - punctuation - .replaceAll(/[·。。︒\u{F199C}]/gu, ".") - .replaceAll("\u{F199D}", ":") - .replaceAll("...", "…") - ) - .skip(spaces), + allAtLeastOnce( + match(SENTENCE_TERMINATOR, "punctuation") + .map((punctuation) => SENTENCE_TERMINATOR_TO_ASCII.get(punctuation)!), + ) + .map((punctuation) => punctuation.join("").replaceAll("...", ELLIPSIS)), newline.map(() => "."), ) .map((punctuation) => ({ type: "punctuation", punctuation })); const cartoucheElement = choiceOnlyOne( singleUcsurWord - .skip(match(/[\uFF1A\u{F199D}]/u, "full width colon").skip(spaces)), + .skip(match(NSK_COLON, "full width colon").skip(spaces)), sequence( singleUcsurWord, count( allAtLeastOnce( - match(/[・。/\u{F199C}]/u, "full width dot").skip(spaces), + match(NSK_PERIOD, "full width dot").skip(spaces), ), ), ) diff --git a/src/parser/punctuation.ts b/src/parser/punctuation.ts new file mode 100644 index 00000000..459f2a23 --- /dev/null +++ b/src/parser/punctuation.ts @@ -0,0 +1,31 @@ +import { characterClass } from "../misc.ts"; +import { UCSUR_COLON, UCSUR_MIDDLE_DOT } from "./ucsur.ts"; + +export const ELLIPSIS = "\u2026"; +const MIDDLE_DOT = "\u00B7"; + +const ASCII_SENTENCE_TERMINATOR = [".", ",", ":", ";", "?", "!"]; +const FULL_WIDTH_PERIOD = [ + "\u3002", + "\uFF61", + "\uFE12", + "\u30FB", + "\u3002", + UCSUR_MIDDLE_DOT, +]; +const FULL_WIDTH_COLON = ["\uFF1A", UCSUR_COLON]; +const NSK_PERIOD_SET = [...FULL_WIDTH_PERIOD, "\uFF0F"]; + +export const SENTENCE_TERMINATOR_TO_ASCII = new Map([ + ...ASCII_SENTENCE_TERMINATOR.map((symbol) => [symbol, symbol] as const), + ...FULL_WIDTH_PERIOD.map((period) => [period, "."] as const), + ...FULL_WIDTH_COLON.map((colon) => [colon, ":"] as const), + [ELLIPSIS, "..."] as const, + [MIDDLE_DOT, "."] as const, +]); + +export const SENTENCE_TERMINATOR = characterClass( + SENTENCE_TERMINATOR_TO_ASCII.keys(), +); +export const NSK_PERIOD = characterClass(NSK_PERIOD_SET); +export const NSK_COLON = characterClass(FULL_WIDTH_COLON); diff --git a/src/parser/ucsur.ts b/src/parser/ucsur.ts index d6c2cfbc..acf41447 100644 --- a/src/parser/ucsur.ts +++ b/src/parser/ucsur.ts @@ -12,8 +12,8 @@ export const END_OF_LONG_GLYPH = "\u{F1998}"; export const COMBINING_LONG_GLYPH_EXTENSION = "\u{F1999}"; export const START_OF_REVERSE_LONG_GLYPH = "\u{F199A}"; export const END_OF_REVERSE_LONG_GLYPH = "\u{F199B}"; -export const MIDDLE_DOT = 
"\u{F199C}"; -export const COLON = "\u{F199D}"; +export const UCSUR_MIDDLE_DOT = "\u{F199C}"; +export const UCSUR_COLON = "\u{F199D}"; export const SPECIAL_UCSUR_DESCRIPTIONS: Map = new Map( Object.entries({ @@ -29,8 +29,8 @@ export const SPECIAL_UCSUR_DESCRIPTIONS: Map = new Map( [COMBINING_LONG_GLYPH_EXTENSION]: "combining long glyph extension", [START_OF_REVERSE_LONG_GLYPH]: "start of reverse long glyph", [END_OF_REVERSE_LONG_GLYPH]: "end of reverse long glyph", - [MIDDLE_DOT]: "middle dot", - [COLON]: "colon", + [UCSUR_MIDDLE_DOT]: "middle dot", + [UCSUR_COLON]: "colon", }), ); export const UCSUR_TO_LATIN: Map = new Map(Object.entries({ From 7e3f9e7dd0e0527290f1d1c90568ef71a5ac6845 Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 10:20:59 +0800 Subject: [PATCH 111/199] small improvement to misc --- src/misc.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/misc.ts b/src/misc.ts index 56e0c7ec..1a82a911 100644 --- a/src/misc.ts +++ b/src/misc.ts @@ -70,9 +70,9 @@ export function extractErrorMessage(error: unknown): string { } } export function filterSet( - set: ReadonlyArray, + set: Iterable, ): ReadonlyArray { - return set.filter(([condition]) => condition).map(([_, value]) => value); + return [...set].filter(([condition]) => condition).map(([_, value]) => value); } export function flattenError(error: unknown): ReadonlyArray { if (error instanceof AggregateError) { @@ -86,7 +86,7 @@ export function lazy(fn: () => T): () => T { return () => cache.getValue(); } export function deduplicateErrors( - errors: ReadonlyArray, + errors: Iterable, ): ReadonlyArray { return distinctBy(errors, ({ message }) => message); } From 790a4e64a70313ed022e6cd65a822ad7e0310d9a Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 10:23:41 +0800 Subject: [PATCH 112/199] use null instead --- src/cache.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cache.ts b/src/cache.ts index 6e1c0ae1..57b7dec1 100644 --- a/src/cache.ts +++ b/src/cache.ts @@ -19,7 +19,7 @@ export class Cache { } export class Lazy implements Clearable { #evaluated = false; - #value: undefined | T; + #value: null | T = null; readonly #fn: () => T; constructor(fn: () => T) { this.#fn = fn; @@ -33,6 +33,6 @@ export class Lazy implements Clearable { } clear(): void { this.#evaluated = false; - this.#value = undefined; + this.#value = null; } } From c9f2016049c2fa9d967cee1b63cbbae63648c19e Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 10:45:22 +0800 Subject: [PATCH 113/199] fix --- src/parser/lexer.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index 52191dd3..c0d57eb4 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -113,6 +113,7 @@ const punctuation = choiceOnlyOne( match(SENTENCE_TERMINATOR, "punctuation") .map((punctuation) => SENTENCE_TERMINATOR_TO_ASCII.get(punctuation)!), ) + .skip(spaces) .map((punctuation) => punctuation.join("").replaceAll("...", ELLIPSIS)), newline.map(() => "."), ) From 6f4d03d1e4d1c5ae84cea07becaefaef065d5b8a Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 10:53:27 +0800 Subject: [PATCH 114/199] fix --- src/parser/parser.ts | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 5509176f..8e7459e4 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -170,18 +170,15 @@ function simpleWordUnit( description: string, ): Parser { return choice( - sequence( - wordFrom(word, 
description) - .then((word) => - count(manyAtLeastOnce(specificWord(word))) - .map((count) => [word, count + 1]) - ), - ) - .map(([[word, count]]) => ({ - type: "reduplication", - word, - count, - })), + wordFrom(word, description) + .then((word) => + count(manyAtLeastOnce(specificWord(word))) + .map((count) => ({ + type: "reduplication", + word, + count: count + 1, + })) + ), xAlaX(word, description), wordFrom(word, description) .map((word) => ({ type: "default", word })), From 8963f433e377e432cf29d4157a4ad083bbd3f628 Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 11:08:52 +0800 Subject: [PATCH 115/199] improve capitalization --- src/parser/lexer.ts | 3 ++- src/translator/composer.ts | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index c0d57eb4..00a75d9f 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -152,7 +152,8 @@ const cartouche = specificSpecialUcsur(START_OF_CARTOUCHE) .skip(spaces) .map((words) => { const word = words.join(""); - return `${word[0].toUpperCase()}${word.slice(1)}`; + const [first] = word; + return `${first.toLocaleUpperCase()}${word.slice(first.length)}`; }); const cartouches = allAtLeastOnce(cartouche) .map((words) => words.join(" ")) diff --git a/src/translator/composer.ts b/src/translator/composer.ts index b349f9d7..98c7484d 100644 --- a/src/translator/composer.ts +++ b/src/translator/composer.ts @@ -160,7 +160,7 @@ function sentence(sentence: English.Sentence): string { return `${sentence.clauses.map(clause).join(", ")}${sentence.punctuation}` .replace( /(? character.toUpperCase(), + (character) => character.toLocaleUpperCase(), ); } export function translate(src: string): ArrayResult { From bfae72dfce355fb714aafc3ce50ab923d281ca17 Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 11:53:30 +0800 Subject: [PATCH 116/199] small refactor --- src/parser/parser_lib.ts | 80 ++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/src/parser/parser_lib.ts b/src/parser/parser_lib.ts index 0cb1c20c..615c8994 100644 --- a/src/parser/parser_lib.ts +++ b/src/parser/parser_lib.ts @@ -10,8 +10,7 @@ export class Parser { readonly rawParser: (src: string) => ParserResult; static cache: null | Cache = null; constructor(parser: (src: string) => ParserResult) { - this.unmemoizedParser = (src: string) => - ArrayResult.from(() => parser(src)); + this.unmemoizedParser = parser; if (Parser.cache != null) { const cache = new Map>(); Parser.addToCache(cache); @@ -95,9 +94,7 @@ export class UnrecognizedError extends ArrayResultError { } } export function error(error: ArrayResultError): Parser { - return new Parser(() => { - throw error; - }); + return new Parser(() => new ArrayResult(error)); } export const empty = new Parser(() => new ArrayResult()); export const nothing = new Parser((src) => @@ -208,52 +205,63 @@ export function matchCapture( description: string, ): Parser { const newRegex = new RegExp(`^${regex.source}`, regex.flags); - return new Parser((src) => { - const match = src.match(newRegex); - if (match != null) { - return new ArrayResult([{ - value: match, - rest: src.slice(match[0].length), - }]); - } - throw new UnexpectedError(describeSource(src), description); - }); + return new Parser((src) => + ArrayResult.from(() => { + const match = src.match(newRegex); + if (match != null) { + return new ArrayResult([{ + value: match, + rest: src.slice(match[0].length), + }]); + } + throw new UnexpectedError(describeSource(src), 
description); + }) + ); } export function match(regex: RegExp, description: string): Parser { return matchCapture(regex, description).map(([matched]) => matched); } export function slice(length: number, description: string): Parser { - return new Parser((src) => { - if (src.length >= length) { - return new ArrayResult([{ - rest: src.slice(length), - value: src.slice(0, length), - }]); - } - throw new UnexpectedError(describeSource(src), description); - }); + return new Parser((src) => + ArrayResult.from(() => { + if (src.length >= length) { + return new ArrayResult([{ + rest: src.slice(length), + value: src.slice(0, length), + }]); + } + throw new UnexpectedError(describeSource(src), description); + }) + ); } export function matchString( match: string, description = `"${match}"`, ): Parser { - return new Parser((src) => { - if (src.length >= match.length && src.slice(0, match.length) === match) { - return new ArrayResult([{ rest: src.slice(match.length), value: match }]); - } - throw new UnexpectedError(describeSource(src), description); - }); + return new Parser((src) => + ArrayResult.from(() => { + if (src.length >= match.length && src.slice(0, match.length) === match) { + return new ArrayResult([{ + rest: src.slice(match.length), + value: match, + }]); + } + throw new UnexpectedError(describeSource(src), description); + }) + ); } export const everything = new Parser((src) => new ArrayResult([{ value: src, rest: "" }]) ); export const character = match(/./us, "character"); -export const end = new Parser((src) => { - if (src === "") { - return new ArrayResult([{ value: null, rest: "" }]); - } - throw new UnexpectedError(describeSource(src), "end of text"); -}); +export const end = new Parser((src) => + ArrayResult.from(() => { + if (src === "") { + return new ArrayResult([{ value: null, rest: "" }]); + } + throw new UnexpectedError(describeSource(src), "end of text"); + }) +); export function withSource( parser: Parser, ): Parser { From ece5705f018e5e1d10a78dbbb83420f88fd6d152 Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 12:00:23 +0800 Subject: [PATCH 117/199] small improvement --- src/parser/lexer.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index 00a75d9f..b2690de0 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -141,8 +141,8 @@ const cartoucheElement = choiceOnlyOne( return morae.slice(0, count).join(""); }), singleUcsurWord.map((word) => word[0]), - match(/[a-zA-Z]+/, "Latin letter") - .map((letter) => letter.toLowerCase()) + match(/[a-zA-Z]/, "Latin letter") + .map((letter) => letter.toLocaleLowerCase()) .skip(spaces), ); const cartouche = specificSpecialUcsur(START_OF_CARTOUCHE) From 6d7c766004028e1221b0ce5ef8b29aa283c6e376 Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 12:01:21 +0800 Subject: [PATCH 118/199] smaller code --- src/parser/lexer.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index b2690de0..1f5a22b7 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -4,6 +4,7 @@ import { cache } from "./cache.ts"; import { all, allAtLeastOnce, + character, choice, choiceOnlyOne, count, @@ -150,11 +151,9 @@ const cartouche = specificSpecialUcsur(START_OF_CARTOUCHE) .with(allAtLeastOnce(cartoucheElement)) .skip(specificSpecialUcsur(END_OF_CARTOUCHE)) .skip(spaces) - .map((words) => { - const word = words.join(""); - const [first] = word; - return 
`${first.toLocaleUpperCase()}${word.slice(first.length)}`; - }); + .map((words) => + words.join("").replace(/^./u, (character) => character.toLocaleUpperCase()) + ); const cartouches = allAtLeastOnce(cartouche) .map((words) => words.join(" ")) .map((words) => ({ From ed87b65d67ba2d8246b2ee8d293b33fbc970d793 Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 12:10:42 +0800 Subject: [PATCH 119/199] remove unused import --- src/parser/lexer.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index 1f5a22b7..5ac27dd5 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -4,7 +4,6 @@ import { cache } from "./cache.ts"; import { all, allAtLeastOnce, - character, choice, choiceOnlyOne, count, From 6bbc03899e99a178e8dc3702512cd4a16a721ac4 Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 12:23:51 +0800 Subject: [PATCH 120/199] this is unnecessary --- src/parser/lexer.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index 5ac27dd5..4cc01f5c 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -142,7 +142,7 @@ const cartoucheElement = choiceOnlyOne( }), singleUcsurWord.map((word) => word[0]), match(/[a-zA-Z]/, "Latin letter") - .map((letter) => letter.toLocaleLowerCase()) + .map((letter) => letter.toLowerCase()) .skip(spaces), ); const cartouche = specificSpecialUcsur(START_OF_CARTOUCHE) @@ -151,7 +151,7 @@ const cartouche = specificSpecialUcsur(START_OF_CARTOUCHE) .skip(specificSpecialUcsur(END_OF_CARTOUCHE)) .skip(spaces) .map((words) => - words.join("").replace(/^./u, (character) => character.toLocaleUpperCase()) + words.join("").replace(/^./u, (character) => character.toUpperCase()) ); const cartouches = allAtLeastOnce(cartouche) .map((words) => words.join(" ")) From 2f9880d2066102a40bf7c9d770293e1b88e9cb1b Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 13:50:35 +0800 Subject: [PATCH 121/199] separate translator --- src/mod.ts | 2 +- src/translator/composer.ts | 11 ++++------- src/translator/translator.ts | 10 ++++++++++ 3 files changed, 15 insertions(+), 8 deletions(-) create mode 100644 src/translator/translator.ts diff --git a/src/mod.ts b/src/mod.ts index a08aded4..7a3b5b84 100644 --- a/src/mod.ts +++ b/src/mod.ts @@ -4,7 +4,7 @@ import { errors } from "../telo-misikeke/telo_misikeke.js"; import { ArrayResultError } from "./array_result.ts"; import { deduplicateErrors } from "./misc.ts"; import { settings } from "./settings.ts"; -import { translate as rawTranslate } from "./translator/composer.ts"; +import { translate as rawTranslate } from "./translator/translator.ts"; export { ArrayResultError } from "./array_result.ts"; export type { ArrayResultOptions } from "./array_result.ts"; diff --git a/src/translator/composer.ts b/src/translator/composer.ts index 98c7484d..8541b6df 100644 --- a/src/translator/composer.ts +++ b/src/translator/composer.ts @@ -1,8 +1,5 @@ -import { ArrayResult } from "../array_result.ts"; import { nullableAsArray } from "../misc.ts"; -import { parse } from "../parser/parser.ts"; import * as English from "./ast.ts"; -import { multipleSentences } from "./sentence.ts"; const EMPHASIS_STARTING_TAG = ""; const EMPHASIS_ENDING_TAG = ""; @@ -163,8 +160,8 @@ function sentence(sentence: English.Sentence): string { (character) => character.toLocaleUpperCase(), ); } -export function translate(src: string): ArrayResult { - return parse(src) - .flatMap(multipleSentences) - .map((sentences) => sentences.map(sentence).join(" ")); 
+export function multipleSentences( + sentences: ReadonlyArray, +): string { + return sentences.map(sentence).join(" "); } diff --git a/src/translator/translator.ts b/src/translator/translator.ts new file mode 100644 index 00000000..80022059 --- /dev/null +++ b/src/translator/translator.ts @@ -0,0 +1,10 @@ +import { ArrayResult } from "../array_result.ts"; +import { parse } from "../parser/parser.ts"; +import * as EnglishComposer from "./composer.ts"; +import { multipleSentences } from "./sentence.ts"; + +export function translate(text: string): ArrayResult { + return parse(text) + .flatMap(multipleSentences) + .map(EnglishComposer.multipleSentences); +} From bcd6c2aef29d50970be2c40674ac5962bf46afbb Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 14:03:45 +0800 Subject: [PATCH 122/199] small improvement --- src/misc.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/misc.ts b/src/misc.ts index 1a82a911..a64e6dde 100644 --- a/src/misc.ts +++ b/src/misc.ts @@ -91,5 +91,5 @@ export function deduplicateErrors( return distinctBy(errors, ({ message }) => message); } export function characterClass(characters: Iterable): RegExp { - return new RegExp(`[${[...characters].map(escapeRegex).join("")}]`, "u"); + return new RegExp(`[${escapeRegex([...characters].join(""))}]`, "u"); } From 79981f217e0180d97e5778b188c645d2d7fb4cb3 Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 14:10:19 +0800 Subject: [PATCH 123/199] small improvement --- src/parser/lexer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index 4cc01f5c..bb1683be 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -151,7 +151,7 @@ const cartouche = specificSpecialUcsur(START_OF_CARTOUCHE) .skip(specificSpecialUcsur(END_OF_CARTOUCHE)) .skip(spaces) .map((words) => - words.join("").replace(/^./u, (character) => character.toUpperCase()) + words.join("").replace(/^./, (character) => character.toUpperCase()) ); const cartouches = allAtLeastOnce(cartouche) .map((words) => words.join(" ")) From efee9c9596d8e41d38ea1de49be340ad3b1b3402 Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 18:13:44 +0800 Subject: [PATCH 124/199] only allow siungle assignment --- dictionary/parser.ts | 6 ++++-- src/main.ts | 10 +++++++--- src/mod.ts | 11 ++++++++--- src/parser/lexer.ts | 6 ++++-- src/translator/number.ts | 7 +++++-- src/translator/verb.ts | 4 +++- 6 files changed, 31 insertions(+), 13 deletions(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index 294864fa..5b4b604c 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -116,8 +116,8 @@ const nounOnly = sequence( ), ) .map(([first, second, [gerund, number]]) => { - let singular: null | string = null; - let plural: null | string = null; + let singular: null | string; + let plural: null | string; switch (number) { case null: { if (second == null) { @@ -160,8 +160,10 @@ const nounOnly = sequence( switch (number) { case "singular": singular = first; + plural = null; break; case "plural": + singular = null; plural = first; break; } diff --git a/src/main.ts b/src/main.ts index df1660bf..127a7e70 100644 --- a/src/main.ts +++ b/src/main.ts @@ -260,11 +260,15 @@ function main(): void { } }); function displayToCustomDictionary(message: string): void { - let original = customDictionaryTextBox.value.trimEnd(); + const original = customDictionaryTextBox.value.trimEnd(); + let append: string; if (original !== "") { - original += "\n\n"; + append = "\n\n"; + } else { + append = 
""; } - customDictionaryTextBox.value = `${original}${message.trimEnd()}\n`; + customDictionaryTextBox.value = + `${original}${append}${message.trimEnd()}\n`; customDictionaryTextBox.scrollTo(0, customDictionaryTextBox.scrollHeight); } function addWord(): void { diff --git a/src/mod.ts b/src/mod.ts index 7a3b5b84..c1575dc1 100644 --- a/src/mod.ts +++ b/src/mod.ts @@ -23,13 +23,18 @@ export function translate(tokiPona: string): ReadonlyArray { return values; } } else { - let error: ReadonlyArray = []; + let teloMisikekeErrors: ReadonlyArray; if (settings.teloMisikeke) { - error = errors(tokiPona) + teloMisikekeErrors = errors(tokiPona) .map((message) => new ArrayResultError(message, { isHtml: true })); + } else { + teloMisikekeErrors = []; } - if (error.length === 0) { + let error: ReadonlyArray; + if (teloMisikekeErrors.length === 0) { error = deduplicateErrors(arrayResult.errors); + } else { + error = teloMisikekeErrors; } throw new AggregateError(error); } diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index bb1683be..676d4508 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -130,9 +130,11 @@ const cartoucheElement = choiceOnlyOne( ), ) .map(([word, dots]) => { - let count = dots; + let count: number; if (/^[aeiou]/.test(word)) { - count++; + count = dots + 1; + } else { + count = dots; } const morae = word.match(/[aeiou]|[jklmnpstw][aeiou]|n/g)!; if (morae.length < count) { diff --git a/src/translator/number.ts b/src/translator/number.ts index 81a0a435..8ef304b4 100644 --- a/src/translator/number.ts +++ b/src/translator/number.ts @@ -44,11 +44,14 @@ function unfilteredNasinNanpaPona( if (aleStart === -1) { return subHundred(number); } else { - let hundredCount = number + const index = number .slice(aleStart) .findIndex((number) => number !== 100); - if (hundredCount === -1) { + let hundredCount: number; + if (index === -1) { hundredCount = number.length - aleStart; + } else { + hundredCount = index; } if (previousHundredCount <= hundredCount) { throw new FilteredOutError('unsorted "ale"'); diff --git a/src/translator/verb.ts b/src/translator/verb.ts index 830b011f..23510f0e 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -181,9 +181,11 @@ export function fromVerbForms( break; } return verb.map((verb) => { - let modal: null | Word = null; + let modal: null | Word; if (verb.modal != null) { modal = unemphasized(verb.modal); + } else { + modal = null; } return { modal, From 99feb52d60a7cd27205243d5e001e25627000a8c Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 18:29:42 +0800 Subject: [PATCH 125/199] refactor --- dictionary/parser.ts | 18 +++++++++--------- src/array_result.ts | 13 ++++--------- src/misc.ts | 10 ++++++++++ src/translator/phrase.ts | 9 +++++---- src/translator/verb.ts | 10 ++-------- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index 5b4b604c..ba32ec79 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -1,7 +1,11 @@ import { escape } from "@std/html/entities"; import nlp from "compromise/three"; import { ArrayResultError } from "../src/array_result.ts"; -import { deduplicateErrors, nullableAsArray } from "../src/misc.ts"; +import { + deduplicateErrors, + mapNullable, + nullableAsArray, +} from "../src/misc.ts"; import { all, allAtLeastOnce, @@ -219,18 +223,14 @@ const noun = sequence( ), ) .map(([determiner, adjective, noun, post]) => { - let postAdjective: null | { adjective: string; name: string }; - if (post == null) { - postAdjective = null; - } 
else { - const [adjective, name] = post; - postAdjective = { adjective, name }; - } return { ...noun, determiner, adjective, - postAdjective, + postAdjective: mapNullable( + post, + ([adjective, name]) => ({ adjective, name }), + ), }; }); function verbOnly(tagInside: Parser): Parser { diff --git a/src/array_result.ts b/src/array_result.ts index 1c1ae8f6..572afd82 100644 --- a/src/array_result.ts +++ b/src/array_result.ts @@ -1,4 +1,4 @@ -import { flattenError } from "./misc.ts"; +import { flattenError, nullableAsArray } from "./misc.ts"; export type ArrayResultOptions = { cause: unknown; @@ -79,14 +79,9 @@ export class ArrayResult { } } filterMap(mapper: (value: T) => U): ArrayResult> { - return this.flatMap((value) => { - const arrayResult = mapper(value); - if (arrayResult == null) { - return new ArrayResult(); - } else { - return new ArrayResult([arrayResult]); - } - }); + return this.flatMap((value) => + new ArrayResult(nullableAsArray(mapper(value))) + ); } sort(comparer: (left: T, right: T) => number): ArrayResult { if (this.isError()) { diff --git a/src/misc.ts b/src/misc.ts index a64e6dde..82397ff3 100644 --- a/src/misc.ts +++ b/src/misc.ts @@ -12,6 +12,16 @@ export function nullableAsArray(value?: T): ReadonlyArray> { return [value]; } } +export function mapNullable( + value: null | T, + mapper: (value: NonNullable) => U, +): null | U { + if (value == null) { + return null; + } else { + return mapper(value); + } +} export function repeatArray(element: T, count: number): ReadonlyArray { return new Array(count).fill(element); } diff --git a/src/translator/phrase.ts b/src/translator/phrase.ts index 411d4b07..6c390858 100644 --- a/src/translator/phrase.ts +++ b/src/translator/phrase.ts @@ -1,5 +1,5 @@ import { ArrayResult } from "../array_result.ts"; -import { nullableAsArray } from "../misc.ts"; +import { mapNullable, nullableAsArray } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import * as Composer from "../parser/composer.ts"; import { AdjectiveWithInWay, fixAdjective } from "./adjective.ts"; @@ -54,10 +54,11 @@ function nounPhrase( throw new FilteredOutError("double name"); } else if (partialNoun.postAdjective != null) { postAdjective = partialNoun.postAdjective; - } else if (modifier.name != null) { - postAdjective = { adjective: "named", name: modifier.name }; } else { - postAdjective = null; + postAdjective = mapNullable( + modifier.name, + (name) => ({ adjective: "named", name }), + ); } const preposition = [ ...nullableAsArray(modifier.inPositionPhrase) diff --git a/src/translator/verb.ts b/src/translator/verb.ts index 23510f0e..4e945cb0 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -1,8 +1,8 @@ import * as Dictionary from "../../dictionary/type.ts"; import { ArrayResult } from "../array_result.ts"; +import { mapNullable } from "../misc.ts"; import { settings } from "../settings.ts"; import * as English from "./ast.ts"; -import { Word } from "./ast.ts"; import { condense } from "./misc.ts"; import { noun } from "./noun.ts"; import { nounAsPreposition } from "./preposition.ts"; @@ -181,14 +181,8 @@ export function fromVerbForms( break; } return verb.map((verb) => { - let modal: null | Word; - if (verb.modal != null) { - modal = unemphasized(verb.modal); - } else { - modal = null; - } return { - modal, + modal: mapNullable(verb.modal, unemphasized), finite: [], infinite: word({ ...options, word: verb.infinite }), }; From cc3c10711fe00cd1daf8a499aa91e3d8cde0a8cc Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 18:31:21 +0800 
Subject: [PATCH 126/199] small improvement --- src/misc.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/misc.ts b/src/misc.ts index 82397ff3..d084eff5 100644 --- a/src/misc.ts +++ b/src/misc.ts @@ -13,7 +13,7 @@ export function nullableAsArray(value?: T): ReadonlyArray> { } } export function mapNullable( - value: null | T, + value: T, mapper: (value: NonNullable) => U, ): null | U { if (value == null) { From 84f3a1fdafb86d992ea6293b19e98130d735058f Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 18:38:57 +0800 Subject: [PATCH 127/199] small formatting --- src/dictionary.ts | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/dictionary.ts b/src/dictionary.ts index 5a86d564..8f1f2d27 100644 --- a/src/dictionary.ts +++ b/src/dictionary.ts @@ -31,12 +31,11 @@ export function loadCustomDictionary(dictionaryText: string): void { } function update(): void { dictionary.clear(); - for ( - const word of new Set([ - ...globalDictionary.keys(), - ...customDictionary.keys(), - ]) - ) { + const words = new Set([ + ...globalDictionary.keys(), + ...customDictionary.keys(), + ]); + for (const word of words) { const entry = customDictionary.get(word) ?? globalDictionary.get(word)!; if (entry.definitions.length > 0) { dictionary.set(word, entry); From 8ebe6c4645c82e2cac2d24163d8a2cbf10662db5 Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 18:47:11 +0800 Subject: [PATCH 128/199] refactor --- src/main.ts | 11 +++++------ src/translator/sentence.ts | 7 +++++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/main.ts b/src/main.ts index 127a7e70..b18eedf3 100644 --- a/src/main.ts +++ b/src/main.ts @@ -155,12 +155,11 @@ function main(): void { // remove unused local storage data const used = [DICTIONARY_KEY, ...Object.keys(settings)]; - const allKeys = [...new Array(localStorage.length).keys()] - .map((i) => localStorage.key(i)!); - for (const key of allKeys) { - if (!used.includes(key)) { - localStorage.removeItem(key); - } + const unused = [...new Array(localStorage.length).keys()] + .map((i) => localStorage.key(i)!) 
+ .filter((key) => !used.includes(key)); + for (const key of unused) { + localStorage.removeItem(key); } // initial text area size diff --git a/src/translator/sentence.ts b/src/translator/sentence.ts index 81a38273..15213dd4 100644 --- a/src/translator/sentence.ts +++ b/src/translator/sentence.ts @@ -5,7 +5,7 @@ import * as TokiPona from "../parser/ast.ts"; import { definitionAsPlainString } from "./as_string.ts"; import * as English from "./ast.ts"; import { clause, contextClause } from "./clause.ts"; -import { TranslationTodoError } from "./error.ts"; +import { FilteredOutError, TranslationTodoError } from "./error.ts"; import { unemphasized } from "./word.ts"; function filler(filler: TokiPona.Filler): ArrayResult { @@ -109,11 +109,14 @@ function anuSeme(seme: TokiPona.HeadedWordUnit): English.Clause { break; case "reduplication": interjection = repeatWithSpace("right", seme.count); + break; + case "x ala x": + throw new FilteredOutError('"seme ala seme"'); } return { type: "interjection", interjection: { - word: interjection!, + word: interjection, emphasis: seme.emphasis != null, }, }; From 79f3745c7f182a3cfbe91402ac5a09572d4696c4 Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 5 Mar 2025 18:53:50 +0800 Subject: [PATCH 129/199] refactor --- src/dictionary.ts | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/dictionary.ts b/src/dictionary.ts index 8f1f2d27..cf15ae33 100644 --- a/src/dictionary.ts +++ b/src/dictionary.ts @@ -47,18 +47,15 @@ function update(): void { definition.type !== "filler" && definition.type !== "particle definition", ); - redefineSet( - prepositionSet, - (definition) => definition.type === "preposition", - ); + redefineSetWithTypes(prepositionSet, "preposition"); redefineSet( preverbSet, (definition) => (definition.type === "verb" && definition.predicateType != null) || definition.type === "modal verb", ); - redefineSet(fillerSet, (definition) => definition.type === "filler"); - redefineSet(numeralSet, (definition) => definition.type === "numeral"); + redefineSetWithTypes(fillerSet, "filler"); + redefineSetWithTypes(numeralSet, "numeral"); redefineSet(tokiPonaWordSet, () => true); } function redefineSet( @@ -72,3 +69,9 @@ function redefineSet( } } } +function redefineSetWithTypes( + set: Set, + type: Definition["type"], +): void { + redefineSet(set, (definition) => definition.type === type); +} From 9f7809691acc178d27e4728b2e37e17b4a592ed1 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 07:52:04 +0800 Subject: [PATCH 130/199] use ternary --- dictionary/parser.ts | 11 +++---- src/array_result.ts | 7 +---- src/main.ts | 54 +++++++++++----------------------- src/mod.ts | 20 +++++-------- src/parser/composer.ts | 7 +---- src/parser/lexer.ts | 7 +---- src/parser/parser.ts | 58 +++++++++++++------------------------ src/translator/clause.ts | 10 ++----- src/translator/composer.ts | 20 ++++--------- src/translator/modifier.ts | 18 ++++-------- src/translator/number.ts | 7 +---- src/translator/phrase.ts | 13 +++------ src/translator/predicate.ts | 12 ++------ src/translator/sentence.ts | 37 +++++++---------------- src/translator/verb.ts | 40 +++++++------------------ 15 files changed, 94 insertions(+), 227 deletions(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index ba32ec79..faa60ebc 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -494,19 +494,16 @@ export function parseDictionary(sourceText: string): Dictionary { return arrayResult.array[0]; } else { const definitions = 
definitionExtractor(sourceText); - let errors: ReadonlyArray; - if (!definitions.isError()) { - errors = definitions.array[0].flatMap((definition) => + const errors = !definitions.isError() + ? definitions.array[0].flatMap((definition) => definitionParser(definition).errors.map((error) => new ArrayResultError( `${error.message} at ${definition.trim()}`, { cause: error }, ) ) - ); - } else { - errors = arrayResult.errors; - } + ) + : arrayResult.errors; throw new AggregateError(deduplicateErrors(errors)); } } diff --git a/src/array_result.ts b/src/array_result.ts index 572afd82..f283e635 100644 --- a/src/array_result.ts +++ b/src/array_result.ts @@ -160,12 +160,7 @@ export function extractArrayResultError( return { type: "outside", errors: [error] }; } case "outside": { - let moreError: ReadonlyArray; - if (error instanceof ArrayResultError) { - moreError = []; - } else { - moreError = [error]; - } + const moreError = error instanceof ArrayResultError ? [] : [error]; return { type: "outside", errors: [...errors.errors, ...moreError] }; } } diff --git a/src/main.ts b/src/main.ts index b18eedf3..3f42e963 100644 --- a/src/main.ts +++ b/src/main.ts @@ -121,25 +121,19 @@ function main(): void { ) as HTMLAnchorElement; // set version - let displayDate: string; - if (PROJECT_DATA.onDevelopment) { - displayDate = "On development"; - } else { - const date = new Date(PROJECT_DATA.releaseDate).toLocaleDateString(); - displayDate = `- Released ${date}`; - } + const displayDate = PROJECT_DATA.onDevelopment + ? "On development" + : `- Released ${new Date(PROJECT_DATA.releaseDate).toLocaleDateString()}`; + versionDisplay.innerText = `${PROJECT_DATA.version} ${displayDate}`; // load settings loadFromLocalStorage(); // load custom dictionary - let customDictionary: string; - if (checkLocalStorage()) { - customDictionary = localStorage.getItem(DICTIONARY_KEY) ?? ""; - } else { - customDictionary = customDictionaryTextBox.value; - } + const customDictionary = checkLocalStorage() + ? localStorage.getItem(DICTIONARY_KEY) ?? "" + : customDictionaryTextBox.value; if (customDictionary.trim() !== "") { try { loadCustomDictionary(customDictionary); @@ -215,12 +209,9 @@ function main(): void { break; } for (const item of errors) { - let property: "innerHTML" | "innerText"; - if (item instanceof ArrayResultError && item.isHtml) { - property = "innerHTML"; - } else { - property = "innerText"; - } + const property = item instanceof ArrayResultError && item.isHtml + ? "innerHTML" + : "innerText"; const list = document.createElement("li"); list[property] = extractErrorMessage(item); errorList.appendChild(list); @@ -260,12 +251,7 @@ function main(): void { }); function displayToCustomDictionary(message: string): void { const original = customDictionaryTextBox.value.trimEnd(); - let append: string; - if (original !== "") { - append = "\n\n"; - } else { - append = ""; - } + const append = original === "" ? "" : "\n\n"; customDictionaryTextBox.value = `${original}${append}${message.trimEnd()}\n`; customDictionaryTextBox.scrollTo(0, customDictionaryTextBox.scrollHeight); @@ -273,16 +259,13 @@ function main(): void { function addWord(): void { const word = addWordTextBox.value.trim(); if (/^[a-z][a-zA-Z]*$/.test(word)) { - let definitions: string; const dictionaryEntry = dictionary.get(word); - if (dictionaryEntry != null) { - definitions = dictionaryEntry.src; - } else { - definitions = `\n${ + const definitions = dictionaryEntry != null + ? 
dictionaryEntry.src + : `\n${ asComment(EMPTY_DEFINITION_PLACEHOLDER) .replaceAll(/^/gm, " ") }`; - } displayToCustomDictionary(`${word}:${definitions}`); } else { displayToCustomDictionary(asComment(INVALID_WORD_ERROR)); @@ -300,12 +283,9 @@ function main(): void { customDictionaryDialogBox.close(); } catch (error) { const errors = flattenError(error); - let message: string; - if (errorsFixable(errors)) { - message = DICTIONARY_ERROR_FIXABLE_MESSAGE; - } else { - message = DICTIONARY_ERROR_UNFIXABLE_MESSAGE; - } + const message = errorsFixable(errors) + ? DICTIONARY_ERROR_FIXABLE_MESSAGE + : DICTIONARY_ERROR_UNFIXABLE_MESSAGE; const errorListMessage = errors .map(extractErrorMessage) .map((message) => `\n- ${message.replaceAll(NEWLINES, "$& ")}`); diff --git a/src/mod.ts b/src/mod.ts index c1575dc1..5c55366d 100644 --- a/src/mod.ts +++ b/src/mod.ts @@ -23,19 +23,13 @@ export function translate(tokiPona: string): ReadonlyArray { return values; } } else { - let teloMisikekeErrors: ReadonlyArray; - if (settings.teloMisikeke) { - teloMisikekeErrors = errors(tokiPona) - .map((message) => new ArrayResultError(message, { isHtml: true })); - } else { - teloMisikekeErrors = []; - } - let error: ReadonlyArray; - if (teloMisikekeErrors.length === 0) { - error = deduplicateErrors(arrayResult.errors); - } else { - error = teloMisikekeErrors; - } + const teloMisikekeErrors = settings.teloMisikeke + ? errors(tokiPona) + .map((message) => new ArrayResultError(message, { isHtml: true })) + : []; + const error = teloMisikekeErrors.length === 0 + ? deduplicateErrors(arrayResult.errors) + : teloMisikekeErrors; throw new AggregateError(error); } } diff --git a/src/parser/composer.ts b/src/parser/composer.ts index 540d4cef..84debbf0 100644 --- a/src/parser/composer.ts +++ b/src/parser/composer.ts @@ -157,12 +157,7 @@ export function clause(clause: Clause): string { case "o vocative": return `${multiplePhrases(clause.phrases, "en")} o`; case "li clause": { - let li: ReadonlyArray; - if (clause.explicitLi) { - li = ["li"]; - } else { - li = []; - } + const li = clause.explicitLi ? ["li"] : []; return [ multiplePhrases(clause.subjects, "en"), ...li, diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index 676d4508..0c595680 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -130,12 +130,7 @@ const cartoucheElement = choiceOnlyOne( ), ) .map(([word, dots]) => { - let count: number; - if (/^[aeiou]/.test(word)) { - count = dots + 1; - } else { - count = dots; - } + const count = /^[aeiou]/.test(word) ? dots + 1 : dots; const morae = word.match(/[aeiou]|[jklmnpstw][aeiou]|n/g)!; if (morae.length < count) { throw new UnrecognizedError("excess dots"); diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 8e7459e4..686d555f 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -350,21 +350,13 @@ function nestedPhrasesOnly( return singlePhrase; } else { const [first, ...rest] = nestingRule; - let type: "and conjunction" | "anu"; - if (["en", "li", "o", "e"].includes(first)) { - type = "and conjunction"; - } else { - type = "anu"; - } - let longAnuParser: Parser; - if (first === "anu") { - longAnuParser = longAnu.map((phrases) => ({ + const type = first === "anu" ? "anu" : "and conjunction"; + const longAnuParser = type === "anu" + ? 
longAnu.map((phrases) => ({ type: "anu", phrases: phrases.map((phrase) => ({ type: "single", phrase })), - })); - } else { - longAnuParser = empty; - } + })) + : empty; return choice( longAnuParser, sequence( @@ -520,21 +512,13 @@ function multiplePredicates( ); } else { const [first, ...rest] = nestingRule; - let type: "and conjunction" | "anu"; - if (first === "li" || first === "o") { - type = "and conjunction"; - } else { - type = "anu"; - } - let longAnuParser: Parser; - if (first === "anu") { - longAnuParser = longAnu.map((phrases) => ({ + const type = first === "anu" ? "anu" : "and conjunction"; + const longAnuParser = type === "anu" + ? longAnu.map((phrases) => ({ type: "anu", predicates: phrases.map((predicate) => ({ type: "single", predicate })), - })); - } else { - longAnuParser = empty; - } + })) + : empty; return choice( longAnuParser, associatedPredicates(nestingRule), @@ -719,18 +703,16 @@ const sentence = choice( interrogative: null, }; const wordUnits = everyWordUnitInSentence(sentence); - let interrogative: null | "x ala x" | "seme" = null; - if (wordUnits.some((wordUnit) => wordUnit.type === "x ala x")) { - interrogative = "x ala x"; - } else if ( - wordUnits.some((wordUnit) => - (wordUnit.type === "default" || - wordUnit.type === "reduplication") && - wordUnit.word === "seme" - ) - ) { - interrogative = "seme"; - } + const interrogative = + wordUnits.some((wordUnit) => wordUnit.type === "x ala x") + ? "x ala x" + : wordUnits.some((wordUnit) => + (wordUnit.type === "default" || + wordUnit.type === "reduplication") && + wordUnit.word === "seme" + ) + ? "seme" + : null; return { ...sentence, interrogative }; }, ) diff --git a/src/translator/clause.ts b/src/translator/clause.ts index 2177af83..c2169383 100644 --- a/src/translator/clause.ts +++ b/src/translator/clause.ts @@ -1,4 +1,3 @@ -import * as Dictionary from "../../dictionary/type.ts"; import { ArrayResult } from "../array_result.ts"; import { nullableAsArray } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; @@ -85,12 +84,9 @@ function liClause( predicate(clause.predicates, "li"), ) .flatMap(([subject, predicate]) => { - let perspective: Dictionary.Perspective; - if (subject.type === "simple") { - perspective = subject.perspective; - } else { - perspective = "third"; - } + const perspective = subject.type === "simple" + ? subject.perspective + : "third"; return verb(predicate, perspective, subject.quantity) .map((verb) => ({ type: "default", diff --git a/src/translator/composer.ts b/src/translator/composer.ts index 8541b6df..9c23878f 100644 --- a/src/translator/composer.ts +++ b/src/translator/composer.ts @@ -90,17 +90,14 @@ export function verb(phrase: English.VerbPhrase, depth: number): string { let text: string; switch (phrase.type) { case "default": { - let verbText: ReadonlyArray; - if (phrase.hideVerb) { - verbText = []; - } else { - const { modal, finite, infinite } = phrase.verb; - verbText = [ + const { modal, finite, infinite } = phrase.verb; + const verbText = !phrase.hideVerb + ? [ ...nullableAsArray(modal).map(word), ...finite.map(word), word(infinite), - ]; - } + ] + : []; text = [ ...phrase.adverb.map(word), ...verbText, @@ -125,12 +122,7 @@ export function verb(phrase: English.VerbPhrase, depth: number): string { .join(" "); } function defaultClause(clause: English.Clause & { type: "default" }): string { - let subject: ReadonlyArray; - if (clause.hideSubject) { - subject = []; - } else { - subject = [noun(clause.subject, 0)]; - } + const subject = !clause.hideSubject ? 
[noun(clause.subject, 0)] : []; return [ ...subject, verb(clause.verb, 0), diff --git a/src/translator/modifier.ts b/src/translator/modifier.ts index b50194e9..bce88282 100644 --- a/src/translator/modifier.ts +++ b/src/translator/modifier.ts @@ -52,12 +52,7 @@ export function defaultModifier( switch (wordUnit.type) { case "number": return number(wordUnit.words).map((number) => { - let quantity: English.Quantity; - if (number === 1) { - quantity = "singular"; - } else { - quantity = "plural"; - } + const quantity = number === 1 ? "singular" : "plural"; return { type: "determiner" as const, determiner: { @@ -287,9 +282,8 @@ export function multipleModifiers( name.length === 0 && inPositionPhrase.length === 0 ) { - let inWayPhrase: null | English.NounPhrase; - if (adjective.length > 0) { - inWayPhrase = { + const inWayPhrase: null | English.NounPhrase = adjective.length > 0 + ? { type: "simple", determiner: [], adjective, @@ -299,10 +293,8 @@ export function multipleModifiers( postAdjective: null, preposition: [], emphasis: false, - }; - } else { - inWayPhrase = null; - } + } + : null; adverbial = new ArrayResult([{ type: "adverbial", adverb, diff --git a/src/translator/number.ts b/src/translator/number.ts index 8ef304b4..3c541778 100644 --- a/src/translator/number.ts +++ b/src/translator/number.ts @@ -47,12 +47,7 @@ function unfilteredNasinNanpaPona( const index = number .slice(aleStart) .findIndex((number) => number !== 100); - let hundredCount: number; - if (index === -1) { - hundredCount = number.length - aleStart; - } else { - hundredCount = index; - } + const hundredCount = index !== -1 ? index : number.length - aleStart; if (previousHundredCount <= hundredCount) { throw new FilteredOutError('unsorted "ale"'); } diff --git a/src/translator/phrase.ts b/src/translator/phrase.ts index 6c390858..895048ed 100644 --- a/src/translator/phrase.ts +++ b/src/translator/phrase.ts @@ -46,20 +46,15 @@ function nounPhrase( ...[...modifier.adjective].reverse(), ...partialNoun.adjective, ]); - let postAdjective: null | { - adjective: string; - name: string; - }; if (partialNoun.postAdjective != null && modifier.name != null) { throw new FilteredOutError("double name"); - } else if (partialNoun.postAdjective != null) { - postAdjective = partialNoun.postAdjective; - } else { - postAdjective = mapNullable( + } + const postAdjective = partialNoun.postAdjective != null + ? partialNoun.postAdjective + : mapNullable( modifier.name, (name) => ({ adjective: "named", name }), ); - } const preposition = [ ...nullableAsArray(modifier.inPositionPhrase) .map((object) => nounAsPreposition(object, "in")), diff --git a/src/translator/predicate.ts b/src/translator/predicate.ts index 483a734a..cb95719b 100644 --- a/src/translator/predicate.ts +++ b/src/translator/predicate.ts @@ -22,15 +22,9 @@ function verbObject( if (useForObject === false) { throw new FilteredOutError("intransitive verb with object"); } else { - let englishObject: null | English.NounPhrase; - let preposition: ReadonlyArray; - if (useForObject === true) { - englishObject = object; - preposition = []; - } else { - englishObject = verb.object; - preposition = [nounAsPreposition(object, useForObject)]; - } + const [englishObject, preposition] = useForObject === true + ? 
[object, []] + : [verb.object, [nounAsPreposition(object, useForObject)]]; return { ...verb, object: englishObject, preposition }; } } diff --git a/src/translator/sentence.ts b/src/translator/sentence.ts index 15213dd4..efd9dd6a 100644 --- a/src/translator/sentence.ts +++ b/src/translator/sentence.ts @@ -1,6 +1,6 @@ import { ArrayResult } from "../array_result.ts"; import { dictionary } from "../dictionary.ts"; -import { repeatWithSpace } from "../misc.ts"; +import { nullableAsArray, repeatWithSpace } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import { definitionAsPlainString } from "./as_string.ts"; import * as English from "./ast.ts"; @@ -50,12 +50,7 @@ function emphasisAsPunctuation( return originalPunctuation; } } - let questionMark: string; - if (interrogative) { - questionMark = "?"; - } else { - questionMark = ""; - } + const questionMark = interrogative ? "?" : ""; let exclamationMark: string; switch (emphasis.type) { case "word": @@ -128,12 +123,9 @@ function sentence( if (sentence.interrogative === "x ala x") { return new ArrayResult(new TranslationTodoError("x ala x")); } - let punctuation: string; - if (!isFinal && sentence.punctuation === "") { - punctuation = ","; - } else { - punctuation = sentence.punctuation; - } + const punctuation = !isFinal && sentence.punctuation === "" + ? "," + : sentence.punctuation; switch (sentence.type) { case "default": { const laClauses = sentence.laClauses; @@ -156,21 +148,12 @@ function sentence( ); } const lastEngClause = clause(sentence.finalClause); - let right: ReadonlyArray; - if (sentence.anuSeme == null) { - right = []; - } else { - right = [anuSeme(sentence.anuSeme)]; - } - let interjectionClause: ArrayResult; - if ( + const right = nullableAsArray(sentence.anuSeme).map(anuSeme); + const interjectionClause = sentence.laClauses.length === 0 && sentence.kinOrTaso == null && - sentence.kinOrTaso == null - ) { - interjectionClause = interjection(sentence.finalClause); - } else { - interjectionClause = new ArrayResult(); - } + sentence.kinOrTaso == null + ? interjection(sentence.finalClause) + : new ArrayResult(); const engClauses = ArrayResult.combine( givenClauses, ArrayResult.concat(interjectionClause, lastEngClause), diff --git a/src/translator/verb.ts b/src/translator/verb.ts index 4e945cb0..93bd24af 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -117,29 +117,16 @@ export function fromVerbForms( ): ArrayResult { const { verbForms, perspective, quantity } = options; const is = verbForms.presentSingular === "is"; - let presentSingular: string; - if (is && perspective === "first") { - presentSingular = "am"; - } else { - presentSingular = verbForms.presentSingular; - } - let pastPlural: string; - let pastSingular: string; - if (is) { - pastPlural = "were"; - pastSingular = "was"; - } else { - pastPlural = pastSingular = verbForms.past; - } - let past: string; - let present: string; - if (quantity !== "singular" || (!is && perspective !== "third")) { - past = pastPlural; - present = verbForms.presentPlural; - } else { - past = pastSingular; - present = presentSingular; - } + const presentSingular = is && perspective === "first" + ? "am" + : verbForms.presentSingular; + const [pastPlural, pastSingular] = is + ? ["were", "was"] + : [verbForms.past, verbForms.past]; + const [past, present] = + quantity !== "singular" || (!is && perspective !== "third") + ? 
[pastPlural, verbForms.presentPlural] + : [pastSingular, presentSingular]; let verb: ArrayResult<{ modal: null | string; infinite: string }>; switch (settings.tense) { case "condensed": @@ -163,12 +150,7 @@ export function fromVerbForms( } break; case "both": { - let future: string; - if (is) { - future = "be"; - } else { - future = verbForms.presentPlural; - } + const future = is ? "be" : verbForms.presentPlural; verb = new ArrayResult([ { modal: null, infinite: present }, { modal: null, infinite: past }, From 47b9796a756a5178f8eeabd924e68651a749efdb Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 07:57:45 +0800 Subject: [PATCH 131/199] reduce code duplication --- src/translator/misc.ts | 2 +- src/translator/modifier.ts | 11 ++--------- src/translator/word_unit.ts | 20 +++++++++++--------- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/src/translator/misc.ts b/src/translator/misc.ts index ef8d310e..95f7ce5b 100644 --- a/src/translator/misc.ts +++ b/src/translator/misc.ts @@ -10,4 +10,4 @@ export function condense(first: string, second: string): string { } else { return `${first}/${second}`; } -} +} \ No newline at end of file diff --git a/src/translator/modifier.ts b/src/translator/modifier.ts index bce88282..31ae5d38 100644 --- a/src/translator/modifier.ts +++ b/src/translator/modifier.ts @@ -15,6 +15,7 @@ import { number } from "./number.ts"; import { phrase } from "./phrase.ts"; import { pronoun } from "./pronoun.ts"; import { unemphasized, word } from "./word.ts"; +import { getReduplicationCount } from "./word_unit.ts"; export type ModifierTranslation = | Readonly<{ type: "noun"; noun: English.NounPhrase }> @@ -70,15 +71,7 @@ export function defaultModifier( return new ArrayResult(new TranslationTodoError("x ala x")); case "default": case "reduplication": { - let reduplicationCount: number; - switch (wordUnit.type) { - case "default": - reduplicationCount = 1; - break; - case "reduplication": - reduplicationCount = wordUnit.count; - break; - } + const reduplicationCount = getReduplicationCount(wordUnit); return new ArrayResult(dictionary.get(wordUnit.word)!.definitions) .flatMap((definition) => { switch (definition.type) { diff --git a/src/translator/word_unit.ts b/src/translator/word_unit.ts index 6abfbb71..2b116191 100644 --- a/src/translator/word_unit.ts +++ b/src/translator/word_unit.ts @@ -99,15 +99,7 @@ export function wordUnit( return new ArrayResult(new TranslationTodoError("x ala x")); case "default": case "reduplication": { - let reduplicationCount: number; - switch (wordUnit.type) { - case "default": - reduplicationCount = 1; - break; - case "reduplication": - reduplicationCount = wordUnit.count; - break; - } + const reduplicationCount = getReduplicationCount(wordUnit); return defaultWordUnit({ ...options, word: wordUnit.word, @@ -117,3 +109,13 @@ export function wordUnit( } } } +export function getReduplicationCount(wordUnit: TokiPona.WordUnit): number { + switch (wordUnit.type) { + case "number": + case "default": + case "x ala x": + return 1; + case "reduplication": + return wordUnit.count; + } +} From dd6248c14da175856a44e902f13a211a8c83ac06 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 08:07:50 +0800 Subject: [PATCH 132/199] reduce reliance on mutation --- src/parser/filter.ts | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/parser/filter.ts b/src/parser/filter.ts index 4d310083..519c209e 100644 --- a/src/parser/filter.ts +++ b/src/parser/filter.ts @@ -197,36 +197,34 @@ export 
const MULTIPLE_MODIFIERS_RULES: ReadonlyArray< if (settings.separateRepeatedModifiers) { return true; } - const set = new Set(); - for (const modifier of modifiers) { - let word: string; + const words = modifiers.flatMap((modifier) => { switch (modifier.type) { case "default": if (modifier.word.type !== "number") { - word = modifier.word.word; - break; + return [modifier.word.word]; } else { - continue; + return []; } case "pi": if ( modifier.phrase.type === "default" && modifier.phrase.headWord.type !== "number" ) { - word = modifier.phrase.headWord.word; - break; + return [modifier.phrase.headWord.word]; } else { - continue; + return []; } case "quotation": case "proper words": case "nanpa": - continue; + return []; } - if (set.has(word)) { - throw new UnrecognizedError(`duplicate "${word}" in modifier`); - } else { - set.add(word); + }); + for (const [i, a] of words.entries()) { + for (const b of words.slice(i + 1)) { + if (a === b) { + throw new UnrecognizedError(`duplicate "${a}" in modifier`); + } } } return true; From 553f1f5c4582f544e62abfd6dbe03f50fc4a0427 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 08:22:25 +0800 Subject: [PATCH 133/199] add newline to linku data --- telo-misikeke/linku_data.json | 2 +- telo-misikeke/update.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/telo-misikeke/linku_data.json b/telo-misikeke/linku_data.json index 1f8cb55c..c152a97d 100644 --- a/telo-misikeke/linku_data.json +++ b/telo-misikeke/linku_data.json @@ -723,4 +723,4 @@ "yupekosi", "obscure" ] -] \ No newline at end of file +] diff --git a/telo-misikeke/update.ts b/telo-misikeke/update.ts index 700e63c8..fa0632d5 100644 --- a/telo-misikeke/update.ts +++ b/telo-misikeke/update.ts @@ -57,7 +57,7 @@ async function buildSonaLinku(): Promise { const processedJson = parseLipuLinku(json); await Deno.writeTextFile( LINKU_DESTINATION, - JSON.stringify(processedJson, undefined, 2), + `${JSON.stringify(processedJson, undefined, 2)}\n`, ); } function parseLipuLinku( From 95b8116931bc64ca568cbbf2671f9649749fa4a8 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 08:23:17 +0800 Subject: [PATCH 134/199] something something --- telo-misikeke/update.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/telo-misikeke/update.ts b/telo-misikeke/update.ts index fa0632d5..5b434a1e 100644 --- a/telo-misikeke/update.ts +++ b/telo-misikeke/update.ts @@ -54,10 +54,9 @@ export { ${exports} }; async function buildSonaLinku(): Promise { const response = assertOk(await retry(() => fetch(LINKU_URL))); const json = await response.json(); - const processedJson = parseLipuLinku(json); await Deno.writeTextFile( LINKU_DESTINATION, - `${JSON.stringify(processedJson, undefined, 2)}\n`, + `${JSON.stringify(parseLipuLinku(json), undefined, 2)}\n`, ); } function parseLipuLinku( From 06893c80216e9fc7c0d5e59a54948bc3f0d94bc2 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 08:38:40 +0800 Subject: [PATCH 135/199] remove unneeded try finally block --- bundle.ts | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/bundle.ts b/bundle.ts index 07e276db..1f6f408c 100644 --- a/bundle.ts +++ b/bundle.ts @@ -59,30 +59,25 @@ if (import.meta.main) { console.log("Press ctrl+c to exit."); const watcher = Deno.watchFs(WATCH); let task = Promise.resolve(); - try { - await buildAll({ minify: false, buildDictionary: true }); - let dictionaryChanged = false; - const buildDebounced = debounce((buildDictionary: boolean) => { - task = 
task.then(async () => { - await buildAll({ - minify: false, - buildDictionary, - checkDictionary: false, - }); - dictionaryChanged = false; + await buildAll({ minify: false, buildDictionary: true }); + let dictionaryChanged = false; + const buildDebounced = debounce((buildDictionary: boolean) => { + task = task.then(async () => { + await buildAll({ + minify: false, + buildDictionary, + checkDictionary: false, }); - }, 500); - for await (const event of watcher) { - if (event.paths.some((path) => DICTIONARY.test(path))) { - dictionaryChanged = true; - } - buildDebounced(dictionaryChanged); + dictionaryChanged = false; + }); + }, 500); + for await (const event of watcher) { + if (event.paths.some((path) => DICTIONARY.test(path))) { + dictionaryChanged = true; } - throw new Error("unreachable"); - } finally { - watcher.close(); - await task; + buildDebounced(dictionaryChanged); } + throw new Error("unreachable"); } default: throw new Error(`unrecognized build option: ${Deno.args[0]}`); From 36e1cf29fd77c92d1875359265070f22d0197c22 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 08:53:49 +0800 Subject: [PATCH 136/199] more ternary --- src/main.ts | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/main.ts b/src/main.ts index 3f42e963..782f1be9 100644 --- a/src/main.ts +++ b/src/main.ts @@ -138,11 +138,9 @@ function main(): void { try { loadCustomDictionary(customDictionary); } catch (error) { - if (errorsFixable(flattenError(error))) { - errorDisplay.innerText = DICTIONARY_LOADING_FAILED_FIXABLE_MESSAGE; - } else { - errorDisplay.innerText = DICTIONARY_LOADING_FAILED_UNFIXABLE_MESSAGE; - } + errorDisplay.innerText = errorsFixable(flattenError(error)) + ? DICTIONARY_LOADING_FAILED_FIXABLE_MESSAGE + : DICTIONARY_LOADING_FAILED_UNFIXABLE_MESSAGE; console.error(error); } } @@ -166,11 +164,9 @@ function main(): void { // initialize button label updateLabel(); function updateLabel(): void { - if (settings.multiline) { - translateButton.innerText = TRANSLATE_LABEL_MULTILINE; - } else { - translateButton.innerText = TRANSLATE_LABEL; - } + translateButton.innerText = settings.multiline + ? 
TRANSLATE_LABEL_MULTILINE + : TRANSLATE_LABEL; } // add all event listener From 931e89a513d218045c8a961c3c5017686521f9f9 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 08:58:17 +0800 Subject: [PATCH 137/199] small refactor --- src/misc.ts | 19 +++++++++---------- src/settings_frontend.ts | 9 ++++----- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/misc.ts b/src/misc.ts index d084eff5..7a20e96c 100644 --- a/src/misc.ts +++ b/src/misc.ts @@ -50,16 +50,15 @@ export function escapeHtmlWithLineBreak(text: string): string { return newlineAsHtmlLineBreak(escapeHtml(text)); } export function setIgnoreError(key: string, value: string): void { - if (!checkLocalStorage()) { - return; - } - try { - localStorage.setItem(key, value); - } catch (error) { - if ( - !(error instanceof DOMException) || error.name !== "QuotaExceededError" - ) { - throw error; + if (checkLocalStorage()) { + try { + localStorage.setItem(key, value); + } catch (error) { + if ( + !(error instanceof DOMException) || error.name !== "QuotaExceededError" + ) { + throw error; + } } } } diff --git a/src/settings_frontend.ts b/src/settings_frontend.ts index 8c35ae49..82db4253 100644 --- a/src/settings_frontend.ts +++ b/src/settings_frontend.ts @@ -94,11 +94,10 @@ function setElement( ); } export function loadFromLocalStorage(): void { - if (!checkLocalStorage()) { - return; - } - for (const key of KEYS) { - loadOneFromLocalStorage(key); + if (checkLocalStorage()) { + for (const key of KEYS) { + loadOneFromLocalStorage(key); + } } } export function loadFromElements(): void { From e85e12f2765f65913c43cf68c2bed6b71abe9c76 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 09:29:03 +0800 Subject: [PATCH 138/199] refactor --- src/array_result.ts | 41 +++++++++++++++------------------------- src/main.ts | 5 ++--- src/parser/lexer.ts | 16 ++++++---------- src/parser/parser.ts | 8 +------- src/settings_frontend.ts | 19 +++++-------------- src/translator/number.ts | 10 +++------- src/translator/phrase.ts | 16 +++------------- 7 files changed, 35 insertions(+), 80 deletions(-) diff --git a/src/array_result.ts b/src/array_result.ts index f283e635..c16647d2 100644 --- a/src/array_result.ts +++ b/src/array_result.ts @@ -56,13 +56,9 @@ export class ArrayResult { } } filter(mapper: (value: T) => boolean): ArrayResult { - return this.flatMap((value) => { - if (mapper(value)) { - return new ArrayResult([value]); - } else { - return new ArrayResult(); - } - }); + return this.flatMap((value) => + mapper(value) ? new ArrayResult([value]) : new ArrayResult() + ); } map(mapper: (value: T) => U): ArrayResult { return this.flatMap((value) => new ArrayResult([mapper(value)])); @@ -104,13 +100,10 @@ export class ArrayResult { ...arrayResults: ReadonlyArray> ): ArrayResult { return arrayResults.reduce( - (left, right) => { - if (left.isError() && right.isError()) { - return ArrayResult.errors([...left.errors, ...right.errors]); - } else { - return new ArrayResult([...left.array, ...right.array]); - } - }, + (left, right) => + left.isError() && right.isError() + ? 
ArrayResult.errors([...left.errors, ...right.errors]) + : new ArrayResult([...left.array, ...right.array]), new ArrayResult(), ); } @@ -121,18 +114,14 @@ export class ArrayResult { ): ArrayResult { // We resorted to using `any` types here, make sure it works properly return arrayResults.reduce( - (left: ArrayResult, right) => { - if (left.isError() && right.isError()) { - return ArrayResult.concat(left, right); - } else if (left.isError()) { - return ArrayResult.errors(left.errors); - } else if (right.isError()) { - return ArrayResult.errors(right.errors); - } else { - return left - .flatMap((left) => right.map((right) => [...left, right])); - } - }, + (left: ArrayResult, right) => + left.isError() && right.isError() + ? ArrayResult.concat(left, right) + : left.isError() + ? left + : right.isError() + ? right + : left.flatMap((left) => right.map((right) => [...left, right])), new ArrayResult([[]]), ) as ArrayResult; } diff --git a/src/main.ts b/src/main.ts index 782f1be9..ce8bbfd6 100644 --- a/src/main.ts +++ b/src/main.ts @@ -256,9 +256,8 @@ function main(): void { const word = addWordTextBox.value.trim(); if (/^[a-z][a-zA-Z]*$/.test(word)) { const dictionaryEntry = dictionary.get(word); - const definitions = dictionaryEntry != null - ? dictionaryEntry.src - : `\n${ + const definitions = dictionaryEntry?.src ?? + `\n${ asComment(EMPTY_DEFINITION_PLACEHOLDER) .replaceAll(/^/gm, " ") }`; diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index 0c595680..e81030f0 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -97,16 +97,12 @@ const longWord = allAtLeastOnce(repeatingLetter) }) .filter(({ word }) => /^[a-z]/.test(word)) .filter(({ length }) => length > 1); -const xAlaX = lazy(() => { - if (settings.xAlaXPartialParsing) { - return empty; - } else { - return word - .then((word) => - sequence(specificWord("ala"), specificWord(word)).map(() => word) - ); - } -}) +const xAlaX = lazy(() => + settings.xAlaXPartialParsing ? empty : word + .then((word) => + sequence(specificWord("ala"), specificWord(word)).map(() => word) + ) +) .map((word) => ({ type: "x ala x", word })); const punctuation = choiceOnlyOne( allAtLeastOnce( diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 686d555f..d4056b1b 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -716,13 +716,7 @@ const sentence = choice( return { ...sentence, interrogative }; }, ) - .sortBy(({ anuSeme }) => { - if (anuSeme == null) { - return 1; - } else { - return 0; - } - }), + .sortBy(({ anuSeme }) => anuSeme == null ? 1 : 0), sequence(filler, optional(punctuation)) .map(([filler, punctuation]) => ({ type: "filler", diff --git a/src/settings_frontend.ts b/src/settings_frontend.ts index 82db4253..3361a2d1 100644 --- a/src/settings_frontend.ts +++ b/src/settings_frontend.ts @@ -24,26 +24,17 @@ const BOOL_UPDATER: Updater = { return null; } }, - stringify: (value) => { - if (value) { - return "T"; - } else { - return "F"; - } - }, + stringify: (value) => value ? "T" : "F", load: (input) => (input as HTMLInputElement).checked, set: (input, value) => { (input as HTMLInputElement).checked = value; }, }; const REDUNDANCY_UPDATER: Updater = { - parse: (value) => { - if (["both", "condensed", "default only"].includes(value)) { - return value as RedundancySettings; - } else { - return null; - } - }, + parse: (value) => + ["both", "condensed", "default only"].includes(value) + ? 
value as RedundancySettings + : null, stringify: (value) => value, load: (input) => input.value as RedundancySettings, set: (input, value) => { diff --git a/src/translator/number.ts b/src/translator/number.ts index 3c541778..0542ee46 100644 --- a/src/translator/number.ts +++ b/src/translator/number.ts @@ -6,13 +6,9 @@ import { FilteredOutError } from "./error.ts"; function singleNumber(word: string): ArrayResult { return new ArrayResult(dictionary.get(word)!.definitions) - .filterMap((definition) => { - if (definition.type === "numeral") { - return definition.numeral; - } else { - return null; - } - }); + .filterMap((definition) => + definition.type === "numeral" ? definition.numeral : null + ); } function regularNumber(number: ReadonlyArray): number { const duplicate = number.some((a, i) => diff --git a/src/translator/phrase.ts b/src/translator/phrase.ts index 895048ed..07c37e56 100644 --- a/src/translator/phrase.ts +++ b/src/translator/phrase.ts @@ -49,12 +49,8 @@ function nounPhrase( if (partialNoun.postAdjective != null && modifier.name != null) { throw new FilteredOutError("double name"); } - const postAdjective = partialNoun.postAdjective != null - ? partialNoun.postAdjective - : mapNullable( - modifier.name, - (name) => ({ adjective: "named", name }), - ); + const postAdjective = partialNoun.postAdjective ?? + mapNullable(modifier.name, (name) => ({ adjective: "named", name })); const preposition = [ ...nullableAsArray(modifier.inPositionPhrase) .map((object) => nounAsPreposition(object, "in")), @@ -401,11 +397,5 @@ export function multiplePhrasesAsNoun( }>, ): ArrayResult { return multiplePhrases({ ...options, includeVerb: false }) - .filterMap((phrase) => { - if (phrase.type === "noun") { - return phrase.noun; - } else { - return null; - } - }); + .filterMap((phrase) => phrase.type === "noun" ? 
phrase.noun : null); } From b1c73e8c4c1623858b1803a23c9c56e1f72089db Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 09:37:14 +0800 Subject: [PATCH 139/199] huh --- src/translator/misc.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/translator/misc.ts b/src/translator/misc.ts index 95f7ce5b..ef8d310e 100644 --- a/src/translator/misc.ts +++ b/src/translator/misc.ts @@ -10,4 +10,4 @@ export function condense(first: string, second: string): string { } else { return `${first}/${second}`; } -} \ No newline at end of file +} From 5a55bed64aa4816be651dd2edbf0dea4e47bc8c8 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 11:09:45 +0800 Subject: [PATCH 140/199] remove duplicate --- src/parser/punctuation.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/parser/punctuation.ts b/src/parser/punctuation.ts index 459f2a23..9407e4f9 100644 --- a/src/parser/punctuation.ts +++ b/src/parser/punctuation.ts @@ -10,7 +10,6 @@ const FULL_WIDTH_PERIOD = [ "\uFF61", "\uFE12", "\u30FB", - "\u3002", UCSUR_MIDDLE_DOT, ]; const FULL_WIDTH_COLON = ["\uFF1A", UCSUR_COLON]; From 30f862ef6b9086ec1645b2f04806b658555a9129 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 11:17:02 +0800 Subject: [PATCH 141/199] add unique pairs --- src/misc.ts | 5 +++++ src/parser/filter.ts | 10 ++++------ src/parser/parser_test.ts | 9 ++++----- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/misc.ts b/src/misc.ts index 7a20e96c..6f8046d3 100644 --- a/src/misc.ts +++ b/src/misc.ts @@ -102,3 +102,8 @@ export function deduplicateErrors( export function characterClass(characters: Iterable): RegExp { return new RegExp(`[${escapeRegex([...characters].join(""))}]`, "u"); } +export function uniquePairs( + array: ReadonlyArray, +): ReadonlyArray { + return array.flatMap((a, i) => array.slice(i + 1).map((b) => [a, b])); +} diff --git a/src/parser/filter.ts b/src/parser/filter.ts index 519c209e..9c4d1446 100644 --- a/src/parser/filter.ts +++ b/src/parser/filter.ts @@ -1,5 +1,5 @@ import { extractArrayResultError } from "../array_result.ts"; -import { flattenError } from "../misc.ts"; +import { flattenError, uniquePairs } from "../misc.ts"; import { settings } from "../settings.ts"; import { Clause, @@ -220,11 +220,9 @@ export const MULTIPLE_MODIFIERS_RULES: ReadonlyArray< return []; } }); - for (const [i, a] of words.entries()) { - for (const b of words.slice(i + 1)) { - if (a === b) { - throw new UnrecognizedError(`duplicate "${a}" in modifier`); - } + for (const [a, b] of uniquePairs(words)) { + if (a === b) { + throw new UnrecognizedError(`duplicate "${a}" in modifier`); } } return true; diff --git a/src/parser/parser_test.ts b/src/parser/parser_test.ts index 9e09442f..b17a4c6d 100644 --- a/src/parser/parser_test.ts +++ b/src/parser/parser_test.ts @@ -1,4 +1,5 @@ import { assertNotEquals } from "@std/assert/not-equals"; +import { uniquePairs } from "../misc.ts"; import { parse } from "./parser.ts"; // Examples gathered from https://github.com/kilipan/nasin-toki @@ -195,11 +196,9 @@ const EXAMPLE_SENTENCES = [ Deno.test("AST all distinct", () => { for (const sentence of EXAMPLE_SENTENCES) { - const ast = parse(sentence).unwrap(); - for (const [i, a] of ast.entries()) { - for (const b of ast.slice(i + 1)) { - assertNotEquals(a, b, `Error at "${sentence}"`); - } + const pairs = uniquePairs(parse(sentence).unwrap()); + for (const [a, b] of pairs) { + assertNotEquals(a, b, `Error at "${sentence}"`); } } }); From be44e736c5018d2ea0c00713401d159c74067b32 Mon Sep 17 00:00:00 2001 
From: Koko Date: Thu, 6 Mar 2025 11:29:42 +0800 Subject: [PATCH 142/199] label tuples --- src/parser/lexer.ts | 7 ++++--- src/parser/parser.ts | 4 ++-- telo-misikeke/update.ts | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index e81030f0..b8684743 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -84,9 +84,10 @@ const multipleA = specificWord("a") .map((count) => ({ type: "multiple a", count: count + 1 })); const repeatingLetter = match(/[a-zA-Z]/, "latin letter") .then((letter) => - count(all(matchString(letter))).map( - (count) => [letter, count + 1], - ) + count(all(matchString(letter))) + .map( + (count) => [letter, count + 1], + ) ); const longWord = allAtLeastOnce(repeatingLetter) .skip(spaces) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index d4056b1b..5357583e 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -201,7 +201,7 @@ function wordUnit( function binaryWords( word: Set, description: string, -): Parser { +): Parser { return specificToken("combined glyphs").map(({ words }) => { if (words.length > 2) { throw new UnrecognizedError( @@ -219,7 +219,7 @@ function binaryWords( function optionalCombined( word: Set, description: string, -): Parser { +): Parser { return choice( wordUnit(word, description) .map((wordUnit) => [wordUnit, null]), diff --git a/telo-misikeke/update.ts b/telo-misikeke/update.ts index 5b434a1e..2b1f5557 100644 --- a/telo-misikeke/update.ts +++ b/telo-misikeke/update.ts @@ -61,9 +61,9 @@ async function buildSonaLinku(): Promise { } function parseLipuLinku( data: { [word: string]: { usage_category: string } }, -): ReadonlyArray { +): ReadonlyArray { return Object.entries(data) - .map( + .map( ([word, data]) => [word, data.usage_category], ) .filter(([_, category]) => category !== "sandbox"); From ad327e32c8fdf30b5b03ed8ccc613d0e9ae31089 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 11:31:42 +0800 Subject: [PATCH 143/199] update labels --- telo-misikeke/update.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/telo-misikeke/update.ts b/telo-misikeke/update.ts index 2b1f5557..d444e5b1 100644 --- a/telo-misikeke/update.ts +++ b/telo-misikeke/update.ts @@ -61,9 +61,9 @@ async function buildSonaLinku(): Promise { } function parseLipuLinku( data: { [word: string]: { usage_category: string } }, -): ReadonlyArray { +): ReadonlyArray { return Object.entries(data) - .map( + .map( ([word, data]) => [word, data.usage_category], ) .filter(([_, category]) => category !== "sandbox"); From 3e80773d9a4cf910538ef6786405c83359a32d9f Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 12:21:22 +0800 Subject: [PATCH 144/199] expand reserved symbols --- dictionary/parser.ts | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index faa60ebc..d6cdb182 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -1,6 +1,7 @@ -import { escape } from "@std/html/entities"; +import { escape as escapeHtml } from "@std/html/entities"; import nlp from "compromise/three"; import { ArrayResultError } from "../src/array_result.ts"; +import { escape as escapeRegex } from "jsr:@std/regexp/escape"; import { deduplicateErrors, mapNullable, @@ -30,6 +31,31 @@ import { VerbForms, } from "./type.ts"; +const RESERVED_SYMBOLS = [ + "#", + "(", + ")", + "*", + "+", + "/", + ":", + ";", + "<", + "=", + ">", + "@", + "[", + "\\", + "]", + "^", + "`", + "{", + "|", + "}", + 
"~", +]; +const WORDS = new RegExp(`[^${escapeRegex(RESERVED_SYMBOLS.join(""))}]`); + const comment = match(/#[^\n\r]*/, "comment"); const spaces = sourceOnly(all(choiceOnlyOne(match(/\s/, "space"), comment))); function lex(parser: Parser): Parser { @@ -38,7 +64,7 @@ function lex(parser: Parser): Parser { const backtick = matchString("`", "backtick"); const word = allAtLeastOnce( choiceOnlyOne( - match(/[^():;#/`]/, "word"), + match(WORDS, "word"), backtick .with(character) .skip(backtick) @@ -54,7 +80,7 @@ const word = allAtLeastOnce( return true; } }) - .map(escape); + .map(escapeHtml); const slash = lex(matchString("/", "slash")); const forms = sequence(word, all(slash.with(word))) .map(([first, rest]) => [first, ...rest]); From b4ee45a425f9797fe544a96506fb10dda58f3b4c Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 12:22:14 +0800 Subject: [PATCH 145/199] organize imports --- dictionary/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index d6cdb182..49934bed 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -1,7 +1,7 @@ import { escape as escapeHtml } from "@std/html/entities"; import nlp from "compromise/three"; -import { ArrayResultError } from "../src/array_result.ts"; import { escape as escapeRegex } from "jsr:@std/regexp/escape"; +import { ArrayResultError } from "../src/array_result.ts"; import { deduplicateErrors, mapNullable, From 743ed95369b69c4fbbea6e3d794843066afe9249 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 16:45:12 +0800 Subject: [PATCH 146/199] this happened --- deno.lock | 1 + 1 file changed, 1 insertion(+) diff --git a/deno.lock b/deno.lock index 028f4f6b..98bdb14e 100644 --- a/deno.lock +++ b/deno.lock @@ -12,6 +12,7 @@ "jsr:@std/internal@^1.0.5": "1.0.5", "jsr:@std/path@^1.0.6": "1.0.8", "jsr:@std/random@0.1": "0.1.0", + "jsr:@std/regexp@*": "1.0.1", "jsr:@std/regexp@^1.0.1": "1.0.1", "jsr:@std/text@^1.0.10": "1.0.10", "npm:compromise@^14.14.3": "14.14.4", From d6d351ba58427f91cfbc1d1031a1417ecf81e7a4 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 16:45:58 +0800 Subject: [PATCH 147/199] remove unused classes --- src/dictionary.ts | 7 ------- src/translator/composer.ts | 6 ------ 2 files changed, 13 deletions(-) diff --git a/src/dictionary.ts b/src/dictionary.ts index cf15ae33..e67ff4f3 100644 --- a/src/dictionary.ts +++ b/src/dictionary.ts @@ -1,7 +1,6 @@ import { dictionary as globalDictionary } from "../dictionary/dictionary.ts"; import { parseDictionary } from "../dictionary/parser.ts"; import { Definition, Dictionary } from "../dictionary/type.ts"; -import { ArrayResultError } from "./array_result.ts"; const customDictionary: Dictionary = new Map(); export const dictionary: Dictionary = new Map(); @@ -15,12 +14,6 @@ export const tokiPonaWordSet: Set = new Set(); update(); -export class MissingEntryError extends ArrayResultError { - constructor(kind: string, word: string) { - super(`${kind} definition for the word "${word}" is missing`); - this.name = "MissingEntryError"; - } -} export function loadCustomDictionary(dictionaryText: string): void { const dictionary = parseDictionary(dictionaryText); customDictionary.clear(); diff --git a/src/translator/composer.ts b/src/translator/composer.ts index 9c23878f..7f37540c 100644 --- a/src/translator/composer.ts +++ b/src/translator/composer.ts @@ -4,12 +4,6 @@ import * as English from "./ast.ts"; const EMPHASIS_STARTING_TAG = ""; const EMPHASIS_ENDING_TAG = ""; -// class ComposingTodoError extends 
TodoError { -// constructor(type: string) { -// super(`composing ${type}`); -// this.name = "ComposingTodoError"; -// } -// } function word(word: English.Word): string { if (word.emphasis) { return `${EMPHASIS_STARTING_TAG}${word.word}${EMPHASIS_ENDING_TAG}`; From a9dd449ba9980f0b6d9d093106eda8ffff5590e0 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 16:47:28 +0800 Subject: [PATCH 148/199] remove jsr specifier from code --- deno.lock | 13 ++++++------- dictionary/parser.ts | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/deno.lock b/deno.lock index 98bdb14e..cb1abc75 100644 --- a/deno.lock +++ b/deno.lock @@ -4,15 +4,14 @@ "jsr:@luca/esbuild-deno-loader@~0.11.1": "0.11.1", "jsr:@std/assert@^1.0.11": "1.0.11", "jsr:@std/async@^1.0.10": "1.0.10", - "jsr:@std/bytes@^1.0.2": "1.0.4", + "jsr:@std/bytes@^1.0.2": "1.0.5", "jsr:@std/cache@~0.1.3": "0.1.3", "jsr:@std/collections@^1.0.10": "1.0.10", - "jsr:@std/encoding@^1.0.5": "1.0.6", + "jsr:@std/encoding@^1.0.5": "1.0.7", "jsr:@std/html@^1.0.3": "1.0.3", "jsr:@std/internal@^1.0.5": "1.0.5", "jsr:@std/path@^1.0.6": "1.0.8", "jsr:@std/random@0.1": "0.1.0", - "jsr:@std/regexp@*": "1.0.1", "jsr:@std/regexp@^1.0.1": "1.0.1", "jsr:@std/text@^1.0.10": "1.0.10", "npm:compromise@^14.14.3": "14.14.4", @@ -36,8 +35,8 @@ "@std/async@1.0.10": { "integrity": "2ff1b1c7d33d1416159989b0f69e59ec7ee8cb58510df01e454def2108b3dbec" }, - "@std/bytes@1.0.4": { - "integrity": "11a0debe522707c95c7b7ef89b478c13fb1583a7cfb9a85674cd2cc2e3a28abc" + "@std/bytes@1.0.5": { + "integrity": "4465dd739d7963d964c809202ebea6d5c6b8e3829ef25c6a224290fbb8a1021e" }, "@std/cache@0.1.3": { "integrity": "8d3ab61d994d0915295048999f34a8ce2280f0e4d1f4cccb08c9e3d297c0c6ec" @@ -45,8 +44,8 @@ "@std/collections@1.0.10": { "integrity": "903af106a3d92970d74e20f7ebff77d9658af9bef4403f1dc42a7801c0575899" }, - "@std/encoding@1.0.6": { - "integrity": "ca87122c196e8831737d9547acf001766618e78cd8c33920776c7f5885546069" + "@std/encoding@1.0.7": { + "integrity": "f631247c1698fef289f2de9e2a33d571e46133b38d042905e3eac3715030a82d" }, "@std/html@1.0.3": { "integrity": "7a0ac35e050431fb49d44e61c8b8aac1ebd55937e0dc9ec6409aa4bab39a7988" diff --git a/dictionary/parser.ts b/dictionary/parser.ts index 49934bed..712ba492 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -1,6 +1,6 @@ import { escape as escapeHtml } from "@std/html/entities"; +import { escape as escapeRegex } from "@std/regexp/escape"; import nlp from "compromise/three"; -import { escape as escapeRegex } from "jsr:@std/regexp/escape"; import { ArrayResultError } from "../src/array_result.ts"; import { deduplicateErrors, From 5dd90892ca11a4fee1597432ae89cd3ab7e0d214 Mon Sep 17 00:00:00 2001 From: Koko Date: Thu, 6 Mar 2025 16:50:26 +0800 Subject: [PATCH 149/199] update --- deno.json | 4 ++-- deno.lock | 16 ++++++++-------- telo-misikeke/update.ts | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/deno.json b/deno.json index 34f75a94..5849e50b 100644 --- a/deno.json +++ b/deno.json @@ -53,13 +53,13 @@ "imports": { "@luca/esbuild-deno-loader": "jsr:@luca/esbuild-deno-loader@^0.11.1", "@std/assert": "jsr:@std/assert@^1.0.11", - "@std/async": "jsr:@std/async@^1.0.10", + "@std/async": "jsr:@std/async@^1.0.11", "@std/cache": "jsr:@std/cache@^0.1.3", "@std/collections": "jsr:@std/collections@^1.0.10", "@std/html": "jsr:@std/html@^1.0.3", "@std/random": "jsr:@std/random@^0.1.0", "@std/regexp": "jsr:@std/regexp@^1.0.1", - "@std/text": "jsr:@std/text@^1.0.10", + "@std/text": "jsr:@std/text@^1.0.11", 
"compromise": "npm:compromise@^14.14.3", "esbuild": "npm:esbuild@^0.25.0" } diff --git a/deno.lock b/deno.lock index cb1abc75..13ae85bf 100644 --- a/deno.lock +++ b/deno.lock @@ -3,7 +3,7 @@ "specifiers": { "jsr:@luca/esbuild-deno-loader@~0.11.1": "0.11.1", "jsr:@std/assert@^1.0.11": "1.0.11", - "jsr:@std/async@^1.0.10": "1.0.10", + "jsr:@std/async@^1.0.11": "1.0.11", "jsr:@std/bytes@^1.0.2": "1.0.5", "jsr:@std/cache@~0.1.3": "0.1.3", "jsr:@std/collections@^1.0.10": "1.0.10", @@ -13,7 +13,7 @@ "jsr:@std/path@^1.0.6": "1.0.8", "jsr:@std/random@0.1": "0.1.0", "jsr:@std/regexp@^1.0.1": "1.0.1", - "jsr:@std/text@^1.0.10": "1.0.10", + "jsr:@std/text@^1.0.11": "1.0.11", "npm:compromise@^14.14.3": "14.14.4", "npm:esbuild@0.25": "0.25.0" }, @@ -32,8 +32,8 @@ "jsr:@std/internal" ] }, - "@std/async@1.0.10": { - "integrity": "2ff1b1c7d33d1416159989b0f69e59ec7ee8cb58510df01e454def2108b3dbec" + "@std/async@1.0.11": { + "integrity": "eee0d3405275506638a9c8efaa849cf0d35873120c69b7caa1309c9a9e5b6f85" }, "@std/bytes@1.0.5": { "integrity": "4465dd739d7963d964c809202ebea6d5c6b8e3829ef25c6a224290fbb8a1021e" @@ -62,8 +62,8 @@ "@std/regexp@1.0.1": { "integrity": "5179d823465085c5480dafb44438466e83c424fadc61ba31f744050ecc0f596d" }, - "@std/text@1.0.10": { - "integrity": "9dcab377450253c0efa9a9a0c731040bfd4e1c03f8303b5934381467b7954338" + "@std/text@1.0.11": { + "integrity": "f191fa22590cac8b1cdba6cc4ab97940e720f7cc67b3084e54405b428bf5843d" } }, "npm": { @@ -194,13 +194,13 @@ "dependencies": [ "jsr:@luca/esbuild-deno-loader@~0.11.1", "jsr:@std/assert@^1.0.11", - "jsr:@std/async@^1.0.10", + "jsr:@std/async@^1.0.11", "jsr:@std/cache@~0.1.3", "jsr:@std/collections@^1.0.10", "jsr:@std/html@^1.0.3", "jsr:@std/random@0.1", "jsr:@std/regexp@^1.0.1", - "jsr:@std/text@^1.0.10", + "jsr:@std/text@^1.0.11", "npm:compromise@^14.14.3", "npm:esbuild@0.25" ] diff --git a/telo-misikeke/update.ts b/telo-misikeke/update.ts index d444e5b1..2a3f1bcc 100644 --- a/telo-misikeke/update.ts +++ b/telo-misikeke/update.ts @@ -4,7 +4,7 @@ import { assertOk } from "../src/misc.ts"; const TELO_MISIKEKE_URL = "https://gitlab.com/telo-misikeke/telo-misikeke.gitlab.io/-/raw/main/"; const LINKU_URL = "https://api.linku.la/v1/words"; -const LINKU_DESTINATION = new URL("./linku-data.json", import.meta.url); +const LINKU_DESTINATION = new URL("./linku_data.json", import.meta.url); const SOURCE = [ { source: new URL("./public/rules.js", TELO_MISIKEKE_URL), From 72301e3268848a7e68cc98863f73602fc785cd9c Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 7 Mar 2025 09:21:07 +0800 Subject: [PATCH 150/199] use snake case --- src/mod.ts | 2 +- {telo-misikeke => telo_misikeke}/Parser.js | 0 {telo-misikeke => telo_misikeke}/linku_data.json | 0 {telo-misikeke => telo_misikeke}/rules.js | 0 {telo-misikeke => telo_misikeke}/telo_misikeke.d.ts | 0 {telo-misikeke => telo_misikeke}/telo_misikeke.js | 0 {telo-misikeke => telo_misikeke}/update.ts | 0 7 files changed, 1 insertion(+), 1 deletion(-) rename {telo-misikeke => telo_misikeke}/Parser.js (100%) rename {telo-misikeke => telo_misikeke}/linku_data.json (100%) rename {telo-misikeke => telo_misikeke}/rules.js (100%) rename {telo-misikeke => telo_misikeke}/telo_misikeke.d.ts (100%) rename {telo-misikeke => telo_misikeke}/telo_misikeke.js (100%) rename {telo-misikeke => telo_misikeke}/update.ts (100%) diff --git a/src/mod.ts b/src/mod.ts index 5c55366d..9833ad71 100644 --- a/src/mod.ts +++ b/src/mod.ts @@ -1,6 +1,6 @@ import { distinct } from "@std/collections/distinct"; import { shuffle } from "@std/random/shuffle"; 
-import { errors } from "../telo-misikeke/telo_misikeke.js"; +import { errors } from "../telo_misikeke/telo_misikeke.js"; import { ArrayResultError } from "./array_result.ts"; import { deduplicateErrors } from "./misc.ts"; import { settings } from "./settings.ts"; diff --git a/telo-misikeke/Parser.js b/telo_misikeke/Parser.js similarity index 100% rename from telo-misikeke/Parser.js rename to telo_misikeke/Parser.js diff --git a/telo-misikeke/linku_data.json b/telo_misikeke/linku_data.json similarity index 100% rename from telo-misikeke/linku_data.json rename to telo_misikeke/linku_data.json diff --git a/telo-misikeke/rules.js b/telo_misikeke/rules.js similarity index 100% rename from telo-misikeke/rules.js rename to telo_misikeke/rules.js diff --git a/telo-misikeke/telo_misikeke.d.ts b/telo_misikeke/telo_misikeke.d.ts similarity index 100% rename from telo-misikeke/telo_misikeke.d.ts rename to telo_misikeke/telo_misikeke.d.ts diff --git a/telo-misikeke/telo_misikeke.js b/telo_misikeke/telo_misikeke.js similarity index 100% rename from telo-misikeke/telo_misikeke.js rename to telo_misikeke/telo_misikeke.js diff --git a/telo-misikeke/update.ts b/telo_misikeke/update.ts similarity index 100% rename from telo-misikeke/update.ts rename to telo_misikeke/update.ts From a92a382600d065d9a23c0fd1bf406f531e186654 Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 7 Mar 2025 09:21:58 +0800 Subject: [PATCH 151/199] snake case --- bundle.ts | 8 ++++---- deno.json | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/bundle.ts b/bundle.ts index 1f6f408c..5e03244c 100644 --- a/bundle.ts +++ b/bundle.ts @@ -8,10 +8,10 @@ const WATCH = [ "./dictionary/misc.ts", "./dictionary/parser.ts", "./dictionary/type.ts", - "./telo-misikeke/linku-data.json", - "./telo-misikeke/Parser.js", - "./telo-misikeke/rules.js", - "./telo-misikeke/telo-misikeke.js", + "./telo_misikeke/linku-data.json", + "./telo_misikeke/Parser.js", + "./telo_misikeke/rules.js", + "./telo_misikeke/telo-misikeke.js", "./src/", "./project-data.json", ]; diff --git a/deno.json b/deno.json index 5849e50b..4f683985 100644 --- a/deno.json +++ b/deno.json @@ -17,17 +17,17 @@ "exclude": [ "./dictionary/dictionary.ts", "./dist/main.js", - "./telo-misikeke/linku_data.json", - "./telo-misikeke/rules.js", - "./telo-misikeke/Parser.js" + "./telo_misikeke/linku_data.json", + "./telo_misikeke/rules.js", + "./telo_misikeke/Parser.js" ] }, "lint": { "exclude": [ "./dictionary/dictionary.ts", "./dist/main.js", - "./telo-misikeke/rules.js", - "./telo-misikeke/Parser.js" + "./telo_misikeke/rules.js", + "./telo_misikeke/Parser.js" ], "rules": { "tags": ["recommended"], From 06064c931c2cb14e665e287bff1374f2e42a4aa0 Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 7 Mar 2025 10:49:04 +0800 Subject: [PATCH 152/199] refactor yet again --- dictionary/parser.ts | 39 ++-- src/misc.ts | 6 +- src/parser/filter.ts | 423 +++++++++++++++++------------------- src/parser/lexer.ts | 17 +- src/parser/parser.ts | 147 ++++++------- src/parser/parser_lib.ts | 41 ++-- src/translator/adjective.ts | 24 +- src/translator/clause.ts | 16 +- src/translator/modifier.ts | 31 +-- src/translator/number.ts | 34 +-- src/translator/verb.ts | 5 +- 11 files changed, 369 insertions(+), 414 deletions(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index 712ba492..f0747081 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -6,6 +6,7 @@ import { deduplicateErrors, mapNullable, nullableAsArray, + throwError, } from "../src/misc.ts"; import { all, @@ -73,26 +74,19 
@@ const word = allAtLeastOnce( ), ) .map((word) => word.join("").replaceAll(/\s+/g, " ").trim()) - .filter((word) => { - if (word === "") { - throw new ArrayResultError("missing word"); - } else { - return true; - } - }) + .filter((word) => + word !== "" || throwError(new ArrayResultError("missing word")) + ) .map(escapeHtml); const slash = lex(matchString("/", "slash")); const forms = sequence(word, all(slash.with(word))) .map(([first, rest]) => [first, ...rest]); function keyword(keyword: T): Parser { return lex(match(/[a-z]+/, keyword)) - .filter((that) => { - if (keyword === that) { - return true; - } else { - throw new UnexpectedError(`"${that}"`, `"${keyword}"`); - } - }) as Parser; + .filter((that) => + keyword === that || + throwError(new UnexpectedError(`"${that}"`, `"${keyword}"`)) + ) as Parser; } const number = choiceOnlyOne(keyword("singular"), keyword("plural")); const optionalNumber = optionalAll(number); @@ -271,19 +265,20 @@ function verbOnly(tagInside: Parser): Parser { const [_, ...pluralParticles] = presentPlural.split(" "); const [_1, ...singularParticles] = presentSingular.split(" "); const [_2, ...pastParticles] = past.split(" "); - if ( - pluralParticles.length !== singularParticles.length || - pluralParticles.length !== pastParticles.length || - pluralParticles.some((particle, i) => - particle !== singularParticles[i] || particle !== pastParticles[i] - ) - ) { + const allMatched = + pluralParticles.length === singularParticles.length && + pluralParticles.length === pastParticles.length && + pluralParticles.every((particle, i) => + particle === singularParticles[i] && particle === pastParticles[i] + ); + if (allMatched) { + return true; + } else { throw new ArrayResultError( "mismatched verb particles " + `"${presentPlural}/${presentSingular}/${past}"`, ); } - return true; }) .map(([presentPlural, presentSingular, past]) => ({ presentPlural, diff --git a/src/misc.ts b/src/misc.ts index 6f8046d3..69034380 100644 --- a/src/misc.ts +++ b/src/misc.ts @@ -68,8 +68,9 @@ export function assertOk(response: Response): Response { throw new Error( `unable to fetch ${url} (${status} ${statusText})`, ); + } else { + return response; } - return response; } export function extractErrorMessage(error: unknown): string { if (error instanceof Error) { @@ -107,3 +108,6 @@ export function uniquePairs( ): ReadonlyArray { return array.flatMap((a, i) => array.slice(i + 1).map((b) => [a, b])); } +export function throwError(error: unknown): never { + throw error; +} diff --git a/src/parser/filter.ts b/src/parser/filter.ts index 9c4d1446..eb1e8b0e 100644 --- a/src/parser/filter.ts +++ b/src/parser/filter.ts @@ -1,5 +1,5 @@ import { extractArrayResultError } from "../array_result.ts"; -import { flattenError, uniquePairs } from "../misc.ts"; +import { flattenError, throwError, uniquePairs } from "../misc.ts"; import { settings } from "../settings.ts"; import { Clause, @@ -22,93 +22,67 @@ import { UnrecognizedError } from "./parser_lib.ts"; export const WORD_UNIT_RULES: ReadonlyArray<(wordUnit: WordUnit) => boolean> = [ // avoid "seme ala seme" - (wordUnit) => { - if (wordUnit.type === "x ala x" && wordUnit.word === "seme") { - throw new UnrecognizedError('"seme ala seme"'); - } - return true; - }, + (wordUnit) => + wordUnit.type !== "x ala x" || wordUnit.word !== "seme" || + throwError(new UnrecognizedError('"seme ala seme"')), ]; export const NANPA_RULES: ReadonlyArray<(nanpa: Nanpa) => boolean> = [ // disallow _nanpa ala nanpa_ - (modifier) => { - if (modifier.nanpa.type === "x ala x") { - 
throw new UnrecognizedError('"nanpa ala nanpa"'); - } - return true; - }, + (modifier) => + modifier.nanpa.type !== "x ala x" || + throwError(new UnrecognizedError('"nanpa ala nanpa"')), + // nanpa construction cannot contain preposition - (modifier) => { - if (modifier.phrase.type === "preposition") { - throw new UnrecognizedError("preposition inside nanpa"); - } - return true; - }, + (modifier) => + modifier.phrase.type !== "preposition" || + throwError(new UnrecognizedError("preposition inside nanpa")), + // nanpa construction cannot contain preverb - (modifier) => { - if (modifier.phrase.type === "preverb") { - throw new UnrecognizedError("preverb inside nanpa"); - } - return true; - }, + (modifier) => + modifier.phrase.type !== "preverb" || + throwError(new UnrecognizedError("preverb inside nanpa")), + // nanpa construction cannot contain quotation - (modifier) => { - if (modifier.phrase.type === "quotation") { - throw new UnrecognizedError("quotation inside nanpa"); - } - return true; - }, + (modifier) => + modifier.phrase.type !== "quotation" || + throwError(new UnrecognizedError("quotation inside nanpa")), + // nanpa construction cannot contain pi - (modifier) => { - if ( - modifier.phrase.type === "default" && - modifier.phrase.modifiers.some((modifier) => modifier.type === "pi") - ) { - throw new UnrecognizedError("pi inside nanpa"); - } - return true; - }, + (modifier) => + modifier.phrase.type !== "default" || + modifier.phrase.modifiers.every((modifier) => modifier.type !== "pi") || + throwError(new UnrecognizedError("pi inside nanpa")), + // nanpa construction cannot contain nanpa - (modifier) => { - if ( - modifier.phrase.type === "default" && - modifier.phrase.modifiers.some((modifier) => modifier.type === "nanpa") - ) { - throw new UnrecognizedError("nanpa inside nanpa"); - } - return true; - }, + (modifier) => + modifier.phrase.type !== "default" || + modifier.phrase.modifiers.every((modifier) => modifier.type !== "nanpa") || + throwError(new UnrecognizedError("nanpa inside nanpa")), + // nanpa cannot have emphasis particle (modifier) => { const { phrase } = modifier; - if ( - ( - phrase.type === "default" || - phrase.type === "preverb" || - phrase.type === "preposition" - ) && - phrase.emphasis != null - ) { - return false; + switch (phrase.type) { + case "preposition": + case "preverb": + case "default": + return phrase.emphasis == null; + case "quotation": + return true; } - return true; }, ]; export const MODIFIER_RULES: ReadonlyArray<(modifier: Modifier) => boolean> = [ // quotation modifier cannot exist - (modifier) => { - if (modifier.type === "quotation") { - throw new UnrecognizedError("quotation as modifier"); - } - return true; - }, + (modifier) => + modifier.type !== "quotation" || + throwError(new UnrecognizedError("quotation as modifier")), + // pi cannot contain preposition - (modifier) => { - if (modifier.type === "pi" && modifier.phrase.type === "preposition") { - throw new UnrecognizedError("preposition inside pi"); - } - return true; - }, + (modifier) => + modifier.type !== "pi" || modifier.phrase.type !== "preposition" || + throwError(new UnrecognizedError("preposition inside pi")), + // pi must follow phrases with modifier (modifier) => { if (modifier.type === "pi") { @@ -140,180 +114,167 @@ export const MODIFIER_RULES: ReadonlyArray<(modifier: Modifier) => boolean> = [ // } // return true; // }, + // pi cannot have emphasis particle (modifier) => { if (modifier.type === "pi") { const { phrase } = modifier; - if ( - ( - phrase.type === "default" || - 
phrase.type === "preverb" || - phrase.type === "preposition" - ) && - phrase.emphasis != null - ) { - return false; + switch (phrase.type) { + case "default": + case "preposition": + case "preverb": + return phrase.emphasis == null; + case "quotation": + return true; } + } else { + return true; } - return true; }, ]; export const MULTIPLE_MODIFIERS_RULES: ReadonlyArray< (modifier: ReadonlyArray) => boolean > = [ // // no multiple pi - // (modifiers) => { - // if (modifiers.filter((modifier) => modifier.type === "pi").length > 1) { - // throw new UnrecognizedError("multiple pi"); - // } - // return true; - // }, + // (modifiers) => + // modifiers.filter((modifier) => modifier.type === "pi").length <= 1 || + // throwError(new UnrecognizedError("multiple pi")), + // no multiple nanpa - (modifiers) => { - if (modifiers.filter((modifier) => modifier.type === "nanpa").length > 1) { - throw new UnrecognizedError("multiple nanpa"); - } - return true; - }, + (modifiers) => + modifiers.filter((modifier) => modifier.type === "nanpa").length <= 1 || + throwError(new UnrecognizedError("multiple nanpa")), + // no multiple proper words - (modifiers) => { - if ( - modifiers.filter((modifier) => modifier.type === "proper words").length > - 1 - ) { - throw new UnrecognizedError("multiple proper words"); - } - return true; - }, + (modifiers) => + modifiers + .filter((modifier) => modifier.type === "proper words") + .length <= 1 || + throwError(new UnrecognizedError("multiple proper words")), + // no multiple number words - (modifiers) => { - if (modifiers.filter(modifierIsNumeric).length > 1) { - throw new UnrecognizedError("multiple number words"); - } - return true; - }, + (modifiers) => + modifiers.filter(modifierIsNumeric).length <= 1 || + throwError(new UnrecognizedError("multiple number words")), + // avoid duplicate modifiers when disabled by settings (modifiers) => { if (settings.separateRepeatedModifiers) { return true; - } - const words = modifiers.flatMap((modifier) => { - switch (modifier.type) { - case "default": - if (modifier.word.type !== "number") { - return [modifier.word.word]; - } else { - return []; - } - case "pi": - if ( - modifier.phrase.type === "default" && - modifier.phrase.headWord.type !== "number" - ) { - return [modifier.phrase.headWord.word]; - } else { + } else { + const words = modifiers.flatMap((modifier) => { + switch (modifier.type) { + case "default": + if (modifier.word.type !== "number") { + return [modifier.word.word]; + } else { + return []; + } + case "pi": + if ( + modifier.phrase.type === "default" && + modifier.phrase.headWord.type !== "number" + ) { + return [modifier.phrase.headWord.word]; + } else { + return []; + } + case "quotation": + case "proper words": + case "nanpa": return []; - } - case "quotation": - case "proper words": - case "nanpa": - return []; - } - }); - for (const [a, b] of uniquePairs(words)) { - if (a === b) { - throw new UnrecognizedError(`duplicate "${a}" in modifier`); + } + }); + for (const [a, b] of uniquePairs(words)) { + if (a === b) { + throw new UnrecognizedError(`duplicate "${a}" in modifier`); + } } + return true; } - return true; }, ]; export const PHRASE_RULE: ReadonlyArray<(phrase: Phrase) => boolean> = [ // Disallow quotation - (phrase) => { - if (phrase.type === "quotation") { - throw new UnrecognizedError("quotation as phrase"); - } - return true; - }, + (phrase) => + phrase.type !== "quotation" || + throwError(new UnrecognizedError("quotation as phrase")), + // Disallow preverb modifiers other than "ala" - (phrase) => { - 
if (phrase.type === "preverb" && !modifiersIsAlaOrNone(phrase.modifiers)) { - throw new UnrecognizedError('preverb with modifiers other than "ala"'); - } - return true; - }, + (phrase) => + phrase.type !== "preverb" || modifiersIsAlaOrNone(phrase.modifiers) || + throwError( + new UnrecognizedError('preverb with modifiers other than "ala"'), + ), + // No multiple number words - (phrase) => { - if ( - phrase.type === "default" && - phrase.headWord.type === "number" && - phrase.modifiers.some(modifierIsNumeric) - ) { - throw new UnrecognizedError("multiple number words"); - } - return true; - }, + (phrase) => + phrase.type !== "default" || + phrase.headWord.type !== "number" || + !phrase.modifiers.some(modifierIsNumeric) || + throwError(new UnrecognizedError("multiple number words")), + // If the phrase has no modifiers, avoid emphasis particle (phrase) => phrase.type !== "default" || phrase.emphasis == null || phrase.modifiers.length > 0, + // For preverbs, inner phrase must not have emphasis particle (phrase) => phrase.type !== "preverb" || !phraseHasTopLevelEmphasis(phrase.phrase), + // Emphasis must not be nested (phrase) => { - if ( - (phrase.type === "default" || phrase.type === "preverb" || - phrase.type === "preposition") && - phrase.emphasis != null && - everyWordUnitInPhrase(phrase) - .some((wordUnit) => wordUnit.emphasis != null) - ) { - throw new UnrecognizedError("nested emphasis"); + switch (phrase.type) { + case "preposition": + case "preverb": + case "default": + if ( + phrase.emphasis == null || + everyWordUnitInPhrase(phrase) + .every((wordUnit) => wordUnit.emphasis == null) + ) { + return true; + } else { + throw new UnrecognizedError("nested emphasis"); + } + case "quotation": + return true; } - return true; }, ]; export const PREPOSITION_RULE: ReadonlyArray<(phrase: Preposition) => boolean> = [ // Disallow preverb modifiers other than "ala" - (preposition) => { - if (!modifiersIsAlaOrNone(preposition.modifiers)) { - throw new UnrecognizedError('preverb with modifiers other than "ala"'); - } - return true; - }, + (preposition) => + modifiersIsAlaOrNone(preposition.modifiers) || + throwError( + new UnrecognizedError('preverb with modifiers other than "ala"'), + ), + // Disallow nested preposition - (preposition) => { - if ( - everyPhraseInMultiplePhrases(preposition.phrases) - .some(hasPrepositionInPhrase) - ) { - throw new UnrecognizedError("preposition inside preposition"); - } - return true; - }, + (preposition) => + !everyPhraseInMultiplePhrases(preposition.phrases) + .some(hasPrepositionInPhrase) || + throwError(new UnrecognizedError("preposition inside preposition")), + // Preposition with "anu" must not have emphasis particle (preposition) => preposition.emphasis == null || preposition.phrases.type !== "anu", + // Inner phrase must not have emphasis particle (preposition) => preposition.phrases.type !== "single" || !phraseHasTopLevelEmphasis(preposition.phrases.phrase), + // Emphasis must not be nested - (preposition) => { - if ( - preposition.emphasis != null && - everyWordUnitInPreposition(preposition) - .some((wordUnit) => wordUnit.emphasis != null) - ) { - throw new UnrecognizedError("nested emphasis"); - } - return true; - }, + (preposition) => + preposition.emphasis == null || + everyWordUnitInPreposition(preposition) + .every((wordUnit) => wordUnit.emphasis == null) || + throwError(new UnrecognizedError("nested emphasis")), ]; export const CLAUSE_RULE: ReadonlyArray<(clause: Clause) => boolean> = [ // disallow preposition in subject @@ -340,19 +301,26 @@ 
export const CLAUSE_RULE: ReadonlyArray<(clause: Clause) => boolean> = [ everyPhraseInMultiplePhrases(phrases).some(hasPrepositionInPhrase) ) { throw new UnrecognizedError("preposition in subject"); + } else { + return true; } - return true; }, // disallow preposition in object (clause) => { - if ( - (clause.type === "li clause" || clause.type === "o clause") && - everyObjectInMultiplePredicates(clause.predicates) - .some(hasPrepositionInPhrase) - ) { - throw new UnrecognizedError("preposition in object"); + switch (clause.type) { + case "li clause": + case "o clause": + if ( + everyObjectInMultiplePredicates(clause.predicates) + .some(hasPrepositionInPhrase) + ) { + throw new UnrecognizedError("preposition in object"); + } else { + return true; + } + default: + return true; } - return true; }, // disallow "mi li" or "sina li" (clause) => { @@ -392,32 +360,34 @@ export const SENTENCE_RULE: ReadonlyArray<(sentence: Sentence) => boolean> = [ return true; }, // If there is "la", there can't be "taso" or "kin" - (sentence) => { - if ( - sentence.type === "default" && sentence.laClauses.length > 0 && - sentence.kinOrTaso != null - ) { - throw new UnrecognizedError( + (sentence) => + sentence.type !== "default" || sentence.laClauses.length === 0 || + sentence.kinOrTaso == null || throwError( + new UnrecognizedError( `${sentence.kinOrTaso.word} particle with "la"`, - ); - } - return true; - }, + ), + ), + // There can't be more than 1 "x ala x" or "seme" (sentence) => { - if ( - sentence.interrogative != null && everyWordUnitInSentence(sentence) - .filter((wordUnit) => - wordUnit.type === "x ala x" || - ((wordUnit.type === "default" || - wordUnit.type === "reduplication") && - wordUnit.word === "seme") - ) - .length > 1 - ) { - throw new UnrecognizedError( - 'more than 1 interrogative elements: "x ala x" or "seme"', - ); + if (sentence.interrogative) { + const interrogative = everyWordUnitInSentence(sentence) + .filter((wordUnit) => { + switch (wordUnit.type) { + case "number": + return false; + case "x ala x": + return true; + case "default": + case "reduplication": + return wordUnit.word === "seme"; + } + }); + if (interrogative.length > 1) { + throw new UnrecognizedError( + 'more than 1 interrogative elements: "x ala x" or "seme"', + ); + } } return true; }, @@ -426,12 +396,9 @@ export const MULTIPLE_SENTENCES_RULE: ReadonlyArray< (sentences: ReadonlyArray) => boolean > = [ // Only allow at most 2 sentences - (sentences) => { - if (sentences.filter((sentence) => sentence.type !== "filler").length > 2) { - throw new UnrecognizedError("multiple sentences"); - } - return true; - }, + (sentences) => + sentences.filter((sentence) => sentence.type !== "filler").length <= 2 || + throwError(new UnrecognizedError("multiple sentences")), ]; export function filter( rules: ReadonlyArray<(value: T) => boolean>, diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index b8684743..00c6ca1d 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -40,6 +40,7 @@ import { UCSUR_CHARACTER_REGEX, UCSUR_TO_LATIN, } from "./ucsur.ts"; +import { throwError } from "../misc.ts"; const spacesWithoutNewline = match(/[^\S\n\r]*/, "spaces"); const newline = match(/[\n\r]\s*/, "newline"); @@ -71,13 +72,10 @@ const properWords = allAtLeastOnce( .map((array) => array.join(" ")) .map((words) => ({ type: "proper word", words, kind: "latin" })); function specificWord(thatWord: string): Parser { - return word.filter((thisWord) => { - if (thatWord === thisWord) { - return true; - } else { - throw new 
UnexpectedError(`"${thisWord}"`, `"${thatWord}"`); - } - }); + return word.filter((thisWord) => + thatWord === thisWord || + throwError(new UnexpectedError(`"${thisWord}"`, `"${thatWord}"`)) + ); } const multipleA = specificWord("a") .with(count(allAtLeastOnce(specificWord("a")))) @@ -129,10 +127,11 @@ const cartoucheElement = choiceOnlyOne( .map(([word, dots]) => { const count = /^[aeiou]/.test(word) ? dots + 1 : dots; const morae = word.match(/[aeiou]|[jklmnpstw][aeiou]|n/g)!; - if (morae.length < count) { + if (count < morae.length) { + return morae.slice(0, count).join(""); + } else { throw new UnrecognizedError("excess dots"); } - return morae.slice(0, count).join(""); }), singleUcsurWord.map((word) => word[0]), match(/[a-zA-Z]/, "Latin letter") diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 5357583e..ad776323 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -7,7 +7,7 @@ import { preverbSet, tokiPonaWordSet, } from "../dictionary.ts"; -import { nullableAsArray } from "../misc.ts"; +import { nullableAsArray, throwError } from "../misc.ts"; import { Clause, ContextClause, @@ -66,45 +66,34 @@ Parser.startCache(cache); const specificToken = memoize( (type: T): Parser => { - return token.map((token) => { - if (token.type === type) { - return token as Token & { type: T }; - } else { - throw new UnexpectedError(describe(token), type); - } - }); + return token.map((token) => + token.type === type + ? token as Token & { type: T } + : throwError(new UnexpectedError(describe(token), type)) + ); }, ); const punctuation = specificToken("punctuation") .map(({ punctuation }) => punctuation); const comma = punctuation - .filter((punctuation) => { - if (punctuation === ",") { - return true; - } else { - throw new UnexpectedError(`"${punctuation}"`, "comma"); - } - }); + .filter((punctuation) => + punctuation === "," || + throwError(new UnexpectedError(`"${punctuation}"`, "comma")) + ); const optionalComma = optional(comma); const word = specificToken("word").map(({ word }) => word); const properWords = specificToken("proper word").map(({ words }) => words); function wordFrom(set: Set, description: string): Parser { - return word.filter((word) => { - if (set.has(word)) { - return true; - } else { - throw new UnrecognizedError(`"${word}" as ${description}`); - } - }); + return word.filter((word) => + set.has(word) || + throwError(new UnrecognizedError(`"${word}" as ${description}`)) + ); } const specificWord = memoize((thatWord: string) => - word.filter((thisWord) => { - if (thatWord === thisWord) { - return true; - } else { - throw new UnexpectedError(`"${thisWord}"`, `"${thatWord}"`); - } - }) + word.filter((thisWord) => + thatWord === thisWord || + throwError(new UnexpectedError(`"${thisWord}"`, `"${thatWord}"`)) + ) ); function filterCombinedGlyphs( words: ReadonlyArray, @@ -131,12 +120,11 @@ const emphasis = choice( length: spaceLength, })), specificToken("long word") - .map(({ word, length }) => { - if (word !== "a") { - throw new UnexpectedError(`"${word}"`, '"a"'); - } - return { type: "long word", word, length }; - }), + .map(({ word, length }) => + word === "a" + ? 
{ type: "long word", word, length } + : throwError(new UnexpectedError(`"${word}"`, '"a"')) + ), specificWord("a").map((word) => ({ type: "word", word })), ); const optionalEmphasis = optional(emphasis); @@ -202,19 +190,19 @@ function binaryWords( word: Set, description: string, ): Parser { - return specificToken("combined glyphs").map(({ words }) => { - if (words.length > 2) { - throw new UnrecognizedError( - `combined glyphs of ${words.length} words`, - ); - } else if (!word.has(words[0])) { - throw new UnrecognizedError(`"${words[0]}" as ${description}`); - } else if (!contentWordSet.has(words[1])) { - throw new UnrecognizedError(`"${words[1]}" as content word`); - } else { - return words as [string, string]; - } - }); + return specificToken("combined glyphs").map(({ words }) => + words.length > 2 + ? throwError( + new UnrecognizedError( + `combined glyphs of ${words.length} words`, + ), + ) + : !word.has(words[0]) + ? throwError(new UnrecognizedError(`"${words[0]}" as ${description}`)) + : !contentWordSet.has(words[1]) + ? throwError(new UnrecognizedError(`"${words[1]}" as content word`)) + : words as [string, string] + ); } function optionalCombined( word: Set, @@ -412,12 +400,14 @@ const preposition = choice( throw new UnrecognizedError( `combined glyphs of ${words.words.length} words`, ); + } else { + const word = words.words[0]; + if (!prepositionSet.has(word)) { + throw new UnrecognizedError(`"${word}" as preposition`); + } else { + return words.words; + } } - const word = words.words[0]; - if (!prepositionSet.has(word)) { - throw new UnrecognizedError(`"${word}" as preposition`); - } - return words.words; }), phrase, specificToken("headless long glyph end"), @@ -630,28 +620,28 @@ const la = choice( ); const filler = choice( specificToken("space long glyph") - .map((longGlyph) => { - if (longGlyph.words.length !== 1) { - throw new UnexpectedError( - describe({ type: "combined glyphs", words: longGlyph.words }), - "simple glyph", - ); - } - return { - type: "long word", - word: longGlyph.words[0], - length: longGlyph.spaceLength, - }; - }), + .map((longGlyph) => + longGlyph.words.length === 1 + ? { + type: "long word", + word: longGlyph.words[0], + length: longGlyph.spaceLength, + } + : throwError( + new UnexpectedError( + describe({ type: "combined glyphs", words: longGlyph.words }), + "simple glyph", + ), + ) + ), specificToken("multiple a") .map(({ count }) => ({ type: "multiple a", count })), specificToken("long word") - .map(({ word, length }) => { - if (!fillerSet.has(word)) { - throw new UnrecognizedError(`"${word}" as filler`); - } - return { type: "long word", word, length }; - }), + .map(({ word, length }) => + fillerSet.has(word) + ? 
{ type: "long word", word, length } + : throwError(new UnrecognizedError(`"${word}" as filler`)) + ), wordFrom(fillerSet, "filler") .map((word) => ({ type: "word", word })), ); @@ -727,13 +717,12 @@ const sentence = choice( ) .filter(filter(SENTENCE_RULE)); export const parse = spaces - .with(lookAhead(everything.filter((src) => { - if (src.trimEnd().length > 500) { - throw new UnrecognizedError("long text"); - } else { - return true; - } - }))) + .with( + lookAhead(everything.filter((src) => + src.trimEnd().length <= 500 || + throwError(new UnrecognizedError("long text")) + )), + ) .with(choiceOnlyOne( wordFrom(tokiPonaWordSet, "Toki Pona word") .skip(end) diff --git a/src/parser/parser_lib.ts b/src/parser/parser_lib.ts index 615c8994..313daae4 100644 --- a/src/parser/parser_lib.ts +++ b/src/parser/parser_lib.ts @@ -1,6 +1,7 @@ import { memoize } from "@std/cache/memoize"; import { ArrayResult, ArrayResultError } from "../array_result.ts"; import { Cache, Clearable, Lazy } from "../cache.ts"; +import { throwError } from "../misc.ts"; export type ValueRest = Readonly<{ rest: string; value: T }>; export type ParserResult = ArrayResult>; @@ -213,8 +214,9 @@ export function matchCapture( value: match, rest: src.slice(match[0].length), }]); + } else { + throw new UnexpectedError(describeSource(src), description); } - throw new UnexpectedError(describeSource(src), description); }) ); } @@ -223,15 +225,14 @@ export function match(regex: RegExp, description: string): Parser { } export function slice(length: number, description: string): Parser { return new Parser((src) => - ArrayResult.from(() => { - if (src.length >= length) { - return new ArrayResult([{ + ArrayResult.from(() => + src.length >= length + ? new ArrayResult([{ rest: src.slice(length), value: src.slice(0, length), - }]); - } - throw new UnexpectedError(describeSource(src), description); - }) + }]) + : throwError(new UnexpectedError(describeSource(src), description)) + ) ); } export function matchString( @@ -239,15 +240,14 @@ export function matchString( description = `"${match}"`, ): Parser { return new Parser((src) => - ArrayResult.from(() => { - if (src.length >= match.length && src.slice(0, match.length) === match) { - return new ArrayResult([{ + ArrayResult.from(() => + src.length >= match.length && src.slice(0, match.length) === match + ? new ArrayResult([{ rest: src.slice(match.length), value: match, - }]); - } - throw new UnexpectedError(describeSource(src), description); - }) + }]) + : throwError(new UnexpectedError(describeSource(src), description)) + ) ); } export const everything = new Parser((src) => @@ -255,12 +255,11 @@ export const everything = new Parser((src) => ); export const character = match(/./us, "character"); export const end = new Parser((src) => - ArrayResult.from(() => { - if (src === "") { - return new ArrayResult([{ value: null, rest: "" }]); - } - throw new UnexpectedError(describeSource(src), "end of text"); - }) + ArrayResult.from(() => + src === "" + ? 
new ArrayResult([{ value: null, rest: "" }]) + : throwError(new UnexpectedError(describeSource(src), "end of text")) + ) ); export function withSource( parser: Parser, diff --git a/src/translator/adjective.ts b/src/translator/adjective.ts index 898b8fcd..7ce1ddf7 100644 --- a/src/translator/adjective.ts +++ b/src/translator/adjective.ts @@ -1,6 +1,6 @@ import * as Dictionary from "../../dictionary/type.ts"; import { ArrayResult } from "../array_result.ts"; -import { nullableAsArray } from "../misc.ts"; +import { nullableAsArray, throwError } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import * as English from "./ast.ts"; import { UntranslatableError } from "./error.ts"; @@ -55,9 +55,9 @@ export function compoundAdjective( }>, ): ArrayResult { const { adjectives, reduplicationCount, emphasis } = options; - return ArrayResult.from(() => { - if (reduplicationCount === 1) { - return ArrayResult.combine( + return ArrayResult.from(() => + reduplicationCount === 1 + ? ArrayResult.combine( ...adjectives .map((definition) => adjective({ definition, reduplicationCount: 1, emphasis }) @@ -68,14 +68,14 @@ export function compoundAdjective( conjunction: "and", adjective, emphasis: false, - })); - } else { - throw new UntranslatableError( - "reduplication", - "compound adjective", - ); - } - }); + })) + : throwError( + new UntranslatableError( + "reduplication", + "compound adjective", + ), + ) + ); } export function rankAdjective(kind: Dictionary.AdjectiveType): number { return [ diff --git a/src/translator/clause.ts b/src/translator/clause.ts index c2169383..6306a4de 100644 --- a/src/translator/clause.ts +++ b/src/translator/clause.ts @@ -1,5 +1,5 @@ import { ArrayResult } from "../array_result.ts"; -import { nullableAsArray } from "../misc.ts"; +import { nullableAsArray, throwError } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import * as English from "./ast.ts"; import { FilteredOutError, TranslationTodoError } from "./error.ts"; @@ -108,13 +108,13 @@ export function clause(clause: TokiPona.Clause): ArrayResult { andParticle: "en", includeVerb: false, }) - .map((phrase) => { - if (phrase.type === "noun") { - return { type: "vocative", call: "hey", addressee: phrase.noun }; - } else { - throw new FilteredOutError(`${phrase.type} within o vocative`); - } - }); + .map((phrase) => + phrase.type === "noun" + ? { type: "vocative", call: "hey", addressee: phrase.noun } + : throwError( + new FilteredOutError(`${phrase.type} within o vocative`), + ) + ); case "li clause": return liClause(clause); case "prepositions": diff --git a/src/translator/modifier.ts b/src/translator/modifier.ts index 31ae5d38..ce69d3e1 100644 --- a/src/translator/modifier.ts +++ b/src/translator/modifier.ts @@ -1,5 +1,6 @@ import { ArrayResult } from "../array_result.ts"; import { dictionary } from "../dictionary.ts"; +import { throwError } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import * as Composer from "../parser/composer.ts"; import { adjective, compoundAdjective } from "./adjective.ts"; @@ -172,18 +173,19 @@ function nanpaModifier( includeGerund: true, includeVerb: false, }) - .map((phrase) => { - if (phrase.type !== "noun") { - throw new FilteredOutError( - `${phrase.type} within "in position" phrase`, - ); - } else if ( - (phrase.noun as English.NounPhrase & { type: "simple" }) - .preposition.length > 0 - ) { - throw new FilteredOutError('preposition within "in position" phrase'); - } else { - return { + .map((phrase) => + phrase.type !== "noun" + ? 
throwError( + new FilteredOutError( + `${phrase.type} within "in position" phrase`, + ), + ) + : (phrase.noun as English.NounPhrase & { type: "simple" }) + .preposition.length > 0 + ? throwError( + new FilteredOutError('preposition within "in position" phrase'), + ) + : { type: "in position phrase", noun: { type: "simple", @@ -200,9 +202,8 @@ function nanpaModifier( preposition: [], emphasis: false, }, - }; - } - }); + } + ); } function modifier( modifier: TokiPona.Modifier, diff --git a/src/translator/number.ts b/src/translator/number.ts index 0542ee46..0706b153 100644 --- a/src/translator/number.ts +++ b/src/translator/number.ts @@ -1,7 +1,7 @@ import { sumOf } from "@std/collections/sum-of"; import { ArrayResult } from "../array_result.ts"; import { dictionary } from "../dictionary.ts"; -import { nullableAsArray } from "../misc.ts"; +import { nullableAsArray, throwError } from "../misc.ts"; import { FilteredOutError } from "./error.ts"; function singleNumber(word: string): ArrayResult { @@ -46,12 +46,13 @@ function unfilteredNasinNanpaPona( const hundredCount = index !== -1 ? index : number.length - aleStart; if (previousHundredCount <= hundredCount) { throw new FilteredOutError('unsorted "ale"'); + } else { + return subHundred(number.slice(0, aleStart)) * 100 ** hundredCount + + unfilteredNasinNanpaPona( + number.slice(aleStart + hundredCount), + hundredCount, + ); } - return subHundred(number.slice(0, aleStart)) * 100 ** hundredCount + - unfilteredNasinNanpaPona( - number.slice(aleStart + hundredCount), - hundredCount, - ); } } } @@ -63,17 +64,16 @@ function nasinNanpaPona(number: ReadonlyArray): null | number { } } function combineNumbers(numbers: ReadonlyArray): ArrayResult { - return ArrayResult.from(() => { - if (numbers.length !== 1 && numbers.includes(0)) { - throw new FilteredOutError('"ala" along with other numeral'); - } - return ArrayResult.concat( - ArrayResult.from(() => - new ArrayResult(nullableAsArray(nasinNanpaPona(numbers))) - ), - ArrayResult.from(() => new ArrayResult([regularNumber(numbers)])), - ); - }); + return ArrayResult.from(() => + numbers.length === 1 || numbers.includes(0) + ? 
ArrayResult.concat( + ArrayResult.from(() => + new ArrayResult(nullableAsArray(nasinNanpaPona(numbers))) + ), + ArrayResult.from(() => new ArrayResult([regularNumber(numbers)])), + ) + : throwError(new FilteredOutError('"ala" along with other numeral')) + ); } export function number(number: ReadonlyArray): ArrayResult { return ArrayResult.combine(...number.map(singleNumber)) diff --git a/src/translator/verb.ts b/src/translator/verb.ts index 93bd24af..9320cca7 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -100,11 +100,12 @@ export function everyPartialVerb( export function forObject(verb: PartialCompoundVerb): boolean | string { const [{ forObject }, ...rest] = everyPartialVerb(verb); if ( - forObject === false || rest.some((verb) => forObject !== verb.forObject) + forObject !== false && rest.every((verb) => forObject === verb.forObject) ) { + return forObject; + } else { return false; } - return forObject; } export function fromVerbForms( options: Readonly<{ From 7d8c3a40c516601996ea7393d3545666dc654ce2 Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 7 Mar 2025 10:51:04 +0800 Subject: [PATCH 153/199] fix --- src/translator/number.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/translator/number.ts b/src/translator/number.ts index 0706b153..f2cb163d 100644 --- a/src/translator/number.ts +++ b/src/translator/number.ts @@ -65,7 +65,7 @@ function nasinNanpaPona(number: ReadonlyArray): null | number { } function combineNumbers(numbers: ReadonlyArray): ArrayResult { return ArrayResult.from(() => - numbers.length === 1 || numbers.includes(0) + numbers.length === 1 || !numbers.includes(0) ? ArrayResult.concat( ArrayResult.from(() => new ArrayResult(nullableAsArray(nasinNanpaPona(numbers))) From 728aeaf31ac017b61c6344ad711cd0535b83bb99 Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 7 Mar 2025 11:44:06 +0800 Subject: [PATCH 154/199] turns out using set is better --- src/misc.ts | 11 +++++++++++ src/parser/filter.ts | 12 ++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/misc.ts b/src/misc.ts index 69034380..7e36a54d 100644 --- a/src/misc.ts +++ b/src/misc.ts @@ -111,3 +111,14 @@ export function uniquePairs( export function throwError(error: unknown): never { throw error; } +export function findDuplicate(iterable: Iterable): null | T { + const set = new Set(); + for (const value of iterable) { + if (set.has(value)) { + return value; + } else { + set.add(value); + } + } + return null; +} diff --git a/src/parser/filter.ts b/src/parser/filter.ts index eb1e8b0e..d5d7dd38 100644 --- a/src/parser/filter.ts +++ b/src/parser/filter.ts @@ -1,5 +1,5 @@ import { extractArrayResultError } from "../array_result.ts"; -import { flattenError, throwError, uniquePairs } from "../misc.ts"; +import { findDuplicate, flattenError, throwError } from "../misc.ts"; import { settings } from "../settings.ts"; import { Clause, @@ -185,12 +185,12 @@ export const MULTIPLE_MODIFIERS_RULES: ReadonlyArray< return []; } }); - for (const [a, b] of uniquePairs(words)) { - if (a === b) { - throw new UnrecognizedError(`duplicate "${a}" in modifier`); - } + const duplicate = findDuplicate(words); + if (duplicate == null) { + return true; + } else { + throw new UnrecognizedError(`duplicate "${duplicate}" in modifier`); } - return true; } }, ]; From baa7f68622f715446aa06e94f5c27c179f567793 Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 7 Mar 2025 11:53:01 +0800 Subject: [PATCH 155/199] this turns out to be unnecessary --- src/misc.ts | 11 ++--------- 
telo_misikeke/telo_misikeke.js | 11 +++-------- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/src/misc.ts b/src/misc.ts index 7e36a54d..b6ef48d3 100644 --- a/src/misc.ts +++ b/src/misc.ts @@ -1,6 +1,5 @@ import { distinctBy } from "@std/collections/distinct-by"; -import { escape as escapeHtml } from "@std/html/entities"; -import { escape as escapeRegex } from "@std/regexp/escape"; +import { escape } from "@std/regexp/escape"; import { Lazy } from "./cache.ts"; export const NEWLINES = /\r\n|\n|\r/g; @@ -43,12 +42,6 @@ export const checkLocalStorage = lazy(() => { localStorage.length !== 0; } }); -export function newlineAsHtmlLineBreak(text: string): string { - return text.replaceAll(NEWLINES, "
"); -} -export function escapeHtmlWithLineBreak(text: string): string { - return newlineAsHtmlLineBreak(escapeHtml(text)); -} export function setIgnoreError(key: string, value: string): void { if (checkLocalStorage()) { try { @@ -101,7 +94,7 @@ export function deduplicateErrors( return distinctBy(errors, ({ message }) => message); } export function characterClass(characters: Iterable): RegExp { - return new RegExp(`[${escapeRegex([...characters].join(""))}]`, "u"); + return new RegExp(`[${escape([...characters].join(""))}]`, "u"); } export function uniquePairs( array: ReadonlyArray, diff --git a/telo_misikeke/telo_misikeke.js b/telo_misikeke/telo_misikeke.js index 18169b14..0bbf67a7 100644 --- a/telo_misikeke/telo_misikeke.js +++ b/telo_misikeke/telo_misikeke.js @@ -1,9 +1,6 @@ // @ts-self-types="./telo_misikeke.d.ts" -import { - escapeHtmlWithLineBreak, - newlineAsHtmlLineBreak, -} from "../src/misc.ts"; +import { escape } from "@std/html/entities"; import LINKU from "./linku_data.json" with { type: "json" }; import { ParserWithCallbacks } from "./Parser.js"; import { build_rules, getMessage } from "./rules.js"; @@ -15,10 +12,8 @@ export function errors(text) { .tokenize(text) .filter((token) => RULES[token.ruleName].category === "error") .map((token) => { - const src = escapeHtmlWithLineBreak(token.text); - const message = newlineAsHtmlLineBreak( - getMessage(token.ruleName, token.match), - ); + const src = escape(token.text); + const message = getMessage(token.ruleName, token.match); return `"${src}" ${message}`; }); } From a97acf825723606116fd3268716047610ab26880 Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 7 Mar 2025 12:03:56 +0800 Subject: [PATCH 156/199] small improvement --- dictionary/misc.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dictionary/misc.ts b/dictionary/misc.ts index 36fe33c1..e531ab8d 100644 --- a/dictionary/misc.ts +++ b/dictionary/misc.ts @@ -1,5 +1,5 @@ export function asComment(text: string): string { return text .replaceAll(/^/mg, "# ") - .replaceAll(/^#\s*$/mg, "#"); + .replaceAll(/^#\s+$/mg, "#"); } From 2a9447b935ba9646eb54a7c32f106a5077186d2e Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 7 Mar 2025 12:36:43 +0800 Subject: [PATCH 157/199] small refactor --- telo_misikeke/telo_misikeke.js | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/telo_misikeke/telo_misikeke.js b/telo_misikeke/telo_misikeke.js index 0bbf67a7..82bf0c05 100644 --- a/telo_misikeke/telo_misikeke.js +++ b/telo_misikeke/telo_misikeke.js @@ -11,9 +11,7 @@ export function errors(text) { return new ParserWithCallbacks(RULES, false) .tokenize(text) .filter((token) => RULES[token.ruleName].category === "error") - .map((token) => { - const src = escape(token.text); - const message = getMessage(token.ruleName, token.match); - return `"${src}" ${message}`; - }); + .map((token) => + `"${escape(token.text)}" ${getMessage(token.ruleName, token.match)}` + ); } From eb073a560cf38cc7c9e87a005201e2fa06aba98a Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 7 Mar 2025 12:50:19 +0800 Subject: [PATCH 158/199] remove quotation --- src/parser/ast.ts | 11 +++--- src/parser/composer.ts | 6 ---- src/parser/extract.ts | 7 ---- src/parser/filter.ts | 74 ++++++-------------------------------- src/parser/parser.ts | 4 +-- src/translator/clause.ts | 1 - src/translator/modifier.ts | 2 -- src/translator/phrase.ts | 1 - 8 files changed, 16 insertions(+), 90 deletions(-) diff --git a/src/parser/ast.ts b/src/parser/ast.ts index cdcbc1b7..e1b040d2 100644 --- a/src/parser/ast.ts 
+++ b/src/parser/ast.ts @@ -22,8 +22,7 @@ export type Modifier = | Readonly<{ type: "default"; word: WordUnit }> | Readonly<{ type: "proper words"; words: string }> | Readonly<{ type: "pi"; phrase: Phrase }> - | (Readonly<{ type: "nanpa" }> & Nanpa) - | (Readonly<{ type: "quotation" }> & Quotation); + | (Readonly<{ type: "nanpa" }> & Nanpa); export type Phrase = | Readonly<{ type: "default"; @@ -38,8 +37,7 @@ export type Phrase = phrase: Phrase; emphasis: null | Emphasis; }> - | (Readonly<{ type: "preposition" }> & Preposition) - | (Readonly<{ type: "quotation" }> & Quotation); + | (Readonly<{ type: "preposition" }> & Preposition); export type MultiplePhrases = | Readonly<{ type: "single"; phrase: Phrase }> | Readonly<{ @@ -77,8 +75,9 @@ export type Clause = subjects: null | MultiplePhrases; predicates: Predicate; }> - | Readonly<{ type: "prepositions"; prepositions: ReadonlyArray }> - | (Readonly<{ type: "quotation" }> & Quotation); + | Readonly< + { type: "prepositions"; prepositions: ReadonlyArray } + >; export type ContextClause = | Clause | (Readonly<{ type: "nanpa" }> & Nanpa); diff --git a/src/parser/composer.ts b/src/parser/composer.ts index 84debbf0..23260493 100644 --- a/src/parser/composer.ts +++ b/src/parser/composer.ts @@ -68,8 +68,6 @@ export function modifier(modifier: Modifier): string { return `pi ${phrase(modifier.phrase)}`; case "nanpa": return nanpa(modifier); - case "quotation": - return quotation(modifier); } } export function phrase(value: Phrase): string { @@ -91,8 +89,6 @@ export function phrase(value: Phrase): string { .join(" "); case "preposition": return preposition(value); - case "quotation": - return quotation(value); } } function particle(type: "and conjunction" | "anu", particle: string): string { @@ -175,8 +171,6 @@ export function clause(clause: Clause): string { .join(" "); case "prepositions": return clause.prepositions.map(preposition).join(" "); - case "quotation": - throw new Error(); } } export function contextClause(contextClause: ContextClause): string { diff --git a/src/parser/extract.ts b/src/parser/extract.ts index c2b768bb..fe5f57e1 100644 --- a/src/parser/extract.ts +++ b/src/parser/extract.ts @@ -25,7 +25,6 @@ export function everyWordUnitInModifier( return everyWordUnitInPhrase(modifier.phrase); case "nanpa": return everyWordUnitInNanpa(modifier); - case "quotation": case "proper words": return []; } @@ -45,8 +44,6 @@ export function everyWordUnitInPhrase(phrase: Phrase): ReadonlyArray { ]; case "preposition": return everyWordUnitInPreposition(phrase); - case "quotation": - return []; } } export function everyWordUnitInMultiplePhrases( @@ -99,8 +96,6 @@ export function everyWordUnitInClause(clause: Clause): ReadonlyArray { ]; case "prepositions": return clause.prepositions.flatMap(everyWordUnitInPreposition); - case "quotation": - return []; } } export function everyWordUnitInContextClause( @@ -142,8 +137,6 @@ export function everyModifierInPhrase(phrase: Phrase): ReadonlyArray { ...phrase.modifiers, ...everyModifierInMultiplePhrases(phrase.phrases), ]; - case "quotation": - return []; } } export function everyModifierInMultiplePhrases( diff --git a/src/parser/filter.ts b/src/parser/filter.ts index d5d7dd38..cc023f42 100644 --- a/src/parser/filter.ts +++ b/src/parser/filter.ts @@ -42,11 +42,6 @@ export const NANPA_RULES: ReadonlyArray<(nanpa: Nanpa) => boolean> = [ modifier.phrase.type !== "preverb" || throwError(new UnrecognizedError("preverb inside nanpa")), - // nanpa construction cannot contain quotation - (modifier) => - 
modifier.phrase.type !== "quotation" || - throwError(new UnrecognizedError("quotation inside nanpa")), - // nanpa construction cannot contain pi (modifier) => modifier.phrase.type !== "default" || @@ -60,24 +55,9 @@ export const NANPA_RULES: ReadonlyArray<(nanpa: Nanpa) => boolean> = [ throwError(new UnrecognizedError("nanpa inside nanpa")), // nanpa cannot have emphasis particle - (modifier) => { - const { phrase } = modifier; - switch (phrase.type) { - case "preposition": - case "preverb": - case "default": - return phrase.emphasis == null; - case "quotation": - return true; - } - }, + (modifier) => modifier.phrase.emphasis == null, ]; export const MODIFIER_RULES: ReadonlyArray<(modifier: Modifier) => boolean> = [ - // quotation modifier cannot exist - (modifier) => - modifier.type !== "quotation" || - throwError(new UnrecognizedError("quotation as modifier")), - // pi cannot contain preposition (modifier) => modifier.type !== "pi" || modifier.phrase.type !== "preposition" || @@ -99,8 +79,6 @@ export const MODIFIER_RULES: ReadonlyArray<(modifier: Modifier) => boolean> = [ // switch (modifier.type) { // case "default": // case "proper words": - // case "quotation": - // return false; // case "nanpa": // return everyModifierInPhrase(modifier.phrase).some(checker); // case "pi": @@ -116,21 +94,7 @@ export const MODIFIER_RULES: ReadonlyArray<(modifier: Modifier) => boolean> = [ // }, // pi cannot have emphasis particle - (modifier) => { - if (modifier.type === "pi") { - const { phrase } = modifier; - switch (phrase.type) { - case "default": - case "preposition": - case "preverb": - return phrase.emphasis == null; - case "quotation": - return true; - } - } else { - return true; - } - }, + (modifier) => modifier.type !== "pi" || modifier.phrase.emphasis == null, ]; export const MULTIPLE_MODIFIERS_RULES: ReadonlyArray< (modifier: ReadonlyArray) => boolean @@ -179,7 +143,6 @@ export const MULTIPLE_MODIFIERS_RULES: ReadonlyArray< } else { return []; } - case "quotation": case "proper words": case "nanpa": return []; @@ -195,11 +158,6 @@ export const MULTIPLE_MODIFIERS_RULES: ReadonlyArray< }, ]; export const PHRASE_RULE: ReadonlyArray<(phrase: Phrase) => boolean> = [ - // Disallow quotation - (phrase) => - phrase.type !== "quotation" || - throwError(new UnrecognizedError("quotation as phrase")), - // Disallow preverb modifiers other than "ala" (phrase) => phrase.type !== "preverb" || modifiersIsAlaOrNone(phrase.modifiers) || @@ -227,21 +185,14 @@ export const PHRASE_RULE: ReadonlyArray<(phrase: Phrase) => boolean> = [ // Emphasis must not be nested (phrase) => { - switch (phrase.type) { - case "preposition": - case "preverb": - case "default": - if ( - phrase.emphasis == null || - everyWordUnitInPhrase(phrase) - .every((wordUnit) => wordUnit.emphasis == null) - ) { - return true; - } else { - throw new UnrecognizedError("nested emphasis"); - } - case "quotation": - return true; + if ( + phrase.emphasis == null || + everyWordUnitInPhrase(phrase) + .every((wordUnit) => wordUnit.emphasis == null) + ) { + return true; + } else { + throw new UnrecognizedError("nested emphasis"); } }, ]; @@ -294,7 +245,6 @@ export const CLAUSE_RULE: ReadonlyArray<(clause: Clause) => boolean> = [ } break; case "prepositions": - case "quotation": return true; } if ( @@ -458,8 +408,6 @@ function hasPrepositionInPhrase(phrase: Phrase): boolean { return true; case "preverb": return hasPrepositionInPhrase(phrase.phrase); - case "quotation": - return false; } } function phraseHasTopLevelEmphasis(phrase: Phrase): boolean { @@ 
-468,7 +416,5 @@ function phraseHasTopLevelEmphasis(phrase: Phrase): boolean { case "preverb": case "preposition": return phrase.emphasis != null; - case "quotation": - return false; } } diff --git a/src/parser/parser.ts b/src/parser/parser.ts index ad776323..73bc9719 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -574,9 +574,7 @@ const clause = choice( prepositions, })), subjectPhrases - .filter((phrases) => - phrases.type !== "single" || phrases.phrase.type !== "quotation" - ) + .filter((phrases) => phrases.type !== "single") .map((phrases) => ({ type: "phrases", phrases })), subjectPhrases .skip(specificWord("o")) diff --git a/src/translator/clause.ts b/src/translator/clause.ts index 6306a4de..0c1b5b1a 100644 --- a/src/translator/clause.ts +++ b/src/translator/clause.ts @@ -119,7 +119,6 @@ export function clause(clause: TokiPona.Clause): ArrayResult { return liClause(clause); case "prepositions": case "o clause": - case "quotation": return new ArrayResult(new TranslationTodoError(clause.type)); } } diff --git a/src/translator/modifier.ts b/src/translator/modifier.ts index ce69d3e1..bc3878fa 100644 --- a/src/translator/modifier.ts +++ b/src/translator/modifier.ts @@ -217,8 +217,6 @@ function modifier( return piModifier(modifier.phrase); case "nanpa": return nanpaModifier(modifier); - case "quotation": - return new ArrayResult(new TranslationTodoError(modifier.type)); } } export function multipleModifiers( diff --git a/src/translator/phrase.ts b/src/translator/phrase.ts index 07c37e56..95486598 100644 --- a/src/translator/phrase.ts +++ b/src/translator/phrase.ts @@ -219,7 +219,6 @@ export function phrase( return defaultPhrase({ ...options, phrase }); case "preverb": case "preposition": - case "quotation": return new ArrayResult(new TranslationTodoError(phrase.type)); } } From 2683ba218acdb1b7fcfd75c8f680dc0ffafdb5f5 Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 7 Mar 2025 12:53:07 +0800 Subject: [PATCH 159/199] remove quotation node --- src/parser/ast.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/parser/ast.ts b/src/parser/ast.ts index e1b040d2..1c2f22f8 100644 --- a/src/parser/ast.ts +++ b/src/parser/ast.ts @@ -98,11 +98,6 @@ export type Sentence = punctuation: string; interrogative: null | "seme" | "x ala x"; }>; -export type Quotation = Readonly<{ - sentences: ReadonlyArray; - leftMark: string; - rightMark: string; -}>; export type MultipleSentences = | Readonly<{ type: "single word"; word: string }> | Readonly<{ type: "sentences"; sentences: ReadonlyArray }>; From f38e47eb985588f9c1f13c95ce59be2db5033423 Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 7 Mar 2025 12:54:41 +0800 Subject: [PATCH 160/199] fix oversight --- src/parser/composer.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/parser/composer.ts b/src/parser/composer.ts index 23260493..b56911a7 100644 --- a/src/parser/composer.ts +++ b/src/parser/composer.ts @@ -11,7 +11,6 @@ import { Phrase, Predicate, Preposition, - Quotation, Sentence, SimpleWordUnit, WordUnit, @@ -204,10 +203,6 @@ export function sentence(sentence: Sentence): string { } return `${text}${sentence.punctuation}`; } -export function quotation(quotation: Quotation): string { - const text = quotation.sentences.map(sentence).join(" "); - return `${quotation.leftMark}${text}${quotation.rightMark}`; -} export function multipleSentences(sentences: MultipleSentences): string { switch (sentences.type) { case "single word": From ba86e40f6008e4c665723bbb0bc7e1744f7858ab Mon Sep 17 00:00:00 2001 From: Koko Date: Fri, 7 Mar 
2025 13:25:34 +0800 Subject: [PATCH 161/199] conjugate before escaping --- dictionary/parser.ts | 144 ++++++++++++++++++++++++------------------- 1 file changed, 82 insertions(+), 62 deletions(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index f0747081..d3aedd2f 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -63,7 +63,7 @@ function lex(parser: Parser): Parser { return parser.skip(spaces); } const backtick = matchString("`", "backtick"); -const word = allAtLeastOnce( +const unescapedWord = allAtLeastOnce( choiceOnlyOne( match(WORDS, "word"), backtick @@ -76,8 +76,8 @@ const word = allAtLeastOnce( .map((word) => word.join("").replaceAll(/\s+/g, " ").trim()) .filter((word) => word !== "" || throwError(new ArrayResultError("missing word")) - ) - .map(escapeHtml); + ); +const word = unescapedWord.map(escapeHtml); const slash = lex(matchString("/", "slash")); const forms = sequence(word, all(slash.with(word))) .map(([first, rest]) => [first, ...rest]); @@ -131,70 +131,90 @@ function detectRepetition( `"${source.join("/")}" has no repetition pattern found`, ); } -const nounOnly = sequence( - word, - optionalAll(slash.with(word)), - tag( - keyword("n") - .with(sequence(optionalAll(keyword("gerund")), optionalNumber)), - ), -) - .map(([first, second, [gerund, number]]) => { - let singular: null | string; - let plural: null | string; - switch (number) { - case null: { - if (second == null) { - const sentence = nlp(first); - sentence.tag("Noun"); - singular = sentence - .nouns() - .toSingular() - .text(); - plural = sentence - .nouns() - .toPlural() - .text(); - if (singular === "" || plural === "") { - throw new ArrayResultError( - `no singular or plural form found for "${first}". consider ` + - "providing both singular and plural forms instead", - ); - } - if (first !== singular) { - throw new ArrayResultError( - `conjugation error: "${first}" is not "${singular}". ` + - "consider providing both singular and plural forms instead", - ); - } - } else { - singular = first; - plural = second; +const nounOnly = choiceOnlyOne( + sequence( + unescapedWord, + tag( + keyword("n") + .with(optionalAll(keyword("gerund"))), + ), + ) + .map( + ([noun, gerund]) => { + const sentence = nlp(noun); + sentence.tag("Noun"); + const singular = sentence + .nouns() + .toSingular() + .text(); + const plural = sentence + .nouns() + .toPlural() + .text(); + if (singular === "" || plural === "") { + throw new ArrayResultError( + `no singular or plural form found for "${noun}". consider ` + + "providing both singular and plural forms instead", + ); } - break; - } - case "singular": - case "plural": - if (second != null) { + if (noun !== singular) { throw new ArrayResultError( - "number inside tag may not be provided when two forms of noun " + - "are already provided", + `conjugation error: "${noun}" is not "${singular}". 
` + + "consider providing both singular and plural forms instead", ); } + return { + singular: escapeHtml(singular), + plural: escapeHtml(plural), + gerund: gerund != null, + }; + }, + ), + sequence( + word, + tag( + keyword("n") + .with(sequence(optionalAll(keyword("gerund")), number)), + ), + ) + .map( + ([noun, [gerund, number]]) => { + let singular: null | string; + let plural: null | string; switch (number) { case "singular": - singular = first; - plural = null; - break; case "plural": - singular = null; - plural = first; + switch (number) { + case "singular": + singular = noun; + plural = null; + break; + case "plural": + singular = null; + plural = noun; + break; + } break; } - break; - } - return { singular, plural, gerund: gerund != null }; - }); + return { singular, plural, gerund: gerund != null }; + }, + ), + sequence( + word, + optionalAll(slash.with(word)), + tag( + keyword("n") + .with(optionalAll(keyword("gerund"))), + ), + ) + .map( + ([singular, plural, gerund]) => ({ + singular, + plural, + gerund: gerund != null, + }), + ), +); const determinerType = choiceOnlyOne( keyword("article"), keyword("demonstrative"), @@ -285,7 +305,7 @@ function verbOnly(tagInside: Parser): Parser { presentSingular, past, })), - word + unescapedWord .skip(tag(tagInside)) .map((verb) => { const sentence = nlp(verb); @@ -308,9 +328,9 @@ function verbOnly(tagInside: Parser): Parser { ); } return { - presentPlural: conjugations.Infinitive, - presentSingular: conjugations.PresentTense, - past: conjugations.PastTense, + presentPlural: escapeHtml(conjugations.Infinitive), + presentSingular: escapeHtml(conjugations.PresentTense), + past: escapeHtml(conjugations.PastTense), }; }), ); From 21407e67be26f655d3501705625f2ee6d890032b Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 8 Mar 2025 08:31:32 +0800 Subject: [PATCH 162/199] rename ci --- .github/workflows/{deno.yml => build_and_deploy.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{deno.yml => build_and_deploy.yml} (100%) diff --git a/.github/workflows/deno.yml b/.github/workflows/build_and_deploy.yml similarity index 100% rename from .github/workflows/deno.yml rename to .github/workflows/build_and_deploy.yml From 95ac6de0e5d39b81cec0331b97c56580aecb5893 Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 8 Mar 2025 08:43:46 +0800 Subject: [PATCH 163/199] add more ci --- .github/workflows/test.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..ec7ed8e5 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,24 @@ +name: Build + +on: + push: + branches: ["master"] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Deno + uses: denoland/setup-deno@v2 + with: + deno-version: v2.x + - name: Build Dictionary + run: deno task build-dictionary + - name: Check formatting + run: deno fmt --check + - name: Lint + run: deno lint + - name: Test + run: deno test From d57fc3db70e3e3659d866985017d1bca21c04fcc Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 8 Mar 2025 08:44:01 +0800 Subject: [PATCH 164/199] update ci tools --- .github/workflows/build_and_deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_deploy.yml b/.github/workflows/build_and_deploy.yml index f6f008dc..a23a6fb5 100644 --- a/.github/workflows/build_and_deploy.yml +++ 
b/.github/workflows/build_and_deploy.yml @@ -25,7 +25,7 @@ jobs: - name: Checkout uses: actions/checkout@v4 - name: Setup Deno - uses: denoland/setup-deno@v1 + uses: denoland/setup-deno@v2 with: deno-version: v2.x - name: Build From 3b4f676b4c9e8178b36be42c3681a8d350475c2e Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 8 Mar 2025 08:45:44 +0800 Subject: [PATCH 165/199] rename CI --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ec7ed8e5..c7ef1082 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,4 +1,4 @@ -name: Build +name: Test on: push: From 163cb2a27cebcaf90e78513137d402bb162cb097 Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 8 Mar 2025 08:46:27 +0800 Subject: [PATCH 166/199] rename yet again --- .github/workflows/{test.yml => deno.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename .github/workflows/{test.yml => deno.yml} (97%) diff --git a/.github/workflows/test.yml b/.github/workflows/deno.yml similarity index 97% rename from .github/workflows/test.yml rename to .github/workflows/deno.yml index c7ef1082..afe604c1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/deno.yml @@ -1,4 +1,4 @@ -name: Test +name: Deno on: push: From 3bad87c99582b0cac0f80c7e681695ed8710db27 Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 8 Mar 2025 08:53:12 +0800 Subject: [PATCH 167/199] fix --- src/parser/parser.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 73bc9719..068d8b20 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -574,7 +574,6 @@ const clause = choice( prepositions, })), subjectPhrases - .filter((phrases) => phrases.type !== "single") .map((phrases) => ({ type: "phrases", phrases })), subjectPhrases .skip(specificWord("o")) @@ -733,3 +732,5 @@ export const parse = spaces .parser(); Parser.endCache(); + +console.log(subjectPhrases.parser()("tomo waso").unwrap()); From 3cc0f0ccb6b37037c89bd2394b6d766201f7974f Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 8 Mar 2025 08:54:42 +0800 Subject: [PATCH 168/199] remove console --- src/parser/parser.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 068d8b20..a6cfb593 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -732,5 +732,3 @@ export const parse = spaces .parser(); Parser.endCache(); - -console.log(subjectPhrases.parser()("tomo waso").unwrap()); From 863b833e9c9a54539ddc542e491614f40946bd07 Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 8 Mar 2025 09:01:27 +0800 Subject: [PATCH 169/199] more lint rules --- bundle.ts | 4 ++++ deno.json | 1 + dictionary/build.ts | 1 + src/main.ts | 3 +++ src/repl.ts | 3 +++ telo_misikeke/update.ts | 1 + 6 files changed, 13 insertions(+) diff --git a/bundle.ts b/bundle.ts index 5e03244c..c6b82dce 100644 --- a/bundle.ts +++ b/bundle.ts @@ -42,10 +42,13 @@ async function buildAll( const Dictionary = await import("./dictionary/build.ts"); await Dictionary.build(checkDictionary ?? 
true); } + // deno-lint-ignore no-console console.log("Building main.js..."); await ESBuild.build(buildOptions(minify)); + // deno-lint-ignore no-console console.log("Building done!"); } catch (error) { + // deno-lint-ignore no-console console.error(error); } } @@ -56,6 +59,7 @@ if (import.meta.main) { break; } case "watch": { + // deno-lint-ignore no-console console.log("Press ctrl+c to exit."); const watcher = Deno.watchFs(WATCH); let task = Promise.resolve(); diff --git a/deno.json b/deno.json index 4f683985..04df1e50 100644 --- a/deno.json +++ b/deno.json @@ -35,6 +35,7 @@ "camelcase", "explicit-function-return-type", "explicit-module-boundary-types", + "no-console", "no-boolean-literal-for-arguments", "no-eval", "no-inferrable-types", diff --git a/dictionary/build.ts b/dictionary/build.ts index 5825c326..77ebdbdd 100644 --- a/dictionary/build.ts +++ b/dictionary/build.ts @@ -35,6 +35,7 @@ export async function build(checkFile: boolean): Promise { } } } + // deno-lint-ignore no-console console.log("Building dictionary..."); const text = await currentPromise; const json = JSON.stringify( diff --git a/src/main.ts b/src/main.ts index ce8bbfd6..06235e98 100644 --- a/src/main.ts +++ b/src/main.ts @@ -141,6 +141,7 @@ function main(): void { errorDisplay.innerText = errorsFixable(flattenError(error)) ? DICTIONARY_LOADING_FAILED_FIXABLE_MESSAGE : DICTIONARY_LOADING_FAILED_UNFIXABLE_MESSAGE; + // deno-lint-ignore no-console console.error(error); } } @@ -212,6 +213,7 @@ function main(): void { list[property] = extractErrorMessage(item); errorList.appendChild(list); } + // deno-lint-ignore no-console console.error(error); } } @@ -285,6 +287,7 @@ function main(): void { .map(extractErrorMessage) .map((message) => `\n- ${message.replaceAll(NEWLINES, "$& ")}`); displayToCustomDictionary(asComment(`${message}${errorListMessage}`)); + // deno-lint-ignore no-console console.error(error); } }); diff --git a/src/repl.ts b/src/repl.ts index 0d602f0c..6d9c42c5 100644 --- a/src/repl.ts +++ b/src/repl.ts @@ -4,6 +4,7 @@ import { repeatArray } from "./misc.ts"; import { translate } from "./mod.ts"; if (import.meta.main) { + // deno-lint-ignore no-console console.log( "Welcome to the ilo Token REPL. 
Press ctrl+d or ctrl+c to exit.", ); @@ -20,12 +21,14 @@ if (import.meta.main) { translation.replaceAll(/<\/?strong>/g, "%c"), { entityList }, ); + // deno-lint-ignore no-console console.log( ` - ${text}`, ...repeatArray(["font-weight: bold", ""], count).flat(), ); } } catch (error) { + // deno-lint-ignore no-console console.error(error); } } diff --git a/telo_misikeke/update.ts b/telo_misikeke/update.ts index 2a3f1bcc..7bc57359 100644 --- a/telo_misikeke/update.ts +++ b/telo_misikeke/update.ts @@ -76,5 +76,6 @@ if (import.meta.main) { buildCode(file.source, file.destination, file.exportItems) ), ]); + // deno-lint-ignore no-console console.log("Updated telo misikeke."); } From 022f40799aa4a13cefde1e896a9980af84e54e54 Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 8 Mar 2025 09:08:10 +0800 Subject: [PATCH 170/199] format comment --- src/parser/filter.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/filter.ts b/src/parser/filter.ts index cc023f42..84e729ab 100644 --- a/src/parser/filter.ts +++ b/src/parser/filter.ts @@ -27,7 +27,7 @@ export const WORD_UNIT_RULES: ReadonlyArray<(wordUnit: WordUnit) => boolean> = [ throwError(new UnrecognizedError('"seme ala seme"')), ]; export const NANPA_RULES: ReadonlyArray<(nanpa: Nanpa) => boolean> = [ - // disallow _nanpa ala nanpa_ + // disallow "nanpa ala nanpa" (modifier) => modifier.nanpa.type !== "x ala x" || throwError(new UnrecognizedError('"nanpa ala nanpa"')), From 824ec391a4e3dffd7bb6b7cf6a56f65418e37584 Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 8 Mar 2025 09:19:22 +0800 Subject: [PATCH 171/199] more tests --- src/parser/{parser_test.ts => test.ts} | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) rename src/parser/{parser_test.ts => test.ts} (88%) diff --git a/src/parser/parser_test.ts b/src/parser/test.ts similarity index 88% rename from src/parser/parser_test.ts rename to src/parser/test.ts index b17a4c6d..4f1fd385 100644 --- a/src/parser/parser_test.ts +++ b/src/parser/test.ts @@ -1,4 +1,5 @@ import { assertNotEquals } from "@std/assert/not-equals"; +import { assertThrows } from "@std/assert/throws"; import { uniquePairs } from "../misc.ts"; import { parse } from "./parser.ts"; @@ -202,3 +203,26 @@ Deno.test("AST all distinct", () => { } } }); + +// Examples taken from https://telo-misikeke.gitlab.io/ +const MALFORMED_SENTENCES = [ + "pana e lukin pi ilo ni tawa sini.", + "mi pona, taso, toki mi li ken pi ike.", + "pona la, mi li jo e ilo ni a!", + "Mi pana e ilo ni tawa sina kepeken ilo", + "ilo mi pona e toki pi jan ale.", + "ni li pi pona mute a!", + "pi pona mute.", + "lipu sina li pakala en ike la, ilo mi li ken pona e ona.", + "mi en sina ken lukin e ilo mi.", + "mi wile pona e lipu mi en lipu sina", + "jan ale li li ken toki tawa mi.", + "jan li o toki tawa mi a!", + "toki e mi li pona tawa mi.", +]; + +Deno.test("parser all error", () => { + for (const sentence of MALFORMED_SENTENCES) { + assertThrows(() => parse(sentence).unwrap()); + } +}); From be14c6735798e69ac9ea6f7f19fba00055e986e3 Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 8 Mar 2025 09:23:20 +0800 Subject: [PATCH 172/199] update examples --- src/parser/test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/test.ts b/src/parser/test.ts index 4f1fd385..3bd22c1c 100644 --- a/src/parser/test.ts +++ b/src/parser/test.ts @@ -206,7 +206,7 @@ Deno.test("AST all distinct", () => { // Examples taken from https://telo-misikeke.gitlab.io/ const MALFORMED_SENTENCES = [ - "pana e lukin pi ilo ni tawa 
sini.", + "mi jan Nikola, li kama pana e lukin pi ilo ni tawa sini.", "mi pona, taso, toki mi li ken pi ike.", "pona la, mi li jo e ilo ni a!", "Mi pana e ilo ni tawa sina kepeken ilo", From 1ecbc7c998325dae95582942ec350c5f2181dc9e Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 8 Mar 2025 09:31:16 +0800 Subject: [PATCH 173/199] refactor --- src/translator/predicate.ts | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/translator/predicate.ts b/src/translator/predicate.ts index cb95719b..be3458ae 100644 --- a/src/translator/predicate.ts +++ b/src/translator/predicate.ts @@ -1,5 +1,5 @@ import { ArrayResult } from "../array_result.ts"; -import { nullableAsArray } from "../misc.ts"; +import { nullableAsArray, throwError } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import { AdjectiveWithInWay } from "./adjective.ts"; import * as English from "./ast.ts"; @@ -116,14 +116,11 @@ function associatedPredicate( preposition: ReadonlyArray, ): ArrayResult { return ArrayResult.from(() => { - let verbObject: ArrayResult; - if (object == null) { - verbObject = new ArrayResult([phraseAsVerb(predicate)]); - } else if (object.type === "noun") { - verbObject = predicateVerb(predicate, object.noun); - } else { - throw new UntranslatableError(object.type, "object"); - } + const verbObject = object == null + ? new ArrayResult([phraseAsVerb(predicate)]) + : object.type === "noun" + ? predicateVerb(predicate, object.noun) + : throwError(new UntranslatableError(object.type, "object")); return verbObject.map((verbObject) => ({ ...verbObject, preposition: [...verbObject.preposition, ...preposition], From 78826c78478cfbc5c7ec5fe7d5a393bdbead6bb4 Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 8 Mar 2025 10:11:29 +0800 Subject: [PATCH 174/199] refactor --- dictionary/parser.ts | 3 +-- src/dictionary.ts | 18 +++++++------- src/main.ts | 2 +- src/parser/composer.ts | 2 +- src/parser/filter.ts | 40 +++++++++++++++---------------- src/parser/lexer.ts | 5 ++-- src/parser/parser.ts | 44 ++++++++++++++++------------------ src/parser/parser_lib.ts | 6 ++--- src/settings_frontend.ts | 2 +- src/translator/composer.ts | 4 ++-- src/translator/determiner.ts | 12 ++++------ src/translator/modifier.ts | 41 ++++++++++++++++++------------- src/translator/noun.ts | 2 +- src/translator/verb.ts | 17 +++++++------ telo_misikeke/telo_misikeke.js | 6 ++--- telo_misikeke/update.ts | 4 ++-- 16 files changed, 104 insertions(+), 104 deletions(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index d3aedd2f..79b85cf7 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -68,8 +68,7 @@ const unescapedWord = allAtLeastOnce( match(WORDS, "word"), backtick .with(character) - .skip(backtick) - .map((character) => character), + .skip(backtick), comment.map(() => ""), ), ) diff --git a/src/dictionary.ts b/src/dictionary.ts index e67ff4f3..d2e5b815 100644 --- a/src/dictionary.ts +++ b/src/dictionary.ts @@ -36,19 +36,19 @@ function update(): void { } redefineSet( contentWordSet, - (definition) => - definition.type !== "filler" && - definition.type !== "particle definition", + ({ type }) => + type !== "filler" && + type !== "particle definition", ); - redefineSetWithTypes(prepositionSet, "preposition"); + redefineSetWithType(prepositionSet, "preposition"); redefineSet( preverbSet, (definition) => (definition.type === "verb" && definition.predicateType != null) || definition.type === "modal verb", ); - redefineSetWithTypes(fillerSet, "filler"); - redefineSetWithTypes(numeralSet, 
"numeral"); + redefineSetWithType(fillerSet, "filler"); + redefineSetWithType(numeralSet, "numeral"); redefineSet(tokiPonaWordSet, () => true); } function redefineSet( @@ -62,9 +62,9 @@ function redefineSet( } } } -function redefineSetWithTypes( +function redefineSetWithType( set: Set, - type: Definition["type"], + compareType: Definition["type"], ): void { - redefineSet(set, (definition) => definition.type === type); + redefineSet(set, ({ type }) => type === compareType); } diff --git a/src/main.ts b/src/main.ts index 06235e98..92f9eb10 100644 --- a/src/main.ts +++ b/src/main.ts @@ -141,7 +141,7 @@ function main(): void { errorDisplay.innerText = errorsFixable(flattenError(error)) ? DICTIONARY_LOADING_FAILED_FIXABLE_MESSAGE : DICTIONARY_LOADING_FAILED_UNFIXABLE_MESSAGE; - // deno-lint-ignore no-console + // deno-lint-ignore no-console console.error(error); } } diff --git a/src/parser/composer.ts b/src/parser/composer.ts index b56911a7..605d66ac 100644 --- a/src/parser/composer.ts +++ b/src/parser/composer.ts @@ -131,7 +131,7 @@ export function multiplePredicates( case "associated": { return [ multiplePhrases(predicates.predicates, andParticle), - ...nullableAsArray(predicates.objects).map((_) => "e"), + ...nullableAsArray(predicates.objects).map(() => "e"), ...nullableAsArray(predicates.objects) .map((objects) => multiplePhrases(objects, "e")), ...predicates.prepositions.map(preposition), diff --git a/src/parser/filter.ts b/src/parser/filter.ts index 84e729ab..67e28209 100644 --- a/src/parser/filter.ts +++ b/src/parser/filter.ts @@ -28,34 +28,34 @@ export const WORD_UNIT_RULES: ReadonlyArray<(wordUnit: WordUnit) => boolean> = [ ]; export const NANPA_RULES: ReadonlyArray<(nanpa: Nanpa) => boolean> = [ // disallow "nanpa ala nanpa" - (modifier) => - modifier.nanpa.type !== "x ala x" || + ({ nanpa: { type } }) => + type !== "x ala x" || throwError(new UnrecognizedError('"nanpa ala nanpa"')), // nanpa construction cannot contain preposition - (modifier) => - modifier.phrase.type !== "preposition" || + ({ phrase: { type } }) => + type !== "preposition" || throwError(new UnrecognizedError("preposition inside nanpa")), // nanpa construction cannot contain preverb - (modifier) => - modifier.phrase.type !== "preverb" || + ({ phrase: { type } }) => + type !== "preverb" || throwError(new UnrecognizedError("preverb inside nanpa")), // nanpa construction cannot contain pi - (modifier) => - modifier.phrase.type !== "default" || - modifier.phrase.modifiers.every((modifier) => modifier.type !== "pi") || + ({ phrase }) => + phrase.type !== "default" || + phrase.modifiers.every(({ type }) => type !== "pi") || throwError(new UnrecognizedError("pi inside nanpa")), // nanpa construction cannot contain nanpa - (modifier) => - modifier.phrase.type !== "default" || - modifier.phrase.modifiers.every((modifier) => modifier.type !== "nanpa") || + ({ phrase }) => + phrase.type !== "default" || + phrase.modifiers.every(({ type }) => type !== "nanpa") || throwError(new UnrecognizedError("nanpa inside nanpa")), // nanpa cannot have emphasis particle - (modifier) => modifier.phrase.emphasis == null, + ({ phrase: { emphasis } }) => emphasis == null, ]; export const MODIFIER_RULES: ReadonlyArray<(modifier: Modifier) => boolean> = [ // pi cannot contain preposition @@ -101,18 +101,18 @@ export const MULTIPLE_MODIFIERS_RULES: ReadonlyArray< > = [ // // no multiple pi // (modifiers) => - // modifiers.filter((modifier) => modifier.type === "pi").length <= 1 || + // modifiers.filter(({type}) => type === "pi").length <= 1 || // 
throwError(new UnrecognizedError("multiple pi")), // no multiple nanpa (modifiers) => - modifiers.filter((modifier) => modifier.type === "nanpa").length <= 1 || + modifiers.filter(({ type }) => type === "nanpa").length <= 1 || throwError(new UnrecognizedError("multiple nanpa")), // no multiple proper words (modifiers) => modifiers - .filter((modifier) => modifier.type === "proper words") + .filter(({ type }) => type === "proper words") .length <= 1 || throwError(new UnrecognizedError("multiple proper words")), @@ -188,7 +188,7 @@ export const PHRASE_RULE: ReadonlyArray<(phrase: Phrase) => boolean> = [ if ( phrase.emphasis == null || everyWordUnitInPhrase(phrase) - .every((wordUnit) => wordUnit.emphasis == null) + .every(({ emphasis }) => emphasis == null) ) { return true; } else { @@ -224,7 +224,7 @@ export const PREPOSITION_RULE: ReadonlyArray<(phrase: Preposition) => boolean> = (preposition) => preposition.emphasis == null || everyWordUnitInPreposition(preposition) - .every((wordUnit) => wordUnit.emphasis == null) || + .every(({ emphasis }) => emphasis == null) || throwError(new UnrecognizedError("nested emphasis")), ]; export const CLAUSE_RULE: ReadonlyArray<(clause: Clause) => boolean> = [ @@ -320,7 +320,7 @@ export const SENTENCE_RULE: ReadonlyArray<(sentence: Sentence) => boolean> = [ // There can't be more than 1 "x ala x" or "seme" (sentence) => { - if (sentence.interrogative) { + if (sentence.interrogative != null) { const interrogative = everyWordUnitInSentence(sentence) .filter((wordUnit) => { switch (wordUnit.type) { @@ -347,7 +347,7 @@ export const MULTIPLE_SENTENCES_RULE: ReadonlyArray< > = [ // Only allow at most 2 sentences (sentences) => - sentences.filter((sentence) => sentence.type !== "filler").length <= 2 || + sentences.filter(({ type }) => type !== "filler").length <= 2 || throwError(new UnrecognizedError("multiple sentences")), ]; export function filter( diff --git a/src/parser/lexer.ts b/src/parser/lexer.ts index 00c6ca1d..ab5bf372 100644 --- a/src/parser/lexer.ts +++ b/src/parser/lexer.ts @@ -133,7 +133,7 @@ const cartoucheElement = choiceOnlyOne( throw new UnrecognizedError("excess dots"); } }), - singleUcsurWord.map((word) => word[0]), + singleUcsurWord.map(([letter]) => letter), match(/[a-zA-Z]/, "Latin letter") .map((letter) => letter.toLowerCase()) .skip(spaces), @@ -160,8 +160,7 @@ function longContainer( ): Parser { return specificSpecialUcsur(left) .with(inside) - .skip(specificSpecialUcsur(right)) - .map((inside) => inside); + .skip(specificSpecialUcsur(right)); } const longSpaceContainer = longContainer( START_OF_LONG_GLYPH, diff --git a/src/parser/parser.ts b/src/parser/parser.ts index a6cfb593..9c294748 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -284,8 +284,7 @@ const pi = choice( specificToken("headed long glyph start") .filter(({ words }) => filterCombinedGlyphs(words, "pi")) .with(phrase) - .skip(specificToken("headless long glyph end")) - .map((phrase) => phrase), + .skip(specificToken("headless long glyph end")), specificWord("pi").with(phrase), ); const modifiers = sequence( @@ -395,23 +394,23 @@ const preposition = choice( })), sequence( specificToken("headed long glyph start") - .map((words) => { - if (words.words.length > 2) { + .map(({ words }) => { + if (words.length > 2) { throw new UnrecognizedError( - `combined glyphs of ${words.words.length} words`, + `combined glyphs of ${words.length} words`, ); } else { - const word = words.words[0]; + const [word] = words; if (!prepositionSet.has(word)) { throw new 
UnrecognizedError(`"${word}" as preposition`); } else { - return words.words; + return words; } } }), phrase, - specificToken("headless long glyph end"), ) + .skip(specificToken("headless long glyph end")) .map(([words, phrase]) => { const modifiers = words .slice(1) @@ -617,16 +616,16 @@ const la = choice( ); const filler = choice( specificToken("space long glyph") - .map((longGlyph) => - longGlyph.words.length === 1 + .map(({ words, spaceLength }) => + words.length === 1 ? { type: "long word", - word: longGlyph.words[0], - length: longGlyph.spaceLength, + word: words[0], + length: spaceLength, } : throwError( new UnexpectedError( - describe({ type: "combined glyphs", words: longGlyph.words }), + describe({ type: "combined glyphs", words: words }), "simple glyph", ), ) @@ -690,16 +689,15 @@ const sentence = choice( interrogative: null, }; const wordUnits = everyWordUnitInSentence(sentence); - const interrogative = - wordUnits.some((wordUnit) => wordUnit.type === "x ala x") - ? "x ala x" - : wordUnits.some((wordUnit) => - (wordUnit.type === "default" || - wordUnit.type === "reduplication") && - wordUnit.word === "seme" - ) - ? "seme" - : null; + const interrogative = wordUnits.some(({ type }) => type === "x ala x") + ? "x ala x" + : wordUnits.some((wordUnit) => + (wordUnit.type === "default" || + wordUnit.type === "reduplication") && + wordUnit.word === "seme" + ) + ? "seme" + : null; return { ...sentence, interrogative }; }, ) diff --git a/src/parser/parser_lib.ts b/src/parser/parser_lib.ts index 313daae4..8be61820 100644 --- a/src/parser/parser_lib.ts +++ b/src/parser/parser_lib.ts @@ -265,9 +265,9 @@ export function withSource( parser: Parser, ): Parser { return new Parser((src) => - parser.unmemoizedParser(src).map((value) => ({ - value: [value.value, src.slice(0, src.length - value.rest.length)], - rest: value.rest, + parser.unmemoizedParser(src).map(({ value, rest }) => ({ + value: [value, src.slice(0, src.length - rest.length)], + rest, })) ); } diff --git a/src/settings_frontend.ts b/src/settings_frontend.ts index 3361a2d1..165a8eb7 100644 --- a/src/settings_frontend.ts +++ b/src/settings_frontend.ts @@ -36,7 +36,7 @@ const REDUNDANCY_UPDATER: Updater = { ? 
value as RedundancySettings : null, stringify: (value) => value, - load: (input) => input.value as RedundancySettings, + load: ({ value }) => value as RedundancySettings, set: (input, value) => { input.value = value; }, diff --git a/src/translator/composer.ts b/src/translator/composer.ts index 7f37540c..6dd92339 100644 --- a/src/translator/composer.ts +++ b/src/translator/composer.ts @@ -30,11 +30,11 @@ export function noun(phrases: English.NounPhrase, depth: number): string { switch (phrases.type) { case "simple": { const text = [ - ...phrases.determiner.map((determiner) => word(determiner.determiner)), + ...phrases.determiner.map(({ determiner }) => word(determiner)), ...phrases.adjective.map(adjective), word(phrases.noun), ...nullableAsArray(phrases.postAdjective) - .map((adjective) => `${adjective.adjective} ${adjective.name}`), + .map(({ adjective, name }) => `${adjective} ${name}`), ...phrases.preposition.map(preposition), ] .join(" "); diff --git a/src/translator/determiner.ts b/src/translator/determiner.ts index 3a5fd5ad..f8565421 100644 --- a/src/translator/determiner.ts +++ b/src/translator/determiner.ts @@ -10,21 +10,19 @@ import { word } from "./word.ts"; function prettyPrintDeterminers( determiners: ReadonlyArray, ): string { - return `(${ - determiners.map((determiner) => determiner.determiner).join(` `) - })`; + return `(${determiners.map(({ determiner }) => determiner).join(` `)})`; } function filterKind( determiners: ReadonlyArray, kinds: ReadonlyArray, ): ReadonlyArray { - return determiners.filter((determiner) => kinds.includes(determiner.kind)); + return determiners.filter(({ kind }) => kinds.includes(kind)); } function filterQuantity( determiners: ReadonlyArray, - quantity: Dictionary.Quantity, + targetQuantity: Dictionary.Quantity, ): ReadonlyArray { - return determiners.filter((determiner) => determiner.quantity === quantity); + return determiners.filter(({ quantity }) => quantity === targetQuantity); } function check( quantities: ReadonlyArray, @@ -37,7 +35,7 @@ function check( export function findNumber( determiners: ReadonlyArray, ): Dictionary.Quantity { - const quantities = determiners.map((determiner) => determiner.quantity); + const quantities = determiners.map(({ quantity }) => quantity); if (quantities.every((quantity) => quantity === `both`)) { return "both"; } else if (check(quantities, "singular", "plural")) { diff --git a/src/translator/modifier.ts b/src/translator/modifier.ts index bc3878fa..257ca36d 100644 --- a/src/translator/modifier.ts +++ b/src/translator/modifier.ts @@ -225,25 +225,32 @@ export function multipleModifiers( return ArrayResult.combine(...modifiers.map(modifier)) .flatMap((modifiers) => { const noun = modifiers - .filter((modifier) => modifier.type === "noun") - .map((modifier) => modifier.noun); + .flatMap((modifier) => modifier.type === "noun" ? 
[modifier.noun] : []); + const nounPreposition = modifiers - .filter((modifier) => modifier.type === "noun preposition"); - const determiner = modifiers - .filter((modifier) => modifier.type === "determiner") - .map((modifier) => modifier.determiner); - const adjective = modifiers - .filter((modifier) => modifier.type === "adjective") - .map((modifier) => modifier.adjective); - const adverb = modifiers - .filter((modifier) => modifier.type === "adverb") - .map((modifier) => modifier.adverb); + .filter(({ type }) => type === "noun preposition") as ReadonlyArray< + ModifierTranslation & { type: "noun preposition" } + >; + + const determiner = modifiers.flatMap((modifier) => + modifier.type === "determiner" ? [modifier.determiner] : [] + ); + + const adjective = modifiers.flatMap((modifier) => + modifier.type === "adjective" ? [modifier.adjective] : [] + ); + + const adverb = modifiers.flatMap((modifier) => + modifier.type === "adverb" ? [modifier.adverb] : [] + ); + const name = modifiers - .filter((modifier) => modifier.type === "name") - .map((modifier) => modifier.name); - const inPositionPhrase = modifiers - .filter((modifier) => modifier.type === "in position phrase") - .map((modifier) => modifier.noun); + .flatMap((modifier) => modifier.type === "name" ? [modifier.name] : []); + + const inPositionPhrase = modifiers.flatMap((modifier) => + modifier.type === "in position phrase" ? [modifier.noun] : [] + ); + let adjectival: ArrayResult; if ( noun.length <= 1 && diff --git a/src/translator/noun.ts b/src/translator/noun.ts index f460e060..3cb25470 100644 --- a/src/translator/noun.ts +++ b/src/translator/noun.ts @@ -87,7 +87,7 @@ export function fromNounForms( export function simpleNounForms( nounForms: Dictionary.NounForms, ): ArrayResult { - return fromNounForms(nounForms, "both").map((noun) => noun.noun); + return fromNounForms(nounForms, "both").map(({ noun }) => noun); } export function noun( options: Readonly<{ diff --git a/src/translator/verb.ts b/src/translator/verb.ts index 9320cca7..6fb6dfb1 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -62,15 +62,13 @@ export function partialVerb( }); const preposition = ArrayResult.combine( ...definition.indirectObject - .flatMap((indirectObject) => + .flatMap(({ object, preposition }) => noun({ - definition: indirectObject.object, + definition: object, reduplicationCount: 1, emphasis: false, }) - .map((object) => - nounAsPreposition(object, indirectObject.preposition) - ) + .map((object) => nounAsPreposition(object, preposition)) ), ); return ArrayResult.combine(object, preposition) @@ -100,7 +98,8 @@ export function everyPartialVerb( export function forObject(verb: PartialCompoundVerb): boolean | string { const [{ forObject }, ...rest] = everyPartialVerb(verb); if ( - forObject !== false && rest.every((verb) => forObject === verb.forObject) + forObject !== false && + rest.every(({ forObject: otherForObject }) => forObject === otherForObject) ) { return forObject; } else { @@ -163,11 +162,11 @@ export function fromVerbForms( verb = new ArrayResult([{ modal: null, infinite: present }]); break; } - return verb.map((verb) => { + return verb.map(({ modal, infinite }) => { return { - modal: mapNullable(verb.modal, unemphasized), + modal: mapNullable(modal, unemphasized), finite: [], - infinite: word({ ...options, word: verb.infinite }), + infinite: word({ ...options, word: infinite }), }; }); } diff --git a/telo_misikeke/telo_misikeke.js b/telo_misikeke/telo_misikeke.js index 82bf0c05..29354fd1 100644 --- 
a/telo_misikeke/telo_misikeke.js +++ b/telo_misikeke/telo_misikeke.js @@ -10,8 +10,8 @@ const RULES = build_rules(LINKU); export function errors(text) { return new ParserWithCallbacks(RULES, false) .tokenize(text) - .filter((token) => RULES[token.ruleName].category === "error") - .map((token) => - `"${escape(token.text)}" ${getMessage(token.ruleName, token.match)}` + .filter(({ ruleName }) => RULES[ruleName].category === "error") + .map(({ text, match }) => + `"${escape(text)}" ${getMessage(token.ruleName, match)}` ); } diff --git a/telo_misikeke/update.ts b/telo_misikeke/update.ts index 7bc57359..a3f30872 100644 --- a/telo_misikeke/update.ts +++ b/telo_misikeke/update.ts @@ -72,8 +72,8 @@ if (import.meta.main) { await Promise.all([ buildSonaLinku(), ...SOURCE - .map((file) => - buildCode(file.source, file.destination, file.exportItems) + .map(({ source, destination, exportItems }) => + buildCode(source, destination, exportItems) ), ]); // deno-lint-ignore no-console From cbd51144802e05590f1064a8199b26cfadfc8c91 Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 8 Mar 2025 10:19:38 +0800 Subject: [PATCH 175/199] small refactor --- dictionary/parser.ts | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index 79b85cf7..83dce8ad 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -32,30 +32,8 @@ import { VerbForms, } from "./type.ts"; -const RESERVED_SYMBOLS = [ - "#", - "(", - ")", - "*", - "+", - "/", - ":", - ";", - "<", - "=", - ">", - "@", - "[", - "\\", - "]", - "^", - "`", - "{", - "|", - "}", - "~", -]; -const WORDS = new RegExp(`[^${escapeRegex(RESERVED_SYMBOLS.join(""))}]`); +const RESERVED_SYMBOLS = "#()*+/:;<=>@[\\]^`{|}~"; +const WORDS = new RegExp(`[^${escapeRegex(RESERVED_SYMBOLS)}]`); const comment = match(/#[^\n\r]*/, "comment"); const spaces = sourceOnly(all(choiceOnlyOne(match(/\s/, "space"), comment))); From dc065ba0258f2e3ac2fa67a036a9e2304453450d Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 8 Mar 2025 10:36:03 +0800 Subject: [PATCH 176/199] fix --- bundle.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bundle.ts b/bundle.ts index c6b82dce..c3da1e23 100644 --- a/bundle.ts +++ b/bundle.ts @@ -8,10 +8,10 @@ const WATCH = [ "./dictionary/misc.ts", "./dictionary/parser.ts", "./dictionary/type.ts", - "./telo_misikeke/linku-data.json", + "./telo_misikeke/linku_data.json", "./telo_misikeke/Parser.js", "./telo_misikeke/rules.js", - "./telo_misikeke/telo-misikeke.js", + "./telo_misikeke/telo_misikeke.js", "./src/", "./project-data.json", ]; From 08c88d9884af9bc260104e96dbada69a27b3028b Mon Sep 17 00:00:00 2001 From: Koko Date: Sat, 8 Mar 2025 11:51:12 +0800 Subject: [PATCH 177/199] improve error message --- dictionary/parser.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dictionary/parser.ts b/dictionary/parser.ts index 83dce8ad..f204729f 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -295,7 +295,10 @@ function verbOnly(tagInside: Parser): Parser { FutureTense: string; }; if (conjugations == null) { - throw new ArrayResultError(`no verb conjugation found for "${verb}"`); + throw new ArrayResultError( + `no verb conjugation found for "${verb}". 
consider providing all ` + + "conjugations instead", + ); } if (verb !== conjugations.Infinitive) { throw new ArrayResultError( From b2af22e84ed76ee62509c8444cc39ad43d7a4418 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 07:07:33 +0800 Subject: [PATCH 178/199] small refactor --- src/dictionary.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dictionary.ts b/src/dictionary.ts index d2e5b815..a7423285 100644 --- a/src/dictionary.ts +++ b/src/dictionary.ts @@ -56,8 +56,8 @@ function redefineSet( filter: (definition: Definition) => boolean, ): void { set.clear(); - for (const [word, entry] of dictionary) { - if (entry.definitions.some(filter)) { + for (const [word, { definitions }] of dictionary) { + if (definitions.some(filter)) { set.add(word); } } From c94965d289df15afb32f3d907ba1acb491f1eda0 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 07:08:19 +0800 Subject: [PATCH 179/199] small refactor --- src/dictionary.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dictionary.ts b/src/dictionary.ts index a7423285..af70bc15 100644 --- a/src/dictionary.ts +++ b/src/dictionary.ts @@ -64,7 +64,7 @@ function redefineSet( } function redefineSetWithType( set: Set, - compareType: Definition["type"], + type: Definition["type"], ): void { - redefineSet(set, ({ type }) => type === compareType); + redefineSet(set, ({ type: compareType }) => compareType === type); } From 1633e398fcade429c1b86b3d3edda9976b74ed74 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 08:22:59 +0800 Subject: [PATCH 180/199] improve verb ast node --- src/translator/ast.ts | 5 +++-- src/translator/clause.ts | 4 ++-- src/translator/composer.ts | 9 +++++---- src/translator/verb.ts | 22 +++++++++++----------- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/translator/ast.ts b/src/translator/ast.ts index a09e8e73..22a37545 100644 --- a/src/translator/ast.ts +++ b/src/translator/ast.ts @@ -47,8 +47,9 @@ export type Complement = | Readonly<{ type: "adjective"; adjective: AdjectivePhrase }>; export type Verb = Readonly<{ modal: null | Word; - finite: ReadonlyArray; - infinite: Word; + // TODO: better name + first: null | Word; + rest: ReadonlyArray; }>; export type VerbPhrase = | Readonly<{ diff --git a/src/translator/clause.ts b/src/translator/clause.ts index 0c1b5b1a..e28d0e4e 100644 --- a/src/translator/clause.ts +++ b/src/translator/clause.ts @@ -49,8 +49,8 @@ function phraseClause( adverb: [], verb: { modal: null, - finite: [], - infinite: unemphasized("is"), + first: unemphasized("is"), + rest: [], }, subjectComplement: { type: "adjective", diff --git a/src/translator/composer.ts b/src/translator/composer.ts index 6dd92339..3f7e0bf4 100644 --- a/src/translator/composer.ts +++ b/src/translator/composer.ts @@ -84,13 +84,14 @@ export function verb(phrase: English.VerbPhrase, depth: number): string { let text: string; switch (phrase.type) { case "default": { - const { modal, finite, infinite } = phrase.verb; + const { modal, first, rest } = phrase.verb; const verbText = !phrase.hideVerb ? 
[ - ...nullableAsArray(modal).map(word), - ...finite.map(word), - word(infinite), + ...nullableAsArray(modal), + ...nullableAsArray(first), + ...rest, ] + .map(word) : []; text = [ ...phrase.adverb.map(word), diff --git a/src/translator/verb.ts b/src/translator/verb.ts index 6fb6dfb1..414ca6c8 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -127,46 +127,46 @@ export function fromVerbForms( quantity !== "singular" || (!is && perspective !== "third") ? [pastPlural, verbForms.presentPlural] : [pastSingular, presentSingular]; - let verb: ArrayResult<{ modal: null | string; infinite: string }>; + let verb: ArrayResult<{ modal: null | string; verb: string }>; switch (settings.tense) { case "condensed": if (is) { if (quantity === "condensed") { verb = new ArrayResult([{ modal: null, - infinite: "is/are/was/were/will be", + verb: "is/are/was/were/will be", }]); } else { verb = new ArrayResult([{ modal: null, - infinite: `${present}/${past}/will be`, + verb: `${present}/${past}/will be`, }]); } } else { verb = new ArrayResult([{ modal: "(will)", - infinite: condenseVerb(present, past), + verb: condenseVerb(present, past), }]); } break; case "both": { const future = is ? "be" : verbForms.presentPlural; verb = new ArrayResult([ - { modal: null, infinite: present }, - { modal: null, infinite: past }, - { modal: "will", infinite: future }, + { modal: null, verb: present }, + { modal: null, verb: past }, + { modal: "will", verb: future }, ]); break; } case "default only": - verb = new ArrayResult([{ modal: null, infinite: present }]); + verb = new ArrayResult([{ modal: null, verb: present }]); break; } - return verb.map(({ modal, infinite }) => { + return verb.map(({ modal, verb: infinite }) => { return { modal: mapNullable(modal, unemphasized), - finite: [], - infinite: word({ ...options, word: infinite }), + first: word({ ...options, word: infinite }), + rest: [], }; }); } From ce9f6b12118e4323fdba01156a78a04bf29cc79f Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 08:31:26 +0800 Subject: [PATCH 181/199] update partial verb structure --- src/translator/phrase.ts | 10 +++++++--- src/translator/predicate.ts | 30 +++++++++++++++++++++--------- src/translator/verb.ts | 10 ++++++++-- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/src/translator/phrase.ts b/src/translator/phrase.ts index 95486598..237dfb3c 100644 --- a/src/translator/phrase.ts +++ b/src/translator/phrase.ts @@ -298,11 +298,15 @@ export function phraseAsVerb( return { type: "simple", adverb: [], - presentPlural: "are", - presentSingular: "is", - past: "were", + modal: null, + first: { + presentPlural: "are", + presentSingular: "is", + past: "were", + }, wordEmphasis: false, reduplicationCount: 1, + rest: [], subjectComplement, object: null, objectComplement: null, diff --git a/src/translator/predicate.ts b/src/translator/predicate.ts index be3458ae..1ebfb9f1 100644 --- a/src/translator/predicate.ts +++ b/src/translator/predicate.ts @@ -35,11 +35,15 @@ function applyTo( return { type: "simple", adverb: [], - presentPlural: "apply", - presentSingular: "applies", - past: "applied", + modal: null, + first: { + presentPlural: "apply", + presentSingular: "applies", + past: "applied", + }, reduplicationCount: 1, wordEmphasis: false, + rest: [], subjectComplement: null, object: predicate, objectComplement: null, @@ -56,11 +60,15 @@ function turnInto( return { type: "simple", adverb: [], - presentPlural: "turn", - presentSingular: "turns", - past: "turned", + modal: null, + first: { + presentPlural: 
"turn", + presentSingular: "turns", + past: "turned", + }, reduplicationCount: 1, wordEmphasis: false, + rest: [], subjectComplement: null, object, objectComplement: null, @@ -77,11 +85,15 @@ function make( return { type: "simple", adverb: [], - presentPlural: "make", - presentSingular: "makes", - past: "made", + modal: null, + first: { + presentPlural: "make", + presentSingular: "makes", + past: "made", + }, reduplicationCount: 1, wordEmphasis: false, + rest: [], subjectComplement: null, object, objectComplement: { type: "adjective", adjective: predicate.adjective }, diff --git a/src/translator/verb.ts b/src/translator/verb.ts index 414ca6c8..78082fe3 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -14,12 +14,15 @@ export type VerbObjects = Readonly<{ preposition: ReadonlyArray; }>; export type PartialVerb = - & Dictionary.VerbForms & VerbObjects & Readonly<{ adverb: ReadonlyArray; + modal: null | English.Word; + // TODO: better name other than first and rest + first: Dictionary.VerbForms; reduplicationCount: number; wordEmphasis: boolean; + rest: ReadonlyArray; subjectComplement: null | English.Complement; forObject: boolean | string; predicateType: null | "verb" | "noun adjective"; @@ -75,8 +78,11 @@ export function partialVerb( .map(([object, preposition]) => ({ ...definition, adverb: [], + modal: null, + first: definition, reduplicationCount, wordEmphasis: emphasis, + rest: [], subjectComplement: null, object, objectComplement: null, @@ -178,7 +184,7 @@ export function verb( switch (partialVerb.type) { case "simple": { return fromVerbForms({ - verbForms: partialVerb, + verbForms: partialVerb.first, perspective, quantity, reduplicationCount: partialVerb.reduplicationCount, From 8f18be072cdf3018a609b711e254ae8bb9cc095b Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 08:36:30 +0800 Subject: [PATCH 182/199] make first verb nullable --- src/translator/verb.ts | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/translator/verb.ts b/src/translator/verb.ts index 78082fe3..2a97536f 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -19,7 +19,7 @@ export type PartialVerb = adverb: ReadonlyArray; modal: null | English.Word; // TODO: better name other than first and rest - first: Dictionary.VerbForms; + first: null | Dictionary.VerbForms; reduplicationCount: number; wordEmphasis: boolean; rest: ReadonlyArray; @@ -183,19 +183,29 @@ export function verb( ): ArrayResult { switch (partialVerb.type) { case "simple": { - return fromVerbForms({ - verbForms: partialVerb.first, - perspective, - quantity, - reduplicationCount: partialVerb.reduplicationCount, - emphasis: partialVerb.wordEmphasis, - }) - .map((verb) => ({ + const verbForms = partialVerb.first; + if (verbForms != null) { + return fromVerbForms({ + verbForms, + perspective, + quantity, + reduplicationCount: partialVerb.reduplicationCount, + emphasis: partialVerb.wordEmphasis, + }) + .map((verb) => ({ + ...partialVerb, + type: "default", + verb, + hideVerb: false, + })); + } else { + return new ArrayResult([{ ...partialVerb, + verb: { ...partialVerb, first: null }, type: "default", - verb, hideVerb: false, - })); + }]); + } } case "compound": return ArrayResult.combine( From e76e886a30e60204102758da0fcc45f2bb1457aa Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 08:41:48 +0800 Subject: [PATCH 183/199] rename --- src/translator/verb.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/translator/verb.ts 
b/src/translator/verb.ts index 2a97536f..7e5d5d69 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -168,10 +168,10 @@ export function fromVerbForms( verb = new ArrayResult([{ modal: null, verb: present }]); break; } - return verb.map(({ modal, verb: infinite }) => { + return verb.map(({ modal, verb }) => { return { modal: mapNullable(modal, unemphasized), - first: word({ ...options, word: infinite }), + first: word({ ...options, word: verb }), rest: [], }; }); From 364576d8119807006a0568983ddb53f01914e5e0 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 08:53:17 +0800 Subject: [PATCH 184/199] reorder --- src/translator/determiner.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/translator/determiner.ts b/src/translator/determiner.ts index f8565421..8dff24aa 100644 --- a/src/translator/determiner.ts +++ b/src/translator/determiner.ts @@ -7,11 +7,6 @@ import { FilteredOutError } from "./error.ts"; import { simpleNounForms } from "./noun.ts"; import { word } from "./word.ts"; -function prettyPrintDeterminers( - determiners: ReadonlyArray, -): string { - return `(${determiners.map(({ determiner }) => determiner).join(` `)})`; -} function filterKind( determiners: ReadonlyArray, kinds: ReadonlyArray, @@ -143,6 +138,11 @@ export function fixDeterminer( ); } } +function prettyPrintDeterminers( + determiners: ReadonlyArray, +): string { + return `(${determiners.map(({ determiner }) => determiner).join(` `)})`; +} function encodeDeterminer( strings: TemplateStringsArray, ...determiners: ReadonlyArray> From 22e055b8e8007bb1cebf6130eb5417494cbfd860 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 09:05:19 +0800 Subject: [PATCH 185/199] fix --- src/translator/noun.ts | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/translator/noun.ts b/src/translator/noun.ts index 3cb25470..6ab8e039 100644 --- a/src/translator/noun.ts +++ b/src/translator/noun.ts @@ -55,9 +55,19 @@ export function fromNounForms( const { singular, plural } = nounForms; switch (determinerNumber) { case "singular": - case "plural": - return new ArrayResult(nullableAsArray(singular)) + case "plural": { + let noun: null | string; + switch (determinerNumber) { + case "singular": + noun = singular; + break; + case "plural": + noun = plural; + break; + } + return new ArrayResult(nullableAsArray(noun)) .map((noun) => ({ noun, quantity: determinerNumber })); + } case "both": switch (settings.quantity) { case "both": From 568017fa064411c9aab079ecc4b3679e9a9f8f6a Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 09:49:57 +0800 Subject: [PATCH 186/199] revert --- src/array_result.ts | 19 +++++++++++-------- src/parser/parser.ts | 24 +++++++++++------------- src/translator/modifier.ts | 31 +++++++++++++++---------------- 3 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/array_result.ts b/src/array_result.ts index c16647d2..9c3bca76 100644 --- a/src/array_result.ts +++ b/src/array_result.ts @@ -114,14 +114,17 @@ export class ArrayResult { ): ArrayResult { // We resorted to using `any` types here, make sure it works properly return arrayResults.reduce( - (left: ArrayResult, right) => - left.isError() && right.isError() - ? ArrayResult.concat(left, right) - : left.isError() - ? left - : right.isError() - ? 
right - : left.flatMap((left) => right.map((right) => [...left, right])), + (left: ArrayResult, right) => { + if (left.isError() && right.isError()) { + return ArrayResult.concat(left, right); + } else if (left.isError()) { + return left; + } else if (right.isError()) { + return right; + } else { + return left.flatMap((left) => right.map((right) => [...left, right])); + } + }, new ArrayResult([[]]), ) as ArrayResult; } diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 9c294748..f7eaa7f1 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -190,19 +190,17 @@ function binaryWords( word: Set, description: string, ): Parser { - return specificToken("combined glyphs").map(({ words }) => - words.length > 2 - ? throwError( - new UnrecognizedError( - `combined glyphs of ${words.length} words`, - ), - ) - : !word.has(words[0]) - ? throwError(new UnrecognizedError(`"${words[0]}" as ${description}`)) - : !contentWordSet.has(words[1]) - ? throwError(new UnrecognizedError(`"${words[1]}" as content word`)) - : words as [string, string] - ); + return specificToken("combined glyphs").map(({ words }) => { + if (words.length > 2) { + throw new UnrecognizedError(`combined glyphs of ${words.length} words`); + } else if (!word.has(words[0])) { + throw new UnrecognizedError(`"${words[0]}" as ${description}`); + } else if (!contentWordSet.has(words[1])) { + throw new UnrecognizedError(`"${words[1]}" as content word`); + } else { + return words as [string, string]; + } + }); } function optionalCombined( word: Set, diff --git a/src/translator/modifier.ts b/src/translator/modifier.ts index 257ca36d..55703d91 100644 --- a/src/translator/modifier.ts +++ b/src/translator/modifier.ts @@ -1,6 +1,5 @@ import { ArrayResult } from "../array_result.ts"; import { dictionary } from "../dictionary.ts"; -import { throwError } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import * as Composer from "../parser/composer.ts"; import { adjective, compoundAdjective } from "./adjective.ts"; @@ -173,19 +172,18 @@ function nanpaModifier( includeGerund: true, includeVerb: false, }) - .map((phrase) => - phrase.type !== "noun" - ? throwError( - new FilteredOutError( - `${phrase.type} within "in position" phrase`, - ), - ) - : (phrase.noun as English.NounPhrase & { type: "simple" }) - .preposition.length > 0 - ? 
throwError( - new FilteredOutError('preposition within "in position" phrase'), - ) - : { + .map((phrase) => { + if (phrase.type !== "noun") { + throw new FilteredOutError( + `${phrase.type} within "in position" phrase`, + ); + } else if ( + (phrase.noun as English.NounPhrase & { type: "simple" }) + .preposition.length > 0 + ) { + throw new FilteredOutError('preposition within "in position" phrase'); + } else { + return { type: "in position phrase", noun: { type: "simple", @@ -202,8 +200,9 @@ function nanpaModifier( preposition: [], emphasis: false, }, - } - ); + }; + } + }); } function modifier( modifier: TokiPona.Modifier, From eac0b00abd630727c9d766bf38217ee0c51f04b5 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 10:02:00 +0800 Subject: [PATCH 187/199] add content clause node to verb phrase --- src/translator/ast.ts | 1 + src/translator/composer.ts | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/translator/ast.ts b/src/translator/ast.ts index 22a37545..e27abd8f 100644 --- a/src/translator/ast.ts +++ b/src/translator/ast.ts @@ -57,6 +57,7 @@ export type VerbPhrase = adverb: ReadonlyArray; verb: Verb; subjectComplement: null | Complement; + contentClause: null | Clause; object: null | NounPhrase; objectComplement: null | Complement; preposition: ReadonlyArray; diff --git a/src/translator/composer.ts b/src/translator/composer.ts index 3f7e0bf4..455d3be8 100644 --- a/src/translator/composer.ts +++ b/src/translator/composer.ts @@ -1,5 +1,6 @@ import { nullableAsArray } from "../misc.ts"; import * as English from "./ast.ts"; +import { phraseAsVerb } from "./phrase.ts"; const EMPHASIS_STARTING_TAG = ""; const EMPHASIS_ENDING_TAG = ""; @@ -97,6 +98,7 @@ export function verb(phrase: English.VerbPhrase, depth: number): string { ...phrase.adverb.map(word), ...verbText, ...nullableAsArray(phrase.subjectComplement).map(complement), + ...nullableAsArray(phrase.contentClause).map(clause), ] .join(" "); break; From 069cdcc33328cf3b36539d832c8c34dfd97a0d12 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 10:02:58 +0800 Subject: [PATCH 188/199] remove unused import --- src/translator/composer.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/translator/composer.ts b/src/translator/composer.ts index 455d3be8..c3ec6eaf 100644 --- a/src/translator/composer.ts +++ b/src/translator/composer.ts @@ -1,6 +1,5 @@ import { nullableAsArray } from "../misc.ts"; import * as English from "./ast.ts"; -import { phraseAsVerb } from "./phrase.ts"; const EMPHASIS_STARTING_TAG = ""; const EMPHASIS_ENDING_TAG = ""; From a83cf050834cea38f919d46e2da8c498f2daeadc Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 10:06:47 +0800 Subject: [PATCH 189/199] fix --- src/translator/clause.ts | 1 + src/translator/verb.ts | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/translator/clause.ts b/src/translator/clause.ts index e28d0e4e..b1ffbf74 100644 --- a/src/translator/clause.ts +++ b/src/translator/clause.ts @@ -56,6 +56,7 @@ function phraseClause( type: "adjective", adjective: phrase.adjective, }, + contentClause: null, object: null, objectComplement: null, preposition: nullableAsArray(phrase.inWayPhrase) diff --git a/src/translator/verb.ts b/src/translator/verb.ts index 7e5d5d69..7719178c 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -196,13 +196,15 @@ export function verb( ...partialVerb, type: "default", verb, + contentClause: null, hideVerb: false, })); } else { return new ArrayResult([{ ...partialVerb, - verb: { ...partialVerb, first: null }, 
type: "default", + verb: { ...partialVerb, first: null }, + contentClause: null, hideVerb: false, }]); } From 27ffc54fd37d0ced31fe3b8b54d232e079e141e8 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 12:02:12 +0800 Subject: [PATCH 190/199] implement translation of o clause, closes #60 --- src/translator/clause.ts | 94 +++++++++++++++++++++++++++++++++++++--- src/translator/noun.ts | 8 ++++ src/translator/verb.ts | 41 +++++++++++++++++- 3 files changed, 136 insertions(+), 7 deletions(-) diff --git a/src/translator/clause.ts b/src/translator/clause.ts index b1ffbf74..455959ba 100644 --- a/src/translator/clause.ts +++ b/src/translator/clause.ts @@ -3,10 +3,11 @@ import { nullableAsArray, throwError } from "../misc.ts"; import * as TokiPona from "../parser/ast.ts"; import * as English from "./ast.ts"; import { FilteredOutError, TranslationTodoError } from "./error.ts"; +import { perspective } from "./noun.ts"; import { multiplePhrases, multiplePhrasesAsNoun } from "./phrase.ts"; import { predicate } from "./predicate.ts"; import { nounAsPreposition } from "./preposition.ts"; -import { verb } from "./verb.ts"; +import { addModalToAll, verb } from "./verb.ts"; import { unemphasized } from "./word.ts"; function phraseClause( @@ -85,10 +86,7 @@ function liClause( predicate(clause.predicates, "li"), ) .flatMap(([subject, predicate]) => { - const perspective = subject.type === "simple" - ? subject.perspective - : "third"; - return verb(predicate, perspective, subject.quantity) + return verb(predicate, perspective(subject), subject.quantity) .map((verb) => ({ type: "default", subject, @@ -97,6 +95,89 @@ function liClause( })); }); } +function iWish( + subject: English.NounPhrase, + verb: English.VerbPhrase, +): English.Clause { + return { + type: "default", + subject: { + type: "simple", + determiner: [], + adjective: [], + noun: unemphasized("I"), + quantity: "singular", + perspective: "first", + postAdjective: null, + preposition: [], + emphasis: false, + }, + verb: { + type: "default", + adverb: [], + verb: { + modal: null, + first: unemphasized("wish"), + rest: [], + }, + subjectComplement: null, + contentClause: { + type: "default", + subject, + verb, + hideSubject: false, + }, + object: null, + objectComplement: null, + preposition: [], + hideVerb: false, + }, + hideSubject: false, + }; +} +function oClause( + clause: TokiPona.Clause & { type: "o clause" }, +): ArrayResult { + const subject = clause.subjects != null + ? 
multiplePhrasesAsNoun({ + phrases: clause.subjects, + place: "subject", + includeGerund: true, + andParticle: "en", + }) + : new ArrayResult([{ + type: "simple", + determiner: [], + adjective: [], + noun: unemphasized("you"), + quantity: "plural", + perspective: "second", + postAdjective: null, + preposition: [], + emphasis: false, + }]); + return ArrayResult.concat( + ArrayResult.combine(subject, predicate(clause.predicates, "o")) + .flatMap(([subject, predicate]) => { + return verb(predicate, perspective(subject), subject.quantity) + .map((verb) => iWish(subject, verb)); + }), + ArrayResult.combine( + subject, + predicate(clause.predicates, "o") + .map((verb) => addModalToAll(unemphasized("should"), verb)), + ) + .flatMap(([subject, predicate]) => { + return verb(predicate, perspective(subject), subject.quantity) + .map((verb) => ({ + type: "default", + subject, + verb, + hideSubject: false, + })); + }), + ); +} export function clause(clause: TokiPona.Clause): ArrayResult { switch (clause.type) { case "phrases": @@ -118,8 +199,9 @@ export function clause(clause: TokiPona.Clause): ArrayResult { ); case "li clause": return liClause(clause); - case "prepositions": case "o clause": + return oClause(clause); + case "prepositions": return new ArrayResult(new TranslationTodoError(clause.type)); } } diff --git a/src/translator/noun.ts b/src/translator/noun.ts index 6ab8e039..3f173c60 100644 --- a/src/translator/noun.ts +++ b/src/translator/noun.ts @@ -126,3 +126,11 @@ export function nounAsPlainString( return noun({ definition, reduplicationCount: 1, emphasis: false }) .map((noun) => EnglishComposer.noun(noun, 0)); } +export function perspective(noun: English.NounPhrase): Dictionary.Perspective { + switch (noun.type) { + case "simple": + return noun.perspective; + case "compound": + return "third"; + } +} diff --git a/src/translator/verb.ts b/src/translator/verb.ts index 7719178c..e0cf7544 100644 --- a/src/translator/verb.ts +++ b/src/translator/verb.ts @@ -1,8 +1,9 @@ import * as Dictionary from "../../dictionary/type.ts"; import { ArrayResult } from "../array_result.ts"; -import { mapNullable } from "../misc.ts"; +import { mapNullable, nullableAsArray } from "../misc.ts"; import { settings } from "../settings.ts"; import * as English from "./ast.ts"; +import { FilteredOutError } from "./error.ts"; import { condense } from "./misc.ts"; import { noun } from "./noun.ts"; import { nounAsPreposition } from "./preposition.ts"; @@ -43,6 +44,44 @@ export function condenseVerb(present: string, past: string): string { const second = past.split(" ")[0]; return [condense(first, second), ...rest].join(" "); } +export function addModal(modal: English.Word, verb: PartialVerb): PartialVerb { + if (verb.modal == null) { + const newRest = nullableAsArray(verb.first) + .map(({ presentPlural }) => presentPlural) + .map((verb) => verb === "are" ? 
"be" : verb) + .map((newVerb) => + word({ + word: newVerb, + reduplicationCount: verb.reduplicationCount, + emphasis: verb.wordEmphasis, + }) + ); + return { + ...verb, + modal, + first: null, + rest: [...newRest, ...verb.rest], + reduplicationCount: 1, + wordEmphasis: false, + }; + } else { + throw new FilteredOutError("nested modal verb"); + } +} +export function addModalToAll( + modal: English.Word, + verb: PartialCompoundVerb, +): PartialCompoundVerb { + switch (verb.type) { + case "simple": + return { ...addModal(modal, verb), type: "simple" }; + case "compound": + return { + ...verb, + verb: verb.verb.map((verb) => addModalToAll(modal, verb)), + }; + } +} export function partialVerb( options: Readonly<{ definition: Dictionary.Verb; From 58ceb4d7258eee0793164ea5d1a214de8903b494 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 12:03:54 +0800 Subject: [PATCH 191/199] update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b2810626..6b5e9f34 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ NOTE: Before publishing: The latest on-development version can be accessed by building the source code. On this on-development version, things can be broken. +ilo Token can now translate "o" imperative sentences such as "o toki" and "mi o lape". + You can now have custom fillers in custom dictionary: ``` @@ -33,6 +35,7 @@ san: 3(num); ``` +- Implement translation of "o" imperative sentences. - Allow custom fillers. - Allow custom numerals. - Numerals are now very permissive. Something like "wan tu" is now allowed. It From e1b35c260bbd7963b7b673e92eb6a0a3ae325c61 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 12:07:05 +0800 Subject: [PATCH 192/199] remove unneeded return statements --- src/translator/clause.ts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/translator/clause.ts b/src/translator/clause.ts index 455959ba..a01ed34a 100644 --- a/src/translator/clause.ts +++ b/src/translator/clause.ts @@ -158,24 +158,24 @@ function oClause( }]); return ArrayResult.concat( ArrayResult.combine(subject, predicate(clause.predicates, "o")) - .flatMap(([subject, predicate]) => { - return verb(predicate, perspective(subject), subject.quantity) - .map((verb) => iWish(subject, verb)); - }), + .flatMap(([subject, predicate]) => + verb(predicate, perspective(subject), subject.quantity) + .map((verb) => iWish(subject, verb)) + ), ArrayResult.combine( subject, predicate(clause.predicates, "o") .map((verb) => addModalToAll(unemphasized("should"), verb)), ) - .flatMap(([subject, predicate]) => { - return verb(predicate, perspective(subject), subject.quantity) + .flatMap(([subject, predicate]) => + verb(predicate, perspective(subject), subject.quantity) .map((verb) => ({ type: "default", subject, verb, hideSubject: false, - })); - }), + })) + ), ); } export function clause(clause: TokiPona.Clause): ArrayResult { From 39cc618fbc24b3912371c174bccb68525c77f9b5 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 12:11:27 +0800 Subject: [PATCH 193/199] reduce code duplication --- src/translator/clause.ts | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/translator/clause.ts b/src/translator/clause.ts index a01ed34a..975cbc4a 100644 --- a/src/translator/clause.ts +++ b/src/translator/clause.ts @@ -156,27 +156,26 @@ function oClause( preposition: [], emphasis: false, }]); - return ArrayResult.concat( - ArrayResult.combine(subject, 
predicate(clause.predicates, "o")) - .flatMap(([subject, predicate]) => - verb(predicate, perspective(subject), subject.quantity) - .map((verb) => iWish(subject, verb)) - ), - ArrayResult.combine( - subject, - predicate(clause.predicates, "o") - .map((verb) => addModalToAll(unemphasized("should"), verb)), - ) - .flatMap(([subject, predicate]) => + return ArrayResult.combine(subject, predicate(clause.predicates, "o")) + .flatMap(([subject, predicate]) => + ArrayResult.concat( verb(predicate, perspective(subject), subject.quantity) + .map((verb) => iWish(subject, verb)), + ArrayResult.from(() => + verb( + addModalToAll(unemphasized("should"), predicate), + perspective(subject), + subject.quantity, + ) + ) .map((verb) => ({ type: "default", subject, verb, hideSubject: false, - })) - ), - ); + })), + ) + ); } export function clause(clause: TokiPona.Clause): ArrayResult { switch (clause.type) { From 19217b26216c764a886db48083e241098a578652 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 12:20:13 +0800 Subject: [PATCH 194/199] small improvement --- src/translator/clause.ts | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/translator/clause.ts b/src/translator/clause.ts index 975cbc4a..db9d24c3 100644 --- a/src/translator/clause.ts +++ b/src/translator/clause.ts @@ -157,14 +157,15 @@ function oClause( emphasis: false, }]); return ArrayResult.combine(subject, predicate(clause.predicates, "o")) - .flatMap(([subject, predicate]) => - ArrayResult.concat( - verb(predicate, perspective(subject), subject.quantity) + .flatMap(([subject, predicate]) => { + const subjectPerspective = perspective(subject); + return ArrayResult.concat( + verb(predicate, subjectPerspective, subject.quantity) .map((verb) => iWish(subject, verb)), ArrayResult.from(() => verb( addModalToAll(unemphasized("should"), predicate), - perspective(subject), + subjectPerspective, subject.quantity, ) ) @@ -174,8 +175,8 @@ function oClause( verb, hideSubject: false, })), - ) - ); + ); + }); } export function clause(clause: TokiPona.Clause): ArrayResult { switch (clause.type) { From b623c8ea7c46100ce0740bbf95e9e50aff6e1452 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 12:27:26 +0800 Subject: [PATCH 195/199] fix watch --- bundle.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle.ts b/bundle.ts index c3da1e23..4860861b 100644 --- a/bundle.ts +++ b/bundle.ts @@ -13,7 +13,7 @@ const WATCH = [ "./telo_misikeke/rules.js", "./telo_misikeke/telo_misikeke.js", "./src/", - "./project-data.json", + "./project_data.json", ]; const DICTIONARY = /dictionary[/\\][^/\\]+$/; From f04fc91abc8154bc00e0a62d839a807421487bbb Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 12:48:31 +0800 Subject: [PATCH 196/199] filter out gerund-like adjectives --- dictionary/dictionary | 102 ++++++++++++++++++------------------ dictionary/parser.ts | 15 ++++-- dictionary/type.ts | 1 + src/translator/word_unit.ts | 14 +++-- 4 files changed, 73 insertions(+), 59 deletions(-) diff --git a/dictionary/dictionary b/dictionary/dictionary index e0aac30f..da33a4bc 100644 --- a/dictionary/dictionary +++ b/dictionary/dictionary @@ -14,7 +14,7 @@ akesi: reptile(n); amphibian(n); scaly(adj material) creature(n); - crawling(adj opinion) creature(n); + crawling(adj opinion gerund-like) creature(n); reptilian(adj qualifier); amphibian(adj qualifier); @@ -39,8 +39,8 @@ alasa: # hunting(n gerund); searching(n gerund); - # hunting(adj qualifier); - searching(adj qualifier); + # hunting(adj qualifier 
gerund-like); + searching(adj qualifier gerund-like); ale, ali: all(d quantifier plural); @@ -105,12 +105,12 @@ awen: protecting(n gerund); continuing(n gerund); - # staying(adj qualifier); - remaining(adj qualifier); - # waiting(adj qualifier); - pausing(adj qualifier); - protecting(adj qualifier); - continuing(adj qualifier); + # staying(adj qualifier gerund-like); + remaining(adj qualifier gerund-like); + # waiting(adj qualifier gerund-like); + pausing(adj qualifier gerund-like); + protecting(adj qualifier gerund-like); + continuing(adj qualifier gerund-like); e: `[`marks the start of a direct object`]`(particle def); @@ -148,11 +148,11 @@ esun: buying(n gerund); selling(n gerund); - # trading(adj qualifier); - # bartering(adj qualifier); - # exchanging(adj qualifier); - swapping(adj qualifier); - buying(adj qualifier); + # trading(adj qualifier gerund-like); + # bartering(adj qualifier gerund-like); + # exchanging(adj qualifier gerund-like); + swapping(adj qualifier gerund-like); + buying(adj qualifier gerund-like); sell(adj qualifier); ijo: @@ -228,10 +228,10 @@ jasima: # mirroring(n gerund); duplicating(n gerund); - reflecting(adj qualifier); - # echoing(adj qualifier); - # mirroring(adj qualifier); - duplicating(adj qualifier); + reflecting(adj qualifier gerund-like); + # echoing(adj qualifier gerund-like); + # mirroring(adj qualifier gerund-like); + duplicating(adj qualifier gerund-like); jelo: yellow(adj color); @@ -258,14 +258,14 @@ kala: fish(n); # marine(adj qualifier) animal(n); # sea(adj qualifier) creature(n); - swimming(adj qualifier) creature(n); + swimming(adj qualifier gerund-like) creature(n); swim(v); fish-like(adj qualifier); fish-related(adj opinion); - swimming(adj qualifier); + swimming(adj qualifier gerund-like); kalama: produce(v) sound(n); @@ -288,10 +288,10 @@ kama: summoning(n gerund); becoming(n gerund); - arriving(adj qualifier); - # approaching(adj qualifier); - summoning(adj qualifier); - becoming(adj qualifier); + arriving(adj qualifier gerund-like); + # approaching(adj qualifier gerund-like); + summoning(adj qualifier gerund-like); + becoming(adj qualifier gerund-like); kasi: plant(n); @@ -456,7 +456,7 @@ kulupu: kute: ear(n); - hearing(adj qualifier) organ(n); + hearing(adj qualifier gerund-like) organ(n); hear(v) [object]; listen(v) at(prep) [object]; @@ -467,9 +467,9 @@ kute: listen(n gerund); obeying(n gerund); - hearing(adj qualifier); - listen(adj qualifier); - obeying(adj qualifier); + hearing(adj qualifier gerund-like); + listening(adj qualifier gerund-like); + obeying(adj qualifier gerund-like); la: `[`mark the previous statement as context to a following statement`]`(particle def); @@ -483,9 +483,9 @@ lanpan: # seizing(n gerund); # stealing(n gerund); - taking(adj qualifier); - # seizing(adj qualifier); - # stealing(adj qualifier); + taking(adj qualifier gerund-like); + # seizing(adj qualifier gerund-like); + # stealing(adj qualifier gerund-like); lape: sleep(n singular); @@ -493,8 +493,8 @@ lape: sleep(v); rest(v); - sleeping(adj qualifier); - resting(adj qualifier); + sleeping(adj qualifier gerund-like); + resting(adj qualifier gerund-like); laso: # turquoise(adj color); @@ -651,7 +651,7 @@ luka: hand(n); arm(n); tactile(adj qualifier) organ(n); - grasping(adj qualifier) organ(n); + grasping(adj qualifier gerund-like) organ(n); grasp(v) [object]; interact(v) with(prep) [object]; @@ -676,7 +676,7 @@ lukin, oko: visual(adj qualifier); eye(n); - seeing(adj qualifier) organ(n); + seeing(adj qualifier gerund-like) organ(n); try to(v) [predicate]; 
@@ -899,8 +899,8 @@ musi: art(n); fun(adj opinion); - amusing(adj opinion); - # interesting(adj opinion); + amusing(adj opinion gerund-like); + # interesting(adj opinion gerund-like); comical(adj opinion); # silly(adj opinion); @@ -1111,12 +1111,12 @@ pana: putting(n gerund); releasing(n gerund); - giving(adj qualifier); - # sending(adj qualifier); - emitting(adj qualifier); - providing(adj qualifier); - putting(adj qualifier); - releasing(adj qualifier); + giving(adj qualifier gerund-like); + # sending(adj qualifier gerund-like); + emitting(adj qualifier gerund-like); + providing(adj qualifier gerund-like); + putting(adj qualifier gerund-like); + releasing(adj qualifier gerund-like); pi: `[`modify the next word with one or more following words`]`(particle def); @@ -1169,7 +1169,7 @@ pipi: insect(n); bug(n); # spider(n); - tiny(adj size) crawling(adj qualifier) creature(n); + tiny(adj size) crawling(adj qualifier gerund-like) creature(n); insect-like(adj physical quality); bug-like(adj physical quality); @@ -1292,13 +1292,13 @@ sike: # ball(n); # wheel(n); - repeating(adj qualifier) thing(n); + repeating(adj qualifier gerund-like) thing(n); cycle(n); orbit(n); loop(n); round(adj physical quality); - repeating(adj qualifier); + repeating(adj qualifier gerund-like); repeatedly(adv); @@ -1509,9 +1509,9 @@ toki: language(n); - communicating(adj qualifier); - # saying(adj qualifier); - thinking(adj qualifier); + communicating(adj qualifier gerund-like); + # saying(adj qualifier gerund-like); + thinking(adj qualifier gerund-like); conversation-related(adj opinion); story-related(adj opinion); @@ -1560,7 +1560,7 @@ uta: mouth(n); lip(n); throat(n); - consuming(adj qualifier) orifice(n); + consuming(adj qualifier gerund-like) orifice(n); mouth-related(adj opinion); lips-related(adj opinion); @@ -1605,7 +1605,7 @@ wan: waso: bird(n); - flying(adj qualifier) creature(n); + flying(adj qualifier gerund-like) creature(n); winged(adj qualifier) animal(n); fly(v); diff --git a/dictionary/parser.ts b/dictionary/parser.ts index f204729f..63196147 100644 --- a/dictionary/parser.ts +++ b/dictionary/parser.ts @@ -59,7 +59,7 @@ const slash = lex(matchString("/", "slash")); const forms = sequence(word, all(slash.with(word))) .map(([first, rest]) => [first, ...rest]); function keyword(keyword: T): Parser { - return lex(match(/[a-z]+/, keyword)) + return lex(match(/[a-z\-]+/, keyword)) .filter((that) => keyword === that || throwError(new UnexpectedError(`"${that}"`, `"${keyword}"`)) @@ -227,9 +227,18 @@ const adjectiveKind = choiceOnlyOne( const adjective = sequence( all(simpleUnit("adv")), word, - tag(keyword("adj").with(adjectiveKind)), + tag( + keyword("adj").with( + sequence(adjectiveKind, optionalAll(keyword("gerund-like"))), + ), + ), ) - .map(([adverb, adjective, kind]) => ({ adverb, adjective, kind })); + .map(([adverb, adjective, [kind, gerundLike]]) => ({ + adverb, + adjective, + kind, + gerundLike: gerundLike != null, + })); const noun = sequence( all(determiner), all(adjective), diff --git a/dictionary/type.ts b/dictionary/type.ts index 044c6fdf..77c63e67 100644 --- a/dictionary/type.ts +++ b/dictionary/type.ts @@ -50,6 +50,7 @@ export type Adjective = Readonly<{ adverb: ReadonlyArray; adjective: string; kind: AdjectiveType; + gerundLike: boolean; }>; export type VerbForms = Readonly<{ presentPlural: string; diff --git a/src/translator/word_unit.ts b/src/translator/word_unit.ts index 2b116191..22f799f9 100644 --- a/src/translator/word_unit.ts +++ b/src/translator/word_unit.ts @@ -47,11 +47,15 @@ 
function defaultWordUnit( type: "noun", }]); case "adjective": - return adjective({ ...options, definition }) - .map((adjective) => ({ - type: "adjective", - adjective, - })); + if (!includeGerund && definition.gerundLike) { + return new ArrayResult(); + } else { + return adjective({ ...options, definition }) + .map((adjective) => ({ + type: "adjective", + adjective, + })); + } case "compound adjective": return compoundAdjective({ ...options, From ba8b24567dd43815a1311f22ad66656203f734ae Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 13:28:06 +0800 Subject: [PATCH 197/199] add TODO --- src/translator/ast.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/translator/ast.ts b/src/translator/ast.ts index e27abd8f..cc566e43 100644 --- a/src/translator/ast.ts +++ b/src/translator/ast.ts @@ -45,6 +45,7 @@ export type AdjectivePhrase = export type Complement = | Readonly<{ type: "noun"; noun: NounPhrase }> | Readonly<{ type: "adjective"; adjective: AdjectivePhrase }>; +// TODO: each verb should have its own adverbs export type Verb = Readonly<{ modal: null | Word; // TODO: better name From 85198e490237b357b4303c5d67fa94633c0d7fa6 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 13:30:22 +0800 Subject: [PATCH 198/199] 0.5.0 --- CHANGELOG.md | 11 +++++------ deno.json | 2 +- project_data.json | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b5e9f34..39160c5d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,14 +7,15 @@ NOTE: Before publishing: - Remove details --> + + +## 0.5.0 ilo Token can now translate "o" imperative sentences such as "o toki" and "mi o lape". @@ -46,8 +47,6 @@ san: - Fix sentence capitalization: If the sentence starts with number, no capitalization will occur. - - ## 0.4.1 Released 27 Feb 2025 diff --git a/deno.json b/deno.json index 04df1e50..4ffe771f 100644 --- a/deno.json +++ b/deno.json @@ -10,7 +10,7 @@ }, "start": "deno run --allow-net --allow-read --no-prompt jsr:@std/http/file-server ./dist/", "watch": "deno run --allow-read --allow-write --allow-env --allow-net --allow-run --no-prompt ./bundle.ts watch", - "update": "deno outdated --update && deno run --allow-write --allow-net --no-prompt ./telo-misikeke/update.ts", + "update": "deno outdated --update && deno run --allow-write --allow-net --no-prompt ./telo_misikeke/update.ts", "build-dictionary": "deno run --allow-read --allow-write --no-prompt ./dictionary/build.ts" }, "fmt": { diff --git a/project_data.json b/project_data.json index 211a72bb..c2c36445 100644 --- a/project_data.json +++ b/project_data.json @@ -1,5 +1,5 @@ { "version": "0.5.0", - "onDevelopment": true, - "releaseDate": "2025-2-27" + "onDevelopment": false, + "releaseDate": "2025-3-9" } From 2540864ae43885060b6b76ed38d3123efbf5f3d9 Mon Sep 17 00:00:00 2001 From: Koko Date: Sun, 9 Mar 2025 13:31:44 +0800 Subject: [PATCH 199/199] format --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39160c5d..608c55f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,8 @@ On this on-development version, things can be broken. ## 0.5.0 -ilo Token can now translate "o" imperative sentences such as "o toki" and "mi o lape". +ilo Token can now translate "o" imperative sentences such as "o toki" and "mi o +lape". You can now have custom fillers in custom dictionary:
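
A minimal standalone sketch of the behaviour patch 190 ("implement translation of o clause") aims for: a toki pona "o" clause such as "mi o lape" gets two English readings — an "I wish …" wrapper (the `iWish` helper) and a "should" modal reading (via `addModalToAll`) — and a subjectless clause such as "o toki" falls back to a second-person "you" subject. The sketch does not use the project's `ArrayResult`/`English.Clause` machinery; plain strings stand in for the clause ASTs, and `oClauseReadings`, its parameters, and the sample outputs are illustrative assumptions only — exact tense and agreement are the job of the real `verb()` pipeline.

```ts
// Standalone illustration only — not the project's API.
// Mirrors the shape of oClause() in src/translator/clause.ts:
// one "I wish …" reading plus one "should" reading per clause.

function oClauseReadings(
  subject: string | null, // null models a subjectless clause like "o toki"
  verbPhrase: string,
): Array<string> {
  // A missing subject defaults to second person, as in the patch.
  const resolvedSubject = subject ?? "you";
  return [
    // iWish(...) wraps the clause as "I wish <subject> <verb>".
    `I wish ${resolvedSubject} ${verbPhrase}`,
    // addModalToAll(unemphasized("should"), ...) yields the modal reading.
    `${resolvedSubject} should ${verbPhrase}`,
  ];
}

// "mi o lape" → subject "I", verb "sleep" (glosses assumed for illustration)
console.log(oClauseReadings("I", "sleep"));
// "o toki" → no subject, verb "communicate"
console.log(oClauseReadings(null, "communicate"));
```

In the real code both readings live in a single `ArrayResult`, and `addModal` rejects predicates that already carry a modal verb by throwing `FilteredOutError("nested modal verb")`, so only the applicable readings survive.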