From d1abf35f12b266d361e67ab5b941f9ae07488e4d Mon Sep 17 00:00:00 2001 From: Ron Shapiro Date: Sun, 13 Oct 2024 22:11:03 +0300 Subject: [PATCH 1/3] =?UTF-8?q?Attempt=20to=20complete=20=D7=95=D7=92?= =?UTF-8?q?=D7=95'=20instances=20with=20surrounding=20verses=20using=20AI.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api_request_handler.ts | 3 +- css/main.css | 3 + llm.ts | 158 +++++++++++++++++++++++++++++++++++++++++ package-lock.json | 15 ++++ package.json | 1 + 5 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 llm.ts diff --git a/api_request_handler.ts b/api_request_handler.ts index 7e8dc57d..b3e5bc38 100644 --- a/api_request_handler.ts +++ b/api_request_handler.ts @@ -77,6 +77,7 @@ import {hasMatchingProperty} from "./util/objects"; import {checkNotUndefined} from "./js/undefined"; import {getWeekdayReading} from "./weekday_parshiot"; import {ASERET_YIMEI_TESHUVA_REFS} from "./js/aseret_yimei_teshuva"; +import {vagomer} from "./llm"; const standardHebrewTransformations = sefariaTextTypeTransformation( hebrew => ( @@ -597,7 +598,7 @@ export abstract class AbstractApiRequestHandler { .then(() => linkGraph); }), ...this.extraPromises(), - ]).then(args => this.transformData(...args)); + ]).then(args => this.transformData(...args)).then(x => vagomer(x)); } protected extraPromises(): Promise[] { diff --git a/css/main.css b/css/main.css index 95c595fa..ff3a4d9f 100644 --- a/css/main.css +++ b/css/main.css @@ -682,3 +682,6 @@ h2, .title, .titleHebrew { .searchMatchCounter { padding: 0 10px; } +.vagomer-completion { + opacity: .5; +} diff --git a/llm.ts b/llm.ts new file mode 100644 index 00000000..3c3fd017 --- /dev/null +++ b/llm.ts @@ -0,0 +1,158 @@ +import {GoogleGenerativeAI} from "@google/generative-ai"; +import {ApiResponse} from "./apiTypes"; +import {hebrewSearchRegex} from "./hebrew"; +import {checkNotUndefined} from "./js/undefined"; + +const VAGOMER = checkNotUndefined(hebrewSearchRegex("(וגו[׳'])", true)); +const API_KEY = checkNotUndefined(process.env.GEMINI_API_KEY); +const genAI = new GoogleGenerativeAI(API_KEY); +const model = genAI.getGenerativeModel({ model: "gemini-1.5-flash" }); + +type AllVerses = any; // do not submit + +const HEBREW_TEXT_PREFIX = "אהלן אחי" +const HEBREW_VERSE_1 = "כִּי מֵתוּ כׇּל הָאֲנָשִׁים" +const HEBREW_VERSE_1_CONTINUATION = "הַמְבַקשִׁים אֶת־נַפְשֶׁךָ" +const HEBREW_TEXT_INFIX_1 = "מה התחזית היום"; +const HEBREW_VERSE_2 = "בְּמַחֲשַׁכִּים הוֹשִׁיבַנִי" +const HEBREW_VERSE_2_CONTINUATION = "כְּמֵתֵי עוֹלָם"; +const HEBREW_TEXT_INFIX_2 = "טוביה הגדול הנורא"; +// Helps when quoted text ends with a hyphen. +const HEBREW_VERSE_3 = "לֻחֹת הָעֵדֻת בְּיַד" +const HEBREW_VERSE_3_CONTINUATION = "־מֹשֶׁה בְּרִדְתּוֹ מִן־הָהָר"; +const HEBREW_TEXT_INFIX_3 = "טוביה הגדול הנורא"; +// Helps when quoted text ends with a hyphen. +const HEBREW_VERSE_4 = "לֹא־תֹאכַל" +const HEBREW_VERSE_4_CONTINUATION = "עָלָיו חָמֵץ שִׁבְעַת יָמִים תֹּאכַל־עָלָיו מַצּוֹת לֶחֶם עֹנִי כִּי בְחִפָּזוֹן יָצָאתָ מֵאֶרֶץ מִצְרַיִם לְמַ֣עַן תִּזְכֹּר אֶת־יוֹם צֵֽאתְךָ מֵאֶרֶץ מִצְרַיִם כֹּל יְמֵי חַיֶּיךָ׃"; + +const HEBREW_TEXT_SUFFIX = "איך נדע?"; +const SAMPLE_HEBREW_INPUT = [ + `${HEBREW_TEXT_PREFIX} ״${HEBREW_VERSE_1}״ וגו' `, + `${HEBREW_TEXT_INFIX_1} `, + `״${HEBREW_VERSE_2}״ וגו׳. `, + `${HEBREW_TEXT_INFIX_2} `, + `״${HEBREW_VERSE_3}״ וגו' `, + `${HEBREW_TEXT_INFIX_3} `, + `״${HEBREW_VERSE_4} וגו׳״ `, + `${HEBREW_TEXT_SUFFIX}`, + +].filter(x => !x.includes("span")).join(""); +const SAMPLE_HEBREW_OUTPUT = [ + `${HEBREW_TEXT_PREFIX} ״${HEBREW_VERSE_1}״ וגו' `, + `${HEBREW_VERSE_1_CONTINUATION} `, + `${HEBREW_TEXT_INFIX_1} `, + `״${HEBREW_VERSE_2}״ וגו׳ `, + // note the change in period location. + `${HEBREW_VERSE_2_CONTINUATION}. `, + `${HEBREW_TEXT_INFIX_2} `, + `״${HEBREW_VERSE_3}״ וגו' `, + `${HEBREW_VERSE_3_CONTINUATION}. `, + `${HEBREW_TEXT_INFIX_3} `, + `״${HEBREW_VERSE_4} וגו׳״ `, + `${HEBREW_VERSE_4_CONTINUATION}. `, + `${HEBREW_TEXT_SUFFIX}`, + +].join(""); +// TODO: it would be good to add examples of typos, and/or vav and yud being spliced in. +const SAMPLE_VERSES = JSON.stringify([ + {hebrew: "בְּמַחֲשַׁכִּים הוֹשִׁיבַנִי אֶת־נַפְשֶׁךָ", originatesFrom: "Nedarim 45a:1"}, + {hebrew: "וַתֵּ֣רֶא רָחֵ֗ל כִּ֣י לֹ֤א יָֽלְדָה֙", originatesFrom: "Nedarim 45a:2"}, + {hebrew: "בְּמַחֲשַׁכִּים הוֹשִׁיבַנִי כְּמֵתֵי עוֹלָם", originatesFrom: "Nedarim 45a:4"}, + {hebrew: "וַיֹּ֨אמֶר יְהֹוָ֤ה אֶל־מֹשֶׁה֙ בְּמִדְיָ֔ן לֵ֖ךְ שֻׁ֣ב מִצְרָ֑יִם כִּי־מֵ֙תוּ֙ כׇּל־הָ֣אֲנָשִׁ֔ים הַֽמְבַקְשִׁ֖ים אֶת־נַפְשֶֽׁךָ", originatesFrom: "Nedarim 45a:4"}, + {hebrew: "וַיֵּצֵא֙ בַּיּ֣וֹם הַשֵּׁנִ֔י וְהִנֵּ֛ה שְׁנֵֽי־אֲנָשִׁ֥ים עִבְרִ֖ים נִצִּ֑ים וַיֹּ֙אמֶר֙ לָֽרָשָׁ֔ע לָ֥מָּה תַכֶּ֖ה רֵעֶֽךָ׃", originatesFrom: "Nedarim 45a:4"}, + {hebrew: "בְּמַחֲשַׁכִּים הוֹשִׁיבַנִי לֹ֥א תִתֵּ֖ן מִכְשֹׁ֑ל", originatesFrom: "Nedarim 45a:5"}, +]); + +async function makeRequest( + hebrew: string, ref: string, allVerses: AllVerses, +): Promise { + const prompt = [ + "Below is a JSON structure representing Hebrew text that contains the phrase וגו׳ or וגו'.", + "When that word occurs (sometimes within the quotation, sometimes directly after), it means that a biblical verse is quoted partially and that the verse ", + "should be completed. The authors expected that the reader knows all biblical verses by ", + "heart, but that's for experts. Instead, I'd like to complete it for them, wrapped in a ", + ' tag.\n', + "I've supplied a list of candidate verses in the JSON structure. If there are multiple ", + "candidates that you're considering, use the one that is closest to the ref/Ref listed here,", + "where proximity is measured by first trying to get an exact match before the colon, with the ", + "tiebreaker being a numerical comparison of the number after the colon. The ref of the source ", + "verses can be found in the verses.originatesFrom fields. Do not include the verses array in ", + "final answer. If you don't have any changes to make, return empty text.", + "\n", + /* + "One other note: sometimes the prefix of the verse that is quoted will have slight ", + "emendations, usually in the form of the letters ו or י being introduced instead of vowels, ", + "or simply just a typo or two. When you rewrite, keep the original emendations.", + "\n", + */ + /* + "Sometimes the verse quotation may have some slight changes from the original text, usually ", + "of the form of the letters ו or י being introduced instead of vowels, ", + "or simply just a typo or two. ", + "\n", + */ + "Whenever you detect a quotation, include the entire rest of the verse right after the ", + "corresponding instance of וגו.", + "\n", + "Here is an example:\n", + "{", + ' ref: "Nedarim 45a:4",', + ` hebrew: "${SAMPLE_HEBREW_INPUT}",`, + ` verses: [${SAMPLE_VERSES}]`, + "}", + "\n", + "Should return:\n", + "{", + ' ref: "Nedarim 45a:4",', + ` hebrew: "${SAMPLE_HEBREW_OUTPUT}"`, + "}", + "\n", + "Here is the example I want you to amend:", + "\n", + JSON.stringify({ref, hebrew, verses: allVerses}), + "\n", + "Your output should be valid JSON.", + ]; + + + let json = (await model.generateContent(prompt.join("")))?.response?.text(); + if (json === "") { + console.log(">>>>>>>", ref, "empty response <<<<<<<<<<"); + return undefined; + } + console.log("^^^^^^^^^^^", ref); + if (!json) return undefined; + if (json && json.startsWith("```json") && json.endsWith("```")) { + json = json.slice(7, -3) + } + if (json) { + const parsed = JSON.parse(json); // do not submit: check parse + if (parsed.hebrew === hebrew) console.log("!!!!!!!!! same\n\n"); + return parsed.hebrew; + } + return undefined; +} + +export async function vagomer(response: ApiResponse): Promise { + const allVerses = []; + for (const section of response.sections) { + if (section.commentary?.Verses) { + for (const comment of section.commentary.Verses.comments) { + allVerses.push({hebrew: comment.he, originatesFrom: section.ref}); + } + } + } + + for (const section of response.sections) { + const hebrew = section.he as string; + if (hebrew.search(VAGOMER) === -1) continue; + + const modelResponse = await makeRequest(hebrew, section.ref, allVerses); + if (modelResponse) { + // do not submit: strip trope and also {ס} + // do not submit: test that if the completions are removed, the text should be unchanged. + section.he = modelResponse; + } + } + return response; +} diff --git a/package-lock.json b/package-lock.json index bb3f7bfc..ca683f3b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -6,6 +6,7 @@ "": { "dependencies": { "@babel/preset-typescript": "^7.23.3", + "@google/generative-ai": "^0.21.0", "@react-spring/web": "^9.5.5", "@sendgrid/mail": "^7.4.4", "@use-gesture/react": "^10.2.22", @@ -1437,6 +1438,15 @@ "node": ">=4" } }, + "node_modules/@google/generative-ai": { + "version": "0.21.0", + "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.21.0.tgz", + "integrity": "sha512-7XhUbtnlkSEZK15kN3t+tzIMxsbKm/dSkKBFalj+20NvPKe1kBY7mR2P7vuijEn+f06z5+A8bVGKO0v39cr6Wg==", + "license": "Apache-2.0", + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/@iarna/toml": { "version": "2.2.5", "resolved": "https://registry.npmjs.org/@iarna/toml/-/toml-2.2.5.tgz", @@ -18015,6 +18025,11 @@ } } }, + "@google/generative-ai": { + "version": "0.21.0", + "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.21.0.tgz", + "integrity": "sha512-7XhUbtnlkSEZK15kN3t+tzIMxsbKm/dSkKBFalj+20NvPKe1kBY7mR2P7vuijEn+f06z5+A8bVGKO0v39cr6Wg==" + }, "@iarna/toml": { "version": "2.2.5", "resolved": "https://registry.npmjs.org/@iarna/toml/-/toml-2.2.5.tgz", diff --git a/package.json b/package.json index caca7e0b..a790ed68 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,7 @@ { "dependencies": { "@babel/preset-typescript": "^7.23.3", + "@google/generative-ai": "^0.21.0", "@react-spring/web": "^9.5.5", "@sendgrid/mail": "^7.4.4", "@use-gesture/react": "^10.2.22", From f5004409945e9e1979e6b4f268f73b1bfc355a49 Mon Sep 17 00:00:00 2001 From: Ron Shapiro Date: Mon, 14 Oct 2024 08:29:17 +0300 Subject: [PATCH 2/3] More Vagomer testing --- llm.ts | 52 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/llm.ts b/llm.ts index 3c3fd017..53599cfc 100644 --- a/llm.ts +++ b/llm.ts @@ -63,10 +63,8 @@ const SAMPLE_VERSES = JSON.stringify([ {hebrew: "בְּמַחֲשַׁכִּים הוֹשִׁיבַנִי לֹ֥א תִתֵּ֖ן מִכְשֹׁ֑ל", originatesFrom: "Nedarim 45a:5"}, ]); -async function makeRequest( - hebrew: string, ref: string, allVerses: AllVerses, -): Promise { - const prompt = [ +function makePrompt(hebrew: string, ref: string, allVerses: AllVerses): string { + return [ "Below is a JSON structure representing Hebrew text that contains the phrase וגו׳ or וגו'.", "When that word occurs (sometimes within the quotation, sometimes directly after), it means that a biblical verse is quoted partially and that the verse ", "should be completed. The authors expected that the reader knows all biblical verses by ", @@ -107,15 +105,21 @@ async function makeRequest( ` hebrew: "${SAMPLE_HEBREW_OUTPUT}"`, "}", "\n", + "Your output should be valid JSON.", + "\n", "Here is the example I want you to amend:", "\n", JSON.stringify({ref, hebrew, verses: allVerses}), - "\n", - "Your output should be valid JSON.", - ]; - + ].join(""); +} - let json = (await model.generateContent(prompt.join("")))?.response?.text(); +async function makeRequest( + hebrew: string, ref: string, allVerses: AllVerses, +): Promise { + const prompt = makePrompt(hebrew, ref, allVerses); + console.log(prompt); + return undefined; + let json = (await model.generateContent(prompt))?.response?.text(); if (json === "") { console.log(">>>>>>>", ref, "empty response <<<<<<<<<<"); return undefined; @@ -133,6 +137,34 @@ async function makeRequest( return undefined; } +async function makeRequestJamba( + hebrew: string, ref: string, allVerses: AllVerses,) { + // do not submit: Jamba doesn't seem to understand to ignore the verses field."םם + const prompt = makePrompt(hebrew, ref, allVerses); + const [systemPrompt, userPrompt] = prompt.split(/Here is the example/); + const response = await fetch("https://api.ai21.com/studio/v1/chat/completions", { + method: "POST", + headers: { + Authorization: "Bearer 41ZdeuFSEg7cc6Dt4a0fJwKAwdnrIJdG", // do not submit api key + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: "jamba-1.5-mini", + messages: [ + {role: "system", content: systemPrompt}, + {role: "user", content: "Here is the example" + userPrompt}, + ], + temperature: 0.4, + top_p: .5, + response_format: {type: "json_object"}, + }), + }); + const result = await response?.json(); + const parsed = JSON.parse(result.choices[0].message.content).hebrew; // do not submit: try parse + console.log(parsed); + return parsed; +} + export async function vagomer(response: ApiResponse): Promise { const allVerses = []; for (const section of response.sections) { @@ -147,7 +179,7 @@ export async function vagomer(response: ApiResponse): Promise { const hebrew = section.he as string; if (hebrew.search(VAGOMER) === -1) continue; - const modelResponse = await makeRequest(hebrew, section.ref, allVerses); + const modelResponse = await makeRequestJamba(hebrew, section.ref, allVerses); if (modelResponse) { // do not submit: strip trope and also {ס} // do not submit: test that if the completions are removed, the text should be unchanged. From 33f0d1d3b3877b7dd6be5395192c22bbc34e2f98 Mon Sep 17 00:00:00 2001 From: Ron Shapiro Date: Mon, 14 Oct 2024 14:04:44 +0300 Subject: [PATCH 3/3] More llming --- api_request_handler.ts | 18 ++-- css/debug.css | 3 + hebrew.ts | 6 ++ llm.ts | 204 +++++++++++++++++++++++++++-------------- 4 files changed, 155 insertions(+), 76 deletions(-) create mode 100644 css/debug.css diff --git a/api_request_handler.ts b/api_request_handler.ts index b3e5bc38..0b8f067c 100644 --- a/api_request_handler.ts +++ b/api_request_handler.ts @@ -10,7 +10,13 @@ import {Book, books, internalLinkableRef} from "./books"; import {ALL_COMMENTARIES, CommentaryType} from "./commentaries"; import {readUtf8} from "./files"; import {hadranSegments, isHadran} from "./hadran"; -import {stripHebrewNonlettersOrVowels, intToHebrewNumeral, ALEPH, TAV} from "./hebrew"; +import { + intToHebrewNumeral, + stripHebrewNonlettersOrVowels, + stripTanakhGapIndicators, + ALEPH, + TAV, +} from "./hebrew"; import {Logger, consoleLogger} from "./logger"; import {mergeRefs} from "./ref_merging"; import {refSorter} from "./js/google_drive/ref_sorter"; @@ -598,7 +604,7 @@ export abstract class AbstractApiRequestHandler { .then(() => linkGraph); }), ...this.extraPromises(), - ]).then(args => this.transformData(...args)).then(x => vagomer(x)); + ]).then(args => this.transformData(...args)).then(x => vagomer(x, this.logger)); } protected extraPromises(): Promise[] { @@ -1427,16 +1433,10 @@ abstract class LiturgicalApiRequestHandler extends AbstractApiRequestHandler { return HEBREW_SECTION_NAMES[this.page]; } - stripWeirdHebrew(hebrew: string): string { - return hebrew - .replace(/\s?{פ}<\/span>(
)?/g, "") - .replace(/\s?{ס}<\/span>\s*/g, ""); - } - protected translateHebrewText(text: sefaria.TextType, ref: string): sefaria.TextType { const transformations = [ (t: sefaria.TextType) => super.translateHebrewText(t, ref), - sefariaTextTypeTransformation(this.stripWeirdHebrew), + sefariaTextTypeTransformation(stripTanakhGapIndicators), ]; if (!KEEP_TROPE_REFS.has(ref)) { transformations.push(sefariaTextTypeTransformation(stripHebrewNonlettersOrVowels)); diff --git a/css/debug.css b/css/debug.css new file mode 100644 index 00000000..5f1b01eb --- /dev/null +++ b/css/debug.css @@ -0,0 +1,3 @@ +.vagomer-completion { + background: yellow; +} diff --git a/hebrew.ts b/hebrew.ts index 22e7ba6a..7ea91b1a 100644 --- a/hebrew.ts +++ b/hebrew.ts @@ -96,3 +96,9 @@ export function stripHebrewNonlettersOrVowels(text: string): string { .replace(/<\/small>/g, "") // sometimes the after-effect of replacing a paseq ); } + +export function stripTanakhGapIndicators(text: string): string { + return text + .replace(/\s?{פ}<\/span>(
)?/g, "") + .replace(/\s?{ס}<\/span>\s*/g, ""); +} diff --git a/llm.ts b/llm.ts index 53599cfc..583a441d 100644 --- a/llm.ts +++ b/llm.ts @@ -1,12 +1,17 @@ -import {GoogleGenerativeAI} from "@google/generative-ai"; +import { + GoogleGenerativeAI, + GenerativeModel, + GenerateContentResult, + SchemaType, +} from "@google/generative-ai"; import {ApiResponse} from "./apiTypes"; -import {hebrewSearchRegex} from "./hebrew"; +import {hebrewSearchRegex, stripHebrewNonlettersOrVowels, stripTanakhGapIndicators} from "./hebrew"; import {checkNotUndefined} from "./js/undefined"; +import {sefariaTextTypeTransformation} from "./sefariaTextType"; +import {Logger} from "./logger"; const VAGOMER = checkNotUndefined(hebrewSearchRegex("(וגו[׳'])", true)); -const API_KEY = checkNotUndefined(process.env.GEMINI_API_KEY); -const genAI = new GoogleGenerativeAI(API_KEY); -const model = genAI.getGenerativeModel({ model: "gemini-1.5-flash" }); +const GEMINI_API_KEY = checkNotUndefined(process.env.GEMINI_API_KEY, "GEMINI_API_KEY"); type AllVerses = any; // do not submit @@ -92,18 +97,18 @@ function makePrompt(hebrew: string, ref: string, allVerses: AllVerses): string { "Whenever you detect a quotation, include the entire rest of the verse right after the ", "corresponding instance of וגו.", "\n", - "Here is an example:\n", - "{", - ' ref: "Nedarim 45a:4",', - ` hebrew: "${SAMPLE_HEBREW_INPUT}",`, - ` verses: [${SAMPLE_VERSES}]`, - "}", + "Here is an example:", + "\n{", + '\n ref: "Nedarim 45a:4",', + `\n hebrew: "${SAMPLE_HEBREW_INPUT}",`, + `\n verses: [${SAMPLE_VERSES}]`, + "\n}", "\n", "Should return:\n", - "{", - ' ref: "Nedarim 45a:4",', - ` hebrew: "${SAMPLE_HEBREW_OUTPUT}"`, - "}", + "\n{", + '\n ref: "Nedarim 45a:4",', + `\n hebrew: "${SAMPLE_HEBREW_OUTPUT}"`, + "\n}", "\n", "Your output should be valid JSON.", "\n", @@ -113,78 +118,143 @@ function makePrompt(hebrew: string, ref: string, allVerses: AllVerses): string { ].join(""); } -async function makeRequest( - hebrew: string, ref: string, allVerses: AllVerses, -): Promise { - const prompt = makePrompt(hebrew, ref, allVerses); - console.log(prompt); - return undefined; - let json = (await model.generateContent(prompt))?.response?.text(); - if (json === "") { - console.log(">>>>>>>", ref, "empty response <<<<<<<<<<"); - return undefined; +abstract class LanguageModel { + async makeRequest( + hebrew: string, ref: string, allVerses: AllVerses, logger: Logger, + ): Promise { + const prompt = makePrompt(hebrew, ref, allVerses); + const response = await this.executePrompt(prompt, ref, logger); + if (response === undefined) return undefined; + + try { + return JSON.parse(response).hebrew; + } catch (e: any) { + logger.error("Invalid json", response); + if (e.toString().includes("at position")) { + const position = parseInt(e.toString().split("at position")[1]); + logger.error(">>", response.slice(position, position + 1)); + } + return undefined; + } + } + + abstract executePrompt( + prompt: string, ref: string, logger: Logger, + ): Promise; +} + +class Gemini extends LanguageModel { + constructor(readonly model: GenerativeModel) { super(); } + + async modelRequest(prompt: string, logger: Logger): Promise { + try { + return this.model.generateContent(prompt); + } catch (e) { + logger.error(e); + return undefined; + } + } + + async executePrompt( + prompt: string, ref: string, logger: Logger, + ): Promise { + const result = await this.modelRequest(prompt, logger); + const json = result?.response?.text(); + return json === "" ? undefined : json; } - console.log("^^^^^^^^^^^", ref); - if (!json) return undefined; - if (json && json.startsWith("```json") && json.endsWith("```")) { - json = json.slice(7, -3) +} + +class Jamba extends LanguageModel { + constructor(readonly modelName: string) { super(); } + + async executePrompt( + // eslint-disable-next-line @typescript-eslint/no-unused-vars + prompt: string, ref: string, logger: Logger, + ): Promise { + const [systemPrompt, userPrompt] = prompt.split(/Here is the example/); + const response = await fetch("https://api.ai21.com/studio/v1/chat/completions", { + method: "POST", + headers: { + Authorization: "Bearer 41ZdeuFSEg7cc6Dt4a0fJwKAwdnrIJdG", // do not submit api key + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: this.modelName, + messages: [ + {role: "system", content: systemPrompt}, + {role: "user", content: "Here is the example" + userPrompt}, + ], + temperature: 0.4, + top_p: .5, + response_format: {type: "json_object"}, + }), + }); + const result = await response?.json(); + return result.choices[0].message.content; } - if (json) { - const parsed = JSON.parse(json); // do not submit: check parse - if (parsed.hebrew === hebrew) console.log("!!!!!!!!! same\n\n"); - return parsed.hebrew; +} + +function cleanVerseTextBase(hebrew: string): string { + hebrew = stripTanakhGapIndicators(hebrew); + hebrew = stripHebrewNonlettersOrVowels(hebrew); + if (hebrew.endsWith(":") || hebrew.endsWith("׃")) { + hebrew = hebrew.slice(0, -1); } - return undefined; + return hebrew; } +const cleanVerseText = sefariaTextTypeTransformation(cleanVerseTextBase); + +const languageModel = (() => { + if (Math.random() === .12345678901234) return new Jamba("jamba-1.5-mini"); -async function makeRequestJamba( - hebrew: string, ref: string, allVerses: AllVerses,) { - // do not submit: Jamba doesn't seem to understand to ignore the verses field."םם - const prompt = makePrompt(hebrew, ref, allVerses); - const [systemPrompt, userPrompt] = prompt.split(/Here is the example/); - const response = await fetch("https://api.ai21.com/studio/v1/chat/completions", { - method: "POST", - headers: { - Authorization: "Bearer 41ZdeuFSEg7cc6Dt4a0fJwKAwdnrIJdG", // do not submit api key - "Content-Type": "application/json", + const genAI = new GoogleGenerativeAI(GEMINI_API_KEY); + const model = genAI.getGenerativeModel({ + model: "gemini-1.5-flash", + generationConfig: { + candidateCount: 1, // More than 1 is not available for Flash + temperature: .4, + responseMimeType: "application/json", + responseSchema: { + type: SchemaType.OBJECT, + properties: { + ref: {type: SchemaType.STRING, description: "The ref value provided in the input."}, + hebrew: {type: SchemaType.STRING, description: "The rewritten text"}, + }, + }, }, - body: JSON.stringify({ - model: "jamba-1.5-mini", - messages: [ - {role: "system", content: systemPrompt}, - {role: "user", content: "Here is the example" + userPrompt}, - ], - temperature: 0.4, - top_p: .5, - response_format: {type: "json_object"}, - }), }); - const result = await response?.json(); - const parsed = JSON.parse(result.choices[0].message.content).hebrew; // do not submit: try parse - console.log(parsed); - return parsed; -} + return new Gemini(model); +})(); -export async function vagomer(response: ApiResponse): Promise { +export async function vagomer(response: ApiResponse, logger: Logger): Promise { const allVerses = []; for (const section of response.sections) { if (section.commentary?.Verses) { for (const comment of section.commentary.Verses.comments) { - allVerses.push({hebrew: comment.he, originatesFrom: section.ref}); + // do not submit: need to handle non-single verses + allVerses.push({hebrew: cleanVerseText(comment.he), originatesFrom: section.ref}); } } } + const promises: Promise[] = []; for (const section of response.sections) { const hebrew = section.he as string; if (hebrew.search(VAGOMER) === -1) continue; - const modelResponse = await makeRequestJamba(hebrew, section.ref, allVerses); - if (modelResponse) { - // do not submit: strip trope and also {ס} - // do not submit: test that if the completions are removed, the text should be unchanged. - section.he = modelResponse; - } + const promise = languageModel.makeRequest(hebrew, section.ref, allVerses, logger) + .then(modelResponse => { + if (modelResponse) { + // do not submit: test that if the completions are removed, the text should be unchanged. + section.he = modelResponse; + } + return "done"; + }); + promises.push(promise); + } + const promiseResults = await Promise.allSettled(promises); + for (const result of promiseResults) { + if (result.status === "rejected") logger.error(result.reason); } return response; }