diff --git a/.gitmodules b/.gitmodules index 0ff9f771d..4d4ebfa12 100644 --- a/.gitmodules +++ b/.gitmodules @@ -53,3 +53,6 @@ [submodule "parsers/rust"] path = parsers/rust url = https://github.com/tree-sitter/tree-sitter-rust.git +[submodule "parsers/ocaml"] + path = parsers/ocaml + url = https://github.com/tree-sitter/tree-sitter-ocaml.git diff --git a/lib/src/lib/language.ts b/lib/src/lib/language.ts index a06088640..0b3eaed4d 100644 --- a/lib/src/lib/language.ts +++ b/lib/src/lib/language.ts @@ -115,6 +115,7 @@ export class LanguagePicker { new ProgrammingLanguage("python", [".py", ".py3"]), new ProgrammingLanguage("php", [".php", ".php3", ".php4", ".php5", ".php7", ".phps", ".phpt", ".phtml"]), new ProgrammingLanguage("modelica", [".mo", ".mos"]), + new ProgrammingLanguage("ocaml", [".ml"]), new ProgrammingLanguage("java", [".java"]), new ProgrammingLanguage("javascript", [".js"]), new ProgrammingLanguage("elm", [".elm"]), diff --git a/lib/src/test/snapshots/tokenizer.test.ts.md b/lib/src/test/snapshots/tokenizer.test.ts.md index f4b73991d..fa68dc3d7 100644 --- a/lib/src/test/snapshots/tokenizer.test.ts.md +++ b/lib/src/test/snapshots/tokenizer.test.ts.md @@ -18651,6 +18651,598 @@ Generated by [AVA](https://avajs.dev). ')', ] +## tokenizer works for ocaml + +> stable tokenization + + [ + '(', + 'compilation_unit', + '(', + 'value_definition', + '(', + 'let_binding', + '(', + 'value_name', + ')', + '(', + 'parameter', + '(', + 'value_pattern', + ')', + ')', + '(', + 'infix_expression', + '(', + 'infix_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'rel_operator', + ')', + '(', + 'character', + '(', + 'character_content', + ')', + ')', + ')', + '(', + 'and_operator', + ')', + '(', + 'infix_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'rel_operator', + ')', + '(', + 'character', + '(', + 'character_content', + ')', + ')', + ')', + ')', + ')', + ')', + '(', + 'value_definition', + '(', + 'let_binding', + '(', + 'value_name', + ')', + '(', + 'parameter', + '(', + 'value_pattern', + ')', + ')', + '(', + 'infix_expression', + '(', + 'infix_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'rel_operator', + ')', + '(', + 'character', + '(', + 'character_content', + ')', + ')', + ')', + '(', + 'and_operator', + ')', + '(', + 'infix_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'rel_operator', + ')', + '(', + 'character', + '(', + 'character_content', + ')', + ')', + ')', + ')', + ')', + ')', + '(', + 'value_definition', + '(', + 'let_binding', + '(', + 'value_name', + ')', + '(', + 'parameter', + '(', + 'value_pattern', + ')', + ')', + '(', + 'parameter', + '(', + 'value_pattern', + ')', + ')', + '(', + 'let_expression', + '(', + 'value_definition', + '(', + 'let_binding', + '(', + 'value_name', + ')', + '(', + 'string', + '(', + 'string_content', + ')', + ')', + ')', + '(', + 'let_binding', + '(', + 'value_name', + ')', + '(', + 'string', + '(', + 'string_content', + ')', + ')', + ')', + ')', + '(', + 'let_expression', + '(', + 'value_definition', + '(', + 'let_binding', + '(', + 'value_name', + ')', + '(', + 'parameter', + '(', + 'value_pattern', + ')', + ')', + '(', + 'if_expression', + '(', + 'infix_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'rel_operator', + ')', + '(', + 'number', + ')', + ')', + '(', + 'then_clause', + '(', + 'application_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'parenthesized_expression', + '(', + 'infix_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'add_operator', + ')', + '(', + 'number', + ')', + ')', + ')', + ')', + ')', + '(', + 'else_clause', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + ')', + ')', + ')', + ')', + '(', + 'let_expression', + '(', + 'value_definition', + '(', + 'let_binding', + '(', + 'value_name', + ')', + '(', + 'infix_expression', + '(', + 'parenthesized_expression', + '(', + 'application_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + ')', + ')', + '(', + 'mult_operator', + ')', + '(', + 'number', + ')', + ')', + ')', + ')', + '(', + 'let_expression', + '(', + 'value_definition', + '(', + 'let_binding', + '(', + 'value_name', + ')', + '(', + 'infix_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'add_operator', + ')', + '(', + 'parenthesized_expression', + '(', + 'application_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'character', + '(', + 'character_content', + ')', + ')', + ')', + ')', + ')', + ')', + '(', + 'let_binding', + '(', + 'value_name', + ')', + '(', + 'infix_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'add_operator', + ')', + '(', + 'parenthesized_expression', + '(', + 'application_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'character', + '(', + 'character_content', + ')', + ')', + ')', + ')', + ')', + ')', + ')', + '(', + 'application_expression', + '(', + 'value_path', + '(', + 'module_path', + '(', + 'module_name', + ')', + ')', + '(', + 'value_name', + ')', + ')', + '(', + 'parenthesized_expression', + '(', + 'fun_expression', + '(', + 'parameter', + '(', + 'value_pattern', + ')', + ')', + '(', + 'if_expression', + '(', + 'application_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + ')', + '(', + 'then_clause', + '(', + 'let_expression', + '(', + 'value_definition', + '(', + 'let_binding', + '(', + 'value_name', + ')', + '(', + 'infix_expression', + '(', + 'parenthesized_expression', + '(', + 'infix_expression', + '(', + 'parenthesized_expression', + '(', + 'application_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + ')', + ')', + '(', + 'add_operator', + ')', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + ')', + ')', + '(', + 'mult_operator', + ')', + '(', + 'number', + ')', + ')', + ')', + ')', + '(', + 'string_get_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + ')', + ')', + ')', + '(', + 'else_clause', + '(', + 'if_expression', + '(', + 'application_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + ')', + '(', + 'then_clause', + '(', + 'let_expression', + '(', + 'value_definition', + '(', + 'let_binding', + '(', + 'value_name', + ')', + '(', + 'infix_expression', + '(', + 'parenthesized_expression', + '(', + 'infix_expression', + '(', + 'parenthesized_expression', + '(', + 'application_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + ')', + ')', + '(', + 'add_operator', + ')', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + ')', + ')', + '(', + 'mult_operator', + ')', + '(', + 'number', + ')', + ')', + ')', + ')', + '(', + 'string_get_expression', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + ')', + ')', + ')', + '(', + 'else_clause', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + ')', + ')', + ')', + ')', + ')', + ')', + '(', + 'value_path', + '(', + 'value_name', + ')', + ')', + ')', + ')', + ')', + ')', + ')', + ')', + ')', + ')', + ] + ## tokenizer works for r > stable tokenization diff --git a/lib/src/test/snapshots/tokenizer.test.ts.snap b/lib/src/test/snapshots/tokenizer.test.ts.snap index 6988619d8..91dcebb27 100644 Binary files a/lib/src/test/snapshots/tokenizer.test.ts.snap and b/lib/src/test/snapshots/tokenizer.test.ts.snap differ diff --git a/lib/src/test/tokenizer.test.ts b/lib/src/test/tokenizer.test.ts index f3609d079..73d27dfe0 100644 --- a/lib/src/test/tokenizer.test.ts +++ b/lib/src/test/tokenizer.test.ts @@ -17,6 +17,7 @@ const languageFiles = { "python": "../samples/python/caesar.py", "php": "../samples/php/caesar.php", "modelica": "../samples/modelica/sample.mo", + "ocaml": "../samples/ocaml/Caesar.ml", "r": "../samples/r/caesar.R", "rust": "../samples/rust/caesar.rs", "scala": "../samples/scala/Caesar.scala", @@ -185,4 +186,4 @@ test("tokens should contain comments when includeComments is true", async t => { const tokenizer = await language.createTokenizer({ includeComments: true }); const { tokens } = tokenizer.tokenizeFile(file); t.true(tokens.includes("comment")); -}); \ No newline at end of file +}); diff --git a/parsers/binding.gyp b/parsers/binding.gyp index f82b5fba6..f0d9cca52 100644 --- a/parsers/binding.gyp +++ b/parsers/binding.gyp @@ -10,6 +10,7 @@ "java/binding.gyp", "javascript/binding.gyp", "modelica/binding.gyp", + "ocaml/binding.gyp", "php/binding.gyp", "python/binding.gyp", "r/binding.gyp", diff --git a/parsers/index.js b/parsers/index.js index 09632e6a4..a2e9634bf 100644 --- a/parsers/index.js +++ b/parsers/index.js @@ -30,6 +30,9 @@ parsers.javascript.nodeTypeInfo = require("./javascript/src/node-types.json"); parsers.modelica = require("./build/Release/tree_sitter_modelica_binding"); parsers.modelica.nodeTypeInfo = require("./modelica/src/node-types.json"); +parsers.ocaml = require("./build/Release/tree_sitter_ocaml_binding").ocaml; +parsers.ocaml.nodeTypeInfo = require("./ocaml/grammars/ocaml/src/node-types.json"); + // Note: this parser provides php_only and php (includes HTML) parsers.php = require("./build/Release/tree_sitter_php_binding").php; parsers.php.nodeTypeInfo = require("./php/php/src/node-types.json"); diff --git a/parsers/ocaml b/parsers/ocaml new file mode 160000 index 000000000..98c2130c5 --- /dev/null +++ b/parsers/ocaml @@ -0,0 +1 @@ +Subproject commit 98c2130c59ca7553b47086f91c5d22180151ad55 diff --git a/samples/ocaml/Caesar.ml b/samples/ocaml/Caesar.ml new file mode 100644 index 000000000..0d88566e7 --- /dev/null +++ b/samples/ocaml/Caesar.ml @@ -0,0 +1,25 @@ +let islower c = + c >= 'a' && c <= 'z' + +let isupper c = + c >= 'A' && c <= 'Z' + +let rot x str = + let upchars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + and lowchars = "abcdefghijklmnopqrstuvwxyz" in + let rec decal x = + if x < 0 then decal (x + 26) else x + in + let x = (decal x) mod 26 in + let decal_up = x - (int_of_char 'A') + and decal_low = x - (int_of_char 'a') in + String.map (fun c -> + if islower c then + let j = ((int_of_char c) + decal_low) mod 26 in + lowchars.[j] + else if isupper c then + let j = ((int_of_char c) + decal_up) mod 26 in + upchars.[j] + else + c + ) str diff --git a/web/src/components/upload/UploadFormCard.vue b/web/src/components/upload/UploadFormCard.vue index a5332b784..452710040 100644 --- a/web/src/components/upload/UploadFormCard.vue +++ b/web/src/components/upload/UploadFormCard.vue @@ -82,6 +82,10 @@ const languages = [ name: "Modelica", value: "modelica" }, + { + name: "OCaml", + value: "ocaml" + }, { name: "PHP", value: "php"