Skip to content

Commit e2c59bd

Browse files
committed
Better comment detection
1 parent 6b596bc commit e2c59bd

9 files changed

+243
-9
lines changed

.changeset/early-peas-protect.md

+5
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@code-hike/lighter": patch
3+
---
4+
5+
Better comment detection

lib/dist/browser.esm.mjs

+1-1
Large diffs are not rendered by default.

lib/dist/index.cjs.js

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/dist/index.esm.mjs

+1-1
Large diffs are not rendered by default.

lib/dist/worker.esm.mjs

+1-1
Large diffs are not rendered by default.

lib/src/comments.ts

+57-2
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,8 @@ export function extractCommentsFromCode(
6363
const { annotations, lineWithoutComments } = getAnnotationsFromLine(
6464
line,
6565
annotationExtractor,
66-
lineNumber
66+
lineNumber,
67+
lang
6768
);
6869

6970
allAnnotations.push(...annotations);
@@ -99,14 +100,68 @@ export function extractCommentsFromCode(
99100
return { newCode, annotations };
100101
}
101102

103+
// these are the langs that don't have a PUNCTUATION token
104+
const prefixes = {
105+
"actionscript-3": "//",
106+
ada: "--",
107+
asm: "#",
108+
dart: "//",
109+
fsharp: "//",
110+
graphql: "#",
111+
http: "#",
112+
rust: "//",
113+
sparql: "#",
114+
wgsl: "//",
115+
jsonnet: "//",
116+
kql: "//",
117+
zenscript: "//",
118+
kusto: "//",
119+
turtle: "#",
120+
abap: "*",
121+
beancount: ";",
122+
kotlin: "//",
123+
hlsl: "//",
124+
berry: "#",
125+
cypher: "//",
126+
elm: "--",
127+
nix: "#",
128+
viml: '"',
129+
solidity: "//",
130+
bat: "REM",
131+
shaderlab: "//",
132+
sas: "*",
133+
};
134+
102135
function getAnnotationsFromLine(
103136
tokens: Token[],
104137
annotationExtractor: AnnotationExtractor,
105-
lineNumber: number
138+
lineNumber: number,
139+
lang: string
106140
): {
107141
annotations: RawAnnotation[];
108142
lineWithoutComments: Token[] | null;
109143
} {
144+
// convert prefix to PUNCTUATION
145+
if (
146+
lang in prefixes &&
147+
tokens.some((token) => token.style.color === COMMENT)
148+
) {
149+
const prefix = prefixes[lang];
150+
tokens = tokens.flatMap((token) => {
151+
if (token.style.color === COMMENT && token.content.startsWith(prefix)) {
152+
const content = token.content.slice(prefix.length);
153+
const t = [
154+
{ content: prefix, style: { color: PUNCTUATION } },
155+
] as Token[];
156+
if (content.length) {
157+
t.push({ content, style: token.style });
158+
}
159+
return t;
160+
}
161+
return [token];
162+
});
163+
}
164+
110165
// if no punctuation return empty
111166
if (!tokens.some((token) => token.style.color === PUNCTUATION)) {
112167
return { annotations: [], lineWithoutComments: tokens };

lib/src/index.ts

+2-2
Original file line number | Diff line number | Diff line change
@@ -185,12 +185,12 @@ export async function extractAnnotations(
185185
}
186186

187187
await preloadGrammars([lang]);
188-
const { grammar } = getGrammar(lang);
188+
const { grammar, langId } = getGrammar(lang);
189189

190190
const { newCode, annotations } = extractCommentsFromCode(
191191
code,
192192
grammar,
193-
lang,
193+
langId,
194194
annotationExtractor
195195
);
196196

lib/test/annotations.ts

+4-1
Original file line number | Diff line number | Diff line change
@@ -183,7 +183,10 @@ function C() {
183183
}
184184

185185
const extractor = (comment: string) => {
186-
const regex = /\s*(!?[\w-]+)?(\([^\)]*\)|\[[^\]]*\])?(.*)$/;
186+
const annotationPrefix = "!";
187+
const regex = new RegExp(
188+
`\\s*(${annotationPrefix}?[\\w-]+)?(\\([^\\)]*\\)|\\[[^\\]]*\\])?(.*)$`
189+
);
187190
const match = comment.match(regex);
188191
const name = match[1];
189192
const rangeString = match[2];

lib/test/comments.test.ts

+171
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,171 @@
1+
import { describe, expect, test } from "vitest";
2+
import { extractAnnotations, highlight } from "..";
3+
let codes = [
4+
// Single-line comment using //
5+
["// foo", "actionscript-3"],
6+
["// foo", "apex"],
7+
["// foo", "c#"],
8+
["// foo", "cpp"],
9+
["// foo", "cs"],
10+
["// foo", "csharp"],
11+
["// foo", "dart"],
12+
["// foo", "f#"],
13+
["// foo", "go"],
14+
["// foo", "groovy"],
15+
["// foo", "java"],
16+
["// foo", "javascript"],
17+
["// foo", "js"],
18+
["// foo", "jsx"],
19+
["// foo", "less"],
20+
["// foo", "objective-c"],
21+
["// foo", "objective-cpp"],
22+
["// foo", "rust"],
23+
["// foo", "scala"],
24+
["// foo", "swift"],
25+
["// foo", "typescript"],
26+
["// foo", "ts"],
27+
["// foo", "tsx"],
28+
["// foo", "verilog"],
29+
["// foo", "wgsl"],
30+
["// foo", "jison"],
31+
["// foo", "jsonnet"],
32+
["// foo", "kql"],
33+
["// foo", "zenscript"],
34+
["// foo", "kusto"],
35+
36+
// Single-line comment using #
37+
["# foo", "asm"],
38+
["# foo", "bash"],
39+
["# foo", "coffee"],
40+
["# foo", "crystal"],
41+
["# foo", "docker"],
42+
["# foo", "dockerfile"],
43+
["# foo", "elixir"],
44+
["# foo", "fish"],
45+
["# foo", "gdscript"],
46+
["# foo", "graphql"],
47+
["# foo", "http"],
48+
["# foo", "ini"],
49+
["# foo", "julia"],
50+
["# foo", "make"],
51+
["# foo", "makefile"],
52+
["# foo", "perl"],
53+
["# foo", "perl6"],
54+
["# foo", "python"],
55+
["# foo", "py"],
56+
["# foo", "r"],
57+
["# foo", "raku"],
58+
["# foo", "shell"],
59+
["# foo", "shellscript"],
60+
["# foo", "tcl"],
61+
["# foo", "toml"],
62+
["# foo", "txt"],
63+
["# foo", "yaml"],
64+
["# foo", "yml"],
65+
["# foo", "zsh"],
66+
["# foo", "turtle"],
67+
68+
// Single-line comment using ;
69+
["; foo", "lisp"],
70+
["; foo", "clj"],
71+
["; foo", "clojure"],
72+
["; foo", "scheme"],
73+
[`" foo"`, "smalltalk"],
74+
75+
// Single-line comment using --
76+
["-- foo", "ada"],
77+
["-- foo", "haskell"],
78+
["-- foo", "sql"],
79+
["-- foo", "lua"],
80+
81+
// Single-line comment using %
82+
["% foo", "matlab"],
83+
["% foo", "tex"],
84+
85+
// Special single-line comment formats
86+
["{ foo}", "pascal"],
87+
["# foo", "sh"],
88+
["# foo", "sparql"],
89+
["# foo", "shell"],
90+
["# foo", "sh"],
91+
["# foo", "tcl"],
92+
["<!-- foo-->", "vue-html"],
93+
["* foo", "abap"],
94+
["; foo", "beancount"],
95+
["' foo", "vb"],
96+
["<!-- foo-->", "html"],
97+
98+
// more
99+
["# foo", "imba"],
100+
["// foo", "kotlin"],
101+
["; foo", "clj"],
102+
["; foo", "clojure"],
103+
["% foo", "erl"],
104+
["<%# foo%>", "erb"],
105+
["% foo", "erlang"],
106+
["// foo", "glimmer-js"],
107+
["// foo", "glimmer-ts"],
108+
["; foo", "reg"],
109+
["* foo", "stata"],
110+
[`" foo`, "vim"],
111+
[`" foo`, "viml"],
112+
[`" foo`, "vimscript"],
113+
["// foo", "hlsl"],
114+
["# foo", "berry"],
115+
["// foo", "cypher"],
116+
["-- foo", "elm"],
117+
["# foo", "nix"],
118+
["// foo", "solidity"],
119+
["REM foo", "bat"],
120+
["REM foo", "batch"],
121+
["// foo", "shader"],
122+
["// foo", "shaderlab"],
123+
["* foo", "sas"],
124+
125+
// fail
126+
// ["// foo", "apl"],
127+
// ["# foo", "shellsession"],
128+
// ["(* foo *)", "ocaml"],
129+
];
130+
131+
// codes = [
132+
// // test
133+
// ["// foo", "actionscript-3"],
134+
// ];
135+
136+
describe.each(codes)("extract annotations", (code, lang) => {
137+
test(lang, async () => {
138+
let comments = [];
139+
const extracted = await extractAnnotations(code, lang, (comment) => {
140+
comments.push(comment);
141+
return null;
142+
});
143+
144+
// if (comments.length === 0) {
145+
// const h = await highlight(extracted.code, lang, "dark-plus", {
146+
// scopes: true,
147+
// });
148+
// const line = h.lines[0];
149+
// if (line.length == 1) {
150+
// const token = line[0];
151+
// if (token.scopes[0].startsWith("comment.line")) {
152+
// console.log(lang, token.content, token.scopes);
153+
// } else {
154+
// console.log(lang, token.content, token.scopes);
155+
// }
156+
// } else {
157+
// console.log(line);
158+
// // TODO fix this
159+
// }
160+
// }
161+
162+
// const h = await highlight(extracted.code, lang, "dark-plus", {
163+
// scopes: true,
164+
// });
165+
// const line = h.lines[0];
166+
// console.log(line);
167+
168+
expect(comments).toHaveLength(1);
169+
expect(comments[0]).toBe(" foo");
170+
});
171+
});

0 commit comments

Comments
 (0)