
Commit 8b0eeb4: Join tokens when possible

1 parent: 846bc57
11 files changed: +355 −186 lines

.changeset/gentle-cats-divide.md (+5)

@@ -0,0 +1,5 @@
+---
+"@code-hike/lighter": patch
+---
+
+Join tokens when possible
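
The effect of the patch, taken from the snapshot updates further down: whitespace-only tokens are no longer emitted on their own but merged into a neighboring token. A before/after sketch of the token arrays for a line like `const x = 1 ` (colors copied from the github-theme snapshot; the object shape here is illustrative):

// before this commit: every run of whitespace is its own token
const before = [
  { content: "const", color: "#FF7B72" },
  { content: " ", color: "#C9D1D9" },
  { content: "x", color: "#79C0FF" },
  { content: " ", color: "#C9D1D9" },
  { content: "=", color: "#FF7B72" },
  { content: " ", color: "#C9D1D9" },
  { content: "1", color: "#79C0FF" },
  { content: " ", color: "#C9D1D9" },
];

// after: whitespace is appended to the previous token
const after = [
  { content: "const ", color: "#FF7B72" },
  { content: "x ", color: "#79C0FF" },
  { content: "= ", color: "#FF7B72" },
  { content: "1 ", color: "#79C0FF" },
];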

lib/dist/browser.esm.mjs (+38 −7)

@@ -2398,6 +2398,7 @@ async function loadGrammarFromFile(path) {
 // MetadataConsts
 const FONT_STYLE_MASK = 0b00000000000000000111100000000000;
 const FOREGROUND_MASK = 0b00000000111111111000000000000000;
+const STYLE_MASK = 0b00000000111111111111100000000000;
 const FONT_STYLE_OFFSET = 11;
 const FOREGROUND_OFFSET = 15;
 const FontStyle = {
@@ -2420,26 +2421,56 @@ function tokenize(code, grammar, colors) {
         }));
     });
 }
-function tokenizeLine(grammar, stack, line) {
+function tokenizeLine(grammar, stack, line, config) {
     const { tokens, ruleStack } = grammar.tokenizeLine2(line, stack);
     const newTokens = [];
    let tokenEnd = line.length;
     for (let i = tokens.length - 2; i >= 0; i = i - 2) {
         const tokenStart = tokens[i];
         const metadata = tokens[i + 1];
-        newTokens.unshift({
-            content: line.slice(tokenStart, tokenEnd),
-            metadata,
-        });
+        const content = line.slice(tokenStart, tokenEnd);
+        newTokens.unshift({ content, metadata });
         tokenEnd = tokenStart;
     }
-    return { rawTokens: newTokens, nextStack: ruleStack };
+    let rawTokens = [];
+    if (config === null || config === void 0 ? void 0 : config.preserveWhitespace) {
+        rawTokens = newTokens;
+    }
+    else {
+        // join empty space tokens with the previous token (or the next token if there's no previous token)
+        for (let i = 0; i < newTokens.length; i++) {
+            const token = newTokens[i];
+            if (token.content.trim() !== "") {
+                // if has same style as previous token, join with previous token
+                const prev = rawTokens[rawTokens.length - 1];
+                if (prev &&
+                    (prev.metadata & STYLE_MASK) === (token.metadata & STYLE_MASK)) {
+                    prev.content += token.content;
+                }
+                else {
+                    rawTokens.push(token);
+                }
+            }
+            else if (rawTokens.length > 0) {
+                rawTokens[rawTokens.length - 1].content += token.content;
+            }
+            else if (i < newTokens.length - 1) {
+                newTokens[i + 1].content = token.content + newTokens[i + 1].content;
+            }
+            else {
+                rawTokens.push(token);
+            }
+        }
+    }
+    return { rawTokens, nextStack: ruleStack };
 }
 function tokenizeWithScopes(code, grammar, colors) {
     let stack = null;
     const lines = code.split(/\r?\n|\r/g);
     return lines.map((line) => {
-        const { rawTokens, nextStack } = tokenizeLine(grammar, stack, line);
+        const { rawTokens, nextStack } = tokenizeLine(grammar, stack, line, {
+            preserveWhitespace: true,
+        });
         const newTokens = rawTokens.map(({ content, metadata }) => ({
             content,
             style: getStyle(metadata, colors),
lib/dist/index.cjs.js (+38 −7)

(Generated file; diff not rendered by default on GitHub.)

lib/dist/index.esm.mjs (+38 −7)

@@ -2746,6 +2746,7 @@ async function loadGrammarFromFile(path) {
 // MetadataConsts
 const FONT_STYLE_MASK = 0b00000000000000000111100000000000;
 const FOREGROUND_MASK = 0b00000000111111111000000000000000;
+const STYLE_MASK = 0b00000000111111111111100000000000;
 const FONT_STYLE_OFFSET = 11;
 const FOREGROUND_OFFSET = 15;
 const FontStyle = {
@@ -2768,26 +2769,56 @@ function tokenize(code, grammar, colors) {
         }));
     });
 }
-function tokenizeLine(grammar, stack, line) {
+function tokenizeLine(grammar, stack, line, config) {
     const { tokens, ruleStack } = grammar.tokenizeLine2(line, stack);
     const newTokens = [];
     let tokenEnd = line.length;
     for (let i = tokens.length - 2; i >= 0; i = i - 2) {
         const tokenStart = tokens[i];
         const metadata = tokens[i + 1];
-        newTokens.unshift({
-            content: line.slice(tokenStart, tokenEnd),
-            metadata,
-        });
+        const content = line.slice(tokenStart, tokenEnd);
+        newTokens.unshift({ content, metadata });
         tokenEnd = tokenStart;
     }
-    return { rawTokens: newTokens, nextStack: ruleStack };
+    let rawTokens = [];
+    if (config === null || config === void 0 ? void 0 : config.preserveWhitespace) {
+        rawTokens = newTokens;
+    }
+    else {
+        // join empty space tokens with the previous token (or the next token if there's no previous token)
+        for (let i = 0; i < newTokens.length; i++) {
+            const token = newTokens[i];
+            if (token.content.trim() !== "") {
+                // if has same style as previous token, join with previous token
+                const prev = rawTokens[rawTokens.length - 1];
+                if (prev &&
+                    (prev.metadata & STYLE_MASK) === (token.metadata & STYLE_MASK)) {
+                    prev.content += token.content;
+                }
+                else {
+                    rawTokens.push(token);
+                }
+            }
+            else if (rawTokens.length > 0) {
+                rawTokens[rawTokens.length - 1].content += token.content;
+            }
+            else if (i < newTokens.length - 1) {
+                newTokens[i + 1].content = token.content + newTokens[i + 1].content;
+            }
+            else {
+                rawTokens.push(token);
+            }
+        }
+    }
+    return { rawTokens, nextStack: ruleStack };
 }
 function tokenizeWithScopes(code, grammar, colors) {
     let stack = null;
     const lines = code.split(/\r?\n|\r/g);
     return lines.map((line) => {
-        const { rawTokens, nextStack } = tokenizeLine(grammar, stack, line);
+        const { rawTokens, nextStack } = tokenizeLine(grammar, stack, line, {
+            preserveWhitespace: true,
+        });
         const newTokens = rawTokens.map(({ content, metadata }) => ({
             content,
             style: getStyle(metadata, colors),

lib/src/tokenizer.ts (+43 −7)

@@ -7,6 +7,7 @@ import { Line, Token } from "./annotations";
 const FONT_STYLE_MASK = 0b00000000000000000111100000000000;
 const FOREGROUND_MASK = 0b00000000111111111000000000000000;
 const BACKGROUND_MASK = 0b11111111000000000000000000000000;
+const STYLE_MASK = 0b00000000111111111111100000000000;
 const FONT_STYLE_OFFSET = 11;
 const FOREGROUND_OFFSET = 15;
 const BACKGROUND_OFFSET = 24;
@@ -35,20 +36,53 @@ export function tokenize(code: string, grammar: IGrammar, colors: string[]) {
 
 type RawToken = { content: string; metadata: number };
 
-function tokenizeLine(grammar: IGrammar, stack: StackElement, line: string) {
+function tokenizeLine(
+  grammar: IGrammar,
+  stack: StackElement,
+  line: string,
+  config?: { preserveWhitespace?: boolean }
+) {
   const { tokens, ruleStack } = grammar.tokenizeLine2(line, stack);
   const newTokens: RawToken[] = [];
   let tokenEnd = line.length;
   for (let i = tokens.length - 2; i >= 0; i = i - 2) {
     const tokenStart = tokens[i];
     const metadata = tokens[i + 1];
-    newTokens.unshift({
-      content: line.slice(tokenStart, tokenEnd),
-      metadata,
-    });
+    const content = line.slice(tokenStart, tokenEnd);
+    newTokens.unshift({ content, metadata });
     tokenEnd = tokenStart;
   }
-  return { rawTokens: newTokens, nextStack: ruleStack };
+
+  let rawTokens: RawToken[] = [];
+
+  if (config?.preserveWhitespace) {
+    rawTokens = newTokens;
+  } else {
+    // join empty space tokens with the previous token (or the next token if there's no previous token)
+    for (let i = 0; i < newTokens.length; i++) {
+      const token = newTokens[i];
+      if (token.content.trim() !== "") {
+        // if has same style as previous token, join with previous token
+        const prev = rawTokens[rawTokens.length - 1];
+        if (
+          prev &&
+          (prev.metadata & STYLE_MASK) === (token.metadata & STYLE_MASK)
+        ) {
+          prev.content += token.content;
+        } else {
+          rawTokens.push(token);
+        }
+      } else if (rawTokens.length > 0) {
+        rawTokens[rawTokens.length - 1].content += token.content;
+      } else if (i < newTokens.length - 1) {
+        newTokens[i + 1].content = token.content + newTokens[i + 1].content;
+      } else {
+        rawTokens.push(token);
+      }
+    }
+  }
+
+  return { rawTokens, nextStack: ruleStack };
 }
 
 export function tokenizeWithScopes(
@@ -60,7 +94,9 @@ export function tokenizeWithScopes(
   const lines = code.split(/\r?\n|\r/g);
 
   return lines.map((line) => {
-    const { rawTokens, nextStack } = tokenizeLine(grammar, stack, line);
+    const { rawTokens, nextStack } = tokenizeLine(grammar, stack, line, {
+      preserveWhitespace: true,
+    });
     const newTokens = rawTokens.map(({ content, metadata }) => ({
       content,
       style: getStyle(metadata, colors),
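
For reference, here is the joining pass in isolation: a minimal runnable sketch of the same logic (the helper name joinTokens and the sample metadata values are illustrative, not part of the library):

type RawToken = { content: string; metadata: number };
const STYLE_MASK = 0b00000000111111111111100000000000;

function joinTokens(tokens: RawToken[]): RawToken[] {
  const joined: RawToken[] = [];
  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i];
    if (token.content.trim() !== "") {
      const prev = joined[joined.length - 1];
      if (prev && (prev.metadata & STYLE_MASK) === (token.metadata & STYLE_MASK)) {
        prev.content += token.content; // same style: extend the previous token
      } else {
        joined.push(token);
      }
    } else if (joined.length > 0) {
      joined[joined.length - 1].content += token.content; // whitespace: append to previous
    } else if (i < tokens.length - 1) {
      tokens[i + 1].content = token.content + tokens[i + 1].content; // leading whitespace: prepend to next
    } else {
      joined.push(token); // whitespace-only line
    }
  }
  return joined;
}

// Hypothetical metadata values; only the STYLE_MASK bits matter for joining.
const keyword = 1 << 15; // foreground color index 1
const ident = 2 << 15; // foreground color index 2
console.log(
  joinTokens([
    { content: "const", metadata: keyword },
    { content: " ", metadata: ident },
    { content: "x", metadata: ident },
  ])
);
// → [{ content: "const ", metadata: keyword }, { content: "x", metadata: ident }]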

lib/test/__snapshots__/browser.test.ts.snap (+6 −42)

@@ -58,17 +58,11 @@ exports[`extract annottations 2`] = `
   "lineNumber": 1,
   "tokens": [
     {
-      "content": "const",
+      "content": "const ",
       "style": {
         "color": "#569CD6",
       },
     },
-    {
-      "content": " ",
-      "style": {
-        "color": "#D4D4D4",
-      },
-    },
     {
       "content": "x",
       "style": {
@@ -104,17 +98,11 @@ exports[`extract annottations 2`] = `
   "lineNumber": 2,
   "tokens": [
     {
-      "content": "const",
+      "content": "const ",
       "style": {
         "color": "#569CD6",
       },
     },
-    {
-      "content": " ",
-      "style": {
-        "color": "#D4D4D4",
-      },
-    },
     {
       "content": "y",
       "style": {
@@ -278,53 +266,29 @@ exports[`highlight html with theme 1`] = `
     },
   },
   {
-    "content": "const",
+    "content": "const ",
     "style": {
       "color": "#FF7B72",
     },
   },
   {
-    "content": " ",
-    "style": {
-      "color": "#C9D1D9",
-    },
-  },
-  {
-    "content": "x",
+    "content": "x ",
     "style": {
       "color": "#79C0FF",
     },
   },
   {
-    "content": " ",
-    "style": {
-      "color": "#C9D1D9",
-    },
-  },
-  {
-    "content": "=",
+    "content": "= ",
     "style": {
       "color": "#FF7B72",
     },
   },
   {
-    "content": " ",
-    "style": {
-      "color": "#C9D1D9",
-    },
-  },
-  {
-    "content": "1",
+    "content": "1 ",
     "style": {
       "color": "#79C0FF",
     },
   },
-  {
-    "content": " ",
-    "style": {
-      "color": "#C9D1D9",
-    },
-  },
   {
     "content": "</",
     "style": {
