Skip to content

Commit f41a27e

Browse files
authored
Don't normalize strings in the CLI (#127)
* Normalize multiline strings in the formatter so we don't have to normalize in the CLI Allowing us to actually take advantage of the `Unchanged` optimization with CRLF endings, and correctly handle `--check` too * Add a parser snapshot test for multiline strings with CRLF line endings To prove we can parse these line endings, and to prove that the CRLF ends up in the `RStringValue` * Add CHANGELOG bullets * Mention why no `line_ending` crate usage * Don't use `Cell` after all, since it's more mental overhead than it's worth
1 parent 92887d3 commit f41a27e

File tree

12 files changed

+221
-22
lines changed

12 files changed

+221
-22
lines changed

.gitattributes

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
* text=auto eol=lf
22

33
# Windows specific test files where we need CRLF endings
4+
crates/air/tests/fixtures/crlf/*.R text eol=crlf
45
crates/air_r_formatter/tests/specs/r/crlf/*.R text eol=crlf
6+
crates/air_r_parser/tests/snapshots/ok/crlf/*.R text eol=crlf

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
# Development version
44

5+
- `air format` is now faster on Windows when nothing changes (#90).
6+
7+
- `air format --check` now works correctly with Windows line endings (#123).
8+
59
- Magic line breaks are now supported in left assignment (#118).
610

711

crates/air/src/commands/format.rs

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -179,24 +179,11 @@ fn format_file(path: PathBuf, mode: FormatMode) -> Result<FormatFileAction, Form
179179
};
180180
let options = RFormatOptions::default().with_line_ending(line_ending);
181181

182-
let source = line_ending::normalize(source);
183182
let formatted = match format_source(source.as_str(), options) {
184183
Ok(formatted) => formatted,
185184
Err(err) => return Err(FormatCommandError::Format(path.clone(), err)),
186185
};
187186

188-
// TODO: We rarely ever take advantage of this optimization on Windows right
189-
// now. We always normalize on entry but we apply the requested line ending
190-
// on exit (so on Windows we often infer CRLF on entry and normalize to
191-
// LF, but apply CRLF on exit so `source` and `new` always have different
192-
// line endings). We probably need to compare pre-normalized against
193-
// post-formatted output?
194-
//
195-
// Ah, no, the right thing to do is for the cli to not care about normalizing
196-
// line endings. This is mostly useful in the LSP for all the document manipulation
197-
// we are going to do there. In the cli, we want to format a whole bunch of files
198-
// so we really want this optimization, and since the formatter and parser can handle
199-
// windows line endings just fine, we should not normalize here.
200187
match formatted {
201188
FormattedSource::Formatted(new) => {
202189
match mode {
@@ -231,8 +218,6 @@ pub(crate) enum FormatSourceError {
231218
}
232219

233220
/// Formats a vector of `source` code
234-
///
235-
/// Safety: `source` should already be normalized to Unix line endings
236221
pub(crate) fn format_source(
237222
source: &str,
238223
options: RFormatOptions,
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
"multiline
2+
string"

crates/air/tests/format.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
use std::path::Path;
2+
use std::path::PathBuf;
3+
14
use air::args::Args;
25
use air::run;
36
use air::status::ExitStatus;
@@ -21,3 +24,22 @@ fn default_options() -> anyhow::Result<()> {
2124
assert_eq!(err, ExitStatus::Success);
2225
Ok(())
2326
}
27+
28+
#[test]
29+
fn test_check_returns_cleanly_for_multiline_strings_with_crlf_line_endings() -> anyhow::Result<()> {
30+
let fixtures = path_fixtures();
31+
let path = fixtures.join("crlf").join("multiline_string_value.R");
32+
let path = path.to_str().unwrap();
33+
34+
let args = Args::parse_from(["", "format", path, "--check"]);
35+
let err = run(args)?;
36+
37+
assert_eq!(err, ExitStatus::Success);
38+
Ok(())
39+
}
40+
41+
fn path_fixtures() -> PathBuf {
42+
Path::new(env!("CARGO_MANIFEST_DIR"))
43+
.join("tests")
44+
.join("fixtures")
45+
}

crates/air_formatter_test/src/spec.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,6 @@ impl<'a> SpecTestFile<'a> {
2828

2929
let input_code = std::fs::read_to_string(input_file).unwrap();
3030

31-
// Normalize to Unix line endings
32-
let input_code = line_ending::normalize(input_code);
33-
3431
// For the whole file, not a specific range right now
3532
let range_start_index = None;
3633
let range_end_index = None;

crates/air_r_formatter/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ mod prelude;
2525
mod r;
2626
pub(crate) mod separated;
2727
mod statement_body;
28+
mod string_literal;
2829

2930
#[rustfmt::skip]
3031
mod generated;

crates/air_r_formatter/src/r/auxiliary/string_value.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use crate::prelude::*;
2+
use crate::string_literal::FormatStringLiteralToken;
23
use air_r_syntax::RStringValue;
34
use air_r_syntax::RStringValueFields;
45
use biome_formatter::write;
@@ -8,6 +9,6 @@ pub(crate) struct FormatRStringValue;
89
impl FormatNodeRule<RStringValue> for FormatRStringValue {
910
fn fmt_fields(&self, node: &RStringValue, f: &mut RFormatter) -> FormatResult<()> {
1011
let RStringValueFields { value_token } = node.as_fields();
11-
write![f, [value_token.format()]]
12+
write![f, [FormatStringLiteralToken::new(&value_token?)]]
1213
}
1314
}
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
use air_r_syntax::RSyntaxKind::R_STRING_LITERAL;
2+
use air_r_syntax::RSyntaxToken;
3+
use biome_formatter::prelude::syntax_token_cow_slice;
4+
use biome_formatter::prelude::Formatter;
5+
use biome_formatter::trivia::format_replaced;
6+
use biome_formatter::Format;
7+
use biome_formatter::FormatResult;
8+
use std::borrow::Cow;
9+
10+
use crate::context::RFormatContext;
11+
use crate::RFormatter;
12+
13+
/// Helper utility for formatting a string literal token
14+
///
15+
/// The main job of this utility is to `normalize()` the string and handle the
16+
/// complicated way we have to call [format_replaced] with that normalized result.
17+
pub(crate) struct FormatStringLiteralToken<'token> {
18+
/// The string literal token to format
19+
token: &'token RSyntaxToken,
20+
}
21+
22+
impl<'token> FormatStringLiteralToken<'token> {
23+
pub(crate) fn new(token: &'token RSyntaxToken) -> Self {
24+
Self { token }
25+
}
26+
27+
fn normalize(&self) -> FormatNormalizedStringLiteralToken {
28+
let token = self.token;
29+
30+
debug_assert!(
31+
matches!(token.kind(), R_STRING_LITERAL),
32+
"Found kind {:?}",
33+
token.kind()
34+
);
35+
36+
let text = token.text_trimmed();
37+
let text = normalize_string(text);
38+
39+
FormatNormalizedStringLiteralToken { token, text }
40+
}
41+
}
42+
43+
impl Format<RFormatContext> for FormatStringLiteralToken<'_> {
44+
fn fmt(&self, f: &mut RFormatter) -> FormatResult<()> {
45+
self.normalize().fmt(f)
46+
}
47+
}
48+
49+
struct FormatNormalizedStringLiteralToken<'token> {
50+
/// The original string literal token before normalization
51+
token: &'token RSyntaxToken,
52+
53+
/// The normalized text
54+
text: Cow<'token, str>,
55+
}
56+
57+
impl Format<RFormatContext> for FormatNormalizedStringLiteralToken<'_> {
58+
fn fmt(&self, f: &mut Formatter<RFormatContext>) -> FormatResult<()> {
59+
format_replaced(
60+
self.token,
61+
&syntax_token_cow_slice(
62+
// Cloning the `Cow<str>` is cheap since 99% of the time it will be the
63+
// `Borrowed` variant. Only with multiline strings on Windows will it
64+
// ever actually clone the underlying string.
65+
self.text.clone(),
66+
self.token,
67+
self.token.text_trimmed_range().start(),
68+
),
69+
)
70+
.fmt(f)
71+
}
72+
}
73+
74+
/// Normalize a string, returning a [`Cow::Borrowed`] if the input was already normalized
75+
///
76+
/// This function:
77+
/// - Normalizes all line endings to `\n`
78+
///
79+
/// We may perform more normalization in the future. We don't use utilities from the
80+
/// `line_ending` crate because we don't own the string.
81+
///
82+
/// This function is particularly useful for multiline strings, which capture the existing
83+
/// line ending inside the string token itself. We must normalize those line endings to
84+
/// `\n` before the formatter -> printer stage, because the printer can't handle other
85+
/// line endings and will panic on them. At the printer -> string stage at the very end,
86+
/// the printer will replace all `\n` with the `LineEnding` requested by the user.
87+
/// https://github.com/biomejs/biome/blob/a658a294087c143b83350cbeb6b44f7a2e9afdd1/crates/biome_formatter/src/printer/mod.rs#L714-L718
88+
fn normalize_string(input: &str) -> Cow<str> {
89+
// The normalized string if `input` is not yet normalized.
90+
// `output` must remain empty if `input` is already normalized.
91+
let mut output = String::new();
92+
93+
// Tracks the last index of `input` that has been written to `output`.
94+
// If `last_loc` is `0` at the end, then the input is already normalized and can be returned as is.
95+
let mut last_loc = 0;
96+
97+
let mut iter = input.char_indices().peekable();
98+
99+
while let Some((loc, char)) = iter.next() {
100+
if char == '\r' {
101+
output.push_str(&input[last_loc..loc]);
102+
103+
if iter.peek().is_some_and(|(_, next)| next == &'\n') {
104+
// CRLF support - skip over the '\r' character, keep the `\n`
105+
iter.next();
106+
} else {
107+
// CR support - Replace the `\r` with a `\n`
108+
output.push('\n');
109+
}
110+
111+
last_loc = loc + '\r'.len_utf8();
112+
}
113+
}
114+
115+
if last_loc == 0 {
116+
Cow::Borrowed(input)
117+
} else {
118+
output.push_str(&input[last_loc..]);
119+
Cow::Owned(output)
120+
}
121+
}
122+
123+
#[cfg(test)]
124+
mod tests {
125+
use crate::string_literal::normalize_string;
126+
use std::borrow::Cow;
127+
128+
#[test]
129+
fn normalize_empty() {
130+
let x = "";
131+
assert_eq!(normalize_string(x), Cow::Borrowed(x));
132+
}
133+
134+
#[test]
135+
fn normalize_newlines() {
136+
let x = "abcd";
137+
assert_eq!(normalize_string(x), Cow::Borrowed(x));
138+
139+
let x = "a\nb\nc\nd\n";
140+
assert_eq!(normalize_string(x), Cow::Borrowed(x));
141+
142+
let x = "a\nb\rc\r\nd\n";
143+
assert_eq!(
144+
normalize_string(x),
145+
Cow::Owned::<str>(String::from("a\nb\nc\nd\n"))
146+
);
147+
}
148+
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
"multiline
2+
string"

0 commit comments

Comments
 (0)