|
| 1 | +use air_r_syntax::RSyntaxKind::R_STRING_LITERAL; |
| 2 | +use air_r_syntax::RSyntaxToken; |
| 3 | +use biome_formatter::prelude::syntax_token_cow_slice; |
| 4 | +use biome_formatter::prelude::Formatter; |
| 5 | +use biome_formatter::trivia::format_replaced; |
| 6 | +use biome_formatter::Format; |
| 7 | +use biome_formatter::FormatResult; |
| 8 | +use std::borrow::Cow; |
| 9 | + |
| 10 | +use crate::context::RFormatContext; |
| 11 | +use crate::RFormatter; |
| 12 | + |
| 13 | +/// Helper utility for formatting a string literal token |
| 14 | +/// |
| 15 | +/// The main job of this utility is to `normalize()` the string and handle the |
| 16 | +/// complicated way we have to call [format_replaced] with that normalized result. |
| 17 | +pub(crate) struct FormatStringLiteralToken<'token> { |
| 18 | + /// The string literal token to format |
| 19 | + token: &'token RSyntaxToken, |
| 20 | +} |
| 21 | + |
| 22 | +impl<'token> FormatStringLiteralToken<'token> { |
| 23 | + pub(crate) fn new(token: &'token RSyntaxToken) -> Self { |
| 24 | + Self { token } |
| 25 | + } |
| 26 | + |
| 27 | + fn normalize(&self) -> FormatNormalizedStringLiteralToken { |
| 28 | + let token = self.token; |
| 29 | + |
| 30 | + debug_assert!( |
| 31 | + matches!(token.kind(), R_STRING_LITERAL), |
| 32 | + "Found kind {:?}", |
| 33 | + token.kind() |
| 34 | + ); |
| 35 | + |
| 36 | + let text = token.text_trimmed(); |
| 37 | + let text = normalize_string(text); |
| 38 | + |
| 39 | + FormatNormalizedStringLiteralToken { token, text } |
| 40 | + } |
| 41 | +} |
| 42 | + |
| 43 | +impl Format<RFormatContext> for FormatStringLiteralToken<'_> { |
| 44 | + fn fmt(&self, f: &mut RFormatter) -> FormatResult<()> { |
| 45 | + self.normalize().fmt(f) |
| 46 | + } |
| 47 | +} |
| 48 | + |
| 49 | +struct FormatNormalizedStringLiteralToken<'token> { |
| 50 | + /// The original string literal token before normalization |
| 51 | + token: &'token RSyntaxToken, |
| 52 | + |
| 53 | + /// The normalized text |
| 54 | + text: Cow<'token, str>, |
| 55 | +} |
| 56 | + |
| 57 | +impl Format<RFormatContext> for FormatNormalizedStringLiteralToken<'_> { |
| 58 | + fn fmt(&self, f: &mut Formatter<RFormatContext>) -> FormatResult<()> { |
| 59 | + format_replaced( |
| 60 | + self.token, |
| 61 | + &syntax_token_cow_slice( |
| 62 | + // Cloning the `Cow<str>` is cheap since 99% of the time it will be the |
| 63 | + // `Borrowed` variant. Only with multiline strings on Windows will it |
| 64 | + // ever actually clone the underlying string. |
| 65 | + self.text.clone(), |
| 66 | + self.token, |
| 67 | + self.token.text_trimmed_range().start(), |
| 68 | + ), |
| 69 | + ) |
| 70 | + .fmt(f) |
| 71 | + } |
| 72 | +} |
| 73 | + |
/// Normalize a string, borrowing the input when it is already normalized
///
/// Normalization currently consists of a single step:
/// - Every `\r\n` and every lone `\r` becomes `\n`
///
/// More normalization steps may be added later. The utilities from the `line_ending`
/// crate aren't used here because we don't own the string.
///
/// Multiline strings are the main motivation: the string token captures whatever line
/// endings were in the source. Those must be `\n` before the formatter -> printer
/// stage, because the printer can't handle other line endings and will panic on them.
/// At the very end, in the printer -> string stage, the printer swaps every `\n` for
/// the `LineEnding` requested by the user.
/// <https://github.com/biomejs/biome/blob/a658a294087c143b83350cbeb6b44f7a2e9afdd1/crates/biome_formatter/src/printer/mod.rs#L714-L718>
fn normalize_string(input: &str) -> Cow<str> {
    // No carriage return at all means there is nothing to rewrite, so hand the
    // input back without allocating.
    if !input.contains('\r') {
        return Cow::Borrowed(input);
    }

    // Removing or replacing `\r` never grows the string, so `input.len()` is an upper bound.
    let mut normalized = String::with_capacity(input.len());

    // The part of `input` that has not been copied into `normalized` yet.
    let mut remaining = input;

    while let Some(cr) = remaining.find('\r') {
        // Copy everything before the `\r`, then emit a single `\n` for it.
        normalized.push_str(&remaining[..cr]);
        normalized.push('\n');

        // Step past the `\r`. For CRLF, also swallow the `\n` that was just accounted for.
        let after_cr = &remaining[cr + '\r'.len_utf8()..];
        remaining = after_cr.strip_prefix('\n').unwrap_or(after_cr);
    }

    normalized.push_str(remaining);
    Cow::Owned(normalized)
}
| 122 | + |
#[cfg(test)]
mod tests {
    use crate::string_literal::normalize_string;
    use std::borrow::Cow;

    /// Asserts that `input` comes back unchanged and without allocating.
    ///
    /// `Cow`'s `PartialEq` only compares contents, so comparing against
    /// `Cow::Borrowed(..)` with `assert_eq!` would also accept an `Owned` result.
    /// The variant is therefore checked explicitly.
    fn assert_borrowed(input: &str) {
        let normalized = normalize_string(input);
        assert!(
            matches!(normalized, Cow::Borrowed(_)),
            "expected {input:?} to be returned as `Cow::Borrowed`"
        );
        assert_eq!(normalized, input);
    }

    /// Asserts that `input` is rewritten to `expected` in a freshly allocated string.
    fn assert_owned(input: &str, expected: &str) {
        let normalized = normalize_string(input);
        assert!(
            matches!(normalized, Cow::Owned(_)),
            "expected {input:?} to be returned as `Cow::Owned`"
        );
        assert_eq!(normalized, expected);
    }

    #[test]
    fn normalize_empty() {
        assert_borrowed("");
    }

    #[test]
    fn normalize_newlines() {
        // Already normalized input must be handed back as is.
        assert_borrowed("abcd");
        assert_borrowed("a\nb\nc\nd\n");

        // A mix of `\n`, lone `\r`, and `\r\n` collapses to `\n` everywhere.
        assert_owned("a\nb\rc\r\nd\n", "a\nb\nc\nd\n");
    }

    #[test]
    fn normalize_carriage_return_edges() {
        // `\r` or `\r\n` at the very start of the input.
        assert_owned("\r", "\n");
        assert_owned("\r\n", "\n");
        assert_owned("\rabc", "\nabc");

        // `\r` at the very end of the input.
        assert_owned("abc\r", "abc\n");

        // A lone `\r` directly followed by a `\r\n` yields two line breaks.
        assert_owned("\r\r\n", "\n\n");

        // Multi-byte characters around the line endings are left intact.
        assert_owned("\u{e9}\r\u{fc}\r\n\u{1f600}", "\u{e9}\n\u{fc}\n\u{1f600}");
    }
}
0 commit comments