Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/toon_format/_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,13 @@ def to_parsed_lines(
if not source.strip():
return [], []

# Normalize Windows CRLF line endings to LF
# This prevents stray \r characters from appearing in content
source = source.replace("\r\n", "\n")

# Replace any remaining standalone \r characters (old Mac format) with \n
source = source.replace("\r", "\n")

lines = source.split("\n")
parsed: List[ParsedLine] = []
blank_lines: List[BlankLineInfo] = []
Expand Down
55 changes: 55 additions & 0 deletions tests/test_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,58 @@ def test_object_key_order_preserved(self):
assert keys == ["z", "a", "m", "b"]
# Verify order is not alphabetical
assert keys != ["a", "b", "m", "z"]


class TestCRLFDecoding:
    """Test CRLF (Windows), CR (old Mac) and mixed line ending handling in decoder.

    Every test feeds ``decode`` a document whose line terminators are not
    plain LF and asserts that the decoded value matches what the same
    document would be expected to produce with LF terminators.
    """

    def test_decode_object_with_crlf(self):
        """Test decoding objects with CRLF line endings."""
        toon = "name: Alice\r\nage: 30\r\n"
        result = decode(toon)
        assert result == {"name": "Alice", "age": 30}

    def test_decode_nested_object_with_crlf(self):
        """Test decoding nested objects with CRLF line endings."""
        # Child keys are indented by two spaces so they nest under "person".
        # NOTE(review): assumes the decoder's default indent size is 2 - confirm.
        toon = "person:\r\n  name: Alice\r\n  age: 30\r\n"
        result = decode(toon)
        assert result == {"person": {"name": "Alice", "age": 30}}

    def test_decode_array_with_crlf(self):
        """Test decoding arrays with CRLF line endings."""
        # List items are indented by two spaces so they belong to "items".
        # NOTE(review): assumes the decoder's default indent size is 2 - confirm.
        toon = "items[3]:\r\n  - apple\r\n  - banana\r\n  - cherry\r\n"
        result = decode(toon)
        assert result == {"items": ["apple", "banana", "cherry"]}

    def test_decode_delimited_array_with_crlf(self):
        """Test decoding delimited arrays with CRLF line endings."""
        toon = "items[3]: apple,banana,cherry\r\n"
        result = decode(toon)
        assert result == {"items": ["apple", "banana", "cherry"]}

    def test_decode_with_old_mac_cr(self):
        """Test decoding with old Mac CR line endings."""
        toon = "name: Alice\rage: 30\r"
        result = decode(toon)
        assert result == {"name": "Alice", "age": 30}

    def test_decode_with_mixed_line_endings(self):
        """Test decoding with mixed line endings."""
        # One document mixing CRLF, LF and a bare trailing CR.
        toon = "name: Alice\r\nage: 30\ncity: NYC\r"
        result = decode(toon)
        assert result == {"name": "Alice", "age": 30, "city": "NYC"}

    def test_crlf_does_not_affect_quoted_strings(self):
        """Test that CRLF normalization doesn't affect escaped \\r in strings."""
        # The payload holds the two-character escapes backslash-r and
        # backslash-n inside the quotes, followed by a real CRLF terminator.
        toon = 'text: "line1\\r\\nline2"\r\n'
        result = decode(toon)
        # The escapes are decoded into real CR and LF characters in the value;
        # only the line terminator outside the quotes is normalized away.
        assert result == {"text": "line1\r\nline2"}

    def test_crlf_in_strict_mode(self):
        """Test CRLF works correctly in strict mode."""
        # Two-space indentation for the nested keys.
        # NOTE(review): assumes strict mode requires indentation to be a
        # multiple of an indent size of 2 - confirm.
        toon = "name:\r\n  first: Alice\r\n  age: 30\r\n"
        options = DecodeOptions(strict=True)
        result = decode(toon, options)
        assert result == {"name": {"first": "Alice", "age": 30}}

59 changes: 59 additions & 0 deletions tests/test_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,3 +241,62 @@ def test_blank_lines_not_validated_in_strict_mode(self):
# Should not raise error for blank line with invalid indentation
assert len(blanks) == 1
assert blanks[0].line_num == 2


class TestCRLFHandling:
    """Tests for CRLF and CR normalization.

    Each test calls ``to_parsed_lines(source, 2, strict)`` and inspects the
    first returned list (the parsed lines); the second returned value (blank
    line info) is not examined here.
    """

    def test_crlf_normalization(self):
        """Test Windows CRLF line endings are normalized to LF."""
        source = "name: Alice\r\nage: 30\r\n"
        lines, _ = to_parsed_lines(source, 2, False)
        # Check the count before indexing so a wrong split fails with a clear
        # assertion rather than an IndexError.
        assert len(lines) == 3  # name, age, and trailing empty line
        # Verify no \r remains in content
        assert "\r" not in lines[0].content
        assert "\r" not in lines[1].content
        assert lines[0].content == "name: Alice"
        assert lines[1].content == "age: 30"

    def test_standalone_cr_normalization(self):
        """Test old Mac CR line endings are normalized to LF."""
        source = "name: Alice\rage: 30\r"
        lines, _ = to_parsed_lines(source, 2, False)
        assert len(lines) == 3  # name, age, and trailing empty line
        # Verify no \r remains in content
        assert "\r" not in lines[0].content
        assert "\r" not in lines[1].content
        assert lines[0].content == "name: Alice"
        assert lines[1].content == "age: 30"

    def test_mixed_line_endings(self):
        """Test mixed line endings are all normalized."""
        # CRLF, LF and bare CR in one input; no trailing terminator.
        source = "line1\r\nline2\nline3\rline4"
        lines, _ = to_parsed_lines(source, 2, False)
        assert len(lines) == 4
        for line in lines:
            assert "\r" not in line.content
        assert [line.content for line in lines] == [
            "line1",
            "line2",
            "line3",
            "line4",
        ]

    def test_crlf_with_indentation(self):
        """Test CRLF handling preserves indentation."""
        # The child line carries two leading spaces, matching the indent size
        # of 2 passed below and the indent/depth assertions.
        source = "parent:\r\n  child: value\r\n"
        lines, _ = to_parsed_lines(source, 2, False)
        assert lines[0].content == "parent:"
        assert lines[0].depth == 0
        assert lines[1].content == "child: value"
        assert lines[1].depth == 1
        assert lines[1].indent == 2

    def test_crlf_in_strict_mode(self):
        """Test CRLF normalization works in strict mode."""
        # Two leading spaces on the second line, consistent with the
        # depth == 1 assertion for an indent size of 2.
        source = "name: Alice\r\n  age: 30\r\n"
        lines, _ = to_parsed_lines(source, 2, True)
        # Should not raise error and should properly normalize
        assert len(lines) == 3
        assert "\r" not in lines[0].content
        assert "\r" not in lines[1].content
        assert lines[1].depth == 1

Loading