toon-format · smortezah · Nov 6, 2025 · Nov 6, 2025 · Nov 9, 2025
@@ -207,6 +207,13 @@ def to_parsed_lines(
     if not source.strip():
         return [], []
 
+    # Normalize Windows CRLF line endings to LF
+    # This prevents stray \r characters from appearing in content
+    source = source.replace("\r\n", "\n")
+
+    # Replace any remaining standalone \r characters (old Mac format) with \n
+    source = source.replace("\r", "\n")
+
     lines = source.split("\n")
     parsed: List[ParsedLine] = []
     blank_lines: List[BlankLineInfo] = []

@@ -140,3 +140,58 @@ def test_object_key_order_preserved(self):
         assert keys == ["z", "a", "m", "b"]
         # Verify order is not alphabetical
         assert keys != ["a", "b", "m", "z"]
+
+
+class TestCRLFDecoding:
+    """Tests that decode() accepts CRLF, bare CR, and mixed line endings."""
+
+    def test_decode_object_with_crlf(self):
+        """Decode a flat two-key object whose lines end in CRLF."""
+        toon = "name: Alice\r\nage: 30\r\n"
+        result = decode(toon)
+        assert result == {"name": "Alice", "age": 30}
+
+    def test_decode_nested_object_with_crlf(self):
+        """Decode an indented child object whose lines end in CRLF."""
+        toon = "person:\r\n  name: Alice\r\n  age: 30\r\n"
+        result = decode(toon)
+        assert result == {"person": {"name": "Alice", "age": 30}}
+
+    def test_decode_array_with_crlf(self):
+        """Decode a dash-prefixed list array whose lines end in CRLF."""
+        toon = "items[3]:\r\n  - apple\r\n  - banana\r\n  - cherry\r\n"
+        result = decode(toon)
+        assert result == {"items": ["apple", "banana", "cherry"]}
+
+    def test_decode_delimited_array_with_crlf(self):
+        """Decode a single-line comma-delimited array terminated by CRLF."""
+        toon = "items[3]: apple,banana,cherry\r\n"
+        result = decode(toon)
+        assert result == {"items": ["apple", "banana", "cherry"]}
+
+    def test_decode_with_old_mac_cr(self):
+        """Decode input whose only line separator is a bare CR (old Mac style)."""
+        toon = "name: Alice\rage: 30\r"
+        result = decode(toon)
+        assert result == {"name": "Alice", "age": 30}
+
+    def test_decode_with_mixed_line_endings(self):
+        """Decode input that mixes CRLF, LF, and bare CR in one document."""
+        toon = "name: Alice\r\nage: 30\ncity: NYC\r"
+        result = decode(toon)
+        assert result == {"name": "Alice", "age": 30, "city": "NYC"}
+
+    def test_crlf_does_not_affect_quoted_strings(self):
+        """Test that escaped \\r\\n inside a quoted value survives CRLF normalization."""
+        toon = 'text: "line1\\r\\nline2"\r\n'
+        result = decode(toon)
+        # The escape sequences are unescaped into real CR and LF characters
+        assert result == {"text": "line1\r\nline2"}
+
+    def test_crlf_in_strict_mode(self):
+        """Decode a nested CRLF document with DecodeOptions(strict=True)."""
+        toon = "name:\r\n  first: Alice\r\n  age: 30\r\n"
+        options = DecodeOptions(strict=True)
+        result = decode(toon, options)
+        assert result == {"name": {"first": "Alice", "age": 30}}
+
@@ -241,3 +241,62 @@ def test_blank_lines_not_validated_in_strict_mode(self):
         # Should not raise error for blank line with invalid indentation
         assert len(blanks) == 1
         assert blanks[0].line_num == 2
+
+
+class TestCRLFHandling:
+    """Tests that to_parsed_lines() strips CRLF and bare-CR line endings."""
+
+    def test_crlf_normalization(self):
+        """CRLF-terminated lines yield content with no carriage return left."""
+        source = "name: Alice\r\nage: 30\r\n"
+        lines, blanks = to_parsed_lines(source, 2, False)
+        # Verify no \r remains in content
+        assert "\r" not in lines[0].content
+        assert "\r" not in lines[1].content
+        assert lines[0].content == "name: Alice"
+        assert lines[1].content == "age: 30"
+        assert len(lines) == 3  # name, age, and trailing empty line
+
+    def test_standalone_cr_normalization(self):
+        """Bare-CR separators (old Mac style) split lines the same way CRLF does."""
+        source = "name: Alice\rage: 30\r"
+        lines, blanks = to_parsed_lines(source, 2, False)
+        # Verify no \r remains in content
+        assert "\r" not in lines[0].content
+        assert "\r" not in lines[1].content
+        assert lines[0].content == "name: Alice"
+        assert lines[1].content == "age: 30"
+        assert len(lines) == 3  # name, age, and trailing empty line
+
+    def test_mixed_line_endings(self):
+        """CRLF, LF, and bare CR in one source each end exactly one line."""
+        source = "line1\r\nline2\nline3\rline4"
+        lines, blanks = to_parsed_lines(source, 2, False)
+        assert len(lines) == 4
+        for line in lines:
+            assert "\r" not in line.content
+        assert lines[0].content == "line1"
+        assert lines[1].content == "line2"
+        assert lines[2].content == "line3"
+        assert lines[3].content == "line4"
+
+    def test_crlf_with_indentation(self):
+        """Depth and indent are still computed correctly for CRLF-terminated lines."""
+        source = "parent:\r\n  child: value\r\n"
+        lines, blanks = to_parsed_lines(source, 2, False)
+        assert lines[0].content == "parent:"
+        assert lines[0].depth == 0
+        assert lines[1].content == "child: value"
+        assert lines[1].depth == 1
+        assert lines[1].indent == 2
+
+    def test_crlf_in_strict_mode(self):
+        """CRLF input with a two-space indented line parses in strict mode."""
+        source = "name: Alice\r\n  age: 30\r\n"
+        lines, blanks = to_parsed_lines(source, 2, True)
+        # Should not raise error and should properly normalize
+        assert len(lines) == 3
+        assert "\r" not in lines[0].content
+        assert "\r" not in lines[1].content
+        assert lines[1].depth == 1
+