From 7238e46ad23f152a3e0128b02b2f89b504c7569b Mon Sep 17 00:00:00 2001 From: therrshan Date: Sun, 16 Nov 2025 17:42:32 -0500 Subject: [PATCH 1/2] feat(cli): add --stats flag for token comparison --- src/toon_format/cli.py | 21 +++++++++ tests/test_cli.py | 100 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+) diff --git a/src/toon_format/cli.py b/src/toon_format/cli.py index 07efd06..0e34359 100644 --- a/src/toon_format/cli.py +++ b/src/toon_format/cli.py @@ -14,6 +14,7 @@ from . import decode, encode from .types import DecodeOptions, EncodeOptions +from .utils import compare_formats def main() -> int: @@ -77,6 +78,12 @@ def main() -> int: help="Disable strict validation when decoding", ) + parser.add_argument( + "--stats", + action="store_true", + help="Show token count estimates and savings (encode only)", + ) + args = parser.parse_args() # Read input @@ -125,6 +132,11 @@ def main() -> int: except json.JSONDecodeError: mode = "decode" + # Handle --stats with decode mode + if args.stats and mode == "decode": + print("Warning: --stats is only available in encode mode", file=sys.stderr) + args.stats = False + # Process try: if mode == "encode": @@ -134,6 +146,15 @@ def main() -> int: indent=args.indent, length_marker=args.length_marker, ) + + # Show stats if requested + if args.stats: + try: + data = json.loads(input_text) + print("\n" + compare_formats(data)) + except RuntimeError as e: + # tiktoken not installed + print(f"\n {e}", file=sys.stderr) else: output_text = decode_toon_to_json( input_text, diff --git a/tests/test_cli.py b/tests/test_cli.py index 3499bf7..72c460a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -78,6 +78,106 @@ def test_decode_lenient_mode(self): assert data["name"] == "Alice" +class TestStatsFlag: + """Tests for the --stats CLI flag.""" + + def test_stats_flag_in_help(self, tmp_path): + """Test that --stats appears in help text.""" + with patch("sys.argv", ["toon", "--help"]): + with pytest.raises(SystemExit): + main() + + def test_stats_with_file_input(self, tmp_path): + """Test --stats with file input.""" + input_file = tmp_path / "test.json" + input_file.write_text('{"users": [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]}') + + with patch("sys.stdout", new_callable=StringIO) as mock_stdout: + with patch("sys.argv", ["toon", str(input_file), "--stats"]): + result = main() + assert result == 0 + output = mock_stdout.getvalue() + assert "users[2" in output + assert "Format Comparison" in output or "Savings" in output + + def test_stats_with_stdin(self): + """Test --stats with stdin input.""" + input_data = '{"items": ["a", "b", "c"]}' + + with patch("sys.stdin", StringIO(input_data)): + with patch("sys.stdout", new_callable=StringIO) as mock_stdout: + with patch("sys.argv", ["toon", "-", "--stats"]): + result = main() + assert result == 0 + output = mock_stdout.getvalue() + assert "items[3" in output + + def test_stats_ignored_in_decode_mode(self, tmp_path): + """Test that --stats is ignored when decoding.""" + input_file = tmp_path / "test.toon" + input_file.write_text("items[2]: a,b") + + with patch("sys.stdout", new_callable=StringIO) as mock_stdout: + with patch("sys.stderr", new_callable=StringIO) as mock_stderr: + with patch("sys.argv", ["toon", str(input_file), "--decode", "--stats"]): + result = main() + assert result == 0 + output = mock_stdout.getvalue() + assert '"items"' in output + if mock_stderr.getvalue(): + assert "warning" in mock_stderr.getvalue().lower() + + def test_stats_with_different_delimiters(self, tmp_path): + """Test that --stats works with alternative delimiters.""" + input_file = tmp_path / "test.json" + input_file.write_text('{"data": [{"a": 1, "b": 2}]}') + + # Test with tab delimiter + with patch("sys.stdout", new_callable=StringIO): + with patch("sys.argv", ["toon", str(input_file), "--delimiter", "\t", "--stats"]): + result = main() + assert result == 0 + + # Test with pipe delimiter + with patch("sys.stdout", new_callable=StringIO): + with patch("sys.argv", ["toon", str(input_file), "--delimiter", "|", "--stats"]): + result = main() + assert result == 0 + + def test_stats_without_tiktoken(self, tmp_path, monkeypatch): + """Test graceful handling when tiktoken is not available.""" + input_file = tmp_path / "test.json" + input_file.write_text('{"test": 123}') + + # Mock compare_formats to raise RuntimeError (simulating missing tiktoken) + def mock_compare_formats(data): + raise RuntimeError("tiktoken is required") + + with patch("toon_format.cli.compare_formats", side_effect=mock_compare_formats): + with patch("sys.stdout", new_callable=StringIO): + with patch("sys.stderr", new_callable=StringIO) as mock_stderr: + with patch("sys.argv", ["toon", str(input_file), "--stats"]): + result = main() + assert result == 0 + assert "tiktoken" in mock_stderr.getvalue() + + def test_stats_with_output_file(self, tmp_path): + """Test --stats with -o output option.""" + input_file = tmp_path / "test.json" + input_file.write_text('{"test": 123}') + output_file = tmp_path / "output.toon" + + with patch("sys.stdout", new_callable=StringIO) as mock_stdout: + with patch("sys.argv", ["toon", str(input_file), "-o", str(output_file), "--stats"]): + result = main() + assert result == 0 + assert output_file.exists() + assert ( + "Format Comparison" in mock_stdout.getvalue() + or "Savings" in mock_stdout.getvalue() + ) + + class TestCLIMain: """Integration tests for the main CLI function.""" From be41c97d02dae95be7ce954b7cede7a2a0592d68 Mon Sep 17 00:00:00 2001 From: therrshan Date: Sun, 16 Nov 2025 17:45:07 -0500 Subject: [PATCH 2/2] feat(cli): add --stats flag for token comparison --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 31ea483..341dae2 100644 --- a/README.md +++ b/README.md @@ -51,9 +51,10 @@ echo '{"x": 1}' | toon - # Stdin/stdout # Options toon data.json --encode --delimiter "\t" --length-marker toon data.toon --decode --no-strict --indent 4 +toon data.json --stats ``` -**Options:** `-e/--encode` `-d/--decode` `-o/--output` `--delimiter` `--indent` `--length-marker` `--no-strict` +**Options:** `-e/--encode` `-d/--decode` `-o/--output` `--delimiter` `--indent` `--length-marker` `--no-strict` `--stats` ## API Reference