diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 2996f12..0000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,40 +0,0 @@ -# Dependabot configuration for automated dependency updates -# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file - -version: 2 -updates: - # Monitor GitHub Actions for updates - - package-ecosystem: "github-actions" - directory: "/" - schedule: - interval: "weekly" - day: "monday" - labels: - - "dependencies" - - "github-actions" - commit-message: - prefix: "ci" - include: "scope" - - # Monitor pip dependencies (compatible with uv) - - package-ecosystem: "pip" - directory: "/" - schedule: - interval: "weekly" - day: "monday" - labels: - - "dependencies" - - "python" - commit-message: - prefix: "deps" - include: "scope" - # Group dev dependencies together - groups: - dev-dependencies: - patterns: - - "pytest*" - - "mypy*" - - "ruff*" - update-types: - - "minor" - - "patch" diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 8202ec1..d952fc6 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v4 - name: Install uv uses: astral-sh/setup-uv@v5 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 958d53d..0529485 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 @@ -24,13 +24,16 @@ jobs: - name: Install build dependencies run: | python -m pip install --upgrade pip - pip install build + pip install build twine - name: Build package run: python -m build + - name: Check package + run: twine check dist/* + - name: Store distribution packages - uses: 
actions/upload-artifact@v5 + uses: actions/upload-artifact@v4 with: name: python-package-distributions path: dist/ @@ -48,7 +51,7 @@ jobs: steps: - name: Download distributions - uses: actions/download-artifact@v6 + uses: actions/download-artifact@v4 with: name: python-package-distributions path: dist/ @@ -69,7 +72,7 @@ jobs: steps: - name: Download distributions - uses: actions/download-artifact@v6 + uses: actions/download-artifact@v4 with: name: python-package-distributions path: dist/ diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cffa57a..f5599e7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,7 +15,7 @@ jobs: python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v4 - name: Install uv uses: astral-sh/setup-uv@v5 @@ -34,7 +34,7 @@ jobs: run: uv run pytest --cov=toon_format --cov-report=xml --cov-report=term --cov-report=html --cov-fail-under=85 - name: Upload coverage reports as artifact - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v4 if: matrix.python-version == '3.12' with: name: coverage-reports diff --git a/PUBLISHING.md b/PUBLISHING.md new file mode 100644 index 0000000..fa2c639 --- /dev/null +++ b/PUBLISHING.md @@ -0,0 +1,212 @@ +# Publishing toon_format to PyPI + +This guide explains how to publish the toon_format package to PyPI (Python Package Index). + +## Prerequisites + +### 1. PyPI Accounts + +Create accounts on both platforms: + +- **TestPyPI** (for testing): https://test.pypi.org/account/register/ +- **PyPI** (production): https://pypi.org/account/register/ + +### 2. GitHub Environments + +The publish workflow uses GitHub environments with trusted publishing (no API tokens needed!). + +#### Set up environments in GitHub: + +1. Go to your repository Settings > Environments +2. Create two environments: + - `testpypi` - for TestPyPI releases + - `pypi` - for production PyPI releases +3. 
For each environment, configure: + - **Deployment protection rules** (optional but recommended): + - Required reviewers (for production releases) + - Wait timer (optional delay before deployment) + +#### Configure Trusted Publishers: + +**For TestPyPI:** +1. Log in to https://test.pypi.org +2. Go to Account Settings > Publishing +3. Add a new pending publisher: + - PyPI Project Name: `toon_format` + - Owner: `toon-format` + - Repository: `toon-python` + - Workflow: `publish.yml` + - Environment: `testpypi` + +**For PyPI:** +1. Log in to https://pypi.org +2. Go to Account Settings > Publishing +3. Add a new pending publisher: + - PyPI Project Name: `toon_format` + - Owner: `toon-format` + - Repository: `toon-python` + - Workflow: `publish.yml` + - Environment: `pypi` + +> Note: After the first successful publish, the project will be registered and future publishes will use the same trusted publisher configuration. + +## Release Process + +### Step 1: Prepare the Release + +1. **Update version number** in two places: + - `pyproject.toml` (line 3): `version = "X.Y.Z"` + - `src/toon_format/__init__.py` (line 28): `__version__ = "X.Y.Z"` + +2. **Update changelog** (if exists) or create release notes + +3. **Run tests locally**: + ```bash + uv run pytest + uv run ruff check . + uv run mypy src/toon_format + ``` + +4. **Build and test locally**: + ```bash + # Clean previous builds + rm -rf dist/ build/ *.egg-info + + # Build the package + python -m build + + # Verify the package contents + python -m zipfile -l dist/toon_format-X.Y.Z-py3-none-any.whl + + # Test installation in a clean environment + python -m venv test_env + test_env/bin/pip install dist/toon_format-X.Y.Z-py3-none-any.whl + test_env/bin/python -c "import toon_format; print(toon_format.__version__)" + rm -rf test_env + ``` + +### Step 2: Commit and Tag + +1. **Commit version changes**: + ```bash + git add pyproject.toml src/toon_format/__init__.py + git commit -m "Bump version to X.Y.Z" + ``` + +2. 
**Create and push tag**: + ```bash + git tag -a vX.Y.Z -m "Release version X.Y.Z" + git push origin main + git push origin vX.Y.Z + ``` + +### Step 3: Test on TestPyPI (Recommended) + +Before publishing to production PyPI, test on TestPyPI: + +1. Go to GitHub Actions: https://github.com/toon-format/toon-python/actions +2. Select the "Publish to PyPI" workflow +3. Click "Run workflow" +4. Select branch: `main` (or the tag `vX.Y.Z`) +5. Click "Run workflow" + - Note: Manual workflow dispatch automatically publishes to TestPyPI + +6. **Verify the TestPyPI upload**: + - Check the package: https://test.pypi.org/project/toon_format/ + - Test installation: + ```bash + pip install --index-url https://test.pypi.org/simple/ toon_format + ``` + +### Step 4: Publish to PyPI + +**Automatic (via GitHub Release)** + +1. Go to https://github.com/toon-format/toon-python/releases/new +2. Select the tag you created: `vX.Y.Z` +3. Release title: `Version X.Y.Z` +4. Description: Add release notes and changelog +5. Click "Publish release" +6. The GitHub Action will automatically build and publish to PyPI + +### Step 5: Verify the Release + +1. **Check PyPI**: https://pypi.org/project/toon_format/ +2. **Test installation**: + ```bash + pip install toon_format + python -c "import toon_format; print(toon_format.__version__)" + ``` +3. **Update README badge** (optional): + ```markdown + [![PyPI version](https://badge.fury.io/py/toon_format.svg)](https://pypi.org/project/toon_format/) + ``` + +## Troubleshooting + +### Build fails with "metadata missing" + +This is usually a configuration issue in `pyproject.toml`. Verify: +- All required fields are present (name, version, description, etc.) +- Project URLs are properly formatted +- Author email is valid + +### Trusted publishing fails + +If the trusted publisher configuration fails: +1. Verify the environment name matches exactly +2. Check that the repository owner and name are correct +3. 
Ensure the workflow file path is correct (`publish.yml`) +4. Make sure the PyPI project name is available or already claimed by you + +### Package already exists on PyPI + +PyPI doesn't allow overwriting published versions. You must: +1. Increment the version number +2. Create a new tag +3. Publish the new version + +## Version Numbering + +Follow [Semantic Versioning](https://semver.org/): + +- **MAJOR version** (X.0.0): Incompatible API changes +- **MINOR version** (0.X.0): New functionality, backward compatible +- **PATCH version** (0.0.X): Bug fixes, backward compatible + +### Agreed Roadmap (from Discussion #18): + +- **0.8.x** - Initial code set, tests, documentation, migration from toon-llm +- **0.9.x** - Serializer, spec compliance, publishing to PyPI (test and prod) +- **1.0.0-rc.x** - Production readiness candidates +- **1.0.0** - Official stable release 🎉 + +Examples: +- `0.9.0-beta.1` - First beta release for testing (normalized to `0.9.0b1` on PyPI and in wheel filenames per PEP 440) +- `0.9.0-beta.2` - Second beta with fixes +- `0.9.0` - First minor release with new features +- `1.0.0-rc.1` - Release candidate +- `1.0.0` - First stable release + +## Checklist + +Before each release, verify: + +- [ ] All tests pass (`uv run pytest`) +- [ ] Linting passes (`uv run ruff check .`) +- [ ] Type checking passes (`uv run mypy src/toon_format`) +- [ ] Version updated in `pyproject.toml` and `src/toon_format/__init__.py` +- [ ] Changes committed and pushed to `main` +- [ ] Git tag created and pushed +- [ ] Package tested on TestPyPI (optional but recommended) +- [ ] GitHub Release created +- [ ] Package verified on PyPI +- [ ] Installation tested from PyPI + +## References + +- [Python Packaging Guide](https://packaging.python.org/) +- [PyPI Help](https://pypi.org/help/) +- [Trusted Publishing Guide](https://docs.pypi.org/trusted-publishers/) +- [GitHub Actions Publishing](https://packaging.python.org/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/) +- [TOON Format
Specification](https://github.com/toon-format/spec) diff --git a/README.md b/README.md index 1ffd8ea..503de0d 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,22 @@ # TOON Format for Python [![Tests](https://github.com/toon-format/toon-python/actions/workflows/test.yml/badge.svg)](https://github.com/toon-format/toon-python/actions) -[![PyPI](https://img.shields.io/pypi/v/toon_format.svg)](https://pypi.org/project/toon_format/) [![Python Versions](https://img.shields.io/pypi/pyversions/toon_format.svg)](https://pypi.org/project/toon_format/) -Compact, human-readable serialization format for LLM contexts with **30-60% token reduction** vs JSON. Combines YAML-like indentation with CSV-like tabular arrays. 100% compatible with the [official TOON specification](https://github.com/toon-format/spec). +> **⚠️ Beta Status (v0.9.x):** This library is in active development and working towards spec compliance. Beta published to PyPI. API may change before 1.0.0 release. -**Key Features:** Minimal syntax • Tabular arrays for uniform data • Array length validation • Python 3.8+ • Battle-tested. +Compact, human-readable serialization format for LLM contexts with **30-60% token reduction** vs JSON. Combines YAML-like indentation with CSV-like tabular arrays. Working towards full compatibility with the [official TOON specification](https://github.com/toon-format/spec). + +**Key Features:** Minimal syntax • Tabular arrays for uniform data • Array length validation • Python 3.8+ • Comprehensive test coverage. 
```bash -pip install toon_format -# or (recommended) -uv add toon_format +# Beta published to PyPI - install from source: +git clone https://github.com/toon-format/toon-python.git +cd toon-python +uv sync + +# Or install directly from GitHub: +pip install git+https://github.com/toon-format/toon-python.git ``` ## Quick Start @@ -100,7 +105,7 @@ toon_str = encode(data) tokens = count_tokens(toon_str) # Uses tiktoken (gpt5/gpt5-mini) ``` -**Requires tiktoken:** `pip install tiktoken` or `pip install toon-format[benchmark]` +**Requires tiktoken:** `uv add tiktoken` (benchmark features are optional) ## Format Specification @@ -123,7 +128,7 @@ git clone https://github.com/toon-format/toon-python.git cd toon-python uv sync -# Run tests (battle-tested: 792 tests, 91% coverage, 85% enforced) +# Run tests (792 tests, 91% coverage, 85% enforced) uv run pytest --cov=toon_format --cov-report=term # Code quality @@ -132,7 +137,16 @@ uv run ruff format src/ tests/ # Format uv run mypy src/ # Type check ``` -**CI/CD:** GitHub Actions • Python 3.8-3.12 • Coverage enforcement • Dependabot • PR coverage comments +**CI/CD:** GitHub Actions • Python 3.8-3.12 • Coverage enforcement • PR coverage comments + +## Project Status & Roadmap + +Following semantic versioning towards 1.0.0: + +- **v0.8.x** - Initial code set, tests, documentation ✅ +- **v0.9.x** - Serializer improvements, spec compliance testing, publishing setup (current) +- **v1.0.0-rc.x** - Release candidates for production readiness +- **v1.0.0** - First stable release with full spec compliance See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines. diff --git a/docs/api.md b/docs/api.md index dae7f09..3db61da 100644 --- a/docs/api.md +++ b/docs/api.md @@ -261,7 +261,7 @@ Count tokens in a text string using tiktoken.
- `RuntimeError`: If tiktoken is not installed **Requirements:** -- Install tiktoken: `pip install tiktoken` or `pip install toon-format[benchmark]` +- Install tiktoken: `uv add tiktoken` or `uv add toon_format[benchmark]` **Example:** @@ -351,91 +351,6 @@ print(compare_formats(data)) --- -## Measuring Token Efficiency - -Use the utility functions to measure and compare token usage between JSON and TOON formats. - -### Quick Token Count - -```python -from toon_format import encode, count_tokens - -data = {"users": [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]} - -# Count tokens in TOON format -toon_str = encode(data) -tokens = count_tokens(toon_str) -print(f"TOON uses {tokens} tokens") -# TOON uses 28 tokens -``` - -### Compare Formats - -```python -from toon_format import estimate_savings - -data = { - "employees": [ - {"id": 1, "name": "Alice", "dept": "Engineering"}, - {"id": 2, "name": "Bob", "dept": "Sales"}, - {"id": 3, "name": "Charlie", "dept": "Marketing"} - ] -} - -result = estimate_savings(data) -print(f"JSON: {result['json_tokens']} tokens") -print(f"TOON: {result['toon_tokens']} tokens") -print(f"Savings: {result['savings_percent']:.1f}%") -# JSON: 89 tokens -# TOON: 52 tokens -# Savings: 41.6% -``` - -### Visual Comparison - -```python -from toon_format import compare_formats - -data = { - "products": [ - {"sku": "A100", "price": 29.99, "stock": 50}, - {"sku": "B200", "price": 49.99, "stock": 30} - ] -} - -print(compare_formats(data)) -# Format Comparison -# ──────────────────────────────────────────────── -# Format Tokens Size (chars) -# JSON 67 145 -# TOON 38 89 -# ──────────────────────────────────────────────── -# Savings: 29 tokens (43.3%) -``` - -### Using Different Encodings - -```python -from toon_format import count_tokens - -text = "Hello, world!" 
- -# GPT-5 / GPT-5-mini (default) -tokens_gpt5 = count_tokens(text, encoding="o200k_base") - -# GPT-3.5 / GPT-4 -tokens_gpt4 = count_tokens(text, encoding="cl100k_base") - -# Older models -tokens_old = count_tokens(text, encoding="p50k_base") - -print(f"GPT-5: {tokens_gpt5} tokens") -print(f"GPT-4: {tokens_gpt4} tokens") -print(f"Older: {tokens_old} tokens") -``` - ---- - ## Advanced Usage ### Working with Large Integers @@ -506,17 +421,12 @@ from typing import Any, Dict, List, Union from toon_format import encode, decode from toon_format.types import EncodeOptions, DecodeOptions, JsonValue -# Type-safe usage - EncodeOptions is a TypedDict, use dict syntax +# Type-safe usage data: Dict[str, Any] = {"key": "value"} -options: EncodeOptions = {"delimiter": ",", "indent": 2} +options: EncodeOptions = EncodeOptions(delimiter=",") result: str = encode(data, options) decoded: JsonValue = decode(result) - -# DecodeOptions is a class, can be instantiated or use dict -decode_opts = DecodeOptions(indent=2, strict=True) -# Or use dict for decode too -decode(result, {"indent": 2, "strict": True}) ``` --- diff --git a/pyproject.toml b/pyproject.toml index 1ecb271..8c8824b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "toon_format" -version = "1.0.0" +version = "0.9.0-beta.1" description = "A compact, human-readable serialization format designed for passing structured data to Large Language Models with significantly reduced token usage" readme = "README.md" authors = [ diff --git a/src/toon_format/__init__.py b/src/toon_format/__init__.py index dee81fa..f664ec0 100644 --- a/src/toon_format/__init__.py +++ b/src/toon_format/__init__.py @@ -25,7 +25,7 @@ from .types import DecodeOptions, Delimiter, DelimiterKey, EncodeOptions from .utils import compare_formats, count_tokens, estimate_savings -__version__ = "0.1.1" +__version__ = "0.9.0-beta.1" __all__ = [ "encode", "decode", diff --git a/src/toon_format/utils.py b/src/toon_format/utils.py index 
f013cf0..002f3d2 100644 --- a/src/toon_format/utils.py +++ b/src/toon_format/utils.py @@ -14,7 +14,7 @@ compare_formats: Generate formatted comparison table Requirements: - tiktoken: Install with `pip install tiktoken` + tiktoken: Install with `uv add tiktoken` or `uv add toon_format[benchmark]` Example: >>> import toon_format @@ -36,7 +36,7 @@ _TIKTOKEN_MISSING_MSG = ( "tiktoken is required for token counting. " - "Install with: pip install tiktoken or pip install toon-format[benchmark]" + "Install with: uv add tiktoken or uv add toon_format[benchmark]" ) @@ -80,7 +80,7 @@ def count_tokens(text: str, encoding: str = "o200k_base") -> int: 4 Note: - Requires tiktoken to be installed: pip install tiktoken + Requires tiktoken to be installed: uv add tiktoken or uv add toon_format[benchmark] """ if encoding == "o200k_base": enc = _get_tokenizer()