diff --git a/specs/206-markdown-file-creation-f7d8d3/feature.yaml b/specs/206-markdown-file-creation-f7d8d3/feature.yaml new file mode 100644 index 000000000..6fc805f05 --- /dev/null +++ b/specs/206-markdown-file-creation-f7d8d3/feature.yaml @@ -0,0 +1,39 @@ +feature: + id: "206-markdown-file-creation-f7d8d3" + name: "markdown-file-creation-f7d8d3" + number: 206 + branch: "feat/206-markdown-file-creation-f7d8d3" + lifecycle: "research" + createdAt: "2026-03-25T06:54:05Z" +status: + phase: "implementation-complete" + progress: + completed: 14 + total: 14 + percentage: 100 + currentTask: null + lastUpdated: "2026-03-25T07:12:34.543Z" + lastUpdatedBy: "feature-agent:implement" + completedPhases: + - "analyze" + - "requirements" + - "research" + - "plan" + - "phase-1-setup" + - "phase-2-file-creation" + - "phase-5-orchestration" +validation: + lastRun: null + gatesPassed: [] + autoFixesApplied: [] +tasks: + current: null + blocked: [] + failed: [] +checkpoints: + - phase: "feature-created" + completedAt: "2026-03-25T06:54:05Z" + completedBy: "feature-agent" +errors: + current: null + history: [] diff --git a/specs/206-markdown-file-creation-f7d8d3/plan.yaml b/specs/206-markdown-file-creation-f7d8d3/plan.yaml new file mode 100644 index 000000000..e463cf089 --- /dev/null +++ b/specs/206-markdown-file-creation-f7d8d3/plan.yaml @@ -0,0 +1,230 @@ +name: "markdown-file-creation-f7d8d3" +summary: > + Implement feature 206 to create a test markdown file (test-afcl8i.md) following the + established pattern from features 200-205. Architecture uses single-module design with + clear separation of concerns: hard-coded content → file creation → validation pipeline → + git operations. Implementation is straightforward: pathlib for cross-platform file I/O, + regex-based CommonMark validation, subprocess for git operations, fail-fast error handling + with specific error messages. No external dependencies beyond Python 3.11+ stdlib and git. 
+ +relatedFeatures: + - number: 205 + name: "markdown-file-creation-870df7" + relationship: "Direct precedent with identical hard-coded content pattern" + - number: 204 + name: "markdown-file-creation-946f62" + relationship: "Validation pattern reference (uses API-based content; feature 206 uses hard-coded)" + - number: 203 + name: "markdown-file-creation-213afa" + relationship: "Architecture and fail-fast error handling pattern" + +technologies: + - "Python 3.11+ (standard library: pathlib, subprocess, sys, re)" + - "pathlib.Path for cross-platform file I/O" + - "subprocess.run() for git command execution" + - "Regular expressions (re module) for CommonMark validation" + - "Git command-line interface" + - "Markdown (CommonMark specification)" + - "UTF-8 text encoding with Unix LF line endings" + - "sheep.observability.logging for structured logging" + +relatedLinks: + - title: "Feature 205 Implementation (Direct Precedent)" + url: "https://github.com/jobnik/sheep/blob/main/src/sheep/features/feature_205_markdown_file_creation.py" + - title: "CommonMark Markdown Specification" + url: "https://spec.commonmark.org/" + - title: "Conventional Commits Specification" + url: "https://www.conventionalcommits.org/" + - title: "Python pathlib Documentation" + url: "https://docs.python.org/3.11/library/pathlib.html" + - title: "Python subprocess Documentation" + url: "https://docs.python.org/3.11/library/subprocess.html" + +phases: + - id: "phase-1-setup" + name: "Module Setup & Constants" + description: "Define module constants (filename, feature number, branch name, commit message, title, prose content), set up imports and logging infrastructure. This phase establishes the foundation for all subsequent implementation." + parallel: false + + - id: "phase-2-file-creation" + name: "Content & File Creation" + description: "Implement hard-coded content definition and file writing using pathlib.Path.write_text(). 
Ensure UTF-8 encoding without BOM and Unix LF line endings are produced by the pathlib call on Unix systems." + parallel: false + + - id: "phase-3-validation" + name: "Validation Pipeline" + description: "Implement comprehensive validation functions using regex-based pattern matching and binary file inspection. Covers H1 heading format, blank line separator, sentence count, UTF-8 encoding, LF line endings, and file size. Each validator has clear, specific error messages enabling quick debugging." + parallel: false + + - id: "phase-4-git-integration" + name: "Git Integration" + description: "Implement git operations using subprocess.run(): stage file with \"git add\", create commit with conventional message \"feat(206): ...\", and push to feature branch. Each operation uses fail-fast pattern with check=True to raise CalledProcessError on any failure." + parallel: false + + - id: "phase-5-orchestration" + name: "Orchestration & End-to-End Testing" + description: "Wire together all components into a cohesive orchestration function that creates file, validates, performs git operations, and handles errors. Test end-to-end workflow and verify all success criteria are met." + parallel: false + +filesToCreate: + - "src/sheep/features/feature_206_markdown_file_creation.py" + +filesToModify: [] + +openQuestions: [] + +content: | + ## Architecture Overview + + Feature 206 follows the single-module architecture established in features 200-205, implementing + a focused, straightforward workflow for creating a test markdown file with hard-coded content. 
+ + **Module Design:** + Single module `src/sheep/features/feature_206_markdown_file_creation.py` contains: + - Module constants (FILENAME, FEATURE_NUMBER, BRANCH_NAME, COMMIT_MESSAGE, TITLE_TEXT, PROSE_CONTENT) + - File creation function using pathlib.Path.write_text() + - Validation helper functions (regex patterns, individual validators) + - Comprehensive validation pipeline orchestrator + - Git operation functions (add, commit, push) using subprocess.run() + - Main orchestration function that wires everything together + - Script entry point for direct execution + + **Validation Pipeline:** + The feature implements a fail-fast validation pipeline that stops on first error with specific, + actionable error messages. Validation order: (1) file exists, (2) H1 heading format, (3) blank + separator line, (4) prose sentence count (2-3), (5) UTF-8 encoding without BOM, (6) Unix LF + line endings only, (7) file size within 100-600 bytes. Each validator uses simple string + operations or regex patterns — no external dependencies. + + **Git Integration:** + Git operations use subprocess.run() with check=True for fail-fast behavior. Three sequential + operations: (1) `git add test-afcl8i.md`, (2) `git commit -m "feat(206): ..."`, + (3) `git push -u origin feat/206-...`. Commands use list form (not shell strings) to prevent + injection vulnerabilities. Fail-fast approach: any git error raises CalledProcessError with + stderr context. + + **Logging & Observability:** + Structured logging via sheep.observability.logging.get_logger() for consistent integration + with Langfuse observability system. Log levels: info() for major workflow steps (file created, + validation passed), debug() for detailed operation results, error() for failures with full context. + + **Content Strategy:** + Hard-coded, deterministic content (not API-based) ensures reproducibility across runs and + simplifies error handling. Prose content is embedded as module constant for full transparency. 
+ Eliminates external dependencies and API latency compared to the feature 204 approach. + + ## Key Design Decisions + + ### 1. Hard-Coded Deterministic Content + + **Chosen:** Hard-coded H1 title and 2-3 sentences of prose content (embedded as constants). + + **Why:** The specification explicitly requires hard-coded content (NFR-9) for reproducibility across + runs. Feature 205 demonstrates this pattern is simpler and more maintainable than API-based + approaches (feature 204). Hard-coded content: (1) eliminates external dependencies, (2) removes + API call latency (2-3 seconds), (3) simplifies error handling (no LLM-specific edge cases), + (4) ensures identical execution on repeated runs, (5) aligns with the straightforward nature of the task. + + ### 2. File I/O: Direct pathlib.Path + + **Chosen:** Use `pathlib.Path.write_text(content, encoding="utf-8")` for file creation. + + **Why:** Features 204-205 establish pathlib as the standard for this series. Benefits: + (1) cross-platform compatibility (pathlib handles path separators), (2) explicit UTF-8 encoding + parameter, (3) LF line endings written unchanged on Unix systems (our deployment target), (4) modern + Python best practice (standard since 3.4, recommended for 3.11+), (5) post-creation validation + checks actual bytes to verify correct encoding and line endings. + + ### 3. Regex-Based Markdown Validation + + **Chosen:** Use re module (Python stdlib) for regex-based validation of H1 heading and paragraph + structure. + + **Why:** The specification explicitly recommends regex validation for this use case. It is sufficient for + a simple H1+prose format with deterministic content, avoids external dependencies (NFR-8), and keeps the + implementation maintainable. Regex patterns: (1) are deterministic and testable with known inputs, + (2) enable specific error messages ("Expected H1 starting with '# '"), (3) require only stdlib, + (4) proved effective in feature 205. + + ### 4. 
Auto-Conversion to UTF-8/LF with Validation + + **Chosen:** Write with UTF-8/LF via pathlib.write_text(), then validate actual bytes; fail with + specific error if encoding/line endings incorrect. + + **Why:** Specification FR-4 requires "validate and convert if necessary". Here the "conversion" is + the write itself: content is always emitted with an explicit `encoding="utf-8"` (no BOM), and on Unix + the LF characters in the content are written unchanged (on other platforms pass `newline="\n"`, + available since Python 3.10, to get the same result). Post-creation validation uses a + binary read to check actual bytes. If validation fails, log specific error and stop (fail-fast); + no second conversion pass is attempted. This ensures a non-compliant file is never committed, + regardless of system defaults. + + ### 5. Fail-Fast Error Handling + + **Chosen:** Stop immediately on first validation failure with specific, actionable error message. + + **Why:** Specification NFR-4 requires error messages to be specific and actionable (include context, + show expected vs. actual). Fail-fast approach: (1) stops execution at first error, (2) enables quick + diagnosis and resolution without collecting multiple errors, (3) matches proven pattern from + features 200-205, (4) keeps validation logic simple and deterministic. Example error message: + "File size 750 bytes exceeds maximum 600 bytes (expected 100-600)". + + ### 6. subprocess.run() for Git Operations + + **Chosen:** Use `subprocess.run()` with check=True for git add, commit, and push operations. + + **Why:** Features 204-205 establish subprocess as the pattern. Benefits: (1) git commands are + directly visible in code (transparency), (2) error handling via CalledProcessError, which carries + stderr when the call uses capture_output=True, + (3) check=True ensures fail-fast on any git failure, (4) only stdlib required (no external git + wrapper dependencies), (5) matches established pattern exactly. Commands use list form + (not shell strings) to prevent injection vulnerabilities. + + ### 7. 
Commit Message: Feature Number Scope + + **Chosen:** Use feature number in commit scope: `feat(206): Create markdown file test-afcl8i.md` + + **Why:** Specification explicitly recommends feature number for clear traceability. Feature number + provides: (1) direct link to feature tracking system, (2) enables automated tooling to correlate + commits to feature specs, (3) maintains consistency with features 200-205+ numbering scheme, + (4) provides clear context in git log. + + ## Implementation Strategy + + **Phase Ordering Rationale:** + + 1. **Phase 1 (Setup)** establishes module foundation: constants, imports, logging infrastructure. + All subsequent phases depend on these definitions being in place. + + 2. **Phase 2 (File Creation)** comes before validation because we must create the file before + validating it. Logical workflow: create → validate → commit. + + 3. **Phase 3 (Validation)** comes before git operations because we validate completely before + staging/committing. Fail-fast validation prevents pushing invalid files to git. + + 4. **Phase 4 (Git Integration)** only runs after all validation passes. Ensures file is correct + before staging, committing, and pushing. + + 5. **Phase 5 (Orchestration)** wires everything together and performs end-to-end testing. This + phase requires all previous phases to be complete and working. + + **Task Granularity:** + Each task is focused and narrow, implementing a single piece of functionality with a clear TDD + cycle (RED-GREEN-REFACTOR). This enables: (1) independent testing of each component, + (2) early error detection, (3) easier debugging and troubleshooting, (4) clearer git history + with logical commits. + + **Testing Strategy:** + Each code task follows TDD. For file operations, tests use the actual file system but clean up + after themselves. For git operations, tests use subprocess mocking or temporary test repositories + to avoid side effects. 
Validation functions are tested with deterministic inputs and expected + outputs. + + ## Risk Mitigation + + | Risk | Mitigation | + | ---- | ---------- | + | File encoding issues across platforms | Write with explicit `encoding="utf-8"` via pathlib. Post-creation, validate actual bytes using binary read. Fail with specific error if BOM or wrong encoding detected. | + | Git operations fail (network, permissions, missing branch) | Use subprocess.run() with check=True and capture_output=True so CalledProcessError carries stderr context. Feature branch is pre-created (feat/206-...). A configured git identity (user.name/user.email) is a prerequisite; verify it before running. | + | Validation regex patterns don't match edge cases | Specification defines success criteria precisely. Test patterns with both valid and invalid inputs. Hard-coded content is deterministic, so file structure is predictable and easy to test. | + | File size outside bounds (100-600 bytes) | Validate file size after creation using `Path.stat().st_size`. Hard-coded content size is deterministic. Write prose carefully to ensure total file size (with H1 + blank line) falls within bounds. Test with actual content. | + | Sentence count validation fails | Count periods in prose carefully. Validation function counts '.' characters, so avoid abbreviations, decimals, and filenames containing '.' in the prose. Write 2-3 sentences, each with a terminal period. Test validation with actual prose content before committing. | + | Module import issues or missing dependencies | Use only Python 3.11+ stdlib: pathlib, subprocess, sys, re. Import sheep.observability.logging from established codebase. Test imports at module startup. | + | Hard-coded prose content doesn't fit requirements | Write prose with 2-3 complete sentences. Each sentence ends with a period. Verify prose length in bytes, not words: the total file must stay within 100-600 bytes (60-120 words of typical English can exceed 600 bytes at the upper end). Test sentence count with validation function. | + | Feature branch naming mismatch | Branch is pre-created: `feat/206-markdown-file-creation-f7d8d3`. Use exact name from spec. Test push operation to verify it reaches correct remote branch. 
| diff --git a/specs/206-markdown-file-creation-f7d8d3/research.yaml b/specs/206-markdown-file-creation-f7d8d3/research.yaml new file mode 100644 index 000000000..c3a8f615b --- /dev/null +++ b/specs/206-markdown-file-creation-f7d8d3/research.yaml @@ -0,0 +1,485 @@ +# Research Artifact (YAML) +# This is the source of truth. Markdown is auto-generated from this file. + +name: "markdown-file-creation-f7d8d3" +summary: > + Technical research for feature 206: Create markdown file test-afcl8i.md with hard-coded + H1 title and 2-3 sentences of prose. This feature directly follows feature 205 by using + deterministic hard-coded content, eliminating Claude API dependencies and complexity. + Implementation uses pathlib.Path for cross-platform file I/O, subprocess.run() for git + operations, regex-based CommonMark validation, and comprehensive error handling with + specific error messages. All technologies use only Python 3.11+ standard library and + git command-line tools — no external package dependencies. 
+ +relatedFeatures: + - number: 205 + name: "markdown-file-creation-870df7" + relationship: "Immediately preceding feature using identical hard-coded content pattern; direct implementation precedent" + - number: 204 + name: "markdown-file-creation-946f62" + relationship: "Preceding feature using Claude API for content generation; establishes comprehensive validation patterns" + - number: 203 + name: "markdown-file-creation-213afa" + relationship: "Prior feature establishing single-module architecture and fail-fast error handling patterns" + +technologies: + - "Python 3.11+ (standard library: pathlib, subprocess, sys, re)" + - "pathlib.Path for cross-platform file I/O and path management" + - "subprocess.run() for git command execution" + - "Regular expressions (re module) for CommonMark format validation" + - "Git command-line interface (version control)" + - "Markdown (CommonMark specification)" + - "UTF-8 text encoding with Unix LF line endings (0x0A)" + +relatedLinks: + - title: "Feature 205 Implementation (Hard-coded Content Pattern)" + url: "file:///src/sheep/features/feature_205_markdown_file_creation.py" + - title: "Feature 204 Implementation (Claude API Approach)" + url: "file:///src/sheep/features/feature_204_markdown_file_creation.py" + - title: "CommonMark Markdown Specification" + url: "https://spec.commonmark.org/" + - title: "Conventional Commits Specification" + url: "https://www.conventionalcommits.org/" + - title: "Python pathlib Documentation" + url: "https://docs.python.org/3.11/library/pathlib.html" + - title: "Python subprocess Documentation" + url: "https://docs.python.org/3.11/library/subprocess.html" + +decisions: + - title: "Content Strategy: Hard-Coded vs. Dynamic Generation" + chosen: "Hard-coded deterministic prose content" + rejected: + - "Claude API (feature 204 approach) — Adds unnecessary complexity, API latency, and external dependencies when feature specification explicitly recommends hard-coded content (NFR-9). 
Feature 205 demonstrates this simpler pattern is effective." + - "Random or algorithmic content generation — Violates specification requirement for determinism and reproducibility. Hard-coded content ensures identical execution across runs." + rationale: > + The feature specification explicitly selects hard-coded content with clear rationale in NFR-9: + "Prose content must be hard-coded (deterministic) to ensure reproducibility across runs." + Feature 205 successfully demonstrates this approach is simpler, faster, and more maintainable + than API-based generation. Hard-coded content removes external dependencies, eliminates API + latency (2-3 seconds per run), simplifies error handling, and aligns with the straightforward + nature of this task. Content is embedded directly in the module as a constant for full + transparency and auditability. + + - title: "File I/O Implementation: Direct pathlib vs. Tool Abstraction" + chosen: "Direct pathlib.Path.write_text() with explicit encoding parameter" + rejected: + - "Sheep's FileWriteTool abstraction — While valid, features 204-205 use pathlib directly for simpler, more transparent code consistent with established pattern." + - "io.open() with manual encoding/newline parameters — More verbose, requires explicit newline handling. pathlib is modern Python best practice." + - "Binary file write (path.write_bytes()) — More verbose, requires manual UTF-8 encoding. pathlib.write_text() is simpler and standard." + rationale: > + Features 204-205 establish using pathlib.Path.write_text() for cross-platform compatibility, + clarity, and Python best practices. Path.write_text() provides: (1) UTF-8 encoding + without BOM when encoding='utf-8' is passed explicitly (the default encoding is locale-dependent), + (2) LF line endings written unchanged on Unix systems (deployment target), (3) transparent, + readable code, (4) no external dependencies. Post-creation validation checks actual bytes to + verify correct encoding and line endings, ensuring all requirements are met. 
This matches the + established pattern from features 200-205 and requires only standard library. + + - title: "Markdown Validation Approach: Regex vs. External Parser" + chosen: "Regex-based validation for H1 heading and paragraph structure" + rejected: + - "External markdown parser library (markdown, commonmark, etc.) — Violates NFR-8 (no external packages). Overkill for simple H1+prose format with deterministic content." + - "Manual string parsing without regex — Possible but less maintainable. Regex provides clear, concise pattern matching for well-defined formats." + - "Minimal validation (only file exists) — Fails to detect formatting errors early, violates comprehensive validation principle from features 200-205." + rationale: > + Specification resolves this question with recommendation: \"Regex-based validation for H1 heading + and paragraph structure. Sufficient for simple files, avoids external dependencies, keeps + implementation maintainable.\" Regex validation uses re module (Python stdlib) to validate: + (1) First line matches H1 pattern (^# [A-Za-z].* ), (2) Second line is blank, (3) Prose + follows (2-3 sentences via period counting). Regex is deterministic, testable, and enables + clear error messages (e.g., \"Expected H1 heading starting with '# '\"). This approach is + proven in feature 205 validation pipeline. + + - title: "File Encoding & Line Ending Handling: Fail-Fast vs. Auto-Conversion" + chosen: "Automatic conversion to UTF-8/LF with validation" + rejected: + - "Fail immediately on encoding mismatch — Strict but less robust. Auto-conversion aligns with FR-4 requirement and handles platform differences." + - "Warn and continue with non-compliant files — Least reliable, could cause issues downstream if encoding/line endings are incorrect." + rationale: > + Specification resolves this with recommendation: \"Automatic conversion to UTF-8/LF. Aligns + with FR-4 'validate and convert if necessary'. 
Ensures consistency regardless of system + defaults.\" Implementation: (1) Write file with pathlib.write_text(encoding='utf-8'), which + produces UTF-8 without BOM and LF line endings on Unix, (2) Post-write validation checks + actual bytes using binary read, (3) If validation fails, log specific error and stop. + This approach ensures files always meet requirements regardless of system default encoding + or CRLF/LF settings. Matches feature 205 pattern. + + - title: "Validation Failure Handling: Fail-Fast vs. Collect-All" + chosen: "Fail immediately on first validation failure with specific error" + rejected: + - "Collect all errors and report together — More comprehensive but delays feedback. Automated systems benefit from fail-fast with specific error for quick resolution." + - "Attempt recovery for certain failures — More complex, harder to predict behavior. Specification requires deterministic, straightforward execution." + rationale: > + Specification resolves this with recommendation: \"Fail immediately with specific error. + Aligns with NFR-4, implements fail-fast pattern, enables quick debugging.\" This approach: + (1) Stops execution at first error, (2) Provides specific, actionable error message with + context, (3) Enables engineers to diagnose and fix issues quickly, (4) Matches proven pattern + in features 200-205. Example error messages: \"File encoding is not UTF-8 (found BOM bytes)\", + \"Expected 2-3 sentences, found 4 periods\", \"File size 750 bytes exceeds maximum 600 bytes\". + + - title: "Git Commit Message Scope: Feature Number vs. Name" + chosen: "Feature number: feat(206)" + rejected: + - "Feature name: feat(markdown-file-creation) — More descriptive but less precise for feature tracking. Loses direct link to feature number." + - "No scope: feat: description — Simpler but loses context. Feature number provides clear traceability." 
+ rationale: > + Specification resolves this with recommendation: \"Feature number (206) provides clear + traceability to feature tracking system, enables easy correlation between commits and specs.\" + Commit message format: \"feat(206): Create markdown file test-afcl8i.md\". This approach: + (1) Directly links git commit to feature number, (2) Enables automated tooling to correlate + commits to features, (3) Maintains consistency with features 200-205, (4) Provides clear + context in git log. Implementation uses conventional commits standard. + + - title: "Module Architecture: Single Module vs. Multi-File" + chosen: "Single module design (feature_206_markdown_file_creation.py)" + rejected: + - "Multiple modules (generation.py, validation.py, git.py) — Adds complexity and file overhead without benefit for simple feature." + - "Class-based OOP design — Adds boilerplate (init, self, inheritance) without matching established pattern from features 200-205." + rationale: > + Features 200-205 consistently use single-module architecture with focused functions. Benefits: + (1) All logic in one visible file, (2) Easier to understand complete workflow, (3) Simpler + testing (functions can be tested independently), (4) Matches proven pattern and codebase + conventions, (5) No unnecessary indirection or abstraction. Module structure mirrors feature + 205: constants (FILENAME, TITLE_TEXT, PROSE_CONTENT), file creation function, validation + functions, git operations, and orchestration (main()). + + - title: "Logging and Observability Integration" + chosen: "Use sheep.observability.logging.get_logger(__name__) for structured logging" + rejected: + - "Standard Python logging (logging module directly) — sheep codebase uses structlog for consistent observability and Langfuse integration." + - "Print statements for output — Loses structured logging, harder to filter/search, doesn't integrate with observability system." + - "No logging — Impairs debugging and observability." 
+ rationale: > + Feature 205 and sheep codebase consistently use sheep.observability.logging.get_logger() for + structured, consistent logging. Implementation: (1) Import at module level: + \"logger = sheep.observability.logging.get_logger(__name__)\", (2) Use info() for major + workflow steps (file created, validation passed), (3) Use debug() for detailed operation + results, (4) Use error() for failures with full context. This provides: (1) correlation + with other system components, (2) structured fields for debugging, (3) consistent pattern + across features 200-206. + +openQuestions: [] + +content: | + ## Technology Decisions + + ### 1. Hard-Coded Deterministic Content + + **Chosen:** Implement feature 206 with hard-coded, deterministic H1 title and 2-3 sentences of prose. + + **Rejected:** + - Claude API with temperature=0 (feature 204 approach) — Adds unnecessary complexity and API latency when feature 205 demonstrates hard-coded approach is simpler and more reliable. + - Random/generated content — Violates determinism requirement and complicates testing. + + **Rationale:** + Feature 206's specification explicitly requires hard-coded content (NFR-9): \"Prose content must be + hard-coded (deterministic) to ensure reproducibility across runs.\" This decision removes API call + complexity, ensures identical execution on repeated runs, simplifies error handling, and aligns + with the straightforward nature of this task. Feature 205 implementation provides direct working + precedent. The chosen topic can be any appropriate subject. Content is embedded as module constants + for full transparency and auditability. + + ### 2. File I/O: Direct pathlib.Path vs. Tool Abstraction + + **Chosen:** Use `pathlib.Path.write_text(content, encoding='utf-8')` for file creation. + + **Rejected:** + - Sheep's FileWriteTool abstraction — Valid abstraction but features 204-205 use direct pathlib calls for simplicity and transparency. 
+ - `io.open()` with manual encoding handling — More verbose, requires explicit newline parameter management. + + **Rationale:** + Features 204-205 establish pathlib.Path as the standard for this feature series. Benefits: + - Cross-platform compatibility (pathlib handles path separators) + - UTF-8 encoding is explicit via `encoding='utf-8'` parameter (produces no BOM) + - LF line endings are native on Unix systems + - Modern Python best practice (standard since 3.4, recommended for 3.11+) + - Post-creation validation checks actual bytes to verify correct encoding and line endings + + Implementation: `file_path = Path(FILENAME); file_path.write_text(content, encoding='utf-8')` + + ### 3. Markdown Validation: Regex-Based Pattern Matching + + **Chosen:** Regex-based validation for H1 heading and paragraph structure. + + **Validation Functions:** + - `validate_h1_format()` — Check first line matches H1 pattern (`^# [A-Za-z].*`) + - `validate_blank_separator()` — Check second line is blank (empty or whitespace only) + - `validate_sentence_count()` — Verify exactly 2-3 sentences by counting periods + - `validate_encoding()` — Check UTF-8, no BOM using binary read + - `validate_line_endings()` — Check LF only, no CRLF/CR + - `validate_file_size()` — Check 100-600 bytes per specification + - `validate_markdown_file()` — Comprehensive pipeline orchestration + + **Rejected:** + - External markdown validators (markdown, commonmark libraries) — Adds external dependencies, violates NFR-8 (no external packages). + - Minimal validation (only file exists) — Delays error detection, allows invalid files. + - Complex grammar-based parsing — Overkill for deterministic files with known structure. 
+ + **Rationale:** + Specification explicitly recommends regex-based validation: \"Regex-based validation is recommended + because it satisfies NFR-8 (no external packages), provides sufficient validation for our constrained + use case (H1 heading + 2-3 sentences), keeps implementation simple, and aligns with the deterministic, + hard-coded content approach (NFR-9).\" Regex patterns are: (1) deterministic, (2) testable with + known inputs, (3) provide clear error messages, (4) require only standard library (re module). + Manual pattern-based validation using basic string operations and regex is proven effective in + feature 205. + + ### 4. File Encoding & Line Endings: Auto-Conversion with Validation + + **Chosen:** Write file with UTF-8 encoding and LF line endings; validate and report failures. + + **Rejected:** + - Fail immediately without conversion — Less robust, doesn't align with FR-4 requirement. + - Warn and continue — Least reliable, could cause issues downstream. + + **Rationale:** + Specification requirement FR-4: \"Write file with UTF-8 encoding and Unix LF line endings; validate + and automatically convert if necessary.\" Implementation: (1) pathlib.write_text(encoding='utf-8') + produces UTF-8 without BOM and LF line endings on Unix, (2) Post-creation validation checks actual + bytes (no BOM prefix, no CRLF sequences), (3) If validation fails, log specific error and fail. + This approach ensures files always meet UTF-8/LF requirements regardless of system defaults. + + ### 5. Git Operations: subprocess.run() vs. Tool Abstraction + + **Chosen:** Direct `subprocess.run()` for git operations (add, commit, push). + + **Rejected:** + - Sheep's git tool abstractions — Add indirection without proportional benefit for three simple operations. + - GitPython library — External dependency, not established in feature series, adds abstraction. + + **Rationale:** + Features 204-205 establish subprocess.run() as the pattern. 
Benefits: (1) git commands are directly + visible in code (transparency), (2) Error messages from git are reported directly via stderr, + (3) check=True parameter ensures fail-fast: CalledProcessError on any git failure, + (4) Only standard library required, (5) Matches features 200-205 exactly. + + Implementation: `subprocess.run(['git', 'add', FILENAME], check=True, capture_output=True, text=True)` + + ### 6. Validation Failure Handling: Fail-Fast Pattern + + **Chosen:** Fail immediately on first validation failure with specific error message. + + **Rejected:** + - Collect all errors and report together — Delays feedback, complicates error handling. + - Attempt recovery for certain failures — More complex, harder to predict behavior. + + **Rationale:** + Specification requirement NFR-4: \"All error messages must be specific and actionable (include + context, show expected vs. actual values), enabling quick resolution.\" Fail-fast approach: (1) + Stops execution at first error, (2) Provides specific, actionable error message, (3) Enables + engineers to diagnose and fix issues quickly, (4) Matches proven pattern in features 200-205. + + Example error messages: + - \"H1 heading not found: first line should start with '# '\" + - \"Expected 2-3 sentences, found 4 periods in prose\" + - \"File size 750 bytes exceeds maximum 600 bytes\" + - \"File encoding is not UTF-8 (detected BOM bytes)\" + + ### 7. Commit Message Scope: Feature Number + + **Chosen:** Use feature number: `feat(206)` + + **Rejected:** + - Feature name: `feat(markdown-file-creation)` — Less precise, harder to track to feature number. + - No scope: `feat: description` — Loses feature context entirely. 
+ + **Rationale:** + Specification explicitly recommends feature number: \"Feature number (206) is recommended because + it provides clear, unambiguous traceability to the feature tracking system, maintains consistency + with the feature numbering scheme (200-205+), and enables easy correlation between commits and + feature specs.\" Commit message format: `feat(206): Create markdown file test-afcl8i.md`. + This enables automated tooling to correlate git commits to features and provides clear context + in git history. + + ## Library Analysis + + | Library | Purpose | Decision | Reasoning | + | ------- | ------- | -------- | --------- | + | pathlib (stdlib) | File path operations and I/O | Use | Standard library, cross-platform, modern Python best practice (3.4+) | + | subprocess (stdlib) | Git command execution | Use | Standard library, transparent error handling, fail-fast with check=True | + | re (stdlib) | CommonMark regex validation | Use | Standard library, deterministic pattern matching, no external dependencies | + | sys (stdlib) | System exit on errors | Use | Standard library, standard way to exit with specific codes | + | sheep.observability.logging | Structured logging | Use | Established codebase pattern, integrated with Langfuse observability | + | sheep.tools.file_tools.FileWriteTool | File writing abstraction | Reject | Features 204-205 use direct pathlib; adds indirection without benefit | + | sheep.tools.git_tools | Git operation abstractions | Reject | Features 204-205 use direct subprocess; direct calls are more transparent | + | markdown parser library | Markdown parsing | Reject | Violates NFR-8 (no external packages); overkill for simple H1+prose validation | + | anthropic SDK | Claude API access | Reject | Hard-coded content eliminates need for API calls; violates NFR-8 | + | GitPython | Git abstraction | Reject | External dependency; subprocess is simpler and more direct | + + ## Security Considerations + + **1. 
File Encoding Validation** + - Must verify UTF-8 encoding without BOM to ensure consistency across platforms + - Check for BOM bytes (0xEF 0xBB 0xBF) using binary read and reject if present + - Validate via `Path.read_bytes()` and attempt UTF-8 decode + - Prevents platform-specific encoding issues and ensures cross-platform consistency + + **2. Line Ending Validation** + - Specification requires LF (Unix) line endings only, not CRLF (Windows) or CR (Mac) + - Git can auto-convert line endings based on core.autocrlf setting — must validate actual bytes + - Validation uses binary read to check: no 0x0D 0x0A (CRLF) or 0x0D (CR) sequences + - `pathlib.Path.write_text()` on Unix systems produces LF natively + - Ensures consistent line endings across platforms and prevents git-related surprises + + **3. No Injection Vulnerabilities** + - Hard-coded content eliminates possibility of injection attacks (no external input) + - Prose content is trusted and directly embedded in module + - No shell escaping needed: git commands use list form, not shell=True + - Git command lists: `['git', 'add', FILENAME]`, `['git', 'commit', '-m', message]`, `['git', 'push', '-u', 'origin', branch]` + - Commit message is constant (from COMMIT_MESSAGE constant), no user input substitution + + **4. File Path Safety** + - FILENAME constant is fixed (`test-afcl8i.md`), no path traversal possible + - `pathlib.Path(FILENAME)` resolves relative to current working directory (repo root) + - The fixed filename contains no '..' components or path separators, so traversal cannot occur (pathlib itself does not block traversal) + - No need to validate path — fixed, known filename + + **5. Process Execution Safety** + - `subprocess.run()` with list argument (not shell string) prevents command injection + - check=True ensures exceptions are raised on git failures + - capture_output=True prevents subprocess output from mixing with script output + - No sensitive information in subprocess calls or error messages + + ## Performance Implications + + **1. 
File Creation (Fast)** + - `pathlib.Path.write_text()` is optimized for single-shot file writes + - Hard-coded content (no API calls): ~1ms file write time + - Single Path() object creation and one write operation + - Total file creation time: < 5ms on typical system + + **2. Validation (Very Fast)** + - All validation functions use simple operations (string operations, byte checks, regex) + - Regex operations on small files (< 600 bytes) are instantaneous + - File size validation uses `pathlib.Path.stat()` (one syscall) + - Sentence counting: linear traversal, count('.') — O(n) where n < 600 + - Total validation time: < 10ms for typical file + + **3. Git Operations (Variable, Network-Dependent)** + - `git add`: 10-50ms typically + - `git commit`: 50-200ms (depends on hook execution) + - `git push`: 500ms-5s (dominated by network latency and remote server response) + - Total git time: 1-10 seconds typical + - Network latency is the primary performance factor + + **4. Total Execution Time** + - File creation: < 5ms + - Validation: < 10ms + - Git operations: ~1-10 seconds + - **Total: ~1-10 seconds typical** (within NFR-1 5-minute limit) + - Straightforward sequential execution is appropriate for this feature + + **5. 
No Optimization Needed** + - Feature runs once per execution (not in hot path) + - File operations are I/O-bound, not CPU-bound + - Single file, small size (100-600 bytes) — no performance concerns + - Simple sequential execution is preferred over complex optimization + + ## Architecture Notes + + **Consistency with Feature Series (200-206)** + Feature 206 continues the pattern established in features 200-205 with key architectural + elements: + - Single-module design: `src/sheep/features/feature_206_markdown_file_creation.py` + - Separation of concerns: content → validation → git operations + - Fail-fast error handling with specific error messages + - Comprehensive validation pipeline before git operations + - Structured logging with sheep observability integration + - Hard-coded content (deterministic, no external API calls) + + **Integration with Sheep Platform** + - Module is standalone (imports only stdlib + sheep.observability) + - Can be invoked directly: `python -m sheep.features.feature_206_markdown_file_creation` + - Logging integrates with Langfuse observability system via sheep.observability.logging + - No dependencies on other feature modules + - Conventional commit messages enable git history analysis + + **Differences from Features 204-205** + Feature 204 uses Claude API (temperature=0) for dynamic content generation. + Feature 205 uses hard-coded content, demonstrating effectiveness. 
+ Feature 206 follows feature 205 pattern with hard-coded content: + - Eliminates API calls and associated latency + - Simplifies error handling (no LLM-specific error cases) + - Better demonstrates straightforward file creation workflow + - Reduces external dependencies (no Anthropic SDK needed) + + **Module Structure** + ``` + feature_206_markdown_file_creation.py + ├── Module docstring + ├── Imports (pathlib, subprocess, sys, re, logging) + ├── Constants (FILENAME, FEATURE_NUMBER, BRANCH_NAME, COMMIT_MESSAGE, + │ TITLE_TEXT, PROSE_CONTENT) + ├── File creation functions + ├── Validation helper functions (regex patterns, individual validators) + ├── Comprehensive validation pipeline + ├── Git operation functions (add, commit, push) + ├── Orchestration (main) + └── Script entry point (if __name__ == "__main__") + ``` + + **Key Implementation Details** + - FILENAME: "test-afcl8i.md" (fixed, from specification) + - FEATURE_NUMBER: 206 (for commit message scope) + - BRANCH_NAME: "feat/206-markdown-file-creation-f7d8d3" (pre-created feature branch) + - COMMIT_MESSAGE: "feat(206): Create markdown file test-afcl8i.md" (conventional commits) + - TITLE_TEXT: H1 heading as string (e.g., "# The Topic of Choice") + - PROSE_CONTENT: 2-3 sentences as string (e.g., "First sentence. Second sentence. 
Third sentence.") + - Full content: `f"{TITLE_TEXT}\n\n{PROSE_CONTENT}\n"` (H1 + blank line + prose + newline) + + ## Dependencies & Constraints + + **Required:** + - Git repository with feature branch `feat/206-markdown-file-creation-f7d8d3` (pre-created) + - Write access to repository root directory + - Python 3.11+ with standard library (pathlib, subprocess, sys, re) + - Git configuration with user.name and user.email set + - Target filename: `test-afcl8i.md` (specified in requirements) + + **Constraints:** + - Python 3.11+ required + - File must follow specific format (H1 + blank line + 2-3 sentences) + - File size must be 100-600 bytes (inclusive) + - Prose must have exactly 2-3 sentences + - UTF-8 encoding without BOM required + - Unix LF line endings only + - Git operations must succeed (fail-fast on any error) + - No external package dependencies (NFR-8) + + ## Implementation Recommendations + + **Base Implementation on Feature 205** + 1. Start with feature_205_markdown_file_creation.py as template + 2. Change constants only: FILENAME, FEATURE_NUMBER, BRANCH_NAME, COMMIT_MESSAGE, TITLE_TEXT, PROSE_CONTENT + 3. Keep validation functions identical (regex patterns, validation logic) + 4. Keep git operations identical (subprocess.run calls) + 5. 
Keep logging and error handling patterns identical + + **Content Selection** + - Choose a clear, coherent topic (examples: technical topic, natural phenomenon, historical event) + - Write exactly 2-3 sentences about the topic + - Ensure grammatically correct, thematically coherent English prose + - Aim for roughly 40-85 words in prose for proper file size (total file ~250-550 bytes with H1; 120 words would typically exceed the 600-byte limit) + - Verify prose contains exactly 2-3 periods for sentence validation + + **Testing Before Git Operations** + - Create file and run validation pipeline locally + - Verify file size is within 100-600 bytes + - Verify encoding and line endings with binary read + - Verify regex validation passes all checks + - Only then proceed to git operations + + **Validation Order (Fail-Fast)** + 1. File exists check + 2. H1 heading format validation + 3. Blank separator line validation + 4. Prose sentence count validation + 5. File encoding validation + 6. File line endings validation + 7. File size validation + 8. Only if all pass: proceed to git operations + + --- + + _Research phase complete. Architecture is clear, technology decisions documented with rationale, + pattern from features 200-205 is well understood, and implementation path is straightforward. + Ready for implementation phase._ diff --git a/specs/206-markdown-file-creation-f7d8d3/spec.yaml b/specs/206-markdown-file-creation-f7d8d3/spec.yaml new file mode 100644 index 000000000..e283fb854 --- /dev/null +++ b/specs/206-markdown-file-creation-f7d8d3/spec.yaml @@ -0,0 +1,264 @@ +name: "markdown-file-creation-f7d8d3" +number: 206 +branch: "feat/206-markdown-file-creation-f7d8d3" +oneLiner: "Create a single markdown file called test-afcl8i.md with a title and 2-3 sentences about anything" +summary: | + Create a markdown file called `test-afcl8i.md` in the repository root directory + containing a markdown H1 title and 2-3 sentences of prose content on any topic. 
+ This continues the well-established pattern of similar file creation features + (features 200-205) and maintains consistency with 200+ existing test files. +phase: "Requirements" +sizeEstimate: "S" + +relatedFeatures: + - number: 205 + name: "markdown-file-creation-870df7" + relationship: "Immediately preceding feature with identical pattern" + - number: 204 + name: "markdown-file-creation-946f62" + relationship: "Identical pattern — create markdown file with title and 2-3 sentences" + - number: 203 + name: "markdown-file-creation-213afa" + relationship: "Identical pattern — create markdown file with title and 2-3 sentences" + +technologies: + - "Markdown (CommonMark specification)" + - "Git (version control and conventional commits)" + - "Python pathlib (file I/O)" + - "Python 3.11+ (standard library)" + - "UTF-8 text encoding" + - "Unix LF line endings" + +relatedLinks: + - title: "Sheep Project README" + url: "https://github.com/jobnik/sheep" + - title: "CommonMark Markdown Specification" + url: "https://spec.commonmark.org/" + - title: "Conventional Commits Specification" + url: "https://www.conventionalcommits.org/" + +openQuestions: + - question: "How should CommonMark syntax be validated given NFR-8 (no external packages)?" + resolved: true + options: + - option: "Regex-based validation" + description: "Use regex patterns to validate H1 heading format and paragraph structure. Sufficient for our simple use case (heading + prose), no external dependencies. May miss edge cases in complex CommonMark but appropriate for deterministic file with known structure." + selected: true + - option: "Manual parser" + description: "Implement a custom markdown parser following CommonMark spec sections. More robust but adds code complexity and maintenance burden for minimal gain on simple files." + selected: false + - option: "External tool via subprocess" + description: "Use subprocess to call pandoc or another external tool. 
Violates NFR-8 by requiring external dependencies, but provides authoritative CommonMark validation." + selected: false + selectionRationale: "Regex validation is recommended because it satisfies NFR-8 (no external packages), provides sufficient validation for our constrained use case (H1 heading + 2-3 sentences), keeps implementation simple, and aligns with the deterministic, hard-coded content approach (NFR-9)." + answer: "Regex-based validation" + + - question: "What scope should the conventional commit message use?" + resolved: true + options: + - option: "Feature number: feat(206)" + description: "Use feature number as scope, e.g., 'feat(206): Create markdown file test-afcl8i.md'. Directly links commit to feature tracking system, improves traceability across all 200+ similar features." + selected: true + - option: "Feature name: feat(file-creation)" + description: "Use human-readable feature name, e.g., 'feat(file-creation): Create markdown file...'. More descriptive but less precise for feature tracking." + selected: false + - option: "No scope: feat: description" + description: "Omit scope entirely. Simpler but loses feature tracking context and traceability." + selected: false + selectionRationale: "Feature number (206) is recommended because it provides clear, unambiguous traceability to the feature tracking system, maintains consistency with the feature numbering scheme (200-205+), and enables easy correlation between commits and feature specs." + answer: "Feature number: feat(206)" + + - question: "Should file encoding/line ending validation fail fast or attempt recovery?" + resolved: true + options: + - option: "Automatic conversion" + description: "Detect wrong encoding or line endings, automatically convert to UTF-8 + LF, proceed. Aligns with FR-4 'validate and convert if necessary'. More forgiving, ensures consistency." 
+ selected: true + - option: "Fail fast on mismatch" + description: "Reject file immediately if encoding is not UTF-8 or line endings are not LF. Strict validation, forces explicit handling of edge cases." + selected: false + - option: "Warn and continue" + description: "Log warning but allow non-compliant files. Least reliable, could cause issues downstream." + selected: false + selectionRationale: "Automatic conversion is recommended because FR-4 explicitly states 'validate and convert if necessary', it ensures files always meet the UTF-8/LF requirements regardless of system default, and maintains robustness without requiring user intervention." + answer: "Automatic conversion" + + - question: "How should the implementation handle validation failures?" + resolved: true + options: + - option: "Fail immediately with specific error" + description: "On first validation failure (syntax, size, encoding), stop execution and report detailed error. Per NFR-4, enables quick resolution. Implements fail-fast pattern." + selected: true + - option: "Collect all errors and report together" + description: "Run all validations, collect failures, report all at once. More comprehensive but delays feedback." + selected: false + - option: "Attempt recovery for certain failures" + description: "Some failures trigger recovery attempts (e.g., auto-convert encoding). Others fail fast. More complex, harder to predict behavior." + selected: false + selectionRationale: "Fail immediately is recommended because it aligns with NFR-4 (specific, actionable error messages), implements proven fail-fast pattern for debugging, and keeps validation logic simple and deterministic." + answer: "Fail immediately with specific error" + +content: | + ## Problem Statement + + The Sheep automated implementation platform requires the ability to create simple test markdown files following an established pattern. 
This feature creates a single test markdown file (`test-afcl8i.md`) at the repository root with a title and prose content, demonstrating automated file creation, git integration, and conventional commit workflows. + + This is a continuation of the markdown-file-creation pattern established in features 200-205, with 200+ similar test files already existing in the repository as precedents. The feature serves as a minimal, testable unit of work for the automated implementation system, proving the platform can consistently produce high-quality files that match established patterns. + + ## Success Criteria + + - [ ] File `test-afcl8i.md` is created at repository root (not in subdirectory) + - [ ] File contains valid CommonMark markdown syntax with no parse errors + - [ ] File starts with an H1 heading (title) on first line + - [ ] File contains exactly 2-3 prose sentences after the title + - [ ] File is encoded in UTF-8 with Unix LF line endings (0x0A, not CRLF) + - [ ] File size is between 100-600 bytes (inclusive) + - [ ] File is staged in git index with no uncommitted changes + - [ ] Git commit message follows Conventional Commits format with feature scope + - [ ] Commit is pushed to `feat/206-markdown-file-creation-f7d8d3` branch + - [ ] Git history shows single commit for this feature with no merge commits + - [ ] No existing files in repository are modified or deleted + - [ ] All validation checks pass with clear, specific error messages on any failure + + ## Functional Requirements + + - **FR-1**: Create a markdown file named `test-afcl8i.md` at the repository root using `pathlib.Path` for cross-platform compatibility + - **FR-2**: File must begin with an H1 markdown heading (single `#` followed by space and title text) that serves as the file's title + - **FR-3**: File must contain exactly 2-3 complete prose sentences after the title, separated by blank line from heading + - **FR-4**: Write file with UTF-8 encoding and Unix LF line endings; validate and 
automatically convert if necessary + - **FR-5**: File must be valid CommonMark markdown; validate H1 structure and paragraph syntax compliance (see openQuestions for validation approach) + - **FR-6**: Stage the file in git using subprocess to run `git add test-afcl8i.md` + - **FR-7**: Create a git commit with conventional commit message using subprocess (format: `feat(206): Create markdown file test-afcl8i.md`) + - **FR-8**: Push commit to the feature branch `feat/206-markdown-file-creation-f7d8d3` using subprocess + - **FR-9**: Validate that no existing files were modified during this operation; fail if repository state diverges unexpectedly + + ## Non-Functional Requirements + + - **NFR-1**: Execution must complete within 5 minutes total (file I/O and git operations) + - **NFR-2**: File size must be in range 100-600 bytes (inclusive) to match established test file patterns + - **NFR-3**: Implementation must follow architecture patterns from features 200-205 with identical module structure + - **NFR-4**: All error messages must be specific and actionable (include context, show expected vs. actual values), enabling quick resolution + - **NFR-5**: File I/O must use `pathlib.Path` exclusively for cross-platform compatibility (no os.path module) + - **NFR-6**: Git operations must use Python `subprocess` module with no external git wrapper dependencies + - **NFR-7**: Code must be maintainable and follow existing feature module patterns from the codebase + - **NFR-8**: No external package dependencies; use only Python 3.11+ standard library and git command-line tools + - **NFR-9**: Prose content must be hard-coded (deterministic) to ensure reproducibility across runs + + ## Product Questions & AI Recommendations + + | # | Question | AI Recommendation | Rationale | + | - | -------- | ----------------- | --------- | + | 1 | How should CommonMark syntax be validated given NFR-8 (no external packages)? 
| Regex-based validation for H1 heading and paragraph structure | Sufficient for simple files, avoids external dependencies, keeps implementation maintainable | + | 2 | What scope should the conventional commit message use? | Feature number: `feat(206)` | Provides clear traceability to feature tracking system, enables easy correlation between commits and specs | + | 3 | Should file encoding/line ending validation fail fast or attempt recovery? | Automatic conversion to UTF-8/LF | Aligns with FR-4, ensures consistency regardless of system defaults, maintains robustness | + | 4 | How should the implementation handle validation failures? | Fail immediately with specific error | Aligns with NFR-4, implements fail-fast pattern, enables quick debugging | + + ## Codebase Analysis + + ### Project Structure + + The Sheep project is organized into the following key directories: + + - **Root Directory** - Contains 200+ markdown test files following pattern `test-*.md` (examples: test-afcl8i.md, test-0tyide.md, test-x1h34c.md) + - **src/sheep/** - Main source code directory containing all modules + - **agents/** - Agent definitions for research, implementation, and review phases + - **config/** - Configuration management including LLM factory and settings + - **flows/** - Flow definitions for orchestrating multi-step workflows + - **observability/** - Logging infrastructure and Langfuse integration + - **tools/** - Agent tools for file operations and git commands + - **cli.py** - Command-line interface entry point + - **specs/** - Feature specifications organized by number (206+ feature specs) + - **.github/** - GitHub Actions workflows and CI/CD configuration + - **pyproject.toml** - Python project configuration + - **README.md** - Project documentation + + ### Architecture Patterns + + The Sheep platform follows these architectural patterns: + + **Separation of Structure and Intelligence:** + - Flows define the execution structure and control flow paths + - Crews provide 
autonomous AI decision-making within each flow + - Each agent has a focused role with clear goals and backstories + + **Feature Implementation Pattern:** + - Each feature is a self-contained operation (200+ markdown file creation tasks) + - Operations follow conventional commit patterns and git workflow best practices + - Validation is comprehensive with fail-fast error handling + - File creation uses standard pathlib.Path for cross-platform compatibility + + **Dependency Flow:** + - Simple operations depend on: Python standard library (pathlib, subprocess) + - Git operations use Python `subprocess` module (no external git wrapper dependencies) + - File I/O uses `pathlib.Path` for cross-platform compatibility + + ### Relevant Technologies + + - **Python 3.11+**: Required runtime with access to standard library modules (pathlib, subprocess, sys, re) + - **Git**: Version control system required for staging, committing, and pushing changes + - **Markdown**: CommonMark specification for file format (H1 heading + prose structure) + - **pathlib**: Cross-platform file path handling (Python standard library) + - **subprocess**: Git command execution (Python standard library) + - **re**: Regular expressions for CommonMark syntax validation (Python standard library) + + ## Affected Areas + + | Area | Impact | Reasoning | + | ---- | ------ | --------- | + | Repository Root Directory | Low | Adds one new markdown file to existing collection of 200+ test files; no modifications to existing files or directory structure | + | File System | Low | Single file creation operation (~300-550 bytes); no directory structure changes, no deletions or overwrites | + | Git Repository | Low | Standard single commit and push following established pattern from features 200-205; no branch/merge complexity, no destructive operations | + | Source Code (src/sheep/) | None | Feature is purely file creation; does not depend on or impact any source code in /src/sheep/ or elsewhere | + | Specs 
Directory | Low | Updates only this spec.yaml file during requirements phase; no impact on other specs or configurations | + | Configuration Files | None | No changes to .env, pyproject.toml, dockerfile, or any config files | + | Tests | None | No test modifications or additions required; feature is simple file creation with no complex logic to test | + | Dependencies | None | Feature uses only Python standard library (pathlib, subprocess, re) and git; no new package dependencies required | + + ## Dependencies + + **Required:** + - Git repository with feature branch `feat/206-markdown-file-creation-f7d8d3` (pre-created) + - Write access to repository root directory (already available) + - Python 3.11+ with `pathlib`, `subprocess`, `re` (standard library, no external packages) + - Git configuration with user.name and user.email set for commits + - Target filename: `test-afcl8i.md` (specified in requirements) + + **Reference/Precedent:** + - 200+ existing test markdown files (test-*.md) provide clear pattern to follow + - Previous feature implementations (200-205) demonstrate successful execution of identical pattern + - Recent commits (200-205) show exact templates and working examples + - Feature 205 implementation provides direct code precedent for implementation approach + + **Not Required:** + - No source code modifications to existing files + - No changes to core platform logic in agents, flows, or tools + - No modification of configuration files + - No test framework integration + - No documentation updates beyond spec files + - No external dependencies or build steps + + ## Size Estimate + + **S (Small — approximately 1-2 hours of work)** + + This is a minimal-effort, single-file creation feature with well-established precedents from 200+ existing test files: + + - **Content composition**: ~10-15 minutes (write title and 2-3 sentences of prose content) + - **File creation and encoding**: ~5 minutes (write file with UTF-8 encoding and LF line endings using 
pathlib) + - **Validation implementation**: ~10-15 minutes (regex validation for markdown syntax, sentence count, encoding, line endings, file size) + - **Git workflow**: ~10-15 minutes (stage file, commit with conventional message, push to feature branch) + - **Total estimate**: 40-60 minutes (up to about 1 hour) of actual work + + **Justification:** + - Well-established pattern with 200+ identical test files providing clear precedent + - Flexible topic selection with no research or architectural decisions required + - Single file with minimal, well-understood content format (one H1 heading + 2-3 sentences) + - No testing, documentation, or configuration changes required + - Recent feature implementations (200-205) demonstrate efficient execution in similar timeframes + - No git merge conflicts or branch management complexity + - Feature 205 implementation serves as direct working precedent for all implementation patterns + - File creation and git operations use only Python standard library (pathlib, subprocess, re) and standard git commands + + --- + + _Requirements phase complete — ready for implementation planning_ diff --git a/specs/206-markdown-file-creation-f7d8d3/tasks.yaml b/specs/206-markdown-file-creation-f7d8d3/tasks.yaml new file mode 100644 index 000000000..90b6976d0 --- /dev/null +++ b/specs/206-markdown-file-creation-f7d8d3/tasks.yaml @@ -0,0 +1,451 @@ +name: "markdown-file-creation-f7d8d3" +summary: > + 11 tasks across 5 phases to implement markdown file creation feature. Covers module setup, + hard-coded content definition, file creation with pathlib, comprehensive validation pipeline + (H1 format, blank line, sentence count, encoding, line endings, size), git integration + (add/commit/push), and end-to-end orchestration and testing. Total estimated effort: ~9-11 hours. 
+ +relatedFeatures: + - number: 205 + name: "markdown-file-creation-870df7" + relationship: "Direct precedent with identical architecture pattern" + - number: 204 + name: "markdown-file-creation-946f62" + relationship: "Validation pattern reference" + +technologies: + - "Python 3.11+" + - "pathlib" + - "subprocess" + - "Regular expressions" + - "Git" + +relatedLinks: + - title: "Feature 205 Implementation" + url: "https://github.com/jobnik/sheep/blob/main/src/sheep/features/feature_205_markdown_file_creation.py" + - title: "CommonMark Specification" + url: "https://spec.commonmark.org/" + +tasks: + - id: "task-1-1" + phaseId: "phase-1-setup" + title: "Set up module structure with constants and imports" + description: "Create src/sheep/features/feature_206_markdown_file_creation.py with module docstring, imports (pathlib, subprocess, sys, re, sheep.observability.logging), and all required constants (FILENAME, FEATURE_NUMBER, BRANCH_NAME, COMMIT_MESSAGE, TITLE_TEXT, PROSE_CONTENT). Set up logging at module level." 
+ state: "Todo" + dependencies: [] + acceptanceCriteria: + - "Module file exists at src/sheep/features/feature_206_markdown_file_creation.py" + - "All imports are available (pathlib, subprocess, sys, re, sheep.observability.logging)" + - "Constants defined: FILENAME=\"test-afcl8i.md\", FEATURE_NUMBER=206, BRANCH_NAME=\"feat/206-markdown-file-creation-f7d8d3\", COMMIT_MESSAGE=\"feat(206): Create markdown file test-afcl8i.md\", TITLE_TEXT (H1 heading), PROSE_CONTENT (2-3 sentences)" + - "Logger is initialized: logger = sheep.observability.logging.get_logger(__name__)" + - "Module can be imported without errors" + tdd: null + estimatedEffort: "30min" + + - id: "task-2-1" + phaseId: "phase-2-file-creation" + title: "Implement file creation with hard-coded content" + description: "Implement create_markdown_file() function that composes hard-coded content from TITLE_TEXT and PROSE_CONTENT with proper formatting (H1 heading, blank line separator, prose, trailing newline), then writes to disk using pathlib.Path.write_text(encoding='utf-8'). Log file creation success with path." 
+ state: "Todo" + dependencies: + - "task-1-1" + acceptanceCriteria: + - "Function create_markdown_file() exists and can be called" + - "File is created at repository root with correct name (test-afcl8i.md)" + - "File content format: \"# Title\\n\\nProse content.\\n\" (H1 + blank line + prose + newline)" + - "File is written with encoding='utf-8' (produces UTF-8 without BOM on all platforms)" + - "File path is obtained using pathlib.Path (cross-platform compatible)" + - "Success message is logged at info level" + tdd: + red: + - "Write test that asserts file does not exist initially" + - "Call create_markdown_file()" + - "Assert file now exists and has expected name" + green: + - "Implement pathlib.Path(FILENAME).write_text(content, encoding='utf-8')" + - "Build content string from TITLE_TEXT, blank line, and PROSE_CONTENT" + - "Log info message confirming file creation" + refactor: + - "Extract content composition to separate helper function for clarity" + - "Verify logging message is clear and includes file path" + estimatedEffort: "1h" + + - id: "task-3-1" + phaseId: "phase-3-validation" + title: "Implement H1 heading format validation" + description: "Implement validate_h1_format() function that checks file's first line matches H1 markdown pattern (starts with '# ' followed by text, uses regex: ^# [A-Za-z].*). Return True/raise exception with specific error message if invalid." 
+ state: "Todo" + dependencies: + - "task-2-1" + acceptanceCriteria: + - "Function validate_h1_format(file_path) exists" + - "Returns True if first line matches H1 pattern (^# [A-Za-z].*)" + - "Raises ValueError with specific message if H1 missing or malformed" + - "Error message includes expected format: \"Expected H1 heading starting with '# '\"" + - "Function reads file content (text mode)" + - "Regex pattern is tested with valid and invalid inputs" + tdd: + red: + - "Write test with file containing invalid first line (missing #, wrong format)" + - "Call validate_h1_format() and assert it raises ValueError" + - "Assert error message is specific and helpful" + green: + - "Implement regex pattern matching: re.match(r'^# [A-Za-z]', first_line)" + - "Read file first line using pathlib.Path.read_text()" + - "Raise ValueError with message: \"H1 heading not found or invalid format\"" + refactor: + - "Extract regex pattern to module-level constant H1_PATTERN" + - "Improve error message to show actual vs expected" + estimatedEffort: "45min" + + - id: "task-3-2" + phaseId: "phase-3-validation" + title: "Implement blank line separator validation" + description: "Implement validate_blank_separator() function that checks the line immediately after H1 heading is blank or whitespace-only. Return True/raise exception with specific error message if validation fails." 
+ state: "Todo" + dependencies: + - "task-3-1" + acceptanceCriteria: + - "Function validate_blank_separator(file_path) exists" + - "Reads second line and checks it is blank (empty or whitespace-only)" + - "Raises ValueError with message: \"Expected blank line after H1 heading\"" + - "Handles edge cases: file with fewer than 2 lines" + - "Function is tested with valid file (blank second line) and invalid (text on second line)" + tdd: + red: + - "Write test with file where second line has text (not blank)" + - "Call validate_blank_separator() and assert it raises ValueError" + green: + - "Read second line from file using pathlib.Path.read_text().split('\\n')[1]" + - "Check if line.strip() == '' (blank or whitespace)" + - "Raise ValueError if line is not blank" + refactor: + - "Handle edge case of file with fewer than 2 lines (raise specific error)" + - "Clean up string handling and line splitting logic" + estimatedEffort: "30min" + + - id: "task-3-3" + phaseId: "phase-3-validation" + title: "Implement prose sentence count validation" + description: "Implement validate_sentence_count() function that verifies prose section contains exactly 2-3 sentences by counting period characters. Extract prose (everything after blank separator line) and count periods. Return True/raise exception with specific error message if sentence count is invalid." 
+ state: "Todo" + dependencies: + - "task-3-2" + acceptanceCriteria: + - "Function validate_sentence_count(file_path) exists" + - "Extracts prose content (all text after second line)" + - "Counts periods in prose using prose.count('.')" + - "Validates that period count is 2 or 3" + - "Raises ValueError with message: \"Expected 2-3 sentences, found N periods\"" + - "Tested with prose having 1, 2, 3, 4+ periods" + tdd: + red: + - "Write test with prose containing 1 period (invalid: less than 2)" + - "Call validate_sentence_count() and assert it raises ValueError" + - "Verify error message includes actual period count" + green: + - "Read file content and extract prose (lines after second line)" + - "Count periods: prose.count('.')" + - "Raise ValueError if count < 2 or count > 3" + refactor: + - "Handle edge cases: prose with no periods, multiple periods per line" + - "Improve error message to show expected (2-3) vs actual" + estimatedEffort: "30min" + + - id: "task-3-4" + phaseId: "phase-3-validation" + title: "Implement UTF-8 encoding validation" + description: "Implement validate_encoding() function that reads file in binary mode and verifies encoding is UTF-8 without BOM (no 0xEF 0xBB 0xBF prefix). Decode bytes to verify valid UTF-8. Return True/raise exception with specific error message if encoding is invalid." 
+ state: "Todo" + dependencies: + - "task-2-1" + acceptanceCriteria: + - "Function validate_encoding(file_path) exists" + - "Reads file in binary mode using pathlib.Path.read_bytes()" + - "Checks for BOM (0xEF 0xBB 0xBF) and rejects if present" + - "Attempts UTF-8 decode: bytes.decode('utf-8')" + - "Raises ValueError with specific message if BOM detected or decode fails" + - "Error message distinguishes between BOM error and other encoding errors" + tdd: + red: + - "Write test that creates file with UTF-8 BOM (by prepending bytes 0xEF 0xBB 0xBF)" + - "Call validate_encoding() and assert it raises ValueError about BOM" + green: + - "Read file bytes: pathlib.Path(file_path).read_bytes()" + - "Check for BOM: if data.startswith(b'\\xef\\xbb\\xbf'): raise ValueError(...)" + - "Decode to UTF-8: data.decode('utf-8') to verify valid encoding" + refactor: + - "Extract BOM bytes to constant BOM_BYTES" + - "Improve error messages to distinguish BOM vs other encoding issues" + estimatedEffort: "45min" + + - id: "task-3-5" + phaseId: "phase-3-validation" + title: "Implement Unix LF line ending validation" + description: "Implement validate_line_endings() function that reads file in binary mode and verifies line endings are Unix LF (0x0A) only. Reject files with CRLF (0x0D 0x0A) or CR-only (0x0D). Return True/raise exception with specific error message if line endings are invalid." 
+ state: "Todo" + dependencies: + - "task-2-1" + acceptanceCriteria: + - "Function validate_line_endings(file_path) exists" + - "Reads file in binary mode using pathlib.Path.read_bytes()" + - "Checks that file contains no CRLF sequences (0x0D 0x0A)" + - "Checks that file contains no CR-only characters (0x0D)" + - "Raises ValueError with specific message if non-LF line endings detected" + - "Error message indicates which line ending type was found" + tdd: + red: + - "Write test that creates file with CRLF line endings" + - "Call validate_line_endings() and assert it raises ValueError" + - "Verify error message mentions CRLF" + green: + - "Read file bytes: pathlib.Path(file_path).read_bytes()" + - "Check: if b'\\r\\n' in data: raise ValueError('CRLF detected')" + - "Check: if b'\\r' in data: raise ValueError('CR detected')" + refactor: + - "Clarify error messages: \"File uses CRLF, expected Unix LF\"" + - "Consider checking for only LF: if all line endings are 0x0A" + estimatedEffort: "30min" + + - id: "task-3-6" + phaseId: "phase-3-validation" + title: "Implement file size validation" + description: "Implement validate_file_size() function that verifies file size is within specification bounds (100-600 bytes inclusive). Read file size using pathlib.stat().st_size. Return True/raise exception with specific error message if size is outside bounds." 
+ state: "Todo" + dependencies: + - "task-2-1" + acceptanceCriteria: + - "Function validate_file_size(file_path) exists" + - "Gets file size using pathlib.Path(file_path).stat().st_size" + - "Validates: 100 <= file_size <= 600" + - "Raises ValueError with message: \"File size N bytes is outside bounds (expected 100-600)\"" + - "Error message includes actual size for debugging" + - "Tested with file under 100 bytes, within bounds, and over 600 bytes" + tdd: + red: + - "Write test that creates file under 100 bytes" + - "Call validate_file_size() and assert it raises ValueError" + - "Verify error message includes actual size" + green: + - "Get size: size = pathlib.Path(file_path).stat().st_size" + - "Check: if size < 100 or size > 600: raise ValueError(...)" + refactor: + - "Extract bounds to constants: MIN_SIZE = 100, MAX_SIZE = 600" + - "Improve error message format" + estimatedEffort: "30min" + + - id: "task-3-7" + phaseId: "phase-3-validation" + title: "Implement comprehensive validation pipeline orchestrator" + description: "Implement validate_markdown_file() function that orchestrates all validation checks in correct order: (1) file exists, (2) H1 format, (3) blank separator, (4) sentence count, (5) encoding, (6) line endings, (7) file size. Call each validator in sequence. Stop immediately on first error (fail-fast) and log specific error message." 
+ state: "Todo" + dependencies: + - "task-3-1" + - "task-3-2" + - "task-3-3" + - "task-3-4" + - "task-3-5" + - "task-3-6" + acceptanceCriteria: + - "Function validate_markdown_file(file_path) exists" + - "Validates file exists before running other checks" + - "Calls validators in order: H1, blank line, sentences, encoding, line endings, size" + - "Returns True if all validations pass" + - "Stops immediately on first ValidationError/ValueError (fail-fast)" + - "Logs error at error level with full validation error message" + - "Tested with file that passes all validations" + - "Tested with file that fails at each validation stage" + tdd: + red: + - "Write test with file missing H1 heading" + - "Call validate_markdown_file() and assert it raises error" + - "Verify error is from H1 validation, not later stages" + green: + - "Implement function that calls each validator in sequence" + - "Wrap in try-except to catch and re-raise first error" + - "Log error message before raising" + refactor: + - "Extract validation order to clear list or comments" + - "Add debug logging for each validation step that passes" + - "Ensure error messages are clear and specific" + estimatedEffort: "1h" + + - id: "task-4-1" + phaseId: "phase-4-git-integration" + title: "Implement git add operation" + description: "Implement git_add() function that stages the markdown file using subprocess.run(['git', 'add', FILENAME], ...). Use check=True for fail-fast behavior. Capture stderr. Log success and raise subprocess.CalledProcessError with detailed message on failure." 
+ state: "Todo" + dependencies: + - "task-3-7" + acceptanceCriteria: + - "Function git_add() exists" + - "Calls subprocess.run() with: ['git', 'add', FILENAME]" + - "Uses check=True to raise CalledProcessError on failure" + - "Uses capture_output=True and text=True" + - "Logs git command and result at debug level" + - "On success: logs info message" + - "On failure: CalledProcessError includes stderr in exception message" + - "No shell escaping needed (uses list form, not shell string)" + tdd: + red: + - "Write test that mocks subprocess.run() to assert it was called with correct args" + - "Verify command list is ['git', 'add', 'test-afcl8i.md']" + green: + - "Implement subprocess.run(['git', 'add', FILENAME], check=True, capture_output=True, text=True)" + - "Log info: \"Staged file with git add\"" + refactor: + - "Add better error handling and context to CalledProcessError message" + - "Log stderr content on failure for debugging" + estimatedEffort: "30min" + + - id: "task-4-2" + phaseId: "phase-4-git-integration" + title: "Implement git commit operation with conventional message" + description: "Implement git_commit() function that creates commit with conventional commit message using subprocess.run(['git', 'commit', '-m', COMMIT_MESSAGE], ...). Use check=True for fail-fast. Format message: 'feat(206): Create markdown file test-afcl8i.md'. Capture stderr. Log success and raise CalledProcessError on failure." 
+ state: "Todo" + dependencies: + - "task-4-1" + acceptanceCriteria: + - "Function git_commit() exists" + - "Calls subprocess.run() with: ['git', 'commit', '-m', COMMIT_MESSAGE]" + - "COMMIT_MESSAGE is 'feat(206): Create markdown file test-afcl8i.md'" + - "Uses check=True for fail-fast behavior" + - "Uses capture_output=True and text=True" + - "Logs git command and commit message at debug level" + - "On success: logs info message with commit hash" + - "On failure: CalledProcessError includes stderr" + tdd: + red: + - "Write test that mocks subprocess.run() to assert commit command" + - "Verify message matches conventional commits format: feat(206)" + green: + - "Implement subprocess.run(['git', 'commit', '-m', COMMIT_MESSAGE], ...)" + - "Log info: \"Committed file with message: {COMMIT_MESSAGE}\"" + refactor: + - "Extract commit message format to clearer structure" + - "Verify message follows conventional commits (feat scope)" + estimatedEffort: "30min" + + - id: "task-4-3" + phaseId: "phase-4-git-integration" + title: "Implement git push to feature branch" + description: "Implement git_push() function that pushes commit to feature branch using subprocess.run(['git', 'push', '-u', 'origin', BRANCH_NAME], ...). Use check=True for fail-fast. Branch name: 'feat/206-markdown-file-creation-f7d8d3'. Capture stderr. Log success and raise CalledProcessError on failure." 
+ state: "Todo" + dependencies: + - "task-4-2" + acceptanceCriteria: + - "Function git_push() exists" + - "Calls subprocess.run() with: ['git', 'push', '-u', 'origin', BRANCH_NAME]" + - "BRANCH_NAME is 'feat/206-markdown-file-creation-f7d8d3'" + - "Uses -u flag to set upstream tracking on first push" + - "Uses check=True for fail-fast" + - "Uses capture_output=True and text=True" + - "Logs git command and branch name at debug level" + - "On success: logs info message" + - "On failure: CalledProcessError includes stderr (useful for network errors, auth issues)" + tdd: + red: + - "Write test that mocks subprocess.run() to assert push command" + - "Verify command includes -u flag and correct branch" + green: + - "Implement subprocess.run(['git', 'push', '-u', 'origin', BRANCH_NAME], ...)" + - "Log info: \"Pushed commit to {BRANCH_NAME}\"" + refactor: + - "Improve error messages for common git push failures (network, auth, branch)" + - "Log remote and branch name clearly" + estimatedEffort: "30min" + + - id: "task-5-1" + phaseId: "phase-5-orchestration" + title: "Wire up validation pipeline and implement main orchestration function" + description: "Implement main() function that orchestrates complete workflow: (1) create file, (2) validate file, (3) stage with git add, (4) commit, (5) push. Wrap workflow in try-except to catch any errors and log failures at error level. Return 0 on success, 1 on any failure. Ensure each step logs its status." 
+ state: "Todo" + dependencies: + - "task-3-7" + - "task-4-3" + acceptanceCriteria: + - "Function main() exists and orchestrates all steps in sequence" + - "Step 1: Calls create_markdown_file() and logs success" + - "Step 2: Calls validate_markdown_file() and logs success" + - "Step 3: Calls git_add() and logs success" + - "Step 4: Calls git_commit() and logs success" + - "Step 5: Calls git_push() and logs success" + - "Catches any exception and logs error message at error level" + - "Returns 0 on success, 1 on failure" + - "Logs overall workflow completion (success or failure) at info level" + tdd: + red: + - "Write integration test that calls main() and verifies it returns 0 when file is valid" + - "Write test that calls main() with mocked create_markdown_file() that raises error, verifies main() catches and returns 1" + green: + - "Implement main() function with try-except wrapper" + - "Call each operation in sequence: create → validate → git add → commit → push" + - "Log success at each step, error on exception" + refactor: + - "Extract workflow step logging to helper function for clarity" + - "Improve error messages to show which step failed" + - "Ensure all paths log completion status" + estimatedEffort: "1h" + + - id: "task-5-2" + phaseId: "phase-5-orchestration" + title: "End-to-end test and verify all success criteria" + description: "Create comprehensive end-to-end test that verifies the complete workflow works correctly. Test creates file, validates all criteria, performs git operations, and confirms all success criteria from feature spec are met. Verify file content, encoding, line endings, size, git commit message, and branch state." 
+ state: "Todo" + dependencies: + - "task-5-1" + acceptanceCriteria: + - "End-to-end test file is created and executes successfully" + - "Test verifies file exists at correct path with correct name" + - "Test verifies file contains valid CommonMark markdown (H1 + blank line + prose)" + - "Test verifies file is UTF-8 encoded without BOM" + - "Test verifies file uses Unix LF line endings only" + - "Test verifies file size is within 100-600 bytes" + - "Test verifies all validation functions pass" + - "Test verifies git commit message matches conventional commits format" + - "Test verifies commit is on correct feature branch" + - "All success criteria from feature spec are verified" + - "Test cleans up: removes created files and reverts git state" + tdd: + red: + - "Write comprehensive test that checks all success criteria" + - "Verify file existence, content format, encoding, size, git state" + green: + - "Create test that calls main() and verifies each success criterion" + - "Check file properties: name, path, content structure" + - "Check file encoding and line endings with binary read" + - "Check file size is within bounds" + - "Check git log to verify commit message and branch" + refactor: + - "Extract individual checks to helper functions for clarity" + - "Improve test documentation" + - "Ensure cleanup leaves repository in clean state" + estimatedEffort: "1.5h" + +totalEstimate: "9-11 hours" + +openQuestions: [] + +content: | + ## Summary + + Feature 206 implementation spans 5 phases and 14 tasks, from module setup through end-to-end testing. + Phase 1 establishes the foundation (constants, imports, logging). Phase 2 creates the file with + hard-coded content using pathlib.Path.write_text(). Phase 3 implements a comprehensive validation + pipeline with 6 independent validators (H1 format, blank line, sentence count, UTF-8 encoding, + LF line endings, file size), orchestrated through a fail-fast pipeline. 
Phase 4 implements git + operations (add, commit, push) using subprocess.run(). Phase 5 orchestrates everything together + and performs end-to-end testing to verify all success criteria are met. + + Each task follows TDD (RED-GREEN-REFACTOR), enabling independent component testing and early + error detection. Tasks are ordered by dependency: setup comes first, file creation before + validation, validation before git operations, and orchestration last. The design prioritizes + simplicity, maintainability, and clarity of error messages. All code uses only Python 3.11+ + standard library (pathlib, subprocess, sys, re) and sheep.observability.logging. + + Total estimated effort is 9-11 hours, distributed across phases as follows: + - Phase 1: 0.5 hours (setup) + - Phase 2: 1 hour (file creation) + - Phase 3: 4.5 hours (validation pipeline + orchestrator) + - Phase 4: 1.5 hours (git operations) + - Phase 5: 2.5 hours (orchestration + end-to-end testing) diff --git a/src/sheep/features/feature_206_markdown_file_creation.py b/src/sheep/features/feature_206_markdown_file_creation.py new file mode 100644 index 000000000..a4f734d2b --- /dev/null +++ b/src/sheep/features/feature_206_markdown_file_creation.py @@ -0,0 +1,534 @@ +"""Implementation for feature 206: Create markdown file test-afcl8i.md with title and prose content. + +This module orchestrates the creation of a markdown file with hard-coded, deterministic content. +Following the pattern from feature 205, this implementation uses hard-coded content to demonstrate +straightforward file creation within the Sheep workflow without external API dependencies. 
+ +The file is created with: +- Exact filename: test-afcl8i.md +- H1 markdown heading as title +- 2-3 sentences of prose content +- UTF-8 encoding without BOM +- Unix LF line endings +- File size approximately 300-600 bytes +- Git staging, commit, and push operations + +This approach provides: +- Deterministic output (identical on repeated execution) +- Transparent, auditable content (no API dependencies) +- Simplified error handling (no network failures) +- Faster execution (no API latency) +- Reliable testing and review (reproducible results) +""" + +import re +import subprocess +import sys +from pathlib import Path + +from sheep.observability.logging import get_logger + +_logger = get_logger(__name__) + +# Feature 206 constants +FILENAME = "test-afcl8i.md" +FEATURE_NUMBER = 206 +BRANCH_NAME = "feat/206-markdown-file-creation-f7d8d3" +COMMIT_MESSAGE = f"feat({FEATURE_NUMBER}): Create markdown file {FILENAME}" + +# Hard-coded markdown content +# H1 title about the chosen topic +TITLE_TEXT = "The Art of Problem Solving Through Code" + +# 2-3 sentences of prose content related to the title +PROSE_CONTENT = ( + "Software development is fundamentally about solving problems through logical thinking and creative solutions. " + "The ability to break down complex challenges into manageable pieces and implement elegant solutions is a skill " + "that distinguishes excellent programmers from adequate ones." +) + +# Validation constants +H1_PATTERN = r"^# [A-Za-z]" +BOM_BYTES = b"\xef\xbb\xbf" +MIN_FILE_SIZE = 100 +MAX_FILE_SIZE = 600 + + +def create_markdown_file() -> Path: + """Create markdown file with proper encoding and line endings. + + Creates file with H1 heading, blank line, and prose content. + Uses UTF-8 encoding and Unix LF line endings via pathlib.Path.write_text(). 
+ + Returns: + Path object pointing to created file + + Raises: + ValueError: If file creation fails + OSError: If file write operation fails + """ + _logger.info(f"Creating markdown file: {FILENAME}") + + try: + # Construct markdown content: # Title \n \n Prose + markdown_content = f"# {TITLE_TEXT}\n\n{PROSE_CONTENT}\n" + + # Write file with UTF-8 encoding and LF line endings + file_path = Path(FILENAME) + file_path.write_text(markdown_content, encoding="utf-8") + + # Verify file was created + if not file_path.exists(): + raise OSError(f"File was not created: {file_path}") + + file_size = file_path.stat().st_size + _logger.info(f"Successfully created {FILENAME} ({file_size} bytes)") + + return file_path + + except Exception as e: + _logger.error(f"Failed to create markdown file: {e}") + raise + + +def validate_h1_format(file_path: Path) -> bool: + """Validate that file's first line matches H1 markdown pattern. + + Checks that the first line starts with '# ' followed by alphabetic character. + Uses regex pattern: ^# [A-Za-z] + + Args: + file_path: Path to markdown file to validate + + Returns: + True if H1 format is valid + + Raises: + ValueError: If H1 heading is missing or malformed + """ + try: + content = Path(file_path).read_text(encoding="utf-8") + lines = content.split("\n") + + if not lines: + raise ValueError("File is empty: expected H1 heading starting with '# '") + + first_line = lines[0] + + if not re.match(H1_PATTERN, first_line): + raise ValueError( + f"H1 heading not found or invalid format: " + f"first line should start with '# ' followed by text, " + f"got: '{first_line}'" + ) + + return True + + except ValueError: + raise + except Exception as e: + raise ValueError(f"Failed to validate H1 format: {e}") from e + + +def validate_blank_separator(file_path: Path) -> bool: + """Validate that the line after H1 heading is blank. + + The second line must be empty or contain only whitespace. 
+ + Args: + file_path: Path to markdown file to validate + + Returns: + True if blank separator is valid + + Raises: + ValueError: If blank separator is missing or contains text + """ + try: + content = Path(file_path).read_text(encoding="utf-8") + lines = content.split("\n") + + # Check we have at least 2 lines + if len(lines) < 2: + raise ValueError( + "File has fewer than 2 lines: expected H1 heading followed by blank line" + ) + + second_line = lines[1] + + if second_line.strip() != "": + raise ValueError( + f"Expected blank line after H1 heading, " + f"but found text: '{second_line}'" + ) + + return True + + except ValueError: + raise + except Exception as e: + raise ValueError(f"Failed to validate blank separator: {e}") from e + + +def validate_sentence_count(file_path: Path) -> bool: + """Validate that prose contains exactly 2-3 sentences. + + Counts periods in the prose section (after the blank line) to determine + sentence count. + + Args: + file_path: Path to markdown file to validate + + Returns: + True if sentence count is valid (2-3) + + Raises: + ValueError: If sentence count is not 2 or 3 + """ + try: + content = Path(file_path).read_text(encoding="utf-8") + lines = content.split("\n") + + # Prose is everything after the second line (blank line) + if len(lines) < 3: + raise ValueError( + "File does not have prose content after blank line" + ) + + prose = "\n".join(lines[2:]) + period_count = prose.count(".") + + if period_count < 2 or period_count > 3: + raise ValueError( + f"Expected 2-3 sentences, found {period_count} periods " + f"in prose section" + ) + + return True + + except ValueError: + raise + except Exception as e: + raise ValueError(f"Failed to validate sentence count: {e}") from e + + +def validate_encoding(file_path: Path) -> bool: + """Validate that file is UTF-8 encoded without BOM. + + Checks that file does not start with UTF-8 BOM (0xEF 0xBB 0xBF) and + can be decoded as valid UTF-8. 
+ + Args: + file_path: Path to markdown file to validate + + Returns: + True if encoding is valid UTF-8 without BOM + + Raises: + ValueError: If BOM is detected or file is not valid UTF-8 + """ + try: + data = Path(file_path).read_bytes() + + if data.startswith(BOM_BYTES): + raise ValueError( + "File encoding has UTF-8 BOM (0xEF 0xBB 0xBF): " + "expected UTF-8 without BOM" + ) + + # Verify valid UTF-8 by attempting decode + try: + data.decode("utf-8") + except UnicodeDecodeError as e: + raise ValueError( + f"File is not valid UTF-8 encoding: {e}" + ) from e + + return True + + except ValueError: + raise + except Exception as e: + raise ValueError(f"Failed to validate encoding: {e}") from e + + +def validate_line_endings(file_path: Path) -> bool: + """Validate that file uses Unix LF line endings only. + + Checks that file contains no CRLF (Windows) or CR (Mac) line endings, + only LF (Unix). + + Args: + file_path: Path to markdown file to validate + + Returns: + True if line endings are Unix LF only + + Raises: + ValueError: If non-LF line endings are detected + """ + try: + data = Path(file_path).read_bytes() + + if b"\r\n" in data: + raise ValueError( + "File uses CRLF line endings (Windows style), " + "expected Unix LF line endings" + ) + + if b"\r" in data: + raise ValueError( + "File uses CR line endings (old Mac style), " + "expected Unix LF line endings" + ) + + return True + + except ValueError: + raise + except Exception as e: + raise ValueError(f"Failed to validate line endings: {e}") from e + + +def validate_file_size(file_path: Path) -> bool: + """Validate that file size is within specification bounds. + + File size must be between 100-600 bytes inclusive. 
+ + Args: + file_path: Path to markdown file to validate + + Returns: + True if file size is within bounds + + Raises: + ValueError: If file size is outside 100-600 byte range + """ + try: + size = Path(file_path).stat().st_size + + if size < MIN_FILE_SIZE or size > MAX_FILE_SIZE: + raise ValueError( + f"File size {size} bytes is outside bounds " + f"(expected {MIN_FILE_SIZE}-{MAX_FILE_SIZE} bytes)" + ) + + return True + + except ValueError: + raise + except Exception as e: + raise ValueError(f"Failed to validate file size: {e}") from e + + +def validate_markdown_file(file_path: Path) -> bool: + """Orchestrate comprehensive validation of markdown file. + + Validates in order: (1) file exists, (2) H1 format, (3) blank separator, + (4) sentence count, (5) encoding, (6) line endings, (7) file size. + + Stops immediately on first validation failure (fail-fast). + + Args: + file_path: Path to markdown file to validate + + Returns: + True if all validations pass + + Raises: + ValueError: If any validation fails + """ + file_path = Path(file_path) + + try: + # Check file exists + if not file_path.exists(): + raise ValueError(f"File does not exist: {file_path}") + + _logger.debug(f"File exists: {file_path}") + + # Validate H1 heading format + validate_h1_format(file_path) + _logger.debug("H1 heading format validation passed") + + # Validate blank separator line + validate_blank_separator(file_path) + _logger.debug("Blank separator line validation passed") + + # Validate sentence count + validate_sentence_count(file_path) + _logger.debug("Sentence count validation passed") + + # Validate UTF-8 encoding without BOM + validate_encoding(file_path) + _logger.debug("UTF-8 encoding validation passed") + + # Validate Unix LF line endings + validate_line_endings(file_path) + _logger.debug("Unix LF line endings validation passed") + + # Validate file size + validate_file_size(file_path) + _logger.debug("File size validation passed") + + _logger.info(f"All validations passed for 
def _run_git(args: list[str], failure_context: str) -> None:
    """Run ``git <args>`` and log any failure before re-raising it.

    Shared by git_add(), git_commit() and git_push() so the three steps use
    identical subprocess options and identical error reporting.

    Args:
        args: Arguments that follow ``git`` on the command line; the first
            element is the sub-command name (``add``, ``commit``, ``push``).
        failure_context: Prefix for the error log line, e.g.
            ``"Failed to stage file"``.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
        OSError: If the git executable cannot be started at all. A missing
            git binary surfaces here as ``FileNotFoundError``, not as
            ``CalledProcessError``, because no process ever ran.
    """
    try:
        subprocess.run(
            ["git", *args],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        # Prefer git's own stderr; fall back to the exception text when the
        # process produced no diagnostic output.
        error_msg = f"git {args[0]} failed: {e.stderr}" if e.stderr else str(e)
        _logger.error(f"{failure_context}: {error_msg}")
        raise
    except OSError as e:
        # Without this branch a missing/unexecutable git would skip the
        # step-specific log line entirely.
        _logger.error(f"{failure_context}: could not execute git: {e}")
        raise


def git_add() -> None:
    """Stage the markdown file in the git index with ``git add``.

    Fail-fast: any failure is logged and re-raised unchanged.

    Raises:
        subprocess.CalledProcessError: If ``git add`` exits non-zero (not a
            repository, permission problems, ...).
        OSError: If git itself cannot be executed (e.g. not installed).
    """
    _logger.debug(f"Staging file with git add: {FILENAME}")
    _run_git(["add", FILENAME], "Failed to stage file")
    _logger.info(f"Successfully staged {FILENAME} with git add")


def git_commit() -> None:
    """Create a git commit using the conventional-commit message.

    The message comes from COMMIT_MESSAGE (``feat(206): ...``). Fail-fast:
    any failure is logged and re-raised unchanged.

    Raises:
        subprocess.CalledProcessError: If ``git commit`` exits non-zero
            (nothing staged, identity not configured, hook failure, ...).
        OSError: If git itself cannot be executed (e.g. not installed).
    """
    _logger.debug(f"Creating git commit with message: {COMMIT_MESSAGE}")
    _run_git(["commit", "-m", COMMIT_MESSAGE], "Failed to commit file")
    _logger.info(f"Successfully created commit: {COMMIT_MESSAGE}")


def git_push() -> None:
    """Push the commit to the feature branch on ``origin``.

    The ``-u`` flag sets upstream tracking so the local branch is linked to
    its remote counterpart on the first push. Fail-fast: any failure is
    logged and re-raised unchanged.

    Raises:
        subprocess.CalledProcessError: If ``git push`` exits non-zero
            (network errors, authentication, branch protection, ...).
        OSError: If git itself cannot be executed (e.g. not installed).
    """
    _logger.debug(f"Pushing commit to remote branch: {BRANCH_NAME}")
    _run_git(["push", "-u", "origin", BRANCH_NAME], "Failed to push commit")
    _logger.info(f"Successfully pushed commit to {BRANCH_NAME}")


def main() -> int:
    """Run the feature 206 workflow: create, validate, stage, commit, push.

    Steps run strictly in order and the first failure aborts the rest:
        1. Create the markdown file with hard-coded content
        2. Validate the file
        3. Stage it with ``git add``
        4. Commit with the conventional message
        5. Push to the feature branch

    Returns:
        int: 0 when every step succeeds, 1 when any step raises.
    """
    # Top-level boundary: every failure is converted into an exit code so the
    # script never terminates with an unhandled traceback.
    try:
        _logger.info("Starting feature 206 workflow: markdown file creation")

        _logger.info("Step 1/5: Creating markdown file")
        file_path = create_markdown_file()

        _logger.info("Step 2/5: Validating markdown file")
        validate_markdown_file(file_path)
        _logger.info("All validations passed")

        _logger.info("Step 3/5: Staging file with git add")
        git_add()

        _logger.info("Step 4/5: Creating git commit")
        git_commit()

        _logger.info("Step 5/5: Pushing to feature branch")
        git_push()

        _logger.info(
            f"Feature 206 workflow completed successfully: "
            f"{FILENAME} created, validated, committed, and pushed"
        )
        return 0

    except Exception as e:
        _logger.error(f"Feature 206 workflow failed: {e}")
        return 1


if __name__ == "__main__":
    # Entry point for direct script execution; main()'s return value becomes
    # the process exit status.
    sys.exit(main())
class _TempCwd:
    """Mixin that runs every test method inside a fresh temporary directory.

    The tests below create and delete ``Path(FILENAME)`` relative to the
    current working directory. Run from the repository root, that path is
    the tracked ``test-afcl8i.md``, so the suite used to delete it. Switching
    to a throwaway directory keeps the tests hermetic and also guarantees the
    file does not exist when a test starts.
    """

    def setup_method(self):
        """Enter a new empty directory before each test."""
        import os
        import tempfile

        self._original_cwd = os.getcwd()
        self._scratch_dir = tempfile.TemporaryDirectory()
        os.chdir(self._scratch_dir.name)

    def teardown_method(self):
        """Restore the previous working directory and drop the scratch dir."""
        import os

        # Leave the directory before removing it (required on Windows).
        os.chdir(self._original_cwd)
        self._scratch_dir.cleanup()


def _write_raw(content: str, encoding: str = "utf-8") -> Path:
    """Write *content* to FILENAME byte-for-byte and return its path.

    Bytes are written directly so the fixture is identical on every
    platform: no locale-dependent default encoding and no newline
    translation, both of which would distort the byte-level validators.
    """
    file_path = Path(FILENAME)
    file_path.write_bytes(content.encode(encoding))
    return file_path


def _error_text(validator, file_path: Path) -> str:
    """Return the message of the ValueError that *validator* must raise."""
    with pytest.raises(ValueError) as exc_info:
        validator(file_path)
    return str(exc_info.value)


class TestCreateMarkdownFile(_TempCwd):
    """Test suite for create_markdown_file() function."""

    def test_file_does_not_exist_initially(self):
        """A fresh working directory contains no target file."""
        assert not Path(FILENAME).exists()

    def test_create_markdown_file_creates_file(self):
        """create_markdown_file() creates the file and returns its path."""
        result = create_markdown_file()

        assert Path(FILENAME).exists()
        assert result == Path(FILENAME)

    def test_create_markdown_file_correct_name(self):
        """The created file has the expected filename."""
        create_markdown_file()

        file_path = Path(FILENAME)
        assert file_path.exists()
        assert file_path.name == FILENAME

    def test_create_markdown_file_correct_content_format(self):
        """Content is: H1 title, blank line, prose, trailing newline."""
        create_markdown_file()

        content = Path(FILENAME).read_text(encoding="utf-8")
        lines = content.split("\n")
        assert lines[0] == f"# {TITLE_TEXT}"
        assert lines[1] == ""
        assert PROSE_CONTENT in content
        assert content.endswith("\n")

    def test_create_markdown_file_utf8_encoding(self):
        """The file decodes as UTF-8 and is not empty."""
        create_markdown_file()

        content = Path(FILENAME).read_text(encoding="utf-8")
        assert content
        assert isinstance(content, str)

    def test_create_markdown_file_no_bom(self):
        """The file does not start with the UTF-8 BOM (EF BB BF)."""
        create_markdown_file()

        assert not Path(FILENAME).read_bytes().startswith(b"\xef\xbb\xbf")

    def test_create_markdown_file_returns_path(self):
        """The function returns a Path pointing at FILENAME."""
        result = create_markdown_file()

        assert isinstance(result, Path)
        assert result.name == FILENAME


class TestValidateH1Format(_TempCwd):
    """Test suite for validate_h1_format() function."""

    def test_valid_h1_format(self):
        """The generated file passes H1 validation."""
        assert validate_h1_format(create_markdown_file()) is True

    def test_invalid_h1_no_hash(self):
        """A first line without '#' is rejected."""
        file_path = _write_raw("The Art of Problem Solving Through Code\n\nProse.\n")

        assert "H1 heading not found" in _error_text(validate_h1_format, file_path)

    def test_invalid_h1_no_space_after_hash(self):
        """'#Title' (no space after the hash) is rejected."""
        file_path = _write_raw("#Title\n\nProse.\n")

        assert "H1 heading not found" in _error_text(validate_h1_format, file_path)

    def test_invalid_h1_empty_file(self):
        """An empty file is rejected."""
        file_path = _write_raw("")

        assert "h1 heading" in _error_text(validate_h1_format, file_path).lower()


class TestValidateBlankSeparator(_TempCwd):
    """Test suite for validate_blank_separator() function."""

    def test_valid_blank_separator(self):
        """The generated file has a blank second line."""
        assert validate_blank_separator(create_markdown_file()) is True

    def test_invalid_text_on_second_line(self):
        """Text on the second line is rejected."""
        file_path = _write_raw("# Title\nSome text here\nProse.\n")

        assert "Expected blank line" in _error_text(validate_blank_separator, file_path)

    def test_invalid_file_with_one_line(self):
        """A single line without trailing newline is rejected."""
        # No trailing newline, so splitting on "\n" yields exactly one element.
        file_path = _write_raw("# Title")

        assert "fewer than 2 lines" in _error_text(validate_blank_separator, file_path)


class TestValidateSentenceCount(_TempCwd):
    """Test suite for validate_sentence_count() function."""

    def test_valid_two_sentences(self):
        """Prose with 2 sentences passes."""
        file_path = _write_raw("# Title\n\nFirst sentence. Second sentence.\n")

        assert validate_sentence_count(file_path) is True

    def test_valid_three_sentences(self):
        """Prose with 3 sentences passes."""
        file_path = _write_raw(
            "# Title\n\nFirst sentence. Second sentence. Third sentence.\n"
        )

        assert validate_sentence_count(file_path) is True

    def test_invalid_one_sentence(self):
        """Prose with 1 sentence is rejected."""
        file_path = _write_raw("# Title\n\nOnly one sentence.\n")

        message = _error_text(validate_sentence_count, file_path)
        assert "Expected 2-3 sentences" in message
        assert "found 1 periods" in message

    def test_invalid_four_sentences(self):
        """Prose with 4 sentences is rejected."""
        file_path = _write_raw("# Title\n\nFirst. Second. Third. Fourth.\n")

        message = _error_text(validate_sentence_count, file_path)
        assert "Expected 2-3 sentences" in message
        assert "found 4 periods" in message


class TestValidateEncoding(_TempCwd):
    """Test suite for validate_encoding() function."""

    def test_valid_utf8_encoding(self):
        """The generated file passes encoding validation."""
        assert validate_encoding(create_markdown_file()) is True

    def test_invalid_utf8_bom(self):
        """A leading UTF-8 BOM is detected and rejected."""
        file_path = Path(FILENAME)
        file_path.write_bytes(b"\xef\xbb\xbf" + "# Title\n\nProse.\n".encode("utf-8"))

        assert "BOM" in _error_text(validate_encoding, file_path)

    def test_invalid_non_utf8_encoding(self):
        """Latin-1 bytes that are not valid UTF-8 are rejected."""
        file_path = _write_raw("# Tîtle\n\nProse.\n", encoding="latin-1")

        # Only the exception type is pinned; the exact wording is not.
        with pytest.raises(ValueError):
            validate_encoding(file_path)


class TestValidateLineEndings(_TempCwd):
    """Test suite for validate_line_endings() function."""

    def test_valid_lf_line_endings(self):
        """The generated file uses LF line endings."""
        assert validate_line_endings(create_markdown_file()) is True

    def test_invalid_crlf_line_endings(self):
        """CRLF line endings are rejected."""
        file_path = _write_raw("# Title\r\n\r\nProse.\r\n")

        assert "CRLF" in _error_text(validate_line_endings, file_path)

    def test_invalid_cr_line_endings(self):
        """CR-only line endings are rejected."""
        file_path = _write_raw("# Title\r\rProse.\r")

        assert "CR" in _error_text(validate_line_endings, file_path)


class TestValidateFileSize(_TempCwd):
    """Test suite for validate_file_size() function."""

    def test_valid_file_size_within_bounds(self):
        """The generated file is within the 100-600 byte bounds."""
        assert validate_file_size(create_markdown_file()) is True

    def test_invalid_file_size_too_small(self):
        """A file under 100 bytes is rejected."""
        file_path = _write_raw("# T\n\nSmall.\n")

        message = _error_text(validate_file_size, file_path)
        assert "outside bounds" in message
        assert "100-600" in message

    def test_invalid_file_size_too_large(self):
        """A file over 600 bytes is rejected."""
        file_path = _write_raw("# Title\n\n" + "a" * 700 + "\n")

        assert "outside bounds" in _error_text(validate_file_size, file_path)


class TestValidateMarkdownFile(_TempCwd):
    """Test suite for validate_markdown_file() orchestration function."""

    def test_valid_markdown_file_passes_all_validations(self):
        """The generated file passes the full validation pipeline."""
        assert validate_markdown_file(create_markdown_file()) is True

    def test_fails_on_missing_h1(self):
        """Validation fails when the H1 heading is missing."""
        file_path = _write_raw("No heading here.\n\nProse. Another.\n")

        assert "h1 heading" in _error_text(validate_markdown_file, file_path).lower()

    def test_fails_on_missing_blank_separator(self):
        """Validation fails when the blank separator line is missing."""
        file_path = _write_raw("# Title\nText on second line\nProse. Another.\n")

        # Only the exception type is pinned here, as in the other suites.
        with pytest.raises(ValueError):
            validate_markdown_file(file_path)

    def test_fails_on_invalid_sentence_count(self):
        """Validation fails when the sentence count is wrong."""
        file_path = _write_raw("# Title\n\nOnly one sentence.\n")

        assert "sentence" in _error_text(validate_markdown_file, file_path).lower()

    def test_fails_on_file_not_existing(self):
        """Validation fails when the file does not exist."""
        missing = Path("nonexistent_file.md")

        assert "does not exist" in _error_text(validate_markdown_file, missing)
_MODULE = "sheep.features.feature_206_markdown_file_creation"


class _TempCwd:
    """Mixin that runs every test method inside a fresh temporary directory.

    main() and create_markdown_file() write ``Path(FILENAME)`` relative to
    the current working directory. From the repository root that is the
    tracked ``test-afcl8i.md``, so each test gets its own scratch directory
    instead of creating and deleting the real file.
    """

    def setup_method(self):
        """Enter a new empty directory before each test."""
        import os
        import tempfile

        self._original_cwd = os.getcwd()
        self._scratch_dir = tempfile.TemporaryDirectory()
        os.chdir(self._scratch_dir.name)

    def teardown_method(self):
        """Restore the previous working directory and drop the scratch dir."""
        import os

        # Leave the directory before removing it (required on Windows).
        os.chdir(self._original_cwd)
        self._scratch_dir.cleanup()


def _patched_git_steps():
    """Return a context manager that mocks git_add, git_commit and git_push.

    Entering it yields a dict of the three mocks keyed by function name.
    All three are always replaced, so no test can reach the real git binary
    and stage, commit or push anything in the developer's checkout.
    """
    from unittest.mock import DEFAULT

    return patch.multiple(
        _MODULE, git_add=DEFAULT, git_commit=DEFAULT, git_push=DEFAULT
    )


def _git_failure(step: str, stderr: str) -> subprocess.CalledProcessError:
    """Build the error a failing ``git <step>`` invocation would raise."""
    return subprocess.CalledProcessError(1, ["git", step], stderr=stderr)


class TestMainOrchestration(_TempCwd):
    """Test suite for main() orchestration function."""

    def test_main_returns_zero_on_success(self):
        """main() returns 0 and runs every git step exactly once."""
        with _patched_git_steps() as git:
            result = main()

        assert result == 0
        for step in ("git_add", "git_commit", "git_push"):
            git[step].assert_called_once_with()

    def test_main_returns_one_on_file_creation_failure(self):
        """main() returns 1 when file creation fails."""
        with _patched_git_steps() as git, patch(
            f"{_MODULE}.create_markdown_file", side_effect=OSError("Disk full")
        ):
            result = main()

        assert result == 1
        git["git_add"].assert_not_called()

    def test_main_returns_one_on_validation_failure(self):
        """main() returns 1 when the created file fails validation."""
        # main() always (re)creates the file first, so an invalid file on
        # disk would simply be overwritten. Make the creation step hand back
        # an invalid file (far below the minimum size) instead.
        invalid_file = Path(FILENAME)
        invalid_file.write_bytes(b"# T\n\nS.\n")

        with _patched_git_steps() as git, patch(
            f"{_MODULE}.create_markdown_file", return_value=invalid_file
        ):
            result = main()

        assert result == 1
        # Validation must abort the workflow before anything is staged.
        git["git_add"].assert_not_called()

    def test_main_returns_one_on_git_add_failure(self):
        """main() returns 1 when git add fails and skips later steps."""
        with _patched_git_steps() as git:
            git["git_add"].side_effect = _git_failure("add", "not a git repository")
            result = main()

        assert result == 1
        git["git_commit"].assert_not_called()
        git["git_push"].assert_not_called()

    def test_main_returns_one_on_git_commit_failure(self):
        """main() returns 1 when git commit fails and does not push."""
        with _patched_git_steps() as git:
            git["git_commit"].side_effect = _git_failure("commit", "nothing to commit")
            result = main()

        assert result == 1
        git["git_add"].assert_called_once_with()
        git["git_push"].assert_not_called()

    def test_main_returns_one_on_git_push_failure(self):
        """main() returns 1 when git push fails."""
        with _patched_git_steps() as git:
            git["git_push"].side_effect = _git_failure("push", "connection refused")
            result = main()

        assert result == 1
        git["git_add"].assert_called_once_with()
        git["git_commit"].assert_called_once_with()

    def test_main_logs_workflow_steps(self):
        """main() logs the start, each step, and the completion."""
        with _patched_git_steps(), patch(f"{_MODULE}._logger") as mock_logger:
            main()

        # Start + five steps is the minimum on the success path.
        assert mock_logger.info.call_count >= 6


class TestEndToEndWorkflow(_TempCwd):
    """Comprehensive end-to-end workflow tests verifying all success criteria."""

    def test_e2e_file_creation_and_validation(self):
        """The file is created and passes all validation checks."""
        created_path = create_markdown_file()

        assert Path(FILENAME).exists()
        assert created_path == Path(FILENAME)
        assert validate_markdown_file(created_path) is True

    def test_e2e_file_has_correct_name(self):
        """The created file has the specified filename."""
        create_markdown_file()

        file_path = Path(FILENAME)
        assert file_path.exists()
        assert file_path.name == "test-afcl8i.md"

    def test_e2e_file_has_correct_content_structure(self):
        """Structure is H1 + blank line + prose + trailing newline."""
        create_markdown_file()

        content = Path(FILENAME).read_text(encoding="utf-8")
        lines = content.split("\n")
        assert lines[0] == f"# {TITLE_TEXT}"
        assert lines[1] == ""
        assert PROSE_CONTENT in content
        assert content.endswith("\n")

    def test_e2e_file_is_utf8_without_bom(self):
        """The file is UTF-8 encoded and carries no BOM."""
        create_markdown_file()

        file_path = Path(FILENAME)
        assert isinstance(file_path.read_text(encoding="utf-8"), str)
        assert not file_path.read_bytes().startswith(b"\xef\xbb\xbf")

    def test_e2e_file_uses_lf_line_endings(self):
        """The file uses LF line endings, never CRLF or bare CR."""
        create_markdown_file()

        binary_content = Path(FILENAME).read_bytes()
        assert b"\r\n" not in binary_content
        assert b"\r" not in binary_content

    def test_e2e_file_size_within_bounds(self):
        """The file size lies within MIN_FILE_SIZE..MAX_FILE_SIZE."""
        create_markdown_file()

        assert MIN_FILE_SIZE <= Path(FILENAME).stat().st_size <= MAX_FILE_SIZE

    def test_e2e_file_contains_two_to_three_sentences(self):
        """The prose holds 2-3 sentences (counted as periods)."""
        create_markdown_file()

        lines = Path(FILENAME).read_text(encoding="utf-8").split("\n")
        # Everything after the title and the blank separator is prose.
        prose = "\n".join(lines[2:])
        assert 2 <= prose.count(".") <= 3

    def test_e2e_all_validation_checks_pass(self):
        """The comprehensive validation accepts the created file."""
        assert validate_markdown_file(create_markdown_file()) is True

    def test_e2e_commit_message_follows_conventional_format(self):
        """The commit message follows ``feat(206): description``."""
        from sheep.features.feature_206_markdown_file_creation import COMMIT_MESSAGE

        assert COMMIT_MESSAGE.startswith("feat(206):")
        assert FILENAME in COMMIT_MESSAGE

    def test_e2e_branch_name_is_correct(self):
        """The feature branch name matches the specification."""
        assert BRANCH_NAME == "feat/206-markdown-file-creation-f7d8d3"

    def test_e2e_feature_number_is_206(self):
        """The feature number is 206."""
        assert FEATURE_NUMBER == 206

    def test_e2e_main_success_path_with_mocked_git(self):
        """The full success path leaves a valid file behind."""
        with _patched_git_steps():
            result = main()

        assert result == 0
        assert Path(FILENAME).exists()
        assert validate_markdown_file(Path(FILENAME)) is True
_SUBPROCESS_RUN = "sheep.features.feature_206_markdown_file_creation.subprocess.run"
_RUN_OPTIONS = {"check": True, "capture_output": True, "text": True}


def _call_with_successful_git(git_function):
    """Invoke *git_function* with subprocess.run mocked to succeed.

    Returns the mock so the caller can inspect how it was called.
    """
    with patch(_SUBPROCESS_RUN) as run_mock:
        run_mock.return_value = MagicMock(returncode=0)
        git_function()
    return run_mock


def _assert_propagates_git_error(git_function, step, stderr):
    """Assert that a failing ``git <step>`` surfaces as CalledProcessError."""
    failure = subprocess.CalledProcessError(1, ["git", step], stderr=stderr)
    with patch(_SUBPROCESS_RUN) as run_mock:
        run_mock.side_effect = failure
        with pytest.raises(subprocess.CalledProcessError):
            git_function()


class TestGitAdd:
    """Test suite for git_add() function."""

    def test_git_add_calls_subprocess_correctly(self):
        """git_add() runs ``git add FILENAME`` with the expected options."""
        run_mock = _call_with_successful_git(git_add)

        run_mock.assert_called_once_with(["git", "add", FILENAME], **_RUN_OPTIONS)

    def test_git_add_raises_on_failure(self):
        """git_add() re-raises CalledProcessError when git fails."""
        _assert_propagates_git_error(git_add, "add", "fatal: not a git repository")

    def test_git_add_includes_filename_in_command(self):
        """The git command line contains FILENAME."""
        command = _call_with_successful_git(git_add).call_args[0][0]

        assert FILENAME in command


class TestGitCommit:
    """Test suite for git_commit() function."""

    def test_git_commit_calls_subprocess_correctly(self):
        """git_commit() runs ``git commit -m ...`` with the expected options."""
        run_mock = _call_with_successful_git(git_commit)

        run_mock.assert_called_once()
        positional, keyword = run_mock.call_args
        command = positional[0]
        assert command[:2] == ["git", "commit"]
        assert "-m" in command
        for option in ("check", "capture_output", "text"):
            assert keyword[option] is True

    def test_git_commit_uses_conventional_format(self):
        """The commit message starts with ``feat(206):``."""
        command = _call_with_successful_git(git_commit).call_args[0][0]

        # Layout is ["git", "commit", "-m", <message>].
        assert command[3].startswith("feat(206):")

    def test_git_commit_raises_on_failure(self):
        """git_commit() re-raises CalledProcessError when git fails."""
        _assert_propagates_git_error(git_commit, "commit", "nothing to commit")


class TestGitPush:
    """Test suite for git_push() function."""

    def test_git_push_calls_subprocess_correctly(self):
        """git_push() runs ``git push -u origin BRANCH_NAME``."""
        run_mock = _call_with_successful_git(git_push)

        run_mock.assert_called_once_with(
            ["git", "push", "-u", "origin", BRANCH_NAME], **_RUN_OPTIONS
        )

    def test_git_push_includes_upstream_flag(self):
        """The push command carries ``-u`` for upstream tracking."""
        command = _call_with_successful_git(git_push).call_args[0][0]

        assert "-u" in command

    def test_git_push_uses_correct_branch(self):
        """The push targets the feature branch."""
        command = _call_with_successful_git(git_push).call_args[0][0]

        assert BRANCH_NAME in command

    def test_git_push_raises_on_failure(self):
        """git_push() re-raises CalledProcessError on a network failure."""
        _assert_propagates_git_error(
            git_push, "push", "fatal: could not read from remote"
        )