Skip to content

Commit e217739

Browse files
Merge pull request #9 from endomorphosis/endo-laptop
Endo laptop
2 parents 96e0bd6 + 87cd19e commit e217739

File tree

602 files changed

+78654
-935
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

602 files changed

+78654
-935
lines changed
Lines changed: 366 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,366 @@
1+
# CI/CD workflow for the PDF processing pipeline and its MCP tools.
name: PDF Processing Pipeline CI/CD

# Triggers: pushes to main/develop and pull requests into main, limited to
# changes that touch the PDF pipeline, its MCP tools, tests, dependencies or
# the workflows themselves.
on:
  push:
    branches:
      - main
      - develop
    paths:
      - 'ipfs_datasets_py/pdf_processing/**'
      - 'ipfs_datasets_py/mcp_server/tools/pdf_tools/**'
      - 'tests/**'
      - 'requirements.txt'
      - '.github/workflows/**'
  pull_request:
    branches:
      - main
    paths:
      - 'ipfs_datasets_py/pdf_processing/**'
      - 'ipfs_datasets_py/mcp_server/tools/pdf_tools/**'
      - 'tests/**'
      - 'requirements.txt'
      # Kept in sync with the push filter so a pull request that only edits a
      # workflow is exercised before it is merged, not after.
      - '.github/workflows/**'
19+
20+
# Workflow-wide defaults, read by steps through the `env` context.
env:
  # Interpreter version for every job that does not use a version matrix.
  PYTHON_VERSION: "3.12"
  # Value handed to pytest's --timeout option (5 minutes).
  PYTEST_TIMEOUT: "300"
23+
24+
jobs:
25+
# Static quality gates: formatting, import ordering, linting and (advisory)
# type checking for the PDF pipeline, its MCP tools and the tests.
lint-and-format:
  name: Code Quality Checks
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install flake8 black isort mypy
        pip install -r requirements.txt

    # Black, isort and flake8 are each invoked once with all three targets.
    # The shell stops a multi-command script at the first failing command, so
    # separate invocations would hide problems in the later directories.
    - name: Run Black formatter check
      run: |
        black --check --diff \
          ipfs_datasets_py/pdf_processing/ \
          ipfs_datasets_py/mcp_server/tools/pdf_tools/ \
          tests/

    - name: Run isort import sorting check
      run: |
        isort --check-only --diff \
          ipfs_datasets_py/pdf_processing/ \
          ipfs_datasets_py/mcp_server/tools/pdf_tools/ \
          tests/

    - name: Run flake8 linting
      run: |
        flake8 \
          ipfs_datasets_py/pdf_processing/ \
          ipfs_datasets_py/mcp_server/tools/pdf_tools/ \
          tests/ \
          --max-line-length=100 --extend-ignore=E203,W503

    # mypy stays as two invocations (one per package directory), but a
    # failure in the first no longer prevents the second from running; the
    # step still exits non-zero if either run reported errors.
    - name: Run mypy type checking
      run: |
        status=0
        mypy ipfs_datasets_py/pdf_processing/ --ignore-missing-imports || status=$?
        mypy ipfs_datasets_py/mcp_server/tools/pdf_tools/ --ignore-missing-imports || status=$?
        exit "$status"
      continue-on-error: true  # Type checking is advisory for now
65+
66+
# Unit tests, fanned out over Python versions and test groups. Exactly one of
# the three "Run unit tests" steps executes per matrix combination.
unit-tests:
  name: Unit Tests
  runs-on: ubuntu-latest
  strategy:
    matrix:
      python-version:
        - "3.10"
        - "3.11"
        - "3.12"
      test-group:
        - pdf-processing
        - mcp-tools
        - utils

  steps:
    - uses: actions/checkout@v4

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}

    # Reuse pip's download cache; the key follows the requirements files.
    - name: Cache dependencies
      uses: actions/cache@v3
      with:
        path: ~/.cache/pip
        key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
        restore-keys: |
          ${{ runner.os }}-pip-

    - name: Install system dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y tesseract-ocr tesseract-ocr-eng
        # Install other OCR dependencies as needed

    - name: Install Python dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
        pip install pytest pytest-cov pytest-asyncio pytest-timeout pytest-xdist

    # Group: pdf-processing
    - name: Run unit tests - PDF Processing
      if: matrix.test-group == 'pdf-processing'
      run: |
        pytest tests/unit/test_pdf_processing.py \
          --cov=ipfs_datasets_py.pdf_processing \
          --cov-report=xml \
          --cov-report=term-missing \
          --timeout=${{ env.PYTEST_TIMEOUT }} \
          -v

    # Group: mcp-tools
    - name: Run unit tests - MCP Tools
      if: matrix.test-group == 'mcp-tools'
      run: |
        pytest tests/unit/test_mcp_pdf_tools.py \
          --cov=ipfs_datasets_py.mcp_server.tools.pdf_tools \
          --cov-report=xml \
          --cov-report=term-missing \
          --timeout=${{ env.PYTEST_TIMEOUT }} \
          -v

    # Group: utils - everything under tests/unit/ except the two files that
    # the other groups run.
    - name: Run unit tests - Utils
      if: matrix.test-group == 'utils'
      run: |
        pytest tests/unit/ \
          --ignore=tests/unit/test_pdf_processing.py \
          --ignore=tests/unit/test_mcp_pdf_tools.py \
          --cov=ipfs_datasets_py.utils \
          --cov-report=xml \
          --cov-report=term-missing \
          --timeout=${{ env.PYTEST_TIMEOUT }} \
          -v

    # An upload problem does not fail the job (fail_ci_if_error: false).
    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v3
      with:
        file: ./coverage.xml
        flags: unittests
        name: codecov-umbrella
        fail_ci_if_error: false
141+
142+
# Integration tests; start only after the whole unit-test matrix has passed.
integration-tests:
  name: Integration Tests
  runs-on: ubuntu-latest
  needs: unit-tests

  steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Install system dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y tesseract-ocr tesseract-ocr-eng

    - name: Install Python dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
        pip install pytest pytest-asyncio pytest-timeout

    - name: Run integration tests
      run: |
        pytest tests/integration/ \
          --timeout=${{ env.PYTEST_TIMEOUT }} \
          -v \
          --tb=short

    # Only on failure: keep test output and logs for debugging.
    # upload-artifact v3 has been retired by GitHub; v4 is required for the
    # step to run. The artifact name is unique within this workflow, which
    # v4 requires.
    - name: Upload integration test artifacts
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: integration-test-artifacts
        path: |
          tests/fixtures/test_output/
          *.log
181+
182+
# MCP server test suite plus an import check of the PDF tool package; runs
# once the unit-test matrix has passed.
mcp-server-tests:
  name: MCP Server Tests
  needs: unit-tests
  runs-on: ubuntu-latest

  steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
        pip install pytest pytest-asyncio pytest-timeout

    - name: Run MCP server tests
      run: |
        pytest tests/mcp/ \
          --timeout=${{ env.PYTEST_TIMEOUT }} \
          -v \
          --tb=short

    # Star-import of the pdf_tools package; any import error fails the step.
    - name: Test MCP tool registration
      run: |
        python -c "
        from ipfs_datasets_py.mcp_server.tools.pdf_tools import *
        print('✅ All MCP tools imported successfully')
        "
214+
215+
# Benchmarks; run only for refs/heads/main and only after the integration
# tests have passed.
performance-tests:
  name: Performance Tests
  runs-on: ubuntu-latest
  needs: integration-tests
  if: github.ref == 'refs/heads/main'

  steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
        pip install pytest pytest-asyncio pytest-benchmark memory_profiler

    - name: Run performance benchmarks
      run: |
        python -m pytest tests/integration/test_pdf_mcp_integration.py::TestPerformanceIntegration \
          --benchmark-only \
          --benchmark-json=benchmark_results.json \
          -v

    # upload-artifact v3 has been retired by GitHub; v4 is required for the
    # step to run. The artifact name is unique within this workflow, which
    # v4 requires.
    - name: Upload benchmark results
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-results
        path: benchmark_results.json
247+
248+
# Advisory security scanning: Bandit over the source trees and Safety over
# the declared dependencies. Findings never fail the job; the JSON reports
# are published as an artifact.
security-scan:
  name: Security Scan
  runs-on: ubuntu-latest

  steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Install security scanning tools
      run: |
        python -m pip install --upgrade pip
        pip install bandit safety

    # --exit-zero: Bandit exits non-zero when it finds issues, which would
    # stop the script before the second scan and leave
    # bandit_mcp_report.json unwritten.
    - name: Run Bandit security linter
      run: |
        bandit -r ipfs_datasets_py/pdf_processing/ \
          -f json -o bandit_report.json --exit-zero
        bandit -r ipfs_datasets_py/mcp_server/tools/pdf_tools/ \
          -f json -o bandit_mcp_report.json --exit-zero
      continue-on-error: true

    # Scan requirements.txt explicitly: this job installs only the scanners,
    # so checking the active environment would not cover the project's
    # dependencies. The report is captured from stdout because current
    # Safety releases treat --output as a format selector, not a file path.
    - name: Run Safety dependency check
      run: |
        safety check -r requirements.txt --json > safety_report.json
      continue-on-error: true

    # upload-artifact v3 has been retired by GitHub; v4 is required for the
    # step to run. The artifact name is unique within this workflow, which
    # v4 requires.
    - name: Upload security reports
      uses: actions/upload-artifact@v4
      with:
        name: security-reports
        path: |
          bandit_report.json
          bandit_mcp_report.json
          safety_report.json
286+
287+
# Builds the image from the repository root and smoke-tests it; runs after
# the integration tests have passed.
docker-tests:
  name: Docker Tests
  needs: integration-tests
  runs-on: ubuntu-latest

  steps:
    - uses: actions/checkout@v4

    - name: Build Docker image
      run: |
        docker build -t ipfs-datasets-pdf:test .

    # The checked-out tests directory is bind-mounted at /app/tests and a
    # single initialization test is run inside the container.
    - name: Test Docker container
      run: |
        docker run --rm \
          -v ${{ github.workspace }}/tests:/app/tests \
          ipfs-datasets-pdf:test \
          python -m pytest tests/unit/test_pdf_processing.py::TestPDFProcessor::test_pdf_processor_initialization -v

    # Import check inside the image, without any mounted sources.
    - name: Test MCP tools in container
      run: |
        docker run --rm \
          ipfs-datasets-pdf:test \
          python -c "
        from ipfs_datasets_py.pdf_processing import PDFProcessor
        from ipfs_datasets_py.mcp_server.tools.pdf_tools import pdf_ingest_to_graphrag
        print('✅ PDF processing and MCP tools work in Docker')
        "
315+
316+
# Install-and-import checks for the package; only for refs/heads/main and
# only after both the integration and the MCP server tests have passed.
deployment-tests:
  name: Deployment Tests
  if: github.ref == 'refs/heads/main'
  needs:
    - integration-tests
    - mcp-server-tests
  runs-on: ubuntu-latest

  steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    # Editable install followed by an import of the package and PDFProcessor.
    - name: Test package installation
      run: |
        python -m pip install --upgrade pip
        pip install -e .
        python -c "
        import ipfs_datasets_py
        from ipfs_datasets_py.pdf_processing import PDFProcessor
        print('✅ Package installs and imports correctly')
        "

    # Best effort: `|| true` discards a non-zero exit from the --help call.
    - name: Test CLI functionality
      run: |
        # Test any CLI commands if they exist
        python -m ipfs_datasets_py.pdf_processing --help || true

    - name: Run quick smoke tests
      run: |
        python pdf_processing_quick_test.py
348+
349+
# Final summary job. `if: always()` makes it run even when an upstream job
# failed, so it can report the combined outcome of the jobs listed in `needs`.
notify-status:
  name: Notify Status
  runs-on: ubuntu-latest
  needs: [lint-and-format, unit-tests, integration-tests, mcp-server-tests]
  if: always()

  steps:
    # Runs only when every upstream job finished with result 'success'.
    - name: Notify success
      if: ${{ needs.lint-and-format.result == 'success' && needs.unit-tests.result == 'success' && needs.integration-tests.result == 'success' && needs.mcp-server-tests.result == 'success' }}
      run: |
        echo "✅ All tests passed successfully!"
        echo "PDF processing pipeline and MCP tools are ready for deployment."

    # Runs when any upstream job failed or was cancelled. Cancelled results
    # are included so an interrupted run is not reported as a green job with
    # no message at all.
    - name: Notify failure
      if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
      run: |
        echo "❌ Some tests failed. Please check the logs."
        exit 1

0 commit comments

Comments
 (0)