KathiraveluLab · suyashkumar102 · Apr 5, 2026 · Apr 7, 2026 · Apr 7, 2026
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -0,0 +1,73 @@
+name: Tests
+
+on:
+  push:
+    branches: [ main, dev ]
+  pull_request:
+    branches: [ main, dev ]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        python-version: ['3.10', '3.11']  # quoted so YAML does not read 3.10 as the float 3.1
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v3
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v4
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Cache pip dependencies
+      uses: actions/cache@v3
+      with:  # key includes the Python version so each matrix job keeps its own cache entry
+        path: ~/.cache/pip
+        key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/requirements*.txt') }}
+        restore-keys: |
+          ${{ runner.os }}-pip-${{ matrix.python-version }}-
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements.txt
+        pip install -r requirements-dev.txt
+
+    - name: Run tests with coverage
+      run: |
+        pytest tests/ -v --cov=dreamsApp --cov-report=term-missing --cov-report=xml
+
+    - name: Upload coverage to Codecov
+      uses: codecov/codecov-action@v3
+      with:
+        file: ./coverage.xml
+        flags: unittests
+        name: codecov-umbrella
+        fail_ci_if_error: false  # a failed upload must not fail the test job
+
+    - name: Check code style with flake8
+      run: |
+        # Stop the build if there are Python syntax errors or undefined names
+        flake8 dreamsApp/ --count --select=E9,F63,F7,F82 --show-source --statistics
+        # Exit-zero treats all errors as warnings
+        flake8 dreamsApp/ --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+
+    - name: Check code formatting with black
+      run: |
+        black --check dreamsApp/ tests/
+      continue-on-error: true  # formatting drift is reported but does not fail the job
+
+  test-summary:
+    runs-on: ubuntu-latest
+    needs: test
+    if: always()  # run even when the test job failed
+
+    steps:
+    - name: Test Summary
+      run: |
+        echo "Test suite completed"
+        echo "Check individual job results above"
diff --git a/dreamsApp/core/extra/clustering.py b/dreamsApp/core/extra/clustering.py
@@ -30,18 +30,18 @@ def cluster_keywords_for_all_users(keywords_collection):
 
         vectors, metadata = get_vectors_and_metadata(doc)
         if len(vectors) < 2:
+            logger.debug(f"Skipping user {user_id}: insufficient data ({len(vectors)} vectors)")
             continue  # Skip clustering if insufficient data
 
-        # Debug: Log the shape of the vectors array to check its dimensions
-        logger.debug(f"Shape of vectors array: {vectors.shape}")
-        # Debug: Log the first few vectors to inspect their values
-        logger.debug(f"First 5 vectors: {vectors[:5]}")
+        logger.debug(f"Clustering user {user_id}: vectors shape {vectors.shape}")
+        logger.debug(f"Sample vectors for user {user_id} (first 5): {vectors[:5]}")
 
         clusterer = hdbscan.HDBSCAN(min_cluster_size=2, metric='euclidean')
         cluster_labels = clusterer.fit_predict(vectors)
 
-        # Debug: Log the cluster labels to see how the data is being clustered
-        logger.debug(f"Cluster labels: {cluster_labels}")
+        unique_clusters = len(set(cluster_labels)) - (1 if -1 in cluster_labels else 0)
+        noise_count = np.count_nonzero(cluster_labels == -1)
+        logger.debug(f"HDBSCAN produced {unique_clusters} clusters for user {user_id} ({noise_count} noise points)")
 
         clustered_result = []
         for i, label in enumerate(cluster_labels):
@@ -57,4 +57,4 @@ def cluster_keywords_for_all_users(keywords_collection):
             {'$set': {'clustered_keywords': clustered_result}}
         )
 
-    print("All users clustered.")
+    logger.info("Clustering complete for all users")
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -0,0 +1,25 @@
+# Development and Testing Dependencies
+# Separate from production requirements.txt to keep deployment lean
+
+# Core test framework
+pytest>=7.0.0
+pytest-cov>=4.0.0
+pytest-mock>=3.12.0
+
+# Flask testing (already in requirements.txt but explicit here for clarity)
+Flask>=3.0.0
+
+# MongoDB testing
+pymongo>=4.6.0
+
+# Graph analysis (already in requirements.txt)
+networkx>=3.0
+
+# Code quality
+black>=23.0.0
+flake8>=6.0.0
+
+# Guards against "No module named 'bson'" failures in test environments
+# bson is not a separate requirement: it ships inside the pymongo distribution
+# (do not install the standalone "bson" package from PyPI alongside it)
+pymongo[srv]>=4.6.0
diff --git a/tests/README.md b/tests/README.md
@@ -0,0 +1,239 @@
+# DREAMS Test Suite
+
+## Overview
+
+This directory contains the test suite for the DREAMS platform. Tests are organized by module and use pytest as the test framework.
+
+## Setup
+
+### Install Test Dependencies
+
+```bash
+# Install all test dependencies
+pip install -r requirements-dev.txt
+
+# Or install just the core requirements plus test tools
+pip install -r requirements.txt
+pip install pytest pytest-cov pytest-mock
+```
+
+### Verify Installation
+
+```bash
+# Check that pytest can discover tests
+pytest --collect-only
+
+# Should show ~92 tests collected
+```
+
+## Running Tests
+
+### Run All Tests
+
+```bash
+pytest tests/ -v
+```
+
+### Run with Coverage Report
+
+```bash
+# Terminal output
+pytest tests/ -v --cov=dreamsApp --cov-report=term-missing
+
+# HTML report (opens in browser)
+pytest tests/ -v --cov=dreamsApp --cov-report=html
+python -m webbrowser htmlcov/index.html
+```
+
+### Run Specific Test Files
+
+```bash
+# Single file
+pytest tests/test_sentiment.py -v
+
+# Multiple files
+pytest tests/test_sentiment.py tests/test_clustering.py -v
+
+# Specific test function
+pytest tests/test_sentiment.py::test_valid_caption -v
+```
+
+### Run Tests by Marker
+
+```bash
+# Run only unit tests (if markers are added)
+pytest -m unit
+
+# Run only integration tests
+pytest -m integration
+
+# Skip slow tests
+pytest -m "not slow"
+```
+
+## Test Structure
+
+```
+tests/
+├── conftest.py              # Shared fixtures (Flask app, MongoDB mocks)
+├── README.md                # This file
+├── data/                    # Test data files
+│   ├── expected_results.json
+│   ├── locations.json
+│   └── sentiments.csv
+├── test_*.py                # Test modules (one per feature)
+└── integration/             # Integration tests (future)
+```
+
+## Test Categories
+
+### Unit Tests
+- `test_sentiment.py` - Sentiment analysis API
+- `test_clustering.py` - Keyword clustering
+- `test_location_enrichment.py` - Location geocoding and embedding
+- `test_temporal_narrative_graph.py` - Temporal graph construction
+- `test_graph_analysis.py` - Graph metrics computation
+- `test_timeline.py` - Emotion timeline utilities
+
+### Integration Tests
+- `test_graph_metrics_api.py` - Full API endpoint testing
+- `test_fl.py` - Federated learning workflow
+- `test_chime.py` - CHIME model integration
+
+## Writing Tests
+
+### Using Fixtures
+
+The `conftest.py` file provides shared fixtures:
+
+```python
+def test_with_flask_app(app):
+    """Test that needs Flask app instance."""
+    assert app.config['TESTING'] is True
+
+def test_with_client(client):
+    """Test that makes HTTP requests."""
+    response = client.get('/api/some-endpoint')
+    assert response.status_code == 200
+
+def test_with_context(app_context):
+    """Test that needs application context."""
+    from flask import current_app
+    assert current_app.config['TESTING'] is True
+
+def test_with_mock_db(mock_mongo):
+    """Test that uses mocked MongoDB."""
+    mock_mongo['posts'].find_one.return_value = {'_id': '123'}
+    # test code here
+```
+
+### Test Naming Conventions
+
+- Test files: `test_<module_name>.py`
+- Test functions: `test_<what_it_tests>()`
+- Test classes: `Test<FeatureName>`
+
+### Mocking External Dependencies
+
+```python
+from unittest.mock import patch, MagicMock
+
+@patch('dreamsApp.app.utils.sentiment.pipeline')
+def test_with_mocked_model(mock_pipeline):
+    """Mock HuggingFace pipeline to avoid loading models."""
+    mock_pipeline.return_value = MagicMock()
+    # test code here
+```
+
+## Coverage Goals
+
+| Module | Current | Target |
+|--------|---------|--------|
+| `app/utils/sentiment.py` | 78% | 90% |
+| `app/utils/clustering.py` | 65% | 85% |
+| `analytics/graph_analysis.py` | 95% | 95% |
+| `analytics/temporal_narrative_graph.py` | 98% | 98% |
+| **Overall** | 78% | 85% |
+
+## Continuous Integration
+
+Tests run automatically on every push and pull request via GitHub Actions.
+
+See `.github/workflows/test.yml` for the CI configuration.
+
+### CI Workflow
+
+1. Checkout code
+2. Set up Python (matrix: 3.10 and 3.11)
+3. Install dependencies (`requirements.txt` + `requirements-dev.txt`)
+4. Run pytest with coverage and upload the report to Codecov
+5. Run flake8 and a non-blocking black check for code quality
+
+## Troubleshooting
+
+### ModuleNotFoundError: No module named 'flask'
+
+```bash
+# Install test dependencies
+pip install -r requirements-dev.txt
+```
+
+### ModuleNotFoundError: No module named 'bson'
+
+```bash
+# Install pymongo with the srv extra (bson is bundled with pymongo)
+pip install 'pymongo[srv]>=4.6.0'
+```
+
+### Tests fail with MongoDB connection errors
+
+The test suite uses mocked MongoDB by default. If you see connection errors:
+
+1. Check that `conftest.py` is in the `tests/` directory
+2. Verify the `mock_mongo` fixture is being used
+3. For tests that need real MongoDB, use `pytest.mark.integration` and skip in CI
+
+### Import errors in test files
+
+Make sure you're running pytest from the project root:
+
+```bash
+# Correct (from project root)
+cd /path/to/DREAMS
+pytest tests/
+
+# Incorrect (from tests directory)
+cd tests
+pytest .  # This will fail with import errors
+```
+
+## Test Data
+
+Test data files are located in `tests/data/`:
+
+- `locations.json` - Sample location data for proximity tests
+- `sentiments.csv` - Sample sentiment data for timeline tests
+- `expected_results.json` - Expected outputs for validation
+
+## Adding New Tests
+
+1. Create a new file: `tests/test_<feature>.py`
+2. Request fixtures from `conftest.py` by naming them as test function parameters (no import needed)
+3. Write test functions with descriptive names
+4. Use mocks for external dependencies (APIs, models, databases)
+5. Run tests locally before committing
+6. Ensure coverage doesn't decrease
+
+## References
+
+- [pytest documentation](https://docs.pytest.org/)
+- [pytest-cov documentation](https://pytest-cov.readthedocs.io/)
+- [Flask testing documentation](https://flask.palletsprojects.com/en/latest/testing/)
+- [DREAMS TEST_PLAN.md](../docs/TEST_PLAN.md) - Comprehensive test strategy
+
+## Questions?
+
+For questions about the test suite, see:
+- `docs/TEST_PLAN.md` - Overall testing strategy
+- GitHub Discussions - Ask the community
+- Open an issue - Report test failures or suggest improvements