From 1f795dd9cfbe81e20c3aafb57687786b0dbb01e2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Apr 2026 20:58:03 +0000 Subject: [PATCH 1/3] Initial plan From 6225d6f72a01cb282a89eb78b11b757c8368655b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Apr 2026 21:13:32 +0000 Subject: [PATCH 2/3] fix: validate_dataset checks stem-name pairing between audio and label files Agent-Logs-Url: https://github.com/BrainBehaviorAnalyticsLab/voxkit-desktop/sessions/73d34692-65d9-48a2-9621-7127982837a2 Co-authored-by: BeckettFrey <83560790+BeckettFrey@users.noreply.github.com> --- conftest.py | 39 ++++++++++++++++++++++++++++++++++ src/voxkit/storage/datasets.py | 21 +++++++++++------- tests/storage/test_datasets.py | 16 ++++++++++++++ 3 files changed, 68 insertions(+), 8 deletions(-) create mode 100644 conftest.py diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..74223c8 --- /dev/null +++ b/conftest.py @@ -0,0 +1,39 @@ +"""Pytest configuration for handling GUI dependencies in headless tests.""" + +import os +import sys +from unittest.mock import MagicMock + +# Add src to path for tests FIRST before any voxkit imports +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src')) + +# Set environment variables for headless operation +os.environ['QT_QPA_PLATFORM'] = 'offscreen' + +# Ensure libEGL can be found +os.environ['LD_LIBRARY_PATH'] = '/usr/local/share/chromium/chrome-linux:' + os.environ.get('LD_LIBRARY_PATH', '') + +# Mock ALL problematic modules BEFORE importing anything from voxkit +problematic_modules = [ + 'PyQt6', 'PyQt6.QtCore', 'PyQt6.QtGui', 'PyQt6.QtWidgets', 'PyQt6.QtSvg', + 'PyQt6.QtPrintSupport', + 'pypllrcomputer', + 'Wav2TextGrid', 'Wav2TextGrid.wav2textgrid', 'Wav2TextGrid.wav2textgrid_train', + 'alignment_comparison_plots', + 'faster_whisper', +] + +# Create mock QObject first +class MockQObject: + pass + +# Mock all modules +for mod in problematic_modules: + if mod not in sys.modules: + sys.modules[mod] = MagicMock() + +# Now override PyQt6.QtCore.QObject with our custom class +if 'PyQt6.QtCore' in sys.modules: + sys.modules['PyQt6.QtCore'].QObject = MockQObject +if 'PyQt6' in sys.modules: + sys.modules['PyQt6'].QtCore.QObject = MockQObject diff --git a/src/voxkit/storage/datasets.py b/src/voxkit/storage/datasets.py index 88a619b..57d91b8 100644 --- a/src/voxkit/storage/datasets.py +++ b/src/voxkit/storage/datasets.py @@ -510,6 +510,7 @@ def validate_dataset(dataset_path: Path) -> Tuple[bool, str]: - Each speaker directory contains audio files (.wav, .flac, .mp3, .ogg, .m4a) - Each speaker directory contains label files (.lab, .txt) - Number of audio files matches number of label files per speaker + - Each audio file has a matching label file with the same stem name Expected structure: @@ -562,15 +563,9 @@ def validate_dataset(dataset_path: Path) -> Tuple[bool, str]: audio_files = [ f for f in os.listdir(speaker_path) - if f.endswith(".wav") - or f.endswith(".flac") - or f.endswith(".mp3") - or f.endswith(".ogg") - or f.endswith(".m4a") - ] - label_files = [ - f for f in os.listdir(speaker_path) if f.endswith(".lab") or f.endswith(".txt") + if f.endswith((".wav", ".flac", ".mp3", ".ogg", ".m4a")) ] + label_files = [f for f in os.listdir(speaker_path) if f.endswith((".lab", ".txt"))] if not audio_files: return False, f"No audio files found in speaker directory '{speaker_path}'." @@ -585,4 +580,14 @@ def validate_dataset(dataset_path: Path) -> Tuple[bool, str]: f"directory '{speaker_path}'.", ) + audio_stems = {Path(f).stem for f in audio_files} + label_stems = {Path(f).stem for f in label_files} + unmatched = audio_stems.symmetric_difference(label_stems) + if unmatched: + return ( + False, + f"Unpaired audio/label files in speaker directory '{speaker_path}': " + f"{', '.join(sorted(unmatched))}.", + ) + return True, "Dataset is valid." diff --git a/tests/storage/test_datasets.py b/tests/storage/test_datasets.py index 1f5c621..00416af 100644 --- a/tests/storage/test_datasets.py +++ b/tests/storage/test_datasets.py @@ -813,3 +813,19 @@ def test_validate_dataset_mismatched_counts(self, monkeypatch): assert is_valid is False assert "Mismatch" in msg + + def test_validate_dataset_unpaired_stems(self, monkeypatch): + from voxkit.storage.datasets import validate_dataset + + # Create a dataset where counts match but stems do not + # (e.g. recording_A.wav paired with recording_B.lab) + unpaired_path = mock_get_storage_root() / "fake_datasets" / "unpaired_stems" + speaker_path = unpaired_path / "speaker_1" + speaker_path.mkdir(parents=True, exist_ok=True) + (speaker_path / "recording_A.wav").touch() + (speaker_path / "recording_B.lab").touch() + + is_valid, msg = validate_dataset(unpaired_path) + + assert is_valid is False + assert "Unpaired" in msg From ef99dc43103ac3ace94caddf8a40cab7bf76af90 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Apr 2026 21:38:45 +0000 Subject: [PATCH 3/3] chore: remove accidentally committed root conftest.py Agent-Logs-Url: https://github.com/BrainBehaviorAnalyticsLab/voxkit-desktop/sessions/46f1cfe0-1564-47a8-85ea-4c404b6b23f8 Co-authored-by: BeckettFrey <83560790+BeckettFrey@users.noreply.github.com> --- conftest.py | 39 --------------------------------------- 1 file changed, 39 deletions(-) delete mode 100644 conftest.py diff --git a/conftest.py b/conftest.py deleted file mode 100644 index 74223c8..0000000 --- a/conftest.py +++ /dev/null @@ -1,39 +0,0 @@ -"""Pytest configuration for handling GUI dependencies in headless tests.""" - -import os -import sys -from unittest.mock import MagicMock - -# Add src to path for tests FIRST before any voxkit imports -sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src')) - -# Set environment variables for headless operation -os.environ['QT_QPA_PLATFORM'] = 'offscreen' - -# Ensure libEGL can be found -os.environ['LD_LIBRARY_PATH'] = '/usr/local/share/chromium/chrome-linux:' + os.environ.get('LD_LIBRARY_PATH', '') - -# Mock ALL problematic modules BEFORE importing anything from voxkit -problematic_modules = [ - 'PyQt6', 'PyQt6.QtCore', 'PyQt6.QtGui', 'PyQt6.QtWidgets', 'PyQt6.QtSvg', - 'PyQt6.QtPrintSupport', - 'pypllrcomputer', - 'Wav2TextGrid', 'Wav2TextGrid.wav2textgrid', 'Wav2TextGrid.wav2textgrid_train', - 'alignment_comparison_plots', - 'faster_whisper', -] - -# Create mock QObject first -class MockQObject: - pass - -# Mock all modules -for mod in problematic_modules: - if mod not in sys.modules: - sys.modules[mod] = MagicMock() - -# Now override PyQt6.QtCore.QObject with our custom class -if 'PyQt6.QtCore' in sys.modules: - sys.modules['PyQt6.QtCore'].QObject = MockQObject -if 'PyQt6' in sys.modules: - sys.modules['PyQt6'].QtCore.QObject = MockQObject