65 changes: 65 additions & 0 deletions benchmarks/DASB/FUSS/README.md
@@ -0,0 +1,65 @@
# FUSS Separation Task

This folder defines the **FUSS source separation benchmark** within DASB (Discrete Audio and Speech Benchmark). It enables evaluating discrete audio representations on **general-purpose source separation**, using the [FUSS dataset](https://www.tensorflow.org/datasets/catalog/fuss) (Free Universal Sound Separation).

## Overview

The goal of this task is to perform **source separation** on complex acoustic mixtures of general sounds, going beyond speech and music.

This benchmark supports:
- Preparing the FUSS dataset for **supervised training and evaluation**
- Running separation experiments using various discrete audio codecs and backbones (namely Conformer and CRDNN)
- Computing standard evaluation metrics (e.g., SDR) using the Fast-BSSEval library (see the sketch below)
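
For intuition, the snippet below shows how an SDR of this kind can be computed with the `fast_bss_eval` package (`pip install fast-bss-eval`). It is a minimal, illustrative sketch on random signals, not the benchmark's own implementation, which lives in `separation/metrics/bsseval.py`.

```python
# Illustrative SDR computation with fast_bss_eval (not the benchmark's own metric code).
import numpy as np
import fast_bss_eval

rng = np.random.default_rng(0)
reference = rng.standard_normal((4, 16000))  # (n_sources, n_samples) ground-truth sources
estimate = reference + 0.1 * rng.standard_normal((4, 16000))  # imperfect estimates

# Returns per-source SDR / SIR / SAR (in dB) and the best source permutation
sdr, sir, sar, perm = fast_bss_eval.bss_eval_sources(reference, estimate)
print("Mean SDR (dB):", sdr.mean())
```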

---

## Directory Structure

```
FUSS
├── create_fuss.py                # Creates per-track mixtures (and silent placeholders) from the raw FUSS sources
├── README.md
├── separation
│   ├── fuss_prepare.py           # Prepares the FUSS dataset for supervised separation training
│   ├── train.py                  # Unified training script for all FUSS experiments
│   ├── utils.py                  # Audio I/O and utility functions
│   ├── hparams
│   │   ├── conformer
│   │   │   ├── train_dac.yaml    # Config recipe for Conformer
│   │   │   ...
│   │   └── crdnn
│   │       ├── train_dac.yaml    # Config recipe for CRDNN
│   │       ...
│   └── metrics
│       └── bsseval.py            # BSSEval implementation (SDR, SIR, SAR)
└── experiments
```


---

## Setup

**Install dependencies:**

You may need additional packages for separation and evaluation:
```bash
pip install -r ../extra_requirements.txt
```

---

## Data Preparation

- Download the raw FUSS dataset by following the instructions in the [official repo](https://github.com/google-research/sound-separation/tree/master/datasets/fuss)
- Unpack it into a directory `<fuss_dir>`
- Run `create_fuss.py` to validate the data and create the FUSS mixtures for all three sets (`train`, `validation`, `eval`)
- Finally, run `separation/fuss_prepare.py` to create the `.csv` manifests (see the sketch below)
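
For reference, the sketch below wires these two steps together in Python. It is a minimal, untested outline that assumes `create_fuss.py` and `separation/fuss_prepare.py` are importable from the `FUSS` folder and that `/path/to/fuss_dir` is replaced with your local `<fuss_dir>`.

```python
# Minimal preparation sketch (run from benchmarks/DASB/FUSS).
import os

from create_fuss import process_directories
from separation.fuss_prepare import prepare_fuss

fuss_dir = "/path/to/fuss_dir"  # replace with your <fuss_dir>

# Step 1: add silent placeholders for missing sources and write mixture.wav per track
for split in ["eval", "train", "validation"]:
    process_directories(os.path.join(fuss_dir, split))

# Step 2: write the train/validation/eval .csv manifests next to the data
prepare_fuss(data_folder=fuss_dir, save_folder=fuss_dir)
```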

## Running a Separation Experiment

```bash
python FUSS/separation/train.py FUSS/separation/hparams/conformer/train_encodec.yaml \
--data_folder=<fuss_dir> \
--output_folder=FUSS/experiments
```
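
The same `train.py` script drives every configuration: point it at a different YAML under `separation/hparams/conformer/` or `separation/hparams/crdnn/` to switch codecs or backbones, and override individual hyperparameters on the command line as shown with `--data_folder` and `--output_folder` above.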
123 changes: 123 additions & 0 deletions benchmarks/DASB/FUSS/create_fuss.py
@@ -0,0 +1,123 @@
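"""Create FUSS mixtures.

For every track directory under the ``eval``, ``train`` and ``validation``
splits, make sure all four source files exist (writing silent placeholders
for any that are missing) and save their linear sum as ``mixture.wav``.

Example
-------
    python create_fuss.py <fuss_dir>
"""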
import os
import argparse
import numpy as np
import soundfile as sf
from tqdm import tqdm


def create_silent_audio(reference_path, target_path):
"""
Create a silent audio file with the same length and sampling rate as the reference audio.

Args:
reference_path (str): Path to the reference audio file.
target_path (str): Path where the silent audio will be saved.
"""
# Read the reference audio to get sampling rate and length
data, samplerate = sf.read(reference_path)
silent_audio = np.zeros_like(data)

# Save the silent audio
sf.write(target_path, silent_audio, samplerate)


def create_mixture_audio(directory, required_files, output_path):
"""
Create a mixture audio file that is a linear mix of all existing audio files in the directory.

Args:
directory (str): Path to the directory containing the audio files.
required_files (list): List of required audio file names.
output_path (str): Path where the mixture audio will be saved.
"""
mixture = None
samplerate = None

for file in required_files:
file_path = os.path.join(directory, file)
if os.path.exists(file_path):
data, sr = sf.read(file_path)
if mixture is None:
mixture = np.zeros_like(data, dtype=np.float32)
samplerate = sr
mixture += data

if mixture is not None and samplerate is not None:
        # Optionally rescale the mixture to reduce the risk of clipping
        # (disabled so the mixture stays the plain sum of the sources):
        # mixture = mixture / len(required_files)
sf.write(output_path, mixture, samplerate)


def ensure_audio_files(directory):
"""
Ensure all required audio files exist in a directory. If not, create silent versions of them.

Args:
directory (str): Path to the directory containing the audio files.
"""
required_files = [
"background0_sound.wav",
"foreground0_sound.wav",
"foreground1_sound.wav",
"foreground2_sound.wav",
]

# Full paths to the required files
required_paths = {
file: os.path.join(directory, file) for file in required_files
}

# Check if 'background0_sound.wav' exists
background_path = required_paths["background0_sound.wav"]
if not os.path.exists(background_path):
print(f"Error: {background_path} is missing. Cannot proceed.")
return

# Ensure other files exist, creating silent versions if necessary
for file, path in required_paths.items():
if not os.path.exists(path):
# print(f"{file} is missing. Creating a silent version.")
create_silent_audio(background_path, path)

# Create the mixture audio file
mixture_path = os.path.join(directory, "mixture.wav")
create_mixture_audio(directory, required_files, mixture_path)


def process_directories(root_directory):
"""
Walk through each subdirectory and ensure required audio files exist and create mixture files.

Args:
root_directory (str): Path to the root directory of the FUSS eval set.
"""
for subdir, _, _ in tqdm(os.walk(root_directory)):
ensure_audio_files(subdir)


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Ensure audio files and create mixture files in each subdirectory."
)
parser.add_argument(
"root_dir", type=str, help="Path to the root directory of the FUSS."
)

args = parser.parse_args()
root_dir = args.root_dir

required_subdirs = ["eval", "train", "validation"]
missing = [
d
for d in required_subdirs
if not os.path.isdir(os.path.join(root_dir, d))
]

if missing:
raise FileNotFoundError(
f"Missing required subdirectories in '{root_dir}': {', '.join(missing)}"
)

for subdir in required_subdirs:
subdir_path = os.path.join(root_dir, subdir)
process_directories(subdir_path)
167 changes: 167 additions & 0 deletions benchmarks/DASB/FUSS/separation/fuss_prepare.py
@@ -0,0 +1,167 @@
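"""Prepare CSV data manifests for the FUSS source separation dataset."""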
import csv
import logging
import os
from typing import Optional, Sequence

from tqdm import tqdm

import speechbrain as sb


__all__ = ["prepare_fuss"]

SOURCE_NAMES = [
"background0_sound.wav",
"foreground0_sound.wav",
"foreground1_sound.wav",
"foreground2_sound.wav",
]

# Workaround to use the fastest backend (SoundFile): remove FFmpeg from
# torchaudio's available backends so it is never auto-selected
try:
import torchaudio

torchaudio._backend.utils.get_available_backends().pop("ffmpeg", None)
except Exception:
pass

# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    # format="%(asctime)s [%(levelname)s] %(funcName)s - %(message)s",
)

_LOGGER = logging.getLogger(__name__)


def prepare_fuss(
data_folder: "str",
save_folder: "Optional[str]" = None,
splits: "Sequence[str]" = ("train", "eval", "validation"),
) -> "None":
"""Prepare data manifest CSV files for the MUSDB dataset

Arguments
---------
data_folder:
The path to the dataset folder.
save_folder:
The path to the folder where the data manifest CSV files will be stored.
        Defaults to `data_folder`.
splits:
The dataset splits to prepare.

Raises
------
ValueError
If an invalid argument value is given.
RuntimeError
If one of the expected split folders is missing.

Examples
--------
    >>> # Expected folder structure: FUSS/{train, validation, eval}/<track_id>/{mixture.wav, background0_sound.wav, foreground0_sound.wav, foreground1_sound.wav, foreground2_sound.wav}
    >>> prepare_fuss("FUSS")

"""
if not save_folder:
save_folder = data_folder

train_data = []
test_data = []
valid_data = []

    # Iterate over the requested splits
for split in splits:
split_dir = os.path.join(data_folder, split)

# Check if the split directory exists
if not os.path.exists(split_dir):
print(f"Warning: {split_dir} does not exist. Skipping.")
continue

# Walk through the subdirectories of the split (tracks)
for track_id in tqdm(os.listdir(split_dir), desc=split):
track_dir = os.path.join(split_dir, track_id)
            # Each track must contain the mixture and the four source files
            required_files = ["mixture.wav"] + SOURCE_NAMES
file_paths = {}

for file_name in required_files:
file_path = os.path.join(track_dir, file_name)
if os.path.exists(file_path):
file_paths[file_name] = file_path
                else:
                    print(
                        f"Warning: {file_name} missing in {track_dir}. Skipping track."
                    )
                    file_paths = None
                    break  # If any file is missing, skip the current track

# If all required files are found, process the track
if file_paths:
# Get the duration of the 'mixture.wav' file
mixture_wav_path = file_paths["mixture.wav"]
info = sb.dataio.dataio.read_audio_info(mixture_wav_path)
duration = info.num_frames / info.sample_rate

# Prepare the row for the CSV
row = [
split,
track_id, # ID
duration, # duration
file_paths["mixture.wav"], # mixture_wav
file_paths["background0_sound.wav"],
file_paths["foreground0_sound.wav"],
file_paths["foreground1_sound.wav"],
file_paths["foreground2_sound.wav"],
]

# Add the row to the appropriate data list
if split == "train":
train_data.append(row)
elif split == "eval":
test_data.append(row)
elif split == "validation":
valid_data.append(row)

# Define the CSV file headers
headers = [
"split",
"ID",
"duration",
"mixture_wav",
"background0_sound_wav",
"foreground0_sound_wav",
"foreground1_sound_wav",
"foreground2_sound_wav",
]

# Write the CSV files for each split
for data, split in [
(train_data, "train"),
(test_data, "eval"),
(valid_data, "validation"),
]:
output_csv = os.path.join(save_folder, f"{split}.csv")

with open(output_csv, mode="w", newline="") as file:
writer = csv.writer(file)
writer.writerow(headers)
writer.writerows(data)
print(f"CSV file created for {split}: {output_csv}")

_LOGGER.info(
"----------------------------------------------------------------------",
)
_LOGGER.info("Done!")