65 changes: 65 additions & 0 deletions benchmarks/DASB/FUSS/README.md
@@ -0,0 +1,65 @@
# FUSS Separation Task

This folder defines the **FUSS source separation benchmark** within DASB (Discrete Audio and Speech Benchmark). It enables evaluating discrete audio representations on **general-purpose source separation**, using the [FUSS dataset](https://www.tensorflow.org/datasets/catalog/fuss) (Free Universal Sound Separation).

## Overview

The goal of this task is to perform **source separation** on complex acoustic mixtures of general sounds, going beyond speech and music.

This benchmark supports:
- Preparing the FUSS dataset for **supervised training and evaluation**
- Running separation experiments using various discrete audio codecs and backbones (namely Conformer and CRDNN)
- Computing standard evaluation metrics (e.g., SDR) using the Fast-BSSEval library (see the sketch below)
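
For intuition, the snippet below shows how an SDR of this kind can be computed with the `fast_bss_eval` package (`pip install fast-bss-eval`). It is a minimal, illustrative sketch on random signals, not the benchmark's own implementation, which lives in `separation/metrics/bsseval.py`.

```python
# Illustrative SDR computation with fast_bss_eval (not the benchmark's own metric code).
import numpy as np
import fast_bss_eval

rng = np.random.default_rng(0)
reference = rng.standard_normal((4, 16000))  # (n_sources, n_samples) ground-truth sources
estimate = reference + 0.1 * rng.standard_normal((4, 16000))  # imperfect estimates

# Returns per-source SDR / SIR / SAR (in dB) and the best source permutation
sdr, sir, sar, perm = fast_bss_eval.bss_eval_sources(reference, estimate)
print("Mean SDR (dB):", sdr.mean())
```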

---

## Directory Structure

```
FUSS
├── create_fuss.py                # Creates per-track mixtures (and silent placeholders) from the raw FUSS sources
├── README.md
├── separation
│   ├── fuss_prepare.py           # Prepares the FUSS dataset for supervised separation training
│   ├── train.py                  # Unified training script for all FUSS experiments
│   ├── utils.py                  # Audio I/O and utility functions
│   ├── hparams
│   │   ├── conformer
│   │   │   ├── train_dac.yaml    # Config recipe for Conformer
│   │   │   ...
│   │   └── crdnn
│   │       ├── train_dac.yaml    # Config recipe for CRDNN
│   │       ...
│   └── metrics
│       └── bsseval.py            # BSSEval implementation (SDR, SIR, SAR)
└── experiments
```


---

## Setup

**Install dependencies:**

You may need additional packages for separation and evaluation:
```bash
pip install -r ../extra_requirements.txt
```

---

## Data Preparation

- Download the raw FUSS dataset by following the instructions in the [official repo](https://github.com/google-research/sound-separation/tree/master/datasets/fuss)
- Unpack it into a directory `<fuss_dir>`
- Run `create_fuss.py` to validate the data and create the FUSS mixtures for all three sets (`train`, `validation`, `eval`)
- Finally, run `separation/fuss_prepare.py` to create the `.csv` manifests (see the sketch below)
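
For reference, the sketch below wires these two steps together in Python. It is a minimal, untested outline that assumes `create_fuss.py` and `separation/fuss_prepare.py` are importable from the `FUSS` folder and that `/path/to/fuss_dir` is replaced with your local `<fuss_dir>`.

```python
# Minimal preparation sketch (run from benchmarks/DASB/FUSS).
import os

from create_fuss import process_directories
from separation.fuss_prepare import prepare_fuss

fuss_dir = "/path/to/fuss_dir"  # replace with your <fuss_dir>

# Step 1: add silent placeholders for missing sources and write mixture.wav per track
for split in ["eval", "train", "validation"]:
    process_directories(os.path.join(fuss_dir, split))

# Step 2: write the train/validation/eval .csv manifests next to the data
prepare_fuss(data_folder=fuss_dir, save_folder=fuss_dir)
```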

## Running a Separation Experiment

```bash
python FUSS/separation/train.py FUSS/separation/hparams/conformer/train_encodec.yaml \
--data_folder=<fuss_dir> \
--output_folder=FUSS/experiments
```
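
The same `train.py` script drives every configuration: point it at a different YAML under `separation/hparams/conformer/` or `separation/hparams/crdnn/` to switch codecs or backbones, and override individual hyperparameters on the command line as shown with `--data_folder` and `--output_folder` above.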
123 changes: 123 additions & 0 deletions benchmarks/DASB/FUSS/create_fuss.py
@@ -0,0 +1,123 @@
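"""Create FUSS mixtures.

For every track directory under the ``eval``, ``train`` and ``validation``
splits, make sure all four source files exist (writing silent placeholders
for any that are missing) and save their linear sum as ``mixture.wav``.

Example
-------
    python create_fuss.py <fuss_dir>
"""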
import os
import argparse
import numpy as np
import soundfile as sf
from tqdm import tqdm


def create_silent_audio(reference_path, target_path):
"""
Create a silent audio file with the same length and sampling rate as the reference audio.

Args:
reference_path (str): Path to the reference audio file.
target_path (str): Path where the silent audio will be saved.
"""
# Read the reference audio to get sampling rate and length
data, samplerate = sf.read(reference_path)
silent_audio = np.zeros_like(data)

# Save the silent audio
sf.write(target_path, silent_audio, samplerate)


def create_mixture_audio(directory, required_files, output_path):
"""
Create a mixture audio file that is a linear mix of all existing audio files in the directory.

Args:
directory (str): Path to the directory containing the audio files.
required_files (list): List of required audio file names.
output_path (str): Path where the mixture audio will be saved.
"""
mixture = None
samplerate = None

for file in required_files:
file_path = os.path.join(directory, file)
if os.path.exists(file_path):
data, sr = sf.read(file_path)
if mixture is None:
mixture = np.zeros_like(data, dtype=np.float32)
samplerate = sr
mixture += data

if mixture is not None and samplerate is not None:
        # Optionally rescale the mixture to reduce the risk of clipping
        # (disabled so the mixture stays the plain sum of the sources):
        # mixture = mixture / len(required_files)
sf.write(output_path, mixture, samplerate)


def ensure_audio_files(directory):
"""
Ensure all required audio files exist in a directory. If not, create silent versions of them.

Args:
directory (str): Path to the directory containing the audio files.
"""
required_files = [
"background0_sound.wav",
"foreground0_sound.wav",
"foreground1_sound.wav",
"foreground2_sound.wav",
]

# Full paths to the required files
required_paths = {
file: os.path.join(directory, file) for file in required_files
}

# Check if 'background0_sound.wav' exists
background_path = required_paths["background0_sound.wav"]
if not os.path.exists(background_path):
print(f"Error: {background_path} is missing. Cannot proceed.")
return

# Ensure other files exist, creating silent versions if necessary
for file, path in required_paths.items():
if not os.path.exists(path):
# print(f"{file} is missing. Creating a silent version.")
create_silent_audio(background_path, path)

# Create the mixture audio file
mixture_path = os.path.join(directory, "mixture.wav")
create_mixture_audio(directory, required_files, mixture_path)


def process_directories(root_directory):
"""
Walk through each subdirectory and ensure required audio files exist and create mixture files.

Args:
root_directory (str): Path to the root directory of the FUSS eval set.
"""
for subdir, _, _ in tqdm(os.walk(root_directory)):
ensure_audio_files(subdir)


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Ensure audio files and create mixture files in each subdirectory."
)
parser.add_argument(
"root_dir", type=str, help="Path to the root directory of the FUSS."
)

args = parser.parse_args()
root_dir = args.root_dir

required_subdirs = ["eval", "train", "validation"]
missing = [
d
for d in required_subdirs
if not os.path.isdir(os.path.join(root_dir, d))
]

if missing:
raise FileNotFoundError(
f"Missing required subdirectories in '{root_dir}': {', '.join(missing)}"
)

for subdir in required_subdirs:
subdir_path = os.path.join(root_dir, subdir)
process_directories(subdir_path)
167 changes: 167 additions & 0 deletions benchmarks/DASB/FUSS/separation/fuss_prepare.py
@@ -0,0 +1,167 @@
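"""Prepare CSV data manifests for the FUSS source separation dataset."""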
import csv
import logging
import os
from typing import Optional, Sequence

from tqdm import tqdm

import speechbrain as sb


__all__ = ["prepare_fuss"]

SOURCE_NAMES = [
"background0_sound.wav",
"foreground0_sound.wav",
"foreground1_sound.wav",
"foreground2_sound.wav",
]

# Workaround to use the fastest backend (SoundFile): remove FFmpeg from
# torchaudio's available backends so it is never auto-selected
try:
import torchaudio

torchaudio._backend.utils.get_available_backends().pop("ffmpeg", None)
except Exception:
pass

# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    # format="%(asctime)s [%(levelname)s] %(funcName)s - %(message)s",
)

_LOGGER = logging.getLogger(__name__)


def prepare_fuss(
data_folder: "str",
save_folder: "Optional[str]" = None,
splits: "Sequence[str]" = ("train", "eval", "validation"),
) -> "None":
"""Prepare data manifest CSV files for the MUSDB dataset

Arguments
---------
data_folder:
The path to the dataset folder.
save_folder:
The path to the folder where the data manifest CSV files will be stored.
        Defaults to `data_folder`.
splits:
The dataset splits to prepare.

Raises
------
ValueError
If an invalid argument value is given.
RuntimeError
If one of the expected split folders is missing.

Examples
--------
    >>> # Expected folder structure: FUSS/{train, validation, eval}/<track_id>/{mixture.wav, background0_sound.wav, foreground0_sound.wav, foreground1_sound.wav, foreground2_sound.wav}
    >>> prepare_fuss("FUSS")

"""
if not save_folder:
save_folder = data_folder

train_data = []
test_data = []
valid_data = []

    # Iterate over the requested splits
for split in splits:
split_dir = os.path.join(data_folder, split)

# Check if the split directory exists
if not os.path.exists(split_dir):
print(f"Warning: {split_dir} does not exist. Skipping.")
continue

# Walk through the subdirectories of the split (tracks)
for track_id in tqdm(os.listdir(split_dir), desc=split):
track_dir = os.path.join(split_dir, track_id)
            # Each track must contain the mixture and the four source files
            required_files = ["mixture.wav"] + SOURCE_NAMES
file_paths = {}

for file_name in required_files:
file_path = os.path.join(track_dir, file_name)
if os.path.exists(file_path):
file_paths[file_name] = file_path
                else:
                    print(
                        f"Warning: {file_name} missing in {track_dir}. Skipping track."
                    )
                    file_paths = None
                    break  # If any file is missing, skip the current track

# If all required files are found, process the track
if file_paths:
# Get the duration of the 'mixture.wav' file
mixture_wav_path = file_paths["mixture.wav"]
info = sb.dataio.dataio.read_audio_info(mixture_wav_path)
duration = info.num_frames / info.sample_rate

# Prepare the row for the CSV
row = [
split,
track_id, # ID
duration, # duration
file_paths["mixture.wav"], # mixture_wav
file_paths["background0_sound.wav"],
file_paths["foreground0_sound.wav"],
file_paths["foreground1_sound.wav"],
file_paths["foreground2_sound.wav"],
]

# Add the row to the appropriate data list
if split == "train":
train_data.append(row)
elif split == "eval":
test_data.append(row)
elif split == "validation":
valid_data.append(row)

# Define the CSV file headers
headers = [
"split",
"ID",
"duration",
"mixture_wav",
"background0_sound_wav",
"foreground0_sound_wav",
"foreground1_sound_wav",
"foreground2_sound_wav",
]

# Write the CSV files for each split
for data, split in [
(train_data, "train"),
(test_data, "eval"),
(valid_data, "validation"),
]:
output_csv = os.path.join(save_folder, f"{split}.csv")

with open(output_csv, mode="w", newline="") as file:
writer = csv.writer(file)
writer.writerow(headers)
writer.writerows(data)
print(f"CSV file created for {split}: {output_csv}")

_LOGGER.info(
"----------------------------------------------------------------------",
)
_LOGGER.info("Done!")