Skip to content

Commit

Permalink
misc. fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
JinZr committed Mar 15, 2024
1 parent 7d01eb4 commit d77b035
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions egs/commonvoice/ASR/local/preprocess_commonvoice.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -170,7 +170,7 @@ def preprocess_commonvoice(
The 'validated' partition contains the data of both 'train', 'dev'
and 'test' partitions. We filter out the 'dev' and 'test' partition
here.
-"""
+"""
)
dev_ids = src_dir / f"cv-{language}_dev_ids"
test_ids = src_dir / f"cv-{language}_test_ids"
Expand All @@ -182,7 +182,9 @@ def preprocess_commonvoice(
), f"{test_ids} does not exist, please check stage 1 of the prepare.sh"
dev_ids = dev_ids.read_text().strip().split("\n")
test_ids = test_ids.read_text().strip().split("\n")
-cut_set = cut_set.filter(lambda x: x.id not in dev_ids + test_ids)
+cut_set = cut_set.filter(
+lambda x: x.supervisions[0].id not in dev_ids + test_ids
+)

# Run data augmentation that needs to be done in the
# time domain.
Expand Down

0 comments on commit d77b035

Please sign in to comment.