Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add MatchaTTS for the Chinese dataset Baker #1849

Merged
merged 29 commits into from
Dec 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 167 additions & 0 deletions .github/scripts/baker_zh/TTS/run-matcha.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#!/usr/bin/env bash

# Abort on the first failing command (-e) and echo every command as it runs (-x).
set -ex

# Install sox; the soxi calls later in this script come from that package.
apt-get update
apt-get install -y sox

# Python packages installed for this CI run (conformer is pinned to 0.3.2).
python3 -m pip install numba conformer==0.3.2 diffusers librosa
python3 -m pip install jieba


log() {
  # Print a timestamped message tagged with the caller's file name, line
  # number and function name. Adapted from espnet.
  local caller_file=${BASH_SOURCE[1]##*/}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo -e "${stamp} (${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

# Everything below runs from the recipe directory.
cd egs/baker_zh/TTS

# Rewrite selected constants in prepare.sh in place, one substitution per sed
# call (each call refreshes prepare.sh.bak), then show what changed.
for subst in 's/600/8/g' 's/first 100/first 3/g' 's/500/5/g'; do
  sed -i.bak "$subst" ./prepare.sh
done
git diff

prepare_data() {
  # Fetch the small BZNSYP sample archive (a subset of the data created for
  # testing), unpack it as download/BZNSYP, then run prepare.sh and print the
  # resulting directory tree.
  local archive=BZNSYP-samples.tar.bz2

  mkdir -p download
  pushd download
  wget -q "https://huggingface.co/csukuangfj/tmp-files/resolve/main/${archive}"
  tar xvf "${archive}"
  mv "${archive%.tar.bz2}" BZNSYP
  rm "${archive}"
  popd

  ./prepare.sh
  tree .
}

train() {
  # Patch matcha/train.py (every "1500" becomes "3"), show the diff, run
  # train.py for a single epoch, and list the experiment directory.
  pushd ./matcha
  sed -i.bak 's/1500/3/g' ./train.py
  git diff .
  popd

  local train_args=(
    --exp-dir matcha/exp
    --num-epochs 1
    --save-every-n 1
    --num-buckets 2
    --tokens data/tokens.txt
    --max-duration 20
  )
  ./matcha/train.py "${train_args[@]}"

  ls -lh matcha/exp
}

infer() {
  # Download the generator_v2 vocoder, run matcha/infer.py on a fixed Chinese
  # sentence using the epoch-1 checkpoint, inspect the generated wav with
  # soxi, then delete the wav and the vocoder.
  local vocoder=generator_v2
  local wav=./generated.wav

  curl -SL -O "https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/${vocoder}"

  local infer_args=(
    --num-buckets 2
    --epoch 1
    --exp-dir ./matcha/exp
    --tokens data/tokens.txt
    --cmvn ./data/fbank/cmvn.json
    --vocoder "./${vocoder}"
    --input-text "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。"
    --output-wav "${wav}"
  )
  ./matcha/infer.py "${infer_args[@]}"

  ls -lh *.wav
  soxi "${wav}"
  rm -v "${wav}"
  rm -v "${vocoder}"
}

# Export a pretrained checkpoint to ONNX, run ONNX inference with three
# vocoders, and package the result as matcha-icefall-zh-baker under /icefall.
function export_onnx() {
# Download the pretrained epoch-2000 checkpoint into the experiment directory.
pushd matcha/exp
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-baker-matcha-zh-2024-12-27/resolve/main/epoch-2000.pt
popd

# Replace the local *.json files in data/fbank with the cmvn.json downloaded
# from the same model repository as the checkpoint.
pushd data/fbank
rm -v *.json
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-baker-matcha-zh-2024-12-27/resolve/main/cmvn.json
popd

# Export the epoch-2000 checkpoint.
# NOTE(review): presumably this writes the model-steps-N.onnx files used below - confirm.
./matcha/export_onnx.py \
--exp-dir ./matcha/exp \
--epoch 2000 \
--tokens ./data/tokens.txt \
--cmvn ./data/fbank/cmvn.json

ls -lh *.onnx

# The first branch is deliberately disabled; the else branch downloads
# already-exported HiFi-GAN ONNX vocoders instead of exporting them here.
if false; then
# The CI machine does not have enough memory to run it
#
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3
python3 ./matcha/export_onnx_hifigan.py
else
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v1.onnx
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v2.onnx
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v3.onnx
fi

ls -lh *.onnx

# NOTE(review): presumably this produces the ./lexicon.txt consumed below - confirm.
python3 ./matcha/generate_lexicon.py

# Synthesize the same sentence once per vocoder with the 6-step acoustic
# model; the wavs are written directly to /icefall.
for v in v1 v2 v3; do
python3 ./matcha/onnx_pretrained.py \
--acoustic-model ./model-steps-6.onnx \
--vocoder ./hifigan_$v.onnx \
--tokens ./data/tokens.txt \
--lexicon ./lexicon.txt \
--input-text "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。" \
--output-wav /icefall/generated-matcha-tts-steps-6-$v.wav
done

# Inspect the generated audio and copy every exported acoustic model to /icefall.
ls -lh /icefall/*.wav
soxi /icefall/generated-matcha-tts-steps-6-*.wav
cp ./model-steps-*.onnx /icefall

# Assemble the release directory: tokens, lexicon and the 3-step model.
d=matcha-icefall-zh-baker
mkdir $d
cp -v data/tokens.txt $d
cp -v lexicon.txt $d
cp model-steps-3.onnx $d
pushd $d
# Add the contents of the cppjieba dict archive and drop the archive itself.
curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
tar xvf dict.tar.bz2
rm dict.tar.bz2

# Add the date/number/phone FST files from the aishell3 VITS model repository.
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst

# Write the README that ships inside the release directory.
cat >README.md <<EOF
# Introduction

This model is trained using the dataset from
https://en.data-baker.com/datasets/freeDatasets/

The dataset contains 10000 Chinese sentences of a native Chinese female speaker,
which is about 12 hours.

**Note**: The dataset is for non-commercial use only.

You can find the training code at
https://github.com/k2-fsa/icefall/tree/master/egs/baker_zh/TTS
EOF

ls -lh
popd
# Create the tarball, then move both the tarball and the directory to /icefall.
tar cvjf $d.tar.bz2 $d
mv $d.tar.bz2 /icefall
mv $d /icefall
}

# Run the pipeline stages in order; with `set -e` a failing stage aborts the run.
prepare_data
train
infer
export_onnx

# Clean up: delete downloaded vocoder files and the experiment directory, then
# restore the tracked files that the sed patches above modified.
rm -rfv generator_v* matcha/exp
git checkout .
18 changes: 16 additions & 2 deletions .github/scripts/docker/generate_build_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,19 @@
# Copyright 2023 Xiaomi Corp. (authors: Fangjun Kuang)


import argparse
import json


def get_args():
    """Parse the command-line arguments of this script.

    Returns:
      The argparse namespace. Its ``min_torch_version`` attribute holds the
      value passed via ``--min-torch-version``, or ``None`` when the option
      is not given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--min-torch-version",
        help="Minimum torch version",
    )
    return parser.parse_args()


def version_gt(a, b):
a_major, a_minor = list(map(int, a.split(".")))[:2]
b_major, b_minor = list(map(int, b.split(".")))[:2]
Expand Down Expand Up @@ -42,7 +52,7 @@ def get_torchaudio_version(torch_version):
return torch_version


def get_matrix():
def get_matrix(min_torch_version):
k2_version = "1.24.4.dev20241029"
kaldifeat_version = "1.25.5.dev20241029"
version = "20241218"
Expand All @@ -64,6 +74,9 @@ def get_matrix():
matrix = []
for p in python_version:
for t in torch_version:
if min_torch_version and version_gt(min_torch_version, t):
continue

# torchaudio <= 1.13.x supports only python <= 3.10

if version_gt(p, "3.10") and not version_gt(t, "2.0"):
Expand Down Expand Up @@ -101,7 +114,8 @@ def get_matrix():


def main():
matrix = get_matrix()
args = get_args()
matrix = get_matrix(min_torch_version=args.min_torch_version)
print(json.dumps({"include": matrix}))


Expand Down
2 changes: 1 addition & 1 deletion .github/scripts/ljspeech/TTS/run-matcha.sh
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ function export_onnx() {
ls -lh *.onnx

if false; then
# THe CI machine does not have enough memory to run it
# The CI machine does not have enough memory to run it
#
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
Expand Down
152 changes: 152 additions & 0 deletions .github/workflows/baker_zh.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
name: baker_zh

on:
push:
branches:
- master
- baker-matcha-2

pull_request:
branches:
- master

workflow_dispatch:

concurrency:
group: baker-zh-${{ github.ref }}
cancel-in-progress: true

jobs:
generate_build_matrix:
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
  id: set-matrix
  run: |
    # Print the matrix once so it is visible in the job log for debugging.
    python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3"
    MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3")
    # The `::set-output` workflow command is deprecated; step outputs are
    # written to the file named by $GITHUB_OUTPUT instead. The matrix is a
    # single-line JSON string, so the simple name=value form is sufficient.
    echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"

baker_zh:
needs: generate_build_matrix
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Free space
shell: bash
run: |
ls -lh
df -h
rm -rf /opt/hostedtoolcache
df -h
echo "pwd: $PWD"
echo "github.workspace ${{ github.workspace }}"

- name: Run tests
uses: addnab/docker-run-action@v3
with:
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
options: |
--volume ${{ github.workspace }}/:/icefall
shell: bash
run: |
export PYTHONPATH=/icefall:$PYTHONPATH
cd /icefall

pip install onnx==1.17.0

pip list

git config --global --add safe.directory /icefall

.github/scripts/baker_zh/TTS/run-matcha.sh

- name: display files
shell: bash
run: |
ls -lh

- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
with:
name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
path: ./*.wav

- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
with:
name: step-2
path: ./model-steps-2.onnx

- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
with:
name: step-3
path: ./model-steps-3.onnx

- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
with:
name: step-4
path: ./model-steps-4.onnx

- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
with:
name: step-5
path: ./model-steps-5.onnx

- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
with:
name: step-6
path: ./model-steps-6.onnx

- name: Upload models to huggingface
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
shell: bash
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
d=matcha-icefall-zh-baker

GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/$d hf
cp -av $d/* hf/

pushd hf
git add .

git config --global user.name "csukuangfj"
git config --global user.email "[email protected]"
git config --global lfs.allowincompletepush true

git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$d main || true
popd

- name: Release exported onnx models
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
overwrite: true
file: matcha-icefall-*.tar.bz2
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: tts-models
6 changes: 6 additions & 0 deletions egs/baker_zh/TTS/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
path.sh
*.onnx
*.wav
generator_v1
generator_v2
generator_v3
Loading
Loading