Commit

Merge branch 'master' into einichi
baileyeet authored Jan 7, 2025
2 parents 564b632 + 8d60280 commit 5c142d4
Showing 297 changed files with 30,906 additions and 1,745 deletions.
167 changes: 167 additions & 0 deletions .github/scripts/baker_zh/TTS/run-matcha.sh
@@ -0,0 +1,167 @@
#!/usr/bin/env bash

set -ex

apt-get update
apt-get install -y sox

python3 -m pip install numba conformer==0.3.2 diffusers librosa
python3 -m pip install jieba


log() {
# This function is from espnet
local fname=${BASH_SOURCE[1]##*/}
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

cd egs/baker_zh/TTS
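
# Shrink the sizes hard-coded in prepare.sh so this test runs on a tiny
# subset of the data; the "git diff" below just records the patched values.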

sed -i.bak s/600/8/g ./prepare.sh
sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
sed -i.bak s/500/5/g ./prepare.sh
git diff

function prepare_data() {
# We have created a subset of the data for testing
#
mkdir -p download
pushd download
wget -q https://huggingface.co/csukuangfj/tmp-files/resolve/main/BZNSYP-samples.tar.bz2
tar xvf BZNSYP-samples.tar.bz2
mv BZNSYP-samples BZNSYP
rm BZNSYP-samples.tar.bz2
popd

./prepare.sh
tree .
}

function train() {
pushd ./matcha
sed -i.bak s/1500/3/g ./train.py
git diff .
popd
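
# Run a single, very short training epoch so the CI job finishes quickly;
# checkpoints are written to matcha/exp.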

./matcha/train.py \
--exp-dir matcha/exp \
--num-epochs 1 \
--save-every-n 1 \
--num-buckets 2 \
--tokens data/tokens.txt \
--max-duration 20

ls -lh matcha/exp
}

function infer() {
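# generator_v2 is a pre-trained HiFiGAN vocoder checkpoint; infer.py uses it
# to turn the predicted mel-spectrogram into a waveform.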
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2

./matcha/infer.py \
--num-buckets 2 \
--epoch 1 \
--exp-dir ./matcha/exp \
--tokens data/tokens.txt \
--cmvn ./data/fbank/cmvn.json \
--vocoder ./generator_v2 \
--input-text "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。" \
--output-wav ./generated.wav

ls -lh *.wav
soxi ./generated.wav
rm -v ./generated.wav
rm -v generator_v2
}

function export_onnx() {
pushd matcha/exp
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-baker-matcha-zh-2024-12-27/resolve/main/epoch-2000.pt
popd

pushd data/fbank
rm -v *.json
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-baker-matcha-zh-2024-12-27/resolve/main/cmvn.json
popd

./matcha/export_onnx.py \
--exp-dir ./matcha/exp \
--epoch 2000 \
--tokens ./data/tokens.txt \
--cmvn ./data/fbank/cmvn.json
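
# export_onnx.py writes model-steps-*.onnx acoustic models (one file per
# number of sampling steps); onnx_pretrained.py below pairs them with a
# HiFiGAN vocoder.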

ls -lh *.onnx

if false; then
# The CI machine does not have enough memory to run it
#
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3
python3 ./matcha/export_onnx_hifigan.py
else
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v1.onnx
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v2.onnx
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v3.onnx
fi

ls -lh *.onnx

python3 ./matcha/generate_lexicon.py

for v in v1 v2 v3; do
python3 ./matcha/onnx_pretrained.py \
--acoustic-model ./model-steps-6.onnx \
--vocoder ./hifigan_$v.onnx \
--tokens ./data/tokens.txt \
--lexicon ./lexicon.txt \
--input-text "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。" \
--output-wav /icefall/generated-matcha-tts-steps-6-$v.wav
done

ls -lh /icefall/*.wav
soxi /icefall/generated-matcha-tts-steps-6-*.wav
cp ./model-steps-*.onnx /icefall

d=matcha-icefall-zh-baker
mkdir $d
cp -v data/tokens.txt $d
cp -v lexicon.txt $d
cp model-steps-3.onnx $d
pushd $d
curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
tar xvf dict.tar.bz2
rm dict.tar.bz2

curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst

cat >README.md <<EOF
# Introduction
This model is trained using the dataset from
https://en.data-baker.com/datasets/freeDatasets/
The dataset contains 10,000 Chinese sentences recorded by a native female Chinese
speaker, amounting to about 12 hours of audio.
**Note**: The dataset is for non-commercial use only.
You can find the training code at
https://github.com/k2-fsa/icefall/tree/master/egs/baker_zh/TTS
EOF

ls -lh
popd
tar cvjf $d.tar.bz2 $d
mv $d.tar.bz2 /icefall
mv $d /icefall
}

prepare_data
train
infer
export_onnx

rm -rfv generator_v* matcha/exp
git checkout .
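
For reference, here is a minimal sketch of how this new CI script could be exercised outside of GitHub Actions. The script runs prepare_data, train, infer and export_onnx in sequence, expects to start from the root of an icefall checkout, and copies its artifacts into /icefall; the docker image tag and the mount point below are assumptions, since the workflow wiring is not part of the files shown here.

# Hypothetical image tag; any image providing torch, k2, kaldifeat and lhotse should work.
IMAGE=ghcr.io/k2-fsa/icefall:cpu-py3.12-torch2.5.1
docker run --rm -v "$PWD":/icefall -w /icefall "$IMAGE" \
  bash .github/scripts/baker_zh/TTS/run-matcha.sh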
10 changes: 7 additions & 3 deletions .github/scripts/docker/Dockerfile
@@ -31,12 +31,15 @@ LABEL github_repo="https://github.com/k2-fsa/icefall"

# Install dependencies
RUN pip install --no-cache-dir \
torch==${TORCH_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/cpu/torch_stable.html \
torch==${TORCH_VERSION}+cpu -f https://download.pytorch.org/whl/torch \
torchaudio==${TORCHAUDIO_VERSION}+cpu -f https://download.pytorch.org/whl/torchaudio \
k2==${_K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \
\
git+https://github.com/lhotse-speech/lhotse \
kaldifeat==${_KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \
conformer==0.3.2 \
cython \
diffusers \
dill \
espnet_tts_frontend \
graphviz \
@@ -45,10 +48,11 @@ RUN pip install --no-cache-dir \
kaldialign \
kaldifst \
kaldilm \
matplotlib \
librosa \
"matplotlib<=3.9.4" \
multi_quantization \
numba \
numpy \
"numpy<2.0" \
onnxoptimizer \
onnxsim \
onnx \
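
The Dockerfile consumes the torch, torchaudio, k2 and kaldifeat versions through build-time variables; their exact ARG names are not visible in this hunk, so the names below are only placeholders. Under that assumption, building an image for one entry of the matrix (versions taken from the matrix script that follows) might look roughly like:

docker build \
  --build-arg TORCH_VERSION=2.5.1 \
  --build-arg TORCHAUDIO_VERSION=2.5.1 \
  --build-arg K2_VERSION=1.24.4.dev20241122 \
  --build-arg KALDIFEAT_VERSION=1.25.5.dev20241126 \
  -t icefall-ci:torch2.5.1-cpu \
  -f .github/scripts/docker/Dockerfile .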
66 changes: 44 additions & 22 deletions .github/scripts/docker/generate_build_matrix.py
@@ -2,9 +2,19 @@
# Copyright 2023 Xiaomi Corp. (authors: Fangjun Kuang)


import argparse
import json


def get_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"--min-torch-version",
help="Minimu torch version",
)
return parser.parse_args()


def version_gt(a, b):
a_major, a_minor = list(map(int, a.split(".")))[:2]
b_major, b_minor = list(map(int, b.split(".")))[:2]
@@ -42,22 +52,34 @@ def get_torchaudio_version(torch_version):
return torch_version


def get_matrix():
k2_version = "1.24.4.dev20240223"
kaldifeat_version = "1.25.4.dev20240223"
version = "20240725"

def get_matrix(min_torch_version):
k2_version = "1.24.4.dev20241029"
kaldifeat_version = "1.25.5.dev20241029"
version = "20241218"

# torchaudio 2.5.0 does not support python 3.13

python_version = ["3.8", "3.9", "3.10", "3.11", "3.12"]
torch_version = []
torch_version += ["1.13.0", "1.13.1"]
torch_version += ["2.0.0", "2.0.1"]
torch_version += ["2.1.0", "2.1.1", "2.1.2"]
torch_version += ["2.2.0", "2.2.1", "2.2.2"]
# torch_version += ["2.1.0", "2.1.1", "2.1.2"]
# torch_version += ["2.2.0", "2.2.1", "2.2.2"]
# Test only torch >= 2.3.0
torch_version += ["2.3.0", "2.3.1"]
torch_version += ["2.4.0"]

torch_version += ["2.4.1"]
torch_version += ["2.5.0"]
torch_version += ["2.5.1"]

matrix = []
for p in python_version:
for t in torch_version:
if min_torch_version and version_gt(min_torch_version, t):
continue

# torchaudio <= 1.13.x supports only python <= 3.10

if version_gt(p, "3.10") and not version_gt(t, "2.0"):
@@ -67,21 +89,20 @@ def get_matrix():
if version_gt(p, "3.11") and not version_gt(t, "2.1"):
continue

k2_version_2 = k2_version
kaldifeat_version_2 = kaldifeat_version

if t == "2.2.2":
k2_version_2 = "1.24.4.dev20240328"
kaldifeat_version_2 = "1.25.4.dev20240329"
elif t == "2.3.0":
k2_version_2 = "1.24.4.dev20240425"
kaldifeat_version_2 = "1.25.4.dev20240425"
elif t == "2.3.1":
k2_version_2 = "1.24.4.dev20240606"
kaldifeat_version_2 = "1.25.4.dev20240606"
elif t == "2.4.0":
k2_version_2 = "1.24.4.dev20240725"
kaldifeat_version_2 = "1.25.4.dev20240725"
if version_gt(p, "3.12") and not version_gt(t, "2.4"):
continue

if version_gt(t, "2.4") and version_gt("3.10", p):
# torch>=2.5 requires python >= 3.10
continue


if t == "2.5.1":
k2_version_2 = "1.24.4.dev20241122"
kaldifeat_version_2 = "1.25.5.dev20241126"
else:
k2_version_2 = k2_version
kaldifeat_version_2 = kaldifeat_version

matrix.append(
{
@@ -97,7 +118,8 @@


def main():
matrix = get_matrix()
args = get_args()
matrix = get_matrix(min_torch_version=args.min_torch_version)
print(json.dumps({"include": matrix}))


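
As a quick sanity check, the matrix script can be run locally. The new --min-torch-version flag drops every entry whose torch version is older than the given one, and the script prints a JSON object with an "include" list in the format expected by a GitHub Actions matrix:

# Keep only torch >= 2.3 entries and pretty-print the resulting matrix.
python3 .github/scripts/docker/generate_build_matrix.py --min-torch-version 2.3 \
  | python3 -m json.tool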
