From 666d69b20d53796420593d99b0c0d6e9cd2212cc Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 17 Nov 2023 18:12:59 +0800 Subject: [PATCH] Rename train2.py to avoid confusion (#1386) --- .github/scripts/run-multi-zh_hans-zipformer.sh | 4 +++- egs/aishell/ASR/prepare.sh | 5 ++--- .../{train2.py => do_not_use_it_directly.py} | 1 + egs/aishell/ASR/pruned_transducer_stateless7/export-onnx.py | 2 +- .../{train2.py => do_not_use_it_directly.py} | 1 + .../ASR/pruned_transducer_stateless7_streaming/README.md | 4 ++-- .../{train2.py => do_not_use_it_directly.py} | 1 + .../{train2.py => do_not_use_it_directly.py} | 1 + .../export-for-ncnn.py | 2 +- .../{train2.py => do_not_use_it_directly.py} | 1 + .../conv_emformer_transducer_stateless2/export-for-ncnn.py | 2 +- .../ASR/conv_emformer_transducer_stateless2/export-onnx.py | 2 +- .../ASR/pruned_transducer_stateless7_streaming/README.md | 4 ++-- .../{train2.py => do_not_use_it_directly.py} | 1 + .../export-for-ncnn-zh.py | 2 +- .../export-for-ncnn.py | 2 +- .../do_not_use_it_directly.py | 1 + .../export-for-ncnn.py | 2 +- .../pruned_transducer_stateless7_streaming_multi/train2.py | 1 - 19 files changed, 23 insertions(+), 16 deletions(-) rename egs/aishell/ASR/pruned_transducer_stateless7/{train2.py => do_not_use_it_directly.py} (99%) rename egs/aishell/ASR/pruned_transducer_stateless7_streaming/{train2.py => do_not_use_it_directly.py} (99%) rename egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/{train2.py => do_not_use_it_directly.py} (99%) rename egs/csj/ASR/pruned_transducer_stateless7_streaming/{train2.py => do_not_use_it_directly.py} (99%) rename egs/librispeech/ASR/conv_emformer_transducer_stateless2/{train2.py => do_not_use_it_directly.py} (99%) rename egs/librispeech/ASR/pruned_transducer_stateless7_streaming/{train2.py => do_not_use_it_directly.py} (99%) create mode 120000 egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/do_not_use_it_directly.py delete mode 120000 egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/train2.py diff --git a/.github/scripts/run-multi-zh_hans-zipformer.sh b/.github/scripts/run-multi-zh_hans-zipformer.sh index dd32a94f88..cbd86a4d36 100755 --- a/.github/scripts/run-multi-zh_hans-zipformer.sh +++ b/.github/scripts/run-multi-zh_hans-zipformer.sh @@ -51,6 +51,8 @@ for method in modified_beam_search fast_beam_search; do $repo/test_wavs/DEV_T0000000002.wav done +rm -rf $repo + log "==== Test icefall-asr-multi-zh-hans-zipformer-ctc-2023-10-24 ====" repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-2023-10-24/ @@ -92,4 +94,4 @@ for method in modified_beam_search fast_beam_search; do $repo/test_wavs/DEV_T0000000000.wav \ $repo/test_wavs/DEV_T0000000001.wav \ $repo/test_wavs/DEV_T0000000002.wav -done \ No newline at end of file +done diff --git a/egs/aishell/ASR/prepare.sh b/egs/aishell/ASR/prepare.sh index d36dc5ed30..9f73a2073c 100755 --- a/egs/aishell/ASR/prepare.sh +++ b/egs/aishell/ASR/prepare.sh @@ -261,10 +261,9 @@ if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then fi if [ ! -f $lang_char_dir/HLG.fst ]; then - lang_phone_dir=data/lang_phone ./local/prepare_lang_fst.py \ - --lang-dir $lang_phone_dir \ - --ngram-G ./data/lm/G_3_gram.fst.txt + --lang-dir $lang_char_dir \ + --ngram-G ./data/lm/G_3_gram_char.fst.txt fi fi diff --git a/egs/aishell/ASR/pruned_transducer_stateless7/train2.py b/egs/aishell/ASR/pruned_transducer_stateless7/do_not_use_it_directly.py similarity index 99% rename from egs/aishell/ASR/pruned_transducer_stateless7/train2.py rename to egs/aishell/ASR/pruned_transducer_stateless7/do_not_use_it_directly.py index 057af297f0..6027273b2b 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7/train2.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7/do_not_use_it_directly.py @@ -1234,6 +1234,7 @@ def scan_pessimistic_batches_for_oom( def main(): + raise RuntimeError("Please don't use this file directly!") parser = get_parser() AsrDataModule.add_arguments(parser) args = parser.parse_args() diff --git a/egs/aishell/ASR/pruned_transducer_stateless7/export-onnx.py b/egs/aishell/ASR/pruned_transducer_stateless7/export-onnx.py index 2a9fc57d5f..39d988cd04 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7/export-onnx.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7/export-onnx.py @@ -56,7 +56,7 @@ from decoder2 import Decoder from onnxruntime.quantization import QuantType, quantize_dynamic from scaling_converter import convert_scaled_to_non_scaled -from train2 import add_model_arguments, get_params, get_transducer_model +from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model from zipformer import Zipformer from icefall.checkpoint import ( diff --git a/egs/aishell/ASR/pruned_transducer_stateless7_streaming/train2.py b/egs/aishell/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py similarity index 99% rename from egs/aishell/ASR/pruned_transducer_stateless7_streaming/train2.py rename to egs/aishell/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py index 88eb341048..3c13c19c69 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7_streaming/train2.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py @@ -1233,6 +1233,7 @@ def scan_pessimistic_batches_for_oom( def main(): + raise RuntimeError("Please don't use this file directly!") parser = get_parser() AishellAsrDataModule.add_arguments(parser) args = parser.parse_args() diff --git a/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/README.md b/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/README.md index 991875aaa3..6c20bab2c0 100644 --- a/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/README.md +++ b/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/README.md @@ -4,6 +4,6 @@ See https://k2-fsa.github.io/icefall/recipes/Streaming-ASR/librispeech/zipformer [./emformer.py](./emformer.py) and [./train.py](./train.py) are basically the same as -[./emformer2.py](./emformer2.py) and [./train2.py](./train2.py). -The only purpose of [./emformer2.py](./emformer2.py) and [./train2.py](./train2.py) +[./emformer2.py](./emformer2.py) and [./do_not_use_it_directly.py](./do_not_use_it_directly.py). +The only purpose of [./emformer2.py](./emformer2.py) and [./do_not_use_it_directly.py](./do_not_use_it_directly.py) is for exporting to [sherpa-ncnn](https://github.com/k2-fsa/sherpa-ncnn). diff --git a/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/train2.py b/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py similarity index 99% rename from egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/train2.py rename to egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py index c09c9537c5..61a3f27db6 100755 --- a/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/train2.py +++ b/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py @@ -1237,6 +1237,7 @@ def scan_pessimistic_batches_for_oom( def main(): + raise RuntimeError("Please don't use this file directly!") parser = get_parser() CommonVoiceAsrDataModule.add_arguments(parser) args = parser.parse_args() diff --git a/egs/csj/ASR/pruned_transducer_stateless7_streaming/train2.py b/egs/csj/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py similarity index 99% rename from egs/csj/ASR/pruned_transducer_stateless7_streaming/train2.py rename to egs/csj/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py index 4c866ddd81..acde72d802 100755 --- a/egs/csj/ASR/pruned_transducer_stateless7_streaming/train2.py +++ b/egs/csj/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py @@ -1274,6 +1274,7 @@ def scan_pessimistic_batches_for_oom( def main(): + raise RuntimeError("Please don't use this file directly!") parser = get_parser() CSJAsrDataModule.add_arguments(parser) Tokenizer.add_arguments(parser) diff --git a/egs/csj/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py b/egs/csj/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py index ebdb596a57..b210430c67 100755 --- a/egs/csj/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py +++ b/egs/csj/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py @@ -72,7 +72,7 @@ import torch from scaling_converter import convert_scaled_to_non_scaled from tokenizer import Tokenizer -from train2 import add_model_arguments, get_params, get_transducer_model +from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model from icefall.checkpoint import ( average_checkpoints, diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/train2.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/do_not_use_it_directly.py similarity index 99% rename from egs/librispeech/ASR/conv_emformer_transducer_stateless2/train2.py rename to egs/librispeech/ASR/conv_emformer_transducer_stateless2/do_not_use_it_directly.py index 420dc1065a..d614f0914e 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/train2.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/do_not_use_it_directly.py @@ -1099,6 +1099,7 @@ def scan_pessimistic_batches_for_oom( def main(): + raise RuntimeError("Please don't use this file directly!") parser = get_parser() LibriSpeechAsrDataModule.add_arguments(parser) args = parser.parse_args() diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-for-ncnn.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-for-ncnn.py index 85dbd4661a..953f95c458 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-for-ncnn.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-for-ncnn.py @@ -39,8 +39,8 @@ import k2 import torch +from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model from scaling_converter import convert_scaled_to_non_scaled -from train2 import add_model_arguments, get_params, get_transducer_model from icefall.checkpoint import ( average_checkpoints, diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-onnx.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-onnx.py index ab046557fb..1e59e08583 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-onnx.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-onnx.py @@ -61,7 +61,7 @@ from decoder import Decoder from emformer import Emformer from scaling_converter import convert_scaled_to_non_scaled -from train2 import add_model_arguments, get_params, get_transducer_model +from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model from icefall.checkpoint import ( average_checkpoints, diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/README.md b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/README.md index d3691e6473..0f3c63e752 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/README.md +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/README.md @@ -4,7 +4,7 @@ See https://k2-fsa.github.io/icefall/recipes/Streaming-ASR/librispeech/zipformer [./emformer.py](./emformer.py) and [./train.py](./train.py) are basically the same as -[./emformer2.py](./emformer2.py) and [./train2.py](./train2.py). -The only purpose of [./emformer2.py](./emformer2.py) and [./train2.py](./train2.py) +[./emformer2.py](./emformer2.py) and [./do_not_use_it_directly.py](./do_not_use_it_directly.py). +The only purpose of [./emformer2.py](./emformer2.py) and [./do_not_use_it_directly.py](./do_not_use_it_directly.py) is for exporting to [sherpa-ncnn](https://github.com/k2-fsa/sherpa-ncnn). diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/train2.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py similarity index 99% rename from egs/librispeech/ASR/pruned_transducer_stateless7_streaming/train2.py rename to egs/librispeech/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py index aa6c0668a9..cd26db6f3b 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/train2.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py @@ -1234,6 +1234,7 @@ def scan_pessimistic_batches_for_oom( def main(): + raise RuntimeError("Please don't use this file directly!") parser = get_parser() LibriSpeechAsrDataModule.add_arguments(parser) args = parser.parse_args() diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py index 07de57a86f..a7d06a5ddb 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py @@ -68,8 +68,8 @@ import k2 import torch +from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model from scaling_converter import convert_scaled_to_non_scaled -from train2 import add_model_arguments, get_params, get_transducer_model from icefall.checkpoint import ( average_checkpoints, diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py index 9a6b31268d..8f2178b1d6 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py @@ -66,8 +66,8 @@ import k2 import torch +from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model from scaling_converter import convert_scaled_to_non_scaled -from train2 import add_model_arguments, get_params, get_transducer_model from icefall.checkpoint import ( average_checkpoints, diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/do_not_use_it_directly.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/do_not_use_it_directly.py new file mode 120000 index 0000000000..beeffaa03c --- /dev/null +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/do_not_use_it_directly.py @@ -0,0 +1 @@ +../pruned_transducer_stateless7_streaming/do_not_use_it_directly.py \ No newline at end of file diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/export-for-ncnn.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/export-for-ncnn.py index 9a6b31268d..8f2178b1d6 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/export-for-ncnn.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/export-for-ncnn.py @@ -66,8 +66,8 @@ import k2 import torch +from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model from scaling_converter import convert_scaled_to_non_scaled -from train2 import add_model_arguments, get_params, get_transducer_model from icefall.checkpoint import ( average_checkpoints, diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/train2.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/train2.py deleted file mode 120000 index 3c3280b688..0000000000 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/train2.py +++ /dev/null @@ -1 +0,0 @@ -../pruned_transducer_stateless7_streaming/train2.py \ No newline at end of file