From 666d69b20d53796420593d99b0c0d6e9cd2212cc Mon Sep 17 00:00:00 2001
From: Fangjun Kuang <csukuangfj@gmail.com>
Date: Fri, 17 Nov 2023 18:12:59 +0800
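Subject: [PATCH] Rename train2.py to avoid confusion (#1386)

Rename train2.py to do_not_use_it_directly.py in the aishell,
commonvoice, csj and librispeech recipes listed below, and make main()
in each renamed file raise RuntimeError so that it is not run as a
training script by mistake. The export scripts that import from
train2 and the READMEs that link to it are updated to the new name.

This patch also contains two unrelated changes:

- .github/scripts/run-multi-zh_hans-zipformer.sh: remove $repo before
  the next test starts, and add the missing newline at end of file.
- egs/aishell/ASR/prepare.sh: when $lang_char_dir/HLG.fst is missing,
  call prepare_lang_fst.py with $lang_char_dir and
  G_3_gram_char.fst.txt instead of data/lang_phone and
  G_3_gram.fst.txt.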
---
 .github/scripts/run-multi-zh_hans-zipformer.sh               | 4 +++-
 egs/aishell/ASR/prepare.sh                                   | 5 ++---
 .../{train2.py => do_not_use_it_directly.py}                 | 1 +
 egs/aishell/ASR/pruned_transducer_stateless7/export-onnx.py  | 2 +-
 .../{train2.py => do_not_use_it_directly.py}                 | 1 +
 .../ASR/pruned_transducer_stateless7_streaming/README.md     | 4 ++--
 .../{train2.py => do_not_use_it_directly.py}                 | 1 +
 .../{train2.py => do_not_use_it_directly.py}                 | 1 +
 .../export-for-ncnn.py                                       | 2 +-
 .../{train2.py => do_not_use_it_directly.py}                 | 1 +
 .../conv_emformer_transducer_stateless2/export-for-ncnn.py   | 2 +-
 .../ASR/conv_emformer_transducer_stateless2/export-onnx.py   | 2 +-
 .../ASR/pruned_transducer_stateless7_streaming/README.md     | 4 ++--
 .../{train2.py => do_not_use_it_directly.py}                 | 1 +
 .../export-for-ncnn-zh.py                                    | 2 +-
 .../export-for-ncnn.py                                       | 2 +-
 .../do_not_use_it_directly.py                                | 1 +
 .../export-for-ncnn.py                                       | 2 +-
 .../pruned_transducer_stateless7_streaming_multi/train2.py   | 1 -
 19 files changed, 23 insertions(+), 16 deletions(-)
 rename egs/aishell/ASR/pruned_transducer_stateless7/{train2.py => do_not_use_it_directly.py} (99%)
 rename egs/aishell/ASR/pruned_transducer_stateless7_streaming/{train2.py => do_not_use_it_directly.py} (99%)
 rename egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/{train2.py => do_not_use_it_directly.py} (99%)
 rename egs/csj/ASR/pruned_transducer_stateless7_streaming/{train2.py => do_not_use_it_directly.py} (99%)
 rename egs/librispeech/ASR/conv_emformer_transducer_stateless2/{train2.py => do_not_use_it_directly.py} (99%)
 rename egs/librispeech/ASR/pruned_transducer_stateless7_streaming/{train2.py => do_not_use_it_directly.py} (99%)
 create mode 120000 egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/do_not_use_it_directly.py
 delete mode 120000 egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/train2.py

diff --git a/.github/scripts/run-multi-zh_hans-zipformer.sh b/.github/scripts/run-multi-zh_hans-zipformer.sh
index dd32a94f88..cbd86a4d36 100755
--- a/.github/scripts/run-multi-zh_hans-zipformer.sh
+++ b/.github/scripts/run-multi-zh_hans-zipformer.sh
@@ -51,6 +51,8 @@ for method in modified_beam_search fast_beam_search; do
   $repo/test_wavs/DEV_T0000000002.wav
 done
 
+rm -rf $repo
+
 log "==== Test icefall-asr-multi-zh-hans-zipformer-ctc-2023-10-24 ===="
 repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-2023-10-24/
 
@@ -92,4 +94,4 @@ for method in modified_beam_search fast_beam_search; do
   $repo/test_wavs/DEV_T0000000000.wav \
   $repo/test_wavs/DEV_T0000000001.wav \
   $repo/test_wavs/DEV_T0000000002.wav
-done
\ No newline at end of file
+done
diff --git a/egs/aishell/ASR/prepare.sh b/egs/aishell/ASR/prepare.sh
index d36dc5ed30..9f73a2073c 100755
--- a/egs/aishell/ASR/prepare.sh
+++ b/egs/aishell/ASR/prepare.sh
@@ -261,10 +261,9 @@ if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then
   fi
 
   if [ ! -f $lang_char_dir/HLG.fst ]; then
-    lang_phone_dir=data/lang_phone
     ./local/prepare_lang_fst.py  \
-      --lang-dir $lang_phone_dir \
-      --ngram-G ./data/lm/G_3_gram.fst.txt
+      --lang-dir $lang_char_dir \
+      --ngram-G ./data/lm/G_3_gram_char.fst.txt
   fi
 fi
 
diff --git a/egs/aishell/ASR/pruned_transducer_stateless7/train2.py b/egs/aishell/ASR/pruned_transducer_stateless7/do_not_use_it_directly.py
similarity index 99%
rename from egs/aishell/ASR/pruned_transducer_stateless7/train2.py
rename to egs/aishell/ASR/pruned_transducer_stateless7/do_not_use_it_directly.py
index 057af297f0..6027273b2b 100755
--- a/egs/aishell/ASR/pruned_transducer_stateless7/train2.py
+++ b/egs/aishell/ASR/pruned_transducer_stateless7/do_not_use_it_directly.py
@@ -1234,6 +1234,7 @@ def scan_pessimistic_batches_for_oom(
 
 
 def main():
+    raise RuntimeError("Please don't use this file directly!")
     parser = get_parser()
     AsrDataModule.add_arguments(parser)
     args = parser.parse_args()
diff --git a/egs/aishell/ASR/pruned_transducer_stateless7/export-onnx.py b/egs/aishell/ASR/pruned_transducer_stateless7/export-onnx.py
index 2a9fc57d5f..39d988cd04 100755
--- a/egs/aishell/ASR/pruned_transducer_stateless7/export-onnx.py
+++ b/egs/aishell/ASR/pruned_transducer_stateless7/export-onnx.py
@@ -56,7 +56,7 @@
 from decoder2 import Decoder
 from onnxruntime.quantization import QuantType, quantize_dynamic
 from scaling_converter import convert_scaled_to_non_scaled
-from train2 import add_model_arguments, get_params, get_transducer_model
+from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model
 from zipformer import Zipformer
 
 from icefall.checkpoint import (
diff --git a/egs/aishell/ASR/pruned_transducer_stateless7_streaming/train2.py b/egs/aishell/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py
similarity index 99%
rename from egs/aishell/ASR/pruned_transducer_stateless7_streaming/train2.py
rename to egs/aishell/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py
index 88eb341048..3c13c19c69 100755
--- a/egs/aishell/ASR/pruned_transducer_stateless7_streaming/train2.py
+++ b/egs/aishell/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py
@@ -1233,6 +1233,7 @@ def scan_pessimistic_batches_for_oom(
 
 
 def main():
+    raise RuntimeError("Please don't use this file directly!")
     parser = get_parser()
     AishellAsrDataModule.add_arguments(parser)
     args = parser.parse_args()
diff --git a/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/README.md b/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/README.md
index 991875aaa3..6c20bab2c0 100644
--- a/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/README.md
+++ b/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/README.md
@@ -4,6 +4,6 @@ See https://k2-fsa.github.io/icefall/recipes/Streaming-ASR/librispeech/zipformer
 
 [./emformer.py](./emformer.py) and [./train.py](./train.py)
 are basically the same as
-[./emformer2.py](./emformer2.py) and [./train2.py](./train2.py).
-The only purpose of [./emformer2.py](./emformer2.py) and [./train2.py](./train2.py)
+[./emformer2.py](./emformer2.py) and [./do_not_use_it_directly.py](./do_not_use_it_directly.py).
+The only purpose of [./emformer2.py](./emformer2.py) and [./do_not_use_it_directly.py](./do_not_use_it_directly.py)
 is for exporting to [sherpa-ncnn](https://github.com/k2-fsa/sherpa-ncnn).
diff --git a/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/train2.py b/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py
similarity index 99%
rename from egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/train2.py
rename to egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py
index c09c9537c5..61a3f27db6 100755
--- a/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/train2.py
+++ b/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py
@@ -1237,6 +1237,7 @@ def scan_pessimistic_batches_for_oom(
 
 
 def main():
+    raise RuntimeError("Please don't use this file directly!")
     parser = get_parser()
     CommonVoiceAsrDataModule.add_arguments(parser)
     args = parser.parse_args()
diff --git a/egs/csj/ASR/pruned_transducer_stateless7_streaming/train2.py b/egs/csj/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py
similarity index 99%
rename from egs/csj/ASR/pruned_transducer_stateless7_streaming/train2.py
rename to egs/csj/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py
index 4c866ddd81..acde72d802 100755
--- a/egs/csj/ASR/pruned_transducer_stateless7_streaming/train2.py
+++ b/egs/csj/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py
@@ -1274,6 +1274,7 @@ def scan_pessimistic_batches_for_oom(
 
 
 def main():
+    raise RuntimeError("Please don't use this file directly!")
     parser = get_parser()
     CSJAsrDataModule.add_arguments(parser)
     Tokenizer.add_arguments(parser)
diff --git a/egs/csj/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py b/egs/csj/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py
index ebdb596a57..b210430c67 100755
--- a/egs/csj/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py
+++ b/egs/csj/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py
@@ -72,7 +72,7 @@
 import torch
 from scaling_converter import convert_scaled_to_non_scaled
 from tokenizer import Tokenizer
-from train2 import add_model_arguments, get_params, get_transducer_model
+from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model
 
 from icefall.checkpoint import (
     average_checkpoints,
diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/train2.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/do_not_use_it_directly.py
similarity index 99%
rename from egs/librispeech/ASR/conv_emformer_transducer_stateless2/train2.py
rename to egs/librispeech/ASR/conv_emformer_transducer_stateless2/do_not_use_it_directly.py
index 420dc1065a..d614f0914e 100755
--- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/train2.py
+++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/do_not_use_it_directly.py
@@ -1099,6 +1099,7 @@ def scan_pessimistic_batches_for_oom(
 
 
 def main():
+    raise RuntimeError("Please don't use this file directly!")
     parser = get_parser()
     LibriSpeechAsrDataModule.add_arguments(parser)
     args = parser.parse_args()
diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-for-ncnn.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-for-ncnn.py
index 85dbd4661a..953f95c458 100755
--- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-for-ncnn.py
+++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-for-ncnn.py
@@ -39,8 +39,8 @@
 
 import k2
 import torch
+from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model
 from scaling_converter import convert_scaled_to_non_scaled
-from train2 import add_model_arguments, get_params, get_transducer_model
 
 from icefall.checkpoint import (
     average_checkpoints,
diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-onnx.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-onnx.py
index ab046557fb..1e59e08583 100755
--- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-onnx.py
+++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-onnx.py
@@ -61,7 +61,7 @@
 from decoder import Decoder
 from emformer import Emformer
 from scaling_converter import convert_scaled_to_non_scaled
-from train2 import add_model_arguments, get_params, get_transducer_model
+from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model
 
 from icefall.checkpoint import (
     average_checkpoints,
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/README.md b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/README.md
index d3691e6473..0f3c63e752 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/README.md
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/README.md
@@ -4,7 +4,7 @@ See https://k2-fsa.github.io/icefall/recipes/Streaming-ASR/librispeech/zipformer
 
 [./emformer.py](./emformer.py) and [./train.py](./train.py)
 are basically the same as
-[./emformer2.py](./emformer2.py) and [./train2.py](./train2.py).
-The only purpose of [./emformer2.py](./emformer2.py) and [./train2.py](./train2.py)
+[./emformer2.py](./emformer2.py) and [./do_not_use_it_directly.py](./do_not_use_it_directly.py).
+The only purpose of [./emformer2.py](./emformer2.py) and [./do_not_use_it_directly.py](./do_not_use_it_directly.py)
 is for exporting to [sherpa-ncnn](https://github.com/k2-fsa/sherpa-ncnn).
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/train2.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py
similarity index 99%
rename from egs/librispeech/ASR/pruned_transducer_stateless7_streaming/train2.py
rename to egs/librispeech/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py
index aa6c0668a9..cd26db6f3b 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/train2.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/do_not_use_it_directly.py
@@ -1234,6 +1234,7 @@ def scan_pessimistic_batches_for_oom(
 
 
 def main():
+    raise RuntimeError("Please don't use this file directly!")
     parser = get_parser()
     LibriSpeechAsrDataModule.add_arguments(parser)
     args = parser.parse_args()
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py
index 07de57a86f..a7d06a5ddb 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py
@@ -68,8 +68,8 @@
 
 import k2
 import torch
+from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model
 from scaling_converter import convert_scaled_to_non_scaled
-from train2 import add_model_arguments, get_params, get_transducer_model
 
 from icefall.checkpoint import (
     average_checkpoints,
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py
index 9a6b31268d..8f2178b1d6 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py
@@ -66,8 +66,8 @@
 
 import k2
 import torch
+from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model
 from scaling_converter import convert_scaled_to_non_scaled
-from train2 import add_model_arguments, get_params, get_transducer_model
 
 from icefall.checkpoint import (
     average_checkpoints,
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/do_not_use_it_directly.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/do_not_use_it_directly.py
new file mode 120000
index 0000000000..beeffaa03c
--- /dev/null
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/do_not_use_it_directly.py
@@ -0,0 +1 @@
+../pruned_transducer_stateless7_streaming/do_not_use_it_directly.py
\ No newline at end of file
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/export-for-ncnn.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/export-for-ncnn.py
index 9a6b31268d..8f2178b1d6 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/export-for-ncnn.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/export-for-ncnn.py
@@ -66,8 +66,8 @@
 
 import k2
 import torch
+from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model
 from scaling_converter import convert_scaled_to_non_scaled
-from train2 import add_model_arguments, get_params, get_transducer_model
 
 from icefall.checkpoint import (
     average_checkpoints,
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/train2.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/train2.py
deleted file mode 120000
index 3c3280b688..0000000000
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/train2.py
+++ /dev/null
@@ -1 +0,0 @@
-../pruned_transducer_stateless7_streaming/train2.py
\ No newline at end of file