Skip to content

Commit 45d0c05

Browse files
committed
Merge branch 'uos' into 'main'
batch功能支持 (batch feature support). See merge request ai/sensevoice.cpp!2
2 parents e576d14 + 300e6f9 commit 45d0c05

File tree

11 files changed

+856
-286
lines changed

11 files changed

+856
-286
lines changed

examples/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ if(SDL2_FOUND)
1717
endif()
1818
target_link_libraries(common ${ExampleLibs})
1919
add_subdirectory(quantize)
20+
add_subdirectory(zcr_main)
2021
if(SDL2_FOUND)
2122
add_subdirectory(stream)
2223
endif()

examples/stream/stream.cc

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,10 @@ struct sense_voice_stream_params {
1313
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
1414
int32_t n_processors = 1;
1515
int32_t capture_id = -1;
16-
int32_t beam_size = sense_voice_full_default_params(SENSE_VOICE_SAMPLING_BEAM_SEARCH).beam_search.beam_size;
17-
int32_t best_of = sense_voice_full_default_params(SENSE_VOICE_SAMPLING_GREEDY).greedy.best_of;
1816
int32_t chunk_size = 100; // ms
1917
int32_t max_nomute_chunks = 8000 / chunk_size; // chunks
2018
int32_t min_mute_chunks = 1000 / chunk_size; // chunks
2119

22-
bool no_context = true;
23-
bool no_timestamps = false;
2420
bool use_gpu = true;
2521
bool flash_attn = false;
2622
bool debug_mode = false;
@@ -46,7 +42,6 @@ void sense_voice_stream_usage(int /*argc*/, char ** argv, const sense_voice_stre
4642
fprintf(stderr, " --use-vad [%-7s] when the first non-silent chunk is too far away\n", params.use_vad ? "true" : "false");
4743
fprintf(stderr, " --use-prefix [%-7s] use sense voice prefix\n", params.use_prefix ? "true" : "false");
4844
fprintf(stderr, " -c ID, --capture ID [%-7d] [Device] capture device ID\n", params.capture_id);
49-
fprintf(stderr, " -kc, --keep-context [%-7s] [IO] keep context between audio chunks\n", params.no_context ? "false" : "true");
5045
fprintf(stderr, " -l LANG, --language LANG [%-7s] [SenseVoice] spoken language\n", params.language.c_str());
5146
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] [SenseVoice] model path\n", params.model.c_str());
5247
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] [IO] text output file name\n", params.fname_out.c_str());
@@ -67,7 +62,6 @@ static bool get_stream_params(int argc, char ** argv, sense_voice_stream_params
6762
}
6863
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
6964
else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
70-
else if (arg == "-kc" || arg == "--keep-context") { params.no_context = false; }
7165
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
7266
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
7367
else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
@@ -114,10 +108,6 @@ int main(int argc, char** argv)
114108
const int keep_nomute_step = params.chunk_size * params.min_mute_chunks * 1e-3 * SENSE_VOICE_SAMPLE_RATE;
115109
const int max_nomute_step = params.chunk_size * params.max_nomute_chunks * 1e-3 * SENSE_VOICE_SAMPLE_RATE;
116110

117-
params.no_timestamps = !params.use_vad;
118-
params.no_context |= params.use_vad;
119-
// params.max_tokens = 0;
120-
121111
audio_async audio(params.chunk_size << 2);
122112
if (!audio.init(params.capture_id, SENSE_VOICE_SAMPLE_RATE)) {
123113
fprintf(stderr, "%s: audio.init() failed!\n", __func__);
@@ -147,13 +137,12 @@ int main(int argc, char** argv)
147137

148138
{
149139
fprintf(stderr, "\n");
150-
fprintf(stderr, "%s: processing samples (chunk = %d ms / max_nomute_chunk = %d / min_mute_chunk = %d), %d threads, timestamps = %d ...\n",
140+
fprintf(stderr, "%s: processing samples (chunk = %d ms / max_nomute_chunk = %d / min_mute_chunk = %d), %d threads ...\n",
151141
__func__,
152142
params.chunk_size,
153143
params.max_nomute_chunks,
154144
params.min_mute_chunks,
155-
params.n_threads,
156-
params.no_timestamps ? 0 : 1);
145+
params.n_threads);
157146

158147
if (!params.use_vad) {
159148
fprintf(stderr, "%s: not use VAD, will print identified result per %d ms\n", __func__, params.chunk_size * params.max_nomute_chunks);
@@ -167,14 +156,9 @@ int main(int argc, char** argv)
167156

168157
sense_voice_full_params wparams = sense_voice_full_default_params(SENSE_VOICE_SAMPLING_GREEDY);
169158
{
170-
wparams.print_progress = false;
171-
wparams.no_timestamps = !params.no_timestamps;
172159
wparams.language = params.language.c_str();
173160
wparams.n_threads = params.n_threads;
174161
wparams.debug_mode = params.debug_mode;
175-
176-
wparams.greedy.best_of = params.best_of;
177-
wparams.beam_search.beam_size = params.beam_size;
178162
}
179163

180164
int idenitified_floats = 0, R_new_chunk = 0, L_new_chunk = 0;

examples/zcr_main/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
include_directories(${PROJECT_SOURCE_DIR}/sense-voice/csrc)
2+
add_executable(sense-voice-zcr-main main.cc)
3+
target_link_libraries(sense-voice-zcr-main PRIVATE sense-voice-core ggml common)

0 commit comments

Comments
 (0)