Skip to content
This repository was archived by the owner on Jun 19, 2025. It is now read-only.

Commit feb33f8

Browse files
danielefernandesbernardohenz
authored and committed
Loading model from both path or array of bytes
1 parent 0c020d1 commit feb33f8

31 files changed

+647
-104
lines changed

native_client/Makefile

100644 → 100755
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ clean:
1919
rm -f deepspeech
2020

2121
$(DEEPSPEECH_BIN): client.cc Makefile
22-
$(CXX) $(CFLAGS) $(CFLAGS_DEEPSPEECH) $(SOX_CFLAGS) client.cc $(LDFLAGS) $(SOX_LDFLAGS)
22+
$(CXX) $(CFLAGS) $(CFLAGS_DEEPSPEECH) $(SOX_CFLAGS) client.cc $(LDFLAGS) $(SOX_LDFLAGS) -llzma -lbz2
2323
ifeq ($(OS),Darwin)
2424
install_name_tool -change bazel-out/local-opt/bin/native_client/libdeepspeech.so @rpath/libdeepspeech.so deepspeech
2525
endif

native_client/alphabet.cc

100644 → 100755
File mode changed.

native_client/alphabet.h

100644 → 100755
File mode changed.

native_client/args.h

100644 → 100755
Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,8 @@ bool extended_metadata = false;
3434

3535
bool json_output = false;
3636

37+
bool init_from_array_of_bytes = false;
38+
3739
int json_candidate_transcripts = 3;
3840

3941
int stream_size = 0;
@@ -59,6 +61,7 @@ void PrintHelp(const char* bin)
5961
"\t--candidate_transcripts NUMBER\tNumber of candidate transcripts to include in JSON output\n"
6062
"\t--stream size\t\t\tRun in stream mode, output intermediate results\n"
6163
"\t--hot_words\t\t\tHot-words and their boosts. Word:Boost pairs are comma-separated\n"
64+
"\t--init_from_bytes\t\tTest init model and scorer from array of bytes\n"
6265
"\t--help\t\t\t\tShow help\n"
6366
"\t--version\t\t\tPrint version and exits\n";
6467
char* version = DS_Version();
@@ -80,6 +83,7 @@ bool ProcessArgs(int argc, char** argv)
8083
{"t", no_argument, nullptr, 't'},
8184
{"extended", no_argument, nullptr, 'e'},
8285
{"json", no_argument, nullptr, 'j'},
86+
{"init_from_bytes", no_argument, nullptr, 'B'},
8387
{"candidate_transcripts", required_argument, nullptr, 150},
8488
{"stream", required_argument, nullptr, 's'},
8589
{"hot_words", required_argument, nullptr, 'w'},
@@ -135,6 +139,10 @@ bool ProcessArgs(int argc, char** argv)
135139
case 'j':
136140
json_output = true;
137141
break;
142+
143+
case 'B':
144+
init_from_array_of_bytes = true;
145+
break;
138146

139147
case 150:
140148
json_candidate_transcripts = atoi(optarg);

native_client/client.cc

100644 → 100755
Lines changed: 28 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,8 @@
3333
#include <unistd.h>
3434
#endif // NO_DIR
3535
#include <vector>
36+
#include <iostream>
37+
#include <fstream>
3638

3739
#include "deepspeech.h"
3840
#include "args.h"
@@ -415,8 +417,21 @@ main(int argc, char **argv)
415417

416418
// Initialise DeepSpeech
417419
ModelState* ctx;
420+
std::string buffer_model_str;
418421
// sphinx-doc: c_ref_model_start
419-
int status = DS_CreateModel(model, &ctx);
422+
int status;
423+
if (init_from_array_of_bytes){
424+
// Reading model file to a char * buffer
425+
std::ifstream is_model( model, std::ios::binary );
426+
std::stringstream buffer_model;
427+
buffer_model << is_model.rdbuf();
428+
buffer_model_str = buffer_model.str();
429+
status = DS_CreateModelFromBuffer(buffer_model_str.c_str(), buffer_model_str.size(), &ctx);
430+
}else {
431+
// Keep old method due to backwards compatibility
432+
status = DS_CreateModel(model, &ctx);
433+
}
434+
420435
if (status != 0) {
421436
char* error = DS_ErrorCodeToErrorMessage(status);
422437
fprintf(stderr, "Could not create model: %s\n", error);
@@ -433,7 +448,18 @@ main(int argc, char **argv)
433448
}
434449

435450
if (scorer) {
436-
status = DS_EnableExternalScorer(ctx, scorer);
451+
if (init_from_array_of_bytes){
452+
// Reading scorer file to a string buffer
453+
std::ifstream is_scorer(scorer, std::ios::binary );
454+
std::stringstream buffer_scorer;
455+
buffer_scorer << is_scorer.rdbuf();
456+
std::string tmp_str_scorer = buffer_scorer.str();
457+
status = DS_EnableExternalScorerFromBuffer(ctx, tmp_str_scorer.c_str(), tmp_str_scorer.size());
458+
} else {
459+
// Keep old method due to backwards compatibility
460+
status = DS_EnableExternalScorer(ctx, scorer);
461+
}
462+
437463
if (status != 0) {
438464
fprintf(stderr, "Could not enable external scorer.\n");
439465
return 1;

native_client/ctcdecode/scorer.cpp

Lines changed: 53 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -29,22 +29,24 @@ static const int32_t FILE_VERSION = 6;
2929

3030
int
3131
Scorer::init(const std::string& lm_path,
32+
bool load_from_bytes,
3233
const Alphabet& alphabet)
3334
{
3435
set_alphabet(alphabet);
35-
return load_lm(lm_path);
36+
return load_lm(lm_path, load_from_bytes);
3637
}
3738

3839
int
3940
Scorer::init(const std::string& lm_path,
41+
bool load_from_bytes,
4042
const std::string& alphabet_config_path)
4143
{
42-
int err = alphabet_.init(alphabet_config_path.c_str());
44+
int err = alphabet_.init(alphabet_config_path.c_str()); // Do we need to make this initiable from bytes?
4345
if (err != 0) {
4446
return err;
4547
}
4648
setup_char_map();
47-
return load_lm(lm_path);
49+
return load_lm(lm_path, load_from_bytes);
4850
}
4951

5052
void
@@ -69,45 +71,60 @@ void Scorer::setup_char_map()
6971
}
7072
}
7173

72-
int Scorer::load_lm(const std::string& lm_path)
74+
int Scorer::load_lm(const std::string& lm_string, bool load_from_bytes)
7375
{
74-
// Check if file is readable to avoid KenLM throwing an exception
75-
const char* filename = lm_path.c_str();
76-
if (access(filename, R_OK) != 0) {
77-
return DS_ERR_SCORER_UNREADABLE;
78-
}
79-
80-
// Check if the file format is valid to avoid KenLM throwing an exception
81-
lm::ngram::ModelType model_type;
82-
if (!lm::ngram::RecognizeBinary(filename, model_type)) {
83-
return DS_ERR_SCORER_INVALID_LM;
76+
if (!load_from_bytes){
77+
// Check if file is readable to avoid KenLM throwing an exception
78+
const char* filename = lm_string.c_str();
79+
if (access(filename, R_OK) != 0) {
80+
return DS_ERR_SCORER_UNREADABLE;
81+
}
82+
83+
// Check if the file format is valid to avoid KenLM throwing an exception
84+
lm::ngram::ModelType model_type;
85+
if (!lm::ngram::RecognizeBinary(filename, model_type)) {
86+
return DS_ERR_SCORER_INVALID_LM;
87+
}
8488
}
8589

8690
// Load the LM
8791
lm::ngram::Config config;
8892
config.load_method = util::LoadMethod::LAZY;
89-
language_model_.reset(lm::ngram::LoadVirtual(filename, config));
90-
max_order_ = language_model_->Order();
91-
92-
uint64_t package_size;
93-
{
94-
util::scoped_fd fd(util::OpenReadOrThrow(filename));
95-
package_size = util::SizeFile(fd.get());
93+
if (load_from_bytes){
94+
language_model_.reset(lm::ngram::LoadVirtual(lm_string.c_str(), lm_string.size(), config));
95+
} else {
96+
language_model_.reset(lm::ngram::LoadVirtual(lm_string.c_str(), config));
9697
}
98+
99+
max_order_ = language_model_->Order();
100+
std::stringstream stst;
97101
uint64_t trie_offset = language_model_->GetEndOfSearchOffset();
98-
if (package_size <= trie_offset) {
99-
// File ends without a trie structure
100-
return DS_ERR_SCORER_NO_TRIE;
102+
103+
if (!load_from_bytes){
104+
uint64_t package_size;
105+
{
106+
util::scoped_fd fd(util::OpenReadOrThrow(lm_string.c_str()));
107+
package_size = util::SizeFile(fd.get());
108+
}
109+
110+
if (package_size <= trie_offset) {
111+
// File ends without a trie structure
112+
return DS_ERR_SCORER_NO_TRIE;
113+
}
114+
// Read metadata and trie from file
115+
std::ifstream fin(lm_string.c_str(), std::ios::binary);
116+
stst<<fin.rdbuf();
117+
} else {
118+
stst = std::stringstream(lm_string);
101119
}
102120

103-
// Read metadata and trie from file
104-
std::ifstream fin(lm_path, std::ios::binary);
105-
fin.seekg(trie_offset);
106-
return load_trie(fin, lm_path);
121+
stst.seekg(trie_offset);
122+
return load_trie(stst, lm_string, load_from_bytes);
107123
}
108124

109-
int Scorer::load_trie(std::ifstream& fin, const std::string& file_path)
125+
int Scorer::load_trie(std::stringstream& fin, const std::string& file_path, bool load_from_bytes)
110126
{
127+
111128
int magic;
112129
fin.read(reinterpret_cast<char*>(&magic), sizeof(magic));
113130
if (magic != MAGIC) {
@@ -140,9 +157,13 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path)
140157
reset_params(alpha, beta);
141158

142159
fst::FstReadOptions opt;
143-
opt.mode = fst::FstReadOptions::MAP;
144-
opt.source = file_path;
145-
dictionary.reset(FstType::Read(fin, opt));
160+
if (load_from_bytes) {
161+
dictionary.reset(fst::ConstFst<fst::StdArc>::Read(fin, opt));
162+
} else {
163+
opt.mode = fst::FstReadOptions::MAP;
164+
opt.source = file_path;
165+
dictionary.reset(FstType::Read(fin, opt));
166+
}
146167
return DS_ERR_OK;
147168
}
148169

native_client/ctcdecode/scorer.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,9 +39,11 @@ class Scorer {
3939
Scorer& operator=(const Scorer&) = delete;
4040

4141
int init(const std::string &lm_path,
42+
bool load_from_bytes,
4243
const Alphabet &alphabet);
4344

4445
int init(const std::string &lm_path,
46+
bool load_from_bytes,
4547
const std::string &alphabet_config_path);
4648

4749
double get_log_cond_prob(const std::vector<std::string> &words,
@@ -84,7 +86,7 @@ class Scorer {
8486
void fill_dictionary(const std::unordered_set<std::string> &vocabulary);
8587

8688
// load language model from given path
87-
int load_lm(const std::string &lm_path);
89+
int load_lm(const std::string &lm_path, bool load_from_bytes=false);
8890

8991
// language model weight
9092
double alpha = 0.;
@@ -98,7 +100,7 @@ class Scorer {
98100
// necessary setup after setting alphabet
99101
void setup_char_map();
100102

101-
int load_trie(std::ifstream& fin, const std::string& file_path);
103+
int load_trie(std::stringstream& fin, const std::string& file_path, bool load_from_bytes=false);
102104

103105
private:
104106
std::unique_ptr<lm::base::Model> language_model_;

native_client/deepspeech.cc

100644 → 100755
Lines changed: 48 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -263,8 +263,9 @@ StreamingState::processBatch(const vector<float>& buf, unsigned int n_steps)
263263
}
264264

265265
int
266-
DS_CreateModel(const char* aModelPath,
267-
ModelState** retval)
266+
DS_CreateModel_(const std::string &aModelString,
267+
bool init_from_bytes,
268+
ModelState** retval)
268269
{
269270
*retval = nullptr;
270271

@@ -277,7 +278,7 @@ DS_CreateModel(const char* aModelPath,
277278
LOGD("DeepSpeech: %s", ds_git_version());
278279
#endif
279280

280-
if (!aModelPath || strlen(aModelPath) < 1) {
281+
if ( (!init_from_bytes && (strlen(aModelString) < 1)) || (init_from_bytes && (bufferSize<1))) {
281282
std::cerr << "No model specified, cannot continue." << std::endl;
282283
return DS_ERR_NO_MODEL;
283284
}
@@ -294,8 +295,8 @@ DS_CreateModel(const char* aModelPath,
294295
std::cerr << "Could not allocate model state." << std::endl;
295296
return DS_ERR_FAIL_CREATE_MODEL;
296297
}
297-
298-
int err = model->init(aModelPath);
298+
299+
int err = model->init(aModelString, init_from_bytes, bufferSize);
299300
if (err != DS_ERR_OK) {
300301
return err;
301302
}
@@ -304,6 +305,22 @@ DS_CreateModel(const char* aModelPath,
304305
return DS_ERR_OK;
305306
}
306307

308+
int
309+
DS_CreateModel(const char* aModelPath,
310+
ModelState** retval)
311+
{
312+
return DS_CreateModel_(aModelPath, false, retval);
313+
}
314+
315+
int
316+
DS_CreateModelFromBuffer(const char* aModelBuffer,
317+
size_t bufferSize,
318+
ModelState** retval)
319+
{
320+
return DS_CreateModel_(aModelBuffer, true, retval, bufferSize);
321+
}
322+
323+
307324
unsigned int
308325
DS_GetModelBeamWidth(const ModelState* aCtx)
309326
{
@@ -330,18 +347,41 @@ DS_FreeModel(ModelState* ctx)
330347
}
331348

332349
int
333-
DS_EnableExternalScorer(ModelState* aCtx,
334-
const char* aScorerPath)
350+
DS_EnableExternalScorer_(ModelState* aCtx,
351+
const std::string &aScorerString,
352+
bool init_from_bytes)
335353
{
336354
std::unique_ptr<Scorer> scorer(new Scorer());
337-
int err = scorer->init(aScorerPath, aCtx->alphabet_);
355+
356+
int err;
357+
if (init_from_bytes)
358+
err = scorer->init(std::string(aScorerString, bufferSize), init_from_bytes, aCtx->alphabet_);
359+
else
360+
err = scorer->init(aScorerString, init_from_bytes, aCtx->alphabet_);
361+
362+
338363
if (err != 0) {
339364
return DS_ERR_INVALID_SCORER;
340365
}
341366
aCtx->scorer_ = std::move(scorer);
342367
return DS_ERR_OK;
343368
}
344369

370+
int
371+
DS_EnableExternalScorer(ModelState* aCtx,
372+
const char* aScorerPath)
373+
{
374+
return DS_EnableExternalScorer_(aCtx, aScorerPath, false);
375+
}
376+
377+
int
378+
DS_EnableExternalScorerFromBuffer(ModelState* aCtx,
379+
const char* aScorerBuffer,
380+
size_t bufferSize)
381+
{
382+
return DS_EnableExternalScorer_(aCtx, aScorerBuffer, true, bufferSize);
383+
}
384+
345385
int
346386
DS_AddHotWord(ModelState* aCtx,
347387
const char* word,

0 commit comments

Comments
 (0)