@@ -12,34 +12,20 @@ import androidx.appcompat.app.AppCompatActivity
1212import androidx.core.app.ActivityCompat
1313import kotlinx.android.synthetic.main.activity_main.*
1414import org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel
15- import org.mozilla.deepspeech.libdeepspeech.DeepSpeechStreamingState
1615import java.io.File
17-
16+ import java.util.concurrent.atomic.AtomicBoolean
1817
1918class MainActivity : AppCompatActivity () {
// DeepSpeech model instance; stays null until createModel() has loaded it.
private var model: DeepSpeechModel? = null

// Background thread running transcribe(); assigned each time recording starts.
private var transcriptionThread: Thread? = null

// Flag polled by the transcription loop and toggled from the UI side.
// The reference itself is never reassigned, only its value is flipped, so it is a `val`.
private val isRecording = AtomicBoolean(false)

// File names of the acoustic model and external scorer, resolved against the models directory.
private val TFLITE_MODEL_FILENAME = " deepspeech-0.8.0-models.tflite"
private val SCORER_FILENAME = " deepspeech-0.8.0-models.scorer"
4026
4127 private fun checkAudioPermission () {
42- // permission is automatically granted on sdk < 23 upon installation
28+ // Permission is automatically granted on SDK < 23 upon installation.
4329 if (Build .VERSION .SDK_INT >= 23 ) {
4430 val permission = Manifest .permission.RECORD_AUDIO
4531
@@ -50,21 +36,41 @@ class MainActivity : AppCompatActivity() {
5036 }
5137
/**
 * Captures microphone audio and streams it through the DeepSpeech model until
 * [isRecording] is cleared, showing intermediate and final transcriptions in the UI.
 *
 * Runs on the transcription thread; every view update is posted via [runOnUiThread].
 * If no model is loaded the method clears [isRecording] and returns without recording.
 */
private fun transcribe() {
    val activeModel = model
    if (activeModel == null) {
        // Nothing to feed audio into; clear the flag so the next click can start over.
        isRecording.set(false)
        return
    }

    // We read from the recorder in chunks of 2048 shorts. With a model that expects its input
    // at 16000Hz, this corresponds to 2048/16000 = 0.128s or 128ms.
    val audioBufferSize = 2048
    val audioData = ShortArray(audioBufferSize)

    // AudioRecord takes its buffer size in BYTES, not samples: 16-bit PCM uses two bytes per
    // sample. Never go below the platform minimum, which can exceed one chunk on some devices.
    // getMinBufferSize() returns a negative error code on failure, which maxOf() discards.
    val sampleRate = activeModel.sampleRate()
    val bytesPerSample = 2
    val recorderBufferBytes = maxOf(
        AudioRecord.getMinBufferSize(
            sampleRate,
            AudioFormat.CHANNEL_IN_MONO,
            AudioFormat.ENCODING_PCM_16BIT
        ),
        audioBufferSize * bytesPerSample
    )

    runOnUiThread { btnStartInference.text = " Stop Recording" }

    val streamContext = activeModel.createStream()

    val recorder = AudioRecord(
        MediaRecorder.AudioSource.VOICE_RECOGNITION,
        sampleRate,
        AudioFormat.CHANNEL_IN_MONO,
        AudioFormat.ENCODING_PCM_16BIT,
        recorderBufferBytes
    )

    try {
        recorder.startRecording()

        while (isRecording.get()) {
            // read() returns the number of shorts actually read, or a negative error code.
            val samplesRead = recorder.read(audioData, 0, audioBufferSize)
            if (samplesRead < 0) break
            if (samplesRead == 0) continue

            // Feed only the freshly captured samples, not stale data left in the buffer.
            activeModel.feedAudioContent(streamContext, audioData, samplesRead)
            val partial = activeModel.intermediateDecode(streamContext)
            runOnUiThread { transcription.text = partial }
        }
    } finally {
        // A new AudioRecord is created for every session, so it must be released each time.
        if (recorder.state == AudioRecord.STATE_INITIALIZED) {
            recorder.stop()
        }
        recorder.release()
        // Keep the flag consistent when the loop ended because of a read error.
        isRecording.set(false)
    }

    val decoded = activeModel.finishStream(streamContext)

    runOnUiThread {
        btnStartInference.text = " Start Recording"
        transcription.text = decoded
    }
}
6975
7076 private fun createModel (): Boolean {
@@ -73,73 +79,49 @@ class MainActivity : AppCompatActivity() {
7379 val scorerPath = " $modelsPath /$SCORER_FILENAME "
7480
7581 for (path in listOf (tfliteModelPath, scorerPath)) {
76- if (! ( File (path).exists() )) {
77- status.text = " Model creation failed: $path does not exist."
82+ if (! File (path).exists()) {
83+ status.append( " Model creation failed: $path does not exist.\n " )
7884 return false
7985 }
8086 }
8187
8288 model = DeepSpeechModel (tfliteModelPath)
83- model?.setBeamWidth(BEAM_WIDTH )
8489 model?.enableExternalScorer(scorerPath)
85- model?.setScorerAlphaBeta( LM_ALPHA , LM_BETA )
90+
8691 return true
8792 }
8893
/**
 * Starts a transcription session unless one is already running.
 *
 * The recording flag is flipped atomically from false to true; only when that
 * succeeds is a new worker thread created, stored and started.
 */
private fun startListening() {
    // Another session already owns the flag: nothing to do.
    if (!isRecording.compareAndSet(false, true)) return

    val worker = Thread(Runnable { transcribe() }, " Transcription Thread")
    transcriptionThread = worker
    worker.start()
}
123100
/**
 * Sets up the activity: inflates the layout, checks the audio permission and
 * tells the user where the model files are expected.
 */
override fun onCreate(savedInstanceState: Bundle?) {
    super.onCreate(savedInstanceState)
    setContentView(R.layout.activity_main)
    checkAudioPermission()

    // Resolve the application's external files directory and show it to the user.
    val modelsPath = "${getExternalFilesDir(null)}"

    status.text = " Ready. Copy model files to \" $modelsPath \" if running for the first time. \n "
}
135111
/**
 * Requests the end of the current session by clearing the recording flag,
 * which the loop in transcribe() polls on every iteration.
 */
private fun stopListening() = isRecording.set(false)
140115
141116 fun onRecordClick (v : View ? ) {
142- if (isRecording) {
117+ if (model == null ) {
118+ if (! createModel()) {
119+ return
120+ }
121+ status.append(" Created model.\n " )
122+ }
123+
124+ if (isRecording.get()) {
143125 stopListening()
144126 } else {
145127 startListening()
0 commit comments