From 13ad4e0729e559639726ed827c6ba32bb040dbd4 Mon Sep 17 00:00:00 2001 From: Francisco Riadigos Date: Mon, 13 Aug 2018 22:14:25 +0100 Subject: [PATCH] Updating to an accurate and working Synthesizer --- .../android/voice/GoogleSpeechRecognizer.java | 7 ++- .../voice/GoogleSpeechSynthesizer.java | 60 +++++++------------ .../android/voice/AndroidAudioRecorder.java | 2 +- 3 files changed, 27 insertions(+), 42 deletions(-) diff --git a/sdk-addon-google-speech/src/main/java/com/chattylabs/sdk/android/voice/GoogleSpeechRecognizer.java b/sdk-addon-google-speech/src/main/java/com/chattylabs/sdk/android/voice/GoogleSpeechRecognizer.java index a08154c..c4520a0 100644 --- a/sdk-addon-google-speech/src/main/java/com/chattylabs/sdk/android/voice/GoogleSpeechRecognizer.java +++ b/sdk-addon-google-speech/src/main/java/com/chattylabs/sdk/android/voice/GoogleSpeechRecognizer.java @@ -74,6 +74,9 @@ public void onVoiceStart() { @Override public void onVoice(byte[] data, int size) { + + + RecognitionConfig.AudioEncoding encoding = RecognitionConfig.AudioEncoding.LINEAR16; RecognitionConfig config = RecognitionConfig.newBuilder() @@ -81,11 +84,11 @@ public void onVoice(byte[] data, int size) { //.setProfanityFilter(true) .setEncoding(encoding) .setSampleRateHertz(mAudioRecorder.getSampleRate()) - .setLanguageCode(getDefaultLanguageCode()) + .setLanguageCode("en-US") .build(); RecognitionAudio audio = RecognitionAudio.newBuilder() - .setContent(ByteString.copyFrom(data)) + .setContent(ByteString.copyFrom(data, 0, size)) .build(); try (SpeechClient speech = generateFromRawFile( diff --git a/sdk-addon-google-speech/src/main/java/com/chattylabs/sdk/android/voice/GoogleSpeechSynthesizer.java b/sdk-addon-google-speech/src/main/java/com/chattylabs/sdk/android/voice/GoogleSpeechSynthesizer.java index 64459af..6fd034e 100644 --- a/sdk-addon-google-speech/src/main/java/com/chattylabs/sdk/android/voice/GoogleSpeechSynthesizer.java +++ b/sdk-addon-google-speech/src/main/java/com/chattylabs/sdk/android/voice/GoogleSpeechSynthesizer.java @@ -58,8 +58,6 @@ public final class GoogleSpeechSynthesizer extends BaseSpeechSynthesizer { private AudioConfig audioConfig; private MediaPlayer mediaPlayer; private final ConditionVariable mCondVar = new ConditionVariable(); - private boolean completed; //released - private int extraCode; //released GoogleSpeechSynthesizer(Application application, ComponentConfig configuration, @@ -75,11 +73,10 @@ public final class GoogleSpeechSynthesizer extends BaseSpeechSynthesizer { @Override public void setup(SynthesizerListener.OnSetup onSynthesizerSetup) { logger.i(TAG, "GOOGLE TTS - setup and check language"); + // At this stage, we only want to check whether the api works and the language is available try (TextToSpeechClient ttsClient = generateFromRawFile( application, getConfiguration().getGoogleCredentialsResourceFile())) { ListVoicesResponse response = ttsClient.listVoices(getDefaultLanguageCode()); - ttsClient.shutdownNow(); - ttsClient.awaitTermination(2, TimeUnit.SECONDS); if (response.getVoicesCount() > 0) { onSynthesizerSetup.execute(SynthesizerListener.Status.AVAILABLE); } else { @@ -158,7 +155,7 @@ private void finishPlayer() { if (mediaPlayer != null) mediaPlayer.stop(); logger.v(TAG, "GOOGLE TTS - stopped"); } catch (IllegalStateException ex) { - // Do nothing, the player is already stopped + // Does nothing, the player is already stopped } mCondVar.open(); if (mediaPlayer != null) { @@ -167,33 +164,31 @@ private void finishPlayer() { } } - @Override - public void shutdown() { - logger.w(TAG, "GOOGLE TTS - shutting down"); - this.stop(); - release(); - } - private void destroyTts() { if (!isTtsNull()) { try { tts.close(); tts.shutdown(); - tts.awaitTermination(2 ,TimeUnit.SECONDS); + tts.awaitTermination(2, TimeUnit.SECONDS); logger.v(TAG, "GOOGLE TTS - destroyed"); } catch (Exception ignored) {} tts = null; } } + @Override + public void shutdown() { + logger.w(TAG, "GOOGLE TTS - shutting down"); + this.stop(); + release(); + } + @Override public void release() { super.release(); tts = null; voice = null; audioConfig = null; - completed = true; - extraCode = 0; } @Override @@ -206,11 +201,11 @@ void initTts(SynthesizerListener.OnInitialised onSynthesizerInitialised) { if (isTtsNull()) { setReady(false); logger.i(TAG, "GOOGLE TTS - creating new instance of TextToSpeechClient.class"); - try (TextToSpeechClient ttsClient = generateFromRawFile( - application, getConfiguration().getGoogleCredentialsResourceFile())) { + try { logger.i(TAG, "GOOGLE TTS - new instance created"); setReady(true); - this.tts = ttsClient; + this.tts = generateFromRawFile( + application, getConfiguration().getGoogleCredentialsResourceFile()); this.audioConfig = AudioConfig.newBuilder().setAudioEncoding(AudioEncoding.MP3).build(); setupLanguage(); setSynthesizerUtteranceListener(createUtterancesListener()); @@ -278,7 +273,7 @@ public void run() { public void onStart(String utteranceId) { logger.v(getTag(), "GOOGLE TTS[%s] - on start", utteranceId); - startTimeout(utteranceId); + //startTimeout(utteranceId); timestamp = System.currentTimeMillis(); if (getListenersMap().size() > 0) { @@ -376,14 +371,12 @@ private void play(String utteranceId, String text, HashMap param getSynthesizerUtteranceListener().onStart(utteranceId); initTts(status -> { if (status == SynthesizerListener.Status.SUCCESS) { - mCondVar.close(); - completed = false; SynthesisInput input = SynthesisInput.newBuilder() .setText(text) .build(); SynthesizeSpeechResponse response = tts.synthesizeSpeech(input, voice, audioConfig); - destroyTts(); + //destroyTts(); // Get the audio contents from the response ByteString audioContents = response.getAudioContent(); @@ -391,8 +384,6 @@ private void play(String utteranceId, String text, HashMap param logger.d(getTag(), "audio: %s", audioContents); logger.d(getTag(), "audio string: %s", audioContents.toStringUtf8()); - extraCode = -1; - try { File tempMp3 = File.createTempFile("output", "mp3", application.getCacheDir()); @@ -409,29 +400,20 @@ private void play(String utteranceId, String text, HashMap param return; } - mediaPlayer.setOnCompletionListener(mp -> { - completed = true; - mCondVar.open(); + mediaPlayer.setOnCompletionListener(mediaPlayer -> { + finishPlayer(); + getSynthesizerUtteranceListener().onDone(utteranceId); }); mediaPlayer.setOnErrorListener((mp, what, extra) -> { - extraCode = extra; - mCondVar.open(); + finishPlayer(); + // TODO: When I release the timeout, since I already run onError, it might be called twice + getSynthesizerUtteranceListener().onError(utteranceId, extra); return true; }); mediaPlayer.setAudioStreamType(AudioManager.STREAM_MUSIC); mediaPlayer.start(); - mCondVar.block(); - finishPlayer(); } catch (Exception ex) { logger.logException(ex); - mCondVar.open(); - } - - if (completed) { - getSynthesizerUtteranceListener().onDone(utteranceId); - } else { - // TODO: When I release the timeout, since I already run onError, it might be called twice - getSynthesizerUtteranceListener().onError(utteranceId, extraCode); } } else { diff --git a/sdk-conversational-flow-core/src/main/java/com/chattylabs/sdk/android/voice/AndroidAudioRecorder.java b/sdk-conversational-flow-core/src/main/java/com/chattylabs/sdk/android/voice/AndroidAudioRecorder.java index 5c4efec..2c5e459 100755 --- a/sdk-conversational-flow-core/src/main/java/com/chattylabs/sdk/android/voice/AndroidAudioRecorder.java +++ b/sdk-conversational-flow-core/src/main/java/com/chattylabs/sdk/android/voice/AndroidAudioRecorder.java @@ -188,7 +188,7 @@ private AudioRecord createAudioRecord() { if (sizeInBytes == AudioRecord.ERROR_BAD_VALUE) { continue; } - final AudioRecord audioRecord = new AudioRecord(MediaRecorder.AudioSource.DEFAULT, + final AudioRecord audioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC, sampleRate, CHANNEL, ENCODING, sizeInBytes); if (audioRecord.getState() == AudioRecord.STATE_INITIALIZED) { mBuffer = new byte[sizeInBytes];