From 43f71cbad7104394b0ec44c31a5aa39473be22b7 Mon Sep 17 00:00:00 2001
From: Mario Lang
Date: Tue, 7 Nov 2023 11:24:48 +0100
Subject: [PATCH] TTS API

---
Review notes (text in this section is ignored by `git am`):

  * Layout: this patch had been collapsed onto three physical lines. The
    line structure below is restored so the hunks can be parsed again.
    No added, removed or context line was changed, so the hunk headers and
    the diffstat are unchanged. Leading whitespace inside the hunks was
    reconstructed and may not match the target tree byte-for-byte; if the
    patch is rejected, retry with `git am --ignore-whitespace`.

  * OpenAI/Resources.hs: `defaultTextToSpeechRequest` takes its arguments
    as (model, voice, input) while the record declares its fields as
    (model, input, voice). Both text arguments are `T.Text`, so a swapped
    call is not caught by the type checker.

  * OpenAI/Resources.hs: the existing `-- | Audio create API` Haddock
    comment now directly precedes `TextToSpeechRequest` instead of
    `AudioTranscriptionRequest`.

  * OpenAI/Api.hs: `Accept Audio` lists audio/mpeg, audio/opus, audio/aac
    and audio/flac only. NOTE(review): a response with any other
    Content-Type will presumably be rejected by the client before
    `mimeUnrender` runs -- confirm against the formats the endpoint can
    return.

 openai-hs/src/OpenAI/Client.hs         |  9 ++++++++-
 openai-servant/openai-servant.cabal    |  3 ++-
 openai-servant/package.yaml            |  2 ++
 openai-servant/src/OpenAI/Api.hs       | 19 ++++++++++++++++-
 openai-servant/src/OpenAI/Resources.hs | 28 ++++++++++++++++++++++++++
 5 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/openai-hs/src/OpenAI/Client.hs b/openai-hs/src/OpenAI/Client.hs
index d1a4f6b..f896fb5 100644
--- a/openai-hs/src/OpenAI/Client.hs
+++ b/openai-hs/src/OpenAI/Client.hs
@@ -68,6 +68,11 @@ module OpenAI.Client
     AudioTranslationRequest (..),
     createTranscription,
     createAudioTranslation,
+    TextToSpeechRequest(..),
+    defaultTextToSpeechRequest,
+    TextToSpeechResponse,
+    createTextToSpeech,
+
 
     -- * Engine (deprecated)
     EngineId (..),
@@ -203,6 +208,7 @@ createAudioTranslation sc atr =
     bnd <- liftIO MP.genBoundary
     createAudioTranslationInternal sc (bnd, atr)
 
+EP1 (createTextToSpeech, TextToSpeechRequest, TextToSpeechResponse)
 EP1 (createTranscriptionInternal, (BSL.ByteString, AudioTranscriptionRequest), AudioResponseData)
 EP1 (createAudioTranslationInternal, (BSL.ByteString, AudioTranslationRequest), AudioResponseData)
 
@@ -237,7 +243,8 @@ EP2 (engineCreateEmbedding, EngineId, EngineEmbeddingCreate, (OpenAIList EngineE
              :<|> createImageVariation'
          )
     :<|> (createEmbedding')
-    :<|> ( createTranscriptionInternal'
+    :<|> (createTextToSpeech'
+             :<|> createTranscriptionInternal'
              :<|> createAudioTranslationInternal'
          )
     :<|> (createFileInternal' :<|> deleteFile')
diff --git a/openai-servant/openai-servant.cabal b/openai-servant/openai-servant.cabal
index 3b28e96..62cc4da 100644
--- a/openai-servant/openai-servant.cabal
+++ b/openai-servant/openai-servant.cabal
@@ -1,6 +1,6 @@
 cabal-version: 1.12
 
--- This file has been generated from package.yaml by hpack version 0.35.2.
+-- This file has been generated from package.yaml by hpack version 0.36.0.
 --
 -- see: https://github.com/sol/hpack
 
@@ -52,6 +52,7 @@ library
     , base >=4.7 && <5
     , bytestring
     , casing
+    , http-media
     , mime-types
     , servant
     , servant-auth
diff --git a/openai-servant/package.yaml b/openai-servant/package.yaml
index 39ea3ff..411fb04 100644
--- a/openai-servant/package.yaml
+++ b/openai-servant/package.yaml
@@ -27,6 +27,8 @@ dependencies:
   - time
   - vector
   - mime-types
+  - http-media
+
 
 ghc-options:
   - -Wall
diff --git a/openai-servant/src/OpenAI/Api.hs b/openai-servant/src/OpenAI/Api.hs
index a603164..aacdc75 100644
--- a/openai-servant/src/OpenAI/Api.hs
+++ b/openai-servant/src/OpenAI/Api.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE DataKinds #-}
 -- | The API
 module OpenAI.Api where
 
@@ -6,9 +7,24 @@ import Servant.API
 import Servant.Auth
 import Servant.Auth.Client
 import Servant.Multipart.API
+import Data.List.NonEmpty (NonEmpty((:|)))
+import Data.ByteString.Lazy (ByteString)
+import Network.HTTP.Media ((//))
 
 type OpenAIAuth = Auth '[Bearer] ()
 
+data Audio
+
+instance Accept Audio where
+  contentTypes _ = "audio" // "mpeg" :|
+    [ "audio" // "opus",
+      "audio" // "aac",
+      "audio" // "flac"
+    ]
+
+instance MimeUnrender Audio ByteString where
+  mimeUnrender _ bs = Right bs
+
 type OpenAIApi =
   "v1" :> OpenAIApiInternal
 
@@ -46,7 +62,8 @@ type EmbeddingsApi =
   OpenAIAuth :> ReqBody '[JSON] EmbeddingCreate :> Post '[JSON] EmbeddingResponse
 
 type AudioApi =
-  OpenAIAuth :> "transcriptions" :> MultipartForm Tmp AudioTranscriptionRequest :> Post '[JSON] AudioResponseData
+  OpenAIAuth :> "speech" :> ReqBody '[JSON] TextToSpeechRequest :> Post '[Audio] TextToSpeechResponse
+    :<|> OpenAIAuth :> "transcriptions" :> MultipartForm Tmp AudioTranscriptionRequest :> Post '[JSON] AudioResponseData
     :<|> OpenAIAuth :> "translations" :> MultipartForm Tmp AudioTranslationRequest :> Post '[JSON] AudioResponseData
 
 type FilesApi =
diff --git a/openai-servant/src/OpenAI/Resources.hs b/openai-servant/src/OpenAI/Resources.hs
index 76275c2..4773cde 100644
--- a/openai-servant/src/OpenAI/Resources.hs
+++ b/openai-servant/src/OpenAI/Resources.hs
@@ -52,6 +52,10 @@ module OpenAI.Resources
     AudioResponseData (..),
     AudioTranscriptionRequest (..),
     AudioTranslationRequest (..),
+    TextToSpeechRequest (..),
+    defaultTextToSpeechRequest,
+    TextToSpeechResponse,
+
 
     -- * Fine tuning (out of date)
     FineTuneId (..),
@@ -507,6 +511,30 @@ data AudioResponseData = AudioResponseData
 $(deriveJSON (jsonOpts 5) ''AudioResponseData)
 
 -- | Audio create API
+
+data TextToSpeechRequest = TextToSpeechRequest
+  { ttsModel :: ModelId,
+    ttsInput :: T.Text,
+    ttsVoice :: T.Text,
+    ttsResponseFormat :: Maybe T.Text,
+    ttsSpeed :: Maybe Float
+  }
+  deriving (Show, Eq)
+
+$(deriveJSON (jsonOpts 3) ''TextToSpeechRequest)
+
+defaultTextToSpeechRequest :: ModelId -> T.Text -> T.Text -> TextToSpeechRequest
+defaultTextToSpeechRequest model voice input =
+  TextToSpeechRequest
+    { ttsModel = model,
+      ttsVoice = voice,
+      ttsInput = input,
+      ttsResponseFormat = Nothing,
+      ttsSpeed = Nothing
+    }
+
+type TextToSpeechResponse = BSL.ByteString
+
 data AudioTranscriptionRequest = AudioTranscriptionRequest
   { audtsrFile :: FilePath,
     audtsrModel :: ModelId,