Fixed RecognitionLongAudio each run creates an aws s3 key

tikhonp · Mar 16, 2023 · 54d766b · 54d766b
1 parent f163422
commit 54d766b
Show file tree

Hide file tree

Showing 7 changed files with 129 additions and 47 deletions.
diff --git a/README.md b/README.md
@@ -113,23 +113,3 @@ In other words, you can use the code for private and commercial purposes with an
 Feel free to contact us via email [[email protected]](mailto:[email protected]).
 
 ❤️
-
-### Get FOLDER_ID
-```
-FOLDER_ID=$(yc config get folder-id)
-```
-
-### Create service-account
-```
-yc iam service-account create --name admin
-```
-
-### Get id of service-account
-```
-SA_ID=$(yc iam service-account get --name admin --format json | jq .id -r)
-```
-
-### Assign a role to the admin service account using its ID:
-```
-yc resource-manager folder add-access-binding --id $FOLDER_ID --role admin --subject serviceAccount:$SA_ID
-```
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -42,6 +42,32 @@ Api Reference
 
    exceptions
 
+Some shortcuts for the YC console tool
+--------------------------------------
+
+Get FOLDER_ID:
+
+.. code-block:: bash
+
+   FOLDER_ID=$(yc config get folder-id)
+
+Create service-account:
+
+.. code-block:: bash
+
+   yc iam service-account create --name admin
+
+Get id of service-account:
+
+.. code-block:: bash
+
+   SA_ID=$(yc iam service-account get --name admin --format json | jq .id -r)
+
+Assign a role to the admin service account using its ID:
+
+.. code-block:: bash
+
+   yc resource-manager folder add-access-binding --id $FOLDER_ID --role admin --subject serviceAccount:$SA_ID
 
 Indices and tables
 ==================

diff --git a/examples/long_audio_recognition.py b/examples/long_audio_recognition.py
@@ -1,5 +1,5 @@
-import time
 import os
+import time
 from sys import argv
 
 from speechkit import RecognitionLongAudio, Session
@@ -19,7 +19,16 @@
 jwt = generate_jwt(service_account_id, key_id, private_key)
 session = Session.from_jwt(jwt)
 
-recognize_long_audio = RecognitionLongAudio(session, service_account_id, bucket_name)
+# To create the AWS S3 access key only once and reuse it across runs,
+# fetch the credentials explicitly (available since version 2.2.0):
+access_key_id, secret = RecognitionLongAudio.get_aws_credentials(session, service_account_id)
+
+# And then use it:
+recognize_long_audio = RecognitionLongAudio(session, service_account_id, bucket_name, aws_access_key_id=access_key_id,
+                                            aws_secret=secret)
+
+# Or you can do it automatically
+# recognize_long_audio = RecognitionLongAudio(session, service_account_id, bucket_name)
 
 print("Sending file for recognition...")
 recognize_long_audio.send_for_recognition(

diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = speechkit
-version = 2.1.1
+version = 2.2.0
 author = Tikhon Petrishchev
 author_email = [email protected]
 description = Python SDK for Yandex Speechkit API.

diff --git a/src/speechkit/__init__.py b/src/speechkit/__init__.py
@@ -4,7 +4,7 @@
 """
 
 __author__ = 'Tikhon Petrishchev'
-__version__ = '2.1.1'
+__version__ = '2.2.0'
 
 from speechkit._auth import Session
 from speechkit._recognition.streaming_recognition import DataStreamingRecognition

diff --git a/src/speechkit/_recognition/sync_recognition.py b/src/speechkit/_recognition/sync_recognition.py
@@ -130,38 +130,43 @@ class RecognitionLongAudio:
 
     def __init__(self, session, service_account_id, aws_bucket_name=None,
                  aws_credentials_description='Default AWS credentials created by `speechkit` python SDK',
-                 aws_region_name='ru-central1'):
+                 aws_region_name='ru-central1', aws_access_key_id=None, aws_secret=None):
         """
         Initialize :py:class:`speechkit.RecognitionLongAudio`
 
         :param speechkit.Session session: Session instance for auth
+        :param string service_account_id: Yandex Cloud Service account ID
+        :param string aws_bucket_name: Optional AWS bucket name
+        :param string aws_credentials_description: AWS credentials description
+        :param string aws_region_name: AWS region name
+        :param string aws_access_key_id: Optional AWS access key ID. Can be obtained via `.get_aws_credentials`.
+            If this or `aws_secret` is not provided, new credentials are generated automatically.
+        :param string aws_secret: Optional AWS secret. Can be obtained via `.get_aws_credentials`.
+            If this or `aws_access_key_id` is not provided, new credentials are generated automatically.
         """
         self._id = None
         self._answer_data = None
         self._aws_file_name = None
 
-        if len(aws_credentials_description) > 256:
-            raise ValueError("The maximum `description` string length in characters is 256.")
-
         self._headers = session.header
         if session.folder_id:
             raise ValueError("folder_id specify is not supported, use jwt.")
         if session.auth_method == session.API_KEY:
             raise ValueError("Only jwt method supported")
 
-        url_aws_credentials = 'https://iam.api.cloud.yandex.net/iam/aws-compatibility/v1/accessKeys'
-        data_aws_credentials = {'description': aws_credentials_description, 'serviceAccountId': service_account_id}
-        answer = requests.post(url_aws_credentials, headers=self._headers, json=data_aws_credentials)
-
-        if not answer.ok:
-            raise RequestError(answer.json())
-
-        answer = answer.json()
-        self._s3 = self._init_aws(
-            aws_access_key_id=answer.get('accessKey', {}).get('keyId'),
-            aws_secret_access_key=answer.get('secret'),
-            region_name=aws_region_name,
-        )
+        if aws_access_key_id and aws_secret:
+            self._s3 = self._init_aws(
+                aws_access_key_id=aws_access_key_id,
+                aws_secret_access_key=aws_secret,
+                region_name=aws_region_name,
+            )
+        else:
+            access_key_id, secret = self.get_aws_credentials(session, service_account_id, aws_credentials_description)
+            self._s3 = self._init_aws(
+                aws_access_key_id=access_key_id,
+                aws_secret_access_key=secret,
+                region_name=aws_region_name,
+            )
 
         if aws_bucket_name:
             self._aws_bucket_name = aws_bucket_name
@@ -191,16 +196,54 @@ def _init_aws(**kwargs):
             **kwargs
         )
 
+    @staticmethod
+    def get_aws_credentials(session, service_account_id,
+                            aws_credentials_description='Default AWS credentials created by `speechkit` python SDK'):
+        """
+        Get AWS credentials from yandex cloud
+
+        :type session: speechkit.Session
+        :param session: Session instance for auth
+
+        :type service_account_id: string
+        :param service_account_id: Yandex Cloud Service account ID
+
+        :type aws_credentials_description: string
+        :param aws_credentials_description: AWS credentials description
+
+        :return: tuple with strings (access_key_id, secret)
+        """
+
+        if len(aws_credentials_description) > 256:
+            raise ValueError("The maximum `description` string length in characters is 256.")
+
+        url_aws_credentials = 'https://iam.api.cloud.yandex.net/iam/aws-compatibility/v1/accessKeys'
+        data_aws_credentials = {'description': aws_credentials_description, 'serviceAccountId': service_account_id}
+        answer = requests.post(url_aws_credentials, headers=session.header, json=data_aws_credentials)
+
+        if not answer.ok:
+            raise RequestError(answer.json())
+
+        answer = answer.json()
+        access_key_id = answer.get('accessKey', {}).get('keyId')
+        secret = answer.get('secret')
+        return access_key_id, secret
+
     def _aws_upload_file(self, file_path, baket_name, aws_file_name):
-        """Upload a file to object storage
+        """
+        Upload a file to object storage
 
         :type file_path: string
         :param file_path: Path to input file
+
         :type baket_name: string
+        :param baket_name: Name of the bucket in object storage
+
         :type aws_file_name: string
         :param aws_file_name: Name of file in object storage
         """
-        return self._s3.upload_file(file_path, baket_name, aws_file_name)
+
+        self._s3.upload_file(file_path, baket_name, aws_file_name)
 
     def _create_presigned_url(self, bucket_name, aws_file_name,
                               expiration=3600):
@@ -248,7 +291,7 @@ def send_for_recognition(self, file_path, **kwargs):
         :param string model: The language model to be used for recognition.
             Default value: `general`.
 
-        :param boolean profanityFilter: The profanity filter.
+        :param boolean profanityFilter: The profanity filter.
 
         :param string audioEncoding: The format of the submitted audio.
             Acceptable values:

diff --git a/src/tests/test_long_audio_recognition.py b/src/tests/test_long_audio_recognition.py
@@ -35,6 +35,31 @@ def test_init(self):
         recognize_long_audio = RecognitionLongAudio(session, service_account_id, bucket_name)
         self.assertIsInstance(recognize_long_audio._headers, dict)
 
+    def test_get_aws_credentials(self):
+        service_account_id = os.environ.get('SERVICE_ACCOUNT_ID')
+        key_id = os.environ.get('YANDEX_KEY_ID')
+        private_key = get_private_key()
+        jwt = generate_jwt(service_account_id, key_id, private_key)
+
+        session = Session.from_jwt(jwt)
+        access_key_id, secret = RecognitionLongAudio.get_aws_credentials(session, service_account_id)
+        self.assertIsInstance(access_key_id, str)
+        self.assertIsInstance(secret, str)
+
+    def test_init_from_aws_credentials(self):
+        bucket_name = os.environ.get('BUCKET_NAME')
+        service_account_id = os.environ.get('SERVICE_ACCOUNT_ID')
+        key_id = os.environ.get('YANDEX_KEY_ID')
+        private_key = get_private_key()
+        jwt = generate_jwt(service_account_id, key_id, private_key)
+
+        session = Session.from_jwt(jwt)
+        access_key_id, secret = RecognitionLongAudio.get_aws_credentials(session, service_account_id)
+
+        recognize_long_audio = RecognitionLongAudio(session, service_account_id, bucket_name,
+                                                    aws_access_key_id=access_key_id, aws_secret=secret)
+        self.assertIsInstance(recognize_long_audio._headers, dict)
+
     def test_recognition(self):
         bucket_name = os.environ.get('BUCKET_NAME')
         service_account_id = os.environ.get('SERVICE_ACCOUNT_ID')
@@ -49,9 +74,8 @@ def test_recognition(self):
         with open(self.path, 'wb') as f:
             f.write(test_data)
 
-        recognize_long_audio.send_for_recognition(
-            self.path, audioEncoding='LINEAR16_PCM', sampleRateHertz='48000', audioChannelCount=1, rawResults=False
-        )
+        recognize_long_audio.send_for_recognition(self.path, audioEncoding='LINEAR16_PCM', sampleRateHertz='48000',
+            audioChannelCount=1, rawResults=False)
 
         while True:
             time.sleep(2)