Skip to content

Commit

Permalink
Fixed RecognitionLongAudio creating a new AWS S3 key on every run
Browse files Browse the repository at this point in the history
  • Loading branch information
tikhonp committed Mar 16, 2023
1 parent f163422 commit 54d766b
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 47 deletions.
20 changes: 0 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,23 +113,3 @@ In other words, you can use the code for private and commercial purposes with an
Feel free to contact us via email [[email protected]](mailto:[email protected]).

❤️

### Get FOLDER_ID
```
FOLDER_ID=$(yc config get folder-id)
```

### Create service-account
```
yc iam service-account create --name admin
```

### Get id of service-account
```
SA_ID=$(yc iam service-account get --name admin --format json | jq .id -r)
```

### Assign a role to the admin service account using its ID:
```
yc resource-manager folder add-access-binding --id $FOLDER_ID --role admin --subject serviceAccount:$SA_ID
```
26 changes: 26 additions & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,32 @@ Api Reference

exceptions

Some shortcuts for the YC console tool
--------------------------------------

Get FOLDER_ID:

.. code-block:: bash

   FOLDER_ID=$(yc config get folder-id)

Create a service account:

.. code-block:: bash

   yc iam service-account create --name admin

Get the ID of the service account:

.. code-block:: bash

   SA_ID=$(yc iam service-account get --name admin --format json | jq .id -r)

Assign a role to the admin service account using its ID:

.. code-block:: bash

   yc resource-manager folder add-access-binding --id $FOLDER_ID --role admin --subject serviceAccount:$SA_ID

Indices and tables
==================
Expand Down
13 changes: 11 additions & 2 deletions examples/long_audio_recognition.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import time
import os
import time
from sys import argv

from speechkit import RecognitionLongAudio, Session
Expand All @@ -19,7 +19,16 @@
jwt = generate_jwt(service_account_id, key_id, private_key)
session = Session.from_jwt(jwt)

recognize_long_audio = RecognitionLongAudio(session, service_account_id, bucket_name)
# You may want to create the AWS S3 access key only once and reuse it.
# Since version 2.2.0 you can request the credentials explicitly:
access_key_id, secret = RecognitionLongAudio.get_aws_credentials(session, service_account_id)

# And then use it:
recognize_long_audio = RecognitionLongAudio(session, service_account_id, bucket_name, aws_access_key_id=access_key_id,
aws_secret=secret)

# Or let the SDK request the credentials automatically (a new key is requested on every run):
# recognize_long_audio = RecognitionLongAudio(session, service_account_id, bucket_name)

print("Sending file for recognition...")
recognize_long_audio.send_for_recognition(
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = speechkit
version = 2.1.1
version = 2.2.0
author = Tikhon Petrishchev
author_email = [email protected]
description = Python SDK for Yandex Speechkit API.
Expand Down
2 changes: 1 addition & 1 deletion src/speechkit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"""

__author__ = 'Tikhon Petrishchev'
__version__ = '2.1.1'
__version__ = '2.2.0'

from speechkit._auth import Session
from speechkit._recognition.streaming_recognition import DataStreamingRecognition
Expand Down
83 changes: 63 additions & 20 deletions src/speechkit/_recognition/sync_recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,38 +130,43 @@ class RecognitionLongAudio:

def __init__(self, session, service_account_id, aws_bucket_name=None,
aws_credentials_description='Default AWS credentials created by `speechkit` python SDK',
aws_region_name='ru-central1'):
aws_region_name='ru-central1', aws_access_key_id=None, aws_secret=None):
"""
Initialize :py:class:`speechkit.RecognitionLongAudio`
:param speechkit.Session session: Session instance for auth
:param string service_account_id: Yandex Cloud Service account ID
:param string aws_bucket_name: Optional AWS bucket name
:param string aws_credentials_description: AWS credentials description
:param string aws_region_name: AWS region name
:param string aws_access_key_id: Optional AWS access key. Can be got by `.get_aws_credentials`.
If None will be generated automatically
:param string aws_secret: Optional AWS secret. Can be got by `.get_aws_credentials`.
If None will be generated automatically
"""
self._id = None
self._answer_data = None
self._aws_file_name = None

if len(aws_credentials_description) > 256:
raise ValueError("The maximum `description` string length in characters is 256.")

self._headers = session.header
if session.folder_id:
raise ValueError("folder_id specify is not supported, use jwt.")
if session.auth_method == session.API_KEY:
raise ValueError("Only jwt method supported")

url_aws_credentials = 'https://iam.api.cloud.yandex.net/iam/aws-compatibility/v1/accessKeys'
data_aws_credentials = {'description': aws_credentials_description, 'serviceAccountId': service_account_id}
answer = requests.post(url_aws_credentials, headers=self._headers, json=data_aws_credentials)

if not answer.ok:
raise RequestError(answer.json())

answer = answer.json()
self._s3 = self._init_aws(
aws_access_key_id=answer.get('accessKey', {}).get('keyId'),
aws_secret_access_key=answer.get('secret'),
region_name=aws_region_name,
)
if aws_access_key_id and aws_secret:
self._s3 = self._init_aws(
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret,
region_name=aws_region_name,
)
else:
access_key_id, secret = self.get_aws_credentials(session, service_account_id, aws_credentials_description)
self._s3 = self._init_aws(
aws_access_key_id=access_key_id,
aws_secret_access_key=secret,
region_name=aws_region_name,
)

if aws_bucket_name:
self._aws_bucket_name = aws_bucket_name
Expand Down Expand Up @@ -191,16 +196,54 @@ def _init_aws(**kwargs):
**kwargs
)

@staticmethod
def get_aws_credentials(session, service_account_id,
                        aws_credentials_description='Default AWS credentials created by `speechkit` python SDK'):
    """
    Request a new AWS-compatible access key for a service account from Yandex Cloud.

    Every call sends a POST request to the IAM ``accessKeys`` endpoint, so the
    returned pair should be stored and reused (pass it to the constructor as
    ``aws_access_key_id`` / ``aws_secret``) instead of calling this on each run.

    :type session: speechkit.Session
    :param session: Session instance for auth; its ``header`` is sent with the request
    :type service_account_id: string
    :param service_account_id: Yandex Cloud Service account ID
    :type aws_credentials_description: string
    :param aws_credentials_description: AWS credentials description (at most 256 characters)
    :return: tuple with strings (access_key_id, secret)
    :raises ValueError: If the description is longer than 256 characters
    :raises RequestError: If the request fails or the response lacks the key id or the secret
    """

    # Validate locally first so that no request is sent for an invalid description.
    if len(aws_credentials_description) > 256:
        raise ValueError("The maximum `description` string length in characters is 256.")

    url_aws_credentials = 'https://iam.api.cloud.yandex.net/iam/aws-compatibility/v1/accessKeys'
    data_aws_credentials = {'description': aws_credentials_description, 'serviceAccountId': service_account_id}
    answer = requests.post(url_aws_credentials, headers=session.header, json=data_aws_credentials)

    try:
        payload = answer.json()
    except ValueError:
        # `Response.json()` raises a ValueError subclass when the body is not valid JSON
        # (e.g. an HTML error page); do not let that mask the real request failure.
        payload = None

    if not answer.ok:
        if not isinstance(payload, dict):
            # NOTE(review): assumes RequestError accepts any dict, as it does for the
            # JSON error body passed in the normal failure path -- confirm the keys it reads.
            payload = {'code': answer.status_code, 'message': answer.text}
        raise RequestError(payload)

    if not isinstance(payload, dict):
        raise RequestError({'message': 'Unexpected response format from the access key endpoint.'})

    # `accessKey` may be missing or null; treat both as "no key returned".
    access_key = payload.get('accessKey') or {}
    access_key_id = access_key.get('keyId') if isinstance(access_key, dict) else None
    secret = payload.get('secret')

    if not access_key_id or not secret:
        # Fail here instead of returning (None, None). The payload itself is not
        # attached to the error because it may contain the secret.
        raise RequestError({'message': 'Response does not contain `accessKey.keyId` and `secret`.'})

    return access_key_id, secret

def _aws_upload_file(self, file_path, baket_name, aws_file_name):
"""Upload a file to object storage
"""
Upload a file to object storage
:type file_path: string
:param file_path: Path to input file
:type baket_name: string
:param baket_name:
:type aws_file_name: string
:param aws_file_name: Name of file in object storage
"""
return self._s3.upload_file(file_path, baket_name, aws_file_name)

self._s3.upload_file(file_path, baket_name, aws_file_name)

def _create_presigned_url(self, bucket_name, aws_file_name,
expiration=3600):
Expand Down Expand Up @@ -248,7 +291,7 @@ def send_for_recognition(self, file_path, **kwargs):
:param string model: The language model to be used for recognition.
Default value: `general`.
:param boolean profanityFilter: The profanity filter.
:param boolean profanityFilter: The profanity filters.
:param string audioEncoding: The format of the submitted audio.
Acceptable values:
Expand Down
30 changes: 27 additions & 3 deletions src/tests/test_long_audio_recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,31 @@ def test_init(self):
recognize_long_audio = RecognitionLongAudio(session, service_account_id, bucket_name)
self.assertIsInstance(recognize_long_audio._headers, dict)

def test_get_aws_credentials(self):
    """Credentials requested for the service account come back as a pair of strings."""
    sa_id = os.environ.get('SERVICE_ACCOUNT_ID')
    signed_token = generate_jwt(sa_id, os.environ.get('YANDEX_KEY_ID'), get_private_key())

    credentials = RecognitionLongAudio.get_aws_credentials(Session.from_jwt(signed_token), sa_id)
    key_id_value, secret_value = credentials

    # Both halves of the credential pair must be plain strings.
    for value in (key_id_value, secret_value):
        self.assertIsInstance(value, str)

def test_init_from_aws_credentials(self):
    """RecognitionLongAudio can be built from credentials obtained beforehand."""
    bucket = os.environ.get('BUCKET_NAME')
    sa_id = os.environ.get('SERVICE_ACCOUNT_ID')
    signed_token = generate_jwt(sa_id, os.environ.get('YANDEX_KEY_ID'), get_private_key())
    auth_session = Session.from_jwt(signed_token)

    # Request the key pair explicitly, then hand it to the constructor.
    aws_kwargs = dict(zip(
        ('aws_access_key_id', 'aws_secret'),
        RecognitionLongAudio.get_aws_credentials(auth_session, sa_id),
    ))
    recognizer = RecognitionLongAudio(auth_session, sa_id, bucket, **aws_kwargs)

    self.assertIsInstance(recognizer._headers, dict)

def test_recognition(self):
bucket_name = os.environ.get('BUCKET_NAME')
service_account_id = os.environ.get('SERVICE_ACCOUNT_ID')
Expand All @@ -49,9 +74,8 @@ def test_recognition(self):
with open(self.path, 'wb') as f:
f.write(test_data)

recognize_long_audio.send_for_recognition(
self.path, audioEncoding='LINEAR16_PCM', sampleRateHertz='48000', audioChannelCount=1, rawResults=False
)
recognize_long_audio.send_for_recognition(self.path, audioEncoding='LINEAR16_PCM', sampleRateHertz='48000',
audioChannelCount=1, rawResults=False)

while True:
time.sleep(2)
Expand Down

0 comments on commit 54d766b

Please sign in to comment.