-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
128 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# use .flake8 until this config can be moved to pyproject.toml (not yet possible as of 27/02/2024, see the issue below) | ||
# https://github.com/PyCQA/flake8/issues/234 | ||
|
||
[flake8] | ||
select = | ||
# B: bugbear warnings | ||
B, | ||
|
||
# B950: bugbear max-linelength warning | ||
# as suggested in the black docs | ||
# https://github.com/psf/black/blob/d038a24ca200da9dacc1dcb05090c9e5b45b7869/docs/the_black_code_style/current_style.md#line-length | ||
B950, | ||
|
||
# C: currently only C901, mccabe code complexity | ||
C, | ||
|
||
# E: pycodestyle errors | ||
E, | ||
|
||
# F: flake8 codes for pyflakes | ||
F, | ||
|
||
# W: pycodestyle warnings | ||
W, | ||
|
||
extend-ignore = | ||
# E203: pycodestyle's "whitespace before ',', ';' or ':'" error | ||
# ignored as suggested in the black docs | ||
# https://github.com/psf/black/blob/d038a24ca200da9dacc1dcb05090c9e5b45b7869/docs/the_black_code_style/current_style.md#slices | ||
E203, | ||
|
||
# E501: pycodestyle's "line too long (82 > 79 characters)" error | ||
# ignored in favor of B950 as suggested in the black docs | ||
# https://github.com/psf/black/blob/d038a24ca200da9dacc1dcb05090c9e5b45b7869/docs/the_black_code_style/current_style.md#line-length | ||
E501, | ||
|
||
# W503: pycodestyle's "line break before binary operator" warning | ||
W503, | ||
|
||
# set max-line-length to be black compatible, as suggested in the black docs | ||
# https://github.com/psf/black/blob/d038a24ca200da9dacc1dcb05090c9e5b45b7869/docs/the_black_code_style/current_style.md#line-length | ||
max-line-length = 88 | ||
|
||
# set max cyclomatic complexity for mccabe plugin | ||
max-complexity = 10 | ||
|
||
# show the total number of errors; set the exit code to 1 if the total is not zero | ||
count = True | ||
|
||
# show the source generating each error or warning | ||
show-source = True | ||
|
||
# count the occurrences of each error/warning code and print a report | ||
statistics = True | ||
|
||
exclude = | ||
.venv | ||
misc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
RABBITMQ: | ||
HOST: dane-rabbitmq-api.default.svc.cluster.local | ||
PORT: 5672 | ||
EXCHANGE: DANE-exchange | ||
RESPONSE_QUEUE: DANE-response-queue | ||
USER: guest # change this for production mode | ||
PASSWORD: guest # change this for production mode | ||
ELASTICSEARCH: | ||
HOST: | ||
- elasticsearch | ||
PORT: 9200 | ||
USER: '' # change this for production mode | ||
PASSWORD: '' # change this for production mode | ||
SCHEME: http | ||
INDEX: dane-index-k8s | ||
FILE_SYSTEM: | ||
BASE_MOUNT: data # data when running locally | ||
INPUT_DIR: input-files | ||
OUTPUT_DIR: output-files | ||
INPUT: | ||
TEST_INPUT_PATH: testsource__testcarrier/inputfile.wav | ||
S3_ENDPOINT_URL: https://s3-host | ||
MODEL: s3://bucket/model | ||
DELETE_ON_COMPLETION: False | ||
OUTPUT: | ||
DELETE_ON_COMPLETION: True | ||
TRANSFER_ON_COMPLETION: True | ||
S3_ENDPOINT_URL: https://s3-host | ||
S3_BUCKET: bucket-name # bucket reserved for 1 type of output | ||
S3_FOLDER_IN_BUCKET: folder # folder within the bucket | ||
WHISPER_ASR_SETTINGS: | ||
WORD_TIMESTAMPS: True | ||
DANE_DEPENDENCIES: | ||
- input-generating-worker |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
from dataclasses import dataclass | ||
from enum import Enum | ||
from typing import Optional, TypedDict | ||
from dane.provenance import Provenance | ||
|
||
|
||
# Typed dictionary returned by callback(): an integer state plus a message string. | ||
class CallbackResponse(TypedDict): | ||
# presumably an HTTP status code, like `state` in the dataclasses of this module - TODO confirm | ||
state: int | ||
message: str | ||
|
||
|
||
# The types of output this worker can (possibly) provide, depending on its configuration. | ||
class OutputType(Enum): | ||
# Each value is the name of an output type; it should simply be a meaningful name, | ||
# with no other restrictions (as far as I understand). | ||
TRANSCRIPT = "transcript" | ||
PROVENANCE = "provenance" # produced by provenance.py | ||
|
||
|
||
# Dataclass describing an input file: status, message, source ID, file path and optional provenance. | ||
# Only `state` and `message` are required; the remaining fields have defaults. | ||
@dataclass | ||
class WhisperASRInput: | ||
state: int # HTTP status code | ||
message: str # error/success message | ||
source_id: str = "" # <program ID>__<carrier ID> | ||
input_file_path: str = "" # where the audio was downloaded from; NOTE(review): local path or remote source? confirm | ||
provenance: Optional[Provenance] = None # mostly: how long the download took | ||
|
||
|
||
# Dataclass describing an output: status, message, output file path and optional provenance. | ||
# Only `state` and `message` are required; the remaining fields have defaults. | ||
@dataclass | ||
class WhisperASROutput: | ||
state: int # HTTP status code | ||
message: str # error/success message | ||
output_file_path: str = "" # where to store the text file | ||
provenance: Optional[Provenance] = None # audio extraction provenance; NOTE(review): wording may be copied from another worker - confirm | ||