Skip to content

Commit

Permalink
Add models, config, .flake8
Browse files Browse the repository at this point in the history
  • Loading branch information
greenw0lf committed Mar 19, 2024
1 parent a92569d commit 1d5ff27
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 1 deletion.
58 changes: 58 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# use .flake8 until we can move this config to pyproject.toml (not possible yet (27/02/2024) according to issue below)
# https://github.com/PyCQA/flake8/issues/234

[flake8]
select =
# B: bugbear warnings
B,

# B950: bugbear max-linelength warning
# as suggested in the black docs
# https://github.com/psf/black/blob/d038a24ca200da9dacc1dcb05090c9e5b45b7869/docs/the_black_code_style/current_style.md#line-length
B950,

# C: currently only C901, mccabe code complexity
C,

# E: pycodestyle errors
E,

# F: flake8 codes for pyflakes
F,

# W: pycodestyle warnings
W,

extend-ignore =
# E203: pycodestyle's "whitespace before ',', ';' or ':'" error
# ignored as suggested in the black docs
# https://github.com/psf/black/blob/d038a24ca200da9dacc1dcb05090c9e5b45b7869/docs/the_black_code_style/current_style.md#slices
E203,

# E501: pycodestyle's "line too long (82 > 79 characters)" error
# ignored in favor of B950 as suggested in the black docs
# https://github.com/psf/black/blob/d038a24ca200da9dacc1dcb05090c9e5b45b7869/docs/the_black_code_style/current_style.md#line-length
E501,

# W503: pycodestyle's "line break before binary operator" warning
# ignored because black breaks lines before binary operators
W503,

# set max-line-length to be black compatible, as suggested in the black docs
# https://github.com/psf/black/blob/d038a24ca200da9dacc1dcb05090c9e5b45b7869/docs/the_black_code_style/current_style.md#line-length
max-line-length = 88

# set max cyclomatic complexity for mccabe plugin
max-complexity = 10

# show total number of errors, set exit code to 1 if the total is not zero
count = True

# show the source generating each error or warning
show-source = True

# count the occurrences of each error/warning code and print a report
statistics = True

exclude =
.venv
misc
2 changes: 1 addition & 1 deletion .github/workflows/_deploy.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Deploy dane-visual-feature-extraction-worker to ghcr
name: Deploy dane-whisper-asr-worker to ghcr

on:
workflow_call:
Expand Down
34 changes: 34 additions & 0 deletions config/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
RABBITMQ:
HOST: dane-rabbitmq-api.default.svc.cluster.local
PORT: 5672
EXCHANGE: DANE-exchange
RESPONSE_QUEUE: DANE-response-queue
USER: guest # change this for production mode
PASSWORD: guest # change this for production mode
ELASTICSEARCH:
HOST:
- elasticsearch
PORT: 9200
USER: '' # change this for production mode
PASSWORD: '' # change this for production mode
SCHEME: http
INDEX: dane-index-k8s
FILE_SYSTEM:
BASE_MOUNT: data # data when running locally
INPUT_DIR: input-files
OUTPUT_DIR: output-files
INPUT:
TEST_INPUT_PATH: testsource__testcarrier/inputfile.wav
S3_ENDPOINT_URL: https://s3-host
MODEL: s3://bucket/model
DELETE_ON_COMPLETION: False
OUTPUT:
DELETE_ON_COMPLETION: True
TRANSFER_ON_COMPLETION: True
S3_ENDPOINT_URL: https://s3-host
S3_BUCKET: bucket-name # bucket reserved for 1 type of output
S3_FOLDER_IN_BUCKET: folder # folder within the bucket
WHISPER_ASR_SETTINGS:
WORD_TIMESTAMPS: True
DANE_DEPENDENCIES:
- input-generating-worker
35 changes: 35 additions & 0 deletions models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from dataclasses import dataclass
from enum import Enum
from typing import Optional, TypedDict
from dane.provenance import Provenance


class CallbackResponse(TypedDict):
    """Dictionary shape returned by callback().

    Keys:
        state: integer status value.
        message: text accompanying the state.
    """

    state: int
    message: str


class OutputType(Enum):
    """Kinds of output this worker can (possibly) provide, depending on configuration.

    Each value is simply a meaningful name for the output type; as far as the
    original author understood, there are no other restrictions on it.
    """

    TRANSCRIPT = "transcript"
    PROVENANCE = "provenance"  # generated by provenance.py


@dataclass
class WhisperASRInput:
    """Status, message and optional details about the input audio file.

    Only ``state`` and ``message`` are required; the remaining fields default
    to empty values.
    """

    # HTTP status code
    state: int
    # error/success message
    message: str
    # formatted as <program ID>__<carrier ID>
    source_id: str = ""
    # where the audio was downloaded from
    input_file_path: str = ""
    # mostly: how long did it take to download
    provenance: Optional[Provenance] = None


@dataclass
class WhisperASROutput:
    """Status, message and optional details about the produced output file.

    Only ``state`` and ``message`` are required; the remaining fields default
    to empty values.
    """

    # HTTP status code
    state: int
    # error/success message
    message: str
    # where to store the text file
    output_file_path: str = ""
    # audio extraction provenance
    # NOTE(review): wording may be inherited from another worker — confirm
    provenance: Optional[Provenance] = None

0 comments on commit 1d5ff27

Please sign in to comment.