Skip to content

Commit 80fe2ca

Browse files
committed
[!158][RELEASE] Automatic Subtitling with SBAAM (ACL2024)
# Which work do we release?
"SBAAM! Eliminating Transcript Dependency in Automatic Subtitling"
# What changes does this release refer to?
ec9480d9f5de12269f420848c9c55f820089da4b
d0d8ac1ee13c2bd12ab9a483fc2aa6b0653651f5
e2d7504f8d3245532c7e781f8c7b3cb93709d8de
cb56d5a6af98913e501a5eb54159c57179433960
fc1f065bfce66922815c840de213d01978917543
16fb354c87ee5397c26b97fb54547c3d463a9dff
dee3b0f125d155fc28574ba235c88bc2367e6e76
fea2e98b7f033380f19c718de250332c7f0bf322
fa0e7a3a750db9096bdbcefae1591a2d890dd771
1 parent ec8ce81 commit 80fe2ca

File tree

3 files changed

+329
-8
lines changed

3 files changed

+329
-8
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ Dedicated README for each work can be found in the `fbk_works` directory.
55

66
### 2024
77

8+
- [[ACL 2024] **SBAAM! Eliminating Transcript Dependency in Automatic Subtitling**](fbk_works/SBAAM.md)
89
- [[ACL 2024] **When Good and Reproducible Results are a Giant with Feet of Clay: The Importance of Software Quality in NLP**](fbk_works/BUGFREE_CONFORMER.md)
910
- [[LREC-COLING 2024] **How do Hyenas deal with Human Speech? Speech Recognition and Translation with ConfHyena**](fbk_works/HYENA_COLING2024.md)
1011

examples/speech_to_text/scripts/attention_based_timestamp_estimation.py

+11-8
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,10 @@ def aligns(self, boundaries_indexes):
8484
raise NotImplementedError("Subclasses of AttentionMatrixProcessor should implement aligns")
8585

8686

87-
class CustomAttentionAligner(AttentionMatrixProcessor):
87+
class SBAAMNoForceEndAttentionAligner(AttentionMatrixProcessor):
8888
"""
89-
Custom method specifically designed by FBK to determine block boundaries,
90-
trying to maximize the value of the attention area of corresponding text and audio.
89+
Determines subtitling block boundaries, trying to maximize the value of the attention area
90+
of corresponding text and audio.
9191
"""
9292
def normalize(self):
9393
self.std_normalize()
@@ -131,10 +131,13 @@ def aligns(self, boundaries_indexes):
131131
return splitting_time_idxs
132132

133133

134-
class CustomForcedEndAttentionAligner(CustomAttentionAligner):
134+
class SBAAMAttentionAligner(SBAAMNoForceEndAttentionAligner):
135135
"""
136-
The current method does not properly estimate the end time of the last eob.
136+
SBAAMNoForceEnd does not properly estimate the end time of the last eob.
137137
As a workaround, this forces the last eob to terminate at the end of the audio.
138+
139+
This is the method used and described in
140+
`"SBAAM! Eliminating Transcript Dependency in Automatic Subtitling" <>`_.
138141
"""
139142
def aligns(self, boundaries_indexes):
140143
splitting_time_idxs = super().aligns(boundaries_indexes)
@@ -243,8 +246,8 @@ def aligns(self, boundaries_indexes):
243246

244247
class AttentionAlignerArgparse(argparse.Action):
245248
AVAILABLE_ALIGNERS = {
246-
"custom": CustomAttentionAligner,
247-
"custom-forceend": CustomForcedEndAttentionAligner,
249+
"sbaam-noforce": SBAAMNoForceEndAttentionAligner,
250+
"sbaam": SBAAMAttentionAligner,
248251
"dtw-medianf": DTWMedianFilterAttentionAligner,
249252
}
250253

@@ -356,7 +359,7 @@ def main(args):
356359
parser.add_argument('--alignment-operator',
357360
action=AttentionAlignerArgparse,
358361
choices=AttentionAlignerArgparse.AVAILABLE_ALIGNERS.keys(),
359-
default=AttentionAlignerArgparse.AVAILABLE_ALIGNERS['custom-forceend'],
362+
default=AttentionAlignerArgparse.AVAILABLE_ALIGNERS['sbaam'],
360363
help="method to use to perform alignments")
361364
parser.add_argument('--remove-last-frame', action='store_true', default=False,
362365
help="if set, last token is removed before computing alignments")

0 commit comments

Comments
 (0)