From 3f7979e1c81d6f1f06504d10cbdde82e14884c02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Mon, 6 Nov 2023 11:42:15 +0100
Subject: [PATCH 01/28] Remove ' and - from the punctuations to be removed

---
 ASR_NL_benchmark/normalize.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ASR_NL_benchmark/normalize.py b/ASR_NL_benchmark/normalize.py
index ddd5ab6..d4e9d7e 100644
--- a/ASR_NL_benchmark/normalize.py
+++ b/ASR_NL_benchmark/normalize.py
@@ -44,9 +44,10 @@ def replace_numbers_and_symbols(text):
     >>> replace_numbers_and_symbols('12,3%')
     'twaalf komma drie procent'
     """
+    removed_punct = string.punctuation.replace("'", '').replace("-", '')
     text_without_symbols = replace_symbols(text)
     clean_text = replace_numbers(text_without_symbols)
-    clean_text = clean_text.translate(str.maketrans('', '', string.punctuation))
+    clean_text = clean_text.translate(str.maketrans('', '', removed_punct))
     return clean_text
 
 def replace_numbers(text):

From 221eeeb64852b34b0f9166779e041422b9a79c05 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Mon, 6 Nov 2023 14:06:52 +0100
Subject: [PATCH 02/28] Add some variations + remove dash (-) again

---
 ASR_NL_benchmark/normalize.py   | 2 +-
 ASR_NL_benchmark/variations.glm | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/ASR_NL_benchmark/normalize.py b/ASR_NL_benchmark/normalize.py
index d4e9d7e..be738f8 100644
--- a/ASR_NL_benchmark/normalize.py
+++ b/ASR_NL_benchmark/normalize.py
@@ -44,7 +44,7 @@ def replace_numbers_and_symbols(text):
     >>> replace_numbers_and_symbols('12,3%')
     'twaalf komma drie procent'
     """
-    removed_punct = string.punctuation.replace("'", '').replace("-", '')
+    removed_punct = string.punctuation.replace("'", '')
     text_without_symbols = replace_symbols(text)
     clean_text = replace_numbers(text_without_symbols)
     clean_text = clean_text.translate(str.maketrans('', '', removed_punct))
diff --git a/ASR_NL_benchmark/variations.glm b/ASR_NL_benchmark/variations.glm
index c7aaaa4..1c80848 100644
--- a/ASR_NL_benchmark/variations.glm
+++ b/ASR_NL_benchmark/variations.glm
@@ -13,6 +13,8 @@ z'n => zijn / [ ] __ [ ]
 'k => ik / [ ] __ [ ]
 'r => er / [ ] __ [ ]
 'ns => eens / [ ] __ [ ]
+ie => hij / [ ] __ [ ]
+da's => dat is / [ ] __ [ ]
 d'ruit => eruit / [ ] __ [ ]
 restaurant- => restaurant / [ ] __ [ ]
 jeugd- => jeugd / [ ] __ [ ]

From 3f6f38a6d45d38edf607042a7ee45e449e1cd1fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Mon, 13 Nov 2023 15:02:48 +0100
Subject: [PATCH 03/28] 1 more variation + remove dash (-) from punctuation to
 be removed

---
 ASR_NL_benchmark/normalize.py   | 2 +-
 ASR_NL_benchmark/variations.glm | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/ASR_NL_benchmark/normalize.py b/ASR_NL_benchmark/normalize.py
index be738f8..e3121af 100644
--- a/ASR_NL_benchmark/normalize.py
+++ b/ASR_NL_benchmark/normalize.py
@@ -44,7 +44,7 @@ def replace_numbers_and_symbols(text):
     >>> replace_numbers_and_symbols('12,3%')
     'twaalf komma drie procent'
     """
-    removed_punct = string.punctuation.replace("'", '')
+    removed_punct = string.punctuation.replace("'", '').replace('-', '')
     text_without_symbols = replace_symbols(text)
     clean_text = replace_numbers(text_without_symbols)
     clean_text = clean_text.translate(str.maketrans('', '', removed_punct))
diff --git a/ASR_NL_benchmark/variations.glm b/ASR_NL_benchmark/variations.glm
index 1c80848..4c841a4 100644
--- a/ASR_NL_benchmark/variations.glm
+++ b/ASR_NL_benchmark/variations.glm
@@ -59,6 +59,7 @@ tewerk => te werk / [ ] __ [ ]
 [concept-] => [{ concept- / concept }] / [ ] __ [ ]
 [NAVO-] => [{ NAVO- / NAVO }] / [ ] __ [ ]
 [uh] => [{ uh / %HESITATION }] / [ ] __ [ ]
+[bnr-nieuwsradio] => [{ bnr-nieuwsradio / bnr nieuwsradio }]
 ;;
 ;; BN-VL
 [Darfour] => [{ Darfour / Darfur }] / [ ] __ [ ]

From 62f618e15a7879deccc0e13fd9256391f2863bfd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Mon, 13 Nov 2023 15:43:41 +0100
Subject: [PATCH 04/28] Fix variation bug

---
 ASR_NL_benchmark/variations.glm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ASR_NL_benchmark/variations.glm b/ASR_NL_benchmark/variations.glm
index 4c841a4..746ac57 100644
--- a/ASR_NL_benchmark/variations.glm
+++ b/ASR_NL_benchmark/variations.glm
@@ -59,7 +59,7 @@ tewerk => te werk / [ ] __ [ ]
 [concept-] => [{ concept- / concept }] / [ ] __ [ ]
 [NAVO-] => [{ NAVO- / NAVO }] / [ ] __ [ ]
 [uh] => [{ uh / %HESITATION }] / [ ] __ [ ]
-[bnr-nieuwsradio] => [{ bnr-nieuwsradio / bnr nieuwsradio }]
+[bnr-nieuwsradio] => [{ bnr-nieuwsradio / bnr nieuwsradio }] / [ ] __ [ ]
 ;;
 ;; BN-VL
 [Darfour] => [{ Darfour / Darfur }] / [ ] __ [ ]

From 2c614e1dbc7c5ebce83ef3405c149190c2e6cb71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Mon, 13 Nov 2023 16:01:01 +0100
Subject: [PATCH 05/28] Add capitalization to BNR variation

---
 ASR_NL_benchmark/variations.glm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ASR_NL_benchmark/variations.glm b/ASR_NL_benchmark/variations.glm
index 746ac57..2c5c305 100644
--- a/ASR_NL_benchmark/variations.glm
+++ b/ASR_NL_benchmark/variations.glm
@@ -59,7 +59,7 @@ tewerk => te werk / [ ] __ [ ]
 [concept-] => [{ concept- / concept }] / [ ] __ [ ]
 [NAVO-] => [{ NAVO- / NAVO }] / [ ] __ [ ]
 [uh] => [{ uh / %HESITATION }] / [ ] __ [ ]
-[bnr-nieuwsradio] => [{ bnr-nieuwsradio / bnr nieuwsradio }] / [ ] __ [ ]
+[BNR-nieuwsradio] => [{ BNR-nieuwsradio / BNR nieuwsradio }] / [ ] __ [ ]
 ;;
 ;; BN-VL
 [Darfour] => [{ Darfour / Darfur }] / [ ] __ [ ]

From 67467bdab83b11622ce199ac922c8178ee453c0b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Fri, 17 Nov 2023 15:29:42 +0100
Subject: [PATCH 06/28] Add support for skipping normalization in pipeline

---
 ASR_NL_benchmark/__main__.py | 16 +++++++++++++++-
 ASR_NL_benchmark/pipeline.py | 16 +++++++++++-----
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/ASR_NL_benchmark/__main__.py b/ASR_NL_benchmark/__main__.py
index 36dbf28..c1b7f20 100644
--- a/ASR_NL_benchmark/__main__.py
+++ b/ASR_NL_benchmark/__main__.py
@@ -21,6 +21,15 @@
                         metavar='value',
                         default='',
                         help='help: True if you want to use the GUI')
+    parser.add_argument('-skip_ref_normalization', 
+                        action = 'store_true',
+                        help = 'Skip the normalization step for the reference file')
+    parser.add_argument('-skip_hyp_normalization', 
+                        action = 'store_true',
+                        help = 'Skip the normalization step for the hypothesis file')
+    parser.add_argument('-skip-normalization',
+                        action = 'store_true',
+                        help = 'Skip the normalization step for both hypothesis and reference files')
 
     args = parser.parse_args()
 
@@ -29,7 +38,12 @@
         interface.main()
     else:
         print('Running benchmarking')
-        benchmarking = pipeline.Pipeline(args.hypfile[0], args.hypfile[1], args.reffile[0], args.reffile[1], kind=args.kind)
+        skip_ref_norm = args.skip_ref_normalization
+        skip_hyp_norm = args.skip_hyp_normalization
+        if args.skip_normalization:  # combined flag: skip normalization for both files
+            skip_ref_norm = True
+            skip_hyp_norm = True
+        benchmarking = pipeline.Pipeline(args.hypfile[0], args.hypfile[1], args.reffile[0], args.reffile[1], kind=args.kind, skip_ref_norm=skip_ref_norm, skip_hyp_norm=skip_hyp_norm)
         benchmarking.main()
         pipeline.process_results(kind=args.kind)
 
diff --git a/ASR_NL_benchmark/pipeline.py b/ASR_NL_benchmark/pipeline.py
index fdd6c9d..ce44dac 100644
--- a/ASR_NL_benchmark/pipeline.py
+++ b/ASR_NL_benchmark/pipeline.py
@@ -26,7 +26,7 @@ def set_logging(logpath):
     return logging
 
 
-def run_pipeline(hypfile, reffile):
+def run_pipeline(hypfile, reffile, skip_ref_norm, skip_hyp_norm):
     """ Validates and Normalizes the hyp and ref file and runs them trough sclite
     Args:
         hypfile: the hypothesis file
@@ -37,9 +37,11 @@ def run_pipeline(hypfile, reffile):
     reffile.validate(great_expectations_validation)
 
     # Normalize
-    reffile.clean_text(replace_numbers_and_symbols)
+    if not skip_ref_norm:
+        reffile.clean_text(replace_numbers_and_symbols)
     reffile.export(os.path.join(os.path.sep,'input',f'{reffile.name}_normalized.{reffile.extension}'))
-    hypfile.clean_text(replace_numbers_and_symbols)
+    if not skip_hyp_norm:
+        hypfile.clean_text(replace_numbers_and_symbols)
     hypfile.export(os.path.join(os.path.sep,'input',f'{hypfile.name}_normalized.{hypfile.extension}'))
 
     #Create results folder if not exists:
@@ -210,7 +212,7 @@ def process_input(hypfile_arg, reffile_arg):
 
 
 class Pipeline():
-    def __init__(self, hypfile_input_path, hypextension, reffile_input_path, refextension, kind):
+    def __init__(self, hypfile_input_path, hypextension, reffile_input_path, refextension, kind, skip_ref_norm, skip_hyp_norm):
         self.progress = 0
         self.failed = 0
         self.hypfile_input_path = os.path.join(os.path.sep,'input',hypfile_input_path)
@@ -218,11 +220,15 @@ def __init__(self, hypfile_input_path, hypextension, reffile_input_path, refexte
         self.hypextension = hypextension
         self.refextension = refextension
         self.kind = kind
+        self.skip_ref_norm = skip_ref_norm
+        self.skip_hyp_norm = skip_hyp_norm
         self.logging = set_logging(logpath=os.path.join(os.path.sep,'input',f'{date.today()}_logging.log'))
         self.logging.info(f"hypfile path from terminal: {hypfile_input_path}")
         self.logging.info(f"reffile path from terminal: {reffile_input_path}")
         self.logging.info(f"Pipeline class' hypfile path: {self.hypfile_input_path}")
         self.logging.info(f"Pipeline class' reffile path: {self.reffile_input_path}")
+        self.logging.info(f"Skip reffile normalization: {self.skip_ref_norm}")
+        self.logging.info(f"Skip hypfile normalization: {self.skip_hyp_norm}")
 
     def main(self):
         hyp_list, ref_list = process_input(self.hypfile_input_path, self.reffile_input_path)
@@ -235,7 +241,7 @@ def main(self):
                 # Parse input
                 reffile = STM(reffile_path, self.refextension)
                 hypfile = CTM(hypfile_path, self.hypextension)
-                run_pipeline(hypfile, reffile)
+                run_pipeline(hypfile, reffile, self.skip_ref_norm, self.skip_hyp_norm)
                 done += 1
                 self.progress = done/total
             except:

From 2250756aaba42eb7c7d738111ce2444944761736 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Fri, 17 Nov 2023 15:47:26 +0100
Subject: [PATCH 07/28] Add sclite -D flag for optional words

---
 ASR_NL_benchmark/pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ASR_NL_benchmark/pipeline.py b/ASR_NL_benchmark/pipeline.py
index ce44dac..52d2608 100644
--- a/ASR_NL_benchmark/pipeline.py
+++ b/ASR_NL_benchmark/pipeline.py
@@ -58,7 +58,7 @@ def run_pipeline(hypfile, reffile, skip_ref_norm, skip_hyp_norm):
     run = os.system(
         f"csrfilt.sh -s -i stm {os.path.join('ASR_NL_benchmark','variations.glm')} < {reffile.normalized_path} > {reffile.variation_path}")
     run = os.system(
-        f"sclite -h {hypfile.variation_path} {hypfile.extension} -r {reffile.variation_path} {reffile.extension} -m hyp -O {os.path.join(os.path.sep,'input','results')} -o dtl spk")
+        f"sclite -D -h {hypfile.variation_path} {hypfile.extension} -r {reffile.variation_path} {reffile.extension} -m hyp -O {os.path.join(os.path.sep,'input','results')} -o dtl spk")
 
 def calculate_wer(df):
     """ Calculates the word error rate and adds the collumn 'product' to the dataframe

From b08506ce891147be2fc1a9cffe3ec5485177c59c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Fri, 17 Nov 2023 17:12:52 +0100
Subject: [PATCH 08/28] Testing skip normalization in interface

---
 ASR_NL_benchmark/templates/select_files.html | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/ASR_NL_benchmark/templates/select_files.html b/ASR_NL_benchmark/templates/select_files.html
index 8355762..ff634ac 100644
--- a/ASR_NL_benchmark/templates/select_files.html
+++ b/ASR_NL_benchmark/templates/select_files.html
@@ -30,8 +30,13 @@ <h1> Select Hypothese and Reference files or folders </h1>
           <input type="text" class="form-control" id="kind" name="kind" placeholder="Name of speech recognizer">
           <label>Path to hypothesis file or folder</label>
           <input type="text" class="form-control" id="hyp" name="hyp" placeholder="Hyp File or folder">
+          <input type="checkbox" id="skip-hyp-norm">
+          <p>Skip the normalization step for the hyp file(s)</p>
           <label>Path to reference file or folder</label>
-          <input type="text" class="form-control" id="ref" name="ref" placeholder="Ref File or folder"><button type="submit" class="btn btn-primary" >Submit</button>
+          <input type="text" class="form-control" id="ref" name="ref" placeholder="Ref File or folder">
+          <input type="checkbox" id="skip-ref-norm">
+          <p>Skip the normalization step for the ref file(s)</p>
+          <button type="submit" class="btn btn-primary" >Submit</button>
       </form>
   </div>
   </div>

From a12f09b761d46e8d3c4f923ecd3615b043bc7521 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Fri, 17 Nov 2023 17:31:23 +0100
Subject: [PATCH 09/28] More interface testing

---
 ASR_NL_benchmark/interface.py                | 4 ++++
 ASR_NL_benchmark/templates/select_files.html | 6 ++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/ASR_NL_benchmark/interface.py b/ASR_NL_benchmark/interface.py
index 394fd27..07f3eba 100644
--- a/ASR_NL_benchmark/interface.py
+++ b/ASR_NL_benchmark/interface.py
@@ -20,6 +20,10 @@ def upload_page():
         hyp = os.path.join(os.path.sep,'input',request.form.get('hyp'))
         ref = os.path.join(os.path.sep,'input',request.form.get('ref'))
         kind = request.form.get('kind')
+        skip_ref_norm = request.form.get('skip_ref_norm')
+        skip_hyp_norm = request.form.get('skip_hyp_norm')
+        print(skip_hyp_norm)
+        print(skip_ref_norm)
         global benchmarking
         benchmarking = pipeline.Pipeline(hyp, 'ctm', ref, 'stm', kind)
         Thread(target=benchmarking.main).start()
diff --git a/ASR_NL_benchmark/templates/select_files.html b/ASR_NL_benchmark/templates/select_files.html
index ff634ac..5d903d8 100644
--- a/ASR_NL_benchmark/templates/select_files.html
+++ b/ASR_NL_benchmark/templates/select_files.html
@@ -30,12 +30,10 @@ <h1> Select Hypothese and Reference files or folders </h1>
           <input type="text" class="form-control" id="kind" name="kind" placeholder="Name of speech recognizer">
           <label>Path to hypothesis file or folder</label>
           <input type="text" class="form-control" id="hyp" name="hyp" placeholder="Hyp File or folder">
-          <input type="checkbox" id="skip-hyp-norm">
-          <p>Skip the normalization step for the hyp file(s)</p>
+          <input type="checkbox" class="form-control" id="skip-hyp-norm"> Skip the normalization step for the hyp file(s)
           <label>Path to reference file or folder</label>
           <input type="text" class="form-control" id="ref" name="ref" placeholder="Ref File or folder">
-          <input type="checkbox" id="skip-ref-norm">
-          <p>Skip the normalization step for the ref file(s)</p>
+          <input type="checkbox" class="form-control" id="skip-ref-norm"> Skip the normalization step for the ref file(s)
           <button type="submit" class="btn btn-primary" >Submit</button>
       </form>
   </div>

From ea69c2981a3cf2d88a3184e84d36f20ce09a1a5e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Fri, 17 Nov 2023 17:47:23 +0100
Subject: [PATCH 10/28] Even more interface testing

---
 ASR_NL_benchmark/templates/select_files.html | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/ASR_NL_benchmark/templates/select_files.html b/ASR_NL_benchmark/templates/select_files.html
index 5d903d8..f0351da 100644
--- a/ASR_NL_benchmark/templates/select_files.html
+++ b/ASR_NL_benchmark/templates/select_files.html
@@ -21,7 +21,7 @@
 
 
   <div class="container pt-3 m-3" width="80%">
-  <h1> Select Hypothese and Reference files or folders </h1>
+  <h1> Select Hypothesis and Reference files or folders </h1>
   </div>
   <div class="container pt-3 m-3" width="80%">
   <div class="form-group">
@@ -30,10 +30,12 @@ <h1> Select Hypothese and Reference files or folders </h1>
           <input type="text" class="form-control" id="kind" name="kind" placeholder="Name of speech recognizer">
           <label>Path to hypothesis file or folder</label>
           <input type="text" class="form-control" id="hyp" name="hyp" placeholder="Hyp File or folder">
-          <input type="checkbox" class="form-control" id="skip-hyp-norm"> Skip the normalization step for the hyp file(s)
+          <label for="skip-hyp-norm">Skip the normalization step for the hypothesis file(s)</label>
+          <input type="checkbox" class="form-control" id="skip-hyp-norm"> 
           <label>Path to reference file or folder</label>
           <input type="text" class="form-control" id="ref" name="ref" placeholder="Ref File or folder">
-          <input type="checkbox" class="form-control" id="skip-ref-norm"> Skip the normalization step for the ref file(s)
+          <label for="skip-ref-norm">Skip the normalization step for the reference file(s)</label>
+          <input type="checkbox" class="form-control" id="skip-ref-norm">
           <button type="submit" class="btn btn-primary" >Submit</button>
       </form>
   </div>

From f1ad8202e39956d2a88661e510bcaa81b4fd1ed1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Sat, 18 Nov 2023 10:08:25 +0100
Subject: [PATCH 11/28] Hopefully last UI changes

---
 ASR_NL_benchmark/templates/select_files.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ASR_NL_benchmark/templates/select_files.html b/ASR_NL_benchmark/templates/select_files.html
index f0351da..c834a15 100644
--- a/ASR_NL_benchmark/templates/select_files.html
+++ b/ASR_NL_benchmark/templates/select_files.html
@@ -30,11 +30,11 @@ <h1> Select Hypothesis and Reference files or folders </h1>
           <input type="text" class="form-control" id="kind" name="kind" placeholder="Name of speech recognizer">
           <label>Path to hypothesis file or folder</label>
           <input type="text" class="form-control" id="hyp" name="hyp" placeholder="Hyp File or folder">
-          <label for="skip-hyp-norm">Skip the normalization step for the hypothesis file(s)</label>
+          <label>Skip the normalization step for the hypothesis file(s)</label>
           <input type="checkbox" class="form-control" id="skip-hyp-norm"> 
           <label>Path to reference file or folder</label>
           <input type="text" class="form-control" id="ref" name="ref" placeholder="Ref File or folder">
-          <label for="skip-ref-norm">Skip the normalization step for the reference file(s)</label>
+          <label>Skip the normalization step for the reference file(s)</label>
           <input type="checkbox" class="form-control" id="skip-ref-norm">
           <button type="submit" class="btn btn-primary" >Submit</button>
       </form>

From 64073b821e7dfcc18e5a712beadc467729ff440e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Sun, 19 Nov 2023 09:11:17 +0100
Subject: [PATCH 12/28] More UI testing

---
 ASR_NL_benchmark/templates/select_files.html | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ASR_NL_benchmark/templates/select_files.html b/ASR_NL_benchmark/templates/select_files.html
index c834a15..93619bf 100644
--- a/ASR_NL_benchmark/templates/select_files.html
+++ b/ASR_NL_benchmark/templates/select_files.html
@@ -30,12 +30,12 @@ <h1> Select Hypothesis and Reference files or folders </h1>
           <input type="text" class="form-control" id="kind" name="kind" placeholder="Name of speech recognizer">
           <label>Path to hypothesis file or folder</label>
           <input type="text" class="form-control" id="hyp" name="hyp" placeholder="Hyp File or folder">
-          <label>Skip the normalization step for the hypothesis file(s)</label>
-          <input type="checkbox" class="form-control" id="skip-hyp-norm"> 
+          <label for="skip-hyp-norm">Skip the normalization step for the hypothesis file(s)</label>
+          <input type="checkbox" id="skip-hyp-norm"> 
           <label>Path to reference file or folder</label>
           <input type="text" class="form-control" id="ref" name="ref" placeholder="Ref File or folder">
-          <label>Skip the normalization step for the reference file(s)</label>
-          <input type="checkbox" class="form-control" id="skip-ref-norm">
+          <label for="skip-ref-norm">Skip the normalization step for the reference file(s)</label>
+          <input type="checkbox" id="skip-ref-norm">
           <button type="submit" class="btn btn-primary" >Submit</button>
       </form>
   </div>

From f6a3aaba416cbf5e956317e6c0b059e077dc3f46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Sun, 19 Nov 2023 09:16:46 +0100
Subject: [PATCH 13/28] Checkbox for skipping now visible

---
 ASR_NL_benchmark/templates/select_files.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ASR_NL_benchmark/templates/select_files.html b/ASR_NL_benchmark/templates/select_files.html
index 93619bf..0352b86 100644
--- a/ASR_NL_benchmark/templates/select_files.html
+++ b/ASR_NL_benchmark/templates/select_files.html
@@ -30,12 +30,12 @@ <h1> Select Hypothesis and Reference files or folders </h1>
           <input type="text" class="form-control" id="kind" name="kind" placeholder="Name of speech recognizer">
           <label>Path to hypothesis file or folder</label>
           <input type="text" class="form-control" id="hyp" name="hyp" placeholder="Hyp File or folder">
-          <label for="skip-hyp-norm">Skip the normalization step for the hypothesis file(s)</label>
           <input type="checkbox" id="skip-hyp-norm"> 
+          <label for="skip-hyp-norm">Skip the normalization step for the hypothesis file(s)</label>
           <label>Path to reference file or folder</label>
           <input type="text" class="form-control" id="ref" name="ref" placeholder="Ref File or folder">
-          <label for="skip-ref-norm">Skip the normalization step for the reference file(s)</label>
           <input type="checkbox" id="skip-ref-norm">
+          <label for="skip-ref-norm">Skip the normalization step for the reference file(s)</label>
           <button type="submit" class="btn btn-primary" >Submit</button>
       </form>
   </div>

From 85da6e5d343c51c961be6b5b0b92640d2c9cb0ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Mon, 20 Nov 2023 10:48:01 +0100
Subject: [PATCH 14/28] Add a break between UI elements

---
 ASR_NL_benchmark/templates/select_files.html | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ASR_NL_benchmark/templates/select_files.html b/ASR_NL_benchmark/templates/select_files.html
index 0352b86..41a8278 100644
--- a/ASR_NL_benchmark/templates/select_files.html
+++ b/ASR_NL_benchmark/templates/select_files.html
@@ -32,10 +32,12 @@ <h1> Select Hypothesis and Reference files or folders </h1>
           <input type="text" class="form-control" id="hyp" name="hyp" placeholder="Hyp File or folder">
           <input type="checkbox" id="skip-hyp-norm"> 
           <label for="skip-hyp-norm">Skip the normalization step for the hypothesis file(s)</label>
+          <br>
           <label>Path to reference file or folder</label>
           <input type="text" class="form-control" id="ref" name="ref" placeholder="Ref File or folder">
           <input type="checkbox" id="skip-ref-norm">
           <label for="skip-ref-norm">Skip the normalization step for the reference file(s)</label>
+          <br>
           <button type="submit" class="btn btn-primary" >Submit</button>
       </form>
   </div>

From 30211cd027314594cac57411a1fbfa0499883549 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Mon, 20 Nov 2023 12:21:37 +0100
Subject: [PATCH 15/28] Make it look better?

---
 ASR_NL_benchmark/templates/select_files.html | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ASR_NL_benchmark/templates/select_files.html b/ASR_NL_benchmark/templates/select_files.html
index 41a8278..f2eaefd 100644
--- a/ASR_NL_benchmark/templates/select_files.html
+++ b/ASR_NL_benchmark/templates/select_files.html
@@ -28,11 +28,13 @@ <h1> Select Hypothesis and Reference files or folders </h1>
       <form method="POST">
           <label>Name of speech recognizer</label>
           <input type="text" class="form-control" id="kind" name="kind" placeholder="Name of speech recognizer">
+          <p>_______________________________</p>
           <label>Path to hypothesis file or folder</label>
           <input type="text" class="form-control" id="hyp" name="hyp" placeholder="Hyp File or folder">
           <input type="checkbox" id="skip-hyp-norm"> 
           <label for="skip-hyp-norm">Skip the normalization step for the hypothesis file(s)</label>
           <br>
+          <p>_______________________________</p>
           <label>Path to reference file or folder</label>
           <input type="text" class="form-control" id="ref" name="ref" placeholder="Ref File or folder">
           <input type="checkbox" id="skip-ref-norm">

From d4b5a237702ee16110ffa5fc2e64441eb8f2dee1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Mon, 20 Nov 2023 12:40:24 +0100
Subject: [PATCH 16/28] Final touches for the interface

---
 ASR_NL_benchmark/interface.py                | 2 +-
 ASR_NL_benchmark/templates/select_files.html | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ASR_NL_benchmark/interface.py b/ASR_NL_benchmark/interface.py
index 07f3eba..5b5af7d 100644
--- a/ASR_NL_benchmark/interface.py
+++ b/ASR_NL_benchmark/interface.py
@@ -25,7 +25,7 @@ def upload_page():
         print(skip_hyp_norm)
         print(skip_ref_norm)
         global benchmarking
-        benchmarking = pipeline.Pipeline(hyp, 'ctm', ref, 'stm', kind)
+        benchmarking = pipeline.Pipeline(hyp, 'ctm', ref, 'stm', kind, skip_ref_norm, skip_hyp_norm)
         Thread(target=benchmarking.main).start()
         return redirect(f'/progress?ref={ref}&hyp={hyp}')
     return render_template('select_files.html')
diff --git a/ASR_NL_benchmark/templates/select_files.html b/ASR_NL_benchmark/templates/select_files.html
index f2eaefd..6ca7aec 100644
--- a/ASR_NL_benchmark/templates/select_files.html
+++ b/ASR_NL_benchmark/templates/select_files.html
@@ -31,13 +31,13 @@ <h1> Select Hypothesis and Reference files or folders </h1>
           <p>_______________________________</p>
           <label>Path to hypothesis file or folder</label>
           <input type="text" class="form-control" id="hyp" name="hyp" placeholder="Hyp File or folder">
-          <input type="checkbox" id="skip-hyp-norm"> 
+          <input type="checkbox" id="skip-hyp-norm" value="skip-hyp-norm"> 
           <label for="skip-hyp-norm">Skip the normalization step for the hypothesis file(s)</label>
           <br>
           <p>_______________________________</p>
           <label>Path to reference file or folder</label>
           <input type="text" class="form-control" id="ref" name="ref" placeholder="Ref File or folder">
-          <input type="checkbox" id="skip-ref-norm">
+          <input type="checkbox" id="skip-ref-norm" value="skip-ref-norm">
           <label for="skip-ref-norm">Skip the normalization step for the reference file(s)</label>
           <br>
           <button type="submit" class="btn btn-primary" >Submit</button>

From f0e687ec2c866f891461ce6751284f3dbb938bd7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Mon, 20 Nov 2023 13:10:54 +0100
Subject: [PATCH 17/28] Fix issue with getting values from form submit

---
 ASR_NL_benchmark/interface.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ASR_NL_benchmark/interface.py b/ASR_NL_benchmark/interface.py
index 5b5af7d..7bd1186 100644
--- a/ASR_NL_benchmark/interface.py
+++ b/ASR_NL_benchmark/interface.py
@@ -20,8 +20,8 @@ def upload_page():
         hyp = os.path.join(os.path.sep,'input',request.form.get('hyp'))
         ref = os.path.join(os.path.sep,'input',request.form.get('ref'))
         kind = request.form.get('kind')
-        skip_ref_norm = request.form.get('skip_ref_norm')
-        skip_hyp_norm = request.form.get('skip_hyp_norm')
+        skip_ref_norm = request.form.get('skip-ref-norm')
+        skip_hyp_norm = request.form.get('skip-hyp-norm')
         print(skip_hyp_norm)
         print(skip_ref_norm)
         global benchmarking

From f6b5f0a241333d9b10ed54ce41c2eabd2eec5ab8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Mon, 20 Nov 2023 13:29:27 +0100
Subject: [PATCH 18/28] Hopefully works this time (changed value to name)

---
 ASR_NL_benchmark/templates/select_files.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ASR_NL_benchmark/templates/select_files.html b/ASR_NL_benchmark/templates/select_files.html
index 6ca7aec..b946f42 100644
--- a/ASR_NL_benchmark/templates/select_files.html
+++ b/ASR_NL_benchmark/templates/select_files.html
@@ -31,13 +31,13 @@ <h1> Select Hypothesis and Reference files or folders </h1>
           <p>_______________________________</p>
           <label>Path to hypothesis file or folder</label>
           <input type="text" class="form-control" id="hyp" name="hyp" placeholder="Hyp File or folder">
-          <input type="checkbox" id="skip-hyp-norm" value="skip-hyp-norm"> 
+          <input type="checkbox" id="skip-hyp-norm" name="skip-hyp-norm"> 
           <label for="skip-hyp-norm">Skip the normalization step for the hypothesis file(s)</label>
           <br>
           <p>_______________________________</p>
           <label>Path to reference file or folder</label>
           <input type="text" class="form-control" id="ref" name="ref" placeholder="Ref File or folder">
-          <input type="checkbox" id="skip-ref-norm" value="skip-ref-norm">
+          <input type="checkbox" id="skip-ref-norm" name="skip-ref-norm">
           <label for="skip-ref-norm">Skip the normalization step for the reference file(s)</label>
           <br>
           <button type="submit" class="btn btn-primary" >Submit</button>

From 526ffd1d8d2f1b7fd5ee0209508bb15c1f563569 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Mon, 20 Nov 2023 14:38:01 +0100
Subject: [PATCH 19/28] Add variations from top 20 confusion pairs

---
 ASR_NL_benchmark/interface.py   |  2 --
 ASR_NL_benchmark/variations.glm | 14 +++++++++++---
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/ASR_NL_benchmark/interface.py b/ASR_NL_benchmark/interface.py
index 7bd1186..e05818d 100644
--- a/ASR_NL_benchmark/interface.py
+++ b/ASR_NL_benchmark/interface.py
@@ -22,8 +22,6 @@ def upload_page():
         kind = request.form.get('kind')
         skip_ref_norm = request.form.get('skip-ref-norm')
         skip_hyp_norm = request.form.get('skip-hyp-norm')
-        print(skip_hyp_norm)
-        print(skip_ref_norm)
         global benchmarking
         benchmarking = pipeline.Pipeline(hyp, 'ctm', ref, 'stm', kind, skip_ref_norm, skip_hyp_norm)
         Thread(target=benchmarking.main).start()
diff --git a/ASR_NL_benchmark/variations.glm b/ASR_NL_benchmark/variations.glm
index 2c5c305..7feb56c 100644
--- a/ASR_NL_benchmark/variations.glm
+++ b/ASR_NL_benchmark/variations.glm
@@ -13,8 +13,6 @@ z'n => zijn / [ ] __ [ ]
 'k => ik / [ ] __ [ ]
 'r => er / [ ] __ [ ]
 'ns => eens / [ ] __ [ ]
-ie => hij / [ ] __ [ ]
-da's => dat is / [ ] __ [ ]
 d'ruit => eruit / [ ] __ [ ]
 restaurant- => restaurant / [ ] __ [ ]
 jeugd- => jeugd / [ ] __ [ ]
@@ -59,7 +57,6 @@ tewerk => te werk / [ ] __ [ ]
 [concept-] => [{ concept- / concept }] / [ ] __ [ ]
 [NAVO-] => [{ NAVO- / NAVO }] / [ ] __ [ ]
 [uh] => [{ uh / %HESITATION }] / [ ] __ [ ]
-[BNR-nieuwsradio] => [{ BNR-nieuwsradio / BNR nieuwsradio }] / [ ] __ [ ]
 ;;
 ;; BN-VL
 [Darfour] => [{ Darfour / Darfur }] / [ ] __ [ ]
@@ -90,3 +87,14 @@ tewerk => te werk / [ ] __ [ ]
 [marktonderzoekbureau] => [{ marktonderzoekbureau / marktonderzoeksbureau }] / [ ] __ [ ]
 [Noordwestkust] => [{ Noordwestkust / Noord-Westkust }] / [ ] __ [ ]
 [carnavalvierders] => [{ carnavalvierders / carnavalsvierders }] / [ ] __ [ ]
+
+;; Whisper evaluation on N-Best
+;; BN-NL
+ie => hij / [ ] __ [ ]
+da's => dat is / [ ] __ [ ]
+[BNR-nieuwsradio] => [{ BNR-nieuwsradio / BNR nieuwsradio }] / [ ] __ [ ]
+[Moszkowicz] => [{ Moszkowicz / Moskovic }] / [ ] __ [ ]
+[Kooi] => [{ Kooi / Kooij }] / [ ] __ [ ]
+[Araújo] => [{ Araújo / Araujo }] / [ ] __ [ ]
+[Bagdad] => [{ Bagdad / Baghdad }] / [ ] __ [ ]
+[Holleeder] => [{ Holleeder / Holleder }] / [ ] __ [ ]

From ff18c715adb24015a7dd663c2452f33e2914ebf6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Mon, 20 Nov 2023 16:59:14 +0100
Subject: [PATCH 20/28] One final variation

---
 ASR_NL_benchmark/variations.glm | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ASR_NL_benchmark/variations.glm b/ASR_NL_benchmark/variations.glm
index 7feb56c..de5704e 100644
--- a/ASR_NL_benchmark/variations.glm
+++ b/ASR_NL_benchmark/variations.glm
@@ -98,3 +98,4 @@ da's => dat is / [ ] __ [ ]
 [Araújo] => [{ Araújo / Araujo }] / [ ] __ [ ]
 [Bagdad] => [{ Bagdad / Baghdad }] / [ ] __ [ ]
 [Holleeder] => [{ Holleeder / Holleder }] / [ ] __ [ ]
+[Imac] => [{ Imac / Imaç }] / [ ] __ [ ]

From 2a36c17427cd3c9e2a77de74f78a9272f81fe18c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Tue, 5 Dec 2023 14:06:41 +0100
Subject: [PATCH 21/28] Test removing -m hyp from sclite command in pipeline

---
 ASR_NL_benchmark/pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ASR_NL_benchmark/pipeline.py b/ASR_NL_benchmark/pipeline.py
index 52d2608..5b1fbc0 100644
--- a/ASR_NL_benchmark/pipeline.py
+++ b/ASR_NL_benchmark/pipeline.py
@@ -58,7 +58,7 @@ def run_pipeline(hypfile, reffile, skip_ref_norm, skip_hyp_norm):
     run = os.system(
         f"csrfilt.sh -s -i stm {os.path.join('ASR_NL_benchmark','variations.glm')} < {reffile.normalized_path} > {reffile.variation_path}")
     run = os.system(
-        f"sclite -D -h {hypfile.variation_path} {hypfile.extension} -r {reffile.variation_path} {reffile.extension} -m hyp -O {os.path.join(os.path.sep,'input','results')} -o dtl spk")
+        f"sclite -D -h {hypfile.variation_path} {hypfile.extension} -r {reffile.variation_path} {reffile.extension} -O {os.path.join(os.path.sep,'input','results')} -o dtl spk")
 
 def calculate_wer(df):
     """ Calculates the word error rate and adds the collumn 'product' to the dataframe

From 04f99c3cac03b42240bdb3b99596328e23b7c14c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Tue, 5 Dec 2023 15:08:18 +0100
Subject: [PATCH 22/28] Add prf output for a more detailed breakdown and restore -m hyp

---
 ASR_NL_benchmark/pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ASR_NL_benchmark/pipeline.py b/ASR_NL_benchmark/pipeline.py
index 5b1fbc0..33145d7 100644
--- a/ASR_NL_benchmark/pipeline.py
+++ b/ASR_NL_benchmark/pipeline.py
@@ -58,7 +58,7 @@ def run_pipeline(hypfile, reffile, skip_ref_norm, skip_hyp_norm):
     run = os.system(
         f"csrfilt.sh -s -i stm {os.path.join('ASR_NL_benchmark','variations.glm')} < {reffile.normalized_path} > {reffile.variation_path}")
     run = os.system(
-        f"sclite -D -h {hypfile.variation_path} {hypfile.extension} -r {reffile.variation_path} {reffile.extension} -O {os.path.join(os.path.sep,'input','results')} -o dtl spk")
+        f"sclite -D -h {hypfile.variation_path} {hypfile.extension} -r {reffile.variation_path} {reffile.extension} -m hyp -O {os.path.join(os.path.sep,'input','results')} -o prf dtl spk")
 
 def calculate_wer(df):
     """ Calculates the word error rate and adds the collumn 'product' to the dataframe

From 528820a4007275e7380c1928f2812f5ff24fc30d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Thu, 7 Dec 2023 13:42:05 +0100
Subject: [PATCH 23/28] Add another variation for Moszkowicz

---
 ASR_NL_benchmark/variations.glm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ASR_NL_benchmark/variations.glm b/ASR_NL_benchmark/variations.glm
index de5704e..54b40b9 100644
--- a/ASR_NL_benchmark/variations.glm
+++ b/ASR_NL_benchmark/variations.glm
@@ -93,7 +93,7 @@ tewerk => te werk / [ ] __ [ ]
 ie => hij / [ ] __ [ ]
 da's => dat is / [ ] __ [ ]
 [BNR-nieuwsradio] => [{ BNR-nieuwsradio / BNR nieuwsradio }] / [ ] __ [ ]
-[Moszkowicz] => [{ Moszkowicz / Moskovic }] / [ ] __ [ ]
+[Moszkowicz] => [{ Moszkowicz / Moskovic / Moskowitz }] / [ ] __ [ ]
 [Kooi] => [{ Kooi / Kooij }] / [ ] __ [ ]
 [Araújo] => [{ Araújo / Araujo }] / [ ] __ [ ]
 [Bagdad] => [{ Bagdad / Baghdad }] / [ ] __ [ ]

From 7b675eccf9f2e1b3ab8a78b8f72cf0d1528f1814 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Wed, 3 Jan 2024 12:45:51 +0200
Subject: [PATCH 24/28] Update part of the README

---
 README.md | 106 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 56 insertions(+), 50 deletions(-)

diff --git a/README.md b/README.md
index e322a28..a3f8e74 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # ASR-NL-benchmark
 ## Description
-ASR-NL-benchmark is a python package to evaluate and compare the performance of speech-to-text for the Dutch language. Universities and Dutch media companies joined forces to develop this package that makes it easier to compare the performance of various open-source or commercial speech-to-text solutions on Dutch broadcast media. This package wraps around the famous sclite tool (part of [SCTK](https://github.com/usnistgov/SCTK) that has been used for decades in the speech-to-text benchmark evaluations organised by NIST in the US. Further, the package contains several preprocessing files and connectors to databases.
+ASR-NL-benchmark is a python package to evaluate and compare the performance of speech-to-text for the Dutch language. Universities and Dutch media companies joined forces to develop this package that makes it easier to compare the performance of various open-source or commercial speech-to-text solutions on Dutch broadcast media. This package wraps around the famous sclite tool (part of [SCTK](https://github.com/usnistgov/SCTK) that has been used for decades in the speech-to-text benchmark evaluations organised by NIST in the US). Further, the package contains several preprocessing files and connectors to databases.
 
 ## How to use
 ### How to: Create a reference file
@@ -10,79 +10,83 @@ Reference files can be created using tooling such as:
 
 - [ELAN](https://archive.mpi.nl/tla/elan/download)
 
-A full annotation protocol can be found [here](https://github.com/opensource-spraakherkenning-nl/ASR-NL-benchmark/issues/7).
+<!-- A full annotation protocol can be found [here](https://github.com/opensource-spraakherkenning-nl/ASR-NL-benchmark/issues/7). -->
 
 Please check the guidelines for the reference file in the section below.
 
 
 ### How to: Install
-- Install docker
-- Pull the docker image: <code>docker pull asrnlbenchmark/asr-nl-benchmark</code>
+- Install [Docker](https://www.docker.com/products/docker-desktop/)
+- Pull the Docker image: <code>docker pull asrnlbenchmark/asr-nl-benchmark</code>
 
 ### How to: Run Using the command line only
 
 
 In order to run the benchmarking tool over a (set of) local hyp and ref file(s) we need docker to mount the local directory where the input files are located. The output files of the benchmarking tool will appear in the same folder. 
 
-The following line runs the benchmarking tool over a local hyp and ref file. Use the absolute file path as the value for the variables SOURCE. For HYPFILENAME use the filename of the hypfile and for REFFILENAME the name of the reffile.
+The following line runs the benchmarking tool over a local hyp and ref file. Use the absolute file path as the value for the variables `SOURCE`. For `HYPFILENAME` use the filename of the hypfile and for `REFFILENAME` the name of the reffile. 
 
-- run: <code> docker run -it  --mount type=bind,source=SOURCE,target=/input  asrnlbenchmark/asr-nl-benchmark:latest python ASR-NL-benchmark/src/app.py -hyp /input/HYPFILENAME ctm -ref /input/REFFILENAME stm </code>
+`HYPFILENAME` and `REFFILENAME` can also be the names of the folders containing the *hypfiles* and *reffiles* respectively. **Make sure** to create a folder named `results` in the `SOURCE` folder before running the command below:
 
- The results (.dtl, .spk, and .csv format) can be found inside a folder named 'results' which can be found on the local 'SOURCE' location (see above). 
+- <code> docker run -it  --mount type=bind,source=SOURCE,target=/input  asrnlbenchmark/asr-nl-benchmark:latest python ASR_NL_benchmark -hyp HYPFILENAME ctm -ref REFFILENAME stm </code>
 
+The results (.dtl, .prf, .spk, and .csv format) can be found inside the `results` folder which can be found in the local `SOURCE` location (see above). 
 
-### How to: Use the User Interface
 
-In order to open a User Interface, run the same command as above but now with the optional argument -interface set to TRUE:
+### How to: Use the Interface
 
-- run: <code> docker run -it  --mount type=bind,source=SOURCE,target=/input  asrnlbenchmark/asr-nl-benchmark:latest python ASR-NL-benchmark/src/app.py -interactive True </code>
+In order to open a User Interface, run a command similar to the one above but now with the optional argument `-interface` set to `True`:
 
-Use a web browser to access the UI by navigating to "http://loaclhost:5000" : 
+- <code> docker run -it  --mount type=bind,source=SOURCE,target=/input  asrnlbenchmark/asr-nl-benchmark:latest python ASR_NL_benchmark -interactive True </code>
 
-- Navigate to http://localhost:5000/ 
+Use a web browser to access the UI by navigating to "http://localhost:5000"
 
 Within the tab Select folder, enter the path to the hypotheses and reference files:
 
 - Enter the path of the hyp or the path to a folder containing a set of hyp files: (e.g. "ref_folder" or "ref_file.stm")
 - Enter the path of the ref file or the path to a folder containing a set of ref files: (e.g. "hyp_folder" or "hyp_file.stm")
-- click submit
+- click "Submit"
 
-A progress bar will appear. As soon as the benchmarking is ready, you will be forwarded to the results. The results (.dtl, .spk, and .csv format) can be found inside a folder named 'results' which can be found on the local 'SOURCE' location (see above). 
+A progress bar will appear. As soon as the benchmarking is ready, you will be forwarded to the results. The results (.dtl, .prf, .spk, and .csv format) can be found inside a folder named `results` which can be found on the local `SOURCE` location (see above).
+
+There is a visual bug when forwarding to the results page after benchmarking is complete where the page is blank. To fix it, refresh the page.
 
 
 ### How to: Interpret the results
-The final results are saved in .csv format inside a folder named 'results' stored locally on the 'SOURCE' location (see above). Those results are based upon the .dtl and .spk output files as generated by sclite.
+The final results are saved in .csv format inside a folder named `results` stored locally on the `SOURCE` location (see above). Those results are based upon the .dtl and .spk output files as generated by sclite.
 
 #### The different output files
-- .dtl files - Detailed Overall Report as returned by sclite
+- .dtl files - Detailed overall report as returned by sclite
+- .prf files - Detailed report including string alignments between hypothesis and reference as returned by sclite
 - .spk files - Report with scoring for a speaker as returned by sclite
 - .csv files - Overall results of the benchmarking as shown in the interface
 
 
 ## More about the pipeline
-### Normalisation
-Manual transcripts (used as reference files) sometimes contain abbreviations (e.g. "'n" instead of "een"), symbols (e.g. "&" instead of "en") and numbers ("4" instead of "vier"). The reference files often contain the written form of the words instead. Since we don't want to penalize the speech-to-text tooling or algorithm for such differences we normalize both, the reference and hypotheses files.
+### Normalization
+Manual transcripts (used as reference files) sometimes contain abbreviations (e.g. "'n" instead of "een"), symbols (e.g. "&" instead of "en") and numbers ("4" instead of "vier"). The reference files often contain the written form of the words instead. Since we don't want to penalize the speech-to-text tooling or algorithm for such differences, we normalize both the reference and hypothesis files.
+
+Normalization replacements:
 
-Normalisation replacements:
+- Symbols:
+    - '%' => "procent"
+    - '°' => "graden"
+    - '&' => "en"
+    - '€' => "euro"
 
-Symbols:
-- '%' => " procent"
-- '°' => " graden")
-- '&' => " en"
-- '€' => " euro"
+- Double spaces:
+    - '__' => '_'
 
-Double spaces:
-- '  ' =>' ')
-Numbers (i.a.):
-- 4 => "vier"
-- 4.5 => "vier punt vijf"
-- 4,3 => "vier komma drie"
+- Numbers (e.g.):
+    - 4 => "vier"
+    - 4.5 => "vier punt vijf"
+    - 4,3 => "vier komma drie"
 
-Combinations (e.g.):
-- 12,3% => 'twaalf komma drie procent'
+- Combinations (e.g.):
+    - 12,3% => 'twaalf komma drie procent'
 
-### Variation
-In order to deal with spelling variations, this tool applies a .glm file to the reference and hypothesis files. This .glm file contains a list of words with their spelling variations and can be found [here](https://github.com/opensource-spraakherkenning-nl/ASR-NL-benchmark/blob/3f96f9a9584c8567ffce09abe4ea082f6e6fc8c1/ASR_NL_benchmark/variations.glm). Whereas the normalisation step is typically rule-based, the variations are not. Therefore, we invite you all to adjustment to the glm and to create a pull request with the requested additions.
+### Variations
+In order to deal with spelling variations, this tool applies a `variations.glm` file to the reference and hypothesis files. This .glm file contains a list of words with their spelling variations and can be found [here](https://github.com/opensource-spraakherkenning-nl/ASR_NL_benchmark/blob/main/ASR_NL_benchmark/variations.glm). Whereas the normalisation step is typically rule-based, the variations are not. Therefore, we invite you all to adjustment to the .glm and to create a pull request with the requested additions.
 
 
 ## Guidelines
@@ -92,9 +96,9 @@ In order for the benchmarking tool to match the reference and hypothesis files,
 2. In case you are using subcategories (See Benchmarking subcategories).
 
 ### Benchmarking subcategories
-[PLACEHOLDER]
 
 example:
+
 Without subcategories:
 - program_1.stm
 - program_1.ctm
@@ -121,16 +125,18 @@ The reference file is used as the ground truth. To get the best results, the ref
 In order to create those reference files, we suggest to use a transcription tool like [transcriber](http://trans.sourceforge.net/en/usermanUS.php).
 
 #### Segment Time Mark (STM)
-The Segment Time Mark files, to be used as reference files, consist of a connotation of time marked text segment records. Those segments are separated by a new line and follow the File_id Channel Speaker_id Begin_Time End_Time <Label> Transcript
+The Segment Time Mark files, to be used as reference files, consist of a connotation of time marked text segment records. Those segments are separated by a new line and follow the format:
+
+    File_id Channel Speaker_id Begin_Time End_Time <Label> Transcript
   
-To comment out a line start the line with ';;'
+To comment out a line, start the line with ';;'
 
 ##### Example STM
-;; Some information you want to comment out like a description  
-;; More information you want to include and comment out  
-;; like the name of the transcriber, the version or explanation of labels   
-Your_favorite_tv_show_2021_S1_E1 Speaker_01_Female_Native A 0.000 1.527 <o, f1, female> The first line  
-Your_favorite_tv_show_2021_S1_E1 Speaker_01_Female_Native A 1.530 2.127 <o, f1, male> The second text segment  
+    ;; Some information you want to comment out like a description  
+    ;; More information you want to include and comment out  
+    ;; like the name of the transcriber, the version or explanation of labels   
+    Your_favorite_tv_show_2021_S1_E1 Speaker_01_Female_Native A 0.000 1.527 <o, f1, female> The first line  
+    Your_favorite_tv_show_2021_S1_E1 Speaker_01_Female_Native A 1.530 2.127 <o, f1, male> The second text segment  
 
 
 ### Hypothesis file
@@ -139,19 +145,19 @@ To get the best results the hypothesis file (i.e. the output of a speech recogni
 - utf-8 encoded
 
 #### CTM Format
-The Time Marked Conversation files, to be used as hypothesis files, consist of a connotation of time-marked word records. Those records are separated by a new line and follow the following format:
+The Time Marked Conversation files, to be used as hypothesis files, consist of a connotation of time-marked word records. Those records are separated by a new line and follow the format:
 
-File_id Channel Begin_time Duration Word Confidence
+    File_id Channel Begin_time Duration Word Confidence
 
-To comment out a line start the line with ';;'
+To comment out a line, start the line with ';;'
 
 ##### Example CTM
 
-;; Some infomration you want to comment out like a description  
-;; More information you want to include and comment out  
-Your_favorite_tv_show_2021_S1_E1 A 0.000 0.482 The 0.95  
-Your_favorite_tv_show_2021_S1_E1 A 0.496 0.281 first 0.98  
-Your_favorite_tv_show_2021_S1_E1 A 1.216 0.311 line 0.88  
+    ;; Some infomration you want to comment out like a description  
+    ;; More information you want to include and comment out  
+    Your_favorite_tv_show_2021_S1_E1 A 0.000 0.482 The 0.95  
+    Your_favorite_tv_show_2021_S1_E1 A 0.496 0.281 first 0.98  
+    Your_favorite_tv_show_2021_S1_E1 A 1.216 0.311 line 0.88  
 
 ## Related Documentation
 - [sclite documentation](https://github.com/usnistgov/SCTK/blob/master/doc/sclite.htm)

From 3074dee3c6d396cc78d8d344ddcacd3ed0446cda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Thu, 22 Feb 2024 11:34:23 +0100
Subject: [PATCH 25/28] Add small changes before adding the sc_args
 functionality

Changes include:
- Change nargs for the hypfile and reffile args from '+' to 2
- Reword some comments and help messages
- Remove -skip-normalization as it was redundant
- Make -interactive a store_true flag (it is True in the code when used)
- Change normalization of numbers slightly (add a space after "duizend", as is done in Dutch)
- Reorder the sclite and variation-related lines of code
- Update README with the newly added arguments
---
 ASR_NL_benchmark/__main__.py  | 38 +++++++++++++++++------------------
 ASR_NL_benchmark/normalize.py |  3 +++
 ASR_NL_benchmark/pipeline.py  | 22 +++++++++++---------
 README.md                     |  5 +++++
 4 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/ASR_NL_benchmark/__main__.py b/ASR_NL_benchmark/__main__.py
index c1b7f20..217a611 100644
--- a/ASR_NL_benchmark/__main__.py
+++ b/ASR_NL_benchmark/__main__.py
@@ -6,44 +6,42 @@
 if __name__ == "__main__":
     # Set parser
     parser = argparse.ArgumentParser(description='normalize ref and hyp file')
-    parser.add_argument('-hyp', '--hypfile', nargs='+',
+    parser.add_argument('-hyp', '--hypfile', nargs=2,
                         metavar=('hypfile_name', 'extension'),
-                        default=['ASR_NL_benchmark/data/test_hyp.ctm', 'ctm'], help='help: path to the hypothesis file and its extension')
-    parser.add_argument('-ref', '--reffile', nargs='+',
+                        default=['ASR_NL_benchmark/data/test_hyp.ctm', 'ctm'],
+                        help='path to the hypothesis file and its extension')
+    parser.add_argument('-ref', '--reffile', nargs=2,
                         metavar=('reffile_name', 'extension'),
                         default=['ASR_NL_benchmark/data/test_ref.stm', 'stm'],
-                        help='help: path to the reference file and its extension')
+                        help='path to the reference file and its extension')
     parser.add_argument('-kind', '--kind',
                         metavar=('speechrecognizer'),
                         default='',
-                        help='help: enter the name of your speech recognizer')
+                        help='enter the name of your speech recognizer')
     parser.add_argument('-interactive',
-                        metavar='value',
-                        default='',
-                        help='help: True if you want to use the GUI')
+                        action = 'store_true',
+                        help='if you want to use the GUI')
     parser.add_argument('-skip_ref_normalization', 
                         action = 'store_true',
-                        help = 'Skip the normalization step for the reference file')
+                        help = 'skip the normalization step for the reference file')
     parser.add_argument('-skip_hyp_normalization', 
                         action = 'store_true',
-                        help = 'Skip the normalization step for the hypothesis file')
-    parser.add_argument('-skip-normalization',
-                        action = 'store_true',
-                        help = 'Skip the normalization step for both hypothesis and reference files')
+                        help = 'skip the normalization step for the hypothesis file')
+    parser.add_argument('-sc_args', nargs='*',
+                        default=[],
+                        help='extra sclite arguments you want to use (without the -)')
 
     args = parser.parse_args()
 
-    if bool(args.interactive):
+    if args.interactive:
         print('Opening interface')
         interface.main()
     else:
         print('Running benchmarking')
-        skip_ref_norm = args.skip_ref_normalization
-        skip_hyp_norm = args.skip_hyp_normalization
-        if args.skip_normalization:
-            skip_ref_norm = args.skip_ref_normalization
-            skip_hyp_norm = args.skip_hyp_normalization
-        benchmarking = pipeline.Pipeline(args.hypfile[0], args.hypfile[1], args.reffile[0], args.reffile[1], kind=args.kind, skip_ref_norm=skip_ref_norm, skip_hyp_norm=skip_hyp_norm)
+        benchmarking = pipeline.Pipeline(args.hypfile[0], args.hypfile[1], \
+                                         args.reffile[0], args.reffile[1], \
+                                         kind=args.kind, \
+                                         skip_ref_norm=args.skip_ref_normalization, skip_hyp_norm=args.skip_hyp_normalization)
         benchmarking.main()
         pipeline.process_results(kind=args.kind)
 
diff --git a/ASR_NL_benchmark/normalize.py b/ASR_NL_benchmark/normalize.py
index e3121af..803503a 100644
--- a/ASR_NL_benchmark/normalize.py
+++ b/ASR_NL_benchmark/normalize.py
@@ -20,6 +20,7 @@ def check_and_covert_interger(word):
             float(word)
             new_word = num2words(word, to='cardinal', lang='nl')
             new_word = new_word.replace('komma', 'punt')
+            new_word = new_word.replace('duizend', 'duizend ')
             logging.info(f'converted the number {word} to {new_word}')
             return new_word
         except:
@@ -29,6 +30,7 @@ def check_and_covert_interger(word):
         try:
             float(new_word)
             new_word = num2words(new_word, to='cardinal', lang='nl')
+            new_word = new_word.replace('duizend', 'duizend ')
             logging.info(f'converted number {word} to {new_word}')
             return new_word
         except:
@@ -66,6 +68,7 @@ def replace_numbers(text):
         if word.isdigit():
             number_of_numbers += 1
             text_list[position] = num2words(word, to='cardinal', lang='nl')
+            text_list[position] = text_list[position].replace('duizend', 'duizend ')
         elif check_and_covert_interger(word):
             text_list[position] = check_and_covert_interger(word)
     text_without_numbers = " ".join(text_list)
diff --git a/ASR_NL_benchmark/pipeline.py b/ASR_NL_benchmark/pipeline.py
index 33145d7..84371bb 100644
--- a/ASR_NL_benchmark/pipeline.py
+++ b/ASR_NL_benchmark/pipeline.py
@@ -47,18 +47,20 @@ def run_pipeline(hypfile, reffile, skip_ref_norm, skip_hyp_norm):
     #Create results folder if not exists:
     if not os.path.exists(os.path.join(os.path.sep,'input','results')):
         os.makedirs(os.path.join(os.path.sep,'input','results'))
+    
+    # sclite command to be logged and executed
+    command = f"sclite -D -h {hypfile.variation_path} {hypfile.extension} -r {reffile.variation_path} {reffile.extension} \
+        -m hyp -O {os.path.join(os.path.sep,'input','results')} -o prf dtl spk"
 
     # Run variation scripts
-    logging.info(
-        f"running: sclite -h {hypfile.normalized_path} {hypfile.extension} -r {reffile.normalized_path} {reffile.extension} -m hyp -O {os.path.join(os.path.sep,'input','results')}  -o dtl spk")
-    run = os.system(
-        f"csrfilt.sh -s -i ctm {os.path.join('ASR_NL_benchmark','variations.glm')} < {hypfile.normalized_path} > {hypfile.variation_path}")
-
-    # Run sclite
-    run = os.system(
-        f"csrfilt.sh -s -i stm {os.path.join('ASR_NL_benchmark','variations.glm')} < {reffile.normalized_path} > {reffile.variation_path}")
-    run = os.system(
-        f"sclite -D -h {hypfile.variation_path} {hypfile.extension} -r {reffile.variation_path} {reffile.extension} -m hyp -O {os.path.join(os.path.sep,'input','results')} -o prf dtl spk")
+    # Hypothesis
+    run = os.system(f"csrfilt.sh -s -i ctm {os.path.join('ASR_NL_benchmark','variations.glm')} < {hypfile.normalized_path} > {hypfile.variation_path}")
+    # Reference
+    run = os.system(f"csrfilt.sh -s -i stm {os.path.join('ASR_NL_benchmark','variations.glm')} < {reffile.normalized_path} > {reffile.variation_path}")
+
+    # Log & run sclite
+    logging.info("running:" + command)
+    run = os.system(command)
 
 def calculate_wer(df):
     """ Calculates the word error rate and adds the collumn 'product' to the dataframe
diff --git a/README.md b/README.md
index a3f8e74..03b0fca 100644
--- a/README.md
+++ b/README.md
@@ -61,6 +61,11 @@ The final results are saved in .csv format inside a folder named `results` store
 - .spk files - Report with scoring for a speaker as returned by sclite
 - .csv files - Overall results of the benchmarking as shown in the interface
 
+## Extra arguments
+There are extra arguments that you can add to the command line:
+- `-skip_hyp_normalization`: Skips the normalization step for the hypothesis file(s) (STILL APPLIES VARIATIONS)
+- `-skip_ref_normalization`: Skips the normalization step for the reference file(s) (STILL APPLIES VARIATIONS)
+- `-sc_args`: With this argument, you can add extra sclite-specific flags. For more information, check the [documentation of sclite](https://github.com/usnistgov/SCTK/blob/master/doc/sclite.htm) (to view it properly, we suggest locally downloading the entire `doc` folder of the SCTK repository).
 
 ## More about the pipeline
 ### Normalization

From a9d39bb7e1fd4ba0a26ef000a915b8edadc61770 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Thu, 22 Feb 2024 13:40:44 +0100
Subject: [PATCH 26/28] Remove sc_args (to be added upon request)

---
 ASR_NL_benchmark/__main__.py |  3 ---
 README.md                    | 41 ++++++++++++++++++------------------
 2 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/ASR_NL_benchmark/__main__.py b/ASR_NL_benchmark/__main__.py
index 217a611..05d9e3b 100644
--- a/ASR_NL_benchmark/__main__.py
+++ b/ASR_NL_benchmark/__main__.py
@@ -27,9 +27,6 @@
     parser.add_argument('-skip_hyp_normalization', 
                         action = 'store_true',
                         help = 'skip the normalization step for the hypothesis file')
-    parser.add_argument('-sc_args', nargs='*',
-                        default=[],
-                        help='extra sclite arguments you want to use (without the -)')
 
     args = parser.parse_args()
 
diff --git a/README.md b/README.md
index 03b0fca..7de9c62 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ Please check the guidelines for the reference file in the section below.
 
 In order to run the benchmarking tool over a (set of) local hyp and ref file(s) we need docker to mount the local directory where the input files are located. The output files of the benchmarking tool will appear in the same folder. 
 
-The following line runs the benchmarking tool over a local hyp and ref file. Use the absolute file path as the value for the variables `SOURCE`. For `HYPFILENAME` use the filename of the hypfile and for `REFFILENAME` the name of the reffile. 
+The following line runs the benchmarking tool over a local hyp and ref file. Use the absolute file path as the value for the `SOURCE` variable. For `HYPFILENAME` use the filename of the hypfile and for `REFFILENAME` the name of the reffile. 
 
 `HYPFILENAME` and `REFFILENAME` can also be the names of the folders containing the *hypfiles* and *reffiles* respectively. **Make sure** to create a folder named `results` in the `SOURCE` folder before running the command below:
 
@@ -35,16 +35,16 @@ The results (.dtl, .prf, .spk, and .csv format) can be found inside the `results
 
 ### How to: Use the Interface
 
-In order to open a User Interface, run a command similar to the one above but now with the optional argument `-interface` set to `True`:
+In order to open a User Interface, run a command similar to the one above but now with the optional argument `-interactive`:
 
-- <code> docker run -it  --mount type=bind,source=SOURCE,target=/input  asrnlbenchmark/asr-nl-benchmark:latest python ASR_NL_benchmark -interactive True </code>
+- <code> docker run -it  --mount type=bind,source=SOURCE,target=/input  asrnlbenchmark/asr-nl-benchmark:latest python ASR_NL_benchmark -interactive </code>
 
 Use a web browser to access the UI by navigating to "http://localhost:5000"
 
-Within the tab Select folder, enter the path to the hypotheses and reference files:
+Within the tab "Select folder", enter the path to the hypothesis and reference files:
 
-- Enter the path of the hyp or the path to a folder containing a set of hyp files: (e.g. "ref_folder" or "ref_file.stm")
-- Enter the path of the ref file or the path to a folder containing a set of ref files: (e.g. "hyp_folder" or "hyp_file.stm")
+- Enter the path of the hyp file or the path to a folder containing a set of hyp files: (e.g. "hyp_folder" or "hyp_file.stm")
+- Enter the path of the ref file or the path to a folder containing a set of ref files: (e.g. "ref_folder" or "ref_file.stm")
 - click "Submit"
 
 A progress bar will appear. As soon as the benchmarking is ready, you will be forwarded to the results. The results (.dtl, .prf, .spk, and .csv format) can be found inside a folder named `results` which can be found on the local `SOURCE` location (see above).
@@ -62,14 +62,13 @@ The final results are saved in .csv format inside a folder named `results` store
 - .csv files - Overall results of the benchmarking as shown in the interface
 
 ## Extra arguments
-There are extra arguments that you can add to the command line:
+There are extra arguments that you can add to the command line (**NOT** the interface):
 - `-skip_hyp_normalization`: Skips the normalization step for the hypothesis file(s) (STILL APPLIES VARIATIONS)
 - `-skip_ref_normalization`: Skips the normalization step for the reference file(s) (STILL APPLIES VARIATIONS)
-- `-sc_args`: With this argument, you can add extra sclite-specific flags. For more information, check the [documentation of sclite](https://github.com/usnistgov/SCTK/blob/master/doc/sclite.htm) (to view it properly, we suggest locally downloading the entire `doc` folder of the SCTK repository).
 
 ## More about the pipeline
 ### Normalization
-Manual transcripts (used as reference files) sometimes contain abbreviations (e.g. "'n" instead of "een"), symbols (e.g. "&" instead of "en") and numbers ("4" instead of "vier"). The reference files often contain the written form of the words instead. Since we don't want to penalize the speech-to-text tooling or algorithm for such differences, we normalize both the reference and hypothesis files.
+Manual transcripts (used as reference files) sometimes contain abbreviations (e.g. `'n` instead of `een`), symbols (e.g. `&` instead of `en`) and numbers (`4` instead of `vier`). The hypothesis files often contain the written form of the words instead. Since we don't want to penalize the speech-to-text tooling or algorithm for such differences, we normalize both the reference and hypothesis files.
 
 Normalization replacements:
 
@@ -107,13 +106,13 @@ example:
 Without subcategories:
 - program_1.stm
 - program_1.ctm
-- programe_2.stm
+- program_2.stm
 - program_2.ctm
 
 With subcategories (sports v.s. news):
-- programe_1.stm
+- program_1.stm
 - program_1-sports.ctm
-- programe_2.stm
+- program_2.stm
 - program_2-news.ctm
 
 
@@ -122,15 +121,15 @@ The reference file is used as the ground truth. To get the best results, the ref
 
 - The reference file should be a Segment Time Mark file (STM), see description below.
 - Words should be written according to the modern Dutch spelling
-- No abbreviations (e.g. use: "bijvoorbeeld" instead of: "bv." or "bijv. , use: "het" instead of "'t")
-- No symbols (use: "procent" instead of:  "%")
-- No numbers (write out all numbers: "drie" instead of "3")
+- No abbreviations (e.g. use `bijvoorbeeld` instead of `bv.` or `bijv.`, use `het` instead of `'t`)
+- No symbols (use: `procent` instead of `%`)
+- No numbers (write out all numbers: `drie` instead of `3`)
 - utf-8 encoded
 
 In order to create those reference files, we suggest to use a transcription tool like [transcriber](http://trans.sourceforge.net/en/usermanUS.php).
 
 #### Segment Time Mark (STM)
-The Segment Time Mark files, to be used as reference files, consist of a connotation of time marked text segment records. Those segments are separated by a new line and follow the format:
+The Segment Time Mark file, to be used as a reference file, consists of a concatenation of time-marked text segment records. Those segments are separated by a new line and follow the format:
 
     File_id Channel Speaker_id Begin_Time End_Time <Label> Transcript
   
@@ -139,18 +138,18 @@ To comment out a line, start the line with ';;'
 ##### Example STM
     ;; Some information you want to comment out like a description  
     ;; More information you want to include and comment out  
-    ;; like the name of the transcriber, the version or explanation of labels   
+    ;; like the name of the transcriber, the version or explanation of labels, etc.
     Your_favorite_tv_show_2021_S1_E1 Speaker_01_Female_Native A 0.000 1.527 <o, f1, female> The first line  
     Your_favorite_tv_show_2021_S1_E1 Speaker_01_Female_Native A 1.530 2.127 <o, f1, male> The second text segment  
 
 
 ### Hypothesis file
-To get the best results the hypothesis file (i.e. the output of a speech recognizer) should meet the following guidelines:
-- The hypothesis file should be Time Marked Conversations files (CTM), see the description below.
+To get the best results, the hypothesis file (i.e. the output of a speech recognizer) should meet the following guidelines:
+- The hypothesis file should be a Time Marked Conversation file (CTM), see the description below.
 - utf-8 encoded
 
 #### CTM Format
-The Time Marked Conversation files, to be used as hypothesis files, consist of a connotation of time-marked word records. Those records are separated by a new line and follow the format:
+The Time Marked Conversation file, to be used as a hypothesis file, consists of a concatenation of time-marked word records. Those records are separated by a new line and follow the format:
 
     File_id Channel Begin_time Duration Word Confidence
 
@@ -158,7 +157,7 @@ To comment out a line, start the line with ';;'
 
 ##### Example CTM
 
-    ;; Some infomration you want to comment out like a description  
+    ;; Some information you want to comment out like a description  
     ;; More information you want to include and comment out  
     Your_favorite_tv_show_2021_S1_E1 A 0.000 0.482 The 0.95  
     Your_favorite_tv_show_2021_S1_E1 A 0.496 0.281 first 0.98  

From 8c0537043d54bf9281e909b7f1a456f61d8bbb78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Fri, 29 Mar 2024 14:47:01 +0100
Subject: [PATCH 27/28] Add test files

---
 test/hyp/test.ctm | 284 ++++++++++++++++++++++++++++++++++++++++++++++
 test/ref/test.stm |   9 ++
 2 files changed, 293 insertions(+)
 create mode 100644 test/hyp/test.ctm
 create mode 100644 test/ref/test.stm

diff --git a/test/hyp/test.ctm b/test/hyp/test.ctm
new file mode 100644
index 0000000..905fd54
--- /dev/null
+++ b/test/hyp/test.ctm
@@ -0,0 +1,284 @@
+test_file 1 0.72 0.12 de 1.00
+test_file 1 0.84 0.36 nederlandse 0.99
+test_file 1 1.2 0.54 spoorwegen 1.00
+test_file 1 1.74 0.12 en 0.86
+test_file 1 1.86 0.45 schiphol 1.00
+test_file 1 2.31 0.27 toch 0.95
+test_file 1 2.58 0.21 twee 1.00
+test_file 1 2.79 0.54 gevoelige 1.00
+test_file 1 3.33 0.48 dossiers 1.00
+test_file 1 3.81 0.12 op 1.00
+test_file 1 3.93 0.09 het 0.88
+test_file 1 4.02 0.45 verkeerde 0.76
+test_file 1 4.47 0.36 gebied 0.99
+test_file 1 5.16 0.30 zitten 1.00
+test_file 1 5.46 0.15 in 1.00
+test_file 1 5.61 0.09 het 0.93
+test_file 1 5.7 0.30 nieuwe 1.00
+test_file 1 6 0.45 kabinet 1.00
+test_file 1 6.45 0.12 in 1.00
+test_file 1 6.57 0.06 de 1.00
+test_file 1 6.63 0.54 portefeuille 1.00
+test_file 1 7.17 0.15 van 1.00
+test_file 1 7.32 0.09 de 1.00
+test_file 1 7.41 0.60 minister 1.00
+test_file 1 8.16 0.24 van 1.00
+test_file 1 8.4 0.30 verkeer 1.00
+test_file 1 8.7 0.12 en 1.00
+test_file 1 8.82 0.60 waterstaat 1.00
+test_file 1 9.48 0.23 worden 0.76
+test_file 1 9.72 0.08 hem 0.38
+test_file 1 9.8 0.40 zojuist 0.98
+test_file 1 10.2 0.12 al 0.99
+test_file 1 10.32 0.33 camiel 0.97
+test_file 1 10.65 0.39 eurlings 1.00
+test_file 1 11.4 0.12 in 0.60
+test_file 1 11.52 0.09 het 0.89
+test_file 1 11.61 0.18 net 1.00
+test_file 1 11.79 0.48 opgestapte 0.99
+test_file 1 12.27 0.45 kabinet 0.98
+test_file 1 12.72 0.24 vielen 1.00
+test_file 1 12.96 0.27 deze 1.00
+test_file 1 13.23 0.39 dossiers 1.00
+test_file 1 13.62 0.21 nog 1.00
+test_file 1 13.83 0.21 onder 1.00
+test_file 1 14.04 0.12 het 0.86
+test_file 1 14.16 0.96 staatssecretariaat 1.00
+test_file 1 16.28 0.15 het 1.00
+test_file 1 16.43 0.63 treinverkeer 1.00
+test_file 1 17.06 0.27 van 1.00
+test_file 1 17.33 0.15 en 1.00
+test_file 1 17.48 0.24 naar 1.00
+test_file 1 17.72 0.51 utrecht 1.00
+test_file 1 18.38 0.24 is 1.00
+test_file 1 18.65 0.33 sinds 1.00
+test_file 1 18.98 0.27 half 1.00
+test_file 1 19.25 0.24 zes 1.00
+test_file 1 19.49 0.63 ontregeld 1.00
+test_file 1 20.15 0.21 door 1.00
+test_file 1 20.36 0.09 een 1.00
+test_file 1 20.45 1.08 computerstoring 1.00
+test_file 1 22.09 0.63 prorail 0.87
+test_file 1 22.72 0.21 hoopt 0.98
+test_file 1 22.93 0.15 dat 1.00
+test_file 1 23.08 0.06 de 0.97
+test_file 1 23.14 0.48 problemen 1.00
+test_file 1 23.62 0.12 nog 1.00
+test_file 1 23.74 0.48 vanavond 1.00
+test_file 1 24.22 0.21 kunnen 1.00
+test_file 1 24.43 0.18 worden 1.00
+test_file 1 24.61 0.54 opgelost 1.00
+test_file 1 26.27 0.20 een 0.97
+test_file 1 26.47 0.72 eenenveertigjarige 1.00
+test_file 1 27.19 0.60 amsterdammer 1.00
+test_file 1 27.82 0.24 heeft 1.00
+test_file 1 28.06 0.24 ruim 1.00
+test_file 1 28.3 0.36 honderd 1.00
+test_file 1 28.66 0.33 dagen 1.00
+test_file 1 28.99 0.27 cel 1.00
+test_file 1 29.26 0.54 gekregen 1.00
+test_file 1 29.8 0.18 voor 1.00
+test_file 1 30.01 0.78 opruiing 1.00
+test_file 1 30.79 0.15 en 1.00
+test_file 1 30.94 0.09 het 1.00
+test_file 1 31.03 0.45 bedreigen 1.00
+test_file 1 31.48 0.15 van 1.00
+test_file 1 31.63 0.27 premier 1.00
+test_file 1 31.9 0.60 balkenende 1.00
+test_file 1 32.92 0.18 dat 1.00
+test_file 1 33.1 0.18 staat 1.00
+test_file 1 33.28 0.39 gelijk 1.00
+test_file 1 33.7 0.18 aan 1.00
+test_file 1 33.88 0.06 de 0.99
+test_file 1 33.94 0.33 tijd 1.00
+test_file 1 34.3 0.27 dat 1.00
+test_file 1 34.57 0.09 de 1.00
+test_file 1 34.66 0.21 man 1.00
+test_file 1 34.87 0.12 al 1.00
+test_file 1 34.99 0.18 heeft 1.00
+test_file 1 35.17 0.48 gezeten 1.00
+test_file 1 36.1 0.23 kreeg 0.82
+test_file 1 36.34 0.17 ook 1.00
+test_file 1 36.51 0.18 zes 1.00
+test_file 1 36.69 0.27 maanden 1.00
+test_file 1 36.96 0.57 voorwaardelijk 1.00
+test_file 1 38.01 0.12 en 0.98
+test_file 1 38.13 0.27 schreef 0.60
+test_file 1 38.41 0.11 op 1.00
+test_file 1 38.52 0.09 het 0.98
+test_file 1 38.61 0.39 internet 1.00
+test_file 1 39 0.12 op 0.63
+test_file 1 39.12 0.39 ruilen 0.51
+test_file 1 39.51 0.42 columns 1.00
+test_file 1 39.93 0.21 over 1.00
+test_file 1 40.14 0.63 balkenende 1.00
+test_file 1 42.12 0.36 nederland 1.00
+test_file 1 42.48 0.27 betaalt 1.00
+test_file 1 42.75 0.12 nog 1.00
+test_file 1 42.87 0.33 steeds 1.00
+test_file 1 43.2 0.18 voor 1.00
+test_file 1 43.38 0.06 de 1.00
+test_file 1 43.44 0.54 beveiliging 1.00
+test_file 1 43.98 0.15 van 1.00
+test_file 1 44.13 0.33 ayaan 1.00
+test_file 1 44.46 0.33 hirsi 1.00
+test_file 1 44.79 0.39 ali 1.00
+test_file 1 45.3 0.18 dat 0.56
+test_file 1 45.48 0.12 zij 0.48
+test_file 1 45.6 0.12 het 0.49
+test_file 1 45.720 0.33 oud 0.96
+test_file 1 46.050 0.33 kamerlid 0.96
+test_file 1 46.38 0.09 in 0.98
+test_file 1 46.47 0.18 het 0.97
+test_file 1 46.68 0.51 interview 1.00
+test_file 1 47.19 0.24 met 1.00
+test_file 1 47.49 0.63 newsweek 1.00
+test_file 1 48.57 0.21 is 0.95
+test_file 1 48.78 0.12 je 0.86
+test_file 1 48.9 0.27 ali 0.79
+test_file 1 49.17 0.27 woont 1.00
+test_file 1 49.44 0.12 en 1.00
+test_file 1 49.56 0.24 werkt 1.00
+test_file 1 49.8 0.51 tegenwoordig 1.00
+test_file 1 50.31 0.12 in 1.00
+test_file 1 50.43 0.06 de 1.00
+test_file 1 50.49 0.30 verenigde 1.00
+test_file 1 50.79 0.39 staten 1.00
+test_file 1 51.57 0.15 het 1.00
+test_file 1 51.72 0.39 tijdschrift 0.99
+test_file 1 52.11 0.30 sprak 1.00
+test_file 1 52.41 0.15 met 1.00
+test_file 1 52.56 0.30 hirsi 0.99
+test_file 1 52.86 0.30 ali 1.00
+test_file 1 53.16 0.18 over 1.00
+test_file 1 53.34 0.06 de 1.00
+test_file 1 53.4 0.54 verschijning 1.00
+test_file 1 53.94 0.15 van 1.00
+test_file 1 54.09 0.12 haar 1.00
+test_file 1 54.21 0.27 boek 1.00
+test_file 1 54.51 0.12 de 0.98
+test_file 1 54.63 0.62 inval 0.84
+test_file 1 55.41 0.18 in 1.00
+test_file 1 55.59 0.09 de 1.00
+test_file 1 55.68 0.36 vs 1.00
+test_file 1 56.41 0.23 staat 1.00
+test_file 1 56.64 0.24 hoog 1.00
+test_file 1 56.88 0.09 op 1.00
+test_file 1 56.97 0.09 de 1.00
+test_file 1 57.06 0.54 boekenlijst 1.00
+test_file 1 57.6 0.12 van 0.99
+test_file 1 57.72 0.06 de 0.49
+test_file 1 57.78 0.18 new 1.00
+test_file 1 57.96 0.18 york 1.00
+test_file 1 58.14 0.39 times 1.00
+test_file 1 59.99 0.39 iran 1.00
+test_file 1 60.38 0.15 is 1.00
+test_file 1 60.53 0.33 gisteren 1.00
+test_file 1 60.86 0.15 niet 1.00
+test_file 1 61.01 0.36 gestopt 1.00
+test_file 1 61.37 0.15 met 1.00
+test_file 1 61.52 0.09 het 1.00
+test_file 1 61.61 0.42 verrijken 1.00
+test_file 1 62.03 0.15 van 1.00
+test_file 1 62.18 0.60 uranium 1.00
+test_file 1 63.08 0.24 zoals 1.00
+test_file 1 63.32 0.15 het 0.87
+test_file 1 63.5 0.75 internationale 1.00
+test_file 1 64.25 0.35 gemeenten 0.43
+test_file 1 64.6 0.13 over 0.72
+test_file 1 64.76 0.24 de 1.00
+test_file 1 65 0.60 internationale 1.00
+test_file 1 65.6 0.42 gemeenschap 1.00
+test_file 1 66.02 0.15 wel 0.99
+test_file 1 66.17 0.15 had 1.00
+test_file 1 66.32 0.45 geeist 0.75
+test_file 1 67.19 0.15 dat 1.00
+test_file 1 67.34 0.15 heeft 1.00
+test_file 1 67.49 0.06 de 1.00
+test_file 1 67.55 0.72 atoomwaakhond 1.00
+test_file 1 68.3 0.90 iaea 1.00
+test_file 1 69.2 0.15 in 1.00
+test_file 1 69.35 0.30 wenen 1.00
+test_file 1 69.65 0.72 bekendgemaakt 1.00
+test_file 1 70.94 0.33 iran 1.00
+test_file 1 71.27 0.18 heeft 1.00
+test_file 1 71.45 0.27 meer 1.00
+test_file 1 71.72 0.15 dan 1.00
+test_file 1 71.87 0.54 driehonderd 0.83
+test_file 1 72.41 0.69 centrifuges 1.00
+test_file 1 73.1 0.60 neergezet 1.00
+test_file 1 73.73 0.18 in 1.00
+test_file 1 73.91 0.09 een 1.00
+test_file 1 74 0.42 fabriek 1.00
+test_file 1 74.63 0.12 waar 0.85
+test_file 1 74.75 0.06 de 1.00
+test_file 1 74.81 0.45 verrijking 1.00
+test_file 1 75.26 0.15 van 1.00
+test_file 1 75.41 0.54 uranium 1.00
+test_file 1 75.95 0.18 op 1.00
+test_file 1 76.16 0.69 industriële 0.96
+test_file 1 76.85 0.30 schaal 1.00
+test_file 1 77.15 0.09 is 1.00
+test_file 1 77.24 0.45 gepland 0.98
+test_file 1 77.96 0.33 aldus 1.00
+test_file 1 78.29 0.09 het 0.97
+test_file 1 78.38 0.87 atoomagentschap 0.92
+test_file 1 79.49 0.15 het 0.94
+test_file 1 79.64 0.30 westen 1.00
+test_file 1 79.94 0.21 maakt 1.00
+test_file 1 80.15 0.15 zich 1.00
+test_file 1 80.3 0.36 zorgen 1.00
+test_file 1 80.66 0.18 dat 1.00
+test_file 1 80.84 0.39 iran 1.00
+test_file 1 81.26 0.78 atoombommen 1.00
+test_file 1 82.04 0.18 wil 1.00
+test_file 1 82.22 0.39 bouwen 1.00
+test_file 1 84.13 0.21 in 1.00
+test_file 1 84.34 0.39 verschillende 1.00
+test_file 1 84.73 0.27 wijken 1.00
+test_file 1 85 0.15 van 1.00
+test_file 1 85.15 0.51 bagdad 1.00
+test_file 1 85.66 0.21 zijn 1.00
+test_file 1 85.87 0.09 het 1.00
+test_file 1 85.96 0.42 afgelopen 1.00
+test_file 1 86.38 0.36 etmaal 1.00
+test_file 1 86.74 0.36 twintig 1.00
+test_file 1 87.1 0.39 lijken 1.00
+test_file 1 87.49 0.39 gevonden 1.00
+test_file 1 87.88 0.18 van 1.00
+test_file 1 88.06 0.63 doodgeschoten 1.00
+test_file 1 88.69 0.63 irakezen 1.00
+test_file 1 89.74 0.15 de 0.98
+test_file 1 89.89 0.39 stoffelijke 1.00
+test_file 1 90.28 0.45 overschotten 1.00
+test_file 1 90.73 0.36 vertoonden 0.57
+test_file 1 91.09 0.33 sporen 1.00
+test_file 1 91.42 0.15 van 1.00
+test_file 1 91.57 0.57 marteling 1.00
+test_file 1 93.2 0.15 en 0.96
+test_file 1 93.35 0.39 nederland 1.00
+test_file 1 93.77 0.18 doet 1.00
+test_file 1 93.95 0.09 het 0.98
+test_file 1 94.04 0.12 op 1.00
+test_file 1 94.16 0.09 het 0.98
+test_file 1 94.25 0.27 gebied 1.00
+test_file 1 94.52 0.18 van 1.00
+test_file 1 94.7 0.66 innovatie 1.00
+test_file 1 95.36 0.54 beter 1.00
+test_file 1 95.93 0.30 dan 1.00
+test_file 1 96.23 0.09 het 1.00
+test_file 1 96.32 0.45 gemiddelde 1.00
+test_file 1 96.77 0.15 van 1.00
+test_file 1 96.92 0.06 de 0.99
+test_file 1 96.98 0.45 europese 1.00
+test_file 1 97.43 0.30 unie 1.00
+test_file 1 98.06 0.12 dat 0.99
+test_file 1 98.18 0.30 blijkt 1.00
+test_file 1 98.48 0.12 uit 1.00
+test_file 1 98.6 0.09 het 1.00
+test_file 1 98.69 0.45 jaarlijkse 1.00
+test_file 1 99.14 0.70 innovaties 0.56
+test_file 1 99.84 0.62 scorebord 1.00
+test_file 1 100.46 0.15 van 1.00
+test_file 1 100.61 0.12 de 1.00
+test_file 1 100.73 0.36 eu 1.00
\ No newline at end of file
diff --git a/test/ref/test.stm b/test/ref/test.stm
new file mode 100644
index 0000000..d49bd0d
--- /dev/null
+++ b/test/ref/test.stm
@@ -0,0 +1,9 @@
+test_file 1 test_spk 0.5 15.163 <o,F0,M> de Nederlandse spoorwegen en Schiphol toch twee gevoelige dossiers op het verkeersgebied zitten in het nieuwe kabinet in de portefeuille van de minister van verkeer en waterstaat u hoorde hem zojuist al Camiel Eurlings in het net opgestapte kabinet vielen deze dossiers nog onder het staatssecretariaat
+test_file 1 test_spk 16.203 25.197999999999997 <o,F0,M> het treinverkeer van en naar Utrecht is sinds half zes ontregeld door een computerstoring ProRail hoopt dat de problemen nog vanavond kunnen worden opgelost
+test_file 1 test_spk 26.236 37.813 <o,F0,M> een eenenveertigjarige Amsterdammer heeft ruim honderd dagen cel gekregen voor opruiing en het bedreigen van premier Balkenende dat staat gelijk aan de tijd dat de man al heeft gezeten kreeg ook zes maanden voorwaardelijk
+test_file 1 test_spk 37.813 40.774 <o,F0,M> hij schreef op het internet opruiende columns over Balkenende
+test_file 1 test_spk 42.078 58.601 <o,F0,M> Nederland betaalt nog steeds voor de beveiliging van Ayaan Hirsi Ali dat zei het oud kamerlid in het interview met Newsweek Hirsi Ali woont en werkt tegenwoordig in de Verenigde Staten het tijdschrift sprak met Hirsi Ali over de verschijning van haar boek The Infidel in de VS het staat hoog op de boekenlijst van de New York Times
+test_file 1 test_spk 59.958 70.695 <o,F0,M> Iran is gisteren niet gestopt met het verrijken van uranium zoals het internationale- de internationale gemeenschap wel had geëist dat heeft de atoomwaakhond IAEA in Wenen bekendgemaakt
+test_file 1 test_spk 70.695 82.777 <o,F0,M> Iran heeft meer dan driehonderd centrifuges neergezet in een fabriek waar de verrijking van uranium op industriële schaal is gepland aldus het atoomagentschap het westen maakt zich zorgen dat Iran atoombommen wil bouwen
+test_file 1 test_spk 84.082 92.164 <o,F0,M> in verschillende wijken van Bagdad zijn het afgelopen etmaal twintig lijken gevonden van doodgeschoten Irakezen de stoffelijke overschotten vertoonden sporen van marteling
+test_file 1 test_spk 93.176 101.39699999999999 <o,F0,M> Nederland doet het op het gebied van innovatie beter dat het gemiddelde van de Europese unie dat blijkt uit het jaarlijkse innovatiescorebord van de EU
\ No newline at end of file

From 0d23a05f0bcb45cc951d0b9dca43d5c083495bba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Drago=C8=99?= <balandragos5555@gmail.com>
Date: Fri, 5 Apr 2024 11:47:59 +0200
Subject: [PATCH 28/28] Rename folder with example files and add small comment
 in README

---
 README.md                                    | 2 +-
 test/hyp/test.ctm => example/hyp/example.ctm | 0
 test/ref/test.stm => example/ref/example.stm | 0
 3 files changed, 1 insertion(+), 1 deletion(-)
 rename test/hyp/test.ctm => example/hyp/example.ctm (100%)
 rename test/ref/test.stm => example/ref/example.stm (100%)

diff --git a/README.md b/README.md
index 7de9c62..dfdcf35 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ The results (.dtl, .prf, .spk, and .csv format) can be found inside the `results
 
 In order to open a User Interface, run a command similar to the one above but now with the optional argument `-interactive`:
 
-- <code> docker run -it  --mount type=bind,source=SOURCE,target=/input  asrnlbenchmark/asr-nl-benchmark:latest python ASR_NL_benchmark -interactive </code>
+- <code> docker run -it -p 5000:5000 --mount type=bind,source=SOURCE,target=/input  asrnlbenchmark/asr-nl-benchmark:latest python ASR_NL_benchmark -interactive </code>
 
 Use a web browser to access the UI by navigating to "http://localhost:5000"
 
diff --git a/test/hyp/test.ctm b/example/hyp/example.ctm
similarity index 100%
rename from test/hyp/test.ctm
rename to example/hyp/example.ctm
diff --git a/test/ref/test.stm b/example/ref/example.stm
similarity index 100%
rename from test/ref/test.stm
rename to example/ref/example.stm