From 7a9aa5de25b23abbbc3b1654c5b8ee5ae7b51f33 Mon Sep 17 00:00:00 2001
From: Kalvin Chang
Date: Fri, 8 Mar 2024 22:30:51 -0800
Subject: [PATCH 1/2] fix: ooTextFile should handle tabs in addition to spaces

---
 nltk_contrib/textgrid.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/nltk_contrib/textgrid.py b/nltk_contrib/textgrid.py
index 26b929e..73eff36 100644
--- a/nltk_contrib/textgrid.py
+++ b/nltk_contrib/textgrid.py
@@ -219,7 +219,7 @@ def _find_tiers(self):
 
         if self.text_type == "ooTextFile":
             m = OOTEXTFILE
-            header = " +item ?\[[^]]*\]:"
+            header = "[ \t]+item ?\[[^]]*\]:"
         elif self.text_type == "ChronTextFile":
             m = CHRONTEXTFILE
             header = "\"\S+\" \".*\" \d+\.?\d* \d+\.?\d*"
@@ -351,11 +351,11 @@ def _make_info(self):
             self.size = None
             size = ""
         elif self.text_type == "ooTextFile":
-            classid = " +class = \"(.*)\" *[\r\n]+"
-            nameid = " +name = \"(.*)\" *[\r\n]+"
-            xmin = " +xmin = (\d+\.?\d*) *[\r\n]+"
-            xmax = " +xmax = (\d+\.?\d*) *[\r\n]+"
-            size = " +\S+: size = (\d+) *[\r\n]+"
+            classid = "[ \t]+class = \"(.*)\" *[\r\n]+"
+            nameid = "[ \t]+name = \"(.*)\" *[\r\n]+"
+            xmin = "[ \t]+xmin = (\d+\.?\d*) *[\r\n]+"
+            xmax = "[ \t]+xmax = (\d+\.?\d*) *[\r\n]+"
+            size = "[ \t]+\S+: size = (\d+) *[\r\n]+"
         elif self.text_type == "OldooTextFile":
             classid = "\"(.*)\" *[\r\n]+"
             nameid = "\"(.*)\" *[\r\n]+"

From 0a445e0de1fcfe85a3e98a63bd1d33d0673e75ef Mon Sep 17 00:00:00 2001
From: Kalvin Chang
Date: Fri, 8 Mar 2024 23:01:02 -0800
Subject: [PATCH 2/2] fix: handle tabs in simple_transcript as well

---
 nltk_contrib/textgrid.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/nltk_contrib/textgrid.py b/nltk_contrib/textgrid.py
index 73eff36..aa7ff42 100644
--- a/nltk_contrib/textgrid.py
+++ b/nltk_contrib/textgrid.py
@@ -383,10 +383,10 @@ def make_simple_transcript(self):
             trans_xmax = " (\S+)[\r\n]+"
             trans_text = "\"([\S\s]*?)\""
         elif self.text_type == "ooTextFile":
-            trans_head = " +\S+ \[\d+\]: *[\r\n]+"
-            trans_xmin = " +\S+ = (\S+) *[\r\n]+"
-            trans_xmax = " +\S+ = (\S+) *[\r\n]+"
-            trans_text = " +\S+ = \"([^\"]*?)\""
+            trans_head = "[ \t]+\S+ \[\d+\]:[ \t]*[\r\n]+"
+            trans_xmin = "[ \t]+\S+ = (\S+)[ \t]*[\r\n]+"
+            trans_xmax = "[ \t]+\S+ = (\S+)[ \t]*[\r\n]+"
+            trans_text = "[ \t]+\S+ = \"([^\"]*?)\""
         elif self.text_type == "OldooTextFile":
             trans_head = ""
             trans_xmin = "(.*)[\r\n]+"