diff --git a/semantic_chunkers/chunkers/regex.py b/semantic_chunkers/chunkers/regex.py index 3191d49..e724680 100644 --- a/semantic_chunkers/chunkers/regex.py +++ b/semantic_chunkers/chunkers/regex.py @@ -43,6 +43,8 @@ def __call__(self, docs: list[str]) -> List[List[Chunk]]: current_chunk.token_count = 0 current_chunk.splits.append(sentence) + if current_chunk.token_count is None: + current_chunk.token_count = 0 current_chunk.token_count += sentence_token_count # Last chunk diff --git a/semantic_chunkers/splitters/regex.py b/semantic_chunkers/splitters/regex.py index a2af2a6..355a2dd 100644 --- a/semantic_chunkers/splitters/regex.py +++ b/semantic_chunkers/splitters/regex.py @@ -55,7 +55,7 @@ class RegexSplitter(BaseSplitter): def __call__( self, doc: str, delimiters: List[Union[str, regex.Pattern]] = [] ) -> List[str]: - if not delimiters: + if not delimiters: compiled_pattern = regex.compile(self.regex_pattern) delimiters.append(compiled_pattern) sentences = [doc]