Skip to content

Commit

Permalink
Merge pull request #25 from dinhviettoanle/main
Browse files (browse the repository at this point in the history)
Reconstruction of original tokens from BPE tokens
  • Loading branch information
Natooz authored Feb 22, 2023
2 parents 8224f01 + 77f7c53 commit 5d261fd
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion miditok/midi_tokenizer_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,9 +884,12 @@ def __find_subseq(in_list: List[int], pattern: List[int]) -> List[int]:
:return: indices of in_list where the pattern has been found.
"""
  matches = []
+ next_possible_idx = 0
  for i in range(len(in_list)):
-     if in_list[i] == pattern[0] and in_list[i: i + len(pattern)] == pattern:
+     if in_list[i] == pattern[0] and in_list[i:i + len(pattern)] == pattern and i >= next_possible_idx:
          matches.append(i)
+         next_possible_idx = i + len(pattern)
+
  return matches

def apply_bpe_to_dataset(
Expand Down

0 comments on commit 5d261fd

Please sign in to comment.