Skip to content
This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit 7bf4936

Browse files
Standardized EOS token
1 parent 27c6185 commit 7bf4936

File tree

1 file changed

+3
-4
lines changed
  • tensor2tensor/data_generators

1 file changed

+3
-4
lines changed

tensor2tensor/data_generators/wmt.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,8 @@
 38  38    FLAGS = tf.flags.FLAGS
 39  39
 40  40
 41      - # End-of-sentence marker (should correspond to the position of EOS in the
 42      - # RESERVED_TOKENS list in text_encoder.py)
 43      - EOS = 1
     41  + # End-of-sentence marker
     42  + EOS = text_encoder.EOS_TOKEN
 44  43
 45  44
 46  45    def character_generator(source_path, target_path, character_vocab, eos=None):
@@ -183,7 +182,7 @@ def ende_bpe_token_generator(tmp_dir, train):
 183  182      train_path = _get_wmt_ende_dataset(tmp_dir, dataset_path)
 184  183      token_path = os.path.join(tmp_dir, "vocab.bpe.32000")
 185  184      token_vocab = text_encoder.TokenTextEncoder(vocab_filename=token_path)
 186       -   return token_generator(train_path + ".en", train_path + ".de", token_vocab, 1)
      185  +   return token_generator(train_path + ".en", train_path + ".de", token_vocab, EOS)
 187  186
 188  187
 189  188    _ENDE_TRAIN_DATASETS = [

0 commit comments

Comments
 (0)