Please run:
pip install -r requirements.txt
python evaluator.py --input-file-path <path-to-test-file> --target-file-path <path-to-target-file>
DATA_PATH=/path/to/data
- $DATA_PATH will contain these files (generated synthetic data from monolingual text; to recreate them, see "constructing the pipelines" below):
- predictions/predictions_english_st_regex.txt
- unaligned_tokenized_rempunc.en
- predictions/predictions_french_bt_regex.txt
- unaligned_tokenized.fr
-
CUDA_VISIBLE_DEVICES="0" python train.py --data_path $DATA_PATH --experiment 1_st --batch_size 64 \
  --num_layer 2 --d_model 1024 --dff 1024 --epochs 3 \
  --p_wd_st 0.3 --p_wd_bt 0.1 --dropout_rate 0.4 --start 200000 \
  --st --bt
$DATA_PATH should contain these files:
- train.lang1
- train.lang2
-
python split_data.py --data_path $DATA_PATH
CUDA_VISIBLE_DEVICES="0" python train.py --data_path $DATA_PATH --experiment 1_st --batch_size 64 \
  --num_layer 1 --d_model 1024 --dff 1024 --epochs 50 \
  --dropout_rate 0.4 \
  --train_lang1 train/split_train.lang1 \
  --train_lang2 train/split_train.lang2 \
  --val_lang1 train/split_train.lang1 \
  --val_lang2 train/split_train.lang2
Then switch the languages to train the model in the opposite direction:
CUDA_VISIBLE_DEVICES="0" python train.py --data_path $DATA_PATH --experiment 1_bt --batch_size 64 \
  --num_layer 1 --d_model 1024 --dff 1024 --epochs 50 \
  --dropout_rate 0.4 \
  --train_lang1 train/split_train.lang2 \
  --train_lang2 train/split_train.lang1 \
  --val_lang1 train/split_train.lang2 \
  --val_lang2 train/split_train.lang1
CUDA_VISIBLE_DEVICES="0" python generation.py --checkpoint_path /path/to/st/model \
  --npz_path ../model/data_and_vocab_bt_st_upsample_best.npz \
  --start 200000 --end 400000
The generated predictions will be saved to an output file: predictions_english_monolingual_<START>_<END>.txt
CUDA_VISIBLE_DEVICES="0" python generation.py --checkpoint_path /path/to/bt/model \
  --npz_path ../model/data_and_vocab_bt_st_upsample_best.npz \
  --start 200000 --end 400000
The generated predictions will be saved to an output file: predictions_english_monolingual_<START>_<END>.txt
- `python refine_preds_regex.py --file predictions/forward/txt`
- `python refine_preds_regex.py --file predictions/backward/txt`