forked from HGU-DLLAB/Korean-FastSpeech2-Pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhparams.py
86 lines (66 loc) · 1.55 KB
/
hparams.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
### kss ###
# Dataset identifier; it is also the last path component of data_path.
dataset = "kss_v_1.4"
# Location of the dataset: <root>/<dataset>.
# The root defaults to the original hard-coded directory. Set the
# FASTSPEECH2_DATASET_ROOT environment variable to use another directory
# without editing this file; an unset or empty variable falls back to the
# default.
data_path = os.path.join(
    os.environ.get("FASTSPEECH2_DATASET_ROOT") or "/home/minsu/dataset/",
    dataset,
)
# Accepted values: "transcript.v.1.4.txt" or "transcript.v.1.3.txt"
meta_name = "transcript.v.1.4.txt"
textgrid_name = "TextGrid.zip"
### set GPU number ###
# Device ids are stored as comma-separated strings, one setting for training
# and one for synthesis.
# NOTE(review): presumably exported as CUDA_VISIBLE_DEVICES by the scripts
# that import this module — confirm against the callers.
train_visible_devices: str = "6,7"
synth_visible_devices: str = "1"
# Text
# List of cleaner names, given as strings.
# NOTE(review): presumably looked up by name in the text-processing code —
# confirm against the caller.
text_cleaners: list = ["korean_cleaners"]
# Audio and mel
### kss ###
# NOTE(review): units are not stated in this file — presumably Hz for the
# sampling rate and samples for the three lengths; confirm with the audio code.
sampling_rate: int = 22050
filter_length: int = 1024
hop_length: int = 256
win_length: int = 1024
### kss ###
max_wav_value: float = 32768.0
n_mel_channels: int = 80
mel_fmin: int = 0
mel_fmax: int = 8000
# Lower/upper bounds for the f0 and energy values.
# NOTE(review): presumably measured on the kss dataset above — confirm before
# reusing them with different data.
f0_min: float = 71.0
f0_max: float = 792.8
energy_min: float = 0.0
energy_max: float = 283.72
# FastSpeech 2
# Encoder and decoder use the same layer count, head count and hidden size.
encoder_layer: int = 4
encoder_head: int = 2
encoder_hidden: int = 256
decoder_layer: int = 4
decoder_head: int = 2
decoder_hidden: int = 256
# Convolution settings; the kernel size is a pair of values.
fft_conv1d_filter_size: int = 1024
fft_conv1d_kernel_size: tuple = (9, 1)
encoder_dropout: float = 0.2
decoder_dropout: float = 0.2
# Variance predictor settings.
variance_predictor_filter_size: int = 256
variance_predictor_kernel_size: int = 3
variance_predictor_dropout: float = 0.5
# NOTE(review): presumably the longest sequence the model is built to handle —
# confirm where it is consumed.
max_seq_len: int = 1000
# Checkpoints and synthesis path
# Each of the next four directories is "<base>/<dataset>", relative to the
# current working directory.
preprocessed_path: str = os.path.join("./preprocessed/", dataset)
checkpoint_path: str = os.path.join("./ckpt/", dataset)
eval_path: str = os.path.join("./eval/", dataset)
log_path: str = os.path.join("./log/", dataset)
# Unlike the paths above, this one does not include the dataset name.
test_path: str = "./results"
# Optimizer
batch_size: int = 16
epochs: int = 1000
n_warm_up_step: int = 4000
grad_clip_thresh: float = 1.0
# NOTE(review): presumably the number of gradient-accumulation steps — confirm
# in the training loop.
acc_steps: int = 1
betas: tuple = (0.9, 0.98)
eps: float = 1e-9
weight_decay: float = 0.0
# Vocoder
# Accepted values: 'waveglow' or 'melgan'
vocoder: str = "waveglow"
# Log-scaled duration
# NOTE(review): presumably added to a duration before the log is taken so a
# zero duration stays finite — confirm where it is used.
log_offset: float = 1.0
# Save, log and synthesis
# NOTE(review): the *_step values are presumably intervals counted in training
# steps — confirm in the training loop.
save_step: int = 10000
eval_step: int = 1000
eval_size: int = 256
log_step: int = 1000
# NOTE(review): purpose is not evident from this file. The mixed-case name is
# kept unchanged because other modules may reference it.
clear_Time: int = 20