3 files changed: +26 −8 lines

pyproject.toml

@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.16"
+version = "1.5.17"
 authors = ["Together AI <[email protected]>"]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"
@@ -139,7 +139,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--dpo-beta",
     type=float,
-    default=0.1,
+    default=None,
     help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
 )
 @click.option(
@@ -154,7 +154,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--rpo-alpha",
     type=float,
-    default=0.0,
+    default=None,
     help=(
         "RPO alpha parameter of DPO training to include NLL in the loss "
         "(only used when '--training-method' is 'dpo')"
@@ -163,7 +163,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--simpo-gamma",
     type=float,
-    default=0.0,
+    default=None,
     help="SimPO gamma parameter (only used when '--training-method' is 'dpo')",
 )
 @click.option(
@@ -188,7 +188,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--train-on-inputs",
     type=BOOL_WITH_AUTO,
-    default="auto",
+    default=None,
     help="Whether to mask the user messages in conversational data or prompts in instruction data. "
     "`auto` will automatically determine whether to mask the inputs based on the data format.",
 )
@@ -229,10 +229,10 @@ def create(
     confirm: bool,
     train_on_inputs: bool | Literal["auto"],
     training_method: str,
-    dpo_beta: float,
+    dpo_beta: float | None,
     dpo_normalize_logratios_by_length: bool,
-    rpo_alpha: float,
-    simpo_gamma: float,
+    rpo_alpha: float | None,
+    simpo_gamma: float | None,
     from_checkpoint: str,
 ) -> None:
     """Start fine-tuning"""
@@ -183,6 +183,24 @@ def create_finetune_request(
         )
         train_on_inputs = "auto"
 
+    if dpo_beta is not None and training_method != "dpo":
+        raise ValueError("dpo_beta is only supported for DPO training")
+    if dpo_normalize_logratios_by_length and training_method != "dpo":
+        raise ValueError(
+            "dpo_normalize_logratios_by_length=True is only supported for DPO training"
+        )
+    if rpo_alpha is not None:
+        if training_method != "dpo":
+            raise ValueError("rpo_alpha is only supported for DPO training")
+        if not rpo_alpha >= 0.0:
+            raise ValueError(f"rpo_alpha should be non-negative (got {rpo_alpha})")
+
+    if simpo_gamma is not None:
+        if training_method != "dpo":
+            raise ValueError("simpo_gamma is only supported for DPO training")
+        if not simpo_gamma >= 0.0:
+            raise ValueError(f"simpo_gamma should be non-negative (got {simpo_gamma})")
+
     lr_scheduler: FinetuneLRScheduler
     if lr_scheduler_type == "cosine":
         if scheduler_num_cycles <= 0.0:
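The added block rejects DPO-only parameters when another training method is selected and rejects negative values for rpo_alpha and simpo_gamma. A self-contained sketch of the same checks (a simplified stand-in with an assumed signature, not the SDK's create_finetune_request):

# Standalone sketch mirroring the added validation (assumption: the real
# create_finetune_request takes many more parameters).
def validate_dpo_params(
    training_method: str,
    dpo_beta: float | None = None,
    rpo_alpha: float | None = None,
    simpo_gamma: float | None = None,
) -> None:
    if dpo_beta is not None and training_method != "dpo":
        raise ValueError("dpo_beta is only supported for DPO training")
    if rpo_alpha is not None:
        if training_method != "dpo":
            raise ValueError("rpo_alpha is only supported for DPO training")
        if not rpo_alpha >= 0.0:
            raise ValueError(f"rpo_alpha should be non-negative (got {rpo_alpha})")
    if simpo_gamma is not None:
        if training_method != "dpo":
            raise ValueError("simpo_gamma is only supported for DPO training")
        if not simpo_gamma >= 0.0:
            raise ValueError(f"simpo_gamma should be non-negative (got {simpo_gamma})")


validate_dpo_params("dpo", dpo_beta=0.1)       # OK: DPO method with DPO parameter
validate_dpo_params("sft")                     # OK: nothing set, nothing checked
try:
    validate_dpo_params("sft", rpo_alpha=0.5)  # raises: DPO-only parameter
except ValueError as err:
    print(err)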