
Commit 2479615

Fix dpo defaults for cli + turn on the checks removed previously (#330)
* Fix dpo defaults for cli + turn on the checks removed previously
1 parent 4655ead commit 2479615

File tree

3 files changed: +26 / -8 lines


pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.16"
+version = "1.5.17"
 authors = ["Together AI <[email protected]>"]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"

src/together/cli/api/finetune.py

Lines changed: 7 additions & 7 deletions
@@ -139,7 +139,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--dpo-beta",
     type=float,
-    default=0.1,
+    default=None,
     help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
 )
 @click.option(
@@ -154,7 +154,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--rpo-alpha",
     type=float,
-    default=0.0,
+    default=None,
     help=(
         "RPO alpha parameter of DPO training to include NLL in the loss "
         "(only used when '--training-method' is 'dpo')"
@@ -163,7 +163,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--simpo-gamma",
     type=float,
-    default=0.0,
+    default=None,
     help="SimPO gamma parameter (only used when '--training-method' is 'dpo')",
 )
 @click.option(
@@ -188,7 +188,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--train-on-inputs",
     type=BOOL_WITH_AUTO,
-    default="auto",
+    default=None,
     help="Whether to mask the user messages in conversational data or prompts in instruction data. "
     "`auto` will automatically determine whether to mask the inputs based on the data format.",
 )
@@ -229,10 +229,10 @@ def create(
     confirm: bool,
     train_on_inputs: bool | Literal["auto"],
     training_method: str,
-    dpo_beta: float,
+    dpo_beta: float | None,
     dpo_normalize_logratios_by_length: bool,
-    rpo_alpha: float,
-    simpo_gamma: float,
+    rpo_alpha: float | None,
+    simpo_gamma: float | None,
     from_checkpoint: str,
 ) -> None:
     """Start fine-tuning"""

src/together/resources/finetune.py

Lines changed: 18 additions & 0 deletions
@@ -183,6 +183,24 @@ def create_finetune_request(
         )
         train_on_inputs = "auto"
 
+    if dpo_beta is not None and training_method != "dpo":
+        raise ValueError("dpo_beta is only supported for DPO training")
+    if dpo_normalize_logratios_by_length and training_method != "dpo":
+        raise ValueError(
+            "dpo_normalize_logratios_by_length=True is only supported for DPO training"
+        )
+    if rpo_alpha is not None:
+        if training_method != "dpo":
+            raise ValueError("rpo_alpha is only supported for DPO training")
+        if not rpo_alpha >= 0.0:
+            raise ValueError(f"rpo_alpha should be non-negative (got {rpo_alpha})")
+
+    if simpo_gamma is not None:
+        if training_method != "dpo":
+            raise ValueError("simpo_gamma is only supported for DPO training")
+        if not simpo_gamma >= 0.0:
+            raise ValueError(f"simpo_gamma should be non-negative (got {simpo_gamma})")
+
     lr_scheduler: FinetuneLRScheduler
     if lr_scheduler_type == "cosine":
         if scheduler_num_cycles <= 0.0:
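
The practical effect of the re-enabled checks, as a hedged usage sketch: assuming the client's fine_tuning.create passes these keyword arguments through to create_finetune_request (as the signature above suggests), setting a DPO-only knob on a non-DPO run now fails fast on the client side. The file ID and model name below are placeholders:

    from together import Together

    client = Together()  # assumes TOGETHER_API_KEY is set in the environment

    try:
        client.fine_tuning.create(
            training_file="file-abc123",         # placeholder file ID
            model="meta-llama/Meta-Llama-3-8B",  # placeholder model name
            rpo_alpha=0.5,  # DPO-only; training_method defaults to SFT
        )
    except ValueError as err:
        print(err)  # rpo_alpha is only supported for DPO training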
