diff --git a/pyproject.toml b/pyproject.toml
index 94226c1..2b5ccaf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.7"
+version = "1.5.8"
 authors = ["Together AI <support@together.ai>"]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"
diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py
index 751fe6a..640dafe 100644
--- a/src/together/cli/api/finetune.py
+++ b/src/together/cli/api/finetune.py
@@ -142,6 +142,13 @@ def fine_tuning(ctx: click.Context) -> None:
     default=0.1,
     help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
 )
+@click.option(
+    "--rpo-alpha",
+    type=float,
+    default=None,
+    help="RPO alpha to control the weight of NLL loss component for chosen responses "
+    "(only used when '--training-method' is 'dpo')",
+)
 @click.option(
     "--suffix",
     "-s",
@@ -206,6 +213,7 @@ def create(
     train_on_inputs: bool | Literal["auto"],
     training_method: str,
     dpo_beta: float,
+    rpo_alpha: float,
     from_checkpoint: str,
 ) -> None:
     """Start fine-tuning"""
@@ -239,6 +247,7 @@ def create(
         train_on_inputs=train_on_inputs,
         training_method=training_method,
         dpo_beta=dpo_beta,
+        rpo_alpha=rpo_alpha,
         from_checkpoint=from_checkpoint,
     )
 
diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 275d683..16b0f28 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -80,6 +80,7 @@ def create_finetune_request(
     train_on_inputs: bool | Literal["auto"] = "auto",
     training_method: str = "sft",
     dpo_beta: float | None = None,
+    rpo_alpha: float | None = None,
     from_checkpoint: str | None = None,
 ) -> FinetuneRequest:
     if model is not None and from_checkpoint is not None:
@@ -193,7 +194,7 @@ def create_finetune_request(
 
     training_method_cls: TrainingMethodSFT | TrainingMethodDPO = TrainingMethodSFT()
     if training_method == "dpo":
-        training_method_cls = TrainingMethodDPO(dpo_beta=dpo_beta)
+        training_method_cls = TrainingMethodDPO(dpo_beta=dpo_beta, rpo_alpha=rpo_alpha)
 
     finetune_request = FinetuneRequest(
         model=model,
@@ -322,6 +323,7 @@ def create(
         train_on_inputs: bool | Literal["auto"] = "auto",
         training_method: str = "sft",
         dpo_beta: float | None = None,
+        rpo_alpha: float | None = None,
         from_checkpoint: str | None = None,
     ) -> FinetuneResponse:
         """
@@ -373,6 +375,7 @@ def create(
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
+            rpo_alpha (float, optional): RPO alpha to control the weight of NLL loss component for chosen responses. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
                 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}. The step value is optional, without it the final checkpoint will be used.
@@ -425,6 +428,7 @@ def create(
             train_on_inputs=train_on_inputs,
             training_method=training_method,
             dpo_beta=dpo_beta,
+            rpo_alpha=rpo_alpha,
             from_checkpoint=from_checkpoint,
         )
 
@@ -710,6 +714,7 @@ async def create(
         train_on_inputs: bool | Literal["auto"] = "auto",
         training_method: str = "sft",
         dpo_beta: float | None = None,
+        rpo_alpha: float | None = None,
         from_checkpoint: str | None = None,
     ) -> FinetuneResponse:
         """
@@ -761,6 +766,7 @@ async def create(
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
+            rpo_alpha (float, optional): RPO alpha to control the weight of NLL loss component for chosen responses. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
                 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}. The step value is optional, without it the final checkpoint will be used.
@@ -813,6 +819,7 @@ async def create(
             train_on_inputs=train_on_inputs,
             training_method=training_method,
             dpo_beta=dpo_beta,
+            rpo_alpha=rpo_alpha,
             from_checkpoint=from_checkpoint,
         )
 
diff --git a/src/together/types/finetune.py b/src/together/types/finetune.py
index 5c2c2c2..011bc63 100644
--- a/src/together/types/finetune.py
+++ b/src/together/types/finetune.py
@@ -158,6 +158,7 @@ class TrainingMethodDPO(TrainingMethod):
 
     method: Literal["dpo"] = "dpo"
     dpo_beta: float | None = None
+    rpo_alpha: float | None = None
 
 
 class FinetuneRequest(BaseModel):
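For context, a minimal usage sketch of the new parameter through the Python client (not part of the diff itself): the training-file ID and model name below are placeholders, and `rpo_alpha=0.5` is an arbitrary illustrative value. The same setting is exposed on the CLI as `--rpo-alpha`, alongside `--dpo-beta`, and only takes effect when `--training-method` is `dpo`.

```python
from together import Together

client = Together()  # reads TOGETHER_API_KEY from the environment

# Placeholder file ID and model name; substitute an uploaded preference
# dataset and a fine-tunable base model of your choice.
job = client.fine_tuning.create(
    training_file="file-xxxxxxxx",
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
    training_method="dpo",
    dpo_beta=0.1,
    rpo_alpha=0.5,  # weight of the NLL loss on chosen responses; leave as None to disable
)
print(job.id, job.status)
```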