Commit e3215ad

Update default LR scheduler to cosine with 0.5 cycles (#322)
* Update default LR scheduler to cosine with 0.5 cycles
  - Change default lr_scheduler_type from 'linear' to 'cosine' in finetune.py
  - Update CLI default to use cosine scheduler
  - Maintains backward compatibility for explicit scheduler specification
* Bump version to 1.5.12

Co-authored-by: Soroush Bassam <[email protected]>
1 parent: c6353ae

File tree: 3 files changed (+7, -7 lines)

  pyproject.toml
  src/together/cli/api/finetune.py
  src/together/resources/finetune.py

pyproject.toml (1 addition, 1 deletion)

@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.11"
+version = "1.5.12"
 authors = ["Together AI <[email protected]>"]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"

src/together/cli/api/finetune.py (1 addition, 1 deletion)

@@ -82,7 +82,7 @@ def fine_tuning(ctx: click.Context) -> None:
 @click.option(
     "--lr-scheduler-type",
     type=click.Choice(["linear", "cosine"]),
-    default="linear",
+    default="cosine",
     help="Learning rate scheduler type",
 )
 @click.option(
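
With this change, runs launched without the flag pick up the cosine schedule. A minimal usage sketch, assuming the published `together fine-tuning create` command; the training-file ID and model name are placeholders, and passing the flag explicitly pins the scheduler regardless of the default:

  # Placeholder IDs; --lr-scheduler-type linear restores the pre-1.5.12 behavior.
  together fine-tuning create \
    --training-file "file-xxxx" \
    --model "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference" \
    --lr-scheduler-type linear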

src/together/resources/finetune.py (5 additions, 5 deletions)

@@ -53,7 +53,7 @@ def create_finetune_request(
     n_checkpoints: int | None = 1,
     batch_size: int | Literal["max"] = "max",
     learning_rate: float | None = 0.00001,
-    lr_scheduler_type: Literal["linear", "cosine"] = "linear",
+    lr_scheduler_type: Literal["linear", "cosine"] = "cosine",
     min_lr_ratio: float = 0.0,
     scheduler_num_cycles: float = 0.5,
     warmup_ratio: float | None = None,
@@ -281,7 +281,7 @@ def create(
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
-        lr_scheduler_type: Literal["linear", "cosine"] = "linear",
+        lr_scheduler_type: Literal["linear", "cosine"] = "cosine",
         min_lr_ratio: float = 0.0,
         scheduler_num_cycles: float = 0.5,
         warmup_ratio: float = 0.0,
@@ -318,7 +318,7 @@ def create(
             batch_size (int or "max"): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
-            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear".
+            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "cosine".
             min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
                 the learning rate scheduler. Defaults to 0.0.
             scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5.
@@ -693,7 +693,7 @@ async def create(
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
-        lr_scheduler_type: Literal["linear", "cosine"] = "linear",
+        lr_scheduler_type: Literal["linear", "cosine"] = "cosine",
         min_lr_ratio: float = 0.0,
         scheduler_num_cycles: float = 0.5,
         warmup_ratio: float = 0.0,
@@ -730,7 +730,7 @@ async def create(
             batch_size (int, optional): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
-            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear".
+            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "cosine".
             min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
                 the learning rate scheduler. Defaults to 0.0.
             scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5.
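
The same override is available from the Python client. A minimal sketch, assuming TOGETHER_API_KEY is set in the environment; the training-file ID and model name are placeholders:

  from together import Together

  client = Together()  # picks up TOGETHER_API_KEY from the environment

  # Omitting lr_scheduler_type now yields the cosine schedule with 0.5 cycles;
  # passing "linear" explicitly preserves the pre-1.5.12 default.
  job = client.fine_tuning.create(
      training_file="file-xxxx",  # placeholder file ID
      model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",  # placeholder model
      lr_scheduler_type="linear",
  )
  print(job.id)

With the new default and scheduler_num_cycles = 0.5, the learning rate presumably follows the standard half-cycle cosine decay used by common trainers (an assumption; the backend formula is not shown in this diff): lr(t) = min_lr + (peak_lr - min_lr) * 0.5 * (1 + cos(pi * t)) for training progress t in [0, 1], decaying smoothly from the peak rate down to min_lr_ratio * peak_lr rather than along a straight line.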
