
 import re
 from pathlib import Path
-from typing import Literal, List
+from typing import List, Literal

 from rich import print as rprint

 from together.abstract import api_requestor
 from together.filemanager import DownloadManager
 from together.together_response import TogetherResponse
 from together.types import (
+    CosineLRScheduler,
+    CosineLRSchedulerArgs,
+    FinetuneCheckpoint,
     FinetuneDownloadResult,
     FinetuneList,
     FinetuneListEvents,
+    FinetuneLRScheduler,
     FinetuneRequest,
     FinetuneResponse,
     FinetuneTrainingLimits,
     FullTrainingType,
+    LinearLRScheduler,
+    LinearLRSchedulerArgs,
     LoRATrainingType,
     TogetherClient,
     TogetherRequest,
-    TrainingType,
-    FinetuneLRScheduler,
-    LinearLRScheduler,
-    CosineLRScheduler,
-    LinearLRSchedulerArgs,
-    CosineLRSchedulerArgs,
     TrainingMethodDPO,
     TrainingMethodSFT,
-    FinetuneCheckpoint,
+    TrainingType,
 )
 from together.types.finetune import (
     DownloadCheckpointType,
-    FinetuneEventType,
     FinetuneEvent,
+    FinetuneEventType,
 )
 from together.utils import (
+    get_event_step,
     log_warn_once,
     normalize_key,
-    get_event_step,
 )

+
 _FT_JOB_WITH_STEP_REGEX = r"^ft-[\dabcdef-]+:\d+$"

@@ -63,7 +64,7 @@ def create_finetune_request(
     lr_scheduler_type: Literal["linear", "cosine"] = "linear",
     min_lr_ratio: float = 0.0,
     scheduler_num_cycles: float = 0.5,
-    warmup_ratio: float = 0.0,
+    warmup_ratio: float | None = None,
     max_grad_norm: float = 1.0,
     weight_decay: float = 0.0,
     lora: bool = False,
@@ -81,7 +82,6 @@ def create_finetune_request(
     dpo_beta: float | None = None,
     from_checkpoint: str | None = None,
 ) -> FinetuneRequest:
-
     if model is not None and from_checkpoint is not None:
         raise ValueError(
             "You must specify either a model or a checkpoint to start a job from, not both"
@@ -90,6 +90,8 @@ def create_finetune_request(
     if model is None and from_checkpoint is None:
         raise ValueError("You must specify either a model or a checkpoint")

+    model_or_checkpoint = model or from_checkpoint
+
     if batch_size == "max":
         log_warn_once(
             "Starting from together>=1.3.0, "
@@ -103,7 +105,9 @@ def create_finetune_request(
     min_batch_size: int = 0
     if lora:
         if model_limits.lora_training is None:
-            raise ValueError("LoRA adapters are not supported for the selected model.")
+            raise ValueError(
+                f"LoRA adapters are not supported for the selected model ({model_or_checkpoint})."
+            )
         lora_r = lora_r if lora_r is not None else model_limits.lora_training.max_rank
         lora_alpha = lora_alpha if lora_alpha is not None else lora_r * 2
         training_type = LoRATrainingType(
@@ -118,7 +122,9 @@ def create_finetune_request(

     else:
         if model_limits.full_training is None:
-            raise ValueError("Full training is not supported for the selected model.")
+            raise ValueError(
+                f"Full training is not supported for the selected model ({model_or_checkpoint})."
+            )

         max_batch_size = model_limits.full_training.max_batch_size
         min_batch_size = model_limits.full_training.min_batch_size
@@ -127,25 +133,29 @@ def create_finetune_request(

     if batch_size > max_batch_size:
         raise ValueError(
-            "Requested batch size is higher that the maximum allowed value."
+            f"Requested batch size of {batch_size} is higher than the maximum allowed value of {max_batch_size}."
         )

     if batch_size < min_batch_size:
         raise ValueError(
-            "Requested batch size is lower that the minimum allowed value."
+            f"Requested batch size of {batch_size} is lower than the minimum allowed value of {min_batch_size}."
         )

     if warmup_ratio > 1 or warmup_ratio < 0:
-        raise ValueError("Warmup ratio should be between 0 and 1")
+        raise ValueError(f"Warmup ratio should be between 0 and 1 (got {warmup_ratio})")

     if min_lr_ratio is not None and (min_lr_ratio > 1 or min_lr_ratio < 0):
-        raise ValueError("Min learning rate ratio should be between 0 and 1")
+        raise ValueError(
+            f"Min learning rate ratio should be between 0 and 1 (got {min_lr_ratio})"
+        )

     if max_grad_norm < 0:
-        raise ValueError("Max gradient norm should be non-negative")
+        raise ValueError(
+            f"Max gradient norm should be non-negative (got {max_grad_norm})"
+        )

     if weight_decay is not None and (weight_decay < 0):
-        raise ValueError("Weight decay should be non-negative")
+        raise ValueError(f"Weight decay should be non-negative (got {weight_decay})")

     if training_method not in AVAILABLE_TRAINING_METHODS:
         raise ValueError(
@@ -155,7 +165,9 @@ def create_finetune_request(
     lr_scheduler: FinetuneLRScheduler
     if lr_scheduler_type == "cosine":
         if scheduler_num_cycles <= 0.0:
-            raise ValueError("Number of cycles should be greater than 0")
+            raise ValueError(
+                f"Number of cycles should be greater than 0 (got {scheduler_num_cycles})"
+            )

         lr_scheduler = CosineLRScheduler(
             lr_scheduler_args=CosineLRSchedulerArgs(
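The bulk of these hunks swap fixed error strings for f-strings that echo the offending value (and, for the training-type checks, the model or checkpoint name) back to the caller. Below is a minimal, self-contained sketch of that pattern; the `_check_batch_size` helper and its hard-coded limits are stand-ins made up for illustration, not part of the together SDK.

# Illustrative sketch of the parameterized validation errors introduced above.
# The helper name and the limits are assumptions for this example only.
def _check_batch_size(batch_size: int, min_batch_size: int, max_batch_size: int) -> None:
    if batch_size > max_batch_size:
        raise ValueError(
            f"Requested batch size of {batch_size} is higher than the maximum allowed value of {max_batch_size}."
        )
    if batch_size < min_batch_size:
        raise ValueError(
            f"Requested batch size of {batch_size} is lower than the minimum allowed value of {min_batch_size}."
        )


try:
    _check_batch_size(batch_size=256, min_batch_size=8, max_batch_size=128)
except ValueError as err:
    print(err)  # Requested batch size of 256 is higher than the maximum allowed value of 128.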