Commit 8c15d3b

Merge branch 'main' into add-s3-uploader
2 parents 2651a17 + 4a2ddca commit 8c15d3b

2 files changed: +2 -3 lines changed


src/nanotron/models/llama.py

+1 -1
@@ -165,7 +165,7 @@ def __init__(
             bias=False,
             async_communication=tp_linear_async_communication and tp_mode is TensorParallelLinearMode.REDUCE_SCATTER,
         )
-        self.split_silu_mul = torch.compile(GLUActivation(config.hidden_act))
+        self.split_silu_mul = GLUActivation(config.hidden_act)

     def forward(self, hidden_states):  # [seq_length, batch_size, hidden_dim]
         merged_states = self.gate_up_proj(hidden_states)
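The net effect of this hunk is that the GLU activation is now called eagerly instead of through a torch.compile wrapper. As a rough illustration only, a GLU-style activation over a merged gate_up projection typically splits the last dimension in half and gates one half with the other; the sketch below is an assumption for context, not nanotron's exact GLUActivation.

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class GLUActivationSketch(nn.Module):
        """Illustrative stand-in for a GLU activation over a merged
        gate_up projection (hypothetical, for context only)."""

        def __init__(self, act_fn=F.silu):
            super().__init__()
            self.act_fn = act_fn

        def forward(self, merged_states: torch.Tensor) -> torch.Tensor:
            # merged_states: [seq_length, batch_size, 2 * intermediate_size]
            gate_states, up_states = torch.chunk(merged_states, 2, dim=-1)
            return self.act_fn(gate_states) * up_states

    # Before this commit the module was wrapped as
    #   torch.compile(GLUActivation(config.hidden_act))
    # afterwards it is instantiated directly, so forward() runs eagerly.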

src/nanotron/trainer.py

+1 -2
@@ -723,8 +723,7 @@ def _load_model_checkpoint(self, model: NanotronModel) -> NanotronModel:
             )
             reloaded_from_checkpoint=True
         if not reloaded_from_checkpoint:
-            # TODO @eliebak add s3 support also here
-            log_rank("No checkpoint path provided.", logger=logger, level=logging.INFO)
+            log_rank("No checkpoint path provided.", logger=logger, level=logging.INFO, rank=0)
             if isinstance(self.config.model.init_method, ExistingCheckpointInit):
                 # Initialize model from an pretrained model checkpoint (without optimizer, lr_scheduler...)
                 self.param_shard_metadata = load_weights(
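The behavioral change here is that passing rank=0 limits the "No checkpoint path provided." message to a single process rather than having every rank print it. A minimal sketch of the rank-filter pattern such a helper usually implements (an illustrative assumption, not nanotron's actual log_rank):

    import logging

    import torch.distributed as dist

    def log_rank_sketch(msg: str, logger: logging.Logger, level: int, rank=None) -> None:
        """Hypothetical rank-filtered logger: if `rank` is given, only that
        global rank emits the message; otherwise every rank logs it."""
        current_rank = dist.get_rank() if dist.is_initialized() else 0
        if rank is None or current_rank == rank:
            logger.log(level, msg)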
