Skip to content

Commit

Permalink
Fix order of loss resets. (#259)
Browse files Browse the repository at this point in the history
* Fix order of loss resets.

* Formatting.

---------

Co-authored-by: George Smyrnis <[email protected]>
  • Loading branch information
GeorgiosSmyrnis and GeorgiosSmyrnis authored May 8, 2024
1 parent 87955f5 commit 6321543
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 6 deletions.
11 changes: 6 additions & 5 deletions open_lm/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,11 +326,6 @@ def train_one_epoch(
# resetting batch / data time meters per log window
batch_time_m.reset()
data_time_m.reset()
- # reset all average meters
- losses_m.reset()
- if averagers is not None and args.log_avg_model_training_loss:
- for k in averagers.avgs_dict.keys():
- losses_avg_m[k].reset()

if math.isnan(losses_m.val):
# case where loss goes to nan, we see this sometimes with bad nodes.
Expand All @@ -339,6 +334,12 @@ def train_one_epoch(
# training on restarts.
return False, step

+ # reset all average meters
+ losses_m.reset()
+ if averagers is not None and args.log_avg_model_training_loss:
+ for k in averagers.avgs_dict.keys():
+ losses_avg_m[k].reset()
+
# end for
if tb_writer is not None:
tb_writer.flush()
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def _read_reqs(relpath):

setuptools.setup(
name="open_lm",
- version="0.0.33",
+ version="0.0.34",
author=[
"Suchin Gururangan*",
"Mitchell Wortsman*",
Expand Down

0 comments on commit 6321543

Please sign in to comment.