Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hydra #6

Merged
merged 5 commits into from
Mar 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .dvc/config
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
[core]
remote = s3cache
[hydra]
enabled = True
['remote "s3cache"']
url = s3://lstm-seq2seq/cache
4 changes: 2 additions & 2 deletions .github/workflows/cml.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ jobs:
pip install --upgrade -r requirements.txt

dvc pull --allow-missing
dvc exp run
dvc push
dvc exp run -S model=full
dvc exp push --rev HEAD origin

git add .
git commit -m "experiment results"
Expand Down
5 changes: 5 additions & 0 deletions conf/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
defaults:
- model: small
data_path: fra.txt
num_samples: 100000
seed: 423
5 changes: 5 additions & 0 deletions conf/model/full.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
batch_size: 512
latent_dim: 64
lr: 0.01
duration: "00:00:30:00"
max_epochs: -1
5 changes: 5 additions & 0 deletions conf/model/small.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
batch_size: 512
latent_dim: 8
lr: 0.01
duration: "00:00:30:00"
max_epochs: 2
46 changes: 46 additions & 0 deletions dvc.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
schema: '2.0'
stages:
download:
cmd:
- curl -O http://www.manythings.org/anki/fra-eng.zip
- unzip fra-eng.zip
- rm _about.txt fra-eng.zip
deps:
- path: http://www.manythings.org/anki/fra-eng.zip
checksum: '33473984774969389833272906710854276175'
size: 7155035
outs:
- path: fra.txt
md5: df3f29f57cbe8a45787cac25a9a049ba
size: 31547877
train:
cmd: python train.py
deps:
- path: fra.txt
md5: df3f29f57cbe8a45787cac25a9a049ba
size: 31547877
- path: train.py
md5: 06cf41493f48d027ed990f2a2aa4fb16
size: 6745
params:
params.yaml:
data_path: fra.txt
model:
batch_size: 512
latent_dim: 16
lr: 0.01
duration: 00:00:30:00
max_epochs: 2
num_samples: 100000
seed: 423
outs:
- path: model/model.ckpt
md5: 0f6376fb11a6ef0fdeba407badb70726
size: 108609
- path: results/metrics.json
md5: 99914b932bd37a50b983c5e7c90ae93b
size: 2
- path: results/plots
md5: f0df82b8295e65705cdc82d949b578ac.dir
size: 659
nfiles: 7
3 changes: 1 addition & 2 deletions dvc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,10 @@ stages:
- fra.txt
- train.py
params:
- params.py:
- params.yaml:
outs:
- model/model.ckpt:
persist: true
checkpoint: true
- results/metrics.json:
cache: false
persist: true
Expand Down
8 changes: 0 additions & 8 deletions params.py

This file was deleted.

9 changes: 9 additions & 0 deletions params.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
model:
batch_size: 512
latent_dim: 16
lr: 0.01
duration: 00:00:30:00
max_epochs: 2
data_path: fra.txt
num_samples: 100000
seed: 423
1 change: 1 addition & 0 deletions results/dvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
1 change: 1 addition & 0 deletions results/metrics.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
13 changes: 13 additions & 0 deletions results/plots/metrics/epoch.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
step epoch
15 0
15 0
31 1
31 1
47 2
47 2
49 3
63 3
63 3
79 4
79 4
49 0
6 changes: 6 additions & 0 deletions results/plots/metrics/train/epoch/acc.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
step acc
15 0.8118199110031128
31 0.8846276998519897
47 0.8911845684051514
63 0.8954362273216248
79 0.8986569046974182
6 changes: 6 additions & 0 deletions results/plots/metrics/train/epoch/loss.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
step loss
15 0.9161079525947571
31 0.4333362281322479
47 0.3871815800666809
63 0.36666935682296753
79 0.35249167680740356
3 changes: 3 additions & 0 deletions results/plots/metrics/train/step/acc.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
step acc
49 0.8924490809440613
49 0.931284487247467
3 changes: 3 additions & 0 deletions results/plots/metrics/train/step/loss.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
step loss
49 0.3790258467197418
49 0.24283865094184875
6 changes: 6 additions & 0 deletions results/plots/metrics/val/acc.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
step acc
15 0.8807533383369446
31 0.8894977569580078
47 0.8969053030014038
63 0.8982577323913574
79 0.9016255140304565
6 changes: 6 additions & 0 deletions results/plots/metrics/val/loss.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
step loss
15 0.4680022895336151
31 0.3969021737575531
47 0.3669666349887848
63 0.3546425402164459
79 0.34155383706092834
77 changes: 77 additions & 0 deletions results/report.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="refresh" content="5">
<title>DVC Plot</title>


<script src="https://cdn.jsdelivr.net/npm/[email protected]"></script>
<script src="https://cdn.jsdelivr.net/npm/[email protected]"></script>
<script src="https://cdn.jsdelivr.net/npm/[email protected]"></script>

<style>
table {
border-spacing: 15px;
}
</style>
</head>
<body>


<div id = "static_epoch">
<script type = "text/javascript">
var spec = {"$schema": "https://vega.github.io/schema/vega-lite/v5.json", "data": {"values": [{"step": "15", "epoch": "0", "rev": "workspace"}, {"step": "15", "epoch": "0", "rev": "workspace"}, {"step": "31", "epoch": "1", "rev": "workspace"}, {"step": "31", "epoch": "1", "rev": "workspace"}, {"step": "47", "epoch": "2", "rev": "workspace"}, {"step": "47", "epoch": "2", "rev": "workspace"}, {"step": "49", "epoch": "3", "rev": "workspace"}, {"step": "63", "epoch": "3", "rev": "workspace"}, {"step": "63", "epoch": "3", "rev": "workspace"}, {"step": "79", "epoch": "4", "rev": "workspace"}, {"step": "79", "epoch": "4", "rev": "workspace"}, {"step": "49", "epoch": "0", "rev": "workspace"}]}, "title": "epoch", "width": 300, "height": 300, "params": [{"name": "smooth", "value": 0.001, "bind": {"input": "range", "min": 0.001, "max": 1, "step": 0.001}}], "layer": [{"mark": "line", "encoding": {"x": {"field": "step", "type": "quantitative", "title": "step"}, "y": {"field": "epoch", "type": "quantitative", "title": "epoch", "scale": {"zero": false}}, "color": {"field": "rev", "type": "nominal"}}, "transform": [{"loess": "epoch", "on": "step", "groupby": ["rev", "filename", "field", "filename::field"], "bandwidth": {"signal": "smooth"}}]}, {"mark": {"type": "point", "tooltip": {"content": "data"}}, "encoding": {"x": {"field": "step", "type": "quantitative", "title": "step"}, "y": {"field": "epoch", "type": "quantitative", "title": "epoch", "scale": {"zero": false}}, "color": {"field": "rev", "type": "nominal"}}}]};
vegaEmbed('#static_epoch', spec);
</script>
</div>


<div id = "static_train_epoch_loss">
<script type = "text/javascript">
var spec = {"$schema": "https://vega.github.io/schema/vega-lite/v5.json", "data": {"values": [{"step": "15", "loss": "0.9161079525947571", "rev": "workspace"}, {"step": "31", "loss": "0.4333362281322479", "rev": "workspace"}, {"step": "47", "loss": "0.3871815800666809", "rev": "workspace"}, {"step": "63", "loss": "0.36666935682296753", "rev": "workspace"}, {"step": "79", "loss": "0.35249167680740356", "rev": "workspace"}]}, "title": "train/epoch/loss", "width": 300, "height": 300, "params": [{"name": "smooth", "value": 0.001, "bind": {"input": "range", "min": 0.001, "max": 1, "step": 0.001}}], "layer": [{"mark": "line", "encoding": {"x": {"field": "step", "type": "quantitative", "title": "step"}, "y": {"field": "loss", "type": "quantitative", "title": "loss", "scale": {"zero": false}}, "color": {"field": "rev", "type": "nominal"}}, "transform": [{"loess": "loss", "on": "step", "groupby": ["rev", "filename", "field", "filename::field"], "bandwidth": {"signal": "smooth"}}]}, {"mark": {"type": "point", "tooltip": {"content": "data"}}, "encoding": {"x": {"field": "step", "type": "quantitative", "title": "step"}, "y": {"field": "loss", "type": "quantitative", "title": "loss", "scale": {"zero": false}}, "color": {"field": "rev", "type": "nominal"}}}]};
vegaEmbed('#static_train_epoch_loss', spec);
</script>
</div>


<div id = "static_train_epoch_acc">
<script type = "text/javascript">
var spec = {"$schema": "https://vega.github.io/schema/vega-lite/v5.json", "data": {"values": [{"step": "15", "acc": "0.8118199110031128", "rev": "workspace"}, {"step": "31", "acc": "0.8846276998519897", "rev": "workspace"}, {"step": "47", "acc": "0.8911845684051514", "rev": "workspace"}, {"step": "63", "acc": "0.8954362273216248", "rev": "workspace"}, {"step": "79", "acc": "0.8986569046974182", "rev": "workspace"}]}, "title": "train/epoch/acc", "width": 300, "height": 300, "params": [{"name": "smooth", "value": 0.001, "bind": {"input": "range", "min": 0.001, "max": 1, "step": 0.001}}], "layer": [{"mark": "line", "encoding": {"x": {"field": "step", "type": "quantitative", "title": "step"}, "y": {"field": "acc", "type": "quantitative", "title": "acc", "scale": {"zero": false}}, "color": {"field": "rev", "type": "nominal"}}, "transform": [{"loess": "acc", "on": "step", "groupby": ["rev", "filename", "field", "filename::field"], "bandwidth": {"signal": "smooth"}}]}, {"mark": {"type": "point", "tooltip": {"content": "data"}}, "encoding": {"x": {"field": "step", "type": "quantitative", "title": "step"}, "y": {"field": "acc", "type": "quantitative", "title": "acc", "scale": {"zero": false}}, "color": {"field": "rev", "type": "nominal"}}}]};
vegaEmbed('#static_train_epoch_acc', spec);
</script>
</div>


<div id = "static_train_step_loss">
<script type = "text/javascript">
var spec = {"$schema": "https://vega.github.io/schema/vega-lite/v5.json", "data": {"values": [{"step": "49", "loss": "0.3790258467197418", "rev": "workspace"}, {"step": "49", "loss": "0.24283865094184875", "rev": "workspace"}]}, "title": "train/step/loss", "width": 300, "height": 300, "params": [{"name": "smooth", "value": 0.001, "bind": {"input": "range", "min": 0.001, "max": 1, "step": 0.001}}], "layer": [{"mark": "line", "encoding": {"x": {"field": "step", "type": "quantitative", "title": "step"}, "y": {"field": "loss", "type": "quantitative", "title": "loss", "scale": {"zero": false}}, "color": {"field": "rev", "type": "nominal"}}, "transform": [{"loess": "loss", "on": "step", "groupby": ["rev", "filename", "field", "filename::field"], "bandwidth": {"signal": "smooth"}}]}, {"mark": {"type": "point", "tooltip": {"content": "data"}}, "encoding": {"x": {"field": "step", "type": "quantitative", "title": "step"}, "y": {"field": "loss", "type": "quantitative", "title": "loss", "scale": {"zero": false}}, "color": {"field": "rev", "type": "nominal"}}}]};
vegaEmbed('#static_train_step_loss', spec);
</script>
</div>


<div id = "static_train_step_acc">
<script type = "text/javascript">
var spec = {"$schema": "https://vega.github.io/schema/vega-lite/v5.json", "data": {"values": [{"step": "49", "acc": "0.8924490809440613", "rev": "workspace"}, {"step": "49", "acc": "0.931284487247467", "rev": "workspace"}]}, "title": "train/step/acc", "width": 300, "height": 300, "params": [{"name": "smooth", "value": 0.001, "bind": {"input": "range", "min": 0.001, "max": 1, "step": 0.001}}], "layer": [{"mark": "line", "encoding": {"x": {"field": "step", "type": "quantitative", "title": "step"}, "y": {"field": "acc", "type": "quantitative", "title": "acc", "scale": {"zero": false}}, "color": {"field": "rev", "type": "nominal"}}, "transform": [{"loess": "acc", "on": "step", "groupby": ["rev", "filename", "field", "filename::field"], "bandwidth": {"signal": "smooth"}}]}, {"mark": {"type": "point", "tooltip": {"content": "data"}}, "encoding": {"x": {"field": "step", "type": "quantitative", "title": "step"}, "y": {"field": "acc", "type": "quantitative", "title": "acc", "scale": {"zero": false}}, "color": {"field": "rev", "type": "nominal"}}}]};
vegaEmbed('#static_train_step_acc', spec);
</script>
</div>


<div id = "static_val_loss">
<script type = "text/javascript">
var spec = {"$schema": "https://vega.github.io/schema/vega-lite/v5.json", "data": {"values": [{"step": "15", "loss": "0.4680022895336151", "rev": "workspace"}, {"step": "31", "loss": "0.3969021737575531", "rev": "workspace"}, {"step": "47", "loss": "0.3669666349887848", "rev": "workspace"}, {"step": "63", "loss": "0.3546425402164459", "rev": "workspace"}, {"step": "79", "loss": "0.34155383706092834", "rev": "workspace"}]}, "title": "val/loss", "width": 300, "height": 300, "params": [{"name": "smooth", "value": 0.001, "bind": {"input": "range", "min": 0.001, "max": 1, "step": 0.001}}], "layer": [{"mark": "line", "encoding": {"x": {"field": "step", "type": "quantitative", "title": "step"}, "y": {"field": "loss", "type": "quantitative", "title": "loss", "scale": {"zero": false}}, "color": {"field": "rev", "type": "nominal"}}, "transform": [{"loess": "loss", "on": "step", "groupby": ["rev", "filename", "field", "filename::field"], "bandwidth": {"signal": "smooth"}}]}, {"mark": {"type": "point", "tooltip": {"content": "data"}}, "encoding": {"x": {"field": "step", "type": "quantitative", "title": "step"}, "y": {"field": "loss", "type": "quantitative", "title": "loss", "scale": {"zero": false}}, "color": {"field": "rev", "type": "nominal"}}}]};
vegaEmbed('#static_val_loss', spec);
</script>
</div>


<div id = "static_val_acc">
<script type = "text/javascript">
var spec = {"$schema": "https://vega.github.io/schema/vega-lite/v5.json", "data": {"values": [{"step": "15", "acc": "0.8807533383369446", "rev": "workspace"}, {"step": "31", "acc": "0.8894977569580078", "rev": "workspace"}, {"step": "47", "acc": "0.8969053030014038", "rev": "workspace"}, {"step": "63", "acc": "0.8982577323913574", "rev": "workspace"}, {"step": "79", "acc": "0.9016255140304565", "rev": "workspace"}]}, "title": "val/acc", "width": 300, "height": 300, "params": [{"name": "smooth", "value": 0.001, "bind": {"input": "range", "min": 0.001, "max": 1, "step": 0.001}}], "layer": [{"mark": "line", "encoding": {"x": {"field": "step", "type": "quantitative", "title": "step"}, "y": {"field": "acc", "type": "quantitative", "title": "acc", "scale": {"zero": false}}, "color": {"field": "rev", "type": "nominal"}}, "transform": [{"loess": "acc", "on": "step", "groupby": ["rev", "filename", "field", "filename::field"], "bandwidth": {"signal": "smooth"}}]}, {"mark": {"type": "point", "tooltip": {"content": "data"}}, "encoding": {"x": {"field": "step", "type": "quantitative", "title": "step"}, "y": {"field": "acc", "type": "quantitative", "title": "acc", "scale": {"zero": false}}, "color": {"field": "rev", "type": "nominal"}}}]};
vegaEmbed('#static_val_acc', spec);
</script>
</div>

</body>
</html>
19 changes: 11 additions & 8 deletions train.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,22 @@
import torch
import pytorch_lightning as pl
import torchmetrics
from dvc.api import params_show
from dvclive import Live
from dvclive.lightning import DVCLiveLogger

from params import *
params = params_show()

# Vectorize the data.
input_texts = []
target_texts = []
input_characters = set()
target_characters = set()
with open(data_path, "r", encoding="utf-8") as f:
with open(params["data_path"], "r", encoding="utf-8") as f:
lines = f.read().split("\n")[:-1]
np.random.seed(seed)
np.random.seed(params["seed"])
np.random.shuffle(lines)
for line in lines[: min(num_samples, len(lines) - 1)]:
for line in lines[: min(params["num_samples"], len(lines) - 1)]:
input_text, target_text, _ = line.split("\t")
for char in input_text:
if char not in input_characters:
Expand Down Expand Up @@ -76,6 +77,7 @@
class LSTMSeqToSeq(pl.LightningModule):
def __init__(self):
super().__init__()
latent_dim = params["model"]["latent_dim"]
self.encoder_embedding = torch.nn.Embedding(num_encoder_tokens, latent_dim)
self.encoder = torch.nn.LSTM(latent_dim, latent_dim, batch_first=True)
self.decoder_embedding = torch.nn.Embedding(num_decoder_tokens,
Expand Down Expand Up @@ -121,7 +123,7 @@ def validation_step(self, batch, batch_idx):
self.log("val_acc", acc, on_step=False, on_epoch=True)

def configure_optimizers(self):
optimizer = torch.optim.RMSprop(self.parameters(), lr=lr)
optimizer = torch.optim.RMSprop(self.parameters(), lr=params["model"]["lr"])
return optimizer


Expand All @@ -147,7 +149,8 @@ def __getitem__(self, idx):
train_len = int(len(combined_data)*0.8)
val_len = len(combined_data) - train_len
train, val = torch.utils.data.random_split(combined_data, [train_len, val_len],
generator=torch.Generator().manual_seed(seed))
generator=torch.Generator().manual_seed(params["seed"]))
batch_size = params["model"]["batch_size"]
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size)
val_loader = torch.utils.data.DataLoader(val, batch_size=batch_size)

Expand All @@ -159,9 +162,9 @@ def __getitem__(self, idx):
monitor="val_acc",
mode="max",
save_weights_only=True, every_n_epochs=1)
timer = pl.callbacks.Timer(duration=duration)
timer = pl.callbacks.Timer(duration=params["model"]["duration"])

trainer = pl.Trainer(max_epochs=-1, logger=[live],
trainer = pl.Trainer(max_epochs=params["model"]["max_epochs"], logger=[live],
callbacks=[timer, checkpoint])
trainer.fit(model=arch, train_dataloaders=train_loader,
val_dataloaders=val_loader)