Commit cc2cbe5: "linted"

Parent: 1d29180

File tree: 4 files changed (+98, -94 lines)

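The diffs below are mechanical PEP 8 cleanups: spaces added around operators and after commas, keyword arguments tightened from "x = y" to "x=y", trailing whitespace stripped, two blank lines inserted between top-level definitions, and "# noqa" markers added to imports the linter would otherwise flag. The commit message does not name the tool; given the "# noqa" convention, flake8 run through a notebook-aware wrapper such as nbqa (e.g. "nbqa flake8 examples/pytorch/") is a plausible but unconfirmed guess.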

examples/pytorch/01-start-with-pytorch.ipynb

Lines changed: 31 additions & 29 deletions
@@ -28,18 +28,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd\n",
-"import uuid\n",
+"import uuid # noqa\n",
 "import datetime\n",
-"import pickle\n",
+"import pickle # noqa\n",
 "import json\n",
-"import torch\n",
-"import math\n",
+"import torch # noqa\n",
 "import torch.nn as nn\n",
 "import torch.optim as optim\n",
 "import numpy as np\n",
 "import urllib.request\n",
-"import time\n",
+"import pandas as pd # noqa\n",
 "from torch.utils.data import Dataset, DataLoader"
 ]
 },
@@ -65,44 +63,47 @@
 "characters = list(\"*+abcdefghijklmnopqrstuvwxyz-. \")\n",
 "str_len = 8\n",
 "\n",
+"\n",
 "def format_training_data(pet_names, device=None):\n",
 " def get_substrings(in_str):\n",
 " # add the stop character to the end of the name, then generate all the partial names\n",
 " in_str = in_str + \"+\"\n",
 " res = [in_str[0: j] for j in range(1, len(in_str) + 1)]\n",
 " return res\n",
-" pet_names_expanded = [get_substrings(name) for name in pet_names]\n",
+" pet_names_expanded = [get_substrings(name) for name in pet_names]\n",
 " pet_names_expanded = [item for sublist in pet_names_expanded for item in sublist]\n",
 " pet_names_characters = [list(name) for name in pet_names_expanded]\n",
 " pet_names_padded = [name[-(str_len + 1):] for name in pet_names_characters]\n",
-" pet_names_padded = [list((str_len + 1- len(characters)) * \"*\") + characters for characters in pet_names_padded]\n",
+" pet_names_padded = [list((str_len + 1 - len(characters)) * \"*\") + characters for characters in pet_names_padded]\n",
 " pet_names_numeric = [[characters.index(char) for char in name] for name in pet_names_padded]\n",
 "\n",
 " # the final x and y data to use for training the model. Note that the x data needs to be one-hot encoded\n",
 " if device is None:\n",
 " y = torch.tensor([name[1:] for name in pet_names_numeric])\n",
 " x = torch.tensor([name[:-1] for name in pet_names_numeric])\n",
 " else:\n",
-" y = torch.tensor([name[1:] for name in pet_names_numeric], device = device)\n",
-" x = torch.tensor([name[:-1] for name in pet_names_numeric], device = device)\n",
-" x = torch.nn.functional.one_hot(x, num_classes = len(characters)).float()\n",
+" y = torch.tensor([name[1:] for name in pet_names_numeric], device=device)\n",
+" x = torch.tensor([name[:-1] for name in pet_names_numeric], device=device)\n",
+" x = torch.nn.functional.one_hot(x, num_classes=len(characters)).float()\n",
 " return x, y\n",
 "\n",
+"\n",
 "class OurDataset(Dataset):\n",
 " def __init__(self, pet_names, device=None):\n",
 " self.x, self.y = format_training_data(pet_names, device)\n",
 " self.permute()\n",
-" \n",
+"\n",
 " def __getitem__(self, idx):\n",
 " idx = self.permutation[idx]\n",
 " return self.x[idx], self.y[idx]\n",
-" \n",
+"\n",
 " def __len__(self):\n",
 " return len(self.x)\n",
-" \n",
+"\n",
 " def permute(self):\n",
 " self.permutation = torch.randperm(len(self.x))\n",
 "\n",
+"\n",
 "class Model(nn.Module):\n",
 " def __init__(self):\n",
 " super(Model, self).__init__()\n",
@@ -115,6 +116,7 @@
 " dropout=0.1,\n",
 " )\n",
 " self.fc = nn.Linear(self.lstm_size, len(characters))\n",
+"\n",
 " def forward(self, x):\n",
 " output, state = self.lstm(x)\n",
 " logits = self.fc(output)\n",
@@ -138,21 +140,21 @@
 "def train():\n",
 " device = torch.device(0)\n",
 "\n",
-" dataset = OurDataset(pet_names, device = device)\n",
-" loader = DataLoader(dataset, batch_size=batch_size,shuffle=True, num_workers=0)\n",
-" \n",
+" dataset = OurDataset(pet_names, device=device)\n",
+" loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)\n",
+"\n",
 " model = Model()\n",
 " model = model.to(device)\n",
-" \n",
+"\n",
 " criterion = nn.CrossEntropyLoss()\n",
 " optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
-" \n",
+"\n",
 " for epoch in range(num_epochs):\n",
 " dataset.permute()\n",
 " for i, (batch_x, batch_y) in enumerate(loader):\n",
 " optimizer.zero_grad()\n",
 " batch_y_pred = model(batch_x)\n",
-" \n",
+"\n",
 " loss = criterion(batch_y_pred.transpose(1, 2), batch_y)\n",
 " loss.backward()\n",
 " optimizer.step()\n",
@@ -206,21 +208,21 @@
 " in_progress_name_padded = in_progress_name[-str_len:]\n",
 " in_progress_name_padded = list((str_len - len(in_progress_name_padded)) * \"*\") + in_progress_name_padded\n",
 " in_progress_name_numeric = [characters.index(char) for char in in_progress_name_padded]\n",
-" in_progress_name_tensor = torch.tensor(in_progress_name_numeric, device = device)\n",
-" in_progress_name_tensor = torch.nn.functional.one_hot(in_progress_name_tensor, num_classes = len(characters)).float()\n",
+" in_progress_name_tensor = torch.tensor(in_progress_name_numeric, device=device)\n",
+" in_progress_name_tensor = torch.nn.functional.one_hot(in_progress_name_tensor, num_classes=len(characters)).float()\n",
 " in_progress_name_tensor = torch.unsqueeze(in_progress_name_tensor, 0)\n",
-" \n",
+"\n",
 " # get the probabilities of each possible next character by running the model\n",
 " with torch.no_grad():\n",
 " next_letter_probabilities = model(in_progress_name_tensor)\n",
-" \n",
-" next_letter_probabilities = next_letter_probabilities[0,-1,:]\n",
+"\n",
+" next_letter_probabilities = next_letter_probabilities[0, -1, :]\n",
 " next_letter_probabilities = torch.nn.functional.softmax(next_letter_probabilities, dim=0).detach().cpu().numpy()\n",
 " next_letter_probabilities = next_letter_probabilities[1:]\n",
-" next_letter_probabilities = [p/sum(next_letter_probabilities) for p in next_letter_probabilities]\n",
-" \n",
+" next_letter_probabilities = [p / sum(next_letter_probabilities) for p in next_letter_probabilities]\n",
+"\n",
 " # determine what the actual letter is\n",
-" next_letter = characters[np.random.choice(len(characters)-1, p=next_letter_probabilities) + 1]\n",
+" next_letter = characters[np.random.choice(len(characters) - 1, p=next_letter_probabilities) + 1]\n",
 " if(next_letter != \"+\"):\n",
 " # if the next character isn't stop add the latest generated character to the name and continue\n",
 " in_progress_name.append(next_letter)\n",
@@ -243,7 +245,7 @@
 "outputs": [],
 "source": [
 "# Generate 50 names then filter out existing ones\n",
-"generated_names = [generate_name(model, characters, str_len) for i in range(0,50)]\n",
+"generated_names = [generate_name(model, characters, str_len) for i in range(0, 50)]\n",
 "generated_names = [name for name in generated_names if name not in pet_names]\n",
 "print(generated_names)"
 ]
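A note on the bare "# noqa" markers added above: flake8 skips every check on a line that ends in "# noqa", while the scoped form "# noqa: <code>" silences only the listed codes. Here they presumably suppress F401 ("imported but unused") for imports that a per-cell lint pass saw as unused. A minimal sketch of the two forms (module names are illustrative, not taken from the commit):

    # Bare form: flake8 skips *every* check on this line
    import pickle  # noqa

    # Scoped form: skips only F401 ("imported but unused")
    import uuid  # noqa: F401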

examples/pytorch/02-start-with-dask.ipynb

Lines changed: 2 additions & 2 deletions
@@ -47,7 +47,7 @@
 "source": [
 "@dask.delayed\n",
 "def lazy_exponent(args):\n",
-" x,y = args\n",
+" x, y = args\n",
 " '''Define a lazily evaluating function'''\n",
 " return x ** y"
 ]
@@ -65,7 +65,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"inputs = [[1,2], [3,4], [5,6], [9, 10], [11, 12]]\n",
+"inputs = [[1, 2], [3, 4], [5, 6], [9, 10], [11, 12]]\n",
 "\n",
 "example_future = client.map(lazy_exponent, inputs)\n",
 "futures_gathered = client.gather(example_future)\n",

examples/pytorch/03-start-with-pytorch-dask-multiple-models.ipynb

Lines changed: 28 additions & 27 deletions
@@ -22,18 +22,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd\n",
-"import uuid\n",
-"import datetime\n",
-"import pickle\n",
-"import json\n",
-"import torch\n",
-"import math\n",
-"import torch.nn as nn\n",
-"import torch.optim as optim\n",
-"import numpy as np\n",
-"import urllib.request\n",
-"import time\n",
+"import uuid # noqa\r\n",
+"import datetime\r\n",
+"import pickle # noqa\r\n",
+"import json\r\n",
+"import torch\r\n",
+"import torch.nn as nn\r\n",
+"import torch.optim as optim\r\n",
+"import numpy as np # noqa\r\n",
+"import urllib.request\r\n",
+"import pandas as pd\r\n",
 "from torch.utils.data import Dataset, DataLoader"
 ]
 },
@@ -52,44 +50,47 @@
 "characters = list(\"*+abcdefghijklmnopqrstuvwxyz-. \")\n",
 "str_len = 8\n",
 "\n",
+"\n",
 "def format_training_data(pet_names, device=None):\n",
 " def get_substrings(in_str):\n",
 " # add the stop character to the end of the name, then generate all the partial names\n",
 " in_str = in_str + \"+\"\n",
 " res = [in_str[0: j] for j in range(1, len(in_str) + 1)]\n",
 " return res\n",
-" pet_names_expanded = [get_substrings(name) for name in pet_names]\n",
+" pet_names_expanded = [get_substrings(name) for name in pet_names]\n",
 " pet_names_expanded = [item for sublist in pet_names_expanded for item in sublist]\n",
 " pet_names_characters = [list(name) for name in pet_names_expanded]\n",
 " pet_names_padded = [name[-(str_len + 1):] for name in pet_names_characters]\n",
-" pet_names_padded = [list((str_len + 1- len(characters)) * \"*\") + characters for characters in pet_names_padded]\n",
+" pet_names_padded = [list((str_len + 1 - len(characters)) * \"*\") + characters for characters in pet_names_padded]\n",
 " pet_names_numeric = [[characters.index(char) for char in name] for name in pet_names_padded]\n",
 "\n",
 " # the final x and y data to use for training the model. Note that the x data needs to be one-hot encoded\n",
 " if device is None:\n",
 " y = torch.tensor([name[1:] for name in pet_names_numeric])\n",
 " x = torch.tensor([name[:-1] for name in pet_names_numeric])\n",
 " else:\n",
-" y = torch.tensor([name[1:] for name in pet_names_numeric], device = device)\n",
-" x = torch.tensor([name[:-1] for name in pet_names_numeric], device = device)\n",
-" x = torch.nn.functional.one_hot(x, num_classes = len(characters)).float()\n",
+" y = torch.tensor([name[1:] for name in pet_names_numeric], device=device)\n",
+" x = torch.tensor([name[:-1] for name in pet_names_numeric], device=device)\n",
+" x = torch.nn.functional.one_hot(x, num_classes=len(characters)).float()\n",
 " return x, y\n",
 "\n",
+"\n",
 "class OurDataset(Dataset):\n",
 " def __init__(self, pet_names, device=None):\n",
 " self.x, self.y = format_training_data(pet_names, device)\n",
 " self.permute()\n",
-" \n",
+"\n",
 " def __getitem__(self, idx):\n",
 " idx = self.permutation[idx]\n",
 " return self.x[idx], self.y[idx]\n",
-" \n",
+"\n",
 " def __len__(self):\n",
 " return len(self.x)\n",
-" \n",
+"\n",
 " def permute(self):\n",
 " self.permutation = torch.randperm(len(self.x))\n",
 "\n",
+"\n",
 "class Model(nn.Module):\n",
 " def __init__(self):\n",
 " super(Model, self).__init__()\n",
@@ -102,6 +103,7 @@
 " dropout=0.1,\n",
 " )\n",
 " self.fc = nn.Linear(self.lstm_size, len(characters))\n",
+"\n",
 " def forward(self, x):\n",
 " output, state = self.lstm(x)\n",
 " logits = self.fc(output)\n",
@@ -162,28 +164,27 @@
 " training_start_time = datetime.datetime.now()\n",
 " device = torch.device(0)\n",
 "\n",
-" dataset = OurDataset(pet_names, device = device)\n",
+" dataset = OurDataset(pet_names, device=device)\n",
 " loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)\n",
-" \n",
+"\n",
 " model = Model()\n",
 " model = model.to(device)\n",
-" \n",
+"\n",
 " criterion = nn.CrossEntropyLoss()\n",
 " optimizer = optim.Adam(model.parameters(), lr=lr)\n",
-" \n",
+"\n",
 " results = []\n",
 "\n",
 " for epoch in range(num_epochs):\n",
 " dataset.permute()\n",
 " for i, (batch_x, batch_y) in enumerate(loader):\n",
 " optimizer.zero_grad()\n",
 " batch_y_pred = model(batch_x)\n",
-" \n",
+"\n",
 " loss = criterion(batch_y_pred.transpose(1, 2), batch_y)\n",
 " loss.backward()\n",
 " optimizer.step()\n",
 " logger.info(f\"{datetime.datetime.now().isoformat()} - batch {i} - batch_size {batch_size} - lr {lr} - epoch {epoch} complete - loss {loss.item()}\")\n",
-" epoch_end_time = datetime.datetime.now().isoformat()\n",
 " new_results = {\n",
 " \"batch_size\": batch_size,\n",
 " \"lr\": lr,\n",
@@ -210,7 +211,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"inputs = [(4096,0.001), (16384, 0.001), (4096,0.01), (16384, 0.01)]\n",
+"inputs = [(4096, 0.001), (16384, 0.001), (4096, 0.01), (16384, 0.01)]\n",
 "\n",
 "train_future = client.map(train, inputs)\n",
 "futures_gathered = client.gather(train_future)\n",
