88 | 88 | {
89 | 89 | "data": {
90 | 90 | "text/plain": [
91 |    | - "tensor([ 1.8153,  0.1568,  0.0348,  0.0334,  0.9967,  0.3957,  1.0805,  0.0302,\n",
92 |    | - "        -0.4433, -0.0206], device='cuda:0', grad_fn=<CopyBackwards>)"
   | 91 | + "Parameter containing:\n",
   | 92 | + "tensor([ 0.3078, -1.9857,  1.0512,  1.5122, -1.0199, -0.7402, -1.3111,  0.6142,\n",
   | 93 | + "        -0.6474,  0.1758], requires_grad=True)"
93 | 94 | ]
94 | 95 | },
95 | 96 | "execution_count": 7,
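The changed output above shows the parameters now printing as `Parameter containing: ... requires_grad=True` on the CPU, where the old run showed a CUDA tensor carrying a `grad_fn=<CopyBackwards>`. A minimal sketch of an initialization consistent with the new repr, assuming the commit switched to `torch.nn.Parameter` (the actual init cell is not part of this hunk):

```python
import torch

# Hypothetical init matching the new cell output: a 10-element bias
# wrapped as a Parameter, so it tracks gradients (requires_grad=True).
b2 = torch.nn.Parameter(torch.randn(10))
print(b2)  # Parameter containing: tensor([...], requires_grad=True)
```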
110 | 111 | "metadata": {},
111 | 112 | "outputs": [],
112 | 113 | "source": [
113 |     | - "def sigma(x):\n",
    | 114 | + "def sigmoid(x):\n",
114 | 115 | "    # sigmoid function\n",
115 | 116 | "    return 1.0 / (1.0 + torch.exp(-x))\n",
116 | 117 | "    # return torch.div(torch.tensor(1), torch.add(torch.tensor(1.0), torch.exp(-x)))"
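The rename from `sigma` to `sigmoid` makes the helper self-describing. As a sanity check, the hand-rolled version should agree with PyTorch's built-in; a small sketch (the test values are illustrative):

```python
import torch

def sigmoid(x):
    # logistic function, applied elementwise: 1 / (1 + e^(-x))
    return 1.0 / (1.0 + torch.exp(-x))

# the hand-rolled helper should match torch.sigmoid to float precision
x = torch.linspace(-5.0, 5.0, steps=11)
assert torch.allclose(sigmoid(x), torch.sigmoid(x))
```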
122 | 123 | "metadata": {},
123 | 124 | "outputs": [],
124 | 125 | "source": [
125 |     | - "def sigma_prime(x):\n",
    | 126 | + "def sigmoid_prime(x):\n",
126 | 127 | "    # derivative of the sigmoid function\n",
127 |     | - "    return sigma(x) * (1 - sigma(x))"
    | 128 | + "    return sigmoid(x) * (1 - sigmoid(x))"
128 | 129 | ]
129 | 130 | },
130 | 131 | {
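`sigmoid_prime` relies on the identity σ'(x) = σ(x)(1 − σ(x)), which follows from differentiating 1/(1 + e^(−x)). A quick autograd cross-check of the closed form (a standalone sketch, not from the notebook):

```python
import torch

def sigmoid(x):
    return 1.0 / (1.0 + torch.exp(-x))

def sigmoid_prime(x):
    # d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x))
    return sigmoid(x) * (1 - sigmoid(x))

# compare the closed-form derivative against autograd at a few points
x = torch.tensor([-2.0, 0.0, 3.0], requires_grad=True)
sigmoid(x).sum().backward()  # fills x.grad with dsigmoid/dx at each point
assert torch.allclose(x.grad, sigmoid_prime(x.detach()))
```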
136 | 137 | "name": "stdout",
137 | 138 | "output_type": "stream",
138 | 139 | "text": [
139 |     | - "829\n",
140 |     | - "846\n",
141 |     | - "870\n",
142 |     | - "876\n",
    | 140 | + "736\n",
    | 141 | + "862\n",
    | 142 | + "860\n",
143 | 143 | "881\n",
144 |     | - "892\n",
145 |     | - "894\n",
146 |     | - "894\n",
147 |     | - "891\n",
148 |     | - "898\n"
    | 144 | + "874\n",
    | 145 | + "890\n",
    | 146 | + "904\n",
    | 147 | + "923\n",
    | 148 | + "916\n",
    | 149 | + "920\n"
149 | 150 | ]
150 | 151 | }
151 | 152 | ],
161 | 162 | "    X = X.view(-1, 28 * 28).to(device)\n",
162 | 163 | "    Y = torch.zeros((batch_size, 10)).scatter_(1, Y.unsqueeze(1), 1).to(device)  # one-hot\n",
163 | 164 | "    l1 = torch.add(torch.matmul(X, w1), b1)\n",
164 |     | - "    a1 = sigma(l1)\n",
    | 165 | + "    a1 = sigmoid(l1)\n",
165 | 166 | "    l2 = torch.add(torch.matmul(a1, w2), b2)\n",
166 |     | - "    y_pred = sigma(l2)\n",
    | 167 | + "    y_pred = sigmoid(l2)\n",
167 | 168 | "\n",
168 | 169 | "    diff = y_pred - Y\n",
169 | 170 | "\n",
170 | 171 | "    # Back prop (chain rule)\n",
171 |     | - "    d_l2 = diff * sigma_prime(l2)\n",
    | 172 | + "    d_l2 = diff * sigmoid_prime(l2)\n",
172 | 173 | "    d_b2 = d_l2\n",
173 | 174 | "    d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_l2)\n",
174 | 175 | "\n",
175 | 176 | "    d_a1 = torch.matmul(d_l2, torch.transpose(w2, 0, 1))\n",
176 |     | - "    d_l1 = d_a1 * sigma_prime(l1)\n",
    | 177 | + "    d_l1 = d_a1 * sigmoid_prime(l1)\n",
177 | 178 | "    d_b1 = d_l1\n",
178 | 179 | "    d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_l1)\n",
179 | 180 | "\n",
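The renamed helpers are used here for a manual backward pass: `diff * sigmoid_prime(l2)` is the output-layer gradient of an implicit squared-error loss, and each earlier gradient follows by the chain rule. A self-contained sketch that mirrors these steps on toy shapes and cross-checks them against autograd, under the assumption that the implicit loss is 0.5 * sum((y_pred − Y)²):

```python
import torch

torch.manual_seed(0)

# toy stand-ins; the notebook uses 784-dim MNIST inputs and 10 classes
X = torch.randn(4, 6)
Y = torch.randn(4, 3)
w1 = torch.randn(6, 5, requires_grad=True)
w2 = torch.randn(5, 3, requires_grad=True)
b1 = torch.randn(5, requires_grad=True)
b2 = torch.randn(3, requires_grad=True)

def sigmoid(x):
    return 1.0 / (1.0 + torch.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

# forward pass, as in the diff above
l1 = torch.matmul(X, w1) + b1
a1 = sigmoid(l1)
l2 = torch.matmul(a1, w2) + b2
y_pred = sigmoid(l2)
diff = y_pred - Y

# manual backprop (chain rule), mirroring the changed lines;
# these are the gradients of 0.5 * sum(diff ** 2)
d_l2 = diff * sigmoid_prime(l2)
d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_l2)
d_a1 = torch.matmul(d_l2, torch.transpose(w2, 0, 1))
d_l1 = d_a1 * sigmoid_prime(l1)
d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_l1)

# autograd agrees with the hand-derived gradients
loss = 0.5 * (diff ** 2).sum()
loss.backward()
assert torch.allclose(d_w1, w1.grad) and torch.allclose(d_w2, w2.grad)
```

Note the notebook keeps `d_b2 = d_l2` (and `d_b1 = d_l1`) per-sample; summing over the batch dimension would give the usual bias gradient.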
184 | 185 | "\n",
185 | 186 | "    if i % 1000 == 0:\n",
186 | 187 | "        l1 = torch.add(torch.matmul(X_test, w1), b1)\n",
187 |     | - "        a1 = sigma(l1)\n",
    | 188 | + "        a1 = sigmoid(l1)\n",
188 | 189 | "        l2 = torch.add(torch.matmul(a1, w2), b2)\n",
189 |     | - "        y_pred = sigma(l2)\n",
    | 190 | + "        y_pred = sigmoid(l2)\n",
190 | 191 | "        acct_mat = torch.argmax(y_pred, 1) == Y_test\n",
191 | 192 | "        acct_res = acct_mat.sum()\n",
192 | 193 | "        print(acct_res.item())\n",
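The stdout changes earlier in this diff are these `acct_res` values: raw counts of correct test predictions at each evaluation step, not percentages. A tiny illustration of what the evaluation computes, with hypothetical stand-in values (dividing by the test-set size turns the count into an accuracy):

```python
import torch

# hypothetical stand-ins for the notebook's y_pred and Y_test
y_pred = torch.tensor([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])
Y_test = torch.tensor([1, 0, 0])

acct_mat = torch.argmax(y_pred, 1) == Y_test  # per-sample correctness
acct_res = acct_mat.sum()                     # raw count, as printed above
print(acct_res.item())                        # 2
print(acct_res.item() / Y_test.size(0))       # 0.666..., the accuracy
```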
212 | 213 | "name": "python",
213 | 214 | "nbconvert_exporter": "python",
214 | 215 | "pygments_lexer": "ipython3",
215 |     | - "version": "3.5.3"
    | 216 | + "version": "3.6.8"
216 | 217 | }
217 | 218 | },
218 | 219 | "nbformat": 4,