forked from maxim5/cs229-2018-autumn
Showing 614 changed files with 176,362 additions and 0 deletions.
% BOOSTING_EXAMPLE
%
% Constructs a 2-dimensional dataset classifiable by boosting, but not by any
% simple linear classifier, because of the thresholded nature of the data.

rand('seed', 0);  % seed the RNG for reproducibility (legacy syntax)

% m datapoints in 2 dimensions
mm = 150;
X = rand(mm, 2);

% Label +1 inside the lower-left square (both coordinates below the
% threshold), -1 everywhere else.
thresh_pos = .6;
y = [X(:, 1) < thresh_pos & X(:, 2) < thresh_pos];
y = 2 * y - 1;

for T = [2, 4, 5, 10]
  figure;
  hpos = plot(X(y == 1, 1), X(y == 1, 2), 'o');
  hold on;
  hneg = plot(X(y == -1, 1), X(y == -1, 2), 'x');
  set(hpos, 'linewidth', 2);
  set(hneg, 'linewidth', 2);

  [theta, feature_inds, thresholds] = stump_booster(X, y, T);

  % Evaluate the boosted classifier on a 100-by-100 grid so we can draw
  % its decision boundary.
  x1_coords = linspace(0, 1, 100);
  x2_coords = linspace(0, 1, 100);
  Z = zeros(100);
  for ii = 1:100
    for jj = 1:100
      pred = (sign(x1_coords(ii) - thresholds(feature_inds == 1))' * ...
              theta(feature_inds == 1)) + ...
             (sign(x2_coords(jj) - thresholds(feature_inds == 2))' * ...
              theta(feature_inds == 2));
      Z(jj, ii) = sign(pred);
    end
  end

  % Trace the zero level set of Z, i.e. the decision boundary.
  C = contourc(x1_coords, x2_coords, Z, [0 0]);
  h = plot(C(1, 2:end), C(2, 2:end), 'k-');
  set(h, 'linewidth', 2);
  title(sprintf('Iterations = %d', T));
  set(gca, 'fontsize', 18);
  print('-depsc2', sprintf('boost_plot_%d.eps', T));
end

%% Now solve the logistic regression problem directly

mm = 200;
X = rand(mm, 2);
y = [X(:, 1) < thresh_pos & X(:, 2) < thresh_pos];
y = 2 * y - 1;

theta_log = zeros(3, 1);
X_logit = [ones(mm, 1), X];  % prepend an intercept column
for iter = 1:1000
  risk = (1/mm) * sum(log(1 + exp(-y .* (X_logit * theta_log))));
  if (mod(iter, 50) == 0)
    fprintf(1, 'Iter %d, loss %1.4f\n', iter, risk);
  end
  p = 1 ./ (1 + exp(y .* (X_logit * theta_log)));  % p = 1 - sigmoid(y .* (x' * theta))
  g = -(1/mm) * X_logit' * (p .* y);               % gradient of the logistic risk
  theta_log = theta_log - 2 * g;                   % gradient step with fixed size 2
end

% The decision boundary is the line theta_log' * [1; x1; x2] = 0.
x1_coord = linspace(0, 1, 100);
x2_coord = -(theta_log(1) + theta_log(2) * x1_coord) / theta_log(3);

figure;
hpos = plot(X(y == 1, 1), X(y == 1, 2), 'o');
hold on;
hneg = plot(X(y == -1, 1), X(y == -1, 2), 'x');
set(hpos, 'linewidth', 2);
set(hneg, 'linewidth', 2);
h = plot(x1_coord, x2_coord, 'k-', 'linewidth', 2);
axis([0 1 0 1]);
set(gca, 'fontsize', 18);
print -depsc2 'logistic_plot.eps';
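
The script above calls `stump_booster(X, y, T)`, which lives in a separate file not reproduced in this commit view. As a rough illustration of the kind of routine the plotting loop expects — one returning weights `theta`, feature indices `feature_inds`, and `thresholds` such that the boosted classifier is `sign(sum_t theta(t) * sign(x(feature_inds(t)) - thresholds(t)))` — here is a minimal Python sketch assuming AdaBoost over decision stumps. The exhaustive threshold search, 0-based feature indexing, and AdaBoost weighting scheme are assumptions, not the course's actual implementation.

```python
# Hypothetical stand-in for stump_booster: AdaBoost over decision stumps
# h(x) = sign(x[j] - s). For illustration only; not the course code.
import numpy as np

def stump_booster(X, y, T):
    """Run T rounds of AdaBoost; y must be in {-1, +1}.

    Returns (theta, feature_inds, thresholds): the boosted classifier is
    sign(sum_t theta[t] * sign(x[feature_inds[t]] - thresholds[t])).
    """
    m, n = X.shape
    w = np.full(m, 1.0 / m)                    # per-example weights
    theta, feature_inds, thresholds = [], [], []
    for _ in range(T):
        # Exhaustive search for the stump whose weighted error is farthest
        # from 1/2; a negative theta[t] below flips a worse-than-chance stump.
        best_j, best_s, best_err = 0, 0.0, 0.5
        for j in range(n):
            for s in X[:, j]:
                pred = np.where(X[:, j] >= s, 1.0, -1.0)
                err = w[pred != y].sum()
                if abs(err - 0.5) > abs(best_err - 0.5):
                    best_j, best_s, best_err = j, s, err
        err = np.clip(best_err, 1e-12, 1 - 1e-12)
        alpha = 0.5 * np.log((1 - err) / err)  # stump weight (may be negative)
        pred = np.where(X[:, best_j] >= best_s, 1.0, -1.0)
        w *= np.exp(-alpha * y * pred)         # upweight misclassified points
        w /= w.sum()
        theta.append(alpha)
        feature_inds.append(best_j)
        thresholds.append(best_s)
    return np.array(theta), np.array(feature_inds), np.array(thresholds)
```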
import numpy as np
from copy import copy

# Example backpropagation code for binary classification with a 2-layer
# neural network (single hidden layer)

sigmoid = lambda x: 1 / (1 + np.exp(-x))

def fprop(x, y, params):
    # Follows the procedure given in the notes
    W1, b1, W2, b2 = [params[key] for key in ('W1', 'b1', 'W2', 'b2')]
    z1 = np.dot(W1, x) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(W2, a1) + b2
    a2 = sigmoid(z2)
    loss = -(y * np.log(a2) + (1 - y) * np.log(1 - a2))  # binary cross-entropy
    ret = {'x': x, 'y': y, 'z1': z1, 'a1': a1, 'z2': z2, 'a2': a2, 'loss': loss}
    for key in params:
        ret[key] = params[key]
    return ret

def bprop(fprop_cache):
    # Follows the procedure given in the notes
    x, y, z1, a1, z2, a2, loss = [fprop_cache[key]
                                  for key in ('x', 'y', 'z1', 'a1', 'z2', 'a2', 'loss')]
    dz2 = (a2 - y)  # gradient of the loss w.r.t. z2
    dW2 = np.dot(dz2, a1.T)
    db2 = dz2
    dz1 = np.dot(fprop_cache['W2'].T, dz2) * sigmoid(z1) * (1 - sigmoid(z1))
    dW1 = np.dot(dz1, x.T)
    db1 = dz1
    return {'b1': db1, 'W1': dW1, 'b2': db2, 'W2': dW2}
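
# Why dz2 = a2 - y in bprop above (a step the notes derive; restated here):
# with a2 = sigmoid(z2) and loss L = -(y*log(a2) + (1-y)*log(1-a2)),
#   dL/da2 = -(y/a2 - (1-y)/(1-a2))   and   da2/dz2 = a2*(1-a2),
# and multiplying these out collapses to dL/dz2 = a2 - y.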

# Gradient checking

if __name__ == '__main__':
    # Initialize random parameters and inputs
    W1 = np.random.rand(2, 2)
    b1 = np.random.rand(2, 1)
    W2 = np.random.rand(1, 2)
    b2 = np.random.rand(1, 1)
    params = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    x = np.random.rand(2, 1)
    y = np.random.randint(0, 2)  # returns 0 or 1

    fprop_cache = fprop(x, y, params)
    bprop_cache = bprop(fprop_cache)

    # Numerical gradient checking
    # Note how slow this is! This is why we use the backpropagation algorithm instead.
    eps = 1e-6
    ng_cache = {}
    # For every single parameter (W, b)
    for key in params:
        param = params[key]
        # This will be our numerical gradient
        ng = np.zeros(param.shape)
        for j in range(ng.shape[0]):
            for k in range(ng.shape[1]):  # was xrange; range works in Python 2 and 3
                # For every element of the parameter matrix, compute the gradient of
                # the loss w.r.t. that element numerically using finite differences
                add_eps = np.copy(param)
                min_eps = np.copy(param)
                add_eps[j, k] += eps
                min_eps[j, k] -= eps
                add_params = copy(params)
                min_params = copy(params)
                add_params[key] = add_eps
                min_params[key] = min_eps
                ng[j, k] = (fprop(x, y, add_params)['loss'] - fprop(x, y, min_params)['loss']) / (2 * eps)
        ng_cache[key] = ng

    # Compare numerical gradients to those computed using the backpropagation algorithm
    for key in params:
        print(key)  # was Python 2 `print key`
        # These should be the same
        print(bprop_cache[key])
        print(ng_cache[key])
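
Eyeballing two printed matrices is error-prone; a tighter check is a relative-error comparison between the two gradients. A minimal sketch continuing the script above (the ~1e-8 yardstick is a conventional rule of thumb, not part of the original code):

```python
# Hypothetical follow-up: compare the gradients numerically instead of
# printing raw matrices. Values around 1e-8 or smaller indicate the
# backprop gradients match the numerical ones.
for key in params:
    bp, ng = bprop_cache[key], ng_cache[key]
    denom = np.linalg.norm(bp) + np.linalg.norm(ng) + 1e-12
    print(key, 'relative error:', np.linalg.norm(bp - ng) / denom)
```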
# CS229 Fall 2018 Problem Set #1

## Setup for Coding Parts

1. Install [Miniconda](https://conda.io/docs/user-guide/install/index.html#regular-installation)
   - Conda is a package manager that sandboxes your project's dependencies in a virtual environment
   - Miniconda contains Conda and its dependencies with no extra packages by default (as opposed to Anaconda, which installs some extra packages)
2. `cd` into `src` and run `conda env create -f environment.yml`
   - This creates a Conda environment called `cs229`
3. Run `source activate cs229`
   - This activates the `cs229` environment
   - Do this each time you want to write/test your code
4. (Optional) If you use PyCharm:
   - Open the `src` directory in PyCharm
   - Go to `PyCharm` > `Preferences` > `Project` > `Project Interpreter`
   - Click the gear in the top-right corner, then `Add`
   - Select `Conda environment` > `Existing environment` > the `…` button on the right
   - Select `/Users/YOUR_USERNAME/miniconda3/envs/cs229/bin/python`
   - Select `OK`, then `Apply`
5. Browse the code in `linear_model.py`
   - The `LinearModel` class roughly follows the sklearn classifier interface: you must implement a `fit` and a `predict` method for every `LinearModel` subclass you write (a minimal skeleton is sketched after this list)
6. Browse the `util.py` file. Notice you have access to methods that do the following tasks:
   - Load a dataset in the CSV format provided in PS1
   - Add an intercept to a dataset (*i.e.,* add a new column of 1s to the design matrix)
   - Plot a dataset and a linear decision boundary. Some plots in PS1 will require modified plotting code, but you can use this as a starting point.
7. Notice the `run.py` file. You should **not** modify the commands in this file, since the autograder expects your code to run with the flags given in `run.py`. Use this script to make sure your code runs without errors.
   - You can run `python run.py` to run all problems, or add a problem number (*e.g.,* `python run.py 1` to run Problem 1).
   - When you submit to Gradescope, the autograder will immediately check that your code runs and produces output files of the correct name, output format, and shape.
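
For orientation, here is a minimal, hypothetical sketch of the interface step 5 describes. The base-class shape and the example subclass (plain batch gradient ascent for logistic regression) are illustrative assumptions, not the actual starter code in `linear_model.py`:

```python
import numpy as np

class LinearModel(object):
    """Assumed shape of the starter code's base class."""
    def __init__(self, theta=None):
        self.theta = theta

    def fit(self, x, y):
        raise NotImplementedError

    def predict(self, x):
        raise NotImplementedError

class LogisticRegressionSketch(LinearModel):
    """Illustrative subclass: batch gradient ascent on the log-likelihood."""
    def fit(self, x, y, lr=0.1, n_iters=10000):
        m, n = x.shape
        self.theta = np.zeros(n)
        for _ in range(n_iters):
            h = 1 / (1 + np.exp(-x.dot(self.theta)))  # sigmoid(x @ theta)
            self.theta += (lr / m) * x.T.dot(y - h)   # gradient ascent step
        return self

    def predict(self, x):
        return 1 / (1 + np.exp(-x.dot(self.theta)))   # P(y = 1 | x)
```

This assumes `x` already includes the intercept column that `util.py`'s add-intercept helper provides.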