forked from hunkim/DeepLearningZeroToAll
Commit
add: Numpy README
Showing 3 changed files with 299 additions and 0 deletions.
@@ -0,0 +1,16 @@
# Pure Numpy version of DeepLearningZeroToAll

* [x] Logistic Regression
* [x] Softmax Classification
* [ ] Convolutional Network
* [ ] Recurrent Neural Network

## Other resources

* [minpy](https://github.com/dmlc/minpy)

Minpy is a pure NumPy interface on top of MXNet, so it can run on GPUs. It is simple to adopt: swap `import numpy as np` for `import minpy.numpy as np` (see the sketch after this list).

* [Autograd](https://github.com/HIPS/autograd)

Autograd provides automatic gradient computation for NumPy code. This repo, however, does not use autograd: all gradients are derived by hand for study purposes.
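A minimal sketch of how the two libraries above are typically used, assuming both packages are installed (neither is a dependency of this repo); the toy loss and data below are made up for illustration:

```python
# minpy: swap the import and NumPy-style code can run on GPUs via MXNet.
# import minpy.numpy as np

# autograd: differentiate plain NumPy code automatically.
import autograd.numpy as np
from autograd import grad


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def loss(w, x, y):
    # binary cross entropy of a toy logistic model
    p = sigmoid(np.dot(x, w))
    return -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))


x = np.array([[1., 2.], [3., 4.]])
y = np.array([0., 1.])
w = np.zeros(2)

# grad(loss) differentiates w.r.t. the first argument (w); no hand-derived backprop.
print(grad(loss)(w, x, y))
```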
@@ -0,0 +1,135 @@
""" | ||
Logistic Regression | ||
y = sigmoid(X @ W + b) | ||
""" | ||
import numpy as np | ||
|
||
x_data = [[1, 2], | ||
[2, 3], | ||
[3, 1], | ||
[4, 3], | ||
[5, 3], | ||
[6, 2]] | ||
y_data = [[0], | ||
[0], | ||
[0], | ||
[1], | ||
[1], | ||
[1]] | ||
|
||
X_train = np.array(x_data, dtype=np.float32) | ||
y_train = np.array(y_data).reshape(-1, 1) | ||
|
||
N = X_train.shape[0] | ||
D = X_train.shape[1] | ||
|
||
C = 1 | ||
LEARNING_RATE = 0.1 | ||
MAX_ITER = 1000 | ||
|
||
W = np.random.standard_normal((D, C)) | ||
b = np.zeros((C,)) | ||
|
||
|
||
def sigmoid(x):
    """Sigmoid function"""
    # Note: np.exp(-x) overflows (with a runtime warning) for very negative x,
    # in which case the sigmoid evaluates to 0.0; acceptable for this small example.
    return 1 / (1 + np.exp(-x))


def sigmoid_cross_entropy(logit, labels):
    """Compute a binary cross entropy loss
    z = logit = X @ W + b
    p = sigmoid(z)
    loss_i = y * -log(p) + (1 - y) * -log(1 - p)
    Args:
        logit (2-D Array): Logit array of shape (N, 1)
        labels (2-D Array): Binary label array of shape (N, 1)
    Returns:
        float: mean(loss_i)
    """
    assert logit.shape == (N, C)
    assert labels.shape == (N, C)

    probs = sigmoid(logit)
    # 1e-8 guards against log(0)
    loss_i = labels * -np.log(probs + 1e-8)
    loss_i += (1 - labels) * -np.log(1 - probs + 1e-8)

    loss = np.mean(loss_i)

    return loss


def grad_sigmoid_cross_entropy(logit, labels):
    """Returns
        d_loss_i    d_sigmoid
        --------- * ---------
        d_sigmoid      d_z
    which simplifies to sigmoid(z) - y
    z = logit = X @ W + b
    Args:
        logit (2-D Array): Logit array of shape (N, 1)
        labels (2-D Array): Binary label array of shape (N, 1)
    Returns:
        2-D Array: Backpropagated gradients of loss (N, 1)
    """
    return sigmoid(logit) - labels


def affine_forward(X, W, b):
    """Returns a logit
    logit = X @ W + b
    Args:
        X (2-D Array): Input array of shape (N, D)
        W (2-D Array): Weight array of shape (D, 1)
        b (1-D Array): Bias array of shape (1,)
    Returns:
        logit (2-D Array): Logit array of shape (N, 1)
    """
    return np.dot(X, W) + b


for i in range(MAX_ITER):

    logit = affine_forward(X_train, W, b)
    loss = sigmoid_cross_entropy(logit, y_train)
    d_loss = grad_sigmoid_cross_entropy(logit, y_train)

    # Average the per-example gradients over the batch
    d_W = np.dot(X_train.T, d_loss) / N
    d_b = np.sum(d_loss) / N

    # Vanilla gradient descent update
    W -= LEARNING_RATE * d_W
    b -= LEARNING_RATE * d_b

    if i % (MAX_ITER // 10) == 0:
        prob = affine_forward(X_train, W, b)
        prob = sigmoid(prob)
        pred = prob > 0.5
        acc = (pred == y_train).mean()

        print("[Step: {:5}] Loss: {:<5.3} Accuracy: {:>5.2%}".format(i, loss, acc))

""" | ||
[Step: 0] Loss: 2.35 Accuracy: 50.00% | ||
[Step: 100] Loss: 0.523 Accuracy: 83.33% | ||
[Step: 200] Loss: 0.435 Accuracy: 83.33% | ||
[Step: 300] Loss: 0.368 Accuracy: 83.33% | ||
[Step: 400] Loss: 0.316 Accuracy: 83.33% | ||
[Step: 500] Loss: 0.275 Accuracy: 83.33% | ||
[Step: 600] Loss: 0.243 Accuracy: 100.00% | ||
[Step: 700] Loss: 0.217 Accuracy: 100.00% | ||
[Step: 800] Loss: 0.196 Accuracy: 100.00% | ||
[Step: 900] Loss: 0.178 Accuracy: 100.00% | ||
""" |
@@ -0,0 +1,148 @@
import numpy as np

# Zoo dataset: 101 examples, 16 features, 7 animal classes
data = np.loadtxt("../data-04-zoo.csv",
                  delimiter=",",
                  dtype=np.float32)

X_train = data[:, :-1]
y_train = data[:, -1].astype(np.int8)
assert X_train.shape == (101, 16)
assert y_train.shape == (101,)

N, D = X_train.shape
C = np.max(y_train) + 1  # number of classes

# One-hot encode the integer labels
y_train_onehot = np.zeros(shape=(N, C))
y_train_onehot[np.arange(N), y_train] = 1

assert C == 7, "There are 7 classes to predict"

W = np.random.standard_normal((D, C))
b = np.zeros((C,))


def affine_forward(X, W, b):
    """Returns a logit
    logit = X @ W + b
    Args:
        X (2-D Array): Input array of shape (N, D)
        W (2-D Array): Weight array of shape (D, C)
        b (1-D Array): Bias array of shape (C,)
    Returns:
        logit (2-D Array): Logit array of shape (N, C)
    """
    return np.dot(X, W) + b


def softmax(z):
    """Softmax function
    Subtracts the per-row max for numerical stability
    Args:
        z (2-D Array): Array of shape (N, C)
    Returns:
        2-D Array: Softmax output of (N, C)
    """
    # Shift each row by its own max (softmax is invariant to this) so that
    # np.exp cannot overflow; avoid modifying the caller's array in place.
    z = z - np.max(z, axis=1, keepdims=True)
    numerator = np.exp(z)
    denominator = np.sum(numerator, axis=1).reshape(-1, 1) + 1e-7

    return numerator / denominator
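# Why the max subtraction above matters: np.exp(1000.0) overflows to inf and
# triggers a RuntimeWarning, whereas a row such as [1000., 1001., 1002.] shifted
# by its max becomes [-2., -1., 0.], whose exponentials are finite and normalize
# to the same softmax values.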


def softmax_cross_entropy_loss(logit, labels):
    """Returns a softmax cross entropy loss
    loss_i = - log(P(y_i | x_i))
    Args:
        logit (2-D Array): Logit array of shape (N, C)
        labels (2-D Array): Onehot label array of shape (N, C)
    Returns:
        float: mean(loss_i)
    """
    p = softmax(logit)
    # Note: the mean is taken over all N * C entries of the onehot loss matrix,
    # so the reported value is 1/C of the usual per-example cross entropy.
    loss_i = -labels * np.log(p + 1e-8)
    return np.mean(loss_i)


def grad_softmax_cross_entropy_loss(logit, labels):
    """Returns
        d_loss_i     d_softmax
        --------- * ----------
        d_softmax      d_z
    which, for onehot labels, simplifies to softmax(z) - labels
    z = logit = X @ W + b
    Args:
        logit (2-D Array): Logit array of shape (N, C)
        labels (2-D Array): Onehot label array of shape (N, C)
    Returns:
        2-D Array: Backpropagated gradients of loss (N, C)
    Notes:
        [1] Neural Net Backprop in one slide! by Sung Kim
        https://docs.google.com/presentation/d/1_ZmtfEjLmhbuM_PqbDYMXXLAqeWN0HwuhcSKnUQZ6MM/edit#slide=id.g1ec1d04b5a_1_83
    """
    return softmax(logit) - labels


def get_accuracy(logit, labels):
    """Returns the accuracy
    Args:
        logit (2-D Array): Logit array of shape (N, C)
        labels (2-D Array): Onehot label array of shape (N, C)
    Returns:
        float: Accuracy
    """
    probs = softmax(logit)
    pred = np.argmax(probs, axis=1)
    true = np.argmax(labels, axis=1)

    return np.mean(pred == true)


LEARNING_RATE = 0.1
MAX_ITER = 2000
PRINT_N = 10

for i in range(MAX_ITER):

    logit = affine_forward(X_train, W, b)
    loss = softmax_cross_entropy_loss(logit, y_train_onehot)
    d_loss = grad_softmax_cross_entropy_loss(logit, y_train_onehot)

    # Average the per-example gradients over the batch
    d_W = np.dot(X_train.T, d_loss) / N
    d_b = np.sum(d_loss, axis=0) / N  # per-class bias gradient (column means)

    # Vanilla gradient descent update
    W -= LEARNING_RATE * d_W
    b -= LEARNING_RATE * d_b

    if i % (MAX_ITER // PRINT_N) == 0:
        acc = get_accuracy(logit, y_train_onehot)
        print("[Step: {:5}] Loss: {:<10.5} Acc: {:.2%}".format(i, loss, acc))

""" | ||
[Step: 0] Loss: 0.76726 Acc: 31.68% | ||
[Step: 200] Loss: 0.057501 Acc: 87.13% | ||
[Step: 400] Loss: 0.034893 Acc: 92.08% | ||
[Step: 600] Loss: 0.025472 Acc: 97.03% | ||
[Step: 800] Loss: 0.020099 Acc: 97.03% | ||
[Step: 1000] Loss: 0.016562 Acc: 99.01% | ||
[Step: 1200] Loss: 0.014058 Acc: 100.00% | ||
[Step: 1400] Loss: 0.012204 Acc: 100.00% | ||
[Step: 1600] Loss: 0.010784 Acc: 100.00% | ||
[Step: 1800] Loss: 0.0096631 Acc: 100.00% | ||
""" |