hw4

Erostrate9 · Dec 24, 2022 · 9b85eaf · 9b85eaf
commit 9b85eaf
Show file tree

Hide file tree

Showing 66 changed files with 11,745 additions and 0 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,87 @@
+cmake_minimum_required(VERSION 3.2)
+project(needle C CXX)
+
+# NOTE(review): the FindPython module used below ships with CMake 3.12+, which
+# is newer than the 3.2 minimum declared above -- consider raising the minimum.
+
+# Find the correct version of Python: ask `python3-config` for its install
+# prefix and hand it to FindPython as the Python_ROOT_DIR search hint. The
+# command output ends with a newline, so it is stripped to keep the hint a
+# usable path.
+execute_process(COMMAND python3-config --prefix
+  OUTPUT_VARIABLE Python_ROOT_DIR
+  OUTPUT_STRIP_TRAILING_WHITESPACE)
+find_package(Python COMPONENTS Development Interpreter REQUIRED)
+include_directories(${Python_INCLUDE_DIRS})
+
+# Find pybind11 by asking the Python package where its CMake config lives.
+# NOTE(review): Python3_FIND_VIRTUALENV is a hint for FindPython3, but this file
+# calls find_package(Python ...) and does so before the variable is set, so
+# this line presumably has no effect -- confirm the intent.
+set(Python3_FIND_VIRTUALENV "ONLY")
+execute_process(COMMAND python3 -m pybind11 --cmakedir
+  RESULT_VARIABLE __pybind_exit_code
+  OUTPUT_VARIABLE __pybind_path
+  OUTPUT_STRIP_TRAILING_WHITESPACE)
+# REQUIRED: the targets below call pybind11_extension()/pybind11_strip(), so a
+# missing package should stop configuration here with a clear message instead
+# of failing later with an unknown-command error.
+find_package(pybind11 REQUIRED PATHS ${__pybind_path})
+
+
+# Global C++ flags, prepended so that user-supplied CMAKE_CXX_FLAGS come last.
+if(MSVC)
+  # cl.exe has no /std:c++11 (the lowest selectable standard is c++14) and no
+  # -march=native, so only flags it understands are passed here.
+  set(CMAKE_CXX_FLAGS "/std:c++14 /O2 ${CMAKE_CXX_FLAGS}")
+else()
+  set(CMAKE_CXX_FLAGS "-std=c++11 -O2 -march=native ${CMAKE_CXX_FLAGS}")
+endif()
+# The CUDA language standard is the same on every platform.
+set(CMAKE_CUDA_STANDARD 14)
+
+# pybind11 headers are treated as system headers; its libraries are collected
+# in LINKER_LIBS, which the backend targets link against.
+include_directories(SYSTEM ${pybind11_INCLUDE_DIRS})
+list(APPEND LINKER_LIBS ${pybind11_LIBRARIES})
+
+
+###################
+### CPU BACKEND ###
+###################
+# Python extension module built from the CPU ndarray implementation.
+add_library(ndarray_backend_cpu MODULE src/ndarray_backend_cpu.cc)
+target_link_libraries(ndarray_backend_cpu PUBLIC ${LINKER_LIBS})
+pybind11_extension(ndarray_backend_cpu)
+pybind11_strip(ndarray_backend_cpu)
+
+# Write the built module straight into the Python package directory and hide
+# C++ symbols by default.
+set_target_properties(ndarray_backend_cpu PROPERTIES
+  CXX_VISIBILITY_PRESET hidden
+  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/python/needle/backend_ndarray")
+
+# On Darwin, tell the linker to leave undefined symbols for lookup when the
+# module is loaded.
+if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
+  set_target_properties(ndarray_backend_cpu PROPERTIES
+    LINK_OPTIONS "-undefined;dynamic_lookup")
+endif()
+
+
+
+####################
+### CUDA BACKEND ###
+####################
+# The CUDA backend is optional: it is only configured when FindCUDA succeeds.
+find_package(CUDA)
+if(CUDA_FOUND)
+  message(STATUS "Found cuda, building cuda backend")
+
+  include_directories(SYSTEM ${CUDA_INCLUDE_DIRS})
+  list(APPEND LINKER_LIBS ${CUDA_CUDART_LIBRARY})
+
+  # Run nvidia-smi to detect whether a GPU is really present: a zero exit code
+  # lets the architecture flags be auto-detected, anything else falls back to
+  # compute capability 3.7 (the flag of K80).
+  execute_process(COMMAND "nvidia-smi" ERROR_QUIET RESULT_VARIABLE NV_RET)
+  if(NV_RET EQUAL 0)
+    cuda_select_nvcc_arch_flags(ARCH_FLAGS Auto)
+  else()
+    cuda_select_nvcc_arch_flags(ARCH_FLAGS 3.7)
+  endif()
+
+  # Build the extension module with the selected architecture flags.
+  cuda_add_library(ndarray_backend_cuda MODULE
+    src/ndarray_backend_cuda.cu
+    OPTIONS ${ARCH_FLAGS})
+
+  target_link_libraries(ndarray_backend_cuda ${LINKER_LIBS})
+  pybind11_extension(ndarray_backend_cuda)
+  pybind11_strip(ndarray_backend_cuda)
+
+  # Write the built module straight into the Python package directory and hide
+  # C++/CUDA symbols by default.
+  set_target_properties(ndarray_backend_cuda PROPERTIES
+    CXX_VISIBILITY_PRESET hidden
+    CUDA_VISIBILITY_PRESET hidden
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/python/needle/backend_ndarray")
+
+endif()
diff --git a/Makefile b/Makefile
@@ -0,0 +1,16 @@
+# Phony targets are separated by whitespace; with commas make would declare
+# targets literally named "lib," "clean," etc. and leave the real ones
+# non-phony.
+.PHONY: all lib pybind clean format
+
+all: lib
+
+
+# Configure with CMake in ./build and compile there.
+lib:
+	@mkdir -p build
+	@cd build && cmake ..
+	@cd build && $(MAKE)
+
+# Reformat the Python sources with black and the C++/CUDA sources with
+# clang-format.
+format:
+	python3 -m black .
+	clang-format -i src/*.cc src/*.cu
+
+# Remove the build tree and the compiled backend modules.
+clean:
+	rm -rf build python/needle/backend_ndarray/ndarray_backend*.so
diff --git a/README.md b/README.md
@@ -0,0 +1,2 @@
+# Homework 4
+Public repository and stub/testing code for Homework 4 of 10-714.
diff --git a/ResNet9.png b/ResNet9.png
diff --git a/apps/__pycache__/lstm.cpython-39-pytest-7.1.2.pyc b/apps/__pycache__/lstm.cpython-39-pytest-7.1.2.pyc
diff --git a/apps/__pycache__/models.cpython-38.pyc b/apps/__pycache__/models.cpython-38.pyc
diff --git a/apps/__pycache__/models.cpython-39.pyc b/apps/__pycache__/models.cpython-39.pyc
diff --git a/apps/__pycache__/rnn.cpython-39-pytest-7.1.2.pyc b/apps/__pycache__/rnn.cpython-39-pytest-7.1.2.pyc
diff --git a/apps/__pycache__/simple_training.cpython-38.pyc b/apps/__pycache__/simple_training.cpython-38.pyc
diff --git a/apps/__pycache__/simple_training.cpython-39.pyc b/apps/__pycache__/simple_training.cpython-39.pyc
diff --git a/apps/models.py b/apps/models.py
@@ -0,0 +1,106 @@
+import sys
+
+sys.path.append('./python')
+import needle as ndl
+import needle.nn as nn
+import math
+import numpy as np
+
+np.random.seed(0)
+
+
+class ResNet9(ndl.nn.Module):
+    def __init__(self, device=None, dtype="float32"):
+        super().__init__()
+        ### BEGIN YOUR SOLUTION ###
+        self.device = device
+        self.dtype = dtype
+        self.resnet9 = nn.Sequential(
+            nn.ConvBN(3, 16, 7, 4, device=device, dtype=dtype),
+            nn.ConvBN(16, 32, 3, 2, device=device, dtype=dtype),
+            nn.Residual(
+                nn.Sequential(
+                    nn.ConvBN(32, 32, 3, 1, device=device, dtype=dtype),
+                    nn.ConvBN(32, 32, 3, 1, device=device, dtype=dtype),
+                )
+            ),
+            nn.ConvBN(32, 64, 3, 2, device=device, dtype=dtype),
+            nn.ConvBN(64, 128, 3, 2, device=device, dtype=dtype),
+            nn.Residual(
+                nn.Sequential(
+                    nn.ConvBN(128, 128, 3, 1, device=device, dtype=dtype),
+                    nn.ConvBN(128, 128, 3, 1, device=device, dtype=dtype),
+                )
+            ),
+            nn.Flatten(),
+            nn.Linear(128, 128, device=device, dtype=dtype),
+            nn.ReLU(),
+            nn.Flatten(),
+            nn.Linear(128, 10, device=device, dtype=dtype)
+        )
+        ### END YOUR SOLUTION
+
+    def forward(self, x):
+        ### BEGIN YOUR SOLUTION
+        return self.resnet9(x)
+        ### END YOUR SOLUTION
+
+
+class LanguageModel(nn.Module):
+    def __init__(self, embedding_size, output_size, hidden_size, num_layers=1,
+                 seq_model='rnn', device=None, dtype="float32"):
+        """
+        Consists of an embedding layer, a sequence model (either RNN or LSTM), and a
+        linear layer.
+        Parameters:
+        output_size: Size of dictionary
+        embedding_size: Size of embeddings
+        hidden_size: The number of features in the hidden state of LSTM or RNN
+        seq_model: 'rnn' or 'lstm', whether to use RNN or LSTM
+        num_layers: Number of layers in RNN or LSTM
+        """
+        super(LanguageModel, self).__init__()
+        ### BEGIN YOUR SOLUTION
+        self.embedding_size = embedding_size
+        self.output_size = output_size
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+        self.device = device
+        self.dtype = dtype
+        self.seq_model = seq_model
+        self.embedding = nn.Embedding(output_size, embedding_size, device=device, dtype="float32")
+        self.seq = nn.LSTM(embedding_size, hidden_size, num_layers=num_layers, device=device,
+                             dtype="float32") if seq_model == "lstm" else nn.RNN(embedding_size, hidden_size,
+                                                                                 num_layers=num_layers, device=device,
+                                                                                 dtype="float32")
+        self.linear = nn.Linear(hidden_size, output_size, device=device, dtype=dtype)
+        ### END YOUR SOLUTION
+
+    def forward(self, x, h=None):
+        """
+        Given sequence (and the previous hidden state if given), returns probabilities of next word
+        (along with the last hidden state from the sequence model).
+        Inputs:
+        x of shape (seq_len, bs)
+        h of shape (num_layers, bs, hidden_size) if using RNN,
+            else h is tuple of (h0, c0), each of shape (num_layers, bs, hidden_size)
+        Returns (out, h)
+        out of shape (seq_len*bs, output_size)
+        h of shape (num_layers, bs, hidden_size) if using RNN,
+            else h is tuple of (h0, c0), each of shape (num_layers, bs, hidden_size)
+        """
+        ### BEGIN YOUR SOLUTION
+        X = self.embedding(x)
+        out, h = self.seq(X, h)
+        out = self.linear(out.reshape((out.shape[0]*out.shape[1], out.shape[2])))
+        return out, h
+        ### END YOUR SOLUTION
+
+
+if __name__ == "__main__":
+    model = ResNet9()
+    x = ndl.ops.randu((1, 32, 32, 3), requires_grad=True)
+    model(x)
+    cifar10_train_dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=True)
+    train_loader = ndl.data.DataLoader(cifar10_train_dataset, 128, ndl.cpu(), dtype="float32")
+    print(dataset[1][0].shape)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# Homework 4
		Public repository and stub/testing code for Homework 4 of 10-714.