Skip to content

Commit

Permalink
Merge pull request #28 from JadenFiotto-Kaufman/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
JadenFiotto-Kaufman authored Dec 20, 2023
2 parents 84526fc + 31143b9 commit ffd0d9e
Show file tree
Hide file tree
Showing 17 changed files with 292 additions and 111 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
name: Python application

on:
push:
branches: ["main", "dev"]
pull_request:
branches: [ "main", "dev" ]

Expand Down
39 changes: 24 additions & 15 deletions docs/source/_static/css/custom.css
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
.button-group {
display: flex;
flex-direction: row;
flex-wrap: nowrap;
justify-content: flex-start;
align-items: center;
align-content: stretch;
display: flex;
flex-direction: row;
flex-wrap: nowrap;
justify-content: flex-start;
align-items: center;
align-content: stretch;
gap: 10px;
}

Expand Down Expand Up @@ -34,7 +34,7 @@ html[data-theme="light"] {
}


.features {
.features {
height: 60vh;
overflow: hidden;
}
Expand All @@ -44,7 +44,9 @@ html[data-theme="light"] {
margin-top: 50px;
}


img {
pointer-events: none;
}

.title-bot {
margin-bottom: -10px !important;
Expand All @@ -62,21 +64,28 @@ html[data-theme="light"] {
gap: 20px;
}

@media only screen and (max-width: 768px) { /* Adjust this value based on your breakpoint for mobile */
.front-container, .hero {
height: auto; /* Change from fixed height to auto */
min-height: 50vh; /* Adjust this as needed */
@media only screen and (max-width: 768px) {

/* Adjust this value based on your breakpoint for mobile */
.front-container,
.hero {
height: auto;
/* Change from fixed height to auto */
min-height: 50vh;
/* Adjust this as needed */
}

.features-container {
margin-bottom: 20px; /* Increase bottom margin */
margin-bottom: 20px;
/* Increase bottom margin */
}

.hero {
margin-bottom: 30px; /* Adjust the bottom margin of the main container */
margin-bottom: 30px;
/* Adjust the bottom margin of the main container */
}

.features {
height: 110vh;
}
}
}
43 changes: 19 additions & 24 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,41 @@
# Configuration file for the Sphinx documentation builder.

# Project Information
project = 'nnsight'
copyright = '2023, NDIF'
author = 'Jaden Fiotto-Kaufman'
project = "nnsight"
copyright = "2023, NDIF"
author = "Jaden Fiotto-Kaufman"


# General Configuration
extensions = [
'sphinx.ext.autodoc', # Auto documentation from docstrings
'sphinx.ext.napoleon', # Support for NumPy and Google style docstrings
'sphinx_copybutton', # Copy button for code blocks
'sphinx_design', # Bootstrap design components
'nbsphinx', # Jupyter notebook support
"sphinx.ext.autodoc", # Auto documentation from docstrings
"sphinx.ext.napoleon", # Support for NumPy and Google style docstrings
"sphinx_copybutton", # Copy button for code blocks
"sphinx_design", # Bootstrap design components
"nbsphinx", # Jupyter notebook support
]

templates_path = ['_templates']
templates_path = ["_templates"]
exclude_patterns = []
fixed_sidebar = True



# HTML Output Options

# See https://sphinx-themes.org/ for more
html_theme = "pydata_sphinx_theme"
html_title = "nnsight"
html_logo = "_static/images/nnsight_logo.svg"
html_static_path = ['_static']
html_static_path = ["_static"]

html_favicon = '_static/images/icon.ico'
html_favicon = "_static/images/icon.ico"
html_show_sourcelink = False
html_theme_options = {
"logo": {"text":"nnsight"},
"show_nav_level": 2,
"navbar_end": ["navbar-icon-links", "ndif_status"],
"navbar_align": "left",
"icon_links": [
"logo": {"text": "nnsight"},
"show_nav_level": 2,
"navbar_end": ["navbar-icon-links", "ndif_status"],
"navbar_align": "left",
"icon_links": [
{
"name": "GitHub",
"url": "https://github.com/JadenFiotto-Kaufman/nnsight",
Expand All @@ -47,15 +46,11 @@
"url": "https://discord.gg/6uFJmCSwW7",
"icon": "fa-brands fa-discord",
},
]
],
}

html_context = {
"default_mode": "light",
"ndif_url" : "https://ndif.baulab.us/ping"
}
html_context = {"default_mode": "light", "ndif_url": "https://ndif.dev/ping"}

html_css_files = [
'css/custom.css',
"css/custom.css",
]

172 changes: 170 additions & 2 deletions docs/source/notebooks/features/gradients.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,181 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Coming Soon!"
"There are a couple of ways we can interact with the gradients during and after a backward pass."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In the following example, we save the hidden states of the last layer and do a backward pass on the sum of the logits.\n",
"\n",
"Note two things:\n",
"\n",
"1. We use `inference=False` in the `.forward` call to turn off inference mode. This allows gradients to be calculated. \n",
"2. We can call `.backward()` on a value within the tracing context just like you normally would."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[[ 0.5216, -1.1755, -0.4617, ..., -1.1919, 0.0204, -2.0075],\n",
" [ 0.9841, 2.2175, 3.5851, ..., 0.5212, -2.2286, 5.7334]]],\n",
" device='cuda:0', grad_fn=<SliceBackward0>)\n"
]
}
],
"source": [
"from nnsight import LanguageModel\n",
"\n",
"model = LanguageModel('gpt2', device_map='cuda')\n",
"\n",
"with model.forward(inference=False) as runner:\n",
" with runner.invoke('Hello World') as invoker:\n",
"\n",
" hidden_states = model.transformer.h[-1].output[0].save()\n",
"\n",
" logits = model.lm_head.output\n",
"\n",
" logits.sum().backward()\n",
"\n",
"print(hidden_states.value)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If we wanted to see the gradients for the hidden_states, we can call `.retain_grad()` on it and access the `.grad` attribute after execution. "
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[[ 0.5216, -1.1755, -0.4617, ..., -1.1919, 0.0204, -2.0075],\n",
" [ 0.9841, 2.2175, 3.5851, ..., 0.5212, -2.2286, 5.7334]]],\n",
" device='cuda:0', grad_fn=<AsStridedBackward0>)\n",
"tensor([[[ 28.7976, -282.5977, 868.7343, ..., 120.1742, 52.2264,\n",
" 168.6447],\n",
" [ 79.4183, -253.6227, 1322.1290, ..., 208.3981, -19.5544,\n",
" 509.9856]]], device='cuda:0')\n"
]
}
],
"source": [
"from nnsight import LanguageModel\n",
"\n",
"model = LanguageModel('gpt2', device_map='cuda')\n",
"\n",
"with model.forward(inference=False) as runner:\n",
" with runner.invoke('Hello World') as invoker:\n",
"\n",
" hidden_states = model.transformer.h[-1].output[0].save()\n",
" hidden_states.retain_grad()\n",
"\n",
" logits = model.lm_head.output\n",
"\n",
" logits.sum().backward()\n",
"\n",
"print(hidden_states.value)\n",
"print(hidden_states.value.grad)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Torch also provides hooks into the backward process via the inputs and outputs. NNsight uses these in a similar way as `.input` and `.output` by also providing `.backward_input` and `.backward_output`."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[[ 0.5216, -1.1755, -0.4617, ..., -1.1919, 0.0204, -2.0075],\n",
" [ 0.9841, 2.2175, 3.5851, ..., 0.5212, -2.2286, 5.7334]]],\n",
" device='cuda:0', grad_fn=<SliceBackward0>)\n",
"tensor([[[ 28.7976, -282.5977, 868.7343, ..., 120.1742, 52.2264,\n",
" 168.6447],\n",
" [ 79.4183, -253.6227, 1322.1290, ..., 208.3981, -19.5544,\n",
" 509.9856]]], device='cuda:0')\n"
]
}
],
"source": [
"from nnsight import LanguageModel\n",
"\n",
"model = LanguageModel('gpt2', device_map='cuda')\n",
"\n",
"with model.forward(inference=False) as runner:\n",
" with runner.invoke('Hello World') as invoker:\n",
"\n",
" hidden_states = model.transformer.h[-1].output[0].save()\n",
" hidden_states_grad = model.transformer.h[-1].backward_output[0].save()\n",
" logits = model.lm_head.output\n",
"\n",
" logits.sum().backward()\n",
"\n",
"print(hidden_states.value)\n",
"print(hidden_states_grad.value)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ndif",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/notebooks/walkthrough.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2623,7 +2623,7 @@
"source": [
"# Next steps\n",
"\n",
"Check out the [Features](/tutorials/features/) and [Documentation](/documentation/) pages and the [README](https://github.com/JadenFiotto-Kaufman/nnsight/blob/main/README.md) for more guides."
"Check out the [Features](/features/) and [Documentation](/documentation/) pages and the [README](https://github.com/JadenFiotto-Kaufman/nnsight/blob/main/README.md) for more guides."
]
}
],
Expand Down
2 changes: 1 addition & 1 deletion docs/sourcelatex/documentation/contexts.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
nnsight.contexts
------------
-----------------


.. automodule:: nnsight.contexts
Expand Down
18 changes: 17 additions & 1 deletion src/nnsight/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,26 @@ def repeat_interleave(


DEFAULT_PATCHER.add(
Patch(torch.repeat_interleave, repeat_interleave_wrapper(torch.repeat_interleave))
Patch(
torch, repeat_interleave_wrapper(torch.repeat_interleave), "repeat_interleave"
)
)


def cpu_wrapper(fn):
    """Wrap a tensor-to-CPU op so meta-device tensors pass through untouched.

    Args:
        fn: The original callable (e.g. ``torch.Tensor.cpu``) taking a tensor
            as its first argument.

    Returns:
        A wrapper with the same signature that short-circuits for meta tensors.
    """

    @wraps(fn)
    def cpu(input: torch.Tensor, *args, **kwargs):
        # Meta tensors carry no real storage, so "moving" them is a no-op:
        # return the tensor unchanged instead of delegating to fn.
        if input.device.type == "meta":
            return input
        return fn(input, *args, **kwargs)

    return cpu


DEFAULT_PATCHER.add(Patch(torch.Tensor, cpu_wrapper(torch.Tensor.cpu), "cpu"))

DEFAULT_PATCHER.__enter__()

from torch._meta_registrations import (_meta_lib_dont_use_me_use_register_meta,
Expand Down
6 changes: 3 additions & 3 deletions src/nnsight/alteration/gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,6 @@ def forward(
return outputs # a, present, (attentions)


GPT2Patcher = Patcher([
Patch(gpt2.modeling_gpt2.GPT2Attention, GPT2AttentionAltered)
])
GPT2Patcher = Patcher(
[Patch(gpt2.modeling_gpt2, GPT2AttentionAltered, "GPT2Attention")]
)
Loading

0 comments on commit ffd0d9e

Please sign in to comment.