47 | 47 | },
48 | 48 | {
49 | 49 | "cell_type": "code",
50 |    | - "execution_count": 2,
   | 50 | + "execution_count": 1,
51 | 51 | "id": "fb8d43e9",
52 | 52 | "metadata": {},
53 | 53 | "outputs": [],

67 | 67 | },
68 | 68 | {
69 | 69 | "cell_type": "code",
70 |    | - "execution_count": 3,
   | 70 | + "execution_count": 2,
71 | 71 | "id": "2a5d7425",
72 | 72 | "metadata": {},
73 | 73 | "outputs": [

86 | 86 | " 'Oil prices soar to all-time record, posing new menace to US economy (AFP) AFP - Tearaway world oil prices, toppling records and straining wallets, present a new economic menace barely three months before the US presidential elections.')]"
|
87 | 87 | ]
|
88 | 88 | },
|
89 |
| - "execution_count": 3, |
| 89 | + "execution_count": 2, |
90 | 90 | "metadata": {},
|
91 | 91 | "output_type": "execute_result"
|
92 | 92 | }
|
|
106 | 106 | },
107 | 107 | {
108 | 108 | "cell_type": "code",
109 |     | - "execution_count": 4,
    | 109 | + "execution_count": 3,
110 | 110 | "id": "4ba18c72",
111 | 111 | "metadata": {},
112 | 112 | "outputs": [],

123 | 123 | },
124 | 124 | {
125 | 125 | "cell_type": "code",
126 |     | - "execution_count": 5,
    | 126 | + "execution_count": 4,
127 | 127 | "id": "7d03c0fe",
128 | 128 | "metadata": {},
129 | 129 | "outputs": [],

134 | 134 | },
135 | 135 | {
136 | 136 | "cell_type": "code",
137 |     | - "execution_count": 6,
    | 137 | + "execution_count": 5,
138 | 138 | "id": "2a43dfd9",
139 | 139 | "metadata": {},
140 | 140 | "outputs": [],

156 | 156 | },
157 | 157 | {
158 | 158 | "cell_type": "code",
159 |     | - "execution_count": 7,
    | 159 | + "execution_count": 6,
160 | 160 | "id": "225c9a01",
161 | 161 | "metadata": {},
162 | 162 | "outputs": [],

182 | 182 | },
183 | 183 | {
184 | 184 | "cell_type": "code",
185 |     | - "execution_count": 8,
    | 185 | + "execution_count": 7,
186 | 186 | "id": "da8b2413",
187 | 187 | "metadata": {},
188 | 188 | "outputs": [],

225 | 225 | },
226 | 226 | {
227 | 227 | "cell_type": "code",
228 |     | - "execution_count": 9,
    | 228 | + "execution_count": 8,
229 | 229 | "id": "53def141",
230 | 230 | "metadata": {},
231 | 231 | "outputs": [],

248 | 248 | },
249 | 249 | {
250 | 250 | "cell_type": "code",
251 |     | - "execution_count": 10,
    | 251 | + "execution_count": 9,
252 | 252 | "id": "6ff581cd",
253 | 253 | "metadata": {},
254 | 254 | "outputs": [

257 | 257 | "output_type": "stream",
258 | 258 | "text": [
259 | 259 | "| epoch 1 | 500/ 1782 batches | accuracy 0.684\n",
260 |     | - "| epoch 1 | 1000/ 1782 batches | accuracy 0.854\n",
261 |     | - "| epoch 1 | 1500/ 1782 batches | accuracy 0.876\n",
    | 260 | + "| epoch 1 | 1000/ 1782 batches | accuracy 0.855\n",
    | 261 | + "| epoch 1 | 1500/ 1782 batches | accuracy 0.877\n",
262 | 262 | "-----------------------------------------------------------\n",
263 |     | - "| end of epoch 1 | time: 12.70s | valid accuracy 0.885 \n",
    | 263 | + "| end of epoch 1 | time: 14.62s | valid accuracy 0.884 \n",
264 | 264 | "-----------------------------------------------------------\n",
265 | 265 | "| epoch 2 | 500/ 1782 batches | accuracy 0.900\n",
266 |     | - "| epoch 2 | 1000/ 1782 batches | accuracy 0.898\n",
267 |     | - "| epoch 2 | 1500/ 1782 batches | accuracy 0.901\n",
    | 266 | + "| epoch 2 | 1000/ 1782 batches | accuracy 0.896\n",
    | 267 | + "| epoch 2 | 1500/ 1782 batches | accuracy 0.904\n",
268 | 268 | "-----------------------------------------------------------\n",
269 |     | - "| end of epoch 2 | time: 13.83s | valid accuracy 0.901 \n",
    | 269 | + "| end of epoch 2 | time: 14.14s | valid accuracy 0.876 \n",
270 | 270 | "-----------------------------------------------------------\n"
271 | 271 | ]
272 | 272 | }

317 | 317 | },
318 | 318 | {
319 | 319 | "cell_type": "code",
320 |     | - "execution_count": 11,
    | 320 | + "execution_count": 10,
321 | 321 | "id": "3a668a76",
322 | 322 | "metadata": {},
323 | 323 | "outputs": [

326 | 326 | "output_type": "stream",
327 | 327 | "text": [
328 | 328 | "Checking the results of test dataset.\n",
329 |     | - "test accuracy 0.896\n"
    | 329 | + "test accuracy 0.876\n"
330 | 330 | ]
331 | 331 | }
332 | 332 | ],

366 | 366 | },
367 | 367 | {
368 | 368 | "cell_type": "code",
369 |     | - "execution_count": 12,
    | 369 | + "execution_count": 11,
370 | 370 | "id": "compressed-occupation",
371 | 371 | "metadata": {},
372 | 372 | "outputs": [],

386 | 386 | },
387 | 387 | {
388 | 388 | "cell_type": "code",
389 |     | - "execution_count": 13,
    | 389 | + "execution_count": 12,
390 | 390 | "id": "19408128",
391 | 391 | "metadata": {},
392 | 392 | "outputs": [
393 | 393 | {
394 | 394 | "name": "stdout",
395 | 395 | "output_type": "stream",
396 | 396 | "text": [
397 |     | - "Created your project. Navigate to http://localhost:8000/projects/3 to see it in the UI.\n"
    | 397 | + "Found your project. Navigate to http://localhost:8000/projects/30 to see it.\n"
398 | 398 | ]
399 | 399 | }
400 | 400 | ],
401 | 401 | "source": [
402 | 402 | "from unboxapi.tasks import TaskType\n",
403 | 403 | "\n",
404 |     | - "project = client.create_project(name=\"Text classification with PyTorch\",\n",
405 |     | - "                                task_type=TaskType.TextClassification,\n",
406 |     | - "                                description=\"Evaluating NN for text classification\")"
    | 404 | + "project = client.create_or_load_project(name=\"Text classification with PyTorch\",\n",
    | 405 | + "                                        task_type=TaskType.TextClassification,\n",
    | 406 | + "                                        description=\"Evaluating NN for text classification\")"
407 | 407 | ]
408 | 408 | },
409 | 409 | {

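Note on the hunk above: switching from `client.create_project` to `client.create_or_load_project` is what changes the cell output from "Created your project" to "Found your project"; on repeated runs the call now returns the existing project instead of creating a duplicate. A minimal sketch of the pattern, assuming `client` was constructed earlier in the notebook (only `client`, `TaskType`, and the argument names are taken from the diff):

```python
from unboxapi.tasks import TaskType

# create_or_load_project is idempotent: the first run creates the project,
# later runs load the one with the same name, so the notebook can be
# re-executed end to end without erroring on an existing project.
project = client.create_or_load_project(
    name="Text classification with PyTorch",
    task_type=TaskType.TextClassification,
    description="Evaluating NN for text classification",
)
```
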
461 | 461 | },
462 | 462 | {
463 | 463 | "cell_type": "code",
464 |     | - "execution_count": 16,
    | 464 | + "execution_count": 13,
465 | 465 | "id": "supposed-survey",
466 | 466 | "metadata": {},
467 | 467 | "outputs": [],
468 | 468 | "source": [
469 |     | - "def predict_proba(model, texts, tokenizer, vocab):\n",
    | 469 | + "def predict_proba(model, texts, tokenizer_fn, vocab):\n",
470 | 470 | "    with torch.no_grad():\n",
471 | 471 | "        texts = [\n",
472 | 472 | "            torch.tensor(\n",
473 |     | - "                [vocab[token] for token in tokenizer(text)]\n",
    | 473 | + "                [vocab[token] for token in tokenizer_fn(text)]\n",
474 | 474 | "            ) \n",
475 | 475 | "            for text in texts]\n",
476 | 476 | "        text_list = torch.tensor(torch.cat(texts)).long()\n",

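The `tokenizer` parameter of `predict_proba` is renamed to `tokenizer_fn` so the signature matches the `tokenizer_fn=` keyword passed to `project.add_model` further down in this diff. A hedged usage sketch; `sample_texts` is a placeholder, and `tokenizer`/`vocab` are assumed to be the torchtext objects built earlier in the notebook:

```python
# Keyword arguments now line up with what add_model forwards to the function.
sample_texts = ["Oil prices soar to all-time record"]  # placeholder input
probs = predict_proba(model, sample_texts, tokenizer_fn=tokenizer, vocab=vocab)
```
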
495 | 495 | },
496 | 496 | {
497 | 497 | "cell_type": "code",
498 |     | - "execution_count": 17,
    | 498 | + "execution_count": 14,
499 | 499 | "id": "north-valuation",
500 | 500 | "metadata": {},
501 | 501 | "outputs": [],

520 | 520 | },
521 | 521 | {
522 | 522 | "cell_type": "code",
523 |     | - "execution_count": 18,
    | 523 | + "execution_count": 15,
524 | 524 | "id": "comprehensive-jenny",
525 | 525 | "metadata": {},
526 | 526 | "outputs": [
527 | 527 | {
528 | 528 | "name": "stderr",
529 | 529 | "output_type": "stream",
530 | 530 | "text": [
531 |     | - "/var/folders/9z/j3bd32nd47j_l0thnbj6vbnw0000gn/T/ipykernel_21571/785500925.py:8: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
    | 531 | + "/var/folders/9z/j3bd32nd47j_l0thnbj6vbnw0000gn/T/ipykernel_22576/710996952.py:8: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
532 | 532 | "  text_list = torch.tensor(torch.cat(texts)).long()\n",
533 |     | - "/var/folders/9z/j3bd32nd47j_l0thnbj6vbnw0000gn/T/ipykernel_21571/785500925.py:17: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
    | 533 | + "/var/folders/9z/j3bd32nd47j_l0thnbj6vbnw0000gn/T/ipykernel_22576/710996952.py:17: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
534 | 534 | "  return sm(output).numpy().tolist()\n"
535 | 535 | ]
536 | 536 | },
537 | 537 | {
538 | 538 | "data": {
539 | 539 | "text/plain": [
540 |     | - "[[0.004791636019945145,\n",
541 |     | - "  0.9912257790565491,\n",
542 |     | - "  0.0018143205670639873,\n",
543 |     | - "  0.0021683182567358017],\n",
544 |     | - " [0.009553060866892338,\n",
545 |     | - "  0.9899933934211731,\n",
546 |     | - "  6.70066146994941e-05,\n",
547 |     | - "  0.0003865564940497279]]"
    | 540 | + "[[0.012467482127249241,\n",
    | 541 | + "  0.9524526596069336,\n",
    | 542 | + "  0.0024990958627313375,\n",
    | 543 | + "  0.03258078917860985],\n",
    | 544 | + " [0.024693824350833893,\n",
    | 545 | + "  0.9746410846710205,\n",
    | 546 | + "  1.4036187167221215e-05,\n",
    | 547 | + "  0.0006511638639494777]]"
548 | 548 | ]
549 | 549 | },
550 |     | - "execution_count": 18,
    | 550 | + "execution_count": 15,
551 | 551 | "metadata": {},
552 | 552 | "output_type": "execute_result"
553 | 553 | }

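The stderr block above still carries two PyTorch `UserWarning`s from `predict_proba`, and each states its own remedy: don't re-wrap the tensor that `torch.cat` already returns, and give softmax an explicit `dim`. A sketch of a warning-free variant, assuming the EmbeddingBag-style interface `model(text_list, offsets)` used by this kind of text classifier; the `offsets` computation is an assumption, since the diff only shows fragments of the function:

```python
import torch

def predict_proba(model, texts, tokenizer_fn, vocab):
    with torch.no_grad():
        encoded = [
            torch.tensor([vocab[token] for token in tokenizer_fn(text)])
            for text in texts
        ]
        # torch.cat already returns a tensor; re-wrapping it with
        # torch.tensor(...) is what triggered the copy-construct warning.
        text_list = torch.cat(encoded).long()
        # Start index of each sequence inside the flat text_list
        # (assumed here, not shown in the diff).
        offsets = torch.tensor([0] + [len(t) for t in encoded[:-1]]).cumsum(dim=0)
        output = model(text_list, offsets)
        # An explicit dim=1 silences the deprecated implicit-softmax warning.
        return torch.softmax(output, dim=1).numpy().tolist()
```
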
566 | 566 | },
567 | 567 | {
568 | 568 | "cell_type": "code",
569 |     | - "execution_count": 19,
    | 569 | + "execution_count": 17,
570 | 570 | "id": "f0b3eb3f",
571 | 571 | "metadata": {},
572 | 572 | "outputs": [
573 | 573 | {
574 | 574 | "name": "stdout",
575 | 575 | "output_type": "stream",
576 | 576 | "text": [
577 |     | - "[2022-08-08 10:56:08,400] WARNING - BentoML by default does not include spacy and torchvision package when using PytorchModelArtifact. To make sure BentoML bundle those packages if they are required for your model, either import those packages in BentoService definition file or manually add them via `@env(pip_packages=['torchvision'])` when defining a BentoService\n",
    | 577 | + "[2022-08-24 10:54:58,678] WARNING - BentoML by default does not include spacy and torchvision package when using PytorchModelArtifact. To make sure BentoML bundle those packages if they are required for your model, either import those packages in BentoService definition file or manually add them via `@env(pip_packages=['torchvision'])` when defining a BentoService\n",
578 | 578 | "Bundling model and artifacts...\n"
579 | 579 | ]
580 | 580 | },
581 | 581 | {
582 | 582 | "name": "stderr",
583 | 583 | "output_type": "stream",
584 | 584 | "text": [
585 |     | - "/Users/gustavocid/miniconda3/envs/pytorch-notebook/lib/python3.8/site-packages/bentoml/frameworks/pytorch.py:162: ResourceWarning: unclosed file <_io.BufferedWriter name='/private/var/folders/9z/j3bd32nd47j_l0thnbj6vbnw0000gn/T/bentoml-temp-kjhae170/TemplateModel/artifacts/model.pt'>\n",
    | 585 | + "/Users/gustavocid/miniconda3/envs/unbox-examples/lib/python3.8/site-packages/bentoml/frameworks/pytorch.py:162: ResourceWarning: unclosed file <_io.BufferedWriter name='/private/var/folders/9z/j3bd32nd47j_l0thnbj6vbnw0000gn/T/bentoml-temp-3ct5zn56/TemplateModel/artifacts/model.pt'>\n",
586 | 586 | "  return cloudpickle.dump(self._model, open(self._file_path(dst), \"wb\"))\n",
587 | 587 | "ResourceWarning: Enable tracemalloc to get the object allocation traceback\n"
588 | 588 | ]

598 | 598 | "source": [
599 | 599 | "from unboxapi.models import ModelType\n",
600 | 600 | "\n",
601 |     | - "model = project.add_model(\n",
    | 601 | + "ml_model = project.add_model(\n",
602 | 602 | "    function=predict_proba, \n",
603 | 603 | "    model=model,\n",
604 | 604 | "    model_type=ModelType.pytorch,\n",
605 | 605 | "    class_names=['world', 'sports', 'business', 'sci/tec'],\n",
606 | 606 | "    name='pytorch 4',\n",
607 | 607 | "    commit_message='this is my pytorch model',\n",
608 | 608 | "    requirements_txt_file='requirements.txt',\n",
609 |     | - "    tokenizer=tokenizer,\n",
    | 609 | + "    tokenizer_fn=tokenizer,\n",
610 | 610 | "    vocab=vocab,\n",
611 | 611 | ")"
612 | 612 | ]

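Binding the result to `ml_model` instead of `model` matters for re-runs: the `model=model` argument must keep pointing at the trained PyTorch module, and the old cell rebound that name to `add_model`'s return value, so executing it twice would have passed Unbox's wrapper back in. The new cell, restated with comments (same code as the diff, annotations are mine):

```python
from unboxapi.models import ModelType

ml_model = project.add_model(
    function=predict_proba,
    model=model,             # the trained torch module; no longer shadowed
    model_type=ModelType.pytorch,
    class_names=['world', 'sports', 'business', 'sci/tec'],
    name='pytorch 4',
    commit_message='this is my pytorch model',
    requirements_txt_file='requirements.txt',
    tokenizer_fn=tokenizer,  # forwarded to predict_proba's tokenizer_fn parameter
    vocab=vocab,
)
```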