From 6bea5151bca9a8f332c60459a7a52323f7cf95b6 Mon Sep 17 00:00:00 2001
From: rashika <rashika@thebigdiesel.ccs.neu.edu>
Date: Fri, 22 Mar 2024 18:08:56 -0400
Subject: [PATCH] Fix plot.ipynb so that the best prediction file per group
 is selected and used correctly

---
 src/plot.ipynb | 56 ++++++++++++++++++++++----------------------------
 1 file changed, 24 insertions(+), 32 deletions(-)

diff --git a/src/plot.ipynb b/src/plot.ipynb
index ccde901..08b4c1d 100644
--- a/src/plot.ipynb
+++ b/src/plot.ipynb
@@ -44,6 +44,8 @@
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Map column names to full names (for axis labels)\n",
@@ -51,11 +53,7 @@
     "\n",
     "# Map ontology namespaces to full names (for plot titles)\n",
     "ontology_dict = {'biological_process': 'BPO', 'molecular_function': 'MFO', 'cellular_component': 'CCO'}"
-   ],
-   "metadata": {
-    "collapsed": false
-   },
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
@@ -88,7 +86,7 @@
     "    else:\n",
     "        df['is_baseline'].fillna(False, inplace=True)\n",
     "    # print(methods)\n",
-    "df = df.drop(columns='filename').set_index(['group', 'label', 'ns', 'tau'])\n",
+    "df = df.set_index(['group', 'label', 'ns', 'filename','tau'])\n",
     "df"
    ]
   },
@@ -105,6 +103,8 @@
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Assign colors based on group\n",
@@ -113,9 +113,7 @@
     "df['colors'] = pd.factorize(df['colors'])[0]\n",
     "df['colors'] = df['colors'].apply(lambda x: cmap.colors[x % len(cmap.colors)])\n",
     "df"
-   ],
-   "metadata": {},
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
@@ -152,6 +150,8 @@
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Add first last points to precision and recall curves to improve APS calculation\n",
@@ -165,47 +165,41 @@
     "if metric.startswith('f') and add_extreme_points:\n",
     "    df_methods = df_methods.reset_index().groupby(['group', 'label', 'ns'], as_index=False).apply(add_points).set_index(['group', 'label', 'ns'])\n",
     "df_methods"
-   ],
-   "metadata": {
-    "collapsed": false
-   },
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Filter the dataframe for the best method and threshold\n",
     "df_best = df.loc[index_best, ['cov', 'colors'] + cols + [metric]]\n",
     "df_best"
-   ],
-   "metadata": {},
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Calculate average precision score \n",
     "if metric.startswith('f'):\n",
     "    df_best['aps'] = df_methods.groupby(level=['group', 'label', 'ns'])[[cols[0], cols[1]]].apply(lambda x: (x[cols[0]].diff(-1).shift(1) * x[cols[1]]).sum())\n",
     "df_best"
-   ],
-   "metadata": {
-    "collapsed": false
-   },
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Calculate the max coverage across all thresholds\n",
     "df_best['max_cov'] = df_methods.groupby(level=['group', 'label', 'ns'])['cov'].max()\n",
     "df_best"
-   ],
-   "metadata": {},
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
@@ -247,7 +241,7 @@
     "\n",
     "    # Iterate methods\n",
     "    for i, (index, row) in enumerate(df_g.sort_values(by=[metric, 'max_cov'], ascending=[False if metric.startswith('f') else True, False]).iterrows()):\n",
-    "        data = df_methods.loc[index[:-1]]\n",
+    "        data = df_methods.loc[index[:-2]]\n",
     "        \n",
     "        # Precision-recall or mi-ru curves\n",
     "        ax.plot(data[cols[0]], data[cols[1]], color=row['colors'], label=row['label'], lw=2, zorder=500-i)\n",
@@ -282,12 +276,10 @@
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
-   "source": [],
-   "metadata": {
-    "collapsed": false
-   },
-   "execution_count": null
+   "source": []
   }
  ],
  "metadata": {
@@ -306,7 +298,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.5"
+   "version": "3.10.9"
   }
  },
  "nbformat": 4,