lincc-frameworks
diff --git a/‎docs/about.rst
Lines changed: 2 additions & 2 deletions b/‎docs/about.rst
Lines changed: 2 additions & 2 deletions
diff --git a/‎docs/about_nested_pandas/internals.rst renamed to ‎docs/about/internals.rst b/‎docs/about_nested_pandas/internals.rst renamed to ‎docs/about/internals.rst
diff --git a/‎docs/about_nested_pandas/npd_internals.png renamed to ‎docs/about/npd_internals.png b/‎docs/about_nested_pandas/npd_internals.png renamed to ‎docs/about/npd_internals.png
diff --git a/‎docs/about_nested_pandas/objects.parquet renamed to ‎docs/about/objects.parquet b/‎docs/about_nested_pandas/objects.parquet renamed to ‎docs/about/objects.parquet
diff --git a/‎docs/about/performance.ipynb
Lines changed: 146 additions & 0 deletions b/‎docs/about/performance.ipynb
Lines changed: 146 additions & 0 deletions
diff --git a/‎docs/about_nested_pandas/ztf_sources.parquet renamed to ‎docs/about/ztf_sources.parquet b/‎docs/about_nested_pandas/ztf_sources.parquet renamed to ‎docs/about/ztf_sources.parquet
diff --git a/‎docs/about_nested_pandas/performance.ipynb
Lines changed: 0 additions & 150 deletions b/‎docs/about_nested_pandas/performance.ipynb
Lines changed: 0 additions & 150 deletions
diff --git a/‎docs/index.rst
Lines changed: 4 additions & 0 deletions b/‎docs/index.rst
Lines changed: 4 additions & 0 deletions
@@ -4,5 +4,5 @@ About Nested-Pandas
 
 .. toctree::
 
-    Internal Representation of Nested Data <about_nested_pandas/internals>
-    Performance Impact of Nested-Pandas <about_nested_pandas/performance>
+    Internal Representation of Nested Data <about/internals>
+    Performance Impact of Nested-Pandas <about/performance>
@@ -0,0 +1,146 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Performance Impact of `nested-pandas`\n",
+    "\n",
+    "For use cases involving nested data, `nested-pandas` can offer significant speedups compared to using the native `pandas` API. Below is a brief comparison of the same example workflow written in `pandas` and in `nested-pandas`; the workflow calculates the amplitude of photometric fluxes after a few filtering steps."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import nested_pandas as npd\n",
+    "import pandas as pd\n",
+    "import light_curve as licu\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Pandas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "494 ms ± 3.34 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "\n",
+    "# Read data\n",
+    "object_df = pd.read_parquet(\"objects.parquet\")\n",
+    "source_df = pd.read_parquet(\"ztf_sources.parquet\")\n",
+    "\n",
+    "# Filter on object\n",
+    "filtered_object = object_df.query(\"ra > 10.0\")\n",
+    "# Sync object to source -- removes any source rows whose index is not found in object\n",
+    "filtered_source = filtered_object[[]].join(source_df, how=\"left\")\n",
+    "\n",
+    "# Count number of observations per photometric band and add it to the object table\n",
+    "band_counts = source_df.groupby(level=0).apply(lambda x: \n",
+    "                                               x[[\"band\"]].value_counts().reset_index()).pivot_table(values=\"count\", \n",
+    "                                                                                                     index=\"index\", \n",
+    "                                                                                                     columns=\"band\", \n",
+    "                                                                                                     aggfunc=\"sum\")\n",
+    "filtered_object = filtered_object.join(band_counts[[\"g\",\"r\"]])\n",
+    "\n",
+    "# Filter on the number of g-band observations (nobs)\n",
+    "filtered_object = filtered_object.query(\"g > 520\")\n",
+    "filtered_source = filtered_object[[]].join(source_df, how=\"left\")\n",
+    "\n",
+    "# Calculate Amplitude\n",
+    "amplitude = licu.Amplitude()\n",
+    "filtered_source.groupby(level=0).apply(lambda x: amplitude(np.array(x.mjd), np.array(x.flux)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Nested-Pandas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "230 ms ± 2.81 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "\n",
+    "# Read in parquet data,\n",
+    "# nesting sources into objects\n",
+    "nf = npd.read_parquet(data=\"objects.parquet\",\n",
+    "                  to_pack={\"ztf_sources\": \"ztf_sources.parquet\"})\n",
+    "\n",
+    "# Filter on object\n",
+    "nf = nf.query(\"ra > 10.0\")\n",
+    "\n",
+    "# Count number of observations per photometric band and add it as a column\n",
+    "from nested_pandas.utils import count_nested # utility function of nested_pandas\n",
+    "nf = count_nested(nf, \"ztf_sources\", by=\"band\", join=True)\n",
+    "\n",
+    "# Filter on the number of g-band observations (nobs)\n",
+    "nf = nf.query(\"n_ztf_sources_g > 520\")\n",
+    "\n",
+    "# Calculate Amplitude\n",
+    "amplitude = licu.Amplitude()\n",
+    "nf.reduce(amplitude, \"ztf_sources.mjd\", \"ztf_sources.flux\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "In addition, fewer lines of code are needed!"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "lsdb",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
@@ -79,6 +79,9 @@ API-level information about nested-pandas is viewable in the
 :doc:`API Reference <reference>`
 section.
 
+The :doc:`About Nested-Pandas <about>` section provides information on the
+design and performance advantages of nested-pandas.
+
 Learn more about contributing to this repository in our :doc:`Contribution Guide <gettingstarted/contributing>`.
 
 .. toctree::
@@ -88,3 +91,4 @@ Learn more about contributing to this repository in our :doc:`Contribution Guide
    Getting Started <gettingstarted>
    Tutorials <tutorials>
    API Reference <reference>
+   About Nested-Pandas <about>