|
11 | 11 | },
|
12 | 12 | {
|
13 | 13 | "cell_type": "code",
|
14 |
| - "execution_count": 1, |
| 14 | + "execution_count": null, |
15 | 15 | "metadata": {},
|
16 | 16 | "outputs": [],
|
17 | 17 | "source": [
|
|
30 | 30 | },
|
31 | 31 | {
|
32 | 32 | "cell_type": "code",
|
33 |
| - "execution_count": 2, |
| 33 | + "execution_count": null, |
34 | 34 | "metadata": {},
|
35 |
| - "outputs": [ |
36 |
| - { |
37 |
| - "name": "stdout", |
38 |
| - "output_type": "stream", |
39 |
| - "text": [ |
40 |
| - "494 ms ± 3.34 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" |
41 |
| - ] |
42 |
| - } |
43 |
| - ], |
| 35 | + "outputs": [], |
44 | 36 | "source": [
|
45 | 37 | "%%timeit\n",
|
46 | 38 | "\n",
|
|
50 | 42 | "\n",
|
51 | 43 | "# Filter on object\n",
|
52 | 44 | "filtered_object = object_df.query(\"ra > 10.0\")\n",
|
53 |
| - "#sync object to source --removes any index values of source not found in object\n", |
| 45 | + "# sync object to source --removes any index values of source not found in object\n", |
54 | 46 | "filtered_source = filtered_object[[]].join(source_df, how=\"left\")\n",
|
55 | 47 | "\n",
|
56 | 48 | "# Count number of observations per photometric band and add it to the object table\n",
|
57 |
| - "band_counts = source_df.groupby(level=0).apply(lambda x: \n", |
58 |
| - " x[[\"band\"]].value_counts().reset_index()).pivot_table(values=\"count\", \n", |
59 |
| - " index=\"index\", \n", |
60 |
| - " columns=\"band\", \n", |
61 |
| - " aggfunc=\"sum\")\n", |
62 |
| - "filtered_object = filtered_object.join(band_counts[[\"g\",\"r\"]])\n", |
| 49 | + "band_counts = (\n", |
| 50 | + " source_df.groupby(level=0)\n", |
| 51 | + " .apply(lambda x: x[[\"band\"]].value_counts().reset_index())\n", |
| 52 | + " .pivot_table(values=\"count\", index=\"index\", columns=\"band\", aggfunc=\"sum\")\n", |
| 53 | + ")\n", |
| 54 | + "filtered_object = filtered_object.join(band_counts[[\"g\", \"r\"]])\n", |
63 | 55 | "\n",
|
64 | 56 | "# Filter on our nobs\n",
|
65 | 57 | "filtered_object = filtered_object.query(\"g > 520\")\n",
|
|
81 | 73 | "cell_type": "code",
|
82 | 74 | "execution_count": null,
|
83 | 75 | "metadata": {},
|
84 |
| - "outputs": [ |
85 |
| - { |
86 |
| - "name": "stdout", |
87 |
| - "output_type": "stream", |
88 |
| - "text": [ |
89 |
| - "230 ms ± 2.81 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" |
90 |
| - ] |
91 |
| - } |
92 |
| - ], |
| 76 | + "outputs": [], |
93 | 77 | "source": [
|
94 | 78 | "%%timeit\n",
|
95 | 79 | "\n",
|
96 |
| - "#Read in parquet data\n", |
97 |
| - "#nesting sources into objects\n", |
98 |
| - "nf = npd.read_parquet(data=\"objects.parquet\",\n", |
99 |
| - " to_pack={\"ztf_sources\": \"ztf_sources.parquet\"})\n", |
| 80 | + "# Read in parquet data\n", |
| 81 | + "# nesting sources into objects\n", |
| 82 | + "nf = npd.read_parquet(data=\"objects.parquet\", to_pack={\"ztf_sources\": \"ztf_sources.parquet\"})\n", |
100 | 83 | "\n",
|
101 | 84 | "# Filter on object\n",
|
102 | 85 | "nf = nf.query(\"ra > 10.0\")\n",
|
103 | 86 | "\n",
|
104 | 87 | "# Count number of observations per photometric band and add it as a column\n",
|
105 |
| - "from nested_pandas.utils import count_nested # utility function of nested_pandas\n", |
| 88 | + "from nested_pandas.utils import count_nested # utility function of nested_pandas\n", |
| 89 | + "\n", |
106 | 90 | "nf = count_nested(nf, \"ztf_sources\", by=\"band\", join=True)\n",
|
107 | 91 | "\n",
|
108 | 92 | "# Filter on our nobs\n",
|
|
0 commit comments