Skip to content

Commit d1690d2

Browse files
authored
Merge pull request #56 from roocs/intake-demo
added notebook for intake demo
2 parents b20997f + cd99da2 commit d1690d2

File tree

2 files changed

+279
-0
lines changed

2 files changed

+279
-0
lines changed

environment.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@ dependencies:
1414
- xarray>=0.16
1515
# esgf search
1616
- esgf-pyclient
17+
# intake catalog
18+
- intake
19+
- pandas
20+
- requests
21+
- aiohttp
1722
# tests
1823
#- pytest
1924
#- flake8
Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "undefined-december",
6+
"metadata": {},
7+
"source": [
8+
"# Demo for using C3S intake catalog\n",
9+
"\n",
10+
"Intake Example:\n",
11+
"https://github.com/intake/intake-examples/blob/master/tutorial/data_scientist.ipynb\n"
12+
]
13+
},
14+
{
15+
"cell_type": "code",
16+
"execution_count": null,
17+
"id": "perceived-danger",
18+
"metadata": {},
19+
"outputs": [],
20+
"source": [
21+
"import intake"
22+
]
23+
},
24+
{
25+
"cell_type": "markdown",
26+
"id": "floating-spare",
27+
"metadata": {},
28+
"source": [
29+
"## Open remote catalog"
30+
]
31+
},
32+
{
33+
"cell_type": "code",
34+
"execution_count": null,
35+
"id": "genetic-inflation",
36+
"metadata": {},
37+
"outputs": [],
38+
"source": [
39+
"cat = intake.open_catalog(\"https://raw.githubusercontent.com/cehbrecht/c3s_34g_manifests/intake/intake/catalogs/c3s.yaml\")\n"
40+
]
41+
},
42+
{
43+
"cell_type": "code",
44+
"execution_count": null,
45+
"id": "unsigned-wyoming",
46+
"metadata": {},
47+
"outputs": [],
48+
"source": [
49+
"list(cat)"
50+
]
51+
},
52+
{
53+
"cell_type": "code",
54+
"execution_count": null,
55+
"id": "cultural-church",
56+
"metadata": {},
57+
"outputs": [],
58+
"source": [
59+
"print(cat['c3s-cmip6'])"
60+
]
61+
},
62+
{
63+
"cell_type": "markdown",
64+
"id": "excess-decrease",
65+
"metadata": {},
66+
"source": [
67+
"## Load catalog for c3s-cmip6\n",
68+
"Catalogs will be cached locally in `~/.intake/cache`.\n",
69+
"\n",
70+
"See: https://intake.readthedocs.io/en/latest/catalog.html?highlight=simplecache#caching-source-files-locally"
71+
]
72+
},
73+
{
74+
"cell_type": "code",
75+
"execution_count": null,
76+
"id": "cardiac-level",
77+
"metadata": {},
78+
"outputs": [],
79+
"source": [
80+
"df = cat['c3s-cmip6'].read()"
81+
]
82+
},
83+
{
84+
"cell_type": "markdown",
85+
"id": "economic-color",
86+
"metadata": {},
87+
"source": [
88+
"## Show the first catalog entries"
89+
]
90+
},
91+
{
92+
"cell_type": "code",
93+
"execution_count": null,
94+
"id": "interim-chassis",
95+
"metadata": {},
96+
"outputs": [],
97+
"source": [
98+
"df.head()"
99+
]
100+
},
101+
{
102+
"cell_type": "markdown",
103+
"id": "aware-paragraph",
104+
"metadata": {},
105+
"source": [
106+
"## Show number of datasets"
107+
]
108+
},
109+
{
110+
"cell_type": "code",
111+
"execution_count": null,
112+
"id": "grand-toner",
113+
"metadata": {},
114+
"outputs": [],
115+
"source": [
116+
"df.ds_id.nunique()"
117+
]
118+
},
119+
{
120+
"cell_type": "markdown",
121+
"id": "adverse-fashion",
122+
"metadata": {},
123+
"source": [
124+
"## Define a search function for dataset and time"
125+
]
126+
},
127+
{
128+
"cell_type": "code",
129+
"execution_count": null,
130+
"id": "broken-registrar",
131+
"metadata": {},
132+
"outputs": [],
133+
"source": [
def search(df, collection, time=None):
    """Return the sorted file paths of one dataset that overlap a time range.

    A common search we do in rook.

    Parameters
    ----------
    df : pandas.DataFrame
        Catalog table. The columns ``ds_id``, ``path``, ``start_time`` and
        ``end_time`` are read; ``df`` itself is not modified.
    collection : str
        Dataset id that is compared against the ``ds_id`` column.
    time : str, optional
        Either ``"start/end"`` or a single start value. Either side of the
        slash may be empty. With ``None`` (or an empty string) only the
        default bounds apply.

    Returns
    -------
    list
        The ``path`` values of all matching rows, sorted ascending.

    Raises
    ------
    ValueError
        If ``time`` contains more than one ``/``.
    """
    start = end = ""
    if time:
        # Unpacking raises ValueError when there is more than one "/".
        start, end = time.split("/") if "/" in time else (time, "")

    # An empty side of the interval falls back to a fixed default bound.
    start = start.strip() or "1800-01-01"
    end = end.strip() or "2500-12-31"

    # Missing times are replaced by far-past / far-future placeholders so that
    # such rows (presumably datasets without a time axis, e.g. fx) still pass
    # the overlap test below.
    # NOTE(review): the comparisons operate on the column values as stored --
    # assumes ISO-formatted time strings; confirm against the catalog.
    filled = df.fillna({'start_time': '1000-01-01T12:00:00', 'end_time': '3000-12-31T12:00:00'})
    overlaps = (
        (filled.ds_id == collection)
        & (filled.end_time >= start)
        & (filled.start_time <= end)
    )

    # tolist() keeps every matching row; a round trip through to_dict() would
    # collapse rows that share an index label and silently drop paths.
    return filled.loc[overlaps].path.sort_values().tolist()
153+
]
154+
},
155+
{
156+
"cell_type": "markdown",
157+
"id": "close-strap",
158+
"metadata": {},
159+
"source": [
160+
"## Search for a dataset with time restrictions"
161+
]
162+
},
163+
{
164+
"cell_type": "code",
165+
"execution_count": null,
166+
"id": "geographic-passing",
167+
"metadata": {},
168+
"outputs": [],
169+
"source": [
170+
"result = search(\n",
171+
" df, \n",
172+
" collection=\"c3s-cmip6.CMIP.SNU.SAM0-UNICON.historical.r1i1p1f1.day.pr.gn.v20190323\",\n",
173+
" time=\"2000-01-01/2001-12-31\")\n",
174+
"result"
175+
]
176+
},
177+
{
178+
"cell_type": "markdown",
179+
"id": "received-copyright",
180+
"metadata": {},
181+
"source": [
182+
"## Search for a dataset with no time axis (fx, fixed fields)"
183+
]
184+
},
185+
{
186+
"cell_type": "code",
187+
"execution_count": null,
188+
"id": "rational-concrete",
189+
"metadata": {},
190+
"outputs": [],
191+
"source": [
192+
"df.loc[df.table_id==\"fx\"].ds_id"
193+
]
194+
},
195+
{
196+
"cell_type": "code",
197+
"execution_count": null,
198+
"id": "inside-mediterranean",
199+
"metadata": {},
200+
"outputs": [],
201+
"source": [
# Take the first fixed-field entry (table_id == "fx") instead of a hard-coded
# row position: the catalog is fetched from a remote URL, so a fixed position
# may point at a different (possibly non-fx) dataset after the catalog changes.
collection = df.loc[df.table_id == "fx"].ds_id.iloc[0]
collection
204+
]
205+
},
206+
{
207+
"cell_type": "code",
208+
"execution_count": null,
209+
"id": "authorized-spectacular",
210+
"metadata": {},
211+
"outputs": [],
212+
"source": [
213+
"result = search(df, collection=collection, time=\"2000-01-01/2010-12-31\")\n",
214+
"result"
215+
]
216+
},
217+
{
218+
"cell_type": "markdown",
219+
"id": "important-machine",
220+
"metadata": {},
221+
"source": [
222+
"## Other searches ..."
223+
]
224+
},
225+
{
226+
"cell_type": "code",
227+
"execution_count": null,
228+
"id": "considerable-antenna",
229+
"metadata": {},
230+
"outputs": [],
231+
"source": [
232+
"result = df.loc[\n",
233+
" (df.variable_id==\"tas\") \n",
234+
" & (df.experiment_id==\"historical\")\n",
235+
" & (df.table_id==\"day\")\n",
236+
" & (df.member_id==\"r1i1p1f1\")\n",
237+
" & (df.institution_id==\"MIROC\")\n",
238+
"]\n",
239+
"result.head()"
240+
]
241+
},
242+
{
243+
"cell_type": "code",
244+
"execution_count": null,
245+
"id": "ruled-creator",
246+
"metadata": {},
247+
"outputs": [],
248+
"source": [
249+
"result.ds_id.unique()"
250+
]
251+
}
252+
],
253+
"metadata": {
254+
"kernelspec": {
255+
"display_name": "Python 3",
256+
"language": "python",
257+
"name": "python3"
258+
},
259+
"language_info": {
260+
"codemirror_mode": {
261+
"name": "ipython",
262+
"version": 3
263+
},
264+
"file_extension": ".py",
265+
"mimetype": "text/x-python",
266+
"name": "python",
267+
"nbconvert_exporter": "python",
268+
"pygments_lexer": "ipython3",
269+
"version": "3.9.2"
270+
}
271+
},
272+
"nbformat": 4,
273+
"nbformat_minor": 5
274+
}

0 commit comments

Comments
 (0)