Skip to content

Commit a095865

Browse files
authored
Merge pull request #81 from sam-hartke-ucar/main
Update writeERA5file.py to run in parallel
2 parents e57e056 + 4fe96ce commit a095865

File tree

1 file changed

+95
-24
lines changed

1 file changed

+95
-24
lines changed

helpers/writeERA5file.py

Lines changed: 95 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,43 @@
1+
from dask_jobqueue import PBSCluster
2+
from dask.distributed import Client
3+
import time
4+
5+
# Launch a PBS-backed dask cluster: every PBS job hosts one single-core
# worker process with 30GB of memory and a three hour walltime limit.
cluster = PBSCluster(queue="casper", walltime="03:00:00", project="P48500028",
                     memory="30GB", cores=1, processes=1)

# Request 12 worker jobs from the scheduler.
cluster.scale(12)

# Attach a client so that subsequent dask work is sent to this cluster.
client = Client(cluster)

# The worker jobs start asynchronously; pause for 30 seconds to give them
# time to populate before any work is submitted.
time.sleep(30)
print('Cluster created and assigned to dask client')
19+
120
import os
221
import numpy as np
322
import pandas as pd
423
import xarray as xr
5-
#import qfgrib
624
from datetime import timedelta,datetime
725
from dateutil.relativedelta import relativedelta
826

927

10-
era5pl_template = "/glade/collections/rda/data/ds633.0/e5.oper.an.pl/{yr}{mth}/e5.oper.an.pl.{var_id}.{dt}00_{dt}23.grb"
11-
era5sfc_template = "/glade/collections/rda/data/ds633.0/e5.oper.an.sfc/{yr}{mth}/e5.oper.an.sfc.{var_id}.{dt}00_{dt2}23.grb"
12-
w_format = "128_135_w.ll025sc"
13-
v_format = "128_132_v.ll025uv"
14-
u_format = "128_131_u.ll025uv"
15-
q_format = "128_133_q.ll025sc"
16-
t_format = "128_130_t.ll025sc"
17-
tcrw_format = "228_089_tcrw.ll025sc"
28+
#--------------------------------------------------------------------------
29+
# Function for writing ERA5 data to yearly input files for GARD
1830

1931

32+
era5pl_template = "/glade/collections/rda/data/ds633.0/e5.oper.an.pl/{yr}{mth}/e5.oper.an.pl.{var_id}.{dt}00_{dt}23.grb"
33+
era5sfc_template = "/glade/collections/rda/data/ds633.0/e5.oper.an.sfc/{yr}{mth}/e5.oper.an.sfc.{var_id}.{dt}00_{dt2}23.grb"
2034

21-
var_formats = {'v':v_format,
22-
'u':u_format,
23-
'w':w_format,
24-
'q':q_format,
25-
't':t_format,
26-
'tcrw':tcrw_format,
35+
var_formats = {'v':"128_132_v.ll025uv",
36+
'u':"128_131_u.ll025uv",
37+
'w':"128_135_w.ll025sc",
38+
'q':"128_133_q.ll025sc",
39+
't':"128_130_t.ll025sc",
40+
'tcrw':"228_089_tcrw.ll025sc",
2741
}
2842

2943
# new variable names to match those from CESM LENS 2
@@ -35,7 +49,7 @@
3549
'tcrw':'PRECT',
3650
}
3751

38-
def createInputDataset(yr_st,yr_end,varlist):
52+
def createERA5Dataset(yr_st,yr_end,varlist):
3953

4054
m=0
4155
for var in varlist:
@@ -45,7 +59,8 @@ def createInputDataset(yr_st,yr_end,varlist):
4559
yr = dt.year
4660
mth = "%.02d"%dt.month
4761
if var in ('u','v','w','q','t'):
48-
var_files.append(era5pl_template.format(yr=yr,mth=mth,var_id=var_formats[var],dt=dt.strftime('%Y%m%d')))
62+
var_files.append(era5pl_template.format(yr=yr,mth=mth,var_id=var_formats[var],
63+
dt=dt.strftime('%Y%m%d')))
4964
dt = dt + timedelta(days=1)
5065

5166
elif var == 'tcrw':
@@ -56,7 +71,7 @@ def createInputDataset(yr_st,yr_end,varlist):
5671
dt = dt + relativedelta(months=+1)
5772

5873
vardata = xr.open_mfdataset(var_files,concat_dim='time',combine='nested',
59-
backend_kwargs={"indexpath":""}).sel(latitude=slice(50,20),
74+
backend_kwargs={"indexpath":""},parallel=True).sel(latitude=slice(50,20),
6075
longitude=slice(360-120,360-60))[var]
6176

6277
if var in ('u','v','w','q','t'):
@@ -69,23 +84,79 @@ def createInputDataset(yr_st,yr_end,varlist):
6984

7085
if m==0:
7186
era5_ds = vardata.to_dataset()
87+
era5_ds = era5_ds.rename({var:newvarname[var]})
7288
else:
7389
era5_ds = era5_ds.assign(var=vardata)
74-
75-
era5_ds = era5_ds.rename({'var':newvarname[var]})
90+
era5_ds = era5_ds.rename({'var':newvarname[var]})
7691

7792
m+=1
7893

94+
era5_ds = era5_ds.drop_vars(('number','step','surface'))
95+
7996
if yr_st!=yr_end:
8097
outfile = '/glade/scratch/shartke/gard/era5/era5_daily_%d_%d.nc'%(yr_st,yr_end)
8198
else:
8299
outfile = '/glade/scratch/shartke/gard/era5/era5_daily_%d.nc'%yr_st
83100
era5_ds.to_netcdf(outfile)
84101

85102

103+
104+
#--------------------------------------------------------------------------
105+
# Function for writing CESM LENS2 data to decadal input files for GARD
106+
107+
108+
cesm_template = "/glade/campaign/cgd/cesm/CESM2-LE/atm/proc/tseries/day_1/{var}/b.e21.B{scen}{forcing}.f09_g17.LE2-{styr}.0{ens}.cam.h{i}.{var}.{yr1}0101-{yr2}1231.nc"
109+
scen="HIST"
110+
f="cmip6"
111+
112+
def createCESM2Dataset(yr,styr,varlist,enslist):
    """Write one netCDF file of CESM LENS2 daily fields per ensemble member.

    For every member in ``enslist`` the requested variables are read from the
    decadal CESM2-LE time-series files located through ``cesm_template``,
    subset to lat 20..50 / lon 240..300, merged into one dataset and written
    to /glade/scratch/shartke/gard/cesmlens2/.

    Parameters
    ----------
    yr : int
        Year used to pick the decadal input file (rounded down to the start
        of its decade) and to label the output file (``yr`` .. ``yr+9``).
    styr : int
        Start-year identifier inserted into the input file name and the
        output file name.
    varlist : sequence of str
        Variables to extract.  'U', 'V', 'T' and 'Q' are read from the
        ``h6`` files at level index 19; 'PSL' and 'PRECT' are read from the
        ``h1`` files.  Any other name is reported and skipped.
    enslist : iterable of int
        Ensemble member numbers (formatted as two digits in the file name).
    """

    # input files cover whole decades, so locate the start of yr's decade
    yr1 = yr-yr%10

    # NOTE(review): the output name below is built from yr, not yr1, so it
    # only matches the file contents when yr is itself a decade start.

    lat_box = slice(20,50)
    lon_box = slice(360.-120.,360.-60.)

    for e in enslist:
        # start every member from scratch so nothing leaks between members
        cesm_ds = None

        for var in varlist:
            if var in ('U','V','T','Q'):
                ds = xr.open_dataset(cesm_template.format(var=var,scen=scen,forcing=f,styr=styr,
                                     ens="%.02d"%e,yr1=yr1,yr2=yr1+9,i=6))[var]
                # select data over CONUS at ~450 mb level
                vardata = ds.sel(lev=ds.lev[19],lat=lat_box,lon=lon_box) # ,time=slice(str(yr),str(yr))
                vardata = vardata.drop_vars('lev')
            elif var in ('PSL','PRECT'):
                ds = xr.open_dataset(cesm_template.format(var=var,scen=scen,forcing=f,styr=styr,
                                     ens="%.02d"%e,yr1=yr1,yr2=yr1+9,i=1))[var]
                vardata = ds.sel(lat=lat_box,lon=lon_box) # ,time=slice(str(yr),str(yr))
                if var == 'PRECT':
                    # convert m/s to mm/d (precipitation only; PSL is a
                    # pressure and must not be rescaled)
                    vardata = vardata*3600*24*1000
            else:
                # Without this branch the previous variable's data would be
                # stored again under this variable's name.
                print('WARNING: createCESM2Dataset does not support variable %s; skipping'%var)
                continue

            if cesm_ds is None:
                cesm_ds = vardata.to_dataset(name=var)
            else:
                cesm_ds = cesm_ds.assign({var:vardata})

        if cesm_ds is None:
            # nothing was read for this member, so there is nothing to write
            print('WARNING: no supported variables requested for ensemble member %.02d; no file written'%e)
            continue

        outfile = '/glade/scratch/shartke/gard/cesmlens2/cesm_daily_%d_%d_%d_%.02d.nc'%(yr,yr+9,styr,e)
        cesm_ds.to_netcdf(outfile)
142+
143+
144+
#--------------------------------------------------------------------------
86145

146+
# Note: Generating the ERA5 datasets will take the bulk of the time for this program
147+
87148
print(datetime.now())
88-
createInputDataset(1982,1982,['u','v','w','q','t','tcrw'])
89-
print('1982 dataset complete at: ',datetime.now())
90-
createInputDataset(1983,1983,['u','v','w','q','t','tcrw'])
91-
print('1983 dataset complete at: ',datetime.now())
149+
styr = 1301 # 1231, 1251, 1281, or 1301

# 'W' is intentionally not requested from CESM: createCESM2Dataset has no
# branch that reads 'W' (it only reads U, V, T, Q, PSL and PRECT), so asking
# for it cannot produce real vertical-velocity data.
cesm_vars = ['U','V','Q','T','PRECT']

# one call per decade; each call writes one file per ensemble member (1 and 2)
for decade in (1960,1970):
    createCESM2Dataset(decade,styr,cesm_vars,np.arange(1,3))
print('CESM LENS2 datasets complete at: ',datetime.now())

# one yearly ERA5 file per call
for yr in (1980,1981,1982,1983):
    createERA5Dataset(yr,yr,['u','v','w','q','t','tcrw'])
    print('ERA5 %s dataset complete at: '%yr,datetime.now())
157+
158+
159+
160+
161+
# now you should be able to train GARD using 1980-1999 ERA5 data
162+
# and predict downscaled 1960-1979 precip or temp using CESM LENS2 data

0 commit comments

Comments
 (0)