-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata_extractor.py
61 lines (47 loc) · 1.83 KB
/
data_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import sys
import os
# Directory holding the "<DATA_SET_TYPE>-<REGION_TYPE>.csv" files that are
# handed to PoiDataStruct. Read from the environment at import time, so an
# unset DATA_PATH variable raises KeyError before any command-line argument
# is looked at.
DATA_PATH = os.environ['DATA_PATH']
from extraction.nc4_extractor import NC4Extractor
from extraction.hdf_extractor import HDFExtractor
from spatial.poi_data_struct import PoiDataStruct
# Statistics requested from the extractor; this order is also the order of
# the statistic columns in every emitted row.
STAT_METHODS = [
    'min',
    'max',
    'median',
    'mean',
    'std',
]


def _format_record(record):
    """Return one extractor record as a comma-separated output line.

    ``record`` is indexed positionally: items 0-2 are copied through as-is,
    and item 3 is a mapping with one value per name in ``STAT_METHODS``.
    """
    columns = [record[0], record[1], record[2]]
    columns.extend(record[3][name] for name in STAT_METHODS)
    return ",".join(str(value) for value in columns)


def main(argv=None):
    """Extract statistics from one input file and emit them as CSV rows.

    Command line::

        data_extractor.py DATA_SET_TYPE REGION_TYPE IP_FILE_PATH [OP_FILE_PATH]

    DATA_SET_TYPE  'merra2' (read with NC4Extractor) or 'caliop' (read with
                   HDFExtractor).
    REGION_TYPE    Second half of the POI CSV name
                   ``<DATA_SET_TYPE>-<REGION_TYPE>.csv`` under DATA_PATH.
                   The original note listed "strict / bugger"; the second
                   value is possibly a typo for "buffer" - TODO confirm
                   against the CSV files actually present in DATA_PATH.
    IP_FILE_PATH   Path passed to the extractor as its first argument.
    OP_FILE_PATH   Optional. When given (and non-empty) rows are appended to
                   this file; otherwise rows are printed to stdout.

    ``argv`` defaults to ``sys.argv``. Exits via ``SystemExit`` with a
    message when required arguments are missing or DATA_SET_TYPE is not
    supported.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 4:
        prog = argv[0] if argv else 'data_extractor.py'
        sys.exit(
            "usage: %s DATA_SET_TYPE REGION_TYPE IP_FILE_PATH [OP_FILE_PATH]"
            % prog
        )

    data_set_type = argv[1]
    region_type = argv[2]
    ip_file_path = argv[3]
    op_file_path = argv[4] if len(argv) > 4 else None

    # Per data set: extractor class, names of the two coordinate variables,
    # and the fields to pull statistics for.
    data_sets = {
        'merra2': (
            NC4Extractor,
            ["lat", "lon"],
            [
                'BCSCATAU', 'BCSMASS', 'OCSCATAU', 'OCSMASS', 'DUSCATAU', 'DUSMASS',
                'SSSCATAU', 'SSSMASS', 'SUSCATAU', 'SO4SMASS', 'SO2SMASS', 'TOTSCATAU',
            ],
        ),
        'caliop': (
            HDFExtractor,
            ["Latitude_Midpoint", "Longitude_Midpoint"],
            [
                'AOD_Mean', 'AOD_Mean_Dust', 'AOD_Mean_Smoke', 'AOD_Mean_Polluted_Dust',
            ],
        ),
    }

    if data_set_type not in data_sets:
        # Previously an unknown type fell through both branches and the
        # script exited successfully without producing anything.
        sys.exit(
            "unsupported DATA_SET_TYPE %r; expected one of: %s"
            % (data_set_type, ", ".join(sorted(data_sets)))
        )

    extractor_cls, coord_names, fields = data_sets[data_set_type]

    poi_csv = os.path.join(DATA_PATH, data_set_type + "-" + region_type + ".csv")
    poi_struct = PoiDataStruct(poi_csv).generate()

    ext = extractor_cls(ip_file_path, coord_names, poi_struct)
    records = ext.iterativeExtractor(fields, STAT_METHODS)

    if op_file_path:
        # Open once instead of once per record. Line buffering (third
        # argument 1) still pushes every row out as soon as it is written,
        # matching the visibility the per-record reopen used to give.
        # Note: the file is now created even if no records are produced.
        with open(op_file_path, 'a', 1) as out:
            for record in records:
                out.write(_format_record(record) + "\n")
    else:
        for record in records:
            # Single-argument print() behaves the same on Python 2 and 3.
            print(_format_record(record))


if __name__ == '__main__':
    main()