
Commit

simplify shell scripts and update USGS Level 1 to accommodate .gz and MTL.txt at the same time
tina5870 committed Jun 20, 2018
1 parent a2b9ea6 commit 0002941
Showing 6 changed files with 68 additions and 16 deletions.
4 changes: 3 additions & 1 deletion do_qsub_l1.sh
@@ -1,2 +1,4 @@
-for i in `find /g/data/v10/AGDCv2/indexed_datasets/ledaps_lasrc/opendatacubepipelines.ledapslasrc/qsub_scripts_l1 -name "*.qsub"`; do qsub $i; done
+HOME=/g/data2/v10/AGDCv2/datacube-ingestion/indexed-products/ledaps_lasrc/opendatacubepipelines.ledapslasrc
+
+for i in `find $HOME/qsub_scripts_l1 -name "*.qsub"`; do qsub $i; done
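A note on the rewritten do_qsub_l1.sh (the same pattern recurs in the scripts below): HOME is the shell's own home-directory variable, so reassigning it changes what ~ and $HOME mean for everything run afterwards in the same shell; a differently named variable is safer. The backtick-for loop also word-splits on unusual filenames. A minimal sketch, assuming the same directory layout (BASE is a hypothetical name standing in for the reassigned HOME):

    #!/bin/bash
    # BASE is a hypothetical stand-in for the commit's reassigned HOME.
    BASE=/g/data2/v10/AGDCv2/datacube-ingestion/indexed-products/ledaps_lasrc/opendatacubepipelines.ledapslasrc

    # Null-delimited find keeps filenames with spaces or newlines intact.
    find "$BASE/qsub_scripts_l1" -name '*.qsub' -print0 |
    while IFS= read -r -d '' script; do
        qsub "$script"
    done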

7 changes: 5 additions & 2 deletions ledaps_lasrc_pbs_tasker.sh
@@ -1,2 +1,5 @@
-for i in `ls -1 /g/data/v10/projects/ARD_interoperability/L2/unzip`; do if [[ $i != *":"* ]]; then mkdir -p /g/data/v10/AGDCv2/indexed_datasets/ledaps_lasrc/opendatacubepipelines.ledapslasrc/yamls/$i ; fi; done
-for i in `ls /g/data/v10/AGDCv2/indexed_datasets/ledaps_lasrc/opendatacubepipelines.ledapslasrc/yamls/`; do cp ledaps_lasrc_prepare.sh /g/data/v10/AGDCv2/indexed_datasets/ledaps_lasrc/opendatacubepipelines.ledapslasrc/qsub_scripts/$i.qsub; sed -i -e "s/TARGET/$i/g" "/g/data/v10/AGDCv2/indexed_datasets/ledaps_lasrc/opendatacubepipelines.ledapslasrc/qsub_scripts/$i.qsub"; echo 'qsub /g/data/v10/AGDCv2/indexed_datasets/ledaps_lasrc/opendatacubepipelines.ledapslasrc/qsub_scripts/'$i'.qsub'; done
+HOME=/g/data/v10/AGDCv2/indexed_datasets/ledaps_lasrc/opendatacubepipelines.ledapslasrc
+DATA=/g/data/dz56/ARD_interoperability/L2
+
+for i in `ls -1 $DATA/unzip`; do if [[ $i != *":"* ]]; then mkdir -p $DATA/yamls/$i ; fi; done
+for i in `ls $DATA/yamls/`; do cp ledaps_lasrc_prepare.sh $HOME/qsub_scripts/$i.qsub; sed -i -e "s/TARGET/$i/g" "$HOME/qsub_scripts/$i.qsub"; echo 'qsub $HOME/qsub_scripts/'$i'.qsub'; done
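A quoting caveat in the new tasker line (also present in ledaps_lasrc_pbs_tasker_l1.sh below): in echo 'qsub $HOME/qsub_scripts/'$i'.qsub' the single quotes keep $HOME literal, so the echoed command prints a verbatim "$HOME" and cannot be pasted back into a shell. A corrected sketch of the second loop, assuming the HOME and DATA values above:

    # Double quotes let both variables expand; the committed line prints a literal $HOME.
    for i in `ls $DATA/yamls/`; do
        cp ledaps_lasrc_prepare.sh "$HOME/qsub_scripts/$i.qsub"
        sed -i -e "s/TARGET/$i/g" "$HOME/qsub_scripts/$i.qsub"
        echo "qsub $HOME/qsub_scripts/$i.qsub"
    done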
9 changes: 7 additions & 2 deletions ledaps_lasrc_pbs_tasker_l1.sh
@@ -1,2 +1,7 @@
-for i in `ls -1 /g/data/v10/projects/ARD_interoperability/L1`; do if [[ $i != *":"* ]]; then mkdir -p /g/data/v10/AGDCv2/indexed_datasets/ledaps_lasrc/opendatacubepipelines.ledapslasrc/yamls_l1/$i ; fi; done
-for i in `ls /g/data/v10/AGDCv2/indexed_datasets/ledaps_lasrc/opendatacubepipelines.ledapslasrc/yamls_l1/`; do cp ledaps_lasrc_prepare_l1.sh /g/data/v10/AGDCv2/indexed_datasets/ledaps_lasrc/opendatacubepipelines.ledapslasrc/qsub_scripts_l1/$i.qsub; sed -i -e "s/TARGET/$i/g" "/g/data/v10/AGDCv2/indexed_datasets/ledaps_lasrc/opendatacubepipelines.ledapslasrc/qsub_scripts_l1/$i.qsub"; echo 'qsub /g/data/v10/AGDCv2/indexed_datasets/ledaps_lasrc/opendatacubepipelines.ledapslasrc/qsub_scripts_l1/'$i'.qsub'; done
+#HOME=/g/data1b/da82/AODH/USGS/L1/Landsat/C1
+#DATA=/g/data1b/da82/AODH/USGS/L1/Landsat/C1
+HOME=/g/data2/v10/AGDCv2/datacube-ingestion/indexed-products/ledaps_lasrc/opendatacubepipelines.ledapslasrc
+DATA=/g/data2/v10/AGDCv2/datacube-ingestion/indexed-products/ledaps_lasrc/opendatacubepipelines.ledapslasrc/test_data
+
+for i in `ls -1 $DATA`; do if [[ $i != *":"* ]]; then mkdir -p $HOME/yamls_test/$i ; fi; done
+for i in `ls $HOME/yamls_test/`; do cp ledaps_lasrc_prepare_l1.sh $HOME/qsub_scripts_l1/$i.qsub; sed -i -e "s/TARGET/$i/g" "$HOME/qsub_scripts_l1/$i.qsub"; echo 'qsub $HOME/qsub_scripts_l1/'$i'.qsub'; done
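The two passes here (mkdir from $DATA, then qsub generation from $HOME/yamls_test) can collapse into one loop. A sketch with the quoting fixed, assuming each entry under $DATA is a dataset directory; note the committed second loop also picks up yamls_test entries left over from earlier runs, while this version only covers what is currently under $DATA:

    for d in "$DATA"/*/; do
        i=$(basename "$d")
        [[ $i == *:* ]] && continue        # keep the original skip of names containing ':'
        mkdir -p "$HOME/yamls_test/$i"
        cp ledaps_lasrc_prepare_l1.sh "$HOME/qsub_scripts_l1/$i.qsub"
        sed -i -e "s/TARGET/$i/g" "$HOME/qsub_scripts_l1/$i.qsub"
        echo "qsub $HOME/qsub_scripts_l1/$i.qsub"
    done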
4 changes: 2 additions & 2 deletions ledaps_lasrc_prepare.sh
@@ -13,6 +13,6 @@ module load gaip/dev-sen2redo
 module load parallel
 
 HOME=/g/data/v10/AGDCv2/indexed_datasets/ledaps_lasrc/opendatacubepipelines.ledapslasrc
-DATA=/g/data/v10/projects/ARD_interoperability/L2
+DATA=/g/data/dz56/ARD_interoperability/L2
 
-find $DATA/unzip/TARGET -name *.xml | parallel --jobs 16 "python $HOME/ls_usgs_l2_prepare.py {} --output $HOME/yamls/TARGET --no-checksum --date 1/1/1999"
+find $DATA/unzip/TARGET -name *.xml | parallel --jobs 16 "python $HOME/ls_usgs_l2_prepare.py {} --output $DATA/yamls/TARGET --no-checksum --date 1/1/1999"
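One fragility in both prepare scripts: the unquoted -name *.xml is expanded by the shell before find ever runs if a matching file happens to sit in the current directory. Quoting the pattern avoids the surprise; same pipeline otherwise:

    # The quoted pattern reaches find intact instead of being globbed by the shell.
    find "$DATA/unzip/TARGET" -name '*.xml' | parallel --jobs 16 "python $HOME/ls_usgs_l2_prepare.py {} --output $DATA/yamls/TARGET --no-checksum --date 1/1/1999"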
6 changes: 3 additions & 3 deletions ledaps_lasrc_prepare_l1.sh
@@ -12,7 +12,7 @@ module load gaip/dev-sen2redo
 #module load agdc-py3-prod/1.5.1
 module load parallel
 
-HOME=/g/data/v10/AGDCv2/indexed_datasets/ledaps_lasrc/opendatacubepipelines.ledapslasrc
-DATA=/g/data/v10/projects/ARD_interoperability/L1
+HOME=/g/data2/v10/AGDCv2/datacube-ingestion/indexed-products/ledaps_lasrc/opendatacubepipelines.ledapslasrc
+DATA=/g/data2/v10/AGDCv2/datacube-ingestion/indexed-products/ledaps_lasrc/opendatacubepipelines.ledapslasrc/test_data
 
-find $DATA/TARGET -name *_MTL.txt | parallel --jobs 16 "python $HOME/ls_usgs_l1_prepare.py {} --output $HOME/yamls_l1/TARGET --no-checksum --date 1/1/1999"
+find $DATA/TARGET -name *_MTL.txt -o -name *.gz | parallel --jobs 16 "python $HOME/ls_usgs_l1_prepare.py {} --output $HOME/yamls_test/TARGET --no-checksum --date 1/1/1999"
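The new -o alternation works only because find's implicit -print applies to the whole expression; append an explicit action such as -exec later and it binds to the second -name alone. Explicit grouping plus quoted patterns keeps the intent stable — a sketch:

    # Parentheses group the alternation; quotes keep the shell out of the patterns.
    find "$DATA/TARGET" \( -name '*_MTL.txt' -o -name '*.gz' \) | parallel --jobs 16 "python $HOME/ls_usgs_l1_prepare.py {} --output $HOME/yamls_test/TARGET --no-checksum --date 1/1/1999"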
54 changes: 48 additions & 6 deletions ls_usgs_l1_prepare.py
@@ -15,6 +15,8 @@
 from datetime import datetime
 from os.path import join as pjoin
 import hashlib
+import tarfile
+import glob
 
 images1 = [('1', 'coastal_aerosol'),
            ('2', 'blue'),
@@ -214,6 +216,29 @@ def absolutify_paths(doc, path):
     return doc
 
 
+def find_gz_mtl(ds_path, output_folder):
+    """
+    Find the MTL metadata file inside an archived (tar.gz) dataset, extract
+    it, and store it temporarily in the output folder.
+    :param ds_path: the dataset path
+    :param output_folder: the output folder
+    :returns: full path to the extracted MTL file
+    """
+
+    mtl_path = ''
+
+    reT = re.compile("MTL.txt")
+    tar_gz = tarfile.open(str(ds_path), 'r')
+    members = [m for m in tar_gz.getmembers() if reT.search(m.name)]
+    tar_gz.extractall(output_folder, members)
+    mtl_path = pjoin(output_folder, members[0].name)
+
+    return mtl_path
+
+
 @click.command(help="""\b
           Prepare USGS Landsat Collection 1 data for ingestion into the Data Cube.
          This prepare script supports only for MTL.txt metadata file
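Two caveats in find_gz_mtl as committed: the tarfile handle is never closed, and when no member matches MTL.txt, members[0] raises an IndexError before an empty mtl_path can be returned, so the mtl_path == '' guard in main() below can never fire. (Separately, the click help text above still says the script supports only MTL.txt metadata files, which this commit makes stale.) A hedged rewrite keeping the same interface and imports:

    def find_gz_mtl(ds_path, output_folder):
        """Extract the MTL member of a tar.gz dataset into output_folder.

        Returns the extracted path, or '' when the archive holds no MTL file,
        so the caller's empty-string check can actually trigger.
        """
        with tarfile.open(str(ds_path), 'r') as tar_gz:  # context manager closes the archive
            members = [m for m in tar_gz.getmembers() if 'MTL.txt' in m.name]
            if not members:
                return ''
            tar_gz.extractall(output_folder, members)
        return pjoin(output_folder, members[0].name)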
@@ -239,8 +264,8 @@ def absolutify_paths(doc, path):
 
 def main(output, datasets, checksum, date):
     logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
-                    level=logging.INFO)
-
+                        level=logging.INFO)
     for ds in datasets:
         (mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime) = os.stat(ds)
         create_date = datetime.utcfromtimestamp(ctime)
@@ -249,12 +274,20 @@ def main(output, datasets, checksum, date):
                   " is older than start date ", date, "...SKIPPING")
         else:
             ds_path = Path(ds)
-            if ds_path.suffix in ('MTL.txt'):
-                mtl_path = str(ds_path)
-                ds_path = os.path.dirname(str(ds_path))
+            #if ds_path.suffix in ('.gz', 'MTL.txt'):
+            if ds_path.suffix in ('.gz', '.txt'):
+                if ds_path.suffix != '.txt':
+                    mtl_path = find_gz_mtl(ds_path, output)
+                    if mtl_path == '':
+                        raise RuntimeError('no MTL file under the product folder')
+                else:
+                    mtl_path = str(ds_path)
+
+                ds_path = os.path.dirname(str(ds_path))
+
+            #print (mtl_path)
             logging.info("Processing %s", ds_path)
-            output_yaml = pjoin(output, '{}.yaml'.format(os.path.basename(ds_path)))
+            output_yaml = pjoin(output, '{}.yaml'.format(os.path.basename(mtl_path).replace('_MTL.txt', '')))
             logging.info("Output %s", output_yaml)
             if os.path.exists(output_yaml):
                 logging.info("Output already exists %s", output_yaml)
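A subtlety in the replaced condition: ('MTL.txt') is a parenthesised string, not a one-element tuple, so the old test was substring membership; it passed only because '.txt' happens to be a substring of 'MTL.txt'. The new two-element tuple makes the membership test explicit. A quick illustration (the scene name is just an example):

    from pathlib import Path

    p = Path('LC08_L1TP_090084_20180101_20180104_01_T1_MTL.txt')  # example filename
    p.suffix in ('MTL.txt')       # True, but via '.txt' being a substring of 'MTL.txt'
    p.suffix in ('.gz', '.txt')   # True via genuine tuple membership - the corrected test

Note also that when a dataset path has neither suffix, mtl_path is never assigned on that iteration, so the output_yaml line consumes a value left over from a previous dataset (or raises NameError on the first).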
@@ -275,5 +308,14 @@ def main(output, datasets, checksum, date):
             with open(output_yaml, 'w') as stream:
                 yaml.dump(docs, stream)
 
+    #delete intermediate MTL files for archived datasets in the output folder
+    mtl_list = glob.glob('{}/*MTL.txt'.format(output))
+    if len(mtl_list) > 0:
+        for f in mtl_list:
+            try:
+                os.remove(f)
+            except OSError:
+                pass
+
 
 if __name__ == "__main__":
     main()
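(The len(mtl_list) > 0 guard above is redundant: iterating an empty list is already a no-op.) Since each extracted MTL is only needed until its YAML is written, an alternative to the end-of-run glob-and-delete is extracting into a self-cleaning temporary directory. A sketch of the idea, not what the commit does; it would replace the find_gz_mtl call inside the loop:

    import tempfile

    # Hypothetical variant: extract the archive's MTL into a throwaway directory.
    with tempfile.TemporaryDirectory() as tmp:
        mtl_path = find_gz_mtl(ds_path, tmp)
        # ... read the MTL and write the output YAML while tmp still exists ...
    # tmp and the extracted MTL vanish here; no glob cleanup required.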
