-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathgetPileup.py
More file actions
executable file
·79 lines (66 loc) · 3.56 KB
/
getPileup.py
File metadata and controls
executable file
·79 lines (66 loc) · 3.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python3
import argparse
import configparser
import json
import os, sys
import subprocess
import warnings
from datetime import datetime

from scripts.logger import Logger
from scripts.argparser import ArgParser
class GetPileup:
    """Collect the pileup file list for a chain and store it under data/pileups.

    Instantiating the class runs the whole workflow: the command-line
    arguments are parsed, the chain JSON named by the ``chain`` argument is
    checked and read, and ``dasgoclient`` is queried site by site for the
    files of the dataset given by the chain's ``pileup_input`` option.  The
    sorted, de-duplicated result is written to
    ``data/pileups/<chain name>.txt`` relative to the current working
    directory.
    """

    # Output directory, relative to the current working directory.
    _PILEUP_DIR = "data/pileups"

    # Site names passed one at a time as the ``site=`` selector of the query.
    _SITES = (
        "T1_DE_KIT_Disk", "T1_ES_PIC_Disk", "T1_FR_CCIN2P3_Disk",
        "T1_IT_CNAF_Disk", "T1_UK_RAL_Disk", "T1_US_FNAL_Disk",
        "T2_AT_Vienna", "T2_BE_IIHE", "T2_BE_UCL", "T2_CH_CERN",
        "T2_CH_CSCS", "T2_DE_DESY", "T2_DE_RWTH", "T2_ES_CIEMAT",
        "T2_ES_IFCA", "T2_FR_GRIF", "T2_FR_IPHC", "T2_IT_Bari",
        "T2_IT_Legnaro", "T2_IT_Pisa", "T2_IT_Rome", "T2_UK_London_Brunel",
        "T2_UK_London_IC", "T2_UK_SGrid_RALPP", "T2_US_Caltech",
        "T2_US_Florida", "T2_US_MIT", "T2_US_Nebraska", "T2_US_Purdue",
        "T2_US_UCSD", "T2_US_Vanderbilt", "T2_US_Wisconsin",
    )

    def __init__(self):
        """Parse the arguments, validate them and run the DAS query."""
        self.TIMESTAMP = datetime.now().strftime('%Y%m%d_%H%M%S')
        # The working directory is used; no environment variable is consulted.
        self.SAMPLEFACTORY = os.getcwd()
        self.ARGS = vars(ArgParser(__file__))
        self.__validate_ARGS()
        self.__query_DAS()

    def __read_JSON(self, json_file):
        """Load ``json_file`` and return its parsed content.

        An error is reported through ``Logger.ERROR`` when the parsed content
        is empty (falsy); the content is returned either way.
        """
        with open(json_file) as rf:
            content = json.load(rf)
        if not content:
            Logger.ERROR(json_file + " is empty")
        return content

    def __validate_ARGS(self):
        """Report an error when the ``chain`` argument is not an existing path."""
        if not os.path.exists(self.ARGS["chain"]):
            Logger.ERROR(self.ARGS["chain"] + " does not exist")

    @staticmethod
    def _find_pileup_inputs(workflows):
        """Return every ``pileup_input`` option value found in ``workflows``.

        ``workflows`` maps workflow names to configuration dicts; a
        configuration without an ``OPTIONS`` entry contributes nothing.
        """
        return [
            cfg["OPTIONS"]["pileup_input"]
            for cfg in workflows.values()
            if "pileup_input" in cfg.get("OPTIONS", {})
        ]

    @staticmethod
    def _clean_dataset_name(raw):
        """Strip the ``dbs:`` marker and any double quotes from ``raw``."""
        return raw.replace("dbs:", "").replace("\"", "")

    @staticmethod
    def _unique_file_lines(text):
        """Return the distinct non-blank lines of ``text`` in sorted order."""
        stripped = (line.strip() for line in text.splitlines())
        return sorted({line for line in stripped if line})

    def __run_dasgoclient(self, query):
        """Run ``dasgoclient -query=<query>`` and return its standard output.

        The command is started from an argument list, so the query text is
        never interpreted by a shell.  Standard error is left attached to the
        terminal.  When the executable cannot be started a warning is logged
        and an empty string is returned, so the caller carries on with the
        next query instead of crashing.
        """
        try:
            proc = subprocess.run(
                ["dasgoclient", f"-query={query}"],
                stdout=subprocess.PIPE,
                # Spelled this way (rather than text=True) to stay usable on
                # older Python 3 interpreters.
                universal_newlines=True,
            )
        except OSError as err:
            Logger.WARNING(f"could not run dasgoclient: {err}")
            return ""
        return proc.stdout

    def __query_DAS(self):
        """Query DAS for the chain's pileup dataset and write the file list.

        Sets ``self.CHAIN_NAME`` to the chain file's base name up to the first
        dot.  Each site in ``_SITES`` is queried separately; a site whose
        output contains ``failure`` is skipped and its output discarded.  If
        no file is collected from any site, the dataset is queried once more
        without a site restriction and that raw output is written instead.
        """
        chain_path = self.ARGS["chain"]
        self.CHAIN_NAME = os.path.basename(chain_path).split(".")[0]
        chain_json = self.__read_JSON(chain_path)

        pileup_inputs = self._find_pileup_inputs(chain_json["WORKFLOWS"])
        if len(pileup_inputs) != 1:
            Logger.ERROR("pileup_input not properly given in " + chain_path)
            if not pileup_inputs:
                # NOTE(review): whether Logger.ERROR aborts is not visible
                # from here; without a dataset there is nothing to query.
                return
        pileup_dataset = self._clean_dataset_name(pileup_inputs[0])

        os.makedirs(self._PILEUP_DIR, exist_ok=True)
        pileup_path = os.path.join(self._PILEUP_DIR, f"{self.CHAIN_NAME}.txt")

        per_site_output = []
        for site in self._SITES:
            Logger.WARNING(f"checking {pileup_dataset} in {site}")
            output = self.__run_dasgoclient(
                f"file dataset={pileup_dataset} site={site}"
            )
            # Judge each site on its own output so that one failing site
            # neither pollutes the list nor flags the sites that follow.
            if "failure" in output:
                Logger.WARNING(f"skipping {site} due to {output}")
                continue
            per_site_output.append(output)

        files = self._unique_file_lines("\n".join(per_site_output))
        # Opening with "w" replaces any list left over from a previous run.
        with open(pileup_path, "w") as wf:
            wf.writelines(line + "\n" for line in files)

        nfiles = len(files)
        Logger.INFO(f"collected {nfiles} files for {pileup_dataset}")
        if nfiles == 0:
            Logger.WARNING(f"wasn't able to collect any files for {pileup_dataset}")
            Logger.WARNING("querying DAS for full dataset")
            with open(pileup_path, "w") as wf:
                wf.write(self.__run_dasgoclient(f"file dataset={pileup_dataset}"))
def main():
    """Run the pileup lookup; constructing GetPileup performs all the work."""
    GetPileup()


if __name__ == "__main__":
    main()