Skip to content

Commit

Permalink
cms-2016-collision-datasets: fix record IDs for new RECO files
Browse files Browse the repository at this point in the history
Fixes the generation of record IDs for RECO configuration files, which were
running into the ID range already reserved for the collision data. Because
adding the full provenance chain produces more RECO files, their record IDs
now have to jump over the reserved record ID interval.

Adds NanoAOD data semantics documents from latest run.
  • Loading branch information
tiborsimko committed Mar 8, 2024
1 parent 784813d commit 3a2095a
Show file tree
Hide file tree
Showing 21 changed files with 1,762 additions and 71 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ def get_run_numbers(dataset_full_name):


def get_dataset_config_file_name(dataset_full_name):
dataset = dataset_full_name.split("/")[1]
dataset = dataset_full_name.split("/")[1]
run_period = dataset_full_name.split("/")[2].split("-", 1)[0]
version = dataset_full_name.split("/")[2].split("-")[1]
config_file = f"ReReco-{run_period}-{dataset}-{version}"
Expand Down Expand Up @@ -276,7 +276,6 @@ def create_selection_information(dataset, dataset_full_name):
out += "<p><strong>Data taking / HLT</strong>"
out += '<br/>The collision data were assigned to different RAW datasets using the following <a href="/record/30300">HLT configuration</a>.</p>'
# data processing / NANO/PAT/RECO:
run_period = re.search(r"(Run[0-9]+.)", dataset_full_name).groups()[0]
aodformat = dataset_full_name.split("/")[3]
step_dataset = dataset_full_name
steps = []
Expand All @@ -291,8 +290,8 @@ def create_selection_information(dataset, dataset_full_name):
{"process": "PAT"},
{"process": "RECO"}
]
out += f"<p><strong>Data processing </strong>"

out += "<p><strong>Data processing </strong>"
out += (
"<br/>This %s dataset was processed from the RAW dataset by the following steps: "
% (aodformat)
Expand All @@ -305,7 +304,7 @@ def create_selection_information(dataset, dataset_full_name):
generator_text = "Configuration file for " + steps[i]['process'] + " step " + afile
release = get_release_for_processing(step_dataset)
global_tag = get_global_tag_for_processing(step_dataset)

out += "<br/><strong>Step %s </strong>" % steps[i]['process']
out += "<br/>Release: %s" % release
out += "<br/>Global tag: %s" % global_tag
Expand Down Expand Up @@ -359,14 +358,15 @@ def get_dataset_index_files(dataset_full_name):
files.append((afile_uri, afile_size, afile_checksum))
return files


def get_dataset_semantics_doc(dataset_name, sample_file_path, recid):
"""Produce the dataset semantics files and return their data-curation paths for the given dataset."""
output_dir = f"outputs/docs/NanoAOD/{recid}"
eos_dir=f"/eos/opendata/cms/dataset-semantics/NanoAOD/{recid}"
eos_dir = f"/eos/opendata/cms/dataset-semantics/NanoAOD/{recid}"
isExist = os.path.exists(output_dir)
if not isExist:
os.makedirs(output_dir)

script = "inspectNanoFile.py"

html_doc_path = f"{output_dir}/{dataset_name}_doc.html"
Expand All @@ -381,6 +381,7 @@ def get_dataset_semantics_doc(dataset_name, sample_file_path, recid):

return {"url": html_eos_path, "json": json_eos_path}


def get_doi(dataset_full_name):
    """Return DOI for the given dataset.

    The dataset's full name is looked up in the module-level ``DOI_INFO``
    (presumably a dict keyed by full dataset name -- confirm where it is
    built). An empty string is returned when no entry is found.
    """
    doi = DOI_INFO.get(dataset_full_name, "")
    return doi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@


RECID_START = 30400
RECID_MAX = 30500 # when this record ID number is reached, continue from the "next" number
RECID_NEXT = 30566 # next free record ID number
YEAR_CREATED = "2016"
YEAR_PUBLISHED = "2024"
COLLISION_ENERGY = "13Tev"
Expand Down Expand Up @@ -128,10 +130,10 @@ def main():

if not afile_python_filename.startswith("ReReco") and not afile_python_filename.startswith("recoskim"):
continue

if afile_python_filename in files_seen:
continue

files_seen.append(afile_python_filename)

# Create nice reco_*.py files for copying them over to EOSPUBLIC
Expand Down Expand Up @@ -209,6 +211,10 @@ def main():
)
recid += 1

# jump over the record ID range that was already preselected for collision data
if recid == RECID_MAX:
recid = RECID_NEXT

fdesc.write("}\n")
fdesc.close()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12870,8 +12870,8 @@
"Proton_singleRP_thetaY",
"Proton_singleRP_xi",
"Proton_singleRP_decRPId",
"nProton_multiRP",
"nProton_singleRP"
"nProton_singleRP",
"nProton_multiRP"
]
},
"Muon": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12663,8 +12663,8 @@
"Proton_singleRP_thetaY",
"Proton_singleRP_xi",
"Proton_singleRP_decRPId",
"nProton_singleRP",
"nProton_multiRP"
"nProton_multiRP",
"nProton_singleRP"
]
},
"Muon": {
Expand Down
Loading

0 comments on commit 3a2095a

Please sign in to comment.