cms-2016-collision-datasets: fix record IDs for new RECO files

Fixes the generation of record IDs for RECO configuration files, which were spilling into the record ID range already reserved for the collision data. Because adding the full provenance chain produces more RECO files than before, the record IDs now jump over that reserved interval and continue from the next free record ID. Also adds the NanoAOD data semantics documents from the latest run.
tiborsimko · Mar 8, 2024 · 3a2095a · 3a2095a
1 parent 784813d
commit 3a2095a
Show file tree

Hide file tree

Showing 21 changed files with 1,762 additions and 71 deletions.
diff --git a/cms-2016-collision-datasets/code/create_cms_2016_collision_datasets.py b/cms-2016-collision-datasets/code/create_cms_2016_collision_datasets.py
@@ -239,7 +239,7 @@ def get_run_numbers(dataset_full_name):
 
 
 def get_dataset_config_file_name(dataset_full_name):
-    dataset = dataset_full_name.split("/")[1]    
+    dataset = dataset_full_name.split("/")[1]
     run_period = dataset_full_name.split("/")[2].split("-", 1)[0]
     version = dataset_full_name.split("/")[2].split("-")[1]
     config_file = f"ReReco-{run_period}-{dataset}-{version}"
@@ -276,7 +276,6 @@ def create_selection_information(dataset, dataset_full_name):
     out += "<p><strong>Data taking / HLT</strong>"
     out += '<br/>The collision data were assigned to different RAW datasets using the following <a href="/record/30300">HLT configuration</a>.</p>'
     # data processing / NANO/PAT/RECO:
-    run_period = re.search(r"(Run[0-9]+.)", dataset_full_name).groups()[0]
     aodformat = dataset_full_name.split("/")[3]
     step_dataset = dataset_full_name
     steps = []
@@ -291,8 +290,8 @@ def create_selection_information(dataset, dataset_full_name):
             {"process": "PAT"},
             {"process": "RECO"}
         ]
-    
-    out += f"<p><strong>Data processing </strong>"
+
+    out += "<p><strong>Data processing </strong>"
     out += (
         "<br/>This %s dataset was processed from the RAW dataset by the following steps: "
         % (aodformat)
@@ -305,7 +304,7 @@ def create_selection_information(dataset, dataset_full_name):
         generator_text = "Configuration file for " + steps[i]['process'] + " step " + afile
         release = get_release_for_processing(step_dataset)
         global_tag = get_global_tag_for_processing(step_dataset)
-    
+
         out += "<br/><strong>Step %s </strong>" % steps[i]['process']
         out += "<br/>Release: %s" % release
         out += "<br/>Global tag: %s" % global_tag
@@ -359,14 +358,15 @@ def get_dataset_index_files(dataset_full_name):
             files.append((afile_uri, afile_size, afile_checksum))
     return files
 
+
 def get_dataset_semantics_doc(dataset_name, sample_file_path, recid):
     """Produce the dataset semantics files and return their data-curation paths for the given dataset."""
     output_dir = f"outputs/docs/NanoAOD/{recid}"
-    eos_dir=f"/eos/opendata/cms/dataset-semantics/NanoAOD/{recid}"
+    eos_dir = f"/eos/opendata/cms/dataset-semantics/NanoAOD/{recid}"
     isExist = os.path.exists(output_dir)
     if not isExist:
         os.makedirs(output_dir)
-    
+
     script = "inspectNanoFile.py"
 
     html_doc_path = f"{output_dir}/{dataset_name}_doc.html"
@@ -381,6 +381,7 @@ def get_dataset_semantics_doc(dataset_name, sample_file_path, recid):
 
     return {"url": html_eos_path, "json": json_eos_path}
 
+
 def get_doi(dataset_full_name):
     "Return DOI for the given dataset."
     return DOI_INFO.get(dataset_full_name, "")

diff --git a/cms-2016-collision-datasets/code/create_reco_config_file_records.py b/cms-2016-collision-datasets/code/create_reco_config_file_records.py
@@ -23,6 +23,8 @@
 
 
 RECID_START = 30400
+RECID_MAX = 30500   # when this record ID number is reached, continue from the "next" number
+RECID_NEXT = 30566  # next free record ID number
 YEAR_CREATED = "2016"
 YEAR_PUBLISHED = "2024"
 COLLISION_ENERGY = "13Tev"
@@ -128,10 +130,10 @@ def main():
 
             if not afile_python_filename.startswith("ReReco") and not afile_python_filename.startswith("recoskim"):
                 continue
-            
+
             if afile_python_filename in files_seen:
                 continue
-            
+
             files_seen.append(afile_python_filename)
 
             # Create nice reco_*.py files for copying them over to EOSPUBLIC
@@ -209,6 +211,10 @@ def main():
             )
             recid += 1
 
+            # jump over some record ID range which were already preselected for collision data
+            if recid == RECID_MAX:
+                recid = RECID_NEXT
+
     fdesc.write("}\n")
     fdesc.close()
 

diff --git a/cms-2016-collision-datasets/outputs/docs/NanoAOD/30518/BTagMu_doc.json b/cms-2016-collision-datasets/outputs/docs/NanoAOD/30518/BTagMu_doc.json
@@ -12870,8 +12870,8 @@
                         "Proton_singleRP_thetaY",
                         "Proton_singleRP_xi",
                         "Proton_singleRP_decRPId",
-                        "nProton_multiRP",
-                        "nProton_singleRP"
+                        "nProton_singleRP",
+                        "nProton_multiRP"
                     ]
                 },
                 "Muon": {

diff --git a/cms-2016-collision-datasets/outputs/docs/NanoAOD/30524/HTMHT_doc.json b/cms-2016-collision-datasets/outputs/docs/NanoAOD/30524/HTMHT_doc.json
@@ -12663,8 +12663,8 @@
                         "Proton_singleRP_thetaY",
                         "Proton_singleRP_xi",
                         "Proton_singleRP_decRPId",
-                        "nProton_singleRP",
-                        "nProton_multiRP"
+                        "nProton_multiRP",
+                        "nProton_singleRP"
                     ]
                 },
                 "Muon": {