Skip to content

Implement biosimulations api calls #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 29 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .github/workflows/batch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Manually-triggered workflow: install the package and run the test suite.
name: Submit Simulation Batch

on:
  # manual trigger only — run from the Actions tab
  workflow_dispatch:

jobs:
  CI:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        # v2 runs on a deprecated Node runtime; v4 is the current supported release
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install .
          # pytest is invoked below but is not guaranteed to be a package dependency
          pip install pytest

      - name: Test BioCompose
        run: |
          pytest tests/main.py
5 changes: 3 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install -r requirements.datagen.txt
pip install .

- name: Test BioCompose
- name: Run Batch
run: |
pytest tests/main.py
cd datagen
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ demos/.ipynb_checkpoints/
test_archive.ipynb
poetry.lock
.venv
datagen/verification_request/results
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,4 @@ poetry run python -m ipykernel install --user --name=$ENV_NAME --display-name "$
### Smoldyn to Simularium conversion:
A convenient template notebook for converting the outputs of Smoldyn simulations to Simularium trajectories can be
[found here.](https://colab.research.google.com/drive/17uMMRq3L3KqRIXnezahM6TtOtJYK8Cu6#scrollTo=6n5Wf58hthFm)

26 changes: 12 additions & 14 deletions bio_compose/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import warnings
import zipfile
from typing import *
from functools import wraps

import requests

Expand Down Expand Up @@ -85,7 +84,7 @@ def verify(*args) -> VerificationResult:
elif len(args) == 5:
simulators = args[4]
else:
simulators = ['amici', 'copasi', 'tellurium']
simulators = DEFAULT_SBML_SIMULATORS

run_sbml = False
for arg in args:
Expand All @@ -103,7 +102,8 @@ def verify(*args) -> VerificationResult:

# fetch params
submission = submission_generator(*args)
job_id = submission.get('job_id')
time.sleep(1)
job_id = submission.get('job_id') if submission else None

# poll gateway for results
n_attempts = 0
Expand Down Expand Up @@ -132,14 +132,16 @@ def verify(*args) -> VerificationResult:
else:
output = verifier.get_output(job_id=job_id)
break
else:
return VerificationResult({'content': {'job_id': 'A job ID could not be identified.'}})

return VerificationResult(data=output)


def run_simulation(*args, **kwargs) -> SimulationResult:
"""Run a simulation with BioCompose.

:param args: Positional arguments
:param args: Positional arguments to be unpacked

* 1 argument: smoldyn simulation configuration in which time parameters (dt, duration) are already defined. **Smoldyn simulation only**.
* 3 arguments: smoldyn configuration file, smoldyn simulation duration, smoldyn simulation dt. **Smoldyn simulation only**.
Expand Down Expand Up @@ -304,7 +306,6 @@ def extract_sbml_from_zip(zip_path: str, output_dir: str):
"""
Extract a single XML(SBML) file from a zip archive retrieved from BioModels to a specified directory.

Args:
:param zip_path: (`str`) Path to the zip file.
:param output_dir: (`str`) Directory where the extracted file will be saved.
"""
Expand Down Expand Up @@ -341,22 +342,19 @@ def run_batch_sbml_verification(model_files: list[str], start: int, stop: int, s
return results


def run_batch_verification(model_files: list[str], *args) -> Dict[str, VerificationResult]:
def run_batch_verification(input_files: list[str], *args) -> Dict[str, VerificationResult]:
"""
Run a batch of verifications
Run several verifications as a synchronous batch, returning a dictionary of verification results indexed by verification job ID.

Args:
:param model_files: (`list[str]`) A list of biomodel SBML files to verify.

Positional arguments:
:param args: (`list | tuple`) Positional arguments to pass to `run_batch_verification`: if sbml verifications, start, stop, steps.
:param input_files: (`list[str]`) A list of biomodel SBML files to verify.
:param args: Positional arguments to use as unpacked arguments in `run_batch_verification` related to the verification scope (SBML-only/OMEX): if sbml verifications, start, stop, steps.

:return: Verification results indexed by verification Job ID.
:rtype: `dict`
"""
results = {}
for model_file in model_files:
verification = verify(model_file, *args)
for input_file in input_files:
verification = verify(input_file, *args)
results[verification.job_id] = verification

return results
Empty file.
Empty file.
144 changes: 144 additions & 0 deletions biosimulations_runutils/biosim_pipeline/biosim_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import http
import http.client  # `import http` alone does not load the `client` submodule
import json
import os
from typing import BinaryIO, Union

import requests
from pydantic import BaseModel

from biosimulations_runutils.biosim_pipeline.data_manager import DataManager
from biosimulations_runutils.biosim_pipeline.datamodels import SourceOmex, Simulator, SimulationRun, BiosimulationsProject


class _SimulationRunApiRequest(BaseModel):
    """Request body serialized into the `simulationRun` multipart field of POST /runs.

    NOTE(review): field names appear to mirror the API's camelCase JSON schema
    — confirm against the BioSimulations API spec.
    """
    name: str  # run title — presumably the project id (see run_project); TODO confirm what the API shows it as
    simulator: str
    simulatorVersion: str
    maxTime: int  # in minutes
    # Optional fields apparently supported by the API but unused here:
    # email: Optional[str] = None
    # cpus: Optional[int] = None
    # memory: Optional[int] = None (in GB)


def check_run_status(simulation_run: SimulationRun) -> str:
    """
    Query the BioSimulations API for the current status of a simulation run.

    :param simulation_run: run whose ``simulation_id`` is polled.
    :return: the API's status string, or ``"not found"`` on HTTP 404.
    :raises ValueError: if the API_BASE_URL environment variable is unset.
    """
    api_base_url = os.environ.get('API_BASE_URL')
    if api_base_url is None:
        # explicit raise survives `python -O`; the original `assert` would be stripped
        raise ValueError("API_BASE_URL environment variable is not set")
    getrun = requests.get(api_base_url + "/runs/" + simulation_run.simulation_id)
    if getrun.status_code == 404:
        return "not found"
    return getrun.json()['status']


def run_project(
        source_omex: SourceOmex,
        simulator: Simulator,
        data_manager: DataManager,
        simulator_version: str,
        max_time: int = 600) -> None:
    """
    Submit an OMEX archive to BioSimulations and record the resulting run.

    :param source_omex: archive path plus project id to submit.
    :param simulator: simulator to execute the archive with.
    :param data_manager: sink used to persist the resulting ``SimulationRun`` record.
    :param simulator_version: version string of the chosen simulator.
    :param max_time: wall-clock limit for the run, in minutes (default 600 —
        previously hard-coded).
    """
    run_api_url = str(os.environ.get('API_BASE_URL')) + '/runs'
    run_app_base_url = str(os.environ.get('RUN_APP_BASE_URL'))

    simulation_run_request = _SimulationRunApiRequest(
        name=source_omex.project_id,
        simulator=simulator,
        simulatorVersion=simulator_version,
        maxTime=max_time,
    )

    print(source_omex.omex_file)
    with open(source_omex.omex_file, 'rb') as omex_file_handle:
        # multipart upload: the archive bytes plus the run request as a JSON form field
        multipart_form_data: dict[str, Union[tuple[str, BinaryIO], tuple[None, str]]] = {
            'file': (source_omex.project_id + '.omex', omex_file_handle),
            'simulationRun': (None, simulation_run_request.json()),
        }
        req = requests.post(run_api_url, files=multipart_form_data)
    req.raise_for_status()
    res = req.json()

    simulation_id = res["id"]
    data_manager.write_run(SimulationRun(
        simulator=simulator,
        simulator_version=res['simulatorVersion'],  # use the version the API resolved
        simulation_id=simulation_id,
        project_id=source_omex.project_id,
        status=res['status']
    ))

    print("Ran " + source_omex.project_id + " on biosimulations with simulation id: " + simulation_id)
    print("View:", run_app_base_url + "/runs/" + simulation_id)


def publish_project(data_manager: DataManager, run: SimulationRun, overwrite: bool = False) -> None:
    """
    Publish a succeeded simulation run as a BioSimulations project and record it.

    No-op when the run did not succeed, when the project is already recorded
    locally, or when it already exists remotely and ``overwrite`` is False.

    :param data_manager: source of known projects and sink for the new record.
    :param run: the simulation run to publish; must have status "SUCCEEDED".
    :param overwrite: replace (PUT) an already-published project when True.
    """
    api_base_url = os.environ.get('API_BASE_URL')
    if run.status != "SUCCEEDED":
        print(run.project_id, "did not succeed - or status needs to be updated. status is", run.status)
        return

    for project in data_manager.read_projects():
        if project.project_id == run.project_id:
            print(run.project_id, "already published")
            return

    simulation_publish_data = {
        'id': run.project_id,
        'simulationRun': run.simulation_id,
    }
    token = get_token()
    headers = {
        "Authorization": f"{token}"
    }
    print(run.project_id, "publishing")
    project_url = f"{api_base_url}/projects/" + run.project_id
    getproj = requests.get(project_url, headers=headers)
    # POST to create when absent; PUT to replace only when overwrite was requested
    if getproj.status_code == 404:
        http_call = requests.post
    elif overwrite:
        http_call = requests.put
    else:
        return

    req = http_call(project_url, json=simulation_publish_data, headers=headers)
    req.raise_for_status()
    data_manager.write_project(BiosimulationsProject(
        project_id=run.project_id,
        simulation_id=run.simulation_id
    ))


def get_token() -> str:
    """
    Obtain an OAuth2 bearer token via the client-credentials grant.

    Reads CLIENT_ID / CLIENT_SECRET / AUTH_HOST / AUTH_AUDIENCE from the
    environment (placeholder values are sent when unset, which the auth
    server will reject).

    :return: the access token prefixed with "Bearer ".
    """
    client_id = os.environ.get('CLIENT_ID', "CLIENT_ID-not-set")
    client_secret = os.environ.get('CLIENT_SECRET', "CLIENT_SECRET-not-set")
    auth_host = os.environ.get('AUTH_HOST', "AUTH_HOST-not-set")
    auth_audience = os.environ.get('AUTH_AUDIENCE', "AUTH_AUDIENCE-not-set")

    # json.dumps handles escaping; the previous f-string template produced
    # invalid JSON whenever a credential contained quotes or backslashes.
    payload = json.dumps({
        "client_id": client_id,
        "client_secret": client_secret,
        "audience": auth_audience,
        "grant_type": "client_credentials",
    })
    headers = {'content-type': "application/json"}

    conn = http.client.HTTPSConnection(auth_host)
    try:
        conn.request("POST", "/oauth/token", payload, headers)
        res = conn.getresponse()
        data_bytes = res.read()
    finally:
        conn.close()  # the connection was previously leaked

    data_dict = json.loads(data_bytes.decode("utf-8"))
    token: str = data_dict['access_token']
    return "Bearer " + token


107 changes: 107 additions & 0 deletions biosimulations_runutils/biosim_pipeline/data_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import json
import os
from pathlib import Path
from typing import Optional

from biosimulations_runutils.biosim_pipeline.datamodels import (
BiosimulationsProject,
SourceOmex,
SimulationRun,
SimulatorComparison
)


def _get_project_name(omex_file: Path) -> str:
return str(omex_file.name).split(".")[0]


class DataManager(object):
omex_src_dir: Path
out_dir: Path
projects_ndjson_file: Path
runs_ndjson_file: Path

def __init__(self, omex_src_dir: Optional[Path] = None, out_dir: Optional[Path] = None):
self.omex_src_dir = Path(os.environ.get("OMEX_SOURCE_DIR", "OMEX_SOURCE_DIR-not-specified"))
if omex_src_dir is not None:
self.omex_src_dir = omex_src_dir
if not os.path.exists(self.omex_src_dir):
raise ValueError(f"Base source directory {self.omex_src_dir} does not exist")

self.out_dir = Path(os.environ.get("OMEX_OUTPUT_DIR", "OMEX_OUTPUT_DIR-not-specified"))
if out_dir is not None:
self.out_dir = out_dir
if not os.path.exists(self.out_dir):
os.makedirs(self.out_dir)

self.projects_ndjson_file = self.out_dir / 'biosimulations_projects.ndjson'
self.runs_ndjson_file = self.out_dir / 'biosimulations_runs.ndjson'
self.compare_ndjson_file = self.out_dir / 'biosimulations_comparisons.ndjson'

def read_run_requests(self) -> list[SimulationRun]:
projects: list[SimulationRun]
if os.path.exists(self.runs_ndjson_file):
with open(self.runs_ndjson_file) as f:
projects = [SimulationRun(**json.loads(line)) for line in f.readlines()]
else:
projects = []
return projects

def read_projects(self) -> list[BiosimulationsProject]:
projects: list[BiosimulationsProject]
if os.path.exists(self.projects_ndjson_file):
with open(self.projects_ndjson_file) as f:
projects = [BiosimulationsProject(**json.loads(line)) for line in f.readlines()]
else:
projects = []
return projects

def write_project(self, project: BiosimulationsProject) -> None:
with open(self.projects_ndjson_file, 'a') as f:
f.write(json.dumps(project.dict()) + "\n")

def get_spec_omex_list(self) -> list[Path]:
omex_files: list[Path] = []
for omex_file in os.listdir(self.omex_src_dir):
if not str(omex_file).endswith(".omex"):
continue
omex_files.append(self.omex_src_dir / str(omex_file))
return omex_files

def get_source_omex_archives(self) -> list[SourceOmex]:
source_omex_archives: list[SourceOmex] = []
for omex_file_name in os.listdir(self.omex_src_dir):
if not str(omex_file_name).endswith(".omex"):
continue
omex_file = self.omex_src_dir / str(omex_file_name)
project_id = _get_project_name(omex_file)
source_omex_archives.append(SourceOmex(omex_file=omex_file, project_id=project_id))
return source_omex_archives

def write_run(self, simulation_run: SimulationRun) -> None:
with open(self.runs_ndjson_file, 'a') as f:
f.write(json.dumps(simulation_run.dict()) + "\n")

def write_runs(self, runs: list[SimulationRun]):
with open(self.runs_ndjson_file, 'wt') as f:
for run in runs:
f.write(json.dumps(run.dict()) + "\n")

def write_comparison(self, simulation_comparison: SimulatorComparison) -> None:
with open(self.compare_ndjson_file, 'a') as f:
f.write(json.dumps(simulation_comparison.dict()) + "\n")

def read_comparisons(self) -> list[SimulatorComparison]:
comparisons: list[SimulatorComparison]
if os.path.exists(self.compare_ndjson_file):
with open(self.compare_ndjson_file) as f:
comparisons = [SimulatorComparison(**json.loads(line)) for line in f.readlines()]
else:
comparisons = []
return comparisons

def get_run_output_dir(self, simulation_run: SimulationRun) -> Path:
run_out_dir = self.out_dir / simulation_run.project_id / simulation_run.simulator.value / simulation_run.simulator_version
if not os.path.exists(run_out_dir):
os.makedirs(run_out_dir)
return run_out_dir
Loading