Skip to content

Commit

Permalink
Issue #1057 extract inline CWL to resource files
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Feb 20, 2025
1 parent 709c225 commit cb10e23
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 59 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
include openeogeotrellis/**/*.sh
include openeogeotrellis/**/*.j2
include openeogeotrellis/integrations/cwl/*.cwl
55 changes: 6 additions & 49 deletions openeogeotrellis/deploy/kube.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,28 +152,7 @@ def _cwl_demo(args: ProcessArgs, env: EvalEnv):

launcher = CalrissianJobLauncher.from_context()

cwl_source = CwLSource.from_string(
"""
cwlVersion: v1.0
class: CommandLineTool
baseCommand: echo
requirements:
- class: DockerRequirement
dockerPull: debian:stretch-slim
inputs:
message:
type: string
default: "Hello World"
inputBinding:
position: 1
outputs:
output_file:
type: File
outputBinding:
glob: output.txt
stdout: output.txt
"""
)
cwl_source = CwLSource.from_resource(anchor="openeogeotrellis.integrations", path="cwl/hello.cwl")
correlation_id = get_job_id(default=None) or get_request_id(default=None)
cwl_arguments = [
"--message",
Expand Down Expand Up @@ -228,33 +207,7 @@ def _cwl_insar(args: ProcessArgs, env: EvalEnv):

launcher = CalrissianJobLauncher.from_context()

cwl_source = CwLSource.from_string(
f"""
cwlVersion: v1.0
class: CommandLineTool
baseCommand: OpenEO_insar.py
requirements:
DockerRequirement:
dockerPull: registry.stag.warsaw.openeo.dataspace.copernicus.eu/rand/openeo_insar:1.2
EnvVarRequirement:
envDef:
AWS_ACCESS_KEY_ID: {json.dumps(os.environ.get("SWIFT_ACCESS_KEY_ID", ""))}
AWS_SECRET_ACCESS_KEY: {json.dumps(os.environ.get("SWIFT_SECRET_ACCESS_KEY", ""))}
inputs:
input_base64_json:
type: string
inputBinding:
position: 1
outputs:
output_file:
type:
type: array
items: File
outputBinding:
glob: "*2images*"
"""
)
# correlation_id = get_job_id(default=None) or get_request_id(default=None)
cwl_source = CwLSource.from_resource(anchor="openeogeotrellis.integrations", path="cwl/insar.cwl")
input_base64_json = base64.b64encode(json.dumps(args).encode("utf8")).decode("ascii")
cwl_arguments = ["--input_base64_json", input_base64_json]

Expand All @@ -263,6 +216,10 @@ def _cwl_insar(args: ProcessArgs, env: EvalEnv):
cwl_source=cwl_source,
cwl_arguments=cwl_arguments,
output_paths=["output.txt"],
env_vars={
"AWS_ACCESS_KEY_ID": os.environ.get("SWIFT_ACCESS_KEY_ID", ""),
"AWS_SECRET_ACCESS_KEY": os.environ.get("SWIFT_SECRET_ACCESS_KEY", ""),
},
)

return results["output.txt"].read(encoding="utf8")
Expand Down
46 changes: 37 additions & 9 deletions openeogeotrellis/integrations/calrissian.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,16 @@
from openeogeotrellis.util.runtime import get_job_id, get_request_id
from openeogeotrellis.utils import s3_client


try:
# TODO #1060 importlib.resources on Python 3.8 is pretty limited so we need backport
import importlib_resources
except ImportError:
import importlib.resources

importlib_resources = importlib.resources


_log = logging.getLogger(__name__)


Expand Down Expand Up @@ -77,6 +87,14 @@ def from_url(cls, url: str) -> CwLSource:
resp.raise_for_status()
return cls(content=resp.text)

@classmethod
def from_resource(cls, anchor: str, path: str) -> CwLSource:
"""
Read CWL from a packaged resource file in importlib.resources-style.
"""
content = importlib_resources.files(anchor=anchor).joinpath(path).read_text(encoding="utf-8")
return cls(content=content)


class CalrissianJobLauncher:
"""
Expand Down Expand Up @@ -200,6 +218,7 @@ def create_cwl_job_manifest(
self,
cwl_path: str,
cwl_arguments: List[str],
env_vars: Optional[Dict[str, str]] = None,
) -> Tuple[kubernetes.client.V1Job, str]:
"""
Create a k8s manifest for a Calrissian CWL job.
Expand Down Expand Up @@ -242,6 +261,17 @@ def create_cwl_job_manifest(

_log.info(f"create_cwl_job_manifest {calrissian_arguments=}")

container_env_vars = [
kubernetes.client.V1EnvVar(
name="CALRISSIAN_POD_NAME",
value_from=kubernetes.client.V1EnvVarSource(
field_ref=kubernetes.client.V1ObjectFieldSelector(field_path="metadata.name")
),
)
]
if env_vars:
container_env_vars.extend(kubernetes.client.V1EnvVar(name=k, value=v) for k, v in env_vars.items())

container = kubernetes.client.V1Container(
name=name,
image=container_image,
Expand All @@ -254,14 +284,7 @@ def create_cwl_job_manifest(
)
for volume_info, read_only in volumes
],
env=[
kubernetes.client.V1EnvVar(
name="CALRISSIAN_POD_NAME",
value_from=kubernetes.client.V1EnvVarSource(
field_ref=kubernetes.client.V1ObjectFieldSelector(field_path="metadata.name")
),
)
],
env=container_env_vars,
)
manifest = kubernetes.client.V1Job(
metadata=kubernetes.client.V1ObjectMeta(
Expand Down Expand Up @@ -353,7 +376,11 @@ def get_output_volume_name(self) -> str:
return volume_name

def run_cwl_workflow(
self, cwl_source: CwLSource, cwl_arguments: List[str], output_paths: List[str]
self,
cwl_source: CwLSource,
cwl_arguments: List[str],
output_paths: List[str],
env_vars: Optional[Dict[str, str]] = None,
) -> Dict[str, CalrissianS3Result]:
"""
Run a CWL workflow on Calrissian and return the output as a string.
Expand All @@ -370,6 +397,7 @@ def run_cwl_workflow(
cwl_manifest, relative_output_dir = self.create_cwl_job_manifest(
cwl_path=cwl_path,
cwl_arguments=cwl_arguments,
env_vars=env_vars,
)
cwl_job = self.launch_job_and_wait(manifest=cwl_manifest)

Expand Down
18 changes: 18 additions & 0 deletions openeogeotrellis/integrations/cwl/hello.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
cwlVersion: v1.0
class: CommandLineTool
baseCommand: echo
requirements:
- class: DockerRequirement
dockerPull: debian:stretch-slim
inputs:
message:
type: string
default: "Hello World"
inputBinding:
position: 1
outputs:
output_file:
type: File
outputBinding:
glob: output.txt
stdout: output.txt
18 changes: 18 additions & 0 deletions openeogeotrellis/integrations/cwl/insar.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
cwlVersion: v1.0
class: CommandLineTool
baseCommand: OpenEO_insar.py
requirements:
DockerRequirement:
dockerPull: registry.stag.warsaw.openeo.dataspace.copernicus.eu/rand/openeo_insar:1.2
inputs:
input_base64_json:
type: string
inputBinding:
position: 1
outputs:
output_file:
type:
type: array
items: File
outputBinding:
glob: "*2images*"
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@
"traceback-with-variables==2.0.4",
'scipy>=1.8', # used by sentinel-3 reader
"PyJWT[crypto]>=2.9.0", # For identity tokens
"urllib3>=1.26.20"
"urllib3>=1.26.20",
"importlib_resources; python_version<'3.9'", # #1060 on python 3.8 we need importlib_resources backport
],
extras_require={
"dev": tests_require,
Expand Down
4 changes: 4 additions & 0 deletions tests/integrations/test_calrissian.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,3 +299,7 @@ def test_from_url(self, requests_mock):
requests_mock.get(url, text="cwlVersion: v1.0\nclass: CommandLineTool\n")
cwl = CwLSource.from_url(url=url)
assert cwl.get_content() == "cwlVersion: v1.0\nclass: CommandLineTool\n"

def test_from_resource(self):
cwl = CwLSource.from_resource(anchor="openeogeotrellis.integrations", path="cwl/hello.cwl")
assert "Hello World" in cwl.get_content()

0 comments on commit cb10e23

Please sign in to comment.