Skip to content

Commit a02d321

Browse files
authored
support signed links to auxiliary files in batch job result STAC items (#423)
* support signed links to auxiliary files in batch job result STAC items Open-EO/openeo-geopyspark-driver#1278 * Revert "support signed links to auxiliary files in batch job result STAC items" This reverts commit 1109ec4. This added top-level (Collection) links to item-level links implicitly so not right conceptually. Support dedicated derived_from document only for "STAC 1.1 items" (implemented in #411). Open-EO/openeo-geopyspark-driver#1278 * support hydrating BatchJobResultMetadata.items Open-EO/openeo-geopyspark-driver#1278 * expose derived_from_document as link to auxiliary file Open-EO/openeo-geopyspark-driver#1278 * serve aux file Open-EO/openeo-geopyspark-driver#1278 * support signed download URL Open-EO/openeo-geopyspark-driver#1278 * _expose_internal => _expose_auxiliary Open-EO/openeo-geopyspark-driver#1278 * returning items was addressed in openeo-geopyspark-driver PR #1242 Open-EO/openeo-geopyspark-driver#1242 Open-EO/openeo-geopyspark-driver#1278 * support object storage Open-EO/openeo-geopyspark-driver#1278 * expose constant Open-EO/openeo-geopyspark-driver#1278 * adapt CHANGELOG Open-EO/openeo-geopyspark-driver#1278
1 parent 58c532c commit a02d321

File tree

5 files changed

+194
-6
lines changed

5 files changed

+194
-6
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ and start a new "In Progress" section above it.
2222
## In progress: 0.137.0
2323

2424
- Add `ephemeral_flask_server` testing utility (`openeo_driver.testing`) for request mocking based on a Flask app. Allows request/response mocking independently of the actual request library (`requests`, `urllib`, `urllib3`, etc.) through a well-documented API (Flask).
25+
- Support exposing auxiliary (non-asset) files as links ([Open-EO/openeo-geopyspark-driver#1278](https://github.com/Open-EO/openeo-geopyspark-driver/issues/1278))
2526

2627

2728
## 0.136.0

openeo_driver/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.137.0a1"
1+
__version__ = "0.137.0a2"

openeo_driver/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,7 @@ class JOB_STATUS:
6161
DEFAULT_LOG_LEVEL_PROCESSING = "info"
6262
# Default value for `level` in `GET /jobs/{job_id}/logs`, `GET /services/{service_id}/logs` requests
6363
DEFAULT_LOG_LEVEL_RETRIEVAL = "debug"
64+
65+
66+
class ITEM_LINK_PROPERTY:
    """Names of driver-internal properties that can be set on the links of a batch job result STAC item."""

    # A link carrying a truthy value for this property is exposed to the user as a
    # downloadable auxiliary (non-asset) file.
    EXPOSE_AUXILIARY = "_expose_auxiliary"

openeo_driver/views.py

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import typing
1010
from collections import defaultdict, namedtuple
1111
from typing import Callable, List, Optional, Tuple, Union
12+
from urllib.parse import urlparse
1213

1314
import flask
1415
import flask_cors
@@ -48,6 +49,7 @@
4849
from openeo_driver.constants import (
4950
DEFAULT_LOG_LEVEL_PROCESSING,
5051
DEFAULT_LOG_LEVEL_RETRIEVAL,
52+
ITEM_LINK_PROPERTY,
5153
JOB_STATUS,
5254
STAC_EXTENSION,
5355
)
@@ -1442,6 +1444,42 @@ def _get_job_result_item11(job_id, item_id, user_id):
14421444
geometry = BoundingBox.from_wsen_tuple(job_info.proj_bbox, job_info.epsg).as_polygon()
14431445
geometry = mapping(reproject_geometry(geometry, CRS.from_epsg(job_info.epsg), CRS.from_epsg(4326)))
14441446

1447+
exposable_links = [
1448+
link for link in item_metadata.get("links", []) if link.get(ITEM_LINK_PROPERTY.EXPOSE_AUXILIARY, False)
1449+
]
1450+
for link in exposable_links:
1451+
link.pop(ITEM_LINK_PROPERTY.EXPOSE_AUXILIARY)
1452+
auxiliary_filename = urlparse(link["href"]).path.split("/")[-1] # TODO: assumes file is not nested
1453+
1454+
if link["href"].startswith("s3://"):
1455+
link["href"] = backend_implementation.config.asset_url.build_url(
1456+
asset_metadata={"href": link["href"]}, # TODO: clean up this hack to support s3proxy
1457+
asset_name=auxiliary_filename,
1458+
job_id=job_id,
1459+
user_id=user_id,
1460+
)
1461+
else:
1462+
signer = get_backend_config().url_signer
1463+
if signer:
1464+
expires = signer.get_expires()
1465+
secure_key = signer.sign_job_asset(
1466+
job_id=job_id, user_id=user_id, filename=auxiliary_filename, expires=expires
1467+
)
1468+
user_base64 = user_id_b64_encode(user_id)
1469+
link["href"] = flask.url_for(
1470+
".download_job_auxiliary_file_signed",
1471+
job_id=job_id,
1472+
user_base64=user_base64,
1473+
filename=auxiliary_filename,
1474+
expires=expires,
1475+
secure_key=secure_key,
1476+
_external=True,
1477+
)
1478+
else:
1479+
link["href"] = flask.url_for(
1480+
".download_job_auxiliary_file", job_id=job_id, filename=auxiliary_filename, _external=True
1481+
)
1482+
14451483
stac_item = {
14461484
"type": "Feature",
14471485
"stac_version": "1.1.0",
@@ -1466,7 +1504,8 @@ def _get_job_result_item11(job_id, item_id, user_id):
14661504
"href": url_for(".list_job_results", job_id=job_id, _external=True), # SHOULD be absolute
14671505
"type": "application/json",
14681506
},
1469-
],
1507+
]
1508+
+ exposable_links,
14701509
"assets": assets,
14711510
"collection": job_id,
14721511
}
@@ -1483,6 +1522,42 @@ def _get_job_result_item11(job_id, item_id, user_id):
14831522
resp.mimetype = stac_item_media_type
14841523
return resp
14851524

1525+
@blueprint.route("/jobs/<job_id>/results/aux/<user_base64>/<secure_key>/<filename>", methods=["GET"])
def download_job_auxiliary_file_signed(job_id, user_base64, secure_key, filename):
    """
    Serve an auxiliary batch job result file through a signed URL.

    This route has no bearer auth decorator: the request is checked by verifying
    `secure_key` against the job id, the decoded user id, the file name and the
    `expires` query parameter.

    :param job_id: batch job id
    :param user_base64: base64 encoded user id (as embedded in the signed URL)
    :param secure_key: signature to verify
    :param filename: name of the auxiliary file
    :return: response produced by `_download_job_auxiliary_file`
    """
    expiry = request.args.get("expires")
    # NOTE(review): `url_signer` looks optional (the code that builds these links checks `if signer:`);
    # if it is None here, the `verify_job_asset` call below fails with an AttributeError -- confirm.
    url_signer = get_backend_config().url_signer
    owner_id = user_id_b64_decode(user_base64)
    url_signer.verify_job_asset(
        signature=secure_key,
        job_id=job_id,
        user_id=owner_id,
        filename=filename,
        expires=expiry,
    )
    return _download_job_auxiliary_file(job_id=job_id, filename=filename, user_id=owner_id)
1534+
1535+
@blueprint.route("/jobs/<job_id>/results/aux/<filename>", methods=["GET"])
@auth_handler.requires_bearer_auth
def download_job_auxiliary_file(job_id, filename, user: User):
    """
    Serve an auxiliary batch job result file to a bearer-authenticated user.

    :param job_id: batch job id
    :param filename: name of the auxiliary file
    :param user: authenticated user
    :return: response produced by `_download_job_auxiliary_file`
    """
    return _download_job_auxiliary_file(job_id=job_id, filename=filename, user_id=user.user_id)
1539+
1540+
def _download_job_auxiliary_file(job_id, filename, user_id):
    """
    Send the auxiliary (non-asset) file `filename` of a batch job as a download response.

    The file is looked up in the links of the job's result items: only links flagged with
    `ITEM_LINK_PROPERTY.EXPOSE_AUXILIARY` whose href ends with `/<filename>` are eligible,
    and the first match is served.

    :param job_id: batch job id
    :param filename: name of the auxiliary file (last path segment of the link href)
    :param user_id: id of the user owning the job
    :return: Flask response from `send_from_directory`, using the link's "type" as mimetype
    :raises FilePathInvalidException: if no exposable link matches `filename`,
        or if the matching link does not point to a local file
    """
    metadata = backend_implementation.batch_jobs.get_result_metadata(job_id=job_id, user_id=user_id)

    # Only serve what the backend explicitly flagged as exposable: the requested name
    # is matched against known links instead of being used as a path directly.
    # NOTE(review): suffix matching assumes the href contains a directory part ("…/<filename>").
    auxiliary_link = next(
        (
            link
            for item in metadata.items.values()
            for link in item.get("links", [])
            if link.get(ITEM_LINK_PROPERTY.EXPOSE_AUXILIARY, False) and link["href"].endswith(f"/{filename}")
        ),
        None,
    )
    if auxiliary_link is None:
        raise FilePathInvalidException(f"invalid file {filename!r}")

    uri_parts = urlparse(auxiliary_link["href"])

    # S3 URIs are handled by s3proxy.
    # Explicit check instead of `assert`: asserts are stripped under `python -O`,
    # which would let a non-local URI be interpreted as a local file path.
    if uri_parts.scheme not in ("", "file"):
        raise FilePathInvalidException(f"invalid file {filename!r}: unexpected scheme {uri_parts.scheme!r}")

    auxiliary_file = pathlib.Path(uri_parts.path)
    return send_from_directory(auxiliary_file.parent, auxiliary_file.name, mimetype=auxiliary_link.get("type"))
14861561

14871562
def _get_job_result_item(job_id, item_id, user_id):
14881563
if item_id == DriverMlModel.METADATA_FILE_NAME:

tests/test_views.py

Lines changed: 112 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
not_implemented,
3232
)
3333
from openeo_driver.config import OpenEoBackendConfig
34+
from openeo_driver.constants import ITEM_LINK_PROPERTY
3435
from openeo_driver.datacube import DriverVectorCube
3536
from openeo_driver.dummy import dummy_backend, dummy_config
3637
from openeo_driver.dummy.dummy_backend import DummyBackendImplementation, DummyProcessing, DummyProcessRegistry
@@ -3004,6 +3005,14 @@ def test_get_stac_1_1_item(self, api110, backend_implementation, backend_config_
30043005
"id": "5d2db643-5cc3-4b27-8ef3-11f7d203b221_2023-12-31T21:41:00Z",
30053006
"properties": {"datetime": "2023-12-31T21:41:00Z"},
30063007
"bbox": [3.359808992021044, 51.08284561357965, 4.690166134878123, 51.88641704215104],
3008+
"links": [
3009+
{
3010+
"rel": "custom",
3011+
"href": "/data/projects/OpenEO/07024ee9-7847-4b8a-b260-6c879a2b3cdc/07024ee9-7847-4b8a-b260-6c879a2b3cdc_input_items_9569134155392213115.json",
3012+
"type": "application/json",
3013+
ITEM_LINK_PROPERTY.EXPOSE_AUXILIARY: True,
3014+
},
3015+
],
30073016
}
30083017
}
30093018
),
@@ -3042,10 +3051,16 @@ def test_get_stac_1_1_item(self, api110, backend_implementation, backend_config_
30423051
'type': 'application/geo+json'
30433052
},
30443053
{
3045-
'href': 'http://oeo.net/openeo/1.1.0/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results',
3046-
'rel': 'collection',
3047-
'type': 'application/json'
3048-
}
3054+
"href": "http://oeo.net/openeo/1.1.0/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results",
3055+
"rel": "collection",
3056+
"type": "application/json",
3057+
},
3058+
{
3059+
"rel": "custom",
3060+
# TODO: what does the URL look like? Currently /aux instead of /assets; should /items be in there?
3061+
"href": "http://oeo.net/openeo/1.1.0/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results/aux/TXIuVGVzdA==/a0274432f627ca9cf9b4ff79d57c61bd/07024ee9-7847-4b8a-b260-6c879a2b3cdc_input_items_9569134155392213115.json",
3062+
"type": "application/json",
3063+
},
30493064
],
30503065
'properties': {'datetime': '2023-12-31T21:41:00Z'},
30513066
'stac_extensions': ['https://stac-extensions.github.io/eo/v1.1.0/schema.json',
@@ -3055,6 +3070,99 @@ def test_get_stac_1_1_item(self, api110, backend_implementation, backend_config_
30553070
'type': 'Feature'
30563071
}
30573072

3073+
@mock.patch("time.time", mock.MagicMock(return_value=1234))
@pytest.mark.parametrize("backend_config_overrides", [{"url_signer": UrlSigner(secret="123&@#", expiration=1000)}])
def test_download_job_auxiliary_file_signed_with_expiration(self, api110, tmp_path, backend_config_overrides):
    """
    An auxiliary file linked from a job result item can be downloaded through a signed URL.

    `time.time` is pinned to 1234 and the signer is configured with `expiration=1000`;
    the request below uses `expires=2234` (presumably 1234 + 1000).
    """
    job_id = "07024ee9-7847-4b8a-b260-6c879a2b3cdc"
    item_id = "5d2db643-5cc3-4b27-8ef3-11f7d203b221_2023-12-31T21:41:00Z"

    # The pytest temp dir plays the role of the job directory.
    auxiliary_file = tmp_path / "07024ee9-7847-4b8a-b260-6c879a2b3cdc_input_items_9569134155392213115.json"
    auxiliary_file.write_text("aux")

    item = {
        "geometry": {
            "coordinates": [
                [
                    [3.359808992021044, 51.08284561357965],
                    [3.359808992021044, 51.88641704215104],
                    [4.690166134878123, 51.88641704215104],
                    [4.690166134878123, 51.08284561357965],
                    [3.359808992021044, 51.08284561357965],
                ]
            ],
            "type": "Polygon",
        },
        "assets": {
            "openEO": {
                "datetime": "2023-12-31T21:41:00Z",
                "roles": ["data"],
                "bbox": [
                    3.359808992021044,
                    51.08284561357965,
                    4.690166134878123,
                    51.88641704215104,
                ],
                "geometry": {
                    "coordinates": [
                        [
                            [3.359808992021044, 51.08284561357965],
                            [3.359808992021044, 51.88641704215104],
                            [4.690166134878123, 51.88641704215104],
                            [4.690166134878123, 51.08284561357965],
                            [3.359808992021044, 51.08284561357965],
                        ]
                    ],
                    "type": "Polygon",
                },
                "href": "s3://openeo-data-staging-waw4-1/batch_jobs/j-250605095828442799fdde3c29b5b047/openEO_20231231T214100Z.tif",
                "nodata": "nan",
                "type": "image/tiff; application=geotiff",
                "bands": [
                    {"name": "LST", "common_name": "surface_temperature", "aliases": ["LST_in:LST"]}
                ],
                "raster:bands": [
                    {
                        "name": "LST",
                        "statistics": {
                            "valid_percent": 66.88,
                            "maximum": 281.04800415039,
                            "stddev": 19.598456945276,
                            "minimum": 224.46798706055,
                            "mean": 259.57087672984,
                        },
                    }
                ],
            }
        },
        "id": "5d2db643-5cc3-4b27-8ef3-11f7d203b221_2023-12-31T21:41:00Z",
        "properties": {"datetime": "2023-12-31T21:41:00Z"},
        "bbox": [3.359808992021044, 51.08284561357965, 4.690166134878123, 51.88641704215104],
        # The link under test: a local file, flagged as exposable auxiliary file.
        "links": [
            {
                "rel": "custom",
                "href": str(auxiliary_file),
                "type": "application/json",
                ITEM_LINK_PROPERTY.EXPOSE_AUXILIARY: True,
            },
        ],
    }

    with self._fresh_job_registry():
        dummy_backend.DummyBatchJobs.set_result_metadata(
            job_id=job_id,
            user_id=TEST_USER,
            metadata=BatchJobResultMetadata(items={item_id: item}),
        )

        # Signed URL: no auth header is sent with this request.
        response = api110.get(
            "/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results/aux/TXIuVGVzdA==/5b3d0f30d2ad8ef3146dc0785821aac3/07024ee9-7847-4b8a-b260-6c879a2b3cdc_input_items_9569134155392213115.json?expires=2234",
        )

        assert response.text == "aux"
3165+
30583166
def test_get_job_results_invalid_job(self, api):
30593167
api.get("/jobs/deadbeef-f00/results", headers=self.AUTH_HEADER).assert_error(404, "JobNotFound")
30603168

0 commit comments

Comments
 (0)