From f08043e755b1bfd1671cdc8fdcab9788b1f7cb0d Mon Sep 17 00:00:00 2001 From: Bastien Orivel Date: Fri, 10 Oct 2025 12:37:51 +0200 Subject: [PATCH] Fix get_artifact returning the TC API response instead of the artifact This fixes a regression from 384afee759351f22ab9e0a43ac066f3fa5f3b6d9 where we started using the taskcluster python client instead of making requests manually. The artifacts route is special because the API returns some JSON as part of a 303 to the artifact's content URL. The previous code followed that 303. The python client does not. So instead we're getting the body which looks like this: `{"type": "s3/...", "url": "whereverthe303pointsat"}`. This fixed it by following the redirect manually to restore the previous behavior. I also fixed the test as the previous one was basically ignoring the fact that taskcluster was returning a 303 and was mocking as if it didn't exist. It's worth noting that while it restores the behavior of returning the artifact's content (as a ByteIO), it is still a breaking change as the previous code used to return a Response object. Fixes #812 --- src/taskgraph/util/taskcluster.py | 3 ++- test/test_util_taskcluster.py | 21 ++++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/taskgraph/util/taskcluster.py b/src/taskgraph/util/taskcluster.py index f1a31511..85f6bc73 100644 --- a/src/taskgraph/util/taskcluster.py +++ b/src/taskgraph/util/taskcluster.py @@ -170,7 +170,8 @@ def get_artifact(task_id, path): """ queue = get_taskcluster_client("queue") response = queue.getLatestArtifact(task_id, path) - return _handle_artifact(path, response) + artifact_response = requests.get(response["url"]) + return _handle_artifact(path, artifact_response) def list_artifacts(task_id): diff --git a/test/test_util_taskcluster.py b/test/test_util_taskcluster.py index 01d38421..bdff659e 100644 --- a/test/test_util_taskcluster.py +++ b/test/test_util_taskcluster.py @@ -110,16 +110,25 @@ def test_get_artifact(responses, root_url): # Test text artifact responses.get( - f"{root_url}/api/queue/v1/task/{tid}/artifacts/artifact.txt", + "http://foo.bar/artifact.txt", body=b"foobar", ) + responses.get( + f"{root_url}/api/queue/v1/task/{tid}/artifacts/artifact.txt", + body=b'{"type": "s3", "url": "http://foo.bar/artifact.txt"}', + status=303, + headers={"Location": "http://foo.bar/artifact.txt"}, + ) raw = tc.get_artifact(tid, "artifact.txt") assert raw.read() == b"foobar" # Test JSON artifact + responses.get("http://foo.bar/artifact.json", json={"foo": "bar"}) responses.get( f"{root_url}/api/queue/v1/task/{tid}/artifacts/artifact.json", - json={"foo": "bar"}, + body=b'{"type": "s3", "url": "http://foo.bar/artifact.json"}', + status=303, + headers={"Location": "http://foo.bar/artifact.json"}, ) result = tc.get_artifact(tid, "artifact.json") assert result == {"foo": "bar"} @@ -127,9 +136,15 @@ def test_get_artifact(responses, root_url): # Test YAML artifact expected_result = {"foo": b"\xe2\x81\x83".decode()} responses.get( - f"{root_url}/api/queue/v1/task/{tid}/artifacts/artifact.yml", + "http://foo.bar/artifact.yml", body=b'foo: "\xe2\x81\x83"', ) + responses.get( + f"{root_url}/api/queue/v1/task/{tid}/artifacts/artifact.yml", + body=b'{"type": "s3", "url": "http://foo.bar/artifact.yml"}', + status=303, + headers={"Location": "http://foo.bar/artifact.yml"}, + ) result = tc.get_artifact(tid, "artifact.yml") assert result == expected_result