diff --git a/code_review_graph/embeddings.py b/code_review_graph/embeddings.py index be29045..dc413f7 100644 --- a/code_review_graph/embeddings.py +++ b/code_review_graph/embeddings.py @@ -22,10 +22,20 @@ from typing import Any from urllib.parse import urlparse +from . import __version__ as _crg_version from .graph import GraphNode, GraphStore, node_to_dict logger = logging.getLogger(__name__) +# Sent on every cloud-provider HTTP request. Some providers (e.g. Fireworks) +# sit behind Cloudflare and reject the urllib default ``Python-urllib/X.Y`` +# UA with HTTP 403 / error 1010 ("browser signature banned"). A real UA gets +# us through and gives upstream a way to identify CRG-driven traffic. +_USER_AGENT = ( + f"code-review-graph/{_crg_version} " + "(+https://github.com/tirth8205/code-review-graph)" +) + # --------------------------------------------------------------------------- # Provider Interface and Implementations # --------------------------------------------------------------------------- @@ -198,6 +208,8 @@ def _call_api(self, texts: list[str], task_type: str) -> list[list[float]]: headers={ "Content-Type": "application/json", "Authorization": f"Bearer {self._api_key}", + "User-Agent": _USER_AGENT, + "Accept": "application/json", }, ) @@ -353,6 +365,8 @@ def _call_api(self, texts: list[str]) -> list[list[float]]: headers={ "Content-Type": "application/json", "Authorization": f"Bearer {self._api_key}", + "User-Agent": _USER_AGENT, + "Accept": "application/json", }, ) diff --git a/tests/test_embeddings.py b/tests/test_embeddings.py index 2f5cc4b..f05c22b 100644 --- a/tests/test_embeddings.py +++ b/tests/test_embeddings.py @@ -334,6 +334,30 @@ def test_embed_api_error_raises(self): with pytest.raises(RuntimeError, match="invalid api key"): provider.embed_query("test") + def test_embed_sends_user_agent_header(self): + # urllib's default UA ("Python-urllib/X.Y") is rejected by some + # Cloudflare-fronted gateways with HTTP 403 / error 1010. CRG must + # send an explicit User-Agent so requests get through. + provider = MiniMaxEmbeddingProvider(api_key="test-key") + mock_response = json.dumps({ + "vectors": [[0.1] * 1536], + "total_tokens": 1, + "base_resp": {"status_code": 0, "status_msg": "success"}, + }).encode("utf-8") + + mock_resp_obj = MagicMock() + mock_resp_obj.read.return_value = mock_response + mock_resp_obj.__enter__ = MagicMock(return_value=mock_resp_obj) + mock_resp_obj.__exit__ = MagicMock(return_value=False) + + with patch("urllib.request.urlopen", return_value=mock_resp_obj) as mock_urlopen: + provider.embed_query("hello") + + req = mock_urlopen.call_args[0][0] + ua = req.headers.get("User-agent", "") + assert ua.startswith("code-review-graph/") + assert "github.com/tirth8205/code-review-graph" in ua + class TestGetProviderMiniMax: """Tests for get_provider() with MiniMax.""" @@ -464,6 +488,12 @@ def test_embed_calls_api_with_correct_payload(self): assert "dimensions" not in payload # not pinned by default assert req.headers["Authorization"] == "Bearer secret-key" assert req.headers["Content-type"] == "application/json" + # Cloudflare-fronted gateways (e.g. Fireworks) reject the urllib + # default UA with HTTP 403 / error 1010. See _USER_AGENT in + # embeddings.py. + ua = req.headers.get("User-agent", "") + assert ua.startswith("code-review-graph/") + assert "github.com/tirth8205/code-review-graph" in ua assert req.full_url == "http://127.0.0.1:3000/v1/embeddings" def test_explicit_dimension_forwarded_in_payload(self):