Skip to content

Commit ab32173

Browse files
authored
feat: create HTTP DELETE API endpoints to unregister ScoringFn and Benchmark resources in Llama Stack (#3371)
# What does this PR do? This PR provides functionality for users to unregister ScoringFn and Benchmark resources for the `scoring` and `eval` APIs. Closes #3051. ## Test Plan Updated integration and unit tests, verified via CI workflow.
1 parent 01bdcce commit ab32173

File tree

13 files changed

+241
-3
lines changed

13 files changed

+241
-3
lines changed

docs/_static/llama-stack-spec.html

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1380,6 +1380,40 @@
13801380
}
13811381
}
13821382
]
1383+
},
1384+
"delete": {
1385+
"responses": {
1386+
"200": {
1387+
"description": "OK"
1388+
},
1389+
"400": {
1390+
"$ref": "#/components/responses/BadRequest400"
1391+
},
1392+
"429": {
1393+
"$ref": "#/components/responses/TooManyRequests429"
1394+
},
1395+
"500": {
1396+
"$ref": "#/components/responses/InternalServerError500"
1397+
},
1398+
"default": {
1399+
"$ref": "#/components/responses/DefaultError"
1400+
}
1401+
},
1402+
"tags": [
1403+
"Benchmarks"
1404+
],
1405+
"description": "Unregister a benchmark.",
1406+
"parameters": [
1407+
{
1408+
"name": "benchmark_id",
1409+
"in": "path",
1410+
"description": "The ID of the benchmark to unregister.",
1411+
"required": true,
1412+
"schema": {
1413+
"type": "string"
1414+
}
1415+
}
1416+
]
13831417
}
13841418
},
13851419
"/v1/openai/v1/chat/completions/{completion_id}": {
@@ -1620,6 +1654,40 @@
16201654
}
16211655
}
16221656
]
1657+
},
1658+
"delete": {
1659+
"responses": {
1660+
"200": {
1661+
"description": "OK"
1662+
},
1663+
"400": {
1664+
"$ref": "#/components/responses/BadRequest400"
1665+
},
1666+
"429": {
1667+
"$ref": "#/components/responses/TooManyRequests429"
1668+
},
1669+
"500": {
1670+
"$ref": "#/components/responses/InternalServerError500"
1671+
},
1672+
"default": {
1673+
"$ref": "#/components/responses/DefaultError"
1674+
}
1675+
},
1676+
"tags": [
1677+
"ScoringFunctions"
1678+
],
1679+
"description": "Unregister a scoring function.",
1680+
"parameters": [
1681+
{
1682+
"name": "scoring_fn_id",
1683+
"in": "path",
1684+
"description": "The ID of the scoring function to unregister.",
1685+
"required": true,
1686+
"schema": {
1687+
"type": "string"
1688+
}
1689+
}
1690+
]
16231691
}
16241692
},
16251693
"/v1/shields/{identifier}": {

docs/_static/llama-stack-spec.yaml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -954,6 +954,30 @@ paths:
954954
required: true
955955
schema:
956956
type: string
957+
delete:
958+
responses:
959+
'200':
960+
description: OK
961+
'400':
962+
$ref: '#/components/responses/BadRequest400'
963+
'429':
964+
$ref: >-
965+
#/components/responses/TooManyRequests429
966+
'500':
967+
$ref: >-
968+
#/components/responses/InternalServerError500
969+
default:
970+
$ref: '#/components/responses/DefaultError'
971+
tags:
972+
- Benchmarks
973+
description: Unregister a benchmark.
974+
parameters:
975+
- name: benchmark_id
976+
in: path
977+
description: The ID of the benchmark to unregister.
978+
required: true
979+
schema:
980+
type: string
957981
/v1/openai/v1/chat/completions/{completion_id}:
958982
get:
959983
responses:
@@ -1119,6 +1143,31 @@ paths:
11191143
required: true
11201144
schema:
11211145
type: string
1146+
delete:
1147+
responses:
1148+
'200':
1149+
description: OK
1150+
'400':
1151+
$ref: '#/components/responses/BadRequest400'
1152+
'429':
1153+
$ref: >-
1154+
#/components/responses/TooManyRequests429
1155+
'500':
1156+
$ref: >-
1157+
#/components/responses/InternalServerError500
1158+
default:
1159+
$ref: '#/components/responses/DefaultError'
1160+
tags:
1161+
- ScoringFunctions
1162+
description: Unregister a scoring function.
1163+
parameters:
1164+
- name: scoring_fn_id
1165+
in: path
1166+
description: >-
1167+
The ID of the scoring function to unregister.
1168+
required: true
1169+
schema:
1170+
type: string
11221171
/v1/shields/{identifier}:
11231172
get:
11241173
responses:

llama_stack/apis/benchmarks/benchmarks.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,11 @@ async def register_benchmark(
9393
:param metadata: The metadata to use for the benchmark.
9494
"""
9595
...
96+
97+
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE")
98+
async def unregister_benchmark(self, benchmark_id: str) -> None:
99+
"""Unregister a benchmark.
100+
101+
:param benchmark_id: The ID of the benchmark to unregister.
102+
"""
103+
...

llama_stack/apis/scoring_functions/scoring_functions.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,3 +197,11 @@ async def register_scoring_function(
197197
:param params: The parameters for the scoring function for benchmark eval, these can be overridden for app eval.
198198
"""
199199
...
200+
201+
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE")
202+
async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
203+
"""Unregister a scoring function.
204+
205+
:param scoring_fn_id: The ID of the scoring function to unregister.
206+
"""
207+
...

llama_stack/core/routing_tables/benchmarks.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,7 @@ async def register_benchmark(
5656
provider_resource_id=provider_benchmark_id,
5757
)
5858
await self.register_object(benchmark)
59+
60+
async def unregister_benchmark(self, benchmark_id: str) -> None:
61+
existing_benchmark = await self.get_benchmark(benchmark_id)
62+
await self.unregister_object(existing_benchmark)

llama_stack/core/routing_tables/common.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None:
6464
return await p.unregister_shield(obj.identifier)
6565
elif api == Api.datasetio:
6666
return await p.unregister_dataset(obj.identifier)
67+
elif api == Api.eval:
68+
return await p.unregister_benchmark(obj.identifier)
69+
elif api == Api.scoring:
70+
return await p.unregister_scoring_function(obj.identifier)
6771
elif api == Api.tool_runtime:
6872
return await p.unregister_toolgroup(obj.identifier)
6973
else:

llama_stack/core/routing_tables/scoring_functions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,7 @@ async def register_scoring_function(
6060
)
6161
scoring_fn.provider_id = provider_id
6262
await self.register_object(scoring_fn)
63+
64+
async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
65+
existing_scoring_fn = await self.get_scoring_function(scoring_fn_id)
66+
await self.unregister_object(existing_scoring_fn)

llama_stack/providers/inline/eval/meta_reference/eval.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,13 @@ async def register_benchmark(self, task_def: Benchmark) -> None:
7575
)
7676
self.benchmarks[task_def.identifier] = task_def
7777

78+
async def unregister_benchmark(self, benchmark_id: str) -> None:
79+
if benchmark_id in self.benchmarks:
80+
del self.benchmarks[benchmark_id]
81+
82+
key = f"{EVAL_TASKS_PREFIX}{benchmark_id}"
83+
await self.kvstore.delete(key)
84+
7885
async def run_eval(
7986
self,
8087
benchmark_id: str,

llama_stack/providers/inline/scoring/llm_as_judge/scoring.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ async def list_scoring_functions(self) -> list[ScoringFn]:
6363
async def register_scoring_function(self, function_def: ScoringFn) -> None:
6464
self.llm_as_judge_fn.register_scoring_fn_def(function_def)
6565

66+
async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
67+
self.llm_as_judge_fn.unregister_scoring_fn_def(scoring_fn_id)
68+
6669
async def score_batch(
6770
self,
6871
dataset_id: str,

llama_stack/providers/remote/eval/nvidia/eval.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,18 +51,23 @@ async def initialize(self) -> None: ...
5151

5252
async def shutdown(self) -> None: ...
5353

54-
async def _evaluator_get(self, path):
54+
async def _evaluator_get(self, path: str):
5555
"""Helper for making GET requests to the evaluator service."""
5656
response = requests.get(url=f"{self.config.evaluator_url}{path}")
5757
response.raise_for_status()
5858
return response.json()
5959

60-
async def _evaluator_post(self, path, data):
60+
async def _evaluator_post(self, path: str, data: dict[str, Any]):
6161
"""Helper for making POST requests to the evaluator service."""
6262
response = requests.post(url=f"{self.config.evaluator_url}{path}", json=data)
6363
response.raise_for_status()
6464
return response.json()
6565

66+
async def _evaluator_delete(self, path: str) -> None:
67+
"""Helper for making DELETE requests to the evaluator service."""
68+
response = requests.delete(url=f"{self.config.evaluator_url}{path}")
69+
response.raise_for_status()
70+
6671
async def register_benchmark(self, task_def: Benchmark) -> None:
6772
"""Register a benchmark as an evaluation configuration."""
6873
await self._evaluator_post(
@@ -75,6 +80,10 @@ async def register_benchmark(self, task_def: Benchmark) -> None:
7580
},
7681
)
7782

83+
async def unregister_benchmark(self, benchmark_id: str) -> None:
84+
"""Unregister a benchmark evaluation configuration from NeMo Evaluator."""
85+
await self._evaluator_delete(f"/v1/evaluation/configs/{DEFAULT_NAMESPACE}/{benchmark_id}")
86+
7887
async def run_eval(
7988
self,
8089
benchmark_id: str,

0 commit comments

Comments
 (0)