This gateway service enables controlled interaction with the FMS Guardrails Orchestrator by enforcing stricter access to its exposed endpoints. It provides a mechanism for configuring fixed detector pipelines and exposes a unique /v1/chat/completions endpoint for each configured pipeline, allowing a guardrailed chat completions model to be used as a drop-in replacement for an unguardrailed one.
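For example, a client that already targets an OpenAI-compatible /v1/chat/completions endpoint only needs its base URL pointed at a gateway route. A minimal sketch, assuming the gateway listens on localhost:8090 (as in the example further below) and a hypothetical model server at localhost:8000:

# Unguardrailed: the client calls the model server directly.
curl "localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -d '{"model": "Qwen/Qwen2.5-1.5B-Instruct", "messages": [{"role": "user", "content": "hello"}]}'

# Guardrailed: the same request body is sent to the gateway's route-specific
# endpoint instead, so the detectors configured for the pii route are applied.
curl "localhost:8090/pii/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -d '{"model": "Qwen/Qwen2.5-1.5B-Instruct", "messages": [{"role": "user", "content": "hello"}]}'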
To run the entire vllm-orchestrator-gateway stack, a few services need to be spun up. Some of these, such as the detectors and generation models, can be swapped for other services that follow the same APIs.
The config has three main fields: orchestrator, detectors, and routes.
orchestrator specifies where the orchestrator service is hosted.
detectors lists the detector services that have been defined in the fms-guardrails-orchestrator config file. For each detector, you can specify whether it applies to the input and/or the output.
routes are the dynamically exposed routes used to enforce detectors on endpoints, such as the pii route below, which registers the regex-language detector. A route can also specify no detectors, like the passthrough route below.
fallback_message in a route is the response the gateway returns when a detection is found in either the input or the output.

Example gateway config:
orchestrator:
  host: localhost
  port: 8085
detectors:
  - name: regex-language
    input: false
    output: true
    detector_params:
      regex:
        - email
        - ssn
routes:
  - name: pii
    detectors:
      - regex-language
    fallback_message: "I'm sorry, I'm afraid I can't do that."
  - name: passthrough
    detectors:

Example request to the pii route:

curl "localhost:8090/pii/v1/chat/completions" \
-H "Content-Type: application/json" \
-d '{
"model": "Qwen/Qwen2.5-1.5B-Instruct",
"messages": [
{
"role": "user",
"content": "say hello to me at [email protected]"
},
{
"role": "user",
"content": "btw here is my social 123456789"
}
]
}'

Example response when no detections are returned; the model's response is passed through unchanged:

{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"audio": null,
"content": "Hello! It looks like you've provided my email address and a social security number. I'm just an AI assistant and not an email or social security system. Please correct this.",
"refusal": null,
"role": "assistant",
"tool_calls": null
}
}
],
"created": 1741182909,
"detections": null,
"id": "chatcmpl-971213a0e09446a8b11bd447db0f3a64",
"model": "Qwen/Qwen2.5-1.5B-Instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 37,
"prompt_tokens": 61,
"total_tokens": 98
},
"warnings": null
}

Example response when a detection is found in the output; the gateway returns the route's fallback_message and reports the detection:

{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"audio": null,
"content": "I'm sorry, I'm afraid I can't do that.",
"refusal": null,
"role": "assistant",
"tool_calls": null
}
}
],
"created": 1741182848,
"detections": {
"input": null,
"output": [
{
"choice_index": 0,
"results": [
{
"detection": "EmailAddress",
"detection_type": "pii",
"detector_id": "regex-language",
"end": 176,
"score": 1.0,
"start": 152,
"text": "[email protected]"
}
]
}
]
},
"id": "16a0abbf4b0c431e885be5cfa4ff1c4b",
"model": "Qwen/Qwen2.5-1.5B-Instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 83,
"prompt_tokens": 61,
"total_tokens": 144
},
"warnings": [
{
"message": "Unsuitable output detected.",
"type": "UNSUITABLE_OUTPUT"
}
]
}
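For comparison, the passthrough route configured above registers no detectors, so requests to it are proxied without any guardrails applied. A minimal sketch, assuming the same gateway address as in the example above:

curl "localhost:8090/passthrough/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Qwen/Qwen2.5-1.5B-Instruct",
    "messages": [
      {
        "role": "user",
        "content": "say hello to me at [email protected]"
      }
    ]
  }'

Since no detectors are attached to this route, the response is expected to come back without detections, warnings, or the fallback message.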