[draft] Responses API proxy server #1576

Draft · wants to merge 12 commits into main

1 change: 1 addition & 0 deletions packages/responses-server/.eslintignore
@@ -0,0 +1 @@
dist
4 changes: 4 additions & 0 deletions packages/responses-server/.prettierignore
@@ -0,0 +1,4 @@
pnpm-lock.yaml
# Excluded so code samples don't get reformatted with tabs; tabs don't display well on npm
README.md
dist
44 changes: 44 additions & 0 deletions packages/responses-server/README.md
@@ -0,0 +1,44 @@
# @huggingface/responses-server

A lightweight Express.js server implementing the Responses API on top of the Inference Providers Chat Completions API.

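The server exposes an OpenAI-compatible `/v1/responses` endpoint, so the official `openai` client can talk to it directly. A minimal sketch, assuming the server is running locally on port 3000 and `HF_TOKEN` is set:

```js
import OpenAI from "openai";

// Point the OpenAI client at the local proxy instead of api.openai.com
const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN });

const response = await openai.responses.create({
  model: "Qwen/Qwen2.5-VL-7B-Instruct",
  input: "Tell me a three sentence bedtime story about a unicorn.",
});

console.log(response.output_text);
```
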
## 📁 Project Structure

```
responses-server/
├── src/
│   ├── index.ts       # Server entry point
│   ├── server.ts      # Express app configuration (e.g. route definitions)
│   ├── routes/        # Route implementations
│   ├── middleware/    # Middlewares (validation + logging)
│   └── schemas/       # Zod validation schemas
├── scripts/           # Utility scripts
├── examples/          # Example scripts using the OpenAI client
└── package.json       # Package configuration
```

## 🚀 Quick Start

### Development

```bash
# Install dependencies
pnpm install

# Start development server
pnpm dev
```

### Run examples

Example scripts are provided in `./examples` (text generation, multi-turn, streaming, image input, function calling, and structured output).

You can run them with:

```bash
# Run ./examples/text.js
pnpm run example text

# Run ./examples/multi_turn.js
pnpm run example multi_turn
```

32 changes: 32 additions & 0 deletions packages/responses-server/examples/function.js
@@ -0,0 +1,32 @@
import OpenAI from "openai";

const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN });

const tools = [
{
type: "function",
name: "get_current_weather",
description: "Get the current weather in a given location",
parameters: {
type: "object",
properties: {
location: {
type: "string",
description: "The city and state, e.g. San Francisco, CA",
},
unit: { type: "string", enum: ["celsius", "fahrenheit"] },
},
required: ["location", "unit"],
},
},
];

const response = await openai.responses.create({
model: "meta-llama/Llama-3.3-70B-Instruct",
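// "provider" is an extension to the OpenAI parameters: it selects the Hugging Face Inference Provider to route the request to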
provider: "cerebras",
tools: tools,
input: "What is the weather like in Boston today?",
tool_choice: "auto",
});

console.log(response);
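
When the model decides to call the tool, the response's `output` array should contain a `function_call` item. A minimal sketch of consuming it, assuming the proxy mirrors OpenAI's Responses output shape:

```js
// Extract tool calls from the response output
for (const item of response.output) {
  if (item.type === "function_call") {
    // "arguments" arrives as a JSON-encoded string
    const args = JSON.parse(item.arguments);
    console.log(`Model requested ${item.name} with`, args);
  }
}
```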
33 changes: 33 additions & 0 deletions packages/responses-server/examples/function_streaming.js
@@ -0,0 +1,33 @@
import { OpenAI } from "openai";

const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN });

const tools = [
{
type: "function",
name: "get_weather",
description: "Get current temperature for provided coordinates in celsius.",
parameters: {
type: "object",
properties: {
latitude: { type: "number" },
longitude: { type: "number" },
},
required: ["latitude", "longitude"],
additionalProperties: false,
},
strict: true,
},
];

const stream = await openai.responses.create({
model: "meta-llama/Llama-3.3-70B-Instruct",
provider: "cerebras",
input: [{ role: "user", content: "What's the weather like in Paris today?" }],
tools,
stream: true,
});

for await (const event of stream) {
console.log(event);
}
23 changes: 23 additions & 0 deletions packages/responses-server/examples/image.js
@@ -0,0 +1,23 @@
import OpenAI from "openai";

const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN });

const response = await openai.responses.create({
model: "Qwen/Qwen2.5-VL-7B-Instruct",
input: [
{
role: "user",
content: [
{ type: "input_text", text: "what is in this image?" },
{
type: "input_image",
image_url:
"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
},
],
},
],
});

console.log(response);
console.log(response.output_text);
20 changes: 20 additions & 0 deletions packages/responses-server/examples/multi_turn.js
@@ -0,0 +1,20 @@
import OpenAI from "openai";

const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN });

const response = await openai.responses.create({
model: "Qwen/Qwen2.5-VL-7B-Instruct",
input: [
{
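// The "developer" role is the Responses API equivalent of a system prompt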
role: "developer",
content: "Talk like a pirate.",
},
{
role: "user",
content: "Are semicolons optional in JavaScript?",
},
],
});

console.log(response);
console.log(response.output_text);
17 changes: 17 additions & 0 deletions packages/responses-server/examples/streaming.js
@@ -0,0 +1,17 @@
import { OpenAI } from "openai";
const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN });

const stream = await openai.responses.create({
model: "Qwen/Qwen2.5-VL-7B-Instruct",
input: [
{
role: "user",
content: "Say 'double bubble bath' ten times fast.",
},
],
stream: true,
});

for await (const event of stream) {
console.log(event);
}
32 changes: 32 additions & 0 deletions packages/responses-server/examples/structured_output.js
@@ -0,0 +1,32 @@
import OpenAI from "openai";
import { zodTextFormat } from "openai/helpers/zod";
import { z } from "zod";

const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN });

const Step = z.object({
explanation: z.string(),
output: z.string(),
});

const MathReasoning = z.object({
steps: z.array(Step),
final_answer: z.string(),
});

const response = await openai.responses.parse({
model: "Qwen/Qwen2.5-VL-72B-Instruct",
provider: "nebius",
input: [
{
role: "system",
content: "You are a helpful math tutor. Guide the user through the solution step by step.",
},
{ role: "user", content: "how can I solve 8x + 7 = -23" },
],
text: {
format: zodTextFormat(MathReasoning, "math_reasoning"),
},
});

console.log(response.output_parsed);
36 changes: 36 additions & 0 deletions packages/responses-server/examples/structured_output_streaming.js
@@ -0,0 +1,36 @@
import { OpenAI } from "openai";
import { zodTextFormat } from "openai/helpers/zod";
import { z } from "zod";

const CalendarEvent = z.object({
name: z.string(),
date: z.string(),
participants: z.array(z.string()),
});

const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN });
const stream = openai.responses
.stream({
model: "Qwen/Qwen2.5-VL-72B-Instruct",
provider: "nebius",
instructions: "Extract the event information.",
input: "Alice and Bob are going to a science fair on Friday.",
text: {
format: zodTextFormat(CalendarEvent, "calendar_event"),
},
})
.on("response.refusal.delta", (event) => {
process.stdout.write(event.delta);
})
.on("response.output_text.delta", (event) => {
process.stdout.write(event.delta);
})
.on("response.output_text.done", () => {
process.stdout.write("\n");
})
.on("response.error", (event) => {
console.error(event.error);
});

const result = await stream.finalResponse();
console.log(result.output_parsed);
12 changes: 12 additions & 0 deletions packages/responses-server/examples/text.js
@@ -0,0 +1,12 @@
import OpenAI from "openai";

const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN });

const response = await openai.responses.create({
model: "Qwen/Qwen2.5-VL-7B-Instruct",
instructions: "You are a helpful assistant.",
input: "Tell me a three sentence bedtime story about a unicorn.",
});

console.log(response);
console.log(response.output_text);
63 changes: 63 additions & 0 deletions packages/responses-server/package.json
@@ -0,0 +1,63 @@
{
"name": "@huggingface/responses-server",
"packageManager": "[email protected]",
"version": "0.1.0",
"type": "module",
"description": "Server for handling AI responses",
"repository": "https://github.com/huggingface/huggingface.js.git",
"publishConfig": {
"access": "public"
},
"main": "./dist/index.js",
"module": "./dist/index.mjs",
"types": "./dist/index.d.ts",
"exports": {
".": {
"types": "./dist/index.d.ts",
"require": "./dist/index.js",
"import": "./dist/index.mjs"
}
},
"engines": {
"node": ">=18"
},
"source": "index.ts",
"scripts": {
"build": "tsup src/*.ts --format cjs,esm --clean && tsc --emitDeclarationOnly --declaration",
"check": "tsc",
"dev": "tsx watch src/index.ts",
"format": "prettier --write .",
"format:check": "prettier --check .",
"lint": "eslint --quiet --fix --ext .cjs,.ts .",
"lint:check": "eslint --ext .cjs,.ts .",
"prepublishOnly": "pnpm run build",
"prepare": "pnpm run build",
"start": "node dist/index.js",
"example": "node scripts/run-example.js"
},
"files": [
"src",
"dist",
"tsconfig.json"
],
"keywords": [
"huggingface",
"ai",
"llm",
"responses-api",
"server"
],
"author": "Hugging Face",
"license": "MIT",
"dependencies": {
"@huggingface/inference": "workspace:^",
"@huggingface/tasks": "workspace:^",
"express": "^4.18.2",
"openai": "^5.8.2",
"zod": "^3.22.4"
},
"devDependencies": {
"@types/express": "^4.17.21",
"tsx": "^4.7.0"
}
}