
Added support for Hugging Face Inference API #74


Open · wants to merge 2 commits into main
16 changes: 16 additions & 0 deletions README.md
@@ -69,6 +69,22 @@ const zee = new ZeeWorkflow({
console.log(result);
})();
```
### Using Hugging Face Inference with the SDK

To use Hugging Face Inference with the SDK, configure a `HuggingFaceConfig` and pass it to the `LLM` constructor:

```ts
const hfConfig: HuggingFaceConfig = {
provider: "HUGGINGFACE",
name: "sentence-transformers/all-MiniLM-L6-v2", // Example Model
apiKey: "hf_xxxxxxxxxxxxxxxxxxxxxxxx"
};

const llm = new LLM(hfConfig);
const response = await llm.generate(messages, schema, tools);
```

The agent will automatically handle streaming responses and format them appropriately.
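
For reference, a minimal sketch of consuming the result, assuming the Hugging Face provider returns a `text`-typed response as the SDK's `llm.ts` does:

```ts
// The HF branch in llm.ts returns { type: "text", value: string },
// where `value` carries the generated_text from the Inference API.
if (response.type === "text") {
    console.log(response.value);
}
```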

## 🤝 Contributing

124 changes: 124 additions & 0 deletions docs/concepts/llms.mdx
@@ -14,7 +14,16 @@ const llm = new LLM({
temperature: 0.7
});
```

For Hugging Face Inference:

```tsx
const hfConfig: HuggingFaceConfig = {
provider: "HUGGINGFACE",
name: "sentence-transformers/all-MiniLM-L6-v2",
apiKey: "hf_xxxxxxxxxxxxxxxxxxxxxxxx"
};

const llm = new LLM(hfConfig);
const response = await llm.generate(messages, schema, tools);
```
## List of supported LLMs

### OpenAI
@@ -45,6 +54,20 @@ const llm = new LLM({
"gemini-1.5-pro"
```

```plaintext Hugging Face
Any model available through the Hugging Face Inference API
Examples:
"deepseek-ai/DeepSeek-R1"
"deepseek-ai/Janus-Pro-7B"
"hexgrad/Kokoro-82M"
"mistralai/Mistral-Small-24B-Instruct-2501"
"mistralai/Mixtral-8x7B-Instruct-v0.1"
```

</CodeGroup>

## Environment Variables
@@ -67,8 +90,109 @@ GROK_API_KEY
GEMINI_API_KEY
```

```plaintext Hugging Face Inference
HF_API_KEY
```

</CodeGroup>
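
When `HF_API_KEY` is set, the `apiKey` field can be omitted from the config, since the SDK falls back to the environment variable. A minimal sketch:

```typescript
// Sketch: relying on HF_API_KEY instead of a hard-coded key.
// llm.ts resolves the key as process.env["HF_API_KEY"] || this.model.apiKey,
// so apiKey can be left out when the variable is exported.
const llm = new LLM({
    provider: "HUGGINGFACE",
    name: "mistralai/Mixtral-8x7B-Instruct-v0.1",
});
```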

## Using Hugging Face Models

To use Hugging Face models with the SDK:

1. **Get Hugging Face Token**
- Create an account at [huggingface.co](https://huggingface.co)
- Generate an access token in [Account Settings](https://huggingface.co/settings/tokens)

2. **Configure your agent**:
```typescript
const agent = new Agent({
name: "HuggingFaceAgent",
model: {
provider: "HUGGINGFACE",
name: "mistralai/Mixtral-8x7B-Instruct-v0.1", // Any supported HF model
apiKey: "hf_xxxxxxxxxxxxxxxxxxxxxxxx", // Your Hugging Face token
baseURL: "https://api-inference.huggingface.co/models", // optional
},
description: "Cloud-hosted HF model integration",
});
```
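
Once configured, the agent can be invoked like any other provider-backed agent. A sketch, assuming the task-based `execute()` API shown under Special Features below:

```typescript
// Sketch: invoking the HF-backed agent via the task-based execute() API
// (illustrated under "Special Features" below). "text-generation" is the
// Hugging Face task name for causal text generation.
const result = await agent.execute({
    task: "text-generation",
    parameters: {
        prompt: "Explain quantum computing briefly",
    },
});
console.log(result);
```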

### Response Format
Responses maintain the same structure as other providers:

1. Complete Response Object:
```json
{
"agent": "hf-agent",
"messages": [
{
"role": "user",
"content": "Explain quantum computing briefly"
},
{
"role": "assistant",
"content": {
"thinking": "Parsing request... Identifying key concepts... Generating concise explanation...",
"answer": "Quantum computing uses quantum bits to perform calculations through superposition and entanglement..."
}
}
],
"status": "completed",
"children": []
}
```

2. Assistant Message Content Structure:
```json
{
"role": "assistant",
"content": {
"thinking": "Model's internal processing steps",
"answer": "Final response to user query",
"details": {
"model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
"inference_time": 1.23
}
}
}
```

Additional fields:
- `details`: Contains model-specific metadata
- `inference_time`: Time taken for response generation (seconds)

Access responses using:
```typescript
const lastMessage = result.messages[result.messages.length - 1];
console.log(lastMessage?.content.thinking); // Model's reasoning steps
console.log(lastMessage?.content.answer); // Final response
console.log(lastMessage?.content.details?.inference_time); // Performance metrics
```

### Special Features
1. **Model Switching**:
```typescript
// Dynamically switch models mid-conversation
agent.setModel({
provider: "HUGGINGFACE",
name: "google/flan-t5-xxl",
apiKey: "hf_xxxxxxxxxxxxxxxxxxxxxxxx"
});
```

2. **Task-Specific Endpoints**:
```typescript
// Directly access specialized endpoints
const response = await agent.execute({
task: "text-to-image",
parameters: {
prompt: "A cyberpunk cat hacker",
model: "stabilityai/stable-diffusion-xl-base-1.0"
}
});
```

## Use Cases

### Image Analysis
1 change: 1 addition & 0 deletions packages/ai-agent-sdk/package.json
@@ -49,6 +49,7 @@
},
"dependencies": {
"@covalenthq/client-sdk": "^2.2.3",
"@huggingface/inference": "^3.3.0",
"commander": "^13.1.0",
"dotenv": "^16.4.7",
"openai": "^4.79.1",
24 changes: 24 additions & 0 deletions packages/ai-agent-sdk/src/core/llm/llm.ts
@@ -11,6 +11,7 @@ import type {
import type { AnyZodObject } from "zod";
import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";
import { HfInference } from "@huggingface/inference";

const entryToObject = ([key, value]: [string, AnyZodObject]) => {
return z.object({ type: z.literal(key), value });
@@ -83,13 +84,18 @@ export class LLM extends Base {
config.apiKey =
process.env["GEMINI_API_KEY"] || this.model.apiKey;
break;
case "HUGGINGFACE":
config.baseURL = "https://api-inference.huggingface.co/models";
config.apiKey = process.env["HF_API_KEY"] || this.model.apiKey;
break;
default:
var _exhaustiveCheck: never = provider;
throw new Error(
`Unhandled model provider: ${_exhaustiveCheck}`
);
}
const client = new OpenAI(config);
const HuggingFaceClient = new HfInference(config.apiKey);

const mappedTools = tools ? formatOpenAITools(tools) : [];

@@ -161,6 +167,24 @@
return parsed.response as LLMResponse<T>;
}

if (provider === "HUGGINGFACE") {
    const response = await HuggingFaceClient.textGeneration({
        model: this.model.name,
        // Use the most recent user message as the generation prompt
        inputs:
            messages.filter((m) => m.role === "user").pop()?.content || "",
        parameters: {
            temperature: mappedTemperature,
            return_full_text: false,
        },
    });

    return {
        type: "text",
        value: response.generated_text,
    } as LLMResponse<T>;
}

throw new Error("No response in message");
}
}
29 changes: 22 additions & 7 deletions packages/ai-agent-sdk/src/core/llm/llm.types.ts
@@ -47,18 +47,33 @@ export type GeminiConfig = {
apiKey?: string;
};

export type HuggingFaceModel = string;

export type HuggingFaceConfig = {
provider: "HUGGINGFACE";
name: HuggingFaceModel;
apiKey?: string;
temperature?: number;
};

export type ModelConfig =
| OpenAIConfig
| DeepSeekConfig
| GrokConfig
| GeminiConfig;
| GeminiConfig
| HuggingFaceConfig;

export type LLMResponse<T extends Record<string, AnyZodObject>> = {
[K in keyof T]: {
type: K;
value: z.infer<T[K]>;
};
}[keyof T];
export type LLMResponse<T extends Record<string, AnyZodObject>> =
| {
[K in keyof T]: {
type: K;
value: z.infer<T[K]>;
};
}[keyof T]
| {
type: "text";
value: string;
};

export type FunctionToolCall = {
type: "tool_call";