Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dotnet/Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
<PackageVersion Include="Microsoft.Identity.Client.Extensions.Msal" Version="4.74.1" />
<PackageVersion Include="Microsoft.IdentityModel.JsonWebTokens" Version="8.13.0" />
<PackageVersion Include="Microsoft.ML.OnnxRuntime" Version="1.22.1" />
<PackageVersion Include="Microsoft.ML.OnnxRuntime.Gpu" Version="1.22.1"/>
<PackageVersion Include="Microsoft.ML.Tokenizers.Data.Cl100kBase" Version="1.0.1" />
<PackageVersion Include="Microsoft.SemanticKernel.Abstractions" Version="1.58.0" />
<PackageVersion Include="Microsoft.SemanticKernel.Connectors.OpenAI" Version="1.58.0" />
Expand Down
1 change: 1 addition & 0 deletions dotnet/SK-dotnet.slnx
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
<Project Path="samples/Demos/ModelContextProtocolPluginAuth/ModelContextProtocolPluginAuth.csproj" />
<Project Path="samples/Demos/OllamaFunctionCalling/OllamaFunctionCalling.csproj" />
<Project Path="samples/Demos/OnnxSimpleRAG/OnnxSimpleRAG.csproj" />
<Project Path="samples/Demos/OnnxSimpleChatWithCuda/OnnxSimpleChatWithCuda.csproj" />
<Project Path="samples/Demos/OpenAIRealtime/OpenAIRealtime.csproj" />
<Project Path="samples/Demos/ProcessWithDapr/ProcessWithDapr.csproj" />
<Project Path="samples/Demos/QualityCheck/QualityCheckWithFilters/QualityCheckWithFilters.csproj" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Console (Exe) sample project targeting net8.0 that references the ONNX connector and the CUDA/GPU ONNX runtime packages. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<NoWarn>$(NoWarn);CA2007,CA2208,CS1591,CA1024,IDE0009,IDE0055,IDE0073,IDE0211,VSTHRD111,SKEXP0001</NoWarn>
</PropertyGroup>
<ItemGroup>
<!--
TODO: remove this WORKAROUND once the package versions below work together.
The CUDA provider set up with Microsoft.ML.OnnxRuntimeGenAI.Cuda 0.8.3 + Microsoft.ML.OnnxRuntime.Gpu 1.22.1
- doesn't work with Microsoft.ML.OnnxRuntime 1.22.1
- works with Microsoft.ML.OnnxRuntime 1.22.0
For that reason Microsoft.ML.OnnxRuntime is pinned to 1.22.0 through VersionOverride on the reference below.
NU1605 (the NuGet package downgrade warning) is suppressed on that reference because the downgrade is intentional.
-->
<PackageReference Include="Microsoft.ML.OnnxRuntime" VersionOverride="1.22.0" NoWarn="NU1605"/>
<PackageReference Include="Microsoft.ML.OnnxRuntime.Gpu" />
<PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda"/>
<ProjectReference Include="..\..\..\src\Connectors\Connectors.Onnx\Connectors.Onnx.csproj"/>
<ProjectReference Include="..\..\..\src\SemanticKernel.Abstractions\SemanticKernel.Abstractions.csproj"/>
</ItemGroup>
</Project>
48 changes: 48 additions & 0 deletions dotnet/samples/Demos/OnnxSimpleChatWithCuda/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
using System;
using System.Collections.Generic;
using Microsoft.Extensions.AI;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Connectors.Onnx;

// Path to the folder of your downloaded ONNX CUDA model,
// e.g. D:\repo\huggingface\Phi-3-mini-4k-instruct-onnx\cuda\cuda-int4-rtn-block-32
string modelPath = "MODEL_PATH";

// Register the ONNX Runtime GenAI chat client on the kernel builder.
IKernelBuilder builder = Kernel.CreateBuilder();
builder.AddOnnxRuntimeGenAIChatClient(
    modelPath: modelPath,

    // Specify the provider you want to use, e.g., "cuda" for GPU support
    // For other execution providers, check: https://onnxruntime.ai/docs/genai/reference/config#provideroptions
    providers: [new Provider("cuda")]
);

Kernel kernel = builder.Build();

// The using declaration disposes the chat client when the program ends.
using IChatClient chatClient = kernel.GetRequiredService<IChatClient>();

// Conversation so far; the whole history is sent with every request.
List<ChatMessage> chatHistory = [];

while (true)
{
    Console.Write("User > ");

    // Console.ReadLine returns null when no more input is available (for example,
    // redirected stdin reaching its end), so null is handled like an empty line
    // and ends the chat instead of being hidden behind a null-forgiving operator.
    string? userMessage = Console.ReadLine();
    if (string.IsNullOrEmpty(userMessage))
    {
        break;
    }

    chatHistory.Add(new ChatMessage(ChatRole.User, userMessage));

    try
    {
        ChatResponse result = await chatClient.GetResponseAsync(chatHistory, new() { MaxOutputTokens = 1024 });
        Console.WriteLine($"Assistant > {result.Text}");

        // Keep the assistant's reply in the history so the next turn has the full context.
        chatHistory.AddRange(result.Messages);
    }
    catch (Exception e)
    {
        // Report the failure and keep the chat loop running.
        Console.WriteLine(e.Message);
    }
}
44 changes: 44 additions & 0 deletions dotnet/samples/Demos/OnnxSimpleChatWithCuda/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# ONNX Simple Chat with CUDA Execution Provider

This sample demonstrates how to use the ONNX connector with the CUDA execution provider to run local models straight from files using Semantic Kernel.

In this example we set up a chat client from the ONNX connector with [Microsoft's Phi-3-ONNX](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-onnx) model.

> [!IMPORTANT]
> You can modify this sample to use any other model that is enabled for the ONNX runtime.

## Semantic Kernel Features Used

- [Chat Client](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel.Abstractions/AI/ChatCompletion/IChatCompletionService.cs) - Using the chat client from the [Onnx Connector](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs) to generate responses from the local model.

## Prerequisites

- [.NET 8](https://dotnet.microsoft.com/download/dotnet/8.0).
- [NVIDIA GPU](https://www.nvidia.com/en-us/geforce/graphics-cards)
- [NVIDIA CUDA v12 Toolkit](https://developer.nvidia.com/cuda-12-0-0-download-archive)
- [NVIDIA cuDNN v9.11](https://developer.nvidia.com/cudnn-9-11-0-download-archive)
- Windows users only:

Ensure the `PATH` environment variable includes the `bin` folders of the CUDA Toolkit and cuDNN,
e.g.:
- C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.0\bin
- C:\Program Files\NVIDIA\CUDNN\v9.11\bin\12.9

- Downloaded ONNX Models (see below).

## Downloading the Model

For this example we chose Hugging Face as the repository for downloading the local models. Go to a directory of your choice where the models should be downloaded and run the following commands:

```powershell
git lfs install
git clone https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-onnx
```

Update the line below in `Program.cs` with the path to the model you downloaded in the previous step.

```csharp
// i.e. Running on Windows
string modelPath = "D:\\repo\\huggingface\\Phi-3-mini-4k-instruct-onnx\\cuda\\cuda-int4-rtn-block-32";
```

Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Collections.Generic;
using System.Linq;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.ML.OnnxRuntimeGenAI;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Connectors.Onnx;
using Xunit;

namespace SemanticKernel.Connectors.Onnx.UnitTests;
Expand Down Expand Up @@ -74,4 +77,75 @@ public void AddOnnxRuntimeGenAIChatClientToKernelBuilderWithServiceId()
Assert.NotNull(serviceDescriptor);
Assert.Equal(ServiceLifetime.Singleton, serviceDescriptor.Lifetime);
}

// Registering the chat client with providers on a service collection adds a factory-backed singleton IChatClient.
[Fact]
public void AddOnnxRuntimeGenAIChatClientWithProvidersToServiceCollection()
{
    // Arrange
    ServiceCollection services = new();
    List<Provider> executionProviders = new() { new Provider("cuda"), new Provider("cpu") };

    // Act
    services.AddOnnxRuntimeGenAIChatClient("modelPath", executionProviders);

    // Assert
    var descriptor = services.FirstOrDefault(d => typeof(IChatClient) == d.ServiceType);

    Assert.NotNull(descriptor);
    Assert.Equal(ServiceLifetime.Singleton, descriptor.Lifetime);
    Assert.NotNull(descriptor.ImplementationFactory);
}

// Registering the chat client with providers through the kernel builder adds a factory-backed singleton IChatClient.
[Fact]
public void AddOnnxRuntimeGenAIChatClientWithProvidersToKernelBuilder()
{
    // Arrange
    ServiceCollection services = new();
    IKernelBuilder builder = services.AddKernel();
    List<Provider> executionProviders = new() { new Provider("cuda"), new Provider("cpu") };

    // Act
    builder.AddOnnxRuntimeGenAIChatClient("modelPath", executionProviders);

    // Assert: the registration lands in the service collection the kernel builder was created from.
    var descriptor = services.FirstOrDefault(d => typeof(IChatClient) == d.ServiceType);

    Assert.NotNull(descriptor);
    Assert.Equal(ServiceLifetime.Singleton, descriptor.Lifetime);
    Assert.NotNull(descriptor.ImplementationFactory);
}

// Resolving the keyed chat client registered with providers and a service id is expected to throw
// an OnnxRuntimeGenAIException whose message mentions genai_config.json.
// NOTE(review): presumably because "modelPath" is not a real model folder - confirm.
[Fact]
public void AddOnnxRuntimeGenAIChatClientWithProvidersAndServiceIdToServiceCollection()
{
    // Arrange
    var collection = new ServiceCollection();
    var providers = new List<Provider> { new("cuda") };

    // Act
    collection.AddOnnxRuntimeGenAIChatClient("modelPath", providers, serviceId: "test-service");

    // The built provider is disposable; release it when the test ends.
    using var serviceProvider = collection.BuildServiceProvider();

    // Assert
    var exception = Assert.Throws<OnnxRuntimeGenAIException>(() => serviceProvider.GetRequiredKeyedService<IChatClient>("test-service"));

    Assert.Contains("genai_config.json", exception.Message);
}

// Resolving the chat client by service id from a kernel, after registering it through the kernel builder,
// is expected to throw an OnnxRuntimeGenAIException whose message mentions genai_config.json.
// NOTE(review): presumably because "modelPath" is not a real model folder - confirm.
[Fact]
public void AddOnnxRuntimeGenAIChatClientWithProvidersAndServiceIdToKernelBuilder()
{
    // Arrange
    var collection = new ServiceCollection();
    var kernelBuilder = collection.AddKernel();
    var providers = new List<Provider> { new("cuda") };

    // Act
    kernelBuilder.AddOnnxRuntimeGenAIChatClient("modelPath", providers, serviceId: "test-service");

    // The built provider is disposable; release it when the test ends.
    using var serviceProvider = collection.BuildServiceProvider();

    // Assert
    var kernel = serviceProvider.GetRequiredService<Kernel>();
    var exception = Assert.Throws<OnnxRuntimeGenAIException>(() => kernel.GetRequiredService<IChatClient>("test-service"));

    Assert.Contains("genai_config.json", exception.Message);
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Collections.Generic;
using System.Linq;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.ML.OnnxRuntimeGenAI;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.Onnx;
Expand Down Expand Up @@ -46,4 +50,76 @@ public void AddOnnxRuntimeGenAIChatCompletionToKernelBuilder()
Assert.NotNull(service);
Assert.IsType<OnnxRuntimeGenAIChatCompletionService>(service);
}

// Registering chat completion with providers on a service collection adds a factory-backed singleton IChatCompletionService.
[Fact]
public void AddOnnxRuntimeGenAIChatCompletionWithProvidersToServiceCollection()
{
    // Arrange
    ServiceCollection services = new();
    List<Provider> executionProviders = new() { new Provider("cuda"), new Provider("cpu") };

    // Act
    services.AddOnnxRuntimeGenAIChatCompletion("modelId", "modelPath", executionProviders);

    // Assert
    var descriptor = services.FirstOrDefault(d => typeof(IChatCompletionService) == d.ServiceType);

    Assert.NotNull(descriptor);
    Assert.Equal(ServiceLifetime.Singleton, descriptor.Lifetime);
    Assert.NotNull(descriptor.ImplementationFactory);
}

// Registering chat completion with providers through the kernel builder adds a factory-backed singleton IChatCompletionService.
[Fact]
public void AddOnnxRuntimeGenAIChatCompletionWithProvidersToKernelBuilder()
{
    // Arrange
    ServiceCollection services = new();
    IKernelBuilder builder = services.AddKernel();
    List<Provider> executionProviders = new() { new Provider("cuda"), new Provider("cpu") };

    // Act
    builder.AddOnnxRuntimeGenAIChatCompletion("modelId", "modelPath", executionProviders);

    // Assert: the registration lands in the service collection the kernel builder was created from.
    var descriptor = services.FirstOrDefault(d => typeof(IChatCompletionService) == d.ServiceType);

    Assert.NotNull(descriptor);
    Assert.Equal(ServiceLifetime.Singleton, descriptor.Lifetime);
    Assert.NotNull(descriptor.ImplementationFactory);
}

// Resolving the keyed chat completion service registered with providers and a service id is expected
// to throw an OnnxRuntimeGenAIException whose message mentions genai_config.json.
// NOTE(review): presumably because "modelPath" is not a real model folder - confirm.
[Fact]
public void AddOnnxRuntimeGenAIChatCompletionWithProvidersAndServiceIdToServiceCollection()
{
    // Arrange
    var collection = new ServiceCollection();
    var providers = new List<Provider> { new("cuda") };
    collection.AddOnnxRuntimeGenAIChatCompletion("modelId", "modelPath", providers, serviceId: "test-service");

    // The built provider is disposable; release it when the test ends.
    using var serviceProvider = collection.BuildServiceProvider();

    // Act
    var exception = Assert.Throws<OnnxRuntimeGenAIException>(() => serviceProvider.GetRequiredKeyedService<IChatCompletionService>("test-service"));

    // Assert
    Assert.Contains("genai_config.json", exception.Message);
}

// Resolving the chat completion service by service id from a kernel, after registering it through the
// kernel builder, is expected to throw an OnnxRuntimeGenAIException whose message mentions genai_config.json.
// NOTE(review): presumably because "modelPath" is not a real model folder - confirm.
[Fact]
public void AddOnnxRuntimeGenAIChatCompletionWithProvidersAndServiceIdToKernelBuilder()
{
    // Arrange
    var collection = new ServiceCollection();
    var kernelBuilder = collection.AddKernel();
    var providers = new List<Provider> { new("cuda") };
    kernelBuilder.AddOnnxRuntimeGenAIChatCompletion("modelId", "modelPath", providers, serviceId: "test-service");

    // The built provider is disposable; release it when the test ends.
    using var serviceProvider = collection.BuildServiceProvider();
    var kernel = serviceProvider.GetRequiredService<Kernel>();

    // Act
    var exception = Assert.Throws<OnnxRuntimeGenAIException>(() => kernel.GetRequiredService<IChatCompletionService>("test-service"));

    // Assert
    Assert.Contains("genai_config.json", exception.Message);
}
}
Loading
Loading