diff --git a/dotnet/samples/GettingStartedWithAgents/OpenAIResponse/Step01_OpenAIResponseAgent.cs b/dotnet/samples/GettingStartedWithAgents/OpenAIResponse/Step01_OpenAIResponseAgent.cs
index a8f4114f102d..dd2c52ef3c97 100644
--- a/dotnet/samples/GettingStartedWithAgents/OpenAIResponse/Step01_OpenAIResponseAgent.cs
+++ b/dotnet/samples/GettingStartedWithAgents/OpenAIResponse/Step01_OpenAIResponseAgent.cs
@@ -111,4 +111,33 @@ public async Task UseOpenAIResponseAgentWithThreadedConversationStreamingAsync()
             agentThread = await WriteAgentStreamMessageAsync(responseItems);
         }
     }
+
+    [Fact]
+    public async Task UseOpenAIResponseAgentWithImageContentAsync()
+    {
+        // Define the agent
+        OpenAIResponseAgent agent = new(this.Client)
+        {
+            Name = "ResponseAgent",
+            Instructions = "Provide a detailed description including the weather conditions.",
+        };
+
+        ICollection<ChatMessageContent> messages =
+        [
+            new ChatMessageContent(
+                AuthorRole.User,
+                items: [
+                    new TextContent("What is in this image?"),
+                    new ImageContent(new Uri("https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"))
+                ]
+            ),
+        ];
+
+        // Invoke the agent and output the response
+        var responseItems = agent.InvokeAsync(messages);
+        await foreach (ChatMessageContent responseItem in responseItems)
+        {
+            WriteAgentChatMessage(responseItem);
+        }
+    }
 }
diff --git a/dotnet/src/Agents/OpenAI/Extensions/ChatContentMessageExtensions.cs b/dotnet/src/Agents/OpenAI/Extensions/ChatContentMessageExtensions.cs
index e8f9b8cea96b..c627d9517c81 100644
--- a/dotnet/src/Agents/OpenAI/Extensions/ChatContentMessageExtensions.cs
+++ b/dotnet/src/Agents/OpenAI/Extensions/ChatContentMessageExtensions.cs
@@ -43,13 +43,14 @@ public static IEnumerable ToThreadInitializationMes
     /// <returns>A <see cref="ResponseItem"/> instance.</returns>
     public static ResponseItem ToResponseItem(this ChatMessageContent message)
     {
-        string content = message.Content ?? string.Empty;
+        var items = message.Items;
+        IEnumerable<ResponseContentPart> contentParts = items.Select(item => item.ToResponseContentPart());
         return message.Role.Label.ToUpperInvariant() switch
         {
-            "SYSTEM" => ResponseItem.CreateSystemMessageItem(content),
-            "USER" => ResponseItem.CreateUserMessageItem(content),
-            "DEVELOPER" => ResponseItem.CreateDeveloperMessageItem(content),
-            "ASSISTANT" => ResponseItem.CreateAssistantMessageItem(content),
+            "SYSTEM" => ResponseItem.CreateSystemMessageItem(contentParts),
+            "USER" => ResponseItem.CreateUserMessageItem(contentParts),
+            "DEVELOPER" => ResponseItem.CreateDeveloperMessageItem(contentParts),
+            "ASSISTANT" => ResponseItem.CreateAssistantMessageItem(contentParts),
             _ => throw new NotSupportedException($"Unsupported role {message.Role.Label}. Only system, user, developer or assistant roles are allowed."),
         };
     }
diff --git a/dotnet/src/Agents/OpenAI/Extensions/KernelContentExtensions.cs b/dotnet/src/Agents/OpenAI/Extensions/KernelContentExtensions.cs
new file mode 100644
index 000000000000..59f11f0f68cd
--- /dev/null
+++ b/dotnet/src/Agents/OpenAI/Extensions/KernelContentExtensions.cs
@@ -0,0 +1,62 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using OpenAI.Responses;
+
+namespace Microsoft.SemanticKernel.Agents.OpenAI;
+
+/// <summary>
+/// Extension methods for <see cref="KernelContent"/>.
+/// </summary>
+internal static class KernelContentExtensions
+{
+    /// <summary>
+    /// Converts a <see cref="KernelContent"/> item to a <see cref="ResponseContentPart"/> based on its runtime type.
+    /// </summary>
+    /// <exception cref="NotSupportedException">The content type has no conversion.</exception>
+    internal static ResponseContentPart ToResponseContentPart(this KernelContent content)
+    {
+        return content switch
+        {
+            TextContent textContent => textContent.ToResponseContentPart(),
+            ImageContent imageContent => imageContent.ToResponseContentPart(),
+            BinaryContent binaryContent => binaryContent.ToResponseContentPart(),
+            FileReferenceContent fileReferenceContent => fileReferenceContent.ToResponseContentPart(),
+            _ => throw new NotSupportedException($"Unsupported content type {content.GetType().Name}. Cannot convert to {nameof(ResponseContentPart)}.")
+        };
+    }
+
+    /// <summary>Converts text to an input text part; null text becomes an empty string.</summary>
+    internal static ResponseContentPart ToResponseContentPart(this TextContent content)
+    {
+        return ResponseContentPart.CreateInputTextPart(content.Text ?? string.Empty);
+    }
+
+    /// <summary>Converts an image to an input image part, preferring the uri over inline data.</summary>
+    internal static ResponseContentPart ToResponseContentPart(this ImageContent content)
+    {
+        // Unwrap the nullable payload so the raw bytes are passed (a boxed nullable would select BinaryData's JSON-serializing object constructor).
+        return content.Uri is not null
+            ? ResponseContentPart.CreateInputImagePart(content.Uri)
+            : content.Data is { } imageBytes
+                ? ResponseContentPart.CreateInputImagePart(BinaryData.FromBytes(imageBytes), content.MimeType)
+                : throw new NotSupportedException("ImageContent cannot be converted to ResponseContentPart. Only ImageContent with a uri or binary data is supported.");
+    }
+
+    /// <summary>Converts binary data to an input file part with a generated file name.</summary>
+    internal static ResponseContentPart ToResponseContentPart(this BinaryContent content)
+    {
+        // Unwrap the nullable payload so the raw bytes are passed (a boxed nullable would select BinaryData's JSON-serializing object constructor).
+        return content.Data is { } fileBytes
+            ? ResponseContentPart.CreateInputFilePart(BinaryData.FromBytes(fileBytes), content.MimeType, Guid.NewGuid().ToString())
+            : throw new NotSupportedException("BinaryContent cannot be converted to ResponseContentPart. Only BinaryContent with binary data is supported.");
+    }
+
+    /// <summary>Converts a file reference to an input file part that refers to the file id.</summary>
+    internal static ResponseContentPart ToResponseContentPart(this FileReferenceContent content)
+    {
+        return content.FileId is not null
+            ? ResponseContentPart.CreateInputFilePart(content.FileId)
+            : throw new NotSupportedException("FileReferenceContent cannot be converted to ResponseContentPart. Only FileReferenceContent with a file id is supported.");
+    }
+}
diff --git a/dotnet/src/Agents/UnitTests/OpenAI/Extensions/ChatContentMessageExtensionsTests.cs b/dotnet/src/Agents/UnitTests/OpenAI/Extensions/ChatContentMessageExtensionsTests.cs
new file mode 100644
index 000000000000..62c87daad7ed
--- /dev/null
+++ b/dotnet/src/Agents/UnitTests/OpenAI/Extensions/ChatContentMessageExtensionsTests.cs
@@ -0,0 +1,67 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Linq;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.Agents.OpenAI;
+using Microsoft.SemanticKernel.ChatCompletion;
+using OpenAI.Responses;
+using Xunit;
+
+namespace SemanticKernel.Agents.UnitTests.OpenAI.Extensions;
+
+/// <summary>
+/// Unit tests for ChatContentMessageExtensions
+/// </summary>
+public class ChatContentMessageExtensionsTests
+{
+    [Theory]
+    [InlineData("User")]
+    [InlineData("Assistant")]
+    [InlineData("System")]
+    public void VerifyToResponseItemWithUserChatMessageContent(string roleLabel)
+    {
+        // Arrange
+        var role = new AuthorRole(roleLabel);
+        var content = new ChatMessageContent(
+            role,
+            items: [
+                new TextContent("What is in this image?"),
+                new ImageContent(new Uri("https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg")),
+                new BinaryContent(new ReadOnlyMemory<byte>([0x52, 0x49, 0x46, 0x46, 0x24, 0x08, 0x00, 0x00, 0x57, 0x41, 0x56, 0x45]), "audio/wav"),
+                new FileReferenceContent("file-abc123")
+            ]
+        );
+
+        // Act
+        var responseItem = content.ToResponseItem();
+
+        // Assert
+        Assert.NotNull(responseItem);
+        Assert.IsType<MessageResponseItem>(responseItem, exactMatch: false);
+        var messageResponseItem = responseItem as MessageResponseItem;
+        Assert.NotNull(messageResponseItem);
+        Assert.Equal(role.Label.ToUpperInvariant(), messageResponseItem.Role.ToString().ToUpperInvariant());
+        Assert.Equal(4, messageResponseItem.Content.Count);
+
+        // Validate TextContent conversion - should create InputText part
+        var textContent = messageResponseItem.Content.FirstOrDefault(p => p.Kind == ResponseContentPartKind.InputText);
+        Assert.NotNull(textContent);
+        Assert.Equal("What is in this image?", textContent.Text);
+
+        // Validate ImageContent conversion - should create InputImage part
+        var imageContent = messageResponseItem.Content.FirstOrDefault(p => p.Kind == ResponseContentPartKind.InputImage);
+        Assert.NotNull(imageContent);
+
+        // Validate BinaryContent conversion - should create InputFile part carrying the original bytes
+        var binaryContent = messageResponseItem.Content.FirstOrDefault(p => p.Kind == ResponseContentPartKind.InputFile && p.InputFileBytes is not null);
+        Assert.NotNull(binaryContent);
+        Assert.Equal("audio/wav", binaryContent.InputFileBytesMediaType);
+        Assert.Equal(new byte[] { 0x52, 0x49, 0x46, 0x46, 0x24, 0x08, 0x00, 0x00, 0x57, 0x41, 0x56, 0x45 }, binaryContent.InputFileBytes.ToArray());
+
+        // Validate FileReferenceContent conversion - should create InputFile part
+        var fileContent = messageResponseItem.Content.FirstOrDefault(p => p.Kind == ResponseContentPartKind.InputFile && p.InputFileId is not null);
+        Assert.NotNull(fileContent);
+        Assert.Equal("file-abc123", fileContent.InputFileId);
+    }
+}