diff --git a/apps/11_promptflow/README.md b/apps/11_promptflow/README.md index 9fc7ccb..4a30f58 100644 --- a/apps/11_promptflow/README.md +++ b/apps/11_promptflow/README.md @@ -177,6 +177,32 @@ $ pf run create \ $ pf run show-details --name $RUN_NAME ``` +### image_qa + +To run the image QA flow with GPT-4o, we customize an LLM tool. +The following documents provide more details: + +- docs: [Customizing an LLM Tool](https://microsoft.github.io/promptflow/how-to-guides/develop-a-tool/customize_an_llm_tool.html) +- example code: [promptflow/examples/flows/chat/chat-with-image](https://github.com/microsoft/promptflow/tree/main/examples/flows/chat/chat-with-image) + +With the image QA flow sample, you can ask questions about an image and get answers from the model. + +```shell +cd apps/11_promptflow/image_qa + +# Create run with multiple lines data +$ RUN_NAME=image_qa-$(date +%s) +$ pf run create \ + --name $RUN_NAME \ + --flow . \ + --data ./data.jsonl \ + --column-mapping image='${data.image}' \ + --stream + +# Show run details +$ pf run show-details --name $RUN_NAME +``` + ## References - [Prompt flow > repos](https://github.com/microsoft/promptflow) diff --git a/apps/11_promptflow/image_qa/.promptflow/flow.tools.json b/apps/11_promptflow/image_qa/.promptflow/flow.tools.json index 35fd04a..7cf2b94 100644 --- a/apps/11_promptflow/image_qa/.promptflow/flow.tools.json +++ b/apps/11_promptflow/image_qa/.promptflow/flow.tools.json @@ -1,26 +1,37 @@ { - "package": {}, - "code": { - "hello.jinja2": { - "type": "prompt", - "inputs": { - "text": { - "type": [ - "string" - ] - } - } + "package": {}, + "code": { + "hello.py": { + "type": "python", + "inputs": { + "connection": { + "type": [ + "AzureOpenAIConnection" + ] }, - "hello.py": { - "type": "python", - "inputs": { - "input1": { - "type": [ - "string" - ] - } - }, - "function": "my_python_tool" + "image": { + "type": [ + "image" + ] + }, + "model": { + "type": [ + "string" + ] + }, + "system_prompt": { + "type": [ 
"string" + ] + }, + "user_prompt": { + "type": [ + "string" + ] } + }, + "source": "hello.py", + "function": "my_python_tool" } + } } diff --git a/apps/11_promptflow/image_qa/data.jsonl b/apps/11_promptflow/image_qa/data.jsonl index 15e3aa5..2500c35 100644 --- a/apps/11_promptflow/image_qa/data.jsonl +++ b/apps/11_promptflow/image_qa/data.jsonl @@ -1 +1 @@ -{"text": "Hello World!"} +{"image": "../../../datasets/contoso-receipt.png"} \ No newline at end of file diff --git a/apps/11_promptflow/image_qa/flow.dag.yaml b/apps/11_promptflow/image_qa/flow.dag.yaml index 1415bf3..b2c9758 100644 --- a/apps/11_promptflow/image_qa/flow.dag.yaml +++ b/apps/11_promptflow/image_qa/flow.dag.yaml @@ -1,25 +1,32 @@ $schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json +environment: + python_requirements_txt: requirements.txt inputs: - text: + user_prompt: + type: string + default: Please extract texts from the image + system_prompt: type: string + default: You are an excellent OCR tool + image: + type: image + default: ../../../datasets/contoso-receipt.png + model: + type: string + default: gpt-4o outputs: output_prompt: type: string - reference: ${echo_my_prompt.output} + reference: ${image_qa.output} nodes: -- name: hello_prompt - type: prompt - source: - type: code - path: hello.jinja2 - inputs: - text: ${inputs.text} -- name: echo_my_prompt +- name: image_qa type: python source: type: code path: hello.py inputs: - input1: ${hello_prompt.output} -environment: - python_requirements_txt: requirements.txt + connection: open_ai_connection + image: ${inputs.image} + system_prompt: ${inputs.system_prompt} + user_prompt: ${inputs.user_prompt} + model: ${inputs.model} diff --git a/apps/11_promptflow/image_qa/hello.jinja2 b/apps/11_promptflow/image_qa/hello.jinja2 deleted file mode 100644 index d2519cb..0000000 --- a/apps/11_promptflow/image_qa/hello.jinja2 +++ /dev/null @@ -1,2 +0,0 @@ -{# Please replace the template with your own prompt. 
#}
-Write a simple {{text}} program that displays the greeting message.
diff --git a/apps/11_promptflow/image_qa/hello.py b/apps/11_promptflow/image_qa/hello.py
index f533b27..a59a278 100644
--- a/apps/11_promptflow/image_qa/hello.py
+++ b/apps/11_promptflow/image_qa/hello.py
@@ -1,14 +1,90 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
+import base64
+import io
 
+from openai import AzureOpenAI
+from promptflow.connections import AzureOpenAIConnection
+from promptflow.contracts.multimedia import Image
 from promptflow.core import tool
 
-# The inputs section will change based on the arguments of the tool function, after you save the code
-# Adding type to arguments and return value will help the system show the types properly
-# Please update the function name/signature per need
-
+# Leading signature bytes of the image formats that are labelled explicitly.
+_IMAGE_SIGNATURES = (
+    (b"\x89PNG\r\n\x1a\n", "image/png"),
+    (b"\xff\xd8\xff", "image/jpeg"),
+    (b"GIF87a", "image/gif"),
+    (b"GIF89a", "image/gif"),
+)
+
+
+def _detect_image_mime(data: bytes) -> str:
+    """Return the MIME type implied by the leading bytes of *data*.
+
+    Unrecognized data is reported as ``image/jpeg``, which is the label this
+    tool previously attached to every image.
+    """
+    for signature, mime_type in _IMAGE_SIGNATURES:
+        if data.startswith(signature):
+            return mime_type
+    # WebP is a RIFF container: b"RIFF", four size bytes, then b"WEBP".
+    if data[:4] == b"RIFF" and data[8:12] == b"WEBP":
+        return "image/webp"
+    return "image/jpeg"
+
 
 @tool
-def my_python_tool(input1: str) -> str:
-    return "Prompt: " + input1
+def my_python_tool(
+    connection: AzureOpenAIConnection,
+    image: Image,
+    model: str,
+    system_prompt: str,
+    user_prompt: str,
+) -> str:
+    """Ask an Azure OpenAI chat model a question about an image.
+
+    The image is sent inline as a base64 data URL together with both prompts.
+
+    Args:
+        connection: Provides the API key, API version and endpoint used to
+            build the client.
+        image: Image to send; it is base64-encoded into the user message.
+        model: Passed as ``model`` to the chat completion request.
+        system_prompt: Content of the system message.
+        user_prompt: Text part of the user message that accompanies the image.
+
+    Returns:
+        The message content of the first choice, or an empty string when that
+        content is None.
+    """
+    # Encode the bytes directly; no intermediate stream is needed.
+    encoded_image = base64.b64encode(image).decode("utf-8")
+    # Label the data URL with the detected format instead of assuming JPEG.
+    image_url = f"data:{_detect_image_mime(image)};base64,{encoded_image}"
+
+    client = AzureOpenAI(
+        api_key=connection.api_key,
+        api_version=connection.api_version,
+        azure_endpoint=connection.api_base,
+    )
+    response = client.chat.completions.create(
+        model=model,
+        messages=[
+            {
+                "role": "system",
+                "content": system_prompt,
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": image_url},
+                    },
+                    {
+                        "type": "text",
+                        "text": user_prompt,
+                    },
+                ],
+            },
+        ],
+    )
+    # Fall back to "" so the declared ``str`` return type always holds.
+    return response.choices[0].message.content or ""
diff --git a/datasets/contoso-receipt.png b/datasets/contoso-receipt.png
new file mode 100644
index 0000000..33e85d6
Binary files /dev/null and b/datasets/contoso-receipt.png differ