implement image qa flow

ks6088ts-labs · Aug 30, 2024 · 55b48c3 · 55b48c3
1 parent 670ed51
commit 55b48c3
Show file tree

Hide file tree

Showing 7 changed files with 122 additions and 46 deletions.
diff --git a/apps/11_promptflow/README.md b/apps/11_promptflow/README.md
@@ -177,6 +177,32 @@ $ pf run create \
 $ pf run show-details --name $RUN_NAME
 ```
 
+### image_qa
+
+To run the image QA flow with GPT-4o, we customize an LLM tool.
+The following documents provide more details:
+
+- docs: [Customizing an LLM Tool](https://microsoft.github.io/promptflow/how-to-guides/develop-a-tool/customize_an_llm_tool.html)
+- example code: [promptflow/examples/flows/chat/chat-with-image](https://github.com/microsoft/promptflow/tree/main/examples/flows/chat/chat-with-image)
+
+With the image QA flow sample, you can ask questions about an image and get answers from the model.
+
+```shell
+cd apps/11_promptflow/image_qa
+
+# Create run with multiple lines data
+$ RUN_NAME=image_qa-$(date +%s)
+$ pf run create \
+    --name $RUN_NAME \
+    --flow . \
+    --data ./data.jsonl \
+    --column-mapping image='${data.image}' \
+    --stream
+
+# Show run details
+$ pf run show-details --name $RUN_NAME
+```
+
 ## References
 
 - [Prompt flow > repos](https://github.com/microsoft/promptflow)

diff --git a/apps/11_promptflow/image_qa/.promptflow/flow.tools.json b/apps/11_promptflow/image_qa/.promptflow/flow.tools.json
@@ -1,26 +1,37 @@
 {
-    "package": {},
-    "code": {
-        "hello.jinja2": {
-            "type": "prompt",
-            "inputs": {
-                "text": {
-                    "type": [
-                        "string"
-                    ]
-                }
-            }
+  "package": {},
+  "code": {
+    "hello.py": {
+      "type": "python",
+      "inputs": {
+        "connection": {
+          "type": [
+            "AzureOpenAIConnection"
+          ]
         },
-        "hello.py": {
-            "type": "python",
-            "inputs": {
-                "input1": {
-                    "type": [
-                        "string"
-                    ]
-                }
-            },
-            "function": "my_python_tool"
+        "image": {
+          "type": [
+            "image"
+          ]
+        },
+        "model": {
+          "type": [
+            "string"
+          ]
+        },
+        "system_prompt": {
+          "type": [
+            "string"
+          ]
+        },
+        "user_prompt": {
+          "type": [
+            "string"
+          ]
         }
+      },
+      "source": "hello.py",
+      "function": "my_python_tool"
     }
+  }
 }
diff --git a/apps/11_promptflow/image_qa/data.jsonl b/apps/11_promptflow/image_qa/data.jsonl
@@ -1 +1 @@
-{"text": "Hello World!"}
+{"image": "../../../datasets/contoso-receipt.png"}
diff --git a/apps/11_promptflow/image_qa/flow.dag.yaml b/apps/11_promptflow/image_qa/flow.dag.yaml
@@ -1,25 +1,32 @@
 $schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
+environment:
+  python_requirements_txt: requirements.txt
 inputs:
-  text:
+  user_prompt:
+    type: string
+    default: Please extract texts from the image
+  system_prompt:
     type: string
+    default: You are an excellent OCR tool
+  image:
+    type: image
+    default: ../../../datasets/contoso-receipt.png
+  model:
+    type: string
+    default: gpt-4o
 outputs:
   output_prompt:
     type: string
-    reference: ${echo_my_prompt.output}
+    reference: ${image_qa.output}
 nodes:
-- name: hello_prompt
-  type: prompt
-  source:
-    type: code
-    path: hello.jinja2
-  inputs:
-    text: ${inputs.text}
-- name: echo_my_prompt
+- name: image_qa
   type: python
   source:
     type: code
     path: hello.py
   inputs:
-    input1: ${hello_prompt.output}
-environment:
-  python_requirements_txt: requirements.txt
+    connection: open_ai_connection
+    image: ${inputs.image}
+    system_prompt: ${inputs.system_prompt}
+    user_prompt: ${inputs.user_prompt}
+    model: ${inputs.model}
diff --git a/apps/11_promptflow/image_qa/hello.jinja2 b/apps/11_promptflow/image_qa/hello.jinja2
diff --git a/apps/11_promptflow/image_qa/hello.py b/apps/11_promptflow/image_qa/hello.py
@@ -1,14 +1,48 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
+import base64
+import io
 
+from openai import AzureOpenAI
+from promptflow.connections import AzureOpenAIConnection
+from promptflow.contracts.multimedia import Image
 from promptflow.core import tool
 
-# The inputs section will change based on the arguments of the tool function, after you save the code
-# Adding type to arguments and return value will help the system show the types properly
-# Please update the function name/signature per need
-
 
 @tool
-def my_python_tool(input1: str) -> str:
-    return "Prompt: " + input1
+def my_python_tool(
+    connection: AzureOpenAIConnection,
+    image: Image,
+    model: str,
+    system_prompt: str,
+    user_prompt: str,
+) -> str:
+    image_stream = io.BytesIO(image)
+    encoded_image = base64.b64encode(image_stream.read()).decode("utf-8")
+
+    client = AzureOpenAI(
+        api_key=connection.api_key,
+        api_version=connection.api_version,
+        azure_endpoint=connection.api_base,
+    )
+    response = client.chat.completions.create(
+        model=model,
+        messages=[
+            {
+                "role": "system",
+                "content": system_prompt,
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
+                    },
+                    {
+                        "type": "text",
+                        "text": user_prompt,
+                    },
+                ],
+            },
+        ],
+    )
+    return response.choices[0].message.content
diff --git a/datasets/contoso-receipt.png b/datasets/contoso-receipt.png
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		{"text": "Hello World!"}
		{"image": "../../../datasets/contoso-receipt.png"}