Update: add CHART intent classification and chart generation to the v2 conversation service

cyyeh · cyyeh · commit 5559d345e8b5 · 2025-05-02T09:58:11.000+08:00
diff --git a/wren-ai-service/src/globals.py b/wren-ai-service/src/globals.py
@@ -336,6 +336,12 @@ def create_service_container(
                     **pipe_components["sql_answer"],
                     engine_timeout=settings.engine_timeout,
                 ),
+                "chart_generation": generation.ChartGeneration(
+                    **pipe_components["chart_generation"],
+                ),
+                "chart_adjustment": generation.ChartAdjustment(
+                    **pipe_components["chart_adjustment"],
+                ),
             },
             max_histories=settings.max_histories,
         ),
diff --git a/wren-ai-service/src/pipelines/generation/intent_classification.py b/wren-ai-service/src/pipelines/generation/intent_classification.py
@@ -24,7 +24,7 @@
 intent_classification_system_prompt = """
 ### Task ###
 You are an expert detective specializing in intent classification. Combine the user's current question and previous questions to determine their true intent based on the provided database schema or sql data if provided.
-Classify the intent into one of these categories: `MISLEADING_QUERY`, `TEXT_TO_SQL`, `DATA_EXPLORATION`, `GENERAL`, or `USER_GUIDE`. Additionally, provide a concise reasoning (maximum 20 words) for your classification.
+Classify the intent into one of these categories: `MISLEADING_QUERY`, `TEXT_TO_SQL`, `DATA_EXPLORATION`, `GENERAL`, `CHART`, or `USER_GUIDE`. Additionally, provide a concise reasoning (maximum 20 words) for your classification.
 
 ### Instructions ###
 - **Follow the user's previous questions:** If there are previous questions, try to understand the user's current question as following the previous questions.
@@ -67,6 +67,19 @@
 - "List the top 10 products by revenue."
 </TEXT_TO_SQL>
 
+<CHART>
+**When to Use:**  
+- The user's question is about generating a chart.
+
+**Requirements:**  
+- The user's question can be answered by the SQL DATA.
+- SQL DATA is provided.
+- Should pick a SQL from user query histories and the picked SQL should be reflected to the SQL DATA provided.
+
+**Examples:**  
+- "Show me the bar chart of the data"
+</CHART>
+
 <GENERAL>
 **When to Use:**  
 - The user seeks general information about the database schema or its overall capabilities.
@@ -99,9 +112,11 @@
 - The user's inputs is irrelevant to the database schema or includes SQL code.
 - The user's inputs lacks specific details (like table names or columns) needed to generate an SQL query.
 - It appears off-topic or is simply a casual conversation starter.
+- The user's question is about generating a chart but the SQL DATA is not provided.
 
 **Requirements:**  
-- Incorporate phrases from the user's inputs that indicate the lack of relevance to the database schema.
+- For generating SQL: respond to users by incorporating phrases from the user's inputs that indicate the lack of relevance to the database schema.
+- For generating chart: respond to users that we can generate chart only if there is some data available.
 
 **Examples:**  
 - "How are you?"
@@ -115,7 +130,8 @@
 {
     "rephrased_question": "<rephrased question in full standalone question if there are previous questions, otherwise the original question>",
     "reasoning": "<brief chain-of-thought reasoning (max 20 words)>",
-    "results": "MISLEADING_QUERY" | "TEXT_TO_SQL" | "GENERAL" | "USER_GUIDE" | "DATA_EXPLORATION"
+    "results": "MISLEADING_QUERY" | "TEXT_TO_SQL" | "GENERAL" | "USER_GUIDE" | "DATA_EXPLORATION" | "CHART",
+    "sql": "<sql query to be used for generating chart if the intent is CHART, otherwise an empty string>"
 }
 """
 
@@ -314,13 +330,15 @@ def post_process(classify_intent: dict, construct_db_schemas: list[str]) -> dict
             "rephrased_question": results["rephrased_question"],
             "intent": results["results"],
             "reasoning": results["reasoning"],
+            "sql": results["sql"],
             "db_schemas": construct_db_schemas,
         }
     except Exception:
         return {
             "rephrased_question": "",
             "intent": "TEXT_TO_SQL",
             "reasoning": "",
+            "sql": "",
             "db_schemas": construct_db_schemas,
         }
 
diff --git a/wren-ai-service/src/web/v2/services/__init__.py b/wren-ai-service/src/web/v2/services/__init__.py
@@ -146,6 +146,7 @@ async def emit_content_block(
         content_block_label: Optional[str] = None,
         block_type: Literal["tool_use", "text"] = "tool_use",
         stream: bool = False,
+        should_put_in_conversation_history: bool = False,
     ):
         """Emit a complete content block (start → delta → stop)."""
         # 1) start
@@ -159,6 +160,7 @@ async def emit_content_block(
                     "type": block_type,
                     "content_block_label": content_block_label or "",
                     "trace_id": trace_id,
+                    "should_put_in_conversation_history": should_put_in_conversation_history,
                 },
             },
         )
@@ -187,6 +189,7 @@ async def emit_content_block(
                         if block_type == "json"
                         else chunk,
                         "trace_id": trace_id,
+                        "should_put_in_conversation_history": should_put_in_conversation_history,
                     },
                 },
             )
diff --git a/wren-ai-service/src/web/v2/services/conversation.py b/wren-ai-service/src/web/v2/services/conversation.py
@@ -29,8 +29,12 @@ class QuestionResult(BaseModel):
 
 
 class ConversationHistory(BaseModel):
-    request: str
-    response: str
+    class ConversationRequest(BaseModel):
+        query: str
+        additional_info: Optional[dict] = None
+
+    request: ConversationRequest
+    response: dict
 
 
 # POST /v2/conversations
@@ -62,8 +66,9 @@ def convert_conversation_history_to_ask_history(
     conversation_history: list[ConversationHistory],
 ) -> list[AskHistory]:
     return [
-        AskHistory(question=history.request, sql=history.response)
+        AskHistory(question=history.request.query, sql=history.response["sql"])
         for history in conversation_history
+        if history.response.get("sql")
     ]
 
 
@@ -242,6 +247,29 @@ def _run_data_exploration_assistance(
             query_id
         )
 
+    async def _run_chart_generation(
+        self,
+        query: str,
+        sql: str,
+        data: Dict,
+        language: str,
+    ):
+        chart_generation_result = await self._pipelines["chart_generation"].run(
+            query=query,
+            sql=sql,
+            data=data,
+            language=language,
+        )
+
+        return [
+            {
+                "chart_result": chart_generation_result["post_process"]["results"],
+                "sql": sql,
+            }
+        ], {
+            "chart_result": chart_generation_result["post_process"]["results"],
+        }
+
     async def _run_retrieval(
         self,
         query: str,
@@ -514,6 +542,7 @@ async def start_conversation(
                 },
                 content_block_label="HISTORICAL_QUESTION_RETRIEVAL",
                 block_type="tool_use",
+                should_put_in_conversation_history=True,
             ):
                 sql_samples = await self._query_event_manager.emit_content_block(
                     query_id,
@@ -566,6 +595,7 @@ async def start_conversation(
                     "rephrased_question"
                 )
                 db_schemas = intent_classification_result.get("db_schemas")
+                intent_sql = intent_classification_result.get("sql")
 
                 if rephrased_question:
                     user_query = rephrased_question
@@ -631,6 +661,22 @@ async def start_conversation(
                         block_type="text",
                         stream=True,
                     )
+                elif intent == "CHART":
+                    await self._query_event_manager.emit_content_block(
+                        query_id,
+                        trace_id,
+                        index=4,
+                        emit_content_func=self._run_chart_generation,
+                        emit_content_func_kwargs={
+                            "query": user_query,
+                            "sql": intent_sql,
+                            "data": sql_data,
+                            "language": configurations.language,
+                        },
+                        content_block_label="CHART_GENERATION",
+                        block_type="tool_use",
+                        should_put_in_conversation_history=True,
+                    )
                 else:  # TEXT_TO_SQL
                     retrieval_results = (
                         await self._query_event_manager.emit_content_block(
@@ -721,6 +767,7 @@ async def start_conversation(
                                 },
                                 content_block_label="SQL_GENERATION",
                                 block_type="tool_use",
+                                should_put_in_conversation_history=True,
                             )
                         else:
                             text_to_sql_generation_results = await self._query_event_manager.emit_content_block(
@@ -742,6 +789,7 @@ async def start_conversation(
                                 },
                                 content_block_label="SQL_GENERATION",
                                 block_type="tool_use",
+                                should_put_in_conversation_history=True,
                             )
 
                         sql = ""