From 5b90b7789fc9b527f3be669e74bd95ce195636a5 Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Mon, 26 May 2025 10:49:20 +0800 Subject: [PATCH 1/6] update --- wren-ai-service/src/pipelines/generation/utils/sql.py | 1 + 1 file changed, 1 insertion(+) diff --git a/wren-ai-service/src/pipelines/generation/utils/sql.py b/wren-ai-service/src/pipelines/generation/utils/sql.py index d57cf6c1b6..33ed05d8a9 100644 --- a/wren-ai-service/src/pipelines/generation/utils/sql.py +++ b/wren-ai-service/src/pipelines/generation/utils/sql.py @@ -174,6 +174,7 @@ async def _task(sql: str): - DON'T USE "FILTER(WHERE )" clause in the generated SQL query. - DON'T USE "EXTRACT(EPOCH FROM )" clause in the generated SQL query. - DON'T USE INTERVAL or generate INTERVAL-like expression in the generated SQL query. +- Aggregate functions are not allowed in the WHERE clause. Instead, they belong in the HAVING clause, which is used to filter after aggregation. - ONLY USE JSON_QUERY for querying fields if "json_type":"JSON" is identified in the columns comment, NOT the deprecated JSON_EXTRACT_SCALAR function. - DON'T USE CAST for JSON fields, ONLY USE the following funtions: - LAX_BOOL for boolean fields From cf6eb2cacbbc4a22343ee9a7c992159948444dbe Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Wed, 28 May 2025 14:00:26 +0800 Subject: [PATCH 2/6] refactor prompts --- .../generation/followup_sql_generation.py | 24 +++++++++++++++---- .../followup_sql_generation_reasoning.py | 6 +++-- .../generation/intent_classification.py | 6 +++-- .../pipelines/generation/sql_generation.py | 24 +++++++++++++++---- .../generation/sql_generation_reasoning.py | 6 +++-- .../pipelines/generation/sql_regeneration.py | 24 +++++++++++++++---- .../src/pipelines/generation/utils/sql.py | 21 +++++++--------- 7 files changed, 77 insertions(+), 34 deletions(-) diff --git a/wren-ai-service/src/pipelines/generation/followup_sql_generation.py b/wren-ai-service/src/pipelines/generation/followup_sql_generation.py index 148fef908f..6cb7869571 100644 --- a/wren-ai-service/src/pipelines/generation/followup_sql_generation.py +++ b/wren-ai-service/src/pipelines/generation/followup_sql_generation.py @@ -13,8 +13,10 @@ from src.pipelines.generation.utils.sql import ( SQL_GENERATION_MODEL_KWARGS, SQLGenPostProcessor, + calculated_field_instructions, construct_ask_history_messages, construct_instructions, + metric_instructions, sql_generation_system_prompt, ) from src.pipelines.retrieval.sql_functions import SqlFunction @@ -34,9 +36,12 @@ {{ document }} {% endfor %} -{% if instructions %} -### INSTRUCTIONS ### -{{ instructions }} +{% if calculated_field_instructions %} +{{ calculated_field_instructions }} +{% endif %} + +{% if metric_instructions %} +{{ metric_instructions }} {% endif %} {% if sql_functions %} @@ -56,6 +61,13 @@ {% endfor %} {% endif %} +{% if instructions %} +### USER INSTRUCTIONS ### +{% for instruction in instructions %} + {{ loop.index }}. 
{{ instruction }} +{% endfor %} +{% endif %} + ### QUESTION ### User's Follow-up Question: {{ query }} Current Time: {{ current_time }} @@ -87,10 +99,12 @@ def prompt( sql_generation_reasoning=sql_generation_reasoning, instructions=construct_instructions( configuration, - has_calculated_field, - has_metric, instructions, ), + calculated_field_instructions=calculated_field_instructions + if has_calculated_field + else "", + metric_instructions=metric_instructions if has_metric else "", current_time=configuration.show_current_time(), sql_samples=sql_samples, sql_functions=sql_functions, diff --git a/wren-ai-service/src/pipelines/generation/followup_sql_generation_reasoning.py b/wren-ai-service/src/pipelines/generation/followup_sql_generation_reasoning.py index 7a538054c6..8b10148256 100644 --- a/wren-ai-service/src/pipelines/generation/followup_sql_generation_reasoning.py +++ b/wren-ai-service/src/pipelines/generation/followup_sql_generation_reasoning.py @@ -57,8 +57,10 @@ {% endif %} {% if instructions %} -### INSTRUCTIONS ### -{{ instructions }} +### USER INSTRUCTIONS ### +{% for instruction in instructions %} + {{ loop.index }}. {{ instruction }} +{% endfor %} {% endif %} ### User's QUERY HISTORY ### diff --git a/wren-ai-service/src/pipelines/generation/intent_classification.py b/wren-ai-service/src/pipelines/generation/intent_classification.py index 27b645f7bc..6e479cc4ae 100644 --- a/wren-ai-service/src/pipelines/generation/intent_classification.py +++ b/wren-ai-service/src/pipelines/generation/intent_classification.py @@ -120,8 +120,10 @@ {% endif %} {% if instructions %} -### INSTRUCTIONS ### -{{ instructions }} +### USER INSTRUCTIONS ### +{% for instruction in instructions %} + {{ loop.index }}. {{ instruction }} +{% endfor %} {% endif %} ### USER GUIDE ### diff --git a/wren-ai-service/src/pipelines/generation/sql_generation.py b/wren-ai-service/src/pipelines/generation/sql_generation.py index 7cda4dc509..f76b5e83b6 100644 --- a/wren-ai-service/src/pipelines/generation/sql_generation.py +++ b/wren-ai-service/src/pipelines/generation/sql_generation.py @@ -13,7 +13,9 @@ from src.pipelines.generation.utils.sql import ( SQL_GENERATION_MODEL_KWARGS, SQLGenPostProcessor, + calculated_field_instructions, construct_instructions, + metric_instructions, sql_generation_system_prompt, ) from src.pipelines.retrieval.sql_functions import SqlFunction @@ -28,9 +30,12 @@ {{ document }} {% endfor %} -{% if instructions %} -### INSTRUCTIONS ### -{{ instructions }} +{% if calculated_field_instructions %} +{{ calculated_field_instructions }} +{% endif %} + +{% if metric_instructions %} +{{ metric_instructions }} {% endif %} {% if sql_functions %} @@ -50,6 +55,13 @@ {% endfor %} {% endif %} +{% if instructions %} +### USER INSTRUCTIONS ### +{% for instruction in instructions %} + {{ loop.index }}. 
{{ instruction }} +{% endfor %} +{% endif %} + ### QUESTION ### User's Question: {{ query }} Current Time: {{ current_time }} @@ -83,10 +95,12 @@ def prompt( sql_generation_reasoning=sql_generation_reasoning, instructions=construct_instructions( configuration, - has_calculated_field, - has_metric, instructions, ), + calculated_field_instructions=calculated_field_instructions + if has_calculated_field + else "", + metric_instructions=metric_instructions if has_metric else "", sql_samples=sql_samples, current_time=configuration.show_current_time(), sql_functions=sql_functions, diff --git a/wren-ai-service/src/pipelines/generation/sql_generation_reasoning.py b/wren-ai-service/src/pipelines/generation/sql_generation_reasoning.py index 278dddae5a..c7aa47d2d8 100644 --- a/wren-ai-service/src/pipelines/generation/sql_generation_reasoning.py +++ b/wren-ai-service/src/pipelines/generation/sql_generation_reasoning.py @@ -54,8 +54,10 @@ {% endif %} {% if instructions %} -### INSTRUCTIONS ### -{{ instructions }} +### USER INSTRUCTIONS ### +{% for instruction in instructions %} + {{ loop.index }}. {{ instruction }} +{% endfor %} {% endif %} ### QUESTION ### diff --git a/wren-ai-service/src/pipelines/generation/sql_regeneration.py b/wren-ai-service/src/pipelines/generation/sql_regeneration.py index 08137258c7..b31b78644a 100644 --- a/wren-ai-service/src/pipelines/generation/sql_regeneration.py +++ b/wren-ai-service/src/pipelines/generation/sql_regeneration.py @@ -14,7 +14,9 @@ SQL_GENERATION_MODEL_KWARGS, TEXT_TO_SQL_RULES, SQLGenPostProcessor, + calculated_field_instructions, construct_instructions, + metric_instructions, ) from src.pipelines.retrieval.sql_functions import SqlFunction from src.web.v1.services import Configuration @@ -45,9 +47,12 @@ {{ document }} {% endfor %} -{% if instructions %} -### INSTRUCTIONS ### -{{ instructions }} +{% if calculated_field_instructions %} +{{ calculated_field_instructions }} +{% endif %} + +{% if metric_instructions %} +{{ metric_instructions }} {% endif %} {% if sql_functions %} @@ -67,6 +72,13 @@ {% endfor %} {% endif %} +{% if instructions %} +### USER INSTRUCTIONS ### +{% for instruction in instructions %} + {{ loop.index }}. 
{{ instruction }} +{% endfor %} +{% endif %} + ### QUESTION ### SQL generation reasoning: {{ sql_generation_reasoning }} Original SQL query: {{ sql }} @@ -95,10 +107,12 @@ def prompt( sql_generation_reasoning=sql_generation_reasoning, instructions=construct_instructions( configuration, - has_calculated_field, - has_metric, instructions, ), + calculated_field_instructions=calculated_field_instructions + if has_calculated_field + else "", + metric_instructions=metric_instructions if has_metric else "", sql_samples=sql_samples, current_time=configuration.show_current_time(), sql_functions=sql_functions, diff --git a/wren-ai-service/src/pipelines/generation/utils/sql.py b/wren-ai-service/src/pipelines/generation/utils/sql.py index 33ed05d8a9..d3d95e2073 100644 --- a/wren-ai-service/src/pipelines/generation/utils/sql.py +++ b/wren-ai-service/src/pipelines/generation/utils/sql.py @@ -398,22 +398,17 @@ async def _task(sql: str): def construct_instructions( configuration: Configuration | None = Configuration(), - has_calculated_field: bool = False, - has_metric: bool = False, instructions: list[dict] | None = None, ): - _instructions = "" - if configuration: - if configuration.fiscal_year: - _instructions += f"\n- For calendar year related computation, it should be started from {configuration.fiscal_year.start} to {configuration.fiscal_year.end}\n\n" - if has_calculated_field: - _instructions += calculated_field_instructions - if has_metric: - _instructions += metric_instructions - if instructions: - _instructions += "\n\n".join( - [f"{instruction.get('instruction')}\n\n" for instruction in instructions] + _instructions = [] + if configuration and configuration.fiscal_year: + _instructions.append( + f"For calendar year related computation, it should be started from {configuration.fiscal_year.start} to {configuration.fiscal_year.end}" ) + if instructions: + _instructions += [ + instruction.get("instruction") for instruction in instructions + ] return _instructions From 8b9146fd39144a439d221a340dbd73ac2c0c14a8 Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Wed, 28 May 2025 14:17:53 +0800 Subject: [PATCH 3/6] refine prompt --- .../src/pipelines/generation/followup_sql_generation.py | 2 +- .../pipelines/generation/followup_sql_generation_reasoning.py | 2 +- .../src/pipelines/generation/intent_classification.py | 2 +- wren-ai-service/src/pipelines/generation/sql_generation.py | 2 +- .../src/pipelines/generation/sql_generation_reasoning.py | 2 +- wren-ai-service/src/pipelines/generation/sql_regeneration.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/wren-ai-service/src/pipelines/generation/followup_sql_generation.py b/wren-ai-service/src/pipelines/generation/followup_sql_generation.py index 6cb7869571..96420048a6 100644 --- a/wren-ai-service/src/pipelines/generation/followup_sql_generation.py +++ b/wren-ai-service/src/pipelines/generation/followup_sql_generation.py @@ -64,7 +64,7 @@ {% if instructions %} ### USER INSTRUCTIONS ### {% for instruction in instructions %} - {{ loop.index }}. {{ instruction }} +{{ loop.index }}. 
{{ instruction }} {% endfor %} {% endif %} diff --git a/wren-ai-service/src/pipelines/generation/followup_sql_generation_reasoning.py b/wren-ai-service/src/pipelines/generation/followup_sql_generation_reasoning.py index 8b10148256..64952acb86 100644 --- a/wren-ai-service/src/pipelines/generation/followup_sql_generation_reasoning.py +++ b/wren-ai-service/src/pipelines/generation/followup_sql_generation_reasoning.py @@ -59,7 +59,7 @@ {% if instructions %} ### USER INSTRUCTIONS ### {% for instruction in instructions %} - {{ loop.index }}. {{ instruction }} +{{ loop.index }}. {{ instruction }} {% endfor %} {% endif %} diff --git a/wren-ai-service/src/pipelines/generation/intent_classification.py b/wren-ai-service/src/pipelines/generation/intent_classification.py index 6e479cc4ae..4acdf2ded9 100644 --- a/wren-ai-service/src/pipelines/generation/intent_classification.py +++ b/wren-ai-service/src/pipelines/generation/intent_classification.py @@ -122,7 +122,7 @@ {% if instructions %} ### USER INSTRUCTIONS ### {% for instruction in instructions %} - {{ loop.index }}. {{ instruction }} +{{ loop.index }}. {{ instruction }} {% endfor %} {% endif %} diff --git a/wren-ai-service/src/pipelines/generation/sql_generation.py b/wren-ai-service/src/pipelines/generation/sql_generation.py index f76b5e83b6..f59e704867 100644 --- a/wren-ai-service/src/pipelines/generation/sql_generation.py +++ b/wren-ai-service/src/pipelines/generation/sql_generation.py @@ -58,7 +58,7 @@ {% if instructions %} ### USER INSTRUCTIONS ### {% for instruction in instructions %} - {{ loop.index }}. {{ instruction }} +{{ loop.index }}. {{ instruction }} {% endfor %} {% endif %} diff --git a/wren-ai-service/src/pipelines/generation/sql_generation_reasoning.py b/wren-ai-service/src/pipelines/generation/sql_generation_reasoning.py index c7aa47d2d8..8197310bdb 100644 --- a/wren-ai-service/src/pipelines/generation/sql_generation_reasoning.py +++ b/wren-ai-service/src/pipelines/generation/sql_generation_reasoning.py @@ -56,7 +56,7 @@ {% if instructions %} ### USER INSTRUCTIONS ### {% for instruction in instructions %} - {{ loop.index }}. {{ instruction }} +{{ loop.index }}. {{ instruction }} {% endfor %} {% endif %} diff --git a/wren-ai-service/src/pipelines/generation/sql_regeneration.py b/wren-ai-service/src/pipelines/generation/sql_regeneration.py index b31b78644a..bbd1a0eff6 100644 --- a/wren-ai-service/src/pipelines/generation/sql_regeneration.py +++ b/wren-ai-service/src/pipelines/generation/sql_regeneration.py @@ -75,7 +75,7 @@ {% if instructions %} ### USER INSTRUCTIONS ### {% for instruction in instructions %} - {{ loop.index }}. {{ instruction }} +{{ loop.index }}. 
{{ instruction }} {% endfor %} {% endif %} From dd851b1f4a09fc0f6d7d63a25e1e644fba63828d Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Wed, 28 May 2025 17:44:06 +0800 Subject: [PATCH 4/6] update --- .../followup_sql_generation_reasoning.py | 14 +++++++------- .../pipelines/generation/intent_classification.py | 2 +- .../generation/sql_generation_reasoning.py | 14 +++++++------- .../src/pipelines/generation/utils/sql.py | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/wren-ai-service/src/pipelines/generation/followup_sql_generation_reasoning.py b/wren-ai-service/src/pipelines/generation/followup_sql_generation_reasoning.py index 64952acb86..cd387ee724 100644 --- a/wren-ai-service/src/pipelines/generation/followup_sql_generation_reasoning.py +++ b/wren-ai-service/src/pipelines/generation/followup_sql_generation_reasoning.py @@ -25,13 +25,13 @@ ### INSTRUCTIONS ### 1. Think deeply and reason about the user's question and the database schema, and should consider the user's query history. -2. Give a step by step reasoning plan in order to answer user's question. -3. The reasoning plan should be in the language same as the language user provided in the input. -4. Make sure to consider the current time provided in the input if the user's question is related to the date/time. -5. Don't include SQL in the reasoning plan. -6. Each step in the reasoning plan must start with a number, a title(in bold format in markdown), and a reasoning for the step. -7. If SQL SAMPLES are provided, make sure to consider them in the reasoning plan. -8. If INSTRUCTIONS section is provided, please follow them strictly. +2. If USER INSTRUCTIONS section is provided, make sure to consider them in the reasoning plan. +3. If SQL SAMPLES section is provided, make sure to consider them in the reasoning plan. +4. Give a step by step reasoning plan in order to answer user's question. +5. The reasoning plan should be in the language same as the language user provided in the input. +6. Make sure to consider the current time provided in the input if the user's question is related to the date/time. +7. Don't include SQL in the reasoning plan. +8. Each step in the reasoning plan must start with a number, a title(in bold format in markdown), and a reasoning for the step. 9. Do not include ```markdown or ``` in the answer. 10. A table name in the reasoning plan must be in this format: `table: `. 11. A column name in the reasoning plan must be in this format: `column: .`. diff --git a/wren-ai-service/src/pipelines/generation/intent_classification.py b/wren-ai-service/src/pipelines/generation/intent_classification.py index 4acdf2ded9..2df7e4ce38 100644 --- a/wren-ai-service/src/pipelines/generation/intent_classification.py +++ b/wren-ai-service/src/pipelines/generation/intent_classification.py @@ -27,7 +27,7 @@ ### Instructions ### - **Follow the user's previous questions:** If there are previous questions, try to understand the user's current question as following the previous questions. -- **Consider Both Inputs:** Combine the user's current question and their previous questions together to identify the user's true intent. +- **Consider Context of Inputs:** Combine the user's current question, their previous questions, and the user's instructions together to identify the user's true intent. - **Rephrase Question":** Rewrite follow-up questions into full standalone questions using prior conversation context." - **Concise Reasoning:** The reasoning must be clear, concise, and limited to 20 words. 
- **Language Consistency:** Use the same language as specified in the user's output language for the rephrased question and reasoning. diff --git a/wren-ai-service/src/pipelines/generation/sql_generation_reasoning.py b/wren-ai-service/src/pipelines/generation/sql_generation_reasoning.py index 8197310bdb..c02b2453f9 100644 --- a/wren-ai-service/src/pipelines/generation/sql_generation_reasoning.py +++ b/wren-ai-service/src/pipelines/generation/sql_generation_reasoning.py @@ -22,13 +22,13 @@ ### INSTRUCTIONS ### 1. Think deeply and reason about the user's question and the database schema. -2. Give a step by step reasoning plan in order to answer user's question. -3. The reasoning plan should be in the language same as the language user provided in the input. -4. Make sure to consider the current time provided in the input if the user's question is related to the date/time. -5. Don't include SQL in the reasoning plan. -6. Each step in the reasoning plan must start with a number, a title(in bold format in markdown), and a reasoning for the step. -7. If SQL SAMPLES section is provided, make sure to consider them in the reasoning plan. -8. If INSTRUCTIONS section is provided, please follow them strictly. +2. If USER INSTRUCTIONS section is provided, make sure to consider them in the reasoning plan. +3. If SQL SAMPLES section is provided, make sure to consider them in the reasoning plan. +4. Give a step by step reasoning plan in order to answer user's question. +5. The reasoning plan should be in the language same as the language user provided in the input. +6. Make sure to consider the current time provided in the input if the user's question is related to the date/time. +7. Don't include SQL in the reasoning plan. +8. Each step in the reasoning plan must start with a number, a title(in bold format in markdown), and a reasoning for the step. 9. Do not include ```markdown or ``` in the answer. 10. A table name in the reasoning plan must be in this format: `table: `. 11. A column name in the reasoning plan must be in this format: `column: .`. diff --git a/wren-ai-service/src/pipelines/generation/utils/sql.py b/wren-ai-service/src/pipelines/generation/utils/sql.py index d3d95e2073..372fa2ea1d 100644 --- a/wren-ai-service/src/pipelines/generation/utils/sql.py +++ b/wren-ai-service/src/pipelines/generation/utils/sql.py @@ -217,7 +217,7 @@ async def _task(sql: str): ### GENERAL RULES ### -1. If INSTRUCTIONS section is provided, please follow the instructions strictly. +1. If USER INSTRUCTIONS section is provided, please follow the instructions strictly. 2. If SQL FUNCTIONS section is provided, please choose the appropriate functions from the list and use it in the SQL query. 3. If SQL SAMPLES section is provided, please refer to the samples and learn the usage of the schema structures and how SQL is written based on them. 4. If REASONING PLAN section is provided, please follow the plan strictly. 
From 1a24ff1d508e17fada531d2b114ebccec44e65d1 Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Wed, 28 May 2025 22:38:45 +0800 Subject: [PATCH 5/6] update --- .../generation/followup_sql_generation.py | 2 -- .../followup_sql_generation_reasoning.py | 24 +--------------- .../generation/intent_classification.py | 3 +- .../generation/question_recommendation.py | 8 ------ .../pipelines/generation/sql_generation.py | 2 -- .../generation/sql_generation_reasoning.py | 28 +++---------------- .../pipelines/generation/sql_regeneration.py | 1 - .../src/pipelines/generation/utils/sql.py | 24 ++++++++++++++++ .../src/web/v1/services/__init__.py | 11 -------- .../v1/services/question_recommendation.py | 1 - 10 files changed, 30 insertions(+), 74 deletions(-) diff --git a/wren-ai-service/src/pipelines/generation/followup_sql_generation.py b/wren-ai-service/src/pipelines/generation/followup_sql_generation.py index 96420048a6..0cf6680af7 100644 --- a/wren-ai-service/src/pipelines/generation/followup_sql_generation.py +++ b/wren-ai-service/src/pipelines/generation/followup_sql_generation.py @@ -70,7 +70,6 @@ ### QUESTION ### User's Follow-up Question: {{ query }} -Current Time: {{ current_time }} ### REASONING PLAN ### {{ sql_generation_reasoning }} @@ -105,7 +104,6 @@ def prompt( if has_calculated_field else "", metric_instructions=metric_instructions if has_metric else "", - current_time=configuration.show_current_time(), sql_samples=sql_samples, sql_functions=sql_functions, ) diff --git a/wren-ai-service/src/pipelines/generation/followup_sql_generation_reasoning.py b/wren-ai-service/src/pipelines/generation/followup_sql_generation_reasoning.py index cd387ee724..a7e682c0fd 100644 --- a/wren-ai-service/src/pipelines/generation/followup_sql_generation_reasoning.py +++ b/wren-ai-service/src/pipelines/generation/followup_sql_generation_reasoning.py @@ -12,6 +12,7 @@ from src.core.provider import LLMProvider from src.pipelines.generation.utils.sql import ( construct_instructions, + sql_generation_reasoning_system_prompt, ) from src.web.v1.services import Configuration from src.web.v1.services.ask import AskHistory @@ -19,27 +20,6 @@ logger = logging.getLogger("wren-ai-service") -sql_generation_reasoning_system_prompt = """ -### TASK ### -You are a helpful data analyst who is great at thinking deeply and reasoning about the user's question and the database schema, and you provide a step-by-step reasoning plan in order to answer the user's question. - -### INSTRUCTIONS ### -1. Think deeply and reason about the user's question and the database schema, and should consider the user's query history. -2. If USER INSTRUCTIONS section is provided, make sure to consider them in the reasoning plan. -3. If SQL SAMPLES section is provided, make sure to consider them in the reasoning plan. -4. Give a step by step reasoning plan in order to answer user's question. -5. The reasoning plan should be in the language same as the language user provided in the input. -6. Make sure to consider the current time provided in the input if the user's question is related to the date/time. -7. Don't include SQL in the reasoning plan. -8. Each step in the reasoning plan must start with a number, a title(in bold format in markdown), and a reasoning for the step. -9. Do not include ```markdown or ``` in the answer. -10. A table name in the reasoning plan must be in this format: `table: `. -11. A column name in the reasoning plan must be in this format: `column: .`. 
- -### FINAL ANSWER FORMAT ### -The final answer must be a reasoning plan in plain Markdown string format -""" - sql_generation_reasoning_user_prompt_template = """ ### DATABASE SCHEMA ### {% for document in documents %} @@ -73,7 +53,6 @@ ### QUESTION ### User's Question: {{ query }} -Current Time: {{ current_time }} Language: {{ language }} Let's think step by step. @@ -100,7 +79,6 @@ def prompt( configuration=configuration, instructions=instructions, ), - current_time=configuration.show_current_time(), language=configuration.language, ) diff --git a/wren-ai-service/src/pipelines/generation/intent_classification.py b/wren-ai-service/src/pipelines/generation/intent_classification.py index 2df7e4ce38..958352eead 100644 --- a/wren-ai-service/src/pipelines/generation/intent_classification.py +++ b/wren-ai-service/src/pipelines/generation/intent_classification.py @@ -32,6 +32,7 @@ - **Concise Reasoning:** The reasoning must be clear, concise, and limited to 20 words. - **Language Consistency:** Use the same language as specified in the user's output language for the rephrased question and reasoning. - **Vague Queries:** If the question is vague or does not related to a table or property from the schema, classify it as `MISLEADING_QUERY`. +- **Time-related Queries:** Don't rephrase time-related information in the user's question. ### Intent Definitions ### @@ -143,7 +144,6 @@ {% endif %} User's current question: {{query}} -Current Time: {{ current_time }} Output Language: {{ language }} Let's think step by step @@ -275,7 +275,6 @@ def prompt( instructions=instructions, configuration=configuration, ), - current_time=configuration.show_current_time(), docs=wren_ai_docs, ) diff --git a/wren-ai-service/src/pipelines/generation/question_recommendation.py b/wren-ai-service/src/pipelines/generation/question_recommendation.py index 023873fe48..b956cf9c1c 100644 --- a/wren-ai-service/src/pipelines/generation/question_recommendation.py +++ b/wren-ai-service/src/pipelines/generation/question_recommendation.py @@ -1,6 +1,5 @@ import logging import sys -from datetime import datetime from typing import Any import orjson @@ -22,7 +21,6 @@ def prompt( mdl: dict, previous_questions: list[str], language: str, - current_date: str, max_questions: int, max_categories: int, prompt_builder: PromptBuilder, @@ -37,7 +35,6 @@ def prompt( models=[] if previous_questions else mdl.get("models", []), previous_questions=previous_questions, language=language, - current_date=current_date, max_questions=max_questions, max_categories=max_categories, ) @@ -222,8 +219,6 @@ class QuestionResult(BaseModel): Categories: {{categories}} {% endif %} -Current Date: {{current_date}} - Please generate {{max_questions}} insightful questions for each of the {{max_categories}} categories based on the provided data model. Both the questions and category names should be translated into {{language}}{% if user_question %} and be related to the user's question{% endif %}. The output format should maintain the structure but with localized text. 
""" @@ -255,7 +250,6 @@ async def run( previous_questions: list[str] = [], categories: list[str] = [], language: str = "en", - current_date: str = datetime.now().strftime("%Y-%m-%d %A %H:%M:%S"), max_questions: int = 5, max_categories: int = 3, **_, @@ -268,7 +262,6 @@ async def run( "previous_questions": previous_questions, "categories": categories, "language": language, - "current_date": current_date, "max_questions": max_questions, "max_categories": max_categories, **self._components, @@ -286,7 +279,6 @@ async def run( previous_questions=[], categories=[], language="en", - current_date=datetime.now().strftime("%Y-%m-%d %A %H:%M:%S"), max_questions=5, max_categories=3, ) diff --git a/wren-ai-service/src/pipelines/generation/sql_generation.py b/wren-ai-service/src/pipelines/generation/sql_generation.py index f59e704867..2bb15ba938 100644 --- a/wren-ai-service/src/pipelines/generation/sql_generation.py +++ b/wren-ai-service/src/pipelines/generation/sql_generation.py @@ -64,7 +64,6 @@ ### QUESTION ### User's Question: {{ query }} -Current Time: {{ current_time }} {% if sql_generation_reasoning %} ### REASONING PLAN ### @@ -102,7 +101,6 @@ def prompt( else "", metric_instructions=metric_instructions if has_metric else "", sql_samples=sql_samples, - current_time=configuration.show_current_time(), sql_functions=sql_functions, ) diff --git a/wren-ai-service/src/pipelines/generation/sql_generation_reasoning.py b/wren-ai-service/src/pipelines/generation/sql_generation_reasoning.py index c02b2453f9..b9422b47ed 100644 --- a/wren-ai-service/src/pipelines/generation/sql_generation_reasoning.py +++ b/wren-ai-service/src/pipelines/generation/sql_generation_reasoning.py @@ -10,33 +10,15 @@ from src.core.pipeline import BasicPipeline from src.core.provider import LLMProvider -from src.pipelines.generation.utils.sql import construct_instructions +from src.pipelines.generation.utils.sql import ( + construct_instructions, + sql_generation_reasoning_system_prompt, +) from src.web.v1.services import Configuration logger = logging.getLogger("wren-ai-service") -sql_generation_reasoning_system_prompt = """ -### TASK ### -You are a helpful data analyst who is great at thinking deeply and reasoning about the user's question and the database schema, and you provide a step-by-step reasoning plan in order to answer the user's question. - -### INSTRUCTIONS ### -1. Think deeply and reason about the user's question and the database schema. -2. If USER INSTRUCTIONS section is provided, make sure to consider them in the reasoning plan. -3. If SQL SAMPLES section is provided, make sure to consider them in the reasoning plan. -4. Give a step by step reasoning plan in order to answer user's question. -5. The reasoning plan should be in the language same as the language user provided in the input. -6. Make sure to consider the current time provided in the input if the user's question is related to the date/time. -7. Don't include SQL in the reasoning plan. -8. Each step in the reasoning plan must start with a number, a title(in bold format in markdown), and a reasoning for the step. -9. Do not include ```markdown or ``` in the answer. -10. A table name in the reasoning plan must be in this format: `table: `. -11. A column name in the reasoning plan must be in this format: `column: .`. 
- -### FINAL ANSWER FORMAT ### -The final answer must be a reasoning plan in plain Markdown string format -""" - sql_generation_reasoning_user_prompt_template = """ ### DATABASE SCHEMA ### {% for document in documents %} @@ -62,7 +44,6 @@ ### QUESTION ### User's Question: {{ query }} -Current Time: {{ current_time }} Language: {{ language }} Let's think step by step. @@ -87,7 +68,6 @@ def prompt( instructions=instructions, configuration=configuration, ), - current_time=configuration.show_current_time(), language=configuration.language, ) diff --git a/wren-ai-service/src/pipelines/generation/sql_regeneration.py b/wren-ai-service/src/pipelines/generation/sql_regeneration.py index bbd1a0eff6..748617e2e9 100644 --- a/wren-ai-service/src/pipelines/generation/sql_regeneration.py +++ b/wren-ai-service/src/pipelines/generation/sql_regeneration.py @@ -114,7 +114,6 @@ def prompt( else "", metric_instructions=metric_instructions if has_metric else "", sql_samples=sql_samples, - current_time=configuration.show_current_time(), sql_functions=sql_functions, ) diff --git a/wren-ai-service/src/pipelines/generation/utils/sql.py b/wren-ai-service/src/pipelines/generation/utils/sql.py index 372fa2ea1d..edf4494c55 100644 --- a/wren-ai-service/src/pipelines/generation/utils/sql.py +++ b/wren-ai-service/src/pipelines/generation/utils/sql.py @@ -129,6 +129,30 @@ async def _task(sql: str): return valid_generation_results, invalid_generation_results +sql_generation_reasoning_system_prompt = """ +### TASK ### +You are a helpful data analyst who is great at thinking deeply and reasoning about the user's question and the database schema, and you provide a step-by-step reasoning plan in order to answer the user's question. + +### INSTRUCTIONS ### +1. Think deeply and reason about the user's question, the database schema, and the user's query history if provided. +2. Explicitly state the following information in the reasoning plan: +if the user puts any specific timeframe(e.g. YYYY-MM-DD) in the user's question, you will put the absolute time frame in the SQL query; +Otherwise, you will put the relative timeframe in the SQL query. +3. If USER INSTRUCTIONS section is provided, make sure to consider them in the reasoning plan. +4. If SQL SAMPLES section is provided, make sure to consider them in the reasoning plan. +5. Give a step by step reasoning plan in order to answer user's question. +6. The reasoning plan should be in the language same as the language user provided in the input. +7. Don't include SQL in the reasoning plan. +8. Each step in the reasoning plan must start with a number, a title(in bold format in markdown), and a reasoning for the step. +9. Do not include ```markdown or ``` in the answer. +10. A table name in the reasoning plan must be in this format: `table: `. +11. A column name in the reasoning plan must be in this format: `column: .`. + +### FINAL ANSWER FORMAT ### +The final answer must be a reasoning plan in plain Markdown string format +""" + + TEXT_TO_SQL_RULES = """ ### SQL RULES ### - ONLY USE SELECT statements, NO DELETE, UPDATE OR INSERT etc. statements that might change the data in the database. 
diff --git a/wren-ai-service/src/web/v1/services/__init__.py b/wren-ai-service/src/web/v1/services/__init__.py index 364bc08768..621392bb3f 100644 --- a/wren-ai-service/src/web/v1/services/__init__.py +++ b/wren-ai-service/src/web/v1/services/__init__.py @@ -1,8 +1,6 @@ -from datetime import datetime from typing import Optional import orjson -import pytz from pydantic import BaseModel @@ -31,15 +29,6 @@ class Timezone(BaseModel): name: str = "UTC" utc_offset: str = "" # Deprecated, will be removed in the future - def show_current_time(self): - # Get the current time in the specified timezone - tz = pytz.timezone( - self.timezone.name - ) # Assuming timezone.name contains the timezone string - current_time = datetime.now(tz) - - return f"{current_time.strftime('%Y-%m-%d %A %H:%M:%S')}" # YYYY-MM-DD weekday_name HH:MM:SS, ex: 2024-10-23 Wednesday 12:00:00 - fiscal_year: Optional[FiscalYear] = None language: Optional[str] = "English" timezone: Optional[Timezone] = Timezone() diff --git a/wren-ai-service/src/web/v1/services/question_recommendation.py b/wren-ai-service/src/web/v1/services/question_recommendation.py index 4bdde54e12..2d6255ebcb 100644 --- a/wren-ai-service/src/web/v1/services/question_recommendation.py +++ b/wren-ai-service/src/web/v1/services/question_recommendation.py @@ -189,7 +189,6 @@ async def recommend(self, input: Request, **kwargs) -> Event: "mdl": orjson.loads(input.mdl), "previous_questions": input.previous_questions, "language": input.configuration.language, - "current_date": input.configuration.show_current_time(), "max_questions": input.max_questions, "max_categories": input.max_categories, } From 6a30f32a8ccb222f5197ebf41ca0a59237792e28 Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Thu, 29 May 2025 09:59:17 +0800 Subject: [PATCH 6/6] fix bug --- .../src/web/v1/services/question_recommendation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wren-ai-service/src/web/v1/services/question_recommendation.py b/wren-ai-service/src/web/v1/services/question_recommendation.py index 2d6255ebcb..f8601ea597 100644 --- a/wren-ai-service/src/web/v1/services/question_recommendation.py +++ b/wren-ai-service/src/web/v1/services/question_recommendation.py @@ -225,14 +225,14 @@ async def recommend(self, input: Request, **kwargs) -> Event: except orjson.JSONDecodeError as e: self._handle_exception( - input, + input.event_id, f"Failed to parse MDL: {str(e)}", code="MDL_PARSE_ERROR", trace_id=trace_id, ) except Exception as e: self._handle_exception( - input, + input.event_id, f"An error occurred during question recommendation generation: {str(e)}", trace_id=trace_id, )