diff --git a/pkg-py/docs/.gitignore b/pkg-py/docs/.gitignore
index e0c5635d..6fb1e2c8 100644
--- a/pkg-py/docs/.gitignore
+++ b/pkg-py/docs/.gitignore
@@ -4,6 +4,7 @@
*.quarto_ipynb
objects.txt
objects.json
+changelog.md
# Ignore quartodoc artifacts, these are built in CI
_sidebar-python.yml
diff --git a/pkg-py/docs/CHANGELOG.md b/pkg-py/docs/CHANGELOG.md
new file mode 100644
index 00000000..2c33cdf5
--- /dev/null
+++ b/pkg-py/docs/CHANGELOG.md
@@ -0,0 +1,56 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Changes
+
+* The entire functional API (i.e., `init()`, `sidebar()`, `server()`, etc) has been hard deprecated in favor of a simpler OOP-based API. Namely, the new `QueryChat()` class is now the main entry point (instead of `init()`) and has methods to replace old functions (e.g., `.sidebar()`, `.server()`, etc). (#101)
+
+### New features
+
+* New `QueryChat.app()` method enables quicker/easier chatting with a dataset. (#104)
+
+* Enabled bookmarking by default in both `.app()` and `.server()` methods. In the latter case, you'll also need to specify the `bookmark_store` (either in `shiny.App()` or `shiny.express.app_opts()`) for it to take effect. (#104)
+
+* The current SQL query and title can now be programmatically set through the `.sql()` and `.title()` methods of `QueryChat()`. (#98, #101)
+
+* Added a `.generate_greeting()` method to help you create a greeting message for your querychat bot. (#87)
+
+* Added `querychat_reset_dashboard()` tool for easily resetting the dashboard filters when asked by the user. (#81)
+
+### Improvements
+
+* Added rich tool UI support using shinychat development version and chatlas >= 0.11.1. (#67)
+
+* querychat's system prompt and tool descriptions were rewritten for clarity and future extensibility. (#90)
+
+## [0.2.2] - 2025-09-04
+
+* Fixed another issue with data sources that aren't already narwhals DataFrames. (#83)
+
+## [0.2.1] - 2025-09-04
+
+* Fixed an issue with the query tool when used with SQLAlchemy data sources. (@npelikan #79)
+
+## [0.2.0] - 2025-09-02
+
+* `querychat.init()` now accepts a `client` argument, replacing the previous `create_chat_callback` argument. (#60)
+
+ The `client` can be:
+
+ * a `chatlas.Chat` object,
+ * a function that returns a `chatlas.Chat` object,
+ * or a provider-model string, e.g. `"openai/gpt-4.1"`, to be passed to `chatlas.ChatAuto()`.
+
+ If `client` is not provided, querychat will use the `QUERYCHAT_CLIENT` environment variable, which should be a provider-model string. If the envvar is not set, querychat uses OpenAI with the default model from `chatlas.ChatOpenAI()`.
+
+* `querychat.ui()` now adds a `.querychat` class to the chat container and `querychat.sidebar()` adds a `.querychat-sidebar` class to the sidebar, allowing for easier customization via CSS. (#68)
+
+## [0.1.0] - 2025-05-24
+
+This is the first release of the `querychat` package.
diff --git a/pkg-py/docs/_brand.yml b/pkg-py/docs/_brand.yml
deleted file mode 100644
index 0393f067..00000000
--- a/pkg-py/docs/_brand.yml
+++ /dev/null
@@ -1,48 +0,0 @@
-
-color:
- palette:
- blue: "#007bc2"
- indigo: "#4b00c1"
- purple: "#74149c"
- pink: "#bf007f"
- red: "#c10000"
- orange: "#f45100"
- yellow: "#f9b928"
- green: "#00891a"
- teal: "#00bf7f"
- cyan: "#03c7e8"
- white: "#ffffff"
- black: "#1D1F21"
-
- foreground: black
- background: white
- primary: blue
- secondary: gray
- success: green
- info: cyan
- warning: yellow
- danger: red
- light: "#f8f8f8"
- dark: "#212529"
-
-typography:
- fonts:
- - family: Open Sans
- source: bunny
- - family: Source Code Pro
- source: bunny
-
- headings:
- family: Open Sans
- weight: 400
- monospace: Source Code Pro
- monospace-inline:
- color: pink
- background-color: transparent
- size: 0.95em
-
-defaults:
- bootstrap:
- defaults:
- navbar-bg: $brand-blue
- code-color-dark: "#fa88d4"
diff --git a/pkg-py/docs/_quarto.yml b/pkg-py/docs/_quarto.yml
index ce82f054..d65d8205 100644
--- a/pkg-py/docs/_quarto.yml
+++ b/pkg-py/docs/_quarto.yml
@@ -1,11 +1,14 @@
project:
type: website
output-dir: ../../docs/py
+ pre-render:
+    cp ../CHANGELOG.md changelog.md
website:
title: "querychat"
site-url: https://posit-dev.github.io/querychat/py
- description: Chat with your data in Shiny apps
+ description: Explore data using natural language
+ page-navigation: true
bread-crumbs: true
open-graph: true
@@ -21,43 +24,41 @@ website:
[{fig-alt="Posit" width=65px}](https://posit.co)
navbar:
- left:
- - text: Get Started
- href: index.qmd
- - text: "Examples"
- href: examples/index.qmd
+ background: "#193D56"
+ search: true
+ title: 'QueryChat'
+    #title: 'QueryChat'
+
+ right:
- text: API Reference
href: reference/index.qmd
-
- tools:
+ - text: Changelog
+ href: /changelog.html
- icon: github
- menu:
- - text: Source code
- href: https://github.com/posit-dev/querychat/tree/main/pkg-py
- - text: Report a bug
- href: https://github.com/posit-dev/querychat/issues/new
-
+ href: https://github.com/posit-dev/querychat
+ aria-label: GitHub repository
sidebar:
- - id: examples
- title: "Examples"
- style: docked
- type: light
- background: light
- foreground: dark
+ - id: get-started
+ title: Get Started
+ style: floating
+ align: left
contents:
- - href: examples/index.qmd
- - section: "DataFrames"
- contents:
- - href: examples/pandas.qmd
- - section: "Databases"
- contents:
- - href: examples/sqlite.qmd
+ - index.qmd
+ - section: "Overview"
+ contents:
+ - models.qmd
+ - data-sources.qmd
+ - context.qmd
+ - build.qmd
+ - greet.qmd
+ - tools.qmd
+
format:
html:
- theme: [brand]
- highlight-style: github
+ theme:
+ - styles.scss
toc: true
lightbox: auto
@@ -71,18 +72,38 @@ quartodoc:
sidebar: reference/_sidebar.yml
css: reference/_styles-quartodoc.css
sections:
- - title: Get Started
- desc: The basic building blocks of Querychat
+ - title: The Querychat class
+ desc: The starting point for any QueryChat session
contents:
- - init
- - sidebar
- - server
+ - name: QueryChat
+ include_inherited: true
+ - name: express.QueryChat
+ include_inherited: true
- - title: Customize
- desc: Dive deeper into customizing Querychat
+ - title: Reactive values
+ desc: Session-specific reactive values representing the current query
+ contents:
+ - types.ServerValues
+
+ - title: Data Sources
+ desc: The underlying logic for managing data sources
+ contents:
+ - name: types.DataSource
+ signature_name: short
+ - name: types.DataFrameSource
+ signature_name: short
+ - name: types.SQLAlchemySource
+ signature_name: short
+
+ - title: Tools
+ desc: The underlying tools provided to the LLM
contents:
- - ui
- - system_prompt
+ - name: tools.tool_query
+ signature_name: short
+ - name: tools.tool_update_dashboard
+ signature_name: short
+ - name: tools.tool_reset_dashboard
+ signature_name: short
filters:
- "interlinks"
diff --git a/pkg-py/docs/build.qmd b/pkg-py/docs/build.qmd
new file mode 100644
index 00000000..00c92290
--- /dev/null
+++ b/pkg-py/docs/build.qmd
@@ -0,0 +1,518 @@
+---
+title: Build an app
+---
+
+While the [`.app()` method](reference/QueryChat.qmd#querychat.QueryChat.app) provides a quick way to start exploring data, building bespoke Shiny apps with QueryChat unlocks the full power of integrating natural language data exploration with custom visualizations, layouts, and interactivity. This guide shows you how to integrate QueryChat into your own Shiny applications and leverage its reactive data outputs to create rich, interactive dashboards.
+
+## Starter template
+
+Integrating QueryChat into a Shiny app requires just three steps:
+
+1. Initialize a `QueryChat()` instance with your data
+2. Add the QueryChat UI component (either `.sidebar()` or `.ui()`)
+3. Use reactive values like `.df()`, `.sql()`, and `.title()` to build outputs that respond to user queries
+
+Here's a starter template demonstrating these steps:
+
+::: {.panel-tabset group="shiny-mode"}
+
+#### Express
+
+```python
+{{< include /../examples/03-sidebar-express-app.py >}}
+```
+
+#### Core
+
+```python
+{{< include /../examples/03-sidebar-core-app.py >}}
+```
+
+
+:::
+
+::: callout-note
+With Core, you'll need to call the `qc.server()` method within your server function to set up QueryChat's reactive behavior, and capture its return value to access reactive data. This is not necessary with Express, which handles it automatically and exposes reactive values directly on the `QueryChat` instance.
+:::
+
+## Reactives
+
+There are three main reactive values provided by QueryChat for use in your app:
+
+### Filtered data {#filtered-data}
+
+The `.df()` method returns the current filtered and/or sorted data frame. This updates whenever the user prompts a filtering or sorting operation through the chat interface (see [Data updating](tools.qmd#data-updating) for details).
+
+
+::: {.panel-tabset group="shiny-mode"}
+
+#### Express
+
+```python
+@render.data_frame
+def table():
+ return qc.df() # Returns filtered/sorted data
+```
+
+#### Core
+
+```python
+qc_vals = qc.server()
+
+@render.data_frame
+def table():
+ return qc_vals.df() # Returns filtered/sorted data
+```
+
+:::
+
+You can use `.df()` to power any output in your app - visualizations, summary statistics, data tables, and more. When a user asks to "show only survivors" or "sort by age", `.df()` automatically updates, and any outputs that depend on it will re-render.
+
+### SQL query {#sql-query}
+
+The `.sql()` method returns the current SQL query as a string. This is useful for displaying the query to users for transparency and reproducibility:
+
+::: {.panel-tabset group="shiny-mode"}
+
+#### Express
+
+```python
+@render.text
+def current_query():
+ return qc.sql() or "SELECT * FROM my_data"
+```
+
+#### Core
+
+```python
+qc_vals = qc.server()
+
+@render.text
+def current_query():
+ return qc_vals.sql() or "SELECT * FROM my_data"
+```
+
+:::
+
+You can also use `.sql()` as a setter to programmatically update the query (see [Programmatic filtering](#programmatic-filtering) below).
+
+### Title {#title}
+
+The `.title()` method returns a short description of the current filter, provided by the LLM when it generates a query. For example, if a user asks to "show first-class survivors", the title might be "First-class survivors".
+
+::: {.panel-tabset group="shiny-mode"}
+
+#### Express
+
+```python
+@render.text
+def card_title():
+ return qc.title() or "All Data"
+```
+
+#### Core
+
+```python
+qc_vals = qc.server()
+
+@render.text
+def card_title():
+ return qc_vals.title() or "All Data"
+```
+
+:::
+
+Returns `None` when no filter is active. You can also use `.title()` as a setter to update the title programmatically.
+
+## Custom UI
+
+In the starter template above, we used the `.sidebar()` method for a simple sidebar layout. In some cases, you might want to place the chat UI somewhere else in your app layout, or just more fully customize what goes in the sidebar. The `.ui()` method is designed for this -- it returns the chat component without additional layout wrappers.
+
+For example here is how to place the chat in a sidebar with some additional controls:
+
+::: {.panel-tabset group="shiny-mode"}
+
+#### Express
+
+```python
+from shiny.express import ui, reactive
+from querychat.express import QueryChat
+
+qc = QueryChat(data, "my_data")
+
+with ui.sidebar():
+ qc.ui() # Chat component
+ ui.hr()
+ ui.input_action_button("reset", "Reset Filters", class_="w-100")
+```
+
+#### Core
+
+```python
+from shiny import ui, reactive
+from querychat import QueryChat
+
+qc = QueryChat(data, "my_data")
+
+app_ui = ui.page_sidebar(
+ ui.sidebar(
+ qc.ui(), # Chat component
+ ui.hr(),
+ ui.input_action_button("reset", "Reset Filters", class_="w-100"),
+ ),
+ # Main content here
+)
+```
+
+:::
+
+::: callout-tip
+### Custom Shiny chat UIs
+
+Learn more about customizing Shiny chat UIs in the [Shiny Chat documentation](https://shiny.posit.co/py/docs/genai-chatbots.html#layout).
+:::
+
+
+## Data views
+
+Thanks to Shiny's support for [Jupyter Widgets](https://shiny.posit.co/py/docs/jupyter-widgets.html) like [Plotly](https://shiny.posit.co/py/components/outputs/plot-plotly/), it's straightforward to create rich data views that depend on QueryChat data. Here's an example of an app showing both the filtered data and a bar chart depending on that same data:
+
+
+```python
+import plotly.express as px
+
+from seaborn import load_dataset
+from shiny.express import render, ui
+from shinywidgets import render_plotly
+
+from querychat.express import QueryChat
+
+titanic = load_dataset("titanic")
+qc = QueryChat(titanic, "titanic")
+qc.sidebar()
+
+with ui.layout_columns():
+ with ui.card():
+ ui.card_header("Data Table")
+
+ @render.data_frame
+ def table():
+ return qc.df()
+
+ with ui.card():
+ ui.card_header("Survival by Class")
+
+ @render_plotly
+ def survival_plot():
+ d = qc.df()
+ summary = d.groupby('pclass')['survived'].mean().reset_index()
+ return px.bar(summary, x='pclass', y='survived')
+```
+
+Now when a user filters the data through natural language (e.g., "filter to only children"), both the table and the chart update automatically.
+
+{fig-alt="Screenshot of a querychat app showing both a data table and a bar chart of survival by class." class="lightbox shadow rounded mb-3"}
+
+A more useful, but slightly more involved example like the one below might incorporate other [Shiny components](https://shiny.posit.co/py/components/) like value boxes to summarize key statistics about the filtered data.
+
+
+
+
+```python
+from shiny.express import render, ui
+from shinywidgets import render_plotly
+from querychat.express import QueryChat
+from seaborn import load_dataset
+from faicons import icon_svg
+import plotly.express as px
+
+titanic = load_dataset("titanic")
+qc = QueryChat(titanic, "titanic")
+qc.sidebar()
+
+with ui.layout_column_wrap(fill=False):
+ with ui.value_box(showcase=icon_svg("users")):
+ "Passengers"
+
+ @render.text
+ def count():
+ return str(len(qc.df()))
+
+ with ui.value_box(showcase=icon_svg("heart")):
+ "Survival Rate"
+
+ @render.text
+ def survival():
+ rate = qc.df()['survived'].mean() * 100
+ return f"{rate:.1f}%"
+
+ with ui.value_box(showcase=icon_svg("coins")):
+ "Avg Fare"
+
+ @render.text
+ def fare():
+ avg = qc.df()['fare'].mean()
+ return f"${avg:.2f}"
+
+with ui.layout_columns():
+ with ui.card():
+ with ui.card_header():
+ "Data Table"
+
+ @render.text
+ def table_title():
+ return f" - {qc.title()}" if qc.title() else ""
+
+ @render.data_frame
+ def data_table():
+ return qc.df()
+
+ with ui.card():
+ ui.card_header("Survival by Class")
+
+ @render_plotly
+ def survival_by_class():
+ df = qc.df()
+ summary = df.groupby('pclass')['survived'].mean().reset_index()
+ return px.bar(
+ summary,
+ x='pclass',
+ y='survived',
+ labels={'pclass': 'Class', 'survived': 'Survival Rate'},
+ )
+
+with ui.layout_columns():
+ with ui.card():
+ ui.card_header("Age Distribution")
+
+ @render_plotly
+ def age_dist():
+ df = qc.df()
+ return px.histogram(df, x='age', nbins=30)
+
+ with ui.card():
+ ui.card_header("Fare by Class")
+
+ @render_plotly
+ def fare_by_class():
+ df = qc.df()
+ return px.box(df, x='pclass', y='fare', color='survived')
+
+ui.page_opts(
+ title="Titanic Survival Analysis",
+ fillable=True,
+ class_="bslib-page-dashboard",
+)
+```
+
+app.py
-The small open source models (8B and below) we've tested have fared extremely poorly. Sorry. 🤷
+
+Explore data using natural language queries +
-### Powered by SQL + -querychat does not have direct access to the raw data; it can _only_ read or filter the data by writing SQL `SELECT` statements. This is crucial for ensuring relability, transparency, and reproducibility: -- **Reliability:** Today's LLMs are excellent at writing SQL, but bad at direct calculation. -- **Transparency:** querychat always displays the SQL to the user, so it can be vetted instead of blindly trusted. -- **Reproducibility:** The SQL query can be easily copied and reused. +Querychat makes it easy to explore data with natural language through the power of [Shiny](https://shiny.posit.co/py) and large language models (LLMs). Start chatting with your data in just one line of code. Or, with a few more lines, design your own rich user experience around data exploration and analysis through natural language. -Currently, querychat uses DuckDB for its SQL engine. It's extremely fast and has a surprising number of [statistical functions](https://duckdb.org/docs/stable/sql/functions/aggregates.html#statistical-aggregates). - -## Customizing querychat - -### Provide a greeting (recommended) - -When the querychat UI first appears, you will usually want it to greet the user with some basic instructions. By default, these instructions are auto-generated every time a user arrives; this is slow, wasteful, and unpredictable. Instead, you should create a file called `greeting.md`, and when calling `querychat.init`, pass `greeting=Path("greeting.md")`. - -You can provide suggestions to the user by using the ` ` tag. - -For example: +## Installation -```markdown -* **Filter and sort the data:** - * Show only survivors - * Filter to first class passengers under 30 - * Sort by fare from highest to lowest +Install the latest stable release [from PyPI](https://pypi.org/project/querychat/): -* **Answer questions about the data:** - * What was the survival rate by gender? - * What's the average age of children who survived? 
- * How many passengers were traveling alone? +```bash +pip install querychat ``` -These suggestions appear in the greeting and automatically populate the chat text box when clicked. -This gives the user a few ideas to explore on their own. -You can see this behavior in our [`querychat template`](https://shiny.posit.co/py/templates/querychat/). - -### Generate a greeting +## Quick start -If you need help coming up with a greeting, you can use the `querychat.greeting()` function to generate one: +The main entry point is the [`QueryChat` class](reference/QueryChat.qmd). It requires a [data source](data-sources.qmd) (e.g., pandas, polars, etc) and a name for the data. It also accepts optional parameters to customize the behavior, such as the `client` [model](models.qmd). +The quickest way to start chatting is to call the `.app()` method, which returns a Shiny app object. -```python -import querychat -from palmerpenguins import load_penguins -# Create config with your dataset -penguins = load_penguins() -penguins_config = querychat.init(penguins, "penguins") +```{.python filename="titanic-app.py"} +from seaborn import load_dataset +from querychat import QueryChat -# Generate a greeting -querychat.greeting(penguins_config) -#> Hello! I'm here to help you explore and analyze the penguins dataset. -#> Here are some example prompts you can try: -#> ... - -# Update the config with the generated greeting -penguins_config = querychat.init( - penguins, - "penguins", - greeting="Hello! I'm here to help you explore and analyze the penguins dataset..." -) +titanic = load_dataset("titanic") +qc = QueryChat(titanic, "titanic", client="openai/gpt-4.1") +app = qc.app() ``` -This will use the LLM to generate a friendly greeting message with sample prompts. 
-In Shiny apps, you could also generate the greeting once when the app starts up so that it's shared among all users: +With the above code saved to `titanic-app.py` and an API key set[^api-key], you can [run the app](https://shiny.posit.co/py/get-started/create-run.html#run-your-shiny-application) from a terminal (or [VSCode](https://marketplace.visualstudio.com/items?itemName=Posit.shiny)): -```python -penguins_config = querychat.init(penguins, "penguins") -penguins_config.greeting = querychat.greeting(penguins_config, echo="none") +```bash +export OPENAI_API_KEY="your_api_key_here" +shiny run --reload titanic-app.py ``` -### Augment the system prompt (recommended) +[^api-key]: By default, Querychat uses OpenAI to power the chat experience. So, for this example to work, you'll need [an OpenAI API key](https://platform.openai.com/). See the [Models](models.qmd) page for details on how to set up credentials for other model providers. -In LLM parlance, the _system prompt_ is the set of instructions and specific knowledge you want the model to use during a conversation. querychat automatically creates a system prompt which is comprised of: +Once running, you'll notice 3 main views: -1. The basic set of behaviors the LLM must follow in order for querychat to work properly. (See `querychat/prompt/prompt.md` if you're curious what this looks like.) -2. The SQL schema of the data frame you provided. -3. (Optional) Any additional description of the data you choose to provide. -4. (Optional) Any additional instructions you want to use to guide querychat's behavior. +1. A sidebar chat with suggestions on where to start exploring. +2. A data table that updates to reflect filtering and sorting queries. +3. The SQL query behind the data table, for transparency and reproducibility. -#### Data description +{fig-alt="Screenshot of querychat's app with the titanic dataset." 
class="lightbox shadow rounded mb-3"} -If you give querychat your dataset and nothing else, it will provide the LLM with the basic schema of your data: +Suppose we pick a suggestion like "Show me passengers who survived". Since this is a filtering operation, both the data table and SQL query update accordingly. -- Column names -- DuckDB data type (integer, float, boolean, datetime, text) -- For text columns with less than 10 unique values, we assume they are categorical variables and include the list of values. This threshold is configurable. -- For integer and float columns, we include the range +{fig-alt="Screenshot of the querychat's app with the titanic dataset filtered to passengers who survived." class="lightbox shadow rounded mb-3"} -And that's all the LLM will know about your data. -The actual data does not get passed into the LLM. -We calculate these values before we pass the schema information into the LLM. +Querychat can also handle more general questions about the data that require calculations and aggregations. For example, we can ask "What is the average age of passengers who survived?". In this case, querychat will generate/execute the SQL query to perform the relevant calculation, and return the result in the chat: -If the column names are usefully descriptive, it may be able to make a surprising amount of sense out of the data. But if your data frame's columns are `x`, `V1`, `value`, etc., then the model will need to be given more background info--just like a human would. +{fig-alt="Screenshot of the querychat's app with a summary statistic inlined in the chat." class="lightbox shadow rounded mb-3"} -To provide this information, use the `data_description` argument. For example, if you're using the `titanic` dataset, you might create a `data_description.md` like this: +As you'll learn later in [Build an app](build.qmd), you can also access the SQL query and filtered/sorted data frame programmatically for use elsewhere in your app. 
This makes it rather seemless to have natural language interaction with your data alongside other visualizations and analyses. -```markdown -This dataset contains information about Titanic passengers, collected for predicting survival. +Before we build though, let's take a moment to better understand how querychat works under the hood, and whether it's right for you. -- survived: Survival (0 = No, 1 = Yes) -- pclass: Ticket class (1 = 1st, 2 = 2nd, 3 = 3rd) -- sex: Sex of passenger -- age: Age in years -- sibsp: Number of siblings/spouses aboard -- parch: Number of parents/children aboard -- fare: Passenger fare -- embarked: Port of embarkation (C = Cherbourg, Q = Queenstown, S = Southampton) -- class: Same as pclass but as text -- who: Man, woman, or child -- adult_male: Boolean for adult males -- deck: Deck of the ship -- embark_town: Town of embarkation -- alive: Survival status as text -- alone: Whether the passenger was alone -``` -which you can then pass via: - -```python -qc_config = querychat.init( - titanic, - "titanic", - data_description=Path("data_description.md") -) -``` - -querychat doesn't need this information in any particular format; just put whatever information, in whatever format, you think a human would find helpful. +## How it works -#### Additional instructions +Querychat leverages LLMs incredible capability to translate natural language into SQL queries. Frontier models are shockingly good at this task, but even the best models still need to know the overall data structure to perform well. For this reason, querychat supplies a [system prompt](context.qmd) with the schema of the data (i.e., column names, types, ranges, etc), but never the raw data itself. -You can add additional instructions of your own to the end of the system prompt, by passing `extra_instructions` into `querychat.init`. 
+When the LLM generates a SQL query, querychat executes it against a SQL database (DuckDB[^duckdb] by default) to get results in a **safe**, **reliable**, and **verifiable** manner. In short, this execution is **safe** since only `SELECT` statements are allowed, **reliable** since the database engine handles all calculations, and **verifiable** since the user can always see the SQL query that was run. This makes querychat a trustworthy tool for data exploration, as every action taken by the LLM is transparent and independently reproducible. -```python -qc_config = querychat.init( - titanic, - "titanic", - extra_instructions=[ - "You're speaking to a British audience--please use appropriate spelling conventions.", - "Use lots of emojis! 😃 Emojis everywhere, 🌍 emojis forever. ♾️", - "Stay on topic, only talk about the data dashboard and refuse to answer other questions." - ] -) -``` -You can also put these instructions in a separate file and use `Path("instructions.md")` to load them, as we did for `data_description` above. +::: callout-important +### Data privacy -**Warning:** It is not 100% guaranteed that the LLM will always—or in many cases, ever—obey your instructions, and it can be difficult to predict which instructions will be a problem. So be sure to test extensively each time you change your instructions, and especially, if you change the model you use. +See the [Provide context](context.qmd) and [Tools](tools.qmd) articles to learn more about what information is provided to the LLM and what it's capable of doing with code execution. +:::: -### Use a different LLM provider +[^duckdb]: Duckdb is extremely fast and has a surprising number of [statistical functions](https://duckdb.org/docs/stable/sql/functions/aggregates.html#statistical-aggregates). -By default, querychat uses GPT-4o via the OpenAI API. 
If you want to use a different model, you can provide a `create_chat_callback` function that takes a `system_prompt` parameter, and returns a chatlas Chat object: +### Bespoke interfaces -```python -import querychat -import chatlas +While the quickstart app is a great way to get started, querychat is designed to be highly extensible. +You can not only customize the underlying model and data source, but also build fully custom Shiny apps around the core chat functionality. -qc_config = querychat.init( - titanic, - "titanic", - client=chatlas.ChatAnthropic(model="claude-3-7-sonnet-latest") -) -``` +For a motivating example, consider the following ([sidebot](https://shiny.posit.co/py/docs/genai-inspiration.html#sidebot)) app that leverages querychat's tooling to create reactive summaries and visualizations based on the user's natural language queries: -This would use Claude 3.7 Sonnet instead, which would require you to provide an API key. See the [chatlas documentation](https://github.com/posit-dev/chatlas) for more information on how to authenticate with different providers. +{fig-alt="Screenshot of sidebot, a custom shiny app built with querychat." class="lightbox shadow rounded mb-3"} -## Complete example -For a complete working example, see the [examples/app-dataframe.py](examples/app-dataframe.py) file in the repository. -This example includes: +## Next steps -- Loading a dataset -- Reading greeting and data description from files -- Setting up the querychat configuration -- Creating a Shiny UI with the chat sidebar -- Displaying the filtered data in the main panel +From here, you might want to learn more about: -If you have Shiny installed, and want to get started right away, you can use our -[querychat template](https://shiny.posit.co/py/templates/querychat/) -or -[sidebot template](https://shiny.posit.co/py/templates/sidebot/). +- [Models](models.qmd): customize the LLM behind querychat. 
+- [Data sources](data-sources.qmd): different data sources you can use with querychat. +- [Provide context](context.qmd): provide the LLM with the context it needs to work well. +- [Build an app](build.qmd): design a custom Shiny app around querychat. diff --git a/pkg-py/docs/models.qmd b/pkg-py/docs/models.qmd new file mode 100644 index 00000000..3d19e133 --- /dev/null +++ b/pkg-py/docs/models.qmd @@ -0,0 +1,88 @@ +--- +title: Models +--- + +Under the hood, `querychat` is powered by [chatlas](https://posit-dev.github.io/chatlas/), a library for building chat-based applications with large language models (LLMs). `chatlas` supports a wide range of LLM providers -- [see here](https://posit-dev.github.io/chatlas/get-started/models.html) for a full list. + +### Specify a model + +To use a particular model, pass a `"{provider}/{model}"` string to the `client` parameter. Under the hood, this gets passed along to [`chatlas.ChatAuto`](https://posit-dev.github.io/chatlas/reference/ChatAuto.html) + +```python +from querychat import QueryChat +from seaborn import load_dataset +titanic = load_dataset("titanic") + +qc = QueryChat( + titanic, + "titanic", + client="anthropic/claude-sonnet-4-5" +) +``` + +And, if you'd like to effectively set a new default model, you can use the `QUERYCHAT_CLIENT` environment variable. + +```shell +export QUERYCHAT_CLIENT="anthropic/claude-sonnet-4-5" +``` + +Note that it can also be useful to pass a full `Chat` object to the `client` parameter for more advanced use cases (e.g., custom [parameters](https://posit-dev.github.io/chatlas/get-started/parameters.html), [tools](https://posit-dev.github.io/chatlas/get-started/tools.html), etc). It can also be useful for getting some [helpful autocomplete](https://posit-dev.github.io/chatlas/get-started/models.html#model-type-hints) of available models. 
+ +```python +from chatlas import ChatAnthropic + +client = ChatAnthropic(model="claude-sonnet-4-5") +``` + +### Credentials + +Most models require an API key or some other form of authentication. See the reference page for the relevant [model provider](https://posit-dev.github.io/chatlas/get-started/models.html) (e.g., [ChatAnthropic](https://posit-dev.github.io/chatlas/reference/ChatAnthropic.html)) to learn more on how to set up credentials. + +::: callout-tip +### Github model marketplace + +If you are already setup with Github credentials, [Github model marketplace](https://github.com/marketplace/models) provides a free and easy way to get started. See [here](https://posit-dev.github.io/chatlas/reference/ChatGithub.html) for more details on how to get setup. + +```{.python filename="github-model.py"} +from chatlas import ChatGithub + +# Just works if GITHUB_TOKEN is set in your environment +client = ChatGithub(model="gpt-4.1") +``` +::: + +In general, most providers will prefer credentials stored as environment variables, and common practice is to use a `.env` file to manage these variables. For example, for `ChatOpenAI()`, you might create a `.env` file like so: + +```{.shell filename=".env"} +OPENAI_API_KEY="your_api_key_here" +``` + +Then, load the environment variables via the `dotenv` package: + +```shell +pip install dotenv +``` + +```python +from dotenv import load_dotenv +load_dotenv() +``` + + +### Recommended models + +In theory, you could use any model that has tool calling support, but we currently recommend (as of November 2025): + +- GPT-4.1 (the default) +- Claude 4.5 Sonnet +- Google Gemini 3.0 + +In our testing, we've found that those models strike a good balance between accuracy and latency. Smaller/cheaper models like GPT-4o-mini are fine for simple queries but make surprising mistakes with more complex ones; and reasoning models like o3-mini slow down responses without providing meaningfully better results. 
+ +We've also seen some decent results with frontier local models, but even if you have the compute to run the largest models, they still tend to lag behind the cloud-hosted options in terms of accuracy and speed. + +::: callout-tip +## Data privacy concerns? + +If you have data privacy concerns, consider that your org may provide access to private instances of these models with data residency guarantees. For example, Azure, AWS Bedrock, and Google Vertex AI all provide private instances of popular LLMs. You can interface with these enterprise providers by passing the right string (e.g., `"bedrock-anthropic"`) or `Chat` object (e.g., `ChatBedrockAnthropic()`) to the `client` parameter. See the [chatlas docs](https://posit-dev.github.io/chatlas/get-started/models.html) for more details. +::: diff --git a/pkg-py/docs/styles.scss b/pkg-py/docs/styles.scss new file mode 100644 index 00000000..44b3bbeb --- /dev/null +++ b/pkg-py/docs/styles.scss @@ -0,0 +1,70 @@ +/*-- scss:defaults --*/ + +$font-family-sans-serif: 'Public Sans', sans-serif; +$font-family-monospace: 'Fira Code', monospace; +$headings-font-family: 'Hubot Sans', sans-serif; +$display-font-family: 'Hubot Sans', sans-serif; +$headings-color: #193D56; + +/*-- scss:rules --*/ + +@import url('https://fonts.googleapis.com/css2?family=Public+Sans:ital,wght@0,100..900;1,100..900&display=swap'); +@import url('https://fonts.googleapis.com/css?family=Fira Code'); +@import url('https://fonts.googleapis.com/css?family=Hubot Sans'); + +.header { + font-family: $headings-font-family; + color: $headings-color; +} + +/* css styles */ + +.cell-output pre code { + white-space: pre-wrap; +} + +/* Undo somebody's aggressive CSS */ +pre { + font-family: var(--bs-font-monospace); +} + + +/* sidebar */ +.sidebar-item-container { + font-size: 1rem; + + .text-start { + font-weight: 600; + } +} + +.sidebar-item-section { + padding-top: 0.5rem; +} + +// make it even more noticeable +.sidebar-link { + &:hover { + font-weight: 500; + } 
+ + &.active { + position: relative; + + &::before { + content: "\23F5"; + position: absolute; + left: -0.9em; + font-size: 1em; + color: var(--bs-primary); + } + } +} + + +/* Get code output to look like a sourceCode block */ +pre:has(> code) { + background-color: rgba(233, 236, 239, 0.65); + border-radius: .25em; + padding: .4em; +} \ No newline at end of file diff --git a/pkg-py/docs/tools.qmd b/pkg-py/docs/tools.qmd new file mode 100644 index 00000000..cd348eba --- /dev/null +++ b/pkg-py/docs/tools.qmd @@ -0,0 +1,71 @@ +--- +title: Tools +--- + +QueryChat combines [tool calling](https://posit-dev.github.io/chatlas/get-started/tools.html) with [reactivity](https://shiny.posit.co/py/docs/reactive-foundations.html) to not only execute SQL, but also reactively update dependent data views. Understanding how these tools work will help you better understand what QueryChat is capable of and how to customize/extend its behavior. + +One important thing to understand generally about Querychat's tools is that they are Python functions, and that execution happens on _your machine_, not on the LLM provider's side. In other words, the SQL queries generated by the LLM are executed locally in the Shiny app process, and only the results (if any) are sent back to the LLM. + +Querychat provides the LLM access to three tools, serving two primary purposes: + +1. **Data updating** - Filter and sort data (without sending results to the LLM). +2. **Data analysis** - Calculate summaries and return results for interpretation by the LLM. + +## Data updating + +When a user asks to "Show me..." or "Filter to..." or "Sort by...", the LLM requests a call to the `update_dashboard` tool with an appropriate SQL query as input. An important constraint is that the query must return all original schema columns (typically using `SELECT *`). When called, Querychat will both set a reactive value holding [the current SQL query](build.qmd#sql-query) and execute the query to get the result. 
+ +The result of the query is then used to set a reactive value holding the [filtered/sorted data frame](build.qmd#filtered-data). Thanks to reactivity, this will automatically update any views depending on this data frame, such as the data table displayed in the UI. + +This tool also takes a `title` parameter, which is a short description of the filter/sort operation (e.g., "First-class passengers"). This, also, is made available through [a reactive value](build.qmd#title) for display somewhere in your app. + +Here's a basic example of this tool in action with the `.app()` method. Notice how this pre-built app not only shows the data table, but also the SQL query and title generated by the LLM (for transparency): + +```{.python filename="titanic-app.py"} +from querychat import QueryChat +from seaborn import load_dataset + +titanic = load_dataset("titanic") +qc = QueryChat(titanic, "titanic") +app = qc.app() +``` + +{fig-alt="Screenshot of the querychat's app with the titanic dataset filtered to passengers who survived." class="lightbox shadow rounded mb-3"} + +The other data updating tool is `reset_dashboard`, which clears any active filters and returns the data table to its original unfiltered state. The LLM typically uses this when users say "reset", "start over", or "clear filters". + +## Data analysis + +When a user asks analytical questions like "What is the average...?", "How many...?", or "Which item has the highest...?", the LLM generates a SQL query and requests a call to the `query` tool. Unlike the data updating tools, this tool will not update any reactive values. Instead, it will: + +1. Execute the SQL query +2. Display both the SQL query and results in the UI +3. 
Return the results back to the LLM for interpretation + +Here's an example of it in action: + +```{.python filename="titanic-app.py"} +from querychat import QueryChat +from seaborn import load_dataset + +titanic = load_dataset("titanic") +qc = QueryChat(titanic, "titanic") +app = qc.app() +``` + +{fig-alt="Screenshot of the querychat's app with a summary statistic inlined in the chat." class="lightbox shadow rounded mb-3"} + + +## View the source + +If you'd like to better understand how the tools work and how the LLM is prompted to use them, check out the following resources: + +**Source code:** + +- [`tools.py`](https://github.com/posit-dev/querychat/blob/main/pkg-py/src/querychat/tools.py) + +**Prompts:** + +- [`prompts/tool-update-dashboard.md`](https://github.com/posit-dev/querychat/blob/main/pkg-py/src/querychat/prompts/tool-update-dashboard.md) +- [`prompts/tool-reset-dashboard.md`](https://github.com/posit-dev/querychat/blob/main/pkg-py/src/querychat/prompts/tool-reset-dashboard.md) +- [`prompts/tool-query.md`](https://github.com/posit-dev/querychat/blob/main/pkg-py/src/querychat/prompts/tool-query.md) diff --git a/pkg-py/examples/01-hello-app.py b/pkg-py/examples/01-hello-app.py new file mode 100644 index 00000000..856c93bd --- /dev/null +++ b/pkg-py/examples/01-hello-app.py @@ -0,0 +1,6 @@ +from seaborn import load_dataset +from querychat import QueryChat + +titanic = load_dataset("titanic") +qc = QueryChat(titanic, "titanic") +app = qc.app() diff --git a/pkg-py/examples/02-prompt-app.py b/pkg-py/examples/02-prompt-app.py new file mode 100644 index 00000000..832059f1 --- /dev/null +++ b/pkg-py/examples/02-prompt-app.py @@ -0,0 +1,18 @@ + +from pathlib import Path +from seaborn import load_dataset +from querychat import QueryChat + +titanic = load_dataset("titanic") + +greeting = Path(__file__).parent / "greeting.md" +data_desc = Path(__file__).parent / "data_description.md" + +qc = QueryChat( + titanic, + "titanic", + greeting=greeting, + 
data_description=data_desc, +) + +qc.app() diff --git a/pkg-py/examples/03-sidebar-core-app.py b/pkg-py/examples/03-sidebar-core-app.py new file mode 100644 index 00000000..b37dc750 --- /dev/null +++ b/pkg-py/examples/03-sidebar-core-app.py @@ -0,0 +1,36 @@ +from seaborn import load_dataset +from shiny import App, render, ui +from querychat import QueryChat + +titanic = load_dataset("titanic") + +# 1. Provide data source to QueryChat +qc = QueryChat(titanic, "titanic") + +app_ui = ui.page_sidebar( + # 2. Create sidebar chat control + qc.sidebar(), + ui.card( + ui.card_header(ui.output_text("title")), + ui.output_data_frame("data_table"), + fill=True, + ), + fillable=True +) + + +def server(input, output, session): + # 3. Add server logic (to get reactive data frame and title) + qc_vals = qc.server() + + # 4. Use the filtered/sorted data frame reactively + @render.data_frame + def data_table(): + return qc_vals.df() + + @render.text + def title(): + return qc_vals.title() or "Titanic Dataset" + + +app = App(app_ui, server) diff --git a/pkg-py/examples/03-sidebar-express-app.py b/pkg-py/examples/03-sidebar-express-app.py new file mode 100644 index 00000000..8e0b7cac --- /dev/null +++ b/pkg-py/examples/03-sidebar-express-app.py @@ -0,0 +1,28 @@ +from seaborn import load_dataset +from shiny.express import render, ui +from querychat.express import QueryChat + +titanic = load_dataset("titanic") + +# 1. Provide data source to QueryChat +qc = QueryChat(titanic, "titanic") + +# 2. Add sidebar chat control +qc.sidebar() + +# 3. Add a card with reactive title and data frame +with ui.card(): + with ui.card_header(): + @render.text + def title(): + return qc.title() or "Titanic Dataset" + + @render.data_frame + def data_table(): + return qc.df() + +# 4. 
Set some page options (optional) +ui.page_opts( + fillable=True, + title="Titanic Dataset Explorer" +) diff --git a/pkg-py/examples/app-database-sqlite.py b/pkg-py/examples/app-database-sqlite.py deleted file mode 100644 index 54327d5c..00000000 --- a/pkg-py/examples/app-database-sqlite.py +++ /dev/null @@ -1,68 +0,0 @@ -from pathlib import Path - -import chatlas -import querychat -from seaborn import load_dataset -from shiny import App, render, ui -from sqlalchemy import create_engine - -# Load titanic data and create SQLite database -db_path = Path(__file__).parent / "titanic.db" -engine = create_engine("sqlite:///" + str(db_path)) - -if not db_path.exists(): - # For example purposes, we'll create the database if it doesn't exist. Don't - # do this in your app! - titanic = load_dataset("titanic") - titanic.to_sql("titanic", engine, if_exists="replace", index=False) - -greeting = Path(__file__).parent / "greeting.md" -data_desc = Path(__file__).parent / "data_description.md" - -# 1. Configure querychat - -def use_github_models(system_prompt: str) -> chatlas.Chat: - # GitHub models give us free rate-limited access to the latest LLMs - # you will need to have GITHUB_PAT defined in your environment - return chatlas.ChatGithub( - model="gpt-4.1", - system_prompt=system_prompt, - ) - -qc_config = querychat.init( - engine, - "titanic", - greeting=greeting, - data_description=data_desc, - client=use_github_models, -) - -# Create UI -app_ui = ui.page_sidebar( - # 2. Place the chat component in the sidebar - querychat.sidebar("chat"), - # Main panel with data viewer - ui.card( - ui.output_data_frame("data_table"), - fill=True, - ), - title="querychat with Python (SQLite)", - fillable=True, - class_="bslib-page-dashboard", -) - - -# Define server logic -def server(input, output, session): - # 3. Initialize querychat server with the config from step 1 - qc = querychat.server("chat", qc_config) - - # 4. 
Display the filtered dataframe - @render.data_frame - def data_table(): - # Access filtered data via qc.df() reactive - return qc.df() - - -# Create Shiny app -app = App(app_ui, server) diff --git a/pkg-py/examples/app-dataframe-pandas.py b/pkg-py/examples/app-dataframe-pandas.py deleted file mode 100644 index 487095a5..00000000 --- a/pkg-py/examples/app-dataframe-pandas.py +++ /dev/null @@ -1,59 +0,0 @@ -from pathlib import Path - -import chatlas -import querychat -from seaborn import load_dataset -from shiny import App, render, ui - -titanic = load_dataset("titanic") - -greeting = Path(__file__).parent / "greeting.md" -data_desc = Path(__file__).parent / "data_description.md" - -# 1. Configure querychat - -def use_github_models(system_prompt: str) -> chatlas.Chat: - # GitHub models give us free rate-limited access to the latest LLMs - # you will need to have GITHUB_PAT defined in your environment - return chatlas.ChatGithub( - model="gpt-4.1", - system_prompt=system_prompt, - ) - -qc_config = querychat.init( - titanic, - "titanic", - greeting=greeting, - data_description=data_desc, - client=use_github_models, -) - -# Create UI -app_ui = ui.page_sidebar( - # 2. Place the chat component in the sidebar - querychat.sidebar("chat"), - # Main panel with data viewer - ui.card( - ui.output_data_frame("data_table"), - fill=True, - ), - title="querychat with Python", - fillable=True, - class_="bslib-page-dashboard", -) - - -# Define server logic -def server(input, output, session): - # 3. Initialize querychat server with the config from step 1 - qc = querychat.server("chat", qc_config) - - # 4. 
Display the filtered dataframe - @render.data_frame - def data_table(): - # Access filtered data via chat.df() reactive - return qc.df() - - -# Create Shiny app -app = App(app_ui, server) diff --git a/pkg-py/examples/app.py b/pkg-py/examples/app.py deleted file mode 100644 index 02614833..00000000 --- a/pkg-py/examples/app.py +++ /dev/null @@ -1,66 +0,0 @@ -import chatlas -from seaborn import load_dataset -from shiny import App, render, ui - -from dotenv import load_dotenv -load_dotenv() - -import querychat - -titanic = load_dataset("titanic") - -# 1. Configure querychat. -# This is where you specify the dataset and can also -# override options like the greeting message, system prompt, model, etc. - - -def use_github_models(system_prompt: str) -> chatlas.Chat: - # GitHub models give us free rate-limited access to the latest LLMs - # you will need to have GITHUB_PAT defined in your environment - return chatlas.ChatGithub( - model="gpt-4.1", - system_prompt=system_prompt, - ) - - -qc_config = querychat.init( - data_source=titanic, - table_name="titanic", - client=use_github_models, -) - -# Create UI -app_ui = ui.page_sidebar( - # 2. Use querychat.sidebar(id) in a ui.page_sidebar. - # Alternatively, use querychat.ui(id) elsewhere if you don't want your - # chat interface to live in a sidebar. - querychat.sidebar("chat"), - ui.card( - ui.card_header(ui.output_text("title")), - ui.output_data_frame("data_table"), - fill=True, - ), - fillable=True, - class_="bslib-page-dashboard" -) - - -# Define server logic -def server(input, output, session): - # 3. Create a querychat object using the config from step 1. - qc = querychat.server("chat", qc_config) - - # 4. Use the filtered/sorted data frame anywhere you wish, via the - # chat.df() reactive. - @render.data_frame - def data_table(): - return qc.df() - - @render.text - def title(): - title = qc.title() - return title if title else "Titanic Dataset" - - -# Create Shiny app -app = App(app_ui, server)