Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

exp: pygwalker data explorer in streamlit #2216

Draft
wants to merge 1 commit into
base: devel
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions dlt/helpers/streamlit_app/blocks/explorer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import dlt
import pygwalker
import streamlit as st
from pygwalker.api.streamlit import StreamlitRenderer

# Use Streamlit's wide page layout for this page.
st.set_page_config(layout="wide")





@st.cache_resource
def pygwalker_renderer(pipeline_name: str, table_name: str) -> StreamlitRenderer:
    """Build a pygwalker ``StreamlitRenderer`` for one table of a pipeline.

    The pipeline is attached by name, the requested table is read from its
    dataset as a dataframe, and that dataframe is handed to the renderer.

    Args:
        pipeline_name: Name of the pipeline to attach to via ``dlt.attach``.
        table_name: Name of the dataset table to visualize.

    Returns:
        A renderer opened on the "vis" tab with kernel computation enabled.
    """
    # NOTE(review): a reviewer flagged that `.df()` loads the full table into
    # memory; on a large table this will take a long time and may eventually
    # kill the host. Open question: is there another way to feed the renderer?
    table_frame = dlt.attach(pipeline_name).dataset()[table_name].df()

    renderer_options = {
        "kernel_computation": True,  # use duckdb under the hood
        "default_tab": "vis",
    }
    return StreamlitRenderer(table_frame, **renderer_options)


def select_table_name(pipeline: dlt.Pipeline) -> str:
    """Render the "Active table" select box and return the chosen table name.

    Tables are taken from the schema named by ``st.session_state["schema_name"]``
    when that key holds a truthy value, otherwise from the pipeline's default
    schema.

    Args:
        pipeline: Pipeline whose schema data tables are offered for selection.

    Returns:
        The ``"name"`` entry of the selected table. If the schema has no data
        tables, a warning is shown and the script run is stopped instead of
        returning.
    """
    current_schema = pipeline.default_schema
    if schema_name := st.session_state.get("schema_name"):
        current_schema = pipeline.schemas[schema_name]

    selected_table = st.selectbox(
        label="Active table",
        options=current_schema.data_tables(),
        format_func=lambda t: t["name"],
    )
    if selected_table is None:
        # st.selectbox yields None when there is nothing to select (e.g. a
        # schema without data tables); subscripting it would raise TypeError.
        st.warning("No data tables found in the selected schema.")
        st.stop()
    return selected_table["name"]


def show_explorer(pipeline: dlt.Pipeline) -> None:
    """Let the user pick a table and render the pygwalker explorer for it.

    Args:
        pipeline: Pipeline whose tables are offered and whose name is used to
            build the renderer.
    """
    table_name = select_table_name(pipeline)
    pygwalker_renderer(
        pipeline_name=pipeline.pipeline_name,
        table_name=table_name,
    ).explorer()
1 change: 1 addition & 0 deletions dlt/helpers/streamlit_app/blocks/menu.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ def menu(pipeline: dlt.Pipeline) -> None:
mode_selector()
logo()
st.page_link(f"{HERE}/pages/dashboard.py", label="Explore data", icon="🕹️")
st.page_link(f"{HERE}/pages/explorer.py", label="Visualize data", icon="📊")
st.page_link(f"{HERE}/pages/load_info.py", label="Load info", icon="💾")
pipeline_summary(pipeline)
17 changes: 17 additions & 0 deletions dlt/helpers/streamlit_app/pages/explorer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import dlt
import streamlit as st

from dlt.helpers.streamlit_app.blocks.explorer import show_explorer
from dlt.helpers.streamlit_app.blocks.menu import menu
from dlt.helpers.streamlit_app.utils import render_with_pipeline


def show(pipeline: dlt.Pipeline) -> None:
    """Render the explorer page: the menu in the sidebar, then the explorer.

    Args:
        pipeline: Pipeline passed through to the menu and the explorer block.
    """
    # TODO(review): a reviewer asked for at least simple tests for this page
    # to make sure it renders.
    sidebar = st.sidebar
    with sidebar:
        menu(pipeline)

    # Called outside the sidebar context, so it is not rendered in the sidebar.
    show_explorer(pipeline)


# When this file is executed as a script, hand `show` to the shared
# `render_with_pipeline` helper.
if __name__ == "__main__":
    render_with_pipeline(show)
Loading