Changes from all commits (20 commits)
5585328  Add local dashboard classes. (goodwillpunning, Sep 8, 2025)
07caf36  Update job deployer with profiler ingestion job. (goodwillpunning, Sep 9, 2025)
d03d81e  Add initial integration test. (goodwillpunning, Sep 11, 2025)
f8982dd  Add method to upload DuckDB files to Unity Catalog Volume with tests (radhikaathalye-db, Sep 16, 2025)
a4d2520  Update app context to call dashboard manager with WorkspaceClient. (goodwillpunning, Sep 25, 2025)
4ebb53e  Add LSQL definitions for Synapse Profiler Dashboard (goodwillpunning, Sep 25, 2025)
8370ef6  Merge latest from feature/add_local_dashboards into feature/upload_du… (radhikaathalye-db, Sep 29, 2025)
72c3f87  refactor: use workspaceClient instead of requests; fix error logging (radhikaathalye-db, Sep 29, 2025)
03ff5bf  Add more specific exception handling. (goodwillpunning, Oct 23, 2025)
2aeab84  Update dedicated SQL pool LSQL widgets. (goodwillpunning, Sep 29, 2025)
c34394d  Replace LSQL dashboards with Python SDK. (goodwillpunning, Oct 1, 2025)
ac81031  Add private functions for creating/replacing profiler dashboard. (goodwillpunning, Oct 15, 2025)
6070973  Add more specific error handling to dashboard manager. (goodwillpunning, Oct 15, 2025)
fb9eb00  Update args for CLI command. (goodwillpunning, Oct 15, 2025)
ac7c806  Remove profiler extract ingestion job deployer. (goodwillpunning, Oct 17, 2025)
a094691  Remove unit tests for profiler ingestion job. (goodwillpunning, Oct 20, 2025)
f8f11aa  Add method to upload DuckDB files to Unity Catalog Volume with tests (radhikaathalye-db, Sep 16, 2025)
56be197  Merge upstream changes and update test cases. (goodwillpunning, Oct 24, 2025)
136f115  Add more specific exception handling. (goodwillpunning, Oct 23, 2025)
5fec3c6  Remove unnecessary params in dashboard manager. (goodwillpunning, Oct 27, 2025)
labs.yml (11 changes: 11 additions & 0 deletions)

@@ -55,6 +55,17 @@ commands:
     description: Aggregates Reconcile is a utility to streamline the reconciliation process; a specific aggregate metric is compared between source and target data residing on Databricks.
   - name: configure-database-profiler
     description: "Configure Database Profiler"
+  - name: create-profiler-dashboard
+    description: "Upload the Profiler Results as a Databricks Dashboard."
+    flags:
+      - name: extract-file
+        description: Path Location of the Profiler Extract File
+      - name: source-tech
+        description: Name of the Source System Technology that was Profiled
+      - name: catalog-name
+        description: (Optional) Name of the Catalog that extract data will be uploaded to
+      - name: schema-name
+        description: (Optional) Name of the Schema that the extract tables will be uploaded to
   - name: install-transpile
     description: "Install & Configure Necessary Transpiler Dependencies"
     flags:
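For orientation, the new command would be invoked through the Databricks Labs CLI roughly as follows (a sketch: flag names follow the labs.yml entry above, the extract path is hypothetical, and the catalog/schema values shown are the defaults used by the code later in this diff):

databricks labs lakebridge create-profiler-dashboard \
  --extract-file /path/to/profiler_extract.duckdb \
  --source-tech synapse \
  --catalog-name lakebridge_profiler \
  --schema-name profiler_runs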
pyproject.toml (2 changes: 1 addition & 1 deletion)

@@ -27,7 +27,7 @@ classifiers = [
 ]

 dependencies = [
-    "databricks-sdk~=0.51.0",
+    "databricks-sdk~=0.67.0",
     "standard-distutils~=3.11.9; python_version>='3.11'",
     "databricks-bb-analyzer~=0.1.9",
     "sqlglot==26.1.3",
Empty file.
Empty file.
src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py (new file: 171 additions)

@@ -0,0 +1,171 @@
import io
import json
import logging
import os
from pathlib import Path

from databricks.sdk.errors import PermissionDenied, NotFound, InternalError
from databricks.sdk.errors.platform import ResourceAlreadyExists, DatabricksError
from databricks.sdk.service.dashboards import Dashboard
from databricks.sdk.service.iam import User
from databricks.sdk import WorkspaceClient

from databricks.labs.blueprint.wheels import find_project_root

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class DashboardTemplateLoader:
"""
Class for loading the JSON representation of a Databricks dashboard
according to the source system.
"""

def __init__(self, templates_dir: Path | None):
self.templates_dir = templates_dir

    def load(self, source_system: str) -> dict:
        """
        Loads the profiler summary dashboard template for a source system.
        :param source_system: the name of the source data warehouse
        """
if self.templates_dir is None:
raise ValueError("Dashboard template path cannot be empty.")

        filename = f"{source_system.lower()}_dashboard.lvdash.json"
        filepath = self.templates_dir / filename
        if not filepath.exists():
            raise FileNotFoundError(f"Could not find dashboard template matching '{source_system}'.")
        with filepath.open("r", encoding="utf-8") as f:
            return json.load(f)


class DashboardManager:
"""
    Class for managing the lifecycle of a profiler summary dashboard, a.k.a. "local dashboards"
"""

_DASHBOARD_NAME = "Lakebridge Profiler Assessment"

    def __init__(self, ws: WorkspaceClient, current_user: User, is_debug: bool = False):
        self._ws = ws
        self._current_user = current_user
        self._is_debug = is_debug

@staticmethod
def _replace_catalog_schema(
serialized_dashboard: str,
new_catalog: str,
new_schema: str,
old_catalog: str = "`PROFILER_CATALOG`",
old_schema: str = "`PROFILER_SCHEMA`",
    ) -> str:
"""Given a serialized JSON dashboard, replaces all catalog and schema references with the
provided catalog and schema names."""
updated_dashboard = serialized_dashboard.replace(old_catalog, f"`{new_catalog}`")
return updated_dashboard.replace(old_schema, f"`{new_schema}`")

def _create_or_replace_dashboard(
self, folder: Path, ws_parent_path: str, dest_catalog: str, dest_schema: str
) -> Dashboard:
"""
Creates or updates a profiler summary dashboard in the current user’s Databricks workspace home.
Existing dashboards are automatically replaced with the latest dashboard template.
"""

# Load the dashboard template
logging.info(f"Loading dashboard template {folder}")
dashboard_loader = DashboardTemplateLoader(folder)
dashboard_json = dashboard_loader.load(source_system="synapse")
dashboard_str = json.dumps(dashboard_json)

# Replace catalog and schema placeholders
updated_dashboard_str = self._replace_catalog_schema(
dashboard_str, new_catalog=dest_catalog, new_schema=dest_schema
)
dashboard = Dashboard(
display_name=self._DASHBOARD_NAME,
parent_path=ws_parent_path,
warehouse_id=self._ws.config.warehouse_id,
serialized_dashboard=updated_dashboard_str,
)

# Create dashboard or replace if previously deployed
        try:
            dashboard = self._ws.lakeview.create(dashboard=dashboard)
        except ResourceAlreadyExists:
            logger.info("Dashboard already exists. Removing dashboard from workspace location.")
            dashboard_ws_path = str(Path(ws_parent_path) / f"{self._DASHBOARD_NAME}.lvdash.json")
            self._ws.workspace.delete(dashboard_ws_path)
            dashboard = self._ws.lakeview.create(dashboard=dashboard)
        except DatabricksError as e:
            logger.error(f"Could not create profiler summary dashboard: {e}")
            raise

        if dashboard.dashboard_id:
            logger.info(f"Created dashboard '{dashboard.dashboard_id}' in workspace location '{ws_parent_path}'.")

return dashboard

def create_profiler_summary_dashboard(
self,
extract_file: str,
source_tech: str,
catalog_name: str = "lakebridge_profiler",
schema_name: str = "profiler_runs",
) -> None:
"""Deploys a profiler summary dashboard to the current Databricks user’s workspace home."""

logger.info("Deploying profiler summary dashboard.")

# Load the AI/BI Dashboard template for the source system
template_folder = (
find_project_root(__file__)
/ f"src/databricks/labs/lakebridge/resources/assessments/dashboards/{source_tech}"
)
        ws_path = f"/Workspace/Users/{self._current_user.user_name}/Lakebridge/Dashboards/"
self._create_or_replace_dashboard(
folder=template_folder, ws_parent_path=ws_path, dest_catalog=catalog_name, dest_schema=schema_name
)

    def upload_duckdb_to_uc_volume(self, local_file_path: str, volume_path: str) -> bool:
"""
Upload a DuckDB file to Unity Catalog Volume
Args:
local_file_path (str): Local path to the DuckDB file
volume_path (str): Target path in UC Volume (e.g., '/Volumes/catalog/schema/volume/myfile.duckdb')
Returns:
bool: True if successful, False otherwise
"""

# Validate inputs
if not os.path.exists(local_file_path):
logger.error(f"Local file not found: {local_file_path}")
return False

if not volume_path.startswith('/Volumes/'):
logger.error("Volume path must start with '/Volumes/'")
return False

try:
with open(local_file_path, 'rb') as f:
file_bytes = f.read()
binary_data = io.BytesIO(file_bytes)
self._ws.files.upload(volume_path, binary_data, overwrite=True)
logger.info(f"Successfully uploaded {local_file_path} to {volume_path}")
return True
except FileNotFoundError as e:
logger.error(f"Profiler extract file was not found: \n{e}")
return False
except PermissionDenied as e:
logger.error(f"Insufficient privileges detected while accessing Volume path: \n{e}")
return False
except NotFound as e:
logger.error(f"Invalid Volume path provided: \n{e}")
return False
except InternalError as e:
logger.error(f"Internal Databricks error while uploading extract file: \n{e}")
return False
except Exception as e:
logger.error(f"Failed to upload file: {str(e)}")
return False
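Taken together, a minimal usage sketch of the new class (class and method names follow the file above; the volume path and extract file are hypothetical):

from databricks.sdk import WorkspaceClient

ws = WorkspaceClient()  # resolves credentials from the environment
manager = DashboardManager(ws, ws.current_user.me())

# Stage the profiler extract in a Unity Catalog Volume, then publish the
# summary dashboard to the user's workspace home.
if manager.upload_duckdb_to_uc_volume(
    "./profiler_extract.duckdb",
    "/Volumes/lakebridge_profiler/profiler_runs/extracts/profiler_extract.duckdb",
):
    manager.create_profiler_summary_dashboard(
        extract_file="./profiler_extract.duckdb",
        source_tech="synapse",
    )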
src/databricks/labs/lakebridge/cli.py (15 changes: 15 additions & 0 deletions)

@@ -608,6 +608,21 @@ def configure_database_profiler() -> None:
assessment.run()


+@lakebridge.command
+def create_profiler_dashboard(
+    *,
+    w: WorkspaceClient,
+    extract_file: str,
+    source_tech: str,
+    catalog_name: str,
+    schema_name: str,
+) -> None:
+    """Deploys a profiler summary as an AI/BI dashboard."""
+    with_user_agent_extra("cmd", "create-profiler-dashboard")
+    ctx = ApplicationContext(w)
+    ctx.dashboard_manager.create_profiler_summary_dashboard(extract_file, source_tech, catalog_name, schema_name)


@lakebridge.command
def install_transpile(
*,
src/databricks/labs/lakebridge/contexts/application.py (6 changes: 6 additions & 0 deletions)

@@ -13,6 +13,7 @@
from databricks.sdk.service.iam import User

from databricks.labs.lakebridge.analyzer.lakebridge_analyzer import LakebridgeAnalyzer
+from databricks.labs.lakebridge.assessments.dashboards.dashboard_manager import DashboardManager
from databricks.labs.lakebridge.config import TranspileConfig, ReconcileConfig, LakebridgeConfiguration
from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
from databricks.labs.lakebridge.deployment.dashboard import DashboardDeployment
@@ -107,6 +108,11 @@ def job_deployment(self) -> JobDeployment:
def dashboard_deployment(self) -> DashboardDeployment:
return DashboardDeployment(self.workspace_client, self.installation, self.install_state)

+    @cached_property
+    def dashboard_manager(self) -> DashboardManager:
+        is_debug = logger.getEffectiveLevel() == logging.DEBUG
+        return DashboardManager(self.workspace_client, self.current_user, is_debug)

@cached_property
def recon_deployment(self) -> ReconDeployment:
return ReconDeployment(
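For reference, a short sketch of how the cached property is consumed (wiring as in the diff; first access builds the DashboardManager, later accesses reuse the same instance):

from databricks.sdk import WorkspaceClient

from databricks.labs.lakebridge.contexts.application import ApplicationContext

ctx = ApplicationContext(WorkspaceClient())
manager = ctx.dashboard_manager  # constructed once, then cached
assert manager is ctx.dashboard_manager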
src/databricks/labs/lakebridge/helpers/metastore.py (2 changes: 1 addition & 1 deletion)

@@ -152,7 +152,7 @@ def has_privileges(

@functools.lru_cache(maxsize=1024)
def _get_user_privileges(self, user: str, securable_type: SecurableType, full_name: str) -> set[Privilege]:
-        permissions = self._ws.grants.get_effective(securable_type, full_name, principal=user)
+        permissions = self._ws.grants.get_effective(str(securable_type), full_name, principal=user)
if not permissions or not permissions.privilege_assignments:
return set()
return {
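This one-line change tracks the databricks-sdk bump in pyproject.toml: newer SDK releases expect the securable type as a plain string, so the enum is converted explicitly. A hedged sketch of the updated call under that assumption (the catalog name and principal are hypothetical):

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.catalog import SecurableType

ws = WorkspaceClient()
# str(...) mirrors the conversion applied in the diff above.
permissions = ws.grants.get_effective(
    str(SecurableType.CATALOG), "main", principal="someone@example.com"
)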