Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
117 commits
Select commit Hold shift + click to select a range
dd726b5
feat: integrate Switch transpiler with Lakebridge installer
hiroyukinakazato-db Sep 30, 2025
febb62d
Merge branch 'main' into feature/switch-installer-integration
hiroyukinakazato-db Sep 30, 2025
fa26b4c
fix: remove undefined URLError from exception handling
hiroyukinakazato-db Sep 30, 2025
6511e20
refactor: streamline SwitchInstaller deployment logic and update tests
hiroyukinakazato-db Oct 3, 2025
33ea7de
refactor: simplify SwitchInstaller test structure and improve assertions
hiroyukinakazato-db Oct 3, 2025
d0c63c3
Merge remote-tracking branch 'origin/main' into feature/switch-instal…
hiroyukinakazato-db Oct 3, 2025
7cb9ea9
feat: add Switch transpiler installer for Lakebridge integration
hiroyukinakazato-db Oct 7, 2025
467dea9
fix: support case-insensitive config lookup in SwitchInstaller
hiroyukinakazato-db Oct 8, 2025
57298b0
Merge branch 'main' into feature/switch-installer-integration
hiroyukinakazato-db Oct 8, 2025
09c0eb8
Merge branch 'main' into feature/switch-installer-integration
hiroyukinakazato-db Oct 9, 2025
8439314
refactor: separate Switch installation from workspace deployment
hiroyukinakazato-db Oct 9, 2025
5f66f3f
Merge branch 'main' into feature/switch-installer-integration
hiroyukinakazato-db Oct 9, 2025
fae9880
feat: add llm-transpile command with Switch integration
hiroyukinakazato-db Oct 7, 2025
2ee157f
refactor: encapsulate Switch package path resolution in SwitchDeployment
hiroyukinakazato-db Oct 9, 2025
9dc4b04
refactor: encapsulate Switch package path resolution in SwitchDeployment
hiroyukinakazato-db Oct 9, 2025
7637234
test: update Switch installation tests for refactored interface
hiroyukinakazato-db Oct 9, 2025
b736965
test: update Switch installation tests for refactored interface
hiroyukinakazato-db Oct 9, 2025
bacd5f6
fix: update error messages to include 'true' flag for install-transpi…
hiroyukinakazato-db Oct 9, 2025
729cb0d
Merge branch 'main' into feature/switch-installer-integration
hiroyukinakazato-db Oct 9, 2025
21b6629
Merge branch 'main' into feature/llm-transpile
hiroyukinakazato-db Oct 9, 2025
42ce0df
fix: exclude wait_for_completion from Switch job parameters
hiroyukinakazato-db Oct 10, 2025
81c32e5
fix: exclude wait_for_completion from Switch job parameters
hiroyukinakazato-db Oct 10, 2025
13bcc15
chore: update Switch wheel with wait_for_completion fix
hiroyukinakazato-db Oct 10, 2025
8dcf8f3
feat: add E2E test for Switch transpiler with environment variable co…
hiroyukinakazato-db Oct 14, 2025
83678b8
feat: enhance E2E testing for Switch with resource management and uni…
hiroyukinakazato-db Oct 14, 2025
f698470
Merge branch 'main' into feature/switch-installer-integration
hiroyukinakazato-db Oct 14, 2025
22cadc9
Defaults in `labs.yml` are strings.
asnare Oct 14, 2025
b3d2441
Update flag description to use placeholder syntax.
asnare Oct 14, 2025
ac7e2a4
Disable flag pending completion of integration.
asnare Oct 14, 2025
ee5c892
chore: merge main into feature/llm-transpile
hiroyukinakazato-db Oct 14, 2025
f0426e1
Leave pylint's max-args as-is.
asnare Oct 15, 2025
934c2e8
Remove unnecessary include_llm arguments.
asnare Oct 15, 2025
74923cc
Refactor Switch installation.
asnare Oct 16, 2025
084f90f
upgrade to latest switch plugin
sundarshankar89 Oct 21, 2025
61f796f
Merge branch 'main' into feature/switch-installer-integration
sundarshankar89 Oct 21, 2025
0c1d1d5
fixed package dependencies
sundarshankar89 Oct 21, 2025
6aeea25
Merge branch 'main' into feature/switch-installer-integration
sundarshankar89 Oct 22, 2025
468f8de
added additional configuration for making switch
sundarshankar89 Oct 22, 2025
6a57570
Latest Switch
sundarshankar89 Oct 22, 2025
2c3d153
Sorted List for FMAPI
sundarshankar89 Oct 22, 2025
f41dee8
setting logging level
sundarshankar89 Oct 22, 2025
950c1b8
setting logging level
sundarshankar89 Oct 22, 2025
6ca78ed
setting logging level
sundarshankar89 Oct 22, 2025
ba65df4
setting logging level
sundarshankar89 Oct 22, 2025
2e2abcb
setting logging level
sundarshankar89 Oct 22, 2025
bcbe4df
make default as first choice
sundarshankar89 Oct 22, 2025
486250f
fix tests
sundarshankar89 Oct 22, 2025
fc1ddca
fix tests
sundarshankar89 Oct 22, 2025
42c9c4e
fixes few bugs
sundarshankar89 Oct 23, 2025
1831076
update databricks-switch-plugin dependency to version 0.1.4
hiroyukinakazato-db Oct 26, 2025
01a0c87
Review Comments
sundarshankar89 Oct 27, 2025
ccce0f2
Review Comments
sundarshankar89 Oct 27, 2025
ac382d6
Merge branch 'main' into feature/llm-transpile
sundarshankar89 Oct 27, 2025
4600583
Merge branch 'feature/switch-installer-integration' into feature/llm-…
sundarshankar89 Oct 27, 2025
7f0eaa4
Rebased from switch installer integration
sundarshankar89 Oct 27, 2025
0e22abe
Rebased from switch installer integration
sundarshankar89 Oct 27, 2025
43cc0f5
Intermediate check in
sundarshankar89 Oct 27, 2025
bb7c3d6
Intermediate check in
sundarshankar89 Oct 27, 2025
bd70638
Intermediate check in
sundarshankar89 Oct 27, 2025
0eb1570
Intermediate check in
sundarshankar89 Oct 27, 2025
eae5997
Merge branch 'main' into feature/switch-installer-integration
sundarshankar89 Oct 28, 2025
9823201
Merge branch 'feature/switch-installer-integration' into feature/llm-…
sundarshankar89 Oct 28, 2025
eb46f24
initial tests
sundarshankar89 Oct 28, 2025
34c9f8f
Merge branch 'main' into feature/switch-installer-integration
sundarshankar89 Oct 28, 2025
c54e68f
Merge branch 'feature/switch-installer-integration' into feature/llm-…
sundarshankar89 Oct 28, 2025
23df37b
added tests for configurator
sundarshankar89 Oct 29, 2025
4a0bf49
added tests for installer
sundarshankar89 Oct 29, 2025
c49c5b3
added tests for installer
sundarshankar89 Oct 29, 2025
fab0e87
Merge branch 'feature/switch-installer-integration' into feature/llm-…
sundarshankar89 Oct 29, 2025
078a0bc
added tests for switch
sundarshankar89 Oct 29, 2025
1e10b60
added flag to fail if users use regular transpile after installing sw…
sundarshankar89 Oct 30, 2025
394ad9d
Merge branch 'feature/switch-installer-integration' into feature/llm-…
sundarshankar89 Oct 30, 2025
69f93b2
Merge branch 'main' into feature/switch-installer-integration
sundarshankar89 Oct 30, 2025
aeff475
Merge branch 'feature/switch-installer-integration' into feature/llm-…
sundarshankar89 Oct 30, 2025
7a336f9
added additional tests improved cuj
sundarshankar89 Oct 31, 2025
2602577
added user agent extra
sundarshankar89 Oct 31, 2025
b5bcbcd
Merge branch 'main' into feature/switch-installer-integration
asnare Nov 4, 2025
6132f04
removed interactive prompt with include_llm_transpiler
sundarshankar89 Nov 5, 2025
fb2ae71
Merge branch 'feature/switch-installer-integration' into feature/llm-…
sundarshankar89 Nov 5, 2025
53077ec
removed interactive prompt with include_llm_transpiler
sundarshankar89 Nov 5, 2025
186ab59
Merge branch 'main' into feature/switch-installer-integration
sundarshankar89 Nov 5, 2025
508d6c1
Merge branch 'feature/switch-installer-integration' into feature/llm-…
sundarshankar89 Nov 5, 2025
363a31d
execute llm transpile
sundarshankar89 Nov 5, 2025
c01d605
execute llm transpile
sundarshankar89 Nov 5, 2025
ebf969a
execute llm transpile
sundarshankar89 Nov 5, 2025
eb1d497
Merge branch 'main' into feature/switch-installer-integration
asnare Nov 6, 2025
52d6613
Log why the configuration questionnaire won't happen.
asnare Nov 6, 2025
11903f9
Logging tweaks.
asnare Nov 6, 2025
a62194d
Fix incorrect comment.
asnare Nov 6, 2025
24ad1b3
Style tweak, mark a method as static.
asnare Nov 6, 2025
4083dd6
Merge branch 'feature/switch-installer-integration' into feature/llm-…
asnare Nov 6, 2025
59fe56e
Merge branch 'main' into feature/switch-installer-integration
gueniai Nov 7, 2025
ab3851e
Merge branch 'feature/switch-installer-integration' into feature/llm-…
gueniai Nov 7, 2025
5a26f86
addressed review comments
sundarshankar89 Nov 7, 2025
efc3c64
Merge remote-tracking branch 'origin/feature/llm-transpile' into feat…
sundarshankar89 Nov 7, 2025
1a64612
addressed review comments
sundarshankar89 Nov 7, 2025
6ac2e68
addressed review comments
sundarshankar89 Nov 7, 2025
9799a23
addressed review comments
sundarshankar89 Nov 7, 2025
ddf7f1a
update latest switch plugin
sundarshankar89 Nov 7, 2025
0323ae0
Merge branch 'feature/switch-installer-integration' into feature/llm-…
sundarshankar89 Nov 7, 2025
cff62b1
removed duplicate
sundarshankar89 Nov 7, 2025
a5e06e3
Avoid raising DatabricksError ourselves.
asnare Nov 7, 2025
3ce1ffd
Avoid raising DatabricksError ourselves.
asnare Nov 7, 2025
df7b15c
Merge branch 'feature/switch-installer-integration' into feature/llm-…
asnare Nov 7, 2025
a659992
Update llm-transpile CLI description and help text.
asnare Nov 7, 2025
459d941
Foundational -> Foundation
asnare Nov 7, 2025
1aab9bf
Improve logging if there as issue locating the Switch job.
asnare Nov 7, 2025
c9bfda0
Simplify logic for computing the path where sources to convert are up…
asnare Nov 7, 2025
8ec00ca
Merge branch 'main' into feature/llm-transpile
asnare Nov 7, 2025
eefda07
Remove unnecessary exception wrapping.
asnare Nov 7, 2025
e0814c3
Reformat the legal disclaimer for LLM use, and adjust grammar.
asnare Nov 7, 2025
a54c2a5
Remove some unused code.
asnare Nov 7, 2025
6d41b07
Improve variable name.
asnare Nov 7, 2025
8332c41
Don't dump a JSON response: this isn't supported for commands that pr…
asnare Nov 7, 2025
a6f1c84
Fix tests broken by removal of JSON response.
asnare Nov 7, 2025
23d9537
Merge branch 'main' into feature/llm-transpile
asnare Nov 7, 2025
e298db1
Update `llm-transpile` to require `--accept-terms=true` (#2136)
asnare Nov 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ remorph_transpile/
/linter/src/main/antlr4/library/gen/
.databricks-login.json
.mypy_cache
.env
20 changes: 20 additions & 0 deletions labs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,26 @@ commands:
{{range .}}{{.total_files_processed}}\t{{.total_queries_processed}}\t{{.analysis_error_count}}\t{{.parsing_error_count}}\t{{.validation_error_count}}\t{{.generation_error_count}}\t{{.error_log_file}}
{{end}}

- name: llm-transpile
description: Transpile SQL/ETL sources to Databricks using LLM-based conversion (EXPERIMENTAL)
flags:
- name: accept-terms
description: Whether to accept the terms for using LLM-based transpilation (`true|false`).
- name: input-source
description: Local `path` of the sources to be converted
- name: output-ws-folder
description: Output `path` where converted code will be written in the workspace. (Must start with '/Workspace/'.)
- name: source-dialect
description: The source dialect to use when performing conversion
- name: catalog-name
description: Databricks Catalog `name` to use. (Must already exist and have permissions.)
- name: schema-name
description: Databricks Schema `name` to use. (Must already exist and have permissions.)
- name: volume
description: Databricks UC Volume `name` for staging sources to convert. (Must already exist and have permissions.)
- name: foundation-model
description: The Foundation Model to use for conversion. (Must be available via the Databricks Model Serving Endpoint.)

- name: reconcile
description: Reconcile source and target data residing on Databricks

Expand Down
133 changes: 133 additions & 0 deletions src/databricks/labs/lakebridge/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@
from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPEngine
from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository
from databricks.labs.lakebridge.transpiler.sqlglot.sqlglot_engine import SqlglotEngine
from databricks.labs.lakebridge.transpiler.switch_runner import SwitchRunner
from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine

from databricks.labs.lakebridge.transpiler.transpile_status import ErrorSeverity
from databricks.labs.switch.lsp import get_switch_dialects


# Subclass to allow controlled access to protected methods.
Expand Down Expand Up @@ -827,6 +829,137 @@ def analyze(
logger.debug(f"User: {ctx.current_user}")


def _validate_llm_transpile_args(
    input_source: str | None,
    output_ws_folder: str | None,
    source_dialect: str | None,
    prompts: Prompts,
) -> tuple[str, str, str]:
    """Complete and validate the core arguments of the `llm-transpile` command.

    Any of the three values that is missing (or empty) is requested interactively
    through `prompts`. The resulting values are then checked one after another and
    any problem is reported via `raise_validation_exception`.

    Args:
        input_source: Local path of the sources to convert, if already known.
        output_ws_folder: Workspace folder for the converted output, if already known.
        source_dialect: Source dialect, if already known.
        prompts: Used to ask the user for whichever values are missing.

    Returns:
        The `(input_source, output_ws_folder, source_dialect)` triple after
        prompting and validation.
    """
    supported_dialects = get_switch_dialects()

    # Ask the user for whatever the caller did not supply.
    input_source = input_source or prompts.question("Enter input SQL path")
    output_ws_folder = output_ws_folder or prompts.question(
        "Enter output workspace folder must start with /Workspace/"
    )
    source_dialect = source_dialect or prompts.choice("Select the source dialect", sorted(supported_dialects))

    # The sources are read from the local filesystem, so the path has to exist there.
    source_exists = Path(input_source).exists()
    if not source_exists:
        raise_validation_exception(f"Invalid path for '--input-source': Path '{input_source}' does not exist.")

    # Converted output is written to the workspace, so only workspace paths are accepted.
    is_workspace_path = str(output_ws_folder).startswith("/Workspace/")
    if not is_workspace_path:
        raise_validation_exception(
            f"Invalid value for '--output-ws-folder': workspace output path must start with /Workspace/. Got: {output_ws_folder!r}"
        )

    # A dialect given explicitly may not be one that Switch reports as supported.
    dialect_supported = source_dialect in supported_dialects
    if not dialect_supported:
        raise_validation_exception(
            f"Invalid value for '--source-dialect': {source_dialect!r} must be one of: {', '.join(sorted(supported_dialects))}"
        )

    return input_source, output_ws_folder, source_dialect


@lakebridge.command
def llm_transpile(
    *,
    w: WorkspaceClient,
    accept_terms: bool = False,
    input_source: str | None = None,
    output_ws_folder: str | None = None,
    source_dialect: str | None = None,
    catalog_name: str | None = None,
    schema_name: str | None = None,
    volume: str | None = None,
    foundation_model: str | None = None,
    ctx: ApplicationContext | None = None,
) -> None:
    """Transpile source code to Databricks using LLM Transpiler (Switch)

    The command refuses to run until the LLM usage terms have been accepted. It
    then gathers any missing arguments interactively, stages the local sources
    in a UC Volume and triggers the installed Switch job.

    Raises:
        SystemExit: If `accept_terms` is false.
        RuntimeError: If no "Switch" job is recorded in the installation state.
    """
    ctx = ApplicationContext(w) if ctx is None else ctx
    del w
    ctx.add_user_agent_extra("cmd", "llm-transpile")
    logger.debug(f"User: {ctx.current_user}")

    # Nothing is prompted for or uploaded unless the terms were explicitly accepted.
    if not accept_terms:
        terms_notice = """Please read and accept these terms before proceeding:
This feature leverages a Large Language Model (LLM) to analyse and convert
your provided content, code and data. You consent to your content being
transmitted to, processed by, and returned from the foundation models hosted
by Databricks or external foundation models you have configured in your
workspace. The outputs of the LLM are generated automatically without human
review, and may contain inaccuracies or errors. You are responsible for
reviewing and validating all outputs before relying on them for any critical
or production use.

By using this feature you accept these terms, re-run with '--accept-terms=true'.
"""
        logger.warning(terms_notice)
        raise SystemExit("LLM transpiler terms not accepted, exiting.")

    prompts = ctx.prompts
    configurator = ctx.resource_configurator

    # Missing core arguments are requested from the user, then all three are validated.
    input_source, output_ws_folder, source_dialect = _validate_llm_transpile_args(
        input_source,
        output_ws_folder,
        source_dialect,
        prompts,
    )

    # Unity Catalog resources: each one not given on the command line is set up interactively.
    if catalog_name is None:
        catalog_name = configurator.prompt_for_catalog_setup(default_catalog_name="lakebridge")
    if schema_name is None:
        schema_name = configurator.prompt_for_schema_setup(catalog=catalog_name, default_schema_name="switch")
    if volume is None:
        volume = configurator.prompt_for_volume_setup(
            catalog=catalog_name,
            schema=schema_name,
            default_volume_name="switch_volume",
        )
    configurator.has_necessary_access(catalog_name, schema_name, volume)

    if foundation_model is None:
        foundation_model = configurator.prompt_for_foundation_model_choice()

    # The Switch job ID comes from the installation state; without it there is nothing to trigger.
    installed_jobs = ctx.install_state.jobs
    if "Switch" not in installed_jobs:
        logger.debug(f"Missing Switch from installed state jobs: {installed_jobs!r}")
        raise RuntimeError(
            "Switch Job not found. "
            "Please run 'databricks labs lakebridge install-transpile --include-llm-transpiler true' first."
        )
    switch_job_id = int(installed_jobs["Switch"])
    logger.debug(f"Switch job ID found: {switch_job_id}")

    ctx.add_user_agent_extra("transpiler_source_dialect", source_dialect)
    runner = SwitchRunner(ctx.workspace_client)

    # Stage the local sources in the volume, then hand the staged location to the job.
    staged_input_path = runner.upload_to_volume(
        local_path=Path(input_source),
        catalog=catalog_name,
        schema=schema_name,
        volume=volume,
    )
    runner.run(
        volume_input_path=staged_input_path,
        output_ws_folder=output_ws_folder,
        source_tech=source_dialect,
        catalog=catalog_name,
        schema=schema_name,
        foundation_model=foundation_model,
        job_id=switch_job_id,
    )


@lakebridge.command()
def create_profiler_dashboard(
*,
Expand Down
1 change: 1 addition & 0 deletions src/databricks/labs/lakebridge/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ class TranspileConfig:
error_file_path: str | None = None
sdk_config: dict[str, str] | None = None
skip_validation: bool = False
include_llm: bool = False
catalog_name: str = "remorph"
schema_name: str = "transpiler"
transpiler_options: JsonValue = None
Expand Down
139 changes: 139 additions & 0 deletions src/databricks/labs/lakebridge/transpiler/switch_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import io
import logging
import os
import random
import string
from datetime import datetime, timezone
from pathlib import Path

from databricks.labs.blueprint.installation import RootJsonValue
from databricks.sdk import WorkspaceClient

logger = logging.getLogger(__name__)


class SwitchRunner:
    """Runner for Switch LLM transpilation jobs.

    Stages local sources in a Unity Catalog Volume and triggers an existing
    Switch job with the parameters describing the conversion.
    """

    def __init__(
        self,
        ws: WorkspaceClient,
    ):
        self._ws = ws

    def run(
        self,
        volume_input_path: str,
        output_ws_folder: str,
        source_tech: str,
        catalog: str,
        schema: str,
        foundation_model: str,
        job_id: int,
    ) -> RootJsonValue:
        """Trigger the Switch job.

        Args:
            volume_input_path: Volume path holding the sources to convert.
            output_ws_folder: Workspace folder that receives the converted output.
            source_tech: Source dialect/technology of the input.
            catalog: Catalog passed to the job.
            schema: Schema passed to the job.
            foundation_model: Foundation model passed to the job.
            job_id: ID of the Switch job to trigger.

        Returns:
            A single-element list describing the started run (job ID, run ID, run URL).

        Raises:
            SystemExit: If triggering the job yields no run ID.
        """
        job_params = self._build_job_parameters(
            input_dir=volume_input_path,
            output_dir=output_ws_folder,
            source_tech=source_tech,
            catalog=catalog,
            schema=schema,
            foundation_model=foundation_model,
        )
        logger.info(f"Triggering Switch job with job_id: {job_id}")

        return self._run_job(job_id, job_params)

    def upload_to_volume(
        self,
        local_path: Path,
        catalog: str,
        schema: str,
        volume: str,
    ) -> str:
        """Upload local files to a UC Volume under a unique, timestamped folder.

        A single file is uploaded under its own name. A directory is walked
        recursively with its relative layout preserved. Hidden files and hidden
        directories (names starting with '.') are skipped.

        Args:
            local_path: Local file or directory to upload.
            catalog: Catalog containing the volume.
            schema: Schema containing the volume.
            volume: Name of the volume.

        Returns:
            The volume folder the upload was directed to. It is returned even
            when nothing was uploaded (e.g. a hidden file or an empty directory).
        """
        now = datetime.now(timezone.utc)
        time_part = now.strftime("%Y%m%d%H%M%S")
        # The random suffix keeps two uploads within the same second from sharing a folder.
        random_part = ''.join(random.choices(string.ascii_lowercase + string.digits, k=4))
        volume_base_path = f"/Volumes/{catalog}/{schema}/{volume}"
        volume_input_path = f"{volume_base_path}/input-{time_part}-{random_part}"

        logger.info(f"Uploading {local_path} to {volume_input_path}...")

        # File upload
        if local_path.is_file():
            if local_path.name.startswith('.'):
                logger.debug(f"Skipping hidden file: {local_path}")
                return volume_input_path
            self._upload_file(local_path, f"{volume_input_path}/{local_path.name}")

        # Directory upload
        else:
            uploaded = 0
            for root, dirs, files in os.walk(local_path):
                # Prune hidden directories in place so os.walk does not descend into them.
                dirs[:] = [d for d in dirs if not d.startswith('.')]
                for file in files:
                    if file.startswith('.'):
                        continue
                    local_file = Path(root) / file
                    # Volume paths always use '/', so the relative part must not be rendered
                    # with the local OS separator (which is '\\' on Windows).
                    relative_path = local_file.relative_to(local_path).as_posix()
                    self._upload_file(local_file, f"{volume_input_path}/{relative_path}")
                    uploaded += 1
            if not uploaded:
                logger.warning(f"No non-hidden files found under {local_path}; nothing was uploaded.")

        logger.info(f"Upload complete: {volume_input_path}")
        return volume_input_path

    def _upload_file(self, local_file: Path, volume_file_path: str) -> None:
        """Upload one local file to the given volume path, replacing any existing file."""
        content = local_file.read_bytes()
        self._ws.files.upload(file_path=volume_file_path, contents=io.BytesIO(content), overwrite=True)
        logger.debug(f"Uploaded: {local_file} -> {volume_file_path}")

    def _build_job_parameters(
        self,
        input_dir: str,
        output_dir: str,
        source_tech: str,
        catalog: str,
        schema: str,
        foundation_model: str,
        switch_options: dict | None = None,
    ) -> dict[str, str]:
        """Build Switch job parameters.

        Entries in `switch_options` are merged last, so they take precedence over
        the standard parameters when a key is present in both.
        """
        if switch_options is None:
            switch_options = {}
        return {
            "input_dir": input_dir,
            "output_dir": output_dir,
            "source_tech": source_tech,
            "catalog": catalog,
            "schema": schema,
            "foundation_model": foundation_model,
            **switch_options,
        }

    def _run_job(
        self,
        job_id: int,
        job_params: dict[str, str],
    ) -> RootJsonValue:
        """Trigger Switch job and return run information.

        Raises:
            SystemExit: If the triggered run has no run ID.
        """
        job_run = self._ws.jobs.run_now(job_id, job_parameters=job_params)

        if not job_run.run_id:
            raise SystemExit(f"Job {job_id} execution failed.")

        job_run_url = f"{self._ws.config.host}/jobs/{job_id}/runs/{job_run.run_id}"
        logger.info(f"Switch LLM transpilation job started: {job_run_url}")

        return [
            {
                "job_id": job_id,
                "run_id": job_run.run_id,
                "run_url": job_run_url,
            }
        ]
15 changes: 15 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,21 @@ def morpheus_artifact() -> Path:
return artifact


@pytest.fixture
def switch_artifact() -> Path:
    """Locate the Switch plugin wheel bundled with the test resources.

    Fails the requesting test immediately if the wheel is not present.
    """
    wheel_dir = Path(__file__).parent.joinpath("resources", "transpiler_configs", "switch", "wheel")
    artifact = wheel_dir / "databricks_switch_plugin-0.1.2-py3-none-any.whl"
    assert artifact.exists(), f"Switch artifact not found: {artifact}"
    return artifact


class FakeDataSource(DataSource):

def __init__(self, start_delimiter: str, end_delimiter: str):
Expand Down
Loading
Loading