Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for catalog configs #334

Closed
wants to merge 33 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
40fca93
add dep on dbt_config & and update protocol
colin-rogers-dbt Oct 1, 2024
00a74fe
add catalog concept
colin-rogers-dbt Oct 3, 2024
345ced9
add catalogs.py
colin-rogers-dbt Oct 9, 2024
d75155d
update contracts
colin-rogers-dbt Oct 11, 2024
f9eae6f
update contracts
colin-rogers-dbt Oct 11, 2024
41bc4a4
merge main
colin-rogers-dbt Dec 5, 2024
8386b5f
update to add catalog integration client
colin-rogers-dbt Dec 6, 2024
4822c2e
update with adapter_configs
colin-rogers-dbt Dec 13, 2024
98095f4
add catalog_name as key
colin-rogers-dbt Dec 13, 2024
58550d5
add managed
colin-rogers-dbt Dec 17, 2024
3fc57cf
fix broken import
colin-rogers-dbt Dec 18, 2024
c5dd131
remove CatalogIntegration from relation
colin-rogers-dbt Dec 18, 2024
349cc0e
remove CatalogIntegration from impl init
colin-rogers-dbt Dec 18, 2024
8d06ebd
add concrete catalog integration config
colin-rogers-dbt Jan 6, 2025
83e5c2a
add concrete catalog integration config
colin-rogers-dbt Jan 6, 2025
ebfdb92
Merge branch 'main' into feature/externalCatalogConfig
colin-rogers-dbt Jan 7, 2025
7e24326
fix args and protocols
colin-rogers-dbt Jan 7, 2025
4e333f6
delete file
colin-rogers-dbt Jan 7, 2025
45a0803
Merge branch 'main' into feature/externalCatalogConfig
colin-rogers-dbt Jan 8, 2025
e2fb7a3
add catalog_name to base relation
colin-rogers-dbt Jan 10, 2025
b981f04
add integration validation
colin-rogers-dbt Jan 15, 2025
54a62a2
add catalog unit test
colin-rogers-dbt Jan 15, 2025
526b8d3
Merge branch 'main' into feature/externalCatalogConfig
colin-rogers-dbt Jan 24, 2025
9a88310
merge main
colin-rogers-dbt Feb 3, 2025
3f53704
remove extraneous file
colin-rogers-dbt Feb 3, 2025
a1c89e7
add unit tests
colin-rogers-dbt Feb 4, 2025
971e7eb
fix formatting and protocol typing
colin-rogers-dbt Feb 4, 2025
89297fd
Merge branch 'main' into feature/externalCatalogConfig
colin-rogers-dbt Feb 4, 2025
8f749ec
Fix issue templates and triage (#777)
mikealfare Feb 4, 2025
180526e
Update project URLs for all adapters (#778)
mikealfare Feb 4, 2025
bb2bf43
Add license to each package and standardize on file name (#779)
mikealfare Feb 4, 2025
5e4a4b6
Add unit test fixture
colin-rogers-dbt Feb 5, 2025
ad5012b
Merge branch 'main' into feature/externalCatalogConfig
colin-rogers-dbt Feb 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dbt-adapters/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,10 @@ Unit tests can be run locally without setting up a database connection:
# Note: replace $strings with valid names

# run all unit tests
hatch run unit-test
hatch run unit-tests

# run all unit tests in a module
hatch run unit-test tests/unit/$test_file_name.py
hatch run unit-tests tests/unit/$test_file_name.py

# run a specific unit test
hatch run unit-tests tests/unit/$test_file_name.py::$test_class_name::$test_method_name
Expand Down
28 changes: 27 additions & 1 deletion dbt-adapters/src/dbt/adapters/base/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
)
from dbt.adapters.cache import RelationsCache, _make_ref_key_dict
from dbt.adapters.capability import Capability, CapabilityDict
from dbt.adapters.clients import catalogs as catalogs_client
from dbt.adapters.contracts.connection import Credentials
from dbt.adapters.contracts.macros import MacroResolverProtocol
from dbt.adapters.contracts.relation import RelationConfig
Expand All @@ -88,7 +89,12 @@
SnapshotTargetNotSnapshotTableError,
UnexpectedNonTimestampError,
)
from dbt.adapters.protocol import AdapterConfig, MacroContextGeneratorCallable
from dbt.adapters.protocol import (
AdapterConfig,
MacroContextGeneratorCallable,
CatalogIntegrationConfigProtocol,
CatalogIntegrationProtocol,
)

if TYPE_CHECKING:
import agate
Expand Down Expand Up @@ -251,6 +257,7 @@ class BaseAdapter(metaclass=AdapterMeta):
- expand_column_types
- list_relations_without_caching
- is_cancelable
- execute
- create_schema
- drop_schema
- quote
Expand All @@ -264,11 +271,13 @@ class BaseAdapter(metaclass=AdapterMeta):

Macros:
- get_catalog

"""

Relation: Type[BaseRelation] = BaseRelation
Column: Type[BaseColumn] = BaseColumn
ConnectionManager: Type[BaseConnectionManager]
CatalogIntegrations: Dict[str, Type[CatalogIntegrationProtocol]]

# A set of clobber config fields accepted by this adapter
# for use in materializations
Expand Down Expand Up @@ -296,6 +305,23 @@ def __init__(self, config, mp_context: SpawnContext) -> None:
self._macro_context_generator: Optional[MacroContextGeneratorCallable] = None
self.behavior = DEFAULT_BASE_BEHAVIOR_FLAGS # type: ignore

def add_catalog_integrations(
    self, catalog_integrations: Optional[List[CatalogIntegrationConfigProtocol]]
) -> None:
    """Instantiate and register a catalog integration for each supplied config.

    Args:
        catalog_integrations: integration configs to register; ``None`` or an
            empty list is a no-op.

    Raises:
        DbtValidationError: if a config requests a catalog type that is not a
            key of this adapter's ``CatalogIntegrations`` mapping.
    """
    if not catalog_integrations:
        return

    # ``CatalogIntegrations`` is declared on the adapter class as an annotation
    # without a value, so an adapter that never assigns it has no such
    # attribute. Treat that as "no catalog types supported" so the caller gets
    # a validation error rather than an AttributeError.
    supported = getattr(self, "CatalogIntegrations", None) or {}

    for integration_config in catalog_integrations:
        catalog_type = integration_config.catalog_type
        if catalog_type not in supported:
            raise DbtValidationError(
                f"requested catalog type, {catalog_type}, is not supported."
            )
        integration = supported[catalog_type](integration_config)
        # Register the instance with the catalogs client under its catalog name.
        catalogs_client.add_catalog(integration, integration_config.catalog_name)

@available
def get_catalog_integration(self, integration_name: str) -> CatalogIntegrationProtocol:
    """Return the catalog integration that the catalogs client holds under ``integration_name``."""
    integration = catalogs_client.get_catalog(integration_name)
    return integration

###
# Methods to set / access a macro resolver
###
Expand Down
9 changes: 9 additions & 0 deletions dbt-adapters/src/dbt/adapters/base/relation.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class BaseRelation(FakeAPIObject, Hashable):
require_alias: bool = (
True # used to govern whether to add an alias when render_limited is called
)
catalog_name: Optional[str] = None

# register relation types that can be renamed for the purpose of replacing relations using stages and backups
# adding a relation type here also requires defining the associated rename macro
Expand Down Expand Up @@ -305,6 +306,13 @@ def create_from(

config_quoting = relation_config.quoting_dict
config_quoting.pop("column", None)

catalog_name = (
relation_config.catalog_name
if hasattr(relation_config, "catalog_name")
else relation_config.config.get("catalog", None) # type: ignore
)

# precedence: kwargs quoting > relation config quoting > base quoting > default quoting
quote_policy = deep_merge(
cls.get_default_quote_policy().to_dict(omit_none=True),
Expand All @@ -318,6 +326,7 @@ def create_from(
schema=relation_config.schema,
identifier=relation_config.identifier,
quote_policy=quote_policy,
catalog_name=catalog_name,
**kwargs,
)

Expand Down
27 changes: 27 additions & 0 deletions dbt-adapters/src/dbt/adapters/clients/catalogs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from dbt.adapters.protocol import CatalogIntegrationProtocol


class CatalogIntegrations:
    """In-memory registry that maps a catalog name to its catalog integration instance."""

    def __init__(self):
        self._integrations = {}

    def get(self, name: str) -> CatalogIntegrationProtocol:
        """Return the integration registered under ``name``.

        Raises:
            KeyError: if nothing is registered under ``name``. The message lists
                the names that are registered to make misconfiguration easier
                to diagnose.
        """
        try:
            return self.integrations[name]
        except KeyError:
            registered = ", ".join(sorted(map(str, self._integrations))) or "<none>"
            # Same exception type as a plain dict lookup, with a more useful message.
            raise KeyError(
                f"catalog integration '{name}' is not registered; "
                f"registered catalogs: {registered}"
            ) from None

    @property
    def integrations(self) -> dict[str, CatalogIntegrationProtocol]:
        """The underlying name -> integration mapping (the live dict, not a copy)."""
        return self._integrations

    def add_integration(self, integration: CatalogIntegrationProtocol, catalog_name: str):
        """Register ``integration`` under ``catalog_name``, replacing any existing entry."""
        self._integrations[catalog_name] = integration


# Module-level registry instance used by get_catalog() and add_catalog() below.
_CATALOG_CLIENT = CatalogIntegrations()


def get_catalog(integration_name: str) -> CatalogIntegrationProtocol:
    """Return the integration stored in the module-level registry under ``integration_name``."""
    registry = _CATALOG_CLIENT
    return registry.get(integration_name)


def add_catalog(integration: CatalogIntegrationProtocol, catalog_name: str):
    """Store ``integration`` in the module-level registry under ``catalog_name``."""
    registry = _CATALOG_CLIENT
    registry.add_integration(integration=integration, catalog_name=catalog_name)
59 changes: 59 additions & 0 deletions dbt-adapters/src/dbt/adapters/contracts/catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import abc
from dataclasses import dataclass
from enum import Enum
from typing import Optional, Dict

from dbt.adapters.contracts.relation import RelationConfig
from dbt.adapters.relation_configs.formats import TableFormat


class CatalogIntegrationType(Enum):
    """Closed set of catalog integration types; each member's value equals its name."""

    managed = "managed"
    iceberg_rest = "iceberg_rest"
    glue = "glue"
    unity = "unity"


@dataclass
class CatalogIntegrationConfig:
    """Plain-data description of one catalog integration.

    The first four fields are required; the remaining three default to ``None``.
    """

    # Required fields.
    catalog_name: str
    integration_name: str
    table_format: str
    catalog_type: str

    # Optional fields.
    external_volume: Optional[str] = None
    namespace: Optional[str] = None
    adapter_properties: Optional[Dict] = None


class CatalogIntegration(abc.ABC):
    """
    An external catalog integration is a connection to an external catalog that can be used to
    interact with the catalog. This class is an abstract base class that should be subclassed by
    specific integrations in the adapters.

    Implements the CatalogIntegrationProtocol.

    Subclasses must implement ``_handle_adapter_properties`` and ``render_ddl_predicates``.
    """

    catalog_name: str
    integration_name: str
    table_format: TableFormat
    integration_type: CatalogIntegrationType
    external_volume: Optional[str] = None
    namespace: Optional[str] = None

    def __init__(self, integration_config: CatalogIntegrationConfig):
        """Populate the integration from ``integration_config``.

        ``table_format`` and ``catalog_type`` are converted to ``TableFormat`` and
        ``CatalogIntegrationType`` respectively, so a value that is not a member of
        the corresponding enum is rejected by that enum's constructor.
        ``_handle_adapter_properties`` is only invoked when ``adapter_properties``
        is non-empty.
        """
        self.catalog_name = integration_config.catalog_name
        self.integration_name = integration_config.integration_name
        self.table_format = TableFormat(integration_config.table_format)
        # The class body declares `integration_type`, so that is the attribute
        # that must be populated for the declared interface to hold.
        self.integration_type = CatalogIntegrationType(integration_config.catalog_type)
        # Alias retained for callers that read `.type`.
        self.type = self.integration_type
        self.external_volume = integration_config.external_volume
        self.namespace = integration_config.namespace
        if integration_config.adapter_properties:
            self._handle_adapter_properties(integration_config.adapter_properties)

    @abc.abstractmethod
    def _handle_adapter_properties(self, adapter_properties: Dict) -> None:
        """Consume adapter-specific properties; called from ``__init__`` when any are supplied."""
        ...

    @abc.abstractmethod
    def render_ddl_predicates(self, relation, config: RelationConfig) -> str:
        """Return a string of DDL predicates for ``relation`` given its ``config``."""
        ...
1 change: 1 addition & 0 deletions dbt-adapters/src/dbt/adapters/contracts/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
dbtClassMixin,
)


# TODO: this is a very bad dependency - shared global state
from dbt_common.events.contextvars import get_node_info
from dbt_common.events.functions import fire_event
Expand Down
1 change: 1 addition & 0 deletions dbt-adapters/src/dbt/adapters/contracts/relation.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class RelationConfig(Protocol):
tags: List[str]
quoting_dict: Dict[str, bool]
config: Optional[MaterializationConfig]
catalog_name: Optional[str]


class ComponentName(StrEnum):
Expand Down
30 changes: 24 additions & 6 deletions dbt-adapters/src/dbt/adapters/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,27 @@ class ColumnProtocol(Protocol):
pass


class CatalogIntegrationConfigProtocol(Protocol):
    """Structural type for the configuration object used to build a catalog integration."""

    # String-valued fields.
    catalog_name: str
    integration_name: str
    table_format: str
    catalog_type: str

    # Fields that may be None.
    external_volume: Optional[str]
    namespace: Optional[str]
    adapter_properties: Optional[Dict]


class CatalogIntegrationProtocol(Protocol):
    """Structural type for a catalog integration built from a single config object."""

    # String-valued attributes.
    catalog_name: str
    integration_name: str
    table_format: str
    integration_type: str

    # Attributes that may be None.
    external_volume: Optional[str]
    namespace: Optional[str]

    def __init__(self, integration_config: CatalogIntegrationConfigProtocol) -> None:
        ...


Self = TypeVar("Self", bound="RelationProtocol")


Expand All @@ -62,6 +83,7 @@ def create_from(
ConnectionManager_T = TypeVar("ConnectionManager_T", bound=ConnectionManagerProtocol)
Relation_T = TypeVar("Relation_T", bound=RelationProtocol)
Column_T = TypeVar("Column_T", bound=ColumnProtocol)
CatalogIntegration_T = TypeVar("CatalogIntegration_T", bound=CatalogIntegrationProtocol)


class MacroContextGeneratorCallable(Protocol):
Expand All @@ -77,12 +99,7 @@ def __call__(
# TODO CT-211
class AdapterProtocol( # type: ignore[misc]
Protocol,
Generic[
AdapterConfig_T,
ConnectionManager_T,
Relation_T,
Column_T,
],
Generic[AdapterConfig_T, ConnectionManager_T, Relation_T, Column_T, CatalogIntegration_T],
):
# N.B. Technically these are ClassVars, but mypy doesn't support putting type vars in a
# ClassVar due to the restrictiveness of PEP-526
Expand All @@ -91,6 +108,7 @@ class AdapterProtocol( # type: ignore[misc]
Column: Type[Column_T]
Relation: Type[Relation_T]
ConnectionManager: Type[ConnectionManager_T]
CatalogIntegrations: Dict[str, Type[CatalogIntegration_T]]
connections: ConnectionManager_T

def __init__(self, config: AdapterRequiredConfig) -> None: ...
Expand Down
19 changes: 19 additions & 0 deletions dbt-adapters/src/dbt/adapters/relation_configs/formats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from dbt_common.dataclass_schema import StrEnum # doesn't exist in standard library until py3.11
from typing_extensions import Self


class TableFormat(StrEnum):
"""
Some platforms may refer to this 'Object' or 'File Format'.
Data practitioners and interfaces refer to this as 'Table Format's, hence the term's use here.
"""

DEFAULT = "default"
ICEBERG = "iceberg"

@classmethod
def default(cls) -> Self:
return cls("default")

def __str__(self):
return self.value
7 changes: 7 additions & 0 deletions dbt-adapters/tests/unit/clients/test_catalogs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from dbt.adapters.clients.catalogs import add_catalog, get_catalog


def test_adding_catalog_integration(fake_catalog_integration):
    """A catalog registered with add_catalog is returned by get_catalog under the same name."""
    catalog = fake_catalog_integration
    add_catalog(catalog, catalog_name="fake_catalog")
    # Assert identity so the test fails if the lookup returns anything other
    # than the instance that was registered.
    assert get_catalog("fake_catalog") is catalog
1 change: 1 addition & 0 deletions dbt-adapters/tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
behavior_flags,
config,
flags,
fake_catalog_integration,
)
4 changes: 4 additions & 0 deletions dbt-adapters/tests/unit/fixtures/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,7 @@
config,
flags,
)

from tests.unit.fixtures.catalog_integration import (
fake_catalog_integration,
)
3 changes: 3 additions & 0 deletions dbt-adapters/tests/unit/fixtures/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
from dbt.adapters.base.column import Column
from dbt.adapters.base.impl import BaseAdapter
from dbt.adapters.base.relation import BaseRelation
from dbt.adapters.contracts.catalog import CatalogIntegrationType
from dbt.adapters.contracts.connection import AdapterRequiredConfig, QueryComment

from tests.unit.fixtures.connection_manager import ConnectionManagerStub
from tests.unit.fixtures.credentials import CredentialsStub
from tests.unit.fixtures.catalog_integration import FakeCatalogIntegration


class BaseAdapterStub(BaseAdapter):
Expand All @@ -21,6 +23,7 @@ class BaseAdapterStub(BaseAdapter):
"""

ConnectionManager = ConnectionManagerStub
CatalogIntegrations = {CatalogIntegrationType.managed.value: FakeCatalogIntegration}

###
# Abstract methods for database-specific values, attributes, and types
Expand Down
33 changes: 33 additions & 0 deletions dbt-adapters/tests/unit/fixtures/catalog_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from typing import Dict

import pytest

from dbt.adapters.contracts.catalog import CatalogIntegration, CatalogIntegrationConfig, CatalogIntegrationType
from dbt.adapters.relation_configs.formats import TableFormat


class FakeCatalogIntegration(CatalogIntegration):
    """Minimal concrete CatalogIntegration used as a test double."""

    fake_property: int

    def _handle_adapter_properties(self, adapter_properties: Dict) -> None:
        # Only set when the key is supplied; otherwise the attribute stays unset.
        if "fake_property" in adapter_properties:
            self.fake_property = adapter_properties["fake_property"]

    def render_ddl_predicates(self, relation, config=None) -> str:
        # Accepts `config` so the override can be called with the same arguments
        # as the abstract method it implements; the default keeps one-argument
        # calls working.
        return "mocked"


# Module-level FakeCatalogIntegration, built once at import time and returned by the
# fake_catalog_integration fixture below. No adapter_properties are supplied here.
catalog = FakeCatalogIntegration(
    integration_config=CatalogIntegrationConfig(
        catalog_type=CatalogIntegrationType.managed.value,
        catalog_name="snowflake_managed",
        integration_name="test_integration",
        table_format=TableFormat.ICEBERG,
        external_volume="test_volume",
    )
)


@pytest.fixture
def fake_catalog_integration():
    """Provide the module-level ``catalog`` instance (the same object on every use)."""
    shared_instance = catalog
    return shared_instance
Loading
Loading