Skip to content

Commit

Permalink
Gate sample mode functionality via env var `DBT_EXPERIMENTAL_SAMPLE_MODE`
Browse files Browse the repository at this point in the history

At this point sample mode is _alpha_ and should not be depended upon. To make
this crystal clear we've gated the functionality behind an environment variable.
We'll likely remove this gate in the coming month.
  • Loading branch information
QMalcolm committed Jan 31, 2025
1 parent 2942861 commit 1811754
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 18 deletions.
3 changes: 2 additions & 1 deletion core/dbt/context/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,8 @@ def resolve_limit(self) -> Optional[int]:
def resolve_event_time_filter(self, target: ManifestNode) -> Optional[EventTimeFilter]:
event_time_filter = None
sample_mode = bool(
getattr(self.config.args, "sample", False)
os.environ.get("DBT_EXPERIMENTAL_SAMPLE_MODE")
and getattr(self.config.args, "sample", False)
and getattr(self.config.args, "sample_window", None)
)

Expand Down
21 changes: 16 additions & 5 deletions tests/functional/sample_mode/test_sample_mode.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os
from datetime import datetime

import freezegun
import pytest
import pytz
from pytest_mock import MockerFixture

from dbt.artifacts.resources.types import BatchSize
from dbt.event_time.sample_window import SampleWindow
Expand Down Expand Up @@ -69,30 +71,37 @@ def event_catcher(self) -> EventCatcher:
return EventCatcher(event_to_catch=JinjaLogInfo) # type: ignore

@pytest.mark.parametrize(
"use_sample_mode,expected_row_count,arg_value_in_jinja",
"sample_mode_available,run_sample_mode,expected_row_count,arg_value_in_jinja",
[
(True, 1, True),
(False, 3, False),
(True, True, 1, True),
(True, False, 3, False),
(False, True, 3, True),
(False, False, 3, False),
],
)
@freezegun.freeze_time("2025-01-03T02:03:0Z")
def test_sample_mode(
self,
project,
mocker: MockerFixture,
event_catcher: EventCatcher,
use_sample_mode: bool,
sample_mode_available: bool,
run_sample_mode: bool,
expected_row_count: int,
arg_value_in_jinja: bool,
):
run_args = ["run"]
expected_sample_window = None
if use_sample_mode:
if run_sample_mode:
run_args.extend(["--sample", "--sample-window=1 day"])
expected_sample_window = SampleWindow(
start=datetime(2025, 1, 2, 2, 3, 0, 0, tzinfo=pytz.UTC),
end=datetime(2025, 1, 3, 2, 3, 0, 0, tzinfo=pytz.UTC),
)

if sample_mode_available:
mocker.patch.dict(os.environ, {"DBT_EXPERIMENTAL_SAMPLE_MODE": "1"})

_ = run_dbt(run_args, callbacks=[event_catcher.catch])
assert len(event_catcher.caught_events) == 2
assert event_catcher.caught_events[0].info.msg == f"Sample mode: {arg_value_in_jinja}" # type: ignore
Expand Down Expand Up @@ -124,6 +133,7 @@ def event_time_end_catcher(self) -> EventCatcher:
def test_sample_mode(
self,
project,
mocker: MockerFixture,
event_time_end_catcher: EventCatcher,
event_time_start_catcher: EventCatcher,
):
Expand All @@ -146,6 +156,7 @@ def test_sample_mode(
),
]

mocker.patch.dict(os.environ, {"DBT_EXPERIMENTAL_SAMPLE_MODE": "True"})
_ = run_dbt(
["run", "--sample", "--sample-window=2 day"],
callbacks=[event_time_end_catcher.catch, event_time_start_catcher.catch],
Expand Down
61 changes: 49 additions & 12 deletions tests/unit/context/test_providers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from argparse import Namespace
from datetime import datetime
from typing import Optional
Expand Down Expand Up @@ -45,93 +46,124 @@ def test_resolve_limit(self, resolver, empty, expected_resolve_limit):
assert resolver.resolve_limit == expected_resolve_limit

@pytest.mark.parametrize(
"use_microbatch_batches,materialized,incremental_strategy,sample_mode,sample_window,resolver_model_node,expect_filter",
"use_microbatch_batches,materialized,incremental_strategy,sample_mode_available,run_sample_mode,sample_window,resolver_model_node,expect_filter",
[
# Microbatch model without sample
(
True,
"incremental",
"microbatch",
True,
False,
None,
True,
True,
), # Microbatch model without sample
),
# Microbatch model with sample
(
True,
"incremental",
"microbatch",
True,
True,
SampleWindow(
start=datetime(2024, 1, 1, tzinfo=pytz.UTC),
end=datetime(2025, 1, 1, tzinfo=pytz.UTC),
),
True,
True,
), # Microbatch model with sample
),
# Normal model with sample
(
False,
"table",
None,
True,
True,
SampleWindow(
start=datetime(2024, 1, 1, tzinfo=pytz.UTC),
end=datetime(2025, 1, 1, tzinfo=pytz.UTC),
),
True,
True,
), # Normal model with sample
),
# Incremental merge model with sample
(
True,
"incremental",
"merge",
True,
True,
SampleWindow(
start=datetime(2024, 1, 1, tzinfo=pytz.UTC),
end=datetime(2025, 1, 1, tzinfo=pytz.UTC),
),
True,
True,
), # Incremental merge model with sample
),
# Normal model with sample, but sample mode not available
(
False,
"table",
None,
False,
True,
SampleWindow(
start=datetime(2024, 1, 1, tzinfo=pytz.UTC),
end=datetime(2025, 1, 1, tzinfo=pytz.UTC),
),
True,
False,
),
# Sample, but not model node
(
False,
), # Sample, but not model node
"table",
None,
True,
True,
SampleWindow(
start=datetime(2024, 1, 1, tzinfo=pytz.UTC),
end=datetime(2025, 1, 1, tzinfo=pytz.UTC),
),
False,
False,
),
# Microbatch, but not model node
(
True,
"incremental",
"microbatch",
False,
False,
None,
False,
False,
), # Microbatch, but not model node
),
# Microbatch model, but not using batches
(
False,
"incremental",
"microbatch",
False,
False,
None,
True,
False,
), # Mircrobatch model, but not using batches
),
# Non microbatch model, but supposed to use batches
(
True,
"table",
"microbatch",
False,
False,
None,
True,
False,
), # Non microbatch model, but supposed to use batches
(True, "incremental", "merge", False, None, True, False), # Incremental merge
),
# Incremental merge
(True, "incremental", "merge", False, False, None, True, False),
],
)
def test_resolve_event_time_filter(
Expand All @@ -141,7 +173,8 @@ def test_resolve_event_time_filter(
use_microbatch_batches: bool,
materialized: str,
incremental_strategy: Optional[str],
sample_mode: bool,
sample_mode_available: bool,
run_sample_mode: bool,
sample_window: Optional[SampleWindow],
resolver_model_node: bool,
expect_filter: bool,
Expand All @@ -151,10 +184,14 @@ def test_resolve_event_time_filter(
target.config = mock.MagicMock(NodeConfig)
target.config.event_time = "created_at"

# Declare whether sample mode is available
if sample_mode_available:
mocker.patch.dict(os.environ, {"DBT_EXPERIMENTAL_SAMPLE_MODE": "1"})

# Resolver mocking
resolver.config.args.EVENT_TIME_END = None
resolver.config.args.EVENT_TIME_START = None
resolver.config.args.sample = sample_mode
resolver.config.args.sample = run_sample_mode
resolver.config.args.sample_window = sample_window
if resolver_model_node:
resolver.model = mock.MagicMock(spec=ModelNode)
Expand Down

0 comments on commit 1811754

Please sign in to comment.