diff --git a/.changes/unreleased/Features-20230303-144700.yaml b/.changes/unreleased/Features-20230303-144700.yaml
new file mode 100644
index 00000000000..b424813302e
--- /dev/null
+++ b/.changes/unreleased/Features-20230303-144700.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Make MAXIMUM_SEED_SIZE configurable
+time: 2023-03-03T14:47:00.079887-05:00
+custom:
+  Author: acurtis-evi
+  Issue: "7117"
diff --git a/core/dbt/constants.py b/core/dbt/constants.py
index 63213476e54..2417a037e1c 100644
--- a/core/dbt/constants.py
+++ b/core/dbt/constants.py
@@ -1,9 +1,34 @@
+import os
+
 SECRET_ENV_PREFIX = "DBT_ENV_SECRET_"
 DEFAULT_ENV_PLACEHOLDER = "DBT_DEFAULT_PLACEHOLDER"
 METADATA_ENV_PREFIX = "DBT_ENV_CUSTOM_ENV_"
 
-MAXIMUM_SEED_SIZE = 1 * 1024 * 1024
-MAXIMUM_SEED_SIZE_NAME = "1MB"
+DEFAULT_MAXIMUM_SEED_SIZE = 1 * 1024 * 1024
+
+
+def get_max_seed_size() -> int:
+    """Return the seed size limit, in MB, configured by DBT_MAXIMUM_SEED_SIZE.
+
+    Defaults to 1 when the variable is unset. Raises ValueError when the value
+    is not a positive integer, so a typo fails with a message naming the variable.
+    """
+    raw = os.getenv("DBT_MAXIMUM_SEED_SIZE", "1")
+    try:
+        size_mb = int(raw)
+    except ValueError:
+        size_mb = 0  # not an integer: fall through to the shared error below
+    if size_mb < 1:
+        raise ValueError(
+            f"DBT_MAXIMUM_SEED_SIZE must be a positive whole number of MB, got {raw!r}"
+        )
+    return size_mb
+
+
+# Read the environment once so the byte limit and its display name always agree.
+_MAXIMUM_SEED_SIZE_MB = get_max_seed_size()
+MAXIMUM_SEED_SIZE = _MAXIMUM_SEED_SIZE_MB * DEFAULT_MAXIMUM_SEED_SIZE
+MAXIMUM_SEED_SIZE_NAME = f"{_MAXIMUM_SEED_SIZE_MB}MB"
 
 PIN_PACKAGE_URL = (
     "https://docs.getdbt.com/docs/package-management#section-specifying-package-versions"
diff --git a/core/dbt/contracts/files.py b/core/dbt/contracts/files.py
index 9e82247da00..bff7436cd52 100644
--- a/core/dbt/contracts/files.py
+++ b/core/dbt/contracts/files.py
@@ -5,7 +5,6 @@
 from mashumaro.types import SerializableType
 from typing import List, Optional, Union, Dict, Any
 
-from dbt.constants import MAXIMUM_SEED_SIZE
 from dbt.dataclass_schema import dbtClassMixin, StrEnum
 
 from .util import SourceKey
@@ -65,10 +64,10 @@ def original_file_path(self) -> str:
         # name, should it?
         return os.path.join(self.searched_path, self.relative_path)
 
-    def seed_too_large(self) -> bool:
-        """Return whether the file this represents is over the seed size limit"""
-        return os.stat(self.full_path).st_size > MAXIMUM_SEED_SIZE
+    def file_size(self) -> int:
+        """Return the size in bytes of the file this represents"""
+        return os.stat(self.full_path).st_size
 
 
 @dataclass
 class FileHash(dbtClassMixin):
diff --git a/core/dbt/parser/read_files.py b/core/dbt/parser/read_files.py
index 531e5f39560..95e3e697219 100644
--- a/core/dbt/parser/read_files.py
+++ b/core/dbt/parser/read_files.py
@@ -16,6 +16,7 @@
 from dbt.parser.search import filesystem_search
 from typing import Optional
+from dbt.constants import MAXIMUM_SEED_SIZE
 
 
 # This loads the files contents and creates the SourceFile object
 def load_source_file(
@@ -94,14 +95,17 @@ def validate_yaml(file_path, dct):
 
 # Special processing for big seed files
 def load_seed_source_file(match: FilePath, project_name) -> SourceFile:
-    if match.seed_too_large():
+    # MAXIMUM_SEED_SIZE defaults to 1MB and can be raised via DBT_MAXIMUM_SEED_SIZE.
+    if match.file_size() > MAXIMUM_SEED_SIZE:
         # We don't want to calculate a hash of this file. Use the path.
         source_file = SourceFile.big_seed(match)
     else:
+        # Every seed within the limit is hashed by content, so edits are detected.
+        # TODO: hash files above 1MB in chunks instead of loading them whole.
         file_contents = load_file_contents(match.absolute_path, strip=False)
         checksum = FileHash.from_contents(file_contents)
         source_file = SourceFile(path=match, checksum=checksum)
         source_file.contents = ""
     source_file.parse_file_type = ParseFileType.Seed
     source_file.project_name = project_name
     return source_file