From 1d2c27c7ab594d20b7f81f133aebb7bf09ecbfae Mon Sep 17 00:00:00 2001 From: Andy Curtis Date: Fri, 3 Mar 2023 14:35:12 -0500 Subject: [PATCH 1/5] Make MAXIMUM_SEED_SIZE configurable --- core/dbt/constants.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/core/dbt/constants.py b/core/dbt/constants.py index 63213476e54..b7ab5266c64 100644 --- a/core/dbt/constants.py +++ b/core/dbt/constants.py @@ -1,8 +1,14 @@ +import os + SECRET_ENV_PREFIX = "DBT_ENV_SECRET_" DEFAULT_ENV_PLACEHOLDER = "DBT_DEFAULT_PLACEHOLDER" METADATA_ENV_PREFIX = "DBT_ENV_CUSTOM_ENV_" -MAXIMUM_SEED_SIZE = 1 * 1024 * 1024 +def get_max_seed_size(): + mx = os.getenv('DBT_MAXIMUM_SEED_SIZE', '1') + return int(mx) + +MAXIMUM_SEED_SIZE = get_max_seed_size() * 1024 * 1024 MAXIMUM_SEED_SIZE_NAME = "1MB" PIN_PACKAGE_URL = ( From a4d29d791646b4bbd7970815a448bb4cc6fbbcca Mon Sep 17 00:00:00 2001 From: Andy Curtis Date: Fri, 3 Mar 2023 14:42:01 -0500 Subject: [PATCH 2/5] Updated MAXIMUM_SEED_SIZE_NAME --- core/dbt/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dbt/constants.py b/core/dbt/constants.py index b7ab5266c64..ffe2e1d99d9 100644 --- a/core/dbt/constants.py +++ b/core/dbt/constants.py @@ -9,7 +9,7 @@ def get_max_seed_size(): return int(mx) MAXIMUM_SEED_SIZE = get_max_seed_size() * 1024 * 1024 -MAXIMUM_SEED_SIZE_NAME = "1MB" +MAXIMUM_SEED_SIZE_NAME = str(get_max_seed_size()) + "MB" PIN_PACKAGE_URL = ( "https://docs.getdbt.com/docs/package-management#section-specifying-package-versions" From 352a0f865b060813da2c0ac23f50b02d5c968554 Mon Sep 17 00:00:00 2001 From: Andy Curtis Date: Fri, 3 Mar 2023 14:47:17 -0500 Subject: [PATCH 3/5] Added changie --- .changes/unreleased/Features-20230303-144700.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Features-20230303-144700.yaml diff --git a/.changes/unreleased/Features-20230303-144700.yaml b/.changes/unreleased/Features-20230303-144700.yaml new file mode 100644 index 00000000000..b424813302e --- /dev/null +++ b/.changes/unreleased/Features-20230303-144700.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Make MAXIMUM_SEED_SIZE configurable +time: 2023-03-03T14:47:00.079887-05:00 +custom: + Author: acurtis-evi + Issue: "7117" From 8858c66af94b3918575a90581fff777b14bc1d3b Mon Sep 17 00:00:00 2001 From: acurtis-evi <82470012+acurtis-evi@users.noreply.github.com> Date: Sat, 4 Mar 2023 06:08:42 -0500 Subject: [PATCH 4/5] Update core/dbt/constants.py Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> --- core/dbt/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dbt/constants.py b/core/dbt/constants.py index ffe2e1d99d9..5ef9246a4d2 100644 --- a/core/dbt/constants.py +++ b/core/dbt/constants.py @@ -9,7 +9,7 @@ def get_max_seed_size(): return int(mx) MAXIMUM_SEED_SIZE = get_max_seed_size() * 1024 * 1024 -MAXIMUM_SEED_SIZE_NAME = str(get_max_seed_size()) + "MB" +MAXIMUM_SEED_SIZE_NAME = str(get_max_seed_size()) + "MiB" PIN_PACKAGE_URL = ( "https://docs.getdbt.com/docs/package-management#section-specifying-package-versions" From 6bf1ef39f744224b6652f31732801b15c34241da Mon Sep 17 00:00:00 2001 From: Andy Curtis Date: Mon, 6 Mar 2023 09:44:33 -0500 Subject: [PATCH 5/5] Adding suggested change --- core/dbt/constants.py | 5 +++-- core/dbt/contracts/files.py | 7 ++----- core/dbt/parser/read_files.py | 8 ++++++-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/core/dbt/constants.py b/core/dbt/constants.py index 5ef9246a4d2..2417a037e1c 100644 --- a/core/dbt/constants.py +++ b/core/dbt/constants.py @@ -8,8 +8,9 @@ def get_max_seed_size(): mx = os.getenv('DBT_MAXIMUM_SEED_SIZE', '1') return int(mx) -MAXIMUM_SEED_SIZE = get_max_seed_size() * 1024 * 1024 -MAXIMUM_SEED_SIZE_NAME = str(get_max_seed_size()) + "MiB" +DEFAULT_MAXIMUM_SEED_SIZE = 1 * 1024 * 1024 +MAXIMUM_SEED_SIZE = get_max_seed_size() * DEFAULT_MAXIMUM_SEED_SIZE +MAXIMUM_SEED_SIZE_NAME = str(get_max_seed_size()) + "MB" PIN_PACKAGE_URL = ( "https://docs.getdbt.com/docs/package-management#section-specifying-package-versions" diff --git a/core/dbt/contracts/files.py b/core/dbt/contracts/files.py index 9e82247da00..bff7436cd52 100644 --- a/core/dbt/contracts/files.py +++ b/core/dbt/contracts/files.py @@ -5,7 +5,6 @@ from mashumaro.types import SerializableType from typing import List, Optional, Union, Dict, Any -from dbt.constants import MAXIMUM_SEED_SIZE from dbt.dataclass_schema import dbtClassMixin, StrEnum from .util import SourceKey @@ -65,10 +64,8 @@ def original_file_path(self) -> str: # name, should it? return os.path.join(self.searched_path, self.relative_path) - def seed_too_large(self) -> bool: - """Return whether the file this represents is over the seed size limit""" - return os.stat(self.full_path).st_size > MAXIMUM_SEED_SIZE - + def file_size(self) -> int: + return os.stat(self.full_path).st_size @dataclass class FileHash(dbtClassMixin): diff --git a/core/dbt/parser/read_files.py b/core/dbt/parser/read_files.py index 531e5f39560..95e3e697219 100644 --- a/core/dbt/parser/read_files.py +++ b/core/dbt/parser/read_files.py @@ -16,6 +16,7 @@ from dbt.parser.search import filesystem_search from typing import Optional +from dbt.constants import MAXIMUM_SEED_SIZE, DEFAULT_MAXIMUM_SEED_SIZE # This loads the files contents and creates the SourceFile object def load_source_file( @@ -94,14 +95,17 @@ def validate_yaml(file_path, dct): # Special processing for big seed files def load_seed_source_file(match: FilePath, project_name) -> SourceFile: - if match.seed_too_large(): + if match.file_size() < MAXIMUM_SEED_SIZE: # We don't want to calculate a hash of this file. Use the path. source_file = SourceFile.big_seed(match) - else: + elif match.file_size() <= DEFAULT_MAXIMUM_SEED_SIZE: file_contents = load_file_contents(match.absolute_path, strip=False) checksum = FileHash.from_contents(file_contents) source_file = SourceFile(path=match, checksum=checksum) source_file.contents = "" + else: + # Do new hash method + pass source_file.parse_file_type = ParseFileType.Seed source_file.project_name = project_name return source_file