diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1bbcf64e..ed6b5f3f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: with: repository: NatLabRockies/resstock path: resstock - ref: develop + ref: sampling_regions - name: Remove AWS from resstock yaml run: | cd resstock diff --git a/buildstockbatch/local.py b/buildstockbatch/local.py index 44886853..d94a4da4 100644 --- a/buildstockbatch/local.py +++ b/buildstockbatch/local.py @@ -114,6 +114,12 @@ def run_building( (resources_path / "hpxml-measures").symlink_to(hpxml_measures_path, target_is_directory=True) else: resources_path = None + # samplers_path = buildstock_path / "samplers" + # if samplers_path.exists(): + # sampler_path = sim_path / "samplers" + # (sampler_path).symlink_to(samplers_path, target_is_directory=True) + # else: + # sampler_path = None custom_gems_path = buildstock_path / ".custom_gems" if custom_gems_path.exists(): gems_path = sim_path / ".custom_gems" @@ -205,6 +211,8 @@ def run_building( if resources_path: (resources_path / "hpxml-measures").unlink() resources_path.rmdir() + # if sampler_path: + # sampler_path.unlink() if gems_path: gems_path.unlink() diff --git a/buildstockbatch/sampler/__init__.py b/buildstockbatch/sampler/__init__.py index f821ad37..f4fbe0c9 100644 --- a/buildstockbatch/sampler/__init__.py +++ b/buildstockbatch/sampler/__init__.py @@ -4,5 +4,6 @@ ResidentialQuotaSampler, ResidentialQuotaDownselectSampler, ) # noqa F041 +from .residential_stratified import ResidentialStratifiedSampler # noqa F041 from .commercial_sobol import CommercialSobolSampler # noqa F041 from .precomputed import PrecomputedSampler # noqa F041 diff --git a/buildstockbatch/sampler/base.py b/buildstockbatch/sampler/base.py index 4757a837..ba9ecb04 100644 --- a/buildstockbatch/sampler/base.py +++ b/buildstockbatch/sampler/base.py @@ -85,7 +85,7 @@ def run_sampling(self): return self._run_sampling_apptainer() else: assert 
self.container_runtime == ContainerRuntime.LOCAL_OPENSTUDIO - return self._run_sampling_local_openstudio() + return self._run_sampling_local() def _run_sampling_docker(self): """ @@ -103,9 +103,9 @@ def _run_sampling_apptainer(self): """ raise NotImplementedError - def _run_sampling_local_openstudio(self): + def _run_sampling_local(self): """ - Execute the sampling on the local openstudio instance + Execute the sampling on the local instance Replace this in a subclass as necessary """ diff --git a/buildstockbatch/sampler/residential_quota.py b/buildstockbatch/sampler/residential_quota.py index 73d9b185..090f5abf 100644 --- a/buildstockbatch/sampler/residential_quota.py +++ b/buildstockbatch/sampler/residential_quota.py @@ -63,7 +63,7 @@ def _run_sampling_docker(self): self.parent().docker_image, [ "ruby", - "resources/run_sampling.rb", + "samplers/quota/run_sampling.rb", "-p", self.cfg["project_directory"], "-n", @@ -100,7 +100,7 @@ def _run_sampling_apptainer(self): "{}:/outbind".format(os.path.dirname(self.csv_path)), self.parent().apptainer_image, "ruby", - "resources/run_sampling.rb", + "samplers/quota/run_sampling.rb", "-p", self.cfg["project_directory"], "-n", @@ -113,11 +113,11 @@ def _run_sampling_apptainer(self): logger.debug("Apptainer sampling completed.") return self.csv_path - def _run_sampling_local_openstudio(self): + def _run_sampling_local(self): subprocess.run( [ - self.parent().openstudio_exe(), - str(pathlib.Path("resources", "run_sampling.rb")), + "ruby", + str(pathlib.Path("samplers", "quota", "run_sampling.rb")), "-p", self.cfg["project_directory"], "-n", diff --git a/buildstockbatch/sampler/residential_stratified.py b/buildstockbatch/sampler/residential_stratified.py new file mode 100644 index 00000000..f3a77daa --- /dev/null +++ b/buildstockbatch/sampler/residential_stratified.py @@ -0,0 +1,181 @@ +""" +buildstockbatch.sampler.residential_stratified +~~~~~~~~~~~~~~~ +This object contains the code required for generating the set of 
simulations to execute + +:author: Noel Merket, Ry Horsey +:copyright: (c) 2020 by The Alliance for Sustainable Energy +:license: BSD-3 +""" + +import docker +import logging +import os +import pathlib +import shutil +import subprocess +import sys +import time +import yaml + +from .base import BuildStockSampler +from buildstockbatch.exc import ValidationError + +logger = logging.getLogger(__name__) + + +class ResidentialStratifiedSampler(BuildStockSampler): + def __init__( + self, + parent, + n_datapoints, + segment_vars=( + "Federal Poverty Level", + "Geometry Floor Area Bin", + "Geometry Building Type RECS", + "Vintage", + "Heating Fuel", + "Sampling Region", + ), + segment_selection_sample_size=10000000, + num_samples_per_segment=8, + ): + """Residential Stratified Sampler + + :param parent: BuildStockBatchBase object + :type parent: BuildStockBatchBase (or subclass) + :param n_datapoints: number of datapoints to sample + :type n_datapoints: int + """ + super().__init__(parent) + self.validate_args(self.parent().project_filename, n_datapoints=n_datapoints) + self.n_datapoints = n_datapoints + self.sampler_config = self.create_sampler_config( + os.path.dirname(self.parent().project_filename), + list(segment_vars), + segment_selection_sample_size, + num_samples_per_segment, + ) + + @classmethod + def validate_args(cls, project_filename, **kw): + expected_args = set(["n_datapoints"]) + for k, v in kw.items(): + expected_args.discard(k) + if k == "n_datapoints": + if not isinstance(v, int): + raise ValidationError("n_datapoints needs to be an integer") + if v <= 0: + raise ValidationError("n_datapoints need to be >= 1") + elif k == "segment_vars": + pass + elif k == "segment_selection_sample_size": + pass + elif k == "num_samples_per_segment": + pass + else: + raise ValidationError(f"Unknown argument for sampler: {k}") + if len(expected_args) > 0: + raise ValidationError("The following sampler arguments are required: " + ", ".join(expected_args)) + return True + +
@classmethod + def create_sampler_config(cls, folderpath, segment_vars, segment_selection_sample_size, num_samples_per_segment): + data = {} + data["segment_vars"] = segment_vars + data["segment_selection_sample_size"] = segment_selection_sample_size + data["num_samples_per_segment"] = num_samples_per_segment + filename = pathlib.Path(folderpath) / "sampler_config.yaml" + with open(filename, "w") as file: + yaml.safe_dump(data, file) + return filename + + def _run_sampling_docker(self): + docker_client = docker.DockerClient.from_env() + tick = time.time() + extra_kws = {} + if sys.platform.startswith("linux"): + extra_kws["user"] = f"{os.getuid()}:{os.getgid()}" + container_output = docker_client.containers.run( + self.parent().docker_image, + [ + "python", + "samplers/stratified/sampler/run_sampler.py", + "-p", + self.cfg["project_directory"], + "-n", + str(self.n_datapoints), + "-o", + "buildstock.csv", + ], + remove=True, + volumes={self.buildstock_dir: {"bind": "/var/simdata/openstudio", "mode": "rw"}}, + name="buildstock_sampling", + **extra_kws, + ) + tick = time.time() - tick + for line in container_output.decode("utf-8").split("\n"): + logger.debug(line) + logger.debug("Sampling took {:.1f} seconds".format(tick)) + destination_filename = self.csv_path + if os.path.exists(destination_filename): + os.remove(destination_filename) + shutil.move( + os.path.join(self.buildstock_dir, "resources", "buildstock.csv"), + destination_filename, + ) + return destination_filename + + def _run_sampling_apptainer(self): + args = [ + "apptainer", + "exec", + "--contain", + "--home", + "{}:/buildstock".format(self.buildstock_dir), + "--bind", + "{}:/outbind".format(os.path.dirname(self.csv_path)), + self.parent().apptainer_image, + "python", + "samplers/stratified/sampler/run_sampler.py", + "-p", + self.cfg["project_directory"], + "-n", + str(self.n_datapoints), + "-o", + "../../outbind/{}".format(os.path.basename(self.csv_path)), + ] + logger.debug(f"Starting apptainer sampling 
with command: {' '.join(args)}") + subprocess.run(args, check=True, env=os.environ, cwd=self.parent().output_dir) + logger.debug("Apptainer sampling completed.") + return self.csv_path + + def _run_sampling_local(self): + subprocess.run( + [ + "python", + str(pathlib.Path("samplers", "stratified", "sampler", "run_sampler.py")), + "sample", + "-p", + self.cfg["project_directory"], + "-n", + str(self.n_datapoints), + "-c", + self.sampler_config, + "-o", + "buildstock.csv", + ], + cwd=self.buildstock_dir, + check=True, + ) + destination_filename = pathlib.Path(self.csv_path) + if destination_filename.exists(): + os.remove(destination_filename) + shutil.move( + pathlib.Path(self.buildstock_dir, "resources", "buildstock.csv"), + destination_filename, + ) + config_filename = pathlib.Path(self.sampler_config) + if config_filename.exists(): + os.remove(config_filename) + return destination_filename diff --git a/buildstockbatch/sampler/tests/test_residential_stratified.py b/buildstockbatch/sampler/tests/test_residential_stratified.py new file mode 100644 index 00000000..69eecdd4 --- /dev/null +++ b/buildstockbatch/sampler/tests/test_residential_stratified.py @@ -0,0 +1,172 @@ +import os +import pathlib +import tempfile +from unittest.mock import MagicMock, patch + +import pytest + +from buildstockbatch.exc import ValidationError +from buildstockbatch.sampler.residential_stratified import ResidentialStratifiedSampler +from buildstockbatch.utils import ContainerRuntime + + +def _make_parent(container_runtime, project_dir, buildstock_dir=None, output_dir=None): + parent = MagicMock() + parent.project_filename = "test_project.yml" + parent.cfg = {"project_directory": "project_resstock_national"} + parent.CONTAINER_RUNTIME = container_runtime + parent.project_dir = project_dir + parent.buildstock_dir = buildstock_dir + parent.output_dir = output_dir + parent.docker_image = "buildstockbatch:latest" + parent.apptainer_image = "/path/to/image.sif" + return parent + + +def 
test_residential_stratified_validate_args(): + assert ResidentialStratifiedSampler.validate_args("dummy_project.yml", n_datapoints=1) + assert ResidentialStratifiedSampler.validate_args("dummy_project.yml", n_datapoints=1000) + + +@pytest.mark.parametrize("n_datapoints", ["1000", 1000.5]) +def test_residential_stratified_validate_args_non_integer(n_datapoints): + with pytest.raises(ValidationError, match="n_datapoints needs to be an integer"): + ResidentialStratifiedSampler.validate_args("dummy_project.yml", n_datapoints=n_datapoints) + + +@pytest.mark.parametrize("n_datapoints", [0, -1]) +def test_residential_stratified_validate_args_non_positive(n_datapoints): + with pytest.raises(ValidationError, match="n_datapoints need to be >= 1"): + ResidentialStratifiedSampler.validate_args("dummy_project.yml", n_datapoints=n_datapoints) + + +def test_residential_stratified_validate_args_missing_required(): + with pytest.raises(ValidationError, match="The following sampler arguments are required"): + ResidentialStratifiedSampler.validate_args("dummy_project.yml") + + +def test_residential_stratified_validate_args_unknown_arg(): + with pytest.raises(ValidationError, match="Unknown argument for sampler"): + ResidentialStratifiedSampler.validate_args("dummy_project.yml", n_datapoints=10, foo="bar") + + +def test_residential_stratified_initialization(): + parent = _make_parent(ContainerRuntime.LOCAL_OPENSTUDIO, project_dir="/tmp/project") + sampler = ResidentialStratifiedSampler(parent, n_datapoints=100) + assert sampler.n_datapoints == 100 + assert sampler.parent() == parent + + +@pytest.mark.parametrize( + "container_runtime,sampler_method", + [ + (ContainerRuntime.DOCKER, "_run_sampling_docker"), + (ContainerRuntime.APPTAINER, "_run_sampling_apptainer"), + (ContainerRuntime.LOCAL_OPENSTUDIO, "_run_sampling_local"), + ], +) +def test_residential_stratified_run_sampling_dispatch(container_runtime, sampler_method): + parent = _make_parent(container_runtime, 
project_dir="/tmp/project", output_dir="/tmp/output") + sampler = ResidentialStratifiedSampler(parent, n_datapoints=100) + + with patch.object(sampler, sampler_method) as mocked_method: + sampler.run_sampling() + mocked_method.assert_called_once() + + +def test_residential_stratified_run_sampling_local(): + with tempfile.TemporaryDirectory() as tmpdir: + buildstock_dir = os.path.join(tmpdir, "buildstock") + resources_dir = os.path.join(buildstock_dir, "resources") + os.makedirs(resources_dir) + + project_dir = os.path.join(tmpdir, "project") + os.makedirs(os.path.join(project_dir, "housing_characteristics")) + + with open(os.path.join(resources_dir, "buildstock.csv"), "w") as f: + f.write("building_id\n1\n") + + parent = _make_parent( + ContainerRuntime.LOCAL_OPENSTUDIO, + project_dir=project_dir, + buildstock_dir=buildstock_dir, + ) + sampler = ResidentialStratifiedSampler(parent, n_datapoints=350) + + with patch("buildstockbatch.sampler.residential_stratified.subprocess") as subprocess_mock: + result = sampler._run_sampling_local() + + subprocess_mock.run.assert_called_once() + args = subprocess_mock.run.call_args[0][0] + assert args[0] == "python" + assert any("run_sampler.py" in arg for arg in args) + assert "sample" in args + assert "-n" in args + assert "350" in args + assert pathlib.Path(result).exists() + + +def test_residential_stratified_run_sampling_apptainer(): + with tempfile.TemporaryDirectory() as tmpdir: + buildstock_dir = os.path.join(tmpdir, "buildstock") + os.makedirs(os.path.join(buildstock_dir, "resources")) + + output_dir = os.path.join(tmpdir, "output") + os.makedirs(os.path.join(output_dir, "housing_characteristics")) + + parent = _make_parent( + ContainerRuntime.APPTAINER, + project_dir="/tmp/project", + buildstock_dir=buildstock_dir, + output_dir=output_dir, + ) + sampler = ResidentialStratifiedSampler(parent, n_datapoints=2000) + + with patch("buildstockbatch.sampler.residential_stratified.subprocess") as subprocess_mock: + result = 
sampler._run_sampling_apptainer() + + subprocess_mock.run.assert_called_once() + args = subprocess_mock.run.call_args[0][0] + assert args[0] == "apptainer" + assert "exec" in args + assert "python" in args + assert any("run_sampler.py" in arg for arg in args) + assert "-n" in args + assert "2000" in args + assert result == sampler.csv_path + + +def test_residential_stratified_run_sampling_docker(): + with tempfile.TemporaryDirectory() as tmpdir: + buildstock_dir = os.path.join(tmpdir, "buildstock") + resources_dir = os.path.join(buildstock_dir, "resources") + os.makedirs(resources_dir) + + project_dir = os.path.join(tmpdir, "project") + os.makedirs(os.path.join(project_dir, "housing_characteristics")) + + with open(os.path.join(resources_dir, "buildstock.csv"), "w") as f: + f.write("building_id\n1\n") + + parent = _make_parent( + ContainerRuntime.DOCKER, + project_dir=project_dir, + buildstock_dir=buildstock_dir, + ) + sampler = ResidentialStratifiedSampler(parent, n_datapoints=5000) + + with patch("buildstockbatch.sampler.residential_stratified.docker") as docker_mock: + docker_client_mock = MagicMock() + docker_mock.DockerClient.from_env.return_value = docker_client_mock + docker_client_mock.containers.run.return_value = b"Sampling completed" + + result = sampler._run_sampling_docker() + + docker_mock.DockerClient.from_env.assert_called_once() + docker_client_mock.containers.run.assert_called_once() + args = docker_client_mock.containers.run.call_args[0][1] + assert "python" in args + assert "samplers/stratified/sampler/run_sampler.py" in args + assert "-n" in args + assert "5000" in args + assert result == sampler.csv_path diff --git a/buildstockbatch/test/test_local.py b/buildstockbatch/test/test_local.py index 82309274..e71b95ef 100644 --- a/buildstockbatch/test/test_local.py +++ b/buildstockbatch/test/test_local.py @@ -44,6 +44,9 @@ def test_resstock_local_batch(project_filename): else: n_datapoints = 2 batch.cfg["sampler"]["args"]["n_datapoints"] = 
n_datapoints + elif batch.cfg["sampler"]["type"] == "residential_stratified": + n_datapoints = 8 # if less than 8, it seems to sample 0 due to "rounding" + batch.cfg["sampler"]["args"]["n_datapoints"] = n_datapoints else: sample_file = batch.cfg["sampler"]["args"]["sample_file"] if not os.path.isabs(sample_file): diff --git a/docs/samplers/index.rst b/docs/samplers/index.rst index 10e117de..888cc73e 100644 --- a/docs/samplers/index.rst +++ b/docs/samplers/index.rst @@ -8,5 +8,6 @@ A sampler is a class thats main function is to enumerate the buildings and build residential_quota residential_quota_downselect + residential_stratified precomputed commercial_sobol diff --git a/docs/samplers/residential_quota.rst b/docs/samplers/residential_quota.rst index ffa8e6ed..528a7953 100644 --- a/docs/samplers/residential_quota.rst +++ b/docs/samplers/residential_quota.rst @@ -1,7 +1,7 @@ Residential Quota Sampler ------------------------- -The Residential Quota sampler utilizes a `quota-based sampling method `_ to determine the buildings to simulate. It is the primary sampling algorithm used in ResStock. +The Residential Quota sampler utilizes a `quota-based sampling method `_ to determine the buildings to simulate. Configuration Example ~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/samplers/residential_stratified.rst b/docs/samplers/residential_stratified.rst new file mode 100644 index 00000000..a7edac73 --- /dev/null +++ b/docs/samplers/residential_stratified.rst @@ -0,0 +1,36 @@ +Residential Stratified Sampler +------------------------------ + +The Residential Stratified sampler utilizes a `stratified sampling method `_ to determine the buildings to simulate. It is the primary sampling algorithm used in ResStock. + +Configuration Example +~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: yaml + + sampler: + type: residential_stratified + args: + n_datapoints: 350000 + segment_vars: + - Vintage + - Heating Fuel + - Sampling Region + segment_selection_sample_size: 5000000 + num_samples_per_segment: 10 + +Arguments +~~~~~~~~~ + +- ``n_datapoints``: The number of datapoints to sample. +- ``segment_vars`` (optional): TODO The segment variables. Default is: + + - Federal Poverty Level + - Geometry Floor Area Bin + - Geometry Building Type RECS + - Vintage + - Heating Fuel + - Sampling Region + +- ``segment_selection_sample_size`` (optional): TODO The segment selection sample size. Default is 10000000. +- ``num_samples_per_segment`` (optional): TODO The number of samples per segment. Default is 8. diff --git a/setup.py b/setup.py index 99f3f2fa..d29ab4e6 100644 --- a/setup.py +++ b/setup.py @@ -44,11 +44,11 @@ "": ["LICENSE"], }, install_requires=[ - "pyyaml", + "pyyaml>=6.0.2", "requests", "numpy", "pandas>=2", - "polars>=1.2.0", + "polars>=1.8.2", "geopandas>=1.0.0", "joblib", "pyarrow", @@ -62,6 +62,8 @@ "semver", "tqdm", "boto3", + "click", + "networkx", ], extras_require={ "dev": [