-
Notifications
You must be signed in to change notification settings - Fork 22
Sampling regions characteristic #525
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: nlr-name-updates
Are you sure you want to change the base?
Changes from all commits
ff856c5
9cb29b0
feee545
2d29172
ceb601a
687068c
a2425b1
a117c74
2689072
151066a
b44525b
a64a19b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -63,7 +63,7 @@ def _run_sampling_docker(self): | |
| self.parent().docker_image, | ||
| [ | ||
| "ruby", | ||
| "resources/run_sampling.rb", | ||
| "samplers/quota/run_sampling.rb", | ||
| "-p", | ||
| self.cfg["project_directory"], | ||
| "-n", | ||
|
|
@@ -100,7 +100,7 @@ def _run_sampling_apptainer(self): | |
| "{}:/outbind".format(os.path.dirname(self.csv_path)), | ||
| self.parent().apptainer_image, | ||
| "ruby", | ||
| "resources/run_sampling.rb", | ||
| "samplers/quota/run_sampling.rb", | ||
| "-p", | ||
| self.cfg["project_directory"], | ||
| "-n", | ||
|
|
@@ -113,11 +113,11 @@ def _run_sampling_apptainer(self): | |
| logger.debug("Apptainer sampling completed.") | ||
| return self.csv_path | ||
|
|
||
| def _run_sampling_local_openstudio(self): | ||
| def _run_sampling_local(self): | ||
| subprocess.run( | ||
| [ | ||
| self.parent().openstudio_exe(), | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not totally sure why we needed the OpenStudio CLI to run the quota sampler script (possibly it pre-dates the run_sampling / run_sampling_lib split?), but it seems to work fine using the system Ruby. |
||
| str(pathlib.Path("resources", "run_sampling.rb")), | ||
| "ruby", | ||
| str(pathlib.Path("samplers", "quota", "run_sampling.rb")), | ||
| "-p", | ||
| self.cfg["project_directory"], | ||
| "-n", | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,181 @@ | ||
| """ | ||
| buildstockbatch.sampler.residential_stratified | ||
| ~~~~~~~~~~~~~~~ | ||
| This object contains the code required for generating the set of simulations to execute | ||
|
|
||
| :author: Noel Merket, Ry Horsey | ||
| :copyright: (c) 2020 by The Alliance for Sustainable Energy | ||
| :license: BSD-3 | ||
| """ | ||
|
|
||
| import docker | ||
| import logging | ||
| import os | ||
| import pathlib | ||
| import shutil | ||
| import subprocess | ||
| import sys | ||
| import time | ||
| import yaml | ||
|
|
||
| from .base import BuildStockSampler | ||
| from buildstockbatch.exc import ValidationError | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| class ResidentialStratifiedSampler(BuildStockSampler): | ||
def __init__(
    self,
    parent,
    n_datapoints,
    segment_vars=None,
    segment_selection_sample_size=10000000,
    num_samples_per_segment=8,
):
    """Residential Stratified Sampler

    :param parent: BuildStockBatchBase object
    :type parent: BuildStockBatchBase (or subclass)
    :param n_datapoints: number of datapoints to sample
    :type n_datapoints: int
    :param segment_vars: names stored under the ``segment_vars`` key of the
        sampler config file; ``None`` selects the default list defined below
    :type segment_vars: list[str] or None
    :param segment_selection_sample_size: value stored under the
        ``segment_selection_sample_size`` key of the sampler config file
    :type segment_selection_sample_size: int
    :param num_samples_per_segment: value stored under the
        ``num_samples_per_segment`` key of the sampler config file
    :type num_samples_per_segment: int
    """
    # The default list is built per call instead of in the signature so a
    # single list object is never shared between sampler instances.
    if segment_vars is None:
        segment_vars = [
            "Federal Poverty Level",
            "Geometry Floor Area Bin",
            "Geometry Building Type RECS",
            "Vintage",
            "Heating Fuel",
            "Sampling Region",
        ]
    super().__init__(parent)
    self.validate_args(self.parent().project_filename, n_datapoints=n_datapoints)
    self.n_datapoints = n_datapoints
    # The config file is written next to the project file; its path is kept
    # so the sampling commands can refer to it later.
    self.sampler_config = self.create_sampler_config(
        os.path.dirname(self.parent().project_filename),
        segment_vars,
        segment_selection_sample_size,
        num_samples_per_segment,
    )
|
|
||
@classmethod
def validate_args(cls, project_filename, **kw):
    """Check the keyword arguments supplied for this sampler.

    :param project_filename: path of the project file (not inspected here)
    :param kw: sampler arguments; ``n_datapoints`` is required, while
        ``segment_vars``, ``segment_selection_sample_size`` and
        ``num_samples_per_segment`` are accepted without further checks
    :returns: ``True`` when every argument is acceptable
    :raises ValidationError: for an unknown argument, a missing required
        argument, or an ``n_datapoints`` that is not a positive integer
    """
    unchecked_args = {
        "segment_vars",
        "segment_selection_sample_size",
        "num_samples_per_segment",
    }
    missing_args = {"n_datapoints"}
    for k, v in kw.items():
        missing_args.discard(k)
        if k == "n_datapoints":
            # bool is a subclass of int, so True/False would otherwise slip
            # through the integer check and be treated as 1/0.
            if isinstance(v, bool) or not isinstance(v, int):
                raise ValidationError("n_datapoints needs to be an integer")
            if v <= 0:
                raise ValidationError("n_datapoints need to be >= 1")
        elif k not in unchecked_args:
            raise ValidationError(f"Unknown argument for sampler: {k}")
    if missing_args:
        # Sorted so the message is deterministic if more required args are added.
        raise ValidationError(
            "The following sampler arguments are required: " + ", ".join(sorted(missing_args))
        )
    return True
|
|
||
@classmethod
def create_sampler_config(cls, folderpath, segment_vars, segment_selection_sample_size, num_samples_per_segment):
    """Write the stratified sampler settings to ``sampler_config.yaml``.

    :param folderpath: directory in which the config file is created
    :param segment_vars: value stored under the ``segment_vars`` key
    :param segment_selection_sample_size: value stored under the
        ``segment_selection_sample_size`` key
    :param num_samples_per_segment: value stored under the
        ``num_samples_per_segment`` key
    :returns: path of the file that was written
    :rtype: pathlib.Path
    """
    config_path = pathlib.Path(folderpath) / "sampler_config.yaml"
    settings = {
        "segment_vars": segment_vars,
        "segment_selection_sample_size": segment_selection_sample_size,
        "num_samples_per_segment": num_samples_per_segment,
    }
    # An existing file of the same name is overwritten.
    with open(config_path, "w", encoding="utf-8") as config_file:
        yaml.dump(settings, config_file)
    return config_path
|
|
||
def _run_sampling_docker(self):
    """Run the stratified sampler inside the project's Docker image.

    The buildstock directory is mounted read/write at
    ``/var/simdata/openstudio``. Once the container exits, its output is
    logged at debug level and the ``buildstock.csv`` found in the
    ``resources`` folder of the buildstock directory is moved to
    ``self.csv_path``, replacing any file already there.

    NOTE(review): unlike ``_run_sampling_local``, this command passes neither
    the ``sample`` subcommand nor ``-c <sampler config>`` -- confirm whether
    that is intended.

    :returns: ``self.csv_path``
    """
    client = docker.DockerClient.from_env()
    started = time.time()
    run_options = {}
    if sys.platform.startswith("linux"):
        # Run as the invoking user on Linux.
        run_options["user"] = f"{os.getuid()}:{os.getgid()}"
    image = self.parent().docker_image
    command = [
        "python",
        "samplers/stratified/sampler/run_sampler.py",
        "-p",
        self.cfg["project_directory"],
        "-n",
        str(self.n_datapoints),
        "-o",
        "buildstock.csv",
    ]
    mounts = {self.buildstock_dir: {"bind": "/var/simdata/openstudio", "mode": "rw"}}
    raw_output = client.containers.run(
        image,
        command,
        remove=True,
        volumes=mounts,
        name="buildstock_sampling",
        **run_options,
    )
    elapsed = time.time() - started
    for output_line in raw_output.decode("utf-8").split("\n"):
        logger.debug(output_line)
    logger.debug(f"Sampling took {elapsed:.1f} seconds")

    target_csv = self.csv_path
    if os.path.exists(target_csv):
        os.remove(target_csv)
    produced_csv = os.path.join(self.buildstock_dir, "resources", "buildstock.csv")
    shutil.move(produced_csv, target_csv)
    return target_csv
|
|
||
def _run_sampling_apptainer(self):
    """Run the stratified sampler inside the project's Apptainer image.

    The buildstock directory is supplied as the container home
    (``/buildstock``) and the directory holding ``self.csv_path`` is bound
    at ``/outbind``; the sampler is told to write its CSV to
    ``../../outbind/<csv file name>``. The command runs with the parent's
    output directory as working directory and ``check=True``, so a
    non-zero exit raises ``subprocess.CalledProcessError``.

    NOTE(review): unlike ``_run_sampling_local``, this command passes neither
    the ``sample`` subcommand nor ``-c <sampler config>`` -- confirm whether
    that is intended.

    :returns: ``self.csv_path``
    """
    home_bind = f"{self.buildstock_dir}:/buildstock"
    output_bind = f"{os.path.dirname(self.csv_path)}:/outbind"
    container_cmd = [
        "apptainer",
        "exec",
        "--contain",
        "--home",
        home_bind,
        "--bind",
        output_bind,
        self.parent().apptainer_image,
    ]
    sampler_cmd = [
        "python",
        "samplers/stratified/sampler/run_sampler.py",
        "-p",
        self.cfg["project_directory"],
        "-n",
        str(self.n_datapoints),
        "-o",
        f"../../outbind/{os.path.basename(self.csv_path)}",
    ]
    command = container_cmd + sampler_cmd
    logger.debug(f"Starting apptainer sampling with command: {' '.join(command)}")
    subprocess.run(command, check=True, env=os.environ, cwd=self.parent().output_dir)
    logger.debug("Apptainer sampling completed.")
    return self.csv_path
|
|
||
| def _run_sampling_local(self): | ||
| subprocess.run( | ||
| [ | ||
| "python", | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Related to https://github.com/NatLabRockies/buildstockbatch/pull/525/changes#r3390798176, another reason not to use the OpenStudio CLI is that I tried it with this stratified sampler script (Python), and ran into a bunch of issues related to missing packages (and needing to use |
||
| str(pathlib.Path("samplers", "stratified", "sampler", "run_sampler.py")), | ||
| "sample", | ||
| "-p", | ||
| self.cfg["project_directory"], | ||
| "-n", | ||
| str(self.n_datapoints), | ||
| "-c", | ||
| self.sampler_config, | ||
| "-o", | ||
| "buildstock.csv", | ||
| ], | ||
| cwd=self.buildstock_dir, | ||
| check=True, | ||
| ) | ||
| destination_filename = pathlib.Path(self.csv_path) | ||
| if destination_filename.exists(): | ||
| os.remove(destination_filename) | ||
| shutil.move( | ||
| pathlib.Path(self.buildstock_dir, "resources", "buildstock.csv"), | ||
| destination_filename, | ||
| ) | ||
| config_filename = pathlib.Path(self.sampler_config) | ||
| if config_filename.exists(): | ||
| os.remove(config_filename) | ||
| return destination_filename | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not totally sure if I need this yet.