Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions libensemble/libE.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,13 @@ def libE_local(sim_specs, gen_specs, exit_criteria, persis_info, alloc_specs, li
if resources is not None:
local_host = [socket.gethostname()]
resources.add_comm_info(libE_nodes=local_host)
if libE_specs.get("set_workers_by_gpus", False):
# Get num_resource_sets; nworkers is that + 1 in case there is a persistent gen
num_resource_sets = resources.glob_resources.num_resource_sets
nworkers = num_resource_sets + 1 # Should I honor workers if exist (whether more or less than rsets)
print(f"\nChange nworkers from {libE_specs['nworkers']} to {nworkers}") # SH: remove after testing
print(f"num_resource_sets {num_resource_sets}\n") # SH: remove after testing
# libE_specs["nworkers"] = nworkers

exctr = Executor.executor
if exctr is not None:
Expand Down
5 changes: 5 additions & 0 deletions libensemble/resources/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ def __init__(self, libE_specs, top_level_dir=None):
self.num_resource_sets = libE_specs.get("num_resource_sets", None)
self.enforce_worker_core_bounds = libE_specs.get("enforce_worker_core_bounds", False)

set_workers_by_gpus = libE_specs["set_workers_by_gpus"]
resource_info = libE_specs.get("resource_info", {})
cores_on_node = resource_info.get("cores_on_node", None)
gpus_on_node = resource_info.get("gpus_on_node", None)
Expand Down Expand Up @@ -226,6 +227,10 @@ def __init__(self, libE_specs, top_level_dir=None):
print(f"From resources: {gpus_on_node=}") # testing
self.libE_nodes = None

if set_workers_by_gpus:
new_rsets = self.gpus_avail_per_node * len(self.global_nodelist)
self.num_resource_sets = new_rsets

def add_comm_info(self, libE_nodes):
"""Adds comms-specific information to resources

Expand Down
3 changes: 3 additions & 0 deletions libensemble/specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,9 @@ class LibeSpecs(BaseModel):
nworkers: Optional[int]
""" Number of worker processes to spawn (only in local/tcp modes) """

set_workers_by_gpus: Optional[bool] = False
"""Allow nworkers to be set by number of GPUs available"""

port: Optional[int] = 0
""" TCP Only: Port number for Manager's system """

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,23 @@

nworkers, is_manager, libE_specs, _ = parse_args()

# ---------------- Alt. settings for workers/resource sets ----------------

# The persistent gen does not need resources

libE_specs["num_resource_sets"] = nworkers - 1 # Any worker can be the gen
# libE_specs["num_resource_sets"] = nworkers - 1 # Any worker can be the gen

# libE_specs["zero_resource_workers"] = [1] # If first worker must be gen, use this instead

# Or do not give nworkers, and allow the number of workers and resource sets to be set by the number of GPUs.

libE_specs["set_workers_by_gpus"] = True

# For laptop testing - comment out for testing on actual GPU system
libE_specs["resource_info"] = {"gpus_on_node": 4}

# ----------------------------------------------------------

libE_specs["sim_dirs_make"] = True
libE_specs["ensemble_dir_path"] = "./ensemble_CUDA_variable_w" + str(nworkers)

Expand Down