diff --git a/libensemble/libE.py b/libensemble/libE.py
index 994b19176..bfb3cc013 100644
--- a/libensemble/libE.py
+++ b/libensemble/libE.py
@@ -478,6 +478,13 @@ def libE_local(sim_specs, gen_specs, exit_criteria, persis_info, alloc_specs, li
     if resources is not None:
         local_host = [socket.gethostname()]
         resources.add_comm_info(libE_nodes=local_host)
+        if libE_specs.get("set_workers_by_gpus", False):
+            # Take num_resource_sets from resources; set nworkers to that + 1 in case there is a persistent gen
+            num_resource_sets = resources.glob_resources.num_resource_sets
+            nworkers = num_resource_sets + 1  # Should we honor nworkers if given (whether more or fewer than rsets)?
+            print(f"\nChange nworkers from {libE_specs['nworkers']} to {nworkers}")  # SH: remove after testing
+            print(f"num_resource_sets {num_resource_sets}\n")  # SH: remove after testing
+            # libE_specs["nworkers"] = nworkers
 
     exctr = Executor.executor
     if exctr is not None:
diff --git a/libensemble/resources/resources.py b/libensemble/resources/resources.py
index b8e920a98..b4106cde2 100644
--- a/libensemble/resources/resources.py
+++ b/libensemble/resources/resources.py
@@ -166,6 +166,7 @@ def __init__(self, libE_specs, top_level_dir=None):
 
         self.num_resource_sets = libE_specs.get("num_resource_sets", None)
         self.enforce_worker_core_bounds = libE_specs.get("enforce_worker_core_bounds", False)
+        set_workers_by_gpus = libE_specs.get("set_workers_by_gpus", False)
         resource_info = libE_specs.get("resource_info", {})
         cores_on_node = resource_info.get("cores_on_node", None)
         gpus_on_node = resource_info.get("gpus_on_node", None)
@@ -226,6 +227,10 @@ def __init__(self, libE_specs, top_level_dir=None):
             print(f"From resources: {gpus_on_node=}")  # testing
 
         self.libE_nodes = None
 
+        if set_workers_by_gpus:
+            new_rsets = self.gpus_avail_per_node * len(self.global_nodelist)
+            self.num_resource_sets = new_rsets
+
     def add_comm_info(self, libE_nodes):
         """Adds comms-specific information to resources
diff --git a/libensemble/specs.py b/libensemble/specs.py
index ffb31a85e..d76455f39 100644
--- a/libensemble/specs.py
+++ b/libensemble/specs.py
@@ -220,6 +220,9 @@ class LibeSpecs(BaseModel):
     nworkers: Optional[int]
     """ Number of worker processes to spawn (only in local/tcp modes) """
 
+    set_workers_by_gpus: Optional[bool] = False
+    """ Allow nworkers to be set by the number of GPUs available """
+
     port: Optional[int] = 0
     """ TCP Only: Port number for Manager's system """
 
diff --git a/libensemble/tests/functionality_tests/test_persistent_sampling_CUDA_variable_resources.py b/libensemble/tests/functionality_tests/test_persistent_sampling_CUDA_variable_resources.py
index 024046060..9c98d795d 100644
--- a/libensemble/tests/functionality_tests/test_persistent_sampling_CUDA_variable_resources.py
+++ b/libensemble/tests/functionality_tests/test_persistent_sampling_CUDA_variable_resources.py
@@ -30,12 +30,23 @@
     nworkers, is_manager, libE_specs, _ = parse_args()
 
+    # ---------------- Alt. settings for workers/resource sets ----------------
+
     # The persistent gen does not need resources
-    libE_specs["num_resource_sets"] = nworkers - 1  # Any worker can be the gen
+    # libE_specs["num_resource_sets"] = nworkers - 1  # Any worker can be the gen
     # libE_specs["zero_resource_workers"] = [1]  # If first worker must be gen, use this instead
+
+    # Or do not give nworkers, and allow workers and resource sets to be set by the number of GPUs
+    libE_specs["set_workers_by_gpus"] = True
+
+    # For laptop testing - comment out when testing on an actual GPU system
+    libE_specs["resource_info"] = {"gpus_on_node": 4}
+
+    # --------------------------------------------------------------------------
+
     libE_specs["sim_dirs_make"] = True
     libE_specs["ensemble_dir_path"] = "./ensemble_CUDA_variable_w" + str(nworkers)