Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API.
Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API.
CLI module for tuning model hyperparameters using Ray Tune.
This module provides functionality to tune hyperparameters of machine learning models using Ray Tune. It supports configuring resources like GPUs/CPUs, saving best models and metrics, and debugging capabilities.
defget_args()->argparse.Namespace:
-"""Get the arguments when using from the commandline."""
- parser=argparse.ArgumentParser(description="")
+133
defget_args()->argparse.Namespace:
+"""Get the arguments when using from the commandline.
+
+ Returns:
+ Parsed command line arguments.
+ """
+ parser=argparse.ArgumentParser(description="Launch check_model.")
+ parser.add_argument("-d","--data",type=str,required=True,metavar="FILE",help="Path to input csv file.")
+ parser.add_argument("-m","--model",type=str,required=True,metavar="FILE",help="Path to model file.")parser.add_argument(
- "-c",
- "--config",
+ "-e",
+ "--data_config",type=str,required=True,metavar="FILE",
- help="The file path for the config file",
+ help="Path to data config file.",)
- parser.add_argument("-m","--model",type=str,required=True,metavar="FILE",help="The model file")
- parser.add_argument("-d","--data",type=str,required=True,metavar="FILE",help="The data file")parser.add_argument(
- "-e",
- "--experiment_config",
+ "-c",
+ "--model_config",type=str,required=True,metavar="FILE",
- help="The json used to modify the data. Inside it has the experiment name as specified in the experimets.py, this will then be dinamically imported during training. It is necessary to recover how the user specified the encoding of the data. Data is encoded on the fly.",
+ help="Path to yaml config training file.",)parser.add_argument(
- "-o",
- "--output",
+ "-w",
+ "--initial_weights",type=str,required=False,nargs="?",
- const="best_model.pt",
- default="best_model.pt",
+ const=None,
+ default=None,metavar="FILE",
- help="The output file path to write the trained model to",
+ help="The path to the initial weights (optional).",)parser.add_argument(
- "-bc",
- "--best_config",
+ "--ray_results_dirpath",
+ type=str,
+ required=False,
+ nargs="?",
+ const=None,
+ default=None,
+ metavar="DIR_PATH",
+ help="Location where ray_results output dir should be written. If None, uses ~/ray_results.",
+ )
+ parser.add_argument(
+ "-o",
+ "--output",type=str,required=False,nargs="?",
- const="best_config.json",
- default="best_config.json",
+ const="best_model.pt",
+ default="best_model.pt",metavar="FILE",
- help="The path to write the best config to",
+ help="The output file path to write the trained model to",)parser.add_argument("-bm",
@@ -191,66 +177,26 @@
help="The path to write the best metrics to",)parser.add_argument(
- "-bo",
- "--best_optimizer",
+ "-bc",
+ "--best_config",type=str,required=False,nargs="?",
- const="best_optimizer.pt",
- default="best_optimizer.pt",
+ const="best_config.yaml",
+ default="best_config.yaml",metavar="FILE",
- help="The path to write the best optimizer to",
+ help="The path to write the best config to",)parser.add_argument(
- "-w",
- "--initial_weights",
+ "-bo",
+ "--best_optimizer",type=str,required=False,nargs="?",
- const=None,
- default=None,
+ const="best_optimizer.pt",
+ default="best_optimizer.pt",metavar="FILE",
- help="The path to the initial weights. These can be used by the model instead of the random initialization",
- )
- parser.add_argument(
- "--gpus",
- type=int,
- required=False,
- nargs="?",
- const=None,
- default=None,
- metavar="NUM_OF_MAX_GPU",
- help="Use to limit the number of GPUs ray can use. This might be useful on many occasions, especially in a cluster system. The default value is None meaning ray will use all GPUs available. It can be set to 0 to use only CPUs.",
- )
- parser.add_argument(
- "--cpus",
- type=int,
- required=False,
- nargs="?",
- const=None,
- default=None,
- metavar="NUM_OF_MAX_CPU",
- help="Use to limit the number of CPUs ray can use. This might be useful on many occasions, especially in a cluster system. The default value is None meaning ray will use all CPUs available. It can be set to 0 to use only GPUs.",
- )
- parser.add_argument(
- "--memory",
- type=str,
- required=False,
- nargs="?",
- const=None,
- default=None,
- metavar="MAX_MEMORY",
- help="ray can have a limiter on the total memory it can use. This might be useful on many occasions, especially in a cluster system. The default value is None meaning ray will use all memory available.",
- )
- parser.add_argument(
- "--ray_results_dirpath",
- type=str,
- required=False,
- nargs="?",
- const=None,
- default=None,
- metavar="DIR_PATH",
- help="the location where ray_results output dir should be written. if set to None (default) ray will be place it in ~/ray_results ",
+ help="The path to write the best optimizer to",)parser.add_argument("--tune_run_name",
@@ -260,39 +206,61 @@
const=None,default=None,metavar="CUSTOM_RUN_NAME",
- help="tells ray tune what that the 'experiment_name' aka the given tune_run name should be. This is controlled be the variable name in the RunConfig class of tune. This has two behaviuors: 1 if set the subdir of ray_results is going to be named with this value, 2 the subdir of the above mentioned will also have this value as prefix for the single train dir name. Default None, meaning ray will generate such a name on its own.",
+ help=(
+ "Tells ray tune what the 'experiment_name' (i.e. the given tune_run name) should be. "
+ "If set, the subdirectory of ray_results is named with this value and its train dir is prefixed accordingly. "
+ "Default None means that ray will generate such a name on its own."
+ ),)parser.add_argument("--debug_mode",
- type=str,
- required=False,
- nargs="?",
- const=False,
- default=False,
- metavar="DEV",
- help="activate debug mode for tuning. default false, no debug.",
+ action="store_true",
+ help="Activate debug mode for tuning. Default false, no debug.",)
-
returnparser.parse_args()
defmain(model_path:str,data_path:str,
- experiment_config:str,
- output:str,
- best_config_path:str,
- best_metrics_path:str,
- best_optimizer_path:str,
- initial_weights_path:Optional[str]=None,
- gpus:Optional[int]=None,
- cpus:Optional[int]=None,
- memory:Optional[str]=None,
- ray_results_dirpath:Optional[str]=None,
- tune_run_name:Optional[str]=None,
+ data_config_path:str,
+ model_config_path:str,
+ initial_weights:str|None=None,# noqa: ARG001
+ ray_results_dirpath:str|None=None,
+ output_path:str|None=None,
+ best_optimizer_path:str|None=None,
+ best_metrics_path:str|None=None,
+ best_config_path:str|None=None,*,debug_mode:bool=False,)->None:
-"""This launcher use ray tune to find the best hyperparameters for a given model."""
- # TODO update to yaml the experiment config
- # load json into dictionary
- exp_config={}
- withopen(experiment_config)asin_json:
- exp_config=json.load(in_json)
-
- # initialize the experiment class
- initialized_experiment_class=get_experiment(exp_config["experiment"])
+"""Run the main model checking pipeline.
- # import the model correctly but do not initialize it yet, ray_tune does that itself
- model_class=import_class_from_file(model_path)
+ Args:
+ data_path: Path to input data file.
+ model_path: Path to model file.
+ data_config_path: Path to data config file.
+ model_config_path: Path to model config file.
+ initial_weights: Optional path to initial weights.
+ ray_results_dirpath: Directory for ray results.
+ debug_mode: Whether to run in debug mode.
+ output_path: Path to write the best model to.
+ best_optimizer_path: Path to write the best optimizer to.
+ best_metrics_path: Path to write the best metrics to.
+ best_config_path: Path to write the best config to.
+ """
+ # Convert data config to proper type
+ withopen(data_config_path)asfile:
+ data_config_dict:dict[str,Any]=yaml.safe_load(file)
+ data_config:yaml_data.YamlSubConfigDict=yaml_data.YamlSubConfigDict(**data_config_dict)
- # Update the tune config file. Because if resources are specified for cpu and gpu they are overwritten with what nextflow has otherwise this field is created
- updated_tune_conf="check_model_modified_tune_config.yaml"
- withopen(config_path)asconf_file,open(updated_tune_conf,"w")asnew_conf:
- user_tune_config=yaml.safe_load(conf_file)
+ withopen(model_config_path)asfile:
+ model_config_dict:dict[str,Any]=yaml.safe_load(file)
+ model_config:yaml_model_schema.Model=yaml_model_schema.Model(**model_config_dict)
- # add initial weights to the config, when provided
- ifinitial_weights_pathisnotNone:
- user_tune_config["model_params"]["initial_weights"]=os.path.abspath(initial_weights_path)
+ encoder_loader=loaders.EncoderLoader()
+ encoder_loader.initialize_column_encoders_from_config(column_config=data_config.columns)
- # save to file the new dictionary because StimulusTuneWrapper only takes paths
- yaml.dump(user_tune_config,new_conf)
+ model_class=launch_utils.import_class_from_file(model_path)
- # compute the memory requirements for ray init. Usefull in case ray detects them wrongly. Memory is split in two for ray: for store_object memory and the other actual memory for tuning. The following function takes the total possible usable/allocated memory as a string parameter and return in bytes the values for store_memory (30% as default in ray) and memory (70%).
- object_store_mem,mem=memory_split_for_ray_init(memory)
+ ray_config_loader=yaml_model_schema.YamlRayConfigLoader(model=model_config)
+ ray_config_model=ray_config_loader.get_config()
- # set ray_result dir ubication. TODO this version of pytorch does not support relative paths, in future maybe good to remove abspath.
- ray_results_dirpath=Noneifray_results_dirpathisNoneelseos.path.abspath(ray_results_dirpath)
-
- # Create the learner
- learner=StimulusTuneWrapper(
- updated_tune_conf,
- model_class,
- data_path,
- initialized_experiment_class,
- max_gpus=gpus,
- max_cpus=cpus,
- max_object_store_mem=object_store_mem,
- max_mem=mem,
+ tuner=raytune_learner.TuneWrapper(
+ model_config=ray_config_model,
+ data_config_path=data_config_path,
+ model_class=model_class,
+ data_path=data_path,
+ encoder_loader=encoder_loader,
+ seed=42,ray_results_dir=ray_results_dirpath,
- tune_run_name=tune_run_name,
- _debug=debug_mode,
+ debug=debug_mode,)
- # Tune the model and get the tuning results
- grid_results=learner.tune()
+ # Ensure output_path is provided
+ ifoutput_pathisNone:
+ raiseValueError("output_path must not be None")
+ try:
+ grid_results=tuner.tune()
+ ifnotgrid_results:
+ _raise_empty_grid()
+
+ # Initialize parser with results
+ parser=raytune_parser.TuneParser(result=grid_results)
- # parse raytune results
- results=StimulusTuneParser(grid_results)
- results.save_best_model(output)
- results.save_best_config(best_config_path)
- results.save_best_metrics_dataframe(best_metrics_path)
- results.save_best_optimizer(best_optimizer_path)
+ # Ensure output directory exists
+ Path(output_path).parent.mkdir(parents=True,exist_ok=True)
- # debug section. predict the validation data using the best model.
- ifdebug_mode:
- # imitialize the model class with the respective tune parameters from the associated config
- best_tune_config=results.get_best_config()
- best_model=model_class(**best_tune_config["model_params"])
- # get the weights associated to the best model and load them onto the model class
- best_model.load_state_dict(results.get_best_model())
- # load the data in a dataloader and then predict them in an ordered manner, aka no shuffle.
- validation_set=DataLoader(
- TorchDataset(data_path,initialized_experiment_class,split=1),
- batch_size=learner.config["data_params"]["batch_size"].sample(),
- shuffle=False,
- )
- predictions=PredictWrapper(best_model,validation_set).predict()
- # write to file the predictions, in the ray result tune specific folder.
- pred_filename=os.path.join(learner.config["tune_run_path"],"debug","best_model_val_pred.txt")
- # save which was the best model found, the easiest is to get its seed
- best_model_seed=os.path.join(learner.config["tune_run_path"],"debug","best_model_seed.txt")
- withopen(pred_filename,"w")aspred_f,open(best_model_seed,"w")asseed_f:
- pred_f.write(str(predictions))
- seed_f.write(str(best_tune_config["ray_worker_seed"]))
+ # Save outputs using proper Result object API
+ parser.save_best_model(output=output_path)
+ parser.save_best_optimizer(output=best_optimizer_path)
+ parser.save_best_metrics_dataframe(output=best_metrics_path)
+ parser.save_best_config(output=best_config_path)
+
+ exceptRuntimeError:
+ logger.exception("Tuning failed")
+ raise
+ exceptKeyError:
+ logger.exception("Missing expected result key")
+ raise
+ finally:
+ ifdebug_mode:
+ logger.info("Debug mode - preserving Ray results directory")
+ elifray_results_dirpath:
+ shutil.rmtree(ray_results_dirpath,ignore_errors=True)
Source code in src/stimulus/learner/raytune_parser.py
38
-39
-40
def__init__(self,results:ExperimentAnalysis)->None:
-"""`results` is the output of ray.tune."""
- self.results=results
+ stimulus.learner.raytune_parser - stimulus-py
Source code in src/stimulus/learner/raytune_parser.py
39
+40
+41
+42
def__init__(self,result:ResultGrid)->None:
+"""Initialize with the given Ray Tune result grid."""
+ self.result:ResultGrid=result
+ self.best_result:Result=self._validate_best_result()
Source code in src/stimulus/learner/raytune_parser.py
57
-58
-59
-60
+
Correct config values.
This method modifies the configuration dictionary to remove or convert non-serializable objects (such as Ray ObjectRefs) so that the entire dictionary can be safely dumped to a YAML file.
deffix_config_values(self,config:dict[str,Any])->dict[str,Any]:
+"""Correct config values.
+
+ This method modifies the configuration dictionary to remove or convert
+ non-serializable objects (such as Ray ObjectRefs) so that the entire dictionary
+ can be safely dumped to a YAML file.
+
+ Args:
+ config: Configuration dictionary to fix.
+
+ Returns:
+ Fixed configuration dictionary.
+ """
+ # Replace the model class with its name for serialization purposes
+ config["model"]=config["model"].__name__
+
+ # Remove keys that contain non-serializable objects
+ keys_to_remove=[
+ "_debug",
+ "tune_run_path",
+ "_training_ref",
+ "_validation_ref",
+ "encoder_loader",# if this key holds a non-serializable object
+ ]
+ forkeyinkeys_to_remove:
+ config.pop(key,None)
+
+ returnconfig
+
defget_best_config(self)->dict[str,Any]:
+"""Get the best config from the results.
Returns:
- Fixed configuration dictionary
- """
- # fix the model and experiment values to avoid problems with serialization
- # TODO this is a quick fix to avoid the problem with serializing class objects. maybe there is a better way.
- config["model"]=config["model"].__name__
- config["experiment"]=config["experiment"].__class__.__name__
- if"tune"inconfigand"tune_params"inconfig["tune"]:
- delconfig["tune"]["tune_params"]["scheduler"]
- # delete miscellaneus keys, used only during debug mode for example
- delconfig["_debug"],config["tune_run_path"]
+ The configuration dictionary of the best result.
+ Raises:
+ ValueError: If the config is missing.
+ """
+ config: dict[str, Any] | None = self.best_result.config
+ if config is None:
+     raise ValueError("Best result does not contain a configuration.")
return config
-
Source code in src/stimulus/learner/raytune_parser.py
42
-43
-44
-45
defget_best_config(self)->dict[str,Any]:
-"""Get the best config from the results."""
- best_result=cast(RayTuneResult,self.results.best_result)
- returnbest_result["config"]
Source code in src/stimulus/learner/raytune_parser.py
85
-86
-87
-88
-89
-90
defget_best_model(self)->dict[str,torch.Tensor]:
-"""Get the best model weights from the results."""
- best_result=cast(RayTuneMetrics,self.results.best_result)
- checkpoint_dir=best_result["checkpoint"]
- checkpoint=os.path.join(checkpoint_dir,"model.safetensors")
+
defget_best_model(self)->dict[str,torch.Tensor]:
+"""Get the best model weights from the results.
+
+ Returns:
+ Dictionary of model weights.
+
+ Raises:
+ ValueError: If the checkpoint is missing.
+ """
+ if self.best_result.checkpoint is None:
+     raise ValueError("Best result does not contain a checkpoint for the model.")
+ checkpoint_dir: str = self.best_result.checkpoint.to_directory()
+ checkpoint: str = os.path.join(checkpoint_dir, "model.safetensors")
return safe_load_file(checkpoint)
Source code in src/stimulus/learner/raytune_parser.py
96
- 97
- 98
- 99
-100
-101
defget_best_optimizer(self)->dict[str,Any]:
-"""Get the best optimizer state from the results."""
- best_result=cast(RayTuneOptimizer,self.results.best_result)
- checkpoint_dir=best_result["checkpoint"]
- checkpoint=os.path.join(checkpoint_dir,"optimizer.pt")
+
defget_best_optimizer(self)->dict[str,Any]:
+"""Get the best optimizer state from the results.
+
+ Returns:
+ Optimizer state dictionary.
+
+ Raises:
+ ValueError: If the checkpoint is missing.
+ """
+ if self.best_result.checkpoint is None:
+     raise ValueError("Best result does not contain a checkpoint for the optimizer.")
+ checkpoint_dir: str = self.best_result.checkpoint.to_directory()
+ checkpoint: str = os.path.join(checkpoint_dir, "optimizer.pt")
return torch.load(checkpoint)
Source code in src/stimulus/learner/raytune_parser.py
74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
+85
def save_best_config(self, output: str) -> None:
"""Save the best config to a file.
- TODO maybe only save the relevant config values.
+ TODO: maybe only save the relevant config values.
+
+ Args:
+ output: File path to save the configuration. """
- config=self.get_best_config()
+ config: dict[str, Any] = self.get_best_config()
config = self.fix_config_values(config)
with open(output, "w") as f:
- json.dump(config,f,indent=4)
+ yaml.safe_dump(config,f)
Save the dataframe with the metrics at each iteration of the best sample to a file.
Source code in src/stimulus/learner/raytune_parser.py
77
-78
-79
-80
-81
-82
-83
defsave_best_metrics_dataframe(self,output:str)->None:
-"""Save the dataframe with the metrics at each iteration of the best sample to a file."""
- best_result=cast(RayTuneMetrics,self.results.best_result)
- metrics_df=best_result["metrics_dataframe"]
- columns=[colforcolinmetrics_df.columnsif"config"notincol]
- metrics_df=metrics_df[columns]
+
Save the dataframe with the metrics at each iteration of the best sample to a file.
Source code in src/stimulus/learner/raytune_parser.py
116
+117
+118
+119
+120
+121
+122
+123
defsave_best_metrics_dataframe(self,output:str)->None:
+"""Save the dataframe with the metrics at each iteration of the best sample to a file.
+
+ Args:
+ output: CSV file path to save the metrics.
+ """
+ metrics_df: pd.DataFrame = pd.DataFrame([self.best_result.metrics])
metrics_df.to_csv(output, index=False)
Source code in src/stimulus/learner/raytune_parser.py
92
-93
-94
defsave_best_model(self,output:str)->None:
-"""Save the best model weights to a file."""
- safe_save_file(self.get_best_model(),output)
+
Save the best model weights to a file.
This method retrieves the best model weights using the get_best_model helper which loads the model data from the checkpoint's directory, then re-saves it using safe_save_file.
defsave_best_model(self,output:str)->None:
+"""Save the best model weights to a file.
+
+ This method retrieves the best model weights using the get_best_model helper
+ which loads the model data from the checkpoint's directory, then re-saves
+ it using safe_save_file.
+
+ Args:
+ output: Path where the best model weights will be saved.
+ """
+ model:dict[str,torch.Tensor]=self.get_best_model()
+ safe_save_file(model,output)
This module contains all Stimulus types that are used for type annotations and are unlikely to be instantiated, as well as aliases of other types for typing purposes.
The aliases from this module should be used for typing purposes only.
This module contains all Stimulus types that are used for type annotations and are unlikely to be instantiated, as well as aliases of other types for typing purposes.
The aliases from this module should be used for typing purposes only.