diff --git a/nextflow.config b/nextflow.config
deleted file mode 100644
index 790df96e..00000000
--- a/nextflow.config
+++ /dev/null
@@ -1,152 +0,0 @@
-params {
-
-    // Inputs
-    csv = null // the input file containing all input data
-    model = null // the Python file with the model that will be tested by this pipeline
-    exp_conf = null // the JSON config file that specifies all the parameters relative to data manipulation
-    tune_conf = null // the config file with all the hyperparameter directives (choices) and all Ray Tune specs
-
-    // Optional inputs
-    initial_weights = null // initial weights for the model, used to start training instead of random initialization. Several files can be provided; each will be used for a different run.
-
-    // Output options
-    outdir = "./results/" // the outdir combines the user-specified directory, the unique name of the run and the time, so that multiple runs do not overlap
-    publish_dir_mode = "copy"
-
-    // Computational resources
-    max_cpus = 12 // this flag and the following regulate resources; profiles can overwrite these
-    max_gpus = 1 // requests GPUs for the tuning steps
-    max_memory = 32.GB
-    max_time = "72.h"
-
-    // Error options
-    max_retries = 0
-    err_start = 'finish'
-
-    // Optional flags
-    check_model = true // whether to check that the model can be tuned and trained; performs one call of the model's batch function (predicting), importing and using everything needed for that. The check runs by default.
-    check_model_num_samples = null // optional flag for a more extensive check during check_model; overrides the user-given num_samples value for the tune run, giving the user control over how extensive the check should be
-    shuffle = true // whether to shuffle the data and run a training on it; this sanity check runs by default. (If the way we think about shuffle changes, it may be better to remove this flag and make it a parameter of the user-given JSON for noise and split.)
-    debug_mode = false // flag used to switch the pipeline to debug mode
-
-    // General
-    singularity_cache_dir = "singularity_cache"
-    help = false
-    validate_params = true // whether to validate input values using nf-schema
-
-    // Config options
-    config_profile_name = null
-    config_profile_description = null
-}
-
-// Load modules.config for DSL2 module specific options
-includeConfig 'conf/modules.config'
-
-profiles {
-    docker {
-        docker.enabled = true
-        docker.runOptions = '-u $(id -u):$(id -g)'
-        conda.enabled = false
-        singularity.enabled = false
-        podman.enabled = false
-        shifter.enabled = false
-        charliecloud.enabled = false
-        apptainer.enabled = false
-    }
-    apptainer {
-        apptainer.enabled = true
-        apptainer.autoMounts = true
-        apptainer.cacheDir = "${params.singularity_cache_dir}"
-        conda.enabled = false
-        docker.enabled = false
-        singularity.enabled = false
-        podman.enabled = false
-        shifter.enabled = false
-        charliecloud.enabled = false
-    }
-    singularity {
-        singularity.enabled = true
-        singularity.autoMounts = true
-        singularity.cacheDir = "${params.singularity_cache_dir}"
-        conda.enabled = false
-        docker.enabled = false
-        podman.enabled = false
-        shifter.enabled = false
-        charliecloud.enabled = false
-        apptainer.enabled = false
-    }
-    debug {
-        dumpHashes = true
-        process.beforeScript = 'echo $HOSTNAME'
-        process.debug = true
-        cleanup = false
-        nextflow.enable.configProcessNamesValidation = true
-    }
-    crg { includeConfig "conf/crg.config" }
-    crg_slurm { includeConfig "conf/crg_slurm.config" }
-    test { includeConfig "conf/test.config" }
-    test_learn { includeConfig "conf/test_learn.config" }
-    test_stub { includeConfig "conf/test_stub.config" }
-    local { includeConfig "conf/local.config" }
-}
-
-
-// Nextflow plugins
-plugins {
-    id 'nf-schema@2.0.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet
-}
-
-
-// Trace/report options
-// These allow the pipeline to create tracing/report files with all the steps and the time/memory/CPU they took
-def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')
-def trace_dir = "${params.outdir}/pipeline_info"
-timeline {
-    enabled = true
-    file = "${trace_dir}/execution_timeline_${trace_timestamp}.html"
-}
-report {
-    enabled = true
-    file = "${trace_dir}/execution_report_${trace_timestamp}.html"
-}
-trace {
-    enabled = true
-    file = "${trace_dir}/execution_trace_${trace_timestamp}.txt"
-}
-dag {
-    enabled = true
-    file = "${trace_dir}/execution_dag_${trace_timestamp}.html"
-}
-
-// Function to ensure that resource requirements don't go beyond
-// a maximum limit
-def check_max(obj, type) {
-    if (type == 'memory') {
-        try {
-            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
-                return params.max_memory as nextflow.util.MemoryUnit
-            else
-                return obj
-        } catch (all) {
-            println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj"
-            return obj
-        }
-    } else if (type == 'time') {
-        try {
-            if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
-                return params.max_time as nextflow.util.Duration
-            else
-                return obj
-        } catch (all) {
-            println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj"
-            return obj
-        }
-    } else if (type == 'cpus') {
-        try {
-            return Math.min( obj, params.max_cpus as int )
-        } catch (all) {
-            println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
-            return obj
-        }
-    }
-}
diff --git a/nextflow_schema.json b/nextflow_schema.json
deleted file mode 100644
index 1d0d0c36..00000000
--- a/nextflow_schema.json
+++ /dev/null
@@ -1,237 +0,0 @@
-{
-    "$schema": "http://json-schema.org/draft-07/schema",
-    "$id": "https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/master/nextflow_schema.json",
-    "title": "stimulus pipeline parameters",
-    "description": "Pipeline for statistically testing training procedures of machine learning models",
-    "type": "object",
-    "definitions": {
-        "input_files_options": {
-            "title": "Input files options",
-            "type": "object",
-            "fa_icon": "fas fa-terminal",
-            "description": "Define where the pipeline should find input data.",
-            "properties": {
-                "csv": {
-                    "type": "string",
-                    "format": "file-path",
-                    "description": "Test data as CSV file",
-                    "fa_icon": "fas fa-folder-open",
-                    "mimetype": "text/csv",
-                    "help_text": "The input file containing all input data."
-                },
-                "exp_conf": {
-                    "type": "string",
-                    "format": "file-path",
-                    "description": "Experiment config in JSON format",
-                    "fa_icon": "fas fa-folder-open",
-                    "help_text": "The JSON config file that specifies all the parameters relative to data manipulation."
-                },
-                "model": {
-                    "type": "string",
-                    "format": "file-path",
-                    "description": "Model file in Python",
-                    "fa_icon": "fas fa-folder-open",
-                    "mimetype": "text/py",
-                    "help_text": "The Python file with the model that will be tested by this pipeline."
-                },
-                "tune_conf": {
-                    "type": "string",
-                    "format": "file-path",
-                    "description": "Tuning config in YAML format",
-                    "fa_icon": "fas fa-folder-open",
-                    "mimetype": "text/yaml",
-                    "help_text": "The config file with all the hyperparameter directives (choices) and all Ray Tune specs."
-                }
-            },
-            "required": ["csv", "exp_conf", "model", "tune_conf"]
-        },
-        "optional_inputs": {
-            "title": "Optional inputs",
-            "type": "object",
-            "description": "Files that can be omitted",
-            "default": "",
-            "fa_icon": "fas fa-terminal",
-            "properties": {
-                "initial_weights": {
-                    "type": "string",
-                    "fa_icon": "fas fa-folder-open",
-                    "help_text": "Initial weights for the model, used to start training instead of random initialization. Several files can be provided; each will be used for a different run.",
-                    "description": "File used to initialize the model in tuning",
-                    "format": "path"
-                }
-            }
-        },
-        "output_options": {
-            "title": "Output options",
-            "type": "object",
-            "description": "Define where and how to publish",
-            "default": "",
-            "fa_icon": "fas fa-terminal",
-            "properties": {
-                "outdir": {
-                    "type": "string",
-                    "default": "./results/",
-                    "description": "Output directory",
-                    "help_text": "The directory will contain a subdirectory with a name unique to each stimulus pipeline run.",
-                    "fa_icon": "fas fa-folder-open"
-                },
-                "publish_dir_mode": {
-                    "type": "string",
-                    "default": "copy",
-                    "description": "Publish mode"
-                }
-            }
-        },
-        "resources_options": {
-            "title": "Resources options",
-            "type": "object",
-            "description": "Specify maximum process resources",
-            "default": "",
-            "properties": {
-                "max_cpus": {
-                    "type": "integer",
-                    "default": 12,
-                    "minimum": 1,
-                    "description": "Set maximum CPU limit"
-                },
-                "max_gpus": {
-                    "type": "integer",
-                    "default": 1,
-                    "minimum": 0,
-                    "help_text": "Requests GPUs for the tuning steps.",
-                    "description": "Set maximum GPU limit"
-                },
-                "max_memory": {
-                    "type": "string",
-                    "default": "32 GB",
-                    "description": "Set maximum memory"
-                },
-                "max_time": {
-                    "type": "string",
-                    "default": "72.h",
-                    "description": "Set maximum running time"
-                }
-            },
-            "help_text": "Each process specifies its resources through labels and the config, but every resource request is then checked against the maximum specified here; if it is bigger, the value of the corresponding max_ parameter is used instead. This happens through the check_max custom function in the main nextflow.config.",
-            "fa_icon": "fas fa-terminal"
-        },
-        "on_error_options": {
-            "title": "On error options",
-            "type": "object",
-            "description": "What to do and how to handle errors",
-            "default": "",
-            "fa_icon": "fas fa-terminal",
-            "properties": {
-                "err_start": {
-                    "type": "string",
-                    "default": "finish",
-                    "description": "Tells the pipeline how to behave on error",
-                    "help_text": "Refer to the Nextflow errorStrategy documentation for more details."
-                },
-                "max_retries": {
-                    "type": "integer",
-                    "default": 0,
-                    "description": "Number of times to retry if err_start is set to retry",
-                    "help_text": "This also acts as a multiplier for resource requests: if a process failed for lack of resources, it automatically asks for more on the next attempt. Take a look at test.config for more details."
-                }
-            }
-        },
-        "skip_options": {
-            "title": "Skip options",
-            "type": "object",
-            "description": "Options to skip steps or change the behaviour of the pipeline",
-            "default": "",
-            "fa_icon": "fas fa-terminal",
-            "properties": {
-                "check_model": {
-                    "type": "boolean",
-                    "default": true,
-                    "description": "Checks that all inputs are compatible and the model can be tuned.",
-                    "help_text": "Whether to check that the model can be tuned and trained. It performs one call of the model's batch function (predicting), importing and using everything needed for that. The check runs by default."
-                },
-                "check_model_num_samples": {
-                    "type": "string",
-                    "description": "Optional flag for a more or less extensive check during check_model.",
-                    "help_text": "This overrides the user-given num_samples value for the tune run, giving the user control over how extensive the check should be. By default it is set to 3."
- }, - "shuffle": { - "type": "boolean", - "default": true, - "description": "run the shuffle sanity check", - "help_text": "flag to tell wether to shuffle or not the data and run a train on it. Sanity check always run on default. " - }, - "debug_mode": { - "type": "boolean", - "description": "developer flag", - "help_text": "flag used to switch to debug mode for the pipeline. more verbose outputs." - } - } - }, - "general_options": { - "title": "General options", - "type": "object", - "description": "generic options", - "default": "", - "fa_icon": "fas fa-terminal", - "properties": { - "singularity_cache_dir": { - "type": "string", - "default": "singularity_cache", - "description": "the directory where singularity images will be placed" - }, - "help": { - "type": "boolean", - "description": "prints this help section" - }, - "validate_params": { - "type": "boolean", - "description": "to validate or not the input params", - "default": true - } - } - }, - "config_options": { - "title": "Config options", - "type": "object", - "description": "options specific for config files", - "default": "", - "properties": { - "config_profile_name": { - "type": "string", - "description": "the name of the config used" - }, - "config_profile_description": { - "type": "string", - "description": "the description of the config fil,e" - } - }, - "fa_icon": "fas fa-terminal" - } - }, - "allOf": [ - { - "$ref": "#/definitions/input_files_options" - }, - { - "$ref": "#/definitions/optional_inputs" - }, - { - "$ref": "#/definitions/output_options" - }, - { - "$ref": "#/definitions/resorces_options" - }, - { - "$ref": "#/definitions/on_error_options" - }, - { - "$ref": "#/definitions/skip_options" - }, - { - "$ref": "#/definitions/general_options" - }, - { - "$ref": "#/definitions/config_options" - } - ] -} diff --git a/nf-test.config b/nf-test.config deleted file mode 100644 index 82d2611b..00000000 --- a/nf-test.config +++ /dev/null @@ -1,13 +0,0 @@ -config { - // Location of nf-tests - testsDir "." - - // nf-test directory used to create temporary files for each test - workDir System.getenv("NFT_WORKDIR") ?: ".nf-test" - - // Location of an optional nextflow.config file specific for executing pipeline tests - configFile "tests/nextflow.config" - - // use a given profile for input specifications - profile "test" -} diff --git a/src/__pycache__/__init__.cpython-311.pyc b/src/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index 1923d2b5..00000000 Binary files a/src/__pycache__/__init__.cpython-311.pyc and /dev/null differ diff --git a/templates/model_config_template.yaml b/templates/model_config_template.yaml new file mode 100644 index 00000000..e043681e --- /dev/null +++ b/templates/model_config_template.yaml @@ -0,0 +1,41 @@ +model_params: + param_to_optimize_1: # name of the parameter to optimize, should match the name of the parameter in the model + space: ['space to optimize param 1', 'space to optimize param 2'] # space to optimize definition, as given to Ray + mode: RayFunction # mode to define the space, for instance choice, randint, loguniform, uniform... 
check Ray documentation + +optimizer: + method: + space: ['optimizer1', 'optimizer2'] # a choice of optimizers to use for optimization + mode: choice + +optimizer_params: + lr: + space: ['lower_bound', 'upper_bound'] # defining the space for learning rate search + mode: loguniform + +loss_params: + loss_fn: # loss function name, should match the name of the loss function in the model "batch" method + space: ['loss_fn1', 'loss_fn2'] # a choice of loss functions to use for optimization + mode: choice + +data_params: + batch_size: + space: [16, 32, 64, 128, 256] # defining the space for batch size search + mode: choice + +tune: + config_name: "name of Ray config" + tune_params: + metric: "name of metric to optimize" # should match available metrics as defined in the raytune_learner.py (currently only "val_loss" is available) + mode: "max" # max or min + num_samples: 10 # number of samples to try + scheduler: + name: "name of Ray scheduler" + params: + param1: value1 + param2: value2 + param3: value3 + step_size: 1 + + +
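
As a concrete illustration of the template added above, here is a minimal filled-in sketch. Every concrete value is an assumption for illustration only: `hidden_size` is a hypothetical model parameter, the optimizer and loss names must match whatever the model file actually exposes, and `ASHAScheduler` (with its real `max_t`/`grace_period`/`reduction_factor` arguments) is a standard Ray Tune scheduler chosen here as a plausible example.

```yaml
model_params:
  hidden_size:              # hypothetical parameter; must exist in the model's constructor
    space: [32, 256]
    mode: randint           # sample an integer in [32, 256)

optimizer:
  method:
    space: ['Adam', 'SGD']  # hypothetical names; must be resolvable by the pipeline
    mode: choice

optimizer_params:
  lr:
    space: [0.0001, 0.1]    # log-uniform search between 1e-4 and 1e-1
    mode: loguniform

loss_params:
  loss_fn:
    space: ['mse_loss']     # hypothetical; must match the model's "batch" method
    mode: choice

data_params:
  batch_size:
    space: [16, 32, 64]
    mode: choice

tune:
  config_name: "example_run"
  tune_params:
    metric: "val_loss"      # per the template, currently the only available metric
    mode: "min"             # minimize validation loss
    num_samples: 20         # number of trials to sample
  scheduler:
    name: "ASHAScheduler"   # Ray Tune's asynchronous successive-halving scheduler
    params:
      max_t: 100            # maximum training iterations per trial
      grace_period: 10      # minimum iterations before a trial can be stopped
      reduction_factor: 2   # halving rate between rungs
    step_size: 1
```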
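In Ray Tune terms, each `mode` names a search-space primitive and `space` supplies its arguments: `choice` corresponds to `tune.choice([...])`, `randint` to `tune.randint(lower, upper)`, and `loguniform` to `tune.loguniform(lower, upper)`. The scheduler `name` and `params` are presumably forwarded to the matching class in `ray.tune.schedulers`; how exactly the pipeline resolves them is defined in raytune_learner.py, not in this template.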