From 8d6a356369c154c187d90aef55c6da5158166d3b Mon Sep 17 00:00:00 2001
From: smilesun <smilesun.east@gmail.com>
Date: Fri, 5 Jul 2024 17:06:22 +0200
Subject: [PATCH 1/7] use command line to specify environment variable

---
 run_benchmark_slurm.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/run_benchmark_slurm.sh b/run_benchmark_slurm.sh
index 72b8bbd83..36aeb7c8d 100755
--- a/run_benchmark_slurm.sh
+++ b/run_benchmark_slurm.sh
@@ -32,4 +32,4 @@ echo "Number of GPUs: $NUMBER_GPUS"
 echo "Results will be stored in: $results_dir"
 
 # Helmholtz
-snakemake --profile "examples/yaml/slurm" --config yaml_file="$CONFIGFILE" --keep-going --keep-incomplete --notemp --cores 3 -s "domainlab/exp_protocol/benchmark.smk" --configfile "$CONFIGFILE" --config output_dir="$results_dir" 2>&1 | tee "$logfile"
\ No newline at end of file
+snakemake --config logdir="zoutput/benchmark/logs" --profile "examples/yaml/slurm" --config yaml_file="$CONFIGFILE" --keep-going --keep-incomplete --notemp --cores 3 -s "domainlab/exp_protocol/benchmark.smk" --configfile "$CONFIGFILE" --config output_dir="$results_dir" 2>&1 | tee "$logfile"

From 8182f5b4c9d7e4b1eb627062c2ffe5fb1450121e Mon Sep 17 00:00:00 2001
From: smilesun <smilesun.east@gmail.com>
Date: Fri, 5 Jul 2024 17:07:43 +0200
Subject: [PATCH 2/7] use {log_dir} placeholder for slurm log paths

---
 examples/yaml/slurm/config.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/yaml/slurm/config.yaml b/examples/yaml/slurm/config.yaml
index e180df74d..fb229c922 100644
--- a/examples/yaml/slurm/config.yaml
+++ b/examples/yaml/slurm/config.yaml
@@ -1,6 +1,6 @@
 # This yaml file has been adapted from https://github.com/jdblischak/smk-simple-slurm
 cluster:
-  mkdir -p zoutput/slurm_logs/{rule} &&
+  mkdir -p zoutput/{log_dir}/{rule} &&
   sbatch
     --partition=gpu_p
     --qos=gpu_normal
@@ -10,8 +10,8 @@ cluster:
     -c 2
     --mem=160G
     --job-name=smk-{rule}-{wildcards}
-    --output=zoutput/slurm_logs/{rule}/{rule}-{wildcards}-%j.out
-    --error=zoutput/slurm_logs/{rule}/{rule}-{wildcards}-%j.err
+    --output=zoutput/{log_dir}/{rule}/{rule}-{wildcards}-%j.out
+    --error=zoutput/{log_dir}/{rule}/{rule}-{wildcards}-%j.err
 default-resources:
   - partition=gpu_p
   - qos=gpu_normal

From 422aaa513257bf1e2d4f37e011735b84f13c96fc Mon Sep 17 00:00:00 2001
From: smilesun <smilesun.east@gmail.com>
Date: Mon, 8 Jul 2024 15:36:40 +0200
Subject: [PATCH 3/7] define resources in each rule

---
 domainlab/exp_protocol/benchmark.smk | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/domainlab/exp_protocol/benchmark.smk b/domainlab/exp_protocol/benchmark.smk
index fcc9eb4f3..d33f29a59 100644
--- a/domainlab/exp_protocol/benchmark.smk
+++ b/domainlab/exp_protocol/benchmark.smk
@@ -72,6 +72,8 @@ rule parameter_sampling:
         expand("{path}", path=config_path)
     output:
         dest=expand("{output_dir}/hyperparameters.csv", output_dir=config["output_dir"])
+    resources:
+        log_dir="slurm_logs_test"
     params:
         sampling_seed=os.environ["DOMAINLAB_CUDA_HYPERPARAM_SEED"]
     run:
@@ -107,6 +109,8 @@ rule parameter_sampling:
 rule run_experiment:
     input:
         param_file=rules.parameter_sampling.output
+    resources:
+        log_dir="slurm_logs_test"
     output:
         # snakemake keyword temporary for temporary directory
         # like f-string in python {index} is generated in the run block as wildcards
@@ -159,6 +163,8 @@ rule agg_results:
     # put different csv file in a big csv file
     input:
         exp_results=experiment_result_files
+    resources:
+        log_dir="slurm_logs_test"
     output:
         out_file=expand("{output_dir}/results.csv", output_dir=config["output_dir"])
     run:

From 75d3aca73b8951acbbdda061c3f9dd129b451433 Mon Sep 17 00:00:00 2001
From: smilesun <smilesun.east@gmail.com>
Date: Thu, 11 Jul 2024 11:54:08 +0200
Subject: [PATCH 4/7] slurm custom dir works

---
 examples/yaml/slurm/config.yaml | 6 +++---
 run_benchmark_slurm.sh          | 5 ++++-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/examples/yaml/slurm/config.yaml b/examples/yaml/slurm/config.yaml
index fb229c922..6775d8a75 100644
--- a/examples/yaml/slurm/config.yaml
+++ b/examples/yaml/slurm/config.yaml
@@ -1,6 +1,6 @@
 # This yaml file has been adapted from https://github.com/jdblischak/smk-simple-slurm
 cluster:
-  mkdir -p zoutput/{log_dir}/{rule} &&
+  mkdir -p $logdir/{rule} &&
   sbatch
     --partition=gpu_p
     --qos=gpu_normal
@@ -10,8 +10,8 @@ cluster:
     -c 2
     --mem=160G
     --job-name=smk-{rule}-{wildcards}
-    --output=zoutput/{log_dir}/{rule}/{rule}-{wildcards}-%j.out
-    --error=zoutput/{log_dir}/{rule}/{rule}-{wildcards}-%j.err
+    --output=zoutput/$logdir/{rule}/{rule}-{wildcards}-%j.out
+    --error=zoutput/$logdir/{rule}/{rule}-{wildcards}-%j.err
 default-resources:
   - partition=gpu_p
   - qos=gpu_normal
diff --git a/run_benchmark_slurm.sh b/run_benchmark_slurm.sh
index 36aeb7c8d..5ad2b94ff 100755
--- a/run_benchmark_slurm.sh
+++ b/run_benchmark_slurm.sh
@@ -32,4 +32,7 @@ echo "Number of GPUs: $NUMBER_GPUS"
 echo "Results will be stored in: $results_dir"
 
 # Helmholtz
-snakemake --config logdir="zoutput/benchmark/logs" --profile "examples/yaml/slurm" --config yaml_file="$CONFIGFILE" --keep-going --keep-incomplete --notemp --cores 3 -s "domainlab/exp_protocol/benchmark.smk" --configfile "$CONFIGFILE" --config output_dir="$results_dir" 2>&1 | tee "$logfile"
+export logdir="${results_dir}/slurm_logs/"
+echo "slurm logs going into ${logdir}"
+# NOTE: `snakemake --config logdir="zoutput/benchmark/logs"` does not seem to work, so logdir is exported as an environment variable above instead
+snakemake --profile "examples/yaml/slurm" --config yaml_file="$CONFIGFILE" --keep-going --keep-incomplete --notemp --cores 3 -s "domainlab/exp_protocol/benchmark.smk" --configfile "$CONFIGFILE" --config output_dir="$results_dir" 2>&1 | tee "$logfile"

From eb445cb93cf5c42b532e03b74d5636a293cb343a Mon Sep 17 00:00:00 2001
From: Xudong Sun <smilesun@users.noreply.github.com>
Date: Thu, 11 Jul 2024 14:44:29 +0200
Subject: [PATCH 5/7] Update benchmark.smk

---
 domainlab/exp_protocol/benchmark.smk | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/domainlab/exp_protocol/benchmark.smk b/domainlab/exp_protocol/benchmark.smk
index d33f29a59..84661ee61 100644
--- a/domainlab/exp_protocol/benchmark.smk
+++ b/domainlab/exp_protocol/benchmark.smk
@@ -72,8 +72,8 @@ rule parameter_sampling:
         expand("{path}", path=config_path)
     output:
         dest=expand("{output_dir}/hyperparameters.csv", output_dir=config["output_dir"])
-    resources:
-        log_dir="slurm_logs_test"
+    # resources:
+    #    log_dir="slurm_logs_test"
     params:
         sampling_seed=os.environ["DOMAINLAB_CUDA_HYPERPARAM_SEED"]
     run:
@@ -109,8 +109,6 @@ rule parameter_sampling:
 rule run_experiment:
     input:
         param_file=rules.parameter_sampling.output
-    resources:
-        log_dir="slurm_logs_test"
     output:
         # snakemake keyword temporary for temporary directory
         # like f-string in python {index} is generated in the run block as wildcards
@@ -163,8 +161,8 @@ rule agg_results:
     # put different csv file in a big csv file
     input:
         exp_results=experiment_result_files
-    resources:
-        log_dir="slurm_logs_test"
+    # resources:
+    #    log_dir="slurm_logs_test"
     output:
         out_file=expand("{output_dir}/results.csv", output_dir=config["output_dir"])
     run:

From bac7907ca513a450c416bac898165ff3f210c687 Mon Sep 17 00:00:00 2001
From: Xudong Sun <smilesun@users.noreply.github.com>
Date: Thu, 11 Jul 2024 14:47:16 +0200
Subject: [PATCH 6/7] Update config.yaml

---
 examples/yaml/slurm/config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/yaml/slurm/config.yaml b/examples/yaml/slurm/config.yaml
index 6775d8a75..642367adc 100644
--- a/examples/yaml/slurm/config.yaml
+++ b/examples/yaml/slurm/config.yaml
@@ -10,8 +10,8 @@ cluster:
     -c 2
     --mem=160G
     --job-name=smk-{rule}-{wildcards}
-    --output=zoutput/$logdir/{rule}/{rule}-{wildcards}-%j.out
-    --error=zoutput/$logdir/{rule}/{rule}-{wildcards}-%j.err
+    --output=$logdir/{rule}/{rule}-{wildcards}-%j.out
+    --error=$logdir/{rule}/{rule}-{wildcards}-%j.err
 default-resources:
   - partition=gpu_p
   - qos=gpu_normal

From d72a8e45b205eefcd01468d7160244e04d7211c4 Mon Sep 17 00:00:00 2001
From: smilesun <smilesun.east@gmail.com>
Date: Thu, 11 Jul 2024 14:58:44 +0200
Subject: [PATCH 7/7] update doc

---
 docs/doc_benchmark.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/doc_benchmark.md b/docs/doc_benchmark.md
index 41b19bffb..ec73fb36e 100644
--- a/docs/doc_benchmark.md
+++ b/docs/doc_benchmark.md
@@ -74,10 +74,10 @@ hyperparameter sampling and pytorch.
 The following script will help to find out which job has failed and the error message, so that you could direct to the
 specific log file
 ```cluster
-bash ./sh_list_error.sh ./zoutput/slurm_logs
+bash ./sh_list_error.sh ./zoutput/benchmarks/[output folder of the benchmark specified in the yaml file]/slurm_logs
 ```
 #### Map between slurm job id and sampled hyperparameter index
-suppose the slurm job id is 14144163, one could the corresponding log file in `./zoutput/slurm_logs` folder via
+suppose the slurm job id is 14144163, one could find the corresponding log file in the `./zoutput/benchmarks/[output folder of the benchmark specified in the yaml file]/slurm_logs` folder via
 `find . | grep -i "14144163"`
 
 the results can be