diff --git a/ush/bash_utils.sh b/ush/bash_utils.sh
index 48007623351..0bfbfa51ce9 100755
--- a/ush/bash_utils.sh
+++ b/ush/bash_utils.sh
@@ -110,7 +110,98 @@ function wait_for_file() {
     return 1
 }
 
+# Initialize stacks for tick-tock profiling (initialize only once)
+# NOTE: bash cannot export arrays, so '-x' is not used here; '-g' keeps the
+# stacks global even when this file is sourced from inside a function.
+if [[ -z ${_GW_TIMER_STACK+x} ]]; then
+    declare -ga _GW_TIMER_STACK=()
+    declare -ga _GW_LABEL_STACK=()
+fi
+
+# Function: tick [label]
+tick() {
+    #
+    # Start timer for profiling
+    #
+    # Starts a timer by storing current time in a stack
+    # Accepts an optional label to identify the timer instance
+    #
+    # Syntax:
+    #   tick [label]
+    #     label: Optional label to identify the timer instance [default: "Timer"]
+    #
+    set +x
+    local start_time label
+    start_time=$(date +%s%N)
+    # Use provided label or default to "Timer" if $1 is empty
+    label="${1:-Timer}"
+
+    # Push values onto the stacks
+    _GW_TIMER_STACK+=("${start_time}")
+    _GW_LABEL_STACK+=("${label}")
+    set_trace
+}
+
+# Function: tock
+tock() {
+    #
+    # Stop timer and print elapsed time in seconds
+    #
+    # Stops a timer by calculating elapsed time since last tick and outputs the elapsed time in seconds.
+    # Accepts an optional label to check for the timer instance.
+    # If the provided label does not match the one stored during tick, a warning is issued.
+    #
+    # Syntax:
+    #   tock [label]
+    #
+    set +x
+    local end_time
+    end_time=$(date +%s%N)
+
+    # Safety check
+    if [[ ${#_GW_TIMER_STACK[@]} -eq 0 ]]; then
+        echo "WARNING: 'tock' called without a matching 'tick'."
+        set_trace
+        return 1
+    fi
+
+    local last_idx
+    # Retrieve the last element index
+    last_idx=$((${#_GW_TIMER_STACK[@]} - 1))
+
+    # Get the start time and label
+    local start_time label
+    start_time=${_GW_TIMER_STACK[${last_idx}]}
+    label=${_GW_LABEL_STACK[${last_idx}]}
+
+    local label_input
+    label_input="${1:-}"
+    if [[ -n ${label_input} ]]; then
+        if [[ ${label_input} != "${label}" ]]; then
+            echo "WARNING: 'tock' label '${label_input}' does not match 'tick' label '${label}'."
+        fi
+    fi
+
+    # Remove (pop) elements from stacks
+    unset "_GW_TIMER_STACK[${last_idx}]"
+    unset "_GW_LABEL_STACK[${last_idx}]"
+
+    # Calculate elapsed time
+    # Integer arithmetic avoids the external 'bc' dependency and keeps the
+    # leading zero for sub-second timings (bc would print e.g. '.123').
+    local elapsed_nanos elapsed_secs
+    elapsed_nanos=$((end_time - start_time))
+    printf -v elapsed_secs '%d.%03d' \
+        "$((elapsed_nanos / 1000000000))" "$(((elapsed_nanos % 1000000000) / 1000000))"
+
+    # Output the result with the label
+    echo "[${label}] Elapsed: ${elapsed_secs}s"
+    set_trace
+}
+
 # shellcheck disable=
 
 declare -xf declare_from_tmpl
 declare -xf wait_for_file
+declare -xf tick
+declare -xf tock
diff --git a/ush/run_mpmd.sh b/ush/run_mpmd.sh
index 36ddc4bc4cc..773f8042d34 100755
--- a/ush/run_mpmd.sh
+++ b/ush/run_mpmd.sh
@@ -19,6 +19,9 @@
 #   launcher: Command to launch the MPMD job. Default is empty.
 #             Supported launchers are 'srun' and 'mpiexec'.
 #   mpmd_opt: Additional options to pass to the launcher. Default is empty.
+#             Example:
+#               srun: "--multi-prog --output=mpmd.%j.%t.out"
+#               mpiexec: "--cpu-bind verbose,core cfp"
 #
 # Input:
 #   cmdfile: File containing commands to execute in MPMD/serial mode
@@ -50,12 +53,14 @@ nprocs=$(wc -l < "${cmdfile}")
 
 # Local MPMD file containing instructions to run in CFP
 mpmd_cmdfile="${DATA:-}/mpmd_cmdfile"
-if [[ -s "${mpmd_cmdfile}" ]]; then rm -f "${mpmd_cmdfile}"; fi
+if [[ -s "${mpmd_cmdfile}" ]]; then
+    rm -f "${mpmd_cmdfile}"
+fi
 
 cat << EOF
-  INFO: Executing MPMD job, STDOUT redirected for each process separately
-  INFO: On failure, logs for each job will be available in ${DATA}/mpmd.proc_num.out
-  INFO: The proc_num corresponds to the line in '${mpmd_cmdfile}'
+INFO: Executing MPMD job, STDOUT and STDERR redirected for each process separately
+INFO: On failure, logs for each job will be available in ${DATA}/mpmd.proc_num.out
+INFO: The proc_num corresponds to the line in '${mpmd_cmdfile}'
 EOF
 
 if [[ "${launcher:-}" =~ ^srun.* ]]; then # srun-based system e.g. Hera, Orion, etc.
@@ -81,7 +86,7 @@ elif [[ "${launcher:-}" =~ ^mpiexec.* ]]; then # mpiexec
     nm=0
     echo "#!/bin/bash" >> "${mpmd_cmdfile}"
     while IFS= read -r line; do
-        echo "${line} > mpmd.${nm}.out" >> "${mpmd_cmdfile}"
+        echo "${line} > mpmd.${nm}.out 2>&1" >> "${mpmd_cmdfile}"
         ((nm = nm + 1))
     done < "${cmdfile}"
     chmod 755 "${mpmd_cmdfile}"
@@ -90,10 +95,12 @@ elif [[ "${launcher:-}" =~ ^mpiexec.* ]]; then # mpiexec
     ${launcher:-} -np ${nprocs} ${mpmd_opt:-} "${mpmd_cmdfile}"
     err=$?
 
-else
+else # Unsupported or empty launcher, run in serial mode ('-e': any failed command fails the job)
 
-    echo "FATAL ERROR: CFP is not usable with launcher: '${launcher:-}'"
-    err=1
+    echo "WARNING: CFP is not usable with launcher: '${launcher:-}', using serial mode instead"
+    chmod 755 "${cmdfile}"
+    bash +x -e "${cmdfile}" > mpmd.out 2>&1
+    err=$?
 
 fi
 
@@ -102,7 +109,11 @@ if [[ ${err} -eq 0 ]]; then
     rm -f "${mpmd_cmdfile}"
     out_files=$(find . -name 'mpmd.*.out')
     for file in ${out_files}; do
-        cat "${file}" >> mpmd.out
+        {
+            echo "BEGIN OUTPUT FROM ${file}"
+            cat "${file}"
+            echo "END OUTPUT FROM ${file}"
+        } >> mpmd.out
         rm -f "${file}"
     done
     cat mpmd.out