diff --git a/src/fiat/CMakeLists.txt b/src/fiat/CMakeLists.txt index be4a49ff..e21cd943 100644 --- a/src/fiat/CMakeLists.txt +++ b/src/fiat/CMakeLists.txt @@ -73,6 +73,7 @@ if( ${CMAKE_SYSTEM_NAME} MATCHES "Darwin" ) endif() target_compile_definitions( fiat PRIVATE ${FIAT_DEFINITIONS} ) +target_compile_definitions( fiat PRIVATE fiat_GIT_SHA1="${fiat_GIT_SHA1}" ) if( HAVE_FCKIT ) target_link_libraries( fiat PRIVATE fckit ) diff --git a/src/fiat/drhook/drhook.c b/src/fiat/drhook/drhook.c index e089be1a..47b0e712 100644 --- a/src/fiat/drhook/drhook.c +++ b/src/fiat/drhook/drhook.c @@ -335,7 +335,11 @@ static long long int opt_timeline_freq = 1000000; /* How often to print : every static double opt_timeline_MB = 1.0; /* ... rss or curheap jumps up/down by more than this many MBytes (default = 1) : unit MBytes */ static volatile sig_atomic_t opt_gencore = 0; -static int opt_gencore_signal = 0; +/* signal 0 might not always be counted, but we can spare sizeof(int) to be careful */ +static int opt_gencore_signals[NSIG + 1]; +static int* opt_gencore_processes; +static int opt_gencore_user_specified = 0; +static int opt_gencore_all_procs = 0; static int opt_random_memstat = 0; /* > 0 if to obtain random memory stats (maxhwm, maxstk) for tid=1. 
Updated when rand() % opt_random_memstat == 0 */ @@ -558,8 +562,6 @@ static drhook_calltree_t **thiscall = NULL; static int signals_set = 0; static volatile sig_atomic_t signal_handler_called = 0; static volatile sig_atomic_t signal_handler_ignore_atexit = 0; -static volatile sig_atomic_t unlimited_corefile_retcode = 9999; -static volatile unsigned long long int saved_corefile_hardlimit = 0; static int allow_coredump = 0; /* -1 denotes ALL MPI-tasks, 1..NPES == myproc, 0 = coredump will not be enabled by DrHook at init */ static drhook_sig_t siglist[1+NSIG] = { 0 }; static char *a_out = NULL; @@ -751,7 +753,7 @@ static void dump_hugepages(int enforce, const char *pfx, int tid, int sig, int n /*--- set_default_handler ---*/ -static int set_unlimited_corefile(unsigned long long int *hardlimit, int enforce); +static int set_corefile_to_hard_limit(unsigned long long int *hardlimit, int enforce); static int set_default_handler(int sig, int unlimited_corefile, int verbose) { @@ -766,12 +768,14 @@ static int set_default_handler(int sig, int unlimited_corefile, int verbose) sigaddset(&sa.sa_mask, some_signal_to_be_blocked); ... just in case */ sigaction(sig, &sa, NULL); - if (unlimited_corefile) rc = set_unlimited_corefile(&hardlimit,0); /* unconditionally */ +// TODO: Is this needed here? +// if (unlimited_corefile) rc = set_corefile_to_hard_limit(&hardlimit,0); /* unconditionally */ if (verbose) { int tid = drhook_oml_get_thread_num(); char *pfx = PREFIX(tid); char buf[128] = ""; - if (unlimited_corefile && rc == 0) snprintf(buf,sizeof(buf)," -- hardlimit for core file is now %llu (0x%llx)", hardlimit, hardlimit); +// TODO: Is this needed here? 
+// if (unlimited_corefile && rc == 0) snprintf(buf,sizeof(buf)," -- hardlimit for core file is now %llu (0x%llx)", hardlimit, hardlimit); fprintf(stderr, "%s %s [%s@%s:%d] " "Enabled default signal handler (SIG_DFL) for signal#%d%s\n", @@ -1437,7 +1441,7 @@ ignore_signals(int silent) #define DRH_GETRLIMIT getrlimit #define DRH_SETRLIMIT setrlimit -static int set_unlimited_corefile(unsigned long long int *hardlimit, int enforce) +static int set_corefile_to_hard_limit(unsigned long long int *hardlimit, int enforce) { /* Make sure we *only* set soft-limit (not hard-limit) to 0 in our scripts i.e. : @@ -1446,46 +1450,77 @@ static int set_unlimited_corefile(unsigned long long int *hardlimit, int enforce $ ulimit -c 0 See man ksh or man bash for more */ - int rc = -1; - if (enforce || unlimited_corefile_retcode == 9999) { /* Done only once -- or if enforced*/ + static int previously_set = 0; + static volatile unsigned long long int saved_corefile_hardlimit = 0; + /* + Mirror old behaviour where this either returns the previous successful value + or 0 if it was never successfully set + */ + if (hardlimit) *hardlimit = saved_corefile_hardlimit; + + if (enforce || !previously_set) { /* Done only once -- or if enforced*/ + DRH_STRUCT_RLIMIT r; - if (DRH_GETRLIMIT(RLIMIT_CORE, &r) == 0) { - r.rlim_cur = r.rlim_max; - if (DRH_SETRLIMIT(RLIMIT_CORE, &r) == 0) { - saved_corefile_hardlimit = r.rlim_cur; - rc = 0; - } - } - unlimited_corefile_retcode = rc; + + if (DRH_GETRLIMIT(RLIMIT_CORE, &r)) return -1; + if (!r.rlim_cur) r.rlim_cur = r.rlim_max; + + if (DRH_SETRLIMIT(RLIMIT_CORE, &r)) return -1; + + saved_corefile_hardlimit = r.rlim_cur; + previously_set = 1; } + if (hardlimit) *hardlimit = saved_corefile_hardlimit; - rc = unlimited_corefile_retcode; - return rc; + return 0; } static void signal_gencore(int sig SIG_EXTRA_ARGS) { - if (opt_gencore > 0) { - opt_gencore = 0; /* A tiny chance for a race condition between threads */ - if (sig == opt_gencore_signal && sig >= 1 && 
sig <= NSIG) { - signal(sig, SIG_IGN); - signal(SIGABRT, SIG_DFL); - { /* Enable unlimited cores (up to hard-limit) and call abort() --> generates core dump */ - if (set_unlimited_corefile(NULL,1) == 0) { - int tid = drhook_oml_get_thread_num(); - char *pfx = PREFIX(tid); - fprintf(stderr, - "%s %s [%s@%s:%d] Received signal#%d and now calling abort() ...\n", - pfx,TIMESTR(tid),FFL, - sig); - LinuxTraceBack(pfx,TIMESTR(tid),NULL); - abort(); /* Dump core, too */ + if (opt_gencore) { + if ( sig >= 1 && sig <= NSIG && opt_gencore_signals[sig] ) { + /* User has specified procs & I'm that proc + * or user hasn't specified procs & either all procs dump or should attempt getting a lock */ + if ( (opt_gencore_user_specified && opt_gencore_processes[myproc]) || + (!opt_gencore_user_specified && (opt_gencore_all_procs || drhook_use_lockfile)) ) { + int fd = -1; + if (drhook_use_lockfile) + fd = open(drhook_lockfile,O_CREAT|O_WRONLY|O_TRUNC|O_EXCL,S_IRUSR|S_IWUSR); + + /* Allowed through or gotten lock */ + if (opt_gencore_all_procs || !drhook_use_lockfile || (drhook_use_lockfile && fd >= 0)) { + + /* Ignore whatever signal brought us here (In case other processes get it too), + * and restore the default handler for aborts + */ + signal(sig, SIG_IGN); + signal(SIGABRT, SIG_DFL); + /* If we got through with a file lock, note some details and safely close it */ + if (fd >= 0) { + size_t count = sizeof(myproc); + ssize_t sz = write(fd, &myproc, count); // Now we know which MPL-task got the lock (use octal-dump "od" command) + close(fd); + } + + // TODO: Should set_corefile_to_hard_limit be here? 
We check it with process_options, but it could change between then and now + { /* Enable unlimited cores (up to hard-limit) and call abort() --> generates core dump */ + if (!set_corefile_to_hard_limit(NULL, 1)) { + int tid = drhook_oml_get_thread_num(); + char *pfx = PREFIX(tid); + fprintf(stderr, + "%s %s [%s@%s:%d] Received signal#%d and now calling abort() ...\n", + pfx, TIMESTR(tid), FFL, + sig); + LinuxTraceBack(pfx, TIMESTR(tid), NULL); + abort(); /* Dump core, too. This should now call the kernel's handler */ + } + } + /* Should never end up here */ + _exit(128 + ABS(sig)); } } - /* Should never end up here */ - _exit(128+ABS(sig)); - } /* if (sig == opt_gencore_signal && sig >= 1 && sig <= NSIG) */ + } /* if ( sig >= 1 && sig <= NSIG && opt_gencore_signals[sig] ) */ } } @@ -1640,15 +1675,15 @@ signal_drhook(int sig SIG_EXTRA_ARGS) " %lldMB (maxrss), %lldMB (maxstack), %lldMB (vmpeak), %lld (paging), nsigs = %d\n", pfx,TIMESTR(tid),FFL, sig, sl->name, hwm, rss, maxstack, vmpeak, pag, nsigs); - if (allow_coredump) { - unsigned long long int hardlimit = 0; - int rc = set_unlimited_corefile(&hardlimit,1); - if (rc == 0) { - fprintf(stderr, - "%s %s [%s@%s:%d] Hardlimit for core file is now %llu (0x%llx)\n", - pfx,TIMESTR(tid),FFL,hardlimit,hardlimit); - } - } +// if (allow_coredump) { +// unsigned long long int hardlimit = 0; +// int rc = set_corefile_to_hard_limit(&hardlimit,1); +// if (rc == 0) { +// fprintf(stderr, +// "%s %s [%s@%s:%d] Hardlimit for core file is now %llu (0x%llx)\n", +// pfx,TIMESTR(tid),FFL,hardlimit,hardlimit); +// } +// } #if 1 fprintf(stderr, @@ -2109,10 +2144,13 @@ signal_drhook_init(int enforce) #endif */ catch_signals(silent); /* Additional signals to be seen by DR_HOOK */ - if (opt_gencore > 0 && opt_gencore_signal >= 1 && opt_gencore_signal <= NSIG) { - drhook_sigfunc_t u; - u.func3args = signal_gencore; - signal(opt_gencore_signal, u.func1args); /* A facility to dump core */ + if (opt_gencore) { + for (int cur_signal = 0; 
cur_signal <= NSIG; cur_signal++) { + if (!opt_gencore_signals[cur_signal]) continue; + drhook_sigfunc_t u; + u.func3args = signal_gencore; + signal(cur_signal, u.func1args); /* A facility to dump core */ + } } signals_set = 1; /* Signals are set now */ } @@ -2206,6 +2244,7 @@ process_options() if(fp) fprintf(fp,"[EC_DRHOOK:hostname:myproc:omltid:pid:unixtid] [YYYYMMDD:HHMMSS:walltime] [function@file:lineno] -- Max OpenMP threads = %d\n",drhook_oml_get_max_threads()); + OPTPRINT(fp,"%s %s [%s@%s:%d] Built from commit %s\n",pfx,TIMESTR(tid),FFL,BUILD_GIT_HASH); OPTPRINT(fp,"%s %s [%s@%s:%d] fp = %p\n",pfx,TIMESTR(tid),FFL,(void*)fp); env = getenv("ATP_ENABLED"); @@ -2223,24 +2262,6 @@ process_options() OPTPRINT(fp,"%s %s [%s@%s:%d] ATP_IGNORE_SIGTERM=%d\n",pfx,TIMESTR(tid),FFL,atp_ignore_sigterm); } - env = getenv("DR_HOOK_ALLOW_COREDUMP"); - if (env) { - ienv = atoi(env); - allow_coredump = (ienv == -1 || ienv == myproc) ? ienv : 0; - } - OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_ALLOW_COREDUMP=%d\n",pfx,TIMESTR(tid),FFL,allow_coredump); -#if 0 - // Postponed until DrHook actully has caught the signal - if (allow_coredump) { - unsigned long long int hardlimit = 0; - int rc = set_unlimited_corefile(&hardlimit,1); - if (rc == 0) { - OPTPRINT(fp,"%s %s [%s@%s:%d] Hardlimit for core file is now %llu (0x%llx)\n", - pfx,TIMESTR(tid),FFL,hardlimit,hardlimit); - } - } -#endif - env = getenv("DR_HOOK_PROFILE"); if (env) { char *s = calloc_drhook(strlen(env) + 15, sizeof(*s)); @@ -2465,17 +2486,96 @@ process_options() opt_gencore = atoi(env); } + int print_gencore_signals = 0; if (opt_gencore) { OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_GENCORE=%d\n",pfx,TIMESTR(tid),FFL,opt_gencore); - + /* This is here to not break the previous flags */ env = getenv("DR_HOOK_GENCORE_SIGNAL"); if (env) { int itmp = atoi(env); if (itmp >= 1 && itmp <= NSIG && itmp != SIGABRT) { - opt_gencore_signal = itmp; + opt_gencore_signals[itmp] = 1; + print_gencore_signals = 1; + } + } + + env = 
getenv("DR_HOOK_GENCORE_SIGNALS"); + if (env) { + print_gencore_signals = 1; + const char delim[] = ", \t/"; + char *s = strdup_drhook(env); + char *p = strtok(s,delim); + + while (p) { + int itmp = atoi(p); + if (1 <= itmp && itmp <= NSIG && itmp != SIGABRT) + opt_gencore_signals[itmp] = 1; + p = strtok(NULL,delim); + } + free_drhook(s); + + if (print_gencore_signals) { + OPTPRINT(fp, "%s %s [%s@%s:%d] DR_HOOK_GENCORE_SIGNALS=", pfx, TIMESTR(tid), FFL); + for (int i = 0; i < NSIG; i++) { + OPTPRINT(fp, "%d:%d, ", i, opt_gencore_signals[i]); + } + OPTPRINT(fp, "%d:%d\n", NSIG, opt_gencore_signals[NSIG]); + } + } + + env = getenv("DR_HOOK_GENCORE_PROCS"); + if (env) { +// TODO: Is nproc the right size?? + opt_gencore_processes = calloc_drhook(nproc, sizeof(int)); + opt_gencore_user_specified = 1; + const char delim[] = ", \t/"; + char *s = strdup_drhook(env); + char *p = strtok(s,delim); + + while (p) { + int itmp = atoi(p); + if (0 <= itmp && itmp < nproc) + opt_gencore_processes[itmp] = 1; + p = strtok(NULL,delim); + } + free_drhook(s); + + OPTPRINT(fp, "%s %s [%s@%s:%d] DR_HOOK_GENCORE_PROCS=", pfx, TIMESTR(tid), FFL); + for (int i = 0; i < nproc - 1; i++) { + OPTPRINT(fp, "%d:%d, ", i, opt_gencore_processes[i]); } + OPTPRINT(fp, "%d:%d\n", nproc - 1, opt_gencore_processes[nproc - 1]); + } + + /* Super secret flag that enables the fs killing opt_gencore_all_procs option */ + env = getenv("DR_HOOK_SECRET"); + if (env) { + opt_gencore_all_procs = atoi(env); + + OPTPRINT(fp, "%s %s [%s@%s:%d] WARNING: The following option can easily bring down entire file systems on its own. " + "By enabling this, you are claiming you know what you're doing. If you do not, then disable it IMMEDIATELY!\n", + pfx, TIMESTR(tid), FFL); + OPTPRINT(fp, "%s %s [%s@%s:%d] DR_HOOK_SECRET=%d\n", pfx, TIMESTR(tid), FFL, opt_gencore_all_procs); + } + } + + env = getenv("DR_HOOK_ALLOW_COREDUMP"); + if (env) { + ienv = atoi(env); + allow_coredump = (ienv == -1 || ienv == myproc) ? 
ienv : 0; + } + + /* opt_gencore implies allow_coredump */ + allow_coredump |= opt_gencore; + OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_ALLOW_COREDUMP=%d\n",pfx,TIMESTR(tid),FFL,allow_coredump); + + if (allow_coredump) { + unsigned long long int hardlimit = 0; + int rc = set_corefile_to_hard_limit(&hardlimit,1); + if (rc == 0) { + OPTPRINT(fp,"%s %s [%s@%s:%d] Hardlimit for core file is now %llu (0x%llx)\n", + pfx,TIMESTR(tid),FFL,hardlimit,hardlimit); } - OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_GENCORE_SIGNAL=%d\n",pfx,TIMESTR(tid),FFL,opt_gencore_signal); } newline = 0; @@ -3940,6 +4040,7 @@ c_drhook_print_(const int *ftnunitno, ) { static int first_time = 0; + static int reported_open_regions = 0; int tid = (thread_id && (*thread_id >= 1) && (*thread_id <= numthreads)) ? *thread_id : drhook_oml_get_thread_num(); int mytid = drhook_oml_get_thread_num(); @@ -4189,6 +4290,11 @@ c_drhook_print_(const int *ftnunitno, cycles[t] += self_cycles; } nprof++; + } else if (keyptr->name && keyptr->status > 0 && !reported_open_regions) { + fprintf(stderr, + "%s %s [%s@%s:%d] WARNING: Region '%s' was never closed or stopped by a signal (Opened %d time(s) without closing). No output will be produced for this region.\n", + pfx,TIMESTR(tid),FFL, keyptr->name, keyptr->status); + reported_open_regions = 1; } keyptr = keyptr->next; } /* while (keyptr && keyptr->status == 0) */ @@ -4511,6 +4617,11 @@ c_drhook_print_(const int *ftnunitno, tot[t] += self; maxseen_tot[t] = MAX(maxseen_tot[t], keyptr->mem_seenmax); nprof++; + } else if (keyptr->name && keyptr->status > 0 && !reported_open_regions) { + fprintf(stderr, + "%s %s [%s@%s:%d] WARNING: Region '%s' was never closed or stopped by a signal (Opened %d time(s) without closing). 
No output will be produced for this region.\n", + pfx,TIMESTR(tid),FFL, keyptr->name, keyptr->status); + reported_open_regions = 1; } keyptr = keyptr->next; } /* while (keyptr && keyptr->status == 0) */ diff --git a/tests/drhook/CMakeLists.txt b/tests/drhook/CMakeLists.txt index 8c82b181..1593ffbd 100644 --- a/tests/drhook/CMakeLists.txt +++ b/tests/drhook/CMakeLists.txt @@ -76,3 +76,5 @@ ecbuild_add_test( TARGET fiat_test_drhook_ex5 # TODO: # Better parse output to see if it matches. + +add_subdirectory(drhook_flags) \ No newline at end of file diff --git a/tests/drhook/drhook_flags/CMakeLists.txt b/tests/drhook/drhook_flags/CMakeLists.txt new file mode 100644 index 00000000..dcb77775 --- /dev/null +++ b/tests/drhook/drhook_flags/CMakeLists.txt @@ -0,0 +1,14 @@ +# (C) Copyright 2024- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# ------------------------------------------------------------------------------ +# Dr Hook Gencore Tests +# ------------------------------------------------------------------------------ + +add_subdirectory(drhook_gencore) \ No newline at end of file diff --git a/tests/drhook/drhook_flags/drhook_gencore/CMakeLists.txt b/tests/drhook/drhook_flags/drhook_gencore/CMakeLists.txt new file mode 100644 index 00000000..b832e7c9 --- /dev/null +++ b/tests/drhook/drhook_flags/drhook_gencore/CMakeLists.txt @@ -0,0 +1,98 @@ +# (C) Copyright 2024- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+# +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# ------------------------------------------------------------------------------ +# Dr Hook Gencore Tests +# ------------------------------------------------------------------------------ + +# Test single threaded basic gencore +ecbuild_add_executable( TARGET drhook_gencore_basic + SOURCES drhook_gencore_basic.F90 drhook_gencore_utilities.c + LIBS fiat + LINKER_LANGUAGE Fortran + NOINSTALL ) + +ecbuild_add_test( TARGET fiat_test_drhook_gencore_basic + COMMAND gencore_test_runner.sh + ARGS drhook_gencore_basic 1 "DR_HOOK_GENCORE=1" "DR_HOOK_ALLOW_COREDUMP=1" "Received signal#11 and now calling abort" + RESOURCES gencore_test_runner.sh + ENVIRONMENT DR_HOOK=1 DR_HOOK_GENCORE=1 DR_HOOK_GENCORE_SIGNALS=11 + ) + +# Test allow_coredump respects existing soft limits +ecbuild_add_executable( TARGET drhook_gencore_existing_soft_limits + SOURCES drhook_gencore_existing_soft_limits.F90 + LIBS fiat + LINKER_LANGUAGE Fortran + NOINSTALL ) + +# ulimit -S -c 555 is set in the runner +ecbuild_add_test( TARGET fiat_test_drhook_gencore_existing_soft_limits + COMMAND gencore_test_runner.sh + # The soft limit is set in increments of 1024, so 555*1024=568320 + ARGS drhook_gencore_existing_soft_limits 0 "DR_HOOK_GENCORE=1" "DR_HOOK_ALLOW_COREDUMP=1" "Hardlimit for core file is now 568320" + RESOURCES gencore_test_runner.sh + ENVIRONMENT DR_HOOK=1 DR_HOOK_GENCORE=1 ) + +# Test user specified processes for allowing coredumps +ecbuild_add_executable( TARGET drhook_gencore_user_procs + SOURCES drhook_gencore_user_procs.F90 drhook_gencore_utilities.c + LIBS fiat + LINKER_LANGUAGE Fortran + NOINSTALL ) + +ecbuild_add_test( TARGET fiat_test_drhook_gencore_user_procs + COMMAND gencore_test_runner.sh + ARGS drhook_gencore_user_procs 2 "DR_HOOK_GENCORE_PROCS=0:0, 1:1, 2:0, 3:1, 4:0" + 
RESOURCES gencore_test_runner.sh + ENVIRONMENT DR_HOOK=1 DR_HOOK_GENCORE=1 DR_HOOK_GENCORE_SIGNALS=11 DR_HOOK_GENCORE_PROCS=1,3 + CONDITION HAVE_MPI ) + +# Test super secret flag for allowing coredumps +ecbuild_add_executable( TARGET drhook_gencore_super_secret + SOURCES drhook_gencore_super_secret.F90 drhook_gencore_utilities.c + LIBS fiat + LINKER_LANGUAGE Fortran + NOINSTALL ) + +ecbuild_add_test( TARGET fiat_test_drhook_gencore_super_secret + COMMAND gencore_test_runner.sh + ARGS drhook_gencore_super_secret 5 "DR_HOOK_SECRET" "WARNING: The following option can easily bring down entire file systems on its own." + RESOURCES gencore_test_runner.sh + ENVIRONMENT DR_HOOK=1 DR_HOOK_GENCORE=1 DR_HOOK_GENCORE_SIGNALS=11 DR_HOOK_SECRET=1 + CONDITION HAVE_MPI ) + +# Test default only creates a coredump for the first process +ecbuild_add_executable( TARGET drhook_gencore_first_only + SOURCES drhook_gencore_first_only.F90 drhook_gencore_utilities.c + LIBS fiat + LINKER_LANGUAGE Fortran + NOINSTALL ) + +ecbuild_add_test( TARGET fiat_test_drhook_gencore_first_only + COMMAND gencore_test_runner.sh + ARGS drhook_gencore_first_only 1 + RESOURCES gencore_test_runner.sh + ENVIRONMENT DR_HOOK=1 DR_HOOK_GENCORE=1 DR_HOOK_GENCORE_SIGNALS=11 + CONDITION HAVE_MPI ) + +# Test user specified signals for allowing coredumps +ecbuild_add_executable( TARGET drhook_gencore_user_sigs + SOURCES drhook_gencore_user_sigs.F90 drhook_gencore_utilities.c + LIBS fiat + LINKER_LANGUAGE Fortran + NOINSTALL ) + +ecbuild_add_test( TARGET fiat_test_drhook_gencore_user_sigs + COMMAND drhook_gencore_user_sigs + ENVIRONMENT DR_HOOK=1 DR_HOOK_GENCORE=1 DR_HOOK_GENCORE_SIGNALS=11 ) +set_tests_properties( fiat_test_drhook_gencore_user_sigs + PROPERTIES PASS_REGULAR_EXPRESSION "Received signal#8 and now calling abort()" + WILL_FAIL TRUE + ) diff --git a/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_basic.F90 b/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_basic.F90 new file mode 100644 index 
00000000..db205847 --- /dev/null +++ b/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_basic.F90 @@ -0,0 +1,21 @@ +! (C) Copyright 2024- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +program drhook_gencore_basic + use yomhook, only : jphook, dr_hook + + implicit none + real(jphook) :: zhook_handle + + call dr_hook('drhook_gencore_basic',0,zhook_handle) + + call raise(11) + + call dr_hook('drhook_gencore_basic',1,zhook_handle) +end program drhook_gencore_basic diff --git a/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_existing_soft_limits.F90 b/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_existing_soft_limits.F90 new file mode 100644 index 00000000..ee484d65 --- /dev/null +++ b/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_existing_soft_limits.F90 @@ -0,0 +1,22 @@ +! (C) Copyright 2024- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. 
+ +program drhook_coredump_existing_soft_limits + use yomhook, only : jphook, dr_hook + + implicit none + real(jphook) :: zhook_handle + integer :: a + + call dr_hook('drhook_coredump_existing_soft_limits',0,zhook_handle) + + a = 2 + + call dr_hook('drhook_coredump_existing_soft_limits',1,zhook_handle) +end program drhook_coredump_existing_soft_limits diff --git a/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_first_only.F90 b/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_first_only.F90 new file mode 100644 index 00000000..90bb060b --- /dev/null +++ b/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_first_only.F90 @@ -0,0 +1,31 @@ +! (C) Copyright 2024- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +program drhook_gencore_first_only + use mpl_module + use yomhook, only : jphook, dr_hook + use sdl_mod, only : sdl_traceback + implicit none + integer jpe, npes, mype + character(len=256) arg, env + real(jphook) :: zhook_handle + + call mpl_init(ldinfo=.false.) + call dr_hook('drhook_gencore_first_only',0,zhook_handle) + + npes = mpl_nproc() + mype = mpl_myrank() + + call mpl_barrier() + call raise(11) + + call mpl_barrier() + call dr_hook('drhook_gencore_first_only',1,zhook_handle) + call mpl_end() +end program drhook_gencore_first_only diff --git a/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_super_secret.F90 b/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_super_secret.F90 new file mode 100644 index 00000000..de8d996a --- /dev/null +++ b/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_super_secret.F90 @@ -0,0 +1,31 @@ +! (C) Copyright 2024- ECMWF. +! +! 
This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +program drhook_gencore_super_secret + use mpl_module + use yomhook, only : jphook, dr_hook + use sdl_mod, only : sdl_traceback + implicit none + integer jpe, npes, mype + character(len=256) arg, env + real(jphook) :: zhook_handle + + call mpl_init(ldinfo=.false.) + call dr_hook('drhook_gencore_super_secret',0,zhook_handle) + + npes = mpl_nproc() + mype = mpl_myrank() + + call mpl_barrier() + call raise(11) + + call mpl_barrier() + call dr_hook('drhook_gencore_super_secret',1,zhook_handle) + call mpl_end() +end program drhook_gencore_super_secret diff --git a/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_user_procs.F90 b/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_user_procs.F90 new file mode 100644 index 00000000..f14a4bd7 --- /dev/null +++ b/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_user_procs.F90 @@ -0,0 +1,34 @@ +! (C) Copyright 2024- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +program drhook_gencore_user_procs + use mpl_module + use yomhook, only : jphook, dr_hook + use sdl_mod, only : sdl_traceback + implicit none + integer jpe, npes, mype + character(len=256) arg, env + real(jphook) :: zhook_handle + + call mpl_init(ldinfo=.false.) 
+ call dr_hook('drhook_gencore_user_procs',0,zhook_handle) + + npes = mpl_nproc() + mype = mpl_myrank() + + do jpe=1,npes + if (jpe == mype) then + call raise(11) + endif + enddo + + call mpl_barrier() + call dr_hook('drhook_gencore_user_procs',1,zhook_handle) + call mpl_end() +end program drhook_gencore_user_procs diff --git a/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_user_sigs.F90 b/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_user_sigs.F90 new file mode 100644 index 00000000..f4d1338f --- /dev/null +++ b/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_user_sigs.F90 @@ -0,0 +1,21 @@ +! (C) Copyright 2024- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +program drhook_gencore_user_sigs + use yomhook, only : jphook, dr_hook + + implicit none + real(jphook) :: zhook_handle + + call dr_hook('drhook_gencore_user_sigs',0,zhook_handle) + + call raise(8) + + call dr_hook('drhook_gencore_user_sigs',1,zhook_handle) +end program drhook_gencore_user_sigs diff --git a/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_utilities.c b/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_utilities.c new file mode 100644 index 00000000..ada04760 --- /dev/null +++ b/tests/drhook/drhook_flags/drhook_gencore/drhook_gencore_utilities.c @@ -0,0 +1,14 @@ +// (C) Copyright 2024- ECMWF. +// +// This software is licensed under the terms of the Apache Licence Version 2.0 +// which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+// +// In applying this licence, ECMWF does not waive the privileges and immunities +// granted to it by virtue of its status as an intergovernmental organisation +// nor does it submit to any jurisdiction. + +#include <signal.h> + +int raise_(int* sig) { + return raise(*sig); +} \ No newline at end of file diff --git a/tests/drhook/drhook_flags/drhook_gencore/gencore_test_runner.sh b/tests/drhook/drhook_flags/drhook_gencore/gencore_test_runner.sh new file mode 100755 index 00000000..83562e79 --- /dev/null +++ b/tests/drhook/drhook_flags/drhook_gencore/gencore_test_runner.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +cmd=$1 +expected_count=$2 + +if [ "$cmd" == "drhook_gencore_existing_soft_limits" ]; then + ulimit -S -c 555 +fi + +if [ "$cmd" == "drhook_gencore_user_procs" ] || [ "$cmd" == "drhook_gencore_super_secret" ] || [ "$cmd" == "drhook_gencore_first_only" ]; then + output="$( mpiexec -n 5 ./$cmd 2>&1)" +else + output="$(./$cmd 2>&1)" +fi + +echo "$output" + +for ((i=3; i <= "$#"; i++)); do + grep --silent "${!i}" <<< "$output" + res=$? + if [ $res == 1 ]; then + echo "Couldn't find \"${!i}\" in the output!" + exit 1 + fi +done + +rm -rf "./${cmd}_coredumps" + +mkdir "${cmd}_coredumps" + +mv drhook_lock core* "./${cmd}_coredumps" 2>/dev/null + +actual_count=$(find "./${cmd}_coredumps" \! -name drhook_lock -type f | wc -l) +if [[ $actual_count == "$expected_count" ]]; then + exit 0 +else + echo "Expected $expected_count coredumps, got $actual_count!" + exit 1 +fi