Skip to content
Open
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
31928ae
Merge branch 'master' into gpu-mixed-krylov
oseikuffuor1 Oct 21, 2025
a8835bb
Update autoconf build to prevent use of long double with GPUs.
oseikuffuor1 Oct 23, 2025
57d6ebb
Add utility functions for mixed-precision function development.
oseikuffuor1 Nov 14, 2025
542180e
Add new mixed-precision matrix and vector functionality.
oseikuffuor1 Nov 14, 2025
cd69eba
Updates to support use of hypre_double for dnum_nonzeros in parcsr_ma…
oseikuffuor1 Nov 14, 2025
7781cad
Minor updates to test drivers.
oseikuffuor1 Nov 14, 2025
5ce7060
Fixes for GPU build errors.
oseikuffuor1 Nov 14, 2025
91a169f
Cleanup typos and unnecessary print statements.
oseikuffuor1 Nov 14, 2025
cb030d2
Change utility function name GetSizeOfPrecision to GetSizeOfReal.
oseikuffuor1 Nov 14, 2025
8bfdbb9
Generate configure script with correct autoconf version.
oseikuffuor1 Nov 15, 2025
887c4c3
Cleanup unused variable warnings.
oseikuffuor1 Nov 15, 2025
dfea312
Some fixes and cleanup and minor refactoring for GPU build.
oseikuffuor1 Nov 18, 2025
18e458a
Add gpu function lists for utilities folder.
oseikuffuor1 Nov 18, 2025
7c44083
Added GPU-specific function lists for remaining directories.
oseikuffuor1 Nov 19, 2025
7163b69
Revert changes to MPI function location and remove unnecessary guards.
oseikuffuor1 Nov 19, 2025
1c74ee1
Updating gen_code.sh script to build code for gpu functions
rfalgout Nov 20, 2025
40f8a3a
Merge branch 'gpu-mixed-krylov' of github.com:hypre-space/hypre into …
rfalgout Nov 20, 2025
ac3e7f3
Fixed astyle changes in previous commits
oseikuffuor1 Nov 20, 2025
fd1e9e0
Merge branch 'gpu-mixed-krylov' of github.com:hypre-space/hypre into …
rfalgout Nov 20, 2025
366e620
Minor changes to config/gen_code.sh script
rfalgout Nov 20, 2025
05baa66
Removed empty gpu function lists
oseikuffuor1 Nov 21, 2025
489e1fa
Minor edit to suppress warnings for directories with no GPU function …
oseikuffuor1 Nov 21, 2025
3264a49
Minor changes to gen_code.sh to make it easier to follow
rfalgout Dec 3, 2025
1fecea4
Cosmetic changes to gen_code.sh
rfalgout Dec 3, 2025
f052718
Changed mup.*.gpu to mup.*_gpu in utilities and modified the gen_code…
rfalgout Dec 3, 2025
34762e3
Renaming mup.*.gpu to mup.*_gpu
rfalgout Dec 4, 2025
5bc2ddb
Modified gen_code.sh script to write 'mup_*_gpu.c' files
rfalgout Dec 4, 2025
3996555
First draft of gen_code.sh to put the gpu functions in separate files
rfalgout Dec 4, 2025
e20afec
Updating gen_code.sh to fix a couple of issues
rfalgout Dec 5, 2025
3fd5776
Added gpu if test to gen_code.sh script before writing '.hpp' file
rfalgout Dec 5, 2025
69c34e8
Minor edit to include internal header in wrapper function file.
oseikuffuor1 Dec 9, 2025
d4fe2a5
Changes to unify some vendor-specific return types.
oseikuffuor1 Dec 9, 2025
a3c320e
Generated new MuP wrapper code and headers for GPU support and some r…
oseikuffuor1 Dec 9, 2025
49f8e70
Fixed linker issues and updated gen_code.sh script.
oseikuffuor1 Dec 15, 2025
905f1b7
Some minor re-org. of gen_code.sh script and fix for test driver.
oseikuffuor1 Dec 16, 2025
feb75f2
Merge branch 'master' into gpu-mixed-krylov
oseikuffuor1 Dec 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions src/IJ_mv/mup.fixed.gpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
hypreCUDAKernel_IJVectorUpdateValues
hypreGPUKernel_IJMatrixValues_dev1
hypreGPUKernel_IJVectorAssemblePar
hypre_AuxParCSRMatrixStackReallocate
hypre_IJMatrixAssembleCommunicate
hypre_IJMatrixAssembleCompressDevice
hypre_IJMatrixAssembleParCSRDevice
hypre_IJMatrixAssembleSortAndReduce1
hypre_IJMatrixAssembleSortAndReduce2
hypre_IJMatrixAssembleSortAndReduce3
hypre_IJMatrixGetValuesParCSRDevice
hypre_IJMatrixSetAddValuesParCSRDevice
hypre_IJMatrixSetConstantValuesParCSRDevice
hypre_IJVectorAssembleParDevice
hypre_IJVectorAssembleSortAndReduce1
hypre_IJVectorAssembleSortAndReduce3
hypre_IJVectorSetAddValuesParDevice
hypre_IJVectorUpdateValuesDevice
Empty file added src/IJ_mv/mup.functions.gpu
Empty file.
Empty file added src/IJ_mv/mup.methods.gpu
Empty file.
Empty file added src/blas/mup.fixed.gpu
Empty file.
Empty file added src/blas/mup.functions.gpu
Empty file.
Empty file added src/blas/mup.methods.gpu
Empty file.
21 changes: 17 additions & 4 deletions src/config/configure.in
Original file line number Diff line number Diff line change
Expand Up @@ -347,10 +347,6 @@ AS_HELP_STRING([--enable-longdouble],
esac],
[hypre_using_longdouble=no]
)
if test "$hypre_using_longdouble" = "yes"
then
AC_DEFINE(HYPRE_LONG_DOUBLE, 1, [Define to 1 if using quad precision values for HYPRE_Real])
fi

AC_ARG_ENABLE(complex,
AS_HELP_STRING([--enable-complex],
Expand Down Expand Up @@ -2424,6 +2420,23 @@ AS_IF([ test x"$hypre_using_hip" == x"yes" ],
[AC_MSG_ERROR([unable to find ${HYPRE_ROCM_PREFIX}/include/hip/hip_common.h ... Ensure ROCm is installed and set ROCM_PATH environment variable to ROCm installation path.])] )],
[])

dnl *********************************************************************
dnl * Check for longdouble support
dnl *********************************************************************

if test "$hypre_using_longdouble" = "yes"
then
if [test "x$hypre_using_cuda" = "xyes" || test "x$hypre_using_device_openmp" = "xyes" || test "x$hypre_using_hip" = "xyes" || test "x$hypre_using_sycl" = "xyes"]
then
AC_MSG_ERROR([******************** Incompatible precision on device **********************
Long double data format is not supported on device.
For GPU builds, please use the default double precision or --enable-single.
****************************************************************************])
else
AC_DEFINE(HYPRE_LONG_DOUBLE, 1, [Define to 1 if using quad precision values for HYPRE_Real])
fi
fi

dnl *********************************************************************
dnl * Set GPU warp size
dnl *********************************************************************
Expand Down
165 changes: 120 additions & 45 deletions src/config/gen_code.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,38 @@ OUTP=$3
#### then need to do a manual merge. The header files can be generated fully
#### automatically as below.

############################################################################
# Define gpu tag - set to an empty string if there are no gpu list files
#
# Note that the following line will always generate an empty-string value for c,
# but it will only generate another value for c when $gpu is non-empty
#
# for c in "" $gpu --> c="" when gpu=""
# --> c="", c="$gpu" when gpu="non-empty-string"
#
# This is useful below for executing the same lines of code for the normal case
# and the gpu case (and potential future cases).
#
############################################################################

gpu=""
if [ -f mup.fixed.gpu ]
then
gpu=".gpu"
fi

############################################################################
# Make sure the function list files are sorted (list capital letters first)
############################################################################

for i in fixed functions methods
for c in "" $gpu
do
(export LC_COLLATE=C; sort mup.${i} | uniq) > mup.${i}.tmp
mv mup.${i}.tmp mup.${i}
for i in fixed functions methods
do
tag=${i}${c}
(export LC_COLLATE=C; sort mup.${tag} | uniq) > mup.${tag}.tmp
mv mup.${tag}.tmp mup.${tag}
done
done

############################################################################
Expand Down Expand Up @@ -78,14 +102,25 @@ cat >> $MUP_HEADER <<@

cat>> $MUP_HEADER <<@
$(
cat mup.functions mup.methods | while read -r func_name
do
echo "#define $func_name HYPRE_MULTIPRECISION_FUNC ( $func_name )"
done
cat mup.fixed | while read -r func_name
do
echo "#define $func_name HYPRE_FIXEDPRECISION_FUNC ( $func_name )"
done
for c in "" $gpu
do
for i in functions methods
do
tag=${i}${c}
cat mup.${tag} | while read -r func_name
do
echo "#define $func_name HYPRE_MULTIPRECISION_FUNC ( $func_name )"
done
done
for i in fixed
do
tag=${i}${c}
cat mup.${tag} | while read -r func_name
do
echo "#define $func_name HYPRE_FIXEDPRECISION_FUNC ( $func_name )"
done
done
done
)

#endif
Expand All @@ -97,7 +132,7 @@ done

if [ "${OUTP}" = "onlydef" ]
then
exit
exit
fi

#===========================================================================
Expand All @@ -124,34 +159,46 @@ cat >> $MUP_HEADER <<@

cat>> $MUP_HEADER <<@
$(
cat mup.functions mup.methods mup.fixed | while read -r func_name
do
echo "#undef $func_name"
done
for c in "" $gpu
do
for i in functions methods fixed
do
tag=${i}${c}
cat mup.${tag} | while read -r func_name
do
echo "#undef $func_name"
done
done
done
)
@

############################################################################
# Create temporary files and initial code
############################################################################

# Create file with list of MUP functions and methods
cat mup.functions mup.methods > mup.pre

# Create prototype information files
for i in fixed functions
for c in "" $gpu
do
$scriptdir/gen_proto_info.sh mup.${i} ${EXTH} > ${OUTP}.${i}.ext.proto
$scriptdir/gen_proto_info.sh mup.${i} ${INTH} > ${OUTP}.${i}.int.proto
for i in fixed functions methods
do
tag=${i}${c}
$scriptdir/gen_proto_info.sh mup.${tag} ${EXTH} > ${OUTP}.${tag}.ext.proto
$scriptdir/gen_proto_info.sh mup.${tag} ${INTH} > ${OUTP}.${tag}.int.proto
done
cat ${OUTP}.functions${c}.ext.proto ${OUTP}.methods${c}.ext.proto > ${OUTP}.pre${c}.ext.proto
cat ${OUTP}.functions${c}.int.proto ${OUTP}.methods${c}.int.proto > ${OUTP}.pre${c}.int.proto
done
cat ${OUTP}.functions.ext.proto > ${OUTP}.pre.ext.proto
cat ${OUTP}.functions.int.proto > ${OUTP}.pre.int.proto

# Create C implementation files and header files
for i in fixed functions pre
for c in "" $gpu
do
$scriptdir/gen_code_awk.sh ${OUTP}.${i}.ext.proto ${OUTP}_${i}_ext ${i}
$scriptdir/gen_code_awk.sh ${OUTP}.${i}.int.proto ${OUTP}_${i}_int ${i}
for i in fixed functions pre
do
tag=${i}${c}
$scriptdir/gen_code_awk.sh ${OUTP}.${tag}.ext.proto ${OUTP}_${tag}_ext ${i}
$scriptdir/gen_code_awk.sh ${OUTP}.${tag}.int.proto ${OUTP}_${tag}_int ${i}
done
done

############################################################################
Expand Down Expand Up @@ -179,6 +226,12 @@ cat > $FOUT <<@

$scriptdir/write_header.sh >> $FOUT
cat ${OUTP}_${i}_ext.c ${OUTP}_${i}_int.c >> $FOUT
if [ -n "$gpu" ]
then
echo "#if defined(HYPRE_USING_GPU)" >> $FOUT
cat ${OUTP}_${i}${gpu}_ext.c ${OUTP}_${i}${gpu}_int.c >> $FOUT
echo "#endif" >> $FOUT
fi

cat >> $FOUT <<@

Expand All @@ -190,7 +243,15 @@ cat >> $FOUT <<@
# add header info to the prototype files
#========================================

# external file

FOUT=${OUTP}_${i}_ext.h
if [ -n "$gpu" ]
then
echo "#if defined(HYPRE_USING_GPU)" >> $FOUT
cat ${OUTP}_${i}${gpu}_ext.h >> $FOUT
echo "#endif" >> $FOUT
fi
cat > $FOUT.tmp <<@

/*** DO NOT EDIT THIS FILE DIRECTLY (use $0 to generate) ***/
Expand All @@ -200,7 +261,15 @@ $scriptdir/write_header.sh >> $FOUT.tmp
cat $FOUT >> $FOUT.tmp
mv $FOUT.tmp $FOUT

# internal file

FOUT=${OUTP}_${i}_int.h
if [ -n "$gpu" ]
then
echo "#if defined(HYPRE_USING_GPU)" >> $FOUT
cat ${OUTP}_${i}${gpu}_int.h >> $FOUT
echo "#endif" >> $FOUT
fi
cat > $FOUT.tmp <<@

/*** DO NOT EDIT THIS FILE DIRECTLY (use $0 to generate) ***/
Expand All @@ -212,15 +281,6 @@ mv $FOUT.tmp $FOUT

done

############################################################################
# Remove temporary files
############################################################################

rm -f mup.pre
rm -f ${OUTP}.*.proto
rm -f ${OUTP}_*_ext.c
rm -f ${OUTP}_*_int.c

############################################################################
# Generate header files
############################################################################
Expand All @@ -245,9 +305,10 @@ extern "C" {
#if defined (HYPRE_MIXED_PRECISION)
@

cat ${OUTP}_fixed_ext.h >> $MUP_HEADER
cat ${OUTP}_functions_ext.h >> $MUP_HEADER
cat ${OUTP}_pre_ext.h >> $MUP_HEADER
for i in fixed functions pre
do
cat ${OUTP}_${i}_ext.h >> $MUP_HEADER
done

cat >> $MUP_HEADER <<@

Expand All @@ -261,8 +322,6 @@ cat >> $MUP_HEADER <<@

@

rm -f ${OUTP}_fixed_ext.h ${OUTP}_functions_ext.h ${OUTP}_pre_ext.h

#===========================================================================
# Create internal header file
#===========================================================================
Expand All @@ -283,9 +342,10 @@ extern "C" {
#if defined (HYPRE_MIXED_PRECISION)
@

cat ${OUTP}_fixed_int.h >> $MUP_HEADER
cat ${OUTP}_functions_int.h >> $MUP_HEADER
cat ${OUTP}_pre_int.h >> $MUP_HEADER
for i in fixed functions pre
do
cat ${OUTP}_${i}_int.h >> $MUP_HEADER
done

cat >> $MUP_HEADER <<@

Expand All @@ -299,4 +359,19 @@ cat >> $MUP_HEADER <<@

@

rm -f ${OUTP}_fixed_int.h ${OUTP}_functions_int.h ${OUTP}_pre_int.h
############################################################################
# Remove temporary files
############################################################################

for c in "" $gpu
do
for i in fixed functions methods pre
do
tag=${i}${c}
rm -f ${OUTP}.${tag}.ext.proto
rm -f ${OUTP}.${tag}.int.proto
rm -f ${OUTP}_${tag}_ext.[ch]
rm -f ${OUTP}_${tag}_int.[ch]
done
done

4 changes: 3 additions & 1 deletion src/config/generate_function_list.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,6 @@
shopt -s nullglob

# Use awk to avoid issues with spacing
nm -P *.o *.obj | awk '$2 == "T" {print $1}' | sed -e 's/^_//' -e 's/_$//'
# Demangle any C++ mangled names and strip __device_stub__ prefixes.
nm -P *.o *.obj | awk '$2 == "T" {print $1}' | c++filt | sed -e 's/(.*$//' -e 's/^__device_stub__//' -e 's/^_//' -e 's/_$//'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you looked into the portability of c++filt?

Copy link
Contributor Author

@oseikuffuor1 oseikuffuor1 Nov 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I have thought a bit about this. From what I can tell, it is supported in binutils, macOS, most BSD systems, and MinGW/Cygwin, and so should be fairly portable. Ideally we would use the "-C" demangle option for nm, since it is built into nm itself, but unfortunately it does not work well. There is also --demangle for nm on GNU/Linux systems, but c++filt is more portable.


13 changes: 12 additions & 1 deletion src/config/mup_check_dir.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

scriptdir=`dirname $0`

BUILD_TYPE=$(echo "$1" | tr '[:lower:]' '[:upper:]')

# Check if terminal supports colors
if [ -t 1 ]; then
# Use colors
Expand All @@ -30,7 +32,16 @@ fi

export LC_COLLATE=C # sort by listing capital letters first

cat mup.fixed mup.functions mup.methods | sort | uniq > mup_check.old

if [ "$BUILD_TYPE" = "GPU" ]; then
cat mup.fixed mup.fixed.gpu \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You will probably need to modify this if we choose to only include the '.gpu' files in those directories that need them. Maybe a better way to do this is to create a 'FILES' variable that starts out having the standard three, then appends the '.gpu' files if needed, then runs only one 'cat' line at the end. This would also be easily extensible if something else comes up in the future.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's correct. For now, I redirected the errors with 2>/dev/null to suppress the "file not found" messages so I don't have to check for the files in each directory, but your suggestion is probably a better approach.

mup.functions mup.functions.gpu \
mup.methods mup.methods.gpu \
| sort | uniq > mup_check.old
else
cat mup.fixed mup.functions mup.methods | sort | uniq > mup_check.old
fi

$scriptdir/generate_function_list.sh | sort | uniq > mup_check.new

# Remove functions listed in mup.exclude (if it exists)
Expand Down
22 changes: 16 additions & 6 deletions src/configure
Original file line number Diff line number Diff line change
Expand Up @@ -3608,12 +3608,6 @@ else $as_nop

fi

if test "$hypre_using_longdouble" = "yes"
then

printf "%s\n" "#define HYPRE_LONG_DOUBLE 1" >>confdefs.h

fi

# Check whether --enable-complex was given.
if test ${enable_complex+y}
Expand Down Expand Up @@ -10651,6 +10645,22 @@ fi
fi


if test "$hypre_using_longdouble" = "yes"
then
if test "x$hypre_using_cuda" = "xyes" || test "x$hypre_using_device_openmp" = "xyes" || test "x$hypre_using_hip" = "xyes" || test "x$hypre_using_sycl" = "xyes"
then
as_fn_error $? "******************** Incompatible precision on device **********************
Long double data format is not supported on device.
For GPU builds, please use the default double precision or --enable-single.
****************************************************************************" "$LINENO" 5
else

printf "%s\n" "#define HYPRE_LONG_DOUBLE 1" >>confdefs.h

fi
fi


if test "x$hypre_warp_size" = "xauto"
then
if test "x$hypre_using_cuda" = "xyes" || test "x$hypre_using_sycl" = "xyes"
Expand Down
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file added src/krylov/mup.fixed.gpu
Empty file.
Empty file added src/krylov/mup.functions.gpu
Empty file.
Empty file added src/krylov/mup.methods.gpu
Empty file.
Empty file added src/lapack/mup.fixed.gpu
Empty file.
Empty file added src/lapack/mup.functions.gpu
Empty file.
Empty file added src/lapack/mup.methods.gpu
Empty file.
Empty file added src/matrix_matrix/mup.fixed.gpu
Empty file.
Empty file.
Empty file.
Empty file added src/multivector/mup.fixed.gpu
Empty file.
Empty file.
Empty file added src/multivector/mup.methods.gpu
Empty file.
2 changes: 1 addition & 1 deletion src/parcsr_block_mv/_hypre_parcsr_block_mv.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ typedef struct
HYPRE_Int owns_data;

HYPRE_BigInt num_nonzeros;
HYPRE_Real d_num_nonzeros;
hypre_double d_num_nonzeros;

/* Buffers used by GetRow to hold row currently being accessed. AJC, 4/99 */
HYPRE_Int *rowindices;
Expand Down
Empty file.
Empty file.
Empty file.
Loading