diff --git a/src/IJ_mv/HYPRE_IJ_mv_mup.h b/src/IJ_mv/HYPRE_IJ_mv_mup.h index 2f991cb873..4975546162 100644 --- a/src/IJ_mv/HYPRE_IJ_mv_mup.h +++ b/src/IJ_mv/HYPRE_IJ_mv_mup.h @@ -1,17 +1,6 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ -#ifndef HYPRE_IJ_MV_MUP_HEADER -#define HYPRE_IJ_MV_MUP_HEADER - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined (HYPRE_MIXED_PRECISION) - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. @@ -19,17 +8,18 @@ extern "C" { * SPDX-License-Identifier: (Apache-2.0 OR MIT) ******************************************************************************/ +#ifndef HYPRE_IJ_MV_MUP_HEADER +#define HYPRE_IJ_MV_MUP_HEADER +#ifdef __cplusplus +extern "C" { +#endif -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +#if defined (HYPRE_MIXED_PRECISION) -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ +/* fixed */ +/* functions */ HYPRE_Int HYPRE_IJMatrixAdd_flt( hypre_float alpha, HYPRE_IJMatrix matrix_A, hypre_float beta, HYPRE_IJMatrix matrix_B, HYPRE_IJMatrix *matrix_C ); @@ -607,16 +597,7 @@ HYPRE_IJVectorUpdateValues_long_dbl( HYPRE_IJVector vector, HYPRE_Int nvalues, c HYPRE_Int HYPRE_IJVectorUpdateValues( HYPRE_IJVector vector, HYPRE_Int nvalues, const HYPRE_BigInt *indices, const void *values, HYPRE_Int action ); - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* pre */ HYPRE_Int HYPRE_IJMatrixAdd_pre( HYPRE_Precision precision, hypre_long_double alpha, HYPRE_IJMatrix matrix_A, hypre_long_double beta, HYPRE_IJMatrix matrix_B, HYPRE_IJMatrix *matrix_C ); @@ -810,7 +791,6 @@ HYPRE_IJVectorSetValues_pre( HYPRE_Precision precision, HYPRE_IJVector vector, H HYPRE_Int HYPRE_IJVectorUpdateValues_pre( HYPRE_Precision precision, HYPRE_IJVector vector, HYPRE_Int nvalues, const HYPRE_BigInt *indices, const void *values, HYPRE_Int action ); - #endif #ifdef __cplusplus diff --git a/src/IJ_mv/Makefile b/src/IJ_mv/Makefile index 7d7a0ca2f8..6858997ea2 100644 --- a/src/IJ_mv/Makefile +++ b/src/IJ_mv/Makefile @@ -54,6 +54,11 @@ MP_FILES = \ mup_functions.c\ mup_pre.c +MP_CUFILES=\ + mup_fixed_gpu.c\ + mup_functions_gpu.c\ + mup_pre_gpu.c + COBJS = ${FILES:.c=.o} CUOBJS = ${CUFILES:.c=.obj} OBJS = ${COBJS} ${CUOBJS} @@ -67,9 +72,10 @@ CUOBJS_single = ${CUFILES:.c=.obj_flt} CUOBJS_double = ${CUFILES:.c=.obj_dbl} CUOBJS_longdouble = 
${CUFILES:.c=.obj_ldbl} MP_COBJS = ${MP_FILES:.c=.o} +MP_CUOBJS = ${MP_CUFILES:.c=.obj} OBJS = ${COBJS_single} ${COBJS_double} ${COBJS_longdouble} ${MP_COBJS} -OBJS += ${CUOBJS_single} ${CUOBJS_double} ${CUOBJS_longdouble} +OBJS += ${CUOBJS_single} ${CUOBJS_double} ${CUOBJS_longdouble} ${MP_CUOBJS} endif diff --git a/src/IJ_mv/_hypre_IJ_mv.hpp b/src/IJ_mv/_hypre_IJ_mv.hpp new file mode 100644 index 0000000000..6d480ac018 --- /dev/null +++ b/src/IJ_mv/_hypre_IJ_mv.hpp @@ -0,0 +1,36 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use 'headers' to generate) ***/ + +#ifndef hypre_IJ_HPP +#define hypre_IJ_HPP + +#include <HYPRE_config.h> +#include "_hypre_utilities.hpp" + +#ifdef HYPRE_MIXED_PRECISION +#include "_hypre_IJ_mv_mup_def.h" +#endif + +#ifdef __cplusplus +extern "C++" { +#endif + + +#ifdef __cplusplus +} +#endif + +#ifdef HYPRE_MIXED_PRECISION +/* The following is for user compiles and the order is important. The first + * header ensures that we do not change prototype names in user files or in the + * second header file. The second header contains all the prototypes needed by + * users for mixed precision. */ +#ifndef hypre_MP_BUILD +#include "_hypre_IJ_mv_mup_undef.h" +#include "_hypre_IJ_mv_mup.h" +#include "_hypre_IJ_mv_mup.hpp" +#endif +#endif + +#endif + diff --git a/src/IJ_mv/_hypre_IJ_mv_mup.h b/src/IJ_mv/_hypre_IJ_mv_mup.h index accdaa9363..d71497f027 100644 --- a/src/IJ_mv/_hypre_IJ_mv_mup.h +++ b/src/IJ_mv/_hypre_IJ_mv_mup.h @@ -1,6 +1,13 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef hypre_IJ_MV_MUP_HEADER #define hypre_IJ_MV_MUP_HEADER @@ -10,15 +17,7 @@ extern "C" { #if defined (HYPRE_MIXED_PRECISION) -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* fixed */ HYPRE_Int hypre_AuxParCSRMatrixCreate_flt( hypre_AuxParCSRMatrix **aux_matrix, HYPRE_Int local_num_rows, HYPRE_Int local_num_cols, HYPRE_Int *sizes ); @@ -447,28 +446,9 @@ hypre_IJVectorZeroValuesPar_dbl( hypre_IJVector *vector ); HYPRE_Int hypre_IJVectorZeroValuesPar_long_dbl( hypre_IJVector *vector ); +/* functions */ -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - +/* pre */ #endif diff --git a/src/IJ_mv/_hypre_IJ_mv_mup.hpp b/src/IJ_mv/_hypre_IJ_mv_mup.hpp new file mode 100644 index 0000000000..6236539f53 --- /dev/null +++ b/src/IJ_mv/_hypre_IJ_mv_mup.hpp @@ -0,0 +1,116 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#ifndef hypre_IJ_MV_MUP_HEADER_CXX +#define hypre_IJ_MV_MUP_HEADER_CXX + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined (HYPRE_MIXED_PRECISION) +#if defined(HYPRE_USING_GPU) + +/* fixed_gpu */ + +HYPRE_Int +hypre_IJMatrixAssembleCommunicate_flt( hypre_IJMatrix *matrix ); +HYPRE_Int +hypre_IJMatrixAssembleCommunicate_dbl( hypre_IJMatrix *matrix ); +HYPRE_Int +hypre_IJMatrixAssembleCommunicate_long_dbl( hypre_IJMatrix *matrix ); + +HYPRE_Int +hypre_IJMatrixAssembleCompressDevice_flt( hypre_IJMatrix *matrix, HYPRE_Int reduce_stack_size ); +HYPRE_Int +hypre_IJMatrixAssembleCompressDevice_dbl( hypre_IJMatrix *matrix, HYPRE_Int reduce_stack_size ); +HYPRE_Int +hypre_IJMatrixAssembleCompressDevice_long_dbl( hypre_IJMatrix *matrix, HYPRE_Int reduce_stack_size ); + +HYPRE_Int +hypre_IJMatrixAssembleParCSRDevice_flt( hypre_IJMatrix *matrix ); +HYPRE_Int +hypre_IJMatrixAssembleParCSRDevice_dbl( hypre_IJMatrix *matrix ); +HYPRE_Int +hypre_IJMatrixAssembleParCSRDevice_long_dbl( hypre_IJMatrix *matrix ); + +HYPRE_Int +hypre_IJMatrixGetValuesParCSRDevice_flt( hypre_IJMatrix *matrix, HYPRE_Int nrows, HYPRE_Int *ncols, HYPRE_BigInt *rows, HYPRE_Int *row_indexes, HYPRE_BigInt 
*cols, hypre_float *values, HYPRE_Int zero_out ); +HYPRE_Int +hypre_IJMatrixGetValuesParCSRDevice_dbl( hypre_IJMatrix *matrix, HYPRE_Int nrows, HYPRE_Int *ncols, HYPRE_BigInt *rows, HYPRE_Int *row_indexes, HYPRE_BigInt *cols, hypre_double *values, HYPRE_Int zero_out ); +HYPRE_Int +hypre_IJMatrixGetValuesParCSRDevice_long_dbl( hypre_IJMatrix *matrix, HYPRE_Int nrows, HYPRE_Int *ncols, HYPRE_BigInt *rows, HYPRE_Int *row_indexes, HYPRE_BigInt *cols, hypre_long_double *values, HYPRE_Int zero_out ); + +HYPRE_Int +hypre_IJMatrixSetAddValuesParCSRDevice_flt( hypre_IJMatrix *matrix, HYPRE_Int nrows, HYPRE_Int *ncols, const HYPRE_BigInt *rows, const HYPRE_Int *row_indexes, const HYPRE_BigInt *cols, const hypre_float *values, const char *action ); +HYPRE_Int +hypre_IJMatrixSetAddValuesParCSRDevice_dbl( hypre_IJMatrix *matrix, HYPRE_Int nrows, HYPRE_Int *ncols, const HYPRE_BigInt *rows, const HYPRE_Int *row_indexes, const HYPRE_BigInt *cols, const hypre_double *values, const char *action ); +HYPRE_Int +hypre_IJMatrixSetAddValuesParCSRDevice_long_dbl( hypre_IJMatrix *matrix, HYPRE_Int nrows, HYPRE_Int *ncols, const HYPRE_BigInt *rows, const HYPRE_Int *row_indexes, const HYPRE_BigInt *cols, const hypre_long_double *values, const char *action ); + +HYPRE_Int +hypre_IJMatrixSetConstantValuesParCSRDevice_flt( hypre_IJMatrix *matrix, hypre_float value ); +HYPRE_Int +hypre_IJMatrixSetConstantValuesParCSRDevice_dbl( hypre_IJMatrix *matrix, hypre_double value ); +HYPRE_Int +hypre_IJMatrixSetConstantValuesParCSRDevice_long_dbl( hypre_IJMatrix *matrix, hypre_long_double value ); + +HYPRE_Int +hypre_IJVectorAssembleParDevice_flt( hypre_IJVector *vector ); +HYPRE_Int +hypre_IJVectorAssembleParDevice_dbl( hypre_IJVector *vector ); +HYPRE_Int +hypre_IJVectorAssembleParDevice_long_dbl( hypre_IJVector *vector ); + +HYPRE_Int +hypre_IJVectorSetAddValuesParDevice_flt( hypre_IJVector *vector, HYPRE_Int num_values, const HYPRE_BigInt *indices, const hypre_float *values, const char *action ); 
+HYPRE_Int +hypre_IJVectorSetAddValuesParDevice_dbl( hypre_IJVector *vector, HYPRE_Int num_values, const HYPRE_BigInt *indices, const hypre_double *values, const char *action ); +HYPRE_Int +hypre_IJVectorSetAddValuesParDevice_long_dbl( hypre_IJVector *vector, HYPRE_Int num_values, const HYPRE_BigInt *indices, const hypre_long_double *values, const char *action ); + +HYPRE_Int +hypre_IJVectorUpdateValuesDevice_flt( hypre_IJVector *vector, HYPRE_Int num_values, const HYPRE_BigInt *indices, const hypre_float *values, HYPRE_Int action ); +HYPRE_Int +hypre_IJVectorUpdateValuesDevice_dbl( hypre_IJVector *vector, HYPRE_Int num_values, const HYPRE_BigInt *indices, const hypre_double *values, HYPRE_Int action ); +HYPRE_Int +hypre_IJVectorUpdateValuesDevice_long_dbl( hypre_IJVector *vector, HYPRE_Int num_values, const HYPRE_BigInt *indices, const hypre_long_double *values, HYPRE_Int action ); + +/* functions_gpu */ + +/* pre_gpu */ + + +#endif +#endif + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus +extern "C++" { +#endif + +#if defined (HYPRE_MIXED_PRECISION) +#if defined(HYPRE_USING_GPU) + +/* fixed_gpu */ + + +#endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/src/IJ_mv/_hypre_IJ_mv_mup_def.h b/src/IJ_mv/_hypre_IJ_mv_mup_def.h index 9cbfa154ac..79359b76fb 100644 --- a/src/IJ_mv/_hypre_IJ_mv_mup_def.h +++ b/src/IJ_mv/_hypre_IJ_mv_mup_def.h @@ -180,5 +180,23 @@ #define hypre_ijvectorsetmaxoffprocelmt HYPRE_FIXEDPRECISION_FUNC ( hypre_ijvectorsetmaxoffprocelmt ) #define hypre_ijvectorsetobjecttype HYPRE_FIXEDPRECISION_FUNC ( hypre_ijvectorsetobjecttype ) #define hypre_ijvectorsetvalues HYPRE_FIXEDPRECISION_FUNC ( hypre_ijvectorsetvalues ) +#define hypreCUDAKernel_IJVectorUpdateValues HYPRE_FIXEDPRECISION_FUNC ( hypreCUDAKernel_IJVectorUpdateValues ) +#define hypreGPUKernel_IJMatrixValues_dev1 HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_IJMatrixValues_dev1 ) +#define hypreGPUKernel_IJVectorAssemblePar HYPRE_FIXEDPRECISION_FUNC ( 
hypreGPUKernel_IJVectorAssemblePar ) +#define hypre_AuxParCSRMatrixStackReallocate HYPRE_FIXEDPRECISION_FUNC ( hypre_AuxParCSRMatrixStackReallocate ) +#define hypre_IJMatrixAssembleCommunicate HYPRE_FIXEDPRECISION_FUNC ( hypre_IJMatrixAssembleCommunicate ) +#define hypre_IJMatrixAssembleCompressDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_IJMatrixAssembleCompressDevice ) +#define hypre_IJMatrixAssembleParCSRDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_IJMatrixAssembleParCSRDevice ) +#define hypre_IJMatrixAssembleSortAndReduce1 HYPRE_FIXEDPRECISION_FUNC ( hypre_IJMatrixAssembleSortAndReduce1 ) +#define hypre_IJMatrixAssembleSortAndReduce2 HYPRE_FIXEDPRECISION_FUNC ( hypre_IJMatrixAssembleSortAndReduce2 ) +#define hypre_IJMatrixAssembleSortAndReduce3 HYPRE_FIXEDPRECISION_FUNC ( hypre_IJMatrixAssembleSortAndReduce3 ) +#define hypre_IJMatrixGetValuesParCSRDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_IJMatrixGetValuesParCSRDevice ) +#define hypre_IJMatrixSetAddValuesParCSRDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_IJMatrixSetAddValuesParCSRDevice ) +#define hypre_IJMatrixSetConstantValuesParCSRDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_IJMatrixSetConstantValuesParCSRDevice ) +#define hypre_IJVectorAssembleParDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_IJVectorAssembleParDevice ) +#define hypre_IJVectorAssembleSortAndReduce1 HYPRE_FIXEDPRECISION_FUNC ( hypre_IJVectorAssembleSortAndReduce1 ) +#define hypre_IJVectorAssembleSortAndReduce3 HYPRE_FIXEDPRECISION_FUNC ( hypre_IJVectorAssembleSortAndReduce3 ) +#define hypre_IJVectorSetAddValuesParDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_IJVectorSetAddValuesParDevice ) +#define hypre_IJVectorUpdateValuesDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_IJVectorUpdateValuesDevice ) #endif diff --git a/src/IJ_mv/_hypre_IJ_mv_mup_undef.h b/src/IJ_mv/_hypre_IJ_mv_mup_undef.h index e123a57762..4d718b6da9 100644 --- a/src/IJ_mv/_hypre_IJ_mv_mup_undef.h +++ b/src/IJ_mv/_hypre_IJ_mv_mup_undef.h @@ -177,3 +177,21 @@ #undef hypre_ijvectorsetmaxoffprocelmt #undef 
hypre_ijvectorsetobjecttype #undef hypre_ijvectorsetvalues +#undef hypreCUDAKernel_IJVectorUpdateValues +#undef hypreGPUKernel_IJMatrixValues_dev1 +#undef hypreGPUKernel_IJVectorAssemblePar +#undef hypre_AuxParCSRMatrixStackReallocate +#undef hypre_IJMatrixAssembleCommunicate +#undef hypre_IJMatrixAssembleCompressDevice +#undef hypre_IJMatrixAssembleParCSRDevice +#undef hypre_IJMatrixAssembleSortAndReduce1 +#undef hypre_IJMatrixAssembleSortAndReduce2 +#undef hypre_IJMatrixAssembleSortAndReduce3 +#undef hypre_IJMatrixGetValuesParCSRDevice +#undef hypre_IJMatrixSetAddValuesParCSRDevice +#undef hypre_IJMatrixSetConstantValuesParCSRDevice +#undef hypre_IJVectorAssembleParDevice +#undef hypre_IJVectorAssembleSortAndReduce1 +#undef hypre_IJVectorAssembleSortAndReduce3 +#undef hypre_IJVectorSetAddValuesParDevice +#undef hypre_IJVectorUpdateValuesDevice diff --git a/src/IJ_mv/headers b/src/IJ_mv/headers index 450f0a0ef9..8bf10ddde9 100755 --- a/src/IJ_mv/headers +++ b/src/IJ_mv/headers @@ -71,3 +71,59 @@ cat >> $INTERNAL_HEADER <<@ @ + +INTERNAL_HEADER=_hypre_IJ_mv.hpp + +#=========================================================================== +# Include guards and other includes +#=========================================================================== + +cat > $INTERNAL_HEADER <<@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use 'headers' to generate) ***/ + +#ifndef hypre_IJ_HPP +#define hypre_IJ_HPP + +#include <HYPRE_config.h> +#include "_hypre_utilities.hpp" + +#ifdef HYPRE_MIXED_PRECISION +#include "_hypre_IJ_mv_mup_def.h" +#endif + +#ifdef __cplusplus +extern "C++" { +#endif + +@ + +#=========================================================================== +# Structures and prototypes +#=========================================================================== + +#=========================================================================== +# Include guards +#=========================================================================== + +cat >> $INTERNAL_HEADER <<@ + +#ifdef 
__cplusplus +} +#endif + +#ifdef HYPRE_MIXED_PRECISION +/* The following is for user compiles and the order is important. The first + * header ensures that we do not change prototype names in user files or in the + * second header file. The second header contains all the prototypes needed by + * users for mixed precision. */ +#ifndef hypre_MP_BUILD +#include "_hypre_IJ_mv_mup_undef.h" +#include "_hypre_IJ_mv_mup.h" +#include "_hypre_IJ_mv_mup.hpp" +#endif +#endif + +#endif + +@ diff --git a/src/IJ_mv/mup.fixed_gpu b/src/IJ_mv/mup.fixed_gpu new file mode 100644 index 0000000000..bf9549461f --- /dev/null +++ b/src/IJ_mv/mup.fixed_gpu @@ -0,0 +1,18 @@ +hypreCUDAKernel_IJVectorUpdateValues +hypreGPUKernel_IJMatrixValues_dev1 +hypreGPUKernel_IJVectorAssemblePar +hypre_AuxParCSRMatrixStackReallocate +hypre_IJMatrixAssembleCommunicate +hypre_IJMatrixAssembleCompressDevice +hypre_IJMatrixAssembleParCSRDevice +hypre_IJMatrixAssembleSortAndReduce1 +hypre_IJMatrixAssembleSortAndReduce2 +hypre_IJMatrixAssembleSortAndReduce3 +hypre_IJMatrixGetValuesParCSRDevice +hypre_IJMatrixSetAddValuesParCSRDevice +hypre_IJMatrixSetConstantValuesParCSRDevice +hypre_IJVectorAssembleParDevice +hypre_IJVectorAssembleSortAndReduce1 +hypre_IJVectorAssembleSortAndReduce3 +hypre_IJVectorSetAddValuesParDevice +hypre_IJVectorUpdateValuesDevice diff --git a/src/IJ_mv/mup.functions_gpu b/src/IJ_mv/mup.functions_gpu new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/IJ_mv/mup.methods_gpu b/src/IJ_mv/mup.methods_gpu new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/IJ_mv/mup_fixed_gpu.c b/src/IJ_mv/mup_fixed_gpu.c new file mode 100644 index 0000000000..c0106d9157 --- /dev/null +++ b/src/IJ_mv/mup_fixed_gpu.c @@ -0,0 +1,95 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +#include "_hypre_IJ_mv.h" +#include "_hypre_IJ_mv.hpp" + +#ifdef HYPRE_MIXED_PRECISION + 
+/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#if defined(HYPRE_USING_GPU) + + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IJMatrixAssembleCommunicate( hypre_IJMatrix *matrix ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_IJMatrixAssembleCommunicate)( matrix ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IJMatrixAssembleCompressDevice( hypre_IJMatrix *matrix, HYPRE_Int reduce_stack_size ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_IJMatrixAssembleCompressDevice)( matrix, reduce_stack_size ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IJMatrixAssembleParCSRDevice( hypre_IJMatrix *matrix ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_IJMatrixAssembleParCSRDevice)( matrix ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IJMatrixGetValuesParCSRDevice( hypre_IJMatrix *matrix, HYPRE_Int nrows, HYPRE_Int *ncols, HYPRE_BigInt *rows, HYPRE_Int *row_indexes, HYPRE_BigInt *cols, HYPRE_Complex *values, HYPRE_Int zero_out ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_IJMatrixGetValuesParCSRDevice)( matrix, nrows, ncols, rows, row_indexes, cols, values, zero_out ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IJMatrixSetAddValuesParCSRDevice( hypre_IJMatrix *matrix, HYPRE_Int nrows, HYPRE_Int *ncols, const HYPRE_BigInt *rows, const HYPRE_Int *row_indexes, const HYPRE_BigInt *cols, const HYPRE_Complex *values, const char *action ) +{ + return 
HYPRE_CURRENTPRECISION_FUNC(hypre_IJMatrixSetAddValuesParCSRDevice)( matrix, nrows, ncols, rows, row_indexes, cols, values, action ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IJMatrixSetConstantValuesParCSRDevice( hypre_IJMatrix *matrix, HYPRE_Complex value ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_IJMatrixSetConstantValuesParCSRDevice)( matrix, value ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IJVectorAssembleParDevice( hypre_IJVector *vector ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_IJVectorAssembleParDevice)( vector ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IJVectorSetAddValuesParDevice( hypre_IJVector *vector, HYPRE_Int num_values, const HYPRE_BigInt *indices, const HYPRE_Complex *values, const char *action ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_IJVectorSetAddValuesParDevice)( vector, num_values, indices, values, action ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IJVectorUpdateValuesDevice( hypre_IJVector *vector, HYPRE_Int num_values, const HYPRE_BigInt *indices, const HYPRE_Complex *values, HYPRE_Int action ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_IJVectorUpdateValuesDevice)( vector, num_values, indices, values, action ); +} + + +#endif + +#endif + diff --git a/src/IJ_mv/mup_functions_gpu.c b/src/IJ_mv/mup_functions_gpu.c new file mode 100644 index 0000000000..2f6be3e3ae --- /dev/null +++ b/src/IJ_mv/mup_functions_gpu.c @@ -0,0 +1,22 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +#include "_hypre_IJ_mv.h" +#include "_hypre_IJ_mv.hpp" + +#ifdef HYPRE_MIXED_PRECISION + +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE 
Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#if defined(HYPRE_USING_GPU) + + +#endif + +#endif + diff --git a/src/IJ_mv/mup_pre_gpu.c b/src/IJ_mv/mup_pre_gpu.c new file mode 100644 index 0000000000..2f6be3e3ae --- /dev/null +++ b/src/IJ_mv/mup_pre_gpu.c @@ -0,0 +1,22 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +#include "_hypre_IJ_mv.h" +#include "_hypre_IJ_mv.hpp" + +#ifdef HYPRE_MIXED_PRECISION + +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#if defined(HYPRE_USING_GPU) + + +#endif + +#endif + diff --git a/src/blas/_hypre_blas_mup.h b/src/blas/_hypre_blas_mup.h index ce0cd56754..8d44e47bbd 100644 --- a/src/blas/_hypre_blas_mup.h +++ b/src/blas/_hypre_blas_mup.h @@ -1,6 +1,13 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef hypre_BLAS_MUP_HEADER #define hypre_BLAS_MUP_HEADER @@ -10,15 +17,7 @@ extern "C" { #if defined (HYPRE_MIXED_PRECISION) -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* fixed */ HYPRE_Real hypre_dasum_flt( HYPRE_Int *n, hypre_float *dx, HYPRE_Int *incx ); @@ -167,28 +166,9 @@ hypre_idamax_dbl( HYPRE_Int *n, hypre_double *dx, HYPRE_Int *incx ); HYPRE_Int hypre_idamax_long_dbl( HYPRE_Int *n, hypre_long_double *dx, HYPRE_Int *incx ); +/* functions */ -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - +/* pre */ #endif diff --git a/src/config/configure.in b/src/config/configure.in index c6a188292b..98fb6cb0a1 100644 --- a/src/config/configure.in +++ b/src/config/configure.in @@ -346,10 +346,6 @@ AS_HELP_STRING([--enable-longdouble], esac], [hypre_using_longdouble=no] ) -if test "$hypre_using_longdouble" = "yes" -then - AC_DEFINE(HYPRE_LONG_DOUBLE, 1, [Define to 1 if using quad precision values for HYPRE_Real]) -fi AC_ARG_ENABLE(complex, AS_HELP_STRING([--enable-complex], @@ -2432,6 +2428,23 @@ AS_IF([ test x"$hypre_using_hip" == x"yes" ], [AC_MSG_ERROR([unable to find ${HYPRE_ROCM_PREFIX}/include/hip/hip_common.h ... Ensure ROCm is installed and set ROCM_PATH environment variable to ROCm installation path.])] )], []) +dnl ********************************************************************* +dnl * Check for longdouble support +dnl ********************************************************************* + +if test "$hypre_using_longdouble" = "yes" +then + if [test "x$hypre_using_cuda" = "xyes" || test "x$hypre_using_device_openmp" = "xyes" || test "x$hypre_using_hip" = "xyes" || test "x$hypre_using_sycl" = "xyes"] + then + AC_MSG_ERROR([******************** Incompatible precision on device ********************** + Long double data format is not supported on device. + For GPU builds, please use the default double precision or --enable-single. 
+ ****************************************************************************]) + else + AC_DEFINE(HYPRE_LONG_DOUBLE, 1, [Define to 1 if using quad precision values for HYPRE_Real]) + fi +fi + dnl ********************************************************************* dnl * Set GPU warp size dnl ********************************************************************* diff --git a/src/config/gen_code.sh b/src/config/gen_code.sh index 3952a4aed2..5223ef6e76 100755 --- a/src/config/gen_code.sh +++ b/src/config/gen_code.sh @@ -30,14 +30,38 @@ OUTP=$3 #### then need to do a manual merge. The header files can be generated fully #### automatically as below. +############################################################################ +# Define gpu tag - set to an empty string if there are no gpu list files +# +# Note that the following line will always generate an empty-string value for c, +# but it will only generate another value for c when $gpu is non-empty +# +# for c in "" $gpu --> c="" when gpu="" +# --> c="", c="$gpu" when gpu="non-empty-string" +# +# This is useful below for executing the same lines of code for the normal case +# and the gpu case (and potential future cases). 
+# +############################################################################ + +gpu="" +if [ -f mup.fixed_gpu ] +then + gpu="_gpu" +fi + ############################################################################ # Make sure the function list files are sorted (list capital letters first) ############################################################################ -for i in fixed functions methods +for c in "" $gpu do - (export LC_COLLATE=C; sort mup.${i} | uniq) > mup.${i}.tmp - mv mup.${i}.tmp mup.${i} + for i in fixed functions methods + do + tag=${i}${c} + (export LC_COLLATE=C; sort mup.${tag} | uniq) > mup.${tag}.tmp + mv mup.${tag}.tmp mup.${tag} + done done ############################################################################ @@ -51,9 +75,9 @@ intp=${INTH%.*} # extract the prefix from the internal header extname=`echo $extp | sed -e 's/HYPRE_//g' -e 's/_hypre_//g' | awk '{print toupper($0)}'` intname=`echo $intp | sed -e 's/HYPRE_//g' -e 's/_hypre_//g' | awk '{print toupper($0)}'` -#=========================================================================== +#============================================================ # Create def header file -#=========================================================================== +#============================================================ MUP_HEADER=${intp}_mup_def.h @@ -78,31 +102,42 @@ cat >> $MUP_HEADER <<@ cat>> $MUP_HEADER <<@ $( -cat mup.functions mup.methods | while read -r func_name -do - echo "#define $func_name HYPRE_MULTIPRECISION_FUNC ( $func_name )" -done -cat mup.fixed | while read -r func_name -do - echo "#define $func_name HYPRE_FIXEDPRECISION_FUNC ( $func_name )" -done + for c in "" $gpu + do + for i in functions methods + do + tag=${i}${c} + cat mup.${tag} | while read -r func_name + do + echo "#define $func_name HYPRE_MULTIPRECISION_FUNC ( $func_name )" + done + done + for i in fixed + do + tag=${i}${c} + cat mup.${tag} | while read -r func_name + do + echo "#define $func_name 
HYPRE_FIXEDPRECISION_FUNC ( $func_name )" + done + done + done ) #endif @ -#=========================================================================== +#============================================================ # Exit if we only need the def header file -#=========================================================================== +#============================================================ if [ "${OUTP}" = "onlydef" ] then - exit + exit fi -#=========================================================================== +#============================================================ # Create undef header file -#=========================================================================== +#============================================================ MUP_HEADER=${intp}_mup_undef.h @@ -124,10 +159,17 @@ cat >> $MUP_HEADER <<@ cat>> $MUP_HEADER <<@ $( -cat mup.functions mup.methods mup.fixed | while read -r func_name -do - echo "#undef $func_name" -done + for c in "" $gpu + do + for i in functions methods fixed + do + tag=${i}${c} + cat mup.${tag} | while read -r func_name + do + echo "#undef $func_name" + done + done + done ) @ @@ -135,39 +177,65 @@ done # Create temporary files and initial code ############################################################################ -# Create file with list of MUP functions and methods -cat mup.functions mup.methods > mup.pre +# Create prototype information files. We treat C header files and C++ +# header files separately to correctly define C/C++ linkage. The C++ header +# file contains functions requiring C++ linkage. 
-# Create prototype information files -for i in fixed functions +for c in "" $gpu do - $scriptdir/gen_proto_info.sh mup.${i} ${EXTH} > ${OUTP}.${i}.ext.proto - $scriptdir/gen_proto_info.sh mup.${i} ${INTH} > ${OUTP}.${i}.int.proto + for i in fixed functions methods + do + tag=${i}${c} + $scriptdir/gen_proto_info.sh mup.${tag} ${EXTH} > ${OUTP}.${tag}.ext.proto + $scriptdir/gen_proto_info.sh mup.${tag} ${INTH} > ${OUTP}.${tag}.int.proto + done + cat ${OUTP}.functions${c}.ext.proto ${OUTP}.methods${c}.ext.proto > ${OUTP}.pre${c}.ext.proto + cat ${OUTP}.functions${c}.int.proto ${OUTP}.methods${c}.int.proto > ${OUTP}.pre${c}.int.proto done -cat ${OUTP}.functions.ext.proto > ${OUTP}.pre.ext.proto -cat ${OUTP}.functions.int.proto > ${OUTP}.pre.int.proto # Create C implementation files and header files -for i in fixed functions pre +for c in "" $gpu do - $scriptdir/gen_code_awk.sh ${OUTP}.${i}.ext.proto ${OUTP}_${i}_ext ${i} - $scriptdir/gen_code_awk.sh ${OUTP}.${i}.int.proto ${OUTP}_${i}_int ${i} + for i in fixed functions pre + do + tag=${i}${c} + $scriptdir/gen_code_awk.sh ${OUTP}.${tag}.ext.proto ${OUTP}_${tag}_ext ${i} + $scriptdir/gen_code_awk.sh ${OUTP}.${tag}.int.proto ${OUTP}_${tag}_int ${i} + done done +# Special case for .hpp functions: +# These are only for GPU and are assumed to be strictly multiprecision +# functions. Hence only the fixed precision code and protos are generated. +INTHPP="" +if [ -f "${intp}.hpp" ] +then + INTHPP=${intp}.hpp + # generate prototype info file + tagpp=fixed${gpu} + $scriptdir/gen_proto_info.sh mup.${tagpp} ${INTHPP} > ${OUTP}.${tagpp}.intpp.proto + + # implementation for .hpp functions. 
+ $scriptdir/gen_code_awk.sh ${OUTP}.${tagpp}.intpp.proto ${OUTP}_${tagpp}_intpp fixed + cat ${OUTP}_${tagpp}_intpp.c >> ${OUTP}_${tagpp}_int.c +fi + ############################################################################ # Finalize code ############################################################################ +#============================================================ +# Create implementation files +#============================================================ + for i in fixed functions pre do -#======================================== -# generate implementation file -#======================================== - -FOUT=${OUTP}_${i}.c + #===== CPU files -cat > $FOUT <<@ + tag=${i} + FOUT=${OUTP}_${tag}.c + cat > $FOUT <<@ /*** DO NOT EDIT THIS FILE DIRECTLY (use $0 to generate) ***/ @@ -176,65 +244,56 @@ cat > $FOUT <<@ #ifdef HYPRE_MIXED_PRECISION @ - -$scriptdir/write_header.sh >> $FOUT -cat ${OUTP}_${i}_ext.c ${OUTP}_${i}_int.c >> $FOUT - -cat >> $FOUT <<@ + $scriptdir/write_header.sh >> $FOUT + cat ${OUTP}_${tag}_ext.c ${OUTP}_${tag}_int.c >> $FOUT + cat >> $FOUT <<@ #endif @ -#======================================== -# add header info to the prototype files -#======================================== + #===== GPU files (compiled with C++) -FOUT=${OUTP}_${i}_ext.h -cat > $FOUT.tmp <<@ + if [ -n "$gpu" ] + then + tag=${i}${gpu} + FOUT=${OUTP}_${tag}.c + cat > $FOUT <<@ /*** DO NOT EDIT THIS FILE DIRECTLY (use $0 to generate) ***/ -@ -$scriptdir/write_header.sh >> $FOUT.tmp -cat $FOUT >> $FOUT.tmp -mv $FOUT.tmp $FOUT - -FOUT=${OUTP}_${i}_int.h -cat > $FOUT.tmp <<@ +#include "$INTH" +#include "${intp}.hpp" -/*** DO NOT EDIT THIS FILE DIRECTLY (use $0 to generate) ***/ +#ifdef HYPRE_MIXED_PRECISION @ -$scriptdir/write_header.sh >> $FOUT.tmp -cat $FOUT >> $FOUT.tmp -mv $FOUT.tmp $FOUT + $scriptdir/write_header.sh >> $FOUT + echo "#if defined(HYPRE_USING_GPU)" >> $FOUT + cat ${OUTP}_${tag}_ext.c ${OUTP}_${tag}_int.c >> $FOUT + echo "#endif" >> $FOUT + cat >> 
$FOUT <<@ -done - -############################################################################ -# Remove temporary files -############################################################################ +#endif -rm -f mup.pre -rm -f ${OUTP}.*.proto -rm -f ${OUTP}_*_ext.c -rm -f ${OUTP}_*_int.c +@ + fi -############################################################################ -# Generate header files -############################################################################ +done -#=========================================================================== +#============================================================ # Create external header file -#=========================================================================== +#============================================================ -MUP_HEADER=${extp}_mup.h +FOUT=${extp}_mup.h -cat > $MUP_HEADER <<@ +cat > $FOUT <<@ /*** DO NOT EDIT THIS FILE DIRECTLY (use $0 to generate) ***/ +@ +$scriptdir/write_header.sh >> $FOUT +cat >> $FOUT <<@ #ifndef HYPRE_${extname}_MUP_HEADER #define HYPRE_${extname}_MUP_HEADER @@ -243,14 +302,14 @@ extern "C" { #endif #if defined (HYPRE_MIXED_PRECISION) -@ - -cat ${OUTP}_fixed_ext.h >> $MUP_HEADER -cat ${OUTP}_functions_ext.h >> $MUP_HEADER -cat ${OUTP}_pre_ext.h >> $MUP_HEADER - -cat >> $MUP_HEADER <<@ +@ +for i in fixed functions pre +do + echo "/* ${i} */" >> $FOUT + cat ${OUTP}_${i}_ext.h >> $FOUT +done +cat >> $FOUT <<@ #endif #ifdef __cplusplus @@ -261,18 +320,21 @@ cat >> $MUP_HEADER <<@ @ -rm -f ${OUTP}_fixed_ext.h ${OUTP}_functions_ext.h ${OUTP}_pre_ext.h +#============================================================ +# Create internal header files +#============================================================ -#=========================================================================== -# Create internal header file -#=========================================================================== +#===== C header file -MUP_HEADER=${intp}_mup.h +FOUT=${intp}_mup.h -cat > 
$MUP_HEADER <<@ +cat > $FOUT <<@ /*** DO NOT EDIT THIS FILE DIRECTLY (use $0 to generate) ***/ +@ +$scriptdir/write_header.sh >> $FOUT +cat >> $FOUT <<@ #ifndef hypre_${intname}_MUP_HEADER #define hypre_${intname}_MUP_HEADER @@ -281,14 +343,86 @@ extern "C" { #endif #if defined (HYPRE_MIXED_PRECISION) + @ +for i in fixed functions pre +do + tag=${i} + echo "/* ${tag} */" >> $FOUT + cat ${OUTP}_${tag}_int.h >> $FOUT +done +cat >> $FOUT <<@ +#endif -cat ${OUTP}_fixed_int.h >> $MUP_HEADER -cat ${OUTP}_functions_int.h >> $MUP_HEADER -cat ${OUTP}_pre_int.h >> $MUP_HEADER +#ifdef __cplusplus +} +#endif -cat >> $MUP_HEADER <<@ +#endif + +@ + +#===== C++ header file + +if [ -n "$gpu" ] +then + FOUT=${intp}_mup.hpp + + cat > $FOUT <<@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use $0 to generate) ***/ + +@ + $scriptdir/write_header.sh >> $FOUT + cat >> $FOUT <<@ +#ifndef hypre_${intname}_MUP_HEADER_CXX +#define hypre_${intname}_MUP_HEADER_CXX + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined (HYPRE_MIXED_PRECISION) +#if defined(HYPRE_USING_GPU) + +@ +#===== Declarations with C linkage + for i in fixed functions pre + do + tag=${i}${gpu} + echo "/* ${tag} */" >> $FOUT + cat ${OUTP}_${tag}_int.h >> $FOUT + done + cat >> $FOUT <<@ + +#endif +#endif + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus +extern "C++" { +#endif + +#if defined (HYPRE_MIXED_PRECISION) +#if defined(HYPRE_USING_GPU) +@ +#===== Declarations with C++ linkage + if [ -n "$INTHPP" ] + then + for i in fixed + do + tag=${i}${gpu} + echo "/* ${tag} */" >> $FOUT + cat ${OUTP}_${tag}_intpp.h >> $FOUT + done + fi + cat >> $FOUT <<@ + +#endif #endif #ifdef __cplusplus @@ -298,5 +432,23 @@ cat >> $MUP_HEADER <<@ #endif @ +fi + +############################################################################ +# Remove temporary files +############################################################################ + +for c in "" $gpu +do + for i in fixed functions methods pre + do + tag=${i}${c} + rm -f 
${OUTP}.${tag}.ext.proto + rm -f ${OUTP}.${tag}.int.proto + rm -f ${OUTP}.${tag}.intpp.proto + rm -f ${OUTP}_${tag}_ext.[ch] + rm -f ${OUTP}_${tag}_int.[ch] + rm -f ${OUTP}_${tag}_intpp.[ch] + done +done -rm -f ${OUTP}_fixed_int.h ${OUTP}_functions_int.h ${OUTP}_pre_int.h diff --git a/src/config/gen_code_awk.sh b/src/config/gen_code_awk.sh index a461c3a077..b8a3b078dd 100755 --- a/src/config/gen_code_awk.sh +++ b/src/config/gen_code_awk.sh @@ -4,7 +4,7 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) -# Generate code by for all implementations by invoking an awk program +# Generate code for all implementations by invoking an awk program # # The script takes a prototype info file (generated by 'gen_proto_info.sh') and # an output file prefix, then generates files prefix.c and prefix.h containing diff --git a/src/config/generate_function_list.sh b/src/config/generate_function_list.sh index f99cabf4e6..9aed19c8f5 100755 --- a/src/config/generate_function_list.sh +++ b/src/config/generate_function_list.sh @@ -13,4 +13,6 @@ shopt -s nullglob # Use awk to avoid issues with spacing -nm -P *.o *.obj | awk '$2 == "T" {print $1}' | sed -e 's/^_//' -e 's/_$//' +# Demangle any c++ name mangling and filter _device_stub_ prefixes. +nm -P *.o *.obj | awk '$2 == "T" {print $1}' | c++filt | sed -e 's/(.*$//' -e 's/^__device_stub__//' -e 's/^_//' -e 's/_$//' + diff --git a/src/config/mup_check_dir.sh b/src/config/mup_check_dir.sh index 3d8a95cd87..86729681ac 100755 --- a/src/config/mup_check_dir.sh +++ b/src/config/mup_check_dir.sh @@ -15,6 +15,8 @@ scriptdir=`dirname $0` +BUILD_TYPE=$(echo "$1" | tr '[:lower:]' '[:upper:]') + # Check if terminal supports colors if [ -t 1 ]; then # Use colors @@ -30,7 +32,18 @@ fi export LC_COLLATE=C # sort by listing capital letters first -cat mup.fixed mup.functions mup.methods | sort | uniq > mup_check.old +if [ "$BUILD_TYPE" = "GPU" ]; then + # Combine CPU and GPU function lists. 
+ # Some directories may not have GPU lists so we suppress "file not found" warnings + cat mup.fixed mup.fixed.gpu \ + mup.functions mup.functions.gpu \ + mup.methods mup.methods.gpu \ + 2>/dev/null \ + | sort | uniq > mup_check.old +else + cat mup.fixed mup.functions mup.methods | sort | uniq > mup_check.old +fi + $scriptdir/generate_function_list.sh | sort | uniq > mup_check.new # Remove functions listed in mup.exclude (if it exists) diff --git a/src/configure b/src/configure index 6deb4356a3..591b43d900 100755 --- a/src/configure +++ b/src/configure @@ -3607,12 +3607,6 @@ else $as_nop fi -if test "$hypre_using_longdouble" = "yes" -then - -printf "%s\n" "#define HYPRE_LONG_DOUBLE 1" >>confdefs.h - -fi # Check whether --enable-complex was given. if test ${enable_complex+y} @@ -10661,6 +10655,22 @@ fi fi +if test "$hypre_using_longdouble" = "yes" +then + if test "x$hypre_using_cuda" = "xyes" || test "x$hypre_using_device_openmp" = "xyes" || test "x$hypre_using_hip" = "xyes" || test "x$hypre_using_sycl" = "xyes" + then + as_fn_error $? "******************** Incompatible precision on device ********************** + Long double data format is not supported on device. + For GPU builds, please use the default double precision or --enable-single. 
+ ****************************************************************************" "$LINENO" 5 + else + +printf "%s\n" "#define HYPRE_LONG_DOUBLE 1" >>confdefs.h + + fi +fi + + if test "x$hypre_warp_size" = "xauto" then if test "x$hypre_using_cuda" = "xyes" || test "x$hypre_using_sycl" = "xyes" diff --git a/src/krylov/HYPRE_krylov_mup.h b/src/krylov/HYPRE_krylov_mup.h index facd612a29..c459b9c009 100644 --- a/src/krylov/HYPRE_krylov_mup.h +++ b/src/krylov/HYPRE_krylov_mup.h @@ -1,17 +1,6 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ -#ifndef HYPRE_KRYLOV_MUP_HEADER -#define HYPRE_KRYLOV_MUP_HEADER - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined (HYPRE_MIXED_PRECISION) - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. @@ -19,17 +8,18 @@ extern "C" { * SPDX-License-Identifier: (Apache-2.0 OR MIT) ******************************************************************************/ +#ifndef HYPRE_KRYLOV_MUP_HEADER +#define HYPRE_KRYLOV_MUP_HEADER +#ifdef __cplusplus +extern "C" { +#endif -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +#if defined (HYPRE_MIXED_PRECISION) -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ +/* fixed */ +/* functions */ HYPRE_Int HYPRE_BiCGSTABDestroy_flt( HYPRE_Solver solver ); @@ -1642,16 +1632,7 @@ HYPRE_PCGSolve_long_dbl( HYPRE_Solver solver, HYPRE_Matrix A, HYPRE_Vector b, HY HYPRE_Int HYPRE_PCGSolve( HYPRE_Solver solver, HYPRE_Matrix A, HYPRE_Vector b, HYPRE_Vector x ); - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* pre */ HYPRE_Int HYPRE_BiCGSTABDestroy_pre( HYPRE_Precision precision, HYPRE_Solver solver ); @@ -2190,7 +2171,6 @@ HYPRE_PCGSetup_pre( HYPRE_Precision precision, HYPRE_Solver solver, HYPRE_Matrix HYPRE_Int HYPRE_PCGSolve_pre( HYPRE_Precision precision, HYPRE_Solver solver, HYPRE_Matrix A, HYPRE_Vector b, HYPRE_Vector x ); - #endif #ifdef __cplusplus diff --git a/src/krylov/HYPRE_lobpcg_mup.h b/src/krylov/HYPRE_lobpcg_mup.h index e53c076cd6..6e4a4bb414 100644 --- a/src/krylov/HYPRE_lobpcg_mup.h +++ b/src/krylov/HYPRE_lobpcg_mup.h @@ -1,17 +1,6 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ -#ifndef HYPRE_LOBPCG_MUP_HEADER -#define HYPRE_LOBPCG_MUP_HEADER - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined (HYPRE_MIXED_PRECISION) - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
@@ -19,17 +8,18 @@ extern "C" { * SPDX-License-Identifier: (Apache-2.0 OR MIT) ******************************************************************************/ +#ifndef HYPRE_LOBPCG_MUP_HEADER +#define HYPRE_LOBPCG_MUP_HEADER +#ifdef __cplusplus +extern "C" { +#endif -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +#if defined (HYPRE_MIXED_PRECISION) -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ +/* fixed */ +/* functions */ HYPRE_Int HYPRE_LOBPCGCreate_flt( mv_InterfaceInterpreter *interpreter, HYPRE_MatvecFunctions *mvfunctions, HYPRE_Solver *solver ); @@ -184,16 +174,7 @@ HYPRE_LOBPCGSolve_long_dbl( HYPRE_Solver solver, mv_MultiVectorPtr y, mv_MultiVe HYPRE_Int HYPRE_LOBPCGSolve( HYPRE_Solver solver, mv_MultiVectorPtr y, mv_MultiVectorPtr x, void *lambda ); - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* pre */ HYPRE_Int HYPRE_LOBPCGCreate_pre( HYPRE_Precision precision, mv_InterfaceInterpreter *interpreter, HYPRE_MatvecFunctions *mvfunctions, HYPRE_Solver *solver ); @@ -246,7 +227,6 @@ HYPRE_LOBPCGSetupT_pre( HYPRE_Precision precision, HYPRE_Solver solver, HYPRE_Ma HYPRE_Int HYPRE_LOBPCGSolve_pre( HYPRE_Precision precision, HYPRE_Solver solver, mv_MultiVectorPtr y, mv_MultiVectorPtr x, void *lambda ); - #endif #ifdef __cplusplus diff --git a/src/krylov/_hypre_krylov_mup.h b/src/krylov/_hypre_krylov_mup.h index 15cb6a9abd..8cf53de9d6 100644 --- a/src/krylov/_hypre_krylov_mup.h +++ b/src/krylov/_hypre_krylov_mup.h @@ -1,6 +1,13 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef hypre_KRYLOV_MUP_HEADER #define hypre_KRYLOV_MUP_HEADER @@ -10,15 +17,7 @@ extern "C" { #if defined (HYPRE_MIXED_PRECISION) -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* fixed */ void * hypre_BiCGSTABCreate_flt( hypre_BiCGSTABFunctions *bicgstab_functions ); @@ -1546,28 +1545,9 @@ hypre_PCGSolve_dbl( void *pcg_vdata, void *A, void *b, void *x ); HYPRE_Int hypre_PCGSolve_long_dbl( void *pcg_vdata, void *A, void *b, void *x ); +/* functions */ -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - +/* pre */ #endif diff --git a/src/krylov/_hypre_lobpcg_mup.h b/src/krylov/_hypre_lobpcg_mup.h index 4a7ae30054..992af75d3c 100644 --- a/src/krylov/_hypre_lobpcg_mup.h +++ b/src/krylov/_hypre_lobpcg_mup.h @@ -1,6 +1,13 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef hypre_LOBPCG_MUP_HEADER #define hypre_LOBPCG_MUP_HEADER @@ -10,15 +17,7 @@ extern "C" { #if defined (HYPRE_MIXED_PRECISION) -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* fixed */ HYPRE_Int hypre_LOBPCGDestroy_flt( void *pcg_vdata ); @@ -202,28 +201,9 @@ lobpcg_solve_dbl( mv_MultiVectorPtr blockVectorX, void* operatorAData, lobpcg_op HYPRE_Int lobpcg_solve_long_dbl( mv_MultiVectorPtr blockVectorX, void* operatorAData, lobpcg_operator operatorA, void* operatorBData, lobpcg_operator operatorB, void* operatorTData, lobpcg_operator operatorT, mv_MultiVectorPtr blockVectorY, lobpcg_BLASLAPACKFunctions blap_fn, lobpcg_Tolerance tolerance, HYPRE_Int maxIterations, HYPRE_Int verbosityLevel, HYPRE_Int* iterationNumber, hypre_long_double * lambda_values, hypre_long_double * lambdaHistory_values, HYPRE_BigInt lambdaHistory_gh, hypre_long_double * residualNorms_values, hypre_long_double * residualNormsHistory_values, HYPRE_BigInt residualNormsHistory_gh ); +/* functions */ -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - +/* pre */ #endif diff --git a/src/lapack/_hypre_lapack_mup.h b/src/lapack/_hypre_lapack_mup.h index bebd85a9aa..0cd625f004 100644 --- a/src/lapack/_hypre_lapack_mup.h +++ b/src/lapack/_hypre_lapack_mup.h @@ -1,6 +1,13 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef hypre_LAPACK_MUP_HEADER #define hypre_LAPACK_MUP_HEADER @@ -10,15 +17,7 @@ extern "C" { #if defined (HYPRE_MIXED_PRECISION) -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* fixed */ HYPRE_Int hypre_dbdsqr_flt( const char *uplo, HYPRE_Int *n, HYPRE_Int *ncvt, HYPRE_Int *nru, HYPRE_Int *ncc, hypre_float *d__, hypre_float *e, hypre_float *vt, HYPRE_Int *ldvt, hypre_float *u, HYPRE_Int *ldu, hypre_float *c__, HYPRE_Int *ldc, hypre_float *work, HYPRE_Int *info ); @@ -314,28 +313,9 @@ hypre_dtrtri_dbl( const char *uplo, const char *diag, HYPRE_Int *n, hypre_double HYPRE_Int hypre_dtrtri_long_dbl( const char *uplo, const char *diag, HYPRE_Int *n, hypre_long_double *a, HYPRE_Int *lda, HYPRE_Int *info ); +/* functions */ -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - +/* pre */ #endif diff --git a/src/lib/Makefile b/src/lib/Makefile index 9249543202..3f46d5292f 100644 --- a/src/lib/Makefile +++ b/src/lib/Makefile @@ -18,6 +18,7 @@ IJMVFILES = ${HYPRE_SRC_TOP_DIR}/IJ_mv/*.o IJMVFILES += ${HYPRE_SRC_TOP_DIR}/IJ_mv/*.o_flt IJMVFILES += ${HYPRE_SRC_TOP_DIR}/IJ_mv/*.o_dbl IJMVFILES += ${HYPRE_SRC_TOP_DIR}/IJ_mv/*.o_ldbl +IJMVFILES += ${HYPRE_SRC_TOP_DIR}/IJ_mv/*.obj IJMVFILES += ${HYPRE_SRC_TOP_DIR}/IJ_mv/*.obj_flt IJMVFILES += ${HYPRE_SRC_TOP_DIR}/IJ_mv/*.obj_dbl IJMVFILES += ${HYPRE_SRC_TOP_DIR}/IJ_mv/*.obj_ldbl @@ -56,6 +57,7 @@ PARCSRLSFILES = ${HYPRE_SRC_TOP_DIR}/parcsr_ls/*.o PARCSRLSFILES += ${HYPRE_SRC_TOP_DIR}/parcsr_ls/*.o_flt PARCSRLSFILES += ${HYPRE_SRC_TOP_DIR}/parcsr_ls/*.o_dbl PARCSRLSFILES += ${HYPRE_SRC_TOP_DIR}/parcsr_ls/*.o_ldbl +PARCSRLSFILES += ${HYPRE_SRC_TOP_DIR}/parcsr_ls/*.obj PARCSRLSFILES += ${HYPRE_SRC_TOP_DIR}/parcsr_ls/*.obj_flt PARCSRLSFILES += ${HYPRE_SRC_TOP_DIR}/parcsr_ls/*.obj_dbl PARCSRLSFILES += ${HYPRE_SRC_TOP_DIR}/parcsr_ls/*.obj_ldbl @@ -64,6 +66,7 @@ PARCSRMVFILES = ${HYPRE_SRC_TOP_DIR}/parcsr_mv/*.o PARCSRMVFILES += ${HYPRE_SRC_TOP_DIR}/parcsr_mv/*.o_flt PARCSRMVFILES += ${HYPRE_SRC_TOP_DIR}/parcsr_mv/*.o_dbl PARCSRMVFILES += ${HYPRE_SRC_TOP_DIR}/parcsr_mv/*.o_ldbl +PARCSRMVFILES += ${HYPRE_SRC_TOP_DIR}/parcsr_mv/*.obj PARCSRMVFILES += ${HYPRE_SRC_TOP_DIR}/parcsr_mv/*.obj_flt PARCSRMVFILES += ${HYPRE_SRC_TOP_DIR}/parcsr_mv/*.obj_dbl PARCSRMVFILES += ${HYPRE_SRC_TOP_DIR}/parcsr_mv/*.obj_ldbl @@ -76,6 +79,7 @@ SEQMVFILES = ${HYPRE_SRC_TOP_DIR}/seq_mv/*.o SEQMVFILES += ${HYPRE_SRC_TOP_DIR}/seq_mv/*.o_flt SEQMVFILES += ${HYPRE_SRC_TOP_DIR}/seq_mv/*.o_dbl SEQMVFILES += ${HYPRE_SRC_TOP_DIR}/seq_mv/*.o_ldbl +SEQMVFILES += ${HYPRE_SRC_TOP_DIR}/seq_mv/*.obj SEQMVFILES += ${HYPRE_SRC_TOP_DIR}/seq_mv/*.obj_flt SEQMVFILES += ${HYPRE_SRC_TOP_DIR}/seq_mv/*.obj_dbl 
SEQMVFILES += ${HYPRE_SRC_TOP_DIR}/seq_mv/*.obj_ldbl @@ -121,6 +125,7 @@ UTILITIESFILES = ${HYPRE_SRC_TOP_DIR}/utilities/*.o UTILITIESFILES += ${HYPRE_SRC_TOP_DIR}/utilities/*.o_flt UTILITIESFILES += ${HYPRE_SRC_TOP_DIR}/utilities/*.o_dbl UTILITIESFILES += ${HYPRE_SRC_TOP_DIR}/utilities/*.o_ldbl +UTILITIESFILES += ${HYPRE_SRC_TOP_DIR}/utilities/*.obj UTILITIESFILES += ${HYPRE_SRC_TOP_DIR}/utilities/*.obj_flt UTILITIESFILES += ${HYPRE_SRC_TOP_DIR}/utilities/*.obj_dbl UTILITIESFILES += ${HYPRE_SRC_TOP_DIR}/utilities/*.obj_ldbl diff --git a/src/parcsr_block_mv/_hypre_parcsr_block_mv.h b/src/parcsr_block_mv/_hypre_parcsr_block_mv.h index 0da74341e8..c2b5d31228 100644 --- a/src/parcsr_block_mv/_hypre_parcsr_block_mv.h +++ b/src/parcsr_block_mv/_hypre_parcsr_block_mv.h @@ -243,7 +243,7 @@ typedef struct HYPRE_Int owns_data; HYPRE_BigInt num_nonzeros; - HYPRE_Real d_num_nonzeros; + hypre_double d_num_nonzeros; /* Buffers used by GetRow to hold row currently being accessed. AJC, 4/99 */ HYPRE_Int *rowindices; diff --git a/src/parcsr_block_mv/par_csr_block_matrix.c b/src/parcsr_block_mv/par_csr_block_matrix.c index 1736e1babc..a46f014d16 100644 --- a/src/parcsr_block_mv/par_csr_block_matrix.c +++ b/src/parcsr_block_mv/par_csr_block_matrix.c @@ -200,7 +200,7 @@ hypre_ParCSRBlockMatrixSetDNumNonzeros( hypre_ParCSRBlockMatrix *matrix) local_num_nonzeros = (HYPRE_Real) diag_i[local_num_rows] + (HYPRE_Real) offd_i[local_num_rows]; hypre_MPI_Allreduce(&local_num_nonzeros, &total_num_nonzeros, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); - hypre_ParCSRBlockMatrixDNumNonzeros(matrix) = total_num_nonzeros; + hypre_ParCSRBlockMatrixDNumNonzeros(matrix) = (hypre_double)total_num_nonzeros; return ierr; } diff --git a/src/parcsr_block_mv/par_csr_block_matrix.h b/src/parcsr_block_mv/par_csr_block_matrix.h index c29870e235..99e825f0d1 100644 --- a/src/parcsr_block_mv/par_csr_block_matrix.h +++ b/src/parcsr_block_mv/par_csr_block_matrix.h @@ -59,7 +59,7 @@ typedef struct HYPRE_Int owns_data; 
HYPRE_BigInt num_nonzeros; - HYPRE_Real d_num_nonzeros; + hypre_double d_num_nonzeros; /* Buffers used by GetRow to hold row currently being accessed. AJC, 4/99 */ HYPRE_Int *rowindices; diff --git a/src/parcsr_ls/HYPRE_parcsr_ls_mup.h b/src/parcsr_ls/HYPRE_parcsr_ls_mup.h index fae4ea9d0b..6455095568 100644 --- a/src/parcsr_ls/HYPRE_parcsr_ls_mup.h +++ b/src/parcsr_ls/HYPRE_parcsr_ls_mup.h @@ -1,6 +1,13 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef HYPRE_PARCSR_LS_MUP_HEADER #define HYPRE_PARCSR_LS_MUP_HEADER @@ -10,15 +17,7 @@ extern "C" { #if defined (HYPRE_MIXED_PRECISION) -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* fixed */ HYPRE_ParCSRMatrix GenerateDifConv_flt( MPI_Comm comm, HYPRE_BigInt nx, HYPRE_BigInt ny, HYPRE_BigInt nz, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int R, HYPRE_Int p, HYPRE_Int q, HYPRE_Int r, hypre_float *value ); @@ -76,16 +75,7 @@ hypre_GenerateCoordinates_dbl( MPI_Comm comm, HYPRE_BigInt nx, HYPRE_BigInt ny, float* hypre_GenerateCoordinates_long_dbl( MPI_Comm comm, HYPRE_BigInt nx, HYPRE_BigInt ny, HYPRE_BigInt nz, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int R, HYPRE_Int p, HYPRE_Int q, HYPRE_Int r, HYPRE_Int coorddim ); - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* functions */ HYPRE_Int HYPRE_ADSCreate_flt( HYPRE_Solver *solver ); @@ -5874,16 +5864,7 @@ HYPRE_TempParCSRSetupInterpreter_long_dbl( mv_InterfaceInterpreter *i ); HYPRE_Int HYPRE_TempParCSRSetupInterpreter( mv_InterfaceInterpreter *i ); - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* pre */ HYPRE_Int HYPRE_ADSCreate_pre( HYPRE_Precision precision, HYPRE_Solver *solver ); @@ -7814,7 +7795,6 @@ HYPRE_SchwarzSolve_pre( HYPRE_Precision precision, HYPRE_Solver solver, HYPRE_Pa HYPRE_Int HYPRE_TempParCSRSetupInterpreter_pre( HYPRE_Precision precision, mv_InterfaceInterpreter *i ); - #endif #ifdef __cplusplus diff --git a/src/parcsr_ls/Makefile b/src/parcsr_ls/Makefile index e90ace5d30..cb151013f1 100644 --- a/src/parcsr_ls/Makefile +++ b/src/parcsr_ls/Makefile @@ -192,6 +192,11 @@ MP_FILES = \ mup_pre.c\ HYPRE_parcsr_ls_mp.c +MP_CUFILES=\ + mup_fixed_gpu.c\ + mup_functions_gpu.c\ + mup_pre_gpu.c + COBJS = ${FILES:.c=.o} CUOBJS = ${CUFILES:.c=.obj} OBJS = ${COBJS} ${CUOBJS} @@ -205,9 +210,10 @@ CUOBJS_single = ${CUFILES:.c=.obj_flt} CUOBJS_double = ${CUFILES:.c=.obj_dbl} CUOBJS_longdouble = ${CUFILES:.c=.obj_ldbl} MP_COBJS = ${MP_FILES:.c=.o} +MP_CUOBJS = ${MP_CUFILES:.c=.obj} OBJS = ${COBJS_single} ${COBJS_double} ${COBJS_longdouble} ${MP_COBJS} -OBJS += ${CUOBJS_single} ${CUOBJS_double} ${CUOBJS_longdouble} +OBJS += ${CUOBJS_single} ${CUOBJS_double} ${CUOBJS_longdouble} ${MP_CUOBJS} endif diff --git a/src/parcsr_ls/_hypre_parcsr_ls.hpp b/src/parcsr_ls/_hypre_parcsr_ls.hpp new file mode 100644 index 0000000000..cdb231092f --- /dev/null +++ b/src/parcsr_ls/_hypre_parcsr_ls.hpp @@ -0,0 +1,36 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use 'headers' to generate) ***/ + +#ifndef hypre_PARCSR_LS_HPP +#define hypre_PARCSR_LS_HPP + +#include +#include "_hypre_utilities.hpp" + +#ifdef HYPRE_MIXED_PRECISION +#include "_hypre_parcsr_ls_mup_def.h" +#endif + +#ifdef __cplusplus +extern "C++" { +#endif + + +#ifdef __cplusplus +} +#endif + +#ifdef HYPRE_MIXED_PRECISION +/* The following is for user compiles and the order is important. 
The first + * header ensures that we do not change prototype names in user files or in the + * second header file. The second header contains all the prototypes needed by + * users for mixed precision. */ +#ifndef hypre_MP_BUILD +#include "_hypre_parcsr_ls_mup_undef.h" +#include "_hypre_parcsr_ls_mup.h" +#include "_hypre_parcsr_ls_mup.hpp" +#endif +#endif + +#endif + diff --git a/src/parcsr_ls/_hypre_parcsr_ls_mup.h b/src/parcsr_ls/_hypre_parcsr_ls_mup.h index 4216360e82..764add749e 100644 --- a/src/parcsr_ls/_hypre_parcsr_ls_mup.h +++ b/src/parcsr_ls/_hypre_parcsr_ls_mup.h @@ -1,6 +1,13 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef hypre_PARCSR_LS_MUP_HEADER #define hypre_PARCSR_LS_MUP_HEADER @@ -10,15 +17,7 @@ extern "C" { #if defined (HYPRE_MIXED_PRECISION) -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* fixed */ HYPRE_ParCSRMatrix GenerateSysLaplacian_flt( MPI_Comm comm, HYPRE_BigInt nx, HYPRE_BigInt ny, HYPRE_BigInt nz, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int R, HYPRE_Int p, HYPRE_Int q, HYPRE_Int r, HYPRE_Int num_fun, hypre_float *mtrx, hypre_float *value ); @@ -6964,28 +6963,9 @@ transpose_matrix_create_dbl( HYPRE_Int **i_face_element_pointer, HYPRE_Int **j_f HYPRE_Int transpose_matrix_create_long_dbl( HYPRE_Int **i_face_element_pointer, HYPRE_Int **j_face_element_pointer, HYPRE_Int *i_element_face, HYPRE_Int *j_element_face, HYPRE_Int num_elements, HYPRE_Int num_faces ); +/* functions */ -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - +/* pre */ #endif diff --git a/src/parcsr_ls/_hypre_parcsr_ls_mup.hpp b/src/parcsr_ls/_hypre_parcsr_ls_mup.hpp new file mode 100644 index 0000000000..d6c869e332 --- /dev/null +++ b/src/parcsr_ls/_hypre_parcsr_ls_mup.hpp @@ -0,0 +1,452 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#ifndef hypre_PARCSR_LS_MUP_HEADER_CXX +#define hypre_PARCSR_LS_MUP_HEADER_CXX + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined (HYPRE_MIXED_PRECISION) +#if defined(HYPRE_USING_GPU) + +/* fixed_gpu */ + +HYPRE_Int +hypreDevice_extendWtoP_flt( HYPRE_Int P_nr_of_rows, HYPRE_Int W_nr_of_rows, HYPRE_Int W_nr_of_cols, HYPRE_Int *CF_marker, HYPRE_Int W_diag_nnz, HYPRE_Int *W_diag_i, HYPRE_Int *W_diag_j, hypre_float *W_diag_data, HYPRE_Int *P_diag_i, HYPRE_Int *P_diag_j, hypre_float *P_diag_data, HYPRE_Int *W_offd_i, HYPRE_Int *P_offd_i ); +HYPRE_Int +hypreDevice_extendWtoP_dbl( HYPRE_Int P_nr_of_rows, HYPRE_Int W_nr_of_rows, HYPRE_Int W_nr_of_cols, HYPRE_Int *CF_marker, HYPRE_Int W_diag_nnz, HYPRE_Int *W_diag_i, HYPRE_Int *W_diag_j, hypre_double *W_diag_data, HYPRE_Int *P_diag_i, HYPRE_Int *P_diag_j, hypre_double *P_diag_data, HYPRE_Int *W_offd_i, HYPRE_Int *P_offd_i ); +HYPRE_Int +hypreDevice_extendWtoP_long_dbl( HYPRE_Int P_nr_of_rows, HYPRE_Int W_nr_of_rows, HYPRE_Int W_nr_of_cols, HYPRE_Int *CF_marker, HYPRE_Int W_diag_nnz, HYPRE_Int *W_diag_i, HYPRE_Int *W_diag_j, hypre_long_double *W_diag_data, HYPRE_Int *P_diag_i, HYPRE_Int *P_diag_j, 
hypre_long_double *P_diag_data, HYPRE_Int *W_offd_i, HYPRE_Int *P_offd_i ); + +HYPRE_Int +hypre_BoomerAMGBuildDirInterpDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, hypre_float trunc_factor, HYPRE_Int max_elmts, HYPRE_Int interp_type, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildDirInterpDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, hypre_double trunc_factor, HYPRE_Int max_elmts, HYPRE_Int interp_type, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildDirInterpDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, hypre_long_double trunc_factor, HYPRE_Int max_elmts, HYPRE_Int interp_type, hypre_ParCSRMatrix **P_ptr ); + +HYPRE_Int +hypre_BoomerAMGBuildExtInterpDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, hypre_float trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildExtInterpDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, hypre_double trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildExtInterpDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, hypre_long_double trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ); + +HYPRE_Int 
+hypre_BoomerAMGBuildExtPEInterpDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, hypre_float trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildExtPEInterpDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, hypre_double trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildExtPEInterpDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, hypre_long_double trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ); + +HYPRE_Int +hypre_BoomerAMGBuildExtPIInterpDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, hypre_float trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildExtPIInterpDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, hypre_double trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildExtPIInterpDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, hypre_long_double trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ); + +HYPRE_Int +hypre_BoomerAMGBuildInterpOnePntDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, 
HYPRE_Int *dof_func, HYPRE_Int debug_flag, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildInterpOnePntDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildInterpOnePntDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, hypre_ParCSRMatrix **P_ptr ); + +HYPRE_Int +hypre_BoomerAMGBuildModMultipassDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, hypre_float trunc_factor, HYPRE_Int P_max_elmts, HYPRE_Int interp_type, HYPRE_Int num_functions, HYPRE_Int *dof_func, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildModMultipassDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, hypre_double trunc_factor, HYPRE_Int P_max_elmts, HYPRE_Int interp_type, HYPRE_Int num_functions, HYPRE_Int *dof_func, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildModMultipassDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, hypre_long_double trunc_factor, HYPRE_Int P_max_elmts, HYPRE_Int interp_type, HYPRE_Int num_functions, HYPRE_Int *dof_func, hypre_ParCSRMatrix **P_ptr ); + +HYPRE_Int +hypre_BoomerAMGBuildModPartialExtInterpDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_BigInt *num_old_cpts_global, HYPRE_Int debug_flag, hypre_float trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildModPartialExtInterpDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_BigInt 
*num_old_cpts_global, HYPRE_Int debug_flag, hypre_double trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildModPartialExtInterpDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_BigInt *num_old_cpts_global, HYPRE_Int debug_flag, hypre_long_double trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ); + +HYPRE_Int +hypre_BoomerAMGBuildModPartialExtPEInterpDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_BigInt *num_old_cpts_global, HYPRE_Int debug_flag, hypre_float trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildModPartialExtPEInterpDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_BigInt *num_old_cpts_global, HYPRE_Int debug_flag, hypre_double trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildModPartialExtPEInterpDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_BigInt *num_old_cpts_global, HYPRE_Int debug_flag, hypre_long_double trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ); + +HYPRE_Int +hypre_BoomerAMGBuildRestrNeumannAIRDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int NeumannDeg, hypre_float strong_thresholdR, hypre_float filter_thresholdR, HYPRE_Int debug_flag, hypre_ParCSRMatrix **R_ptr ); +HYPRE_Int +hypre_BoomerAMGBuildRestrNeumannAIRDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int NeumannDeg, hypre_double strong_thresholdR, hypre_double filter_thresholdR, HYPRE_Int debug_flag, hypre_ParCSRMatrix **R_ptr ); +HYPRE_Int 
+hypre_BoomerAMGBuildRestrNeumannAIRDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int NeumannDeg, hypre_long_double strong_thresholdR, hypre_long_double filter_thresholdR, HYPRE_Int debug_flag, hypre_ParCSRMatrix **R_ptr ); + +HYPRE_Int +hypre_BoomerAMGCFMarkerTo1minus1Device_flt( HYPRE_Int *CF_marker, HYPRE_Int size ); +HYPRE_Int +hypre_BoomerAMGCFMarkerTo1minus1Device_dbl( HYPRE_Int *CF_marker, HYPRE_Int size ); +HYPRE_Int +hypre_BoomerAMGCFMarkerTo1minus1Device_long_dbl( HYPRE_Int *CF_marker, HYPRE_Int size ); + +HYPRE_Int +hypre_BoomerAMGCoarseParmsDevice_flt( MPI_Comm comm, HYPRE_Int local_num_variables, HYPRE_Int num_functions, hypre_IntArray *dof_func, hypre_IntArray *CF_marker, hypre_IntArray **coarse_dof_func_ptr, HYPRE_BigInt *coarse_pnts_global ); +HYPRE_Int +hypre_BoomerAMGCoarseParmsDevice_dbl( MPI_Comm comm, HYPRE_Int local_num_variables, HYPRE_Int num_functions, hypre_IntArray *dof_func, hypre_IntArray *CF_marker, hypre_IntArray **coarse_dof_func_ptr, HYPRE_BigInt *coarse_pnts_global ); +HYPRE_Int +hypre_BoomerAMGCoarseParmsDevice_long_dbl( MPI_Comm comm, HYPRE_Int local_num_variables, HYPRE_Int num_functions, hypre_IntArray *dof_func, hypre_IntArray *CF_marker, hypre_IntArray **coarse_dof_func_ptr, HYPRE_BigInt *coarse_pnts_global ); + +HYPRE_Int +hypre_BoomerAMGCoarsenPMISDevice_flt( hypre_ParCSRMatrix *S, hypre_ParCSRMatrix *A, HYPRE_Int CF_init, HYPRE_Int debug_flag, hypre_IntArray **CF_marker_ptr ); +HYPRE_Int +hypre_BoomerAMGCoarsenPMISDevice_dbl( hypre_ParCSRMatrix *S, hypre_ParCSRMatrix *A, HYPRE_Int CF_init, HYPRE_Int debug_flag, hypre_IntArray **CF_marker_ptr ); +HYPRE_Int +hypre_BoomerAMGCoarsenPMISDevice_long_dbl( hypre_ParCSRMatrix *S, hypre_ParCSRMatrix *A, HYPRE_Int CF_init, HYPRE_Int debug_flag, hypre_IntArray **CF_marker_ptr ); + +HYPRE_Int +hypre_BoomerAMGCorrectCFMarker2Device_flt( hypre_IntArray *CF_marker, hypre_IntArray 
*new_CF_marker ); +HYPRE_Int +hypre_BoomerAMGCorrectCFMarker2Device_dbl( hypre_IntArray *CF_marker, hypre_IntArray *new_CF_marker ); +HYPRE_Int +hypre_BoomerAMGCorrectCFMarker2Device_long_dbl( hypre_IntArray *CF_marker, hypre_IntArray *new_CF_marker ); + +HYPRE_Int +hypre_BoomerAMGCorrectCFMarkerDevice_flt( hypre_IntArray *CF_marker, hypre_IntArray *new_CF_marker ); +HYPRE_Int +hypre_BoomerAMGCorrectCFMarkerDevice_dbl( hypre_IntArray *CF_marker, hypre_IntArray *new_CF_marker ); +HYPRE_Int +hypre_BoomerAMGCorrectCFMarkerDevice_long_dbl( hypre_IntArray *CF_marker, hypre_IntArray *new_CF_marker ); + +HYPRE_Int +hypre_BoomerAMGCreate2ndSDevice_flt( hypre_ParCSRMatrix *S, HYPRE_Int *CF_marker, HYPRE_Int num_paths, HYPRE_BigInt *coarse_row_starts, hypre_ParCSRMatrix **C_ptr ); +HYPRE_Int +hypre_BoomerAMGCreate2ndSDevice_dbl( hypre_ParCSRMatrix *S, HYPRE_Int *CF_marker, HYPRE_Int num_paths, HYPRE_BigInt *coarse_row_starts, hypre_ParCSRMatrix **C_ptr ); +HYPRE_Int +hypre_BoomerAMGCreate2ndSDevice_long_dbl( hypre_ParCSRMatrix *S, HYPRE_Int *CF_marker, HYPRE_Int num_paths, HYPRE_BigInt *coarse_row_starts, hypre_ParCSRMatrix **C_ptr ); + +HYPRE_Int +hypre_BoomerAMGCreateSDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int abs_soc, hypre_float strength_threshold, hypre_float max_row_sum, HYPRE_Int num_functions, HYPRE_Int *dof_func, hypre_ParCSRMatrix **S_ptr ); +HYPRE_Int +hypre_BoomerAMGCreateSDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int abs_soc, hypre_double strength_threshold, hypre_double max_row_sum, HYPRE_Int num_functions, HYPRE_Int *dof_func, hypre_ParCSRMatrix **S_ptr ); +HYPRE_Int +hypre_BoomerAMGCreateSDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int abs_soc, hypre_long_double strength_threshold, hypre_long_double max_row_sum, HYPRE_Int num_functions, HYPRE_Int *dof_func, hypre_ParCSRMatrix **S_ptr ); + +HYPRE_Int +hypre_BoomerAMGDD_FAC_CFL1JacobiDevice_flt( void *amgdd_vdata, HYPRE_Int level, HYPRE_Int relax_set ); +HYPRE_Int +hypre_BoomerAMGDD_FAC_CFL1JacobiDevice_dbl( 
void *amgdd_vdata, HYPRE_Int level, HYPRE_Int relax_set ); +HYPRE_Int +hypre_BoomerAMGDD_FAC_CFL1JacobiDevice_long_dbl( void *amgdd_vdata, HYPRE_Int level, HYPRE_Int relax_set ); + +HYPRE_Int +hypre_BoomerAMGDD_FAC_JacobiDevice_flt( void *amgdd_vdata, HYPRE_Int level ); +HYPRE_Int +hypre_BoomerAMGDD_FAC_JacobiDevice_dbl( void *amgdd_vdata, HYPRE_Int level ); +HYPRE_Int +hypre_BoomerAMGDD_FAC_JacobiDevice_long_dbl( void *amgdd_vdata, HYPRE_Int level ); + +HYPRE_Int +hypre_BoomerAMGIndepSetDevice_flt( hypre_ParCSRMatrix *S, hypre_float *measure_diag, hypre_float *measure_offd, HYPRE_Int graph_diag_size, HYPRE_Int *graph_diag, HYPRE_Int *IS_marker_diag, HYPRE_Int *IS_marker_offd, hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int *int_send_buf ); +HYPRE_Int +hypre_BoomerAMGIndepSetDevice_dbl( hypre_ParCSRMatrix *S, hypre_double *measure_diag, hypre_double *measure_offd, HYPRE_Int graph_diag_size, HYPRE_Int *graph_diag, HYPRE_Int *IS_marker_diag, HYPRE_Int *IS_marker_offd, hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int *int_send_buf ); +HYPRE_Int +hypre_BoomerAMGIndepSetDevice_long_dbl( hypre_ParCSRMatrix *S, hypre_long_double *measure_diag, hypre_long_double *measure_offd, HYPRE_Int graph_diag_size, HYPRE_Int *graph_diag, HYPRE_Int *IS_marker_diag, HYPRE_Int *IS_marker_offd, hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int *int_send_buf ); + +HYPRE_Int +hypre_BoomerAMGIndepSetInitDevice_flt( hypre_ParCSRMatrix *S, hypre_float *measure_array, HYPRE_Int aug_rand ); +HYPRE_Int +hypre_BoomerAMGIndepSetInitDevice_dbl( hypre_ParCSRMatrix *S, hypre_double *measure_array, HYPRE_Int aug_rand ); +HYPRE_Int +hypre_BoomerAMGIndepSetInitDevice_long_dbl( hypre_ParCSRMatrix *S, hypre_long_double *measure_array, HYPRE_Int aug_rand ); + +HYPRE_Int +hypre_BoomerAMGInitDofFuncDevice_flt( HYPRE_Int *dof_func, HYPRE_Int local_size, HYPRE_Int offset, HYPRE_Int num_functions ); +HYPRE_Int +hypre_BoomerAMGInitDofFuncDevice_dbl( HYPRE_Int *dof_func, HYPRE_Int local_size, HYPRE_Int offset, HYPRE_Int num_functions ); 
+HYPRE_Int +hypre_BoomerAMGInitDofFuncDevice_long_dbl( HYPRE_Int *dof_func, HYPRE_Int local_size, HYPRE_Int offset, HYPRE_Int num_functions ); + +HYPRE_Int +hypre_BoomerAMGInterpTruncationDevice_flt( hypre_ParCSRMatrix *P, hypre_float trunc_factor, HYPRE_Int max_elmts ); +HYPRE_Int +hypre_BoomerAMGInterpTruncationDevice_dbl( hypre_ParCSRMatrix *P, hypre_double trunc_factor, HYPRE_Int max_elmts ); +HYPRE_Int +hypre_BoomerAMGInterpTruncationDevice_long_dbl( hypre_ParCSRMatrix *P, hypre_long_double trunc_factor, HYPRE_Int max_elmts ); + +HYPRE_Int +hypre_BoomerAMGMakeSocFromSDevice_flt( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *S ); +HYPRE_Int +hypre_BoomerAMGMakeSocFromSDevice_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *S ); +HYPRE_Int +hypre_BoomerAMGMakeSocFromSDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *S ); + +HYPRE_Int +hypre_BoomerAMGRelaxHybridGaussSeidelDevice_flt( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_points, hypre_float relax_weight, hypre_float omega, hypre_float *l1_norms, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp, HYPRE_Int GS_order, HYPRE_Int Symm ); +HYPRE_Int +hypre_BoomerAMGRelaxHybridGaussSeidelDevice_dbl( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_points, hypre_double relax_weight, hypre_double omega, hypre_double *l1_norms, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp, HYPRE_Int GS_order, HYPRE_Int Symm ); +HYPRE_Int +hypre_BoomerAMGRelaxHybridGaussSeidelDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_points, hypre_long_double relax_weight, hypre_long_double omega, hypre_long_double *l1_norms, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp, HYPRE_Int GS_order, HYPRE_Int Symm ); + +HYPRE_Int +hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice_flt( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_float relax_weight, hypre_float omega, 
hypre_float *A_diag_diag, hypre_ParVector *u, hypre_ParVector *r, hypre_ParVector *z, HYPRE_Int choice ); +HYPRE_Int +hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice_dbl( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_double relax_weight, hypre_double omega, hypre_double *A_diag_diag, hypre_ParVector *u, hypre_ParVector *r, hypre_ParVector *z, HYPRE_Int choice ); +HYPRE_Int +hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_long_double relax_weight, hypre_long_double omega, hypre_long_double *A_diag_diag, hypre_ParVector *u, hypre_ParVector *r, hypre_ParVector *z, HYPRE_Int choice ); + +HYPRE_Int +hypre_FSAISetupDevice_flt( void *fsai_vdata, hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u ); +HYPRE_Int +hypre_FSAISetupDevice_dbl( void *fsai_vdata, hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u ); +HYPRE_Int +hypre_FSAISetupDevice_long_dbl( void *fsai_vdata, hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u ); + +HYPRE_Int +hypre_GaussElimSetupDevice_flt( hypre_ParAMGData *amg_data, HYPRE_Int level, HYPRE_Int solver_type ); +HYPRE_Int +hypre_GaussElimSetupDevice_dbl( hypre_ParAMGData *amg_data, HYPRE_Int level, HYPRE_Int solver_type ); +HYPRE_Int +hypre_GaussElimSetupDevice_long_dbl( hypre_ParAMGData *amg_data, HYPRE_Int level, HYPRE_Int solver_type ); + +HYPRE_Int +hypre_GaussElimSolveDevice_flt( hypre_ParAMGData *amg_data, HYPRE_Int level, HYPRE_Int solver_type ); +HYPRE_Int +hypre_GaussElimSolveDevice_dbl( hypre_ParAMGData *amg_data, HYPRE_Int level, HYPRE_Int solver_type ); +HYPRE_Int +hypre_GaussElimSolveDevice_long_dbl( hypre_ParAMGData *amg_data, HYPRE_Int level, HYPRE_Int solver_type ); + +HYPRE_Int +hypre_GenerateMultiPiDevice_flt( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix *P, HYPRE_BigInt *c_pts_starts, HYPRE_Int *pass_order, HYPRE_Int *pass_marker, HYPRE_Int *pass_marker_offd, HYPRE_Int num_points, HYPRE_Int color, HYPRE_Int 
num_functions, HYPRE_Int *dof_func, HYPRE_Int *dof_func_offd, hypre_ParCSRMatrix **Pi_ptr ); +HYPRE_Int +hypre_GenerateMultiPiDevice_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix *P, HYPRE_BigInt *c_pts_starts, HYPRE_Int *pass_order, HYPRE_Int *pass_marker, HYPRE_Int *pass_marker_offd, HYPRE_Int num_points, HYPRE_Int color, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int *dof_func_offd, hypre_ParCSRMatrix **Pi_ptr ); +HYPRE_Int +hypre_GenerateMultiPiDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix *P, HYPRE_BigInt *c_pts_starts, HYPRE_Int *pass_order, HYPRE_Int *pass_marker, HYPRE_Int *pass_marker_offd, HYPRE_Int num_points, HYPRE_Int color, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int *dof_func_offd, hypre_ParCSRMatrix **Pi_ptr ); + +HYPRE_Int +hypre_GenerateMultipassPiDevice_flt( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *S, HYPRE_BigInt *c_pts_starts, HYPRE_Int *pass_order, HYPRE_Int *pass_marker, HYPRE_Int *pass_marker_offd, HYPRE_Int num_points, HYPRE_Int color, hypre_float *row_sums, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_GenerateMultipassPiDevice_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *S, HYPRE_BigInt *c_pts_starts, HYPRE_Int *pass_order, HYPRE_Int *pass_marker, HYPRE_Int *pass_marker_offd, HYPRE_Int num_points, HYPRE_Int color, hypre_double *row_sums, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_GenerateMultipassPiDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *S, HYPRE_BigInt *c_pts_starts, HYPRE_Int *pass_order, HYPRE_Int *pass_marker, HYPRE_Int *pass_marker_offd, HYPRE_Int num_points, HYPRE_Int color, hypre_long_double *row_sums, hypre_ParCSRMatrix **P_ptr ); + +HYPRE_Int +hypre_GetGlobalMeasureDevice_flt( hypre_ParCSRMatrix *S, hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int CF_init, HYPRE_Int aug_rand, hypre_float *measure_diag, hypre_float *measure_offd, hypre_float *real_send_buf ); +HYPRE_Int +hypre_GetGlobalMeasureDevice_dbl( hypre_ParCSRMatrix *S, 
hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int CF_init, HYPRE_Int aug_rand, hypre_double *measure_diag, hypre_double *measure_offd, hypre_double *real_send_buf ); +HYPRE_Int +hypre_GetGlobalMeasureDevice_long_dbl( hypre_ParCSRMatrix *S, hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int CF_init, HYPRE_Int aug_rand, hypre_long_double *measure_diag, hypre_long_double *measure_offd, hypre_long_double *real_send_buf ); + +HYPRE_Int +hypre_ILUApplyLowerJacIterDevice_flt( hypre_CSRMatrix *A, hypre_Vector *input, hypre_Vector *work, hypre_Vector *output, HYPRE_Int lower_jacobi_iters ); +HYPRE_Int +hypre_ILUApplyLowerJacIterDevice_dbl( hypre_CSRMatrix *A, hypre_Vector *input, hypre_Vector *work, hypre_Vector *output, HYPRE_Int lower_jacobi_iters ); +HYPRE_Int +hypre_ILUApplyLowerJacIterDevice_long_dbl( hypre_CSRMatrix *A, hypre_Vector *input, hypre_Vector *work, hypre_Vector *output, HYPRE_Int lower_jacobi_iters ); + +HYPRE_Int +hypre_ILUApplyLowerUpperJacIterDevice_flt( hypre_CSRMatrix *A, hypre_Vector *work1, hypre_Vector *work2, hypre_Vector *inout, hypre_Vector *diag, HYPRE_Int lower_jacobi_iters, HYPRE_Int upper_jacobi_iters ); +HYPRE_Int +hypre_ILUApplyLowerUpperJacIterDevice_dbl( hypre_CSRMatrix *A, hypre_Vector *work1, hypre_Vector *work2, hypre_Vector *inout, hypre_Vector *diag, HYPRE_Int lower_jacobi_iters, HYPRE_Int upper_jacobi_iters ); +HYPRE_Int +hypre_ILUApplyLowerUpperJacIterDevice_long_dbl( hypre_CSRMatrix *A, hypre_Vector *work1, hypre_Vector *work2, hypre_Vector *inout, hypre_Vector *diag, HYPRE_Int lower_jacobi_iters, HYPRE_Int upper_jacobi_iters ); + +HYPRE_Int +hypre_ILUApplyUpperJacIterDevice_flt( hypre_CSRMatrix *A, hypre_Vector *input, hypre_Vector *work, hypre_Vector *output, hypre_Vector *diag, HYPRE_Int upper_jacobi_iters ); +HYPRE_Int +hypre_ILUApplyUpperJacIterDevice_dbl( hypre_CSRMatrix *A, hypre_Vector *input, hypre_Vector *work, hypre_Vector *output, hypre_Vector *diag, HYPRE_Int upper_jacobi_iters ); +HYPRE_Int +hypre_ILUApplyUpperJacIterDevice_long_dbl( 
hypre_CSRMatrix *A, hypre_Vector *input, hypre_Vector *work, hypre_Vector *output, hypre_Vector *diag, HYPRE_Int upper_jacobi_iters ); + +HYPRE_Int +hypre_ILUSetupDevice_flt( hypre_ParILUData *ilu_data, hypre_ParCSRMatrix *A, HYPRE_Int *perm_data, HYPRE_Int *qperm_data, HYPRE_Int n, HYPRE_Int nLU, hypre_CSRMatrix **BLUptr, hypre_ParCSRMatrix **matSptr, hypre_CSRMatrix **Eptr, hypre_CSRMatrix **Fptr ); +HYPRE_Int +hypre_ILUSetupDevice_dbl( hypre_ParILUData *ilu_data, hypre_ParCSRMatrix *A, HYPRE_Int *perm_data, HYPRE_Int *qperm_data, HYPRE_Int n, HYPRE_Int nLU, hypre_CSRMatrix **BLUptr, hypre_ParCSRMatrix **matSptr, hypre_CSRMatrix **Eptr, hypre_CSRMatrix **Fptr ); +HYPRE_Int +hypre_ILUSetupDevice_long_dbl( hypre_ParILUData *ilu_data, hypre_ParCSRMatrix *A, HYPRE_Int *perm_data, HYPRE_Int *qperm_data, HYPRE_Int n, HYPRE_Int nLU, hypre_CSRMatrix **BLUptr, hypre_ParCSRMatrix **matSptr, hypre_CSRMatrix **Eptr, hypre_CSRMatrix **Fptr ); + +HYPRE_Int +hypre_ILUSetupIterativeILU0Device_flt( hypre_CSRMatrix *A, HYPRE_Int type, HYPRE_Int option, HYPRE_Int max_iter, hypre_float tolerance, HYPRE_Int *num_iter_ptr, hypre_float **history_ptr ); +HYPRE_Int +hypre_ILUSetupIterativeILU0Device_dbl( hypre_CSRMatrix *A, HYPRE_Int type, HYPRE_Int option, HYPRE_Int max_iter, hypre_double tolerance, HYPRE_Int *num_iter_ptr, hypre_double **history_ptr ); +HYPRE_Int +hypre_ILUSetupIterativeILU0Device_long_dbl( hypre_CSRMatrix *A, HYPRE_Int type, HYPRE_Int option, HYPRE_Int max_iter, hypre_long_double tolerance, HYPRE_Int *num_iter_ptr, hypre_long_double **history_ptr ); + +HYPRE_Int +hypre_ILUSolveLUDevice_flt( hypre_ParCSRMatrix *A, hypre_CSRMatrix *matLU_d, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, hypre_ParVector *ftemp, hypre_ParVector *utemp ); +HYPRE_Int +hypre_ILUSolveLUDevice_dbl( hypre_ParCSRMatrix *A, hypre_CSRMatrix *matLU_d, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, hypre_ParVector *ftemp, hypre_ParVector *utemp ); +HYPRE_Int 
+hypre_ILUSolveLUDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_CSRMatrix *matLU_d, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, hypre_ParVector *ftemp, hypre_ParVector *utemp ); + +HYPRE_Int +hypre_ILUSolveLUIterDevice_flt( hypre_ParCSRMatrix *A, hypre_CSRMatrix *matLU, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, hypre_ParVector *ftemp, hypre_ParVector *utemp, hypre_ParVector *xtemp, hypre_Vector **diag_ptr, HYPRE_Int lower_jacobi_iters, HYPRE_Int upper_jacobi_iters ); +HYPRE_Int +hypre_ILUSolveLUIterDevice_dbl( hypre_ParCSRMatrix *A, hypre_CSRMatrix *matLU, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, hypre_ParVector *ftemp, hypre_ParVector *utemp, hypre_ParVector *xtemp, hypre_Vector **diag_ptr, HYPRE_Int lower_jacobi_iters, HYPRE_Int upper_jacobi_iters ); +HYPRE_Int +hypre_ILUSolveLUIterDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_CSRMatrix *matLU, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, hypre_ParVector *ftemp, hypre_ParVector *utemp, hypre_ParVector *xtemp, hypre_Vector **diag_ptr, HYPRE_Int lower_jacobi_iters, HYPRE_Int upper_jacobi_iters ); + +HYPRE_Int +hypre_ILUSolveRAPGMRESDevice_flt( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, HYPRE_Int nLU, hypre_ParCSRMatrix *S, hypre_ParVector *ftemp, hypre_ParVector *utemp, hypre_ParVector *xtemp, hypre_ParVector *ytemp, HYPRE_Solver schur_solver, HYPRE_Solver schur_precond, hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end, hypre_ParCSRMatrix *Aperm, hypre_CSRMatrix *matALU_d, hypre_CSRMatrix *matBLU_d, hypre_CSRMatrix *matE_d, hypre_CSRMatrix *matF_d, HYPRE_Int test_opt ); +HYPRE_Int +hypre_ILUSolveRAPGMRESDevice_dbl( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, HYPRE_Int nLU, hypre_ParCSRMatrix *S, hypre_ParVector *ftemp, hypre_ParVector *utemp, hypre_ParVector *xtemp, hypre_ParVector *ytemp, HYPRE_Solver schur_solver, HYPRE_Solver schur_precond, hypre_ParVector *rhs, 
hypre_ParVector *x, HYPRE_Int *u_end, hypre_ParCSRMatrix *Aperm, hypre_CSRMatrix *matALU_d, hypre_CSRMatrix *matBLU_d, hypre_CSRMatrix *matE_d, hypre_CSRMatrix *matF_d, HYPRE_Int test_opt ); +HYPRE_Int +hypre_ILUSolveRAPGMRESDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, HYPRE_Int nLU, hypre_ParCSRMatrix *S, hypre_ParVector *ftemp, hypre_ParVector *utemp, hypre_ParVector *xtemp, hypre_ParVector *ytemp, HYPRE_Solver schur_solver, HYPRE_Solver schur_precond, hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end, hypre_ParCSRMatrix *Aperm, hypre_CSRMatrix *matALU_d, hypre_CSRMatrix *matBLU_d, hypre_CSRMatrix *matE_d, hypre_CSRMatrix *matF_d, HYPRE_Int test_opt ); + +HYPRE_Int +hypre_ILUSolveSchurGMRESDevice_flt( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, HYPRE_Int nLU, hypre_ParCSRMatrix *S, hypre_ParVector *ftemp, hypre_ParVector *utemp, HYPRE_Solver schur_solver, HYPRE_Solver schur_precond, hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end, hypre_CSRMatrix *matBLU_d, hypre_CSRMatrix *matE_d, hypre_CSRMatrix *matF_d ); +HYPRE_Int +hypre_ILUSolveSchurGMRESDevice_dbl( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, HYPRE_Int nLU, hypre_ParCSRMatrix *S, hypre_ParVector *ftemp, hypre_ParVector *utemp, HYPRE_Solver schur_solver, HYPRE_Solver schur_precond, hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end, hypre_CSRMatrix *matBLU_d, hypre_CSRMatrix *matE_d, hypre_CSRMatrix *matF_d ); +HYPRE_Int +hypre_ILUSolveSchurGMRESDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, HYPRE_Int nLU, hypre_ParCSRMatrix *S, hypre_ParVector *ftemp, hypre_ParVector *utemp, HYPRE_Solver schur_solver, HYPRE_Solver schur_precond, hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end, hypre_CSRMatrix *matBLU_d, hypre_CSRMatrix *matE_d, hypre_CSRMatrix *matF_d ); + +HYPRE_Int 
+hypre_ILUSolveSchurGMRESJacIterDevice_flt( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, HYPRE_Int nLU, hypre_ParCSRMatrix *S, hypre_ParVector *ftemp, hypre_ParVector *utemp, HYPRE_Solver schur_solver, HYPRE_Solver schur_precond, hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end, hypre_CSRMatrix *matBLU_d, hypre_CSRMatrix *matE_d, hypre_CSRMatrix *matF_d, hypre_ParVector *ztemp, hypre_Vector **Adiag_diag, hypre_Vector **Sdiag_diag, HYPRE_Int lower_jacobi_iters, HYPRE_Int upper_jacobi_iters ); +HYPRE_Int +hypre_ILUSolveSchurGMRESJacIterDevice_dbl( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, HYPRE_Int nLU, hypre_ParCSRMatrix *S, hypre_ParVector *ftemp, hypre_ParVector *utemp, HYPRE_Solver schur_solver, HYPRE_Solver schur_precond, hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end, hypre_CSRMatrix *matBLU_d, hypre_CSRMatrix *matE_d, hypre_CSRMatrix *matF_d, hypre_ParVector *ztemp, hypre_Vector **Adiag_diag, hypre_Vector **Sdiag_diag, HYPRE_Int lower_jacobi_iters, HYPRE_Int upper_jacobi_iters ); +HYPRE_Int +hypre_ILUSolveSchurGMRESJacIterDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, HYPRE_Int nLU, hypre_ParCSRMatrix *S, hypre_ParVector *ftemp, hypre_ParVector *utemp, HYPRE_Solver schur_solver, HYPRE_Solver schur_precond, hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end, hypre_CSRMatrix *matBLU_d, hypre_CSRMatrix *matE_d, hypre_CSRMatrix *matF_d, hypre_ParVector *ztemp, hypre_Vector **Adiag_diag, hypre_Vector **Sdiag_diag, HYPRE_Int lower_jacobi_iters, HYPRE_Int upper_jacobi_iters ); + +HYPRE_Int +hypre_MGRBuildPDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *num_cpts_global, HYPRE_Int method, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_MGRBuildPDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *num_cpts_global, HYPRE_Int method, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int 
+hypre_MGRBuildPDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *num_cpts_global, HYPRE_Int method, hypre_ParCSRMatrix **P_ptr ); + +HYPRE_Int +hypre_MGRBuildPFromWpDevice_flt( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *Wp, HYPRE_Int *CF_marker, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_MGRBuildPFromWpDevice_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *Wp, HYPRE_Int *CF_marker, hypre_ParCSRMatrix **P_ptr ); +HYPRE_Int +hypre_MGRBuildPFromWpDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *Wp, HYPRE_Int *CF_marker, hypre_ParCSRMatrix **P_ptr ); + +HYPRE_Int +hypre_MGRBuildRFromWrDevice_flt( hypre_IntArray *C_map, hypre_IntArray *F_map, hypre_ParCSRMatrix *Wr, hypre_ParCSRMatrix *R ); +HYPRE_Int +hypre_MGRBuildRFromWrDevice_dbl( hypre_IntArray *C_map, hypre_IntArray *F_map, hypre_ParCSRMatrix *Wr, hypre_ParCSRMatrix *R ); +HYPRE_Int +hypre_MGRBuildRFromWrDevice_long_dbl( hypre_IntArray *C_map, hypre_IntArray *F_map, hypre_ParCSRMatrix *Wr, hypre_ParCSRMatrix *R ); + +HYPRE_Int +hypre_MGRRelaxL1JacobiDevice_flt( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *CF_marker_host, HYPRE_Int relax_points, hypre_float relax_weight, hypre_float *l1_norms, hypre_ParVector *u, hypre_ParVector *Vtemp ); +HYPRE_Int +hypre_MGRRelaxL1JacobiDevice_dbl( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *CF_marker_host, HYPRE_Int relax_points, hypre_double relax_weight, hypre_double *l1_norms, hypre_ParVector *u, hypre_ParVector *Vtemp ); +HYPRE_Int +hypre_MGRRelaxL1JacobiDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *CF_marker_host, HYPRE_Int relax_points, hypre_long_double relax_weight, hypre_long_double *l1_norms, hypre_ParVector *u, hypre_ParVector *Vtemp ); + +HYPRE_Int +hypre_ParCSRMatrixBlockDiagMatrixDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int blk_size, HYPRE_Int point_type, HYPRE_Int *CF_marker, HYPRE_Int diag_type, hypre_ParCSRMatrix **B_ptr ); +HYPRE_Int 
+hypre_ParCSRMatrixBlockDiagMatrixDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int blk_size, HYPRE_Int point_type, HYPRE_Int *CF_marker, HYPRE_Int diag_type, hypre_ParCSRMatrix **B_ptr ); +HYPRE_Int +hypre_ParCSRMatrixBlockDiagMatrixDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int blk_size, HYPRE_Int point_type, HYPRE_Int *CF_marker, HYPRE_Int diag_type, hypre_ParCSRMatrix **B_ptr ); + +HYPRE_Int +hypre_ParCSRMatrixExtractBlockDiagDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int blk_size, HYPRE_Int num_points, HYPRE_Int point_type, HYPRE_Int *CF_marker, HYPRE_Int diag_size, HYPRE_Int diag_type, HYPRE_Int *B_diag_i, HYPRE_Int *B_diag_j, hypre_float *B_diag_data ); +HYPRE_Int +hypre_ParCSRMatrixExtractBlockDiagDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int blk_size, HYPRE_Int num_points, HYPRE_Int point_type, HYPRE_Int *CF_marker, HYPRE_Int diag_size, HYPRE_Int diag_type, HYPRE_Int *B_diag_i, HYPRE_Int *B_diag_j, hypre_double *B_diag_data ); +HYPRE_Int +hypre_ParCSRMatrixExtractBlockDiagDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int blk_size, HYPRE_Int num_points, HYPRE_Int point_type, HYPRE_Int *CF_marker, HYPRE_Int diag_size, HYPRE_Int diag_type, HYPRE_Int *B_diag_i, HYPRE_Int *B_diag_j, hypre_long_double *B_diag_data ); + +HYPRE_Int +hypre_ParCSRMaxEigEstimateCGDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int scale, HYPRE_Int max_iter, hypre_float *max_eig, hypre_float *min_eig ); +HYPRE_Int +hypre_ParCSRMaxEigEstimateCGDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int scale, HYPRE_Int max_iter, hypre_double *max_eig, hypre_double *min_eig ); +HYPRE_Int +hypre_ParCSRMaxEigEstimateCGDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int scale, HYPRE_Int max_iter, hypre_long_double *max_eig, hypre_long_double *min_eig ); + +HYPRE_Int +hypre_ParCSRMaxEigEstimateDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int scale, hypre_float *max_eig, hypre_float *min_eig ); +HYPRE_Int +hypre_ParCSRMaxEigEstimateDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int scale, hypre_double *max_eig, hypre_double 
*min_eig ); +HYPRE_Int +hypre_ParCSRMaxEigEstimateDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int scale, hypre_long_double *max_eig, hypre_long_double *min_eig ); + +HYPRE_Int +hypre_ParCSRRelax_Cheby_SolveDevice_flt( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_float *ds_data, hypre_float *coefs, HYPRE_Int order, HYPRE_Int scale, HYPRE_Int variant, hypre_ParVector *u, hypre_ParVector *v, hypre_ParVector *r, hypre_ParVector *orig_u_vec, hypre_ParVector *tmp_vec ); +HYPRE_Int +hypre_ParCSRRelax_Cheby_SolveDevice_dbl( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_double *ds_data, hypre_double *coefs, HYPRE_Int order, HYPRE_Int scale, HYPRE_Int variant, hypre_ParVector *u, hypre_ParVector *v, hypre_ParVector *r, hypre_ParVector *orig_u_vec, hypre_ParVector *tmp_vec ); +HYPRE_Int +hypre_ParCSRRelax_Cheby_SolveDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_long_double *ds_data, hypre_long_double *coefs, HYPRE_Int order, HYPRE_Int scale, HYPRE_Int variant, hypre_ParVector *u, hypre_ParVector *v, hypre_ParVector *r, hypre_ParVector *orig_u_vec, hypre_ParVector *tmp_vec ); + +HYPRE_Int +hypre_ParILURAPSchurGMRESMatvecDevice_flt( void *matvec_data, hypre_float alpha, void *ilu_vdata, void *x, hypre_float beta, void *y ); +HYPRE_Int +hypre_ParILURAPSchurGMRESMatvecDevice_dbl( void *matvec_data, hypre_double alpha, void *ilu_vdata, void *x, hypre_double beta, void *y ); +HYPRE_Int +hypre_ParILURAPSchurGMRESMatvecDevice_long_dbl( void *matvec_data, hypre_long_double alpha, void *ilu_vdata, void *x, hypre_long_double beta, void *y ); + +HYPRE_Int +hypre_ParILURAPSchurGMRESSolveDevice_flt( void *ilu_vdata, void *ilu_vdata2, hypre_ParVector *par_f, hypre_ParVector *par_u ); +HYPRE_Int +hypre_ParILURAPSchurGMRESSolveDevice_dbl( void *ilu_vdata, void *ilu_vdata2, hypre_ParVector *par_f, hypre_ParVector *par_u ); +HYPRE_Int +hypre_ParILURAPSchurGMRESSolveDevice_long_dbl( void *ilu_vdata, void *ilu_vdata2, hypre_ParVector *par_f, hypre_ParVector *par_u ); 
+ +HYPRE_Int +hypre_ParILUSchurGMRESCommInfoDevice_flt( void *ilu_vdata, HYPRE_Int *my_id, HYPRE_Int *num_procs ); +HYPRE_Int +hypre_ParILUSchurGMRESCommInfoDevice_dbl( void *ilu_vdata, HYPRE_Int *my_id, HYPRE_Int *num_procs ); +HYPRE_Int +hypre_ParILUSchurGMRESCommInfoDevice_long_dbl( void *ilu_vdata, HYPRE_Int *my_id, HYPRE_Int *num_procs ); + +HYPRE_Int +hypre_ParILUSchurGMRESDummySolveDevice_flt( void *ilu_vdata, void *ilu_vdata2, hypre_ParVector *f, hypre_ParVector *u ); +HYPRE_Int +hypre_ParILUSchurGMRESDummySolveDevice_dbl( void *ilu_vdata, void *ilu_vdata2, hypre_ParVector *f, hypre_ParVector *u ); +HYPRE_Int +hypre_ParILUSchurGMRESDummySolveDevice_long_dbl( void *ilu_vdata, void *ilu_vdata2, hypre_ParVector *f, hypre_ParVector *u ); + +HYPRE_Int +hypre_ParILUSchurGMRESMatvecDevice_flt( void *matvec_data, hypre_float alpha, void *ilu_vdata, void *x, hypre_float beta, void *y ); +HYPRE_Int +hypre_ParILUSchurGMRESMatvecDevice_dbl( void *matvec_data, hypre_double alpha, void *ilu_vdata, void *x, hypre_double beta, void *y ); +HYPRE_Int +hypre_ParILUSchurGMRESMatvecDevice_long_dbl( void *matvec_data, hypre_long_double alpha, void *ilu_vdata, void *x, hypre_long_double beta, void *y ); + +HYPRE_Int +hypre_ParILUSchurGMRESMatvecJacIterDevice_flt( void *matvec_data, hypre_float alpha, void *ilu_vdata, void *x, hypre_float beta, void *y ); +HYPRE_Int +hypre_ParILUSchurGMRESMatvecJacIterDevice_dbl( void *matvec_data, hypre_double alpha, void *ilu_vdata, void *x, hypre_double beta, void *y ); +HYPRE_Int +hypre_ParILUSchurGMRESMatvecJacIterDevice_long_dbl( void *matvec_data, hypre_long_double alpha, void *ilu_vdata, void *x, hypre_long_double beta, void *y ); + +/* functions_gpu */ + +/* pre_gpu */ + + +#endif +#endif + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus +extern "C++" { +#endif + +#if defined (HYPRE_MIXED_PRECISION) +#if defined(HYPRE_USING_GPU) + +/* fixed_gpu */ + + +#endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif + diff --git 
a/src/parcsr_ls/_hypre_parcsr_ls_mup_def.h b/src/parcsr_ls/_hypre_parcsr_ls_mup_def.h index d952c9cde2..d220b525fb 100644 --- a/src/parcsr_ls/_hypre_parcsr_ls_mup_def.h +++ b/src/parcsr_ls/_hypre_parcsr_ls_mup_def.h @@ -2186,5 +2186,124 @@ #define rfun HYPRE_FIXEDPRECISION_FUNC ( rfun ) #define rfun_rs HYPRE_FIXEDPRECISION_FUNC ( rfun_rs ) #define transpose_matrix_create HYPRE_FIXEDPRECISION_FUNC ( transpose_matrix_create ) +#define hypreDevice_extendWtoP HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_extendWtoP ) +#define hypreGPUKernel_AMSComputeGPi_copy2 HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_AMSComputeGPi_copy2 ) +#define hypreGPUKernel_AMSComputePi_copy1 HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_AMSComputePi_copy1 ) +#define hypreGPUKernel_AMSComputePi_copy2 HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_AMSComputePi_copy2 ) +#define hypreGPUKernel_AMSComputePixyz_copy HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_AMSComputePixyz_copy ) +#define hypreGPUKernel_AMSSetupScaleGGt HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_AMSSetupScaleGGt ) +#define hypreGPUKernel_BatchedGaussJordanSolve HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_BatchedGaussJordanSolve ) +#define hypreGPUKernel_CSRMatrixExtractBlockDiag HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_CSRMatrixExtractBlockDiag ) +#define hypreGPUKernel_CSRMatrixExtractBlockDiagMarked HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_CSRMatrixExtractBlockDiagMarked ) +#define hypreGPUKernel_CSRMaxEigEstimate HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_CSRMaxEigEstimate ) +#define hypreGPUKernel_ComplexMatrixBatchedTranspose HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_ComplexMatrixBatchedTranspose ) +#define hypreGPUKernel_FSAIExtractSubSystems HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_FSAIExtractSubSystems ) +#define hypreGPUKernel_FSAIGatherEntries HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_FSAIGatherEntries ) +#define hypreGPUKernel_FSAIScaling HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_FSAIScaling ) +#define 
hypreGPUKernel_FSAITruncateCandidateOrdered HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_FSAITruncateCandidateOrdered ) +#define hypreGPUKernel_FSAITruncateCandidateUnordered HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_FSAITruncateCandidateUnordered ) +#define hypreGPUKernel_FixInterNodes HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_FixInterNodes ) +#define hypreGPUKernel_GtEliminateBoundary HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_GtEliminateBoundary ) +#define hypreGPUKernel_IndepSetFixMarker HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_IndepSetFixMarker ) +#define hypreGPUKernel_IndepSetMain HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_IndepSetMain ) +#define hypreGPUKernel_InterpTruncationPass0_v1 HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_InterpTruncationPass0_v1 ) +#define hypreGPUKernel_InterpTruncationPass1_v1 HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_InterpTruncationPass1_v1 ) +#define hypreGPUKernel_InterpTruncationPass2_v1 HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_InterpTruncationPass2_v1 ) +#define hypreGPUKernel_InterpTruncation_v2 HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_InterpTruncation_v2 ) +#define hypreGPUKernel_MMInterpScaleAFF HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_MMInterpScaleAFF ) +#define hypreGPUKernel_MMPEInterpScaleAFF HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_MMPEInterpScaleAFF ) +#define hypreGPUKernel_PMISCoarseningInit HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_PMISCoarseningInit ) +#define hypreGPUKernel_PMISCoarseningUpdateCF HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_PMISCoarseningUpdateCF ) +#define hypreGPUKernel_ParCSRMatrixFixZeroRows HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_ParCSRMatrixFixZeroRows ) +#define hypreGPUKernel_ParCSRMatrixSetDiagRows HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_ParCSRMatrixSetDiagRows ) +#define hypreGPUKernel_cfmarker_masked_rowsum HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_cfmarker_masked_rowsum ) +#define hypreGPUKernel_compute_aff_afc HYPRE_FIXEDPRECISION_FUNC ( 
hypreGPUKernel_compute_aff_afc ) +#define hypreGPUKernel_compute_aff_afc_epe HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_compute_aff_afc_epe ) +#define hypreGPUKernel_compute_dlam_dtmp HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_compute_dlam_dtmp ) +#define hypreGPUKernel_compute_twiaff_w HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_compute_twiaff_w ) +#define hypreGPUKernel_compute_weak_rowsums HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_compute_weak_rowsums ) +#define hypreGPUKernel_generate_Pdiag_i_Poffd_i HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_generate_Pdiag_i_Poffd_i ) +#define hypreGPUKernel_generate_Pdiag_j_Poffd_j HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_generate_Pdiag_j_Poffd_j ) +#define hypreGPUKernel_generate_Qdiag_j_Qoffd_j HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_generate_Qdiag_j_Qoffd_j ) +#define hypreGPUKernel_insert_remaining_weights HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_insert_remaining_weights ) +#define hypreGPUKernel_mutli_pi_rowsum HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_mutli_pi_rowsum ) +#define hypreGPUKernel_pass_order_count HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_pass_order_count ) +#define hypreGPUKernel_populate_big_P_offd_j HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_populate_big_P_offd_j ) +#define hypre_BatchedGaussJordanSolveDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BatchedGaussJordanSolveDevice ) +#define hypre_BoomerAMGBuildDirInterpDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGBuildDirInterpDevice ) +#define hypre_BoomerAMGBuildDirInterp_getcoef HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGBuildDirInterp_getcoef ) +#define hypre_BoomerAMGBuildDirInterp_getcoef_v2 HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGBuildDirInterp_getcoef_v2 ) +#define hypre_BoomerAMGBuildDirInterp_getnnz HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGBuildDirInterp_getnnz ) +#define hypre_BoomerAMGBuildExtInterpDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGBuildExtInterpDevice ) +#define hypre_BoomerAMGBuildExtPEInterpDevice 
HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGBuildExtPEInterpDevice ) +#define hypre_BoomerAMGBuildExtPIInterpDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGBuildExtPIInterpDevice ) +#define hypre_BoomerAMGBuildInterpOnePntDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGBuildInterpOnePntDevice ) +#define hypre_BoomerAMGBuildInterpOnePnt_getnnz HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGBuildInterpOnePnt_getnnz ) +#define hypre_BoomerAMGBuildModMultipassDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGBuildModMultipassDevice ) +#define hypre_BoomerAMGBuildModPartialExtInterpDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGBuildModPartialExtInterpDevice ) +#define hypre_BoomerAMGBuildModPartialExtPEInterpDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGBuildModPartialExtPEInterpDevice ) +#define hypre_BoomerAMGBuildRestrNeumannAIRDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGBuildRestrNeumannAIRDevice ) +#define hypre_BoomerAMGBuildRestrNeumannAIR_assembleRdiag HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGBuildRestrNeumannAIR_assembleRdiag ) +#define hypre_BoomerAMGCFMarkerTo1minus1Device HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGCFMarkerTo1minus1Device ) +#define hypre_BoomerAMGCoarseParmsDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGCoarseParmsDevice ) +#define hypre_BoomerAMGCoarsenPMISDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGCoarsenPMISDevice ) +#define hypre_BoomerAMGCorrectCFMarker2Device HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGCorrectCFMarker2Device ) +#define hypre_BoomerAMGCorrectCFMarkerDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGCorrectCFMarkerDevice ) +#define hypre_BoomerAMGCreate2ndSDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGCreate2ndSDevice ) +#define hypre_BoomerAMGCreateSDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGCreateSDevice ) +#define hypre_BoomerAMGCreateS_rowcount HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGCreateS_rowcount ) +#define hypre_BoomerAMGCreateSabs_rowcount HYPRE_FIXEDPRECISION_FUNC ( 
hypre_BoomerAMGCreateSabs_rowcount ) +#define hypre_BoomerAMGDD_FAC_CFL1JacobiDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGDD_FAC_CFL1JacobiDevice ) +#define hypre_BoomerAMGDD_FAC_JacobiDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGDD_FAC_JacobiDevice ) +#define hypre_BoomerAMGIndepSetDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGIndepSetDevice ) +#define hypre_BoomerAMGIndepSetInitDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGIndepSetInitDevice ) +#define hypre_BoomerAMGInitDofFuncDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGInitDofFuncDevice ) +#define hypre_BoomerAMGInterpTruncationDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGInterpTruncationDevice ) +#define hypre_BoomerAMGInterpTruncationDevice_v1 HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGInterpTruncationDevice_v1 ) +#define hypre_BoomerAMGInterpTruncationDevice_v2 HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGInterpTruncationDevice_v2 ) +#define hypre_BoomerAMGMakeSocFromSDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGMakeSocFromSDevice ) +#define hypre_BoomerAMGRelaxHybridGaussSeidelDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGRelaxHybridGaussSeidelDevice ) +#define hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice ) +#define hypre_FSAIExtractSubSystemsDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_FSAIExtractSubSystemsDevice ) +#define hypre_FSAIGatherEntriesDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_FSAIGatherEntriesDevice ) +#define hypre_FSAIScalingDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_FSAIScalingDevice ) +#define hypre_FSAISetupDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_FSAISetupDevice ) +#define hypre_FSAISetupStaticPowerDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_FSAISetupStaticPowerDevice ) +#define hypre_FSAITruncateCandidateDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_FSAITruncateCandidateDevice ) +#define hypre_GaussElimSetupDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_GaussElimSetupDevice ) +#define 
hypre_GaussElimSolveDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_GaussElimSolveDevice ) +#define hypre_GenerateMultiPiDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_GenerateMultiPiDevice ) +#define hypre_GenerateMultipassPiDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_GenerateMultipassPiDevice ) +#define hypre_GetGlobalMeasureDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_GetGlobalMeasureDevice ) +#define hypre_ILUApplyLowerJacIterDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ILUApplyLowerJacIterDevice ) +#define hypre_ILUApplyLowerUpperJacIterDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ILUApplyLowerUpperJacIterDevice ) +#define hypre_ILUApplyUpperJacIterDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ILUApplyUpperJacIterDevice ) +#define hypre_ILUSetupDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ILUSetupDevice ) +#define hypre_ILUSetupIterativeILU0Device HYPRE_FIXEDPRECISION_FUNC ( hypre_ILUSetupIterativeILU0Device ) +#define hypre_ILUSolveLUDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ILUSolveLUDevice ) +#define hypre_ILUSolveLUIterDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ILUSolveLUIterDevice ) +#define hypre_ILUSolveRAPGMRESDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ILUSolveRAPGMRESDevice ) +#define hypre_ILUSolveSchurGMRESDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ILUSolveSchurGMRESDevice ) +#define hypre_ILUSolveSchurGMRESJacIterDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ILUSolveSchurGMRESJacIterDevice ) +#define hypre_MGRBuildPDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_MGRBuildPDevice ) +#define hypre_MGRBuildPFromWpDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_MGRBuildPFromWpDevice ) +#define hypre_MGRBuildRFromWrDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_MGRBuildRFromWrDevice ) +#define hypre_MGRRelaxL1JacobiDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_MGRRelaxL1JacobiDevice ) +#define hypre_PMISCoarseningInitDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_PMISCoarseningInitDevice ) +#define hypre_PMISCoarseningUpdateCFDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_PMISCoarseningUpdateCFDevice ) +#define 
hypre_ParCSRMatrixBlockDiagMatrixDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixBlockDiagMatrixDevice ) +#define hypre_ParCSRMatrixExtractBlockDiagDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixExtractBlockDiagDevice ) +#define hypre_ParCSRMatrixFixZeroRowsDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixFixZeroRowsDevice ) +#define hypre_ParCSRMaxEigEstimateCGDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMaxEigEstimateCGDevice ) +#define hypre_ParCSRMaxEigEstimateDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMaxEigEstimateDevice ) +#define hypre_ParCSRRelax_Cheby_SolveDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRRelax_Cheby_SolveDevice ) +#define hypre_ParILURAPSchurGMRESMatvecDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParILURAPSchurGMRESMatvecDevice ) +#define hypre_ParILURAPSchurGMRESSolveDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParILURAPSchurGMRESSolveDevice ) +#define hypre_ParILUSchurGMRESCommInfoDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParILUSchurGMRESCommInfoDevice ) +#define hypre_ParILUSchurGMRESDummySolveDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParILUSchurGMRESDummySolveDevice ) +#define hypre_ParILUSchurGMRESMatvecDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParILUSchurGMRESMatvecDevice ) +#define hypre_ParILUSchurGMRESMatvecJacIterDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParILUSchurGMRESMatvecJacIterDevice ) +#define hypre_modmp_compute_num_cols_offd_fine_to_coarse HYPRE_FIXEDPRECISION_FUNC ( hypre_modmp_compute_num_cols_offd_fine_to_coarse ) +#define hypre_modmp_init_fine_to_coarse HYPRE_FIXEDPRECISION_FUNC ( hypre_modmp_init_fine_to_coarse ) #endif diff --git a/src/parcsr_ls/_hypre_parcsr_ls_mup_undef.h b/src/parcsr_ls/_hypre_parcsr_ls_mup_undef.h index 857b96bfac..6524738dcc 100644 --- a/src/parcsr_ls/_hypre_parcsr_ls_mup_undef.h +++ b/src/parcsr_ls/_hypre_parcsr_ls_mup_undef.h @@ -2183,3 +2183,122 @@ #undef rfun #undef rfun_rs #undef transpose_matrix_create +#undef hypreDevice_extendWtoP +#undef hypreGPUKernel_AMSComputeGPi_copy2 
+#undef hypreGPUKernel_AMSComputePi_copy1 +#undef hypreGPUKernel_AMSComputePi_copy2 +#undef hypreGPUKernel_AMSComputePixyz_copy +#undef hypreGPUKernel_AMSSetupScaleGGt +#undef hypreGPUKernel_BatchedGaussJordanSolve +#undef hypreGPUKernel_CSRMatrixExtractBlockDiag +#undef hypreGPUKernel_CSRMatrixExtractBlockDiagMarked +#undef hypreGPUKernel_CSRMaxEigEstimate +#undef hypreGPUKernel_ComplexMatrixBatchedTranspose +#undef hypreGPUKernel_FSAIExtractSubSystems +#undef hypreGPUKernel_FSAIGatherEntries +#undef hypreGPUKernel_FSAIScaling +#undef hypreGPUKernel_FSAITruncateCandidateOrdered +#undef hypreGPUKernel_FSAITruncateCandidateUnordered +#undef hypreGPUKernel_FixInterNodes +#undef hypreGPUKernel_GtEliminateBoundary +#undef hypreGPUKernel_IndepSetFixMarker +#undef hypreGPUKernel_IndepSetMain +#undef hypreGPUKernel_InterpTruncationPass0_v1 +#undef hypreGPUKernel_InterpTruncationPass1_v1 +#undef hypreGPUKernel_InterpTruncationPass2_v1 +#undef hypreGPUKernel_InterpTruncation_v2 +#undef hypreGPUKernel_MMInterpScaleAFF +#undef hypreGPUKernel_MMPEInterpScaleAFF +#undef hypreGPUKernel_PMISCoarseningInit +#undef hypreGPUKernel_PMISCoarseningUpdateCF +#undef hypreGPUKernel_ParCSRMatrixFixZeroRows +#undef hypreGPUKernel_ParCSRMatrixSetDiagRows +#undef hypreGPUKernel_cfmarker_masked_rowsum +#undef hypreGPUKernel_compute_aff_afc +#undef hypreGPUKernel_compute_aff_afc_epe +#undef hypreGPUKernel_compute_dlam_dtmp +#undef hypreGPUKernel_compute_twiaff_w +#undef hypreGPUKernel_compute_weak_rowsums +#undef hypreGPUKernel_generate_Pdiag_i_Poffd_i +#undef hypreGPUKernel_generate_Pdiag_j_Poffd_j +#undef hypreGPUKernel_generate_Qdiag_j_Qoffd_j +#undef hypreGPUKernel_insert_remaining_weights +#undef hypreGPUKernel_mutli_pi_rowsum +#undef hypreGPUKernel_pass_order_count +#undef hypreGPUKernel_populate_big_P_offd_j +#undef hypre_BatchedGaussJordanSolveDevice +#undef hypre_BoomerAMGBuildDirInterpDevice +#undef hypre_BoomerAMGBuildDirInterp_getcoef +#undef hypre_BoomerAMGBuildDirInterp_getcoef_v2 
+#undef hypre_BoomerAMGBuildDirInterp_getnnz +#undef hypre_BoomerAMGBuildExtInterpDevice +#undef hypre_BoomerAMGBuildExtPEInterpDevice +#undef hypre_BoomerAMGBuildExtPIInterpDevice +#undef hypre_BoomerAMGBuildInterpOnePntDevice +#undef hypre_BoomerAMGBuildInterpOnePnt_getnnz +#undef hypre_BoomerAMGBuildModMultipassDevice +#undef hypre_BoomerAMGBuildModPartialExtInterpDevice +#undef hypre_BoomerAMGBuildModPartialExtPEInterpDevice +#undef hypre_BoomerAMGBuildRestrNeumannAIRDevice +#undef hypre_BoomerAMGBuildRestrNeumannAIR_assembleRdiag +#undef hypre_BoomerAMGCFMarkerTo1minus1Device +#undef hypre_BoomerAMGCoarseParmsDevice +#undef hypre_BoomerAMGCoarsenPMISDevice +#undef hypre_BoomerAMGCorrectCFMarker2Device +#undef hypre_BoomerAMGCorrectCFMarkerDevice +#undef hypre_BoomerAMGCreate2ndSDevice +#undef hypre_BoomerAMGCreateSDevice +#undef hypre_BoomerAMGCreateS_rowcount +#undef hypre_BoomerAMGCreateSabs_rowcount +#undef hypre_BoomerAMGDD_FAC_CFL1JacobiDevice +#undef hypre_BoomerAMGDD_FAC_JacobiDevice +#undef hypre_BoomerAMGIndepSetDevice +#undef hypre_BoomerAMGIndepSetInitDevice +#undef hypre_BoomerAMGInitDofFuncDevice +#undef hypre_BoomerAMGInterpTruncationDevice +#undef hypre_BoomerAMGInterpTruncationDevice_v1 +#undef hypre_BoomerAMGInterpTruncationDevice_v2 +#undef hypre_BoomerAMGMakeSocFromSDevice +#undef hypre_BoomerAMGRelaxHybridGaussSeidelDevice +#undef hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice +#undef hypre_FSAIExtractSubSystemsDevice +#undef hypre_FSAIGatherEntriesDevice +#undef hypre_FSAIScalingDevice +#undef hypre_FSAISetupDevice +#undef hypre_FSAISetupStaticPowerDevice +#undef hypre_FSAITruncateCandidateDevice +#undef hypre_GaussElimSetupDevice +#undef hypre_GaussElimSolveDevice +#undef hypre_GenerateMultiPiDevice +#undef hypre_GenerateMultipassPiDevice +#undef hypre_GetGlobalMeasureDevice +#undef hypre_ILUApplyLowerJacIterDevice +#undef hypre_ILUApplyLowerUpperJacIterDevice +#undef hypre_ILUApplyUpperJacIterDevice +#undef hypre_ILUSetupDevice +#undef 
hypre_ILUSetupIterativeILU0Device +#undef hypre_ILUSolveLUDevice +#undef hypre_ILUSolveLUIterDevice +#undef hypre_ILUSolveRAPGMRESDevice +#undef hypre_ILUSolveSchurGMRESDevice +#undef hypre_ILUSolveSchurGMRESJacIterDevice +#undef hypre_MGRBuildPDevice +#undef hypre_MGRBuildPFromWpDevice +#undef hypre_MGRBuildRFromWrDevice +#undef hypre_MGRRelaxL1JacobiDevice +#undef hypre_PMISCoarseningInitDevice +#undef hypre_PMISCoarseningUpdateCFDevice +#undef hypre_ParCSRMatrixBlockDiagMatrixDevice +#undef hypre_ParCSRMatrixExtractBlockDiagDevice +#undef hypre_ParCSRMatrixFixZeroRowsDevice +#undef hypre_ParCSRMaxEigEstimateCGDevice +#undef hypre_ParCSRMaxEigEstimateDevice +#undef hypre_ParCSRRelax_Cheby_SolveDevice +#undef hypre_ParILURAPSchurGMRESMatvecDevice +#undef hypre_ParILURAPSchurGMRESSolveDevice +#undef hypre_ParILUSchurGMRESCommInfoDevice +#undef hypre_ParILUSchurGMRESDummySolveDevice +#undef hypre_ParILUSchurGMRESMatvecDevice +#undef hypre_ParILUSchurGMRESMatvecJacIterDevice +#undef hypre_modmp_compute_num_cols_offd_fine_to_coarse +#undef hypre_modmp_init_fine_to_coarse diff --git a/src/parcsr_ls/headers b/src/parcsr_ls/headers index e81c08390e..b55f3e32e0 100755 --- a/src/parcsr_ls/headers +++ b/src/parcsr_ls/headers @@ -74,3 +74,60 @@ cat >> $INTERNAL_HEADER <<@ #endif @ + + +INTERNAL_HEADER=_hypre_parcsr_ls.hpp + +#=========================================================================== +# Include guards and other includes +#=========================================================================== + +cat > $INTERNAL_HEADER <<@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use 'headers' to generate) ***/ + +#ifndef hypre_PARCSR_LS_HPP +#define hypre_PARCSR_LS_HPP + +#include +#include "_hypre_utilities.hpp" + +#ifdef HYPRE_MIXED_PRECISION +#include "_hypre_parcsr_ls_mup_def.h" +#endif + +#ifdef __cplusplus +extern "C++" { +#endif + +@ + +#=========================================================================== +# Structures and prototypes 
+#=========================================================================== + +#=========================================================================== +# Include guards +#=========================================================================== + +cat >> $INTERNAL_HEADER <<@ + +#ifdef __cplusplus +} +#endif + +#ifdef HYPRE_MIXED_PRECISION +/* The following is for user compiles and the order is important. The first + * header ensures that we do not change prototype names in user files or in the + * second header file. The second header contains all the prototypes needed by + * users for mixed precision. */ +#ifndef hypre_MP_BUILD +#include "_hypre_parcsr_ls_mup_undef.h" +#include "_hypre_parcsr_ls_mup.h" +#include "_hypre_parcsr_ls_mup.hpp" +#endif +#endif + +#endif + +@ diff --git a/src/parcsr_ls/mup.fixed_gpu b/src/parcsr_ls/mup.fixed_gpu new file mode 100644 index 0000000000..ab707c775a --- /dev/null +++ b/src/parcsr_ls/mup.fixed_gpu @@ -0,0 +1,119 @@ +hypreDevice_extendWtoP +hypreGPUKernel_AMSComputeGPi_copy2 +hypreGPUKernel_AMSComputePi_copy1 +hypreGPUKernel_AMSComputePi_copy2 +hypreGPUKernel_AMSComputePixyz_copy +hypreGPUKernel_AMSSetupScaleGGt +hypreGPUKernel_BatchedGaussJordanSolve +hypreGPUKernel_CSRMatrixExtractBlockDiag +hypreGPUKernel_CSRMatrixExtractBlockDiagMarked +hypreGPUKernel_CSRMaxEigEstimate +hypreGPUKernel_ComplexMatrixBatchedTranspose +hypreGPUKernel_FSAIExtractSubSystems +hypreGPUKernel_FSAIGatherEntries +hypreGPUKernel_FSAIScaling +hypreGPUKernel_FSAITruncateCandidateOrdered +hypreGPUKernel_FSAITruncateCandidateUnordered +hypreGPUKernel_FixInterNodes +hypreGPUKernel_GtEliminateBoundary +hypreGPUKernel_IndepSetFixMarker +hypreGPUKernel_IndepSetMain +hypreGPUKernel_InterpTruncationPass0_v1 +hypreGPUKernel_InterpTruncationPass1_v1 +hypreGPUKernel_InterpTruncationPass2_v1 +hypreGPUKernel_InterpTruncation_v2 +hypreGPUKernel_MMInterpScaleAFF +hypreGPUKernel_MMPEInterpScaleAFF +hypreGPUKernel_PMISCoarseningInit 
+hypreGPUKernel_PMISCoarseningUpdateCF +hypreGPUKernel_ParCSRMatrixFixZeroRows +hypreGPUKernel_ParCSRMatrixSetDiagRows +hypreGPUKernel_cfmarker_masked_rowsum +hypreGPUKernel_compute_aff_afc +hypreGPUKernel_compute_aff_afc_epe +hypreGPUKernel_compute_dlam_dtmp +hypreGPUKernel_compute_twiaff_w +hypreGPUKernel_compute_weak_rowsums +hypreGPUKernel_generate_Pdiag_i_Poffd_i +hypreGPUKernel_generate_Pdiag_j_Poffd_j +hypreGPUKernel_generate_Qdiag_j_Qoffd_j +hypreGPUKernel_insert_remaining_weights +hypreGPUKernel_mutli_pi_rowsum +hypreGPUKernel_pass_order_count +hypreGPUKernel_populate_big_P_offd_j +hypre_BatchedGaussJordanSolveDevice +hypre_BoomerAMGBuildDirInterpDevice +hypre_BoomerAMGBuildDirInterp_getcoef +hypre_BoomerAMGBuildDirInterp_getcoef_v2 +hypre_BoomerAMGBuildDirInterp_getnnz +hypre_BoomerAMGBuildExtInterpDevice +hypre_BoomerAMGBuildExtPEInterpDevice +hypre_BoomerAMGBuildExtPIInterpDevice +hypre_BoomerAMGBuildInterpOnePntDevice +hypre_BoomerAMGBuildInterpOnePnt_getnnz +hypre_BoomerAMGBuildModMultipassDevice +hypre_BoomerAMGBuildModPartialExtInterpDevice +hypre_BoomerAMGBuildModPartialExtPEInterpDevice +hypre_BoomerAMGBuildRestrNeumannAIRDevice +hypre_BoomerAMGBuildRestrNeumannAIR_assembleRdiag +hypre_BoomerAMGCFMarkerTo1minus1Device +hypre_BoomerAMGCoarseParmsDevice +hypre_BoomerAMGCoarsenPMISDevice +hypre_BoomerAMGCorrectCFMarker2Device +hypre_BoomerAMGCorrectCFMarkerDevice +hypre_BoomerAMGCreate2ndSDevice +hypre_BoomerAMGCreateSDevice +hypre_BoomerAMGCreateS_rowcount +hypre_BoomerAMGCreateSabs_rowcount +hypre_BoomerAMGDD_FAC_CFL1JacobiDevice +hypre_BoomerAMGDD_FAC_JacobiDevice +hypre_BoomerAMGIndepSetDevice +hypre_BoomerAMGIndepSetInitDevice +hypre_BoomerAMGInitDofFuncDevice +hypre_BoomerAMGInterpTruncationDevice +hypre_BoomerAMGInterpTruncationDevice_v1 +hypre_BoomerAMGInterpTruncationDevice_v2 +hypre_BoomerAMGMakeSocFromSDevice +hypre_BoomerAMGRelaxHybridGaussSeidelDevice +hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice +hypre_FSAIExtractSubSystemsDevice 
+hypre_FSAIGatherEntriesDevice +hypre_FSAIScalingDevice +hypre_FSAISetupDevice +hypre_FSAISetupStaticPowerDevice +hypre_FSAITruncateCandidateDevice +hypre_GaussElimSetupDevice +hypre_GaussElimSolveDevice +hypre_GenerateMultiPiDevice +hypre_GenerateMultipassPiDevice +hypre_GetGlobalMeasureDevice +hypre_ILUApplyLowerJacIterDevice +hypre_ILUApplyLowerUpperJacIterDevice +hypre_ILUApplyUpperJacIterDevice +hypre_ILUSetupDevice +hypre_ILUSetupIterativeILU0Device +hypre_ILUSolveLUDevice +hypre_ILUSolveLUIterDevice +hypre_ILUSolveRAPGMRESDevice +hypre_ILUSolveSchurGMRESDevice +hypre_ILUSolveSchurGMRESJacIterDevice +hypre_MGRBuildPDevice +hypre_MGRBuildPFromWpDevice +hypre_MGRBuildRFromWrDevice +hypre_MGRRelaxL1JacobiDevice +hypre_PMISCoarseningInitDevice +hypre_PMISCoarseningUpdateCFDevice +hypre_ParCSRMatrixBlockDiagMatrixDevice +hypre_ParCSRMatrixExtractBlockDiagDevice +hypre_ParCSRMatrixFixZeroRowsDevice +hypre_ParCSRMaxEigEstimateCGDevice +hypre_ParCSRMaxEigEstimateDevice +hypre_ParCSRRelax_Cheby_SolveDevice +hypre_ParILURAPSchurGMRESMatvecDevice +hypre_ParILURAPSchurGMRESSolveDevice +hypre_ParILUSchurGMRESCommInfoDevice +hypre_ParILUSchurGMRESDummySolveDevice +hypre_ParILUSchurGMRESMatvecDevice +hypre_ParILUSchurGMRESMatvecJacIterDevice +hypre_modmp_compute_num_cols_offd_fine_to_coarse +hypre_modmp_init_fine_to_coarse diff --git a/src/parcsr_ls/mup.functions_gpu b/src/parcsr_ls/mup.functions_gpu new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/parcsr_ls/mup.methods_gpu b/src/parcsr_ls/mup.methods_gpu new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/parcsr_ls/mup_fixed_gpu.c b/src/parcsr_ls/mup_fixed_gpu.c new file mode 100644 index 0000000000..ff7ac5037f --- /dev/null +++ b/src/parcsr_ls/mup_fixed_gpu.c @@ -0,0 +1,479 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +#include "_hypre_parcsr_ls.h" +#include "_hypre_parcsr_ls.hpp" + +#ifdef HYPRE_MIXED_PRECISION + 
+/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#if defined(HYPRE_USING_GPU) + + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_extendWtoP( HYPRE_Int P_nr_of_rows, HYPRE_Int W_nr_of_rows, HYPRE_Int W_nr_of_cols, HYPRE_Int *CF_marker, HYPRE_Int W_diag_nnz, HYPRE_Int *W_diag_i, HYPRE_Int *W_diag_j, HYPRE_Complex *W_diag_data, HYPRE_Int *P_diag_i, HYPRE_Int *P_diag_j, HYPRE_Complex *P_diag_data, HYPRE_Int *W_offd_i, HYPRE_Int *P_offd_i ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_extendWtoP)( P_nr_of_rows, W_nr_of_rows, W_nr_of_cols, CF_marker, W_diag_nnz, W_diag_i, W_diag_j, W_diag_data, P_diag_i, P_diag_j, P_diag_data, W_offd_i, P_offd_i ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGBuildDirInterpDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, HYPRE_Int interp_type, hypre_ParCSRMatrix **P_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGBuildDirInterpDevice)( A, CF_marker, S, num_cpts_global, num_functions, dof_func, debug_flag, trunc_factor, max_elmts, interp_type, P_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGBuildExtInterpDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, 
hypre_ParCSRMatrix **P_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGBuildExtInterpDevice)( A, CF_marker, S, num_cpts_global, num_functions, dof_func, debug_flag, trunc_factor, max_elmts, P_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGBuildExtPEInterpDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGBuildExtPEInterpDevice)( A, CF_marker, S, num_cpts_global, num_functions, dof_func, debug_flag, trunc_factor, max_elmts, P_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGBuildExtPIInterpDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGBuildExtPIInterpDevice)( A, CF_marker, S, num_cpts_global, num_functions, dof_func, debug_flag, trunc_factor, max_elmts, P_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGBuildInterpOnePntDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, hypre_ParCSRMatrix **P_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGBuildInterpOnePntDevice)( A, CF_marker, S, num_cpts_global, num_functions, dof_func, debug_flag, P_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, 
HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Real trunc_factor, HYPRE_Int P_max_elmts, HYPRE_Int interp_type, HYPRE_Int num_functions, HYPRE_Int *dof_func, hypre_ParCSRMatrix **P_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGBuildModMultipassDevice)( A, CF_marker, S, num_cpts_global, trunc_factor, P_max_elmts, interp_type, num_functions, dof_func, P_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGBuildModPartialExtInterpDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_BigInt *num_old_cpts_global, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGBuildModPartialExtInterpDevice)( A, CF_marker, S, num_cpts_global, num_old_cpts_global, debug_flag, trunc_factor, max_elmts, P_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGBuildModPartialExtPEInterpDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_BigInt *num_old_cpts_global, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGBuildModPartialExtPEInterpDevice)( A, CF_marker, S, num_cpts_global, num_old_cpts_global, debug_flag, trunc_factor, max_elmts, P_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGBuildRestrNeumannAIRDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int NeumannDeg, HYPRE_Real strong_thresholdR, HYPRE_Real filter_thresholdR, HYPRE_Int debug_flag, hypre_ParCSRMatrix **R_ptr ) +{ + return 
HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGBuildRestrNeumannAIRDevice)( A, CF_marker, num_cpts_global, num_functions, dof_func, NeumannDeg, strong_thresholdR, filter_thresholdR, debug_flag, R_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGCFMarkerTo1minus1Device( HYPRE_Int *CF_marker, HYPRE_Int size ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGCFMarkerTo1minus1Device)( CF_marker, size ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGCoarseParmsDevice( MPI_Comm comm, HYPRE_Int local_num_variables, HYPRE_Int num_functions, hypre_IntArray *dof_func, hypre_IntArray *CF_marker, hypre_IntArray **coarse_dof_func_ptr, HYPRE_BigInt *coarse_pnts_global ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGCoarseParmsDevice)( comm, local_num_variables, num_functions, dof_func, CF_marker, coarse_dof_func_ptr, coarse_pnts_global ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGCoarsenPMISDevice( hypre_ParCSRMatrix *S, hypre_ParCSRMatrix *A, HYPRE_Int CF_init, HYPRE_Int debug_flag, hypre_IntArray **CF_marker_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGCoarsenPMISDevice)( S, A, CF_init, debug_flag, CF_marker_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGCorrectCFMarker2Device( hypre_IntArray *CF_marker, hypre_IntArray *new_CF_marker ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGCorrectCFMarker2Device)( CF_marker, new_CF_marker ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGCorrectCFMarkerDevice( hypre_IntArray *CF_marker, hypre_IntArray *new_CF_marker ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGCorrectCFMarkerDevice)( CF_marker, new_CF_marker ); +} + 
+/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGCreate2ndSDevice( hypre_ParCSRMatrix *S, HYPRE_Int *CF_marker, HYPRE_Int num_paths, HYPRE_BigInt *coarse_row_starts, hypre_ParCSRMatrix **C_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGCreate2ndSDevice)( S, CF_marker, num_paths, coarse_row_starts, C_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGCreateSDevice( hypre_ParCSRMatrix *A, HYPRE_Int abs_soc, HYPRE_Real strength_threshold, HYPRE_Real max_row_sum, HYPRE_Int num_functions, HYPRE_Int *dof_func, hypre_ParCSRMatrix **S_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGCreateSDevice)( A, abs_soc, strength_threshold, max_row_sum, num_functions, dof_func, S_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGDD_FAC_CFL1JacobiDevice( void *amgdd_vdata, HYPRE_Int level, HYPRE_Int relax_set ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGDD_FAC_CFL1JacobiDevice)( amgdd_vdata, level, relax_set ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGDD_FAC_JacobiDevice( void *amgdd_vdata, HYPRE_Int level ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGDD_FAC_JacobiDevice)( amgdd_vdata, level ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGIndepSetDevice( hypre_ParCSRMatrix *S, HYPRE_Real *measure_diag, HYPRE_Real *measure_offd, HYPRE_Int graph_diag_size, HYPRE_Int *graph_diag, HYPRE_Int *IS_marker_diag, HYPRE_Int *IS_marker_offd, hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int *int_send_buf ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGIndepSetDevice)( S, measure_diag, measure_offd, graph_diag_size, graph_diag, IS_marker_diag, IS_marker_offd, comm_pkg, int_send_buf ); +} + 
+/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGIndepSetInitDevice( hypre_ParCSRMatrix *S, HYPRE_Real *measure_array, HYPRE_Int aug_rand ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGIndepSetInitDevice)( S, measure_array, aug_rand ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGInitDofFuncDevice( HYPRE_Int *dof_func, HYPRE_Int local_size, HYPRE_Int offset, HYPRE_Int num_functions ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGInitDofFuncDevice)( dof_func, local_size, offset, num_functions ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGInterpTruncationDevice( hypre_ParCSRMatrix *P, HYPRE_Real trunc_factor, HYPRE_Int max_elmts ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGInterpTruncationDevice)( P, trunc_factor, max_elmts ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGMakeSocFromSDevice( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *S ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGMakeSocFromSDevice)( A, S ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGRelaxHybridGaussSeidelDevice( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega, HYPRE_Real *l1_norms, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp, HYPRE_Int GS_order, HYPRE_Int Symm ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGRelaxHybridGaussSeidelDevice)( A, f, cf_marker, relax_points, relax_weight, omega, l1_norms, u, Vtemp, Ztemp, GS_order, Symm ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice( hypre_ParCSRMatrix *A, 
hypre_ParVector *f, HYPRE_Real relax_weight, HYPRE_Real omega, HYPRE_Real *A_diag_diag, hypre_ParVector *u, hypre_ParVector *r, hypre_ParVector *z, HYPRE_Int choice ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice)( A, f, relax_weight, omega, A_diag_diag, u, r, z, choice ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_FSAISetupDevice( void *fsai_vdata, hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_FSAISetupDevice)( fsai_vdata, A, f, u ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_GaussElimSetupDevice( hypre_ParAMGData *amg_data, HYPRE_Int level, HYPRE_Int solver_type ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_GaussElimSetupDevice)( amg_data, level, solver_type ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_GaussElimSolveDevice( hypre_ParAMGData *amg_data, HYPRE_Int level, HYPRE_Int solver_type ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_GaussElimSolveDevice)( amg_data, level, solver_type ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_GenerateMultiPiDevice( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix *P, HYPRE_BigInt *c_pts_starts, HYPRE_Int *pass_order, HYPRE_Int *pass_marker, HYPRE_Int *pass_marker_offd, HYPRE_Int num_points, HYPRE_Int color, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int *dof_func_offd, hypre_ParCSRMatrix **Pi_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_GenerateMultiPiDevice)( A, S, P, c_pts_starts, pass_order, pass_marker, pass_marker_offd, num_points, color, num_functions, dof_func, dof_func_offd, Pi_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_GenerateMultipassPiDevice( hypre_ParCSRMatrix 
*A, hypre_ParCSRMatrix *S, HYPRE_BigInt *c_pts_starts, HYPRE_Int *pass_order, HYPRE_Int *pass_marker, HYPRE_Int *pass_marker_offd, HYPRE_Int num_points, HYPRE_Int color, HYPRE_Real *row_sums, hypre_ParCSRMatrix **P_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_GenerateMultipassPiDevice)( A, S, c_pts_starts, pass_order, pass_marker, pass_marker_offd, num_points, color, row_sums, P_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_GetGlobalMeasureDevice( hypre_ParCSRMatrix *S, hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int CF_init, HYPRE_Int aug_rand, HYPRE_Real *measure_diag, HYPRE_Real *measure_offd, HYPRE_Real *real_send_buf ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_GetGlobalMeasureDevice)( S, comm_pkg, CF_init, aug_rand, measure_diag, measure_offd, real_send_buf ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ILUApplyLowerJacIterDevice( hypre_CSRMatrix *A, hypre_Vector *input, hypre_Vector *work, hypre_Vector *output, HYPRE_Int lower_jacobi_iters ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ILUApplyLowerJacIterDevice)( A, input, work, output, lower_jacobi_iters ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ILUApplyLowerUpperJacIterDevice( hypre_CSRMatrix *A, hypre_Vector *work1, hypre_Vector *work2, hypre_Vector *inout, hypre_Vector *diag, HYPRE_Int lower_jacobi_iters, HYPRE_Int upper_jacobi_iters ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ILUApplyLowerUpperJacIterDevice)( A, work1, work2, inout, diag, lower_jacobi_iters, upper_jacobi_iters ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ILUApplyUpperJacIterDevice( hypre_CSRMatrix *A, hypre_Vector *input, hypre_Vector *work, hypre_Vector *output, hypre_Vector *diag, HYPRE_Int upper_jacobi_iters ) +{ + return 
HYPRE_CURRENTPRECISION_FUNC(hypre_ILUApplyUpperJacIterDevice)( A, input, work, output, diag, upper_jacobi_iters ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ILUSetupDevice( hypre_ParILUData *ilu_data, hypre_ParCSRMatrix *A, HYPRE_Int *perm_data, HYPRE_Int *qperm_data, HYPRE_Int n, HYPRE_Int nLU, hypre_CSRMatrix **BLUptr, hypre_ParCSRMatrix **matSptr, hypre_CSRMatrix **Eptr, hypre_CSRMatrix **Fptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ILUSetupDevice)( ilu_data, A, perm_data, qperm_data, n, nLU, BLUptr, matSptr, Eptr, Fptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ILUSetupIterativeILU0Device( hypre_CSRMatrix *A, HYPRE_Int type, HYPRE_Int option, HYPRE_Int max_iter, HYPRE_Real tolerance, HYPRE_Int *num_iter_ptr, HYPRE_Real **history_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ILUSetupIterativeILU0Device)( A, type, option, max_iter, tolerance, num_iter_ptr, history_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ILUSolveLUDevice( hypre_ParCSRMatrix *A, hypre_CSRMatrix *matLU_d, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, hypre_ParVector *ftemp, hypre_ParVector *utemp ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ILUSolveLUDevice)( A, matLU_d, f, u, perm, ftemp, utemp ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ILUSolveLUIterDevice( hypre_ParCSRMatrix *A, hypre_CSRMatrix *matLU, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, hypre_ParVector *ftemp, hypre_ParVector *utemp, hypre_ParVector *xtemp, hypre_Vector **diag_ptr, HYPRE_Int lower_jacobi_iters, HYPRE_Int upper_jacobi_iters ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ILUSolveLUIterDevice)( A, matLU, f, u, perm, ftemp, utemp, xtemp, diag_ptr, lower_jacobi_iters, upper_jacobi_iters ); +} + 
+/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ILUSolveRAPGMRESDevice( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, HYPRE_Int nLU, hypre_ParCSRMatrix *S, hypre_ParVector *ftemp, hypre_ParVector *utemp, hypre_ParVector *xtemp, hypre_ParVector *ytemp, HYPRE_Solver schur_solver, HYPRE_Solver schur_precond, hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end, hypre_ParCSRMatrix *Aperm, hypre_CSRMatrix *matALU_d, hypre_CSRMatrix *matBLU_d, hypre_CSRMatrix *matE_d, hypre_CSRMatrix *matF_d, HYPRE_Int test_opt ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ILUSolveRAPGMRESDevice)( A, f, u, perm, nLU, S, ftemp, utemp, xtemp, ytemp, schur_solver, schur_precond, rhs, x, u_end, Aperm, matALU_d, matBLU_d, matE_d, matF_d, test_opt ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ILUSolveSchurGMRESDevice( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, HYPRE_Int nLU, hypre_ParCSRMatrix *S, hypre_ParVector *ftemp, hypre_ParVector *utemp, HYPRE_Solver schur_solver, HYPRE_Solver schur_precond, hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end, hypre_CSRMatrix *matBLU_d, hypre_CSRMatrix *matE_d, hypre_CSRMatrix *matF_d ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ILUSolveSchurGMRESDevice)( A, f, u, perm, nLU, S, ftemp, utemp, schur_solver, schur_precond, rhs, x, u_end, matBLU_d, matE_d, matF_d ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ILUSolveSchurGMRESJacIterDevice( hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm, HYPRE_Int nLU, hypre_ParCSRMatrix *S, hypre_ParVector *ftemp, hypre_ParVector *utemp, HYPRE_Solver schur_solver, HYPRE_Solver schur_precond, hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end, hypre_CSRMatrix *matBLU_d, hypre_CSRMatrix *matE_d, hypre_CSRMatrix *matF_d, 
hypre_ParVector *ztemp, hypre_Vector **Adiag_diag, hypre_Vector **Sdiag_diag, HYPRE_Int lower_jacobi_iters, HYPRE_Int upper_jacobi_iters ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ILUSolveSchurGMRESJacIterDevice)( A, f, u, perm, nLU, S, ftemp, utemp, schur_solver, schur_precond, rhs, x, u_end, matBLU_d, matE_d, matF_d, ztemp, Adiag_diag, Sdiag_diag, lower_jacobi_iters, upper_jacobi_iters ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_MGRBuildPDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *num_cpts_global, HYPRE_Int method, hypre_ParCSRMatrix **P_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_MGRBuildPDevice)( A, CF_marker, num_cpts_global, method, P_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_MGRBuildPFromWpDevice( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *Wp, HYPRE_Int *CF_marker, hypre_ParCSRMatrix **P_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_MGRBuildPFromWpDevice)( A, Wp, CF_marker, P_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_MGRBuildRFromWrDevice( hypre_IntArray *C_map, hypre_IntArray *F_map, hypre_ParCSRMatrix *Wr, hypre_ParCSRMatrix *R ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_MGRBuildRFromWrDevice)( C_map, F_map, Wr, R ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_MGRRelaxL1JacobiDevice( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *CF_marker_host, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real *l1_norms, hypre_ParVector *u, hypre_ParVector *Vtemp ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_MGRRelaxL1JacobiDevice)( A, f, CF_marker_host, relax_points, relax_weight, l1_norms, u, Vtemp ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixBlockDiagMatrixDevice( 
hypre_ParCSRMatrix *A, HYPRE_Int blk_size, HYPRE_Int point_type, HYPRE_Int *CF_marker, HYPRE_Int diag_type, hypre_ParCSRMatrix **B_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixBlockDiagMatrixDevice)( A, blk_size, point_type, CF_marker, diag_type, B_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixExtractBlockDiagDevice( hypre_ParCSRMatrix *A, HYPRE_Int blk_size, HYPRE_Int num_points, HYPRE_Int point_type, HYPRE_Int *CF_marker, HYPRE_Int diag_size, HYPRE_Int diag_type, HYPRE_Int *B_diag_i, HYPRE_Int *B_diag_j, HYPRE_Complex *B_diag_data ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixExtractBlockDiagDevice)( A, blk_size, num_points, point_type, CF_marker, diag_size, diag_type, B_diag_i, B_diag_j, B_diag_data ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMaxEigEstimateCGDevice( hypre_ParCSRMatrix *A, HYPRE_Int scale, HYPRE_Int max_iter, HYPRE_Real *max_eig, HYPRE_Real *min_eig ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMaxEigEstimateCGDevice)( A, scale, max_iter, max_eig, min_eig ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMaxEigEstimateDevice( hypre_ParCSRMatrix *A, HYPRE_Int scale, HYPRE_Real *max_eig, HYPRE_Real *min_eig ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMaxEigEstimateDevice)( A, scale, max_eig, min_eig ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRRelax_Cheby_SolveDevice( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Real *ds_data, HYPRE_Real *coefs, HYPRE_Int order, HYPRE_Int scale, HYPRE_Int variant, hypre_ParVector *u, hypre_ParVector *v, hypre_ParVector *r, hypre_ParVector *orig_u_vec, hypre_ParVector *tmp_vec ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRRelax_Cheby_SolveDevice)( A, f, ds_data, coefs, order, 
scale, variant, u, v, r, orig_u_vec, tmp_vec ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParILURAPSchurGMRESMatvecDevice( void *matvec_data, HYPRE_Complex alpha, void *ilu_vdata, void *x, HYPRE_Complex beta, void *y ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParILURAPSchurGMRESMatvecDevice)( matvec_data, alpha, ilu_vdata, x, beta, y ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParILURAPSchurGMRESSolveDevice( void *ilu_vdata, void *ilu_vdata2, hypre_ParVector *par_f, hypre_ParVector *par_u ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParILURAPSchurGMRESSolveDevice)( ilu_vdata, ilu_vdata2, par_f, par_u ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParILUSchurGMRESCommInfoDevice( void *ilu_vdata, HYPRE_Int *my_id, HYPRE_Int *num_procs ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParILUSchurGMRESCommInfoDevice)( ilu_vdata, my_id, num_procs ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParILUSchurGMRESDummySolveDevice( void *ilu_vdata, void *ilu_vdata2, hypre_ParVector *f, hypre_ParVector *u ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParILUSchurGMRESDummySolveDevice)( ilu_vdata, ilu_vdata2, f, u ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParILUSchurGMRESMatvecDevice( void *matvec_data, HYPRE_Complex alpha, void *ilu_vdata, void *x, HYPRE_Complex beta, void *y ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParILUSchurGMRESMatvecDevice)( matvec_data, alpha, ilu_vdata, x, beta, y ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParILUSchurGMRESMatvecJacIterDevice( void *matvec_data, HYPRE_Complex alpha, void *ilu_vdata, void *x, HYPRE_Complex beta, void *y ) +{ + return 
HYPRE_CURRENTPRECISION_FUNC(hypre_ParILUSchurGMRESMatvecJacIterDevice)( matvec_data, alpha, ilu_vdata, x, beta, y ); +} + + +#endif + +#endif + diff --git a/src/parcsr_ls/mup_functions_gpu.c b/src/parcsr_ls/mup_functions_gpu.c new file mode 100644 index 0000000000..f6c0121d6a --- /dev/null +++ b/src/parcsr_ls/mup_functions_gpu.c @@ -0,0 +1,22 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +#include "_hypre_parcsr_ls.h" +#include "_hypre_parcsr_ls.hpp" + +#ifdef HYPRE_MIXED_PRECISION + +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#if defined(HYPRE_USING_GPU) + + +#endif + +#endif + diff --git a/src/parcsr_ls/mup_pre_gpu.c b/src/parcsr_ls/mup_pre_gpu.c new file mode 100644 index 0000000000..f6c0121d6a --- /dev/null +++ b/src/parcsr_ls/mup_pre_gpu.c @@ -0,0 +1,22 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +#include "_hypre_parcsr_ls.h" +#include "_hypre_parcsr_ls.hpp" + +#ifdef HYPRE_MIXED_PRECISION + +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#if defined(HYPRE_USING_GPU) + + +#endif + +#endif + diff --git a/src/parcsr_ls/par_2s_interp_device.c b/src/parcsr_ls/par_2s_interp_device.c index c5bec126d0..b2518b4b2e 100644 --- a/src/parcsr_ls/par_2s_interp_device.c +++ b/src/parcsr_ls/par_2s_interp_device.c @@ -313,7 +313,7 @@ hypre_BoomerAMGBuildModPartialExtInterpDevice( hypre_ParCSRMatrix *A, hypre_ParCSRMatrixNumNonzeros(P) = hypre_ParCSRMatrixNumNonzeros(W) + hypre_ParCSRMatrixGlobalNumCols(W); - hypre_ParCSRMatrixDNumNonzeros(P) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(P); + hypre_ParCSRMatrixDNumNonzeros(P) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(P); hypre_ParCSRMatrixDestroy(W); @@ -666,7 +666,7 @@ hypre_BoomerAMGBuildModPartialExtPEInterpDevice( hypre_ParCSRMatrix *A, hypre_ParCSRMatrixNumNonzeros(P) = hypre_ParCSRMatrixNumNonzeros(W) + hypre_ParCSRMatrixGlobalNumCols(W); - hypre_ParCSRMatrixDNumNonzeros(P) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(P); + hypre_ParCSRMatrixDNumNonzeros(P) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(P); hypre_ParCSRMatrixDestroy(W); diff --git a/src/parcsr_ls/par_amg_setup.c b/src/parcsr_ls/par_amg_setup.c index 6b8347fcf9..4974710fb2 100644 --- a/src/parcsr_ls/par_amg_setup.c +++ b/src/parcsr_ls/par_amg_setup.c @@ -3806,12 +3806,12 @@ hypre_BoomerAMGSetup( void *amg_vdata, if (cum_nnz_AP > 0.0) { - cum_nnz_AP = hypre_ParCSRMatrixDNumNonzeros(A_array[0]); + cum_nnz_AP = (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A_array[0]); for (j = 0; j < num_levels - 1; j++) { hypre_ParCSRMatrixSetDNumNonzeros(P_array[j]); - cum_nnz_AP += hypre_ParCSRMatrixDNumNonzeros(P_array[j]); - cum_nnz_AP += hypre_ParCSRMatrixDNumNonzeros(A_array[j + 1]); + cum_nnz_AP += (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(P_array[j]); + cum_nnz_AP += (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A_array[j + 1]); } hypre_ParAMGDataCumNnzAP(amg_data) = 
cum_nnz_AP; } diff --git a/src/parcsr_ls/par_amg_solve.c b/src/parcsr_ls/par_amg_solve.c index 4a803fad2d..33e2f5b7cc 100644 --- a/src/parcsr_ls/par_amg_solve.c +++ b/src/parcsr_ls/par_amg_solve.c @@ -359,7 +359,7 @@ hypre_BoomerAMGSolve( void *amg_vdata, { num_coeffs = hypre_CTAlloc(HYPRE_Real, num_levels, HYPRE_MEMORY_HOST); num_variables = hypre_CTAlloc(HYPRE_Real, num_levels, HYPRE_MEMORY_HOST); - num_coeffs[0] = hypre_ParCSRMatrixDNumNonzeros(A); + num_coeffs[0] = (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A); num_variables[0] = (HYPRE_Real) hypre_ParCSRMatrixGlobalNumRows(A); if (block_mode) diff --git a/src/parcsr_ls/par_amg_solveT.c b/src/parcsr_ls/par_amg_solveT.c index 853b158eef..0d2ef6d18d 100644 --- a/src/parcsr_ls/par_amg_solveT.c +++ b/src/parcsr_ls/par_amg_solveT.c @@ -92,7 +92,7 @@ hypre_BoomerAMGSolveT( void *amg_vdata, num_coeffs = hypre_CTAlloc(HYPRE_Real, num_levels, HYPRE_MEMORY_HOST); num_variables = hypre_CTAlloc(HYPRE_BigInt, num_levels, HYPRE_MEMORY_HOST); - num_coeffs[0] = hypre_ParCSRMatrixDNumNonzeros(A_array[0]); + num_coeffs[0] = (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A_array[0]); num_variables[0] = hypre_ParCSRMatrixGlobalNumRows(A_array[0]); A_array[0] = A; @@ -110,7 +110,7 @@ hypre_BoomerAMGSolveT( void *amg_vdata, Vtemp = hypre_ParAMGDataVtemp(amg_data); for (j = 1; j < num_levels; j++) { - num_coeffs[j] = hypre_ParCSRMatrixDNumNonzeros(A_array[j]); + num_coeffs[j] = (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A_array[j]); num_variables[j] = hypre_ParCSRMatrixGlobalNumRows(A_array[j]); } @@ -396,11 +396,11 @@ hypre_BoomerAMGCycleT( void *amg_vdata, if (grid_relax_points) { old_version = 1; } num_coeffs = hypre_CTAlloc(HYPRE_Real, num_levels, HYPRE_MEMORY_HOST); - num_coeffs[0] = hypre_ParCSRMatrixDNumNonzeros(A_array[0]); + num_coeffs[0] = (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A_array[0]); for (j = 1; j < num_levels; j++) { - num_coeffs[j] = hypre_ParCSRMatrixDNumNonzeros(A_array[j]); + num_coeffs[j] = 
(HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A_array[j]); } /*--------------------------------------------------------------------- diff --git a/src/parcsr_ls/par_cycle.c b/src/parcsr_ls/par_cycle.c index 7f89d50c2f..87ae4c6282 100644 --- a/src/parcsr_ls/par_cycle.c +++ b/src/parcsr_ls/par_cycle.c @@ -169,7 +169,7 @@ hypre_BoomerAMGCycle( void *amg_vdata, } num_coeffs = hypre_CTAlloc(HYPRE_Real, num_levels, HYPRE_MEMORY_HOST); - num_coeffs[0] = hypre_ParCSRMatrixDNumNonzeros(A_array[0]); + num_coeffs[0] = (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A_array[0]); comm = hypre_ParCSRMatrixComm(A_array[0]); hypre_MPI_Comm_rank(comm, &my_id); @@ -184,7 +184,7 @@ hypre_BoomerAMGCycle( void *amg_vdata, { for (j = 1; j < num_levels; j++) { - num_coeffs[j] = hypre_ParCSRMatrixDNumNonzeros(A_array[j]); + num_coeffs[j] = (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A_array[j]); } } diff --git a/src/parcsr_ls/par_fsai_setup.c b/src/parcsr_ls/par_fsai_setup.c index 9641d69c3b..bff8492d8b 100644 --- a/src/parcsr_ls/par_fsai_setup.c +++ b/src/parcsr_ls/par_fsai_setup.c @@ -1080,8 +1080,8 @@ hypre_FSAIPrintStats( void *fsai_vdata, /* Compute density */ hypre_ParCSRMatrixSetDNumNonzeros(G); hypre_ParCSRMatrixSetDNumNonzeros(A); - density = hypre_ParCSRMatrixDNumNonzeros(G) / - hypre_ParCSRMatrixDNumNonzeros(A); + density = (HYPRE_Real)(hypre_ParCSRMatrixDNumNonzeros(G) / + hypre_ParCSRMatrixDNumNonzeros(A)); hypre_ParFSAIDataDensity(fsai_data) = density; if (!my_id) diff --git a/src/parcsr_ls/par_ilu_setup.c b/src/parcsr_ls/par_ilu_setup.c index 28945b290d..6fd6a75be3 100644 --- a/src/parcsr_ls/par_ilu_setup.c +++ b/src/parcsr_ls/par_ilu_setup.c @@ -1104,11 +1104,11 @@ hypre_ILUSetup( void *ilu_vdata, if (matS) { hypre_ParCSRMatrixSetDNumNonzeros(matS); - nnzS = hypre_ParCSRMatrixDNumNonzeros(matS); + nnzS = (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(matS); /* if we have Schur system need to reduce it from size_C */ } hypre_ParILUDataOperatorComplexity(ilu_data) = ((HYPRE_Real)nnzG + 
nnzS) / - hypre_ParCSRMatrixDNumNonzeros(matA); + (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(matA); } else if (ilu_type == 50) { @@ -1132,11 +1132,11 @@ hypre_ILUSetup( void *ilu_vdata, if (matS) { hypre_ParCSRMatrixSetDNumNonzeros(matS); - nnzS = hypre_ParCSRMatrixDNumNonzeros(matS); + nnzS = (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(matS); /* if we have Schur system need to reduce it from size_C */ } hypre_ParILUDataOperatorComplexity(ilu_data) = ((HYPRE_Real)nnzG + nnzS) / - hypre_ParCSRMatrixDNumNonzeros(matA); + (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(matA); } else #endif @@ -1144,7 +1144,7 @@ hypre_ILUSetup( void *ilu_vdata, if (matS) { hypre_ParCSRMatrixSetDNumNonzeros(matS); - nnzS = hypre_ParCSRMatrixDNumNonzeros(matS); + nnzS = (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(matS); /* If we have Schur system need to reduce it from size_C */ size_C -= hypre_ParCSRMatrixGlobalNumRows(matS); @@ -1172,9 +1172,9 @@ hypre_ILUSetup( void *ilu_vdata, } hypre_ParILUDataOperatorComplexity(ilu_data) = ((HYPRE_Real)size_C + nnzS + - hypre_ParCSRMatrixDNumNonzeros(matL) + - hypre_ParCSRMatrixDNumNonzeros(matU)) / - hypre_ParCSRMatrixDNumNonzeros(matA); + (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(matL) + + (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(matU)) / + (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(matA); } /* TODO (VPM): Move ILU statistics printout to its own function */ @@ -2800,7 +2800,7 @@ hypre_ILUSetupMILU0(hypre_ParCSRMatrix *A, /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) ctrL; hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); - hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; + hypre_ParCSRMatrixDNumNonzeros(matL) = (hypre_double)total_nnz; matU = hypre_ParCSRMatrixCreate( comm, hypre_ParCSRMatrixGlobalNumRows(A), @@ -2827,7 +2827,7 @@ hypre_ILUSetupMILU0(hypre_ParCSRMatrix *A, /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) ctrU; hypre_MPI_Allreduce(&local_nnz, 
&total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); - hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; + hypre_ParCSRMatrixDNumNonzeros(matU) = (hypre_double)total_nnz; /* free memory */ hypre_TFree(wL, HYPRE_MEMORY_HOST); hypre_TFree(iw, HYPRE_MEMORY_HOST); @@ -3807,7 +3807,7 @@ hypre_ILUSetupILUK(hypre_ParCSRMatrix *A, /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (L_diag_i[n]); hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); - hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; + hypre_ParCSRMatrixDNumNonzeros(matL) = (hypre_double)total_nnz; matU = hypre_ParCSRMatrixCreate( comm, hypre_ParCSRMatrixGlobalNumRows(A), @@ -3833,7 +3833,7 @@ hypre_ILUSetupILUK(hypre_ParCSRMatrix *A, /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (U_diag_i[n]); hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); - hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; + hypre_ParCSRMatrixDNumNonzeros(matU) = (hypre_double)total_nnz; /* free */ hypre_TFree(iw, HYPRE_MEMORY_HOST); @@ -4686,7 +4686,7 @@ hypre_ILUSetupILUT(hypre_ParCSRMatrix *A, /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (L_diag_i[n]); hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); - hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; + hypre_ParCSRMatrixDNumNonzeros(matL) = (hypre_double)total_nnz; matU = hypre_ParCSRMatrixCreate( comm, hypre_ParCSRMatrixGlobalNumRows(A), @@ -4715,7 +4715,7 @@ hypre_ILUSetupILUT(hypre_ParCSRMatrix *A, /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (U_diag_i[n]); hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); - hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; + hypre_ParCSRMatrixDNumNonzeros(matU) = (hypre_double)total_nnz; /* free working array */ hypre_TFree(iw, HYPRE_MEMORY_HOST); @@ -4834,8 +4834,8 @@ hypre_NSHSetup( void *nsh_vdata, 
hypre_ParCSRMatrixSetDNumNonzeros(matM); /* Compute complexity */ - hypre_ParNSHDataOperatorComplexity(nsh_data) = hypre_ParCSRMatrixDNumNonzeros(matM) / - hypre_ParCSRMatrixDNumNonzeros(matA); + hypre_ParNSHDataOperatorComplexity(nsh_data) = (HYPRE_Real)(hypre_ParCSRMatrixDNumNonzeros(matM) / + hypre_ParCSRMatrixDNumNonzeros(matA)); if (my_id == 0 && print_level > 0) { hypre_printf("NSH SETUP: operator complexity = %f \n", @@ -5475,7 +5475,7 @@ hypre_ILUSetupILU0RAS(hypre_ParCSRMatrix *A, /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) ctrL; hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); - hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; + hypre_ParCSRMatrixDNumNonzeros(matL) = (hypre_double)total_nnz; matU = hypre_ParCSRMatrixCreate( comm, global_num_rows, @@ -5502,7 +5502,7 @@ hypre_ILUSetupILU0RAS(hypre_ParCSRMatrix *A, /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) ctrU; hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); - hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; + hypre_ParCSRMatrixDNumNonzeros(matU) = (hypre_double)total_nnz; /* free memory */ hypre_TFree(wL, HYPRE_MEMORY_HOST); hypre_TFree(iw, HYPRE_MEMORY_HOST); @@ -6594,7 +6594,7 @@ hypre_ILUSetupILUKRAS(hypre_ParCSRMatrix *A, /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (L_diag_i[total_rows]); hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); - hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; + hypre_ParCSRMatrixDNumNonzeros(matL) = (hypre_double)total_nnz; matU = hypre_ParCSRMatrixCreate( comm, global_num_rows, @@ -6620,7 +6620,7 @@ hypre_ILUSetupILUKRAS(hypre_ParCSRMatrix *A, /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (U_diag_i[total_rows]); hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); - hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; + 
hypre_ParCSRMatrixDNumNonzeros(matU) = (hypre_double)total_nnz; /* free */ hypre_TFree(iw, HYPRE_MEMORY_HOST); @@ -7539,7 +7539,7 @@ hypre_ILUSetupILUTRAS(hypre_ParCSRMatrix *A, /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (L_diag_i[total_rows]); hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); - hypre_ParCSRMatrixDNumNonzeros(matL) = total_nnz; + hypre_ParCSRMatrixDNumNonzeros(matL) = (hypre_double)total_nnz; matU = hypre_ParCSRMatrixCreate( comm, global_num_rows, @@ -7566,7 +7566,7 @@ hypre_ILUSetupILUTRAS(hypre_ParCSRMatrix *A, /* store (global) total number of nonzeros */ local_nnz = (HYPRE_Real) (U_diag_i[total_rows]); hypre_MPI_Allreduce(&local_nnz, &total_nnz, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); - hypre_ParCSRMatrixDNumNonzeros(matU) = total_nnz; + hypre_ParCSRMatrixDNumNonzeros(matU) = (hypre_double)total_nnz; /* free working array */ hypre_TFree(iw, HYPRE_MEMORY_HOST); diff --git a/src/parcsr_ls/par_lr_interp_device.c b/src/parcsr_ls/par_lr_interp_device.c index 744508b441..c3dfff021e 100644 --- a/src/parcsr_ls/par_lr_interp_device.c +++ b/src/parcsr_ls/par_lr_interp_device.c @@ -970,7 +970,7 @@ hypre_BoomerAMGBuildExtInterpDevice(hypre_ParCSRMatrix *A, hypre_ParCSRMatrixNumNonzeros(P) = hypre_ParCSRMatrixNumNonzeros(W) + hypre_ParCSRMatrixGlobalNumCols(W); - hypre_ParCSRMatrixDNumNonzeros(P) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(P); + hypre_ParCSRMatrixDNumNonzeros(P) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(P); hypre_GpuProfilingPushRange("Truncation"); if (trunc_factor != 0.0 || max_elmts > 0) @@ -1248,7 +1248,7 @@ hypre_BoomerAMGBuildExtPIInterpDevice( hypre_ParCSRMatrix *A, hypre_ParCSRMatrixNumNonzeros(P) = hypre_ParCSRMatrixNumNonzeros(W) + hypre_ParCSRMatrixGlobalNumCols(W); - hypre_ParCSRMatrixDNumNonzeros(P) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(P); + hypre_ParCSRMatrixDNumNonzeros(P) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(P); 
hypre_GpuProfilingPushRange("Truncation"); if (trunc_factor != 0.0 || max_elmts > 0) @@ -1525,7 +1525,7 @@ hypre_BoomerAMGBuildExtPEInterpDevice(hypre_ParCSRMatrix *A, hypre_ParCSRMatrixNumNonzeros(P) = hypre_ParCSRMatrixNumNonzeros(W) + hypre_ParCSRMatrixGlobalNumCols(W); - hypre_ParCSRMatrixDNumNonzeros(P) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(P); + hypre_ParCSRMatrixDNumNonzeros(P) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(P); hypre_GpuProfilingPushRange("Truncation"); if (trunc_factor != 0.0 || max_elmts > 0) diff --git a/src/parcsr_ls/par_mgr_device.c b/src/parcsr_ls/par_mgr_device.c index 1999b197be..60c9e9d3f7 100644 --- a/src/parcsr_ls/par_mgr_device.c +++ b/src/parcsr_ls/par_mgr_device.c @@ -70,7 +70,7 @@ hypre_MGRBuildPFromWpDevice( hypre_ParCSRMatrix *A, /* Initialize interpolation matrix */ hypre_ParCSRMatrixInitialize_v2(P, HYPRE_MEMORY_DEVICE); - hypre_ParCSRMatrixDNumNonzeros(P) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(P); + hypre_ParCSRMatrixDNumNonzeros(P) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(P); P_diag = hypre_ParCSRMatrixDiag(P); P_offd = hypre_ParCSRMatrixOffd(P); @@ -299,7 +299,7 @@ hypre_MGRBuildPDevice(hypre_ParCSRMatrix *A, { hypre_ParCSRMatrixNumNonzeros(P) = nC_global; } - hypre_ParCSRMatrixDNumNonzeros(P) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(P); + hypre_ParCSRMatrixDNumNonzeros(P) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(P); hypre_MatvecCommPkgCreate(P); diff --git a/src/parcsr_ls/par_mgr_interp.c b/src/parcsr_ls/par_mgr_interp.c index a82fd91a17..37bbe8f075 100644 --- a/src/parcsr_ls/par_mgr_interp.c +++ b/src/parcsr_ls/par_mgr_interp.c @@ -549,7 +549,7 @@ hypre_MGRBuildPFromWpHost( hypre_ParCSRMatrix *A, hypre_ParCSRMatrixNumNonzeros(P) = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(P)) + hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(P)); - hypre_ParCSRMatrixDNumNonzeros(P) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(P); + hypre_ParCSRMatrixDNumNonzeros(P) = (hypre_double) 
hypre_ParCSRMatrixNumNonzeros(P); hypre_MatvecCommPkgCreate(P); *P_ptr = P; @@ -888,7 +888,7 @@ hypre_MGRBuildPHost( hypre_ParCSRMatrix *A, memory_location_P, memory_location_P); hypre_ParCSRMatrixSetNumNonzeros(P); - hypre_ParCSRMatrixDNumNonzeros(P) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(P); + hypre_ParCSRMatrixDNumNonzeros(P) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(P); hypre_MatvecCommPkgCreate(P); /* Set output pointer */ diff --git a/src/parcsr_ls/par_mgr_stats.c b/src/parcsr_ls/par_mgr_stats.c index 44fc6ca73d..949f8bd1a1 100644 --- a/src/parcsr_ls/par_mgr_stats.c +++ b/src/parcsr_ls/par_mgr_stats.c @@ -634,23 +634,23 @@ hypre_MGRSetupStats(void *mgr_vdata) } gridcomp[i] += (HYPRE_Real) hypre_ParCSRMatrixGlobalNumRows(A_array[k]); - opcomp[i] += hypre_ParCSRMatrixDNumNonzeros(A_array[k]); - memcomp[i] += hypre_ParCSRMatrixDNumNonzeros(A_array[k]); + opcomp[i] += (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A_array[k]); + memcomp[i] += (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A_array[k]); if (k < (num_sublevels_amg[i] - 1)) { - memcomp[i] += hypre_ParCSRMatrixDNumNonzeros(P_array[k]) + - hypre_ParCSRMatrixDNumNonzeros(RT_array[k]); + memcomp[i] += (HYPRE_Real)(hypre_ParCSRMatrixDNumNonzeros(P_array[k]) + + hypre_ParCSRMatrixDNumNonzeros(RT_array[k])); } } gridcomp[num_levels_mgr + 1] += gridcomp[i]; opcomp[num_levels_mgr + 1] += opcomp[i] / - hypre_ParCSRMatrixDNumNonzeros(A_finest); + (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A_finest); memcomp[num_levels_mgr + 1] += memcomp[i] / - hypre_ParCSRMatrixDNumNonzeros(A_finest); + (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A_finest); gridcomp[i] /= (HYPRE_Real) hypre_ParCSRMatrixGlobalNumRows(A_array[0]); - opcomp[i] /= hypre_ParCSRMatrixDNumNonzeros(A_array[0]); - memcomp[i] /= hypre_ParCSRMatrixDNumNonzeros(A_array[0]); + opcomp[i] /= (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A_array[0]); + memcomp[i] /= (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A_array[0]); } else { @@ -661,10 +661,10 @@ 
hypre_MGRSetupStats(void *mgr_vdata) A_array[i] = hypre_ParMGRDataA(mgr_data, i); gridcomp[num_levels_mgr + 1] += (HYPRE_Real) hypre_ParCSRMatrixGlobalNumRows(A_array[i]); - opcomp[num_levels_mgr + 1] += hypre_ParCSRMatrixDNumNonzeros(A_array[i]) / - hypre_ParCSRMatrixDNumNonzeros(A_finest); - memcomp[num_levels_mgr + 1] += hypre_ParCSRMatrixDNumNonzeros(A_array[i]) / - hypre_ParCSRMatrixDNumNonzeros(A_finest); + opcomp[num_levels_mgr + 1] += (HYPRE_Real)(hypre_ParCSRMatrixDNumNonzeros(A_array[i]) / + hypre_ParCSRMatrixDNumNonzeros(A_finest)); + memcomp[num_levels_mgr + 1] += (HYPRE_Real)(hypre_ParCSRMatrixDNumNonzeros(A_array[i]) / + hypre_ParCSRMatrixDNumNonzeros(A_finest)); } } gridcomp[num_levels_mgr + 1] /= (HYPRE_Real) hypre_ParCSRMatrixGlobalNumRows(A_finest); diff --git a/src/parcsr_ls/par_stats.c b/src/parcsr_ls/par_stats.c index 37e5921145..4d1b04d6d0 100644 --- a/src/parcsr_ls/par_stats.c +++ b/src/parcsr_ls/par_stats.c @@ -463,7 +463,7 @@ hypre_BoomerAMGSetupStats( void *amg_vdata, else { fine_size = hypre_ParCSRMatrixGlobalNumRows(A_array[level]); - global_nonzeros = hypre_ParCSRMatrixDNumNonzeros(A_array[level]); + global_nonzeros = (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A_array[level]); ndigits[2] = hypre_max(hypre_ndigits((HYPRE_BigInt) global_nonzeros / fine_size ), ndigits[2]); } @@ -512,7 +512,7 @@ hypre_BoomerAMGSetupStats( void *amg_vdata, row_starts = hypre_ParCSRBlockMatrixRowStarts(A_block_array[level]); fine_size = hypre_ParCSRBlockMatrixGlobalNumRows(A_block_array[level]); - global_nonzeros = hypre_ParCSRBlockMatrixDNumNonzeros(A_block_array[level]); + global_nonzeros = (HYPRE_Real)hypre_ParCSRBlockMatrixDNumNonzeros(A_block_array[level]); num_coeffs[level] = global_nonzeros; num_mem[level] = global_nonzeros; num_variables[level] = (HYPRE_Real) fine_size; @@ -593,7 +593,7 @@ hypre_BoomerAMGSetupStats( void *amg_vdata, row_starts = hypre_ParCSRMatrixRowStarts(A_array[level]); fine_size = hypre_ParCSRMatrixGlobalNumRows(A_array[level]); 
- global_nonzeros = hypre_ParCSRMatrixDNumNonzeros(A_array[level]); + global_nonzeros = (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(A_array[level]); num_coeffs[level] = global_nonzeros; if (level == 0) { @@ -753,7 +753,7 @@ hypre_BoomerAMGSetupStats( void *amg_vdata, fine_size = hypre_ParCSRBlockMatrixGlobalNumRows(P_block_array[level]); coarse_size = hypre_ParCSRBlockMatrixGlobalNumCols(P_block_array[level]); - global_nonzeros = hypre_ParCSRBlockMatrixDNumNonzeros(P_block_array[level]); + global_nonzeros = (HYPRE_Real)hypre_ParCSRBlockMatrixDNumNonzeros(P_block_array[level]); num_mem[level] += global_nonzeros; min_weight = 1.0; @@ -877,7 +877,7 @@ hypre_BoomerAMGSetupStats( void *amg_vdata, fine_size = hypre_ParCSRMatrixGlobalNumRows(P_array[level]); coarse_size = hypre_ParCSRMatrixGlobalNumCols(P_array[level]); hypre_ParCSRMatrixSetDNumNonzeros(P_array[level]); - global_nonzeros = hypre_ParCSRMatrixDNumNonzeros(P_array[level]); + global_nonzeros = (HYPRE_Real)hypre_ParCSRMatrixDNumNonzeros(P_array[level]); num_mem[level] += (HYPRE_Real) global_nonzeros; min_weight = 1.0; diff --git a/src/parcsr_ls/par_strength2nd_device.c b/src/parcsr_ls/par_strength2nd_device.c index d2db35d429..84d7175f75 100644 --- a/src/parcsr_ls/par_strength2nd_device.c +++ b/src/parcsr_ls/par_strength2nd_device.c @@ -113,7 +113,7 @@ hypre_BoomerAMGCreate2ndSDevice( hypre_ParCSRMatrix *S, /* global nnz has changed, but we do not care about it */ /* hypre_ParCSRMatrixSetNumNonzeros(S_CX); - hypre_ParCSRMatrixDNumNonzeros(S_CX) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(S_CX); + hypre_ParCSRMatrixDNumNonzeros(S_CX) = (hypre_double)hypre_ParCSRMatrixNumNonzeros(S_CX); */ hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(S_CX)); diff --git a/src/parcsr_mv/HYPRE_parcsr_mv_mp.c b/src/parcsr_mv/HYPRE_parcsr_mv_mp.c index c69e558ddf..d23acc3a8f 100644 --- a/src/parcsr_mv/HYPRE_parcsr_mv_mp.c +++ b/src/parcsr_mv/HYPRE_parcsr_mv_mp.c @@ -14,10 +14,10 @@ #include "_hypre_parcsr_mv.h" #ifdef 
HYPRE_MIXED_PRECISION + /*-------------------------------------------------------------------------- * Mixed-precision HYPRE_ParVectorCopy *--------------------------------------------------------------------------*/ - HYPRE_Int HYPRE_ParVectorCopy_mp( HYPRE_ParVector x, HYPRE_ParVector y ) @@ -26,22 +26,55 @@ HYPRE_ParVectorCopy_mp( HYPRE_ParVector x, (hypre_ParVector *) y ) ); } +/*-------------------------------------------------------------------------- + * Mixed-precision HYPRE_ParVectorAxpy + *--------------------------------------------------------------------------*/ +HYPRE_Int +HYPRE_ParVectorAxpy_mp( hypre_long_double alpha, HYPRE_ParVector x, + HYPRE_ParVector y ) +{ + return ( hypre_ParVectorAxpy_mp( alpha, (hypre_ParVector *) x, + (hypre_ParVector *) y ) ); +} + +/*-------------------------------------------------------------------------- + * Mixed-precision HYPRE_ParVectorConvert + *--------------------------------------------------------------------------*/ HYPRE_Int HYPRE_ParVectorConvert_mp( HYPRE_ParVector v, HYPRE_Precision new_precision) { - hypre_ParVectorConvert_mp( (hypre_ParVector *) v, - new_precision ); - return hypre_error_flag; + return (hypre_ParVectorConvert_mp( (hypre_ParVector *) v, + new_precision )); } +/*-------------------------------------------------------------------------- + * Mixed-precision HYPRE_ParCSRMatrixConvert + *--------------------------------------------------------------------------*/ HYPRE_Int HYPRE_ParCSRMatrixConvert_mp( HYPRE_ParCSRMatrix A, HYPRE_Precision new_precision) { - hypre_ParCSRMatrixConvert_mp( (hypre_ParCSRMatrix *) A, - new_precision ); - return hypre_error_flag; + return (hypre_ParCSRMatrixConvert_mp( (hypre_ParCSRMatrix *) A, + new_precision )); +} + +/*-------------------------------------------------------------------------- + * Mixed-precision HYPRE_ParCSRMatrixClone + *--------------------------------------------------------------------------*/ +HYPRE_ParCSRMatrix 
+HYPRE_ParCSRMatrixClone_mp(HYPRE_ParCSRMatrix A, HYPRE_Precision new_precision) +{ + return ((HYPRE_ParCSRMatrix)(hypre_ParCSRMatrixClone_mp((hypre_ParCSRMatrix *)A, new_precision))); +} + +/*-------------------------------------------------------------------------- + * Mixed-precision HYPRE_ParCSRMatrixCopy + *--------------------------------------------------------------------------*/ +HYPRE_Int +HYPRE_ParCSRMatrixCopy_mp( HYPRE_ParCSRMatrix A, HYPRE_ParCSRMatrix B ) +{ + return (hypre_ParCSRMatrixCopy_mp( (hypre_ParCSRMatrix *)A, (hypre_ParCSRMatrix *)B )); } #endif diff --git a/src/parcsr_mv/HYPRE_parcsr_mv_mp.h b/src/parcsr_mv/HYPRE_parcsr_mv_mp.h index 36dbbd5f04..acc3b2bd2e 100644 --- a/src/parcsr_mv/HYPRE_parcsr_mv_mp.h +++ b/src/parcsr_mv/HYPRE_parcsr_mv_mp.h @@ -7,7 +7,7 @@ /****************************************************************************** * - * Header file for HYPRE_parcsr_mv library + * Header file for Mixed-precision HYPRE_parcsr_mv_mp library * *****************************************************************************/ @@ -22,10 +22,17 @@ extern "C" { HYPRE_Int HYPRE_ParVectorCopy_mp( HYPRE_ParVector x, HYPRE_ParVector y ); +HYPRE_Int HYPRE_ParVectorAxpy_mp( hypre_long_double alpha, HYPRE_ParVector x, HYPRE_ParVector y ); + HYPRE_Int HYPRE_ParVectorConvert_mp( HYPRE_ParVector v, HYPRE_Precision new_precision ); HYPRE_Int HYPRE_ParCSRMatrixConvert_mp( HYPRE_ParCSRMatrix A, HYPRE_Precision new_precision ); +HYPRE_ParCSRMatrix HYPRE_ParCSRMatrixClone_mp(HYPRE_ParCSRMatrix A, + HYPRE_Precision new_precision); + +HYPRE_Int HYPRE_ParCSRMatrixCopy_mp( HYPRE_ParCSRMatrix A, HYPRE_ParCSRMatrix B ); + #ifdef __cplusplus } #endif diff --git a/src/parcsr_mv/HYPRE_parcsr_mv_mup.h b/src/parcsr_mv/HYPRE_parcsr_mv_mup.h index e27883f706..1ae5b2191e 100644 --- a/src/parcsr_mv/HYPRE_parcsr_mv_mup.h +++ b/src/parcsr_mv/HYPRE_parcsr_mv_mup.h @@ -1,17 +1,6 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ -#ifndef 
HYPRE_PARCSR_MV_MUP_HEADER -#define HYPRE_PARCSR_MV_MUP_HEADER - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined (HYPRE_MIXED_PRECISION) - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. @@ -19,17 +8,18 @@ extern "C" { * SPDX-License-Identifier: (Apache-2.0 OR MIT) ******************************************************************************/ +#ifndef HYPRE_PARCSR_MV_MUP_HEADER +#define HYPRE_PARCSR_MV_MUP_HEADER +#ifdef __cplusplus +extern "C" { +#endif -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +#if defined (HYPRE_MIXED_PRECISION) -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ +/* fixed */ +/* functions */ HYPRE_Int HYPRE_CSRMatrixToParCSRMatrix_flt( MPI_Comm comm, HYPRE_CSRMatrix A_CSR, HYPRE_BigInt *row_partitioning, HYPRE_BigInt *col_partitioning, HYPRE_ParCSRMatrix *matrix ); @@ -355,16 +345,7 @@ HYPRE_VectorToParVector_long_dbl( MPI_Comm comm, HYPRE_Vector b, HYPRE_BigInt *p HYPRE_Int HYPRE_VectorToParVector( MPI_Comm comm, HYPRE_Vector b, HYPRE_BigInt *partitioning, HYPRE_ParVector *vector ); - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* pre */ HYPRE_Int HYPRE_CSRMatrixToParCSRMatrix_pre( HYPRE_Precision precision, MPI_Comm comm, HYPRE_CSRMatrix A_CSR, HYPRE_BigInt *row_partitioning, HYPRE_BigInt *col_partitioning, HYPRE_ParCSRMatrix *matrix ); @@ -474,7 +455,6 @@ HYPRE_ParVectorSetRandomValues_pre( HYPRE_Precision precision, HYPRE_ParVector v HYPRE_Int HYPRE_VectorToParVector_pre( HYPRE_Precision precision, MPI_Comm comm, HYPRE_Vector b, HYPRE_BigInt *partitioning, HYPRE_ParVector *vector ); - #endif #ifdef __cplusplus diff --git a/src/parcsr_mv/Makefile b/src/parcsr_mv/Makefile index 05fa5ae7f4..6d80383b6f 100644 --- a/src/parcsr_mv/Makefile +++ b/src/parcsr_mv/Makefile @@ -87,6 +87,11 @@ MP_FILES = \ parcsr_mv_mp.c\ HYPRE_parcsr_mv_mp.c +MP_CUFILES=\ + mup_fixed_gpu.c\ + mup_functions_gpu.c\ + mup_pre_gpu.c + COBJS = ${FILES:.c=.o} CUOBJS = ${CUFILES:.c=.obj} OBJS = ${COBJS} ${CUOBJS} @@ -102,9 +107,10 @@ CUOBJS_single = ${CUFILES:.c=.obj_flt} CUOBJS_double = ${CUFILES:.c=.obj_dbl} CUOBJS_longdouble = ${CUFILES:.c=.obj_ldbl} MP_COBJS = ${MP_FILES:.c=.o} +MP_CUOBJS = ${MP_CUFILES:.c=.obj} OBJS = ${COBJS_single} ${COBJS_double} ${COBJS_longdouble} ${MP_COBJS} -OBJS += ${CUOBJS_single} ${CUOBJS_double} ${CUOBJS_longdouble} +OBJS += ${CUOBJS_single} ${CUOBJS_double} ${CUOBJS_longdouble} ${MP_CUOBJS} endif diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index 2574611e4c..6816506c3e 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -333,7 +333,7 @@ typedef struct hypre_ParCSRMatrix_struct HYPRE_BigInt global_num_cols; HYPRE_BigInt global_num_rownnz; HYPRE_BigInt num_nonzeros; - HYPRE_Real d_num_nonzeros; + hypre_double d_num_nonzeros; HYPRE_BigInt first_row_index; HYPRE_BigInt first_col_diag; @@ -1323,7 +1323,7 @@ hypre_ParVectorCopy_mp( hypre_ParVector *x, hypre_ParVector *y ); HYPRE_Int 
-hypre_ParVectorAxpy_mp( hypre_double alpha, +hypre_ParVectorAxpy_mp( hypre_long_double alpha, hypre_ParVector *x, hypre_ParVector *y ); @@ -1335,6 +1335,13 @@ HYPRE_Int hypre_ParCSRMatrixConvert_mp ( hypre_ParCSRMatrix *A, HYPRE_Precision new_precision ); +hypre_ParCSRMatrix* +hypre_ParCSRMatrixClone_mp(hypre_ParCSRMatrix *A, HYPRE_Precision new_precision); + +HYPRE_Int +hypre_ParCSRMatrixCopy_mp( hypre_ParCSRMatrix *A, + hypre_ParCSRMatrix *B ); + #endif #ifdef __cplusplus diff --git a/src/parcsr_mv/_hypre_parcsr_mv.hpp b/src/parcsr_mv/_hypre_parcsr_mv.hpp new file mode 100644 index 0000000000..a6ec43c4ba --- /dev/null +++ b/src/parcsr_mv/_hypre_parcsr_mv.hpp @@ -0,0 +1,36 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use 'headers' to generate) ***/ + +#ifndef hypre_PARCSR_MV_HPP +#define hypre_PARCSR_MV_HPP + +#include +#include "_hypre_utilities.hpp" + +#ifdef HYPRE_MIXED_PRECISION +#include "_hypre_parcsr_mv_mup_def.h" +#endif + +#ifdef __cplusplus +extern "C++" { +#endif + + +#ifdef __cplusplus +} +#endif + +#ifdef HYPRE_MIXED_PRECISION +/* The following is for user compiles and the order is important. The first + * header ensures that we do not change prototype names in user files or in the + * second header file. The second header contains all the prototypes needed by + * users for mixed precision. */ +#ifndef hypre_MP_BUILD +#include "_hypre_parcsr_mv_mup_undef.h" +#include "_hypre_parcsr_mv_mup.h" +#include "_hypre_parcsr_mv_mup.hpp" +#endif +#endif + +#endif + diff --git a/src/parcsr_mv/_hypre_parcsr_mv_mup.h b/src/parcsr_mv/_hypre_parcsr_mv_mup.h index 39b5e788af..1f1d9178ac 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv_mup.h +++ b/src/parcsr_mv/_hypre_parcsr_mv_mup.h @@ -1,6 +1,13 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. 
See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef hypre_PARCSR_MV_MUP_HEADER #define hypre_PARCSR_MV_MUP_HEADER @@ -10,15 +17,7 @@ extern "C" { #if defined (HYPRE_MIXED_PRECISION) -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* fixed */ hypre_IJAssumedPart * hypre_AssumedPartitionCreate_flt( MPI_Comm comm, HYPRE_BigInt global_num, HYPRE_BigInt start, HYPRE_BigInt end ); @@ -559,13 +558,6 @@ hypre_ParCSRMatrixBlockColSum_dbl( hypre_ParCSRMatrix *A, HYPRE_Int row_major, H HYPRE_Int hypre_ParCSRMatrixBlockColSum_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int row_major, HYPRE_Int num_rows_block, HYPRE_Int num_cols_block, hypre_DenseBlockMatrix **B_ptr ); -hypre_ParCSRMatrix* -hypre_ParCSRMatrixClone_flt( hypre_ParCSRMatrix *A, HYPRE_Int copy_data ); -hypre_ParCSRMatrix* -hypre_ParCSRMatrixClone_dbl( hypre_ParCSRMatrix *A, HYPRE_Int copy_data ); -hypre_ParCSRMatrix* -hypre_ParCSRMatrixClone_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int copy_data ); - hypre_ParCSRMatrix* hypre_ParCSRMatrixClone_v2_flt( hypre_ParCSRMatrix *A, HYPRE_Int copy_data, HYPRE_MemoryLocation memory_location ); hypre_ParCSRMatrix* @@ -594,13 +586,6 @@ hypre_ParCSRMatrixComputeScalingTagged_dbl( hypre_ParCSRMatrix *A, HYPRE_Int typ HYPRE_Int hypre_ParCSRMatrixComputeScalingTagged_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int type, HYPRE_MemoryLocation memloc_tags, HYPRE_Int num_tags, HYPRE_Int *tags, hypre_ParVector **scaling_ptr ); -HYPRE_Int 
-hypre_ParCSRMatrixCopy_flt( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B, HYPRE_Int copy_data ); -HYPRE_Int -hypre_ParCSRMatrixCopy_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B, HYPRE_Int copy_data ); -HYPRE_Int -hypre_ParCSRMatrixCopy_long_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B, HYPRE_Int copy_data ); - HYPRE_Int hypre_ParCSRMatrixCopyColMapOffdToDevice_flt( hypre_ParCSRMatrix *A ); HYPRE_Int @@ -622,13 +607,6 @@ hypre_ParCSRMatrixCopy_C_dbl( hypre_ParCSRMatrix *P, hypre_ParCSRMatrix *C, HYPR HYPRE_Int hypre_ParCSRMatrixCopy_C_long_dbl( hypre_ParCSRMatrix *P, hypre_ParCSRMatrix *C, HYPRE_Int *CF_marker ); -hypre_ParCSRMatrix * -hypre_ParCSRMatrixCreate_flt( MPI_Comm comm, HYPRE_BigInt global_num_rows, HYPRE_BigInt global_num_cols, HYPRE_BigInt *row_starts_in, HYPRE_BigInt *col_starts_in, HYPRE_Int num_cols_offd, HYPRE_Int num_nonzeros_diag, HYPRE_Int num_nonzeros_offd ); -hypre_ParCSRMatrix * -hypre_ParCSRMatrixCreate_dbl( MPI_Comm comm, HYPRE_BigInt global_num_rows, HYPRE_BigInt global_num_cols, HYPRE_BigInt *row_starts_in, HYPRE_BigInt *col_starts_in, HYPRE_Int num_cols_offd, HYPRE_Int num_nonzeros_diag, HYPRE_Int num_nonzeros_offd ); -hypre_ParCSRMatrix * -hypre_ParCSRMatrixCreate_long_dbl( MPI_Comm comm, HYPRE_BigInt global_num_rows, HYPRE_BigInt global_num_cols, HYPRE_BigInt *row_starts_in, HYPRE_BigInt *col_starts_in, HYPRE_Int num_cols_offd, HYPRE_Int num_nonzeros_diag, HYPRE_Int num_nonzeros_offd ); - HYPRE_Int hypre_ParCSRMatrixCreateAssumedPartition_flt( hypre_ParCSRMatrix *matrix ); HYPRE_Int @@ -811,13 +789,6 @@ hypre_ParCSRMatrixInitialize_dbl( hypre_ParCSRMatrix *matrix ); HYPRE_Int hypre_ParCSRMatrixInitialize_long_dbl( hypre_ParCSRMatrix *matrix ); -HYPRE_Int -hypre_ParCSRMatrixInitialize_v2_flt( hypre_ParCSRMatrix *matrix, HYPRE_MemoryLocation memory_location ); -HYPRE_Int -hypre_ParCSRMatrixInitialize_v2_dbl( hypre_ParCSRMatrix *matrix, HYPRE_MemoryLocation memory_location ); -HYPRE_Int -hypre_ParCSRMatrixInitialize_v2_long_dbl( 
hypre_ParCSRMatrix *matrix, HYPRE_MemoryLocation memory_location ); - HYPRE_Complex hypre_ParCSRMatrixLocalSumElts_flt( hypre_ParCSRMatrix *A ); HYPRE_Complex @@ -1511,16 +1482,7 @@ hypre_VectorToParVector_dbl( MPI_Comm comm, hypre_Vector *v, HYPRE_BigInt *vec_s hypre_ParVector * hypre_VectorToParVector_long_dbl( MPI_Comm comm, hypre_Vector *v, HYPRE_BigInt *vec_starts ); - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* functions */ HYPRE_Int HYPRE_Destroy2DSystem_flt( HYPRE_ParCSR_System_Problem *sys_prob ); @@ -1540,16 +1502,43 @@ HYPRE_Generate2DSystem_long_dbl( HYPRE_ParCSRMatrix H_L1, HYPRE_ParCSRMatrix H_L HYPRE_ParCSR_System_Problem * HYPRE_Generate2DSystem( HYPRE_ParCSRMatrix H_L1, HYPRE_ParCSRMatrix H_L2, HYPRE_ParVector H_b1, HYPRE_ParVector H_b2, HYPRE_ParVector H_x1, HYPRE_ParVector H_x2, void *M_vals ); +hypre_ParCSRMatrix* +hypre_ParCSRMatrixClone_flt( hypre_ParCSRMatrix *A, HYPRE_Int copy_data ); +hypre_ParCSRMatrix* +hypre_ParCSRMatrixClone_dbl( hypre_ParCSRMatrix *A, HYPRE_Int copy_data ); +hypre_ParCSRMatrix* +hypre_ParCSRMatrixClone_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int copy_data ); +hypre_ParCSRMatrix* +hypre_ParCSRMatrixClone( hypre_ParCSRMatrix *A, HYPRE_Int copy_data ); -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +HYPRE_Int +hypre_ParCSRMatrixCopy_flt( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B, HYPRE_Int copy_data ); +HYPRE_Int +hypre_ParCSRMatrixCopy_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B, HYPRE_Int copy_data ); +HYPRE_Int +hypre_ParCSRMatrixCopy_long_dbl( hypre_ParCSRMatrix *A, 
hypre_ParCSRMatrix *B, HYPRE_Int copy_data ); +HYPRE_Int +hypre_ParCSRMatrixCopy( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B, HYPRE_Int copy_data ); -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ +hypre_ParCSRMatrix * +hypre_ParCSRMatrixCreate_flt( MPI_Comm comm, HYPRE_BigInt global_num_rows, HYPRE_BigInt global_num_cols, HYPRE_BigInt *row_starts_in, HYPRE_BigInt *col_starts_in, HYPRE_Int num_cols_offd, HYPRE_Int num_nonzeros_diag, HYPRE_Int num_nonzeros_offd ); +hypre_ParCSRMatrix * +hypre_ParCSRMatrixCreate_dbl( MPI_Comm comm, HYPRE_BigInt global_num_rows, HYPRE_BigInt global_num_cols, HYPRE_BigInt *row_starts_in, HYPRE_BigInt *col_starts_in, HYPRE_Int num_cols_offd, HYPRE_Int num_nonzeros_diag, HYPRE_Int num_nonzeros_offd ); +hypre_ParCSRMatrix * +hypre_ParCSRMatrixCreate_long_dbl( MPI_Comm comm, HYPRE_BigInt global_num_rows, HYPRE_BigInt global_num_cols, HYPRE_BigInt *row_starts_in, HYPRE_BigInt *col_starts_in, HYPRE_Int num_cols_offd, HYPRE_Int num_nonzeros_diag, HYPRE_Int num_nonzeros_offd ); +hypre_ParCSRMatrix * +hypre_ParCSRMatrixCreate( MPI_Comm comm, HYPRE_BigInt global_num_rows, HYPRE_BigInt global_num_cols, HYPRE_BigInt *row_starts_in, HYPRE_BigInt *col_starts_in, HYPRE_Int num_cols_offd, HYPRE_Int num_nonzeros_diag, HYPRE_Int num_nonzeros_offd ); + +HYPRE_Int +hypre_ParCSRMatrixInitialize_v2_flt( hypre_ParCSRMatrix *matrix, HYPRE_MemoryLocation memory_location ); +HYPRE_Int +hypre_ParCSRMatrixInitialize_v2_dbl( hypre_ParCSRMatrix *matrix, HYPRE_MemoryLocation memory_location ); +HYPRE_Int +hypre_ParCSRMatrixInitialize_v2_long_dbl( hypre_ParCSRMatrix *matrix, HYPRE_MemoryLocation memory_location ); +HYPRE_Int 
+hypre_ParCSRMatrixInitialize_v2( hypre_ParCSRMatrix *matrix, HYPRE_MemoryLocation memory_location ); +/* pre */ HYPRE_Int HYPRE_Destroy2DSystem_pre( HYPRE_Precision precision, HYPRE_ParCSR_System_Problem *sys_prob ); @@ -1557,6 +1546,17 @@ HYPRE_Destroy2DSystem_pre( HYPRE_Precision precision, HYPRE_ParCSR_System_Proble HYPRE_ParCSR_System_Problem * HYPRE_Generate2DSystem_pre( HYPRE_Precision precision, HYPRE_ParCSRMatrix H_L1, HYPRE_ParCSRMatrix H_L2, HYPRE_ParVector H_b1, HYPRE_ParVector H_b2, HYPRE_ParVector H_x1, HYPRE_ParVector H_x2, void *M_vals ); +hypre_ParCSRMatrix* +hypre_ParCSRMatrixClone_pre( HYPRE_Precision precision, hypre_ParCSRMatrix *A, HYPRE_Int copy_data ); + +HYPRE_Int +hypre_ParCSRMatrixCopy_pre( HYPRE_Precision precision, hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B, HYPRE_Int copy_data ); + +hypre_ParCSRMatrix * +hypre_ParCSRMatrixCreate_pre( HYPRE_Precision precision, MPI_Comm comm, HYPRE_BigInt global_num_rows, HYPRE_BigInt global_num_cols, HYPRE_BigInt *row_starts_in, HYPRE_BigInt *col_starts_in, HYPRE_Int num_cols_offd, HYPRE_Int num_nonzeros_diag, HYPRE_Int num_nonzeros_offd ); + +HYPRE_Int +hypre_ParCSRMatrixInitialize_v2_pre( HYPRE_Precision precision, hypre_ParCSRMatrix *matrix, HYPRE_MemoryLocation memory_location ); #endif diff --git a/src/parcsr_mv/_hypre_parcsr_mv_mup.hpp b/src/parcsr_mv/_hypre_parcsr_mv_mup.hpp new file mode 100644 index 0000000000..5f1bc55fa0 --- /dev/null +++ b/src/parcsr_mv/_hypre_parcsr_mv_mup.hpp @@ -0,0 +1,305 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#ifndef hypre_PARCSR_MV_MUP_HEADER_CXX +#define hypre_PARCSR_MV_MUP_HEADER_CXX + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined (HYPRE_MIXED_PRECISION) +#if defined(HYPRE_USING_GPU) + +/* fixed_gpu */ + +hypre_CSRMatrix* +hypre_ConcatDiagAndOffdDevice_flt( hypre_ParCSRMatrix *A ); +hypre_CSRMatrix* +hypre_ConcatDiagAndOffdDevice_dbl( hypre_ParCSRMatrix *A ); +hypre_CSRMatrix* +hypre_ConcatDiagAndOffdDevice_long_dbl( hypre_ParCSRMatrix *A ); + +HYPRE_Int +hypre_ConcatDiagOffdAndExtDevice_flt( hypre_ParCSRMatrix *A, hypre_CSRMatrix *E, hypre_CSRMatrix **B_ptr, HYPRE_Int *num_cols_offd_ptr, HYPRE_BigInt **cols_map_offd_ptr ); +HYPRE_Int +hypre_ConcatDiagOffdAndExtDevice_dbl( hypre_ParCSRMatrix *A, hypre_CSRMatrix *E, hypre_CSRMatrix **B_ptr, HYPRE_Int *num_cols_offd_ptr, HYPRE_BigInt **cols_map_offd_ptr ); +HYPRE_Int +hypre_ConcatDiagOffdAndExtDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_CSRMatrix *E, hypre_CSRMatrix **B_ptr, HYPRE_Int *num_cols_offd_ptr, HYPRE_BigInt **cols_map_offd_ptr ); + +HYPRE_Int +hypre_ExchangeExternalRowsDeviceInit_flt( hypre_CSRMatrix *B_ext, hypre_ParCSRCommPkg *comm_pkg_A, HYPRE_Int want_data, void **request_ptr ); +HYPRE_Int +hypre_ExchangeExternalRowsDeviceInit_dbl( hypre_CSRMatrix *B_ext, hypre_ParCSRCommPkg *comm_pkg_A, HYPRE_Int want_data, void **request_ptr ); +HYPRE_Int +hypre_ExchangeExternalRowsDeviceInit_long_dbl( hypre_CSRMatrix *B_ext, hypre_ParCSRCommPkg *comm_pkg_A, HYPRE_Int want_data, void **request_ptr ); + +hypre_CSRMatrix* +hypre_ExchangeExternalRowsDeviceWait_flt( void *vrequest ); +hypre_CSRMatrix* +hypre_ExchangeExternalRowsDeviceWait_dbl( void *vrequest ); +hypre_CSRMatrix* +hypre_ExchangeExternalRowsDeviceWait_long_dbl( void *vrequest ); + +hypre_CSRMatrix * +hypre_MergeDiagAndOffdDevice_flt( hypre_ParCSRMatrix *par_matrix ); +hypre_CSRMatrix * 
+hypre_MergeDiagAndOffdDevice_dbl( hypre_ParCSRMatrix *par_matrix ); +hypre_CSRMatrix * +hypre_MergeDiagAndOffdDevice_long_dbl( hypre_ParCSRMatrix *par_matrix ); + +HYPRE_Int +hypre_ParCSRCommPkgCreateMatrixE_flt( hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int local_ncols ); +HYPRE_Int +hypre_ParCSRCommPkgCreateMatrixE_dbl( hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int local_ncols ); +HYPRE_Int +hypre_ParCSRCommPkgCreateMatrixE_long_dbl( hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int local_ncols ); + +HYPRE_Int +hypre_ParCSRDiagScaleVectorDevice_flt( hypre_ParCSRMatrix *par_A, hypre_ParVector *par_y, hypre_ParVector *par_x ); +HYPRE_Int +hypre_ParCSRDiagScaleVectorDevice_dbl( hypre_ParCSRMatrix *par_A, hypre_ParVector *par_y, hypre_ParVector *par_x ); +HYPRE_Int +hypre_ParCSRDiagScaleVectorDevice_long_dbl( hypre_ParCSRMatrix *par_A, hypre_ParVector *par_y, hypre_ParVector *par_x ); + +hypre_ParCSRMatrix * +hypre_ParCSRMatMatDevice_flt( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B ); +hypre_ParCSRMatrix * +hypre_ParCSRMatMatDevice_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B ); +hypre_ParCSRMatrix * +hypre_ParCSRMatMatDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B ); + +HYPRE_Int +hypre_ParCSRMatMatDiagDevice_flt( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *BT, hypre_ParCSRMatrix *C ); +HYPRE_Int +hypre_ParCSRMatMatDiagDevice_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *BT, hypre_ParCSRMatrix *C ); +HYPRE_Int +hypre_ParCSRMatMatDiagDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *BT, hypre_ParCSRMatrix *C ); + +HYPRE_Int +hypre_ParCSRMatrixAddDevice_flt( hypre_float alpha, hypre_ParCSRMatrix *A, hypre_float beta, hypre_ParCSRMatrix *B, hypre_ParCSRMatrix **Cout ); +HYPRE_Int +hypre_ParCSRMatrixAddDevice_dbl( hypre_double alpha, hypre_ParCSRMatrix *A, hypre_double beta, hypre_ParCSRMatrix *B, hypre_ParCSRMatrix **Cout ); +HYPRE_Int +hypre_ParCSRMatrixAddDevice_long_dbl( hypre_long_double alpha, hypre_ParCSRMatrix *A, hypre_long_double beta, 
hypre_ParCSRMatrix *B, hypre_ParCSRMatrix **Cout ); + +HYPRE_Int +hypre_ParCSRMatrixBlkFilterDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int block_size, hypre_ParCSRMatrix **B_ptr ); +HYPRE_Int +hypre_ParCSRMatrixBlkFilterDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int block_size, hypre_ParCSRMatrix **B_ptr ); +HYPRE_Int +hypre_ParCSRMatrixBlkFilterDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int block_size, hypre_ParCSRMatrix **B_ptr ); + +HYPRE_Int +hypre_ParCSRMatrixColSumDevice_flt( hypre_ParCSRMatrix *A, hypre_ParVector *b ); +HYPRE_Int +hypre_ParCSRMatrixColSumDevice_dbl( hypre_ParCSRMatrix *A, hypre_ParVector *b ); +HYPRE_Int +hypre_ParCSRMatrixColSumDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_ParVector *b ); + +HYPRE_Int +hypre_ParCSRMatrixCompressOffdMapDevice_flt( hypre_ParCSRMatrix *A ); +HYPRE_Int +hypre_ParCSRMatrixCompressOffdMapDevice_dbl( hypre_ParCSRMatrix *A ); +HYPRE_Int +hypre_ParCSRMatrixCompressOffdMapDevice_long_dbl( hypre_ParCSRMatrix *A ); + +HYPRE_Int +hypre_ParCSRMatrixDiagScaleDevice_flt( hypre_ParCSRMatrix *par_A, hypre_ParVector *par_ld, hypre_ParVector *par_rd ); +HYPRE_Int +hypre_ParCSRMatrixDiagScaleDevice_dbl( hypre_ParCSRMatrix *par_A, hypre_ParVector *par_ld, hypre_ParVector *par_rd ); +HYPRE_Int +hypre_ParCSRMatrixDiagScaleDevice_long_dbl( hypre_ParCSRMatrix *par_A, hypre_ParVector *par_ld, hypre_ParVector *par_rd ); + +HYPRE_Int +hypre_ParCSRMatrixDropSmallEntriesDevice_flt( hypre_ParCSRMatrix *A, hypre_float tol, HYPRE_Int type ); +HYPRE_Int +hypre_ParCSRMatrixDropSmallEntriesDevice_dbl( hypre_ParCSRMatrix *A, hypre_double tol, HYPRE_Int type ); +HYPRE_Int +hypre_ParCSRMatrixDropSmallEntriesDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_long_double tol, HYPRE_Int type ); + +hypre_CSRMatrix* +hypre_ParCSRMatrixExtractBExtDevice_flt( hypre_ParCSRMatrix *B, hypre_ParCSRMatrix *A, HYPRE_Int want_data ); +hypre_CSRMatrix* +hypre_ParCSRMatrixExtractBExtDevice_dbl( hypre_ParCSRMatrix *B, hypre_ParCSRMatrix *A, HYPRE_Int want_data ); 
+hypre_CSRMatrix* +hypre_ParCSRMatrixExtractBExtDevice_long_dbl( hypre_ParCSRMatrix *B, hypre_ParCSRMatrix *A, HYPRE_Int want_data ); + +HYPRE_Int +hypre_ParCSRMatrixExtractBExtDeviceInit_flt( hypre_ParCSRMatrix *B, hypre_ParCSRMatrix *A, HYPRE_Int want_data, void **request_ptr ); +HYPRE_Int +hypre_ParCSRMatrixExtractBExtDeviceInit_dbl( hypre_ParCSRMatrix *B, hypre_ParCSRMatrix *A, HYPRE_Int want_data, void **request_ptr ); +HYPRE_Int +hypre_ParCSRMatrixExtractBExtDeviceInit_long_dbl( hypre_ParCSRMatrix *B, hypre_ParCSRMatrix *A, HYPRE_Int want_data, void **request_ptr ); + +hypre_CSRMatrix* +hypre_ParCSRMatrixExtractBExtDeviceWait_flt( void *request ); +hypre_CSRMatrix* +hypre_ParCSRMatrixExtractBExtDeviceWait_dbl( void *request ); +hypre_CSRMatrix* +hypre_ParCSRMatrixExtractBExtDeviceWait_long_dbl( void *request ); + +HYPRE_Int +hypre_ParCSRMatrixGenerate1DCFDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **ACX_ptr, hypre_ParCSRMatrix **AXC_ptr ); +HYPRE_Int +hypre_ParCSRMatrixGenerate1DCFDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **ACX_ptr, hypre_ParCSRMatrix **AXC_ptr ); +HYPRE_Int +hypre_ParCSRMatrixGenerate1DCFDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **ACX_ptr, hypre_ParCSRMatrix **AXC_ptr ); + +HYPRE_Int +hypre_ParCSRMatrixGenerateCCCFDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_CF_ptr, hypre_ParCSRMatrix **A_CC_ptr ); +HYPRE_Int +hypre_ParCSRMatrixGenerateCCCFDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_CF_ptr, hypre_ParCSRMatrix **A_CC_ptr ); +HYPRE_Int +hypre_ParCSRMatrixGenerateCCCFDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int 
*CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_CF_ptr, hypre_ParCSRMatrix **A_CC_ptr ); + +HYPRE_Int +hypre_ParCSRMatrixGenerateCCDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **ACC_ptr ); +HYPRE_Int +hypre_ParCSRMatrixGenerateCCDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **ACC_ptr ); +HYPRE_Int +hypre_ParCSRMatrixGenerateCCDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **ACC_ptr ); + +HYPRE_Int +hypre_ParCSRMatrixGenerateCFDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **ACF_ptr ); +HYPRE_Int +hypre_ParCSRMatrixGenerateCFDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **ACF_ptr ); +HYPRE_Int +hypre_ParCSRMatrixGenerateCFDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **ACF_ptr ); + +HYPRE_Int +hypre_ParCSRMatrixGenerateFFCFDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_CF_ptr, hypre_ParCSRMatrix **A_FF_ptr ); +HYPRE_Int +hypre_ParCSRMatrixGenerateFFCFDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_CF_ptr, hypre_ParCSRMatrix **A_FF_ptr ); +HYPRE_Int +hypre_ParCSRMatrixGenerateFFCFDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_CF_ptr, hypre_ParCSRMatrix **A_FF_ptr ); + +HYPRE_Int +hypre_ParCSRMatrixGenerateFFFC3Device_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, 
hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_FC_ptr, hypre_ParCSRMatrix **A_FF_ptr ); +HYPRE_Int +hypre_ParCSRMatrixGenerateFFFC3Device_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_FC_ptr, hypre_ParCSRMatrix **A_FF_ptr ); +HYPRE_Int +hypre_ParCSRMatrixGenerateFFFC3Device_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_FC_ptr, hypre_ParCSRMatrix **A_FF_ptr ); + +HYPRE_Int +hypre_ParCSRMatrixGenerateFFFCDevice_flt( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_FC_ptr, hypre_ParCSRMatrix **A_FF_ptr ); +HYPRE_Int +hypre_ParCSRMatrixGenerateFFFCDevice_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_FC_ptr, hypre_ParCSRMatrix **A_FF_ptr ); +HYPRE_Int +hypre_ParCSRMatrixGenerateFFFCDevice_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_FC_ptr, hypre_ParCSRMatrix **A_FF_ptr ); + +HYPRE_Int +hypre_ParCSRMatrixGetRowDevice_flt( hypre_ParCSRMatrix *mat, HYPRE_BigInt row, HYPRE_Int *size, HYPRE_BigInt **col_ind, hypre_float **values ); +HYPRE_Int +hypre_ParCSRMatrixGetRowDevice_dbl( hypre_ParCSRMatrix *mat, HYPRE_BigInt row, HYPRE_Int *size, HYPRE_BigInt **col_ind, hypre_double **values ); +HYPRE_Int +hypre_ParCSRMatrixGetRowDevice_long_dbl( hypre_ParCSRMatrix *mat, HYPRE_BigInt row, HYPRE_Int *size, HYPRE_BigInt **col_ind, hypre_long_double **values ); + +HYPRE_Int +hypre_ParCSRMatrixMatvecOutOfPlaceDevice_flt( hypre_float alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, hypre_float beta, hypre_ParVector *b, hypre_ParVector *y ); +HYPRE_Int +hypre_ParCSRMatrixMatvecOutOfPlaceDevice_dbl( hypre_double alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, hypre_double beta, hypre_ParVector *b, 
hypre_ParVector *y ); +HYPRE_Int +hypre_ParCSRMatrixMatvecOutOfPlaceDevice_long_dbl( hypre_long_double alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, hypre_long_double beta, hypre_ParVector *b, hypre_ParVector *y ); + +HYPRE_Int +hypre_ParCSRMatrixMatvecTDevice_flt( hypre_float alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, hypre_float beta, hypre_ParVector *y ); +HYPRE_Int +hypre_ParCSRMatrixMatvecTDevice_dbl( hypre_double alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, hypre_double beta, hypre_ParVector *y ); +HYPRE_Int +hypre_ParCSRMatrixMatvecTDevice_long_dbl( hypre_long_double alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, hypre_long_double beta, hypre_ParVector *y ); + +HYPRE_Int +hypre_ParCSRMatrixMatvecT_unpack_flt( hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int num_cols, hypre_float *recv_data, hypre_float *local_data ); +HYPRE_Int +hypre_ParCSRMatrixMatvecT_unpack_dbl( hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int num_cols, hypre_double *recv_data, hypre_double *local_data ); +HYPRE_Int +hypre_ParCSRMatrixMatvecT_unpack_long_dbl( hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int num_cols, hypre_long_double *recv_data, hypre_long_double *local_data ); + +hypre_ParCSRMatrix* +hypre_ParCSRMatrixRAPKTDevice_flt( hypre_ParCSRMatrix *R, hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *P, HYPRE_Int keep_transpose, HYPRE_Int has_diagonal ); +hypre_ParCSRMatrix* +hypre_ParCSRMatrixRAPKTDevice_dbl( hypre_ParCSRMatrix *R, hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *P, HYPRE_Int keep_transpose, HYPRE_Int has_diagonal ); +hypre_ParCSRMatrix* +hypre_ParCSRMatrixRAPKTDevice_long_dbl( hypre_ParCSRMatrix *R, hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *P, HYPRE_Int keep_transpose, HYPRE_Int has_diagonal ); + +HYPRE_Int +hypre_ParCSRMatrixTransposeDevice_flt( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix **AT_ptr, HYPRE_Int data ); +HYPRE_Int +hypre_ParCSRMatrixTransposeDevice_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix **AT_ptr, HYPRE_Int data ); +HYPRE_Int 
+hypre_ParCSRMatrixTransposeDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix **AT_ptr, HYPRE_Int data ); + +hypre_ParCSRMatrix * +hypre_ParCSRTMatMatKTDevice_flt( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B, HYPRE_Int keep_transpose ); +hypre_ParCSRMatrix * +hypre_ParCSRTMatMatKTDevice_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B, HYPRE_Int keep_transpose ); +hypre_ParCSRMatrix * +hypre_ParCSRTMatMatKTDevice_long_dbl( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B, HYPRE_Int keep_transpose ); + +HYPRE_Int +hypre_ParCSRTMatMatPartialAddDevice_flt( hypre_ParCSRCommPkg *comm_pkg_A, HYPRE_Int num_cols_A, HYPRE_Int num_cols_B, HYPRE_BigInt first_col_diag_B, HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int local_nnz_Cbar, hypre_CSRMatrix *Cbar, hypre_CSRMatrix *Cext, hypre_CSRMatrix **C_diag_ptr, hypre_CSRMatrix **C_offd_ptr, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr ); +HYPRE_Int +hypre_ParCSRTMatMatPartialAddDevice_dbl( hypre_ParCSRCommPkg *comm_pkg_A, HYPRE_Int num_cols_A, HYPRE_Int num_cols_B, HYPRE_BigInt first_col_diag_B, HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int local_nnz_Cbar, hypre_CSRMatrix *Cbar, hypre_CSRMatrix *Cext, hypre_CSRMatrix **C_diag_ptr, hypre_CSRMatrix **C_offd_ptr, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr ); +HYPRE_Int +hypre_ParCSRTMatMatPartialAddDevice_long_dbl( hypre_ParCSRCommPkg *comm_pkg_A, HYPRE_Int num_cols_A, HYPRE_Int num_cols_B, HYPRE_BigInt first_col_diag_B, HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int local_nnz_Cbar, hypre_CSRMatrix *Cbar, hypre_CSRMatrix *Cext, hypre_CSRMatrix **C_diag_ptr, hypre_CSRMatrix **C_offd_ptr, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr ); + +HYPRE_Int +hypre_ParVectorGetValuesDevice_flt( hypre_ParVector *vector, HYPRE_Int num_values, HYPRE_BigInt *indices, HYPRE_BigInt 
base, hypre_float *values ); +HYPRE_Int +hypre_ParVectorGetValuesDevice_dbl( hypre_ParVector *vector, HYPRE_Int num_values, HYPRE_BigInt *indices, HYPRE_BigInt base, hypre_double *values ); +HYPRE_Int +hypre_ParVectorGetValuesDevice_long_dbl( hypre_ParVector *vector, HYPRE_Int num_values, HYPRE_BigInt *indices, HYPRE_BigInt base, hypre_long_double *values ); + +HYPRE_Int +hypre_ParcsrGetExternalRowsDeviceInit_flt( hypre_ParCSRMatrix *A, HYPRE_Int indices_len, HYPRE_BigInt *indices, hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int want_data, void **request_ptr ); +HYPRE_Int +hypre_ParcsrGetExternalRowsDeviceInit_dbl( hypre_ParCSRMatrix *A, HYPRE_Int indices_len, HYPRE_BigInt *indices, hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int want_data, void **request_ptr ); +HYPRE_Int +hypre_ParcsrGetExternalRowsDeviceInit_long_dbl( hypre_ParCSRMatrix *A, HYPRE_Int indices_len, HYPRE_BigInt *indices, hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int want_data, void **request_ptr ); + +hypre_CSRMatrix* +hypre_ParcsrGetExternalRowsDeviceWait_flt( void *vrequest ); +hypre_CSRMatrix* +hypre_ParcsrGetExternalRowsDeviceWait_dbl( void *vrequest ); +hypre_CSRMatrix* +hypre_ParcsrGetExternalRowsDeviceWait_long_dbl( void *vrequest ); + +/* functions_gpu */ + +/* pre_gpu */ + + +#endif +#endif + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus +extern "C++" { +#endif + +#if defined (HYPRE_MIXED_PRECISION) +#if defined(HYPRE_USING_GPU) + +/* fixed_gpu */ + + +#endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/src/parcsr_mv/_hypre_parcsr_mv_mup_def.h b/src/parcsr_mv/_hypre_parcsr_mv_mup_def.h index aee2731ee1..e344436926 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv_mup_def.h +++ b/src/parcsr_mv/_hypre_parcsr_mv_mup_def.h @@ -56,6 +56,10 @@ #define HYPRE_ParVectorSetConstantValues HYPRE_MULTIPRECISION_FUNC ( HYPRE_ParVectorSetConstantValues ) #define HYPRE_ParVectorSetRandomValues HYPRE_MULTIPRECISION_FUNC ( HYPRE_ParVectorSetRandomValues ) #define HYPRE_VectorToParVector 
HYPRE_MULTIPRECISION_FUNC ( HYPRE_VectorToParVector ) +#define hypre_ParCSRMatrixClone HYPRE_MULTIPRECISION_FUNC ( hypre_ParCSRMatrixClone ) +#define hypre_ParCSRMatrixCopy HYPRE_MULTIPRECISION_FUNC ( hypre_ParCSRMatrixCopy ) +#define hypre_ParCSRMatrixCreate HYPRE_MULTIPRECISION_FUNC ( hypre_ParCSRMatrixCreate ) +#define hypre_ParCSRMatrixInitialize_v2 HYPRE_MULTIPRECISION_FUNC ( hypre_ParCSRMatrixInitialize_v2 ) #define hypre_AssumedPartitionCreate HYPRE_FIXEDPRECISION_FUNC ( hypre_AssumedPartitionCreate ) #define hypre_AssumedPartitionDestroy HYPRE_FIXEDPRECISION_FUNC ( hypre_AssumedPartitionDestroy ) #define hypre_BooleanGenerateDiagAndOffd HYPRE_FIXEDPRECISION_FUNC ( hypre_BooleanGenerateDiagAndOffd ) @@ -137,17 +141,14 @@ #define hypre_ParCSRMatrixBlkFilterHost HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixBlkFilterHost ) #define hypre_ParCSRMatrixBlockColSum HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixBlockColSum ) #define hypre_ParCSRMatrixBlockColSumHost HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixBlockColSumHost ) -#define hypre_ParCSRMatrixClone HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixClone ) #define hypre_ParCSRMatrixClone_v2 HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixClone_v2 ) #define hypre_ParCSRMatrixColSum HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixColSum ) #define hypre_ParCSRMatrixColSumHost HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixColSumHost ) #define hypre_ParCSRMatrixCompressOffdMap HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCompressOffdMap ) #define hypre_ParCSRMatrixComputeScalingTagged HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixComputeScalingTagged ) -#define hypre_ParCSRMatrixCopy HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCopy ) #define hypre_ParCSRMatrixCopyColMapOffdToDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCopyColMapOffdToDevice ) #define hypre_ParCSRMatrixCopyColMapOffdToHost HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCopyColMapOffdToHost ) #define hypre_ParCSRMatrixCopy_C 
HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCopy_C ) -#define hypre_ParCSRMatrixCreate HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCreate ) #define hypre_ParCSRMatrixCreateAssumedPartition HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCreateAssumedPartition ) #define hypre_ParCSRMatrixCreateFromDenseBlockMatrix HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCreateFromDenseBlockMatrix ) #define hypre_ParCSRMatrixCreateFromParVector HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCreateFromParVector ) @@ -176,7 +177,6 @@ #define hypre_ParCSRMatrixGetRowHost HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGetRowHost ) #define hypre_ParCSRMatrixInfNorm HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixInfNorm ) #define hypre_ParCSRMatrixInitialize HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixInitialize ) -#define hypre_ParCSRMatrixInitialize_v2 HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixInitialize_v2 ) #define hypre_ParCSRMatrixLocalSumElts HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixLocalSumElts ) #define hypre_ParCSRMatrixLocalTranspose HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixLocalTranspose ) #define hypre_ParCSRMatrixMatvec HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixMatvec ) @@ -323,5 +323,46 @@ #define hypre_setparvectordataowner HYPRE_FIXEDPRECISION_FUNC ( hypre_setparvectordataowner ) #define hypre_setparvectorrandomvalues HYPRE_FIXEDPRECISION_FUNC ( hypre_setparvectorrandomvalues ) #define hypre_vectortoparvector HYPRE_FIXEDPRECISION_FUNC ( hypre_vectortoparvector ) +#define hypreGPUKernel_ConcatDiagAndOffd HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_ConcatDiagAndOffd ) +#define hypreGPUKernel_ParCSRMatMatDiag HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_ParCSRMatMatDiag ) +#define hypreGPUKernel_ParCSRMatrixBlkFilterCount HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_ParCSRMatrixBlkFilterCount ) +#define hypreGPUKernel_ParCSRMatrixBlkFilterFill HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_ParCSRMatrixBlkFilterFill ) +#define hypre_ConcatDiagAndOffdDevice 
HYPRE_FIXEDPRECISION_FUNC ( hypre_ConcatDiagAndOffdDevice ) +#define hypre_ConcatDiagOffdAndExtDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ConcatDiagOffdAndExtDevice ) +#define hypre_ExchangeExternalRowsDeviceInit HYPRE_FIXEDPRECISION_FUNC ( hypre_ExchangeExternalRowsDeviceInit ) +#define hypre_ExchangeExternalRowsDeviceWait HYPRE_FIXEDPRECISION_FUNC ( hypre_ExchangeExternalRowsDeviceWait ) +#define hypre_MergeDiagAndOffdDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_MergeDiagAndOffdDevice ) +#define hypre_ParCSRCommPkgCreateMatrixE HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRCommPkgCreateMatrixE ) +#define hypre_ParCSRDiagScaleVectorDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRDiagScaleVectorDevice ) +#define hypre_ParCSRMatMatDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatMatDevice ) +#define hypre_ParCSRMatMatDiagDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatMatDiagDevice ) +#define hypre_ParCSRMatrixAddDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixAddDevice ) +#define hypre_ParCSRMatrixBlkFilterDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixBlkFilterDevice ) +#define hypre_ParCSRMatrixColSumDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixColSumDevice ) +#define hypre_ParCSRMatrixCompressOffdMapDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixCompressOffdMapDevice ) +#define hypre_ParCSRMatrixDiagScaleDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixDiagScaleDevice ) +#define hypre_ParCSRMatrixDropSmallEntriesDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixDropSmallEntriesDevice ) +#define hypre_ParCSRMatrixExtractBExtDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixExtractBExtDevice ) +#define hypre_ParCSRMatrixExtractBExtDeviceInit HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixExtractBExtDeviceInit ) +#define hypre_ParCSRMatrixExtractBExtDeviceWait HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixExtractBExtDeviceWait ) +#define hypre_ParCSRMatrixGenerate1DCFDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGenerate1DCFDevice ) 
+#define hypre_ParCSRMatrixGenerateCCCFDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGenerateCCCFDevice ) +#define hypre_ParCSRMatrixGenerateCCDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGenerateCCDevice ) +#define hypre_ParCSRMatrixGenerateCFDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGenerateCFDevice ) +#define hypre_ParCSRMatrixGenerateFFCFDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGenerateFFCFDevice ) +#define hypre_ParCSRMatrixGenerateFFFC3Device HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGenerateFFFC3Device ) +#define hypre_ParCSRMatrixGenerateFFFCDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGenerateFFFCDevice ) +#define hypre_ParCSRMatrixGenerateFFFCDevice_core HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGenerateFFFCDevice_core ) +#define hypre_ParCSRMatrixGetRowDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixGetRowDevice ) +#define hypre_ParCSRMatrixMatvecOutOfPlaceDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixMatvecOutOfPlaceDevice ) +#define hypre_ParCSRMatrixMatvecTDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixMatvecTDevice ) +#define hypre_ParCSRMatrixMatvecT_unpack HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixMatvecT_unpack ) +#define hypre_ParCSRMatrixRAPKTDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixRAPKTDevice ) +#define hypre_ParCSRMatrixTransposeDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRMatrixTransposeDevice ) +#define hypre_ParCSRTMatMatKTDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRTMatMatKTDevice ) +#define hypre_ParCSRTMatMatPartialAddDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParCSRTMatMatPartialAddDevice ) +#define hypre_ParVectorGetValuesDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ParVectorGetValuesDevice ) +#define hypre_ParcsrGetExternalRowsDeviceInit HYPRE_FIXEDPRECISION_FUNC ( hypre_ParcsrGetExternalRowsDeviceInit ) +#define hypre_ParcsrGetExternalRowsDeviceWait HYPRE_FIXEDPRECISION_FUNC ( hypre_ParcsrGetExternalRowsDeviceWait ) #endif diff --git 
a/src/parcsr_mv/_hypre_parcsr_mv_mup_undef.h b/src/parcsr_mv/_hypre_parcsr_mv_mup_undef.h index 52b29bf8d8..83b05caa33 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv_mup_undef.h +++ b/src/parcsr_mv/_hypre_parcsr_mv_mup_undef.h @@ -53,6 +53,10 @@ #undef HYPRE_ParVectorSetConstantValues #undef HYPRE_ParVectorSetRandomValues #undef HYPRE_VectorToParVector +#undef hypre_ParCSRMatrixClone +#undef hypre_ParCSRMatrixCopy +#undef hypre_ParCSRMatrixCreate +#undef hypre_ParCSRMatrixInitialize_v2 #undef hypre_AssumedPartitionCreate #undef hypre_AssumedPartitionDestroy #undef hypre_BooleanGenerateDiagAndOffd @@ -134,17 +138,14 @@ #undef hypre_ParCSRMatrixBlkFilterHost #undef hypre_ParCSRMatrixBlockColSum #undef hypre_ParCSRMatrixBlockColSumHost -#undef hypre_ParCSRMatrixClone #undef hypre_ParCSRMatrixClone_v2 #undef hypre_ParCSRMatrixColSum #undef hypre_ParCSRMatrixColSumHost #undef hypre_ParCSRMatrixCompressOffdMap #undef hypre_ParCSRMatrixComputeScalingTagged -#undef hypre_ParCSRMatrixCopy #undef hypre_ParCSRMatrixCopyColMapOffdToDevice #undef hypre_ParCSRMatrixCopyColMapOffdToHost #undef hypre_ParCSRMatrixCopy_C -#undef hypre_ParCSRMatrixCreate #undef hypre_ParCSRMatrixCreateAssumedPartition #undef hypre_ParCSRMatrixCreateFromDenseBlockMatrix #undef hypre_ParCSRMatrixCreateFromParVector @@ -173,7 +174,6 @@ #undef hypre_ParCSRMatrixGetRowHost #undef hypre_ParCSRMatrixInfNorm #undef hypre_ParCSRMatrixInitialize -#undef hypre_ParCSRMatrixInitialize_v2 #undef hypre_ParCSRMatrixLocalSumElts #undef hypre_ParCSRMatrixLocalTranspose #undef hypre_ParCSRMatrixMatvec @@ -320,3 +320,44 @@ #undef hypre_setparvectordataowner #undef hypre_setparvectorrandomvalues #undef hypre_vectortoparvector +#undef hypreGPUKernel_ConcatDiagAndOffd +#undef hypreGPUKernel_ParCSRMatMatDiag +#undef hypreGPUKernel_ParCSRMatrixBlkFilterCount +#undef hypreGPUKernel_ParCSRMatrixBlkFilterFill +#undef hypre_ConcatDiagAndOffdDevice +#undef hypre_ConcatDiagOffdAndExtDevice +#undef hypre_ExchangeExternalRowsDeviceInit 
+#undef hypre_ExchangeExternalRowsDeviceWait +#undef hypre_MergeDiagAndOffdDevice +#undef hypre_ParCSRCommPkgCreateMatrixE +#undef hypre_ParCSRDiagScaleVectorDevice +#undef hypre_ParCSRMatMatDevice +#undef hypre_ParCSRMatMatDiagDevice +#undef hypre_ParCSRMatrixAddDevice +#undef hypre_ParCSRMatrixBlkFilterDevice +#undef hypre_ParCSRMatrixColSumDevice +#undef hypre_ParCSRMatrixCompressOffdMapDevice +#undef hypre_ParCSRMatrixDiagScaleDevice +#undef hypre_ParCSRMatrixDropSmallEntriesDevice +#undef hypre_ParCSRMatrixExtractBExtDevice +#undef hypre_ParCSRMatrixExtractBExtDeviceInit +#undef hypre_ParCSRMatrixExtractBExtDeviceWait +#undef hypre_ParCSRMatrixGenerate1DCFDevice +#undef hypre_ParCSRMatrixGenerateCCCFDevice +#undef hypre_ParCSRMatrixGenerateCCDevice +#undef hypre_ParCSRMatrixGenerateCFDevice +#undef hypre_ParCSRMatrixGenerateFFCFDevice +#undef hypre_ParCSRMatrixGenerateFFFC3Device +#undef hypre_ParCSRMatrixGenerateFFFCDevice +#undef hypre_ParCSRMatrixGenerateFFFCDevice_core +#undef hypre_ParCSRMatrixGetRowDevice +#undef hypre_ParCSRMatrixMatvecOutOfPlaceDevice +#undef hypre_ParCSRMatrixMatvecTDevice +#undef hypre_ParCSRMatrixMatvecT_unpack +#undef hypre_ParCSRMatrixRAPKTDevice +#undef hypre_ParCSRMatrixTransposeDevice +#undef hypre_ParCSRTMatMatKTDevice +#undef hypre_ParCSRTMatMatPartialAddDevice +#undef hypre_ParVectorGetValuesDevice +#undef hypre_ParcsrGetExternalRowsDeviceInit +#undef hypre_ParcsrGetExternalRowsDeviceWait diff --git a/src/parcsr_mv/headers b/src/parcsr_mv/headers index c7e6b97d09..8990d7f461 100755 --- a/src/parcsr_mv/headers +++ b/src/parcsr_mv/headers @@ -74,3 +74,60 @@ cat >> $INTERNAL_HEADER <<@ #endif @ + + +INTERNAL_HEADER=_hypre_parcsr_mv.hpp + +#=========================================================================== +# Include guards and other includes +#=========================================================================== + +cat > $INTERNAL_HEADER <<@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use 'headers' to generate) ***/ 
+ +#ifndef hypre_PARCSR_MV_HPP +#define hypre_PARCSR_MV_HPP + +#include <HYPRE_config.h> +#include "_hypre_utilities.hpp" + +#ifdef HYPRE_MIXED_PRECISION +#include "_hypre_parcsr_mv_mup_def.h" +#endif + +#ifdef __cplusplus +extern "C++" { +#endif + +@ + +#=========================================================================== +# Structures and prototypes +#=========================================================================== + +#=========================================================================== +# Include guards +#=========================================================================== + +cat >> $INTERNAL_HEADER <<@ + +#ifdef __cplusplus +} +#endif + +#ifdef HYPRE_MIXED_PRECISION +/* The following is for user compiles and the order is important. The first + * header ensures that we do not change prototype names in user files or in the + * second header file. The second header contains all the prototypes needed by + * users for mixed precision. */ +#ifndef hypre_MP_BUILD +#include "_hypre_parcsr_mv_mup_undef.h" +#include "_hypre_parcsr_mv_mup.h" +#include "_hypre_parcsr_mv_mup.hpp" +#endif +#endif + +#endif + +@ diff --git a/src/parcsr_mv/mup.fixed b/src/parcsr_mv/mup.fixed index 6cb8d48548..4237ffc470 100644 --- a/src/parcsr_mv/mup.fixed +++ b/src/parcsr_mv/mup.fixed @@ -79,17 +79,14 @@ hypre_ParCSRMatrixBlkFilter hypre_ParCSRMatrixBlkFilterHost hypre_ParCSRMatrixBlockColSum hypre_ParCSRMatrixBlockColSumHost -hypre_ParCSRMatrixClone hypre_ParCSRMatrixClone_v2 hypre_ParCSRMatrixColSum hypre_ParCSRMatrixColSumHost hypre_ParCSRMatrixCompressOffdMap hypre_ParCSRMatrixComputeScalingTagged -hypre_ParCSRMatrixCopy hypre_ParCSRMatrixCopyColMapOffdToDevice hypre_ParCSRMatrixCopyColMapOffdToHost hypre_ParCSRMatrixCopy_C -hypre_ParCSRMatrixCreate hypre_ParCSRMatrixCreateAssumedPartition hypre_ParCSRMatrixCreateFromDenseBlockMatrix hypre_ParCSRMatrixCreateFromParVector @@ -118,7 +115,6 @@ hypre_ParCSRMatrixGetRow hypre_ParCSRMatrixGetRowHost hypre_ParCSRMatrixInfNorm 
hypre_ParCSRMatrixInitialize -hypre_ParCSRMatrixInitialize_v2 hypre_ParCSRMatrixLocalSumElts hypre_ParCSRMatrixLocalTranspose hypre_ParCSRMatrixMatvec diff --git a/src/parcsr_mv/mup.fixed_gpu b/src/parcsr_mv/mup.fixed_gpu new file mode 100644 index 0000000000..0d22c912bf --- /dev/null +++ b/src/parcsr_mv/mup.fixed_gpu @@ -0,0 +1,41 @@ +hypreGPUKernel_ConcatDiagAndOffd +hypreGPUKernel_ParCSRMatMatDiag +hypreGPUKernel_ParCSRMatrixBlkFilterCount +hypreGPUKernel_ParCSRMatrixBlkFilterFill +hypre_ConcatDiagAndOffdDevice +hypre_ConcatDiagOffdAndExtDevice +hypre_ExchangeExternalRowsDeviceInit +hypre_ExchangeExternalRowsDeviceWait +hypre_MergeDiagAndOffdDevice +hypre_ParCSRCommPkgCreateMatrixE +hypre_ParCSRDiagScaleVectorDevice +hypre_ParCSRMatMatDevice +hypre_ParCSRMatMatDiagDevice +hypre_ParCSRMatrixAddDevice +hypre_ParCSRMatrixBlkFilterDevice +hypre_ParCSRMatrixColSumDevice +hypre_ParCSRMatrixCompressOffdMapDevice +hypre_ParCSRMatrixDiagScaleDevice +hypre_ParCSRMatrixDropSmallEntriesDevice +hypre_ParCSRMatrixExtractBExtDevice +hypre_ParCSRMatrixExtractBExtDeviceInit +hypre_ParCSRMatrixExtractBExtDeviceWait +hypre_ParCSRMatrixGenerate1DCFDevice +hypre_ParCSRMatrixGenerateCCCFDevice +hypre_ParCSRMatrixGenerateCCDevice +hypre_ParCSRMatrixGenerateCFDevice +hypre_ParCSRMatrixGenerateFFCFDevice +hypre_ParCSRMatrixGenerateFFFC3Device +hypre_ParCSRMatrixGenerateFFFCDevice +hypre_ParCSRMatrixGenerateFFFCDevice_core +hypre_ParCSRMatrixGetRowDevice +hypre_ParCSRMatrixMatvecOutOfPlaceDevice +hypre_ParCSRMatrixMatvecTDevice +hypre_ParCSRMatrixMatvecT_unpack +hypre_ParCSRMatrixRAPKTDevice +hypre_ParCSRMatrixTransposeDevice +hypre_ParCSRTMatMatKTDevice +hypre_ParCSRTMatMatPartialAddDevice +hypre_ParVectorGetValuesDevice +hypre_ParcsrGetExternalRowsDeviceInit +hypre_ParcsrGetExternalRowsDeviceWait diff --git a/src/parcsr_mv/mup.functions b/src/parcsr_mv/mup.functions index 26e45e4b67..85358b6217 100644 --- a/src/parcsr_mv/mup.functions +++ b/src/parcsr_mv/mup.functions @@ -39,3 +39,7 @@ 
HYPRE_ParVectorScale HYPRE_ParVectorSetConstantValues HYPRE_ParVectorSetRandomValues HYPRE_VectorToParVector +hypre_ParCSRMatrixClone +hypre_ParCSRMatrixCopy +hypre_ParCSRMatrixCreate +hypre_ParCSRMatrixInitialize_v2 diff --git a/src/parcsr_mv/mup.functions_gpu b/src/parcsr_mv/mup.functions_gpu new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/parcsr_mv/mup.methods_gpu b/src/parcsr_mv/mup.methods_gpu new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/parcsr_mv/mup_fixed.c b/src/parcsr_mv/mup_fixed.c index 237924b933..c5fa1d360f 100644 --- a/src/parcsr_mv/mup_fixed.c +++ b/src/parcsr_mv/mup_fixed.c @@ -632,14 +632,6 @@ hypre_ParCSRMatrixBlockColSum( hypre_ParCSRMatrix *A, HYPRE_Int row_major, HYPRE /*--------------------------------------------------------------------------*/ -hypre_ParCSRMatrix* -hypre_ParCSRMatrixClone( hypre_ParCSRMatrix *A, HYPRE_Int copy_data ) -{ - return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixClone)( A, copy_data ); -} - -/*--------------------------------------------------------------------------*/ - hypre_ParCSRMatrix* hypre_ParCSRMatrixClone_v2( hypre_ParCSRMatrix *A, HYPRE_Int copy_data, HYPRE_MemoryLocation memory_location ) { @@ -672,14 +664,6 @@ hypre_ParCSRMatrixComputeScalingTagged( hypre_ParCSRMatrix *A, HYPRE_Int type, H /*--------------------------------------------------------------------------*/ -HYPRE_Int -hypre_ParCSRMatrixCopy( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B, HYPRE_Int copy_data ) -{ - return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixCopy)( A, B, copy_data ); -} - -/*--------------------------------------------------------------------------*/ - HYPRE_Int hypre_ParCSRMatrixCopyColMapOffdToDevice( hypre_ParCSRMatrix *A ) { @@ -704,14 +688,6 @@ hypre_ParCSRMatrixCopy_C( hypre_ParCSRMatrix *P, hypre_ParCSRMatrix *C, HYPRE_In /*--------------------------------------------------------------------------*/ -hypre_ParCSRMatrix * -hypre_ParCSRMatrixCreate( MPI_Comm comm, 
HYPRE_BigInt global_num_rows, HYPRE_BigInt global_num_cols, HYPRE_BigInt *row_starts_in, HYPRE_BigInt *col_starts_in, HYPRE_Int num_cols_offd, HYPRE_Int num_nonzeros_diag, HYPRE_Int num_nonzeros_offd ) -{ - return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixCreate)( comm, global_num_rows, global_num_cols, row_starts_in, col_starts_in, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd ); -} - -/*--------------------------------------------------------------------------*/ - HYPRE_Int hypre_ParCSRMatrixCreateAssumedPartition( hypre_ParCSRMatrix *matrix ) { @@ -920,14 +896,6 @@ hypre_ParCSRMatrixInitialize( hypre_ParCSRMatrix *matrix ) /*--------------------------------------------------------------------------*/ -HYPRE_Int -hypre_ParCSRMatrixInitialize_v2( hypre_ParCSRMatrix *matrix, HYPRE_MemoryLocation memory_location ) -{ - return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixInitialize_v2)( matrix, memory_location ); -} - -/*--------------------------------------------------------------------------*/ - HYPRE_Complex hypre_ParCSRMatrixLocalSumElts( hypre_ParCSRMatrix *A ) { diff --git a/src/parcsr_mv/mup_fixed_gpu.c b/src/parcsr_mv/mup_fixed_gpu.c new file mode 100644 index 0000000000..98a2db0efa --- /dev/null +++ b/src/parcsr_mv/mup_fixed_gpu.c @@ -0,0 +1,311 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +#include "_hypre_parcsr_mv.h" +#include "_hypre_parcsr_mv.hpp" + +#ifdef HYPRE_MIXED_PRECISION + +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#if defined(HYPRE_USING_GPU) + + +/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix* +hypre_ConcatDiagAndOffdDevice( hypre_ParCSRMatrix *A ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ConcatDiagAndOffdDevice)( A ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ConcatDiagOffdAndExtDevice( hypre_ParCSRMatrix *A, hypre_CSRMatrix *E, hypre_CSRMatrix **B_ptr, HYPRE_Int *num_cols_offd_ptr, HYPRE_BigInt **cols_map_offd_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ConcatDiagOffdAndExtDevice)( A, E, B_ptr, num_cols_offd_ptr, cols_map_offd_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ExchangeExternalRowsDeviceInit( hypre_CSRMatrix *B_ext, hypre_ParCSRCommPkg *comm_pkg_A, HYPRE_Int want_data, void **request_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ExchangeExternalRowsDeviceInit)( B_ext, comm_pkg_A, want_data, request_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix* +hypre_ExchangeExternalRowsDeviceWait( void *vrequest ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ExchangeExternalRowsDeviceWait)( vrequest ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix * +hypre_MergeDiagAndOffdDevice( hypre_ParCSRMatrix *par_matrix ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_MergeDiagAndOffdDevice)( par_matrix ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRCommPkgCreateMatrixE( hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int local_ncols ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRCommPkgCreateMatrixE)( comm_pkg, local_ncols ); +} + 
+/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRDiagScaleVectorDevice( hypre_ParCSRMatrix *par_A, hypre_ParVector *par_y, hypre_ParVector *par_x ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRDiagScaleVectorDevice)( par_A, par_y, par_x ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_ParCSRMatrix * +hypre_ParCSRMatMatDevice( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatMatDevice)( A, B ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatMatDiagDevice( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *BT, hypre_ParCSRMatrix *C ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatMatDiagDevice)( A, BT, C ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixAddDevice( HYPRE_Complex alpha, hypre_ParCSRMatrix *A, HYPRE_Complex beta, hypre_ParCSRMatrix *B, hypre_ParCSRMatrix **Cout ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixAddDevice)( alpha, A, beta, B, Cout ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixBlkFilterDevice( hypre_ParCSRMatrix *A, HYPRE_Int block_size, hypre_ParCSRMatrix **B_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixBlkFilterDevice)( A, block_size, B_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixColSumDevice( hypre_ParCSRMatrix *A, hypre_ParVector *b ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixColSumDevice)( A, b ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixCompressOffdMapDevice( hypre_ParCSRMatrix *A ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixCompressOffdMapDevice)( A ); 
+} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixDiagScaleDevice( hypre_ParCSRMatrix *par_A, hypre_ParVector *par_ld, hypre_ParVector *par_rd ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixDiagScaleDevice)( par_A, par_ld, par_rd ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixDropSmallEntriesDevice( hypre_ParCSRMatrix *A, HYPRE_Complex tol, HYPRE_Int type ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixDropSmallEntriesDevice)( A, tol, type ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix* +hypre_ParCSRMatrixExtractBExtDevice( hypre_ParCSRMatrix *B, hypre_ParCSRMatrix *A, HYPRE_Int want_data ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixExtractBExtDevice)( B, A, want_data ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixExtractBExtDeviceInit( hypre_ParCSRMatrix *B, hypre_ParCSRMatrix *A, HYPRE_Int want_data, void **request_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixExtractBExtDeviceInit)( B, A, want_data, request_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix* +hypre_ParCSRMatrixExtractBExtDeviceWait( void *request ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixExtractBExtDeviceWait)( request ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **ACX_ptr, hypre_ParCSRMatrix **AXC_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixGenerate1DCFDevice)( A, CF_marker, cpts_starts, S, ACX_ptr, AXC_ptr ); +} + 
+/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixGenerateCCCFDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_CF_ptr, hypre_ParCSRMatrix **A_CC_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixGenerateCCCFDevice)( A, CF_marker, cpts_starts, S, A_CF_ptr, A_CC_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixGenerateCCDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **ACC_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixGenerateCCDevice)( A, CF_marker, cpts_starts, S, ACC_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixGenerateCFDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **ACF_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixGenerateCFDevice)( A, CF_marker, cpts_starts, S, ACF_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixGenerateFFCFDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_CF_ptr, hypre_ParCSRMatrix **A_FF_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixGenerateFFCFDevice)( A, CF_marker, cpts_starts, S, A_CF_ptr, A_FF_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixGenerateFFFC3Device( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_FC_ptr, hypre_ParCSRMatrix **A_FF_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixGenerateFFFC3Device)( A, CF_marker, 
cpts_starts, S, A_FC_ptr, A_FF_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixGenerateFFFCDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, HYPRE_BigInt *cpts_starts, hypre_ParCSRMatrix *S, hypre_ParCSRMatrix **A_FC_ptr, hypre_ParCSRMatrix **A_FF_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixGenerateFFFCDevice)( A, CF_marker, cpts_starts, S, A_FC_ptr, A_FF_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixGetRowDevice( hypre_ParCSRMatrix *mat, HYPRE_BigInt row, HYPRE_Int *size, HYPRE_BigInt **col_ind, HYPRE_Complex **values ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixGetRowDevice)( mat, row, size, col_ind, values ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixMatvecOutOfPlaceDevice( HYPRE_Complex alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, HYPRE_Complex beta, hypre_ParVector *b, hypre_ParVector *y ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixMatvecOutOfPlaceDevice)( alpha, A, x, beta, b, y ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixMatvecTDevice( HYPRE_Complex alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, HYPRE_Complex beta, hypre_ParVector *y ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixMatvecTDevice)( alpha, A, x, beta, y ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixMatvecT_unpack( hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int num_cols, HYPRE_Complex *recv_data, HYPRE_Complex *local_data ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixMatvecT_unpack)( comm_pkg, num_cols, recv_data, local_data ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_ParCSRMatrix* 
+hypre_ParCSRMatrixRAPKTDevice( hypre_ParCSRMatrix *R, hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *P, HYPRE_Int keep_transpose, HYPRE_Int has_diagonal ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixRAPKTDevice)( R, A, P, keep_transpose, has_diagonal ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixTransposeDevice( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix **AT_ptr, HYPRE_Int data ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRMatrixTransposeDevice)( A, AT_ptr, data ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_ParCSRMatrix * +hypre_ParCSRTMatMatKTDevice( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B, HYPRE_Int keep_transpose ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRTMatMatKTDevice)( A, B, keep_transpose ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg_A, HYPRE_Int num_cols_A, HYPRE_Int num_cols_B, HYPRE_BigInt first_col_diag_B, HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int local_nnz_Cbar, hypre_CSRMatrix *Cbar, hypre_CSRMatrix *Cext, hypre_CSRMatrix **C_diag_ptr, hypre_CSRMatrix **C_offd_ptr, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParCSRTMatMatPartialAddDevice)( comm_pkg_A, num_cols_A, num_cols_B, first_col_diag_B, last_col_diag_B, num_cols_offd_B, col_map_offd_B, local_nnz_Cbar, Cbar, Cext, C_diag_ptr, C_offd_ptr, num_cols_offd_C_ptr, col_map_offd_C_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParVectorGetValuesDevice( hypre_ParVector *vector, HYPRE_Int num_values, HYPRE_BigInt *indices, HYPRE_BigInt base, HYPRE_Complex *values ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParVectorGetValuesDevice)( 
vector, num_values, indices, base, values ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParcsrGetExternalRowsDeviceInit( hypre_ParCSRMatrix *A, HYPRE_Int indices_len, HYPRE_BigInt *indices, hypre_ParCSRCommPkg *comm_pkg, HYPRE_Int want_data, void **request_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParcsrGetExternalRowsDeviceInit)( A, indices_len, indices, comm_pkg, want_data, request_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix* +hypre_ParcsrGetExternalRowsDeviceWait( void *vrequest ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ParcsrGetExternalRowsDeviceWait)( vrequest ); +} + + +#endif + +#endif + diff --git a/src/parcsr_mv/mup_functions.c b/src/parcsr_mv/mup_functions.c index 148402effb..1cd3b5c188 100644 --- a/src/parcsr_mv/mup_functions.c +++ b/src/parcsr_mv/mup_functions.c @@ -356,6 +356,42 @@ HYPRE_Generate2DSystem( HYPRE_ParCSRMatrix H_L1, HYPRE_ParCSRMatrix H_L2, HYPRE_ return HYPRE_Generate2DSystem_pre( precision, H_L1, H_L2, H_b1, H_b2, H_x1, H_x2, M_vals ); } +/*--------------------------------------------------------------------------*/ + +hypre_ParCSRMatrix* +hypre_ParCSRMatrixClone( hypre_ParCSRMatrix *A, HYPRE_Int copy_data ) +{ + HYPRE_Precision precision = hypre_GlobalPrecision(); + return hypre_ParCSRMatrixClone_pre( precision, A, copy_data ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixCopy( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B, HYPRE_Int copy_data ) +{ + HYPRE_Precision precision = hypre_GlobalPrecision(); + return hypre_ParCSRMatrixCopy_pre( precision, A, B, copy_data ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_ParCSRMatrix * +hypre_ParCSRMatrixCreate( MPI_Comm comm, HYPRE_BigInt global_num_rows, HYPRE_BigInt global_num_cols, HYPRE_BigInt *row_starts_in, HYPRE_BigInt 
*col_starts_in, HYPRE_Int num_cols_offd, HYPRE_Int num_nonzeros_diag, HYPRE_Int num_nonzeros_offd ) +{ + HYPRE_Precision precision = hypre_GlobalPrecision(); + return hypre_ParCSRMatrixCreate_pre( precision, comm, global_num_rows, global_num_cols, row_starts_in, col_starts_in, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixInitialize_v2( hypre_ParCSRMatrix *matrix, HYPRE_MemoryLocation memory_location ) +{ + HYPRE_Precision precision = hypre_GlobalPrecision(); + return hypre_ParCSRMatrixInitialize_v2_pre( precision, matrix, memory_location ); +} + #endif diff --git a/src/parcsr_mv/mup_functions_gpu.c b/src/parcsr_mv/mup_functions_gpu.c new file mode 100644 index 0000000000..e48b4248b2 --- /dev/null +++ b/src/parcsr_mv/mup_functions_gpu.c @@ -0,0 +1,22 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +#include "_hypre_parcsr_mv.h" +#include "_hypre_parcsr_mv.hpp" + +#ifdef HYPRE_MIXED_PRECISION + +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#if defined(HYPRE_USING_GPU) + + +#endif + +#endif + diff --git a/src/parcsr_mv/mup_pre.c b/src/parcsr_mv/mup_pre.c index 7f07d4955a..0d6d94d954 100644 --- a/src/parcsr_mv/mup_pre.c +++ b/src/parcsr_mv/mup_pre.c @@ -698,6 +698,78 @@ HYPRE_Generate2DSystem_pre( HYPRE_Precision precision, HYPRE_ParCSRMatrix H_L1, } } +/*--------------------------------------------------------------------------*/ + +hypre_ParCSRMatrix* +hypre_ParCSRMatrixClone_pre( HYPRE_Precision precision, hypre_ParCSRMatrix *A, HYPRE_Int copy_data ) +{ + switch (precision) + { + case HYPRE_REAL_SINGLE: + return hypre_ParCSRMatrixClone_flt( A, copy_data ); + case HYPRE_REAL_DOUBLE: + return hypre_ParCSRMatrixClone_dbl( A, copy_data ); + case HYPRE_REAL_LONGDOUBLE: + return hypre_ParCSRMatrixClone_long_dbl( A, copy_data ); + default: + { hypre_ParCSRMatrix* value = 0; hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unknown solver precision"); return value; } + } +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixCopy_pre( HYPRE_Precision precision, hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B, HYPRE_Int copy_data ) +{ + switch (precision) + { + case HYPRE_REAL_SINGLE: + return hypre_ParCSRMatrixCopy_flt( A, B, copy_data ); + case HYPRE_REAL_DOUBLE: + return hypre_ParCSRMatrixCopy_dbl( A, B, copy_data ); + case HYPRE_REAL_LONGDOUBLE: + return hypre_ParCSRMatrixCopy_long_dbl( A, B, copy_data ); + default: + { HYPRE_Int value = 0; hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unknown solver precision"); return value; } + } +} + +/*--------------------------------------------------------------------------*/ + +hypre_ParCSRMatrix * +hypre_ParCSRMatrixCreate_pre( HYPRE_Precision precision, MPI_Comm comm, HYPRE_BigInt global_num_rows, HYPRE_BigInt global_num_cols, HYPRE_BigInt *row_starts_in, HYPRE_BigInt *col_starts_in, 
HYPRE_Int num_cols_offd, HYPRE_Int num_nonzeros_diag, HYPRE_Int num_nonzeros_offd ) +{ + switch (precision) + { + case HYPRE_REAL_SINGLE: + return hypre_ParCSRMatrixCreate_flt( comm, global_num_rows, global_num_cols, row_starts_in, col_starts_in, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd ); + case HYPRE_REAL_DOUBLE: + return hypre_ParCSRMatrixCreate_dbl( comm, global_num_rows, global_num_cols, row_starts_in, col_starts_in, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd ); + case HYPRE_REAL_LONGDOUBLE: + return hypre_ParCSRMatrixCreate_long_dbl( comm, global_num_rows, global_num_cols, row_starts_in, col_starts_in, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd ); + default: + { hypre_ParCSRMatrix * value = 0; hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unknown solver precision"); return value; } + } +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParCSRMatrixInitialize_v2_pre( HYPRE_Precision precision, hypre_ParCSRMatrix *matrix, HYPRE_MemoryLocation memory_location ) +{ + switch (precision) + { + case HYPRE_REAL_SINGLE: + return hypre_ParCSRMatrixInitialize_v2_flt( matrix, memory_location ); + case HYPRE_REAL_DOUBLE: + return hypre_ParCSRMatrixInitialize_v2_dbl( matrix, memory_location ); + case HYPRE_REAL_LONGDOUBLE: + return hypre_ParCSRMatrixInitialize_v2_long_dbl( matrix, memory_location ); + default: + { HYPRE_Int value = 0; hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unknown solver precision"); return value; } + } +} + #endif diff --git a/src/parcsr_mv/mup_pre_gpu.c b/src/parcsr_mv/mup_pre_gpu.c new file mode 100644 index 0000000000..e48b4248b2 --- /dev/null +++ b/src/parcsr_mv/mup_pre_gpu.c @@ -0,0 +1,22 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +#include "_hypre_parcsr_mv.h" +#include "_hypre_parcsr_mv.hpp" + +#ifdef HYPRE_MIXED_PRECISION + +/****************************************************************************** + * Copyright (c) 1998 
Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#if defined(HYPRE_USING_GPU) + + +#endif + +#endif + diff --git a/src/parcsr_mv/par_csr_fffc_device.c b/src/parcsr_mv/par_csr_fffc_device.c index 3124cf36c1..1a6d6f9fe7 100644 --- a/src/parcsr_mv/par_csr_fffc_device.c +++ b/src/parcsr_mv/par_csr_fffc_device.c @@ -665,7 +665,7 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE); hypre_ParCSRMatrixSetNumNonzeros(AFF); - hypre_ParCSRMatrixDNumNonzeros(AFF) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(AFF); + hypre_ParCSRMatrixDNumNonzeros(AFF) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(AFF); hypre_MatvecCommPkgCreate(AFF); *AFF_ptr = AFF; @@ -888,7 +888,7 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE); hypre_ParCSRMatrixSetNumNonzeros(AFC); - hypre_ParCSRMatrixDNumNonzeros(AFC) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(AFC); + hypre_ParCSRMatrixDNumNonzeros(AFC) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(AFC); hypre_MatvecCommPkgCreate(AFC); *AFC_ptr = AFC; @@ -1116,7 +1116,7 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE); hypre_ParCSRMatrixSetNumNonzeros(ACF); - hypre_ParCSRMatrixDNumNonzeros(ACF) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(ACF); + hypre_ParCSRMatrixDNumNonzeros(ACF) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(ACF); hypre_MatvecCommPkgCreate(ACF); *ACF_ptr = ACF; @@ -1340,7 +1340,7 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE); hypre_ParCSRMatrixSetNumNonzeros(ACC); - hypre_ParCSRMatrixDNumNonzeros(ACC) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(ACC); + 
hypre_ParCSRMatrixDNumNonzeros(ACC) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(ACC); hypre_MatvecCommPkgCreate(ACC); *ACC_ptr = ACC; @@ -1830,7 +1830,7 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE); hypre_ParCSRMatrixSetNumNonzeros(ACX); - hypre_ParCSRMatrixDNumNonzeros(ACX) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(ACX); + hypre_ParCSRMatrixDNumNonzeros(ACX) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(ACX); hypre_MatvecCommPkgCreate(ACX); *ACX_ptr = ACX; @@ -2029,7 +2029,7 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE); hypre_ParCSRMatrixSetNumNonzeros(AXC); - hypre_ParCSRMatrixDNumNonzeros(AXC) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(AXC); + hypre_ParCSRMatrixDNumNonzeros(AXC) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(AXC); hypre_MatvecCommPkgCreate(AXC); *AXC_ptr = AXC; diff --git a/src/parcsr_mv/par_csr_matop.c b/src/parcsr_mv/par_csr_matop.c index 8b354ddf79..dce8d61131 100644 --- a/src/parcsr_mv/par_csr_matop.c +++ b/src/parcsr_mv/par_csr_matop.c @@ -4872,7 +4872,7 @@ hypre_ParcsrBdiagInvScal( hypre_ParCSRMatrix *A, hypre_ParCSRMatrixColMapOffd(Anew) = col_map_offd_A_new; hypre_ParCSRMatrixSetNumNonzeros(Anew); - hypre_ParCSRMatrixDNumNonzeros(Anew) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(Anew); + hypre_ParCSRMatrixDNumNonzeros(Anew) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(Anew); //printf("nnz_diag %d --> %d, nnz_offd %d --> %d\n", nnz_diag, nnz_diag_new, nnz_offd, nnz_offd_new); /* create CommPkg of Anew */ @@ -5348,7 +5348,7 @@ hypre_ParCSRMatrixAddHost( HYPRE_Complex alpha, hypre_ParCSRMatrixOffd(C) = C_offd; hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C; hypre_ParCSRMatrixSetNumNonzeros(C); - hypre_ParCSRMatrixDNumNonzeros(C) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(C); + hypre_ParCSRMatrixDNumNonzeros(C) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(C); /* create CommPkg of C */ 
hypre_MatvecCommPkgCreate(C); @@ -6003,7 +6003,7 @@ hypre_ParCSRMatrixExtractSubmatrixFC( hypre_ParCSRMatrix *A, hypre_ParCSRMatrixColMapOffd(B) = col_map_offd_B; hypre_ParCSRMatrixSetNumNonzeros(B); - hypre_ParCSRMatrixDNumNonzeros(B) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(B); + hypre_ParCSRMatrixDNumNonzeros(B) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(B); hypre_MatvecCommPkgCreate(B); @@ -6147,7 +6147,7 @@ hypre_ParCSRMatrixDropSmallEntriesHost( hypre_ParCSRMatrix *A, hypre_CSRMatrixNumNonzeros(A_diag) = nnz_diag; hypre_CSRMatrixNumNonzeros(A_offd) = nnz_offd; hypre_ParCSRMatrixSetNumNonzeros(A); - hypre_ParCSRMatrixDNumNonzeros(A) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(A); + hypre_ParCSRMatrixDNumNonzeros(A) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(A); for (i = 0, k = 0; i < num_cols_A_offd; i++) { diff --git a/src/parcsr_mv/par_csr_matop_device.c b/src/parcsr_mv/par_csr_matop_device.c index 866742297a..61863158fb 100644 --- a/src/parcsr_mv/par_csr_matop_device.c +++ b/src/parcsr_mv/par_csr_matop_device.c @@ -1243,7 +1243,7 @@ hypre_ParCSRMatrixDropSmallEntriesDevice( hypre_ParCSRMatrix *A, hypre_CSRMatrixDropSmallEntriesDevice(A_offd, tol, elmt_tols_offd); hypre_ParCSRMatrixSetNumNonzeros(A); - hypre_ParCSRMatrixDNumNonzeros(A) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(A); + hypre_ParCSRMatrixDNumNonzeros(A) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(A); /* squeeze out zero columns of A_offd */ hypre_ParCSRMatrixCompressOffdMapDevice(A); @@ -1803,7 +1803,7 @@ hypre_ParCSRMatrixAddDevice( HYPRE_Complex alpha, } hypre_ParCSRMatrixSetNumNonzeros(C); - hypre_ParCSRMatrixDNumNonzeros(C) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(C); + hypre_ParCSRMatrixDNumNonzeros(C) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(C); /* create CommPkg of C */ hypre_MatvecCommPkgCreate(C); diff --git a/src/parcsr_mv/par_csr_matrix.c b/src/parcsr_mv/par_csr_matrix.c index 199c2338ce..b940f5d9b0 100644 --- a/src/parcsr_mv/par_csr_matrix.c +++ 
b/src/parcsr_mv/par_csr_matrix.c @@ -279,7 +279,7 @@ hypre_ParCSRMatrixClone_v2(hypre_ParCSRMatrix *A, hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(A)) ); hypre_ParCSRMatrixNumNonzeros(S) = hypre_ParCSRMatrixNumNonzeros(A); - hypre_ParCSRMatrixDNumNonzeros(S) = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(A); + hypre_ParCSRMatrixDNumNonzeros(S) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(A); hypre_ParCSRMatrixInitialize_v2(S, memory_location); @@ -377,7 +377,7 @@ hypre_ParCSRMatrixSetNumNonzeros_core( hypre_ParCSRMatrix *matrix, hypre_MPI_Allreduce(&local_num_nonzeros, &total_num_nonzeros, 1, HYPRE_MPI_REAL, hypre_MPI_SUM, comm); - hypre_ParCSRMatrixDNumNonzeros(matrix) = total_num_nonzeros; + hypre_ParCSRMatrixDNumNonzeros(matrix) = (hypre_double)total_num_nonzeros; } else { @@ -2703,7 +2703,7 @@ hypre_ParCSRMatrixToCSRMatrixAll_v2( hypre_ParCSRMatrix *par_matrix, } /*-------------------------------------------------------------------------- - * copies a ParCSR matrix B to A. + * copies a ParCSR matrix A to B. 
* If copy_data = 0, only the structure of A is copied to B * the routine does not check whether the dimensions of A and B are compatible *--------------------------------------------------------------------------*/ diff --git a/src/parcsr_mv/par_csr_matrix.h b/src/parcsr_mv/par_csr_matrix.h index b4b2701fe3..1d705c29bb 100644 --- a/src/parcsr_mv/par_csr_matrix.h +++ b/src/parcsr_mv/par_csr_matrix.h @@ -32,7 +32,7 @@ typedef struct hypre_ParCSRMatrix_struct HYPRE_BigInt global_num_cols; HYPRE_BigInt global_num_rownnz; HYPRE_BigInt num_nonzeros; - HYPRE_Real d_num_nonzeros; + hypre_double d_num_nonzeros; HYPRE_BigInt first_row_index; HYPRE_BigInt first_col_diag; diff --git a/src/parcsr_mv/par_csr_matrix_stats.c b/src/parcsr_mv/par_csr_matrix_stats.c index 6842272a22..2ca9d2bbf4 100644 --- a/src/parcsr_mv/par_csr_matrix_stats.c +++ b/src/parcsr_mv/par_csr_matrix_stats.c @@ -444,7 +444,7 @@ hypre_ParCSRMatrixStatsArrayCompute(HYPRE_Int num_matrices, hypre_MatrixStatsSparsity(stats) = 100.0 * (1.0 - recvbuffer(i, 0, 3) / global_size); hypre_ParCSRMatrixNumNonzeros(matrices[i]) = (HYPRE_Int) recvbuffer(i, 0, 3); - hypre_ParCSRMatrixDNumNonzeros(matrices[i]) = (HYPRE_Real) recvbuffer(i, 0, 3); + hypre_ParCSRMatrixDNumNonzeros(matrices[i]) = (hypre_double) recvbuffer(i, 0, 3); } /*------------------------------------------------- diff --git a/src/parcsr_mv/parcsr_mv_mp.c b/src/parcsr_mv/parcsr_mv_mp.c index 812d002630..890f43cad2 100644 --- a/src/parcsr_mv/parcsr_mv_mp.c +++ b/src/parcsr_mv/parcsr_mv_mp.c @@ -39,7 +39,7 @@ hypre_ParVectorCopy_mp( hypre_ParVector *x, *--------------------------------------------------------------------------*/ HYPRE_Int -hypre_ParVectorAxpy_mp( hypre_double alpha, +hypre_ParVectorAxpy_mp( hypre_long_double alpha, hypre_ParVector *x, hypre_ParVector *y ) { @@ -63,7 +63,8 @@ hypre_ParVectorConvert_mp( hypre_ParVector *v, return (hypre_error_flag); } /*-------------------------------------------------------------------------- - * 
Mixed-Precision hypre_ParVectorAxpy
+ * Mixed-precision matrix conversion
+ * Note: This converts only the diag and offd matrices
  *--------------------------------------------------------------------------*/
 
 HYPRE_Int
@@ -81,4 +82,119 @@ hypre_ParCSRMatrixConvert_mp( hypre_ParCSRMatrix *A,
    return (hypre_error_flag);
 }
 
+/*--------------------------------------------------------------------------
+ * Mixed-precision ParCSR matrix copy: Copies A to B.
+ *
+ * If A and B have the same precision, the copy is delegated (copy_data = 1)
+ * to hypre_ParCSRMatrixCopy_pre. Otherwise the diag and offd blocks are
+ * copied with hypre_CSRMatrixCopy_mp and col_map_offd is copied on the host.
+ *
+ * Flags an argument error and returns if A (argument 1) or B (argument 2)
+ * is NULL.
+ *
+ * The routine does not check whether the dimensions of A and B are compatible
+ * TODO: update d_num_nonzeros not fixed as hypre_double
+ *--------------------------------------------------------------------------*/
+
+HYPRE_Int
+hypre_ParCSRMatrixCopy_mp( hypre_ParCSRMatrix *A,
+                           hypre_ParCSRMatrix *B )
+{
+   hypre_CSRMatrix *A_diag;
+   hypre_CSRMatrix *A_offd;
+   HYPRE_BigInt    *col_map_offd_A;
+   hypre_CSRMatrix *B_diag;
+   hypre_CSRMatrix *B_offd;
+   HYPRE_BigInt    *col_map_offd_B;
+   HYPRE_Int        num_cols_offd_A;
+   HYPRE_Int        num_cols_offd_B;
+
+   if (!A)
+   {
+      hypre_error_in_arg(1);
+      return hypre_error_flag;
+   }
+   if (!B)
+   {
+      hypre_error_in_arg(2);
+      return hypre_error_flag;
+   }
+   if (hypre_ParCSRMatrixPrecision(A) == hypre_ParCSRMatrixPrecision(B))
+   {
+      return hypre_ParCSRMatrixCopy_pre( hypre_ParCSRMatrixPrecision(A), A, B, 1 );
+   }
+
+   A_diag = hypre_ParCSRMatrixDiag(A);
+   A_offd = hypre_ParCSRMatrixOffd(A);
+   B_diag = hypre_ParCSRMatrixDiag(B);
+   B_offd = hypre_ParCSRMatrixOffd(B);
+
+   num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
+   num_cols_offd_B = hypre_CSRMatrixNumCols(B_offd);
+
+   hypre_assert(num_cols_offd_A == num_cols_offd_B);
+
+   col_map_offd_A = hypre_ParCSRMatrixColMapOffd(A);
+   col_map_offd_B = hypre_ParCSRMatrixColMapOffd(B);
+
+   hypre_CSRMatrixCopy_mp(A_diag, B_diag);
+   hypre_CSRMatrixCopy_mp(A_offd, B_offd);
+
+   /* should not happen if B has been initialized */
+   if (num_cols_offd_B && col_map_offd_B == NULL)
+   {
+      col_map_offd_B = hypre_TAlloc(HYPRE_BigInt, num_cols_offd_B, HYPRE_MEMORY_HOST);
+      hypre_ParCSRMatrixColMapOffd(B) = col_map_offd_B;
+   }
+
+   hypre_TMemcpy(col_map_offd_B, col_map_offd_A, HYPRE_BigInt, num_cols_offd_B,
+                 HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST);
+
+   return hypre_error_flag;
+}
+
+/*--------------------------------------------------------------------------
+ * Mixed-precision clone of ParCSR matrix.
+ * New matrix resides in the same memory location
+ *
+ * Returns a new matrix with precision new_precision. If A already has that
+ * precision, the clone is delegated to hypre_ParCSRMatrixClone_pre.
+ *--------------------------------------------------------------------------*/
+
+hypre_ParCSRMatrix*
+hypre_ParCSRMatrixClone_mp(hypre_ParCSRMatrix *A, HYPRE_Precision new_precision)
+{
+   hypre_ParCSRMatrix *S;
+
+   hypre_GpuProfilingPushRange("hypre_ParCSRMatrixClone_mp");
+
+   /* same precision: plain clone; still close the profiling range */
+   if (hypre_ParCSRMatrixPrecision(A) == new_precision)
+   {
+      S = hypre_ParCSRMatrixClone_pre( hypre_ParCSRMatrixPrecision(A), A, 1 );
+      hypre_GpuProfilingPopRange();
+      return S;
+   }
+
+   S = hypre_ParCSRMatrixCreate_pre( new_precision, hypre_ParCSRMatrixComm(A),
+                                     hypre_ParCSRMatrixGlobalNumRows(A),
+                                     hypre_ParCSRMatrixGlobalNumCols(A),
+                                     hypre_ParCSRMatrixRowStarts(A),
+                                     hypre_ParCSRMatrixColStarts(A),
+                                     hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A)),
+                                     hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(A)),
+                                     hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(A)) );
+
+   hypre_ParCSRMatrixNumNonzeros(S) = hypre_ParCSRMatrixNumNonzeros(A);
+   hypre_ParCSRMatrixDNumNonzeros(S) = (hypre_double) hypre_ParCSRMatrixNumNonzeros(A);
+
+   hypre_ParCSRMatrixInitialize_v2_pre(new_precision, S, hypre_ParCSRMatrixMemoryLocation(A));
+
+   hypre_ParCSRMatrixCopy_mp(A, S);
+
+   hypre_GpuProfilingPopRange();
+
+   return S;
+}
+
 #endif
diff --git a/src/parcsr_mv/protos_mp.h b/src/parcsr_mv/protos_mp.h
index bc96be26ce..052ee79004 100644
--- a/src/parcsr_mv/protos_mp.h
+++ b/src/parcsr_mv/protos_mp.h
@@ -14,7 +14,7 @@ hypre_ParVectorCopy_mp( hypre_ParVector *x,
                         hypre_ParVector *y );
 
 HYPRE_Int
-hypre_ParVectorAxpy_mp( hypre_double alpha,
+hypre_ParVectorAxpy_mp( hypre_long_double alpha,
                         hypre_ParVector *x,
                         hypre_ParVector *y );
 
@@ -26,4 +26,11 @@ HYPRE_Int
 hypre_ParCSRMatrixConvert_mp (
hypre_ParCSRMatrix *A, HYPRE_Precision new_precision ); +hypre_ParCSRMatrix* +hypre_ParCSRMatrixClone_mp(hypre_ParCSRMatrix *A, HYPRE_Precision new_precision); + +HYPRE_Int +hypre_ParCSRMatrixCopy_mp( hypre_ParCSRMatrix *A, + hypre_ParCSRMatrix *B ); + #endif diff --git a/src/seq_block_mv/_hypre_seq_block_mv_mup.h b/src/seq_block_mv/_hypre_seq_block_mv_mup.h index 48a23de9fe..3f6cda4e81 100644 --- a/src/seq_block_mv/_hypre_seq_block_mv_mup.h +++ b/src/seq_block_mv/_hypre_seq_block_mv_mup.h @@ -1,6 +1,13 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef hypre_SEQ_BLOCK_MV_MUP_HEADER #define hypre_SEQ_BLOCK_MV_MUP_HEADER @@ -10,15 +17,7 @@ extern "C" { #if defined (HYPRE_MIXED_PRECISION) -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* fixed */ HYPRE_Int hypre_DenseBlockMatrixBuildAOP_flt( hypre_DenseBlockMatrix *A ); @@ -104,28 +103,9 @@ hypre_DenseBlockMatrixPrint_dbl( MPI_Comm comm, hypre_DenseBlockMatrix *A, const HYPRE_Int hypre_DenseBlockMatrixPrint_long_dbl( MPI_Comm comm, hypre_DenseBlockMatrix *A, const char* filename ); +/* functions */ -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - +/* pre */ #endif diff --git a/src/seq_mv/HYPRE_seq_mv.h b/src/seq_mv/HYPRE_seq_mv.h index 3c62f959bf..5b5f67ab35 100644 --- a/src/seq_mv/HYPRE_seq_mv.h +++ b/src/seq_mv/HYPRE_seq_mv.h @@ -80,6 +80,7 @@ HYPRE_Int HYPRE_VectorInitialize( HYPRE_Vector vector ); HYPRE_Int HYPRE_VectorPrint( HYPRE_Vector vector, char *file_name ); HYPRE_Vector HYPRE_VectorRead( char *file_name ); HYPRE_Int HYPRE_VectorCopy( HYPRE_Vector xvec, HYPRE_Vector yvec); +HYPRE_Int HYPRE_VectorAxpy( HYPRE_Complex alpha, HYPRE_Vector xvec, HYPRE_Vector yvec); typedef enum HYPRE_TimerID { diff --git a/src/seq_mv/HYPRE_seq_mv_mup.h b/src/seq_mv/HYPRE_seq_mv_mup.h index ba5c2cc99c..db9949ea52 100644 --- a/src/seq_mv/HYPRE_seq_mv_mup.h +++ b/src/seq_mv/HYPRE_seq_mv_mup.h @@ -1,17 +1,6 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ -#ifndef HYPRE_SEQ_MV_MUP_HEADER -#define HYPRE_SEQ_MV_MUP_HEADER - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined (HYPRE_MIXED_PRECISION) - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
@@ -19,17 +8,18 @@ extern "C" { * SPDX-License-Identifier: (Apache-2.0 OR MIT) ******************************************************************************/ +#ifndef HYPRE_SEQ_MV_MUP_HEADER +#define HYPRE_SEQ_MV_MUP_HEADER +#ifdef __cplusplus +extern "C" { +#endif -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +#if defined (HYPRE_MIXED_PRECISION) -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ +/* fixed */ +/* functions */ HYPRE_CSRMatrix HYPRE_CSRMatrixCreate_flt( HYPRE_Int num_rows, HYPRE_Int num_cols, HYPRE_Int *row_sizes ); @@ -247,6 +237,15 @@ HYPRE_MultiblockMatrixSetSubmatrixType_long_dbl( HYPRE_MultiblockMatrix matrix, HYPRE_Int HYPRE_MultiblockMatrixSetSubmatrixType( HYPRE_MultiblockMatrix matrix, HYPRE_Int j, HYPRE_Int type ); +HYPRE_Int +HYPRE_VectorAxpy_flt( hypre_float alpha, HYPRE_Vector xvec, HYPRE_Vector yvec ); +HYPRE_Int +HYPRE_VectorAxpy_dbl( hypre_double alpha, HYPRE_Vector xvec, HYPRE_Vector yvec ); +HYPRE_Int +HYPRE_VectorAxpy_long_dbl( hypre_long_double alpha, HYPRE_Vector xvec, HYPRE_Vector yvec ); +HYPRE_Int +HYPRE_VectorAxpy( hypre_long_double alpha, HYPRE_Vector xvec, HYPRE_Vector yvec ); + HYPRE_Int HYPRE_VectorCopy_flt( HYPRE_Vector xvec, HYPRE_Vector yvec ); HYPRE_Int @@ -301,16 +300,7 @@ HYPRE_VectorRead_long_dbl( char *file_name ); HYPRE_Vector HYPRE_VectorRead( char *file_name ); - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. 
See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* pre */ HYPRE_CSRMatrix HYPRE_CSRMatrixCreate_pre( HYPRE_Precision precision, HYPRE_Int num_rows, HYPRE_Int num_cols, HYPRE_Int *row_sizes ); @@ -384,6 +374,9 @@ HYPRE_MultiblockMatrixSetNumSubmatrices_pre( HYPRE_Precision precision, HYPRE_Mu HYPRE_Int HYPRE_MultiblockMatrixSetSubmatrixType_pre( HYPRE_Precision precision, HYPRE_MultiblockMatrix matrix, HYPRE_Int j, HYPRE_Int type ); +HYPRE_Int +HYPRE_VectorAxpy_pre( HYPRE_Precision precision, hypre_long_double alpha, HYPRE_Vector xvec, HYPRE_Vector yvec ); + HYPRE_Int HYPRE_VectorCopy_pre( HYPRE_Precision precision, HYPRE_Vector xvec, HYPRE_Vector yvec ); @@ -402,7 +395,6 @@ HYPRE_VectorPrint_pre( HYPRE_Precision precision, HYPRE_Vector vector, char *fil HYPRE_Vector HYPRE_VectorRead_pre( HYPRE_Precision precision, char *file_name ); - #endif #ifdef __cplusplus diff --git a/src/seq_mv/HYPRE_vector.c b/src/seq_mv/HYPRE_vector.c index d0d7c29f3c..1ab507adf3 100644 --- a/src/seq_mv/HYPRE_vector.c +++ b/src/seq_mv/HYPRE_vector.c @@ -74,3 +74,13 @@ HYPRE_VectorCopy( HYPRE_Vector xvec, HYPRE_Vector yvec) { return ( hypre_SeqVectorCopy( (hypre_Vector *) xvec, (hypre_Vector *) yvec) ); } + +/*-------------------------------------------------------------------------- + * HYPRE_VectorAxpy + *--------------------------------------------------------------------------*/ + +HYPRE_Int +HYPRE_VectorAxpy( HYPRE_Complex alpha, HYPRE_Vector xvec, HYPRE_Vector yvec) +{ + return ( hypre_SeqVectorAxpy( alpha, (hypre_Vector *) xvec, (hypre_Vector *) yvec) ); +} diff --git a/src/seq_mv/Makefile b/src/seq_mv/Makefile index c12f0d89b3..5873004492 100644 --- a/src/seq_mv/Makefile +++ b/src/seq_mv/Makefile @@ -83,6 +83,11 @@ MP_FILES = \ mup_functions.c\ mup_pre.c\ seq_mv_mp.c + +MP_CUFILES=\ + mup_fixed_gpu.c\ + mup_functions_gpu.c\ + mup_pre_gpu.c COBJS = 
${FILES:.c=.o} CUOBJS = ${CUFILES:.c=.obj} @@ -97,9 +102,10 @@ CUOBJS_single = ${CUFILES:.c=.obj_flt} CUOBJS_double = ${CUFILES:.c=.obj_dbl} CUOBJS_longdouble = ${CUFILES:.c=.obj_ldbl} MP_COBJS = ${MP_FILES:.c=.o} +MP_CUOBJS = ${MP_CUFILES:.c=.obj} OBJS = ${COBJS_single} ${COBJS_double} ${COBJS_longdouble} ${MP_COBJS} -OBJS += ${CUOBJS_single} ${CUOBJS_double} ${CUOBJS_longdouble} +OBJS += ${CUOBJS_single} ${CUOBJS_double} ${CUOBJS_longdouble} ${MP_CUOBJS} endif diff --git a/src/seq_mv/_hypre_seq_mv.h b/src/seq_mv/_hypre_seq_mv.h index 9a3616e47c..5383cf190f 100644 --- a/src/seq_mv/_hypre_seq_mv.h +++ b/src/seq_mv/_hypre_seq_mv.h @@ -465,6 +465,7 @@ HYPRE_Int hypre_CSRMatrixCheckSetNumNonzeros( hypre_CSRMatrix *matrix ); HYPRE_Int hypre_CSRMatrixResize( hypre_CSRMatrix *matrix, HYPRE_Int new_num_rows, HYPRE_Int new_num_cols, HYPRE_Int new_num_nonzeros ); HYPRE_Int hypre_CSRMatrixEliminateRowsCols(hypre_CSRMatrix *A, HYPRE_Int nrows, HYPRE_Int *rows); +HYPRE_Int hypre_CSRMatrixResetData(hypre_CSRMatrix *matrix); /* csr_matvec.c */ // y[offset:end] = alpha*A[offset:end,:]*x + beta*b[offset:end] @@ -693,7 +694,7 @@ hypre_SeqVectorCopy_mp( hypre_Vector *x, hypre_Vector *y ); HYPRE_Int -hypre_SeqVectorAxpy_mp( hypre_double alpha, +hypre_SeqVectorAxpy_mp( hypre_long_double alpha, hypre_Vector *x, hypre_Vector *y ); @@ -705,6 +706,11 @@ HYPRE_Int hypre_SeqVectorConvert_mp ( hypre_Vector *v, HYPRE_Precision new_precision); +HYPRE_Int +hypre_CSRMatrixCopy_mp( hypre_CSRMatrix *A, hypre_CSRMatrix *B); + +hypre_CSRMatrix* +hypre_CSRMatrixClone_mp( hypre_CSRMatrix *A, HYPRE_Precision new_precision ); #endif #ifdef __cplusplus diff --git a/src/seq_mv/_hypre_seq_mv.hpp b/src/seq_mv/_hypre_seq_mv.hpp new file mode 100644 index 0000000000..2dda6aa8ec --- /dev/null +++ b/src/seq_mv/_hypre_seq_mv.hpp @@ -0,0 +1,36 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use 'headers' to generate) ***/ + +#ifndef hypre_SEQ_MV_HPP +#define hypre_SEQ_MV_HPP + +#include +#include 
"_hypre_utilities.hpp" + +#ifdef HYPRE_MIXED_PRECISION +#include "_hypre_seq_mv_mup_def.h" +#endif + +#ifdef __cplusplus +extern "C++" { +#endif + + +#ifdef __cplusplus +} +#endif + +#ifdef HYPRE_MIXED_PRECISION +/* The following is for user compiles and the order is important. The first + * header ensures that we do not change prototype names in user files or in the + * second header file. The second header contains all the prototypes needed by + * users for mixed precision. */ +#ifndef hypre_MP_BUILD +#include "_hypre_seq_mv_mup_undef.h" +#include "_hypre_seq_mv_mup.h" +#include "_hypre_seq_mv_mup.hpp" +#endif +#endif + +#endif + diff --git a/src/seq_mv/_hypre_seq_mv_mup.h b/src/seq_mv/_hypre_seq_mv_mup.h index c9942ec19b..b7f214d0e2 100644 --- a/src/seq_mv/_hypre_seq_mv_mup.h +++ b/src/seq_mv/_hypre_seq_mv_mup.h @@ -1,6 +1,13 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef hypre_SEQ_MV_MUP_HEADER #define hypre_SEQ_MV_MUP_HEADER @@ -10,15 +17,7 @@ extern "C" { #if defined (HYPRE_MIXED_PRECISION) -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* fixed */ hypre_CSRMatrix * hypre_CSRMatrixAdd_flt( hypre_float alpha, hypre_CSRMatrix *A, hypre_float beta, hypre_CSRMatrix *B ); @@ -104,20 +103,6 @@ hypre_CSRMatrixComputeRowSum_dbl( hypre_CSRMatrix *A, HYPRE_Int *CF_i, HYPRE_Int HYPRE_Int hypre_CSRMatrixComputeRowSum_long_dbl( hypre_CSRMatrix *A, HYPRE_Int *CF_i, HYPRE_Int *CF_j, hypre_long_double *row_sum, HYPRE_Int type, hypre_long_double scal, const char *set_or_add ); -HYPRE_Int -hypre_CSRMatrixCopy_flt( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int copy_data ); -HYPRE_Int -hypre_CSRMatrixCopy_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int copy_data ); -HYPRE_Int -hypre_CSRMatrixCopy_long_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int copy_data ); - -hypre_CSRMatrix * -hypre_CSRMatrixCreate_flt( HYPRE_Int num_rows, HYPRE_Int num_cols, HYPRE_Int num_nonzeros ); -hypre_CSRMatrix * -hypre_CSRMatrixCreate_dbl( HYPRE_Int num_rows, HYPRE_Int num_cols, HYPRE_Int num_nonzeros ); -hypre_CSRMatrix * -hypre_CSRMatrixCreate_long_dbl( HYPRE_Int num_rows, HYPRE_Int num_cols, HYPRE_Int num_nonzeros ); - hypre_CSRMatrix * hypre_CSRMatrixDeleteZeros_flt( hypre_CSRMatrix *A, hypre_float tol ); hypre_CSRMatrix * @@ -188,13 +173,6 @@ hypre_CSRMatrixInitialize_dbl( hypre_CSRMatrix *matrix ); HYPRE_Int hypre_CSRMatrixInitialize_long_dbl( hypre_CSRMatrix *matrix ); -HYPRE_Int -hypre_CSRMatrixInitialize_v2_flt( hypre_CSRMatrix *matrix, HYPRE_Int bigInit, HYPRE_MemoryLocation memory_location ); -HYPRE_Int -hypre_CSRMatrixInitialize_v2_dbl( hypre_CSRMatrix *matrix, HYPRE_Int bigInit, HYPRE_MemoryLocation memory_location ); -HYPRE_Int -hypre_CSRMatrixInitialize_v2_long_dbl( hypre_CSRMatrix *matrix, HYPRE_Int bigInit, HYPRE_MemoryLocation memory_location ); - HYPRE_Int hypre_CSRMatrixJtoBigJ_flt( hypre_CSRMatrix *matrix ); HYPRE_Int @@ -818,13 +796,6 @@ 
hypre_SeqVectorSetConstantValuesHost_dbl( hypre_Vector *v, hypre_double value ); HYPRE_Int hypre_SeqVectorSetConstantValuesHost_long_dbl( hypre_Vector *v, hypre_long_double value ); -HYPRE_Int -hypre_SeqVectorSetData_flt( hypre_Vector *vector, hypre_float *data ); -HYPRE_Int -hypre_SeqVectorSetData_dbl( hypre_Vector *vector, hypre_double *data ); -HYPRE_Int -hypre_SeqVectorSetData_long_dbl( hypre_Vector *vector, hypre_long_double *data ); - HYPRE_Int hypre_SeqVectorSetDataOwner_flt( hypre_Vector *vector, HYPRE_Int owns_data ); HYPRE_Int @@ -895,28 +866,69 @@ hypre_SeqVectorSumEltsHost_dbl( hypre_Vector *vector ); HYPRE_Complex hypre_SeqVectorSumEltsHost_long_dbl( hypre_Vector *vector ); +/* functions */ -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +HYPRE_Int +hypre_CSRMatrixCopy_flt( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int copy_data ); +HYPRE_Int +hypre_CSRMatrixCopy_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int copy_data ); +HYPRE_Int +hypre_CSRMatrixCopy_long_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int copy_data ); +HYPRE_Int +hypre_CSRMatrixCopy( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int copy_data ); -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ +hypre_CSRMatrix * +hypre_CSRMatrixCreate_flt( HYPRE_Int num_rows, HYPRE_Int num_cols, HYPRE_Int num_nonzeros ); +hypre_CSRMatrix * +hypre_CSRMatrixCreate_dbl( HYPRE_Int num_rows, HYPRE_Int num_cols, HYPRE_Int num_nonzeros ); +hypre_CSRMatrix * +hypre_CSRMatrixCreate_long_dbl( HYPRE_Int num_rows, HYPRE_Int num_cols, HYPRE_Int num_nonzeros ); +hypre_CSRMatrix * +hypre_CSRMatrixCreate( HYPRE_Int num_rows, HYPRE_Int num_cols, HYPRE_Int num_nonzeros ); +HYPRE_Int +hypre_CSRMatrixInitialize_v2_flt( hypre_CSRMatrix *matrix, HYPRE_Int bigInit, HYPRE_MemoryLocation memory_location ); +HYPRE_Int +hypre_CSRMatrixInitialize_v2_dbl( hypre_CSRMatrix *matrix, HYPRE_Int bigInit, HYPRE_MemoryLocation memory_location ); +HYPRE_Int +hypre_CSRMatrixInitialize_v2_long_dbl( hypre_CSRMatrix *matrix, HYPRE_Int bigInit, HYPRE_MemoryLocation memory_location ); +HYPRE_Int +hypre_CSRMatrixInitialize_v2( hypre_CSRMatrix *matrix, HYPRE_Int bigInit, HYPRE_MemoryLocation memory_location ); +HYPRE_Int +hypre_CSRMatrixResetData_flt( hypre_CSRMatrix *matrix ); +HYPRE_Int +hypre_CSRMatrixResetData_dbl( hypre_CSRMatrix *matrix ); +HYPRE_Int +hypre_CSRMatrixResetData_long_dbl( hypre_CSRMatrix *matrix ); +HYPRE_Int +hypre_CSRMatrixResetData( hypre_CSRMatrix *matrix ); -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +HYPRE_Int +hypre_SeqVectorSetData_flt( hypre_Vector *vector, hypre_float *data ); +HYPRE_Int +hypre_SeqVectorSetData_dbl( hypre_Vector *vector, hypre_double *data ); +HYPRE_Int +hypre_SeqVectorSetData_long_dbl( hypre_Vector *vector, hypre_long_double *data ); +HYPRE_Int +hypre_SeqVectorSetData( hypre_Vector *vector, void *data ); -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. 
See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ +/* pre */ + +HYPRE_Int +hypre_CSRMatrixCopy_pre( HYPRE_Precision precision, hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int copy_data ); +hypre_CSRMatrix * +hypre_CSRMatrixCreate_pre( HYPRE_Precision precision, HYPRE_Int num_rows, HYPRE_Int num_cols, HYPRE_Int num_nonzeros ); + +HYPRE_Int +hypre_CSRMatrixInitialize_v2_pre( HYPRE_Precision precision, hypre_CSRMatrix *matrix, HYPRE_Int bigInit, HYPRE_MemoryLocation memory_location ); +HYPRE_Int +hypre_CSRMatrixResetData_pre( HYPRE_Precision precision, hypre_CSRMatrix *matrix ); + +HYPRE_Int +hypre_SeqVectorSetData_pre( HYPRE_Precision precision, hypre_Vector *vector, void *data ); #endif diff --git a/src/seq_mv/_hypre_seq_mv_mup.hpp b/src/seq_mv/_hypre_seq_mv_mup.hpp new file mode 100644 index 0000000000..7f00e0b08f --- /dev/null +++ b/src/seq_mv/_hypre_seq_mv_mup.hpp @@ -0,0 +1,494 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#ifndef hypre_SEQ_MV_MUP_HEADER_CXX +#define hypre_SEQ_MV_MUP_HEADER_CXX + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined (HYPRE_MIXED_PRECISION) +#if defined(HYPRE_USING_GPU) + +/* fixed_gpu */ + +HYPRE_Int +hypreDevice_CSRSpAdd_flt( HYPRE_Int ma, HYPRE_Int mb, HYPRE_Int nnzA, HYPRE_Int nnzB, HYPRE_Int *d_ia, HYPRE_Int *d_ja, hypre_float alpha, hypre_float *d_aa, HYPRE_Int *d_ja_map, HYPRE_Int *d_ib, HYPRE_Int *d_jb, hypre_float beta, hypre_float *d_ab, HYPRE_Int *d_jb_map, HYPRE_Int *d_num_b, HYPRE_Int *nnzC_out, HYPRE_Int **d_ic_out, HYPRE_Int **d_jc_out, hypre_float **d_ac_out ); +HYPRE_Int +hypreDevice_CSRSpAdd_dbl( HYPRE_Int ma, HYPRE_Int mb, HYPRE_Int nnzA, HYPRE_Int nnzB, HYPRE_Int *d_ia, HYPRE_Int *d_ja, hypre_double alpha, hypre_double *d_aa, HYPRE_Int *d_ja_map, HYPRE_Int *d_ib, HYPRE_Int *d_jb, hypre_double beta, hypre_double *d_ab, HYPRE_Int *d_jb_map, HYPRE_Int *d_num_b, HYPRE_Int *nnzC_out, HYPRE_Int **d_ic_out, HYPRE_Int **d_jc_out, hypre_double **d_ac_out ); +HYPRE_Int +hypreDevice_CSRSpAdd_long_dbl( HYPRE_Int ma, HYPRE_Int mb, HYPRE_Int nnzA, HYPRE_Int nnzB, HYPRE_Int *d_ia, HYPRE_Int *d_ja, hypre_long_double alpha, hypre_long_double *d_aa, HYPRE_Int *d_ja_map, HYPRE_Int *d_ib, HYPRE_Int *d_jb, hypre_long_double beta, hypre_long_double *d_ab, HYPRE_Int *d_jb_map, HYPRE_Int *d_num_b, HYPRE_Int *nnzC_out, HYPRE_Int **d_ic_out, HYPRE_Int **d_jc_out, hypre_long_double **d_ac_out ); + +HYPRE_Int +hypreDevice_CSRSpGemm_flt( hypre_CSRMatrix *A, hypre_CSRMatrix *B, hypre_CSRMatrix **C_ptr ); +HYPRE_Int +hypreDevice_CSRSpGemm_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix *B, hypre_CSRMatrix **C_ptr ); +HYPRE_Int +hypreDevice_CSRSpGemm_long_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix *B, hypre_CSRMatrix **C_ptr ); + +HYPRE_Int +hypreDevice_CSRSpTrans_flt( HYPRE_Int m, HYPRE_Int n, HYPRE_Int nnzA, HYPRE_Int *d_ia, 
HYPRE_Int *d_ja, hypre_float *d_aa, HYPRE_Int **d_ic_out, HYPRE_Int **d_jc_out, hypre_float **d_ac_out, HYPRE_Int want_data ); +HYPRE_Int +hypreDevice_CSRSpTrans_dbl( HYPRE_Int m, HYPRE_Int n, HYPRE_Int nnzA, HYPRE_Int *d_ia, HYPRE_Int *d_ja, hypre_double *d_aa, HYPRE_Int **d_ic_out, HYPRE_Int **d_jc_out, hypre_double **d_ac_out, HYPRE_Int want_data ); +HYPRE_Int +hypreDevice_CSRSpTrans_long_dbl( HYPRE_Int m, HYPRE_Int n, HYPRE_Int nnzA, HYPRE_Int *d_ia, HYPRE_Int *d_ja, hypre_long_double *d_aa, HYPRE_Int **d_ic_out, HYPRE_Int **d_jc_out, hypre_long_double **d_ac_out, HYPRE_Int want_data ); + +HYPRE_Int +hypreDevice_CSRSpTransRocsparse_flt( HYPRE_Int m, HYPRE_Int n, HYPRE_Int nnzA, HYPRE_Int *d_ia, HYPRE_Int *d_ja, hypre_float *d_aa, HYPRE_Int **d_ic_out, HYPRE_Int **d_jc_out, hypre_float **d_ac_out, HYPRE_Int want_data ); +HYPRE_Int +hypreDevice_CSRSpTransRocsparse_dbl( HYPRE_Int m, HYPRE_Int n, HYPRE_Int nnzA, HYPRE_Int *d_ia, HYPRE_Int *d_ja, hypre_double *d_aa, HYPRE_Int **d_ic_out, HYPRE_Int **d_jc_out, hypre_double **d_ac_out, HYPRE_Int want_data ); +HYPRE_Int +hypreDevice_CSRSpTransRocsparse_long_dbl( HYPRE_Int m, HYPRE_Int n, HYPRE_Int nnzA, HYPRE_Int *d_ia, HYPRE_Int *d_ja, hypre_long_double *d_aa, HYPRE_Int **d_ic_out, HYPRE_Int **d_jc_out, hypre_long_double **d_ac_out, HYPRE_Int want_data ); + +hypre_CSRMatrix * +hypre_CSRMatrixAddDevice_flt( hypre_float alpha, hypre_CSRMatrix *A, hypre_float beta, hypre_CSRMatrix *B ); +hypre_CSRMatrix * +hypre_CSRMatrixAddDevice_dbl( hypre_double alpha, hypre_CSRMatrix *A, hypre_double beta, hypre_CSRMatrix *B ); +hypre_CSRMatrix * +hypre_CSRMatrixAddDevice_long_dbl( hypre_long_double alpha, hypre_CSRMatrix *A, hypre_long_double beta, hypre_CSRMatrix *B ); + +hypre_CSRMatrix* +hypre_CSRMatrixAddPartialDevice_flt( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int *row_nums ); +hypre_CSRMatrix* +hypre_CSRMatrixAddPartialDevice_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int *row_nums ); +hypre_CSRMatrix* 
+hypre_CSRMatrixAddPartialDevice_long_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int *row_nums ); + +HYPRE_Int +hypre_CSRMatrixCheckDiagFirstDevice_flt( hypre_CSRMatrix *A ); +HYPRE_Int +hypre_CSRMatrixCheckDiagFirstDevice_dbl( hypre_CSRMatrix *A ); +HYPRE_Int +hypre_CSRMatrixCheckDiagFirstDevice_long_dbl( hypre_CSRMatrix *A ); + +HYPRE_Int +hypre_CSRMatrixCheckForMissingDiagonal_flt( hypre_CSRMatrix *A ); +HYPRE_Int +hypre_CSRMatrixCheckForMissingDiagonal_dbl( hypre_CSRMatrix *A ); +HYPRE_Int +hypre_CSRMatrixCheckForMissingDiagonal_long_dbl( hypre_CSRMatrix *A ); + +HYPRE_Int +hypre_CSRMatrixColNNzRealDevice_flt( hypre_CSRMatrix *A, hypre_float *colnnz ); +HYPRE_Int +hypre_CSRMatrixColNNzRealDevice_dbl( hypre_CSRMatrix *A, hypre_double *colnnz ); +HYPRE_Int +hypre_CSRMatrixColNNzRealDevice_long_dbl( hypre_CSRMatrix *A, hypre_long_double *colnnz ); + +HYPRE_Int +hypre_CSRMatrixCompressColumnsDevice_flt( hypre_CSRMatrix *A, HYPRE_BigInt *col_map, HYPRE_Int **col_idx_new_ptr, HYPRE_BigInt **col_map_new_ptr ); +HYPRE_Int +hypre_CSRMatrixCompressColumnsDevice_dbl( hypre_CSRMatrix *A, HYPRE_BigInt *col_map, HYPRE_Int **col_idx_new_ptr, HYPRE_BigInt **col_map_new_ptr ); +HYPRE_Int +hypre_CSRMatrixCompressColumnsDevice_long_dbl( hypre_CSRMatrix *A, HYPRE_BigInt *col_map, HYPRE_Int **col_idx_new_ptr, HYPRE_BigInt **col_map_new_ptr ); + +HYPRE_Int +hypre_CSRMatrixComputeColSumDevice_flt( hypre_CSRMatrix *A, hypre_float *col_sum, HYPRE_Int type, hypre_float scal ); +HYPRE_Int +hypre_CSRMatrixComputeColSumDevice_dbl( hypre_CSRMatrix *A, hypre_double *col_sum, HYPRE_Int type, hypre_double scal ); +HYPRE_Int +hypre_CSRMatrixComputeColSumDevice_long_dbl( hypre_CSRMatrix *A, hypre_long_double *col_sum, HYPRE_Int type, hypre_long_double scal ); + +HYPRE_Int +hypre_CSRMatrixComputeRowSumDevice_flt( hypre_CSRMatrix *A, HYPRE_Int *CF_i, HYPRE_Int *CF_j, hypre_float *row_sum, HYPRE_Int type, hypre_float scal, const char *set_or_add ); +HYPRE_Int 
+hypre_CSRMatrixComputeRowSumDevice_dbl( hypre_CSRMatrix *A, HYPRE_Int *CF_i, HYPRE_Int *CF_j, hypre_double *row_sum, HYPRE_Int type, hypre_double scal, const char *set_or_add ); +HYPRE_Int +hypre_CSRMatrixComputeRowSumDevice_long_dbl( hypre_CSRMatrix *A, HYPRE_Int *CF_i, HYPRE_Int *CF_j, hypre_long_double *row_sum, HYPRE_Int type, hypre_long_double scal, const char *set_or_add ); + +hypre_CSRMatrix * +hypre_CSRMatrixDeleteZerosDevice_flt( hypre_CSRMatrix *A, hypre_float tol ); +hypre_CSRMatrix * +hypre_CSRMatrixDeleteZerosDevice_dbl( hypre_CSRMatrix *A, hypre_double tol ); +hypre_CSRMatrix * +hypre_CSRMatrixDeleteZerosDevice_long_dbl( hypre_CSRMatrix *A, hypre_long_double tol ); + +hypre_CSRMatrix* +hypre_CSRMatrixDiagMatrixFromMatrixDevice_flt( hypre_CSRMatrix *A, HYPRE_Int type ); +hypre_CSRMatrix* +hypre_CSRMatrixDiagMatrixFromMatrixDevice_dbl( hypre_CSRMatrix *A, HYPRE_Int type ); +hypre_CSRMatrix* +hypre_CSRMatrixDiagMatrixFromMatrixDevice_long_dbl( hypre_CSRMatrix *A, HYPRE_Int type ); + +hypre_CSRMatrix* +hypre_CSRMatrixDiagMatrixFromVectorDevice_flt( HYPRE_Int n, hypre_float *v ); +hypre_CSRMatrix* +hypre_CSRMatrixDiagMatrixFromVectorDevice_dbl( HYPRE_Int n, hypre_double *v ); +hypre_CSRMatrix* +hypre_CSRMatrixDiagMatrixFromVectorDevice_long_dbl( HYPRE_Int n, hypre_long_double *v ); + +HYPRE_Int +hypre_CSRMatrixDiagScaleDevice_flt( hypre_CSRMatrix *A, hypre_Vector *ld, hypre_Vector *rd ); +HYPRE_Int +hypre_CSRMatrixDiagScaleDevice_dbl( hypre_CSRMatrix *A, hypre_Vector *ld, hypre_Vector *rd ); +HYPRE_Int +hypre_CSRMatrixDiagScaleDevice_long_dbl( hypre_CSRMatrix *A, hypre_Vector *ld, hypre_Vector *rd ); + +HYPRE_Int +hypre_CSRMatrixDropSmallEntriesDevice_flt( hypre_CSRMatrix *A, hypre_float tol, hypre_float *elmt_tols ); +HYPRE_Int +hypre_CSRMatrixDropSmallEntriesDevice_dbl( hypre_CSRMatrix *A, hypre_double tol, hypre_double *elmt_tols ); +HYPRE_Int +hypre_CSRMatrixDropSmallEntriesDevice_long_dbl( hypre_CSRMatrix *A, hypre_long_double tol, hypre_long_double 
*elmt_tols ); + +HYPRE_Int +hypre_CSRMatrixExtractDiagonalDevice_flt( hypre_CSRMatrix *A, hypre_float *d, HYPRE_Int type ); +HYPRE_Int +hypre_CSRMatrixExtractDiagonalDevice_dbl( hypre_CSRMatrix *A, hypre_double *d, HYPRE_Int type ); +HYPRE_Int +hypre_CSRMatrixExtractDiagonalDevice_long_dbl( hypre_CSRMatrix *A, hypre_long_double *d, HYPRE_Int type ); + +hypre_GpuMatData* +hypre_CSRMatrixGetGPUMatData_flt( hypre_CSRMatrix *matrix ); +hypre_GpuMatData* +hypre_CSRMatrixGetGPUMatData_dbl( hypre_CSRMatrix *matrix ); +hypre_GpuMatData* +hypre_CSRMatrixGetGPUMatData_long_dbl( hypre_CSRMatrix *matrix ); + +HYPRE_Int +hypre_CSRMatrixILU0_flt( hypre_CSRMatrix *A ); +HYPRE_Int +hypre_CSRMatrixILU0_dbl( hypre_CSRMatrix *A ); +HYPRE_Int +hypre_CSRMatrixILU0_long_dbl( hypre_CSRMatrix *A ); + +hypre_CSRMatrix* +hypre_CSRMatrixIdentityDevice_flt( HYPRE_Int n, hypre_float alp ); +hypre_CSRMatrix* +hypre_CSRMatrixIdentityDevice_dbl( HYPRE_Int n, hypre_double alp ); +hypre_CSRMatrix* +hypre_CSRMatrixIdentityDevice_long_dbl( HYPRE_Int n, hypre_long_double alp ); + +HYPRE_Int +hypre_CSRMatrixIntSpMVDevice_flt( HYPRE_Int num_rows, HYPRE_Int num_nonzeros, HYPRE_Int alpha, HYPRE_Int *d_ia, HYPRE_Int *d_ja, HYPRE_Int *d_a, HYPRE_Int *d_x, HYPRE_Int beta, HYPRE_Int *d_y ); +HYPRE_Int +hypre_CSRMatrixIntSpMVDevice_dbl( HYPRE_Int num_rows, HYPRE_Int num_nonzeros, HYPRE_Int alpha, HYPRE_Int *d_ia, HYPRE_Int *d_ja, HYPRE_Int *d_a, HYPRE_Int *d_x, HYPRE_Int beta, HYPRE_Int *d_y ); +HYPRE_Int +hypre_CSRMatrixIntSpMVDevice_long_dbl( HYPRE_Int num_rows, HYPRE_Int num_nonzeros, HYPRE_Int alpha, HYPRE_Int *d_ia, HYPRE_Int *d_ja, HYPRE_Int *d_a, HYPRE_Int *d_x, HYPRE_Int beta, HYPRE_Int *d_y ); + +HYPRE_Int +hypre_CSRMatrixIntersectPattern_flt( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int *markA, HYPRE_Int diag_option ); +HYPRE_Int +hypre_CSRMatrixIntersectPattern_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int *markA, HYPRE_Int diag_option ); +HYPRE_Int 
+hypre_CSRMatrixIntersectPattern_long_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int *markA, HYPRE_Int diag_option ); + +HYPRE_Int +hypre_CSRMatrixMatvecDevice_flt( HYPRE_Int trans, hypre_float alpha, hypre_CSRMatrix *A, hypre_Vector *x, hypre_float beta, hypre_Vector *b, hypre_Vector *y, HYPRE_Int offset ); +HYPRE_Int +hypre_CSRMatrixMatvecDevice_dbl( HYPRE_Int trans, hypre_double alpha, hypre_CSRMatrix *A, hypre_Vector *x, hypre_double beta, hypre_Vector *b, hypre_Vector *y, HYPRE_Int offset ); +HYPRE_Int +hypre_CSRMatrixMatvecDevice_long_dbl( HYPRE_Int trans, hypre_long_double alpha, hypre_CSRMatrix *A, hypre_Vector *x, hypre_long_double beta, hypre_Vector *b, hypre_Vector *y, HYPRE_Int offset ); + +HYPRE_Int +hypre_CSRMatrixMatvecRocsparse_flt( HYPRE_Int trans, hypre_float alpha, hypre_CSRMatrix *A, hypre_Vector *x, hypre_float beta, hypre_Vector *y, HYPRE_Int offset ); +HYPRE_Int +hypre_CSRMatrixMatvecRocsparse_dbl( HYPRE_Int trans, hypre_double alpha, hypre_CSRMatrix *A, hypre_Vector *x, hypre_double beta, hypre_Vector *y, HYPRE_Int offset ); +HYPRE_Int +hypre_CSRMatrixMatvecRocsparse_long_dbl( HYPRE_Int trans, hypre_long_double alpha, hypre_CSRMatrix *A, hypre_Vector *x, hypre_long_double beta, hypre_Vector *y, HYPRE_Int offset ); + +HYPRE_Int +hypre_CSRMatrixMergeColMapOffd_flt( HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int B_ext_offd_nnz, HYPRE_BigInt *B_ext_offd_bigj, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr, HYPRE_Int **map_B_to_C_ptr ); +HYPRE_Int +hypre_CSRMatrixMergeColMapOffd_dbl( HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int B_ext_offd_nnz, HYPRE_BigInt *B_ext_offd_bigj, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr, HYPRE_Int **map_B_to_C_ptr ); +HYPRE_Int +hypre_CSRMatrixMergeColMapOffd_long_dbl( HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int B_ext_offd_nnz, HYPRE_BigInt *B_ext_offd_bigj, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt 
**col_map_offd_C_ptr, HYPRE_Int **map_B_to_C_ptr ); + +HYPRE_Int +hypre_CSRMatrixMoveDiagFirstDevice_flt( hypre_CSRMatrix *A ); +HYPRE_Int +hypre_CSRMatrixMoveDiagFirstDevice_dbl( hypre_CSRMatrix *A ); +HYPRE_Int +hypre_CSRMatrixMoveDiagFirstDevice_long_dbl( hypre_CSRMatrix *A ); + +hypre_CSRMatrix * +hypre_CSRMatrixMultiplyDevice_flt( hypre_CSRMatrix *A, hypre_CSRMatrix *B ); +hypre_CSRMatrix * +hypre_CSRMatrixMultiplyDevice_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix *B ); +hypre_CSRMatrix * +hypre_CSRMatrixMultiplyDevice_long_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix *B ); + +HYPRE_Int +hypre_CSRMatrixPermuteDevice_flt( hypre_CSRMatrix *A, HYPRE_Int *perm, HYPRE_Int *rqperm, hypre_CSRMatrix *B ); +HYPRE_Int +hypre_CSRMatrixPermuteDevice_dbl( hypre_CSRMatrix *A, HYPRE_Int *perm, HYPRE_Int *rqperm, hypre_CSRMatrix *B ); +HYPRE_Int +hypre_CSRMatrixPermuteDevice_long_dbl( hypre_CSRMatrix *A, HYPRE_Int *perm, HYPRE_Int *rqperm, hypre_CSRMatrix *B ); + +HYPRE_Int +hypre_CSRMatrixRemoveDiagonalDevice_flt( hypre_CSRMatrix *A ); +HYPRE_Int +hypre_CSRMatrixRemoveDiagonalDevice_dbl( hypre_CSRMatrix *A ); +HYPRE_Int +hypre_CSRMatrixRemoveDiagonalDevice_long_dbl( hypre_CSRMatrix *A ); + +HYPRE_Int +hypre_CSRMatrixReplaceDiagDevice_flt( hypre_CSRMatrix *A, hypre_float *new_diag, hypre_float v, hypre_float tol ); +HYPRE_Int +hypre_CSRMatrixReplaceDiagDevice_dbl( hypre_CSRMatrix *A, hypre_double *new_diag, hypre_double v, hypre_double tol ); +HYPRE_Int +hypre_CSRMatrixReplaceDiagDevice_long_dbl( hypre_CSRMatrix *A, hypre_long_double *new_diag, hypre_long_double v, hypre_long_double tol ); + +HYPRE_Int +hypre_CSRMatrixSetRownnzDevice_flt( hypre_CSRMatrix *A ); +HYPRE_Int +hypre_CSRMatrixSetRownnzDevice_dbl( hypre_CSRMatrix *A ); +HYPRE_Int +hypre_CSRMatrixSetRownnzDevice_long_dbl( hypre_CSRMatrix *A ); + +HYPRE_Int +hypre_CSRMatrixSortRow_flt( hypre_CSRMatrix *A ); +HYPRE_Int +hypre_CSRMatrixSortRow_dbl( hypre_CSRMatrix *A ); +HYPRE_Int +hypre_CSRMatrixSortRow_long_dbl( 
hypre_CSRMatrix *A ); + +HYPRE_Int +hypre_CSRMatrixSortRowOutOfPlace_flt( hypre_CSRMatrix *A ); +HYPRE_Int +hypre_CSRMatrixSortRowOutOfPlace_dbl( hypre_CSRMatrix *A ); +HYPRE_Int +hypre_CSRMatrixSortRowOutOfPlace_long_dbl( hypre_CSRMatrix *A ); + +HYPRE_Int +hypre_CSRMatrixSpMVAnalysisDevice_flt( hypre_CSRMatrix *matrix ); +HYPRE_Int +hypre_CSRMatrixSpMVAnalysisDevice_dbl( hypre_CSRMatrix *matrix ); +HYPRE_Int +hypre_CSRMatrixSpMVAnalysisDevice_long_dbl( hypre_CSRMatrix *matrix ); + +HYPRE_Int +hypre_CSRMatrixSpMVDevice_flt( HYPRE_Int trans, hypre_float alpha, hypre_CSRMatrix *A, hypre_Vector *x, hypre_float beta, hypre_Vector *y, HYPRE_Int fill ); +HYPRE_Int +hypre_CSRMatrixSpMVDevice_dbl( HYPRE_Int trans, hypre_double alpha, hypre_CSRMatrix *A, hypre_Vector *x, hypre_double beta, hypre_Vector *y, HYPRE_Int fill ); +HYPRE_Int +hypre_CSRMatrixSpMVDevice_long_dbl( HYPRE_Int trans, hypre_long_double alpha, hypre_CSRMatrix *A, hypre_Vector *x, hypre_long_double beta, hypre_Vector *y, HYPRE_Int fill ); + +HYPRE_Int +hypre_CSRMatrixSplitDevice_flt( hypre_CSRMatrix *B_ext, HYPRE_BigInt first_col_diag_B, HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int **map_B_to_C_ptr, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr, hypre_CSRMatrix **B_ext_diag_ptr, hypre_CSRMatrix **B_ext_offd_ptr ); +HYPRE_Int +hypre_CSRMatrixSplitDevice_dbl( hypre_CSRMatrix *B_ext, HYPRE_BigInt first_col_diag_B, HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int **map_B_to_C_ptr, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr, hypre_CSRMatrix **B_ext_diag_ptr, hypre_CSRMatrix **B_ext_offd_ptr ); +HYPRE_Int +hypre_CSRMatrixSplitDevice_long_dbl( hypre_CSRMatrix *B_ext, HYPRE_BigInt first_col_diag_B, HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int **map_B_to_C_ptr, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt 
**col_map_offd_C_ptr, hypre_CSRMatrix **B_ext_diag_ptr, hypre_CSRMatrix **B_ext_offd_ptr ); + +HYPRE_Int +hypre_CSRMatrixSplitDevice_core_flt( HYPRE_Int job, HYPRE_Int num_rows, HYPRE_Int B_ext_nnz, HYPRE_Int *B_ext_ii, HYPRE_BigInt *B_ext_bigj, hypre_float *B_ext_data, char *B_ext_xata, HYPRE_BigInt first_col_diag_B, HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int **map_B_to_C_ptr, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr, HYPRE_Int *B_ext_diag_nnz_ptr, HYPRE_Int *B_ext_diag_ii, HYPRE_Int *B_ext_diag_j, hypre_float *B_ext_diag_data, char *B_ext_diag_xata, HYPRE_Int *B_ext_offd_nnz_ptr, HYPRE_Int *B_ext_offd_ii, HYPRE_Int *B_ext_offd_j, hypre_float *B_ext_offd_data, char *B_ext_offd_xata ); +HYPRE_Int +hypre_CSRMatrixSplitDevice_core_dbl( HYPRE_Int job, HYPRE_Int num_rows, HYPRE_Int B_ext_nnz, HYPRE_Int *B_ext_ii, HYPRE_BigInt *B_ext_bigj, hypre_double *B_ext_data, char *B_ext_xata, HYPRE_BigInt first_col_diag_B, HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int **map_B_to_C_ptr, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr, HYPRE_Int *B_ext_diag_nnz_ptr, HYPRE_Int *B_ext_diag_ii, HYPRE_Int *B_ext_diag_j, hypre_double *B_ext_diag_data, char *B_ext_diag_xata, HYPRE_Int *B_ext_offd_nnz_ptr, HYPRE_Int *B_ext_offd_ii, HYPRE_Int *B_ext_offd_j, hypre_double *B_ext_offd_data, char *B_ext_offd_xata ); +HYPRE_Int +hypre_CSRMatrixSplitDevice_core_long_dbl( HYPRE_Int job, HYPRE_Int num_rows, HYPRE_Int B_ext_nnz, HYPRE_Int *B_ext_ii, HYPRE_BigInt *B_ext_bigj, hypre_long_double *B_ext_data, char *B_ext_xata, HYPRE_BigInt first_col_diag_B, HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int **map_B_to_C_ptr, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr, HYPRE_Int *B_ext_diag_nnz_ptr, HYPRE_Int *B_ext_diag_ii, HYPRE_Int *B_ext_diag_j, hypre_long_double *B_ext_diag_data, char 
*B_ext_diag_xata, HYPRE_Int *B_ext_offd_nnz_ptr, HYPRE_Int *B_ext_offd_ii, HYPRE_Int *B_ext_offd_j, hypre_long_double *B_ext_offd_data, char *B_ext_offd_xata ); + +hypre_CSRMatrix* +hypre_CSRMatrixStack2Device_flt( hypre_CSRMatrix *A, hypre_CSRMatrix *B ); +hypre_CSRMatrix* +hypre_CSRMatrixStack2Device_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix *B ); +hypre_CSRMatrix* +hypre_CSRMatrixStack2Device_long_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix *B ); + +HYPRE_Int +hypre_CSRMatrixTaggedFnormDevice_flt( hypre_CSRMatrix *A, HYPRE_Int num_tags, HYPRE_Int *tags, hypre_float *tnorms ); +HYPRE_Int +hypre_CSRMatrixTaggedFnormDevice_dbl( hypre_CSRMatrix *A, HYPRE_Int num_tags, HYPRE_Int *tags, hypre_double *tnorms ); +HYPRE_Int +hypre_CSRMatrixTaggedFnormDevice_long_dbl( hypre_CSRMatrix *A, HYPRE_Int num_tags, HYPRE_Int *tags, hypre_long_double *tnorms ); + +HYPRE_Int +hypre_CSRMatrixTransposeDevice_flt( hypre_CSRMatrix *A, hypre_CSRMatrix **AT, HYPRE_Int data ); +HYPRE_Int +hypre_CSRMatrixTransposeDevice_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix **AT, HYPRE_Int data ); +HYPRE_Int +hypre_CSRMatrixTransposeDevice_long_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix **AT, HYPRE_Int data ); + +HYPRE_Int +hypre_CSRMatrixTriLowerUpperSolveDevice_flt( char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, hypre_float *l1_norms, hypre_Vector *f, hypre_Vector *u ); +HYPRE_Int +hypre_CSRMatrixTriLowerUpperSolveDevice_dbl( char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, hypre_double *l1_norms, hypre_Vector *f, hypre_Vector *u ); +HYPRE_Int +hypre_CSRMatrixTriLowerUpperSolveDevice_long_dbl( char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, hypre_long_double *l1_norms, hypre_Vector *f, hypre_Vector *u ); + +HYPRE_Int +hypre_CSRMatrixTriLowerUpperSolveDevice_core_flt( char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, hypre_float *l1_norms, hypre_Vector *f, HYPRE_Int offset_f, hypre_Vector *u, HYPRE_Int offset_u ); +HYPRE_Int +hypre_CSRMatrixTriLowerUpperSolveDevice_core_dbl( char uplo, 
HYPRE_Int unit_diag, hypre_CSRMatrix *A, hypre_double *l1_norms, hypre_Vector *f, HYPRE_Int offset_f, hypre_Vector *u, HYPRE_Int offset_u ); +HYPRE_Int +hypre_CSRMatrixTriLowerUpperSolveDevice_core_long_dbl( char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, hypre_long_double *l1_norms, hypre_Vector *f, HYPRE_Int offset_f, hypre_Vector *u, HYPRE_Int offset_u ); + +HYPRE_Int +hypre_CSRMatrixTriLowerUpperSolveRocsparse_flt( char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, hypre_float *l1_norms, hypre_float *f, hypre_float *u ); +HYPRE_Int +hypre_CSRMatrixTriLowerUpperSolveRocsparse_dbl( char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, hypre_double *l1_norms, hypre_double *f, hypre_double *u ); +HYPRE_Int +hypre_CSRMatrixTriLowerUpperSolveRocsparse_long_dbl( char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, hypre_long_double *l1_norms, hypre_long_double *f, hypre_long_double *u ); + +hypre_CSRMatrix * +hypre_CSRMatrixTripleMultiplyDevice_flt( hypre_CSRMatrix *A, hypre_CSRMatrix *B, hypre_CSRMatrix *C ); +hypre_CSRMatrix * +hypre_CSRMatrixTripleMultiplyDevice_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix *B, hypre_CSRMatrix *C ); +hypre_CSRMatrix * +hypre_CSRMatrixTripleMultiplyDevice_long_dbl( hypre_CSRMatrix *A, hypre_CSRMatrix *B, hypre_CSRMatrix *C ); + +hypre_CsrsvData* +hypre_CsrsvDataCreate_flt( ); +hypre_CsrsvData* +hypre_CsrsvDataCreate_dbl( ); +hypre_CsrsvData* +hypre_CsrsvDataCreate_long_dbl( ); + +HYPRE_Int +hypre_CsrsvDataDestroy_flt( hypre_CsrsvData *data ); +HYPRE_Int +hypre_CsrsvDataDestroy_dbl( hypre_CsrsvData *data ); +HYPRE_Int +hypre_CsrsvDataDestroy_long_dbl( hypre_CsrsvData *data ); + +HYPRE_Int +hypre_GPUMatDataSetCSRData_flt( hypre_CSRMatrix *matrix ); +HYPRE_Int +hypre_GPUMatDataSetCSRData_dbl( hypre_CSRMatrix *matrix ); +HYPRE_Int +hypre_GPUMatDataSetCSRData_long_dbl( hypre_CSRMatrix *matrix ); + +hypre_GpuMatData* +hypre_GpuMatDataCreate_flt( ); +hypre_GpuMatData* +hypre_GpuMatDataCreate_dbl( ); +hypre_GpuMatData* 
+hypre_GpuMatDataCreate_long_dbl( ); + +HYPRE_Int +hypre_GpuMatDataDestroy_flt( hypre_GpuMatData *data ); +HYPRE_Int +hypre_GpuMatDataDestroy_dbl( hypre_GpuMatData *data ); +HYPRE_Int +hypre_GpuMatDataDestroy_long_dbl( hypre_GpuMatData *data ); + +HYPRE_Int +hypre_SeqVectorAxpyDevice_flt( hypre_float alpha, hypre_Vector *x, hypre_Vector *y ); +HYPRE_Int +hypre_SeqVectorAxpyDevice_dbl( hypre_double alpha, hypre_Vector *x, hypre_Vector *y ); +HYPRE_Int +hypre_SeqVectorAxpyDevice_long_dbl( hypre_long_double alpha, hypre_Vector *x, hypre_Vector *y ); + +HYPRE_Int +hypre_SeqVectorAxpyzDevice_flt( hypre_float alpha, hypre_Vector *x, hypre_float beta, hypre_Vector *y, hypre_Vector *z ); +HYPRE_Int +hypre_SeqVectorAxpyzDevice_dbl( hypre_double alpha, hypre_Vector *x, hypre_double beta, hypre_Vector *y, hypre_Vector *z ); +HYPRE_Int +hypre_SeqVectorAxpyzDevice_long_dbl( hypre_long_double alpha, hypre_Vector *x, hypre_long_double beta, hypre_Vector *y, hypre_Vector *z ); + +HYPRE_Real +hypre_SeqVectorInnerProdDevice_flt( hypre_Vector *x, hypre_Vector *y ); +HYPRE_Real +hypre_SeqVectorInnerProdDevice_dbl( hypre_Vector *x, hypre_Vector *y ); +HYPRE_Real +hypre_SeqVectorInnerProdDevice_long_dbl( hypre_Vector *x, hypre_Vector *y ); + +HYPRE_Int +hypre_SeqVectorPointwiseDivisionDevice_flt( hypre_Vector *x, hypre_Vector *y, hypre_Vector *z ); +HYPRE_Int +hypre_SeqVectorPointwiseDivisionDevice_dbl( hypre_Vector *x, hypre_Vector *y, hypre_Vector *z ); +HYPRE_Int +hypre_SeqVectorPointwiseDivisionDevice_long_dbl( hypre_Vector *x, hypre_Vector *y, hypre_Vector *z ); + +HYPRE_Int +hypre_SeqVectorPointwiseDivpyDevice_flt( hypre_Vector *x, hypre_Vector *b, hypre_Vector *y, HYPRE_Int *marker, HYPRE_Int marker_val ); +HYPRE_Int +hypre_SeqVectorPointwiseDivpyDevice_dbl( hypre_Vector *x, hypre_Vector *b, hypre_Vector *y, HYPRE_Int *marker, HYPRE_Int marker_val ); +HYPRE_Int +hypre_SeqVectorPointwiseDivpyDevice_long_dbl( hypre_Vector *x, hypre_Vector *b, hypre_Vector *y, HYPRE_Int *marker, 
HYPRE_Int marker_val ); + +HYPRE_Int +hypre_SeqVectorPointwiseInverseDevice_flt( hypre_Vector *x, hypre_Vector *y ); +HYPRE_Int +hypre_SeqVectorPointwiseInverseDevice_dbl( hypre_Vector *x, hypre_Vector *y ); +HYPRE_Int +hypre_SeqVectorPointwiseInverseDevice_long_dbl( hypre_Vector *x, hypre_Vector *y ); + +HYPRE_Int +hypre_SeqVectorPointwiseProductDevice_flt( hypre_Vector *x, hypre_Vector *y, hypre_Vector *z ); +HYPRE_Int +hypre_SeqVectorPointwiseProductDevice_dbl( hypre_Vector *x, hypre_Vector *y, hypre_Vector *z ); +HYPRE_Int +hypre_SeqVectorPointwiseProductDevice_long_dbl( hypre_Vector *x, hypre_Vector *y, hypre_Vector *z ); + +HYPRE_Int +hypre_SeqVectorPrefetch_flt( hypre_Vector *x, HYPRE_MemoryLocation memory_location ); +HYPRE_Int +hypre_SeqVectorPrefetch_dbl( hypre_Vector *x, HYPRE_MemoryLocation memory_location ); +HYPRE_Int +hypre_SeqVectorPrefetch_long_dbl( hypre_Vector *x, HYPRE_MemoryLocation memory_location ); + +HYPRE_Int +hypre_SeqVectorScaleDevice_flt( hypre_float alpha, hypre_Vector *y ); +HYPRE_Int +hypre_SeqVectorScaleDevice_dbl( hypre_double alpha, hypre_Vector *y ); +HYPRE_Int +hypre_SeqVectorScaleDevice_long_dbl( hypre_long_double alpha, hypre_Vector *y ); + +HYPRE_Int +hypre_SeqVectorSetConstantValuesDevice_flt( hypre_Vector *v, hypre_float value ); +HYPRE_Int +hypre_SeqVectorSetConstantValuesDevice_dbl( hypre_Vector *v, hypre_double value ); +HYPRE_Int +hypre_SeqVectorSetConstantValuesDevice_long_dbl( hypre_Vector *v, hypre_long_double value ); + +HYPRE_Int +hypre_SeqVectorSetValuesTaggedDevice_flt( hypre_Vector *vector, hypre_float *values ); +HYPRE_Int +hypre_SeqVectorSetValuesTaggedDevice_dbl( hypre_Vector *vector, hypre_double *values ); +HYPRE_Int +hypre_SeqVectorSetValuesTaggedDevice_long_dbl( hypre_Vector *vector, hypre_long_double *values ); + +HYPRE_Int +hypre_SeqVectorStridedCopyDevice_flt( hypre_Vector *vector, HYPRE_Int istride, HYPRE_Int ostride, HYPRE_Int size, hypre_float *data ); +HYPRE_Int 
+hypre_SeqVectorStridedCopyDevice_dbl( hypre_Vector *vector, HYPRE_Int istride, HYPRE_Int ostride, HYPRE_Int size, hypre_double *data ); +HYPRE_Int +hypre_SeqVectorStridedCopyDevice_long_dbl( hypre_Vector *vector, HYPRE_Int istride, HYPRE_Int ostride, HYPRE_Int size, hypre_long_double *data ); + +HYPRE_Complex +hypre_SeqVectorSumEltsDevice_flt( hypre_Vector *vector ); +HYPRE_Complex +hypre_SeqVectorSumEltsDevice_dbl( hypre_Vector *vector ); +HYPRE_Complex +hypre_SeqVectorSumEltsDevice_long_dbl( hypre_Vector *vector ); + +/* functions_gpu */ + +/* pre_gpu */ + + +#endif +#endif + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus +extern "C++" { +#endif + +#if defined (HYPRE_MIXED_PRECISION) +#if defined(HYPRE_USING_GPU) + +/* fixed_gpu */ + + +#endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/src/seq_mv/_hypre_seq_mv_mup_def.h b/src/seq_mv/_hypre_seq_mv_mup_def.h index 204b99edbb..1f6ab5e532 100644 --- a/src/seq_mv/_hypre_seq_mv_mup_def.h +++ b/src/seq_mv/_hypre_seq_mv_mup_def.h @@ -40,12 +40,18 @@ #define HYPRE_MultiblockMatrixPrint HYPRE_MULTIPRECISION_FUNC ( HYPRE_MultiblockMatrixPrint ) #define HYPRE_MultiblockMatrixSetNumSubmatrices HYPRE_MULTIPRECISION_FUNC ( HYPRE_MultiblockMatrixSetNumSubmatrices ) #define HYPRE_MultiblockMatrixSetSubmatrixType HYPRE_MULTIPRECISION_FUNC ( HYPRE_MultiblockMatrixSetSubmatrixType ) +#define HYPRE_VectorAxpy HYPRE_MULTIPRECISION_FUNC ( HYPRE_VectorAxpy ) #define HYPRE_VectorCopy HYPRE_MULTIPRECISION_FUNC ( HYPRE_VectorCopy ) #define HYPRE_VectorCreate HYPRE_MULTIPRECISION_FUNC ( HYPRE_VectorCreate ) #define HYPRE_VectorDestroy HYPRE_MULTIPRECISION_FUNC ( HYPRE_VectorDestroy ) #define HYPRE_VectorInitialize HYPRE_MULTIPRECISION_FUNC ( HYPRE_VectorInitialize ) #define HYPRE_VectorPrint HYPRE_MULTIPRECISION_FUNC ( HYPRE_VectorPrint ) #define HYPRE_VectorRead HYPRE_MULTIPRECISION_FUNC ( HYPRE_VectorRead ) +#define hypre_CSRMatrixCopy HYPRE_MULTIPRECISION_FUNC ( hypre_CSRMatrixCopy ) +#define 
hypre_CSRMatrixCreate HYPRE_MULTIPRECISION_FUNC ( hypre_CSRMatrixCreate ) +#define hypre_CSRMatrixInitialize_v2 HYPRE_MULTIPRECISION_FUNC ( hypre_CSRMatrixInitialize_v2 ) +#define hypre_CSRMatrixResetData HYPRE_MULTIPRECISION_FUNC ( hypre_CSRMatrixResetData ) +#define hypre_SeqVectorSetData HYPRE_MULTIPRECISION_FUNC ( hypre_SeqVectorSetData ) #define hypre_CSRMatrixAdd HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixAdd ) #define hypre_CSRMatrixAddFirstPass HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixAddFirstPass ) #define hypre_CSRMatrixAddHost HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixAddHost ) @@ -60,8 +66,6 @@ #define hypre_CSRMatrixComputeColSumHost HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixComputeColSumHost ) #define hypre_CSRMatrixComputeRowSum HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixComputeRowSum ) #define hypre_CSRMatrixComputeRowSumHost HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixComputeRowSumHost ) -#define hypre_CSRMatrixCopy HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixCopy ) -#define hypre_CSRMatrixCreate HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixCreate ) #define hypre_CSRMatrixDeleteZeros HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixDeleteZeros ) #define hypre_CSRMatrixDestroy HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixDestroy ) #define hypre_CSRMatrixDiagScale HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixDiagScale ) @@ -73,7 +77,6 @@ #define hypre_CSRMatrixGetLoadBalancedPartitionBegin HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixGetLoadBalancedPartitionBegin ) #define hypre_CSRMatrixGetLoadBalancedPartitionEnd HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixGetLoadBalancedPartitionEnd ) #define hypre_CSRMatrixInitialize HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixInitialize ) -#define hypre_CSRMatrixInitialize_v2 HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixInitialize_v2 ) #define hypre_CSRMatrixJtoBigJ HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixJtoBigJ ) #define hypre_CSRMatrixMatvec HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixMatvec ) #define 
hypre_CSRMatrixMatvecOutOfPlace HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixMatvecOutOfPlace ) @@ -176,7 +179,6 @@ #define hypre_SeqVectorScaleHost HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorScaleHost ) #define hypre_SeqVectorSetConstantValues HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorSetConstantValues ) #define hypre_SeqVectorSetConstantValuesHost HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorSetConstantValuesHost ) -#define hypre_SeqVectorSetData HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorSetData ) #define hypre_SeqVectorSetDataOwner HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorSetDataOwner ) #define hypre_SeqVectorSetNumTags HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorSetNumTags ) #define hypre_SeqVectorSetOwnsTags HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorSetOwnsTags ) @@ -188,5 +190,93 @@ #define hypre_SeqVectorStridedCopy HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorStridedCopy ) #define hypre_SeqVectorSumElts HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorSumElts ) #define hypre_SeqVectorSumEltsHost HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorSumEltsHost ) +#define hypreDevice_CSRSpAdd HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CSRSpAdd ) +#define hypreDevice_CSRSpGemm HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CSRSpGemm ) +#define hypreDevice_CSRSpGemmBinnedGetBlockNumDim HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CSRSpGemmBinnedGetBlockNumDim ) +#define hypreDevice_CSRSpGemmNumerWithRownnzUpperbound HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CSRSpGemmNumerWithRownnzUpperbound ) +#define hypreDevice_CSRSpGemmNumerWithRownnzUpperboundBinned HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CSRSpGemmNumerWithRownnzUpperboundBinned ) +#define hypreDevice_CSRSpGemmNumerWithRownnzUpperboundNoBin HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CSRSpGemmNumerWithRownnzUpperboundNoBin ) +#define hypreDevice_CSRSpGemmRocsparse HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CSRSpGemmRocsparse ) +#define hypreDevice_CSRSpGemmRownnz HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CSRSpGemmRownnz ) +#define 
hypreDevice_CSRSpGemmRownnzBinned HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CSRSpGemmRownnzBinned ) +#define hypreDevice_CSRSpGemmRownnzEstimate HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CSRSpGemmRownnzEstimate ) +#define hypreDevice_CSRSpGemmRownnzNoBin HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CSRSpGemmRownnzNoBin ) +#define hypreDevice_CSRSpGemmRownnzUpperbound HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CSRSpGemmRownnzUpperbound ) +#define hypreDevice_CSRSpGemmRownnzUpperboundBinned HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CSRSpGemmRownnzUpperboundBinned ) +#define hypreDevice_CSRSpGemmRownnzUpperboundNoBin HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CSRSpGemmRownnzUpperboundNoBin ) +#define hypreDevice_CSRSpTrans HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CSRSpTrans ) +#define hypreDevice_CSRSpTransRocsparse HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CSRSpTransRocsparse ) +#define hypreGPUKernel_CSRCheckDiagFirst HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_CSRCheckDiagFirst ) +#define hypreGPUKernel_CSRDiagScale HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_CSRDiagScale ) +#define hypreGPUKernel_CSRMatrixCheckForMissingDiagonal HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_CSRMatrixCheckForMissingDiagonal ) +#define hypreGPUKernel_CSRMatrixIntersectPattern HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_CSRMatrixIntersectPattern ) +#define hypreGPUKernel_CSRMatrixReplaceDiagDevice HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_CSRMatrixReplaceDiagDevice ) +#define hypreGPUKernel_CSRMatrixTaggedFnormAccum HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_CSRMatrixTaggedFnormAccum ) +#define hypreGPUKernel_CSRMoveDiagFirst HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_CSRMoveDiagFirst ) +#define hypre_CSRMatrixAddDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixAddDevice ) +#define hypre_CSRMatrixAddPartialDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixAddPartialDevice ) +#define hypre_CSRMatrixCheckDiagFirstDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixCheckDiagFirstDevice ) +#define 
hypre_CSRMatrixCheckForMissingDiagonal HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixCheckForMissingDiagonal ) +#define hypre_CSRMatrixColNNzRealDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixColNNzRealDevice ) +#define hypre_CSRMatrixCompressColumnsDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixCompressColumnsDevice ) +#define hypre_CSRMatrixComputeColSumDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixComputeColSumDevice ) +#define hypre_CSRMatrixComputeRowSumDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixComputeRowSumDevice ) +#define hypre_CSRMatrixDeleteZerosDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixDeleteZerosDevice ) +#define hypre_CSRMatrixDiagMatrixFromMatrixDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixDiagMatrixFromMatrixDevice ) +#define hypre_CSRMatrixDiagMatrixFromVectorDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixDiagMatrixFromVectorDevice ) +#define hypre_CSRMatrixDiagScaleDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixDiagScaleDevice ) +#define hypre_CSRMatrixDropSmallEntriesDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixDropSmallEntriesDevice ) +#define hypre_CSRMatrixExtractDiagonalDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixExtractDiagonalDevice ) +#define hypre_CSRMatrixGetGPUMatData HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixGetGPUMatData ) +#define hypre_CSRMatrixILU0 HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixILU0 ) +#define hypre_CSRMatrixIdentityDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixIdentityDevice ) +#define hypre_CSRMatrixIntSpMVDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixIntSpMVDevice ) +#define hypre_CSRMatrixIntersectPattern HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixIntersectPattern ) +#define hypre_CSRMatrixMatvecDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixMatvecDevice ) +#define hypre_CSRMatrixMatvecRocsparse HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixMatvecRocsparse ) +#define hypre_CSRMatrixMergeColMapOffd HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixMergeColMapOffd ) 
+#define hypre_CSRMatrixMoveDiagFirstDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixMoveDiagFirstDevice ) +#define hypre_CSRMatrixMultiplyDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixMultiplyDevice ) +#define hypre_CSRMatrixPermuteDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixPermuteDevice ) +#define hypre_CSRMatrixRemoveDiagonalDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixRemoveDiagonalDevice ) +#define hypre_CSRMatrixReplaceDiagDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixReplaceDiagDevice ) +#define hypre_CSRMatrixSetRownnzDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixSetRownnzDevice ) +#define hypre_CSRMatrixSortRow HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixSortRow ) +#define hypre_CSRMatrixSortRowOutOfPlace HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixSortRowOutOfPlace ) +#define hypre_CSRMatrixSpMVAnalysisDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixSpMVAnalysisDevice ) +#define hypre_CSRMatrixSpMVDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixSpMVDevice ) +#define hypre_CSRMatrixSplitDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixSplitDevice ) +#define hypre_CSRMatrixSplitDevice_core HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixSplitDevice_core ) +#define hypre_CSRMatrixStack2Device HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixStack2Device ) +#define hypre_CSRMatrixTaggedFnormDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixTaggedFnormDevice ) +#define hypre_CSRMatrixTransposeDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixTransposeDevice ) +#define hypre_CSRMatrixTriLowerUpperSolveDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixTriLowerUpperSolveDevice ) +#define hypre_CSRMatrixTriLowerUpperSolveDevice_core HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixTriLowerUpperSolveDevice_core ) +#define hypre_CSRMatrixTriLowerUpperSolveRocsparse HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixTriLowerUpperSolveRocsparse ) +#define hypre_CSRMatrixTripleMultiplyDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_CSRMatrixTripleMultiplyDevice ) +#define 
hypre_CsrsvDataCreate HYPRE_FIXEDPRECISION_FUNC ( hypre_CsrsvDataCreate ) +#define hypre_CsrsvDataDestroy HYPRE_FIXEDPRECISION_FUNC ( hypre_CsrsvDataDestroy ) +#define hypre_GPUMatDataSetCSRData HYPRE_FIXEDPRECISION_FUNC ( hypre_GPUMatDataSetCSRData ) +#define hypre_GpuMatDataCreate HYPRE_FIXEDPRECISION_FUNC ( hypre_GpuMatDataCreate ) +#define hypre_GpuMatDataDestroy HYPRE_FIXEDPRECISION_FUNC ( hypre_GpuMatDataDestroy ) +#define hypre_SeqVectorAxpyDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorAxpyDevice ) +#define hypre_SeqVectorAxpyzDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorAxpyzDevice ) +#define hypre_SeqVectorInnerProdDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorInnerProdDevice ) +#define hypre_SeqVectorPointwiseDivisionDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorPointwiseDivisionDevice ) +#define hypre_SeqVectorPointwiseDivpyDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorPointwiseDivpyDevice ) +#define hypre_SeqVectorPointwiseInverseDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorPointwiseInverseDevice ) +#define hypre_SeqVectorPointwiseProductDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorPointwiseProductDevice ) +#define hypre_SeqVectorPrefetch HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorPrefetch ) +#define hypre_SeqVectorScaleDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorScaleDevice ) +#define hypre_SeqVectorSetConstantValuesDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorSetConstantValuesDevice ) +#define hypre_SeqVectorSetValuesTaggedDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorSetValuesTaggedDevice ) +#define hypre_SeqVectorStridedCopyDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorStridedCopyDevice ) +#define hypre_SeqVectorSumEltsDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_SeqVectorSumEltsDevice ) +#define hypre_SortCSRRocsparse HYPRE_FIXEDPRECISION_FUNC ( hypre_SortCSRRocsparse ) +#define hypre_SpGemmCreateBins HYPRE_FIXEDPRECISION_FUNC ( hypre_SpGemmCreateBins ) +#define hypre_SpGemmCreateGlobalHashTable 
HYPRE_FIXEDPRECISION_FUNC ( hypre_SpGemmCreateGlobalHashTable ) +#define hypre_SpGemmGhashSize HYPRE_FIXEDPRECISION_FUNC ( hypre_SpGemmGhashSize ) +#define hypre_create_ija HYPRE_FIXEDPRECISION_FUNC ( hypre_create_ija ) +#define hypre_expdistfromuniform HYPRE_FIXEDPRECISION_FUNC ( hypre_expdistfromuniform ) #endif diff --git a/src/seq_mv/_hypre_seq_mv_mup_undef.h b/src/seq_mv/_hypre_seq_mv_mup_undef.h index ce3d19ed13..5ddc1381f8 100644 --- a/src/seq_mv/_hypre_seq_mv_mup_undef.h +++ b/src/seq_mv/_hypre_seq_mv_mup_undef.h @@ -37,12 +37,18 @@ #undef HYPRE_MultiblockMatrixPrint #undef HYPRE_MultiblockMatrixSetNumSubmatrices #undef HYPRE_MultiblockMatrixSetSubmatrixType +#undef HYPRE_VectorAxpy #undef HYPRE_VectorCopy #undef HYPRE_VectorCreate #undef HYPRE_VectorDestroy #undef HYPRE_VectorInitialize #undef HYPRE_VectorPrint #undef HYPRE_VectorRead +#undef hypre_CSRMatrixCopy +#undef hypre_CSRMatrixCreate +#undef hypre_CSRMatrixInitialize_v2 +#undef hypre_CSRMatrixResetData +#undef hypre_SeqVectorSetData #undef hypre_CSRMatrixAdd #undef hypre_CSRMatrixAddFirstPass #undef hypre_CSRMatrixAddHost @@ -57,8 +63,6 @@ #undef hypre_CSRMatrixComputeColSumHost #undef hypre_CSRMatrixComputeRowSum #undef hypre_CSRMatrixComputeRowSumHost -#undef hypre_CSRMatrixCopy -#undef hypre_CSRMatrixCreate #undef hypre_CSRMatrixDeleteZeros #undef hypre_CSRMatrixDestroy #undef hypre_CSRMatrixDiagScale @@ -70,7 +74,6 @@ #undef hypre_CSRMatrixGetLoadBalancedPartitionBegin #undef hypre_CSRMatrixGetLoadBalancedPartitionEnd #undef hypre_CSRMatrixInitialize -#undef hypre_CSRMatrixInitialize_v2 #undef hypre_CSRMatrixJtoBigJ #undef hypre_CSRMatrixMatvec #undef hypre_CSRMatrixMatvecOutOfPlace @@ -173,7 +176,6 @@ #undef hypre_SeqVectorScaleHost #undef hypre_SeqVectorSetConstantValues #undef hypre_SeqVectorSetConstantValuesHost -#undef hypre_SeqVectorSetData #undef hypre_SeqVectorSetDataOwner #undef hypre_SeqVectorSetNumTags #undef hypre_SeqVectorSetOwnsTags @@ -185,3 +187,91 @@ #undef 
hypre_SeqVectorStridedCopy #undef hypre_SeqVectorSumElts #undef hypre_SeqVectorSumEltsHost +#undef hypreDevice_CSRSpAdd +#undef hypreDevice_CSRSpGemm +#undef hypreDevice_CSRSpGemmBinnedGetBlockNumDim +#undef hypreDevice_CSRSpGemmNumerWithRownnzUpperbound +#undef hypreDevice_CSRSpGemmNumerWithRownnzUpperboundBinned +#undef hypreDevice_CSRSpGemmNumerWithRownnzUpperboundNoBin +#undef hypreDevice_CSRSpGemmRocsparse +#undef hypreDevice_CSRSpGemmRownnz +#undef hypreDevice_CSRSpGemmRownnzBinned +#undef hypreDevice_CSRSpGemmRownnzEstimate +#undef hypreDevice_CSRSpGemmRownnzNoBin +#undef hypreDevice_CSRSpGemmRownnzUpperbound +#undef hypreDevice_CSRSpGemmRownnzUpperboundBinned +#undef hypreDevice_CSRSpGemmRownnzUpperboundNoBin +#undef hypreDevice_CSRSpTrans +#undef hypreDevice_CSRSpTransRocsparse +#undef hypreGPUKernel_CSRCheckDiagFirst +#undef hypreGPUKernel_CSRDiagScale +#undef hypreGPUKernel_CSRMatrixCheckForMissingDiagonal +#undef hypreGPUKernel_CSRMatrixIntersectPattern +#undef hypreGPUKernel_CSRMatrixReplaceDiagDevice +#undef hypreGPUKernel_CSRMatrixTaggedFnormAccum +#undef hypreGPUKernel_CSRMoveDiagFirst +#undef hypre_CSRMatrixAddDevice +#undef hypre_CSRMatrixAddPartialDevice +#undef hypre_CSRMatrixCheckDiagFirstDevice +#undef hypre_CSRMatrixCheckForMissingDiagonal +#undef hypre_CSRMatrixColNNzRealDevice +#undef hypre_CSRMatrixCompressColumnsDevice +#undef hypre_CSRMatrixComputeColSumDevice +#undef hypre_CSRMatrixComputeRowSumDevice +#undef hypre_CSRMatrixDeleteZerosDevice +#undef hypre_CSRMatrixDiagMatrixFromMatrixDevice +#undef hypre_CSRMatrixDiagMatrixFromVectorDevice +#undef hypre_CSRMatrixDiagScaleDevice +#undef hypre_CSRMatrixDropSmallEntriesDevice +#undef hypre_CSRMatrixExtractDiagonalDevice +#undef hypre_CSRMatrixGetGPUMatData +#undef hypre_CSRMatrixILU0 +#undef hypre_CSRMatrixIdentityDevice +#undef hypre_CSRMatrixIntSpMVDevice +#undef hypre_CSRMatrixIntersectPattern +#undef hypre_CSRMatrixMatvecDevice +#undef hypre_CSRMatrixMatvecRocsparse +#undef 
hypre_CSRMatrixMergeColMapOffd +#undef hypre_CSRMatrixMoveDiagFirstDevice +#undef hypre_CSRMatrixMultiplyDevice +#undef hypre_CSRMatrixPermuteDevice +#undef hypre_CSRMatrixRemoveDiagonalDevice +#undef hypre_CSRMatrixReplaceDiagDevice +#undef hypre_CSRMatrixSetRownnzDevice +#undef hypre_CSRMatrixSortRow +#undef hypre_CSRMatrixSortRowOutOfPlace +#undef hypre_CSRMatrixSpMVAnalysisDevice +#undef hypre_CSRMatrixSpMVDevice +#undef hypre_CSRMatrixSplitDevice +#undef hypre_CSRMatrixSplitDevice_core +#undef hypre_CSRMatrixStack2Device +#undef hypre_CSRMatrixTaggedFnormDevice +#undef hypre_CSRMatrixTransposeDevice +#undef hypre_CSRMatrixTriLowerUpperSolveDevice +#undef hypre_CSRMatrixTriLowerUpperSolveDevice_core +#undef hypre_CSRMatrixTriLowerUpperSolveRocsparse +#undef hypre_CSRMatrixTripleMultiplyDevice +#undef hypre_CsrsvDataCreate +#undef hypre_CsrsvDataDestroy +#undef hypre_GPUMatDataSetCSRData +#undef hypre_GpuMatDataCreate +#undef hypre_GpuMatDataDestroy +#undef hypre_SeqVectorAxpyDevice +#undef hypre_SeqVectorAxpyzDevice +#undef hypre_SeqVectorInnerProdDevice +#undef hypre_SeqVectorPointwiseDivisionDevice +#undef hypre_SeqVectorPointwiseDivpyDevice +#undef hypre_SeqVectorPointwiseInverseDevice +#undef hypre_SeqVectorPointwiseProductDevice +#undef hypre_SeqVectorPrefetch +#undef hypre_SeqVectorScaleDevice +#undef hypre_SeqVectorSetConstantValuesDevice +#undef hypre_SeqVectorSetValuesTaggedDevice +#undef hypre_SeqVectorStridedCopyDevice +#undef hypre_SeqVectorSumEltsDevice +#undef hypre_SortCSRRocsparse +#undef hypre_SpGemmCreateBins +#undef hypre_SpGemmCreateGlobalHashTable +#undef hypre_SpGemmGhashSize +#undef hypre_create_ija +#undef hypre_expdistfromuniform diff --git a/src/seq_mv/csr_matop_device.c b/src/seq_mv/csr_matop_device.c index b3e6eeb546..26d2836123 100644 --- a/src/seq_mv/csr_matop_device.c +++ b/src/seq_mv/csr_matop_device.c @@ -194,6 +194,27 @@ hypre_GpuMatDataDestroy(hypre_GpuMatData *data) return hypre_error_flag; } 
+/*-------------------------------------------------------------------------- + * hypre_CSRMatrixGetGPUMatData + *--------------------------------------------------------------------------*/ + +hypre_GpuMatData* +hypre_CSRMatrixGetGPUMatData(hypre_CSRMatrix *matrix) +{ + if (!matrix) + { + return NULL; + } + + if (!hypre_CSRMatrixGPUMatData(matrix)) + { + hypre_CSRMatrixGPUMatData(matrix) = hypre_GpuMatDataCreate(); + hypre_GPUMatDataSetCSRData(matrix); + } + + return hypre_CSRMatrixGPUMatData(matrix); +} + #endif /* #if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE) */ #if defined(HYPRE_USING_GPU) diff --git a/src/seq_mv/csr_matrix.c b/src/seq_mv/csr_matrix.c index 2f4fca88b4..06df39ad45 100644 --- a/src/seq_mv/csr_matrix.c +++ b/src/seq_mv/csr_matrix.c @@ -1544,27 +1544,30 @@ hypre_CSRMatrixPrefetch( hypre_CSRMatrix *A, return hypre_error_flag; } -#if defined(HYPRE_USING_CUSPARSE) ||\ - defined(HYPRE_USING_ROCSPARSE) ||\ - defined(HYPRE_USING_ONEMKLSPARSE) /*-------------------------------------------------------------------------- - * hypre_CSRMatrixGetGPUMatData + * hypre_CSRMatrixResetData + * Reinitialize the matrix data array; reports an error and returns if the matrix does not own its data *--------------------------------------------------------------------------*/ - -hypre_GpuMatData* -hypre_CSRMatrixGetGPUMatData(hypre_CSRMatrix *matrix) +HYPRE_Int +hypre_CSRMatrixResetData(hypre_CSRMatrix *matrix) { - if (!matrix) + + /* Check that matrix owns its data */ + if (!hypre_CSRMatrixOwnsData(matrix)) { - return NULL; + hypre_error_w_msg(HYPRE_ERROR_GENERIC, + "Error: called hypre_CSRMatrixResetData on a matrix that doesn't own the data\n"); + return hypre_error_flag; } - - if (!hypre_CSRMatrixGPUMatData(matrix)) + /* Free data array if already present */ + if (hypre_CSRMatrixData(matrix)) { - hypre_CSRMatrixGPUMatData(matrix) = hypre_GpuMatDataCreate(); - hypre_GPUMatDataSetCSRData(matrix); + hypre_TFree(hypre_CSRMatrixData(matrix), hypre_CSRMatrixMemoryLocation(matrix)); } - 
return hypre_CSRMatrixGPUMatData(matrix); + /* Reallocate memory */ + hypre_CSRMatrixData(matrix) = hypre_CTAlloc(HYPRE_Complex, hypre_CSRMatrixNumNonzeros(matrix), + hypre_CSRMatrixMemoryLocation(matrix)); + + return hypre_error_flag; } -#endif diff --git a/src/seq_mv/headers b/src/seq_mv/headers index 267b3beff8..3bf21cf673 100755 --- a/src/seq_mv/headers +++ b/src/seq_mv/headers @@ -74,3 +74,60 @@ cat >> $INTERNAL_HEADER <<@ #endif @ + + +INTERNAL_HEADER=_hypre_seq_mv.hpp + +#=========================================================================== +# Include guards and other includes +#=========================================================================== + +cat > $INTERNAL_HEADER <<@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use 'headers' to generate) ***/ + +#ifndef hypre_SEQ_MV_HPP +#define hypre_SEQ_MV_HPP + +#include +#include "_hypre_utilities.hpp" + +#ifdef HYPRE_MIXED_PRECISION +#include "_hypre_seq_mv_mup_def.h" +#endif + +#ifdef __cplusplus +extern "C++" { +#endif + +@ + +#=========================================================================== +# Structures and prototypes +#=========================================================================== + +#=========================================================================== +# Include guards +#=========================================================================== + +cat >> $INTERNAL_HEADER <<@ + +#ifdef __cplusplus +} +#endif + +#ifdef HYPRE_MIXED_PRECISION +/* The following is for user compiles and the order is important. The first + * header ensures that we do not change prototype names in user files or in the + * second header file. The second header contains all the prototypes needed by + * users for mixed precision. 
*/ +#ifndef hypre_MP_BUILD +#include "_hypre_seq_mv_mup_undef.h" +#include "_hypre_seq_mv_mup.h" +#include "_hypre_seq_mv_mup.hpp" +#endif +#endif + +#endif + +@ diff --git a/src/seq_mv/mup.fixed b/src/seq_mv/mup.fixed index 2ab9ea61b5..4d936e2493 100644 --- a/src/seq_mv/mup.fixed +++ b/src/seq_mv/mup.fixed @@ -12,8 +12,6 @@ hypre_CSRMatrixComputeColSum hypre_CSRMatrixComputeColSumHost hypre_CSRMatrixComputeRowSum hypre_CSRMatrixComputeRowSumHost -hypre_CSRMatrixCopy -hypre_CSRMatrixCreate hypre_CSRMatrixDeleteZeros hypre_CSRMatrixDestroy hypre_CSRMatrixDiagScale @@ -25,7 +23,6 @@ hypre_CSRMatrixFnorm hypre_CSRMatrixGetLoadBalancedPartitionBegin hypre_CSRMatrixGetLoadBalancedPartitionEnd hypre_CSRMatrixInitialize -hypre_CSRMatrixInitialize_v2 hypre_CSRMatrixJtoBigJ hypre_CSRMatrixMatvec hypre_CSRMatrixMatvecOutOfPlace @@ -128,7 +125,6 @@ hypre_SeqVectorScale hypre_SeqVectorScaleHost hypre_SeqVectorSetConstantValues hypre_SeqVectorSetConstantValuesHost -hypre_SeqVectorSetData hypre_SeqVectorSetDataOwner hypre_SeqVectorSetNumTags hypre_SeqVectorSetOwnsTags diff --git a/src/seq_mv/mup.fixed_gpu b/src/seq_mv/mup.fixed_gpu new file mode 100644 index 0000000000..c85c9e2bbc --- /dev/null +++ b/src/seq_mv/mup.fixed_gpu @@ -0,0 +1,88 @@ +hypreDevice_CSRSpAdd +hypreDevice_CSRSpGemm +hypreDevice_CSRSpGemmBinnedGetBlockNumDim +hypreDevice_CSRSpGemmNumerWithRownnzUpperbound +hypreDevice_CSRSpGemmNumerWithRownnzUpperboundBinned +hypreDevice_CSRSpGemmNumerWithRownnzUpperboundNoBin +hypreDevice_CSRSpGemmRocsparse +hypreDevice_CSRSpGemmRownnz +hypreDevice_CSRSpGemmRownnzBinned +hypreDevice_CSRSpGemmRownnzEstimate +hypreDevice_CSRSpGemmRownnzNoBin +hypreDevice_CSRSpGemmRownnzUpperbound +hypreDevice_CSRSpGemmRownnzUpperboundBinned +hypreDevice_CSRSpGemmRownnzUpperboundNoBin +hypreDevice_CSRSpTrans +hypreDevice_CSRSpTransRocsparse +hypreGPUKernel_CSRCheckDiagFirst +hypreGPUKernel_CSRDiagScale +hypreGPUKernel_CSRMatrixCheckForMissingDiagonal +hypreGPUKernel_CSRMatrixIntersectPattern 
+hypreGPUKernel_CSRMatrixReplaceDiagDevice +hypreGPUKernel_CSRMatrixTaggedFnormAccum +hypreGPUKernel_CSRMoveDiagFirst +hypre_CSRMatrixAddDevice +hypre_CSRMatrixAddPartialDevice +hypre_CSRMatrixCheckDiagFirstDevice +hypre_CSRMatrixCheckForMissingDiagonal +hypre_CSRMatrixColNNzRealDevice +hypre_CSRMatrixCompressColumnsDevice +hypre_CSRMatrixComputeColSumDevice +hypre_CSRMatrixComputeRowSumDevice +hypre_CSRMatrixDeleteZerosDevice +hypre_CSRMatrixDiagMatrixFromMatrixDevice +hypre_CSRMatrixDiagMatrixFromVectorDevice +hypre_CSRMatrixDiagScaleDevice +hypre_CSRMatrixDropSmallEntriesDevice +hypre_CSRMatrixExtractDiagonalDevice +hypre_CSRMatrixGetGPUMatData +hypre_CSRMatrixILU0 +hypre_CSRMatrixIdentityDevice +hypre_CSRMatrixIntSpMVDevice +hypre_CSRMatrixIntersectPattern +hypre_CSRMatrixMatvecDevice +hypre_CSRMatrixMatvecRocsparse +hypre_CSRMatrixMergeColMapOffd +hypre_CSRMatrixMoveDiagFirstDevice +hypre_CSRMatrixMultiplyDevice +hypre_CSRMatrixPermuteDevice +hypre_CSRMatrixRemoveDiagonalDevice +hypre_CSRMatrixReplaceDiagDevice +hypre_CSRMatrixSetRownnzDevice +hypre_CSRMatrixSortRow +hypre_CSRMatrixSortRowOutOfPlace +hypre_CSRMatrixSpMVAnalysisDevice +hypre_CSRMatrixSpMVDevice +hypre_CSRMatrixSplitDevice +hypre_CSRMatrixSplitDevice_core +hypre_CSRMatrixStack2Device +hypre_CSRMatrixTaggedFnormDevice +hypre_CSRMatrixTransposeDevice +hypre_CSRMatrixTriLowerUpperSolveDevice +hypre_CSRMatrixTriLowerUpperSolveDevice_core +hypre_CSRMatrixTriLowerUpperSolveRocsparse +hypre_CSRMatrixTripleMultiplyDevice +hypre_CsrsvDataCreate +hypre_CsrsvDataDestroy +hypre_GPUMatDataSetCSRData +hypre_GpuMatDataCreate +hypre_GpuMatDataDestroy +hypre_SeqVectorAxpyDevice +hypre_SeqVectorAxpyzDevice +hypre_SeqVectorInnerProdDevice +hypre_SeqVectorPointwiseDivisionDevice +hypre_SeqVectorPointwiseDivpyDevice +hypre_SeqVectorPointwiseInverseDevice +hypre_SeqVectorPointwiseProductDevice +hypre_SeqVectorPrefetch +hypre_SeqVectorScaleDevice +hypre_SeqVectorSetConstantValuesDevice 
+hypre_SeqVectorSetValuesTaggedDevice +hypre_SeqVectorStridedCopyDevice +hypre_SeqVectorSumEltsDevice +hypre_SortCSRRocsparse +hypre_SpGemmCreateBins +hypre_SpGemmCreateGlobalHashTable +hypre_SpGemmGhashSize +hypre_create_ija +hypre_expdistfromuniform diff --git a/src/seq_mv/mup.functions b/src/seq_mv/mup.functions index fe0efc74cf..f09771d10d 100644 --- a/src/seq_mv/mup.functions +++ b/src/seq_mv/mup.functions @@ -23,9 +23,15 @@ HYPRE_MultiblockMatrixLimitedDestroy HYPRE_MultiblockMatrixPrint HYPRE_MultiblockMatrixSetNumSubmatrices HYPRE_MultiblockMatrixSetSubmatrixType +HYPRE_VectorAxpy HYPRE_VectorCopy HYPRE_VectorCreate HYPRE_VectorDestroy HYPRE_VectorInitialize HYPRE_VectorPrint HYPRE_VectorRead +hypre_CSRMatrixCopy +hypre_CSRMatrixCreate +hypre_CSRMatrixInitialize_v2 +hypre_CSRMatrixResetData +hypre_SeqVectorSetData diff --git a/src/seq_mv/mup.functions_gpu b/src/seq_mv/mup.functions_gpu new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/seq_mv/mup.methods_gpu b/src/seq_mv/mup.methods_gpu new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/seq_mv/mup_fixed.c b/src/seq_mv/mup_fixed.c index ef655d4bf8..cced5c94b3 100644 --- a/src/seq_mv/mup_fixed.c +++ b/src/seq_mv/mup_fixed.c @@ -112,22 +112,6 @@ hypre_CSRMatrixComputeRowSum( hypre_CSRMatrix *A, HYPRE_Int *CF_i, HYPRE_Int *CF /*--------------------------------------------------------------------------*/ -HYPRE_Int -hypre_CSRMatrixCopy( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int copy_data ) -{ - return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixCopy)( A, B, copy_data ); -} - -/*--------------------------------------------------------------------------*/ - -hypre_CSRMatrix * -hypre_CSRMatrixCreate( HYPRE_Int num_rows, HYPRE_Int num_cols, HYPRE_Int num_nonzeros ) -{ - return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixCreate)( num_rows, num_cols, num_nonzeros ); -} - -/*--------------------------------------------------------------------------*/ - hypre_CSRMatrix * 
hypre_CSRMatrixDeleteZeros( hypre_CSRMatrix *A, HYPRE_Real tol ) { @@ -208,14 +192,6 @@ hypre_CSRMatrixInitialize( hypre_CSRMatrix *matrix ) /*--------------------------------------------------------------------------*/ -HYPRE_Int -hypre_CSRMatrixInitialize_v2( hypre_CSRMatrix *matrix, HYPRE_Int bigInit, HYPRE_MemoryLocation memory_location ) -{ - return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixInitialize_v2)( matrix, bigInit, memory_location ); -} - -/*--------------------------------------------------------------------------*/ - HYPRE_Int hypre_CSRMatrixJtoBigJ( hypre_CSRMatrix *matrix ) { @@ -928,14 +904,6 @@ hypre_SeqVectorSetConstantValuesHost( hypre_Vector *v, HYPRE_Complex value ) /*--------------------------------------------------------------------------*/ -HYPRE_Int -hypre_SeqVectorSetData( hypre_Vector *vector, HYPRE_Complex *data ) -{ - return HYPRE_CURRENTPRECISION_FUNC(hypre_SeqVectorSetData)( vector, data ); -} - -/*--------------------------------------------------------------------------*/ - HYPRE_Int hypre_SeqVectorSetDataOwner( hypre_Vector *vector, HYPRE_Int owns_data ) { diff --git a/src/seq_mv/mup_fixed_gpu.c b/src/seq_mv/mup_fixed_gpu.c new file mode 100644 index 0000000000..c1198cbb20 --- /dev/null +++ b/src/seq_mv/mup_fixed_gpu.c @@ -0,0 +1,527 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +#include "_hypre_seq_mv.h" +#include "_hypre_seq_mv.hpp" + +#ifdef HYPRE_MIXED_PRECISION + +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#if defined(HYPRE_USING_GPU) + + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_CSRSpAdd( HYPRE_Int ma, HYPRE_Int mb, HYPRE_Int nnzA, HYPRE_Int nnzB, HYPRE_Int *d_ia, HYPRE_Int *d_ja, HYPRE_Complex alpha, HYPRE_Complex *d_aa, HYPRE_Int *d_ja_map, HYPRE_Int *d_ib, HYPRE_Int *d_jb, HYPRE_Complex beta, HYPRE_Complex *d_ab, HYPRE_Int *d_jb_map, HYPRE_Int *d_num_b, HYPRE_Int *nnzC_out, HYPRE_Int **d_ic_out, HYPRE_Int **d_jc_out, HYPRE_Complex **d_ac_out ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_CSRSpAdd)( ma, mb, nnzA, nnzB, d_ia, d_ja, alpha, d_aa, d_ja_map, d_ib, d_jb, beta, d_ab, d_jb_map, d_num_b, nnzC_out, d_ic_out, d_jc_out, d_ac_out ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_CSRSpGemm( hypre_CSRMatrix *A, hypre_CSRMatrix *B, hypre_CSRMatrix **C_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_CSRSpGemm)( A, B, C_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_CSRSpTrans( HYPRE_Int m, HYPRE_Int n, HYPRE_Int nnzA, HYPRE_Int *d_ia, HYPRE_Int *d_ja, HYPRE_Complex *d_aa, HYPRE_Int **d_ic_out, HYPRE_Int **d_jc_out, HYPRE_Complex **d_ac_out, HYPRE_Int want_data ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_CSRSpTrans)( m, n, nnzA, d_ia, d_ja, d_aa, d_ic_out, d_jc_out, d_ac_out, want_data ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_CSRSpTransRocsparse( HYPRE_Int m, HYPRE_Int n, HYPRE_Int nnzA, HYPRE_Int *d_ia, HYPRE_Int *d_ja, HYPRE_Complex *d_aa, HYPRE_Int **d_ic_out, HYPRE_Int **d_jc_out, HYPRE_Complex **d_ac_out, HYPRE_Int want_data ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_CSRSpTransRocsparse)( m, n, nnzA, d_ia, d_ja, d_aa, 
d_ic_out, d_jc_out, d_ac_out, want_data ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix * +hypre_CSRMatrixAddDevice( HYPRE_Complex alpha, hypre_CSRMatrix *A, HYPRE_Complex beta, hypre_CSRMatrix *B ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixAddDevice)( alpha, A, beta, B ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix* +hypre_CSRMatrixAddPartialDevice( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int *row_nums ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixAddPartialDevice)( A, B, row_nums ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixCheckDiagFirstDevice( hypre_CSRMatrix *A ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixCheckDiagFirstDevice)( A ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixCheckForMissingDiagonal( hypre_CSRMatrix *A ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixCheckForMissingDiagonal)( A ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixColNNzRealDevice( hypre_CSRMatrix *A, HYPRE_Real *colnnz ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixColNNzRealDevice)( A, colnnz ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixCompressColumnsDevice( hypre_CSRMatrix *A, HYPRE_BigInt *col_map, HYPRE_Int **col_idx_new_ptr, HYPRE_BigInt **col_map_new_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixCompressColumnsDevice)( A, col_map, col_idx_new_ptr, col_map_new_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixComputeColSumDevice( hypre_CSRMatrix *A, HYPRE_Complex *col_sum, HYPRE_Int type, HYPRE_Complex scal ) +{ + return 
HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixComputeColSumDevice)( A, col_sum, type, scal ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixComputeRowSumDevice( hypre_CSRMatrix *A, HYPRE_Int *CF_i, HYPRE_Int *CF_j, HYPRE_Complex *row_sum, HYPRE_Int type, HYPRE_Complex scal, const char *set_or_add ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixComputeRowSumDevice)( A, CF_i, CF_j, row_sum, type, scal, set_or_add ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix * +hypre_CSRMatrixDeleteZerosDevice( hypre_CSRMatrix *A, HYPRE_Real tol ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixDeleteZerosDevice)( A, tol ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix* +hypre_CSRMatrixDiagMatrixFromMatrixDevice( hypre_CSRMatrix *A, HYPRE_Int type ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixDiagMatrixFromMatrixDevice)( A, type ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix* +hypre_CSRMatrixDiagMatrixFromVectorDevice( HYPRE_Int n, HYPRE_Complex *v ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixDiagMatrixFromVectorDevice)( n, v ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixDiagScaleDevice( hypre_CSRMatrix *A, hypre_Vector *ld, hypre_Vector *rd ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixDiagScaleDevice)( A, ld, rd ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixDropSmallEntriesDevice( hypre_CSRMatrix *A, HYPRE_Real tol, HYPRE_Real *elmt_tols ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixDropSmallEntriesDevice)( A, tol, elmt_tols ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int 
+hypre_CSRMatrixExtractDiagonalDevice( hypre_CSRMatrix *A, HYPRE_Complex *d, HYPRE_Int type ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixExtractDiagonalDevice)( A, d, type ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_GpuMatData* +hypre_CSRMatrixGetGPUMatData( hypre_CSRMatrix *matrix ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixGetGPUMatData)( matrix ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixILU0( hypre_CSRMatrix *A ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixILU0)( A ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix* +hypre_CSRMatrixIdentityDevice( HYPRE_Int n, HYPRE_Complex alp ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixIdentityDevice)( n, alp ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixIntSpMVDevice( HYPRE_Int num_rows, HYPRE_Int num_nonzeros, HYPRE_Int alpha, HYPRE_Int *d_ia, HYPRE_Int *d_ja, HYPRE_Int *d_a, HYPRE_Int *d_x, HYPRE_Int beta, HYPRE_Int *d_y ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixIntSpMVDevice)( num_rows, num_nonzeros, alpha, d_ia, d_ja, d_a, d_x, beta, d_y ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixIntersectPattern( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int *markA, HYPRE_Int diag_option ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixIntersectPattern)( A, B, markA, diag_option ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixMatvecDevice( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *b, hypre_Vector *y, HYPRE_Int offset ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixMatvecDevice)( trans, alpha, A, x, 
beta, b, y, offset ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixMatvecRocsparse( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int offset ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixMatvecRocsparse)( trans, alpha, A, x, beta, y, offset ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixMergeColMapOffd( HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int B_ext_offd_nnz, HYPRE_BigInt *B_ext_offd_bigj, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr, HYPRE_Int **map_B_to_C_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixMergeColMapOffd)( num_cols_offd_B, col_map_offd_B, B_ext_offd_nnz, B_ext_offd_bigj, num_cols_offd_C_ptr, col_map_offd_C_ptr, map_B_to_C_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixMoveDiagFirstDevice( hypre_CSRMatrix *A ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixMoveDiagFirstDevice)( A ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix * +hypre_CSRMatrixMultiplyDevice( hypre_CSRMatrix *A, hypre_CSRMatrix *B ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixMultiplyDevice)( A, B ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixPermuteDevice( hypre_CSRMatrix *A, HYPRE_Int *perm, HYPRE_Int *rqperm, hypre_CSRMatrix *B ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixPermuteDevice)( A, perm, rqperm, B ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixRemoveDiagonalDevice( hypre_CSRMatrix *A ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixRemoveDiagonalDevice)( A ); +} + 
+/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixReplaceDiagDevice( hypre_CSRMatrix *A, HYPRE_Complex *new_diag, HYPRE_Complex v, HYPRE_Real tol ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixReplaceDiagDevice)( A, new_diag, v, tol ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixSetRownnzDevice( hypre_CSRMatrix *A ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixSetRownnzDevice)( A ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixSortRow( hypre_CSRMatrix *A ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixSortRow)( A ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixSortRowOutOfPlace( hypre_CSRMatrix *A ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixSortRowOutOfPlace)( A ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixSpMVAnalysisDevice( hypre_CSRMatrix *matrix ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixSpMVAnalysisDevice)( matrix ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixSpMVDevice( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int fill ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixSpMVDevice)( trans, alpha, A, x, beta, y, fill ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixSplitDevice( hypre_CSRMatrix *B_ext, HYPRE_BigInt first_col_diag_B, HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int **map_B_to_C_ptr, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr, hypre_CSRMatrix **B_ext_diag_ptr, 
hypre_CSRMatrix **B_ext_offd_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixSplitDevice)( B_ext, first_col_diag_B, last_col_diag_B, num_cols_offd_B, col_map_offd_B, map_B_to_C_ptr, num_cols_offd_C_ptr, col_map_offd_C_ptr, B_ext_diag_ptr, B_ext_offd_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixSplitDevice_core( HYPRE_Int job, HYPRE_Int num_rows, HYPRE_Int B_ext_nnz, HYPRE_Int *B_ext_ii, HYPRE_BigInt *B_ext_bigj, HYPRE_Complex *B_ext_data, char *B_ext_xata, HYPRE_BigInt first_col_diag_B, HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int **map_B_to_C_ptr, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr, HYPRE_Int *B_ext_diag_nnz_ptr, HYPRE_Int *B_ext_diag_ii, HYPRE_Int *B_ext_diag_j, HYPRE_Complex *B_ext_diag_data, char *B_ext_diag_xata, HYPRE_Int *B_ext_offd_nnz_ptr, HYPRE_Int *B_ext_offd_ii, HYPRE_Int *B_ext_offd_j, HYPRE_Complex *B_ext_offd_data, char *B_ext_offd_xata ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixSplitDevice_core)( job, num_rows, B_ext_nnz, B_ext_ii, B_ext_bigj, B_ext_data, B_ext_xata, first_col_diag_B, last_col_diag_B, num_cols_offd_B, col_map_offd_B, map_B_to_C_ptr, num_cols_offd_C_ptr, col_map_offd_C_ptr, B_ext_diag_nnz_ptr, B_ext_diag_ii, B_ext_diag_j, B_ext_diag_data, B_ext_diag_xata, B_ext_offd_nnz_ptr, B_ext_offd_ii, B_ext_offd_j, B_ext_offd_data, B_ext_offd_xata ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix* +hypre_CSRMatrixStack2Device( hypre_CSRMatrix *A, hypre_CSRMatrix *B ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixStack2Device)( A, B ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixTaggedFnormDevice( hypre_CSRMatrix *A, HYPRE_Int num_tags, HYPRE_Int *tags, HYPRE_Real *tnorms ) +{ + return 
HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixTaggedFnormDevice)( A, num_tags, tags, tnorms ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixTransposeDevice( hypre_CSRMatrix *A, hypre_CSRMatrix **AT, HYPRE_Int data ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixTransposeDevice)( A, AT, data ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixTriLowerUpperSolveDevice( char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, hypre_Vector *u ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixTriLowerUpperSolveDevice)( uplo, unit_diag, A, l1_norms, f, u ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixTriLowerUpperSolveDevice_core( char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, HYPRE_Int offset_f, hypre_Vector *u, HYPRE_Int offset_u ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixTriLowerUpperSolveDevice_core)( uplo, unit_diag, A, l1_norms, f, offset_f, u, offset_u ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixTriLowerUpperSolveRocsparse( char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixTriLowerUpperSolveRocsparse)( uplo, unit_diag, A, l1_norms, f, u ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix * +hypre_CSRMatrixTripleMultiplyDevice( hypre_CSRMatrix *A, hypre_CSRMatrix *B, hypre_CSRMatrix *C ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CSRMatrixTripleMultiplyDevice)( A, B, C ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_CsrsvData* +hypre_CsrsvDataCreate( ) +{ + return 
HYPRE_CURRENTPRECISION_FUNC(hypre_CsrsvDataCreate)( ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CsrsvDataDestroy( hypre_CsrsvData *data ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CsrsvDataDestroy)( data ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_GPUMatDataSetCSRData( hypre_CSRMatrix *matrix ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_GPUMatDataSetCSRData)( matrix ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_GpuMatData* +hypre_GpuMatDataCreate( ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_GpuMatDataCreate)( ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_GpuMatDataDestroy( hypre_GpuMatData *data ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_GpuMatDataDestroy)( data ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SeqVectorAxpyDevice( HYPRE_Complex alpha, hypre_Vector *x, hypre_Vector *y ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_SeqVectorAxpyDevice)( alpha, x, y ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SeqVectorAxpyzDevice( HYPRE_Complex alpha, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *y, hypre_Vector *z ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_SeqVectorAxpyzDevice)( alpha, x, beta, y, z ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Real +hypre_SeqVectorInnerProdDevice( hypre_Vector *x, hypre_Vector *y ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_SeqVectorInnerProdDevice)( x, y ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SeqVectorPointwiseDivisionDevice( hypre_Vector *x, hypre_Vector *y, hypre_Vector *z ) +{ + return 
HYPRE_CURRENTPRECISION_FUNC(hypre_SeqVectorPointwiseDivisionDevice)( x, y, z ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SeqVectorPointwiseDivpyDevice( hypre_Vector *x, hypre_Vector *b, hypre_Vector *y, HYPRE_Int *marker, HYPRE_Int marker_val ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_SeqVectorPointwiseDivpyDevice)( x, b, y, marker, marker_val ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SeqVectorPointwiseInverseDevice( hypre_Vector *x, hypre_Vector *y ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_SeqVectorPointwiseInverseDevice)( x, y ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SeqVectorPointwiseProductDevice( hypre_Vector *x, hypre_Vector *y, hypre_Vector *z ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_SeqVectorPointwiseProductDevice)( x, y, z ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SeqVectorPrefetch( hypre_Vector *x, HYPRE_MemoryLocation memory_location ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_SeqVectorPrefetch)( x, memory_location ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SeqVectorScaleDevice( HYPRE_Complex alpha, hypre_Vector *y ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_SeqVectorScaleDevice)( alpha, y ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SeqVectorSetConstantValuesDevice( hypre_Vector *v, HYPRE_Complex value ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_SeqVectorSetConstantValuesDevice)( v, value ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SeqVectorSetValuesTaggedDevice( hypre_Vector *vector, HYPRE_Complex *values ) +{ + return 
HYPRE_CURRENTPRECISION_FUNC(hypre_SeqVectorSetValuesTaggedDevice)( vector, values ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SeqVectorStridedCopyDevice( hypre_Vector *vector, HYPRE_Int istride, HYPRE_Int ostride, HYPRE_Int size, HYPRE_Complex *data ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_SeqVectorStridedCopyDevice)( vector, istride, ostride, size, data ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Complex +hypre_SeqVectorSumEltsDevice( hypre_Vector *vector ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_SeqVectorSumEltsDevice)( vector ); +} + + +#endif + +#endif + diff --git a/src/seq_mv/mup_functions.c b/src/seq_mv/mup_functions.c index b5f89c814f..8427aa42e6 100644 --- a/src/seq_mv/mup_functions.c +++ b/src/seq_mv/mup_functions.c @@ -231,6 +231,15 @@ HYPRE_MultiblockMatrixSetSubmatrixType( HYPRE_MultiblockMatrix matrix, HYPRE_Int /*--------------------------------------------------------------------------*/ +HYPRE_Int +HYPRE_VectorAxpy( hypre_long_double alpha, HYPRE_Vector xvec, HYPRE_Vector yvec ) +{ + HYPRE_Precision precision = hypre_GlobalPrecision(); + return HYPRE_VectorAxpy_pre( precision, alpha, xvec, yvec ); +} + +/*--------------------------------------------------------------------------*/ + HYPRE_Int HYPRE_VectorCopy( HYPRE_Vector xvec, HYPRE_Vector yvec ) { @@ -284,6 +293,51 @@ HYPRE_VectorRead( char *file_name ) } +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixCopy( hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int copy_data ) +{ + HYPRE_Precision precision = hypre_GlobalPrecision(); + return hypre_CSRMatrixCopy_pre( precision, A, B, copy_data ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix * +hypre_CSRMatrixCreate( HYPRE_Int num_rows, HYPRE_Int num_cols, HYPRE_Int num_nonzeros ) +{ + HYPRE_Precision 
precision = hypre_GlobalPrecision(); + return hypre_CSRMatrixCreate_pre( precision, num_rows, num_cols, num_nonzeros ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixInitialize_v2( hypre_CSRMatrix *matrix, HYPRE_Int bigInit, HYPRE_MemoryLocation memory_location ) +{ + HYPRE_Precision precision = hypre_GlobalPrecision(); + return hypre_CSRMatrixInitialize_v2_pre( precision, matrix, bigInit, memory_location ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixResetData( hypre_CSRMatrix *matrix ) +{ + HYPRE_Precision precision = hypre_GlobalPrecision(); + return hypre_CSRMatrixResetData_pre( precision, matrix ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SeqVectorSetData( hypre_Vector *vector, void *data ) +{ + HYPRE_Precision precision = hypre_GlobalPrecision(); + return hypre_SeqVectorSetData_pre( precision, vector, data ); +} + #endif diff --git a/src/seq_mv/mup_functions_gpu.c b/src/seq_mv/mup_functions_gpu.c new file mode 100644 index 0000000000..1b0f942411 --- /dev/null +++ b/src/seq_mv/mup_functions_gpu.c @@ -0,0 +1,22 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +#include "_hypre_seq_mv.h" +#include "_hypre_seq_mv.hpp" + +#ifdef HYPRE_MIXED_PRECISION + +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#if defined(HYPRE_USING_GPU) + + +#endif + +#endif + diff --git a/src/seq_mv/mup_pre.c b/src/seq_mv/mup_pre.c index 6ed239a0c9..a45c853cec 100644 --- a/src/seq_mv/mup_pre.c +++ b/src/seq_mv/mup_pre.c @@ -447,6 +447,24 @@ HYPRE_MultiblockMatrixSetSubmatrixType_pre( HYPRE_Precision precision, HYPRE_Mul /*--------------------------------------------------------------------------*/ +HYPRE_Int +HYPRE_VectorAxpy_pre( HYPRE_Precision precision, hypre_long_double alpha, HYPRE_Vector xvec, HYPRE_Vector yvec ) +{ + switch (precision) + { + case HYPRE_REAL_SINGLE: + return HYPRE_VectorAxpy_flt( (hypre_float)alpha, xvec, yvec ); + case HYPRE_REAL_DOUBLE: + return HYPRE_VectorAxpy_dbl( (hypre_double)alpha, xvec, yvec ); + case HYPRE_REAL_LONGDOUBLE: + return HYPRE_VectorAxpy_long_dbl( (hypre_long_double)alpha, xvec, yvec ); + default: + { HYPRE_Int value = 0; hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unknown solver precision"); return value; } + } +} + +/*--------------------------------------------------------------------------*/ + HYPRE_Int HYPRE_VectorCopy_pre( HYPRE_Precision precision, HYPRE_Vector xvec, HYPRE_Vector yvec ) { @@ -554,6 +572,96 @@ HYPRE_VectorRead_pre( HYPRE_Precision precision, char *file_name ) } +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixCopy_pre( HYPRE_Precision precision, hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int copy_data ) +{ + switch (precision) + { + case HYPRE_REAL_SINGLE: + return hypre_CSRMatrixCopy_flt( A, B, copy_data ); + case HYPRE_REAL_DOUBLE: + return hypre_CSRMatrixCopy_dbl( A, B, copy_data ); + case HYPRE_REAL_LONGDOUBLE: + return hypre_CSRMatrixCopy_long_dbl( A, B, copy_data ); + default: + { HYPRE_Int value = 0; hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unknown solver precision"); return value; } + } +} + 
+/*--------------------------------------------------------------------------*/ + +hypre_CSRMatrix * +hypre_CSRMatrixCreate_pre( HYPRE_Precision precision, HYPRE_Int num_rows, HYPRE_Int num_cols, HYPRE_Int num_nonzeros ) +{ + switch (precision) + { + case HYPRE_REAL_SINGLE: + return hypre_CSRMatrixCreate_flt( num_rows, num_cols, num_nonzeros ); + case HYPRE_REAL_DOUBLE: + return hypre_CSRMatrixCreate_dbl( num_rows, num_cols, num_nonzeros ); + case HYPRE_REAL_LONGDOUBLE: + return hypre_CSRMatrixCreate_long_dbl( num_rows, num_cols, num_nonzeros ); + default: + { hypre_CSRMatrix * value = 0; hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unknown solver precision"); return value; } + } +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixInitialize_v2_pre( HYPRE_Precision precision, hypre_CSRMatrix *matrix, HYPRE_Int bigInit, HYPRE_MemoryLocation memory_location ) +{ + switch (precision) + { + case HYPRE_REAL_SINGLE: + return hypre_CSRMatrixInitialize_v2_flt( matrix, bigInit, memory_location ); + case HYPRE_REAL_DOUBLE: + return hypre_CSRMatrixInitialize_v2_dbl( matrix, bigInit, memory_location ); + case HYPRE_REAL_LONGDOUBLE: + return hypre_CSRMatrixInitialize_v2_long_dbl( matrix, bigInit, memory_location ); + default: + { HYPRE_Int value = 0; hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unknown solver precision"); return value; } + } +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CSRMatrixResetData_pre( HYPRE_Precision precision, hypre_CSRMatrix *matrix ) +{ + switch (precision) + { + case HYPRE_REAL_SINGLE: + return hypre_CSRMatrixResetData_flt( matrix ); + case HYPRE_REAL_DOUBLE: + return hypre_CSRMatrixResetData_dbl( matrix ); + case HYPRE_REAL_LONGDOUBLE: + return hypre_CSRMatrixResetData_long_dbl( matrix ); + default: + { HYPRE_Int value = 0; hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unknown solver precision"); return value; } + } +} + 
+/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SeqVectorSetData_pre( HYPRE_Precision precision, hypre_Vector *vector, void *data ) +{ + switch (precision) + { + case HYPRE_REAL_SINGLE: + return hypre_SeqVectorSetData_flt( vector, (hypre_float *)data ); + case HYPRE_REAL_DOUBLE: + return hypre_SeqVectorSetData_dbl( vector, (hypre_double *)data ); + case HYPRE_REAL_LONGDOUBLE: + return hypre_SeqVectorSetData_long_dbl( vector, (hypre_long_double *)data ); + default: + { HYPRE_Int value = 0; hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unknown solver precision"); return value; } + } +} + #endif diff --git a/src/seq_mv/mup_pre_gpu.c b/src/seq_mv/mup_pre_gpu.c new file mode 100644 index 0000000000..1b0f942411 --- /dev/null +++ b/src/seq_mv/mup_pre_gpu.c @@ -0,0 +1,22 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +#include "_hypre_seq_mv.h" +#include "_hypre_seq_mv.hpp" + +#ifdef HYPRE_MIXED_PRECISION + +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#if defined(HYPRE_USING_GPU) + + +#endif + +#endif + diff --git a/src/seq_mv/protos.h b/src/seq_mv/protos.h index 8902995a7a..f6443f8d10 100644 --- a/src/seq_mv/protos.h +++ b/src/seq_mv/protos.h @@ -163,6 +163,7 @@ HYPRE_Int hypre_CSRMatrixCheckSetNumNonzeros( hypre_CSRMatrix *matrix ); HYPRE_Int hypre_CSRMatrixResize( hypre_CSRMatrix *matrix, HYPRE_Int new_num_rows, HYPRE_Int new_num_cols, HYPRE_Int new_num_nonzeros ); HYPRE_Int hypre_CSRMatrixEliminateRowsCols(hypre_CSRMatrix *A, HYPRE_Int nrows, HYPRE_Int *rows); +HYPRE_Int hypre_CSRMatrixResetData(hypre_CSRMatrix *matrix); /* csr_matvec.c */ // y[offset:end] = alpha*A[offset:end,:]*x + beta*b[offset:end] diff --git a/src/seq_mv/protos_mp.h b/src/seq_mv/protos_mp.h index 08bf3ca646..1f869d5081 100644 --- a/src/seq_mv/protos_mp.h +++ b/src/seq_mv/protos_mp.h @@ -15,7 +15,7 @@ hypre_SeqVectorCopy_mp( hypre_Vector *x, hypre_Vector *y ); HYPRE_Int -hypre_SeqVectorAxpy_mp( hypre_double alpha, +hypre_SeqVectorAxpy_mp( hypre_long_double alpha, hypre_Vector *x, hypre_Vector *y ); @@ -27,4 +27,9 @@ HYPRE_Int hypre_SeqVectorConvert_mp ( hypre_Vector *v, HYPRE_Precision new_precision); +HYPRE_Int +hypre_CSRMatrixCopy_mp( hypre_CSRMatrix *A, hypre_CSRMatrix *B); + +hypre_CSRMatrix* +hypre_CSRMatrixClone_mp( hypre_CSRMatrix *A, HYPRE_Precision new_precision ); #endif diff --git a/src/seq_mv/seq_mv_mp.c b/src/seq_mv/seq_mv_mp.c index 39b4183ec8..10c3779d73 100644 --- a/src/seq_mv/seq_mv_mp.c +++ b/src/seq_mv/seq_mv_mp.c @@ -22,8 +22,7 @@ *****************************************************************************/ /*-------------------------------------------------------------------------- - * Mixed precision hypre_SeqVectorCopy -- TODO: Needs GPU support - DOK - * copies data from x to y + * Mixed precision hypre_SeqVectorCopy * if size of x is larger than y only the first size_y 
elements of x are * copied to y *--------------------------------------------------------------------------*/ @@ -31,145 +30,45 @@ HYPRE_Int hypre_SeqVectorCopy_mp( hypre_Vector *x, hypre_Vector *y ) { - /* - #ifdef HYPRE_PROFILE - hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime(); - #endif - - hypre_GpuProfilingPushRange("SeqVectorCopy"); - */ - /* determine type of output vector data ==> Precision of y. */ - HYPRE_Precision precision_y = hypre_VectorPrecision (y); - - HYPRE_Int i; - + HYPRE_Int size; /* Generic pointer type */ void *xp, *yp; /* Call standard vector copy if precisions match. */ - if (precision_y == hypre_VectorPrecision (x)) + if (hypre_VectorPrecision (y) == hypre_VectorPrecision (x)) { - return HYPRE_VectorCopy_pre(precision_y, (HYPRE_Vector)x, (HYPRE_Vector)y); + return HYPRE_VectorCopy_pre(hypre_VectorPrecision (y), (HYPRE_Vector)x, (HYPRE_Vector)y); } - HYPRE_Int size = hypre_min(hypre_VectorSize(x), hypre_VectorSize(y)) * hypre_VectorNumVectors(x); + size = hypre_min(hypre_VectorSize(x), hypre_VectorSize(y)) * hypre_VectorNumVectors(x); - /* Implicit conversion to generic data type (void pointer) */ xp = hypre_VectorData(x); yp = hypre_VectorData(y); + /* copy data, converting precision: source xp is at x's memory location, destination yp at y's */ + hypre_RealArrayCopy_mp(hypre_VectorPrecision (x), xp, hypre_VectorMemoryLocation(x), + hypre_VectorPrecision (y), yp, hypre_VectorMemoryLocation(y), size); - switch (hypre_VectorPrecision (x)) - { - case HYPRE_REAL_SINGLE: - switch (precision_y) - { - case HYPRE_REAL_DOUBLE: -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_double *)yp)[i] = (hypre_double)((hypre_float *)xp)[i]; - } - break; - case HYPRE_REAL_LONGDOUBLE: -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_long_double *)yp)[i] = (hypre_long_double)((hypre_float *)xp)[i]; - } - break; - default: - break; - } - break; - case 
HYPRE_REAL_DOUBLE: - switch (precision_y) - { - case HYPRE_REAL_SINGLE: -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_float *)yp)[i] = (hypre_float)((hypre_double *)xp)[i]; - } - break; - case HYPRE_REAL_LONGDOUBLE: -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_long_double *)yp)[i] = (hypre_long_double)((hypre_double *)xp)[i]; - } - break; - default: - break; - } - break; - case HYPRE_REAL_LONGDOUBLE: - switch (precision_y) - { - case HYPRE_REAL_SINGLE: -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_float *)yp)[i] = (hypre_float)((hypre_long_double *)xp)[i]; - } - break; - case HYPRE_REAL_DOUBLE: -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_double *)yp)[i] = (hypre_double)((hypre_long_double *)xp)[i]; - } - break; - default: - break; - } - break; - default: - hypre_error_w_msg_mp(HYPRE_ERROR_GENERIC, "Error: Undefined precision type for Vector Copy!\n"); - break; - } - - /* - #ifdef HYPRE_PROFILE - hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime(); - #endif - hypre_GpuProfilingPopRange(); - */ return hypre_error_flag; } /*-------------------------------------------------------------------------- - * Mixed-precision hypre_SeqVectorAxpy -- TODO: Needs GPU support - DOK + * Mixed-precision hypre_SeqVectorAxpy *--------------------------------------------------------------------------*/ HYPRE_Int -hypre_SeqVectorAxpy_mp( hypre_double alpha, +hypre_SeqVectorAxpy_mp( hypre_long_double alpha, hypre_Vector *x, hypre_Vector *y ) { - /* - #ifdef HYPRE_PROFILE - hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime(); - #endif - */ - /* determine type of output vector data ==> Precision of y. 
*/ - HYPRE_Precision precision = hypre_VectorPrecision (y); - void *xp, *yp; HYPRE_Int size = hypre_VectorSize(x); - HYPRE_Int i; + + /* Call standard vector axpy if precisions match. */ + if (hypre_VectorPrecision (y) == hypre_VectorPrecision (x)) + { + return HYPRE_VectorAxpy_pre(hypre_VectorPrecision (y), alpha, (HYPRE_Vector)x, (HYPRE_Vector)y); + } size *= hypre_VectorNumVectors(x); @@ -177,44 +76,9 @@ hypre_SeqVectorAxpy_mp( hypre_double alpha, xp = hypre_VectorData(x); yp = hypre_VectorData(y); - switch (precision) - { - case HYPRE_REAL_SINGLE: -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_float *)yp)[i] += (hypre_float)(alpha * ((hypre_double *)xp)[i]); - } - break; - case HYPRE_REAL_DOUBLE: -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_double *)yp)[i] += (hypre_double)(alpha * ((hypre_float *)xp)[i]); - } - break; - case HYPRE_REAL_LONGDOUBLE: -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_long_double *)yp)[i] += (hypre_long_double)(alpha * ((hypre_double *)xp)[i]); - } - break; - default: - hypre_error_w_msg_mp(HYPRE_ERROR_GENERIC, "Error: Undefined precision type for Vector Axpy!\n"); - } - /* - #ifdef HYPRE_PROFILE - hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime(); - #endif - */ - return hypre_error_flag; + /* Call mixed-precision axpy on vector data */ + return hypre_RealArrayAxpyn_mp(hypre_VectorPrecision (x), xp, hypre_VectorPrecision (y), yp, + hypre_VectorMemoryLocation(y), size, alpha); } /*-------------------------------------------------------------------------- @@ -225,129 +89,31 @@ HYPRE_Int hypre_SeqVectorConvert_mp (hypre_Vector *v, HYPRE_Precision new_precision) { - HYPRE_Precision precision = hypre_VectorPrecision (v); + HYPRE_Precision data_precision = 
hypre_VectorPrecision (v); void *data = hypre_VectorData(v); void *data_mp = NULL; - HYPRE_Int size = hypre_VectorSize(v); - HYPRE_MemoryLocation memory_location = hypre_VectorMemoryLocation(v); - HYPRE_Int i; + HYPRE_Int size = hypre_VectorSize(v) * hypre_VectorNumVectors(v); + + HYPRE_MemoryLocation data_location = hypre_VectorMemoryLocation(v); - if (new_precision == precision) + if (new_precision == data_precision) { return hypre_error_flag; } else { - switch (precision) - { - case HYPRE_REAL_SINGLE: - { - switch (new_precision) - { - case HYPRE_REAL_DOUBLE: - { - data_mp = (hypre_double *) hypre_CAlloc ((size_t)size, (size_t)sizeof(hypre_double), - memory_location); -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_double *)data_mp)[i] = (hypre_double) ((hypre_float *) data)[i]; - } - } - break; - case HYPRE_REAL_LONGDOUBLE: - { - data_mp = (hypre_long_double *) hypre_CAlloc ((size_t)size, (size_t)sizeof(hypre_long_double), - memory_location); -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_long_double *)data_mp)[i] = (hypre_long_double) ((hypre_float *) data)[i]; - } - } - break; - default: - hypre_error_w_msg_mp(HYPRE_ERROR_GENERIC, "Error: Undefined precision type!\n"); - } - } - break; - case HYPRE_REAL_DOUBLE: - { - switch (new_precision) - { - case HYPRE_REAL_SINGLE: - { - data_mp = (hypre_float *) hypre_CAlloc ((size_t)size, (size_t)sizeof(hypre_float), memory_location); -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_float *)data_mp)[i] = (hypre_float) ((hypre_double *) data)[i]; - } - } - break; - case HYPRE_REAL_LONGDOUBLE: - { - data_mp = (hypre_long_double *) hypre_CAlloc ((size_t)size, (size_t)sizeof(hypre_long_double), - memory_location); -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for 
private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_long_double *)data_mp)[i] = (hypre_long_double) ((hypre_double *) data)[i]; - } - } - break; - default: - hypre_error_w_msg_mp(HYPRE_ERROR_GENERIC, "Error: Undefined precision type!\n"); - } - } - break; - case HYPRE_REAL_LONGDOUBLE: - { - switch (new_precision) - { - case HYPRE_REAL_SINGLE: - { - data_mp = (hypre_float *) hypre_CAlloc ((size_t)size, (size_t)sizeof(hypre_float), memory_location); -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_float *)data_mp)[i] = (hypre_float) ((hypre_long_double *) data)[i]; - } - } - break; - case HYPRE_REAL_DOUBLE: - { - data_mp = (hypre_double *) hypre_CAlloc ((size_t)size, (size_t)sizeof(hypre_double), - memory_location); -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_double *)data_mp)[i] = (hypre_double) ((hypre_long_double *) data)[i]; - } - } - break; - default: - hypre_error_w_msg_mp(HYPRE_ERROR_GENERIC, "Error: Undefined precision type!\n"); - } - } - break; - default: - hypre_error_w_msg_mp(HYPRE_ERROR_GENERIC, "Error: Undefined precision type!\n"); - } - hypre_Free(data, memory_location); - hypre_VectorData(v) = data_mp; + /* clone vector data and convert to new precision type */ + data_mp = hypre_RealArrayClone_mp(data_precision, data, data_location, new_precision, data_location, + size); + + /* reset data pointer for vector */ + hypre_SeqVectorSetData_pre(new_precision, v, data_mp); + /* Note: + * SeqVectorSetData() frees old vector data and resets ownership to 0. + * We need to set data ownership here to ensure new data memory is cleaned up later. 
+ */ + hypre_SeqVectorSetDataOwner(v, 1); + /* Update precision */ hypre_VectorPrecision(v) = new_precision; } return hypre_error_flag; @@ -355,138 +121,110 @@ hypre_SeqVectorConvert_mp (hypre_Vector *v, /*-------------------------------------------------------------------------- * Convert precision in a mixed precision matrix + * + * 1. Save matrix data pointer + * 2. Set the matrix data pointer to NULL + * 3. Call ResetData() to allocate new data in new precision + * 4. Copy data + * 5. Free pointer to old data and update precision *--------------------------------------------------------------------------*/ HYPRE_Int hypre_CSRMatrixConvert_mp (hypre_CSRMatrix *A, HYPRE_Precision new_precision) { - HYPRE_Precision precision = hypre_CSRMatrixPrecision (A); - void *data = hypre_CSRMatrixData(A); - void *data_mp = NULL; + HYPRE_Precision data_precision = hypre_CSRMatrixPrecision (A); + void *data, *data_mp; HYPRE_Int size = hypre_CSRMatrixI(A)[hypre_CSRMatrixNumRows(A)]; - HYPRE_MemoryLocation memory_location = hypre_CSRMatrixMemoryLocation(A); - HYPRE_Int i; + HYPRE_MemoryLocation data_location = hypre_CSRMatrixMemoryLocation(A); - if (new_precision == precision) + if (new_precision == data_precision) { return hypre_error_flag; } else { - switch (precision) - { - case HYPRE_REAL_SINGLE: - { - switch (new_precision) - { - case HYPRE_REAL_DOUBLE: - { - data_mp = (hypre_double *) hypre_CAlloc ((size_t)size, (size_t)sizeof(hypre_double), - memory_location); -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_double *)data_mp)[i] = (hypre_double) ((hypre_float *) data)[i]; - } - } - break; - case HYPRE_REAL_LONGDOUBLE: - { - data_mp = (hypre_long_double *) hypre_CAlloc ((size_t)size, (size_t)sizeof(hypre_long_double), - memory_location); -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_long_double 
*)data_mp)[i] = (hypre_long_double) ((hypre_float *) data)[i]; - } - } - break; - default: - hypre_error_w_msg_mp(HYPRE_ERROR_GENERIC, "Error: Undefined precision type!\n"); - } - } - break; - case HYPRE_REAL_DOUBLE: - { - switch (new_precision) - { - case HYPRE_REAL_SINGLE: - { - data_mp = (hypre_float *) hypre_CAlloc ((size_t)size, (size_t)sizeof(hypre_float), memory_location); -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_float *)data_mp)[i] = (hypre_float) ((hypre_double *) data)[i]; - } - } - break; - case HYPRE_REAL_LONGDOUBLE: - { - data_mp = (hypre_long_double *) hypre_CAlloc ((size_t)size, (size_t)sizeof(hypre_long_double), - memory_location); -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_long_double *)data_mp)[i] = (hypre_long_double) ((hypre_double *) data)[i]; - } - } - break; - default: - hypre_error_w_msg_mp(HYPRE_ERROR_GENERIC, "Error: Undefined precision type!\n"); - } - } - break; - case HYPRE_REAL_LONGDOUBLE: - { - switch (new_precision) - { - case HYPRE_REAL_SINGLE: - { - data_mp = (hypre_float *) hypre_CAlloc ((size_t)size, (size_t)sizeof(hypre_float), memory_location); -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_float *)data_mp)[i] = (hypre_float) ((hypre_long_double *) data)[i]; - } - } - break; - case HYPRE_REAL_DOUBLE: - { - data_mp = (hypre_double *) hypre_CAlloc ((size_t)size, (size_t)sizeof(hypre_double), - memory_location); -#ifdef HYPRE_USING_OPENMP - #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < size; i++) - { - ((hypre_double *)data_mp)[i] = (hypre_double) ((hypre_long_double *) data)[i]; - } - } - break; - default: - hypre_error_w_msg_mp(HYPRE_ERROR_GENERIC, "Error: Undefined precision type!\n"); - } - } - break; - default: - 
hypre_error_w_msg_mp(HYPRE_ERROR_GENERIC, "Error: Undefined precision type!\n"); - } - hypre_Free(data, memory_location); - hypre_CSRMatrixData(A) = data_mp; + /* Set pointer to current data */ + data = hypre_CSRMatrixData(A); + /* Set matrix data pointer to NULL */ + hypre_CSRMatrixData(A) = NULL; + + /* reset matrix A's data storage to match new precision */ + hypre_CSRMatrixResetData_pre(new_precision, A); + + /* copy data to newly reset storage */ + data_mp = hypre_CSRMatrixData(A); + hypre_RealArrayCopy_mp(data_precision, data, data_location, + new_precision, data_mp, data_location, size); + + /* Now free old data */ + hypre_Free(data, data_location); + /* Update precision */ hypre_CSRMatrixPrecision(A) = new_precision; } return hypre_error_flag; } +/*-------------------------------------------------------------------------- + * Mixed precision matrix copy. + * NOTE: This copies the entire matrix and not just the structure. + * For structure only, use hypre_CSRMatrixCopy(A, B, 0); + *--------------------------------------------------------------------------*/ +HYPRE_Int +hypre_CSRMatrixCopy_mp( hypre_CSRMatrix *A, hypre_CSRMatrix *B) +{ + HYPRE_Precision precision_A = hypre_CSRMatrixPrecision (A); + HYPRE_Precision precision_B = hypre_CSRMatrixPrecision (B); + HYPRE_Int size = hypre_CSRMatrixI(A)[hypre_CSRMatrixNumRows(A)]; + + /* Implicit conversion to generic data type (void pointer) */ + void *Ap = hypre_CSRMatrixData(A); + void *Bp = hypre_CSRMatrixData(B); + + /* Call standard matrix copy if precisions match. */ + if (precision_A == precision_B) + { + return hypre_CSRMatrixCopy_pre(precision_A, A, B, 1); + } + + /* Copy structure of A to B. 
+ * Note: We are only copying structure here so we + * can use the default function call + */ + hypre_CSRMatrixCopy(A, B, 0); + + /* Now copy data from A to B */ + hypre_RealArrayCopy_mp(precision_A, Ap, hypre_CSRMatrixMemoryLocation(A), + precision_B, Bp, hypre_CSRMatrixMemoryLocation(B), size); + + return hypre_error_flag; +} + +/*-------------------------------------------------------------------------- + * hypre_CSRMatrixClone_mp + * Clone matrix A to a new_precision matrix at the same memory location. + *--------------------------------------------------------------------------*/ + +hypre_CSRMatrix* +hypre_CSRMatrixClone_mp( hypre_CSRMatrix *A, HYPRE_Precision new_precision ) +{ + HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A); + HYPRE_Int num_cols = hypre_CSRMatrixNumCols(A); + HYPRE_Int num_nonzeros = hypre_CSRMatrixNumNonzeros(A); + HYPRE_MemoryLocation memory_location = hypre_CSRMatrixMemoryLocation(A); + + hypre_CSRMatrix *B = NULL; + + HYPRE_Int bigInit = hypre_CSRMatrixBigJ(A) != NULL; + + /* Create and initialize new matrix B in new precision */ + B = hypre_CSRMatrixCreate_pre(new_precision, num_rows, num_cols, num_nonzeros); + hypre_CSRMatrixInitialize_v2_pre(new_precision, B, bigInit, memory_location); + + /* Call mixed-precision copy */ + hypre_CSRMatrixCopy_mp(A, B); + + return B; +} + #endif diff --git a/src/sstruct_ls/HYPRE_sstruct_ls_mup.h b/src/sstruct_ls/HYPRE_sstruct_ls_mup.h index ebed680926..4ec00cf193 100644 --- a/src/sstruct_ls/HYPRE_sstruct_ls_mup.h +++ b/src/sstruct_ls/HYPRE_sstruct_ls_mup.h @@ -1,17 +1,6 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ -#ifndef HYPRE_SSTRUCT_LS_MUP_HEADER -#define HYPRE_SSTRUCT_LS_MUP_HEADER - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined (HYPRE_MIXED_PRECISION) - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - /****************************************************************************** * Copyright (c) 1998 Lawrence 
Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. @@ -19,17 +8,18 @@ extern "C" { * SPDX-License-Identifier: (Apache-2.0 OR MIT) ******************************************************************************/ +#ifndef HYPRE_SSTRUCT_LS_MUP_HEADER +#define HYPRE_SSTRUCT_LS_MUP_HEADER +#ifdef __cplusplus +extern "C" { +#endif -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +#if defined (HYPRE_MIXED_PRECISION) -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ +/* fixed */ +/* functions */ HYPRE_Int HYPRE_SStructBiCGSTABCreate_flt( MPI_Comm comm, HYPRE_SStructSolver *solver ); @@ -1300,16 +1290,7 @@ HYPRE_SStructSysPFMGSolve_long_dbl( HYPRE_SStructSolver solver, HYPRE_SStructMat HYPRE_Int HYPRE_SStructSysPFMGSolve( HYPRE_SStructSolver solver, HYPRE_SStructMatrix A, HYPRE_SStructVector b, HYPRE_SStructVector x ); - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* pre */ HYPRE_Int HYPRE_SStructBiCGSTABCreate_pre( HYPRE_Precision precision, MPI_Comm comm, HYPRE_SStructSolver *solver ); @@ -1734,7 +1715,6 @@ HYPRE_SStructSysPFMGSetup_pre( HYPRE_Precision precision, HYPRE_SStructSolver so HYPRE_Int HYPRE_SStructSysPFMGSolve_pre( HYPRE_Precision precision, HYPRE_SStructSolver solver, HYPRE_SStructMatrix A, HYPRE_SStructVector b, HYPRE_SStructVector x ); - #endif #ifdef __cplusplus diff --git a/src/sstruct_ls/_hypre_sstruct_ls_mup.h b/src/sstruct_ls/_hypre_sstruct_ls_mup.h index 8aa34caba3..382d03340d 100644 --- a/src/sstruct_ls/_hypre_sstruct_ls_mup.h +++ b/src/sstruct_ls/_hypre_sstruct_ls_mup.h @@ -1,6 +1,13 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef hypre_SSTRUCT_LS_MUP_HEADER #define hypre_SSTRUCT_LS_MUP_HEADER @@ -10,15 +17,7 @@ extern "C" { #if defined (HYPRE_MIXED_PRECISION) -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* fixed */ HYPRE_Int hypre_NodeRelax_flt( void *relax_vdata, hypre_SStructPMatrix *A, hypre_SStructPVector *b, hypre_SStructPVector *x ); @@ -867,28 +866,9 @@ hypre_SysPFMGZeroDiagonal_dbl( hypre_SStructPMatrix *A ); HYPRE_Int hypre_SysPFMGZeroDiagonal_long_dbl( hypre_SStructPMatrix *A ); +/* functions */ -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - +/* pre */ #endif diff --git a/src/sstruct_mv/HYPRE_sstruct_mv_mup.h b/src/sstruct_mv/HYPRE_sstruct_mv_mup.h index 8fa5cb83b2..8f775243ad 100644 --- a/src/sstruct_mv/HYPRE_sstruct_mv_mup.h +++ b/src/sstruct_mv/HYPRE_sstruct_mv_mup.h @@ -1,17 +1,6 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ -#ifndef HYPRE_SSTRUCT_MV_MUP_HEADER -#define HYPRE_SSTRUCT_MV_MUP_HEADER - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined (HYPRE_MIXED_PRECISION) - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. @@ -19,17 +8,18 @@ extern "C" { * SPDX-License-Identifier: (Apache-2.0 OR MIT) ******************************************************************************/ +#ifndef HYPRE_SSTRUCT_MV_MUP_HEADER +#define HYPRE_SSTRUCT_MV_MUP_HEADER +#ifdef __cplusplus +extern "C" { +#endif -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +#if defined (HYPRE_MIXED_PRECISION) -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ +/* fixed */ +/* functions */ HYPRE_Int HYPRE_SStructAxpy_flt( hypre_float alpha, HYPRE_SStructVector x, HYPRE_SStructVector y ); @@ -850,16 +840,7 @@ HYPRE_SStructVectorSetValues_long_dbl( HYPRE_SStructVector vector, HYPRE_Int par HYPRE_Int HYPRE_SStructVectorSetValues( HYPRE_SStructVector vector, HYPRE_Int part, HYPRE_Int *index, HYPRE_Int var, void *value ); - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* pre */ HYPRE_Int HYPRE_SStructAxpy_pre( HYPRE_Precision precision, hypre_long_double alpha, HYPRE_SStructVector x, HYPRE_SStructVector y ); @@ -1134,7 +1115,6 @@ HYPRE_SStructVectorSetRandomValues_pre( HYPRE_Precision precision, HYPRE_SStruct HYPRE_Int HYPRE_SStructVectorSetValues_pre( HYPRE_Precision precision, HYPRE_SStructVector vector, HYPRE_Int part, HYPRE_Int *index, HYPRE_Int var, void *value ); - #endif #ifdef __cplusplus diff --git a/src/sstruct_mv/_hypre_sstruct_mv_mup.h b/src/sstruct_mv/_hypre_sstruct_mv_mup.h index 5f750f1ad5..fc91c8972f 100644 --- a/src/sstruct_mv/_hypre_sstruct_mv_mup.h +++ b/src/sstruct_mv/_hypre_sstruct_mv_mup.h @@ -1,6 +1,13 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef hypre_SSTRUCT_MV_MUP_HEADER #define hypre_SSTRUCT_MV_MUP_HEADER @@ -10,15 +17,7 @@ extern "C" { #if defined (HYPRE_MIXED_PRECISION) -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* fixed */ HYPRE_Int hypre_SStructAxpy_flt( hypre_float alpha, hypre_SStructVector *x, hypre_SStructVector *y ); @@ -1147,28 +1146,9 @@ hypre_SStructVectorSetRandomValues_dbl( hypre_SStructVector *vector, HYPRE_Int s HYPRE_Int hypre_SStructVectorSetRandomValues_long_dbl( hypre_SStructVector *vector, HYPRE_Int seed ); +/* functions */ -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - +/* pre */ #endif diff --git a/src/struct_ls/HYPRE_struct_ls_mup.h b/src/struct_ls/HYPRE_struct_ls_mup.h index 664d649c64..be7a5dc905 100644 --- a/src/struct_ls/HYPRE_struct_ls_mup.h +++ b/src/struct_ls/HYPRE_struct_ls_mup.h @@ -1,17 +1,6 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ -#ifndef HYPRE_STRUCT_LS_MUP_HEADER -#define HYPRE_STRUCT_LS_MUP_HEADER - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined (HYPRE_MIXED_PRECISION) - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. @@ -19,17 +8,18 @@ extern "C" { * SPDX-License-Identifier: (Apache-2.0 OR MIT) ******************************************************************************/ +#ifndef HYPRE_STRUCT_LS_MUP_HEADER +#define HYPRE_STRUCT_LS_MUP_HEADER +#ifdef __cplusplus +extern "C" { +#endif -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +#if defined (HYPRE_MIXED_PRECISION) -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ +/* fixed */ +/* functions */ HYPRE_Int HYPRE_StructBiCGSTABCreate_flt( MPI_Comm comm, HYPRE_StructSolver *solver ); @@ -1624,16 +1614,7 @@ HYPRE_StructSetupMatvec_long_dbl( HYPRE_MatvecFunctions *mv ); HYPRE_Int HYPRE_StructSetupMatvec( HYPRE_MatvecFunctions *mv ); - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* pre */ HYPRE_Int HYPRE_StructBiCGSTABCreate_pre( HYPRE_Precision precision, MPI_Comm comm, HYPRE_StructSolver *solver ); @@ -2166,7 +2147,6 @@ HYPRE_StructSetupInterpreter_pre( HYPRE_Precision precision, mv_InterfaceInterpr HYPRE_Int HYPRE_StructSetupMatvec_pre( HYPRE_Precision precision, HYPRE_MatvecFunctions *mv ); - #endif #ifdef __cplusplus diff --git a/src/struct_ls/_hypre_struct_ls_mup.h b/src/struct_ls/_hypre_struct_ls_mup.h index 8fdda4bd8b..34058727e2 100644 --- a/src/struct_ls/_hypre_struct_ls_mup.h +++ b/src/struct_ls/_hypre_struct_ls_mup.h @@ -1,6 +1,13 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef hypre_STRUCT_LS_MUP_HEADER #define hypre_STRUCT_LS_MUP_HEADER @@ -10,15 +17,7 @@ extern "C" { #if defined (HYPRE_MIXED_PRECISION) -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* fixed */ hypre_StructMatrix * hypre_CycRedCreateCoarseOp_flt( hypre_StructMatrix *A, hypre_StructGrid *coarse_grid, HYPRE_Int cdir ); @@ -1791,28 +1790,9 @@ hypre_StructKrylovSetRandomValues_dbl( void *x, HYPRE_Int seed ); HYPRE_Int hypre_StructKrylovSetRandomValues_long_dbl( void *x, HYPRE_Int seed ); +/* functions */ -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - +/* pre */ #endif diff --git a/src/struct_mv/HYPRE_struct_mv_mup.h b/src/struct_mv/HYPRE_struct_mv_mup.h index e1ee6da929..d559a92321 100644 --- a/src/struct_mv/HYPRE_struct_mv_mup.h +++ b/src/struct_mv/HYPRE_struct_mv_mup.h @@ -1,17 +1,6 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ -#ifndef HYPRE_STRUCT_MV_MUP_HEADER -#define HYPRE_STRUCT_MV_MUP_HEADER - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined (HYPRE_MIXED_PRECISION) - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. @@ -19,17 +8,18 @@ extern "C" { * SPDX-License-Identifier: (Apache-2.0 OR MIT) ******************************************************************************/ +#ifndef HYPRE_STRUCT_MV_MUP_HEADER +#define HYPRE_STRUCT_MV_MUP_HEADER +#ifdef __cplusplus +extern "C" { +#endif -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +#if defined (HYPRE_MIXED_PRECISION) -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ +/* fixed */ +/* functions */ HYPRE_Int HYPRE_CommPkgDestroy_flt( HYPRE_CommPkg comm_pkg ); @@ -652,16 +642,7 @@ HYPRE_StructVectorSetValues_long_dbl( HYPRE_StructVector vector, HYPRE_Int *inde HYPRE_Int HYPRE_StructVectorSetValues( HYPRE_StructVector vector, HYPRE_Int *index, void *values ); - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* pre */ HYPRE_Int HYPRE_CommPkgDestroy_pre( HYPRE_Precision precision, HYPRE_CommPkg comm_pkg ); @@ -870,7 +851,6 @@ HYPRE_StructVectorSetStride_pre( HYPRE_Precision precision, HYPRE_StructVector v HYPRE_Int HYPRE_StructVectorSetValues_pre( HYPRE_Precision precision, HYPRE_StructVector vector, HYPRE_Int *index, void *values ); - #endif #ifdef __cplusplus diff --git a/src/struct_mv/_hypre_struct_mv_mup.h b/src/struct_mv/_hypre_struct_mv_mup.h index b897ee00bb..332f62950f 100644 --- a/src/struct_mv/_hypre_struct_mv_mup.h +++ b/src/struct_mv/_hypre_struct_mv_mup.h @@ -1,6 +1,13 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef hypre_STRUCT_MV_MUP_HEADER #define hypre_STRUCT_MV_MUP_HEADER @@ -10,15 +17,7 @@ extern "C" { #if defined (HYPRE_MIXED_PRECISION) -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* fixed */ HYPRE_Int hypre_APFillResponseStructAssumedPart_flt( void *p_recv_contact_buf, HYPRE_Int contact_size, HYPRE_Int contact_proc, void *ro, MPI_Comm comm, void **p_send_response_buf, HYPRE_Int *response_message_size ); @@ -2687,28 +2686,9 @@ hypre_doubleBoxVolume_dbl( hypre_Box *box ); HYPRE_Real hypre_doubleBoxVolume_long_dbl( hypre_Box *box ); +/* functions */ -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - +/* pre */ #endif diff --git a/src/struct_mv/mup_fixed.c b/src/struct_mv/mup_fixed.c index f1ae974e4e..c6fc2b2e24 100644 --- a/src/struct_mv/mup_fixed.c +++ b/src/struct_mv/mup_fixed.c @@ -3063,5 +3063,6 @@ hypre_doubleBoxVolume( hypre_Box *box ) } + #endif diff --git a/src/test/struct_mp.c b/src/test/struct_mp.c index 64d05a8400..1371afae25 100644 --- a/src/test/struct_mp.c +++ b/src/test/struct_mp.c @@ -1878,7 +1878,7 @@ AddValuesVector_mp(hypre_StructGrid *gridvector, hypre_IndexRef ilower; hypre_IndexRef iupper; hypre_Box *box; - char *values; + char *values; /* use char to allow pointer arithmetic */ HYPRE_Int volume, dim; gridboxes = hypre_StructGridBoxes(gridvector); @@ -1959,7 +1959,7 @@ AddValuesMatrix_mp(HYPRE_StructMatrix A, hypre_IndexRef ilower; hypre_IndexRef iupper; hypre_Box *box; - char *values; + char *values; /* use char to allow pointer arithmetic */ void *east, *west; void *north, *south; void *top, *bottom; diff --git a/src/test/test_mp.c b/src/test/test_mp.c index c487c4b70f..23e3540042 100644 --- a/src/test/test_mp.c +++ b/src/test/test_mp.c @@ -92,9 +92,16 @@ int main (int argc, char *argv[]) MPI_Comm_rank(MPI_COMM_WORLD, &myid); MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + HYPRE_Initialize(); + /* default memory location */ + HYPRE_SetMemoryLocation(HYPRE_MEMORY_HOST); + + /* default execution policy */ + HYPRE_SetExecutionPolicy(HYPRE_EXEC_HOST); + /*! We set up the linear system following ex5. 
*/ /* Some problem parameters */ - n = 2; + n = 20; //solver_id = 0; /* Preliminaries: want at least one processor per row */ if (n * n < num_procs) { n = sqrt(num_procs) + 1; } @@ -310,7 +317,7 @@ int main (int argc, char *argv[]) HYPRE_Solver amg_solver; HYPRE_BoomerAMGCreate_flt(&amg_solver); - HYPRE_BoomerAMGSetPrintLevel_flt(amg_solver, 3); /* print amg solution info */ + HYPRE_BoomerAMGSetPrintLevel_flt(amg_solver, 1); /* print amg solution info */ HYPRE_BoomerAMGSetCoarsenType_flt(amg_solver, 8); HYPRE_BoomerAMGSetRelaxType_flt(amg_solver, 18); /* Sym G.S./Jacobi hybrid */ HYPRE_BoomerAMGSetNumSweeps_flt(amg_solver, 1); @@ -362,7 +369,7 @@ int main (int argc, char *argv[]) rprod = 0.; HYPRE_ParVectorInnerProd_dbl(hres, hres, &rprod); rnrm[i] = rprod; - printf("rprod = %f\n", rprod); + printf("rprod = %e\n", rprod); /*=====================*/ /* step 4: solver for error in single precision */ @@ -428,6 +435,7 @@ int main (int argc, char *argv[]) HYPRE_IJVectorDestroy_dbl(ijhres); HYPRE_IJVectorDestroy_dbl(ijxtmp); + HYPRE_Finalize(); /* Finalize MPI*/ MPI_Finalize(); diff --git a/src/utilities/CMakeLists.txt b/src/utilities/CMakeLists.txt index b10ed0b3d9..a1d5922902 100644 --- a/src/utilities/CMakeLists.txt +++ b/src/utilities/CMakeLists.txt @@ -50,7 +50,6 @@ set(REGULAR_SRCS memory_tracker.c merge_sort.c mmio.c - omp_device.c prefix_sum.c qsort.c unique.c @@ -78,6 +77,7 @@ set(MUP_SRCS mup_functions.c mup_pre.c printf.c + utilities_mp.c ) if (HYPRE_ENABLE_MIXED_PRECISION) @@ -103,6 +103,7 @@ if (HYPRE_USING_GPU) omp_device.c stl_ops.c HYPRE_handle.c + utilities_mp_device.c ) convert_filenames_to_full_paths(GPU_SRCS) set(HYPRE_GPU_SOURCES ${HYPRE_GPU_SOURCES} ${GPU_SRCS} PARENT_SCOPE) diff --git a/src/utilities/HYPRE_utilities.h b/src/utilities/HYPRE_utilities.h index b9b2b29ada..46a664f17d 100644 --- a/src/utilities/HYPRE_utilities.h +++ b/src/utilities/HYPRE_utilities.h @@ -162,7 +162,11 @@ typedef HYPRE_Real HYPRE_Complex; * mixed precision code. 
*/ typedef double hypre_double; typedef float hypre_float; +#if defined (HYPRE_USING_GPU) +typedef double hypre_long_double; +#else typedef long double hypre_long_double; +#endif /*-------------------------------------------------------------------------- * Sequential MPI stuff diff --git a/src/utilities/HYPRE_utilities_mup.h b/src/utilities/HYPRE_utilities_mup.h index 2458590478..f052db27c1 100644 --- a/src/utilities/HYPRE_utilities_mup.h +++ b/src/utilities/HYPRE_utilities_mup.h @@ -1,17 +1,6 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ -#ifndef HYPRE_UTILITIES_MUP_HEADER -#define HYPRE_UTILITIES_MUP_HEADER - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined (HYPRE_MIXED_PRECISION) - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. @@ -19,17 +8,18 @@ extern "C" { * SPDX-License-Identifier: (Apache-2.0 OR MIT) ******************************************************************************/ +#ifndef HYPRE_UTILITIES_MUP_HEADER +#define HYPRE_UTILITIES_MUP_HEADER +#ifdef __cplusplus +extern "C" { +#endif -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +#if defined (HYPRE_MIXED_PRECISION) -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ +/* fixed */ +/* functions */ HYPRE_Int HYPRE_AssumedPartitionCheck_flt( void ); @@ -328,16 +318,7 @@ HYPRE_VersionNumber_long_dbl( HYPRE_Int *major_ptr, HYPRE_Int *minor_ptr, HYPRE_ HYPRE_Int HYPRE_VersionNumber( HYPRE_Int *major_ptr, HYPRE_Int *minor_ptr, HYPRE_Int *patch_ptr, HYPRE_Int *single_ptr ); - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* pre */ HYPRE_Int HYPRE_AssumedPartitionCheck_pre( HYPRE_Precision precision ); @@ -438,7 +419,6 @@ HYPRE_Version_pre( HYPRE_Precision precision, char **version_ptr ); HYPRE_Int HYPRE_VersionNumber_pre( HYPRE_Precision precision, HYPRE_Int *major_ptr, HYPRE_Int *minor_ptr, HYPRE_Int *patch_ptr, HYPRE_Int *single_ptr ); - #endif #ifdef __cplusplus diff --git a/src/utilities/Makefile b/src/utilities/Makefile index 7844d7d979..90a90ac6b5 100644 --- a/src/utilities/Makefile +++ b/src/utilities/Makefile @@ -79,12 +79,19 @@ CUFILES=\ memory.c\ omp_device.c\ stl_ops.c - + # Mixed precision files MP_FILES = \ mup_fixed.c\ mup_functions.c\ - mup_pre.c + mup_pre.c\ + utilities_mp.c + +MP_CUFILES=\ + mup_fixed_gpu.c\ + mup_functions_gpu.c\ + mup_pre_gpu.c\ + utilities_mp_device.c ifeq (${MP_BUILD}, 1) MP_FILES += printf.c multiprecision.c @@ -105,9 +112,10 @@ CUOBJS_single = ${CUFILES:.c=.obj_flt} CUOBJS_double = ${CUFILES:.c=.obj_dbl} CUOBJS_longdouble = ${CUFILES:.c=.obj_ldbl} MP_COBJS = ${MP_FILES:.c=.o} +MP_CUOBJS = ${MP_CUFILES:.c=.obj} OBJS = ${COBJS_single} ${COBJS_double} 
${COBJS_longdouble} ${MP_COBJS} -OBJS += ${CUOBJS_single} ${CUOBJS_double} ${CUOBJS_longdouble} +OBJS += ${CUOBJS_single} ${CUOBJS_double} ${CUOBJS_longdouble} ${MP_CUOBJS} endif diff --git a/src/utilities/_hypre_mup_def.h b/src/utilities/_hypre_mup_def.h index 3de93317f6..871fb30391 100644 --- a/src/utilities/_hypre_mup_def.h +++ b/src/utilities/_hypre_mup_def.h @@ -59,9 +59,13 @@ #define HYPRE_MULTIPRECISION_FUNC(a) hypre_CONCAT_(a, hypre_LDBL_SUFFIX) #define HYPRE_FIXEDPRECISION_FUNC(a) hypre_CONCAT_(a, hypre_LDBL_SUFFIX) #undef HYPRE_SINGLE +#if defined (HYPRE_USING_GPU) +#undef HYPRE_LONG_DOUBLE +#else #ifndef HYPRE_LONG_DOUBLE #define HYPRE_LONG_DOUBLE 1 #endif +#endif #else diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 5b5bddff57..90396d6dc3 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1075,12 +1075,10 @@ HYPRE_Int hypre_MPI_Type_free( hypre_MPI_Datatype *datatype ); HYPRE_Int hypre_MPI_Op_free( hypre_MPI_Op *op ); HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, hypre_MPI_Op *op ); -#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, hypre_MPI_Comm *newcomm); HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); -#endif HYPRE_Int hypre_MPI_CheckCommMatrix( hypre_MPI_Comm comm, HYPRE_Int num_recvs, HYPRE_Int *recvs, HYPRE_Int num_sends, HYPRE_Int *sends ); @@ -2441,6 +2439,7 @@ char* hypre_ConvertIndicesToString(HYPRE_Int size, HYPRE_Int *indices); HYPRE_Int hypre_SetSyncCudaCompute(HYPRE_Int action); HYPRE_Int hypre_RestoreSyncCudaCompute(void); HYPRE_Int hypre_GetSyncCudaCompute(HYPRE_Int *cuda_compute_stream_sync_ptr); +size_t hypre_GetSizeOfReal(void); /* handle.c */ HYPRE_Int hypre_SetLogLevel( HYPRE_Int log_level ); @@ -4102,6 +4101,40 @@ 
hypre_GlobalPrecision(); #endif +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/* Mixed precision function protos */ + +#ifdef HYPRE_MIXED_PRECISION +/* utilities_mp.c */ +HYPRE_Int +hypre_RealArrayCopyHost_mp(HYPRE_Precision precision_x, void *x, + HYPRE_Precision precision_y, void *y, HYPRE_Int n); +HYPRE_Int +hypre_RealArrayCopy_mp(HYPRE_Precision precision_x, void *x, HYPRE_MemoryLocation location_x, + HYPRE_Precision precision_y, void *y, HYPRE_MemoryLocation location_y, HYPRE_Int n); +void * +hypre_RealArrayClone_mp(HYPRE_Precision precision_x, void *x, HYPRE_MemoryLocation location_x, + HYPRE_Precision new_precision, HYPRE_MemoryLocation new_location, HYPRE_Int n); +HYPRE_Int +hypre_RealArrayAxpynHost_mp(HYPRE_Precision precision_x, hypre_long_double alpha, void *x, + HYPRE_Precision precision_y, void *y, HYPRE_Int n); +HYPRE_Int +hypre_RealArrayAxpyn_mp(HYPRE_Precision precision_x, void *x, HYPRE_Precision precision_y, void *y, + HYPRE_MemoryLocation location, HYPRE_Int n, hypre_long_double alpha); +/* utilities_mp_device.c */ +HYPRE_Int +hypre_RealArrayCopyDevice_mp(HYPRE_Precision precision_x, void *x, + HYPRE_Precision precision_y, void *y, HYPRE_Int n); +HYPRE_Int +hypre_RealArrayAxpynDevice_mp(HYPRE_Precision precision_x, hypre_long_double alpha, void *x, + HYPRE_Precision precision_y, void *y, HYPRE_Int n); +#endif #ifdef __cplusplus } diff --git a/src/utilities/_hypre_utilities.hpp b/src/utilities/_hypre_utilities.hpp index 3f413abce0..095aca9134 100644 --- a/src/utilities/_hypre_utilities.hpp +++ b/src/utilities/_hypre_utilities.hpp @@ -129,6 +129,21 @@ struct hypreFunctor_NonzeroAboveTol } }; 
+/*-------------------------------------------------------------------------- + * hypreFunctor_ElementCast + * + * Functor for performing casting data between datatypes + *--------------------------------------------------------------------------*/ + +template <typename T, typename T2> +struct hypreFunctor_ElementCast +{ + __host__ __device__ T2 operator()(T a) + { + return static_cast<T2>(a); + } +}; + #endif /* if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */ #endif /* ifndef HYPRE_FUNCTORS_H */ /****************************************************************************** @@ -504,6 +519,15 @@ using hypre_DeviceItem = sycl::nd_item<3>; * NOTE: IN HYPRE'S DEFAULT STREAM * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ +/* unified device stream */ +#if defined(HYPRE_USING_CUDA) +typedef cudaStream_t hypre_DeviceStream; +#elif defined(HYPRE_USING_HIP) +typedef hipStream_t hypre_DeviceStream; +#elif defined(HYPRE_USING_SYCL) +typedef sycl::queue* hypre_DeviceStream; +#endif + #if defined(HYPRE_DEBUG) #define GPU_LAUNCH_SYNC { hypre_SyncComputeStream(); hypre_GetDeviceLastError(); } #else @@ -935,32 +959,45 @@ using hypre_DeviceItem = sycl::nd_item<3>; * device info data structures * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ +/* unified dense solver handle */ #if defined(HYPRE_USING_CUSOLVER) typedef cusolverDnHandle_t vendorSolverHandle_t; #elif defined(HYPRE_USING_ROCSOLVER) typedef rocblas_handle vendorSolverHandle_t; +#else +typedef void * vendorSolverHandle_t; #endif -struct hypre_DeviceData -{ +/* unified rand generator */ #if defined(HYPRE_USING_CURAND) - curandGenerator_t curand_generator; +typedef curandGenerator_t hypre_DeviceRandGenerator; +#elif defined(HYPRE_USING_ROCRAND) +typedef rocrand_generator hypre_DeviceRandGenerator; +#else +typedef void * hypre_DeviceRandGenerator; #endif -#if defined(HYPRE_USING_ROCRAND) - rocrand_generator curand_generator; +/* unified sparse LA library */ +#if 
defined(HYPRE_USING_CUSPARSE) +typedef cusparseHandle_t hypre_DeviceSparseLibHandle; +#elif defined(HYPRE_USING_ROCSPARSE) +typedef rocsparse_handle hypre_DeviceSparseLibHandle; +#else +typedef void * hypre_DeviceSparseLibHandle; #endif -#if defined(HYPRE_USING_CUBLAS) - cublasHandle_t cublas_handle; +struct hypre_DeviceData +{ +#if defined(HYPRE_USING_CURAND) || defined(HYPRE_USING_ROCRAND) + hypre_DeviceRandGenerator curand_generator; #endif -#if defined(HYPRE_USING_CUSPARSE) - cusparseHandle_t cusparse_handle; +#if defined(HYPRE_USING_CUBLAS) + cublasHandle_t cublas_handle; #endif -#if defined(HYPRE_USING_ROCSPARSE) - rocsparse_handle cusparse_handle; +#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) + hypre_DeviceSparseLibHandle cusparse_handle; #endif #if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER) @@ -969,13 +1006,7 @@ struct hypre_DeviceData /* TODO (VPM): Change to HYPRE_USING_GPU_STREAMS*/ #if defined(HYPRE_USING_CUDA_STREAMS) -#if defined(HYPRE_USING_CUDA) - cudaStream_t streams[HYPRE_MAX_NUM_STREAMS]; -#elif defined(HYPRE_USING_HIP) - hipStream_t streams[HYPRE_MAX_NUM_STREAMS]; -#elif defined(HYPRE_USING_SYCL) - sycl::queue* streams[HYPRE_MAX_NUM_STREAMS] = {NULL}; -#endif + hypre_DeviceStream streams[HYPRE_MAX_NUM_STREAMS] = {0}; #endif #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) @@ -1037,41 +1068,20 @@ struct hypre_DeviceData hypre_DeviceData* hypre_DeviceDataCreate(); void hypre_DeviceDataDestroy(hypre_DeviceData* data); -#if defined(HYPRE_USING_CURAND) -curandGenerator_t hypre_DeviceDataCurandGenerator(hypre_DeviceData *data); -#endif - -#if defined(HYPRE_USING_ROCRAND) -rocrand_generator hypre_DeviceDataCurandGenerator(hypre_DeviceData *data); -#endif +hypre_DeviceRandGenerator hypre_DeviceDataCurandGenerator(hypre_DeviceData *data); #if defined(HYPRE_USING_CUBLAS) cublasHandle_t hypre_DeviceDataCublasHandle(hypre_DeviceData *data); #endif -#if defined(HYPRE_USING_CUSPARSE) -cusparseHandle_t 
hypre_DeviceDataCusparseHandle(hypre_DeviceData *data); -#endif - -#if defined(HYPRE_USING_ROCSPARSE) -rocsparse_handle hypre_DeviceDataCusparseHandle(hypre_DeviceData *data); -#endif +hypre_DeviceSparseLibHandle hypre_DeviceDataCusparseHandle(hypre_DeviceData *data); #if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER) vendorSolverHandle_t hypre_DeviceDataVendorSolverHandle(hypre_DeviceData *data); #endif -/* TODO (VPM): Create a deviceStream_t to encapsulate all stream types below */ -#if defined(HYPRE_USING_CUDA) -cudaStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i); -cudaStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data); -#elif defined(HYPRE_USING_HIP) -hipStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i); -hipStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data); -#elif defined(HYPRE_USING_SYCL) -sycl::queue* hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i); -sycl::queue* hypre_DeviceDataComputeStream(hypre_DeviceData *data); -#endif +hypre_DeviceStream hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i); +hypre_DeviceStream hypre_DeviceDataComputeStream(hypre_DeviceData *data); /* Data structure and accessor routines for Sparse Triangular Matrices */ struct hypre_CsrsvData @@ -2455,6 +2465,8 @@ template HYPRE_Int hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, T *d_row_num, T *d_row_ind); +template +HYPRE_Int hypreDevice_Axpyzn_mp(HYPRE_Int n, T1 *d_x, T2 *d_y, T3 *d_z, T1 a, T2 b); #endif #if defined(HYPRE_USING_CUSPARSE) @@ -2467,7 +2479,7 @@ cusparseIndexType_t hypre_HYPREIntToCusparseIndexType(); #endif // #if defined(HYPRE_USING_CUSPARSE) -#endif /* #ifndef HYPRE_CUDA_UTILS_H */ +#endif /* #ifndef HYPRE_DEVICE_UTILS_H */ /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. 
See the top-level COPYRIGHT file for details. @@ -2840,6 +2852,7 @@ struct ReduceSum #ifndef hypre_MP_BUILD #include "_hypre_utilities_mup_undef.h" #include "_hypre_utilities_mup.h" +#include "_hypre_utilities_mup.hpp" #endif #endif diff --git a/src/utilities/_hypre_utilities_mup.h b/src/utilities/_hypre_utilities_mup.h index 06febb7164..dba5e1f6f7 100644 --- a/src/utilities/_hypre_utilities_mup.h +++ b/src/utilities/_hypre_utilities_mup.h @@ -1,6 +1,13 @@ /*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef hypre_UTILITIES_MUP_HEADER #define hypre_UTILITIES_MUP_HEADER @@ -10,15 +17,7 @@ extern "C" { #if defined (HYPRE_MIXED_PRECISION) -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
- * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - +/* fixed */ HYPRE_Int hypre_BeginTiming_fcn_flt( HYPRE_Int time_index ); @@ -720,6 +719,13 @@ hypre_MPI_Comm_split_dbl( hypre_MPI_Comm comm, HYPRE_Int n, HYPRE_Int m, hypre_M HYPRE_Int hypre_MPI_Comm_split_long_dbl( hypre_MPI_Comm comm, HYPRE_Int n, HYPRE_Int m, hypre_MPI_Comm * comms ); +HYPRE_Int +hypre_MPI_Comm_split_type_flt( hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, hypre_MPI_Comm *newcomm ); +HYPRE_Int +hypre_MPI_Comm_split_type_dbl( hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, hypre_MPI_Comm *newcomm ); +HYPRE_Int +hypre_MPI_Comm_split_type_long_dbl( hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, hypre_MPI_Comm *newcomm ); + HYPRE_Int hypre_MPI_Finalize_flt( void ); HYPRE_Int @@ -762,6 +768,20 @@ hypre_MPI_Group_incl_dbl( hypre_MPI_Group group, HYPRE_Int n, HYPRE_Int *ranks, HYPRE_Int hypre_MPI_Group_incl_long_dbl( hypre_MPI_Group group, HYPRE_Int n, HYPRE_Int *ranks, hypre_MPI_Group *newgroup ); +HYPRE_Int +hypre_MPI_Info_create_flt( hypre_MPI_Info *info ); +HYPRE_Int +hypre_MPI_Info_create_dbl( hypre_MPI_Info *info ); +HYPRE_Int +hypre_MPI_Info_create_long_dbl( hypre_MPI_Info *info ); + +HYPRE_Int +hypre_MPI_Info_free_flt( hypre_MPI_Info *info ); +HYPRE_Int +hypre_MPI_Info_free_dbl( hypre_MPI_Info *info ); +HYPRE_Int +hypre_MPI_Info_free_long_dbl( hypre_MPI_Info *info ); + HYPRE_Int hypre_MPI_Init_flt( hypre_int *argc, char ***argv ); HYPRE_Int @@ -1847,28 +1867,21 @@ utilities_FortranMatrixWrap_dbl( hypre_double* v, HYPRE_BigInt gh, HYPRE_BigInt void utilities_FortranMatrixWrap_long_dbl( hypre_long_double* v, HYPRE_BigInt gh, HYPRE_BigInt h, HYPRE_BigInt w, utilities_FortranMatrix* mtx ); +/* functions */ -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - 
-/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - - - -/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ - -/****************************************************************************** - * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ +size_t +hypre_GetSizeOfReal_flt( void ); +size_t +hypre_GetSizeOfReal_dbl( void ); +size_t +hypre_GetSizeOfReal_long_dbl( void ); +size_t +hypre_GetSizeOfReal( void ); +/* pre */ +size_t +hypre_GetSizeOfReal_pre( HYPRE_Precision precision ); #endif diff --git a/src/utilities/_hypre_utilities_mup.hpp b/src/utilities/_hypre_utilities_mup.hpp new file mode 100644 index 0000000000..f9be5f571d --- /dev/null +++ b/src/utilities/_hypre_utilities_mup.hpp @@ -0,0 +1,466 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#ifndef hypre_UTILITIES_MUP_HEADER_CXX +#define hypre_UTILITIES_MUP_HEADER_CXX + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined (HYPRE_MIXED_PRECISION) +#if defined(HYPRE_USING_GPU) + +/* fixed_gpu */ + +HYPRE_Int +hypreDevice_BigIntAxpyn_flt( HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt *d_y, HYPRE_BigInt *d_z, HYPRE_BigInt a ); +HYPRE_Int +hypreDevice_BigIntAxpyn_dbl( HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt *d_y, HYPRE_BigInt *d_z, HYPRE_BigInt a ); +HYPRE_Int +hypreDevice_BigIntAxpyn_long_dbl( HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt *d_y, HYPRE_BigInt *d_z, HYPRE_BigInt a ); + +HYPRE_Int +hypreDevice_BigIntFilln_flt( HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt v ); +HYPRE_Int +hypreDevice_BigIntFilln_dbl( HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt v ); +HYPRE_Int +hypreDevice_BigIntFilln_long_dbl( HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt v ); + +HYPRE_Int +hypreDevice_CharFilln_flt( char *d_x, size_t n, char v ); +HYPRE_Int +hypreDevice_CharFilln_dbl( char *d_x, size_t n, char v ); +HYPRE_Int +hypreDevice_CharFilln_long_dbl( char *d_x, size_t n, char v ); + +HYPRE_Int +hypreDevice_ComplexArrayToArrayOfPtrs_flt( HYPRE_Int n, HYPRE_Int m, hypre_float *data, hypre_float **data_aop ); +HYPRE_Int +hypreDevice_ComplexArrayToArrayOfPtrs_dbl( HYPRE_Int n, HYPRE_Int m, hypre_double *data, hypre_double **data_aop ); +HYPRE_Int +hypreDevice_ComplexArrayToArrayOfPtrs_long_dbl( HYPRE_Int n, HYPRE_Int m, hypre_long_double *data, hypre_long_double **data_aop ); + +HYPRE_Int +hypreDevice_ComplexAxpyn_flt( hypre_float *d_x, size_t n, hypre_float *d_y, hypre_float *d_z, hypre_float a ); +HYPRE_Int +hypreDevice_ComplexAxpyn_dbl( hypre_double *d_x, size_t n, hypre_double *d_y, hypre_double *d_z, hypre_double a ); +HYPRE_Int +hypreDevice_ComplexAxpyn_long_dbl( hypre_long_double *d_x, size_t n, hypre_long_double *d_y, hypre_long_double 
*d_z, hypre_long_double a ); + +HYPRE_Int +hypreDevice_ComplexAxpyzn_flt( HYPRE_Int n, hypre_float *d_x, hypre_float *d_y, hypre_float *d_z, hypre_float a, hypre_float b ); +HYPRE_Int +hypreDevice_ComplexAxpyzn_dbl( HYPRE_Int n, hypre_double *d_x, hypre_double *d_y, hypre_double *d_z, hypre_double a, hypre_double b ); +HYPRE_Int +hypreDevice_ComplexAxpyzn_long_dbl( HYPRE_Int n, hypre_long_double *d_x, hypre_long_double *d_y, hypre_long_double *d_z, hypre_long_double a, hypre_long_double b ); + +HYPRE_Int +hypreDevice_ComplexFilln_flt( hypre_float *d_x, size_t n, hypre_float v ); +HYPRE_Int +hypreDevice_ComplexFilln_dbl( hypre_double *d_x, size_t n, hypre_double v ); +HYPRE_Int +hypreDevice_ComplexFilln_long_dbl( hypre_long_double *d_x, size_t n, hypre_long_double v ); + +HYPRE_Complex +hypreDevice_ComplexReduceSum_flt( HYPRE_Int m, hypre_float *d_x ); +HYPRE_Complex +hypreDevice_ComplexReduceSum_dbl( HYPRE_Int m, hypre_double *d_x ); +HYPRE_Complex +hypreDevice_ComplexReduceSum_long_dbl( HYPRE_Int m, hypre_long_double *d_x ); + +HYPRE_Int +hypreDevice_ComplexScalen_flt( hypre_float *d_x, size_t n, hypre_float *d_y, hypre_float v ); +HYPRE_Int +hypreDevice_ComplexScalen_dbl( hypre_double *d_x, size_t n, hypre_double *d_y, hypre_double v ); +HYPRE_Int +hypreDevice_ComplexScalen_long_dbl( hypre_long_double *d_x, size_t n, hypre_long_double *d_y, hypre_long_double v ); + +HYPRE_Int +hypreDevice_ComplexStridedCopy_flt( HYPRE_Int size, HYPRE_Int stride, hypre_float *in, hypre_float *out ); +HYPRE_Int +hypreDevice_ComplexStridedCopy_dbl( HYPRE_Int size, HYPRE_Int stride, hypre_double *in, hypre_double *out ); +HYPRE_Int +hypreDevice_ComplexStridedCopy_long_dbl( HYPRE_Int size, HYPRE_Int stride, hypre_long_double *in, hypre_long_double *out ); + +HYPRE_Int +hypreDevice_CopyParCSRRows_flt( HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, HYPRE_BigInt first_col, HYPRE_BigInt *d_col_map_offd_A, HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, hypre_float 
*d_diag_a, HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, hypre_float *d_offd_a, HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, hypre_float *d_ab ); +HYPRE_Int +hypreDevice_CopyParCSRRows_dbl( HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, HYPRE_BigInt first_col, HYPRE_BigInt *d_col_map_offd_A, HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, hypre_double *d_diag_a, HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, hypre_double *d_offd_a, HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, hypre_double *d_ab ); +HYPRE_Int +hypreDevice_CopyParCSRRows_long_dbl( HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, HYPRE_BigInt first_col, HYPRE_BigInt *d_col_map_offd_A, HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, hypre_long_double *d_diag_a, HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, hypre_long_double *d_offd_a, HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, hypre_long_double *d_ab ); + +HYPRE_Int* +hypreDevice_CsrRowIndicesToPtrs_flt( HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind ); +HYPRE_Int* +hypreDevice_CsrRowIndicesToPtrs_dbl( HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind ); +HYPRE_Int* +hypreDevice_CsrRowIndicesToPtrs_long_dbl( HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind ); + +HYPRE_Int +hypreDevice_CsrRowIndicesToPtrs_v2_flt( HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind, HYPRE_Int *d_row_ptr ); +HYPRE_Int +hypreDevice_CsrRowIndicesToPtrs_v2_dbl( HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind, HYPRE_Int *d_row_ptr ); +HYPRE_Int +hypreDevice_CsrRowIndicesToPtrs_v2_long_dbl( HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind, HYPRE_Int *d_row_ptr ); + +HYPRE_Int* +hypreDevice_CsrRowPtrsToIndices_flt( HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr ); +HYPRE_Int* +hypreDevice_CsrRowPtrsToIndices_dbl( HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr ); +HYPRE_Int* +hypreDevice_CsrRowPtrsToIndices_long_dbl( HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr ); + +HYPRE_Int +hypreDevice_CsrRowPtrsToIndices_v2_flt( 
HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, HYPRE_Int *d_row_ind ); +HYPRE_Int +hypreDevice_CsrRowPtrsToIndices_v2_dbl( HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, HYPRE_Int *d_row_ind ); +HYPRE_Int +hypreDevice_CsrRowPtrsToIndices_v2_long_dbl( HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, HYPRE_Int *d_row_ind ); + +HYPRE_Int +hypreDevice_DiagScaleVector_flt( HYPRE_Int num_vectors, HYPRE_Int num_rows, HYPRE_Int *A_i, hypre_float *A_data, hypre_float *x, hypre_float beta, hypre_float *y ); +HYPRE_Int +hypreDevice_DiagScaleVector_dbl( HYPRE_Int num_vectors, HYPRE_Int num_rows, HYPRE_Int *A_i, hypre_double *A_data, hypre_double *x, hypre_double beta, hypre_double *y ); +HYPRE_Int +hypreDevice_DiagScaleVector_long_dbl( HYPRE_Int num_vectors, HYPRE_Int num_rows, HYPRE_Int *A_i, hypre_long_double *A_data, hypre_long_double *x, hypre_long_double beta, hypre_long_double *y ); + +HYPRE_Int +hypreDevice_DiagScaleVector2_flt( HYPRE_Int num_vectors, HYPRE_Int num_rows, hypre_float *diag, hypre_float *x, hypre_float beta, hypre_float *y, hypre_float *z, HYPRE_Int computeY ); +HYPRE_Int +hypreDevice_DiagScaleVector2_dbl( HYPRE_Int num_vectors, HYPRE_Int num_rows, hypre_double *diag, hypre_double *x, hypre_double beta, hypre_double *y, hypre_double *z, HYPRE_Int computeY ); +HYPRE_Int +hypreDevice_DiagScaleVector2_long_dbl( HYPRE_Int num_vectors, HYPRE_Int num_rows, hypre_long_double *diag, hypre_long_double *x, hypre_long_double beta, hypre_long_double *y, hypre_long_double *z, HYPRE_Int computeY ); + +HYPRE_Int +hypreDevice_GetRowNnz_flt( HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int *d_diag_ia, HYPRE_Int *d_offd_ia, HYPRE_Int *d_rownnz ); +HYPRE_Int +hypreDevice_GetRowNnz_dbl( HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int *d_diag_ia, HYPRE_Int *d_offd_ia, HYPRE_Int *d_rownnz ); +HYPRE_Int +hypreDevice_GetRowNnz_long_dbl( HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int *d_diag_ia, HYPRE_Int *d_offd_ia, HYPRE_Int *d_rownnz ); + 
+HYPRE_Int +hypreDevice_IVAMXPMY_flt( HYPRE_Int m, HYPRE_Int n, hypre_float *a, hypre_float *x, hypre_float *y ); +HYPRE_Int +hypreDevice_IVAMXPMY_dbl( HYPRE_Int m, HYPRE_Int n, hypre_double *a, hypre_double *x, hypre_double *y ); +HYPRE_Int +hypreDevice_IVAMXPMY_long_dbl( HYPRE_Int m, HYPRE_Int n, hypre_long_double *a, hypre_long_double *x, hypre_long_double *y ); + +HYPRE_Int +hypreDevice_IVAXPY_flt( HYPRE_Int n, hypre_float *a, hypre_float *x, hypre_float *y ); +HYPRE_Int +hypreDevice_IVAXPY_dbl( HYPRE_Int n, hypre_double *a, hypre_double *x, hypre_double *y ); +HYPRE_Int +hypreDevice_IVAXPY_long_dbl( HYPRE_Int n, hypre_long_double *a, hypre_long_double *x, hypre_long_double *y ); + +HYPRE_Int +hypreDevice_IVAXPYMarked_flt( HYPRE_Int n, hypre_float *a, hypre_float *x, hypre_float *y, HYPRE_Int *marker, HYPRE_Int marker_val ); +HYPRE_Int +hypreDevice_IVAXPYMarked_dbl( HYPRE_Int n, hypre_double *a, hypre_double *x, hypre_double *y, HYPRE_Int *marker, HYPRE_Int marker_val ); +HYPRE_Int +hypreDevice_IVAXPYMarked_long_dbl( HYPRE_Int n, hypre_long_double *a, hypre_long_double *x, hypre_long_double *y, HYPRE_Int *marker, HYPRE_Int marker_val ); + +HYPRE_Int +hypreDevice_IntAxpyn_flt( HYPRE_Int *d_x, size_t n, HYPRE_Int *d_y, HYPRE_Int *d_z, HYPRE_Int a ); +HYPRE_Int +hypreDevice_IntAxpyn_dbl( HYPRE_Int *d_x, size_t n, HYPRE_Int *d_y, HYPRE_Int *d_z, HYPRE_Int a ); +HYPRE_Int +hypreDevice_IntAxpyn_long_dbl( HYPRE_Int *d_x, size_t n, HYPRE_Int *d_y, HYPRE_Int *d_z, HYPRE_Int a ); + +HYPRE_Int +hypreDevice_IntFilln_flt( HYPRE_Int *d_x, size_t n, HYPRE_Int v ); +HYPRE_Int +hypreDevice_IntFilln_dbl( HYPRE_Int *d_x, size_t n, HYPRE_Int v ); +HYPRE_Int +hypreDevice_IntFilln_long_dbl( HYPRE_Int *d_x, size_t n, HYPRE_Int v ); + +HYPRE_Int +hypreDevice_IntScalen_flt( HYPRE_Int *d_x, size_t n, HYPRE_Int *d_y, HYPRE_Int v ); +HYPRE_Int +hypreDevice_IntScalen_dbl( HYPRE_Int *d_x, size_t n, HYPRE_Int *d_y, HYPRE_Int v ); +HYPRE_Int +hypreDevice_IntScalen_long_dbl( HYPRE_Int *d_x, 
size_t n, HYPRE_Int *d_y, HYPRE_Int v ); + +HYPRE_Int +hypreDevice_IntStridedCopy_flt( HYPRE_Int size, HYPRE_Int stride, HYPRE_Int *in, HYPRE_Int *out ); +HYPRE_Int +hypreDevice_IntStridedCopy_dbl( HYPRE_Int size, HYPRE_Int stride, HYPRE_Int *in, HYPRE_Int *out ); +HYPRE_Int +hypreDevice_IntStridedCopy_long_dbl( HYPRE_Int size, HYPRE_Int stride, HYPRE_Int *in, HYPRE_Int *out ); + +HYPRE_Int +hypreDevice_IntegerExclusiveScan_flt( HYPRE_Int n, HYPRE_Int *d_i ); +HYPRE_Int +hypreDevice_IntegerExclusiveScan_dbl( HYPRE_Int n, HYPRE_Int *d_i ); +HYPRE_Int +hypreDevice_IntegerExclusiveScan_long_dbl( HYPRE_Int n, HYPRE_Int *d_i ); + +HYPRE_Int +hypreDevice_IntegerInclusiveScan_flt( HYPRE_Int n, HYPRE_Int *d_i ); +HYPRE_Int +hypreDevice_IntegerInclusiveScan_dbl( HYPRE_Int n, HYPRE_Int *d_i ); +HYPRE_Int +hypreDevice_IntegerInclusiveScan_long_dbl( HYPRE_Int n, HYPRE_Int *d_i ); + +HYPRE_Int +hypreDevice_IntegerReduceSum_flt( HYPRE_Int m, HYPRE_Int *d_i ); +HYPRE_Int +hypreDevice_IntegerReduceSum_dbl( HYPRE_Int m, HYPRE_Int *d_i ); +HYPRE_Int +hypreDevice_IntegerReduceSum_long_dbl( HYPRE_Int m, HYPRE_Int *d_i ); + +HYPRE_Int +hypreDevice_zeqxmydd_flt( HYPRE_Int n, hypre_float *x, hypre_float alpha, hypre_float *y, hypre_float *z, hypre_float *d ); +HYPRE_Int +hypreDevice_zeqxmydd_dbl( HYPRE_Int n, hypre_double *x, hypre_double alpha, hypre_double *y, hypre_double *z, hypre_double *d ); +HYPRE_Int +hypreDevice_zeqxmydd_long_dbl( HYPRE_Int n, hypre_long_double *x, hypre_long_double alpha, hypre_long_double *y, hypre_long_double *z, hypre_long_double *d ); + +HYPRE_Int +hypre_CudaCompileFlagCheck_flt( void ); +HYPRE_Int +hypre_CudaCompileFlagCheck_dbl( void ); +HYPRE_Int +hypre_CudaCompileFlagCheck_long_dbl( void ); + +HYPRE_Int +hypre_CurandUniform_flt( HYPRE_Int n, hypre_float *urand, HYPRE_Int set_seed, hypre_ulonglongint seed, HYPRE_Int set_offset, hypre_ulonglongint offset ); +HYPRE_Int +hypre_CurandUniform_dbl( HYPRE_Int n, hypre_double *urand, HYPRE_Int set_seed, 
hypre_ulonglongint seed, HYPRE_Int set_offset, hypre_ulonglongint offset ); +HYPRE_Int +hypre_CurandUniform_long_dbl( HYPRE_Int n, hypre_long_double *urand, HYPRE_Int set_seed, hypre_ulonglongint seed, HYPRE_Int set_offset, hypre_ulonglongint offset ); + +HYPRE_Int +hypre_CurandUniformSingle_flt( HYPRE_Int n, float *urand, HYPRE_Int set_seed, hypre_ulonglongint seed, HYPRE_Int set_offset, hypre_ulonglongint offset ); +HYPRE_Int +hypre_CurandUniformSingle_dbl( HYPRE_Int n, float *urand, HYPRE_Int set_seed, hypre_ulonglongint seed, HYPRE_Int set_offset, hypre_ulonglongint offset ); +HYPRE_Int +hypre_CurandUniformSingle_long_dbl( HYPRE_Int n, float *urand, HYPRE_Int set_seed, hypre_ulonglongint seed, HYPRE_Int set_offset, hypre_ulonglongint offset ); + +HYPRE_Int +hypre_DeviceMemoryGetUsage_flt( hypre_float *mem ); +HYPRE_Int +hypre_DeviceMemoryGetUsage_dbl( hypre_double *mem ); +HYPRE_Int +hypre_DeviceMemoryGetUsage_long_dbl( hypre_long_double *mem ); + +HYPRE_Int +hypre_ForceSyncComputeStream_flt( ); +HYPRE_Int +hypre_ForceSyncComputeStream_dbl( ); +HYPRE_Int +hypre_ForceSyncComputeStream_long_dbl( ); + +HYPRE_Int +hypre_GetSyncCudaCompute_flt( HYPRE_Int *cuda_compute_stream_sync_ptr ); +HYPRE_Int +hypre_GetSyncCudaCompute_dbl( HYPRE_Int *cuda_compute_stream_sync_ptr ); +HYPRE_Int +hypre_GetSyncCudaCompute_long_dbl( HYPRE_Int *cuda_compute_stream_sync_ptr ); + +HYPRE_Int +hypre_IntArrayCountDevice_flt( hypre_IntArray *v, HYPRE_Int value, HYPRE_Int *num_values_ptr ); +HYPRE_Int +hypre_IntArrayCountDevice_dbl( hypre_IntArray *v, HYPRE_Int value, HYPRE_Int *num_values_ptr ); +HYPRE_Int +hypre_IntArrayCountDevice_long_dbl( hypre_IntArray *v, HYPRE_Int value, HYPRE_Int *num_values_ptr ); + +HYPRE_Int +hypre_IntArrayInverseMappingDevice_flt( hypre_IntArray *v, hypre_IntArray *w ); +HYPRE_Int +hypre_IntArrayInverseMappingDevice_dbl( hypre_IntArray *v, hypre_IntArray *w ); +HYPRE_Int +hypre_IntArrayInverseMappingDevice_long_dbl( hypre_IntArray *v, hypre_IntArray *w ); + 
+HYPRE_Int +hypre_IntArrayNegateDevice_flt( hypre_IntArray *v ); +HYPRE_Int +hypre_IntArrayNegateDevice_dbl( hypre_IntArray *v ); +HYPRE_Int +hypre_IntArrayNegateDevice_long_dbl( hypre_IntArray *v ); + +HYPRE_Int +hypre_IntArraySeparateByValueDevice_flt( HYPRE_Int num_values, HYPRE_Int *values, HYPRE_Int *sizes, hypre_IntArray *v, hypre_IntArrayArray *w ); +HYPRE_Int +hypre_IntArraySeparateByValueDevice_dbl( HYPRE_Int num_values, HYPRE_Int *values, HYPRE_Int *sizes, hypre_IntArray *v, hypre_IntArrayArray *w ); +HYPRE_Int +hypre_IntArraySeparateByValueDevice_long_dbl( HYPRE_Int num_values, HYPRE_Int *values, HYPRE_Int *sizes, hypre_IntArray *v, hypre_IntArrayArray *w ); + +HYPRE_Int +hypre_IntArraySetConstantValuesDevice_flt( hypre_IntArray *v, HYPRE_Int value ); +HYPRE_Int +hypre_IntArraySetConstantValuesDevice_dbl( hypre_IntArray *v, HYPRE_Int value ); +HYPRE_Int +hypre_IntArraySetConstantValuesDevice_long_dbl( hypre_IntArray *v, HYPRE_Int value ); + +HYPRE_Int +hypre_IntArraySetInterleavedValuesDevice_flt( hypre_IntArray *v, HYPRE_Int cycle ); +HYPRE_Int +hypre_IntArraySetInterleavedValuesDevice_dbl( hypre_IntArray *v, HYPRE_Int cycle ); +HYPRE_Int +hypre_IntArraySetInterleavedValuesDevice_long_dbl( hypre_IntArray *v, HYPRE_Int cycle ); + +HYPRE_Int +hypre_ResetDevice_flt( ); +HYPRE_Int +hypre_ResetDevice_dbl( ); +HYPRE_Int +hypre_ResetDevice_long_dbl( ); + +HYPRE_Int +hypre_ResetDeviceRandGenerator_flt( hypre_ulonglongint seed, hypre_ulonglongint offset ); +HYPRE_Int +hypre_ResetDeviceRandGenerator_dbl( hypre_ulonglongint seed, hypre_ulonglongint offset ); +HYPRE_Int +hypre_ResetDeviceRandGenerator_long_dbl( hypre_ulonglongint seed, hypre_ulonglongint offset ); + +HYPRE_Int +hypre_RestoreSyncCudaCompute_flt( void ); +HYPRE_Int +hypre_RestoreSyncCudaCompute_dbl( void ); +HYPRE_Int +hypre_RestoreSyncCudaCompute_long_dbl( void ); + +HYPRE_Int +hypre_SetSyncCudaCompute_flt( HYPRE_Int action ); +HYPRE_Int +hypre_SetSyncCudaCompute_dbl( HYPRE_Int action ); +HYPRE_Int 
+hypre_SetSyncCudaCompute_long_dbl( HYPRE_Int action ); + +HYPRE_Int +hypre_SyncComputeStream_flt( ); +HYPRE_Int +hypre_SyncComputeStream_dbl( ); +HYPRE_Int +hypre_SyncComputeStream_long_dbl( ); + +HYPRE_Int +hypre_SyncDevice_flt( ); +HYPRE_Int +hypre_SyncDevice_dbl( ); +HYPRE_Int +hypre_SyncDevice_long_dbl( ); + +/* functions_gpu */ + +/* pre_gpu */ + + +#endif +#endif + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus +extern "C++" { +#endif + +#if defined (HYPRE_MIXED_PRECISION) +#if defined(HYPRE_USING_GPU) + +/* fixed_gpu */ + +HYPRE_Int +hypreDevice_GenScatterAdd_flt( hypre_float *x, HYPRE_Int ny, HYPRE_Int *map, hypre_float *y, char *work ); +HYPRE_Int +hypreDevice_GenScatterAdd_dbl( hypre_double *x, HYPRE_Int ny, HYPRE_Int *map, hypre_double *y, char *work ); +HYPRE_Int +hypreDevice_GenScatterAdd_long_dbl( hypre_long_double *x, HYPRE_Int ny, HYPRE_Int *map, hypre_long_double *y, char *work ); + +hypre_DeviceStream +hypre_DeviceDataComputeStream_flt( hypre_DeviceData *data ); +hypre_DeviceStream +hypre_DeviceDataComputeStream_dbl( hypre_DeviceData *data ); +hypre_DeviceStream +hypre_DeviceDataComputeStream_long_dbl( hypre_DeviceData *data ); + +hypre_DeviceData* +hypre_DeviceDataCreate_flt( ); +hypre_DeviceData* +hypre_DeviceDataCreate_dbl( ); +hypre_DeviceData* +hypre_DeviceDataCreate_long_dbl( ); + +hypre_DeviceRandGenerator +hypre_DeviceDataCurandGenerator_flt( hypre_DeviceData *data ); +hypre_DeviceRandGenerator +hypre_DeviceDataCurandGenerator_dbl( hypre_DeviceData *data ); +hypre_DeviceRandGenerator +hypre_DeviceDataCurandGenerator_long_dbl( hypre_DeviceData *data ); + +hypre_DeviceSparseLibHandle +hypre_DeviceDataCusparseHandle_flt( hypre_DeviceData *data ); +hypre_DeviceSparseLibHandle +hypre_DeviceDataCusparseHandle_dbl( hypre_DeviceData *data ); +hypre_DeviceSparseLibHandle +hypre_DeviceDataCusparseHandle_long_dbl( hypre_DeviceData *data ); + +void +hypre_DeviceDataDestroy_flt( hypre_DeviceData* data ); +void +hypre_DeviceDataDestroy_dbl( 
hypre_DeviceData* data ); +void +hypre_DeviceDataDestroy_long_dbl( hypre_DeviceData* data ); + +hypre_DeviceStream +hypre_DeviceDataStream_flt( hypre_DeviceData *data, HYPRE_Int i ); +hypre_DeviceStream +hypre_DeviceDataStream_dbl( hypre_DeviceData *data, HYPRE_Int i ); +hypre_DeviceStream +hypre_DeviceDataStream_long_dbl( hypre_DeviceData *data, HYPRE_Int i ); + +dim3 +hypre_GetDefaultDeviceBlockDimension_flt( ); +dim3 +hypre_GetDefaultDeviceBlockDimension_dbl( ); +dim3 +hypre_GetDefaultDeviceBlockDimension_long_dbl( ); + +dim3 +hypre_GetDefaultDeviceGridDimension_flt( HYPRE_Int n, const char *granularity, dim3 bDim ); +dim3 +hypre_GetDefaultDeviceGridDimension_dbl( HYPRE_Int n, const char *granularity, dim3 bDim ); +dim3 +hypre_GetDefaultDeviceGridDimension_long_dbl( HYPRE_Int n, const char *granularity, dim3 bDim ); + +dim3 +hypre_dim3_flt( HYPRE_Int x ); +dim3 +hypre_dim3_dbl( HYPRE_Int x ); +dim3 +hypre_dim3_long_dbl( HYPRE_Int x ); + +dim3 +hypre_dim3_flt( HYPRE_Int x, HYPRE_Int y ); +dim3 +hypre_dim3_dbl( HYPRE_Int x, HYPRE_Int y ); +dim3 +hypre_dim3_long_dbl( HYPRE_Int x, HYPRE_Int y ); + +dim3 +hypre_dim3_flt( HYPRE_Int x, HYPRE_Int y, HYPRE_Int z ); +dim3 +hypre_dim3_dbl( HYPRE_Int x, HYPRE_Int y, HYPRE_Int z ); +dim3 +hypre_dim3_long_dbl( HYPRE_Int x, HYPRE_Int y, HYPRE_Int z ); + + +#endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/src/utilities/_hypre_utilities_mup_def.h b/src/utilities/_hypre_utilities_mup_def.h index faf15acc0e..85c28daeab 100644 --- a/src/utilities/_hypre_utilities_mup_def.h +++ b/src/utilities/_hypre_utilities_mup_def.h @@ -48,6 +48,7 @@ #define HYPRE_SetUseGpuRand HYPRE_MULTIPRECISION_FUNC ( HYPRE_SetUseGpuRand ) #define HYPRE_Version HYPRE_MULTIPRECISION_FUNC ( HYPRE_Version ) #define HYPRE_VersionNumber HYPRE_MULTIPRECISION_FUNC ( HYPRE_VersionNumber ) +#define hypre_GetSizeOfReal HYPRE_MULTIPRECISION_FUNC ( hypre_GetSizeOfReal ) #define hypre_BeginTiming_fcn HYPRE_FIXEDPRECISION_FUNC ( 
hypre_BeginTiming_fcn ) #define hypre_BigBinarySearch HYPRE_FIXEDPRECISION_FUNC ( hypre_BigBinarySearch ) #define hypre_BigLowerBound HYPRE_FIXEDPRECISION_FUNC ( hypre_BigLowerBound ) @@ -150,12 +151,15 @@ #define hypre_MPI_Comm_rank HYPRE_FIXEDPRECISION_FUNC ( hypre_MPI_Comm_rank ) #define hypre_MPI_Comm_size HYPRE_FIXEDPRECISION_FUNC ( hypre_MPI_Comm_size ) #define hypre_MPI_Comm_split HYPRE_FIXEDPRECISION_FUNC ( hypre_MPI_Comm_split ) +#define hypre_MPI_Comm_split_type HYPRE_FIXEDPRECISION_FUNC ( hypre_MPI_Comm_split_type ) #define hypre_MPI_Finalize HYPRE_FIXEDPRECISION_FUNC ( hypre_MPI_Finalize ) #define hypre_MPI_Gather HYPRE_FIXEDPRECISION_FUNC ( hypre_MPI_Gather ) #define hypre_MPI_Gatherv HYPRE_FIXEDPRECISION_FUNC ( hypre_MPI_Gatherv ) #define hypre_MPI_Get_count HYPRE_FIXEDPRECISION_FUNC ( hypre_MPI_Get_count ) #define hypre_MPI_Group_free HYPRE_FIXEDPRECISION_FUNC ( hypre_MPI_Group_free ) #define hypre_MPI_Group_incl HYPRE_FIXEDPRECISION_FUNC ( hypre_MPI_Group_incl ) +#define hypre_MPI_Info_create HYPRE_FIXEDPRECISION_FUNC ( hypre_MPI_Info_create ) +#define hypre_MPI_Info_free HYPRE_FIXEDPRECISION_FUNC ( hypre_MPI_Info_free ) #define hypre_MPI_Init HYPRE_FIXEDPRECISION_FUNC ( hypre_MPI_Init ) #define hypre_MPI_Iprobe HYPRE_FIXEDPRECISION_FUNC ( hypre_MPI_Iprobe ) #define hypre_MPI_Irecv HYPRE_FIXEDPRECISION_FUNC ( hypre_MPI_Irecv ) @@ -322,5 +326,72 @@ #define utilities_FortranMatrixValues HYPRE_FIXEDPRECISION_FUNC ( utilities_FortranMatrixValues ) #define utilities_FortranMatrixWidth HYPRE_FIXEDPRECISION_FUNC ( utilities_FortranMatrixWidth ) #define utilities_FortranMatrixWrap HYPRE_FIXEDPRECISION_FUNC ( utilities_FortranMatrixWrap ) +#define hypreDevice_BigIntAxpyn HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_BigIntAxpyn ) +#define hypreDevice_BigIntFilln HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_BigIntFilln ) +#define hypreDevice_CharFilln HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CharFilln ) +#define hypreDevice_ComplexArrayToArrayOfPtrs 
HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_ComplexArrayToArrayOfPtrs ) +#define hypreDevice_ComplexAxpyn HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_ComplexAxpyn ) +#define hypreDevice_ComplexAxpyzn HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_ComplexAxpyzn ) +#define hypreDevice_ComplexFilln HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_ComplexFilln ) +#define hypreDevice_ComplexReduceSum HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_ComplexReduceSum ) +#define hypreDevice_ComplexScalen HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_ComplexScalen ) +#define hypreDevice_ComplexStridedCopy HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_ComplexStridedCopy ) +#define hypreDevice_CopyParCSRRows HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CopyParCSRRows ) +#define hypreDevice_CsrRowIndicesToPtrs HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CsrRowIndicesToPtrs ) +#define hypreDevice_CsrRowIndicesToPtrs_v2 HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CsrRowIndicesToPtrs_v2 ) +#define hypreDevice_CsrRowPtrsToIndices HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CsrRowPtrsToIndices ) +#define hypreDevice_CsrRowPtrsToIndices_v2 HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_CsrRowPtrsToIndices_v2 ) +#define hypreDevice_DiagScaleVector HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_DiagScaleVector ) +#define hypreDevice_DiagScaleVector2 HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_DiagScaleVector2 ) +#define hypreDevice_GenScatterAdd HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_GenScatterAdd ) +#define hypreDevice_GetRowNnz HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_GetRowNnz ) +#define hypreDevice_IVAMXPMY HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_IVAMXPMY ) +#define hypreDevice_IVAXPY HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_IVAXPY ) +#define hypreDevice_IVAXPYMarked HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_IVAXPYMarked ) +#define hypreDevice_IntAxpyn HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_IntAxpyn ) +#define hypreDevice_IntFilln HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_IntFilln ) +#define hypreDevice_IntScalen HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_IntScalen 
) +#define hypreDevice_IntStridedCopy HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_IntStridedCopy ) +#define hypreDevice_IntegerExclusiveScan HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_IntegerExclusiveScan ) +#define hypreDevice_IntegerInclusiveScan HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_IntegerInclusiveScan ) +#define hypreDevice_IntegerReduceSum HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_IntegerReduceSum ) +#define hypreDevice_zeqxmydd HYPRE_FIXEDPRECISION_FUNC ( hypreDevice_zeqxmydd ) +#define hypreGPUKernel_CompileFlagSafetyCheck HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_CompileFlagSafetyCheck ) +#define hypreGPUKernel_CopyParCSRRows HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_CopyParCSRRows ) +#define hypreGPUKernel_GetRowNnz HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_GetRowNnz ) +#define hypreGPUKernel_IVAXPY HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_IVAXPY ) +#define hypreGPUKernel_IVAXPYMarked HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_IVAXPYMarked ) +#define hypreGPUKernel_IntArrayInverseMapping HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_IntArrayInverseMapping ) +#define hypreGPUKernel_ScatterAdd HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_ScatterAdd ) +#define hypreGPUKernel_ScatterAddTrivial HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_ScatterAddTrivial ) +#define hypreGPUKernel_zeqxmydd HYPRE_FIXEDPRECISION_FUNC ( hypreGPUKernel_zeqxmydd ) +#define hypre_CudaCompileFlagCheck HYPRE_FIXEDPRECISION_FUNC ( hypre_CudaCompileFlagCheck ) +#define hypre_CurandUniform HYPRE_FIXEDPRECISION_FUNC ( hypre_CurandUniform ) +#define hypre_CurandUniformSingle HYPRE_FIXEDPRECISION_FUNC ( hypre_CurandUniformSingle ) +#define hypre_DeviceDataComputeStream HYPRE_FIXEDPRECISION_FUNC ( hypre_DeviceDataComputeStream ) +#define hypre_DeviceDataCreate HYPRE_FIXEDPRECISION_FUNC ( hypre_DeviceDataCreate ) +#define hypre_DeviceDataCurandGenerator HYPRE_FIXEDPRECISION_FUNC ( hypre_DeviceDataCurandGenerator ) +#define hypre_DeviceDataCusparseHandle HYPRE_FIXEDPRECISION_FUNC ( 
hypre_DeviceDataCusparseHandle ) +#define hypre_DeviceDataDestroy HYPRE_FIXEDPRECISION_FUNC ( hypre_DeviceDataDestroy ) +#define hypre_DeviceDataStream HYPRE_FIXEDPRECISION_FUNC ( hypre_DeviceDataStream ) +#define hypre_DeviceMemoryGetUsage HYPRE_FIXEDPRECISION_FUNC ( hypre_DeviceMemoryGetUsage ) +#define hypre_ForceSyncComputeStream HYPRE_FIXEDPRECISION_FUNC ( hypre_ForceSyncComputeStream ) +#define hypre_GetDefaultDeviceBlockDimension HYPRE_FIXEDPRECISION_FUNC ( hypre_GetDefaultDeviceBlockDimension ) +#define hypre_GetDefaultDeviceGridDimension HYPRE_FIXEDPRECISION_FUNC ( hypre_GetDefaultDeviceGridDimension ) +#define hypre_GetSyncCudaCompute HYPRE_FIXEDPRECISION_FUNC ( hypre_GetSyncCudaCompute ) +#define hypre_IntArrayCountDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_IntArrayCountDevice ) +#define hypre_IntArrayInverseMappingDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_IntArrayInverseMappingDevice ) +#define hypre_IntArrayNegateDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_IntArrayNegateDevice ) +#define hypre_IntArraySeparateByValueDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_IntArraySeparateByValueDevice ) +#define hypre_IntArraySetConstantValuesDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_IntArraySetConstantValuesDevice ) +#define hypre_IntArraySetInterleavedValuesDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_IntArraySetInterleavedValuesDevice ) +#define hypre_ResetDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_ResetDevice ) +#define hypre_ResetDeviceRandGenerator HYPRE_FIXEDPRECISION_FUNC ( hypre_ResetDeviceRandGenerator ) +#define hypre_RestoreSyncCudaCompute HYPRE_FIXEDPRECISION_FUNC ( hypre_RestoreSyncCudaCompute ) +#define hypre_SetSyncCudaCompute HYPRE_FIXEDPRECISION_FUNC ( hypre_SetSyncCudaCompute ) +#define hypre_SyncComputeStream HYPRE_FIXEDPRECISION_FUNC ( hypre_SyncComputeStream ) +#define hypre_SyncComputeStream_core HYPRE_FIXEDPRECISION_FUNC ( hypre_SyncComputeStream_core ) +#define hypre_SyncDevice HYPRE_FIXEDPRECISION_FUNC ( hypre_SyncDevice ) +#define hypre_dim3 
HYPRE_FIXEDPRECISION_FUNC ( hypre_dim3 ) #endif diff --git a/src/utilities/_hypre_utilities_mup_undef.h b/src/utilities/_hypre_utilities_mup_undef.h index ec35cb31a3..b3c324f45c 100644 --- a/src/utilities/_hypre_utilities_mup_undef.h +++ b/src/utilities/_hypre_utilities_mup_undef.h @@ -45,6 +45,7 @@ #undef HYPRE_SetUseGpuRand #undef HYPRE_Version #undef HYPRE_VersionNumber +#undef hypre_GetSizeOfReal #undef hypre_BeginTiming_fcn #undef hypre_BigBinarySearch #undef hypre_BigLowerBound @@ -147,12 +148,15 @@ #undef hypre_MPI_Comm_rank #undef hypre_MPI_Comm_size #undef hypre_MPI_Comm_split +#undef hypre_MPI_Comm_split_type #undef hypre_MPI_Finalize #undef hypre_MPI_Gather #undef hypre_MPI_Gatherv #undef hypre_MPI_Get_count #undef hypre_MPI_Group_free #undef hypre_MPI_Group_incl +#undef hypre_MPI_Info_create +#undef hypre_MPI_Info_free #undef hypre_MPI_Init #undef hypre_MPI_Iprobe #undef hypre_MPI_Irecv @@ -319,3 +323,70 @@ #undef utilities_FortranMatrixValues #undef utilities_FortranMatrixWidth #undef utilities_FortranMatrixWrap +#undef hypreDevice_BigIntAxpyn +#undef hypreDevice_BigIntFilln +#undef hypreDevice_CharFilln +#undef hypreDevice_ComplexArrayToArrayOfPtrs +#undef hypreDevice_ComplexAxpyn +#undef hypreDevice_ComplexAxpyzn +#undef hypreDevice_ComplexFilln +#undef hypreDevice_ComplexReduceSum +#undef hypreDevice_ComplexScalen +#undef hypreDevice_ComplexStridedCopy +#undef hypreDevice_CopyParCSRRows +#undef hypreDevice_CsrRowIndicesToPtrs +#undef hypreDevice_CsrRowIndicesToPtrs_v2 +#undef hypreDevice_CsrRowPtrsToIndices +#undef hypreDevice_CsrRowPtrsToIndices_v2 +#undef hypreDevice_DiagScaleVector +#undef hypreDevice_DiagScaleVector2 +#undef hypreDevice_GenScatterAdd +#undef hypreDevice_GetRowNnz +#undef hypreDevice_IVAMXPMY +#undef hypreDevice_IVAXPY +#undef hypreDevice_IVAXPYMarked +#undef hypreDevice_IntAxpyn +#undef hypreDevice_IntFilln +#undef hypreDevice_IntScalen +#undef hypreDevice_IntStridedCopy +#undef hypreDevice_IntegerExclusiveScan +#undef 
hypreDevice_IntegerInclusiveScan +#undef hypreDevice_IntegerReduceSum +#undef hypreDevice_zeqxmydd +#undef hypreGPUKernel_CompileFlagSafetyCheck +#undef hypreGPUKernel_CopyParCSRRows +#undef hypreGPUKernel_GetRowNnz +#undef hypreGPUKernel_IVAXPY +#undef hypreGPUKernel_IVAXPYMarked +#undef hypreGPUKernel_IntArrayInverseMapping +#undef hypreGPUKernel_ScatterAdd +#undef hypreGPUKernel_ScatterAddTrivial +#undef hypreGPUKernel_zeqxmydd +#undef hypre_CudaCompileFlagCheck +#undef hypre_CurandUniform +#undef hypre_CurandUniformSingle +#undef hypre_DeviceDataComputeStream +#undef hypre_DeviceDataCreate +#undef hypre_DeviceDataCurandGenerator +#undef hypre_DeviceDataCusparseHandle +#undef hypre_DeviceDataDestroy +#undef hypre_DeviceDataStream +#undef hypre_DeviceMemoryGetUsage +#undef hypre_ForceSyncComputeStream +#undef hypre_GetDefaultDeviceBlockDimension +#undef hypre_GetDefaultDeviceGridDimension +#undef hypre_GetSyncCudaCompute +#undef hypre_IntArrayCountDevice +#undef hypre_IntArrayInverseMappingDevice +#undef hypre_IntArrayNegateDevice +#undef hypre_IntArraySeparateByValueDevice +#undef hypre_IntArraySetConstantValuesDevice +#undef hypre_IntArraySetInterleavedValuesDevice +#undef hypre_ResetDevice +#undef hypre_ResetDeviceRandGenerator +#undef hypre_RestoreSyncCudaCompute +#undef hypre_SetSyncCudaCompute +#undef hypre_SyncComputeStream +#undef hypre_SyncComputeStream_core +#undef hypre_SyncDevice +#undef hypre_dim3 diff --git a/src/utilities/device_utils.c b/src/utilities/device_utils.c index fc84184260..33c718390c 100644 --- a/src/utilities/device_utils.c +++ b/src/utilities/device_utils.c @@ -384,14 +384,8 @@ hypre_DeviceMemoryGetUsage(HYPRE_Real *mem) * hypre_DeviceDataComputeStream *--------------------------------------------------------------------*/ -/* CUDA/HIP stream */ -#if defined(HYPRE_USING_CUDA) -cudaStream_t -#elif defined(HYPRE_USING_HIP) -hipStream_t -#elif defined(HYPRE_USING_SYCL) -sycl::queue* -#endif +/* CUDA/HIP/SYCL stream */ +hypre_DeviceStream 
hypre_DeviceDataComputeStream(hypre_DeviceData *data) { return hypre_DeviceDataStream(data, hypre_DeviceDataComputeStreamNum(data)); @@ -401,22 +395,10 @@ hypre_DeviceDataComputeStream(hypre_DeviceData *data) * hypre_DeviceDataStream *--------------------------------------------------------------------*/ -#if defined(HYPRE_USING_CUDA) -cudaStream_t -#elif defined(HYPRE_USING_HIP) -hipStream_t -#elif defined(HYPRE_USING_SYCL) -sycl::queue* -#endif +hypre_DeviceStream hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i) { -#if defined(HYPRE_USING_CUDA) - cudaStream_t stream = 0; -#elif defined(HYPRE_USING_HIP) - hipStream_t stream = 0; -#elif defined(HYPRE_USING_SYCL) - sycl::queue *stream = NULL; -#endif + hypre_DeviceStream stream = NULL; #if defined(HYPRE_USING_CUDA_STREAMS) if (i >= HYPRE_MAX_NUM_STREAMS) @@ -1660,7 +1642,7 @@ hypreDevice_ComplexAxpyzn( HYPRE_Int n, * hypre_DeviceDataCurandGenerator *--------------------------------------------------------------------*/ -curandGenerator_t +hypre_DeviceRandGenerator hypre_DeviceDataCurandGenerator(hypre_DeviceData *data) { if (data->curand_generator) @@ -1729,7 +1711,7 @@ hypre_CurandUniform_core( HYPRE_Int n, * hypre_DeviceDataCurandGenerator *--------------------------------------------------------------------*/ -rocrand_generator +hypre_DeviceRandGenerator hypre_DeviceDataCurandGenerator(hypre_DeviceData *data) { if (data->curand_generator) @@ -3064,3 +3046,42 @@ hypre_bind_device( HYPRE_Int myid, { return hypre_bind_device_id(-1, myid, nproc, comm); } + +/*-------------------------------------------------------------------------- + * hypreDevice_ComplexDeviceArrayAxpyn + *--------------------------------------------------------------------------*/ +/* +HYPRE_Int +hypreDevice_ComplexDeviceArrayAxpyn( HYPRE_Complex alpha, + HYPRE_Complex *x, + HYPRE_Complex *y, + HYPRE_Int n ) +{ + +#if defined(HYPRE_USING_GPU) + +#if ( defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) ) && defined(HYPRE_USING_CUBLAS) 
+ HYPRE_CUBLAS_CALL( hypre_cublas_axpy(hypre_HandleCublasHandle(hypre_handle()), + n, &alpha, x, 1, y, 1) ); +#elif defined(HYPRE_USING_SYCL) && defined(HYPRE_USING_ONEMKLBLAS) + HYPRE_ONEMKL_CALL( oneapi::mkl::blas::axpy(*hypre_HandleComputeStream(hypre_handle()), + n, alpha, x, 1, y, 1).wait() ); +#else + hypreDevice_ComplexAxpyn(x, n, y, y, alpha); +#endif + + hypre_SyncComputeStream(); + +#elif defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_Int i; + + #pragma omp target teams distribute parallel for private(i) is_device_ptr(y, x) + for (i = 0; i < n; i++) + { + y[i] += alpha * x[i]; + } +#endif + + return hypre_error_flag; +} +*/ diff --git a/src/utilities/device_utils.h b/src/utilities/device_utils.h index 635574b450..8ed77afb6a 100644 --- a/src/utilities/device_utils.h +++ b/src/utilities/device_utils.h @@ -286,6 +286,15 @@ using hypre_DeviceItem = sycl::nd_item<3>; * NOTE: IN HYPRE'S DEFAULT STREAM * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ +/* unified device stream */ +#if defined(HYPRE_USING_CUDA) +typedef cudaStream_t hypre_DeviceStream; +#elif defined(HYPRE_USING_HIP) +typedef hipStream_t hypre_DeviceStream; +#elif defined(HYPRE_USING_SYCL) +typedef sycl::queue* hypre_DeviceStream; +#endif + #if defined(HYPRE_DEBUG) #define GPU_LAUNCH_SYNC { hypre_SyncComputeStream(); hypre_GetDeviceLastError(); } #else @@ -717,32 +726,45 @@ using hypre_DeviceItem = sycl::nd_item<3>; * device info data structures * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ +/* unified dense solver handle */ #if defined(HYPRE_USING_CUSOLVER) typedef cusolverDnHandle_t vendorSolverHandle_t; #elif defined(HYPRE_USING_ROCSOLVER) typedef rocblas_handle vendorSolverHandle_t; +#else +typedef void * vendorSolverHandle_t; #endif -struct hypre_DeviceData -{ +/* unified rand generator */ #if defined(HYPRE_USING_CURAND) - curandGenerator_t curand_generator; +typedef curandGenerator_t hypre_DeviceRandGenerator; +#elif 
defined(HYPRE_USING_ROCRAND) +typedef rocrand_generator hypre_DeviceRandGenerator; +#else +typedef void * hypre_DeviceRandGenerator; #endif -#if defined(HYPRE_USING_ROCRAND) - rocrand_generator curand_generator; +/* unified sparse LA library */ +#if defined(HYPRE_USING_CUSPARSE) +typedef cusparseHandle_t hypre_DeviceSparseLibHandle; +#elif defined(HYPRE_USING_ROCSPARSE) +typedef rocsparse_handle hypre_DeviceSparseLibHandle; +#else +typedef void * hypre_DeviceSparseLibHandle; +#endif + +struct hypre_DeviceData +{ +#if defined(HYPRE_USING_CURAND) || defined(HYPRE_USING_ROCRAND) + hypre_DeviceRandGenerator curand_generator; #endif #if defined(HYPRE_USING_CUBLAS) cublasHandle_t cublas_handle; #endif -#if defined(HYPRE_USING_CUSPARSE) - cusparseHandle_t cusparse_handle; -#endif - -#if defined(HYPRE_USING_ROCSPARSE) - rocsparse_handle cusparse_handle; +#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) + hypre_DeviceSparseLibHandle cusparse_handle; #endif #if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER) @@ -751,13 +773,7 @@ struct hypre_DeviceData /* TODO (VPM): Change to HYPRE_USING_GPU_STREAMS*/ #if defined(HYPRE_USING_CUDA_STREAMS) -#if defined(HYPRE_USING_CUDA) - cudaStream_t streams[HYPRE_MAX_NUM_STREAMS]; -#elif defined(HYPRE_USING_HIP) - hipStream_t streams[HYPRE_MAX_NUM_STREAMS]; -#elif defined(HYPRE_USING_SYCL) - sycl::queue* streams[HYPRE_MAX_NUM_STREAMS] = {NULL}; -#endif + hypre_DeviceStream streams[HYPRE_MAX_NUM_STREAMS] = {0}; #endif #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) @@ -819,41 +835,20 @@ struct hypre_DeviceData hypre_DeviceData* hypre_DeviceDataCreate(); void hypre_DeviceDataDestroy(hypre_DeviceData* data); -#if defined(HYPRE_USING_CURAND) -curandGenerator_t hypre_DeviceDataCurandGenerator(hypre_DeviceData *data); -#endif - -#if defined(HYPRE_USING_ROCRAND) -rocrand_generator hypre_DeviceDataCurandGenerator(hypre_DeviceData *data); -#endif +hypre_DeviceRandGenerator 
hypre_DeviceDataCurandGenerator(hypre_DeviceData *data); #if defined(HYPRE_USING_CUBLAS) cublasHandle_t hypre_DeviceDataCublasHandle(hypre_DeviceData *data); #endif -#if defined(HYPRE_USING_CUSPARSE) -cusparseHandle_t hypre_DeviceDataCusparseHandle(hypre_DeviceData *data); -#endif - -#if defined(HYPRE_USING_ROCSPARSE) -rocsparse_handle hypre_DeviceDataCusparseHandle(hypre_DeviceData *data); -#endif +hypre_DeviceSparseLibHandle hypre_DeviceDataCusparseHandle(hypre_DeviceData *data); #if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER) vendorSolverHandle_t hypre_DeviceDataVendorSolverHandle(hypre_DeviceData *data); #endif -/* TODO (VPM): Create a deviceStream_t to encapsulate all stream types below */ -#if defined(HYPRE_USING_CUDA) -cudaStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i); -cudaStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data); -#elif defined(HYPRE_USING_HIP) -hipStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i); -hipStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data); -#elif defined(HYPRE_USING_SYCL) -sycl::queue* hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i); -sycl::queue* hypre_DeviceDataComputeStream(hypre_DeviceData *data); -#endif +hypre_DeviceStream hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i); +hypre_DeviceStream hypre_DeviceDataComputeStream(hypre_DeviceData *data); /* Data structure and accessor routines for Sparse Triangular Matrices */ struct hypre_CsrsvData @@ -2237,6 +2232,8 @@ template <typename T> HYPRE_Int hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, T *d_row_num, T *d_row_ind); +template <typename T1, typename T2, typename T3> +HYPRE_Int hypreDevice_Axpyzn_mp(HYPRE_Int n, T1 *d_x, T2 *d_y, T3 *d_z, T1 a, T2 b); #endif #if defined(HYPRE_USING_CUSPARSE) @@ -2249,4 +2246,4 @@ cusparseIndexType_t hypre_HYPREIntToCusparseIndexType(); #endif // #if defined(HYPRE_USING_CUSPARSE) -#endif /* #ifndef HYPRE_CUDA_UTILS_H */ +#endif /* #ifndef
HYPRE_DEVICE_UTILS_H */ diff --git a/src/utilities/functors.h b/src/utilities/functors.h index d160dda72c..57e4d57e57 100644 --- a/src/utilities/functors.h +++ b/src/utilities/functors.h @@ -113,5 +113,20 @@ struct hypreFunctor_NonzeroAboveTol } }; +/*-------------------------------------------------------------------------- + * hypreFunctor_ElementCast + * + * Functor for performing casting data between datatypes + *--------------------------------------------------------------------------*/ + +template <typename T, typename T2> +struct hypreFunctor_ElementCast +{ + __host__ __device__ T2 operator()(T a) + { + return static_cast<T2>(a); + } +}; + #endif /* if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */ #endif /* ifndef HYPRE_FUNCTORS_H */ diff --git a/src/utilities/headers b/src/utilities/headers index 347551c675..5ed20b00a6 100755 --- a/src/utilities/headers +++ b/src/utilities/headers @@ -62,6 +62,7 @@ cat hopscotch_hash.h >> $INTERNAL_HEADER cat mmio.h >> $INTERNAL_HEADER cat _hypre_fortran_matrix.h >> $INTERNAL_HEADER cat multiprecision.h >> $INTERNAL_HEADER +cat protos_mp.h >> $INTERNAL_HEADER #=========================================================================== # Include guards @@ -142,6 +143,7 @@ cat >> $INTERNAL_HEADER <<@ #ifndef hypre_MP_BUILD #include "_hypre_utilities_mup_undef.h" #include "_hypre_utilities_mup.h" +#include "_hypre_utilities_mup.hpp" #endif #endif diff --git a/src/utilities/mpistubs.c b/src/utilities/mpistubs.c index 50e2a288f8..6e0bd6cca9 100644 --- a/src/utilities/mpistubs.c +++ b/src/utilities/mpistubs.c @@ -887,7 +887,6 @@ hypre_MPI_Op_free( hypre_MPI_Op *op ) return (0); } -#if defined(HYPRE_USING_GPU) HYPRE_Int hypre_MPI_Comm_split_type( hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, hypre_MPI_Comm *newcomm ) { @@ -910,7 +909,6 @@ HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ) HYPRE_UNUSED_VAR(info); return (0); } -#endif HYPRE_Int hypre_MPI_CheckCommMatrix( hypre_MPI_Comm comm, @@ -1653,7 +1651,6 @@
hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, hypre return (HYPRE_Int) MPI_Op_create(function, commute, op); } -#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type( hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, hypre_MPI_Comm *newcomm ) @@ -1672,7 +1669,6 @@ hypre_MPI_Info_free( hypre_MPI_Info *info ) { return (HYPRE_Int) MPI_Info_free(info); } -#endif HYPRE_Int hypre_MPI_CheckCommMatrix( hypre_MPI_Comm comm, diff --git a/src/utilities/mpistubs.h b/src/utilities/mpistubs.h index f2a061bb25..11a343b337 100644 --- a/src/utilities/mpistubs.h +++ b/src/utilities/mpistubs.h @@ -343,12 +343,10 @@ HYPRE_Int hypre_MPI_Type_free( hypre_MPI_Datatype *datatype ); HYPRE_Int hypre_MPI_Op_free( hypre_MPI_Op *op ); HYPRE_Int hypre_MPI_Op_create( hypre_MPI_User_function *function, hypre_int commute, hypre_MPI_Op *op ); -#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_MPI_Comm_split_type(hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, hypre_MPI_Comm *newcomm); HYPRE_Int hypre_MPI_Info_create(hypre_MPI_Info *info); HYPRE_Int hypre_MPI_Info_free( hypre_MPI_Info *info ); -#endif HYPRE_Int hypre_MPI_CheckCommMatrix( hypre_MPI_Comm comm, HYPRE_Int num_recvs, HYPRE_Int *recvs, HYPRE_Int num_sends, HYPRE_Int *sends ); diff --git a/src/utilities/mup.fixed b/src/utilities/mup.fixed index 3e5e15cd37..59828d3302 100644 --- a/src/utilities/mup.fixed +++ b/src/utilities/mup.fixed @@ -100,12 +100,15 @@ hypre_MPI_Comm_group hypre_MPI_Comm_rank hypre_MPI_Comm_size hypre_MPI_Comm_split +hypre_MPI_Comm_split_type hypre_MPI_Finalize hypre_MPI_Gather hypre_MPI_Gatherv hypre_MPI_Get_count hypre_MPI_Group_free hypre_MPI_Group_incl +hypre_MPI_Info_create +hypre_MPI_Info_free hypre_MPI_Init hypre_MPI_Iprobe hypre_MPI_Irecv diff --git a/src/utilities/mup.fixed_gpu b/src/utilities/mup.fixed_gpu new file mode 100644 index 
0000000000..cd390dc2d8 --- /dev/null +++ b/src/utilities/mup.fixed_gpu @@ -0,0 +1,67 @@ +hypreDevice_BigIntAxpyn +hypreDevice_BigIntFilln +hypreDevice_CharFilln +hypreDevice_ComplexArrayToArrayOfPtrs +hypreDevice_ComplexAxpyn +hypreDevice_ComplexAxpyzn +hypreDevice_ComplexFilln +hypreDevice_ComplexReduceSum +hypreDevice_ComplexScalen +hypreDevice_ComplexStridedCopy +hypreDevice_CopyParCSRRows +hypreDevice_CsrRowIndicesToPtrs +hypreDevice_CsrRowIndicesToPtrs_v2 +hypreDevice_CsrRowPtrsToIndices +hypreDevice_CsrRowPtrsToIndices_v2 +hypreDevice_DiagScaleVector +hypreDevice_DiagScaleVector2 +hypreDevice_GenScatterAdd +hypreDevice_GetRowNnz +hypreDevice_IVAMXPMY +hypreDevice_IVAXPY +hypreDevice_IVAXPYMarked +hypreDevice_IntAxpyn +hypreDevice_IntFilln +hypreDevice_IntScalen +hypreDevice_IntStridedCopy +hypreDevice_IntegerExclusiveScan +hypreDevice_IntegerInclusiveScan +hypreDevice_IntegerReduceSum +hypreDevice_zeqxmydd +hypreGPUKernel_CompileFlagSafetyCheck +hypreGPUKernel_CopyParCSRRows +hypreGPUKernel_GetRowNnz +hypreGPUKernel_IVAXPY +hypreGPUKernel_IVAXPYMarked +hypreGPUKernel_IntArrayInverseMapping +hypreGPUKernel_ScatterAdd +hypreGPUKernel_ScatterAddTrivial +hypreGPUKernel_zeqxmydd +hypre_CudaCompileFlagCheck +hypre_CurandUniform +hypre_CurandUniformSingle +hypre_DeviceDataComputeStream +hypre_DeviceDataCreate +hypre_DeviceDataCurandGenerator +hypre_DeviceDataCusparseHandle +hypre_DeviceDataDestroy +hypre_DeviceDataStream +hypre_DeviceMemoryGetUsage +hypre_ForceSyncComputeStream +hypre_GetDefaultDeviceBlockDimension +hypre_GetDefaultDeviceGridDimension +hypre_GetSyncCudaCompute +hypre_IntArrayCountDevice +hypre_IntArrayInverseMappingDevice +hypre_IntArrayNegateDevice +hypre_IntArraySeparateByValueDevice +hypre_IntArraySetConstantValuesDevice +hypre_IntArraySetInterleavedValuesDevice +hypre_ResetDevice +hypre_ResetDeviceRandGenerator +hypre_RestoreSyncCudaCompute +hypre_SetSyncCudaCompute +hypre_SyncComputeStream +hypre_SyncComputeStream_core +hypre_SyncDevice 
+hypre_dim3 diff --git a/src/utilities/mup.functions b/src/utilities/mup.functions index 90babd4b7f..fe1f218c11 100644 --- a/src/utilities/mup.functions +++ b/src/utilities/mup.functions @@ -31,3 +31,4 @@ HYPRE_SetSpTransUseVendor HYPRE_SetUseGpuRand HYPRE_Version HYPRE_VersionNumber +hypre_GetSizeOfReal diff --git a/src/utilities/mup.functions_gpu b/src/utilities/mup.functions_gpu new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/utilities/mup.methods_gpu b/src/utilities/mup.methods_gpu new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/utilities/mup_fixed.c b/src/utilities/mup_fixed.c index ef6cc2dac0..92f146dd78 100644 --- a/src/utilities/mup_fixed.c +++ b/src/utilities/mup_fixed.c @@ -816,6 +816,14 @@ hypre_MPI_Comm_split( hypre_MPI_Comm comm, HYPRE_Int n, HYPRE_Int m, hypre_MPI_C /*--------------------------------------------------------------------------*/ +HYPRE_Int +hypre_MPI_Comm_split_type( hypre_MPI_Comm comm, HYPRE_Int split_type, HYPRE_Int key, hypre_MPI_Info info, hypre_MPI_Comm *newcomm ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_MPI_Comm_split_type)( comm, split_type, key, info, newcomm ); +} + +/*--------------------------------------------------------------------------*/ + HYPRE_Int hypre_MPI_Finalize( void ) { @@ -864,6 +872,22 @@ hypre_MPI_Group_incl( hypre_MPI_Group group, HYPRE_Int n, HYPRE_Int *ranks, hypr /*--------------------------------------------------------------------------*/ +HYPRE_Int +hypre_MPI_Info_create( hypre_MPI_Info *info ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_MPI_Info_create)( info ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_MPI_Info_free( hypre_MPI_Info *info ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_MPI_Info_free)( info ); +} + +/*--------------------------------------------------------------------------*/ + HYPRE_Int hypre_MPI_Init( hypre_int *argc, char ***argv ) { diff --git a/src/utilities/mup_fixed_gpu.c 
b/src/utilities/mup_fixed_gpu.c new file mode 100644 index 0000000000..7dd136cb41 --- /dev/null +++ b/src/utilities/mup_fixed_gpu.c @@ -0,0 +1,495 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +#include "_hypre_utilities.h" +#include "_hypre_utilities.hpp" + +#ifdef HYPRE_MIXED_PRECISION + +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#if defined(HYPRE_USING_GPU) + + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_BigIntAxpyn( HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt *d_y, HYPRE_BigInt *d_z, HYPRE_BigInt a ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_BigIntAxpyn)( d_x, n, d_y, d_z, a ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_BigIntFilln( HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt v ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_BigIntFilln)( d_x, n, v ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_CharFilln( char *d_x, size_t n, char v ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_CharFilln)( d_x, n, v ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_ComplexArrayToArrayOfPtrs( HYPRE_Int n, HYPRE_Int m, HYPRE_Complex *data, HYPRE_Complex **data_aop ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_ComplexArrayToArrayOfPtrs)( n, m, data, data_aop ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_ComplexAxpyn( HYPRE_Complex *d_x, size_t n, HYPRE_Complex *d_y, 
HYPRE_Complex *d_z, HYPRE_Complex a ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_ComplexAxpyn)( d_x, n, d_y, d_z, a ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_ComplexAxpyzn( HYPRE_Int n, HYPRE_Complex *d_x, HYPRE_Complex *d_y, HYPRE_Complex *d_z, HYPRE_Complex a, HYPRE_Complex b ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_ComplexAxpyzn)( n, d_x, d_y, d_z, a, b ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_ComplexFilln( HYPRE_Complex *d_x, size_t n, HYPRE_Complex v ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_ComplexFilln)( d_x, n, v ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Complex +hypreDevice_ComplexReduceSum( HYPRE_Int m, HYPRE_Complex *d_x ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_ComplexReduceSum)( m, d_x ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_ComplexScalen( HYPRE_Complex *d_x, size_t n, HYPRE_Complex *d_y, HYPRE_Complex v ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_ComplexScalen)( d_x, n, d_y, v ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_ComplexStridedCopy( HYPRE_Int size, HYPRE_Int stride, HYPRE_Complex *in, HYPRE_Complex *out ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_ComplexStridedCopy)( size, stride, in, out ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_CopyParCSRRows( HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, HYPRE_BigInt first_col, HYPRE_BigInt *d_col_map_offd_A, HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a, HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a, HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab ) +{ + 
return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_CopyParCSRRows)( nrows, d_row_indices, job, has_offd, first_col, d_col_map_offd_A, d_diag_i, d_diag_j, d_diag_a, d_offd_i, d_offd_j, d_offd_a, d_ib, d_jb, d_ab ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int* +hypreDevice_CsrRowIndicesToPtrs( HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_CsrRowIndicesToPtrs)( nrows, nnz, d_row_ind ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_CsrRowIndicesToPtrs_v2( HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind, HYPRE_Int *d_row_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_CsrRowIndicesToPtrs_v2)( nrows, nnz, d_row_ind, d_row_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int* +hypreDevice_CsrRowPtrsToIndices( HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_CsrRowPtrsToIndices)( nrows, nnz, d_row_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_CsrRowPtrsToIndices_v2( HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, HYPRE_Int *d_row_ind ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_CsrRowPtrsToIndices_v2)( nrows, nnz, d_row_ptr, d_row_ind ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_DiagScaleVector( HYPRE_Int num_vectors, HYPRE_Int num_rows, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_DiagScaleVector)( num_vectors, num_rows, A_i, A_data, x, beta, y ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_DiagScaleVector2( HYPRE_Int num_vectors, HYPRE_Int num_rows, 
HYPRE_Complex *diag, HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y, HYPRE_Complex *z, HYPRE_Int computeY ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_DiagScaleVector2)( num_vectors, num_rows, diag, x, beta, y, z, computeY ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_GetRowNnz( HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int *d_diag_ia, HYPRE_Int *d_offd_ia, HYPRE_Int *d_rownnz ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_GetRowNnz)( nrows, d_row_indices, d_diag_ia, d_offd_ia, d_rownnz ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_IVAMXPMY( HYPRE_Int m, HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_IVAMXPMY)( m, n, a, x, y ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_IVAXPY( HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_IVAXPY)( n, a, x, y ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_IVAXPYMarked( HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y, HYPRE_Int *marker, HYPRE_Int marker_val ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_IVAXPYMarked)( n, a, x, y, marker, marker_val ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_IntAxpyn( HYPRE_Int *d_x, size_t n, HYPRE_Int *d_y, HYPRE_Int *d_z, HYPRE_Int a ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_IntAxpyn)( d_x, n, d_y, d_z, a ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_IntFilln( HYPRE_Int *d_x, size_t n, HYPRE_Int v ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_IntFilln)( d_x, n, v ); +} + 
+/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_IntScalen( HYPRE_Int *d_x, size_t n, HYPRE_Int *d_y, HYPRE_Int v ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_IntScalen)( d_x, n, d_y, v ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_IntStridedCopy( HYPRE_Int size, HYPRE_Int stride, HYPRE_Int *in, HYPRE_Int *out ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_IntStridedCopy)( size, stride, in, out ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_IntegerExclusiveScan( HYPRE_Int n, HYPRE_Int *d_i ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_IntegerExclusiveScan)( n, d_i ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_IntegerInclusiveScan( HYPRE_Int n, HYPRE_Int *d_i ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_IntegerInclusiveScan)( n, d_i ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_IntegerReduceSum( HYPRE_Int m, HYPRE_Int *d_i ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_IntegerReduceSum)( m, d_i ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_zeqxmydd( HYPRE_Int n, HYPRE_Complex *x, HYPRE_Complex alpha, HYPRE_Complex *y, HYPRE_Complex *z, HYPRE_Complex *d ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_zeqxmydd)( n, x, alpha, y, z, d ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CudaCompileFlagCheck( void ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CudaCompileFlagCheck)( ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CurandUniform( HYPRE_Int n, HYPRE_Real *urand, HYPRE_Int set_seed, hypre_ulonglongint seed, 
HYPRE_Int set_offset, hypre_ulonglongint offset ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CurandUniform)( n, urand, set_seed, seed, set_offset, offset ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_CurandUniformSingle( HYPRE_Int n, float *urand, HYPRE_Int set_seed, hypre_ulonglongint seed, HYPRE_Int set_offset, hypre_ulonglongint offset ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_CurandUniformSingle)( n, urand, set_seed, seed, set_offset, offset ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_DeviceMemoryGetUsage( HYPRE_Real *mem ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_DeviceMemoryGetUsage)( mem ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ForceSyncComputeStream( ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ForceSyncComputeStream)( ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_GetSyncCudaCompute( HYPRE_Int *cuda_compute_stream_sync_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_GetSyncCudaCompute)( cuda_compute_stream_sync_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IntArrayCountDevice( hypre_IntArray *v, HYPRE_Int value, HYPRE_Int *num_values_ptr ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_IntArrayCountDevice)( v, value, num_values_ptr ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IntArrayInverseMappingDevice( hypre_IntArray *v, hypre_IntArray *w ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_IntArrayInverseMappingDevice)( v, w ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IntArrayNegateDevice( hypre_IntArray *v ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_IntArrayNegateDevice)( v ); +} + 
+/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IntArraySeparateByValueDevice( HYPRE_Int num_values, HYPRE_Int *values, HYPRE_Int *sizes, hypre_IntArray *v, hypre_IntArrayArray *w ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_IntArraySeparateByValueDevice)( num_values, values, sizes, v, w ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IntArraySetConstantValuesDevice( hypre_IntArray *v, HYPRE_Int value ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_IntArraySetConstantValuesDevice)( v, value ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IntArraySetInterleavedValuesDevice( hypre_IntArray *v, HYPRE_Int cycle ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_IntArraySetInterleavedValuesDevice)( v, cycle ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ResetDevice( ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ResetDevice)( ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ResetDeviceRandGenerator( hypre_ulonglongint seed, hypre_ulonglongint offset ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_ResetDeviceRandGenerator)( seed, offset ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_RestoreSyncCudaCompute( void ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_RestoreSyncCudaCompute)( ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SetSyncCudaCompute( HYPRE_Int action ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_SetSyncCudaCompute)( action ); +} + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SyncComputeStream( ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_SyncComputeStream)( ); +} + 
+/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SyncDevice( ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_SyncDevice)( ); +} + + +/*--------------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_GenScatterAdd( HYPRE_Real *x, HYPRE_Int ny, HYPRE_Int *map, HYPRE_Real *y, char *work ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypreDevice_GenScatterAdd)( x, ny, map, y, work ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_DeviceStream +hypre_DeviceDataComputeStream( hypre_DeviceData *data ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_DeviceDataComputeStream)( data ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_DeviceData* +hypre_DeviceDataCreate( ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_DeviceDataCreate)( ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_DeviceRandGenerator +hypre_DeviceDataCurandGenerator( hypre_DeviceData *data ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_DeviceDataCurandGenerator)( data ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_DeviceSparseLibHandle +hypre_DeviceDataCusparseHandle( hypre_DeviceData *data ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_DeviceDataCusparseHandle)( data ); +} + +/*--------------------------------------------------------------------------*/ + +void +hypre_DeviceDataDestroy( hypre_DeviceData* data ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_DeviceDataDestroy)( data ); +} + +/*--------------------------------------------------------------------------*/ + +hypre_DeviceStream +hypre_DeviceDataStream( hypre_DeviceData *data, HYPRE_Int i ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_DeviceDataStream)( data, i ); +} + +/*--------------------------------------------------------------------------*/ + +dim3 
+hypre_GetDefaultDeviceBlockDimension( ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_GetDefaultDeviceBlockDimension)( ); +} + +/*--------------------------------------------------------------------------*/ + +dim3 +hypre_GetDefaultDeviceGridDimension( HYPRE_Int n, const char *granularity, dim3 bDim ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_GetDefaultDeviceGridDimension)( n, granularity, bDim ); +} + +/*--------------------------------------------------------------------------*/ + +dim3 +hypre_dim3( HYPRE_Int x ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_dim3)( x ); +} + +/*--------------------------------------------------------------------------*/ + +dim3 +hypre_dim3( HYPRE_Int x, HYPRE_Int y ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_dim3)( x, y ); +} + +/*--------------------------------------------------------------------------*/ + +dim3 +hypre_dim3( HYPRE_Int x, HYPRE_Int y, HYPRE_Int z ) +{ + return HYPRE_CURRENTPRECISION_FUNC(hypre_dim3)( x, y, z ); +} + +#endif + +#endif + diff --git a/src/utilities/mup_functions.c b/src/utilities/mup_functions.c index 23fdf0915b..cf01b72bfa 100644 --- a/src/utilities/mup_functions.c +++ b/src/utilities/mup_functions.c @@ -311,6 +311,15 @@ HYPRE_VersionNumber( HYPRE_Int *major_ptr, HYPRE_Int *minor_ptr, HYPRE_Int *patc } +/*--------------------------------------------------------------------------*/ + +size_t +hypre_GetSizeOfReal( void ) +{ + HYPRE_Precision precision = hypre_GlobalPrecision(); + return hypre_GetSizeOfReal_pre( precision ); +} + #endif diff --git a/src/utilities/mup_functions_gpu.c b/src/utilities/mup_functions_gpu.c new file mode 100644 index 0000000000..4b8b5f0089 --- /dev/null +++ b/src/utilities/mup_functions_gpu.c @@ -0,0 +1,22 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +#include "_hypre_utilities.h" +#include "_hypre_utilities.hpp" + +#ifdef HYPRE_MIXED_PRECISION + 
+/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#if defined(HYPRE_USING_GPU) + + +#endif + +#endif + diff --git a/src/utilities/mup_pre.c b/src/utilities/mup_pre.c index 92644cd996..29d2dd9bf2 100644 --- a/src/utilities/mup_pre.c +++ b/src/utilities/mup_pre.c @@ -608,6 +608,24 @@ HYPRE_VersionNumber_pre( HYPRE_Precision precision, HYPRE_Int *major_ptr, HYPRE_ } +/*--------------------------------------------------------------------------*/ + +size_t +hypre_GetSizeOfReal_pre( HYPRE_Precision precision ) +{ + switch (precision) + { + case HYPRE_REAL_SINGLE: + return hypre_GetSizeOfReal_flt( ); + case HYPRE_REAL_DOUBLE: + return hypre_GetSizeOfReal_dbl( ); + case HYPRE_REAL_LONGDOUBLE: + return hypre_GetSizeOfReal_long_dbl( ); + default: + { size_t value = 0; hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unknown solver precision"); return value; } + } +} + #endif diff --git a/src/utilities/mup_pre_gpu.c b/src/utilities/mup_pre_gpu.c new file mode 100644 index 0000000000..4b8b5f0089 --- /dev/null +++ b/src/utilities/mup_pre_gpu.c @@ -0,0 +1,22 @@ + +/*** DO NOT EDIT THIS FILE DIRECTLY (use ../config/gen_code.sh to generate) ***/ + +#include "_hypre_utilities.h" +#include "_hypre_utilities.hpp" + +#ifdef HYPRE_MIXED_PRECISION + +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +#if defined(HYPRE_USING_GPU) + + +#endif + +#endif + diff --git a/src/utilities/protos.h b/src/utilities/protos.h index 176f2fa373..6c6f92341f 100644 --- a/src/utilities/protos.h +++ b/src/utilities/protos.h @@ -358,6 +358,7 @@ char* hypre_ConvertIndicesToString(HYPRE_Int size, HYPRE_Int *indices); HYPRE_Int hypre_SetSyncCudaCompute(HYPRE_Int action); HYPRE_Int hypre_RestoreSyncCudaCompute(void); HYPRE_Int hypre_GetSyncCudaCompute(HYPRE_Int *cuda_compute_stream_sync_ptr); +size_t hypre_GetSizeOfReal(void); /* handle.c */ HYPRE_Int hypre_SetLogLevel( HYPRE_Int log_level ); diff --git a/src/utilities/protos_mp.h b/src/utilities/protos_mp.h new file mode 100644 index 0000000000..244cd30ba9 --- /dev/null +++ b/src/utilities/protos_mp.h @@ -0,0 +1,34 @@ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/* Mixed precision function protos */ + +#ifdef HYPRE_MIXED_PRECISION +/* utilities_mp.c */ +HYPRE_Int +hypre_RealArrayCopyHost_mp(HYPRE_Precision precision_x, void *x, + HYPRE_Precision precision_y, void *y, HYPRE_Int n); +HYPRE_Int +hypre_RealArrayCopy_mp(HYPRE_Precision precision_x, void *x, HYPRE_MemoryLocation location_x, + HYPRE_Precision precision_y, void *y, HYPRE_MemoryLocation location_y, HYPRE_Int n); +void * +hypre_RealArrayClone_mp(HYPRE_Precision precision_x, void *x, HYPRE_MemoryLocation location_x, + HYPRE_Precision new_precision, HYPRE_MemoryLocation new_location, HYPRE_Int n); +HYPRE_Int +hypre_RealArrayAxpynHost_mp(HYPRE_Precision precision_x, hypre_long_double alpha, void *x, + HYPRE_Precision precision_y, void *y, HYPRE_Int n); +HYPRE_Int +hypre_RealArrayAxpyn_mp(HYPRE_Precision precision_x, void *x, HYPRE_Precision precision_y, void *y, + HYPRE_MemoryLocation location, HYPRE_Int n, hypre_long_double alpha); +/* utilities_mp_device.c */ +HYPRE_Int +hypre_RealArrayCopyDevice_mp(HYPRE_Precision precision_x, void *x, + HYPRE_Precision precision_y, void *y, HYPRE_Int n); +HYPRE_Int +hypre_RealArrayAxpynDevice_mp(HYPRE_Precision precision_x, hypre_long_double alpha, void *x, + HYPRE_Precision precision_y, void *y, HYPRE_Int n); +#endif diff --git a/src/utilities/utilities.c b/src/utilities/utilities.c index 454a0e3081..4ea9d1ca25 100644 --- a/src/utilities/utilities.c +++ b/src/utilities/utilities.c @@ -265,3 +265,11 @@ hypre_ConvertIndicesToString(HYPRE_Int size, return string; } + +/*--------------------------------------------------------------------------* +* hypre_GetSizeOfReal: get size of real (floating point) precision type +*--------------------------------------------------------------------------*/ +size_t hypre_GetSizeOfReal(void) +{ + return sizeof(HYPRE_Real); +} diff --git 
a/src/utilities/utilities_mp.c b/src/utilities/utilities_mp.c new file mode 100644 index 0000000000..44ddfd0103 --- /dev/null +++ b/src/utilities/utilities_mp.c @@ -0,0 +1,363 @@ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/****************************************************************************** +* +* hypre utilities mixed-precision interface +* +*****************************************************************************/ + +#include "_hypre_utilities.h" + +#if defined(HYPRE_MIXED_PRECISION) + +/*--------------------------------------------------------------------------* +* hypre_RealArrayCopyHost_mp: copy n array contents from x to y. +* Assumes arrays x and y are both on host memory. 
+*--------------------------------------------------------------------------*/ +HYPRE_Int +hypre_RealArrayCopyHost_mp(HYPRE_Precision precision_x, void *x, + HYPRE_Precision precision_y, void *y, HYPRE_Int n) +{ + HYPRE_Int i; + + /* Mixed-precision copy of data */ + switch (precision_x) + { + case HYPRE_REAL_SINGLE: + switch (precision_y) + { + case HYPRE_REAL_DOUBLE: +#ifdef HYPRE_USING_OPENMP + #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < n; i++) + { + ((hypre_double *)y)[i] = (hypre_double)((hypre_float *)x)[i]; + } + break; + case HYPRE_REAL_LONGDOUBLE: +#ifdef HYPRE_USING_OPENMP + #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < n; i++) + { + ((hypre_long_double *)y)[i] = (hypre_long_double)((hypre_float *)x)[i]; + } + break; + default: + break; + } + break; + case HYPRE_REAL_DOUBLE: + switch (precision_y) + { + case HYPRE_REAL_SINGLE: +#ifdef HYPRE_USING_OPENMP + #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < n; i++) + { + ((hypre_float *)y)[i] = (hypre_float)((hypre_double *)x)[i]; + } + break; + case HYPRE_REAL_LONGDOUBLE: +#ifdef HYPRE_USING_OPENMP + #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < n; i++) + { + ((hypre_long_double *)y)[i] = (hypre_long_double)((hypre_double *)x)[i]; + } + break; + default: + break; + } + break; + case HYPRE_REAL_LONGDOUBLE: + switch (precision_y) + { + case HYPRE_REAL_SINGLE: +#ifdef HYPRE_USING_OPENMP + #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < n; i++) + { + ((hypre_float *)y)[i] = (hypre_float)((hypre_long_double *)x)[i]; + } + break; + case HYPRE_REAL_DOUBLE: +#ifdef HYPRE_USING_OPENMP + #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < n; i++) + { + ((hypre_double *)y)[i] = (hypre_double)((hypre_long_double *)x)[i]; + } + break; + default: + break; + } + break; + default: + hypre_error_w_msg(HYPRE_ERROR_GENERIC, 
"Error: Undefined precision type for array Copy!\n"); + break; + } + + return hypre_error_flag; +} + +/*--------------------------------------------------------------------------* +* hypre_RealArrayCopy_mp: copy n array contents from x to y. +* Arrays x and y need not have the same memory location. +*--------------------------------------------------------------------------*/ +HYPRE_Int +hypre_RealArrayCopy_mp(HYPRE_Precision precision_x, void *x, HYPRE_MemoryLocation location_x, + HYPRE_Precision precision_y, void *y, HYPRE_MemoryLocation location_y, HYPRE_Int n) +{ +#ifdef HYPRE_PROFILE + hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime(); +#endif + + hypre_GpuProfilingPushRange("RealArrayCopy"); + + HYPRE_Int nbytes; + size_t sizeof_x; + + /* tmp pointer for data copy */ + void *xp = NULL; + + /* get sizeof x data */ + sizeof_x = hypre_GetSizeOfReal_pre(precision_x); + + nbytes = n * sizeof_x; + /* Call standard memory copy if precisions match. */ + if (precision_x == precision_y) + { + hypre_Memcpy(y, x, nbytes, location_y, location_x); + + return hypre_error_flag; + } + + /* Check memory location */ + if (location_x != location_y) + { + /* Allocate memory and copy x to y's memory location */ + xp = hypre_CAlloc(n, sizeof_x, location_y); + hypre_Memcpy(xp, x, nbytes, location_y, location_x); + } + else + { + xp = x; + } + +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(location_y); + + if (exec == HYPRE_EXEC_DEVICE) + { + hypre_RealArrayCopyDevice_mp(precision_x, xp, precision_y, y, n); + } + else +#endif + { + hypre_RealArrayCopyHost_mp(precision_x, xp, precision_y, y, n); + } + /* free xp if allocated */ + if (location_x != location_y) + { + hypre_TFree(xp, location_y); + } +#ifdef HYPRE_PROFILE + hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime(); +#endif + hypre_GpuProfilingPopRange(); + + return hypre_error_flag; +} + 
+/*--------------------------------------------------------------------------* +* hypre_RealArrayClone_mp: Clone array x. +*--------------------------------------------------------------------------*/ +void * +hypre_RealArrayClone_mp(HYPRE_Precision precision_x, void *x, HYPRE_MemoryLocation location_x, + HYPRE_Precision new_precision, HYPRE_MemoryLocation new_location, HYPRE_Int n) +{ + /* cloned data */ + void *y = NULL; + size_t sizeof_y; + + /* get sizeof new_precision data */ + sizeof_y = hypre_GetSizeOfReal_pre(new_precision); + /* Allocate memory for cloned data */ + y = hypre_CAlloc(n, sizeof_y, new_location); + + /* Copy from x to y */ + hypre_RealArrayCopy_mp(precision_x, x, location_x, + new_precision, y, new_location, n); + return y; +} + +/*--------------------------------------------------------------------------* +* hypre_RealArrayAxpynHost_mp: Axpy on n array contents into y. +* Assumes arrays x and y are both on host memory. +*--------------------------------------------------------------------------*/ +HYPRE_Int +hypre_RealArrayAxpynHost_mp(HYPRE_Precision precision_x, hypre_long_double alpha, void *x, + HYPRE_Precision precision_y, void *y, HYPRE_Int n) +{ + HYPRE_Int i; + + /* Mixed-precision copy of data */ + switch (precision_x) + { + case HYPRE_REAL_SINGLE: + switch (precision_y) + { + case HYPRE_REAL_DOUBLE: +#ifdef HYPRE_USING_OPENMP + #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < n; i++) + { + ((hypre_double *)y)[i] += (hypre_double)((hypre_float)alpha * ((hypre_float *)x)[i]); + } + break; + case HYPRE_REAL_LONGDOUBLE: +#ifdef HYPRE_USING_OPENMP + #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < n; i++) + { + ((hypre_long_double *)y)[i] += (hypre_long_double)((hypre_float)alpha * ((hypre_float *)x)[i]); + } + break; + default: + break; + } + break; + case HYPRE_REAL_DOUBLE: + switch (precision_y) + { + case HYPRE_REAL_SINGLE: +#ifdef HYPRE_USING_OPENMP + #pragma omp 
parallel for private(i) HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < n; i++) + { + ((hypre_float *)y)[i] += (hypre_float)((hypre_double)alpha * ((hypre_double *)x)[i]); + } + break; + case HYPRE_REAL_LONGDOUBLE: +#ifdef HYPRE_USING_OPENMP + #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < n; i++) + { + ((hypre_long_double *)y)[i] += (hypre_long_double)((hypre_double)alpha * ((hypre_double *)x)[i]); + } + break; + default: + break; + } + break; + case HYPRE_REAL_LONGDOUBLE: + switch (precision_y) + { + case HYPRE_REAL_SINGLE: +#ifdef HYPRE_USING_OPENMP + #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < n; i++) + { + ((hypre_float *)y)[i] += (hypre_float)((hypre_long_double)alpha * ((hypre_long_double *)x)[i]); + } + break; + case HYPRE_REAL_DOUBLE: +#ifdef HYPRE_USING_OPENMP + #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < n; i++) + { + ((hypre_double *)y)[i] += (hypre_double)((hypre_long_double)alpha * ((hypre_long_double *)x)[i]); + } + break; + default: + break; + } + break; + default: + hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Error: Undefined precision type for array Axpyn!\n"); + break; + } + + return hypre_error_flag; +} + +/*--------------------------------------------------------------------------* +* hypre_RealArrayAxpyn_mp: Axpy on n array contents into y. +* Assumes arrays x and y have the same memory location. +*--------------------------------------------------------------------------*/ +HYPRE_Int +hypre_RealArrayAxpyn_mp(HYPRE_Precision precision_x, void *x, HYPRE_Precision precision_y, void *y, + HYPRE_MemoryLocation location, HYPRE_Int n, hypre_long_double alpha) +{ +#ifdef HYPRE_PROFILE + hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime(); +#endif + + hypre_GpuProfilingPushRange("RealArrayAxpyn"); + + /* Call standard memory copy if precisions match. 
*/ + if (precision_x == precision_y) + { + hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Error: Not Implemented!\n"); + /* + #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(location); + + hypre_long_double d_alpha = (hypre_long_double)(*alpha); + + if (exec == HYPRE_EXEC_DEVICE) + { + hypreDevice_ComplexDeviceArrayAxpyn_pre(precision_y, d_alpha, x, y, n); + } + else + #endif + { + HYPRE_Int inc = 1; + hypre_daxpy_pre(precision_y, n, alpha, x, inc, y, inc); + + } + return hypre_error_flag; + */ + } + +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(location); + + if (exec == HYPRE_EXEC_DEVICE) + { + hypre_RealArrayAxpynDevice_mp(precision_x, alpha, x, precision_y, y, n); + } + else +#endif + { + hypre_RealArrayAxpynHost_mp(precision_x, alpha, x, precision_y, y, n); + } + +#ifdef HYPRE_PROFILE + hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime(); +#endif + hypre_GpuProfilingPopRange(); + + return hypre_error_flag; +} + + +#endif + diff --git a/src/utilities/utilities_mp_device.c b/src/utilities/utilities_mp_device.c new file mode 100644 index 0000000000..8bc4f189cd --- /dev/null +++ b/src/utilities/utilities_mp_device.c @@ -0,0 +1,232 @@ +/****************************************************************************** + * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/****************************************************************************** +* +* hypre utilities mixed-precision interface on device +* +*****************************************************************************/ + +#include "_hypre_utilities.h" +#include "_hypre_utilities.hpp" +#include "_hypre_onedpl.hpp" + +#if defined(HYPRE_MIXED_PRECISION) + +#if defined(HYPRE_USING_GPU) +/*-------------------------------------------------------------------- + * Mixed-precision hypreGPUKernel_Axpyzn_mp + * Lifts or drops x and y to precision of z. + *--------------------------------------------------------------------*/ + +template +__global__ void +hypreGPUKernel_Axpyzn_mp( hypre_DeviceItem &item, + HYPRE_Int n, + T1 *x, + T2 *y, + T3 *z, + T1 a, + T2 b ) +{ + HYPRE_Int i = hypre_gpu_get_grid_thread_id<1, 1>(item); + + if (i < n) + { + z[i] = static_cast(a * x[i]) + static_cast(b * y[i]); + } +} + +/*-------------------------------------------------------------------- + * hypreDevice_Axpyzn_mp + *--------------------------------------------------------------------*/ + +template +HYPRE_Int +hypreDevice_Axpyzn_mp(HYPRE_Int n, T1 *d_x, T2 *d_y, T3 *d_z, T1 a, T2 b) +{ + if (n <= 0) + { + return hypre_error_flag; + } + + dim3 bDim = hypre_GetDefaultDeviceBlockDimension(); + dim3 gDim = hypre_GetDefaultDeviceGridDimension(n, "thread", bDim); + + HYPRE_GPU_LAUNCH( hypreGPUKernel_Axpyzn_mp, gDim, bDim, n, d_x, d_y, d_z, a, b ); + + return hypre_error_flag; +} +#endif // HYPRE_USING_GPU + +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) +/*--------------------------------------------------------------------------* +* hypre_RealArrayCopyDevice_mp: copy n array contents from x to y. +* Assumes arrays x and y are both on device memory. +* +* NOTE: We could use in if-statement for the inner switch statement on precision_y. 
+ However, inner switch-statement allows for additional future cases - DOK. +*--------------------------------------------------------------------------*/ +HYPRE_Int +hypre_RealArrayCopyDevice_mp(HYPRE_Precision precision_x, void *x, + HYPRE_Precision precision_y, void *y, HYPRE_Int n) +{ + + /* Mixed-precision copy of data. + * Execute the same code for hypre_long_double and hypre_double + */ + switch (precision_x) + { + case HYPRE_REAL_SINGLE: + switch (precision_y) + { + case HYPRE_REAL_DOUBLE: + case HYPRE_REAL_LONGDOUBLE: + { + hypre_float *xp = (hypre_float *)x; + hypre_double *yp = (hypre_double *)y; + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + HYPRE_THRUST_CALL( transform, xp, xp + n, yp, + hypreFunctor_ElementCast() ); +#elif defined(HYPRE_USING_SYCL) + HYPRE_ONEDPL_CALL( std::transform, xp, xp + n, yp, [](const auto & x) {return static_cast(x);} ); +#elif defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_Int i; + + #pragma omp target teams distribute parallel for private(i) is_device_ptr(xp, yp) + for (i = 0; i < n; i++) + { + yp[i] = static_cast(xp[i]); + } +#endif + } + break; + default: + break; + } + break; + case HYPRE_REAL_DOUBLE: + case HYPRE_REAL_LONGDOUBLE: + switch (precision_y) + { + case HYPRE_REAL_SINGLE: + { + hypre_double *xp = (hypre_double *)x; + hypre_float *yp = (hypre_float *)y; + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + HYPRE_THRUST_CALL( transform, xp, xp + n, yp, + hypreFunctor_ElementCast() ); +#elif defined(HYPRE_USING_SYCL) + HYPRE_ONEDPL_CALL( std::transform, xp, xp + n, yp, [](const auto & x) {return static_cast(x);}); +#elif defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_Int i; + + #pragma omp target teams distribute parallel for private(i) is_device_ptr(xp, yp) + for (i = 0; i < n; i++) + { + yp[i] = static_cast(xp[i]); + } +#endif + } + break; + default: + break; + } + break; + default: + hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Error: Undefined precision type for array Copy!\n"); + break; + } + + 
return hypre_error_flag; +} + +/*--------------------------------------------------------------------------* +* hypre_RealArrayAxpynDevice_mp: Axpy on n array contents into y. +* Assumes arrays x and y are both on device memory. +* +* NOTE: We could use in if-statement for the inner switch statement on precision_y. + However, inner switch-statement allows for additional future cases - DOK. +*--------------------------------------------------------------------------*/ +HYPRE_Int +hypre_RealArrayAxpynDevice_mp(HYPRE_Precision precision_x, hypre_long_double alpha, void *x, + HYPRE_Precision precision_y, void *y, HYPRE_Int n) +{ + + /* Mixed-precision copy of data. + * Execute the same code for hypre_long_double and hypre_double + */ + switch (precision_x) + { + case HYPRE_REAL_SINGLE: + switch (precision_y) + { + case HYPRE_REAL_DOUBLE: + case HYPRE_REAL_LONGDOUBLE: + { + hypre_float *xp = (hypre_float *)x; + hypre_double *yp = (hypre_double *)y; + +#if defined(HYPRE_USING_GPU) + hypreDevice_Axpyzn_mp(n, xp, yp, yp, (hypre_float)alpha, 1.0); + hypre_SyncComputeStream(); +#elif defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_Int i; + + #pragma omp target teams distribute parallel for private(i) is_device_ptr(xp, yp) + for (i = 0; i < n; i++) + { + yp[i] += static_cast((hypre_float)alpha * xp[i]); + } +#endif + } + break; + default: + break; + } + break; + case HYPRE_REAL_DOUBLE: + case HYPRE_REAL_LONGDOUBLE: + switch (precision_y) + { + case HYPRE_REAL_SINGLE: + { + hypre_double *xp = (hypre_double *)x; + hypre_float *yp = (hypre_float *)y; + +#if defined(HYPRE_USING_GPU) + + hypreDevice_Axpyzn_mp(n, xp, yp, yp, (hypre_double)alpha, 1.0f); + hypre_SyncComputeStream(); +#elif defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_Int i; + + #pragma omp target teams distribute parallel for private(i) is_device_ptr(xp, yp) + for (i = 0; i < n; i++) + { + yp[i] += static_cast((hypre_double)alpha * xp[i]); + } +#endif + } + break; + default: + break; + } + break; + default: + 
hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Error: Undefined precision type for array Axpyn!\n"); + break; + } + + return hypre_error_flag; +} + +#endif // HYPRE_USING_GPU || HYPRE_USING_DEVICE_OPENMP +#endif // HYPRE_MIXED_PRECISION +