diff --git a/include/Global.h b/include/Global.h index ac1d5eb66e..b6ca726d3d 100644 --- a/include/Global.h +++ b/include/Global.h @@ -181,7 +181,9 @@ extern double AveDensity_Init; // initial average mass density (in al extern int Pot_ParaBuf; // number of parallel buffers to exchange potential for the // Poisson/Gravity solvers and the potential refinement extern int Rho_ParaBuf; // number of parallel buffers to exchange density for the Poisson solver -extern real *GreenFuncK; +extern bool FFTW_Inited[NLEVEL]; +extern bool GreenFuncK_Inited[NLEVEL]; +extern real *GreenFuncK[NLEVEL]; extern double GFUNC_COEFF0; extern double DT__GRAVITY; extern double NEWTON_G; diff --git a/include/Prototype.h b/include/Prototype.h index 2bf9474a87..c869ed1189 100644 --- a/include/Prototype.h +++ b/include/Prototype.h @@ -260,14 +260,15 @@ void Init_ByRestart_HDF5( const char *FileName ); #endif #ifdef SUPPORT_FFTW void End_FFTW(); -void Init_FFTW(); +void Init_FFTW( const int lv ); void Patch2Slab( real *VarS, real *SendBuf_Var, real *RecvBuf_Var, long *SendBuf_SIdx, long *RecvBuf_SIdx, int **List_PID, int **List_k, long *List_NSend_Var, long *List_NRecv_Var, const int *List_z_start, const int local_nz, const int FFT_Size[], const int NRecvSlice, - const double PrepTime, const long TVar, const bool InPlacePad, const bool ForPoisson, const bool AddExtraMass ); + const double PrepTime, const long TVar, const bool InPlacePad, const bool ForPoisson, + const bool AddExtraMass, const int lv ); void Slab2Patch( const real *VarS, real *SendBuf, real *RecvBuf, const int SaveSg, const long *List_SIdx, int **List_PID, int **List_k, long *List_NSend, long *List_NRecv, const int local_nz, const int FFT_Size[], - const int NSendSlice, const long TVar, const bool InPlacePad ); + const int NSendSlice, const long TVar, const bool InPlacePad, const int lv ); #endif // #ifdef SUPPORT_FFTW void Microphysics_Init(); void Microphysics_End(); @@ -405,12 +406,12 @@ void CPU_PoissonGravitySolver( const real h_Rho_Array [][RHO_NXT][RHO_NXT][RH const bool SelfGravity, const OptExtPot_t ExtPot, const OptExtAcc_t ExtAcc, const double TimeNew, const double TimeOld, const real MinEint, const bool UseWaveFlag ); -void CPU_ExtPotSolver_BaseLevel( const ExtPot_t Func, const double AuxArray_Flt[], const int AuxArray_Int[], - const real Table[], void **GenePtr, - const double Time, const bool PotIsInit, const int SaveSg ); +void CPU_ExtPotSolver_FullyRefinedLevel( const ExtPot_t Func, const double AuxArray_Flt[], const int AuxArray_Int[], + const real Table[], void **GenePtr, + const double Time, const bool PotIsInit, const int SaveSg, const int lv ); #ifdef SUPPORT_FFTW -void CPU_PoissonSolver_FFT( const real Poi_Coeff, const int SaveSg, const double PrepTime ); -void Init_GreenFuncK(); +void CPU_PoissonSolver_FFT( const real Poi_Coeff, const int SaveSg, const double PrepTime, const int lv ); +void Init_GreenFuncK( const int lv ); #endif void End_MemFree_PoissonGravity(); void Gra_AdvanceDt( const int lv, const double TimeNew, const double TimeOld, const double dt, diff --git a/src/Init/Init_FFTW.cpp b/src/Init/Init_FFTW.cpp index dabb0b2389..114bd0573a 100644 --- a/src/Init/Init_FFTW.cpp +++ b/src/Init/Init_FFTW.cpp @@ -4,15 +4,30 @@ static int ZIndex2Rank( const int IndexZ, const int *List_z_start, const int TRank_Guess ); -root_fftw::real_plan_nd FFTW_Plan_PS; // PS : plan for calculating the power spectrum +// PS : plan for calculating the power spectrum +root_fftw::real_plan_nd FFTW_Plan_PS; +static void Init_FFTW_PowerSpectrum( const int StartupFlag ); +static void End_FFTW_PowerSpectrum(); + +// Poi : plan for the self-gravity Poisson solver #ifdef GRAVITY -root_fftw::real_plan_nd FFTW_Plan_Poi, FFTW_Plan_Poi_Inv; // Poi : plan for the self-gravity Poisson solver +root_fftw::real_plan_nd FFTW_Plan_Poi[NLEVEL], FFTW_Plan_Poi_Inv[NLEVEL]; +static void Init_FFTW_Poisson( const int StartupFlag, const int lv ); +static void End_FFTW_Poisson(); #endif // #ifdef GRAVITY + +// Psi : plan for the ELBDM spectral solver #if ( MODEL == ELBDM ) -root_fftw::complex_plan_nd FFTW_Plan_Psi, FFTW_Plan_Psi_Inv; // Psi : plan for the ELBDM spectral solver +root_fftw::complex_plan_nd FFTW_Plan_Psi, FFTW_Plan_Psi_Inv; +static void Init_FFTW_ELBDMSpectral( const int StartupFlag ); +static void End_FFTW_ELBDMSpectral(); + +// ExtPsi : plan for the Gram Fourier extension solver #if ( WAVE_SCHEME == WAVE_GRAMFE ) -gramfe_fftw::complex_plan_1d FFTW_Plan_ExtPsi, FFTW_Plan_ExtPsi_Inv; // ExtPsi : plan for the Gram Fourier extension solver -#endif // #if (WAVE_SCHEME == WAVE_GRAMFE) +gramfe_fftw::complex_plan_1d FFTW_Plan_ExtPsi, FFTW_Plan_ExtPsi_Inv; +static void Init_FFTW_GramFE( const int StartupFlag ); +static void End_FFTW_GramFE(); +#endif // #if ( WAVE_SCHEME == WAVE_GRAMFE ) #endif // #if ( MODEL == ELBDM ) @@ -73,102 +88,17 @@ size_t ComputeTotalSize( int* size ) //------------------------------------------------------------------------------------------------------- // Function : Init_FFTW // Description : Create the FFTW plans +// +// Parameter : lv : Target level //------------------------------------------------------------------------------------------------------- -void Init_FFTW() +void Init_FFTW( const int lv ) { if ( MPI_Rank == 0 ) Aux_Message( stdout, "%s ... ", __FUNCTION__ ); + if ( FFTW_Inited[lv] ) return; -# if ( SUPPORT_FFTW == FFTW3 ) - FFTW3_Double_OMP_Enabled = false; - FFTW3_Single_OMP_Enabled = false; - - -# ifdef OPENMP - -# ifndef SERIAL -// check the level of MPI thread support - int MPI_Thread_Status; - MPI_Query_thread( &MPI_Thread_Status ); - -// enable multithreading if possible - FFTW3_Double_OMP_Enabled = MPI_Thread_Status >= MPI_THREAD_FUNNELED; - FFTW3_Single_OMP_Enabled = FFTW3_Double_OMP_Enabled; -# else // # ifndef SERIAL - -// always enable multithreading in serial mode with openmp - FFTW3_Double_OMP_Enabled = true; - FFTW3_Single_OMP_Enabled = true; -# endif // # ifndef SERIAL ... # else - -// initialise fftw multithreading - if (FFTW3_Double_OMP_Enabled) { - FFTW3_Double_OMP_Enabled = fftw_init_threads(); - if ( !FFTW3_Double_OMP_Enabled ) Aux_Error( ERROR_INFO, "fftw_init_threads() failed !!\n" ); - } - if (FFTW3_Single_OMP_Enabled) { - FFTW3_Single_OMP_Enabled = fftwf_init_threads(); - if ( !FFTW3_Single_OMP_Enabled ) Aux_Error( ERROR_INFO, "fftwf_init_threads() failed !!\n" ); - } -# endif // # ifdef OPENMP - -// initialise fftw mpi support -# ifndef SERIAL - fftw_mpi_init(); - fftwf_mpi_init(); -# endif // # ifndef SERIAL - -// tell all subsequent fftw3 planners to use OMP_NTHREAD threads -# ifdef OPENMP - if (FFTW3_Double_OMP_Enabled) fftw_plan_with_nthreads (OMP_NTHREAD); - if (FFTW3_Single_OMP_Enabled) fftwf_plan_with_nthreads(OMP_NTHREAD); -# endif // # ifdef OPENMP -# endif // # if ( SUPPORT_FFTW == FFTW3 ) - - - -// determine the FFT size for the power spectrum - int PS_FFT_Size[3] = { NX0_TOT[0], NX0_TOT[1], NX0_TOT[2] }; - -// determine the FFT size for the self-gravity solver -# ifdef GRAVITY - int Gravity_FFT_Size[3] = { NX0_TOT[0], NX0_TOT[1], NX0_TOT[2] }; - -// the zero-padding method is adopted for the isolated BC. - if ( OPT__BC_POT == BC_POT_ISOLATED ) - for (int d=0; d<3; d++) Gravity_FFT_Size[d] *= 2; - -// check - if ( MPI_Rank == 0 ) - for (int d=0; d<3; d++) - { - if ( Gravity_FFT_Size[d] <= 0 ) Aux_Error( ERROR_INFO, "Gravity_FFT_Size[%d] = %d < 0 !!\n", d, Gravity_FFT_Size[d] ); - } -# endif // # ifdef GRAVITY - -// determine the FFT size for the base-level FFT wave solver -# if ( MODEL == ELBDM ) - int Psi_FFT_Size[3] = { NX0_TOT[0], NX0_TOT[1], NX0_TOT[2] }; -# if ( defined(SERIAL) || SUPPORT_FFTW == FFTW3 ) - int InvPsi_FFT_Size[3] = { NX0_TOT[0], NX0_TOT[1], NX0_TOT[2] }; -# else // # ifdef SERIAL || FFTW3 -// Note that the dimensions of the inverse transform in FFTW2, -// which are given by the dimensions of the output of the forward transform, -// are Ny*Nz*Nx because we are using "FFTW_TRANSPOSED_ORDER" in fftwnd_mpi(). - int InvPsi_FFT_Size[3] = { NX0_TOT[0], NX0_TOT[2], NX0_TOT[1] }; -# endif // # ifdef SERIAL || FFTW3 ... # else - -# if ( WAVE_SCHEME == WAVE_GRAMFE ) - int ExtPsi_FFT_Size = GRAMFE_FLU_NXT; -# endif // # if ( WAVE_SCHEME == WAVE_GRAMFE ) -# endif // # if ( MODEL == ELBDM ) - real* PS = NULL; - real* RhoK = NULL; - real* PsiK = NULL; -# if ( WAVE_SCHEME == WAVE_GRAMFE ) - gramfe_fftw::fft_complex* ExtPsiK = NULL; -# endif // # if ( WAVE_SCHEME == WAVE_GRAMFE ) + static bool FirstTime = true; // determine how to initialise fftw plans int StartupFlag; @@ -181,73 +111,78 @@ void Init_FFTW() case FFTW_STARTUP_PATIENT: StartupFlag = FFTW_PATIENT; break; # endif // # if ( SUPPORT_FFTW == FFTW3 ) - default: Aux_Error( ERROR_INFO, "unrecognised FFTW startup option %d !!\n", OPT__FFTW_STARTUP ); + default: Aux_Error( ERROR_INFO, "unrecognised FFTW startup option %d !!\n", OPT__FFTW_STARTUP ); } // switch ( OPT__FFTW_STARTUP ) -// allocate memory for arrays in fftw3 -# if ( SUPPORT_FFTW == FFTW3 ) - PS = (real*) root_fftw::fft_malloc( ComputePaddedTotalSize(PS_FFT_Size )*sizeof(real) ); -# ifdef GRAVITY - RhoK = (real*) root_fftw::fft_malloc( ComputePaddedTotalSize(Gravity_FFT_Size)*sizeof(real) ); -# endif // # ifdef GRAVITY -# if ( MODEL == ELBDM ) - PsiK = (real*) root_fftw::fft_malloc( ComputeTotalSize ( Psi_FFT_Size )*sizeof(real)*2 ); // 2*real for size of complex number -# endif // # if ( MODEL == ELBDM ) -# if ( WAVE_SCHEME == WAVE_GRAMFE ) - ExtPsiK = (gramfe_fftw::fft_complex*) gramfe_fftw::fft_malloc( ExtPsi_FFT_Size*sizeof(gramfe_fftw::fft_complex) ); -# endif // # if ( WAVE_SCHEME == WAVE_GRAMFE ) -# endif // # if ( SUPPORT_FFTW == FFTW3 ) + if ( FirstTime ) + { +# if ( SUPPORT_FFTW == FFTW3 ) + FFTW3_Double_OMP_Enabled = false; + FFTW3_Single_OMP_Enabled = false; -// create plans for power spectrum and the self-gravity solver - FFTW_Plan_PS = root_fftw_create_3d_r2c_plan(PS_FFT_Size, PS, StartupFlag); -# ifdef GRAVITY - FFTW_Plan_Poi = root_fftw_create_3d_r2c_plan(Gravity_FFT_Size, RhoK, StartupFlag); - FFTW_Plan_Poi_Inv = root_fftw_create_3d_c2r_plan(Gravity_FFT_Size, RhoK, StartupFlag); -# endif // # ifdef GRAVITY -# if ( MODEL == ELBDM ) - FFTW_Plan_Psi = root_fftw_create_3d_forward_c2c_plan ( Psi_FFT_Size, PsiK, StartupFlag ); - FFTW_Plan_Psi_Inv = root_fftw_create_3d_backward_c2c_plan( InvPsi_FFT_Size, PsiK, StartupFlag ); +# ifdef OPENMP -# if ( WAVE_SCHEME == WAVE_GRAMFE ) +# ifndef SERIAL +// check the level of MPI thread support + int MPI_Thread_Status; + MPI_Query_thread( &MPI_Thread_Status ); -// the Gram-Fourier extension planners only use one thread because OMP parallelisation evolves different patches parallely -// From the FFTW3 documentation: https://www.fftw.org/fftw3_doc/Usage-of-Multi_002dthreaded-FFTW.html -// "You can call fftw_plan_with_nthreads, create some plans, -// call fftw_plan_with_nthreads again with a different argument, and create some more plans for a new number of threads." +// enable multithreading if possible + FFTW3_Double_OMP_Enabled = MPI_Thread_Status >= MPI_THREAD_FUNNELED; + FFTW3_Single_OMP_Enabled = FFTW3_Double_OMP_Enabled; +# else // # ifndef SERIAL -# if ( defined(SUPPORT_FFTW3) && defined(OPENMP) ) - if (FFTW3_Double_OMP_Enabled) fftw_plan_with_nthreads(1); - if (FFTW3_Single_OMP_Enabled) fftwf_plan_with_nthreads(1); -# endif // # if ( defined(SUPPORT_FFTW3) && defined(OPENMP) ) +// always enable multithreading in serial mode with openmp + FFTW3_Double_OMP_Enabled = true; + FFTW3_Single_OMP_Enabled = true; +# endif // # ifndef SERIAL ... # else - FFTW_Plan_ExtPsi = gramfe_fftw_create_1d_forward_c2c_plan ( ExtPsi_FFT_Size, ExtPsiK, StartupFlag ); - FFTW_Plan_ExtPsi_Inv = gramfe_fftw_create_1d_backward_c2c_plan( ExtPsi_FFT_Size, ExtPsiK, StartupFlag ); +// initialise fftw multithreading + if ( FFTW3_Double_OMP_Enabled ) + { + FFTW3_Double_OMP_Enabled = fftw_init_threads(); + if ( !FFTW3_Double_OMP_Enabled ) Aux_Error( ERROR_INFO, "fftw_init_threads() failed !!\n" ); + } + if ( FFTW3_Single_OMP_Enabled ) + { + FFTW3_Single_OMP_Enabled = fftwf_init_threads(); + if ( !FFTW3_Single_OMP_Enabled ) Aux_Error( ERROR_INFO, "fftwf_init_threads() failed !!\n" ); + } +# endif // # ifdef OPENMP + +// initialise fftw mpi support +# ifndef SERIAL + fftw_mpi_init(); + fftwf_mpi_init(); +# endif // # ifndef SERIAL + +// tell all subsequent fftw3 planners to use OMP_NTHREAD threads +# ifdef OPENMP + if ( FFTW3_Double_OMP_Enabled ) fftw_plan_with_nthreads ( OMP_NTHREAD ); + if ( FFTW3_Single_OMP_Enabled ) fftwf_plan_with_nthreads( OMP_NTHREAD ); +# endif // # ifdef OPENMP +# endif // # if ( SUPPORT_FFTW == FFTW3 ) -// restore regular settings -# if ( defined(SUPPORT_FFTW3) && defined(OPENMP) ) - if (FFTW3_Double_OMP_Enabled) fftw_plan_with_nthreads(OMP_NTHREAD); - if (FFTW3_Single_OMP_Enabled) fftwf_plan_with_nthreads(OMP_NTHREAD); -# endif // # if ( defined(SUPPORT_FFTW3) && defined(OPENMP) ) -# endif // # if ( WAVE_SCHEME == WAVE_GRAMFE ) + FirstTime = false; + } // if ( FirstTime ) -# endif // # if ( MODEL == ELBDM ) + if ( lv == 0 ) Init_FFTW_PowerSpectrum( StartupFlag ); -// free memory for arrays in fftw3 -# if ( SUPPORT_FFTW == FFTW3 ) - root_fftw::fft_free(PS); # ifdef GRAVITY - root_fftw::fft_free(RhoK); -# endif // # ifdef GRAVITY + Init_FFTW_Poisson( StartupFlag, lv ); +# endif + # if ( MODEL == ELBDM ) - root_fftw::fft_free( PsiK ); + if ( lv == 0 ) Init_FFTW_ELBDMSpectral( StartupFlag ); + # if ( WAVE_SCHEME == WAVE_GRAMFE ) - gramfe_fftw::fft_free( ExtPsiK ); -# endif // # if ( WAVE_SCHEME == WAVE_GRAMFE ) -# endif // # if ( MODEL == ELBDM ) -# endif // # if ( SUPPORT_FFTW == FFTW3 ) + if ( lv == 0 ) Init_FFTW_GramFE( StartupFlag ); +# endif // #if ( WAVE_SCHEME == WAVE_GRAMFE ) +# endif // #if ( MODEL == ELBDM ) + FFTW_Inited[lv] = true; if ( MPI_Rank == 0 ) Aux_Message( stdout, "done\n" ); @@ -264,24 +199,22 @@ void End_FFTW() if ( MPI_Rank == 0 ) Aux_Message( stdout, "%s ... ", __FUNCTION__ ); - root_fftw::destroy_real_plan_nd ( FFTW_Plan_PS ); -# ifdef GRAVITY - root_fftw::destroy_real_plan_nd ( FFTW_Plan_Poi ); - root_fftw::destroy_real_plan_nd ( FFTW_Plan_Poi_Inv ); -# endif // # ifdef GRAVITY + End_FFTW_PowerSpectrum(); +# ifdef GRAVITY + End_FFTW_Poisson(); +# endif # if ( MODEL == ELBDM ) - root_fftw::destroy_complex_plan_nd ( FFTW_Plan_Psi ); - root_fftw::destroy_complex_plan_nd ( FFTW_Plan_Psi_Inv ); + End_FFTW_ELBDMSpectral(); # if ( WAVE_SCHEME == WAVE_GRAMFE ) - gramfe_fftw::destroy_complex_plan_1d ( FFTW_Plan_ExtPsi ); - gramfe_fftw::destroy_complex_plan_1d ( FFTW_Plan_ExtPsi_Inv ); + End_FFTW_GramFE(); # endif // # if ( WAVE_SCHEME == WAVE_GRAMFE ) # endif // #if ( MODEL == ELBDM ) + # if ( SUPPORT_FFTW == FFTW3 ) # ifdef OPENMP if ( FFTW3_Double_OMP_Enabled ) fftw_cleanup_threads(); @@ -305,6 +238,255 @@ void End_FFTW() +//------------------------------------------------------------------------------------------------------- +// Function : Init_FFTW_PowerSpectrum +// Description : Create the FFTW plan for the power spectrum +// +// Parameter : StartupFlag : Initialize FFTW plan method +// +// Return : none +//------------------------------------------------------------------------------------------------------- +void Init_FFTW_PowerSpectrum( const int StartupFlag ) +{ + +// determine the FFT size + int PS_FFT_Size[3] = { NX0_TOT[0], NX0_TOT[1], NX0_TOT[2] }; + + real* PS = NULL; + +// allocate memory for arrays in fftw3 +# if ( SUPPORT_FFTW == FFTW3 ) + PS = (real*)root_fftw::fft_malloc( ComputePaddedTotalSize( PS_FFT_Size ) * sizeof(real) ); +# endif + +// create plans + FFTW_Plan_PS = root_fftw_create_3d_r2c_plan( PS_FFT_Size, PS, StartupFlag ); + +// free memory for arrays in fftw3 +# if ( SUPPORT_FFTW == FFTW3 ) + root_fftw::fft_free( PS ); +# endif + +} // FUNCITON : Init_FFTW_PowerSpectrum + + + +//------------------------------------------------------------------------------------------------------- +// Function : End_FFTW_PowerSpectrum +// Description : Delete the FFTW plan for the power spectrum +// +// Return : none +//------------------------------------------------------------------------------------------------------- +void End_FFTW_PowerSpectrum() +{ + + root_fftw::destroy_real_plan_nd( FFTW_Plan_PS ); + +} // FUNCITON : End_FFTW_PowerSpectrum + + + +#ifdef GRAVITY +//------------------------------------------------------------------------------------------------------- +// Function : Init_FFTW_Poisson +// Description : Create the FFTW plan for the self-gravity Poisson solver +// +// Parameter : StartupFlag : Initialize FFTW plan method +// lv : Target level +// +// Return : none +//------------------------------------------------------------------------------------------------------- +void Init_FFTW_Poisson( const int StartupFlag, const int lv ) +{ + + const int CellFactor = (int)(1L< "You can call fftw_plan_with_nthreads, create some plans, call fftw_plan_with_nthreads +// again with a different argument, and create some more plans for a new number of threads." +// +// Parameter : StartupFlag : Initialize FFTW plan method +// +// Return : none +//------------------------------------------------------------------------------------------------------- +void Init_FFTW_GramFE( const int StartupFlag ) +{ + +// determine the FFT size + int ExtPsi_FFT_Size = GRAMFE_FLU_NXT; + + gramfe_fftw::fft_complex* ExtPsiK = NULL; + +// allocate memory for arrays in fftw3 +# if ( SUPPORT_FFTW == FFTW3 ) + ExtPsiK = (gramfe_fftw::fft_complex*)gramfe_fftw::fft_malloc( ExtPsi_FFT_Size * sizeof(gramfe_fftw::fft_complex) ); +# endif + +// See note 1. +# if ( defined( SUPPORT_FFTW3 ) && defined( OPENMP ) ) + if ( FFTW3_Double_OMP_Enabled ) fftw_plan_with_nthreads( 1 ); + if ( FFTW3_Single_OMP_Enabled ) fftwf_plan_with_nthreads( 1 ); +# endif + +// create plans + FFTW_Plan_ExtPsi = gramfe_fftw_create_1d_forward_c2c_plan ( ExtPsi_FFT_Size, ExtPsiK, StartupFlag ); + FFTW_Plan_ExtPsi_Inv = gramfe_fftw_create_1d_backward_c2c_plan( ExtPsi_FFT_Size, ExtPsiK, StartupFlag ); + +// restore regular settings +# if ( defined( SUPPORT_FFTW3 ) && defined( OPENMP ) ) + if ( FFTW3_Double_OMP_Enabled ) fftw_plan_with_nthreads( OMP_NTHREAD ); + if ( FFTW3_Single_OMP_Enabled ) fftwf_plan_with_nthreads( OMP_NTHREAD ); +# endif + +// free memory for arrays in fftw3 +# if ( SUPPORT_FFTW == FFTW3 ) + gramfe_fftw::fft_free( ExtPsiK ); +# endif + +} // FUNCTION : Init_FFTW_Poisson + + + +//------------------------------------------------------------------------------------------------------- +// Function : End_FFTW_GramFE +// Description : Delete the FFTW plan for the Gram Fourier extension solver +// +// Return : none +//------------------------------------------------------------------------------------------------------- +void End_FFTW_GramFE() +{ + + gramfe_fftw::destroy_complex_plan_1d( FFTW_Plan_ExtPsi ); + gramfe_fftw::destroy_complex_plan_1d( FFTW_Plan_ExtPsi_Inv ); + +} // FUNCITON : End_FFTW_Poisson +#endif // #if ( WAVE_SCHEME == WAVE_GRAMFE ) +#endif // #if ( MODEL == ELBDM ) + + + //------------------------------------------------------------------------------------------------------- // Function : Patch2Slab // Description : Patch-based data --> slab domain decomposition @@ -330,11 +512,13 @@ void End_FFTW() // InPlacePad : Whether or not to pad the array size for in-place real-to-complex FFT // ForPoisson : Preparing the density field for the Poisson solver // AddExtraMass : Adding an extra density field for computing gravitational potential (only works with ForPoisson) +// lv : Target level //------------------------------------------------------------------------------------------------------- void Patch2Slab( real *VarS, real *SendBuf_Var, real *RecvBuf_Var, long *SendBuf_SIdx, long *RecvBuf_SIdx, int **List_PID, int **List_k, long *List_NSend_Var, long *List_NRecv_Var, const int *List_z_start, const int local_nz, const int FFT_Size[], const int NRecvSlice, - const double PrepTime, const long TVar, const bool InPlacePad, const bool ForPoisson, const bool AddExtraMass ) + const double PrepTime, const long TVar, const bool InPlacePad, const bool ForPoisson, + const bool AddExtraMass, const int lv ) { // check @@ -360,12 +544,12 @@ void Patch2Slab( real *VarS, real *SendBuf_Var, real *RecvBuf_Var, long *SendBuf const int SSize[2] = { ( InPlacePad ? 2*(FFT_Size[0]/2+1) : FFT_Size[0] ), FFT_Size[1] }; // padded slab size in the x and y directions const int PSSize = PS1*PS1; // patch slice size - const int MemUnit = MAX( 1, amr->NPatchComma[0][1]/MPI_NRank ); // set arbitrarily; divided by MPI_NRank so that the + const int MemUnit = MAX( 1, amr->NPatchComma[lv][1]/MPI_NRank ); // set arbitrarily; divided by MPI_NRank so that the // memory consumption per rank for arrays like // TempBuf_Var[MPI_NRank] reduces when launching // more ranks const int AveNz = FFT_Size[2]/MPI_NRank + ( ( FFT_Size[2]%MPI_NRank == 0 ) ? 0 : 1 ); // average slab thickness - const int Scale0 = amr->scale[0]; + const int Scale = amr->scale[lv]; int Cr[3]; // corner coordinates of each patch normalized to the base-level grid size int BPos_z; // z coordinate of each patch slice in the simulation box @@ -413,15 +597,16 @@ void Patch2Slab( real *VarS, real *SendBuf_Var, real *RecvBuf_Var, long *SendBuf const real MinEntr_No = -1.0; const int GhostSize = 0; const int NPG = 1; + const long CellFactor = (long)(1L<NPatchComma[0][1]; PID0+=8) + for (int PID0=0; PID0NPatchComma[lv][1]; PID0+=8) { // even with NSIDE_00 and GhostSize=0, we still need OPT__BC_FLU to determine whether periodic BC is adopted // for depositing particle mass onto grids. // also note that we do not check minimum density here since no ghost zones are required - Prepare_PatchData( 0, PrepTime, VarPatch[0][0][0], NULL, GhostSize, NPG, &PID0, TVar, _NONE, + Prepare_PatchData( lv, PrepTime, VarPatch[0][0][0], NULL, GhostSize, NPG, &PID0, TVar, _NONE, IntScheme, INT_NONE, UNIT_PATCH, NSide_None, IntPhase_No, OPT__BC_FLU, PotBC_None, MinDens_No, MinPres_No, MinTemp_No, MinEntr_No, DE_Consistency_No ); @@ -430,20 +615,20 @@ void Patch2Slab( real *VarS, real *SendBuf_Var, real *RecvBuf_Var, long *SendBuf // add extra mass source for gravity if required if ( ForPoisson && AddExtraMass ) { - const double dh = amr->dh[0]; + const double dh = amr->dh[lv]; for (int PID=PID0, LocalID=0; PIDpatch[0][0][PID]->EdgeL[0] + 0.5*dh; - const double y0 = amr->patch[0][0][PID]->EdgeL[1] + 0.5*dh; - const double z0 = amr->patch[0][0][PID]->EdgeL[2] + 0.5*dh; + const double x0 = amr->patch[0][lv][PID]->EdgeL[0] + 0.5*dh; + const double y0 = amr->patch[0][lv][PID]->EdgeL[1] + 0.5*dh; + const double z0 = amr->patch[0][lv][PID]->EdgeL[2] + 0.5*dh; double x, y, z; for (int k=0; kpatch[0][0][PID]->corner[d] / Scale0; + for (int d=0; d<3; d++) Cr[d] = amr->patch[0][lv][PID]->corner[d] / Scale; for (int k=0; kNPatchComma[0][1]*(long)CUBE(PS1); - const long NRecv_Expect = (long)NX0_TOT[0]*(long)NX0_TOT[1]*(long)NRecvSlice; + const long NSend_Expect = (long)amr->NPatchComma[lv][1]*(long)CUBE(PS1); + const long NRecv_Expect = (long)NX0_TOT[0]*CellFactor*(long)NX0_TOT[1]*CellFactor*(long)NRecvSlice; if ( NSend_Total != NSend_Expect ) Aux_Error( ERROR_INFO, "NSend_Total = %ld != expected value = %ld !!\n", NSend_Total, NSend_Expect ); @@ -587,7 +772,7 @@ void Patch2Slab( real *VarS, real *SendBuf_Var, real *RecvBuf_Var, long *SendBuf // 5. store the received data to the padded array "VarS" for FFTW - const long NPSlice = (long)NX0_TOT[0]*NX0_TOT[1]*NRecvSlice/PSSize; // total number of received patch slices + const long NPSlice = (long)NX0_TOT[0]*CellFactor*NX0_TOT[1]*CellFactor*NRecvSlice/PSSize; // total number of received patch slices long dSIdx, Counter = 0; real *VarS_Ptr = NULL; @@ -670,10 +855,11 @@ int ZIndex2Rank( const int IndexZ, const int *List_z_start, const int TRank_Gues // NSendSlice : Total number of z slices need to be sent to other ranks (could be zero in the isolated BC) // TVar : Target variable to be prepared // InPlacePad : Whether or not to pad the array size for in-place real-to-complex FFT +// lv : Target level //------------------------------------------------------------------------------------------------------- void Slab2Patch( const real *VarS, real *SendBuf, real *RecvBuf, const int SaveSg, const long *List_SIdx, int **List_PID, int **List_k, long *List_NSend, long *List_NRecv, const int local_nz, const int FFT_Size[], - const int NSendSlice, const long TVar, const bool InPlacePad ) + const int NSendSlice, const long TVar, const bool InPlacePad, const int lv ) { // check @@ -704,9 +890,10 @@ void Slab2Patch( const real *VarS, real *SendBuf, real *RecvBuf, const int SaveS // 1. store the evaluated data to the send buffer + const long CellFactor = (long)(1L<patch[SaveSg][0][PID]->fluid[TVarIdx][k], RecvPtr, PSSize*sizeof(real) ); + memcpy( amr->patch[SaveSg][lv][PID]->fluid[TVarIdx][k], RecvPtr, PSSize*sizeof(real) ); # ifdef GRAVITY else if ( TVarIdx == NCOMP_TOTAL+NDERIVE ) // TVar == _POTE - memcpy( amr->patch[SaveSg][0][PID]->pot[k], RecvPtr, PSSize*sizeof(real) ); + memcpy( amr->patch[SaveSg][lv][PID]->pot[k], RecvPtr, PSSize*sizeof(real) ); # endif else Aux_Error( ERROR_INFO, "incorrect target variable index %s = %d !!\n", "TVarIdx", TVarIdx ); diff --git a/src/Init/Init_GAMER.cpp b/src/Init/Init_GAMER.cpp index d6d48c9313..08d23b8da6 100644 --- a/src/Init/Init_GAMER.cpp +++ b/src/Init/Init_GAMER.cpp @@ -90,7 +90,7 @@ void Init_GAMER( int *argc, char ***argv ) # ifdef SUPPORT_FFTW // initialize FFTW - Init_FFTW(); + Init_FFTW( 0 ); # endif @@ -266,7 +266,7 @@ void Init_GAMER( int *argc, char ***argv ) { # ifdef SUPPORT_FFTW // initialize the k-space Green's function for the isolated BC. - if ( OPT__SELF_GRAVITY && OPT__BC_POT == BC_POT_ISOLATED ) Init_GreenFuncK(); + if ( OPT__SELF_GRAVITY && OPT__BC_POT == BC_POT_ISOLATED ) Init_GreenFuncK( 0 ); # endif diff --git a/src/Main/EvolveLevel.cpp b/src/Main/EvolveLevel.cpp index 3305379464..65e6745f1c 100644 --- a/src/Main/EvolveLevel.cpp +++ b/src/Main/EvolveLevel.cpp @@ -352,10 +352,14 @@ void EvolveLevel( const int lv, const double dTime_FaLv ) if ( OPT__VERBOSE && MPI_Rank == 0 ) Aux_Message( stdout, " Lv %2d: Gra_AdvanceDt, counter = %8ld ... ", lv, AdvanceCounter[lv] ); - if ( lv == 0 ) + bool FullRefinedLv = false; + const long CellFactor = (long)(1L< 0 + else // not FullRefinedLv { if ( false ) {} /* @@ -430,7 +434,7 @@ void EvolveLevel( const int lv, const double dTime_FaLv ) amr->PotSgTime[lv][SaveSg_Pot] = TimeNew; } - } // if ( lv == 0 ) ... else ... + } // if ( FullRefinedLv ) ... else ... if ( OPT__VERBOSE && MPI_Rank == 0 ) Aux_Message( stdout, "done\n" ); # endif // #ifdef GRAVITY diff --git a/src/Main/Main.cpp b/src/Main/Main.cpp index a495c688a5..aaba99e5e2 100644 --- a/src/Main/Main.cpp +++ b/src/Main/Main.cpp @@ -170,7 +170,9 @@ bool ELBDM_BASE_SPECTRAL; double AveDensity_Init = -1.0; // initialize it as <= 0 to check if it is properly set later int Pot_ParaBuf, Rho_ParaBuf; -real *GreenFuncK = NULL; +bool FFTW_Inited[NLEVEL] = { false }; +bool GreenFuncK_Inited[NLEVEL] = { false }; +real *GreenFuncK[NLEVEL] = { NULL }; double GFUNC_COEFF0; double DT__GRAVITY; double NEWTON_G; diff --git a/src/Makefile_base b/src/Makefile_base index 05a5ff17b0..c3664f8fd9 100644 --- a/src/Makefile_base +++ b/src/Makefile_base @@ -203,7 +203,7 @@ GPU_FILE += CUPOT_PoissonSolver_SOR.cu \ CUPOT_ExtPotSolver.cu CUPOT_ExtPot_Tabular.cu CPU_FILE += CPU_PoissonGravitySolver.cpp CPU_PoissonSolver_SOR.cpp CPU_PoissonSolver_FFT.cpp \ - CPU_PoissonSolver_MG.cpp CPU_ExtPotSolver.cpp CPU_ExtPotSolver_BaseLevel.cpp + CPU_PoissonSolver_MG.cpp CPU_ExtPotSolver.cpp CPU_ExtPotSolver_FullyRefinedLevel.cpp CPU_FILE += Gra_Close.cpp Gra_Prepare_Flu.cpp Gra_Prepare_Pot.cpp Gra_Prepare_Corner.cpp \ Gra_AdvanceDt.cpp Poi_Close.cpp Poi_Prepare_Pot.cpp Poi_Prepare_Rho.cpp \ diff --git a/src/Model_ELBDM/CPU_ELBDM/CPU_ELBDMSolver_FFT.cpp b/src/Model_ELBDM/CPU_ELBDM/CPU_ELBDMSolver_FFT.cpp index d677ac9875..6cb9292b57 100644 --- a/src/Model_ELBDM/CPU_ELBDM/CPU_ELBDMSolver_FFT.cpp +++ b/src/Model_ELBDM/CPU_ELBDM/CPU_ELBDMSolver_FFT.cpp @@ -136,6 +136,8 @@ void Psi_Advance_FFT( real *PsiR, real *PsiI, const int j_start, const int dj, c void CPU_ELBDMSolver_FFT( const real dt, const double PrepTime, const int SaveSg ) { + const int lv = 0; + // determine the FFT size const int FFT_Size[3] = { NX0_TOT[0], NX0_TOT[1], NX0_TOT[2] }; @@ -199,9 +201,9 @@ void CPU_ELBDMSolver_FFT( const real dt, const double PrepTime, const int SaveSg real *PsiR = (real*)root_fftw::fft_malloc( sizeof(real)*total_local_size ); // array storing real and imaginary parts of wave function real *PsiI = (real*)root_fftw::fft_malloc( sizeof(real)*total_local_size ); - real *SendBuf = new real [ (long)amr->NPatchComma[0][1]*CUBE(PS1) ]; // MPI send buffer + real *SendBuf = new real [ (long)amr->NPatchComma[lv][1]*CUBE(PS1) ]; // MPI send buffer real *RecvBuf = new real [ (long)NX0_TOT[0]*NX0_TOT[1]*NRecvSlice ]; // MPI recv buffer - long *SendBuf_SIdx = new long [ (long)amr->NPatchComma[0][1]*PS1 ]; // MPI send buffer for 1D coordinate in slab + long *SendBuf_SIdx = new long [ (long)amr->NPatchComma[lv][1]*PS1 ]; // MPI send buffer for 1D coordinate in slab long *RecvBuf_SIdx = new long [ (long)NX0_TOT[0]*NX0_TOT[1]*NRecvSlice/SQR(PS1) ]; // MPI recv buffer for 1D coordinate in slab int *List_PID_R [MPI_NRank]; // PID of each patch slice sent to each rank for the real part @@ -214,9 +216,9 @@ void CPU_ELBDMSolver_FFT( const real dt, const double PrepTime, const int SaveSg // rearrange data from patch to slab Patch2Slab( PsiR, SendBuf, RecvBuf, SendBuf_SIdx, RecvBuf_SIdx, List_PID_R, List_k_R, List_NSend, List_NRecv, List_z_start, - local_nz, FFT_Size, NRecvSlice, PrepTime, _REAL, InPlacePad_No, ForPoisson_No, false ); + local_nz, FFT_Size, NRecvSlice, PrepTime, _REAL, InPlacePad_No, ForPoisson_No, false, lv ); Patch2Slab( PsiI, SendBuf, RecvBuf, SendBuf_SIdx, RecvBuf_SIdx, List_PID_I, List_k_I, List_NSend, List_NRecv, List_z_start, - local_nz, FFT_Size, NRecvSlice, PrepTime, _IMAG, InPlacePad_No, ForPoisson_No, false ); + local_nz, FFT_Size, NRecvSlice, PrepTime, _IMAG, InPlacePad_No, ForPoisson_No, false, lv ); // advance wave function by exp( -i*dt*k^2/(2*ELBDM_ETA) ) in the k-space using FFT @@ -226,23 +228,23 @@ void CPU_ELBDMSolver_FFT( const real dt, const double PrepTime, const int SaveSg // rearrange data from slab back to patch Slab2Patch( PsiR, RecvBuf, SendBuf, SaveSg, RecvBuf_SIdx, List_PID_R, List_k_R, List_NRecv, List_NSend, - local_nz, FFT_Size, NRecvSlice, _REAL, InPlacePad_No ); + local_nz, FFT_Size, NRecvSlice, _REAL, InPlacePad_No, lv ); Slab2Patch( PsiI, RecvBuf, SendBuf, SaveSg, RecvBuf_SIdx, List_PID_I, List_k_I, List_NRecv, List_NSend, - local_nz, FFT_Size, NRecvSlice, _IMAG, InPlacePad_No ); + local_nz, FFT_Size, NRecvSlice, _IMAG, InPlacePad_No, lv ); // update density according to the updated wave function # pragma omp parallel for schedule( runtime ) - for (int PID=0; PIDNPatchComma[0][1]; PID++) + for (int PID=0; PIDNPatchComma[lv][1]; PID++) for (int k=0; kpatch[SaveSg][0][PID]->fluid[REAL][k][j][i]; - const real NewImag = amr->patch[SaveSg][0][PID]->fluid[IMAG][k][j][i]; + const real NewReal = amr->patch[SaveSg][lv][PID]->fluid[REAL][k][j][i]; + const real NewImag = amr->patch[SaveSg][lv][PID]->fluid[IMAG][k][j][i]; const real NewDens = SQR( NewReal ) + SQR( NewImag ); - amr->patch[SaveSg][0][PID]->fluid[DENS][k][j][i] = NewDens; + amr->patch[SaveSg][lv][PID]->fluid[DENS][k][j][i] = NewDens; } // PID,i,j,k diff --git a/src/Output/Output_BasePowerSpectrum.cpp b/src/Output/Output_BasePowerSpectrum.cpp index e9565aa517..d94f6e4a3c 100644 --- a/src/Output/Output_BasePowerSpectrum.cpp +++ b/src/Output/Output_BasePowerSpectrum.cpp @@ -32,6 +32,7 @@ void Output_BasePowerSpectrum( const char *FileName, const long TVar ) if ( NX0_TOT[0] != NX0_TOT[1] || NX0_TOT[0] != NX0_TOT[2] ) Aux_Error( ERROR_INFO, "%s only works with CUBIC domain !!\n", __FUNCTION__ ); + const int lv = 0; // 1. determine the FFT size const int Nx_Padded = NX0_TOT[0]/2+1; @@ -93,9 +94,9 @@ void Output_BasePowerSpectrum( const char *FileName, const long TVar ) double *PS_total = NULL; real *VarK = (real*)root_fftw::fft_malloc( sizeof(real)*total_local_size ); // array storing data - real *SendBuf = new real [ (long)amr->NPatchComma[0][1]*CUBE(PS1) ]; // MPI send buffer for data + real *SendBuf = new real [ (long)amr->NPatchComma[lv][1]*CUBE(PS1) ]; // MPI send buffer for data real *RecvBuf = new real [ (long)NX0_TOT[0]*NX0_TOT[1]*NRecvSlice ]; // MPI recv buffer for data - long *SendBuf_SIdx = new long [ (long)amr->NPatchComma[0][1]*PS1 ]; // MPI send buffer for 1D coordinate in slab + long *SendBuf_SIdx = new long [ (long)amr->NPatchComma[lv][1]*PS1 ]; // MPI send buffer for 1D coordinate in slab long *RecvBuf_SIdx = new long [ (long)NX0_TOT[0]*NX0_TOT[1]*NRecvSlice/SQR(PS1) ]; // MPI recv buffer for 1D coordinate in slab int *List_PID [MPI_NRank]; // PID of each patch slice sent to each rank @@ -123,17 +124,17 @@ void Output_BasePowerSpectrum( const char *FileName, const long TVar ) # endif if ( TVar == _TOTAL_DENS ) { - Par_CollectParticle2OneLevel( 0, _PAR_MASS|_PAR_POSX|_PAR_POSY|_PAR_POSZ, _PAR_TYPE, PredictPos, Time[0], + Par_CollectParticle2OneLevel( lv, _PAR_MASS|_PAR_POSX|_PAR_POSY|_PAR_POSZ, _PAR_TYPE, PredictPos, Time[lv], SibBufPatch, FaSibBufPatch, JustCountNPar_No, TimingSendPar_No ); - Prepare_PatchData_InitParticleDensityArray( 0, Time[0] ); + Prepare_PatchData_InitParticleDensityArray( lv, Time[lv] ); } // if ( TVar == _TOTAL_DENS ) # endif // #ifdef MASSIVE_PARTICLES // 4. rearrange data from patch to slab Patch2Slab( VarK, SendBuf, RecvBuf, SendBuf_SIdx, RecvBuf_SIdx, List_PID, List_k, List_NSend, List_NRecv, List_z_start, - local_nz, FFT_Size, NRecvSlice, Time[0], TVar, InPlacePad, ForPoisson, false ); + local_nz, FFT_Size, NRecvSlice, Time[lv], TVar, InPlacePad, ForPoisson, false, lv ); // 5. evaluate the base-level power spectrum by FFT @@ -185,9 +186,9 @@ void Output_BasePowerSpectrum( const char *FileName, const long TVar ) // free memory for collecting particles from other ranks and levels, and free density arrays with ghost zones (rho_ext) # ifdef MASSIVE_PARTICLES if ( TVar == _TOTAL_DENS ) { - Par_CollectParticle2OneLevel_FreeMemory( 0, SibBufPatch, FaSibBufPatch ); + Par_CollectParticle2OneLevel_FreeMemory( lv, SibBufPatch, FaSibBufPatch ); - Prepare_PatchData_FreeParticleDensityArray( 0 ); + Prepare_PatchData_FreeParticleDensityArray( lv ); } # endif diff --git a/src/SelfGravity/CPU_Poisson/CPU_ExtPotSolver_BaseLevel.cpp b/src/SelfGravity/CPU_Poisson/CPU_ExtPotSolver_FullyRefinedLevel.cpp similarity index 84% rename from src/SelfGravity/CPU_Poisson/CPU_ExtPotSolver_BaseLevel.cpp rename to src/SelfGravity/CPU_Poisson/CPU_ExtPotSolver_FullyRefinedLevel.cpp index 6272823cbe..9990cbe267 100644 --- a/src/SelfGravity/CPU_Poisson/CPU_ExtPotSolver_BaseLevel.cpp +++ b/src/SelfGravity/CPU_Poisson/CPU_ExtPotSolver_FullyRefinedLevel.cpp @@ -6,8 +6,8 @@ //----------------------------------------------------------------------------------------- -// Function : CPU_ExtPotSolver_BaseLevel -// Description : Add external potential on the base level +// Function : CPU_ExtPotSolver_FullyRefinedLevel +// Description : Add external potential on the fully refined level // // Note : 1. External potential is specified by the input function Func() // 2. Set PotIsInit to false if the base-level potential has not been initialized @@ -23,12 +23,13 @@ // --> true : **add** to the original data // false: **overwrite** the original data // SaveSg : Sandglass to store the updated potential +// lv : Target level // // Return : amr->patch->pot[] //----------------------------------------------------------------------------------------- -void CPU_ExtPotSolver_BaseLevel( const ExtPot_t Func, const double AuxArray_Flt[], const int AuxArray_Int[], - const real Table[], void **GenePtr, - const double Time, const bool PotIsInit, const int SaveSg ) +void CPU_ExtPotSolver_FullyRefinedLevel( const ExtPot_t Func, const double AuxArray_Flt[], const int AuxArray_Int[], + const real Table[], void **GenePtr, + const double Time, const bool PotIsInit, const int SaveSg, const int lv ) { // check @@ -50,7 +51,6 @@ void CPU_ExtPotSolver_BaseLevel( const ExtPot_t Func, const double AuxArray_Flt[ # endif - const int lv = 0; const double dh = amr->dh[lv]; const double dh_2 = 0.5*dh; @@ -79,7 +79,7 @@ void CPU_ExtPotSolver_BaseLevel( const ExtPot_t Func, const double AuxArray_Flt[ } // for (int PID=0; PIDNPatchComma[lv][1]; PID++) } // end of OpenMP parallel region -} // FUNCTION : CPU_ExtPotSolver_BaseLevel +} // FUNCTION : CPU_ExtPotSolver_FullyRefinedLevel diff --git a/src/SelfGravity/CPU_Poisson/CPU_PoissonSolver_FFT.cpp b/src/SelfGravity/CPU_Poisson/CPU_PoissonSolver_FFT.cpp index f335aaf877..0230fccfab 100644 --- a/src/SelfGravity/CPU_Poisson/CPU_PoissonSolver_FFT.cpp +++ b/src/SelfGravity/CPU_Poisson/CPU_PoissonSolver_FFT.cpp @@ -2,10 +2,10 @@ #if ( defined GRAVITY && defined SUPPORT_FFTW ) -static void FFT_Periodic( real *RhoK, const real Poi_Coeff, const int j_start, const int dj, const long RhoK_Size ); -static void FFT_Isolated( real *RhoK, const real *gFuncK, const real Poi_Coeff, const long RhoK_Size ); +static void FFT_Periodic( real *RhoK, const real Poi_Coeff, const int j_start, const int dj, const long RhoK_Size, const int lv ); +static void FFT_Isolated( real *RhoK, const real *gFuncK, const real Poi_Coeff, const long RhoK_Size, const int lv ); -extern root_fftw::real_plan_nd FFTW_Plan_Poi, FFTW_Plan_Poi_Inv; +extern root_fftw::real_plan_nd FFTW_Plan_Poi[NLEVEL], FFTW_Plan_Poi_Inv[NLEVEL]; @@ -22,21 +22,23 @@ extern root_fftw::real_plan_nd FFTW_Plan_Poi, FFTW_Plan_Poi_Inv; // j_start : Starting j index // dj : Size of array in the j (y) direction after the forward FFT // RhoK_Size : Size of the array "RhoK" +// lv : Target level //------------------------------------------------------------------------------------------------------- -void FFT_Periodic( real *RhoK, const real Poi_Coeff, const int j_start, const int dj, const long RhoK_Size ) +void FFT_Periodic( real *RhoK, const real Poi_Coeff, const int j_start, const int dj, const long RhoK_Size, const int lv ) { - const int Nx = NX0_TOT[0]; - const int Ny = NX0_TOT[1]; - const int Nz = NX0_TOT[2]; - const int Nx_Padded = Nx/2 + 1; - const real dh = amr->dh[0]; + const int CellFactor = (int)(1L<dh[lv]; real Deno; gamer_fftw::fft_complex *cdata; // forward FFT - root_fftw_r2c( FFTW_Plan_Poi, RhoK ); + root_fftw_r2c( FFTW_Plan_Poi[lv], RhoK ); // the data are now complex, so typecast a pointer cdata = (gamer_fftw::fft_complex*) RhoK; @@ -102,7 +104,7 @@ void FFT_Periodic( real *RhoK, const real Poi_Coeff, const int j_start, const in // backward FFT - root_fftw_c2r( FFTW_Plan_Poi_Inv, RhoK ); + root_fftw_c2r( FFTW_Plan_Poi_Inv[lv], RhoK ); // normalization const real norm = dh*dh / ( (real)Nx*Ny*Nz ); @@ -124,8 +126,9 @@ void FFT_Periodic( real *RhoK, const real Poi_Coeff, const int j_start, const in // Parameter : RhoK : Array storing the input density and output potential // Poi_Coeff : Coefficient in front of density in the Poisson equation (4*Pi*Newton_G*a) // RhoK_Size : Size of the array "RhoK" +// lv : Target level //------------------------------------------------------------------------------------------------------- -void FFT_Isolated( real *RhoK, const real *gFuncK, const real Poi_Coeff, const long RhoK_Size ) +void FFT_Isolated( real *RhoK, const real *gFuncK, const real Poi_Coeff, const long RhoK_Size, const int lv ) { gamer_fftw::fft_complex *RhoK_cplx = (gamer_fftw::fft_complex *)RhoK; @@ -134,7 +137,7 @@ void FFT_Isolated( real *RhoK, const real *gFuncK, const real Poi_Coeff, const l // forward FFT - root_fftw_r2c( FFTW_Plan_Poi, RhoK ); + root_fftw_r2c( FFTW_Plan_Poi[lv], RhoK ); // multiply density and Green's function in the k space @@ -151,7 +154,7 @@ void FFT_Isolated( real *RhoK, const real *gFuncK, const real Poi_Coeff, const l // backward FFT - root_fftw_c2r( FFTW_Plan_Poi_Inv, RhoK ); + root_fftw_c2r( FFTW_Plan_Poi_Inv[lv], RhoK ); // effect of "4*PI*NEWTON_G" has been included in gFuncK, but the scale factor in the comoving frame hasn't # ifdef COMOVING @@ -173,12 +176,16 @@ void FFT_Isolated( real *RhoK, const real *gFuncK, const real Poi_Coeff, const l // Parameter : Poi_Coeff : Coefficient in front of the RHS in the Poisson eq. // SaveSg : Sandglass to store the updated data // PrepTime : Physical time for preparing the density field +// lv : Target level //------------------------------------------------------------------------------------------------------- -void CPU_PoissonSolver_FFT( const real Poi_Coeff, const int SaveSg, const double PrepTime ) +void CPU_PoissonSolver_FFT( const real Poi_Coeff, const int SaveSg, const double PrepTime, const int lv ) { + const int CellFactor = (int)(1L<NPatchComma[0][1]*CUBE(PS1) ]; // MPI send buffer for density and potential - real *RecvBuf = new real [ (long)NX0_TOT[0]*NX0_TOT[1]*NRecvSlice ]; // MPI recv buffer for density and potentia - long *SendBuf_SIdx = new long [ (long)amr->NPatchComma[0][1]*PS1 ]; // MPI send buffer for 1D coordinate in slab - long *RecvBuf_SIdx = new long [ (long)NX0_TOT[0]*NX0_TOT[1]*NRecvSlice/SQR(PS1) ]; // MPI recv buffer for 1D coordinate in slab + real *RhoK = (real*)root_fftw::fft_malloc( sizeof(real)*total_local_size ); // array storing both density and potential + real *SendBuf = new real [ (long)amr->NPatchComma[lv][1]*CUBE(PS1) ]; // MPI send buffer for density and potential + real *RecvBuf = new real [ (long)NX0_TOT[0]*CellFactor_l*NX0_TOT[1]*CellFactor_l*NRecvSlice ]; // MPI recv buffer for density and potentia + long *SendBuf_SIdx = new long [ (long)amr->NPatchComma[lv][1]*PS1 ]; // MPI send buffer for 1D coordinate in slab + long *RecvBuf_SIdx = new long [ (long)NX0_TOT[0]*CellFactor_l*NX0_TOT[1]*CellFactor_l*NRecvSlice/SQR(PS1) ]; // MPI recv buffer for 1D coordinate in slab int *List_PID [MPI_NRank]; // PID of each patch slice sent to each rank int *List_k [MPI_NRank]; // local z coordinate of each patch slice sent to each rank @@ -260,15 +267,15 @@ void CPU_PoissonSolver_FFT( const real Poi_Coeff, const int SaveSg, const double // rearrange data from patch to slab Patch2Slab( RhoK, SendBuf, RecvBuf, SendBuf_SIdx, RecvBuf_SIdx, List_PID, List_k, List_NSend, List_NRecv, List_z_start, - local_nz, FFT_Size, NRecvSlice, PrepTime, _TOTAL_DENS, InPlacePad, ForPoisson, OPT__GRAVITY_EXTRA_MASS ); + local_nz, FFT_Size, NRecvSlice, PrepTime, _TOTAL_DENS, InPlacePad, ForPoisson, OPT__GRAVITY_EXTRA_MASS, lv ); // evaluate potential by FFT if ( OPT__BC_POT == BC_POT_PERIODIC ) - FFT_Periodic( RhoK, Poi_Coeff, local_y_start_after_transpose, local_ny_after_transpose, total_local_size ); + FFT_Periodic( RhoK, Poi_Coeff, local_y_start_after_transpose, local_ny_after_transpose, total_local_size, lv ); else if ( OPT__BC_POT == BC_POT_ISOLATED ) - FFT_Isolated( RhoK, GreenFuncK, Poi_Coeff, total_local_size ); + FFT_Isolated( RhoK, GreenFuncK[lv], Poi_Coeff, total_local_size, lv ); else Aux_Error( ERROR_INFO, "unsupported paramter %s = %d !!\n", "OPT__BC_POT", OPT__BC_POT ); @@ -276,7 +283,7 @@ void CPU_PoissonSolver_FFT( const real Poi_Coeff, const int SaveSg, const double // rearrange data from slab back to patch Slab2Patch( RhoK, RecvBuf, SendBuf, SaveSg, RecvBuf_SIdx, List_PID, List_k, List_NRecv, List_NSend, - local_nz, FFT_Size, NRecvSlice, _POTE, InPlacePad ); + local_nz, FFT_Size, NRecvSlice, _POTE, InPlacePad, lv ); root_fftw::fft_free( RhoK ); diff --git a/src/SelfGravity/End_MemFree_PoissonGravity.cpp b/src/SelfGravity/End_MemFree_PoissonGravity.cpp index cc01f37b04..cbc31bcfee 100644 --- a/src/SelfGravity/End_MemFree_PoissonGravity.cpp +++ b/src/SelfGravity/End_MemFree_PoissonGravity.cpp @@ -34,7 +34,13 @@ void End_MemFree_PoissonGravity() delete [] h_Pot_Array_T [t]; h_Pot_Array_T [t] = NULL; } - root_fftw::fft_free(GreenFuncK); GreenFuncK = NULL; + for (int lv=0; lvPotSg [lv] = SaveSg_Pot; @@ -152,12 +162,12 @@ void Gra_AdvanceDt( const int lv, const double TimeNew, const double TimeOld, co false, false ), Timer_Gra_Advance[lv], Timing ); - amr->FluSg[0] = SaveSg_Flu; + amr->FluSg[lv] = SaveSg_Flu; } // if ( Gravity ) - } // if ( lv == 0 ) + } // if ( FullRefinedLv ) - else // lv > 0 + else // if ( FullRefinedLv ) { if ( Poisson && !Gravity ) InvokeSolver( POISSON_SOLVER, lv, TimeNew, TimeOld, NULL_REAL, Poi_Coeff, NULL_INT, NULL_INT, SaveSg_Pot, diff --git a/src/SelfGravity/Init_GreenFuncK.cpp b/src/SelfGravity/Init_GreenFuncK.cpp index fdd8465c7f..82124be213 100644 --- a/src/SelfGravity/Init_GreenFuncK.cpp +++ b/src/SelfGravity/Init_GreenFuncK.cpp @@ -2,7 +2,7 @@ #if ( defined GRAVITY && defined SUPPORT_FFTW ) -extern root_fftw::real_plan_nd FFTW_Plan_Poi; +extern root_fftw::real_plan_nd FFTW_Plan_Poi[NLEVEL]; @@ -15,9 +15,9 @@ extern root_fftw::real_plan_nd FFTW_Plan_Poi; // 2. The zero-padding method is implemented // 3. Slab decomposition is assumed in FFTW // -// Parameter : None +// Parameter : lv : Target level //------------------------------------------------------------------------------------------------------- -void Init_GreenFuncK() +void Init_GreenFuncK( const int lv ) { if ( MPI_Rank == 0 ) Aux_Message( stdout, "%s ...\n", __FUNCTION__ ); @@ -27,9 +27,12 @@ void Init_GreenFuncK() if ( OPT__BC_POT != BC_POT_ISOLATED ) Aux_Message( stderr, "OPT__BC_POT != BC_POT_ISOLATED, why do you need to calculate the Green's function !?\n" ); + if ( ! FFTW_Inited[lv] ) Aux_Error( ERROR_INFO, "FFTW not initialized lv = %02d !!\n", lv ); + if ( GreenFuncK_Inited[lv] ) return; // 1. get the array indices used by FFTW - const int FFT_Size[3] = { 2*NX0_TOT[0], 2*NX0_TOT[1], 2*NX0_TOT[2] }; + const int CellFactor = (int)(1L<dh[0]; - const double Coeff = -NEWTON_G*CUBE(dh0)/( (double)FFT_Size[0]*FFT_Size[1]*FFT_Size[2] ); + const double dh = amr->dh[lv]; + const double Coeff = -NEWTON_G*CUBE(dh)/( (double)FFT_Size[0]*FFT_Size[1]*FFT_Size[2] ); double x, y, z, r; int kk; long idx; - GreenFuncK = (real*) root_fftw::fft_malloc(sizeof(real) * total_local_size); + GreenFuncK[lv] = (real*) root_fftw::fft_malloc(sizeof(real) * total_local_size); for (int k=0; kFluSg[lv], NULL_INT, NULL_INT, DATA_GENERAL, _DENS, _NONE, Rho_ParaBuf, USELB_YES ); @@ -388,8 +392,12 @@ void Aux_Record_Gravity() if ( lv >= MinLv ) Timer_PoiPerf.Start(); for (int t=0; tPotSg[lv], Time[lv] ); + if ( FullRefinedLv ) + { + if ( ! FFTW_Inited[lv] ) Init_FFTW( lv ); + + CPU_PoissonSolver_FFT( Poi_Coeff, amr->PotSg[lv], Time[lv], lv ); + } else InvokeSolver( POISSON_SOLVER, lv, Time[lv], NULL_REAL, NULL_REAL, Poi_Coeff,