From e3792616ad1d749f526e9e9ae2d4a0b1e76e4214 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Tue, 1 Aug 2023 12:54:23 -0600 Subject: [PATCH 001/267] Moving line break in BPM doc pages for link, fixing prop/atom syntax --- doc/src/bond_bpm_rotational.rst | 4 ++-- doc/src/bond_bpm_spring.rst | 4 ++-- src/BPM/bond_bpm.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/src/bond_bpm_rotational.rst b/doc/src/bond_bpm_rotational.rst index 7459d491d6..6734bd7bfe 100644 --- a/doc/src/bond_bpm_rotational.rst +++ b/doc/src/bond_bpm_rotational.rst @@ -147,8 +147,8 @@ By default, pair forces are not calculated between bonded particles. Pair forces can alternatively be overlaid on top of bond forces by setting the *overlay/pair* keyword to *yes*. These settings require specific :doc:`special_bonds ` settings described in the -restrictions. Further details can be found in the :doc:`how to -` page on BPMs. +restrictions. Further details can be found in the :doc:`how to ` +page on BPMs. .. versionadded:: 28Mar2023 diff --git a/doc/src/bond_bpm_spring.rst b/doc/src/bond_bpm_spring.rst index 04ff4d5991..a03c832249 100644 --- a/doc/src/bond_bpm_spring.rst +++ b/doc/src/bond_bpm_spring.rst @@ -113,8 +113,8 @@ By default, pair forces are not calculated between bonded particles. Pair forces can alternatively be overlaid on top of bond forces by setting the *overlay/pair* keyword to *yes*. These settings require specific :doc:`special_bonds ` settings described in the -restrictions. Further details can be found in the :doc:`how to -` page on BPMs. +restrictions. Further details can be found in the :doc:`how to ` +page on BPMs. .. 
versionadded:: 28Mar2023 diff --git a/src/BPM/bond_bpm.cpp b/src/BPM/bond_bpm.cpp index 3ebeed3f1d..b484df7fab 100644 --- a/src/BPM/bond_bpm.cpp +++ b/src/BPM/bond_bpm.cpp @@ -224,7 +224,7 @@ void BondBPM::settings(int narg, char **arg) ifix = modify->get_fix_by_id(id_fix_prop_atom); if (!ifix) - ifix = modify->add_fix(fmt::format("{} all property/atom {} {} {} ghost yes", + ifix = modify->add_fix(fmt::format("{} all property/atom d_{} d_{} d_{} ghost yes", id_fix_prop_atom, x_ref_id, y_ref_id, z_ref_id)); int type_flag; From cd5ebb86c870ebdceb842130aeea6b9bdf067a30 Mon Sep 17 00:00:00 2001 From: jrgissing Date: Fri, 25 Aug 2023 21:37:57 -0400 Subject: [PATCH 002/267] inserting atoms: correct logic for per-atom mass --- src/REACTION/fix_bond_react.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index d124b06dc2..9254c3f5d1 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -3895,7 +3895,8 @@ int FixBondReact::insert_atoms(tagint **my_update_mega_glove, int iupdate) // guess a somewhat reasonable initial velocity based on reaction site // further control is possible using bond_react_MASTER_group // compute |velocity| corresponding to a given temperature t, using specific atom's mass - double vtnorm = sqrt(t / (force->mvv2e / (dimension * force->boltz)) / atom->mass[twomol->type[m]]); + double mymass = atom->rmass ? 
atom->rmass[n] : atom->mass[twomol->type[m]]; + double vtnorm = sqrt(t / (force->mvv2e / (dimension * force->boltz)) / mymass); v[n][0] = random[rxnID]->uniform(); v[n][1] = random[rxnID]->uniform(); v[n][2] = random[rxnID]->uniform(); From 999c364b83d69195dfbfeaeb47397feb2d6d7dc7 Mon Sep 17 00:00:00 2001 From: jrgissing Date: Fri, 25 Aug 2023 21:44:42 -0400 Subject: [PATCH 003/267] better way to list rxn counts --- .../reaction/create_atoms_polystyrene/in.grow_styrene | 2 +- examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt | 4 ++-- .../PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized | 2 +- .../PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized | 2 +- .../tiny_nylon/in.tiny_nylon.stabilized_variable_probability | 2 +- .../PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized | 2 +- .../reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/PACKAGES/reaction/create_atoms_polystyrene/in.grow_styrene b/examples/PACKAGES/reaction/create_atoms_polystyrene/in.grow_styrene index 7860db4e55..dcca29c026 100644 --- a/examples/PACKAGES/reaction/create_atoms_polystyrene/in.grow_styrene +++ b/examples/PACKAGES/reaction/create_atoms_polystyrene/in.grow_styrene @@ -40,7 +40,7 @@ fix 1 statted_grp_REACT nvt temp $T $T 100 fix 4 bond_react_MASTER_group temp/rescale 1 $T $T 1 1 -thermo_style custom step temp press density f_myrxns[1] +thermo_style custom step temp press density f_myrxns[*] thermo 100 diff --git a/examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt b/examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt index 9678a714d6..635b2c9750 100644 --- a/examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt +++ b/examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt @@ -26,7 +26,7 @@ read_data large_nylon_melt.data.gz & extra/angle/per/atom 15 & extra/dihedral/per/atom 15 & extra/improper/per/atom 25 & - extra/special/per/atom 25 + 
extra/special/per/atom 25 velocity all create 800.0 4928459 dist gaussian @@ -50,7 +50,7 @@ fix 1 statted_grp_REACT nvt temp 800 800 100 # you can use the internally created 'bond_react_MASTER_group', like so: # fix 2 bond_react_MASTER_group temp/rescale 1 800 800 10 1 -thermo_style custom step temp press density f_myrxns[1] f_myrxns[2] # cumulative reaction counts +thermo_style custom step temp press density f_myrxns[*] # cumulative reaction counts # restart 100 restart1 restart2 diff --git a/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized b/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized index 57b03b630f..ea09d06893 100644 --- a/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized +++ b/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized @@ -44,7 +44,7 @@ fix rxns all bond/react stabilization yes statted_grp .03 & fix 1 statted_grp_REACT nvt temp 300 300 100 -thermo_style custom step temp f_rxns[1] f_rxns[2] f_rxns[3] f_rxns[4] +thermo_style custom step temp f_rxns[*] run 2000 diff --git a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized index 95b39033db..853bc45f1e 100644 --- a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized +++ b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized @@ -50,7 +50,7 @@ fix 1 statted_grp_REACT nvt temp 300 300 100 # by using the internally-created 'bond_react_MASTER_group', like so: fix 4 bond_react_MASTER_group temp/rescale 1 300 300 10 1 -thermo_style custom step temp press density f_myrxns[1] f_myrxns[2] +thermo_style custom step temp press density f_myrxns[*] # restart 100 restart1 restart2 diff --git a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability index 88b5a95a41..f3c32f3cbd 100644 --- 
a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability +++ b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability @@ -54,7 +54,7 @@ fix 1 statted_grp_REACT nvt temp 300 300 100 # by using the internally-created 'bond_react_MASTER_group', like so: fix 4 bond_react_MASTER_group temp/rescale 1 300 300 10 1 -thermo_style custom step temp press density v_prob1 v_prob2 f_myrxns[1] f_myrxns[2] +thermo_style custom step temp press density v_prob1 v_prob2 f_myrxns[*] # restart 100 restart1 restart2 diff --git a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized index a569e28d43..e5cbaaaf86 100644 --- a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized +++ b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized @@ -47,7 +47,7 @@ fix myrxns all bond/react stabilization no & fix 1 all nve/limit .03 -thermo_style custom step temp press density f_myrxns[1] f_myrxns[2] +thermo_style custom step temp press density f_myrxns[*] # restart 100 restart1 restart2 diff --git a/examples/PACKAGES/reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized b/examples/PACKAGES/reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized index 4ecc481719..230998fcd3 100644 --- a/examples/PACKAGES/reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized +++ b/examples/PACKAGES/reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized @@ -51,7 +51,7 @@ fix 1 statted_grp_REACT nvt temp $T $T 100 fix 4 bond_react_MASTER_group temp/rescale 1 $T $T 1 1 -thermo_style custom step temp press density f_rxn1[1] f_rxn1[2] f_rxn1[3] +thermo_style custom step temp press density f_rxn1[*] run 10000 From 1039f86037f2870062a0a79108a933ab96063494 Mon Sep 17 00:00:00 2001 From: jrgissing Date: Sat, 26 Aug 2023 14:29:07 -0400 Subject: [PATCH 004/267] remove unnecessary restriction do not check for comm cutoff when initiator atoms are directly bonded --- 
src/REACTION/fix_bond_react.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index 9254c3f5d1..1da26e32a1 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -827,11 +827,10 @@ void FixBondReact::init() nlevels_respa = (dynamic_cast(update->integrate))->nlevels; // check cutoff for iatomtype,jatomtype - for (int i = 0; i < nreacts; i++) { - if (!utils::strmatch(force->pair_style,"^hybrid")) - if (force->pair == nullptr || cutsq[i][1] > force->pair->cutsq[iatomtype[i]][jatomtype[i]]) + if (!utils::strmatch(force->pair_style,"^hybrid")) + for (int i = 0; i < nreacts; i++) + if (force->pair == nullptr || (closeneigh[i] < 0 && cutsq[i][1] > force->pair->cutsq[iatomtype[i]][jatomtype[i]])) error->all(FLERR,"Fix bond/react: Fix bond/react cutoff is longer than pairwise cutoff"); - } // need a half neighbor list, built every Nevery steps neighbor->add_request(this, NeighConst::REQ_OCCASIONAL); From fc897512a029cee3a57b299c9dd4b93ab6bbff02 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Wed, 6 Dec 2023 11:13:34 -0500 Subject: [PATCH 005/267] Initial conversion of all FFT_ settings in the KOKKOS subdirectory to FFT_KOKKOS_ --- src/KOKKOS/fft3d_kokkos.cpp | 84 ++++++++++++++++++------------------ src/KOKKOS/fft3d_kokkos.h | 10 ++--- src/KOKKOS/fftdata_kokkos.h | 86 ++++++++++++++++++------------------- src/KOKKOS/pppm_kokkos.cpp | 4 +- src/KOKKOS/pppm_kokkos.h | 32 +++++++------- 5 files changed, 108 insertions(+), 108 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 82e4140f77..ca3d18e11a 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -44,20 +44,20 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int int ngpus = lmp->kokkos->ngpus; ExecutionSpace execution_space = ExecutionSpaceFromDevice::space; -#if defined(FFT_MKL) +#if defined(FFT_KOKKOS_MKL) if (ngpus > 
0 && execution_space == Device) lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos on GPUs"); -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) if (ngpus > 0 && execution_space == Device) lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos on GPUs"); -#elif defined(FFT_CUFFT) +#elif defined(FFT_KOKKOS_CUFFT) if (ngpus > 0 && execution_space == Host) lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos on the host CPUs"); -#elif defined(FFT_HIPFFT) +#elif defined(FFT_KOKKOS_HIPFFT) if (ngpus > 0 && execution_space == Host) lmp->error->all(FLERR,"Cannot use the hipFFT library with Kokkos on the host CPUs"); -#elif defined(FFT_KISSFFT) +#elif defined(FFT_KOKKOS_KISSFFT) // The compiler can't statically determine the stack size needed for // recursive function calls in KISS FFT and the default per-thread // stack size on GPUs needs to be increased to prevent stack overflows @@ -149,20 +149,20 @@ public: KOKKOS_INLINE_FUNCTION void operator() (const int &i) const { -#if defined(FFT_FFTW3) || defined(FFT_CUFFT) || defined(FFT_HIPFFT) +#if defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_CUFFT) || defined(FFT_KOKKOS_HIPFFT) FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; -#elif defined(FFT_MKL) +#elif defined(FFT_KOKKOS_MKL) d_out(i) *= norm; -#else // FFT_KISS +#else // FFT_KOKKOS_KISS d_out(i).re *= norm; d_out(i).im *= norm; #endif } }; -#ifdef FFT_KISSFFT +#ifdef FFT_KOKKOS_KISSFFT template struct kiss_fft_functor { public: @@ -219,19 +219,19 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, total = plan->total1; length = plan->length1; - #if defined(FFT_MKL) + #if defined(FFT_KOKKOS_MKL) if (flag == 1) DftiComputeForward(plan->handle_fast,d_data.data()); else DftiComputeBackward(plan->handle_fast,d_data.data()); - #elif defined(FFT_FFTW3) + #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) 
FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); else FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - #elif defined(FFT_CUFFT) + #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); - #elif defined(FFT_HIPFFT) + #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #else typename FFT_AT::t_FFT_DATA_1d d_tmp = @@ -265,19 +265,19 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, total = plan->total2; length = plan->length2; - #if defined(FFT_MKL) + #if defined(FFT_KOKKOS_MKL) if (flag == 1) DftiComputeForward(plan->handle_mid,d_data.data()); else DftiComputeBackward(plan->handle_mid,d_data.data()); - #elif defined(FFT_FFTW3) + #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); else FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - #elif defined(FFT_CUFFT) + #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); - #elif defined(FFT_HIPFFT) + #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #else d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); @@ -309,19 +309,19 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, total = plan->total3; length = plan->length3; - #if defined(FFT_MKL) + #if defined(FFT_KOKKOS_MKL) if (flag == 1) DftiComputeForward(plan->handle_slow,d_data.data()); else DftiComputeBackward(plan->handle_slow,d_data.data()); - #elif defined(FFT_FFTW3) + #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); else 
FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - #elif defined(FFT_CUFFT) + #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); - #elif defined(FFT_HIPFFT) + #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); @@ -609,46 +609,46 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl // system specific pre-computation of 1d FFT coeffs // and scaling normalization -#if defined(FFT_MKL) - DftiCreateDescriptor( &(plan->handle_fast), FFT_MKL_PREC, DFTI_COMPLEX, 1, +#if defined(FFT_KOKKOS_MKL) + DftiCreateDescriptor( &(plan->handle_fast), FFT_KOKKOS_MKL_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nfast); DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)plan->total1/nfast); DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast); DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast); -#if defined(FFT_MKL_THREADS) +#if defined(FFT_KOKKOS_MKL_THREADS) DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads); #endif DftiCommitDescriptor(plan->handle_fast); - DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1, + DftiCreateDescriptor( &(plan->handle_mid), FFT_KOKKOS_MKL_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nmid); DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)plan->total2/nmid); DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid); DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid); -#if defined(FFT_MKL_THREADS) +#if defined(FFT_KOKKOS_MKL_THREADS) DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads); #endif DftiCommitDescriptor(plan->handle_mid); - 
DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1, + DftiCreateDescriptor( &(plan->handle_slow), FFT_KOKKOS_MKL_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nslow); DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)plan->total3/nslow); DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow); DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow); -#if defined(FFT_MKL_THREADS) +#if defined(FFT_KOKKOS_MKL_THREADS) DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads); #endif DftiCommitDescriptor(plan->handle_slow); -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) -#if defined (FFT_FFTW_THREADS) +#if defined (FFT_KOKKOS_FFTW_THREADS) if (nthreads > 1) { FFTW_API(init_threads)(); FFTW_API(plan_with_nthreads)(nthreads); @@ -692,7 +692,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl nullptr,&nslow,1,plan->length3, FFTW_BACKWARD,FFTW_ESTIMATE); -#elif defined(FFT_CUFFT) +#elif defined(FFT_KOKKOS_CUFFT) cufftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, @@ -709,7 +709,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl &nslow,1,plan->length3, CUFFT_TYPE,plan->total3/plan->length3); -#elif defined(FFT_HIPFFT) +#elif defined(FFT_KOKKOS_HIPFFT) hipfftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, @@ -726,7 +726,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl &nslow,1,plan->length3, HIPFFT_TYPE,plan->total3/plan->length3); -#else /* FFT_KISS */ +#else /* FFT_KOKKOS_KISS */ kissfftKK = new KissFFTKokkos(); @@ -781,11 +781,11 @@ void FFT3dKokkos::fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokk if (plan->mid2_plan) remapKK->remap_3d_destroy_plan_kokkos(plan->mid2_plan); if (plan->post_plan) remapKK->remap_3d_destroy_plan_kokkos(plan->post_plan); -#if defined(FFT_MKL) +#if defined(FFT_KOKKOS_MKL) DftiFreeDescriptor(&(plan->handle_fast)); 
DftiFreeDescriptor(&(plan->handle_mid)); DftiFreeDescriptor(&(plan->handle_slow)); -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) FFTW_API(destroy_plan)(plan->plan_slow_forward); FFTW_API(destroy_plan)(plan->plan_slow_backward); FFTW_API(destroy_plan)(plan->plan_mid_forward); @@ -793,11 +793,11 @@ void FFT3dKokkos::fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokk FFTW_API(destroy_plan)(plan->plan_fast_forward); FFTW_API(destroy_plan)(plan->plan_fast_backward); -#if defined (FFT_FFTW_THREADS) +#if defined (FFT_KOKKOS_FFTW_THREADS) FFTW_API(cleanup_threads)(); #endif -#elif defined (FFT_KISSFFT) +#elif defined (FFT_KOKKOS_KISSFFT) delete kissfftKK; #endif @@ -855,7 +855,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ // fftw3 and Dfti in MKL encode the number of transforms // into the plan, so we cannot operate on a smaller data set -#if defined(FFT_MKL) || defined(FFT_FFTW3) +#if defined(FFT_KOKKOS_MKL) || defined(FFT_KOKKOS_FFTW3) if ((total1 > nsize) || (total2 > nsize) || (total3 > nsize)) return; #endif @@ -866,7 +866,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ // perform 1d FFTs in each of 3 dimensions // data is just an array of 0.0 -#if defined(FFT_MKL) +#if defined(FFT_KOKKOS_MKL) if (flag == -1) { DftiComputeForward(plan->handle_fast,d_data.data()); DftiComputeForward(plan->handle_mid,d_data.data()); @@ -876,7 +876,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ DftiComputeBackward(plan->handle_mid,d_data.data()); DftiComputeBackward(plan->handle_slow,d_data.data()); } -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) if (flag == -1) { FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); @@ -886,11 +886,11 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ 
FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); } -#elif defined(FFT_CUFFT) +#elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); -#elif defined(FFT_HIPFFT) +#elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index a0489f69bb..ed49c4b1ee 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -45,22 +45,22 @@ struct fft_plan_3d_kokkos { double norm; // normalization factor for rescaling // system specific 1d FFT info -#if defined(FFT_MKL) +#if defined(FFT_KOKKOS_MKL) DFTI_DESCRIPTOR *handle_fast; DFTI_DESCRIPTOR *handle_mid; DFTI_DESCRIPTOR *handle_slow; -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) FFTW_API(plan) plan_fast_forward; FFTW_API(plan) plan_fast_backward; FFTW_API(plan) plan_mid_forward; FFTW_API(plan) plan_mid_backward; FFTW_API(plan) plan_slow_forward; FFTW_API(plan) plan_slow_backward; -#elif defined(FFT_CUFFT) +#elif defined(FFT_KOKKOS_CUFFT) cufftHandle plan_fast; cufftHandle plan_mid; cufftHandle plan_slow; -#elif defined(FFT_HIPFFT) +#elif defined(FFT_KOKKOS_HIPFFT) hipfftHandle plan_fast; hipfftHandle plan_mid; hipfftHandle plan_slow; @@ -92,7 +92,7 @@ class FFT3dKokkos : protected Pointers { struct fft_plan_3d_kokkos *plan; RemapKokkos *remapKK; -#ifdef FFT_KISSFFT +#ifdef FFT_KOKKOS_KISSFFT KissFFTKokkos *kissfftKK; #endif diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index a3812a1cf0..bed2c7faf0 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ 
b/src/KOKKOS/fftdata_kokkos.h @@ -29,10 +29,10 @@ // Data types for single-precision complex -#if FFT_PRECISION == 1 -#elif FFT_PRECISION == 2 +#if FFT_KOKKOS_PRECISION == 1 +#elif FFT_KOKKOS_PRECISION == 2 #else -#error "FFT_PRECISION needs to be either 1 (=single) or 2 (=double)" +#error "FFT_KOKKOS_PRECISION needs to be either 1 (=single) or 2 (=double)" #endif @@ -41,70 +41,70 @@ // FFTs here, since they may be valid in fft3d.cpp #ifdef KOKKOS_ENABLE_CUDA -# if defined(FFT_FFTW) -# undef FFT_FFTW +# if defined(FFT_KOKKOS_FFTW) +# undef FFT_KOKKOS_FFTW # endif -# if defined(FFT_FFTW3) -# undef FFT_FFTW3 +# if defined(FFT_KOKKOS_FFTW3) +# undef FFT_KOKKOS_FFTW3 # endif -# if defined(FFT_MKL) -# undef FFT_MKL +# if defined(FFT_KOKKOS_MKL) +# undef FFT_KOKKOS_MKL # endif -# if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT) -# define FFT_KISSFFT +# if !defined(FFT_KOKKOS_CUFFT) && !defined(FFT_KOKKOS_KISSFFT) +# define FFT_KOKKOS_KISSFFT # endif #elif defined(KOKKOS_ENABLE_HIP) -# if defined(FFT_FFTW) -# undef FFT_FFTW +# if defined(FFT_KOKKOS_FFTW) +# undef FFT_KOKKOS_FFTW # endif -# if defined(FFT_FFTW3) -# undef FFT_FFTW3 +# if defined(FFT_KOKKOS_FFTW3) +# undef FFT_KOKKOS_FFTW3 # endif -# if defined(FFT_MKL) -# undef FFT_MKL +# if defined(FFT_KOKKOS_MKL) +# undef FFT_KOKKOS_MKL # endif -# if !defined(FFT_HIPFFT) && !defined(FFT_KISSFFT) -# define FFT_KISSFFT +# if !defined(FFT_KOKKOS_HIPFFT) && !defined(FFT_KOKKOS_KISSFFT) +# define FFT_KOKKOS_KISSFFT # endif #else -# if defined(FFT_CUFFT) -# error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT" +# if defined(FFT_KOKKOS_CUFFT) +# error "Must enable CUDA with KOKKOS to use -DFFT_KOKKOS_CUFFT" # endif -# if defined(FFT_HIPFFT) -# error "Must enable HIP with KOKKOS to use -DFFT_HIPFFT" +# if defined(FFT_KOKKOS_HIPFFT) +# error "Must enable HIP with KOKKOS to use -DFFT_KOKKOS_HIPFFT" # endif // if user set FFTW, it means FFTW3 -# ifdef FFT_FFTW -# define FFT_FFTW3 +# ifdef FFT_KOKKOS_FFTW +# define FFT_KOKKOS_FFTW3 
# endif -# ifdef FFT_FFTW_THREADS -# if !defined(FFT_FFTW3) -# error "Must use -DFFT_FFTW3 with -DFFT_FFTW_THREADS" +# ifdef FFT_KOKKOS_FFTW_THREADS +# if !defined(FFT_KOKKOS_FFTW3) +# error "Must use -DFFT_KOKKOS_FFTW3 with -DFFT_KOKKOS_FFTW_THREADS" # endif # endif #endif -#if defined(FFT_MKL) +#if defined(FFT_KOKKOS_MKL) #include "mkl_dfti.h" - #if defined(FFT_SINGLE) + #if defined(FFT_KOKKOS_SINGLE) typedef float _Complex FFT_DATA; - #define FFT_MKL_PREC DFTI_SINGLE + #define FFT_KOKKOS_MKL_PREC DFTI_SINGLE #else typedef double _Complex FFT_DATA; - #define FFT_MKL_PREC DFTI_DOUBLE + #define FFT_KOKKOS_MKL_PREC DFTI_DOUBLE #endif -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) #include "fftw3.h" - #if defined(FFT_SINGLE) + #if defined(FFT_KOKKOS_SINGLE) typedef fftwf_complex FFT_DATA; #define FFTW_API(function) fftwf_ ## function #else typedef fftw_complex FFT_DATA; #define FFTW_API(function) fftw_ ## function #endif -#elif defined(FFT_CUFFT) +#elif defined(FFT_KOKKOS_CUFFT) #include "cufft.h" - #if defined(FFT_SINGLE) + #if defined(FFT_KOKKOS_SINGLE) #define cufftExec cufftExecC2C #define CUFFT_TYPE CUFFT_C2C typedef cufftComplex FFT_DATA; @@ -113,9 +113,9 @@ #define CUFFT_TYPE CUFFT_Z2Z typedef cufftDoubleComplex FFT_DATA; #endif -#elif defined(FFT_HIPFFT) +#elif defined(FFT_KOKKOS_HIPFFT) #include - #if defined(FFT_SINGLE) + #if defined(FFT_KOKKOS_SINGLE) #define hipfftExec hipfftExecC2C #define HIPFFT_TYPE HIPFFT_C2C typedef hipfftComplex FFT_DATA; @@ -125,7 +125,7 @@ typedef hipfftDoubleComplex FFT_DATA; #endif #else - #if defined(FFT_SINGLE) + #if defined(FFT_KOKKOS_SINGLE) #define kiss_fft_scalar float #else #define kiss_fft_scalar double @@ -134,13 +134,13 @@ kiss_fft_scalar re; kiss_fft_scalar im; } FFT_DATA; - #ifndef FFT_KISSFFT - #define FFT_KISSFFT + #ifndef FFT_KOKKOS_KISSFFT + #define FFT_KOKKOS_KISSFFT #endif #endif // (double[2]*) is not a 1D pointer -#if defined(FFT_FFTW3) +#if defined(FFT_KOKKOS_FFTW3) typedef FFT_SCALAR* 
FFT_DATA_POINTER; #else typedef FFT_DATA* FFT_DATA_POINTER; @@ -216,7 +216,7 @@ typedef struct FFTArrayTypes FFT_DAT; typedef struct FFTArrayTypes FFT_HAT; -#if defined(FFT_KISSFFT) +#if defined(FFT_KOKKOS_KISSFFT) #include "kissfft_kokkos.h" // uses t_FFT_DATA_1d, needs to come last #endif diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 912ae36f6f..17a9c82bdb 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -48,7 +48,7 @@ using namespace MathSpecialKokkos; enum{REVERSE_RHO}; enum{FORWARD_IK,FORWARD_IK_PERATOM}; -#ifdef FFT_SINGLE +#ifdef FFT_KOKKOS_SINGLE #define ZEROF 0.0f #define ONEF 1.0f #else @@ -2390,7 +2390,7 @@ void PPPMKokkos::compute_rho_coeff() s = 0.0; for (l = 0; l < j; l++) { a[l+1][k+order] = (a[l][k+1+order]-a[l][k-1+order]) / (l+1); -#ifdef FFT_SINGLE +#ifdef FFT_KOKKOS_SINGLE s += powf(0.5,(float) l+1) * (a[l][k-1+order] + powf(-1.0,(float) l) * a[l][k+1+order]) / (l+1); #else diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index d621313873..14d4670dbd 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -36,30 +36,30 @@ KSpaceStyle(pppm/kk/host,PPPMKokkos); // fix up FFT defines for KOKKOS with CUDA and HIP #ifdef KOKKOS_ENABLE_CUDA -# if defined(FFT_FFTW) -# undef FFT_FFTW +# if defined(FFT_KOKKOS_FFTW) +# undef FFT_KOKKOS_FFTW # endif -# if defined(FFT_FFTW3) -# undef FFT_FFTW3 +# if defined(FFT_KOKKOS_FFTW3) +# undef FFT_KOKKOS_FFTW3 # endif -# if defined(FFT_MKL) -# undef FFT_MKL +# if defined(FFT_KOKKOS_MKL) +# undef FFT_KOKKOS_MKL # endif -# if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT) -# define FFT_KISSFFT +# if !defined(FFT_KOKKOS_CUFFT) && !defined(FFT_KOKKOS_KISSFFT) +# define FFT_KOKKOS_KISSFFT # endif #elif defined(KOKKOS_ENABLE_HIP) -# if defined(FFT_FFTW) -# undef FFT_FFTW +# if defined(FFT_KOKKOS_FFTW) +# undef FFT_KOKKOS_FFTW # endif -# if defined(FFT_FFTW3) -# undef FFT_FFTW3 +# if defined(FFT_KOKKOS_FFTW3) +# undef FFT_KOKKOS_FFTW3 # 
endif -# if defined(FFT_MKL) -# undef FFT_MKL +# if defined(FFT_KOKKOS_MKL) +# undef FFT_KOKKOS_MKL # endif -# if !defined(FFT_HIPFFT) && !defined(FFT_KISSFFT) -# define FFT_KISSFFT +# if !defined(FFT_KOKKOS_HIPFFT) && !defined(FFT_KOKKOS_KISSFFT) +# define FFT_KOKKOS_KISSFFT # endif #endif From 0e98e706c69881f5562ce12285f80687ac8baf6c Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Wed, 6 Dec 2023 11:49:54 -0500 Subject: [PATCH 006/267] Added LMPFFT settings for Kokkos-specific FFTs --- src/KOKKOS/lmpfftsettings_kokkos.h | 54 ++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 src/KOKKOS/lmpfftsettings_kokkos.h diff --git a/src/KOKKOS/lmpfftsettings_kokkos.h b/src/KOKKOS/lmpfftsettings_kokkos.h new file mode 100644 index 0000000000..148e001de3 --- /dev/null +++ b/src/KOKKOS/lmpfftsettings_kokkos.h @@ -0,0 +1,54 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +// common FFT library related defines and compilation settings + +#ifndef LMP_FFT_KOKKOS_SETTINGS_H +#define LMP_FFT_KOKKOS_SETTINGS_H + +// if user set FFTW, it means FFTW3 + +#ifdef FFT_KOKKOS_FFTW +#ifndef FFT_KOKKOS_FFTW3 +#define FFT_KOKKOS_FFTW3 +#endif +#endif + +// set strings for library info output + +#if defined(FFT_KOKKOS_FFTW3) +#define LMP_FFT_KOKKOS_LIB "FFTW3" +#elif defined(FFT_KOKKOS_MKL) +#define LMP_FFT_KOKKOS_LIB "MKL FFT" +#elif defined(FFT_KOKKOS_CUFFT) +#define LMP_FFT_KOKKOS_LIB "cuFFT" +#elif defined(FFT_KOKKOS_HIPFFT) +#define LMP_FFT_KOKKOS_LIB "hipFFT" +#else +#define LMP_FFT_KOKKOS_LIB "KISS FFT" +#endif + +#ifdef FFT_KOKKOS_SINGLE +typedef float FFT_KOKKOS_SCALAR; +#define FFT_KOKKOS_PRECISION 1 +#define LMP_FFT_KOKKOS_PREC "single" +#define MPI_FFT_KOKKOS_SCALAR MPI_FLOAT +#else + +typedef double FFT_KOKKOS_SCALAR; +#define FFT_KOKKOS_PRECISION 2 +#define LMP_FFT_KOKKOS_PREC "double" +#define MPI_FFT_KOKKOS_SCALAR MPI_DOUBLE +#endif + +#endif From 48ef968dd21c4e474fdbf927a89b0585f5235568 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Wed, 6 Dec 2023 12:01:06 -0500 Subject: [PATCH 007/267] Decoupled all variable typenames from the non-kokkos FFT --- src/KOKKOS/fft3d_kokkos.cpp | 120 +++++++++++++++++------------------ src/KOKKOS/fft3d_kokkos.h | 16 ++--- src/KOKKOS/fftdata_kokkos.h | 90 +++++++++++++------------- src/KOKKOS/grid3d_kokkos.cpp | 28 ++++---- src/KOKKOS/grid3d_kokkos.h | 14 ++-- src/KOKKOS/kissfft_kokkos.h | 76 +++++++++++----------- src/KOKKOS/kokkos_base_fft.h | 12 ++-- src/KOKKOS/pack_kokkos.h | 52 +++++++-------- src/KOKKOS/pppm_kokkos.cpp | 114 ++++++++++++++++----------------- src/KOKKOS/pppm_kokkos.h | 50 +++++++-------- src/KOKKOS/remap_kokkos.cpp | 20 +++--- src/KOKKOS/remap_kokkos.h | 20 +++--- 12 files changed, 306 insertions(+), 306 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 
ca3d18e11a..7093136fe6 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -88,10 +88,10 @@ FFT3dKokkos::~FFT3dKokkos() /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::compute(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, int flag) +void FFT3dKokkos::compute(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, int flag) { - typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_DATA_POINTER)d_in.data(),d_in.size()/2); - typename FFT_AT::t_FFT_DATA_1d d_out_data((FFT_DATA_POINTER)d_out.data(),d_out.size()/2); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_out_data((FFT_KOKKOS_DATA_POINTER)d_out.data(),d_out.size()/2); fft_3d_kokkos(d_in_data,d_out_data,flag,plan); } @@ -99,9 +99,9 @@ void FFT3dKokkos::compute(typename FFT_AT::t_FFT_SCALAR_1d d_in, typ /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::timing1d(typename FFT_AT::t_FFT_SCALAR_1d d_in, int nsize, int flag) +void FFT3dKokkos::timing1d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, int nsize, int flag) { - typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); fft_3d_1d_only_kokkos(d_in_data,nsize,flag,plan); } @@ -140,17 +140,17 @@ template struct norm_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_DATA_1d_um d_out; + typedef FFTArrayTypes FFT_KOKKOS_AT; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_out; int norm; - norm_functor(typename FFT_AT::t_FFT_DATA_1d &d_out_, int norm_): + norm_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_out_, int norm_): 
d_out(d_out_),norm(norm_) {} KOKKOS_INLINE_FUNCTION void operator() (const int &i) const { #if defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_CUFFT) || defined(FFT_KOKKOS_HIPFFT) - FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); + FFT_KOKKOS_SCALAR* out_ptr = (FFT_KOKKOS_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; #elif defined(FFT_KOKKOS_MKL) @@ -167,14 +167,14 @@ template struct kiss_fft_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_DATA_1d_um d_data,d_tmp; + typedef FFTArrayTypes FFT_KOKKOS_AT; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_data,d_tmp; kiss_fft_state_kokkos st; int length; kiss_fft_functor() = default; - kiss_fft_functor(typename FFT_AT::t_FFT_DATA_1d &d_data_,typename FFT_AT::t_FFT_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): + kiss_fft_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_data_,typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): d_data(d_data_), d_tmp(d_tmp_), st(st_) @@ -191,11 +191,11 @@ public: #endif template -void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, typename FFT_AT::t_FFT_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) +void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) { int total,length; - typename FFT_AT::t_FFT_DATA_1d d_data,d_copy; - typename FFT_AT::t_FFT_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_data,d_copy; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; // pre-remap to prepare for 1st FFTs if needed // copy = loc for remap result @@ -204,9 +204,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, if (plan->pre_target 
== 0) d_copy = d_out; else d_copy = plan->d_copy; - d_in_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_in.data(),d_in.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_in_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_in.data(),d_in.size()*2); + d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_in_scalar, d_copy_scalar, d_scratch_scalar, plan->pre_plan); @@ -226,16 +226,16 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, DftiComputeBackward(plan->handle_fast,d_data.data()); #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) - FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); else - FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #else - typename FFT_AT::t_FFT_DATA_1d d_tmp = - typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_tmp = + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); kiss_fft_functor f; if (flag == 
1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length); @@ -251,9 +251,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, if (plan->mid1_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid1_plan); @@ -272,15 +272,15 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, DftiComputeBackward(plan->handle_mid,d_data.data()); #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) - FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); else - FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename 
FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_mid_forward,length); else @@ -295,9 +295,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, if (plan->mid2_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid2_plan); @@ -316,15 +316,15 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, DftiComputeBackward(plan->handle_slow,d_data.data()); #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) - FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); else - FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename 
FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_slow_forward,length); else @@ -337,9 +337,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, // destination is always out if (plan->post_plan) { - d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); - d_out_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_out.data(),d_out.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_out_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_out.data(),d_out.size()*2); + d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_out_scalar, d_scratch_scalar, plan->post_plan); @@ -348,7 +348,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, // scaling if required if (flag == -1 && plan->scaled) { - FFT_SCALAR norm = plan->norm; + FFT_KOKKOS_SCALAR norm = plan->norm; int num = plan->normnum; norm_functor f(d_out,norm); @@ -443,7 +443,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl plan->pre_plan = remapKK->remap_3d_create_plan_kokkos(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi, first_ilo,first_ihi,first_jlo,first_jhi, - first_klo,first_khi,2,0,0,FFT_PRECISION, + first_klo,first_khi,2,0,0,FFT_KOKKOS_PRECISION, usecollective,usecuda_aware); if (plan->pre_plan == nullptr) return nullptr; } @@ -468,7 +468,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl 
first_ilo,first_ihi,first_jlo,first_jhi, first_klo,first_khi, second_ilo,second_ihi,second_jlo,second_jhi, - second_klo,second_khi,2,1,0,FFT_PRECISION, + second_klo,second_khi,2,1,0,FFT_KOKKOS_PRECISION, usecollective,usecuda_aware); if (plan->mid1_plan == nullptr) return nullptr; @@ -509,7 +509,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl second_jlo,second_jhi,second_klo,second_khi, second_ilo,second_ihi, third_jlo,third_jhi,third_klo,third_khi, - third_ilo,third_ihi,2,1,0,FFT_PRECISION, + third_ilo,third_ihi,2,1,0,FFT_KOKKOS_PRECISION, usecollective,usecuda_aware); if (plan->mid2_plan == nullptr) return nullptr; @@ -537,7 +537,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl third_klo,third_khi,third_ilo,third_ihi, third_jlo,third_jhi, out_klo,out_khi,out_ilo,out_ihi, - out_jlo,out_jhi,2,(permute+1)%3,0,FFT_PRECISION, + out_jlo,out_jhi,2,(permute+1)%3,0,FFT_KOKKOS_PRECISION, usecollective,usecuda_aware); if (plan->post_plan == nullptr) return nullptr; } @@ -599,11 +599,11 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl *nbuf = copy_size + scratch_size; if (copy_size) { - plan->d_copy = typename FFT_AT::t_FFT_DATA_1d("fft3d:copy",copy_size); + plan->d_copy = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("fft3d:copy",copy_size); } if (scratch_size) { - plan->d_scratch = typename FFT_AT::t_FFT_DATA_1d("fft3d:scratch",scratch_size); + plan->d_scratch = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("fft3d:scratch",scratch_size); } // system specific pre-computation of 1d FFT coeffs @@ -697,34 +697,34 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl cufftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, &nfast,1,plan->length1, - CUFFT_TYPE,plan->total1/plan->length1); + CUFFT_KOKKOS_TYPE,plan->total1/plan->length1); cufftPlanMany(&(plan->plan_mid), 1, &nmid, &nmid,1,plan->length2, &nmid,1,plan->length2, - CUFFT_TYPE,plan->total2/plan->length2); + CUFFT_KOKKOS_TYPE,plan->total2/plan->length2); 
cufftPlanMany(&(plan->plan_slow), 1, &nslow, &nslow,1,plan->length3, &nslow,1,plan->length3, - CUFFT_TYPE,plan->total3/plan->length3); + CUFFT_KOKKOS_TYPE,plan->total3/plan->length3); #elif defined(FFT_KOKKOS_HIPFFT) hipfftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, &nfast,1,plan->length1, - HIPFFT_TYPE,plan->total1/plan->length1); + HIPFFT_KOKKOS_TYPE,plan->total1/plan->length1); hipfftPlanMany(&(plan->plan_mid), 1, &nmid, &nmid,1,plan->length2, &nmid,1,plan->length2, - HIPFFT_TYPE,plan->total2/plan->length2); + HIPFFT_KOKKOS_TYPE,plan->total2/plan->length2); hipfftPlanMany(&(plan->plan_slow), 1, &nslow, &nslow,1,plan->length3, &nslow,1,plan->length3, - HIPFFT_TYPE,plan->total3/plan->length3); + HIPFFT_KOKKOS_TYPE,plan->total3/plan->length3); #else /* FFT_KOKKOS_KISS */ @@ -838,7 +838,7 @@ void FFT3dKokkos::bifactor(int n, int *factor1, int *factor2) ------------------------------------------------------------------------- */ template -void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d d_data, int nsize, int flag, +void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_data, int nsize, int flag, struct fft_plan_3d_kokkos *plan) { // total = size of data needed in each dim @@ -878,13 +878,13 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ } #elif defined(FFT_KOKKOS_FFTW3) if (flag == -1) { - FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); + 
FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); } else { - FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); } #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); @@ -896,8 +896,8 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else kiss_fft_functor f; - typename FFT_AT::t_FFT_DATA_1d d_tmp = - typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_tmp = + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == -1) { f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length1); Kokkos::parallel_for(total1/length1,f); @@ -923,7 +923,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ // limit num to size of data if (flag == 1 && plan->scaled) { - FFT_SCALAR norm = plan->norm; + FFT_KOKKOS_SCALAR norm = plan->norm; int num = MIN(plan->normnum,nsize); norm_functor f(d_data,norm); diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index ed49c4b1ee..bb552ec4ef 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -28,14 +28,14 @@ namespace LAMMPS_NS { 
template struct fft_plan_3d_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; struct remap_plan_3d_kokkos *pre_plan; // remap from input -> 1st FFTs struct remap_plan_3d_kokkos *mid1_plan; // remap from 1st -> 2nd FFTs struct remap_plan_3d_kokkos *mid2_plan; // remap from 2nd -> 3rd FFTs struct remap_plan_3d_kokkos *post_plan; // remap from 3rd FFTs -> output - typename FFT_AT::t_FFT_DATA_1d d_copy; // memory for remap results (if needed) - typename FFT_AT::t_FFT_DATA_1d d_scratch; // scratch space for remaps + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_copy; // memory for remap results (if needed) + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_scratch; // scratch space for remaps int total1,total2,total3; // # of 1st,2nd,3rd FFTs (times length) int length1,length2,length3; // length of 1st,2nd,3rd FFTs int pre_target; // where to put remap results @@ -79,14 +79,14 @@ class FFT3dKokkos : protected Pointers { public: enum{FORWARD=1,BACKWARD=-1}; typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; FFT3dKokkos(class LAMMPS *, MPI_Comm, int,int,int,int,int,int,int,int,int,int,int,int,int,int,int, int,int,int *,int,int); ~FFT3dKokkos() override; - void compute(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, int); - void timing1d(typename FFT_AT::t_FFT_SCALAR_1d, int, int); + void compute(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, int); + void timing1d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, int, int); private: struct fft_plan_3d_kokkos *plan; @@ -96,7 +96,7 @@ class FFT3dKokkos : protected Pointers { KissFFTKokkos *kissfftKK; #endif - void fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d, typename FFT_AT::t_FFT_DATA_1d, int, struct fft_plan_3d_kokkos *); + void fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, int, struct 
fft_plan_3d_kokkos *); struct fft_plan_3d_kokkos *fft_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, @@ -105,7 +105,7 @@ class FFT3dKokkos : protected Pointers { void fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokkos *); - void fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d, int, int, struct fft_plan_3d_kokkos *); + void fft_3d_1d_only_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, int, int, struct fft_plan_3d_kokkos *); void bifactor(int, int *, int *); }; diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index bed2c7faf0..a9ea2de896 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -20,10 +20,10 @@ // data types for 2d/3d FFTs -#ifndef LMP_FFT_DATA_KOKKOS_H -#define LMP_FFT_DATA_KOKKOS_H +#ifndef LMP_FFT_KOKKOS_DATA_H +#define LMP_FFT_KOKKOS_DATA_H -#include "lmpfftsettings.h" +#include "lmpfftsettings_kokkos.h" // ------------------------------------------------------------------------- @@ -87,42 +87,42 @@ #if defined(FFT_KOKKOS_MKL) #include "mkl_dfti.h" #if defined(FFT_KOKKOS_SINGLE) - typedef float _Complex FFT_DATA; + typedef float _Complex FFT_KOKKOS_DATA; #define FFT_KOKKOS_MKL_PREC DFTI_SINGLE #else - typedef double _Complex FFT_DATA; + typedef double _Complex FFT_KOKKOS_DATA; #define FFT_KOKKOS_MKL_PREC DFTI_DOUBLE #endif #elif defined(FFT_KOKKOS_FFTW3) #include "fftw3.h" #if defined(FFT_KOKKOS_SINGLE) - typedef fftwf_complex FFT_DATA; + typedef fftwf_complex FFT_KOKKOS_DATA; #define FFTW_API(function) fftwf_ ## function #else - typedef fftw_complex FFT_DATA; + typedef fftw_complex FFT_KOKKOS_DATA; #define FFTW_API(function) fftw_ ## function #endif #elif defined(FFT_KOKKOS_CUFFT) #include "cufft.h" #if defined(FFT_KOKKOS_SINGLE) #define cufftExec cufftExecC2C - #define CUFFT_TYPE CUFFT_C2C - typedef cufftComplex FFT_DATA; + #define CUFFT_KOKKOS_TYPE CUFFT_KOKKOS_C2C + typedef cufftComplex FFT_KOKKOS_DATA; #else #define cufftExec cufftExecZ2Z - #define CUFFT_TYPE 
CUFFT_Z2Z - typedef cufftDoubleComplex FFT_DATA; + #define CUFFT_KOKKOS_TYPE CUFFT_KOKKOS_Z2Z + typedef cufftDoubleComplex FFT_KOKKOS_DATA; #endif #elif defined(FFT_KOKKOS_HIPFFT) #include #if defined(FFT_KOKKOS_SINGLE) #define hipfftExec hipfftExecC2C - #define HIPFFT_TYPE HIPFFT_C2C - typedef hipfftComplex FFT_DATA; + #define HIPFFT_KOKKOS_TYPE HIPFFT_KOKKOS_C2C + typedef hipfftComplex FFT_KOKKOS_DATA; #else #define hipfftExec hipfftExecZ2Z - #define HIPFFT_TYPE HIPFFT_Z2Z - typedef hipfftDoubleComplex FFT_DATA; + #define HIPFFT_KOKKOS_TYPE HIPFFT_KOKKOS_Z2Z + typedef hipfftDoubleComplex FFT_KOKKOS_DATA; #endif #else #if defined(FFT_KOKKOS_SINGLE) @@ -133,7 +133,7 @@ typedef struct { kiss_fft_scalar re; kiss_fft_scalar im; - } FFT_DATA; + } FFT_KOKKOS_DATA; #ifndef FFT_KOKKOS_KISSFFT #define FFT_KOKKOS_KISSFFT #endif @@ -141,9 +141,9 @@ // (double[2]*) is not a 1D pointer #if defined(FFT_KOKKOS_FFTW3) - typedef FFT_SCALAR* FFT_DATA_POINTER; + typedef FFT_KOKKOS_SCALAR* FFT_KOKKOS_DATA_POINTER; #else - typedef FFT_DATA* FFT_DATA_POINTER; + typedef FFT_KOKKOS_DATA* FFT_KOKKOS_DATA_POINTER; #endif @@ -154,23 +154,23 @@ template <> struct FFTArrayTypes { typedef Kokkos:: - DualView tdual_FFT_SCALAR_1d; -typedef tdual_FFT_SCALAR_1d::t_dev t_FFT_SCALAR_1d; -typedef tdual_FFT_SCALAR_1d::t_dev_um t_FFT_SCALAR_1d_um; + DualView tdual_FFT_KOKKOS_SCALAR_1d; +typedef tdual_FFT_KOKKOS_SCALAR_1d::t_dev t_FFT_KOKKOS_SCALAR_1d; +typedef tdual_FFT_KOKKOS_SCALAR_1d::t_dev_um t_FFT_KOKKOS_SCALAR_1d_um; -typedef Kokkos::DualView tdual_FFT_SCALAR_2d; -typedef tdual_FFT_SCALAR_2d::t_dev t_FFT_SCALAR_2d; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d; +typedef tdual_FFT_KOKKOS_SCALAR_2d::t_dev t_FFT_KOKKOS_SCALAR_2d; -typedef Kokkos::DualView tdual_FFT_SCALAR_2d_3; -typedef tdual_FFT_SCALAR_2d_3::t_dev t_FFT_SCALAR_2d_3; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d_3; +typedef tdual_FFT_KOKKOS_SCALAR_2d_3::t_dev t_FFT_KOKKOS_SCALAR_2d_3; -typedef Kokkos::DualView 
tdual_FFT_SCALAR_3d; -typedef tdual_FFT_SCALAR_3d::t_dev t_FFT_SCALAR_3d; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_3d; +typedef tdual_FFT_KOKKOS_SCALAR_3d::t_dev t_FFT_KOKKOS_SCALAR_3d; typedef Kokkos:: - DualView tdual_FFT_DATA_1d; -typedef tdual_FFT_DATA_1d::t_dev t_FFT_DATA_1d; -typedef tdual_FFT_DATA_1d::t_dev_um t_FFT_DATA_1d_um; + DualView tdual_FFT_KOKKOS_DATA_1d; +typedef tdual_FFT_KOKKOS_DATA_1d::t_dev t_FFT_KOKKOS_DATA_1d; +typedef tdual_FFT_KOKKOS_DATA_1d::t_dev_um t_FFT_KOKKOS_DATA_1d_um; typedef Kokkos:: DualView tdual_int_64; @@ -186,23 +186,23 @@ struct FFTArrayTypes { //Kspace typedef Kokkos:: - DualView tdual_FFT_SCALAR_1d; -typedef tdual_FFT_SCALAR_1d::t_host t_FFT_SCALAR_1d; -typedef tdual_FFT_SCALAR_1d::t_host_um t_FFT_SCALAR_1d_um; + DualView tdual_FFT_KOKKOS_SCALAR_1d; +typedef tdual_FFT_KOKKOS_SCALAR_1d::t_host t_FFT_KOKKOS_SCALAR_1d; +typedef tdual_FFT_KOKKOS_SCALAR_1d::t_host_um t_FFT_KOKKOS_SCALAR_1d_um; -typedef Kokkos::DualView tdual_FFT_SCALAR_2d; -typedef tdual_FFT_SCALAR_2d::t_host t_FFT_SCALAR_2d; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d; +typedef tdual_FFT_KOKKOS_SCALAR_2d::t_host t_FFT_KOKKOS_SCALAR_2d; -typedef Kokkos::DualView tdual_FFT_SCALAR_2d_3; -typedef tdual_FFT_SCALAR_2d_3::t_host t_FFT_SCALAR_2d_3; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d_3; +typedef tdual_FFT_KOKKOS_SCALAR_2d_3::t_host t_FFT_KOKKOS_SCALAR_2d_3; -typedef Kokkos::DualView tdual_FFT_SCALAR_3d; -typedef tdual_FFT_SCALAR_3d::t_host t_FFT_SCALAR_3d; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_3d; +typedef tdual_FFT_KOKKOS_SCALAR_3d::t_host t_FFT_KOKKOS_SCALAR_3d; typedef Kokkos:: - DualView tdual_FFT_DATA_1d; -typedef tdual_FFT_DATA_1d::t_host t_FFT_DATA_1d; -typedef tdual_FFT_DATA_1d::t_host_um t_FFT_DATA_1d_um; + DualView tdual_FFT_KOKKOS_DATA_1d; +typedef tdual_FFT_KOKKOS_DATA_1d::t_host t_FFT_KOKKOS_DATA_1d; +typedef tdual_FFT_KOKKOS_DATA_1d::t_host_um t_FFT_KOKKOS_DATA_1d_um; typedef Kokkos:: DualView tdual_int_64; 
@@ -212,12 +212,12 @@ typedef tdual_int_64::t_host_um t_int_64_um; }; #endif -typedef struct FFTArrayTypes FFT_DAT; -typedef struct FFTArrayTypes FFT_HAT; +typedef struct FFTArrayTypes FFT_KOKKOS_DAT; +typedef struct FFTArrayTypes FFT_KOKKOS_HAT; #if defined(FFT_KOKKOS_KISSFFT) -#include "kissfft_kokkos.h" // uses t_FFT_DATA_1d, needs to come last +#include "kissfft_kokkos.h" // uses t_FFT_KOKKOS_DATA_1d, needs to come last #endif diff --git a/src/KOKKOS/grid3d_kokkos.cpp b/src/KOKKOS/grid3d_kokkos.cpp index 9a82e0157d..0f8e0bdc4e 100644 --- a/src/KOKKOS/grid3d_kokkos.cpp +++ b/src/KOKKOS/grid3d_kokkos.cpp @@ -636,7 +636,7 @@ void Grid3dKokkos::setup_comm_tiled(int &nbuf1, int &nbuf2) template void Grid3dKokkos::forward_comm(int caller, void *ptr, int which, int nper, int nbyte, - FFT_DAT::tdual_FFT_SCALAR_1d& k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf2, + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf2, MPI_Datatype datatype) { if (caller == KSPACE) { @@ -655,14 +655,14 @@ void Grid3dKokkos::forward_comm(int caller, void *ptr, int which, in template void Grid3dKokkos:: forward_comm_kspace_brick(KSpace *kspace, int which, int nper, - FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int m; MPI_Request request; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_SCALAR* buf1; - FFT_SCALAR* buf2; + FFT_KOKKOS_SCALAR* buf1; + FFT_KOKKOS_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -709,13 +709,13 @@ forward_comm_kspace_brick(KSpace *kspace, int which, int nper, template void Grid3dKokkos:: forward_comm_kspace_tiled(KSpace *kspace, int which, int nper, - FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) + 
FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int i,m,offset; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_SCALAR* buf1; - FFT_SCALAR* buf2; + FFT_KOKKOS_SCALAR* buf1; + FFT_KOKKOS_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -776,7 +776,7 @@ forward_comm_kspace_tiled(KSpace *kspace, int which, int nper, template void Grid3dKokkos::reverse_comm(int caller, void *ptr, int which, int nper, int nbyte, - FFT_DAT::tdual_FFT_SCALAR_1d& k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf2, + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf2, MPI_Datatype datatype) { if (caller == KSPACE) { @@ -795,14 +795,14 @@ void Grid3dKokkos::reverse_comm(int caller, void *ptr, int which, in template void Grid3dKokkos:: reverse_comm_kspace_brick(KSpace *kspace, int which, int nper, - FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int m; MPI_Request request; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_SCALAR* buf1; - FFT_SCALAR* buf2; + FFT_KOKKOS_SCALAR* buf1; + FFT_KOKKOS_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -850,14 +850,14 @@ reverse_comm_kspace_brick(KSpace *kspace, int which, int nper, template void Grid3dKokkos:: reverse_comm_kspace_tiled(KSpace *kspace, int which, int nper, - FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int i,m,offset; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_SCALAR* buf1; - FFT_SCALAR* buf2; + 
FFT_KOKKOS_SCALAR* buf1; + FFT_KOKKOS_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); diff --git a/src/KOKKOS/grid3d_kokkos.h b/src/KOKKOS/grid3d_kokkos.h index 19751d83c9..8e9f6cd051 100644 --- a/src/KOKKOS/grid3d_kokkos.h +++ b/src/KOKKOS/grid3d_kokkos.h @@ -27,16 +27,16 @@ class Grid3dKokkos : public Grid3d { enum { KSPACE = 0, PAIR = 1, FIX = 2 }; // calling classes typedef DeviceType device_type; typedef ArrayTypes AT; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; Grid3dKokkos(class LAMMPS *, MPI_Comm, int, int, int); Grid3dKokkos(class LAMMPS *, MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int); ~Grid3dKokkos() override; void forward_comm(int, void *, int, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); void reverse_comm(int, void *, int, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); private: DAT::tdual_int_2d k_swap_packlist; @@ -57,13 +57,13 @@ class Grid3dKokkos : public Grid3d { void setup_comm_tiled(int &, int &) override; void forward_comm_kspace_brick(class KSpace *, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); void forward_comm_kspace_tiled(class KSpace *, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); void reverse_comm_kspace_brick(class KSpace *, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, 
MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); void reverse_comm_kspace_tiled(class KSpace *, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); void grow_swap() override; diff --git a/src/KOKKOS/kissfft_kokkos.h b/src/KOKKOS/kissfft_kokkos.h index 265677a21c..fc23bf7891 100644 --- a/src/KOKKOS/kissfft_kokkos.h +++ b/src/KOKKOS/kissfft_kokkos.h @@ -119,14 +119,14 @@ }while(0) */ -#define KISS_FFT_COS(phase) (FFT_SCALAR) cos(phase) -#define KISS_FFT_SIN(phase) (FFT_SCALAR) sin(phase) +#define KISS_FFT_KOKKOS_COS(phase) (FFT_KOKKOS_SCALAR) cos(phase) +#define KISS_FFT_KOKKOS_SIN(phase) (FFT_KOKKOS_SCALAR) sin(phase) #define HALF_OF(x) ((x)*.5) #define kf_cexp(x,x_index,phase) \ do{ \ - (x)(x_index).re = KISS_FFT_COS(phase);\ - (x)(x_index).im = KISS_FFT_SIN(phase);\ + (x)(x_index).re = KISS_FFT_KOKKOS_COS(phase);\ + (x)(x_index).im = KISS_FFT_KOKKOS_SIN(phase);\ }while(0) @@ -138,26 +138,26 @@ namespace LAMMPS_NS { template struct kiss_fft_state_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; int nfft; int inverse; - typename FFT_AT::t_int_64 d_factors; - typename FFT_AT::t_FFT_DATA_1d d_twiddles; - typename FFT_AT::t_FFT_DATA_1d d_scratch; + typename FFT_KOKKOS_AT::t_int_64 d_factors; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_twiddles; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_scratch; }; template class KissFFTKokkos { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; KOKKOS_INLINE_FUNCTION - static void kf_bfly2(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos 
&st, int m, int Fout_count) { - typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_SCALAR t[2]; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_KOKKOS_SCALAR t[2]; int Fout2_count; int tw1_count = 0; @@ -179,11 +179,11 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly4(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly4(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, const size_t m, int Fout_count) { - typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_SCALAR scratch[6][2]; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_KOKKOS_SCALAR scratch[6][2]; size_t k=m; const size_t m2=2*m; const size_t m3=3*m; @@ -237,14 +237,14 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly3(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly3(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, size_t m, int Fout_count) { size_t k=m; const size_t m2 = 2*m; - typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_SCALAR scratch[5][2]; - FFT_SCALAR epi3[2]; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_KOKKOS_SCALAR scratch[5][2]; + FFT_KOKKOS_SCALAR epi3[2]; //C_EQ(epi3,d_twiddles[fstride*m]); epi3[0] = d_twiddles(fstride*m).re; epi3[1] = d_twiddles(fstride*m).im; @@ -289,13 +289,13 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly5(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly5(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { int u; - FFT_SCALAR scratch[13][2]; - typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_SCALAR ya[2],yb[2]; 
+ FFT_KOKKOS_SCALAR scratch[13][2]; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_KOKKOS_SCALAR ya[2],yb[2]; //C_EQ(ya,d_twiddles[fstride*m]); ya[1] = d_twiddles(fstride*m).im; ya[0] = d_twiddles(fstride*m).re; @@ -369,15 +369,15 @@ class KissFFTKokkos { /* perform the butterfly for one stage of a mixed radix FFT */ KOKKOS_INLINE_FUNCTION - static void kf_bfly_generic(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly_generic(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int p, int Fout_count) { int u,k,q1,q; - typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_SCALAR t[2]; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_KOKKOS_SCALAR t[2]; int Norig = st.nfft; - typename FFT_AT::t_FFT_DATA_1d_um d_scratch = st.d_scratch; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_scratch = st.d_scratch; for ( u=0; u &st, int Fout_count, int f_count, int factors_count) + const typename FFT_KOKKOS_AT::t_int_64_um &d_factors, const kiss_fft_state_kokkos &st, int Fout_count, int f_count, int factors_count) { const int beg = Fout_count; const int p = d_factors[factors_count++]; /* the radix */ @@ -452,7 +452,7 @@ class KissFFTKokkos { p[i] * m[i] = m[i-1] m0 = n */ - static int kf_factor(int n, FFT_HAT::t_int_64 h_facbuf) + static int kf_factor(int n, FFT_KOKKOS_HAT::t_int_64 h_facbuf) { int p=4, nf=0; double floor_sqrt; @@ -496,12 +496,12 @@ class KissFFTKokkos { st.nfft = nfft; st.inverse = inverse_fft; - typename FFT_AT::tdual_int_64 k_factors = typename FFT_AT::tdual_int_64(); - typename FFT_AT::tdual_FFT_DATA_1d k_twiddles = typename FFT_AT::tdual_FFT_DATA_1d(); + typename FFT_KOKKOS_AT::tdual_int_64 k_factors = typename FFT_KOKKOS_AT::tdual_int_64(); + typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d k_twiddles = typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d(); if (nfft > 
0) { - k_factors = typename FFT_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); - k_twiddles = typename FFT_AT::tdual_FFT_DATA_1d("kissfft:twiddles",nfft); + k_factors = typename FFT_KOKKOS_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); + k_twiddles = typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d("kissfft:twiddles",nfft); for (i=0;i(); @@ -524,12 +524,12 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_AT::t_FFT_DATA_1d_um &d_fin, typename FFT_AT::t_FFT_DATA_1d_um &d_fout, int in_stride, int offset) + static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_fin, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_fout, int in_stride, int offset) { //if (d_fin.data() == d_fout.data()) { // // NOTE: this is not really an in-place FFT algorithm. // // It just performs an out-of-place FFT into a temp buffer - // typename FFT_AT::t_FFT_DATA_1d_um d_tmpbuf = typename FFT_AT::t_FFT_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); + // typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_tmpbuf = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); // kf_work(d_tmpbuf,d_fin,1,in_stride,st.d_factors,st,offset,offset).re; // Kokkos::deep_copy(d_fout,d_tmpbuf); //} else { @@ -538,7 +538,7 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_AT::t_FFT_DATA_1d_um d_fin, typename FFT_AT::t_FFT_DATA_1d_um d_fout, int offset) + static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_fin, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_fout, int offset) { kiss_fft_stride(cfg,d_fin,d_fout,1,offset); } diff --git a/src/KOKKOS/kokkos_base_fft.h b/src/KOKKOS/kokkos_base_fft.h index 08369b3c78..567dc02ff3 100644 --- a/src/KOKKOS/kokkos_base_fft.h +++ 
b/src/KOKKOS/kokkos_base_fft.h @@ -12,8 +12,8 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -#ifndef LMP_KOKKOS_BASE_FFT_H -#define LMP_KOKKOS_BASE_FFT_H +#ifndef LMP_KOKKOS_BASE_FFT_KOKKOS_H +#define LMP_KOKKOS_BASE_FFT_KOKKOS_H #include "fftdata_kokkos.h" @@ -24,10 +24,10 @@ class KokkosBaseFFT { KokkosBaseFFT() {} // Kspace - virtual void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; - virtual void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; + virtual void pack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; + virtual void pack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; }; } diff --git a/src/KOKKOS/pack_kokkos.h b/src/KOKKOS/pack_kokkos.h index fe90d294a6..97d35afe26 100644 --- a/src/KOKKOS/pack_kokkos.h +++ b/src/KOKKOS/pack_kokkos.h @@ -38,13 +38,13 @@ template class PackKokkos { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; struct pack_3d_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typedef FFTArrayTypes FFT_KOKKOS_AT; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int 
buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -52,7 +52,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - pack_3d_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + pack_3d_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -80,7 +80,7 @@ public: } }; -static void pack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) +static void pack_3d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -98,7 +98,7 @@ struct unpack_3d_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -106,7 +106,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { 
@@ -134,7 +134,7 @@ public: } }; -static void unpack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -153,7 +153,7 @@ struct unpack_3d_permute1_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -161,7 +161,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_1_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_1_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -189,7 +189,7 @@ public: } }; -static void unpack_3d_permute1_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_1(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -206,7 +206,7 @@ struct unpack_3d_permute1_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - 
typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -214,7 +214,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_2_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_2_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -243,7 +243,7 @@ public: } }; -static void unpack_3d_permute1_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -261,7 +261,7 @@ struct unpack_3d_permute1_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -270,7 +270,7 @@ public: int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute1_n_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_n_functor(typename 
FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -299,7 +299,7 @@ public: } }; -static void unpack_3d_permute1_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_n(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -317,7 +317,7 @@ struct unpack_3d_permute2_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -325,7 +325,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_1_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_1_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -352,7 +352,7 @@ public: } }; -static void unpack_3d_permute2_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_1(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int 
data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -370,7 +370,7 @@ struct unpack_3d_permute2_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -378,7 +378,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_2_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_2_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -406,7 +406,7 @@ public: } }; -static void unpack_3d_permute2_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -423,7 +423,7 @@ struct unpack_3d_permute2_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -432,7 +432,7 @@ public: int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - 
unpack_3d_permute2_n_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_n_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -460,7 +460,7 @@ public: } }; -static void unpack_3d_permute2_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_n(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 17a9c82bdb..3fc90c088d 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -285,7 +285,7 @@ void PPPMKokkos::init() estimated_accuracy); mesg += fmt::format(" estimated relative force accuracy = {:.8g}\n", estimated_accuracy/two_charge_force); - mesg += " using " LMP_FFT_PREC " precision " LMP_FFT_LIB "\n"; + mesg += " using " LMP_FFT_KOKKOS_PREC " precision " LMP_FFT_KOKKOS_LIB "\n"; mesg += fmt::format(" 3d grid and FFT values/proc = {} {}\n", ngrid_max,nfft_both_max); utils::logmesg(lmp,mesg); @@ -582,7 +582,7 @@ void PPPMKokkos::compute(int eflag, int vflag) if (atom->nmax > nmax) { nmax = atomKK->nmax; d_part2grid = typename AT::t_int_1d_3("pppm:part2grid",nmax); - d_rho1d = typename FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + d_rho1d = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); } // find grid points for all my particles @@ -595,8 +595,8 @@ void PPPMKokkos::compute(int eflag, int vflag) // to fully sum 
contribution in their 3d bricks // remap from 3d decomposition to FFT decomposition - gc->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO,1,sizeof(FFT_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); + gc->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO,1,sizeof(FFT_KOKKOS_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); brick2fft(); // compute potential gradient on my FFT grid and @@ -609,14 +609,14 @@ void PPPMKokkos::compute(int eflag, int vflag) // all procs communicate E-field values // to fill ghost cells surrounding their 3d bricks - gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK,3,sizeof(FFT_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); + gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK,3,sizeof(FFT_KOKKOS_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); // extra per-atom energy/virial communication if (evflag_atom) - gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK_PERATOM,7,sizeof(FFT_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); + gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK_PERATOM,7,sizeof(FFT_KOKKOS_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); // calculate the force on my particles @@ -730,8 +730,8 @@ void PPPMKokkos::allocate() npergrid = 3; - k_gc_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); - k_gc_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); + k_gc_buf1 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); + k_gc_buf2 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); // tally local grid sizes // ngrid = count of owned+ghost grid cells on this proc @@ -753,7 +753,7 @@ void PPPMKokkos::allocate() // allocate distributed grid data - d_density_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_density_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); 
memoryKK->create_kokkos(k_density_fft,density_fft,nfft_both,"pppm:d_density_fft"); d_density_fft = k_density_fft.view(); @@ -775,17 +775,17 @@ void PPPMKokkos::allocate() d_fkz = typename AT::t_float_1d("pppm:d_fkz",nfft_both); } - d_vdx_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdy_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdz_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdx_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdy_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdz_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // summation coeffs order_allocated = order; k_gf_b = typename DAT::tdual_float_1d("pppm:gf_b",order); d_gf_b = k_gf_b.view(); - d_rho1d = typename FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); - k_rho_coeff = FFT_DAT::tdual_FFT_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); + d_rho1d = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + k_rho_coeff = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); d_rho_coeff = k_rho_coeff.view(); h_rho_coeff = k_rho_coeff.h_view; @@ -810,7 +810,7 @@ void PPPMKokkos::allocate() remap = new RemapKokkos(lmp,world, nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_PRECISION,collective_flag,gpu_aware_flag); + 1,0,0,FFT_KOKKOS_PRECISION,collective_flag,gpu_aware_flag); } /* 
---------------------------------------------------------------------- @@ -847,14 +847,14 @@ void PPPMKokkos::allocate_peratom() { peratom_allocate_flag = 1; - d_u_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_u_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v0_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v1_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v2_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v3_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v4_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v5_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v0_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v1_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v2_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v3_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v4_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v5_brick = typename 
FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // use same GC ghost grid object for peratom grid communication @@ -862,8 +862,8 @@ void PPPMKokkos::allocate_peratom() npergrid = 7; - k_gc_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); - k_gc_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); + k_gc_buf1 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); + k_gc_buf2 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); } /* ---------------------------------------------------------------------- @@ -1234,14 +1234,14 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_make_rho_atomic, const int &i) const { // The density_brick array is atomic for Half/Thread neighbor style - Kokkos::View::value,Kokkos::MemoryTraits > a_density_brick = d_density_brick; + Kokkos::View::value,Kokkos::MemoryTraits > a_density_brick = d_density_brick; int nx = d_part2grid(i,0); int ny = d_part2grid(i,1); int nz = d_part2grid(i,2); - const FFT_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; - const FFT_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; - const FFT_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; + const FFT_KOKKOS_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; + const FFT_KOKKOS_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; + const FFT_KOKKOS_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; nz -= nzlo_out; @@ -1250,13 +1250,13 @@ void PPPMKokkos::operator()(TagPPPM_make_rho_atomic, const int &i) c compute_rho1d(i,dx,dy,dz); - const FFT_SCALAR z0 = delvolinv * q[i]; + const FFT_KOKKOS_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; n++) { const int mz = n+nz; - const FFT_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); + const FFT_KOKKOS_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); for (int m = nlower; m <= nupper; m++) { const int my = m+ny; - const FFT_SCALAR x0 = 
y0*d_rho1d(i,m+order/2,1); + const FFT_KOKKOS_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); for (int l = nlower; l <= nupper; l++) { const int mx = l+nx; a_density_brick(mz,my,mx) += x0*d_rho1d(i,l+order/2,0); @@ -1294,9 +1294,9 @@ void PPPMKokkos::operator() (TagPPPM_make_rho, typename Kokkos::Team if ( ((nz+nlower-nzlo_out)*ix*iy >= ito) || ((nz+nupper-nzlo_out+1)*ix*iy < ifrom) ) continue; - const FFT_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; - const FFT_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; - const FFT_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; + const FFT_KOKKOS_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; + const FFT_KOKKOS_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; + const FFT_KOKKOS_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; nz -= nzlo_out; ny -= nylo_out; @@ -1304,15 +1304,15 @@ void PPPMKokkos::operator() (TagPPPM_make_rho, typename Kokkos::Team compute_rho1d(i,dx,dy,dz); - const FFT_SCALAR z0 = delvolinv * q[i]; + const FFT_KOKKOS_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; n++) { const int mz = n+nz; const int in = mz*ix*iy; - const FFT_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); + const FFT_KOKKOS_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); for (int m = nlower; m <= nupper; m++) { const int my = m+ny; const int im = in+my*ix; - const FFT_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); + const FFT_KOKKOS_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); for (int l = nlower; l <= nupper; l++) { const int mx = l+nx; const int il = im+mx; @@ -2040,8 +2040,8 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_fieldforce_ik, const int &i) const { int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; + FFT_KOKKOS_SCALAR x0,y0,z0; + FFT_KOKKOS_SCALAR ekx,eky,ekz; nx = d_part2grid(i,0); ny = d_part2grid(i,1); @@ -2100,8 +2100,8 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_fieldforce_peratom, const int &i) const { int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR 
dx,dy,dz,x0,y0,z0; - FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + FFT_KOKKOS_SCALAR dx,dy,dz,x0,y0,z0; + FFT_KOKKOS_SCALAR u,v0,v1,v2,v3,v4,v5; nx = d_part2grid(i,0); ny = d_part2grid(i,1); @@ -2155,7 +2155,7 @@ void PPPMKokkos::operator()(TagPPPM_fieldforce_peratom, const int &i ------------------------------------------------------------------------- */ template -void PPPMKokkos::pack_forward_grid_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::pack_forward_grid_kokkos(int flag, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2211,7 +2211,7 @@ void PPPMKokkos::operator()(TagPPPM_pack_forward2, const int &i) con ------------------------------------------------------------------------- */ template -void PPPMKokkos::unpack_forward_grid_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::unpack_forward_grid_kokkos(int flag, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2269,7 +2269,7 @@ void PPPMKokkos::operator()(TagPPPM_unpack_forward2, const int &i) c ------------------------------------------------------------------------- */ template -void PPPMKokkos::pack_reverse_grid_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::pack_reverse_grid_kokkos(int /*flag*/, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2299,7 +2299,7 @@ void 
PPPMKokkos::operator()(TagPPPM_pack_reverse, const int &i) cons ------------------------------------------------------------------------- */ template -void PPPMKokkos::unpack_reverse_grid_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::unpack_reverse_grid_kokkos(int /*flag*/, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2332,11 +2332,11 @@ void PPPMKokkos::operator()(TagPPPM_unpack_reverse, const int &i) co template KOKKOS_INLINE_FUNCTION -void PPPMKokkos::compute_rho1d(const int i, const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz) const +void PPPMKokkos::compute_rho1d(const int i, const FFT_KOKKOS_SCALAR &dx, const FFT_KOKKOS_SCALAR &dy, + const FFT_KOKKOS_SCALAR &dz) const { int k,l; - FFT_SCALAR r1,r2,r3; + FFT_KOKKOS_SCALAR r1,r2,r3; for (k = (1-order)/2; k <= order/2; k++) { r1 = r2 = r3 = ZEROF; @@ -2375,10 +2375,10 @@ template void PPPMKokkos::compute_rho_coeff() { int j,k,l,m; - FFT_SCALAR s; - FFT_SCALAR **a = new FFT_SCALAR *[order]; + FFT_KOKKOS_SCALAR s; + FFT_KOKKOS_SCALAR **a = new FFT_KOKKOS_SCALAR *[order]; for (int i = 0; i < order; ++i) - a[i] = new FFT_SCALAR[2*order+1]; + a[i] = new FFT_KOKKOS_SCALAR[2*order+1]; for (k = 0; k <= 2*order; k++) for (l = 0; l < order; l++) @@ -2586,18 +2586,18 @@ double PPPMKokkos::memory_usage() double bytes = (double)nmax*3 * sizeof(double); int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * (nzhi_out-nzlo_out+1); - bytes += (double)4 * nbrick * sizeof(FFT_SCALAR); + bytes += (double)4 * nbrick * sizeof(FFT_KOKKOS_SCALAR); if (triclinic) bytes += (double)3 * nfft_both * sizeof(double); bytes += (double)6 * nfft_both * sizeof(double); bytes += (double)nfft_both * sizeof(double); - bytes += (double)nfft_both*5 * 
sizeof(FFT_SCALAR); + bytes += (double)nfft_both*5 * sizeof(FFT_KOKKOS_SCALAR); if (peratom_allocate_flag) - bytes += (double)6 * nbrick * sizeof(FFT_SCALAR); + bytes += (double)6 * nbrick * sizeof(FFT_KOKKOS_SCALAR); // two Grid3d bufs - bytes += (double)(ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_SCALAR); + bytes += (double)(ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_KOKKOS_SCALAR); return bytes; } diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index 14d4670dbd..dc0fbd88e5 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -131,7 +131,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; PPPMKokkos(class LAMMPS *); ~PPPMKokkos() override; @@ -350,7 +350,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { int nx,ny,nz; typename AT::t_int_1d_um d_list_index; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf; int unpack_offset; DAT::tdual_int_scalar k_flag; @@ -364,31 +364,31 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { typename ArrayTypes::t_efloat_1d d_eatom; typename ArrayTypes::t_virial_array d_vatom; - typename FFT_AT::t_FFT_SCALAR_3d d_density_brick; - typename FFT_AT::t_FFT_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; - typename FFT_AT::t_FFT_SCALAR_3d d_u_brick; - typename FFT_AT::t_FFT_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; - typename FFT_AT::t_FFT_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_density_brick; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_u_brick; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; typename AT::t_float_1d d_greensfn; 
typename AT::t_virial_array d_vg; typename AT::t_float_1d d_fkx; typename AT::t_float_1d d_fky; typename AT::t_float_1d d_fkz; - FFT_DAT::tdual_FFT_SCALAR_1d k_density_fft; - FFT_DAT::tdual_FFT_SCALAR_1d k_work1; - FFT_DAT::tdual_FFT_SCALAR_1d k_work2; - typename FFT_AT::t_FFT_SCALAR_1d d_density_fft; - typename FFT_AT::t_FFT_SCALAR_1d d_work1; - typename FFT_AT::t_FFT_SCALAR_1d d_work2; + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_density_fft; + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work1; + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work2; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_density_fft; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_work1; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_work2; DAT::tdual_float_1d k_gf_b; typename AT::t_float_1d d_gf_b; - //FFT_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; - typename FFT_AT::t_FFT_SCALAR_2d_3 d_rho1d; - FFT_DAT::tdual_FFT_SCALAR_2d k_rho_coeff; - typename FFT_AT::t_FFT_SCALAR_2d d_rho_coeff; - FFT_HAT::t_FFT_SCALAR_2d h_rho_coeff; + //FFT_KOKKOS_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3 d_rho1d; + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d k_rho_coeff; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d d_rho_coeff; + FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_2d h_rho_coeff; //double **acons; typename Kokkos::DualView::t_host acons; @@ -398,7 +398,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { RemapKokkos *remap; Grid3dKokkos *gc; - FFT_DAT::tdual_FFT_SCALAR_1d k_gc_buf1,k_gc_buf2; + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_gc_buf1,k_gc_buf2; int ngc_buf1,ngc_buf2,npergrid; //int **part2grid; // storage for particle -> grid mapping @@ -429,17 +429,17 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { void fieldforce_peratom() override; KOKKOS_INLINE_FUNCTION - void compute_rho1d(const int i, const FFT_SCALAR &, const FFT_SCALAR &, - const FFT_SCALAR &) const; + void compute_rho1d(const int i, const 
FFT_KOKKOS_SCALAR &, const FFT_KOKKOS_SCALAR &, + const FFT_KOKKOS_SCALAR &) const; void compute_rho_coeff(); void slabcorr() override; // grid communication - void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; - void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; - void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; - void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; + void pack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; + void unpack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; + void pack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; + void unpack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; // triclinic diff --git a/src/KOKKOS/remap_kokkos.cpp b/src/KOKKOS/remap_kokkos.cpp index efc6742a25..d6b8a5691c 100644 --- a/src/KOKKOS/remap_kokkos.cpp +++ b/src/KOKKOS/remap_kokkos.cpp @@ -59,7 +59,7 @@ RemapKokkos::~RemapKokkos() /* ---------------------------------------------------------------------- */ template -void RemapKokkos::perform(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename FFT_AT::t_FFT_SCALAR_1d d_buf) +void RemapKokkos::perform(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_buf) { remap_3d_kokkos(d_in,d_out,d_buf,plan); } @@ -103,7 +103,7 @@ void RemapKokkos::perform(typename FFT_AT::t_FFT_SCALAR_1d d_in, typ ------------------------------------------------------------------------- */ template 
-void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename FFT_AT::t_FFT_SCALAR_1d d_buf, +void RemapKokkos::remap_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_buf, struct remap_plan_3d_kokkos *plan) { // collective flag not yet supported @@ -111,7 +111,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d // use point-to-point communication int i,isend,irecv; - typename FFT_AT::t_FFT_SCALAR_1d d_scratch; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; if (plan->memory == 0) d_scratch = d_buf; @@ -120,20 +120,20 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d // post all recvs into scratch space - FFT_SCALAR* v_scratch = d_scratch.data(); + FFT_KOKKOS_SCALAR* v_scratch = d_scratch.data(); if (!plan->usecuda_aware) { plan->h_scratch = Kokkos::create_mirror_view(d_scratch); v_scratch = plan->h_scratch.data(); } for (irecv = 0; irecv < plan->nrecv; irecv++) { - FFT_SCALAR* scratch = v_scratch + plan->recv_bufloc[irecv]; + FFT_KOKKOS_SCALAR* scratch = v_scratch + plan->recv_bufloc[irecv]; MPI_Irecv(scratch,plan->recv_size[irecv], - MPI_FFT_SCALAR,plan->recv_proc[irecv],0, + MPI_FFT_KOKKOS_SCALAR,plan->recv_proc[irecv],0, plan->comm,&plan->request[irecv]); } - FFT_SCALAR* v_sendbuf = plan->d_sendbuf.data(); + FFT_KOKKOS_SCALAR* v_sendbuf = plan->d_sendbuf.data(); if (!plan->usecuda_aware) { plan->h_sendbuf = Kokkos::create_mirror_view(plan->d_sendbuf); v_sendbuf = plan->h_sendbuf.data(); @@ -149,7 +149,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d if (!plan->usecuda_aware) Kokkos::deep_copy(plan->h_sendbuf,plan->d_sendbuf); - MPI_Send(v_sendbuf,plan->send_size[isend],MPI_FFT_SCALAR, + MPI_Send(v_sendbuf,plan->send_size[isend],MPI_FFT_KOKKOS_SCALAR, plan->send_proc[isend],0,plan->comm); } @@ -465,7 +465,7 
@@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat size = MAX(size,plan->send_size[nsend]); if (size) { - plan->d_sendbuf = typename FFT_AT::t_FFT_SCALAR_1d("remap3d:sendbuf",size); + plan->d_sendbuf = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:sendbuf",size); if (!plan->d_sendbuf.data()) return nullptr; } @@ -475,7 +475,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat if (memory == 1) { if (nrecv > 0) { plan->d_scratch = - typename FFT_AT::t_FFT_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); if (!plan->d_scratch.data()) return nullptr; } } diff --git a/src/KOKKOS/remap_kokkos.h b/src/KOKKOS/remap_kokkos.h index a62c14f00b..035b58260e 100644 --- a/src/KOKKOS/remap_kokkos.h +++ b/src/KOKKOS/remap_kokkos.h @@ -27,14 +27,14 @@ namespace LAMMPS_NS { template struct remap_plan_3d_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_SCALAR_1d d_sendbuf; // buffer for MPI sends - FFT_HAT::t_FFT_SCALAR_1d h_sendbuf; // host buffer for MPI sends - typename FFT_AT::t_FFT_SCALAR_1d d_scratch; // scratch buffer for MPI recvs - FFT_HAT::t_FFT_SCALAR_1d h_scratch; // host scratch buffer for MPI recvs - void (*pack)(typename FFT_AT::t_FFT_SCALAR_1d_um, int, typename FFT_AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); + typedef FFTArrayTypes FFT_KOKKOS_AT; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_sendbuf; // buffer for MPI sends + FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_sendbuf; // host buffer for MPI sends + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; // scratch buffer for MPI recvs + FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_scratch; // host scratch buffer for MPI recvs + void (*pack)(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); // which pack function to use - 
void (*unpack)(typename FFT_AT::t_FFT_SCALAR_1d_um, int, typename FFT_AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); + void (*unpack)(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); // which unpack function to use int *send_offset; // extraction loc for each send int *send_size; // size of each send message @@ -61,16 +61,16 @@ template class RemapKokkos : protected Pointers { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; RemapKokkos(class LAMMPS *); RemapKokkos(class LAMMPS *, MPI_Comm,int,int,int,int,int,int, int,int,int,int,int,int,int,int,int,int,int,int); ~RemapKokkos() override; - void perform(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d); + void perform(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d); struct remap_plan_3d_kokkos *plan; - void remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, struct remap_plan_3d_kokkos *); + void remap_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, struct remap_plan_3d_kokkos *); struct remap_plan_3d_kokkos *remap_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, From 51eebf311fde2f3848feae35ce3692a35d90fe9f Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Wed, 6 Dec 2023 12:40:11 -0500 Subject: [PATCH 008/267] Added fft settings to Install.sh, fixed typo in HIPFFT_C2C and HIPFFT_Z2Z --- src/KOKKOS/Install.sh | 1 + src/KOKKOS/fftdata_kokkos.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 489efc55a0..ba6c4ed427 100755 --- 
a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -200,6 +200,7 @@ action kokkos_few.h action kokkos_type.h action kokkos.cpp action kokkos.h +action lmpfftsettings_kokkos.h action math_special_kokkos.cpp action math_special_kokkos.h action meam_dens_final_kokkos.h meam_dens_final.cpp diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index a9ea2de896..d52bc0b968 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -117,11 +117,11 @@ #include #if defined(FFT_KOKKOS_SINGLE) #define hipfftExec hipfftExecC2C - #define HIPFFT_KOKKOS_TYPE HIPFFT_KOKKOS_C2C + #define HIPFFT_KOKKOS_TYPE HIPFFT_C2C typedef hipfftComplex FFT_KOKKOS_DATA; #else #define hipfftExec hipfftExecZ2Z - #define HIPFFT_KOKKOS_TYPE HIPFFT_KOKKOS_Z2Z + #define HIPFFT_KOKKOS_TYPE HIPFFT_Z2Z typedef hipfftDoubleComplex FFT_KOKKOS_DATA; #endif #else From e80c3d3215a9de1f8e1c9041fc96c9713cb156ca Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 12 Dec 2023 12:20:24 -0700 Subject: [PATCH 009/267] Revert FFT_AT name change --- src/KOKKOS/fft3d_kokkos.cpp | 70 ++++++++++++++++++------------------- src/KOKKOS/fft3d_kokkos.h | 16 ++++----- src/KOKKOS/grid3d_kokkos.h | 2 +- src/KOKKOS/kissfft_kokkos.h | 52 +++++++++++++-------------- src/KOKKOS/pack_kokkos.h | 52 +++++++++++++-------------- src/KOKKOS/pppm_kokkos.cpp | 26 +++++++------- src/KOKKOS/pppm_kokkos.h | 24 ++++++------- src/KOKKOS/remap_kokkos.cpp | 10 +++--- src/KOKKOS/remap_kokkos.h | 16 ++++----- 9 files changed, 134 insertions(+), 134 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 7093136fe6..9e7b87b8d8 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -88,10 +88,10 @@ FFT3dKokkos::~FFT3dKokkos() /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::compute(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, int 
flag) +void FFT3dKokkos::compute(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, int flag) { - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_out_data((FFT_KOKKOS_DATA_POINTER)d_out.data(),d_out.size()/2); + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_out_data((FFT_KOKKOS_DATA_POINTER)d_out.data(),d_out.size()/2); fft_3d_kokkos(d_in_data,d_out_data,flag,plan); } @@ -99,9 +99,9 @@ void FFT3dKokkos::compute(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALA /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::timing1d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, int nsize, int flag) +void FFT3dKokkos::timing1d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, int nsize, int flag) { - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); fft_3d_1d_only_kokkos(d_in_data,nsize,flag,plan); } @@ -140,11 +140,11 @@ template struct norm_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_out; + typedef FFTArrayTypes FFT_AT; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_out; int norm; - norm_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_out_, int norm_): + norm_functor(typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_out_, int norm_): d_out(d_out_),norm(norm_) {} KOKKOS_INLINE_FUNCTION @@ -167,14 +167,14 @@ template struct kiss_fft_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_data,d_tmp; + typedef FFTArrayTypes FFT_AT; + typename 
FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_data,d_tmp; kiss_fft_state_kokkos st; int length; kiss_fft_functor() = default; - kiss_fft_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_data_,typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): + kiss_fft_functor(typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_data_,typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): d_data(d_data_), d_tmp(d_tmp_), st(st_) @@ -191,11 +191,11 @@ public: #endif template -void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) +void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in, typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) { int total,length; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_data,d_copy; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_data,d_copy; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; // pre-remap to prepare for 1st FFTs if needed // copy = loc for remap result @@ -204,9 +204,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS if (plan->pre_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_in_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_in.data(),d_in.size()*2); - d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_in_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_in.data(),d_in.size()*2); + d_copy_scalar = typename 
FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_in_scalar, d_copy_scalar, d_scratch_scalar, plan->pre_plan); @@ -234,8 +234,8 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #else - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_tmp = - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_tmp = + typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); kiss_fft_functor f; if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length); @@ -251,9 +251,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS if (plan->mid1_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid1_plan); @@ -280,7 +280,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename 
FFT_KOKKOS_AT::t_FFT_KOKKOS #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_mid_forward,length); else @@ -295,9 +295,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS if (plan->mid2_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid2_plan); @@ -324,7 +324,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_slow_forward,length); else @@ 
-337,9 +337,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS // destination is always out if (plan->post_plan) { - d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_out_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_out.data(),d_out.size()*2); - d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_out_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_out.data(),d_out.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_out_scalar, d_scratch_scalar, plan->post_plan); @@ -599,11 +599,11 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl *nbuf = copy_size + scratch_size; if (copy_size) { - plan->d_copy = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("fft3d:copy",copy_size); + plan->d_copy = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("fft3d:copy",copy_size); } if (scratch_size) { - plan->d_scratch = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("fft3d:scratch",scratch_size); + plan->d_scratch = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("fft3d:scratch",scratch_size); } // system specific pre-computation of 1d FFT coeffs @@ -838,7 +838,7 @@ void FFT3dKokkos::bifactor(int n, int *factor1, int *factor2) ------------------------------------------------------------------------- */ template -void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_data, int nsize, int flag, +void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_data, int nsize, int flag, struct fft_plan_3d_kokkos *plan) { // total = size of data needed in each dim @@ -896,8 +896,8 @@ void 
FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_KOKKOS_AT::t_FF hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else kiss_fft_functor f; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_tmp = - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_tmp = + typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == -1) { f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length1); Kokkos::parallel_for(total1/length1,f); diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index bb552ec4ef..9729bc6a63 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -28,14 +28,14 @@ namespace LAMMPS_NS { template struct fft_plan_3d_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; struct remap_plan_3d_kokkos *pre_plan; // remap from input -> 1st FFTs struct remap_plan_3d_kokkos *mid1_plan; // remap from 1st -> 2nd FFTs struct remap_plan_3d_kokkos *mid2_plan; // remap from 2nd -> 3rd FFTs struct remap_plan_3d_kokkos *post_plan; // remap from 3rd FFTs -> output - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_copy; // memory for remap results (if needed) - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_scratch; // scratch space for remaps + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_copy; // memory for remap results (if needed) + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_scratch; // scratch space for remaps int total1,total2,total3; // # of 1st,2nd,3rd FFTs (times length) int length1,length2,length3; // length of 1st,2nd,3rd FFTs int pre_target; // where to put remap results @@ -79,14 +79,14 @@ class FFT3dKokkos : protected Pointers { public: enum{FORWARD=1,BACKWARD=-1}; typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; FFT3dKokkos(class 
LAMMPS *, MPI_Comm, int,int,int,int,int,int,int,int,int,int,int,int,int,int,int, int,int,int *,int,int); ~FFT3dKokkos() override; - void compute(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, int); - void timing1d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, int, int); + void compute(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, int); + void timing1d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, int, int); private: struct fft_plan_3d_kokkos *plan; @@ -96,7 +96,7 @@ class FFT3dKokkos : protected Pointers { KissFFTKokkos *kissfftKK; #endif - void fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, int, struct fft_plan_3d_kokkos *); + void fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d, typename FFT_AT::t_FFT_KOKKOS_DATA_1d, int, struct fft_plan_3d_kokkos *); struct fft_plan_3d_kokkos *fft_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, @@ -105,7 +105,7 @@ class FFT3dKokkos : protected Pointers { void fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokkos *); - void fft_3d_1d_only_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, int, int, struct fft_plan_3d_kokkos *); + void fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d, int, int, struct fft_plan_3d_kokkos *); void bifactor(int, int *, int *); }; diff --git a/src/KOKKOS/grid3d_kokkos.h b/src/KOKKOS/grid3d_kokkos.h index 8e9f6cd051..864ac19c06 100644 --- a/src/KOKKOS/grid3d_kokkos.h +++ b/src/KOKKOS/grid3d_kokkos.h @@ -27,7 +27,7 @@ class Grid3dKokkos : public Grid3d { enum { KSPACE = 0, PAIR = 1, FIX = 2 }; // calling classes typedef DeviceType device_type; typedef ArrayTypes AT; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; Grid3dKokkos(class LAMMPS *, MPI_Comm, int, int, int); Grid3dKokkos(class LAMMPS *, MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int); diff --git 
a/src/KOKKOS/kissfft_kokkos.h b/src/KOKKOS/kissfft_kokkos.h index fc23bf7891..66f32d29fb 100644 --- a/src/KOKKOS/kissfft_kokkos.h +++ b/src/KOKKOS/kissfft_kokkos.h @@ -138,25 +138,25 @@ namespace LAMMPS_NS { template struct kiss_fft_state_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; int nfft; int inverse; - typename FFT_KOKKOS_AT::t_int_64 d_factors; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_twiddles; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_scratch; + typename FFT_AT::t_int_64 d_factors; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_scratch; }; template class KissFFTKokkos { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; KOKKOS_INLINE_FUNCTION - static void kf_bfly2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly2(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; FFT_KOKKOS_SCALAR t[2]; int Fout2_count; int tw1_count = 0; @@ -179,10 +179,10 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly4(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly4(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, const size_t m, int Fout_count) { - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; FFT_KOKKOS_SCALAR scratch[6][2]; size_t k=m; const size_t m2=2*m; @@ -237,12 +237,12 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly3(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um 
&d_Fout, const size_t fstride, + static void kf_bfly3(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, size_t m, int Fout_count) { size_t k=m; const size_t m2 = 2*m; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; FFT_KOKKOS_SCALAR scratch[5][2]; FFT_KOKKOS_SCALAR epi3[2]; //C_EQ(epi3,d_twiddles[fstride*m]); @@ -289,12 +289,12 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly5(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly5(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { int u; FFT_KOKKOS_SCALAR scratch[13][2]; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; FFT_KOKKOS_SCALAR ya[2],yb[2]; //C_EQ(ya,d_twiddles[fstride*m]); ya[1] = d_twiddles(fstride*m).im; @@ -369,15 +369,15 @@ class KissFFTKokkos { /* perform the butterfly for one stage of a mixed radix FFT */ KOKKOS_INLINE_FUNCTION - static void kf_bfly_generic(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly_generic(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int p, int Fout_count) { int u,k,q1,q; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; FFT_KOKKOS_SCALAR t[2]; int Norig = st.nfft; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_scratch = st.d_scratch; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_scratch = st.d_scratch; for ( u=0; u &st, int Fout_count, int f_count, int factors_count) + const typename FFT_AT::t_int_64_um &d_factors, const kiss_fft_state_kokkos &st, int 
Fout_count, int f_count, int factors_count) { const int beg = Fout_count; const int p = d_factors[factors_count++]; /* the radix */ @@ -496,12 +496,12 @@ class KissFFTKokkos { st.nfft = nfft; st.inverse = inverse_fft; - typename FFT_KOKKOS_AT::tdual_int_64 k_factors = typename FFT_KOKKOS_AT::tdual_int_64(); - typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d k_twiddles = typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d(); + typename FFT_AT::tdual_int_64 k_factors = typename FFT_AT::tdual_int_64(); + typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d k_twiddles = typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d(); if (nfft > 0) { - k_factors = typename FFT_KOKKOS_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); - k_twiddles = typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d("kissfft:twiddles",nfft); + k_factors = typename FFT_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); + k_twiddles = typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d("kissfft:twiddles",nfft); for (i=0;i(); @@ -524,12 +524,12 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_fin, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_fout, int in_stride, int offset) + static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_fin, typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_fout, int in_stride, int offset) { //if (d_fin.data() == d_fout.data()) { // // NOTE: this is not really an in-place FFT algorithm. 
// // It just performs an out-of-place FFT into a temp buffer - // typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_tmpbuf = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); + // typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_tmpbuf = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); // kf_work(d_tmpbuf,d_fin,1,in_stride,st.d_factors,st,offset,offset).re; // Kokkos::deep_copy(d_fout,d_tmpbuf); //} else { @@ -538,7 +538,7 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_fin, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_fout, int offset) + static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_fin, typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_fout, int offset) { kiss_fft_stride(cfg,d_fin,d_fout,1,offset); } diff --git a/src/KOKKOS/pack_kokkos.h b/src/KOKKOS/pack_kokkos.h index 97d35afe26..5e014db020 100644 --- a/src/KOKKOS/pack_kokkos.h +++ b/src/KOKKOS/pack_kokkos.h @@ -38,13 +38,13 @@ template class PackKokkos { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; struct pack_3d_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typedef FFTArrayTypes FFT_AT; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -52,7 +52,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - pack_3d_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + 
pack_3d_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -80,7 +80,7 @@ public: } }; -static void pack_3d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) +static void pack_3d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -98,7 +98,7 @@ struct unpack_3d_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -106,7 +106,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -134,7 +134,7 @@ public: } }; -static void unpack_3d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct 
pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -153,7 +153,7 @@ struct unpack_3d_permute1_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -161,7 +161,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_1_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_1_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -189,7 +189,7 @@ public: } }; -static void unpack_3d_permute1_1(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_1(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -206,7 +206,7 @@ struct unpack_3d_permute1_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -214,7 +214,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride 
between successive slow indices - unpack_3d_permute1_2_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_2_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -243,7 +243,7 @@ public: } }; -static void unpack_3d_permute1_2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_2(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -261,7 +261,7 @@ struct unpack_3d_permute1_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -270,7 +270,7 @@ public: int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute1_n_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_n_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -299,7 +299,7 @@ public: } }; -static void unpack_3d_permute1_n(typename 
FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_n(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -317,7 +317,7 @@ struct unpack_3d_permute2_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -325,7 +325,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_1_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_1_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -352,7 +352,7 @@ public: } }; -static void unpack_3d_permute2_1(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_1(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -370,7 +370,7 @@ struct unpack_3d_permute2_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename 
FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -378,7 +378,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_2_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_2_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -406,7 +406,7 @@ public: } }; -static void unpack_3d_permute2_2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_2(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -423,7 +423,7 @@ struct unpack_3d_permute2_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -432,7 +432,7 @@ public: int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute2_n_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d 
*plan): + unpack_3d_permute2_n_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -460,7 +460,7 @@ public: } }; -static void unpack_3d_permute2_n(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_n(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 3fc90c088d..ed7ace08c1 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -582,7 +582,7 @@ void PPPMKokkos::compute(int eflag, int vflag) if (atom->nmax > nmax) { nmax = atomKK->nmax; d_part2grid = typename AT::t_int_1d_3("pppm:part2grid",nmax); - d_rho1d = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + d_rho1d = typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); } // find grid points for all my particles @@ -753,7 +753,7 @@ void PPPMKokkos::allocate() // allocate distributed grid data - d_density_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_density_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); memoryKK->create_kokkos(k_density_fft,density_fft,nfft_both,"pppm:d_density_fft"); d_density_fft = k_density_fft.view(); @@ -775,16 +775,16 @@ void PPPMKokkos::allocate() d_fkz = typename AT::t_float_1d("pppm:d_fkz",nfft_both); } - d_vdx_brick = typename 
FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdy_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdz_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdx_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdy_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdz_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // summation coeffs order_allocated = order; k_gf_b = typename DAT::tdual_float_1d("pppm:gf_b",order); d_gf_b = k_gf_b.view(); - d_rho1d = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + d_rho1d = typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); k_rho_coeff = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); d_rho_coeff = k_rho_coeff.view(); h_rho_coeff = k_rho_coeff.h_view; @@ -847,14 +847,14 @@ void PPPMKokkos::allocate_peratom() { peratom_allocate_flag = 1; - d_u_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_u_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v0_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v1_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v2_brick = typename 
FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v3_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v4_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v5_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v0_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v1_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v2_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v3_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v4_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v5_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // use same GC ghost grid object for peratom grid communication diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index dc0fbd88e5..09513c9a2f 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -131,7 +131,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; PPPMKokkos(class LAMMPS *); ~PPPMKokkos() override; @@ -350,7 +350,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { int nx,ny,nz; typename AT::t_int_1d_um d_list_index; - typename 
FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf; int unpack_offset; DAT::tdual_int_scalar k_flag; @@ -364,11 +364,11 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { typename ArrayTypes::t_efloat_1d d_eatom; typename ArrayTypes::t_virial_array d_vatom; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_density_brick; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_u_brick; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_density_brick; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_u_brick; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; typename AT::t_float_1d d_greensfn; typename AT::t_virial_array d_vg; typename AT::t_float_1d d_fkx; @@ -377,17 +377,17 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_density_fft; FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work1; FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work2; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_density_fft; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_work1; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_work2; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_density_fft; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_work1; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_work2; DAT::tdual_float_1d k_gf_b; typename AT::t_float_1d d_gf_b; //FFT_KOKKOS_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3 d_rho1d; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3 d_rho1d; FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d 
k_rho_coeff; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d d_rho_coeff; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d d_rho_coeff; FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_2d h_rho_coeff; //double **acons; typename Kokkos::DualView::t_host acons; diff --git a/src/KOKKOS/remap_kokkos.cpp b/src/KOKKOS/remap_kokkos.cpp index d6b8a5691c..18ba626460 100644 --- a/src/KOKKOS/remap_kokkos.cpp +++ b/src/KOKKOS/remap_kokkos.cpp @@ -59,7 +59,7 @@ RemapKokkos::~RemapKokkos() /* ---------------------------------------------------------------------- */ template -void RemapKokkos::perform(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_buf) +void RemapKokkos::perform(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_buf) { remap_3d_kokkos(d_in,d_out,d_buf,plan); } @@ -103,7 +103,7 @@ void RemapKokkos::perform(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALA ------------------------------------------------------------------------- */ template -void RemapKokkos::remap_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_buf, +void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_buf, struct remap_plan_3d_kokkos *plan) { // collective flag not yet supported @@ -111,7 +111,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKK // use point-to-point communication int i,isend,irecv; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; if (plan->memory == 0) d_scratch = d_buf; @@ -465,7 +465,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat size = MAX(size,plan->send_size[nsend]); if (size) { - 
plan->d_sendbuf = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:sendbuf",size); + plan->d_sendbuf = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:sendbuf",size); if (!plan->d_sendbuf.data()) return nullptr; } @@ -475,7 +475,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat if (memory == 1) { if (nrecv > 0) { plan->d_scratch = - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); if (!plan->d_scratch.data()) return nullptr; } } diff --git a/src/KOKKOS/remap_kokkos.h b/src/KOKKOS/remap_kokkos.h index 035b58260e..ad5fa9833d 100644 --- a/src/KOKKOS/remap_kokkos.h +++ b/src/KOKKOS/remap_kokkos.h @@ -27,14 +27,14 @@ namespace LAMMPS_NS { template struct remap_plan_3d_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_sendbuf; // buffer for MPI sends + typedef FFTArrayTypes FFT_AT; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_sendbuf; // buffer for MPI sends FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_sendbuf; // host buffer for MPI sends - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; // scratch buffer for MPI recvs + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; // scratch buffer for MPI recvs FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_scratch; // host scratch buffer for MPI recvs - void (*pack)(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); + void (*pack)(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); // which pack function to use - void (*unpack)(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); + void (*unpack)(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, 
typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); // which unpack function to use int *send_offset; // extraction loc for each send int *send_size; // size of each send message @@ -61,16 +61,16 @@ template class RemapKokkos : protected Pointers { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; RemapKokkos(class LAMMPS *); RemapKokkos(class LAMMPS *, MPI_Comm,int,int,int,int,int,int, int,int,int,int,int,int,int,int,int,int,int,int); ~RemapKokkos() override; - void perform(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d); + void perform(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d); struct remap_plan_3d_kokkos *plan; - void remap_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, struct remap_plan_3d_kokkos *); + void remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, struct remap_plan_3d_kokkos *); struct remap_plan_3d_kokkos *remap_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, From 68c53886b8f52ec1f4af801783c87d368cef10e2 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 13 Dec 2023 15:09:02 -0700 Subject: [PATCH 010/267] Revert some name changes --- src/KOKKOS/Install.sh | 2 +- src/KOKKOS/fft3d_kokkos.cpp | 88 +++++++++++----------- src/KOKKOS/fft3d_kokkos.h | 12 +-- src/KOKKOS/fftdata_kokkos.h | 80 ++++++++++---------- src/KOKKOS/grid3d_kokkos.cpp | 28 +++---- src/KOKKOS/grid3d_kokkos.h | 12 +-- src/KOKKOS/kissfft_kokkos.h | 64 ++++++++-------- src/KOKKOS/kokkos_base_fft.h | 12 +-- src/KOKKOS/lmpfftsettings_kokkos.h | 15 +--- src/KOKKOS/pack_kokkos.h | 48 ++++++------ 
src/KOKKOS/pppm_kokkos.cpp | 116 ++++++++++++++--------------- src/KOKKOS/pppm_kokkos.h | 48 ++++++------ src/KOKKOS/remap_kokkos.cpp | 20 ++--- src/KOKKOS/remap_kokkos.h | 16 ++-- 14 files changed, 275 insertions(+), 286 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index ba6c4ed427..2dcf49ce06 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -200,7 +200,7 @@ action kokkos_few.h action kokkos_type.h action kokkos.cpp action kokkos.h -action lmpfftsettings_kokkos.h +action lmpfftsettings_kokkos.h lmpfftsettings.h action math_special_kokkos.cpp action math_special_kokkos.h action meam_dens_final_kokkos.h meam_dens_final.cpp diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 9e7b87b8d8..d78239606e 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -88,10 +88,10 @@ FFT3dKokkos::~FFT3dKokkos() /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::compute(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, int flag) +void FFT3dKokkos::compute(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, int flag) { - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_out_data((FFT_KOKKOS_DATA_POINTER)d_out.data(),d_out.size()/2); + typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_AT::t_FFT_DATA_1d d_out_data((FFT_KOKKOS_DATA_POINTER)d_out.data(),d_out.size()/2); fft_3d_kokkos(d_in_data,d_out_data,flag,plan); } @@ -99,9 +99,9 @@ void FFT3dKokkos::compute(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_ /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::timing1d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, int nsize, int flag) +void FFT3dKokkos::timing1d(typename 
FFT_AT::t_FFT_SCALAR_1d d_in, int nsize, int flag) { - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); fft_3d_1d_only_kokkos(d_in_data,nsize,flag,plan); } @@ -141,21 +141,21 @@ struct norm_functor { public: typedef DeviceType device_type; typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_out; + typename FFT_AT::t_FFT_DATA_1d_um d_out; int norm; - norm_functor(typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_out_, int norm_): + norm_functor(typename FFT_AT::t_FFT_DATA_1d &d_out_, int norm_): d_out(d_out_),norm(norm_) {} KOKKOS_INLINE_FUNCTION void operator() (const int &i) const { #if defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_CUFFT) || defined(FFT_KOKKOS_HIPFFT) - FFT_KOKKOS_SCALAR* out_ptr = (FFT_KOKKOS_SCALAR *)(d_out.data()+i); + FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; #elif defined(FFT_KOKKOS_MKL) d_out(i) *= norm; -#else // FFT_KOKKOS_KISS +#else // FFT_KISS d_out(i).re *= norm; d_out(i).im *= norm; #endif @@ -168,13 +168,13 @@ struct kiss_fft_functor { public: typedef DeviceType device_type; typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_data,d_tmp; + typename FFT_AT::t_FFT_DATA_1d_um d_data,d_tmp; kiss_fft_state_kokkos st; int length; kiss_fft_functor() = default; - kiss_fft_functor(typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_data_,typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): + kiss_fft_functor(typename FFT_AT::t_FFT_DATA_1d &d_data_,typename FFT_AT::t_FFT_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): d_data(d_data_), d_tmp(d_tmp_), st(st_) @@ -191,11 +191,11 @@ public: #endif template -void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in, typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) +void 
FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, typename FFT_AT::t_FFT_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) { int total,length; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_data,d_copy; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; + typename FFT_AT::t_FFT_DATA_1d d_data,d_copy; + typename FFT_AT::t_FFT_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; // pre-remap to prepare for 1st FFTs if needed // copy = loc for remap result @@ -204,9 +204,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 if (plan->pre_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_in_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_in.data(),d_in.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_in_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_in.data(),d_in.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_in_scalar, d_copy_scalar, d_scratch_scalar, plan->pre_plan); @@ -234,8 +234,8 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #else - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_tmp = - typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_AT::t_FFT_DATA_1d d_tmp = + typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); kiss_fft_functor f; if 
(flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length); @@ -251,9 +251,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 if (plan->mid1_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid1_plan); @@ -280,7 +280,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_mid_forward,length); else @@ -295,9 +295,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 if (plan->mid2_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename 
FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid2_plan); @@ -324,7 +324,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_slow_forward,length); else @@ -337,9 +337,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 // destination is always out if (plan->post_plan) { - d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_out_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_out.data(),d_out.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); + d_out_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_out.data(),d_out.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_out_scalar, d_scratch_scalar, plan->post_plan); @@ -348,7 +348,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename 
FFT_AT::t_FFT_KOKKOS_DATA_1 // scaling if required if (flag == -1 && plan->scaled) { - FFT_KOKKOS_SCALAR norm = plan->norm; + FFT_SCALAR norm = plan->norm; int num = plan->normnum; norm_functor f(d_out,norm); @@ -443,7 +443,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl plan->pre_plan = remapKK->remap_3d_create_plan_kokkos(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi, first_ilo,first_ihi,first_jlo,first_jhi, - first_klo,first_khi,2,0,0,FFT_KOKKOS_PRECISION, + first_klo,first_khi,2,0,0,FFT_PRECISION, usecollective,usecuda_aware); if (plan->pre_plan == nullptr) return nullptr; } @@ -468,7 +468,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl first_ilo,first_ihi,first_jlo,first_jhi, first_klo,first_khi, second_ilo,second_ihi,second_jlo,second_jhi, - second_klo,second_khi,2,1,0,FFT_KOKKOS_PRECISION, + second_klo,second_khi,2,1,0,FFT_PRECISION, usecollective,usecuda_aware); if (plan->mid1_plan == nullptr) return nullptr; @@ -509,7 +509,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl second_jlo,second_jhi,second_klo,second_khi, second_ilo,second_ihi, third_jlo,third_jhi,third_klo,third_khi, - third_ilo,third_ihi,2,1,0,FFT_KOKKOS_PRECISION, + third_ilo,third_ihi,2,1,0,FFT_PRECISION, usecollective,usecuda_aware); if (plan->mid2_plan == nullptr) return nullptr; @@ -537,7 +537,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl third_klo,third_khi,third_ilo,third_ihi, third_jlo,third_jhi, out_klo,out_khi,out_ilo,out_ihi, - out_jlo,out_jhi,2,(permute+1)%3,0,FFT_KOKKOS_PRECISION, + out_jlo,out_jhi,2,(permute+1)%3,0,FFT_PRECISION, usecollective,usecuda_aware); if (plan->post_plan == nullptr) return nullptr; } @@ -599,11 +599,11 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl *nbuf = copy_size + scratch_size; if (copy_size) { - plan->d_copy = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("fft3d:copy",copy_size); + plan->d_copy = typename FFT_AT::t_FFT_DATA_1d("fft3d:copy",copy_size); } if (scratch_size) { - 
plan->d_scratch = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("fft3d:scratch",scratch_size); + plan->d_scratch = typename FFT_AT::t_FFT_DATA_1d("fft3d:scratch",scratch_size); } // system specific pre-computation of 1d FFT coeffs @@ -697,17 +697,17 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl cufftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, &nfast,1,plan->length1, - CUFFT_KOKKOS_TYPE,plan->total1/plan->length1); + CUFFT_TYPE,plan->total1/plan->length1); cufftPlanMany(&(plan->plan_mid), 1, &nmid, &nmid,1,plan->length2, &nmid,1,plan->length2, - CUFFT_KOKKOS_TYPE,plan->total2/plan->length2); + CUFFT_TYPE,plan->total2/plan->length2); cufftPlanMany(&(plan->plan_slow), 1, &nslow, &nslow,1,plan->length3, &nslow,1,plan->length3, - CUFFT_KOKKOS_TYPE,plan->total3/plan->length3); + CUFFT_TYPE,plan->total3/plan->length3); #elif defined(FFT_KOKKOS_HIPFFT) @@ -838,7 +838,7 @@ void FFT3dKokkos::bifactor(int n, int *factor1, int *factor2) ------------------------------------------------------------------------- */ template -void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_data, int nsize, int flag, +void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d d_data, int nsize, int flag, struct fft_plan_3d_kokkos *plan) { // total = size of data needed in each dim @@ -896,8 +896,8 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKO hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else kiss_fft_functor f; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_tmp = - typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_AT::t_FFT_DATA_1d d_tmp = + typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == -1) { f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length1); Kokkos::parallel_for(total1/length1,f); @@ -923,7 +923,7 @@ void 
FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKO // limit num to size of data if (flag == 1 && plan->scaled) { - FFT_KOKKOS_SCALAR norm = plan->norm; + FFT_SCALAR norm = plan->norm; int num = MIN(plan->normnum,nsize); norm_functor f(d_data,norm); diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index 9729bc6a63..ed49c4b1ee 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -34,8 +34,8 @@ struct fft_plan_3d_kokkos { struct remap_plan_3d_kokkos *mid1_plan; // remap from 1st -> 2nd FFTs struct remap_plan_3d_kokkos *mid2_plan; // remap from 2nd -> 3rd FFTs struct remap_plan_3d_kokkos *post_plan; // remap from 3rd FFTs -> output - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_copy; // memory for remap results (if needed) - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_scratch; // scratch space for remaps + typename FFT_AT::t_FFT_DATA_1d d_copy; // memory for remap results (if needed) + typename FFT_AT::t_FFT_DATA_1d d_scratch; // scratch space for remaps int total1,total2,total3; // # of 1st,2nd,3rd FFTs (times length) int length1,length2,length3; // length of 1st,2nd,3rd FFTs int pre_target; // where to put remap results @@ -85,8 +85,8 @@ class FFT3dKokkos : protected Pointers { int,int,int,int,int,int,int,int,int,int,int,int,int,int,int, int,int,int *,int,int); ~FFT3dKokkos() override; - void compute(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, int); - void timing1d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, int, int); + void compute(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, int); + void timing1d(typename FFT_AT::t_FFT_SCALAR_1d, int, int); private: struct fft_plan_3d_kokkos *plan; @@ -96,7 +96,7 @@ class FFT3dKokkos : protected Pointers { KissFFTKokkos *kissfftKK; #endif - void fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d, typename FFT_AT::t_FFT_KOKKOS_DATA_1d, int, struct fft_plan_3d_kokkos *); + void fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d, typename 
FFT_AT::t_FFT_DATA_1d, int, struct fft_plan_3d_kokkos *); struct fft_plan_3d_kokkos *fft_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, @@ -105,7 +105,7 @@ class FFT3dKokkos : protected Pointers { void fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokkos *); - void fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d, int, int, struct fft_plan_3d_kokkos *); + void fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d, int, int, struct fft_plan_3d_kokkos *); void bifactor(int, int *, int *); }; diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index d52bc0b968..15dca33bcc 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -20,8 +20,8 @@ // data types for 2d/3d FFTs -#ifndef LMP_FFT_KOKKOS_DATA_H -#define LMP_FFT_KOKKOS_DATA_H +#ifndef LMP_FFT_DATA_KOKKOS_H +#define LMP_FFT_DATA_KOKKOS_H #include "lmpfftsettings_kokkos.h" @@ -29,10 +29,10 @@ // Data types for single-precision complex -#if FFT_KOKKOS_PRECISION == 1 -#elif FFT_KOKKOS_PRECISION == 2 +#if FFT_PRECISION == 1 +#elif FFT_PRECISION == 2 #else -#error "FFT_KOKKOS_PRECISION needs to be either 1 (=single) or 2 (=double)" +#error "FFT_PRECISION needs to be either 1 (=single) or 2 (=double)" #endif @@ -86,7 +86,7 @@ #if defined(FFT_KOKKOS_MKL) #include "mkl_dfti.h" - #if defined(FFT_KOKKOS_SINGLE) + #if defined(FFT_SINGLE) typedef float _Complex FFT_KOKKOS_DATA; #define FFT_KOKKOS_MKL_PREC DFTI_SINGLE #else @@ -95,7 +95,7 @@ #endif #elif defined(FFT_KOKKOS_FFTW3) #include "fftw3.h" - #if defined(FFT_KOKKOS_SINGLE) + #if defined(FFT_SINGLE) typedef fftwf_complex FFT_KOKKOS_DATA; #define FFTW_API(function) fftwf_ ## function #else @@ -104,18 +104,18 @@ #endif #elif defined(FFT_KOKKOS_CUFFT) #include "cufft.h" - #if defined(FFT_KOKKOS_SINGLE) + #if defined(FFT_SINGLE) #define cufftExec cufftExecC2C - #define CUFFT_KOKKOS_TYPE CUFFT_KOKKOS_C2C + #define CUFFT_TYPE CUFFT_C2C typedef cufftComplex FFT_KOKKOS_DATA; #else #define cufftExec 
cufftExecZ2Z - #define CUFFT_KOKKOS_TYPE CUFFT_KOKKOS_Z2Z + #define CUFFT_TYPE CUFFT_Z2Z typedef cufftDoubleComplex FFT_KOKKOS_DATA; #endif #elif defined(FFT_KOKKOS_HIPFFT) #include - #if defined(FFT_KOKKOS_SINGLE) + #if defined(FFT_SINGLE) #define hipfftExec hipfftExecC2C #define HIPFFT_KOKKOS_TYPE HIPFFT_C2C typedef hipfftComplex FFT_KOKKOS_DATA; @@ -125,7 +125,7 @@ typedef hipfftDoubleComplex FFT_KOKKOS_DATA; #endif #else - #if defined(FFT_KOKKOS_SINGLE) + #if defined(FFT_SINGLE) #define kiss_fft_scalar float #else #define kiss_fft_scalar double @@ -141,7 +141,7 @@ // (double[2]*) is not a 1D pointer #if defined(FFT_KOKKOS_FFTW3) - typedef FFT_KOKKOS_SCALAR* FFT_KOKKOS_DATA_POINTER; + typedef FFT_SCALAR* FFT_KOKKOS_DATA_POINTER; #else typedef FFT_KOKKOS_DATA* FFT_KOKKOS_DATA_POINTER; #endif @@ -154,23 +154,23 @@ template <> struct FFTArrayTypes { typedef Kokkos:: - DualView tdual_FFT_KOKKOS_SCALAR_1d; -typedef tdual_FFT_KOKKOS_SCALAR_1d::t_dev t_FFT_KOKKOS_SCALAR_1d; -typedef tdual_FFT_KOKKOS_SCALAR_1d::t_dev_um t_FFT_KOKKOS_SCALAR_1d_um; + DualView tdual_FFT_SCALAR_1d; +typedef tdual_FFT_SCALAR_1d::t_dev t_FFT_SCALAR_1d; +typedef tdual_FFT_SCALAR_1d::t_dev_um t_FFT_SCALAR_1d_um; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d; -typedef tdual_FFT_KOKKOS_SCALAR_2d::t_dev t_FFT_KOKKOS_SCALAR_2d; +typedef Kokkos::DualView tdual_FFT_SCALAR_2d; +typedef tdual_FFT_SCALAR_2d::t_dev t_FFT_SCALAR_2d; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d_3; -typedef tdual_FFT_KOKKOS_SCALAR_2d_3::t_dev t_FFT_KOKKOS_SCALAR_2d_3; +typedef Kokkos::DualView tdual_FFT_SCALAR_2d_3; +typedef tdual_FFT_SCALAR_2d_3::t_dev t_FFT_SCALAR_2d_3; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_3d; -typedef tdual_FFT_KOKKOS_SCALAR_3d::t_dev t_FFT_KOKKOS_SCALAR_3d; +typedef Kokkos::DualView tdual_FFT_SCALAR_3d; +typedef tdual_FFT_SCALAR_3d::t_dev t_FFT_SCALAR_3d; typedef Kokkos:: - DualView tdual_FFT_KOKKOS_DATA_1d; -typedef tdual_FFT_KOKKOS_DATA_1d::t_dev t_FFT_KOKKOS_DATA_1d; 
-typedef tdual_FFT_KOKKOS_DATA_1d::t_dev_um t_FFT_KOKKOS_DATA_1d_um; + DualView tdual_FFT_DATA_1d; +typedef tdual_FFT_DATA_1d::t_dev t_FFT_DATA_1d; +typedef tdual_FFT_DATA_1d::t_dev_um t_FFT_DATA_1d_um; typedef Kokkos:: DualView tdual_int_64; @@ -186,23 +186,23 @@ struct FFTArrayTypes { //Kspace typedef Kokkos:: - DualView tdual_FFT_KOKKOS_SCALAR_1d; -typedef tdual_FFT_KOKKOS_SCALAR_1d::t_host t_FFT_KOKKOS_SCALAR_1d; -typedef tdual_FFT_KOKKOS_SCALAR_1d::t_host_um t_FFT_KOKKOS_SCALAR_1d_um; + DualView tdual_FFT_SCALAR_1d; +typedef tdual_FFT_SCALAR_1d::t_host t_FFT_SCALAR_1d; +typedef tdual_FFT_SCALAR_1d::t_host_um t_FFT_SCALAR_1d_um; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d; -typedef tdual_FFT_KOKKOS_SCALAR_2d::t_host t_FFT_KOKKOS_SCALAR_2d; +typedef Kokkos::DualView tdual_FFT_SCALAR_2d; +typedef tdual_FFT_SCALAR_2d::t_host t_FFT_SCALAR_2d; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d_3; -typedef tdual_FFT_KOKKOS_SCALAR_2d_3::t_host t_FFT_KOKKOS_SCALAR_2d_3; +typedef Kokkos::DualView tdual_FFT_SCALAR_2d_3; +typedef tdual_FFT_SCALAR_2d_3::t_host t_FFT_SCALAR_2d_3; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_3d; -typedef tdual_FFT_KOKKOS_SCALAR_3d::t_host t_FFT_KOKKOS_SCALAR_3d; +typedef Kokkos::DualView tdual_FFT_SCALAR_3d; +typedef tdual_FFT_SCALAR_3d::t_host t_FFT_SCALAR_3d; typedef Kokkos:: - DualView tdual_FFT_KOKKOS_DATA_1d; -typedef tdual_FFT_KOKKOS_DATA_1d::t_host t_FFT_KOKKOS_DATA_1d; -typedef tdual_FFT_KOKKOS_DATA_1d::t_host_um t_FFT_KOKKOS_DATA_1d_um; + DualView tdual_FFT_DATA_1d; +typedef tdual_FFT_DATA_1d::t_host t_FFT_DATA_1d; +typedef tdual_FFT_DATA_1d::t_host_um t_FFT_DATA_1d_um; typedef Kokkos:: DualView tdual_int_64; @@ -212,12 +212,12 @@ typedef tdual_int_64::t_host_um t_int_64_um; }; #endif -typedef struct FFTArrayTypes FFT_KOKKOS_DAT; -typedef struct FFTArrayTypes FFT_KOKKOS_HAT; +typedef struct FFTArrayTypes FFT_DAT; +typedef struct FFTArrayTypes FFT_HAT; #if defined(FFT_KOKKOS_KISSFFT) -#include "kissfft_kokkos.h" // 
uses t_FFT_KOKKOS_DATA_1d, needs to come last +#include "kissfft_kokkos.h" // uses t_FFT_DATA_1d, needs to come last #endif diff --git a/src/KOKKOS/grid3d_kokkos.cpp b/src/KOKKOS/grid3d_kokkos.cpp index 0f8e0bdc4e..9a82e0157d 100644 --- a/src/KOKKOS/grid3d_kokkos.cpp +++ b/src/KOKKOS/grid3d_kokkos.cpp @@ -636,7 +636,7 @@ void Grid3dKokkos::setup_comm_tiled(int &nbuf1, int &nbuf2) template void Grid3dKokkos::forward_comm(int caller, void *ptr, int which, int nper, int nbyte, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf2, + FFT_DAT::tdual_FFT_SCALAR_1d& k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf2, MPI_Datatype datatype) { if (caller == KSPACE) { @@ -655,14 +655,14 @@ void Grid3dKokkos::forward_comm(int caller, void *ptr, int which, in template void Grid3dKokkos:: forward_comm_kspace_brick(KSpace *kspace, int which, int nper, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int m; MPI_Request request; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_KOKKOS_SCALAR* buf1; - FFT_KOKKOS_SCALAR* buf2; + FFT_SCALAR* buf1; + FFT_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -709,13 +709,13 @@ forward_comm_kspace_brick(KSpace *kspace, int which, int nper, template void Grid3dKokkos:: forward_comm_kspace_tiled(KSpace *kspace, int which, int nper, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int i,m,offset; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_KOKKOS_SCALAR* buf1; - FFT_KOKKOS_SCALAR* buf2; + FFT_SCALAR* buf1; + FFT_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { 
buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -776,7 +776,7 @@ forward_comm_kspace_tiled(KSpace *kspace, int which, int nper, template void Grid3dKokkos::reverse_comm(int caller, void *ptr, int which, int nper, int nbyte, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf2, + FFT_DAT::tdual_FFT_SCALAR_1d& k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf2, MPI_Datatype datatype) { if (caller == KSPACE) { @@ -795,14 +795,14 @@ void Grid3dKokkos::reverse_comm(int caller, void *ptr, int which, in template void Grid3dKokkos:: reverse_comm_kspace_brick(KSpace *kspace, int which, int nper, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int m; MPI_Request request; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_KOKKOS_SCALAR* buf1; - FFT_KOKKOS_SCALAR* buf2; + FFT_SCALAR* buf1; + FFT_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -850,14 +850,14 @@ reverse_comm_kspace_brick(KSpace *kspace, int which, int nper, template void Grid3dKokkos:: reverse_comm_kspace_tiled(KSpace *kspace, int which, int nper, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int i,m,offset; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_KOKKOS_SCALAR* buf1; - FFT_KOKKOS_SCALAR* buf2; + FFT_SCALAR* buf1; + FFT_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); diff --git a/src/KOKKOS/grid3d_kokkos.h b/src/KOKKOS/grid3d_kokkos.h index 864ac19c06..19751d83c9 100644 --- a/src/KOKKOS/grid3d_kokkos.h +++ b/src/KOKKOS/grid3d_kokkos.h @@ -34,9 
+34,9 @@ class Grid3dKokkos : public Grid3d { ~Grid3dKokkos() override; void forward_comm(int, void *, int, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); void reverse_comm(int, void *, int, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); private: DAT::tdual_int_2d k_swap_packlist; @@ -57,13 +57,13 @@ class Grid3dKokkos : public Grid3d { void setup_comm_tiled(int &, int &) override; void forward_comm_kspace_brick(class KSpace *, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); void forward_comm_kspace_tiled(class KSpace *, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); void reverse_comm_kspace_brick(class KSpace *, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); void reverse_comm_kspace_tiled(class KSpace *, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); void grow_swap() override; diff --git a/src/KOKKOS/kissfft_kokkos.h b/src/KOKKOS/kissfft_kokkos.h index 66f32d29fb..265677a21c 100644 --- a/src/KOKKOS/kissfft_kokkos.h +++ b/src/KOKKOS/kissfft_kokkos.h @@ -119,14 +119,14 @@ }while(0) */ -#define KISS_FFT_KOKKOS_COS(phase) (FFT_KOKKOS_SCALAR) cos(phase) -#define 
KISS_FFT_KOKKOS_SIN(phase) (FFT_KOKKOS_SCALAR) sin(phase) +#define KISS_FFT_COS(phase) (FFT_SCALAR) cos(phase) +#define KISS_FFT_SIN(phase) (FFT_SCALAR) sin(phase) #define HALF_OF(x) ((x)*.5) #define kf_cexp(x,x_index,phase) \ do{ \ - (x)(x_index).re = KISS_FFT_KOKKOS_COS(phase);\ - (x)(x_index).im = KISS_FFT_KOKKOS_SIN(phase);\ + (x)(x_index).re = KISS_FFT_COS(phase);\ + (x)(x_index).im = KISS_FFT_SIN(phase);\ }while(0) @@ -142,8 +142,8 @@ struct kiss_fft_state_kokkos { int nfft; int inverse; typename FFT_AT::t_int_64 d_factors; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_twiddles; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_scratch; + typename FFT_AT::t_FFT_DATA_1d d_twiddles; + typename FFT_AT::t_FFT_DATA_1d d_scratch; }; template @@ -153,11 +153,11 @@ class KissFFTKokkos { typedef FFTArrayTypes FFT_AT; KOKKOS_INLINE_FUNCTION - static void kf_bfly2(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly2(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_KOKKOS_SCALAR t[2]; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_SCALAR t[2]; int Fout2_count; int tw1_count = 0; @@ -179,11 +179,11 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly4(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly4(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, const size_t m, int Fout_count) { - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_KOKKOS_SCALAR scratch[6][2]; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_SCALAR scratch[6][2]; size_t k=m; const size_t m2=2*m; const size_t m3=3*m; @@ -237,14 +237,14 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly3(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um 
&d_Fout, const size_t fstride, + static void kf_bfly3(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, size_t m, int Fout_count) { size_t k=m; const size_t m2 = 2*m; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_KOKKOS_SCALAR scratch[5][2]; - FFT_KOKKOS_SCALAR epi3[2]; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_SCALAR scratch[5][2]; + FFT_SCALAR epi3[2]; //C_EQ(epi3,d_twiddles[fstride*m]); epi3[0] = d_twiddles(fstride*m).re; epi3[1] = d_twiddles(fstride*m).im; @@ -289,13 +289,13 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly5(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly5(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { int u; - FFT_KOKKOS_SCALAR scratch[13][2]; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_KOKKOS_SCALAR ya[2],yb[2]; + FFT_SCALAR scratch[13][2]; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_SCALAR ya[2],yb[2]; //C_EQ(ya,d_twiddles[fstride*m]); ya[1] = d_twiddles(fstride*m).im; ya[0] = d_twiddles(fstride*m).re; @@ -369,15 +369,15 @@ class KissFFTKokkos { /* perform the butterfly for one stage of a mixed radix FFT */ KOKKOS_INLINE_FUNCTION - static void kf_bfly_generic(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly_generic(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int p, int Fout_count) { int u,k,q1,q; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_KOKKOS_SCALAR t[2]; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_SCALAR t[2]; int Norig = st.nfft; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_scratch = st.d_scratch; + typename FFT_AT::t_FFT_DATA_1d_um d_scratch = st.d_scratch; for ( u=0; 
u &st, int Fout_count, int f_count, int factors_count) { @@ -452,7 +452,7 @@ class KissFFTKokkos { p[i] * m[i] = m[i-1] m0 = n */ - static int kf_factor(int n, FFT_KOKKOS_HAT::t_int_64 h_facbuf) + static int kf_factor(int n, FFT_HAT::t_int_64 h_facbuf) { int p=4, nf=0; double floor_sqrt; @@ -497,11 +497,11 @@ class KissFFTKokkos { st.inverse = inverse_fft; typename FFT_AT::tdual_int_64 k_factors = typename FFT_AT::tdual_int_64(); - typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d k_twiddles = typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d(); + typename FFT_AT::tdual_FFT_DATA_1d k_twiddles = typename FFT_AT::tdual_FFT_DATA_1d(); if (nfft > 0) { k_factors = typename FFT_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); - k_twiddles = typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d("kissfft:twiddles",nfft); + k_twiddles = typename FFT_AT::tdual_FFT_DATA_1d("kissfft:twiddles",nfft); for (i=0;i(); @@ -524,12 +524,12 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_fin, typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_fout, int in_stride, int offset) + static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_AT::t_FFT_DATA_1d_um &d_fin, typename FFT_AT::t_FFT_DATA_1d_um &d_fout, int in_stride, int offset) { //if (d_fin.data() == d_fout.data()) { // // NOTE: this is not really an in-place FFT algorithm. 
// // It just performs an out-of-place FFT into a temp buffer - // typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_tmpbuf = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); + // typename FFT_AT::t_FFT_DATA_1d_um d_tmpbuf = typename FFT_AT::t_FFT_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); // kf_work(d_tmpbuf,d_fin,1,in_stride,st.d_factors,st,offset,offset).re; // Kokkos::deep_copy(d_fout,d_tmpbuf); //} else { @@ -538,7 +538,7 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_fin, typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_fout, int offset) + static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_AT::t_FFT_DATA_1d_um d_fin, typename FFT_AT::t_FFT_DATA_1d_um d_fout, int offset) { kiss_fft_stride(cfg,d_fin,d_fout,1,offset); } diff --git a/src/KOKKOS/kokkos_base_fft.h b/src/KOKKOS/kokkos_base_fft.h index 567dc02ff3..08369b3c78 100644 --- a/src/KOKKOS/kokkos_base_fft.h +++ b/src/KOKKOS/kokkos_base_fft.h @@ -12,8 +12,8 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ -#ifndef LMP_KOKKOS_BASE_FFT_KOKKOS_H -#define LMP_KOKKOS_BASE_FFT_KOKKOS_H +#ifndef LMP_KOKKOS_BASE_FFT_H +#define LMP_KOKKOS_BASE_FFT_H #include "fftdata_kokkos.h" @@ -24,10 +24,10 @@ class KokkosBaseFFT { KokkosBaseFFT() {} // Kspace - virtual void pack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; - virtual void pack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; + virtual void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; + virtual void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; }; } diff --git a/src/KOKKOS/lmpfftsettings_kokkos.h b/src/KOKKOS/lmpfftsettings_kokkos.h index 148e001de3..6cea9bb63a 100644 --- a/src/KOKKOS/lmpfftsettings_kokkos.h +++ b/src/KOKKOS/lmpfftsettings_kokkos.h @@ -16,6 +16,8 @@ #ifndef LMP_FFT_KOKKOS_SETTINGS_H #define LMP_FFT_KOKKOS_SETTINGS_H +#include "lmpfftsettings.h" + // if user set FFTW, it means FFTW3 #ifdef FFT_KOKKOS_FFTW @@ -38,17 +40,4 @@ #define LMP_FFT_KOKKOS_LIB "KISS FFT" #endif -#ifdef FFT_KOKKOS_SINGLE -typedef float FFT_KOKKOS_SCALAR; -#define FFT_KOKKOS_PRECISION 1 -#define LMP_FFT_KOKKOS_PREC "single" -#define MPI_FFT_KOKKOS_SCALAR MPI_FLOAT -#else - -typedef double FFT_KOKKOS_SCALAR; -#define FFT_KOKKOS_PRECISION 2 -#define 
LMP_FFT_KOKKOS_PREC "double" -#define MPI_FFT_KOKKOS_SCALAR MPI_DOUBLE -#endif - #endif diff --git a/src/KOKKOS/pack_kokkos.h b/src/KOKKOS/pack_kokkos.h index 5e014db020..fe90d294a6 100644 --- a/src/KOKKOS/pack_kokkos.h +++ b/src/KOKKOS/pack_kokkos.h @@ -44,7 +44,7 @@ struct pack_3d_functor { public: typedef DeviceType device_type; typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -52,7 +52,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - pack_3d_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + pack_3d_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -80,7 +80,7 @@ public: } }; -static void pack_3d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) +static void pack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -98,7 +98,7 @@ struct unpack_3d_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -106,7 +106,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // 
stride between successive slow indices - unpack_3d_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -134,7 +134,7 @@ public: } }; -static void unpack_3d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -153,7 +153,7 @@ struct unpack_3d_permute1_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -161,7 +161,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_1_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_1_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -189,7 +189,7 @@ public: } }; -static void unpack_3d_permute1_1(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct 
pack_plan_3d *plan) +static void unpack_3d_permute1_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -206,7 +206,7 @@ struct unpack_3d_permute1_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -214,7 +214,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_2_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_2_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -243,7 +243,7 @@ public: } }; -static void unpack_3d_permute1_2(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -261,7 +261,7 @@ struct unpack_3d_permute1_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -270,7 +270,7 @@ 
public: int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute1_n_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_n_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -299,7 +299,7 @@ public: } }; -static void unpack_3d_permute1_n(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -317,7 +317,7 @@ struct unpack_3d_permute2_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -325,7 +325,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_1_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_1_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -352,7 +352,7 @@ public: } }; -static void unpack_3d_permute2_1(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um 
d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -370,7 +370,7 @@ struct unpack_3d_permute2_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -378,7 +378,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_2_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_2_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -406,7 +406,7 @@ public: } }; -static void unpack_3d_permute2_2(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -423,7 +423,7 @@ struct unpack_3d_permute2_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int 
nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -432,7 +432,7 @@ public: int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute2_n_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_n_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -460,7 +460,7 @@ public: } }; -static void unpack_3d_permute2_n(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index ed7ace08c1..6e1b3a83fa 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -48,7 +48,7 @@ using namespace MathSpecialKokkos; enum{REVERSE_RHO}; enum{FORWARD_IK,FORWARD_IK_PERATOM}; -#ifdef FFT_KOKKOS_SINGLE +#ifdef FFT_SINGLE #define ZEROF 0.0f #define ONEF 1.0f #else @@ -582,7 +582,7 @@ void PPPMKokkos::compute(int eflag, int vflag) if (atom->nmax > nmax) { nmax = atomKK->nmax; d_part2grid = typename AT::t_int_1d_3("pppm:part2grid",nmax); - d_rho1d = typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + d_rho1d = typename FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); } // find grid points for all my particles @@ -595,8 +595,8 @@ void PPPMKokkos::compute(int eflag, int vflag) // to fully sum contribution in their 3d bricks // remap from 3d decomposition to FFT 
decomposition - gc->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO,1,sizeof(FFT_KOKKOS_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); + gc->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO,1,sizeof(FFT_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); brick2fft(); // compute potential gradient on my FFT grid and @@ -609,14 +609,14 @@ void PPPMKokkos::compute(int eflag, int vflag) // all procs communicate E-field values // to fill ghost cells surrounding their 3d bricks - gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK,3,sizeof(FFT_KOKKOS_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); + gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK,3,sizeof(FFT_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); // extra per-atom energy/virial communication if (evflag_atom) - gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK_PERATOM,7,sizeof(FFT_KOKKOS_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); + gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK_PERATOM,7,sizeof(FFT_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); // calculate the force on my particles @@ -730,8 +730,8 @@ void PPPMKokkos::allocate() npergrid = 3; - k_gc_buf1 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); - k_gc_buf2 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); + k_gc_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); + k_gc_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); // tally local grid sizes // ngrid = count of owned+ghost grid cells on this proc @@ -753,7 +753,7 @@ void PPPMKokkos::allocate() // allocate distributed grid data - d_density_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_density_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); memoryKK->create_kokkos(k_density_fft,density_fft,nfft_both,"pppm:d_density_fft"); 
d_density_fft = k_density_fft.view(); @@ -775,17 +775,17 @@ void PPPMKokkos::allocate() d_fkz = typename AT::t_float_1d("pppm:d_fkz",nfft_both); } - d_vdx_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdy_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdz_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdx_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdy_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdz_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // summation coeffs order_allocated = order; k_gf_b = typename DAT::tdual_float_1d("pppm:gf_b",order); d_gf_b = k_gf_b.view(); - d_rho1d = typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); - k_rho_coeff = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); + d_rho1d = typename FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + k_rho_coeff = FFT_DAT::tdual_FFT_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); d_rho_coeff = k_rho_coeff.view(); h_rho_coeff = k_rho_coeff.h_view; @@ -810,7 +810,7 @@ void PPPMKokkos::allocate() remap = new RemapKokkos(lmp,world, nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_KOKKOS_PRECISION,collective_flag,gpu_aware_flag); + 1,0,0,FFT_PRECISION,collective_flag,gpu_aware_flag); } /* ---------------------------------------------------------------------- @@ -847,14 +847,14 @@ void PPPMKokkos::allocate_peratom() { peratom_allocate_flag = 1; - d_u_brick = 
typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_u_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v0_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v1_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v2_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v3_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v4_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v5_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v0_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v1_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v2_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v3_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v4_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v5_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // use same GC ghost grid object for peratom grid communication @@ -862,8 +862,8 @@ void PPPMKokkos::allocate_peratom() npergrid = 7; - k_gc_buf1 = 
FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); - k_gc_buf2 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); + k_gc_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); + k_gc_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); } /* ---------------------------------------------------------------------- @@ -1234,14 +1234,14 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_make_rho_atomic, const int &i) const { // The density_brick array is atomic for Half/Thread neighbor style - Kokkos::View::value,Kokkos::MemoryTraits > a_density_brick = d_density_brick; + Kokkos::View::value,Kokkos::MemoryTraits > a_density_brick = d_density_brick; int nx = d_part2grid(i,0); int ny = d_part2grid(i,1); int nz = d_part2grid(i,2); - const FFT_KOKKOS_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; - const FFT_KOKKOS_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; - const FFT_KOKKOS_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; + const FFT_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; + const FFT_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; + const FFT_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; nz -= nzlo_out; @@ -1250,13 +1250,13 @@ void PPPMKokkos::operator()(TagPPPM_make_rho_atomic, const int &i) c compute_rho1d(i,dx,dy,dz); - const FFT_KOKKOS_SCALAR z0 = delvolinv * q[i]; + const FFT_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; n++) { const int mz = n+nz; - const FFT_KOKKOS_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); + const FFT_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); for (int m = nlower; m <= nupper; m++) { const int my = m+ny; - const FFT_KOKKOS_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); + const FFT_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); for (int l = nlower; l <= nupper; l++) { const int mx = l+nx; a_density_brick(mz,my,mx) += x0*d_rho1d(i,l+order/2,0); @@ -1294,9 +1294,9 @@ void PPPMKokkos::operator() (TagPPPM_make_rho, 
typename Kokkos::Team if ( ((nz+nlower-nzlo_out)*ix*iy >= ito) || ((nz+nupper-nzlo_out+1)*ix*iy < ifrom) ) continue; - const FFT_KOKKOS_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; - const FFT_KOKKOS_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; - const FFT_KOKKOS_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; + const FFT_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; + const FFT_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; + const FFT_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; nz -= nzlo_out; ny -= nylo_out; @@ -1304,15 +1304,15 @@ void PPPMKokkos::operator() (TagPPPM_make_rho, typename Kokkos::Team compute_rho1d(i,dx,dy,dz); - const FFT_KOKKOS_SCALAR z0 = delvolinv * q[i]; + const FFT_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; n++) { const int mz = n+nz; const int in = mz*ix*iy; - const FFT_KOKKOS_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); + const FFT_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); for (int m = nlower; m <= nupper; m++) { const int my = m+ny; const int im = in+my*ix; - const FFT_KOKKOS_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); + const FFT_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); for (int l = nlower; l <= nupper; l++) { const int mx = l+nx; const int il = im+mx; @@ -2040,8 +2040,8 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_fieldforce_ik, const int &i) const { int l,m,n,nx,ny,nz,mx,my,mz; - FFT_KOKKOS_SCALAR x0,y0,z0; - FFT_KOKKOS_SCALAR ekx,eky,ekz; + FFT_SCALAR x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; nx = d_part2grid(i,0); ny = d_part2grid(i,1); @@ -2100,8 +2100,8 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_fieldforce_peratom, const int &i) const { int l,m,n,nx,ny,nz,mx,my,mz; - FFT_KOKKOS_SCALAR dx,dy,dz,x0,y0,z0; - FFT_KOKKOS_SCALAR u,v0,v1,v2,v3,v4,v5; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; nx = d_part2grid(i,0); ny = d_part2grid(i,1); @@ -2155,7 +2155,7 @@ void PPPMKokkos::operator()(TagPPPM_fieldforce_peratom, const int &i 
------------------------------------------------------------------------- */ template -void PPPMKokkos::pack_forward_grid_kokkos(int flag, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::pack_forward_grid_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2211,7 +2211,7 @@ void PPPMKokkos::operator()(TagPPPM_pack_forward2, const int &i) con ------------------------------------------------------------------------- */ template -void PPPMKokkos::unpack_forward_grid_kokkos(int flag, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::unpack_forward_grid_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2269,7 +2269,7 @@ void PPPMKokkos::operator()(TagPPPM_unpack_forward2, const int &i) c ------------------------------------------------------------------------- */ template -void PPPMKokkos::pack_reverse_grid_kokkos(int /*flag*/, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::pack_reverse_grid_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2299,7 +2299,7 @@ void PPPMKokkos::operator()(TagPPPM_pack_reverse, const int &i) cons ------------------------------------------------------------------------- */ template -void PPPMKokkos::unpack_reverse_grid_kokkos(int /*flag*/, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int offset, int nlist, 
DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::unpack_reverse_grid_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2332,11 +2332,11 @@ void PPPMKokkos::operator()(TagPPPM_unpack_reverse, const int &i) co template KOKKOS_INLINE_FUNCTION -void PPPMKokkos::compute_rho1d(const int i, const FFT_KOKKOS_SCALAR &dx, const FFT_KOKKOS_SCALAR &dy, - const FFT_KOKKOS_SCALAR &dz) const +void PPPMKokkos::compute_rho1d(const int i, const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz) const { int k,l; - FFT_KOKKOS_SCALAR r1,r2,r3; + FFT_SCALAR r1,r2,r3; for (k = (1-order)/2; k <= order/2; k++) { r1 = r2 = r3 = ZEROF; @@ -2375,10 +2375,10 @@ template void PPPMKokkos::compute_rho_coeff() { int j,k,l,m; - FFT_KOKKOS_SCALAR s; - FFT_KOKKOS_SCALAR **a = new FFT_KOKKOS_SCALAR *[order]; + FFT_SCALAR s; + FFT_SCALAR **a = new FFT_SCALAR *[order]; for (int i = 0; i < order; ++i) - a[i] = new FFT_KOKKOS_SCALAR[2*order+1]; + a[i] = new FFT_SCALAR[2*order+1]; for (k = 0; k <= 2*order; k++) for (l = 0; l < order; l++) @@ -2390,7 +2390,7 @@ void PPPMKokkos::compute_rho_coeff() s = 0.0; for (l = 0; l < j; l++) { a[l+1][k+order] = (a[l][k+1+order]-a[l][k-1+order]) / (l+1); -#ifdef FFT_KOKKOS_SINGLE +#ifdef FFT_SINGLE s += powf(0.5,(float) l+1) * (a[l][k-1+order] + powf(-1.0,(float) l) * a[l][k+1+order]) / (l+1); #else @@ -2586,18 +2586,18 @@ double PPPMKokkos::memory_usage() double bytes = (double)nmax*3 * sizeof(double); int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * (nzhi_out-nzlo_out+1); - bytes += (double)4 * nbrick * sizeof(FFT_KOKKOS_SCALAR); + bytes += (double)4 * nbrick * sizeof(FFT_SCALAR); if (triclinic) bytes += (double)3 * nfft_both * sizeof(double); bytes += (double)6 * nfft_both * sizeof(double); bytes += (double)nfft_both * sizeof(double); - bytes += 
(double)nfft_both*5 * sizeof(FFT_KOKKOS_SCALAR); + bytes += (double)nfft_both*5 * sizeof(FFT_SCALAR); if (peratom_allocate_flag) - bytes += (double)6 * nbrick * sizeof(FFT_KOKKOS_SCALAR); + bytes += (double)6 * nbrick * sizeof(FFT_SCALAR); // two Grid3d bufs - bytes += (double)(ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_KOKKOS_SCALAR); + bytes += (double)(ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_SCALAR); return bytes; } diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index 09513c9a2f..14d4670dbd 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -350,7 +350,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { int nx,ny,nz; typename AT::t_int_1d_um d_list_index; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf; int unpack_offset; DAT::tdual_int_scalar k_flag; @@ -364,31 +364,31 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { typename ArrayTypes::t_efloat_1d d_eatom; typename ArrayTypes::t_virial_array d_vatom; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_density_brick; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_u_brick; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_density_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_u_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; typename AT::t_float_1d d_greensfn; typename AT::t_virial_array d_vg; typename AT::t_float_1d d_fkx; typename AT::t_float_1d d_fky; typename AT::t_float_1d d_fkz; - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_density_fft; - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work1; - 
FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work2; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_density_fft; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_work1; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_work2; + FFT_DAT::tdual_FFT_SCALAR_1d k_density_fft; + FFT_DAT::tdual_FFT_SCALAR_1d k_work1; + FFT_DAT::tdual_FFT_SCALAR_1d k_work2; + typename FFT_AT::t_FFT_SCALAR_1d d_density_fft; + typename FFT_AT::t_FFT_SCALAR_1d d_work1; + typename FFT_AT::t_FFT_SCALAR_1d d_work2; DAT::tdual_float_1d k_gf_b; typename AT::t_float_1d d_gf_b; - //FFT_KOKKOS_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3 d_rho1d; - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d k_rho_coeff; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d d_rho_coeff; - FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_2d h_rho_coeff; + //FFT_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; + typename FFT_AT::t_FFT_SCALAR_2d_3 d_rho1d; + FFT_DAT::tdual_FFT_SCALAR_2d k_rho_coeff; + typename FFT_AT::t_FFT_SCALAR_2d d_rho_coeff; + FFT_HAT::t_FFT_SCALAR_2d h_rho_coeff; //double **acons; typename Kokkos::DualView::t_host acons; @@ -398,7 +398,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { RemapKokkos *remap; Grid3dKokkos *gc; - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_gc_buf1,k_gc_buf2; + FFT_DAT::tdual_FFT_SCALAR_1d k_gc_buf1,k_gc_buf2; int ngc_buf1,ngc_buf2,npergrid; //int **part2grid; // storage for particle -> grid mapping @@ -429,17 +429,17 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { void fieldforce_peratom() override; KOKKOS_INLINE_FUNCTION - void compute_rho1d(const int i, const FFT_KOKKOS_SCALAR &, const FFT_KOKKOS_SCALAR &, - const FFT_KOKKOS_SCALAR &) const; + void compute_rho1d(const int i, const FFT_SCALAR &, const FFT_SCALAR &, + const FFT_SCALAR &) const; void compute_rho_coeff(); void slabcorr() override; // grid communication - void pack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; - 
void unpack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; - void pack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; - void unpack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; + void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; + void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; + void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; + void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; // triclinic diff --git a/src/KOKKOS/remap_kokkos.cpp b/src/KOKKOS/remap_kokkos.cpp index 18ba626460..efc6742a25 100644 --- a/src/KOKKOS/remap_kokkos.cpp +++ b/src/KOKKOS/remap_kokkos.cpp @@ -59,7 +59,7 @@ RemapKokkos::~RemapKokkos() /* ---------------------------------------------------------------------- */ template -void RemapKokkos::perform(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_buf) +void RemapKokkos::perform(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename FFT_AT::t_FFT_SCALAR_1d d_buf) { remap_3d_kokkos(d_in,d_out,d_buf,plan); } @@ -103,7 +103,7 @@ void RemapKokkos::perform(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_ ------------------------------------------------------------------------- */ template -void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_buf, +void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename 
FFT_AT::t_FFT_SCALAR_1d d_buf, struct remap_plan_3d_kokkos *plan) { // collective flag not yet supported @@ -111,7 +111,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCAL // use point-to-point communication int i,isend,irecv; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; + typename FFT_AT::t_FFT_SCALAR_1d d_scratch; if (plan->memory == 0) d_scratch = d_buf; @@ -120,20 +120,20 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCAL // post all recvs into scratch space - FFT_KOKKOS_SCALAR* v_scratch = d_scratch.data(); + FFT_SCALAR* v_scratch = d_scratch.data(); if (!plan->usecuda_aware) { plan->h_scratch = Kokkos::create_mirror_view(d_scratch); v_scratch = plan->h_scratch.data(); } for (irecv = 0; irecv < plan->nrecv; irecv++) { - FFT_KOKKOS_SCALAR* scratch = v_scratch + plan->recv_bufloc[irecv]; + FFT_SCALAR* scratch = v_scratch + plan->recv_bufloc[irecv]; MPI_Irecv(scratch,plan->recv_size[irecv], - MPI_FFT_KOKKOS_SCALAR,plan->recv_proc[irecv],0, + MPI_FFT_SCALAR,plan->recv_proc[irecv],0, plan->comm,&plan->request[irecv]); } - FFT_KOKKOS_SCALAR* v_sendbuf = plan->d_sendbuf.data(); + FFT_SCALAR* v_sendbuf = plan->d_sendbuf.data(); if (!plan->usecuda_aware) { plan->h_sendbuf = Kokkos::create_mirror_view(plan->d_sendbuf); v_sendbuf = plan->h_sendbuf.data(); @@ -149,7 +149,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCAL if (!plan->usecuda_aware) Kokkos::deep_copy(plan->h_sendbuf,plan->d_sendbuf); - MPI_Send(v_sendbuf,plan->send_size[isend],MPI_FFT_KOKKOS_SCALAR, + MPI_Send(v_sendbuf,plan->send_size[isend],MPI_FFT_SCALAR, plan->send_proc[isend],0,plan->comm); } @@ -465,7 +465,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat size = MAX(size,plan->send_size[nsend]); if (size) { - plan->d_sendbuf = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:sendbuf",size); + plan->d_sendbuf = typename FFT_AT::t_FFT_SCALAR_1d("remap3d:sendbuf",size); if (!plan->d_sendbuf.data()) return nullptr; } @@ 
-475,7 +475,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat if (memory == 1) { if (nrecv > 0) { plan->d_scratch = - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); + typename FFT_AT::t_FFT_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); if (!plan->d_scratch.data()) return nullptr; } } diff --git a/src/KOKKOS/remap_kokkos.h b/src/KOKKOS/remap_kokkos.h index ad5fa9833d..a62c14f00b 100644 --- a/src/KOKKOS/remap_kokkos.h +++ b/src/KOKKOS/remap_kokkos.h @@ -28,13 +28,13 @@ template struct remap_plan_3d_kokkos { typedef DeviceType device_type; typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_sendbuf; // buffer for MPI sends - FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_sendbuf; // host buffer for MPI sends - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; // scratch buffer for MPI recvs - FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_scratch; // host scratch buffer for MPI recvs - void (*pack)(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); + typename FFT_AT::t_FFT_SCALAR_1d d_sendbuf; // buffer for MPI sends + FFT_HAT::t_FFT_SCALAR_1d h_sendbuf; // host buffer for MPI sends + typename FFT_AT::t_FFT_SCALAR_1d d_scratch; // scratch buffer for MPI recvs + FFT_HAT::t_FFT_SCALAR_1d h_scratch; // host scratch buffer for MPI recvs + void (*pack)(typename FFT_AT::t_FFT_SCALAR_1d_um, int, typename FFT_AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); // which pack function to use - void (*unpack)(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); + void (*unpack)(typename FFT_AT::t_FFT_SCALAR_1d_um, int, typename FFT_AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); // which unpack function to use int *send_offset; // extraction loc for each send int *send_size; // size of each send message @@ -66,11 +66,11 @@ class RemapKokkos : 
protected Pointers { RemapKokkos(class LAMMPS *, MPI_Comm,int,int,int,int,int,int, int,int,int,int,int,int,int,int,int,int,int,int); ~RemapKokkos() override; - void perform(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d); + void perform(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d); struct remap_plan_3d_kokkos *plan; - void remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, struct remap_plan_3d_kokkos *); + void remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, struct remap_plan_3d_kokkos *); struct remap_plan_3d_kokkos *remap_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, From 6d1d515f3a3f7369f9ace5bef4dfcc1b81d6f80e Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 13 Dec 2023 15:32:32 -0700 Subject: [PATCH 011/267] Fix compile issue --- src/KOKKOS/pppm_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 6e1b3a83fa..2a53682df3 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -285,7 +285,7 @@ void PPPMKokkos::init() estimated_accuracy); mesg += fmt::format(" estimated relative force accuracy = {:.8g}\n", estimated_accuracy/two_charge_force); - mesg += " using " LMP_FFT_KOKKOS_PREC " precision " LMP_FFT_KOKKOS_LIB "\n"; + mesg += " using " LMP_FFT_PREC " precision " LMP_FFT_KOKKOS_LIB "\n"; mesg += fmt::format(" 3d grid and FFT values/proc = {} {}\n", ngrid_max,nfft_both_max); utils::logmesg(lmp,mesg); From b199368c19bdc5f1b5f0fc991473ae7de3f73d31 Mon Sep 17 00:00:00 2001 From: Shern Tee Date: Thu, 14 Dec 2023 10:46:32 +1000 Subject: [PATCH 012/267] add extract function to fix_property_atom --- 
src/fix_property_atom.cpp | 24 ++++++++++++++++++++++++ src/fix_property_atom.h | 1 + 2 files changed, 25 insertions(+) diff --git a/src/fix_property_atom.cpp b/src/fix_property_atom.cpp index 9613523059..1e004ae4cb 100644 --- a/src/fix_property_atom.cpp +++ b/src/fix_property_atom.cpp @@ -948,3 +948,27 @@ int FixPropertyAtom::size_restart(int /*nlocal*/) { return values_peratom + 1; } + +/* ---------------------------------------------------------------------- + extract fix property/atom properties +------------------------------------------------------------------------- */ + +void *FixPropertyAtom::extract(const char *str, int &dim) +{ + dim=0; + if (strcmp(str, "nvalue") == 0) { + return &nvalue; + } else if (strcmp(str, "border") == 0) { + return &border; + } + dim=1; + if (strcmp(str, "styles") == 0) { + return &styles; + } else if (strcmp(str, "index") == 0) { + return &index; + } else if (strcmp(str, "cols") == 0) { + return &cols; + } + return nullptr; +} + diff --git a/src/fix_property_atom.h b/src/fix_property_atom.h index c50b6049dc..820acf3a20 100644 --- a/src/fix_property_atom.h +++ b/src/fix_property_atom.h @@ -51,6 +51,7 @@ class FixPropertyAtom : public Fix { void unpack_restart(int, int) override; int size_restart(int) override; int maxsize_restart() override; + void *extract(const char *, int &) override; double memory_usage() override; protected: From c45183d45c98c151ae4227dd415859249afae766 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Thu, 14 Dec 2023 15:38:45 -0500 Subject: [PATCH 013/267] Updated CMake build system to allow for FFT_KOKKOS parameter. Updated CMakeLists.txt to print the correct value when summarizing. 
--- cmake/CMakeLists.txt | 16 +--------------- cmake/Modules/Packages/KOKKOS.cmake | 23 +++++++++++++++++++---- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 28e02bbee7..aacaca4e6c 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -984,21 +984,7 @@ if(PKG_KSPACE) message(STATUS "Using non-threaded FFTs") endif() if(PKG_KOKKOS) - if(Kokkos_ENABLE_CUDA) - if(FFT STREQUAL "KISS") - message(STATUS "Kokkos FFT: KISS") - else() - message(STATUS "Kokkos FFT: cuFFT") - endif() - elseif(Kokkos_ENABLE_HIP) - if(FFT STREQUAL "KISS") - message(STATUS "Kokkos FFT: KISS") - else() - message(STATUS "Kokkos FFT: hipFFT") - endif() - else() - message(STATUS "Kokkos FFT: ${FFT}") - endif() + message(STATUS "Kokkos FFT: ${FFT_KOKKOS}") endif() endif() if(BUILD_DOC) diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 0edd9a3baa..eb20f93956 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -131,16 +131,31 @@ if(PKG_KSPACE) list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fft3d_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/grid3d_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/remap_kokkos.cpp) + set(FFT_KOKKOS_VALUES KISS FFTW3 MKL HIPFFT CUFFT) + set_property(CACHE FFT_KOKKOS PROPERTY STRINGS ${FFT_KOKKOS_VALUES}) + validate_option(FFT_KOKKOS FFT_KOKKOS_VALUES) + string(TOUPPER ${FFT_KOKKOS} FFT_KOKKOS) + if(Kokkos_ENABLE_CUDA) - if(NOT (FFT STREQUAL "KISS")) - target_compile_definitions(lammps PRIVATE -DFFT_CUFFT) + if(NOT ((FFT_KOKKOS STREQUAL "KISS") OR (FFT_KOKKOS STREQUAL "CUFFT"))) + message(FATAL_ERROR "The CUDA backend of Kokkos requires either KISS FFT or CUFFT.") + elseif(FFT_KOKKOS STREQUAL "KISS") + message(WARNING "Using KISS FFT with the CUDA backend of Kokkos may be sub-optimal.") + target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_KISS) + elseif(FFT_KOKKOS STREQUAL "CUFFT") + target_compile_definitions(lammps 
PRIVATE -DFFT_KOKKOS_CUFFT) target_link_libraries(lammps PRIVATE cufft) endif() elseif(Kokkos_ENABLE_HIP) - if(NOT (FFT STREQUAL "KISS")) + if(NOT ((FFT_KOKKOS STREQUAL "KISS") OR (FFT_KOKKOS STREQUAL "HIPFFT"))) + message(FATAL_ERROR "The HIP backend of Kokkos requires either KISS FFT or HIPFFT.") + elseif(FFT_KOKKOS STREQUAL "KISS") + message(WARNING "Using KISS FFT with the HIP backend of Kokkos may be sub-optimal.") + target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_KISS) + elseif(FFT_KOKKOS STREQUAL "HIPFFT") include(DetectHIPInstallation) find_package(hipfft REQUIRED) - target_compile_definitions(lammps PRIVATE -DFFT_HIPFFT) + target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_HIPFFT) target_link_libraries(lammps PRIVATE hip::hipfft) endif() endif() From 54089fb5abbd18d91ad2b0f1665338df403396f1 Mon Sep 17 00:00:00 2001 From: Shern Tee Date: Fri, 15 Dec 2023 10:58:06 +1000 Subject: [PATCH 014/267] Revert "add extract function to fix_property_atom" This reverts commit b199368c19bdc5f1b5f0fc991473ae7de3f73d31. 
--- src/fix_property_atom.cpp | 24 ------------------------ src/fix_property_atom.h | 1 - 2 files changed, 25 deletions(-) diff --git a/src/fix_property_atom.cpp b/src/fix_property_atom.cpp index 1e004ae4cb..9613523059 100644 --- a/src/fix_property_atom.cpp +++ b/src/fix_property_atom.cpp @@ -948,27 +948,3 @@ int FixPropertyAtom::size_restart(int /*nlocal*/) { return values_peratom + 1; } - -/* ---------------------------------------------------------------------- - extract fix property/atom properties -------------------------------------------------------------------------- */ - -void *FixPropertyAtom::extract(const char *str, int &dim) -{ - dim=0; - if (strcmp(str, "nvalue") == 0) { - return &nvalue; - } else if (strcmp(str, "border") == 0) { - return &border; - } - dim=1; - if (strcmp(str, "styles") == 0) { - return &styles; - } else if (strcmp(str, "index") == 0) { - return &index; - } else if (strcmp(str, "cols") == 0) { - return &cols; - } - return nullptr; -} - diff --git a/src/fix_property_atom.h b/src/fix_property_atom.h index 820acf3a20..c50b6049dc 100644 --- a/src/fix_property_atom.h +++ b/src/fix_property_atom.h @@ -51,7 +51,6 @@ class FixPropertyAtom : public Fix { void unpack_restart(int, int) override; int size_restart(int) override; int maxsize_restart() override; - void *extract(const char *, int &) override; double memory_usage() override; protected: From 44fbcf7bfe2b8fd27c2cacf171fdc8dc6219ec69 Mon Sep 17 00:00:00 2001 From: Shern Tee Date: Fri, 15 Dec 2023 11:01:50 +1000 Subject: [PATCH 015/267] reorder "ghost" processing in fix property/atom --- src/fix_property_atom.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/fix_property_atom.cpp b/src/fix_property_atom.cpp index 9613523059..93e33ca056 100644 --- a/src/fix_property_atom.cpp +++ b/src/fix_property_atom.cpp @@ -51,6 +51,19 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) : nvalue = 0; values_peratom = 0; + // get "ghost" 
first for settings + + border = 0; + while (iarg < narg) { + if (strcmp(arg[iarg], "ghost") == 0) { + if (iarg + 2 > narg) error->all(FLERR, "Illegal fix property/atom command"); + border = utils::logical(FLERR, arg[iarg + 1], false, lmp); + iarg += 2; + } else iarg++; + } + + iarg = 3; + while (iarg < narg) { if (strcmp(arg[iarg], "mol") == 0) { if (atom->molecule_flag) @@ -168,11 +181,8 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) : // optional args - border = 0; while (iarg < narg) { if (strcmp(arg[iarg], "ghost") == 0) { - if (iarg + 2 > narg) error->all(FLERR, "Illegal fix property/atom command"); - border = utils::logical(FLERR, arg[iarg + 1], false, lmp); iarg += 2; } else if (strcmp(arg[iarg], "writedata") == 0) { if (iarg + 2 > narg) error->all(FLERR, "Illegal fix property/atom command"); From 61ca9b79db605dae37450c2b20d12d1f88ce817e Mon Sep 17 00:00:00 2001 From: Shern Tee Date: Fri, 15 Dec 2023 11:48:46 +1000 Subject: [PATCH 016/267] add custom_border to Atom and AtomKokkos --- src/KOKKOS/atom_kokkos.cpp | 7 ++++++- src/KOKKOS/atom_kokkos.h | 2 +- src/atom.cpp | 13 ++++++++++++- src/atom.h | 4 +++- 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index bc393b29d8..ecd618e7ac 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -297,7 +297,7 @@ void AtomKokkos::grow(unsigned int mask) return index in ivector or dvector of its location ------------------------------------------------------------------------- */ -int AtomKokkos::add_custom(const char *name, int flag, int cols) +int AtomKokkos::add_custom(const char *name, int flag, int cols, int border) { int index; @@ -342,6 +342,11 @@ int AtomKokkos::add_custom(const char *name, int flag, int cols) dcols[index] = cols; } + if (index < 0) + error->all(FLERR,"Invalid call to AtomKokkos::add_custom()"); + else + custom_border[flag + (cols) ? 
2 : 0].push_back(border); + return index; } diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index 21a9aeebbd..000ad5e112 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -154,7 +154,7 @@ class AtomKokkos : public Atom { void sync_overlapping_device(const ExecutionSpace space, unsigned int mask); void sort() override; virtual void grow(unsigned int mask); - int add_custom(const char *, int, int) override; + int add_custom(const char *, int, int, int border = 0) override; void remove_custom(int, int, int) override; virtual void deallocate_topology(); private: diff --git a/src/atom.cpp b/src/atom.cpp index b604c54e6b..8ac72e8950 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -2605,6 +2605,7 @@ void Atom::update_callback(int ifix) lists of names can have NULL entries if previously removed return flag = 0/1 for int/double return cols = 0/N for vector/array where N = # of columns + return border = 0/1 if fix property/atom has "ghost" no/yes ------------------------------------------------------------------------- */ int Atom::find_custom(const char *name, int &flag, int &cols) @@ -2642,6 +2643,13 @@ int Atom::find_custom(const char *name, int &flag, int &cols) return -1; } +int Atom::find_custom(const char *name, int &flag, int &cols, int &border) +{ + int i = find_custom(name, flag, cols); + if (i != -1) border = custom_border[flag + (cols) ? 2 : 0][i]; + return i; +} + /** \brief Add a custom per-atom property with the given name and type and size \verbatim embed:rst @@ -2654,7 +2662,7 @@ This function is called, e.g. from :doc:`fix property/atom `. 
* \param cols Number of values: 0 for a single value, 1 or more for a vector of values * \return index of property in the respective list of properties */ -int Atom::add_custom(const char *name, int flag, int cols) +int Atom::add_custom(const char *name, int flag, int cols, int border) { int index = -1; @@ -2697,6 +2705,9 @@ int Atom::add_custom(const char *name, int flag, int cols) if (index < 0) error->all(FLERR,"Invalid call to Atom::add_custom()"); + else + custom_border[flag + (cols) ? 2 : 0].push_back(border); + return index; } diff --git a/src/atom.h b/src/atom.h index 548168ac59..9724e5662f 100644 --- a/src/atom.h +++ b/src/atom.h @@ -242,6 +242,7 @@ class Atom : protected Pointers { int *icols, *dcols; char **ivname, **dvname, **ianame, **daname; int nivector, ndvector, niarray, ndarray; + std::array, 4> custom_border; // molecule templates // each template can be a set of consecutive molecules @@ -363,7 +364,8 @@ class Atom : protected Pointers { void update_callback(int); int find_custom(const char *, int &, int &); - virtual int add_custom(const char *, int, int); + int find_custom(const char *, int &, int &, int &); + virtual int add_custom(const char *, int, int, int border = 0); virtual void remove_custom(int, int, int); void *extract(const char *); From 09c87040b54b7bea54a0b7528cea0801ef94e81b Mon Sep 17 00:00:00 2001 From: Shern Tee Date: Fri, 15 Dec 2023 11:54:15 +1000 Subject: [PATCH 017/267] add border arguments to FixPropertyAtom add_custom functions --- src/fix_property_atom.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fix_property_atom.cpp b/src/fix_property_atom.cpp index 93e33ca056..c3af7c2f1a 100644 --- a/src/fix_property_atom.cpp +++ b/src/fix_property_atom.cpp @@ -125,7 +125,7 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) : if (index[nvalue] >= 0) error->all(FLERR, "Fix property/atom vector name already exists"); if (ReadData::is_data_section(id)) error->all(FLERR, "Fix 
property/atom fix ID must not be a data file section name"); - index[nvalue] = atom->add_custom(&arg[iarg][2], 0, 0); + index[nvalue] = atom->add_custom(&arg[iarg][2], 0, 0, border); cols[nvalue] = 0; values_peratom++; nvalue++; @@ -138,7 +138,7 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) : if (index[nvalue] >= 0) error->all(FLERR, "Fix property/atom vector name already exists"); if (ReadData::is_data_section(id)) error->all(FLERR, "Fix property/atom fix ID must not be a data file section name"); - index[nvalue] = atom->add_custom(&arg[iarg][2], 1, 0); + index[nvalue] = atom->add_custom(&arg[iarg][2], 1, 0, border); cols[nvalue] = 0; values_peratom++; nvalue++; @@ -167,7 +167,7 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) : which = 1; styles[nvalue] = DARRAY; } - index[nvalue] = atom->add_custom(&arg[iarg][3], which, ncols); + index[nvalue] = atom->add_custom(&arg[iarg][3], which, ncols, border); cols[nvalue] = ncols; values_peratom += ncols; nvalue++; From a873106790223b3a72ed4d9293f55a2a8ba59bcc Mon Sep 17 00:00:00 2001 From: Shern Tee Date: Fri, 15 Dec 2023 12:26:05 +1000 Subject: [PATCH 018/267] improve AMOEBA fix property/atom checks --- src/AMOEBA/pair_amoeba.cpp | 43 ++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/src/AMOEBA/pair_amoeba.cpp b/src/AMOEBA/pair_amoeba.cpp index cad9e2b628..c94faf91a7 100644 --- a/src/AMOEBA/pair_amoeba.cpp +++ b/src/AMOEBA/pair_amoeba.cpp @@ -827,28 +827,31 @@ void PairAmoeba::init_style() // check if all custom atom arrays were set via fix property/atom - int flag,cols; + char const * names[6] = {"amtype", "amgroup", "redID", + "xyzaxis", "polaxe", "pval"}; + int const flag_check[6] = {0, 0, 1, 1, 0, 1}; // correct type (0 int, 1 dbl) + int const cols_check[6] = {0, 0, 0, 3, 0, 0}; // xyzaxis 3 cols, all others 0 + int const border_check[6] = {1, 0, 0, 0, 0, 0}; // which types need ghost + int flag, cols, border; + int 
index[6]; - index_amtype = atom->find_custom("amtype",flag,cols); - if (index_amtype < 0 || flag || cols) - error->all(FLERR,"Pair {} amtype is not defined", mystyle); - index_amgroup = atom->find_custom("amgroup",flag,cols); - if (index_amgroup < 0 || flag || cols) - error->all(FLERR,"Pair {} amgroup is not defined", mystyle); + for (int i = 0; i < 6; i++) { + index[i] = atom->find_custom(names[i], flag, cols, border); + std::string err = ""; + if (index[i] < 0) err = "was not defined"; + else if (flag_check[i] != flag) err = "has the wrong type"; + else if (cols_check[i] != cols) err = "has the wrong number of columns"; + else if (border_check[i] && !border) err = "must be set by fix property/atom with ghost yes"; + if (err != "") + error->all(FLERR,"Pair {} per-atom variable {} {}", mystyle, names[i], err); + } - index_redID = atom->find_custom("redID",flag,cols); - if (index_redID < 0 || !flag || cols) - error->all(FLERR,"Pair {} redID is not defined", mystyle); - index_xyzaxis = atom->find_custom("xyzaxis",flag,cols); - if (index_xyzaxis < 0 || !flag || cols == 0) - error->all(FLERR,"Pair {} xyzaxis is not defined", mystyle); - - index_polaxe = atom->find_custom("polaxe",flag,cols); - if (index_polaxe < 0 || flag || cols) - error->all(FLERR,"Pair {} polaxe is not defined", mystyle); - index_pval = atom->find_custom("pval",flag,cols); - if (index_pval < 0 || !flag || cols) - error->all(FLERR,"Pair {} pval is not defined", mystyle); + index_amtype = index[0]; + index_amgroup = index[1]; + index_redID = index[2]; + index_xyzaxis = index[3]; + index_polaxe = index[4]; + index_pval = index[5]; // ------------------------------------------------------------------- // one-time initializations From e36a764db2a62ad45024e5c7fbdb8482ece4ec66 Mon Sep 17 00:00:00 2001 From: Shern Tee Date: Fri, 15 Dec 2023 12:44:55 +1000 Subject: [PATCH 019/267] add array and vector STL headers --- src/atom.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/atom.h b/src/atom.h index 
9724e5662f..6f22ebd160 100644 --- a/src/atom.h +++ b/src/atom.h @@ -18,6 +18,8 @@ #include #include +#include +#include namespace LAMMPS_NS { From 95d1a41ee44a2fe05e4acb266bb550e4fa5dd3bc Mon Sep 17 00:00:00 2001 From: jtclemm Date: Fri, 15 Dec 2023 13:33:45 -0700 Subject: [PATCH 020/267] Fixing bpm/sphere error in fix move, displace atoms --- src/displace_atoms.cpp | 16 +++++++++++---- src/fix_move.cpp | 45 +++++++++++++++++++++++++++++++++--------- src/fix_move.h | 2 +- 3 files changed, 49 insertions(+), 14 deletions(-) diff --git a/src/displace_atoms.cpp b/src/displace_atoms.cpp index fa333f1bc2..5ecf5a2c9e 100644 --- a/src/displace_atoms.cpp +++ b/src/displace_atoms.cpp @@ -160,7 +160,7 @@ void DisplaceAtoms::command(int narg, char **arg) int *mask = atom->mask; int nlocal = atom->nlocal; - double fraction,dramp; + double fraction, dramp; for (i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { @@ -255,11 +255,12 @@ void DisplaceAtoms::command(int narg, char **arg) int line_flag = atom->line_flag; int tri_flag = atom->tri_flag; int body_flag = atom->body_flag; + int quat_atom_flag = atom->quat_flag; int theta_flag = 0; int quat_flag = 0; if (line_flag) theta_flag = 1; - if (ellipsoid_flag || tri_flag || body_flag) quat_flag = 1; + if (ellipsoid_flag || tri_flag || body_flag || quat_atom_flag) quat_flag = 1; // AtomVec pointers to retrieve per-atom storage of extra quantities @@ -269,6 +270,7 @@ void DisplaceAtoms::command(int narg, char **arg) auto avec_body = dynamic_cast(atom->style_match("body")); double **x = atom->x; + double **quat_atom = atom->quat; int *ellipsoid = atom->ellipsoid; int *line = atom->line; int *tri = atom->tri; @@ -313,7 +315,7 @@ void DisplaceAtoms::command(int narg, char **arg) // quats for ellipsoids, tris, and bodies - if (quat_flag) { + if (quat_flag && !quat_atom_flag) { quat = nullptr; if (ellipsoid_flag && ellipsoid[i] >= 0) quat = avec_ellipsoid->bonus[ellipsoid[i]].quat; @@ -322,12 +324,18 @@ void DisplaceAtoms::command(int 
narg, char **arg) else if (body_flag && body[i] >= 0) quat = avec_body->bonus[body[i]].quat; if (quat) { - MathExtra::quatquat(qrotate,quat,qnew); + MathExtra::quatquat(qrotate, quat, qnew); quat[0] = qnew[0]; quat[1] = qnew[1]; quat[2] = qnew[2]; quat[3] = qnew[3]; } + } else if (quat_atom_flag) { + MathExtra::quatquat(qrotate, quat_atom[i], qnew); + quat_atom[i][0] = qnew[0]; + quat_atom[i][1] = qnew[1]; + quat_atom[i][2] = qnew[2]; + quat_atom[i][3] = qnew[3]; } } } diff --git a/src/fix_move.cpp b/src/fix_move.cpp index 36bba410fc..99b5b30bec 100644 --- a/src/fix_move.cpp +++ b/src/fix_move.cpp @@ -276,10 +276,11 @@ FixMove::FixMove(LAMMPS *lmp, int narg, char **arg) : line_flag = atom->line_flag; tri_flag = atom->tri_flag; body_flag = atom->body_flag; + quat_atom_flag = atom->quat_flag; theta_flag = quat_flag = 0; if (line_flag) theta_flag = 1; - if (ellipsoid_flag || tri_flag || body_flag) quat_flag = 1; + if (ellipsoid_flag || tri_flag || body_flag || quat_atom_flag) quat_flag = 1; extra_flag = 0; if (omega_flag || angmom_flag || theta_flag || quat_flag) extra_flag = 1; @@ -329,7 +330,7 @@ FixMove::FixMove(LAMMPS *lmp, int narg, char **arg) : } } - if (quat_flag) { + if (quat_flag && !quat_atom_flag) { double *quat; for (int i = 0; i < nlocal; i++) { quat = nullptr; @@ -349,6 +350,16 @@ FixMove::FixMove(LAMMPS *lmp, int narg, char **arg) : } else qoriginal[i][0] = qoriginal[i][1] = qoriginal[i][2] = qoriginal[i][3] = 0.0; } + } else if (quat_atom_flag) { + double **quat_atom = atom->quat; + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + qoriginal[i][0] = quat_atom[i][0]; + qoriginal[i][1] = quat_atom[i][1]; + qoriginal[i][2] = quat_atom[i][2]; + qoriginal[i][3] = quat_atom[i][3]; + } + } } // nrestart = size of per-atom restart data @@ -521,6 +532,7 @@ void FixMove::initial_integrate(int /*vflag*/) double *radius = atom->radius; double *rmass = atom->rmass; double *mass = atom->mass; + double **quat_atom = atom->quat; int *type = atom->type; 
int *ellipsoid = atom->ellipsoid; int *line = atom->line; @@ -749,9 +761,9 @@ void FixMove::initial_integrate(int /*vflag*/) avec_line->bonus[atom->line[i]].theta = theta_new; } - // quats for ellipsoids, tris, and bodies + // quats for ellipsoids, tris, bodies, and bpm/sphere - if (quat_flag) { + if (quat_flag && !quat_atom_flag) { quat = nullptr; if (ellipsoid_flag && ellipsoid[i] >= 0) quat = avec_ellipsoid->bonus[ellipsoid[i]].quat; @@ -760,6 +772,8 @@ void FixMove::initial_integrate(int /*vflag*/) else if (body_flag && body[i] >= 0) quat = avec_body->bonus[body[i]].quat; if (quat) MathExtra::quatquat(qrotate, qoriginal[i], quat); + } else if (quat_atom_flag) { + MathExtra::quatquat(qrotate, qoriginal[i], quat_atom[i]); } } @@ -880,9 +894,9 @@ void FixMove::initial_integrate(int /*vflag*/) avec_line->bonus[atom->line[i]].theta = theta_new; } - // quats for ellipsoids, tris, and bodies + // quats for ellipsoids, tris, bodies, and bpm/sphere - if (quat_flag) { + if (quat_flag && !quat_atom_flag) { quat = nullptr; if (ellipsoid_flag && ellipsoid[i] >= 0) quat = avec_ellipsoid->bonus[ellipsoid[i]].quat; @@ -891,6 +905,8 @@ void FixMove::initial_integrate(int /*vflag*/) else if (body_flag && body[i] >= 0) quat = avec_body->bonus[body[i]].quat; if (quat) MathExtra::quatquat(qrotate, qoriginal[i], quat); + } else if (quat_atom_flag) { + MathExtra::quatquat(qrotate, qoriginal[i], quat_atom[i]); } } @@ -1263,6 +1279,7 @@ void FixMove::set_arrays(int i) double *quat; double **x = atom->x; + double **quat_atom = atom->quat; imageint *image = atom->image; int *ellipsoid = atom->ellipsoid; int *line = atom->line; @@ -1341,9 +1358,9 @@ void FixMove::set_arrays(int i) toriginal[i] = theta - 0.0; // NOTE: edit this line } - // quats for ellipsoids, tris, and bodies + // quats for ellipsoids, tris, bodies, and bpm/sphere - if (quat_flag) { + if (quat_flag & !quat_atom_flag) { quat = nullptr; if (ellipsoid_flag && ellipsoid[i] >= 0) quat = 
avec_ellipsoid->bonus[ellipsoid[i]].quat; @@ -1354,6 +1371,11 @@ void FixMove::set_arrays(int i) if (quat) { // qoriginal = f(quat,-delta); // NOTE: edit this line } + } else if (quat_atom_flag) { + // qoriginal[0] = quat_atom[i][0]; // NOTE: edit this line + // qoriginal[1] = quat_atom[i][1]; // NOTE: edit this line + // qoriginal[2] = quat_atom[i][2]; // NOTE: edit this line + // qoriginal[3] = quat_atom[i][3]; // NOTE: edit this line } } xoriginal[i][0] -= vx * delta; @@ -1400,7 +1422,7 @@ void FixMove::set_arrays(int i) // quats for ellipsoids, tris, and bodies - if (quat_flag) { + if (quat_flag && !quat_atom_flag) { quat = nullptr; if (ellipsoid_flag && ellipsoid[i] >= 0) quat = avec_ellipsoid->bonus[ellipsoid[i]].quat; @@ -1411,6 +1433,11 @@ void FixMove::set_arrays(int i) if (quat) { // qoriginal = f(quat,-delta); // NOTE: edit this line } + } else if (quat_atom_flag) { + // qoriginal[0] = quat_atom[i][0]; // NOTE: edit this line + // qoriginal[1] = quat_atom[i][1]; // NOTE: edit this line + // qoriginal[2] = quat_atom[i][2]; // NOTE: edit this line + // qoriginal[3] = quat_atom[i][3]; // NOTE: edit this line } } } diff --git a/src/fix_move.h b/src/fix_move.h index e3c018f54d..244a9d704a 100644 --- a/src/fix_move.h +++ b/src/fix_move.h @@ -61,7 +61,7 @@ class FixMove : public Fix { int xvar, yvar, zvar, vxvar, vyvar, vzvar; int xvarstyle, yvarstyle, zvarstyle, vxvarstyle, vyvarstyle, vzvarstyle; int extra_flag, omega_flag, angmom_flag; - int radius_flag, ellipsoid_flag, line_flag, tri_flag, body_flag; + int radius_flag, ellipsoid_flag, line_flag, tri_flag, body_flag, quat_atom_flag; int theta_flag, quat_flag; int nlevels_respa, nrestart; int time_origin; From 4d2aed89374760f611e23c0d121d92a15b01e5b1 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Mon, 18 Dec 2023 11:15:56 -0500 Subject: [PATCH 021/267] bug fix for when reaction site has angles, but post-reaction template has none (same for dihedrals, impropers) --- src/REACTION/fix_bond_react.cpp | 180 
+++++++++++++++++--------------- 1 file changed, 93 insertions(+), 87 deletions(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index 1da26e32a1..10a7023e17 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -3348,7 +3348,7 @@ void FixBondReact::update_everything() dynamic_cast(ihistory)->clear_cache(); // Angles! First let's delete all angle info: - if (force->angle && twomol->angleflag) { + if (force->angle) { int *num_angle = atom->num_angle; int **angle_type = atom->angle_type; tagint **angle_atom1 = atom->angle_atom1; @@ -3389,33 +3389,35 @@ void FixBondReact::update_everything() } } // now let's add the new angle info. - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - num_angle[atom->map(update_mega_glove[jj+1][i])] = twomol->num_angle[j]; - delta_angle += twomol->num_angle[j]; - for (int p = 0; p < twomol->num_angle[j]; p++) { - angle_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->angle_type[j][p]; - angle_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i]; - angle_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i]; - angle_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i]; + if (twomol->angleflag) { + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + num_angle[atom->map(update_mega_glove[jj+1][i])] = twomol->num_angle[j]; + delta_angle += twomol->num_angle[j]; + for (int p = 0; p < twomol->num_angle[j]; p++) { + 
angle_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->angle_type[j][p]; + angle_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i]; + angle_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i]; + angle_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i]; + } } - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = 0; p < twomol->num_angle[j]; p++) { - if (landlocked_atoms[twomol->angle_atom1[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->angle_atom2[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->angle_atom3[j][p]-1][rxnID] == 1) { - insert_num = num_angle[atom->map(update_mega_glove[jj+1][i])]; - angle_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->angle_type[j][p]; - angle_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i]; - angle_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i]; - angle_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i]; - num_angle[atom->map(update_mega_glove[jj+1][i])]++; - if (num_angle[atom->map(update_mega_glove[jj+1][i])] > atom->angle_per_atom) - error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); - delta_angle++; + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = 0; p < twomol->num_angle[j]; p++) { + if (landlocked_atoms[twomol->angle_atom1[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->angle_atom2[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->angle_atom3[j][p]-1][rxnID] == 1) { + insert_num = num_angle[atom->map(update_mega_glove[jj+1][i])]; + 
angle_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->angle_type[j][p]; + angle_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i]; + angle_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i]; + angle_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i]; + num_angle[atom->map(update_mega_glove[jj+1][i])]++; + if (num_angle[atom->map(update_mega_glove[jj+1][i])] > atom->angle_per_atom) + error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); + delta_angle++; + } } } } @@ -3425,7 +3427,7 @@ void FixBondReact::update_everything() } // Dihedrals! first let's delete all dihedral info for landlocked atoms - if (force->dihedral && twomol->dihedralflag) { + if (force->dihedral) { int *num_dihedral = atom->num_dihedral; int **dihedral_type = atom->dihedral_type; tagint **dihedral_atom1 = atom->dihedral_atom1; @@ -3469,36 +3471,38 @@ void FixBondReact::update_everything() } } // now let's add new dihedral info - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - num_dihedral[atom->map(update_mega_glove[jj+1][i])] = twomol->num_dihedral[j]; - delta_dihed += twomol->num_dihedral[j]; - for (int p = 0; p < twomol->num_dihedral[j]; p++) { - dihedral_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->dihedral_type[j][p]; - dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i]; - dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i]; - 
dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i]; - dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i]; + if (twomol->dihedralflag) { + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + num_dihedral[atom->map(update_mega_glove[jj+1][i])] = twomol->num_dihedral[j]; + delta_dihed += twomol->num_dihedral[j]; + for (int p = 0; p < twomol->num_dihedral[j]; p++) { + dihedral_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->dihedral_type[j][p]; + dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i]; + dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i]; + dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i]; + dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i]; + } } - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = 0; p < twomol->num_dihedral[j]; p++) { - if (landlocked_atoms[twomol->dihedral_atom1[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->dihedral_atom2[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->dihedral_atom3[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->dihedral_atom4[j][p]-1][rxnID] == 1) { - insert_num = num_dihedral[atom->map(update_mega_glove[jj+1][i])]; - dihedral_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->dihedral_type[j][p]; - dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = 
update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i]; - dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i]; - dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i]; - dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i]; - num_dihedral[atom->map(update_mega_glove[jj+1][i])]++; - if (num_dihedral[atom->map(update_mega_glove[jj+1][i])] > atom->dihedral_per_atom) - error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); - delta_dihed++; + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = 0; p < twomol->num_dihedral[j]; p++) { + if (landlocked_atoms[twomol->dihedral_atom1[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->dihedral_atom2[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->dihedral_atom3[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->dihedral_atom4[j][p]-1][rxnID] == 1) { + insert_num = num_dihedral[atom->map(update_mega_glove[jj+1][i])]; + dihedral_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->dihedral_type[j][p]; + dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i]; + dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i]; + dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i]; + dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i]; + num_dihedral[atom->map(update_mega_glove[jj+1][i])]++; + if (num_dihedral[atom->map(update_mega_glove[jj+1][i])] > 
atom->dihedral_per_atom) + error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); + delta_dihed++; + } } } } @@ -3508,7 +3512,7 @@ void FixBondReact::update_everything() } // finally IMPROPERS!!!! first let's delete all improper info for landlocked atoms - if (force->improper && twomol->improperflag) { + if (force->improper) { int *num_improper = atom->num_improper; int **improper_type = atom->improper_type; tagint **improper_atom1 = atom->improper_atom1; @@ -3552,36 +3556,38 @@ void FixBondReact::update_everything() } } // now let's add new improper info - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { - if (landlocked_atoms[j][rxnID] == 1) { - num_improper[atom->map(update_mega_glove[jj+1][i])] = twomol->num_improper[j]; - delta_imprp += twomol->num_improper[j]; - for (int p = 0; p < twomol->num_improper[j]; p++) { - improper_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->improper_type[j][p]; - improper_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i]; - improper_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i]; - improper_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i]; - improper_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i]; + if (twomol->improperflag) { + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][rxnID]-1; + if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) { + if (landlocked_atoms[j][rxnID] == 1) { + num_improper[atom->map(update_mega_glove[jj+1][i])] = twomol->num_improper[j]; + delta_imprp += 
twomol->num_improper[j]; + for (int p = 0; p < twomol->num_improper[j]; p++) { + improper_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->improper_type[j][p]; + improper_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i]; + improper_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i]; + improper_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i]; + improper_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i]; + } } - } - if (landlocked_atoms[j][rxnID] == 0) { - for (int p = 0; p < twomol->num_improper[j]; p++) { - if (landlocked_atoms[twomol->improper_atom1[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->improper_atom2[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->improper_atom3[j][p]-1][rxnID] == 1 || - landlocked_atoms[twomol->improper_atom4[j][p]-1][rxnID] == 1) { - insert_num = num_improper[atom->map(update_mega_glove[jj+1][i])]; - improper_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->improper_type[j][p]; - improper_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i]; - improper_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i]; - improper_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i]; - improper_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i]; - num_improper[atom->map(update_mega_glove[jj+1][i])]++; - if (num_improper[atom->map(update_mega_glove[jj+1][i])] > atom->improper_per_atom) - 
error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); - delta_imprp++; + if (landlocked_atoms[j][rxnID] == 0) { + for (int p = 0; p < twomol->num_improper[j]; p++) { + if (landlocked_atoms[twomol->improper_atom1[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->improper_atom2[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->improper_atom3[j][p]-1][rxnID] == 1 || + landlocked_atoms[twomol->improper_atom4[j][p]-1][rxnID] == 1) { + insert_num = num_improper[atom->map(update_mega_glove[jj+1][i])]; + improper_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->improper_type[j][p]; + improper_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i]; + improper_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i]; + improper_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i]; + improper_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i]; + num_improper[atom->map(update_mega_glove[jj+1][i])]++; + if (num_improper[atom->map(update_mega_glove[jj+1][i])] > atom->improper_per_atom) + error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom"); + delta_imprp++; + } } } } From a6addbc90761fd9b3e48e5a3064f8f589a58140e Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Mon, 18 Dec 2023 11:30:56 -0500 Subject: [PATCH 022/267] Updated documentation for FFT_KOKKOS_ flags and CMake variable selection --- doc/src/Build_settings.rst | 18 +++++++++++++++--- doc/src/Howto_cmake.rst | 2 ++ doc/src/kspace_style.rst | 5 ++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/doc/src/Build_settings.rst b/doc/src/Build_settings.rst index 7576cae3eb..33b0508fe9 100644 --- a/doc/src/Build_settings.rst 
+++ b/doc/src/Build_settings.rst @@ -51,14 +51,18 @@ LAMMPS can use them if they are available on your system. .. code-block:: bash -D FFT=value # FFTW3 or MKL or KISS, default is FFTW3 if found, else KISS + -D FFT_KOKKOS=value # FFTW3 or MKL or KISS or CUFFT or HIPFFT, default is KISS -D FFT_SINGLE=value # yes or no (default), no = double precision -D FFT_PACK=value # array (default) or pointer or memcpy .. note:: - The values for the FFT variable must be in upper-case. This is - an exception to the rule that all CMake variables can be specified - with lower-case values. + When the Kokkos variant of a package is compiled and selected at run time, + the FFT library selected by the FFT_KOKKOS variable applies. Otherwise, + the FFT library selected by the FFT variable applies. + The same FFT settings apply to both. FFT_KOKKOS must be compatible with the + Kokkos backend - for example, when using the CUDA backend of Kokkos, + you must use either CUFFT or KISS. Usually these settings are all that is needed. If FFTW3 is selected, then CMake will try to detect, if threaded FFTW @@ -87,6 +91,8 @@ LAMMPS can use them if they are available on your system. FFT_INC = -DFFT_FFTW3 # -DFFT_FFTW3, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, or -DFFT_KISS # default is KISS if not specified + FFT_INC = -DFFT_KOKKOS_CUFFT # -DFFT_KOKKOS_{FFTW,FFTW3,MKL,CUFFT,HIPFFT,KISS} + # default is KISS if not specified FFT_INC = -DFFT_SINGLE # do not specify for double precision FFT_INC = -DFFT_FFTW_THREADS # enable using threaded FFTW3 libraries FFT_INC = -DFFT_MKL_THREADS # enable using threaded FFTs with MKL libraries @@ -97,6 +103,8 @@ LAMMPS can use them if they are available on your system. 
FFT_INC = -I/usr/local/include FFT_PATH = -L/usr/local/lib + FFT_LIB = -lhipfft # hipFFT either precision + FFT_LIB = -lcufft # cuFFT either precision FFT_LIB = -lfftw3 # FFTW3 double precision FFT_LIB = -lfftw3 -lfftw3_omp # FFTW3 double precision with threads (needs -DFFT_FFTW_THREADS) FFT_LIB = -lfftw3 -lfftw3f # FFTW3 single precision @@ -141,6 +149,10 @@ The Intel MKL math library is part of the Intel compiler suite. It can be used with the Intel or GNU compiler (see the ``FFT_LIB`` setting above). +The CUFFT and HIPFFT FFT libraries are packaged with NVIDIA's CUDA and AMD's +HIP installations, respectively. These FFT libraries require the Kokkos acceleration +package to be enabled and the Kokkos backend to be GPU-resident (i.e., HIP or CUDA). + Performing 3d FFTs in parallel can be time-consuming due to data access and required communication. This cost can be reduced by performing single-precision FFTs instead of double precision. Single precision diff --git a/doc/src/Howto_cmake.rst b/doc/src/Howto_cmake.rst index 42324cf2f1..8b710d1065 100644 --- a/doc/src/Howto_cmake.rst +++ b/doc/src/Howto_cmake.rst @@ -349,6 +349,8 @@ Some common LAMMPS specific variables - when set to ``name`` the LAMMPS executable and library will be called ``lmp_name`` and ``liblammps_name.a`` * - ``FFT`` - select which FFT library to use: ``FFTW3``, ``MKL``, ``KISS`` (default, unless FFTW3 is found) + * - ``FFT_KOKKOS`` + - select which FFT library to use in Kokkos-enabled styles: ``FFTW3``, ``MKL``, ``HIPFFT``, ``CUFFT``, ``KISS`` (default) * - ``FFT_SINGLE`` - select whether to use single precision FFTs (default: ``off``) * - ``WITH_JPEG`` diff --git a/doc/src/kspace_style.rst b/doc/src/kspace_style.rst index 38a6fce375..78d7380c01 100644 --- a/doc/src/kspace_style.rst +++ b/doc/src/kspace_style.rst @@ -450,7 +450,10 @@ relative RMS error.
For the KOKKOS package, the *pppm/kk* style performs charge assignment and force interpolation calculations, along with the FFTs themselves, on the GPU or (optionally) threaded on the CPU when - using OpenMP and FFTW3. + using OpenMP and FFTW3. The specific FFT library is selected using + the FFT_KOKKOS CMake parameter. See the + :doc:`Build settings ` doc page for how to select a + 3rd-party FFT library. ---------- From bc47f4f3a32c8499d5bd5fd6bc4a68424b700da5 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Mon, 18 Dec 2023 11:56:23 -0500 Subject: [PATCH 023/267] Updated CMake preset files for kokkos-cuda and kokkos-hip --- cmake/presets/kokkos-cuda.cmake | 3 +++ cmake/presets/kokkos-hip.cmake | 3 +++ 2 files changed, 6 insertions(+) diff --git a/cmake/presets/kokkos-cuda.cmake b/cmake/presets/kokkos-cuda.cmake index c3ee081898..3205387044 100644 --- a/cmake/presets/kokkos-cuda.cmake +++ b/cmake/presets/kokkos-cuda.cmake @@ -9,5 +9,8 @@ set(BUILD_OMP ON CACHE BOOL "" FORCE) get_filename_component(NVCC_WRAPPER_CMD ${CMAKE_CURRENT_SOURCE_DIR}/../lib/kokkos/bin/nvcc_wrapper ABSOLUTE) set(CMAKE_CXX_COMPILER ${NVCC_WRAPPER_CMD} CACHE FILEPATH "" FORCE) +# If KSPACE is also enabled, use CUFFT for FFTs +set(FFT_KOKKOS "CUFFT" CACHE STRING FORCE) + # hide deprecation warnings temporarily for stable release set(Kokkos_ENABLE_DEPRECATION_WARNINGS OFF CACHE BOOL "" FORCE) diff --git a/cmake/presets/kokkos-hip.cmake b/cmake/presets/kokkos-hip.cmake index 827a37152b..ffc259a225 100644 --- a/cmake/presets/kokkos-hip.cmake +++ b/cmake/presets/kokkos-hip.cmake @@ -12,6 +12,9 @@ set(BUILD_OMP ON CACHE BOOL "" FORCE) set(CMAKE_CXX_COMPILER hipcc CACHE STRING "" FORCE) set(CMAKE_TUNE_FLAGS "-munsafe-fp-atomics" CACHE STRING "" FORCE) +# If KSPACE is also enabled, use HIPFFT for FFTs +set(FFT_KOKKOS "HIPFFT" CACHE STRING FORCE) + # hide deprecation warnings temporarily for stable release set(Kokkos_ENABLE_DEPRECATION_WARNINGS OFF CACHE BOOL "" FORCE) From
d02ffb0e709cb57ff0959c74d74a7a0ad9b7670e Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Mon, 18 Dec 2023 12:06:41 -0500 Subject: [PATCH 024/267] Updated Summit & Frontier template Makefiles --- src/MAKE/MACHINES/Makefile.frontier_kokkos | 2 +- src/MAKE/MACHINES/Makefile.summit_kokkos | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/MAKE/MACHINES/Makefile.frontier_kokkos b/src/MAKE/MACHINES/Makefile.frontier_kokkos index 86cddd12b7..b58a3d871c 100644 --- a/src/MAKE/MACHINES/Makefile.frontier_kokkos +++ b/src/MAKE/MACHINES/Makefile.frontier_kokkos @@ -55,7 +55,7 @@ MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa MY_HIP_EXE = $(shell which hipcc) MY_HIP_PATH = $(dir ${MY_HIP_EXE}) -FFT_INC = -DFFT_HIPFFT +FFT_INC = -DFFT_KOKKOS_HIPFFT FFT_PATH = FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft diff --git a/src/MAKE/MACHINES/Makefile.summit_kokkos b/src/MAKE/MACHINES/Makefile.summit_kokkos index 557ebd22b2..d554e09a5a 100644 --- a/src/MAKE/MACHINES/Makefile.summit_kokkos +++ b/src/MAKE/MACHINES/Makefile.summit_kokkos @@ -57,7 +57,7 @@ MPI_LIB = -L${MY_MPI_PATH}../lib -lmpi_ibm # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = -DFFT_CUFFT +FFT_INC = -DFFT_KOKKOS_CUFFT FFT_PATH = FFT_LIB = -lcufft From bc7050ab5001b4480383d9e16995494a25f1bec8 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Mon, 18 Dec 2023 12:11:31 -0500 Subject: [PATCH 025/267] Added LMP_HEFFTE to CMakeLists.txt to attempt to fix a merge conflict --- cmake/CMakeLists.txt | 46 +++++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index aacaca4e6c..76248445e9 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -971,20 +971,40 @@ if(PKG_KOKKOS) endif() endif() if(PKG_KSPACE) - message(STATUS "<<< FFT settings >>> + if (LMP_HEFFTE) + message(STATUS "<<< FFT settings >>> +-- Primary FFT lib: heFFTe") + if (HEFFTE_BACKEND) + 
message(STATUS "heFFTe backend: ${HEFFTE_BACKEND}") + else() + message(STATUS "heFFTe backend: stock (builtin FFT implementation, tested for corrected but not optimized for production)") + endif() + if(FFT_SINGLE) + message(STATUS "Using single precision FFTs") + else() + message(STATUS "Using double precision FFTs") + endif() + else() + message(STATUS "<<< FFT settings >>> -- Primary FFT lib: ${FFT}") - if(FFT_SINGLE) - message(STATUS "Using single precision FFTs") - else() - message(STATUS "Using double precision FFTs") - endif() - if(FFT_FFTW_THREADS OR FFT_MKL_THREADS) - message(STATUS "Using threaded FFTs") - else() - message(STATUS "Using non-threaded FFTs") - endif() - if(PKG_KOKKOS) - message(STATUS "Kokkos FFT: ${FFT_KOKKOS}") + if(FFT_SINGLE) + message(STATUS "Using single precision FFTs") + else() + message(STATUS "Using double precision FFTs") + endif() + if(FFT_FFTW_THREADS OR FFT_MKL_THREADS) + message(STATUS "Using threaded FFTs") + else() + message(STATUS "Using non-threaded FFTs") + endif() + if (FFT_HEFFTE) + message(STATUS "Using distributed algorithms from heFTTe") + else() + message(STATUS "Using builtin distributed algorithms") + endif() + if(PKG_KOKKOS) + message(STATUS "Kokkos FFT: ${FFT_KOKKOS}") + endif() endif() endif() if(BUILD_DOC) From dd1ac640aeec2686b2757d734546d6960804bcc2 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Mon, 18 Dec 2023 12:56:30 -0500 Subject: [PATCH 026/267] Added declaration for FFT_KOKKOS variable --- cmake/Modules/Packages/KOKKOS.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index eb20f93956..a0b872ba85 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -131,6 +131,7 @@ if(PKG_KSPACE) list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fft3d_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/grid3d_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/remap_kokkos.cpp) + set(FFT_KOKKOS "KISS" CACHE STRING "FFT 
library for Kokkos-enabled KSPACE package") set(FFT_KOKKOS_VALUES KISS FFTW3 MKL HIPFFT CUFFT) set_property(CACHE FFT_KOKKOS PROPERTY STRINGS ${FFT_KOKKOS_VALUES}) validate_option(FFT_KOKKOS FFT_KOKKOS_VALUES) From e72f186123df9d24f1f4f2bac5bdc4fad8d2a8e6 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Tue, 19 Dec 2023 22:01:18 -0700 Subject: [PATCH 027/267] check ghost vel in pair bpm/spring --- src/BPM/pair_bpm_spring.cpp | 13 +++++++++++++ src/BPM/pair_bpm_spring.h | 1 + 2 files changed, 14 insertions(+) diff --git a/src/BPM/pair_bpm_spring.cpp b/src/BPM/pair_bpm_spring.cpp index 1177156359..01cee91b4c 100644 --- a/src/BPM/pair_bpm_spring.cpp +++ b/src/BPM/pair_bpm_spring.cpp @@ -19,6 +19,7 @@ #include "force.h" #include "memory.h" #include "neigh_list.h" +#include "neighbor.h" #include @@ -202,6 +203,18 @@ void PairBPMSpring::coeff(int narg, char **arg) if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients"); } +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairBPMSpring::init_style() +{ + if (comm->ghost_velocity == 0) + error->all(FLERR,"Pair bpm/spring requires ghost atoms store velocity"); + + neighbor->add_request(this); +} + /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ diff --git a/src/BPM/pair_bpm_spring.h b/src/BPM/pair_bpm_spring.h index 3cb281bff3..c10e4a3400 100644 --- a/src/BPM/pair_bpm_spring.h +++ b/src/BPM/pair_bpm_spring.h @@ -31,6 +31,7 @@ class PairBPMSpring : public Pair { void compute(int, int) override; void settings(int, char **) override; void coeff(int, char **) override; + void init_style() override; double init_one(int, int) override; void write_restart(FILE *) override; void read_restart(FILE *) override; From 
ded160cd41653fb9a6d4835045d46279d1245124 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Sun, 31 Dec 2023 10:49:59 -0700 Subject: [PATCH 028/267] Generalizing fix update/special/bonds for pair hybrid --- src/fix_update_special_bonds.cpp | 59 ++++++++++++++++---------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/src/fix_update_special_bonds.cpp b/src/fix_update_special_bonds.cpp index 159b2a1170..4e8cba47ec 100644 --- a/src/fix_update_special_bonds.cpp +++ b/src/fix_update_special_bonds.cpp @@ -19,6 +19,7 @@ #include "error.h" #include "force.h" #include "modify.h" +#include "neighbor.h" #include "neigh_list.h" #include "pair.h" @@ -72,9 +73,6 @@ void FixUpdateSpecialBonds::setup(int /*vflag*/) force->special_coul[3] != 1.0) error->all(FLERR, "Fix update/special/bonds requires special Coulomb weights = 1,1,1"); // Implies neighbor->special_flag = [X, 2, 1, 1] - - if (utils::strmatch(force->pair_style, "^hybrid")) - error->all(FLERR, "Cannot use fix update/special/bonds with hybrid pair styles"); } /* ---------------------------------------------------------------------- @@ -155,44 +153,47 @@ void FixUpdateSpecialBonds::pre_exchange() void FixUpdateSpecialBonds::pre_force(int /*vflag*/) { - int i1, i2, j, jj, jnum; + int ilist, nlist, i1, i2, j, jj, jnum; int *jlist, *numneigh, **firstneigh; tagint tag1, tag2; + NeighList *list; int nlocal = atom->nlocal; - tagint *tag = atom->tag; - NeighList *list = force->pair->list; // may need to be generalized for pair hybrid* - numneigh = list->numneigh; - firstneigh = list->firstneigh; // In theory could communicate a list of broken bonds to neighboring processors here // to remove restriction that users use Newton bond off - for (auto const &it : new_broken_pairs) { - tag1 = it.first; - tag2 = it.second; - i1 = atom->map(tag1); - i2 = atom->map(tag2); + for (int ilist = 0; ilist < neighbor->nlist; ilist ++) { + list = neighbor->lists[ilist]; + numneigh = list->numneigh; + firstneigh = list->firstneigh; 
- // Loop through atoms of owned atoms i j - if (i1 < nlocal) { - jlist = firstneigh[i1]; - jnum = numneigh[i1]; - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - j &= SPECIALMASK; // Clear special bond bits - if (tag[j] == tag2) jlist[jj] = j; + for (auto const &it : new_broken_pairs) { + tag1 = it.first; + tag2 = it.second; + i1 = atom->map(tag1); + i2 = atom->map(tag2); + + // Loop through atoms of owned atoms i j + if (i1 < nlocal) { + jlist = firstneigh[i1]; + jnum = numneigh[i1]; + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= SPECIALMASK; // Clear special bond bits + if (tag[j] == tag2) jlist[jj] = j; + } } - } - if (i2 < nlocal) { - jlist = firstneigh[i2]; - jnum = numneigh[i2]; - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - j &= SPECIALMASK; // Clear special bond bits - if (tag[j] == tag1) jlist[jj] = j; + if (i2 < nlocal) { + jlist = firstneigh[i2]; + jnum = numneigh[i2]; + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= SPECIALMASK; // Clear special bond bits + if (tag[j] == tag1) jlist[jj] = j; + } } } } From 0562c3113879f38f8fc6db7afb88830ecb3ae10c Mon Sep 17 00:00:00 2001 From: Mitch Murphy Date: Tue, 2 Jan 2024 04:51:10 -0500 Subject: [PATCH 029/267] added pair/lj/charmmfsw/coul/long/kk and dihedral/charmmfsw/kk so that lammps scripts generated by charmm-gui.org can be run without tweaks --- src/KOKKOS/Install.sh | 4 + src/KOKKOS/dihedral_charmmfsw_kokkos.cpp | 991 ++++++++++++++++++ src/KOKKOS/dihedral_charmmfsw_kokkos.h | 267 +++++ .../pair_lj_charmmfsw_coul_long_kokkos.cpp | 941 +++++++++++++++++ .../pair_lj_charmmfsw_coul_long_kokkos.h | 230 ++++ 5 files changed, 2433 insertions(+) create mode 100644 src/KOKKOS/dihedral_charmmfsw_kokkos.cpp create mode 100644 src/KOKKOS/dihedral_charmmfsw_kokkos.h create mode 100644 src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp create mode 100644 src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index af80420d7a..462c0cbe57 
100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -106,6 +106,8 @@ action compute_temp_kokkos.cpp action compute_temp_kokkos.h action dihedral_charmm_kokkos.cpp dihedral_charmm.cpp action dihedral_charmm_kokkos.h dihedral_charmm.h +action dihedral_charmmfsw_kokkos.cpp dihedral_charmmfsw.cpp +action dihedral_charmmfsw_kokkos.h dihedral_charmmfsw.h action dihedral_class2_kokkos.cpp dihedral_class2.cpp action dihedral_class2_kokkos.h dihedral_class2.h action dihedral_harmonic_kokkos.cpp dihedral_harmonic.cpp @@ -310,6 +312,8 @@ action pair_lj_charmm_coul_charmm_kokkos.cpp pair_lj_charmm_coul_charmm.cpp action pair_lj_charmm_coul_charmm_kokkos.h pair_lj_charmm_coul_charmm.h action pair_lj_charmm_coul_long_kokkos.cpp pair_lj_charmm_coul_long.cpp action pair_lj_charmm_coul_long_kokkos.h pair_lj_charmm_coul_long.h +action pair_lj_charmmfsw_coul_long_kokkos.cpp pair_lj_charmmfsw_coul_long.cpp +action pair_lj_charmmfsw_coul_long_kokkos.h pair_lj_charmmfsw_coul_long.h action pair_lj_class2_coul_cut_kokkos.cpp pair_lj_class2_coul_cut.cpp action pair_lj_class2_coul_cut_kokkos.h pair_lj_class2_coul_cut.h action pair_lj_class2_coul_long_kokkos.cpp pair_lj_class2_coul_long.cpp diff --git a/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp new file mode 100644 index 0000000000..facb723580 --- /dev/null +++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp @@ -0,0 +1,991 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + + Contributing authors: + + - Stan Moore (SNL) original DihedralCharmmfswKokkos + + - Mitch Murphy (alphataubio) - DihedralCharmmfswKokkos update (2023/12) + + Based on serial dihedral_charmmfsw.cpp lj-fsw sections (force-switched) + provided by Robert Meissner and Lucio Colombi Ciacchi of Bremen + University, Germany, with additional assistance from + Robert A. Latour, Clemson University. + +------------------------------------------------------------------------- */ + + +/* ---------------------------------------------------------------------- + + *** DRAFT VERSION 1 (lots of comments to be removed just before merge) *** + + (1) first draft version of DihedralCharmmfswKokkos exactly + same as DihedralCharmmfswKokkos but with new class name + + method: track changes from serial kspace dihedral_charmm to + dihedral_charmmfsw and apply to DihedralCharmmfswKokkos + + % diff dihedral_charmm.cpp dihedral_charmmfsw.cpp + +------------------------------------------------------------------------- */ + +/* + 18c21 + < #include "dihedral_charmm.h" + --- + > #include "dihedral_charmmfsw.h" + + */ + +#include "dihedral_charmmfsw_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "math_const.h" +#include "memory_kokkos.h" +#include "neighbor_kokkos.h" +#include "pair.h" + +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define TOLERANCE 0.05 + +/* ---------------------------------------------------------------------- */ + +/* + + 40c43 + < DihedralCharmm::DihedralCharmm(LAMMPS *_lmp) : Dihedral(_lmp) + --- + > DihedralCharmmfsw::DihedralCharmmfsw(LAMMPS *_lmp) : Dihedral(_lmp) + + */ + +template +DihedralCharmmfswKokkos::DihedralCharmmfswKokkos(LAMMPS *lmp) : DihedralCharmmfsw(lmp) +{ + atomKK = (AtomKokkos *) atom; + 
neighborKK = (NeighborKokkos *) neighbor; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK | TYPE_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + + k_warning_flag = Kokkos::DualView("Dihedral:warning_flag"); + d_warning_flag = k_warning_flag.template view(); + h_warning_flag = k_warning_flag.h_view; + + centroidstressflag = CENTROID_NOTAVAIL; +} + +/* ---------------------------------------------------------------------- */ + +/* + + 48c51 + < DihedralCharmm::~DihedralCharmm() + --- + > DihedralCharmmfsw::~DihedralCharmmfsw() + + */ + +template +DihedralCharmmfswKokkos::~DihedralCharmmfswKokkos() +{ + if (!copymode) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + } +} + +/* ---------------------------------------------------------------------- */ + +/* + + 73c76 + < double delx, dely, delz, rsq, r2inv, r6inv; + --- + > double delx, dely, delz, rsq, r2inv, r6inv, r; + 255a259,264 + > // modifying coul and LJ force and energies to apply + > // force_shift and force_switch as in CHARMM pairwise + > // LJ interactions between 1-4 atoms should usually be + > // for r < cut_inner, so switching not applied + > + > r = sqrt(rsq); + 258c267 + < else + --- + > else if (dihedflag) + 259a269,270 + > else + > forcecoul = qqrd2e * q[i1] * q[i4] * (sqrt(r2inv) - r * cut_coulinv14 * cut_coulinv14); + 264,265c275,284 + < ecoul = weight[type] * forcecoul; + < evdwl = r6inv * (lj14_3[itype][jtype] * r6inv - lj14_4[itype][jtype]); + --- + > if (dihedflag) + > ecoul = weight[type] * forcecoul; + > else + > ecoul = weight[type] * qqrd2e * q[i1] * q[i4] * + > (sqrt(r2inv) + r * cut_coulinv14 * cut_coulinv14 - 2.0 * cut_coulinv14); + > evdwl14_12 = r6inv * lj14_3[itype][jtype] * r6inv - + > lj14_3[itype][jtype] * cut_lj_inner6inv * cut_lj6inv; + > evdwl14_6 = + > -lj14_4[itype][jtype] * r6inv + lj14_4[itype][jtype] * cut_lj_inner3inv * cut_lj3inv; + > evdwl 
= evdwl14_12 + evdwl14_6; + + */ + + +/* + + 63c66 + < void DihedralCharmm::compute(int eflag, int vflag) + --- + > void DihedralCharmmfsw::compute(int eflag, int vflag) + + */ + +template +void DihedralCharmmfswKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (lmp->kokkos->neighflag == FULL) + error->all(FLERR,"Dihedral_style charmm/kk requires half neighbor list"); + + ev_init(eflag,vflag,0); + + // ensure pair->ev_tally() will use 1-4 virial contribution + + if (weightflag && vflag_global == VIRIAL_FDOTR) + force->pair->vflag_either = force->pair->vflag_global = 1; + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + //if(k_eatom.extent(0)destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom"); + d_eatom = k_eatom.template view(); + k_eatom_pair = Kokkos::DualView("dihedral:eatom_pair",maxeatom); + d_eatom_pair = k_eatom_pair.template view(); + //} + } + if (vflag_atom) { + //if(k_vatom.extent(0)destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"dihedral:vatom"); + d_vatom = k_vatom.template view(); + k_vatom_pair = Kokkos::DualView("dihedral:vatom_pair",maxvatom); + d_vatom_pair = k_vatom_pair.template view(); + //} + } + + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + q = atomKK->k_q.view(); + atomtype = atomKK->k_type.view(); + neighborKK->k_dihedrallist.template sync(); + dihedrallist = neighborKK->k_dihedrallist.view(); + int ndihedrallist = neighborKK->ndihedrallist; + nlocal = atom->nlocal; + newton_bond = force->newton_bond; + qqrd2e = force->qqrd2e; + + h_warning_flag() = 0; + k_warning_flag.template modify(); + k_warning_flag.template sync(); + + copymode = 1; + + // loop over neighbors of my atoms + + EVM_FLOAT evm; + + if (evflag) { + if (newton_bond) { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ndihedrallist),*this,evm); + } else { + Kokkos::parallel_reduce(Kokkos::RangePolicy 
>(0,ndihedrallist),*this,evm); + } + } else { + if (newton_bond) { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ndihedrallist),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ndihedrallist),*this); + } + } + + // error check + + k_warning_flag.template modify(); + k_warning_flag.template sync(); + if (h_warning_flag()) + error->warning(FLERR,"Dihedral problem"); + + if (eflag_global) { + energy += evm.emol; + force->pair->eng_vdwl += evm.evdwl; + force->pair->eng_coul += evm.ecoul; + } + if (vflag_global) { + virial[0] += evm.v[0]; + virial[1] += evm.v[1]; + virial[2] += evm.v[2]; + virial[3] += evm.v[3]; + virial[4] += evm.v[4]; + virial[5] += evm.v[5]; + + force->pair->virial[0] += evm.vp[0]; + force->pair->virial[1] += evm.vp[1]; + force->pair->virial[2] += evm.vp[2]; + force->pair->virial[3] += evm.vp[3]; + force->pair->virial[4] += evm.vp[4]; + force->pair->virial[5] += evm.vp[5]; + } + + // don't yet have dualviews for eatom and vatom in pair_kokkos, + // so need to manually copy these to pair style + + int n = nlocal; + if (newton_bond) n += atom->nghost; + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + + k_eatom_pair.template modify(); + k_eatom_pair.template sync(); + for (int i = 0; i < n; i++) + force->pair->eatom[i] += k_eatom_pair.h_view(i); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + + k_vatom_pair.template modify(); + k_vatom_pair.template sync(); + for (int i = 0; i < n; i++) { + force->pair->vatom[i][0] += k_vatom_pair.h_view(i,0); + force->pair->vatom[i][1] += k_vatom_pair.h_view(i,1); + force->pair->vatom[i][2] += k_vatom_pair.h_view(i,2); + force->pair->vatom[i][3] += k_vatom_pair.h_view(i,3); + force->pair->vatom[i][4] += k_vatom_pair.h_view(i,4); + force->pair->vatom[i][5] += k_vatom_pair.h_view(i,5); + } + } + + copymode = 0; +} + +template +template +KOKKOS_INLINE_FUNCTION +void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmCompute, const int 
&n, EVM_FLOAT& evm) const { + + // The f array is atomic + Kokkos::View::value,Kokkos::MemoryTraits > a_f = f; + + const int i1 = dihedrallist(n,0); + const int i2 = dihedrallist(n,1); + const int i3 = dihedrallist(n,2); + const int i4 = dihedrallist(n,3); + const int type = dihedrallist(n,4); + + // 1st bond + + const F_FLOAT vb1x = x(i1,0) - x(i2,0); + const F_FLOAT vb1y = x(i1,1) - x(i2,1); + const F_FLOAT vb1z = x(i1,2) - x(i2,2); + + // 2nd bond + + const F_FLOAT vb2x = x(i3,0) - x(i2,0); + const F_FLOAT vb2y = x(i3,1) - x(i2,1); + const F_FLOAT vb2z = x(i3,2) - x(i2,2); + + const F_FLOAT vb2xm = -vb2x; + const F_FLOAT vb2ym = -vb2y; + const F_FLOAT vb2zm = -vb2z; + + // 3rd bond + + const F_FLOAT vb3x = x(i4,0) - x(i3,0); + const F_FLOAT vb3y = x(i4,1) - x(i3,1); + const F_FLOAT vb3z = x(i4,2) - x(i3,2); + + const F_FLOAT ax = vb1y*vb2zm - vb1z*vb2ym; + const F_FLOAT ay = vb1z*vb2xm - vb1x*vb2zm; + const F_FLOAT az = vb1x*vb2ym - vb1y*vb2xm; + const F_FLOAT bx = vb3y*vb2zm - vb3z*vb2ym; + const F_FLOAT by = vb3z*vb2xm - vb3x*vb2zm; + const F_FLOAT bz = vb3x*vb2ym - vb3y*vb2xm; + + const F_FLOAT rasq = ax*ax + ay*ay + az*az; + const F_FLOAT rbsq = bx*bx + by*by + bz*bz; + const F_FLOAT rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; + const F_FLOAT rg = sqrt(rgsq); + + F_FLOAT rginv,ra2inv,rb2inv; + rginv = ra2inv = rb2inv = 0.0; + if (rg > 0) rginv = 1.0/rg; + if (rasq > 0) ra2inv = 1.0/rasq; + if (rbsq > 0) rb2inv = 1.0/rbsq; + const F_FLOAT rabinv = sqrt(ra2inv*rb2inv); + + F_FLOAT c = (ax*bx + ay*by + az*bz)*rabinv; + F_FLOAT s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z); + + // error check + + if ((c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) && !d_warning_flag()) + d_warning_flag() = 1; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + const int m = d_multiplicity[type]; + F_FLOAT p = 1.0; + F_FLOAT ddf1,df1; + ddf1 = df1 = 0.0; + + for (int i = 0; i < m; i++) { + ddf1 = p*c - df1*s; + df1 = p*s + df1*c; + p = ddf1; + } + + p = p*d_cos_shift[type] 
+ df1*d_sin_shift[type]; + df1 = df1*d_cos_shift[type] - ddf1*d_sin_shift[type]; + df1 *= -m; + p += 1.0; + + if (m == 0) { + p = 1.0 + d_cos_shift[type]; + df1 = 0.0; + } + + E_FLOAT edihedral = 0.0; + if (eflag) edihedral = d_k[type] * p; + + const F_FLOAT fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm; + const F_FLOAT hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm; + const F_FLOAT fga = fg*ra2inv*rginv; + const F_FLOAT hgb = hg*rb2inv*rginv; + const F_FLOAT gaa = -ra2inv*rg; + const F_FLOAT gbb = rb2inv*rg; + + const F_FLOAT dtfx = gaa*ax; + const F_FLOAT dtfy = gaa*ay; + const F_FLOAT dtfz = gaa*az; + const F_FLOAT dtgx = fga*ax - hgb*bx; + const F_FLOAT dtgy = fga*ay - hgb*by; + const F_FLOAT dtgz = fga*az - hgb*bz; + const F_FLOAT dthx = gbb*bx; + const F_FLOAT dthy = gbb*by; + const F_FLOAT dthz = gbb*bz; + + const F_FLOAT df = -d_k[type] * df1; + + const F_FLOAT sx2 = df*dtgx; + const F_FLOAT sy2 = df*dtgy; + const F_FLOAT sz2 = df*dtgz; + + F_FLOAT f1[3],f2[3],f3[3],f4[3]; + f1[0] = df*dtfx; + f1[1] = df*dtfy; + f1[2] = df*dtfz; + + f2[0] = sx2 - f1[0]; + f2[1] = sy2 - f1[1]; + f2[2] = sz2 - f1[2]; + + f4[0] = df*dthx; + f4[1] = df*dthy; + f4[2] = df*dthz; + + f3[0] = -sx2 - f4[0]; + f3[1] = -sy2 - f4[1]; + f3[2] = -sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + a_f(i1,0) += f1[0]; + a_f(i1,1) += f1[1]; + a_f(i1,2) += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + a_f(i2,0) += f2[0]; + a_f(i2,1) += f2[1]; + a_f(i2,2) += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + a_f(i3,0) += f3[0]; + a_f(i3,1) += f3[1]; + a_f(i3,2) += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + a_f(i4,0) += f4[0]; + a_f(i4,1) += f4[1]; + a_f(i4,2) += f4[2]; + } + + if (EVFLAG) + ev_tally(evm,i1,i2,i3,i4,edihedral,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z); + + // 1-4 LJ and Coulomb interactions + // tally energy/virial in pair, using newton_bond as newton flag + + if (d_weight[type] > 0.0) { + const int itype = 
atomtype[i1]; + const int jtype = atomtype[i4]; + + const F_FLOAT delx = x(i1,0) - x(i4,0); + const F_FLOAT dely = x(i1,1) - x(i4,1); + const F_FLOAT delz = x(i1,2) - x(i4,2); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT r6inv = r2inv*r2inv*r2inv; + + F_FLOAT forcecoul; + if (implicit) forcecoul = qqrd2e * q[i1]*q[i4]*r2inv; + else forcecoul = qqrd2e * q[i1]*q[i4]*sqrt(r2inv); + const F_FLOAT forcelj = r6inv * (d_lj14_1(itype,jtype)*r6inv - d_lj14_2(itype,jtype)); + const F_FLOAT fpair = d_weight[type] * (forcelj+forcecoul)*r2inv; + + F_FLOAT ecoul = 0.0; + F_FLOAT evdwl = 0.0; + if (eflag) { + ecoul = d_weight[type] * forcecoul; + evdwl = r6inv * (d_lj14_3(itype,jtype)*r6inv - d_lj14_4(itype,jtype)); + evdwl *= d_weight[type]; + } + + if (newton_bond || i1 < nlocal) { + a_f(i1,0) += delx*fpair; + a_f(i1,1) += dely*fpair; + a_f(i1,2) += delz*fpair; + } + if (newton_bond || i4 < nlocal) { + a_f(i4,0) -= delx*fpair; + a_f(i4,1) -= dely*fpair; + a_f(i4,2) -= delz*fpair; + } + + if (EVFLAG) ev_tally(evm,i1,i4,evdwl,ecoul,fpair,delx,dely,delz); + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmCompute, const int &n) const { + EVM_FLOAT evm; + this->template operator()(TagDihedralCharmmCompute(), n, evm); +} + +/* ---------------------------------------------------------------------- */ + +/* + + 288c307 + < void DihedralCharmm::allocate() + --- + > void DihedralCharmmfsw::allocate() + + */ + +template +void DihedralCharmmfswKokkos::allocate() +{ + DihedralCharmmfsw::allocate(); +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more types +------------------------------------------------------------------------- */ + +/* + + 308c327 + < void DihedralCharmm::coeff(int narg, char **arg) + --- + > void DihedralCharmmfsw::coeff(int narg, char **arg) + + */ + +template +void 
DihedralCharmmfswKokkos::coeff(int narg, char **arg) +{ + DihedralCharmmfsw::coeff(narg, arg); + + int nd = atom->ndihedraltypes; + typename AT::tdual_ffloat_1d k_k("DihedralCharmm::k",nd+1); + typename AT::tdual_ffloat_1d k_multiplicity("DihedralCharmm::multiplicity",nd+1); + typename AT::tdual_ffloat_1d k_shift("DihedralCharmm::shift",nd+1); + typename AT::tdual_ffloat_1d k_cos_shift("DihedralCharmm::cos_shift",nd+1); + typename AT::tdual_ffloat_1d k_sin_shift("DihedralCharmm::sin_shift",nd+1); + typename AT::tdual_ffloat_1d k_weight("DihedralCharmm::weight",nd+1); + + d_k = k_k.template view(); + d_multiplicity = k_multiplicity.template view(); + d_shift = k_shift.template view(); + d_cos_shift = k_cos_shift.template view(); + d_sin_shift = k_sin_shift.template view(); + d_weight = k_weight.template view(); + + int n = atom->ndihedraltypes; + for (int i = 1; i <= n; i++) { + k_k.h_view[i] = k[i]; + k_multiplicity.h_view[i] = multiplicity[i]; + k_shift.h_view[i] = shift[i]; + k_cos_shift.h_view[i] = cos_shift[i]; + k_sin_shift.h_view[i] = sin_shift[i]; + k_weight.h_view[i] = weight[i]; + } + + k_k.template modify(); + k_multiplicity.template modify(); + k_shift.template modify(); + k_cos_shift.template modify(); + k_sin_shift.template modify(); + k_weight.template modify(); + + k_k.template sync(); + k_multiplicity.template sync(); + k_shift.template sync(); + k_cos_shift.template sync(); + k_sin_shift.template sync(); + k_weight.template sync(); +} + +/* ---------------------------------------------------------------------- + error check and initialize all values needed for force computation +------------------------------------------------------------------------- */ + +/* + + 350c369 + < void DihedralCharmm::init_style() + --- + > void DihedralCharmmfsw::init_style() + 382a402,425 + > + > // constants for applying force switch (LJ) and force_shift (coul) + > // to 1/4 dihedral atoms to match CHARMM pairwise interactions + > + > int itmp; + > int *p_dihedflag = 
(int *) force->pair->extract("dihedflag", itmp); + > auto p_cutljinner = (double *) force->pair->extract("cut_lj_inner", itmp); + > auto p_cutlj = (double *) force->pair->extract("cut_lj", itmp); + > auto p_cutcoul = (double *) force->pair->extract("cut_coul", itmp); + > + > if (p_cutcoul == nullptr || p_cutljinner == nullptr || p_cutlj == nullptr || + > p_dihedflag == nullptr) + > error->all(FLERR, "Dihedral charmmfsw is incompatible with Pair style"); + > + > dihedflag = *p_dihedflag; + > cut_coul14 = *p_cutcoul; + > cut_lj_inner14 = *p_cutljinner; + > cut_lj14 = *p_cutlj; + > + > cut_coulinv14 = 1 / cut_coul14; + > cut_lj_inner3inv = (1 / cut_lj_inner14) * (1 / cut_lj_inner14) * (1 / cut_lj_inner14); + > cut_lj_inner6inv = cut_lj_inner3inv * cut_lj_inner3inv; + > cut_lj3inv = (1 / cut_lj14) * (1 / cut_lj14) * (1 / cut_lj14); + > cut_lj6inv = cut_lj3inv * cut_lj3inv; + + */ + +template +void DihedralCharmmfswKokkos::init_style() +{ + DihedralCharmmfsw::init_style(); + + int n = atom->ntypes; + DAT::tdual_ffloat_2d k_lj14_1("DihedralCharmm:lj14_1",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_2("DihedralCharmm:lj14_2",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_3("DihedralCharmm:lj14_3",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_4("DihedralCharmm:lj14_4",n+1,n+1); + + d_lj14_1 = k_lj14_1.template view(); + d_lj14_2 = k_lj14_2.template view(); + d_lj14_3 = k_lj14_3.template view(); + d_lj14_4 = k_lj14_4.template view(); + + + if (weightflag) { + int n = atom->ntypes; + for (int i = 1; i <= n; i++) { + for (int j = 1; j <= n; j++) { + k_lj14_1.h_view(i,j) = lj14_1[i][j]; + k_lj14_2.h_view(i,j) = lj14_2[i][j]; + k_lj14_3.h_view(i,j) = lj14_3[i][j]; + k_lj14_4.h_view(i,j) = lj14_4[i][j]; + } + } + } + + k_lj14_1.template modify(); + k_lj14_2.template modify(); + k_lj14_3.template modify(); + k_lj14_4.template modify(); + + k_lj14_1.template sync(); + k_lj14_2.template sync(); + k_lj14_3.template sync(); + k_lj14_4.template sync(); +} + +/* 
---------------------------------------------------------------------- + proc 0 reads coeffs from restart file, bcasts them +------------------------------------------------------------------------- */ + +/* + + 402c445 + < void DihedralCharmm::read_restart(FILE *fp) + --- + > void DihedralCharmmfsw::read_restart(FILE *fp) + + */ +template +void DihedralCharmmfswKokkos::read_restart(FILE *fp) +{ + DihedralCharmmfsw::read_restart(fp); + + int nd = atom->ndihedraltypes; + typename AT::tdual_ffloat_1d k_k("DihedralCharmm::k",nd+1); + typename AT::tdual_ffloat_1d k_multiplicity("DihedralCharmm::multiplicity",nd+1); + typename AT::tdual_ffloat_1d k_shift("DihedralCharmm::shift",nd+1); + typename AT::tdual_ffloat_1d k_cos_shift("DihedralCharmm::cos_shift",nd+1); + typename AT::tdual_ffloat_1d k_sin_shift("DihedralCharmm::sin_shift",nd+1); + typename AT::tdual_ffloat_1d k_weight("DihedralCharmm::weight",nd+1); + + d_k = k_k.template view(); + d_multiplicity = k_multiplicity.template view(); + d_shift = k_shift.template view(); + d_cos_shift = k_cos_shift.template view(); + d_sin_shift = k_sin_shift.template view(); + d_weight = k_weight.template view(); + + int n = atom->ndihedraltypes; + for (int i = 1; i <= n; i++) { + k_k.h_view[i] = k[i]; + k_multiplicity.h_view[i] = multiplicity[i]; + k_shift.h_view[i] = shift[i]; + k_cos_shift.h_view[i] = cos_shift[i]; + k_sin_shift.h_view[i] = sin_shift[i]; + k_weight.h_view[i] = weight[i]; + } + + k_k.template modify(); + k_multiplicity.template modify(); + k_shift.template modify(); + k_cos_shift.template modify(); + k_sin_shift.template modify(); + k_weight.template modify(); + + k_k.template sync(); + k_multiplicity.template sync(); + k_shift.template sync(); + k_cos_shift.template sync(); + k_sin_shift.template sync(); + k_weight.template sync(); +} + +/* ---------------------------------------------------------------------- + tally energy and virial into global and per-atom accumulators + virial = r1F1 + r2F2 + r3F3 + r4F4 = 
(r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4 + = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4 + = vb1*f1 + vb2*f3 + (vb3+vb2)*f4 +------------------------------------------------------------------------- */ + +template +//template +KOKKOS_INLINE_FUNCTION +void DihedralCharmmfswKokkos::ev_tally(EVM_FLOAT &evm, const int i1, const int i2, const int i3, const int i4, + F_FLOAT &edihedral, F_FLOAT *f1, F_FLOAT *f3, F_FLOAT *f4, + const F_FLOAT &vb1x, const F_FLOAT &vb1y, const F_FLOAT &vb1z, + const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z, + const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const +{ + E_FLOAT edihedralquarter; + F_FLOAT v[6]; + + if (eflag_either) { + if (eflag_global) { + if (newton_bond) evm.emol += edihedral; + else { + edihedralquarter = 0.25*edihedral; + if (i1 < nlocal) evm.emol += edihedralquarter; + if (i2 < nlocal) evm.emol += edihedralquarter; + if (i3 < nlocal) evm.emol += edihedralquarter; + if (i4 < nlocal) evm.emol += edihedralquarter; + } + } + if (eflag_atom) { + edihedralquarter = 0.25*edihedral; + if (newton_bond || i1 < nlocal) d_eatom[i1] += edihedralquarter; + if (newton_bond || i2 < nlocal) d_eatom[i2] += edihedralquarter; + if (newton_bond || i3 < nlocal) d_eatom[i3] += edihedralquarter; + if (newton_bond || i4 < nlocal) d_eatom[i4] += edihedralquarter; + } + } + + if (vflag_either) { + v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; + v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; + v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; + v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1]; + v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2]; + v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2]; + + if (vflag_global) { + if (newton_bond) { + evm.v[0] += v[0]; + evm.v[1] += v[1]; + evm.v[2] += v[2]; + evm.v[3] += v[3]; + evm.v[4] += v[4]; + evm.v[5] += v[5]; + } else { + if (i1 < nlocal) { + evm.v[0] += 0.25*v[0]; + evm.v[1] += 0.25*v[1]; + evm.v[2] += 0.25*v[2]; + evm.v[3] += 0.25*v[3]; + evm.v[4] += 
0.25*v[4]; + evm.v[5] += 0.25*v[5]; + } + if (i2 < nlocal) { + evm.v[0] += 0.25*v[0]; + evm.v[1] += 0.25*v[1]; + evm.v[2] += 0.25*v[2]; + evm.v[3] += 0.25*v[3]; + evm.v[4] += 0.25*v[4]; + evm.v[5] += 0.25*v[5]; + } + if (i3 < nlocal) { + evm.v[0] += 0.25*v[0]; + evm.v[1] += 0.25*v[1]; + evm.v[2] += 0.25*v[2]; + evm.v[3] += 0.25*v[3]; + evm.v[4] += 0.25*v[4]; + evm.v[5] += 0.25*v[5]; + } + if (i4 < nlocal) { + evm.v[0] += 0.25*v[0]; + evm.v[1] += 0.25*v[1]; + evm.v[2] += 0.25*v[2]; + evm.v[3] += 0.25*v[3]; + evm.v[4] += 0.25*v[4]; + evm.v[5] += 0.25*v[5]; + } + } + } + + if (vflag_atom) { + if (newton_bond || i1 < nlocal) { + d_vatom(i1,0) += 0.25*v[0]; + d_vatom(i1,1) += 0.25*v[1]; + d_vatom(i1,2) += 0.25*v[2]; + d_vatom(i1,3) += 0.25*v[3]; + d_vatom(i1,4) += 0.25*v[4]; + d_vatom(i1,5) += 0.25*v[5]; + } + if (newton_bond || i2 < nlocal) { + d_vatom(i2,0) += 0.25*v[0]; + d_vatom(i2,1) += 0.25*v[1]; + d_vatom(i2,2) += 0.25*v[2]; + d_vatom(i2,3) += 0.25*v[3]; + d_vatom(i2,4) += 0.25*v[4]; + d_vatom(i2,5) += 0.25*v[5]; + } + if (newton_bond || i3 < nlocal) { + d_vatom(i3,0) += 0.25*v[0]; + d_vatom(i3,1) += 0.25*v[1]; + d_vatom(i3,2) += 0.25*v[2]; + d_vatom(i3,3) += 0.25*v[3]; + d_vatom(i3,4) += 0.25*v[4]; + d_vatom(i3,5) += 0.25*v[5]; + } + if (newton_bond || i4 < nlocal) { + d_vatom(i4,0) += 0.25*v[0]; + d_vatom(i4,1) += 0.25*v[1]; + d_vatom(i4,2) += 0.25*v[2]; + d_vatom(i4,3) += 0.25*v[3]; + d_vatom(i4,4) += 0.25*v[4]; + d_vatom(i4,5) += 0.25*v[5]; + } + } + } +} + +/* ---------------------------------------------------------------------- + tally eng_vdwl and virial into global and per-atom accumulators + need i < nlocal test since called by bond_quartic and dihedral_charmm +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void DihedralCharmmfswKokkos::ev_tally(EVM_FLOAT &evm, const int i, const int j, + const F_FLOAT &evdwl, const F_FLOAT &ecoul, const F_FLOAT &fpair, const F_FLOAT &delx, + const 
F_FLOAT &dely, const F_FLOAT &delz) const +{ + E_FLOAT evdwlhalf,ecoulhalf,epairhalf; + F_FLOAT v[6]; + + + if (eflag_either) { + if (eflag_global) { + if (newton_bond) { + evm.evdwl += evdwl; + evm.ecoul += ecoul; + } else { + evdwlhalf = 0.5*evdwl; + ecoulhalf = 0.5*ecoul; + if (i < nlocal) { + evm.evdwl += evdwlhalf; + evm.ecoul += ecoulhalf; + } + if (j < nlocal) { + evm.evdwl += evdwlhalf; + evm.ecoul += ecoulhalf; + } + } + } + if (eflag_atom) { + epairhalf = 0.5 * (evdwl + ecoul); + if (newton_bond || i < nlocal) d_eatom_pair[i] += epairhalf; + if (newton_bond || j < nlocal) d_eatom_pair[j] += epairhalf; + } + } + + if (vflag_either) { + v[0] = delx*delx*fpair; + v[1] = dely*dely*fpair; + v[2] = delz*delz*fpair; + v[3] = delx*dely*fpair; + v[4] = delx*delz*fpair; + v[5] = dely*delz*fpair; + + if (vflag_global) { + if (newton_bond) { + evm.vp[0] += v[0]; + evm.vp[1] += v[1]; + evm.vp[2] += v[2]; + evm.vp[3] += v[3]; + evm.vp[4] += v[4]; + evm.vp[5] += v[5]; + } else { + if (i < nlocal) { + evm.vp[0] += 0.5*v[0]; + evm.vp[1] += 0.5*v[1]; + evm.vp[2] += 0.5*v[2]; + evm.vp[3] += 0.5*v[3]; + evm.vp[4] += 0.5*v[4]; + evm.vp[5] += 0.5*v[5]; + } + if (j < nlocal) { + evm.vp[0] += 0.5*v[0]; + evm.vp[1] += 0.5*v[1]; + evm.vp[2] += 0.5*v[2]; + evm.vp[3] += 0.5*v[3]; + evm.vp[4] += 0.5*v[4]; + evm.vp[5] += 0.5*v[5]; + } + } + } + + if (vflag_atom) { + if (newton_bond || i < nlocal) { + d_vatom_pair(i,0) += 0.5*v[0]; + d_vatom_pair(i,1) += 0.5*v[1]; + d_vatom_pair(i,2) += 0.5*v[2]; + d_vatom_pair(i,3) += 0.5*v[3]; + d_vatom_pair(i,4) += 0.5*v[4]; + d_vatom_pair(i,5) += 0.5*v[5]; + } + if (newton_bond || j < nlocal) { + d_vatom_pair(j,0) += 0.5*v[0]; + d_vatom_pair(j,1) += 0.5*v[1]; + d_vatom_pair(j,2) += 0.5*v[2]; + d_vatom_pair(j,3) += 0.5*v[3]; + d_vatom_pair(j,4) += 0.5*v[4]; + d_vatom_pair(j,5) += 0.5*v[5]; + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +namespace LAMMPS_NS { +template class DihedralCharmmfswKokkos; 
+#ifdef LMP_KOKKOS_GPU +template class DihedralCharmmfswKokkos; +#endif +} + + + +/* + + + 355c374 + < error->all(FLERR, "Dihedral style charmm must be set to same r-RESPA level as 'pair'"); + --- + > error->all(FLERR, "Dihedral style charmmfsw must be set to same r-RESPA level as 'pair'"); + 357c376 + < error->all(FLERR, "Dihedral style charmm must be set to same r-RESPA level as 'outer'"); + --- + > error->all(FLERR, "Dihedral style charmmfsw must be set to same r-RESPA level as 'outer'"); + 373c392 + < error->all(FLERR, "Dihedral charmm is incompatible with Pair style"); + --- + > error->all(FLERR, "Dihedral charmmfsw is incompatible with Pair style"); + 380c399 + < error->all(FLERR, "Dihedral charmm is incompatible with Pair style"); + --- + > error->all(FLERR, "Dihedral charmmfsw is incompatible with Pair style"); + + 389c432 + < void DihedralCharmm::write_restart(FILE *fp) + --- + > void DihedralCharmmfsw::write_restart(FILE *fp) + 430c473 + < void DihedralCharmm::write_data(FILE *fp) + --- + > void DihedralCharmmfsw::write_data(FILE *fp) + + */ + +// nothing to do for all these, inherited from DihedralCharmmfsw diff --git a/src/KOKKOS/dihedral_charmmfsw_kokkos.h b/src/KOKKOS/dihedral_charmmfsw_kokkos.h new file mode 100644 index 0000000000..413945826f --- /dev/null +++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.h @@ -0,0 +1,267 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + + *** DRAFT VERSION 1 (lots of comments to be removed just before merge) *** + + (1) first draft version of DihedralCharmmfswKokkos exactly + same as DihedralCharmmKokkos but with new class name + + (2) second draft version: nothing changed in header file + + method: track changes from serial kspace dihedral_charmm to + dihedral_charmmfsw and apply to DihedralCharmmKokkos + + % diff dihedral_charmm.h dihedral_charmmfsw.h + +------------------------------------------------------------------------- */ + +/* + + 16c16 + < DihedralStyle(charmm,DihedralCharmm); + --- + > DihedralStyle(charmmfsw,DihedralCharmmfsw); + + */ + +#ifdef DIHEDRAL_CLASS +// clang-format off +DihedralStyle(charmmfsw/kk,DihedralCharmmfswKokkos); +DihedralStyle(charmmfsw/kk/device,DihedralCharmmfswKokkos); +DihedralStyle(charmmfsw/kk/host,DihedralCharmmfswKokkos); +// clang-format on +#else + +/* + + 20,21c20,21 + < #ifndef LMP_DIHEDRAL_CHARMM_H + < #define LMP_DIHEDRAL_CHARMM_H + --- + > #ifndef LMP_DIHEDRAL_CHARMMFSW_H + > #define LMP_DIHEDRAL_CHARMMFSW_H + + */ + +// clang-format off +#ifndef LMP_DIHEDRAL_CHARMMFSW_KOKKOS_H +#define LMP_DIHEDRAL_CHARMMFSW_KOKKOS_H + +#include "dihedral_charmmfsw.h" +#include "kokkos_type.h" +#include "dihedral_charmm_kokkos.h" + +/* + + s_EVM_FLOAT and TagDihedralCharmmCompute conflict because style_dihedral.h + includes both dihedral_charmm_kokkos.h and dihedral_charmmfsw_kokkos.h + so comment out definitions in here and include dihedral_charmm_kokkos.h + in dihedral_charmmfsw_kokkos.h: + + In file included from /Users/mitch/Dropbox/lammps/lammps/src/force.cpp:18: + In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: + /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:65:8: error: redefinition of 's_EVM_FLOAT' + struct s_EVM_FLOAT { + ^ + 
/Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:31:8: note: previous definition is here + struct s_EVM_FLOAT { + ^ + In file included from /Users/mitch/Dropbox/lammps/lammps/src/force.cpp:18: + In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: + /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:104:8: error: redefinition of 'TagDihedralCharmmCompute' + struct TagDihedralCharmmCompute{}; + ^ + /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:70:8: note: previous definition is here + struct TagDihedralCharmmCompute{}; + ^ + In file included from /Users/mitch/Dropbox/lammps/lammps/src/lammps.cpp:23: + In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: + /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:65:8: error: redefinition of 's_EVM_FLOAT' + struct s_EVM_FLOAT { + ^ + /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:31:8: note: previous definition is here + struct s_EVM_FLOAT { + ^ + In file included from /Users/mitch/Dropbox/lammps/lammps/src/lammps.cpp:23: + In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: + /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:104:8: error: redefinition of 'TagDihedralCharmmCompute' + struct TagDihedralCharmmCompute{}; + ^ + /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:70:8: note: previous definition is here + struct TagDihedralCharmmCompute{}; + ^ + + */ + +namespace LAMMPS_NS { + +/* +struct s_EVM_FLOAT { + E_FLOAT evdwl; + E_FLOAT ecoul; + E_FLOAT emol; + F_FLOAT v[6]; + F_FLOAT vp[6]; + KOKKOS_INLINE_FUNCTION + s_EVM_FLOAT() { + evdwl = 0; + ecoul = 0; + emol = 0; + v[0] = 0; v[1] = 0; v[2] = 0; + v[3] = 0; v[4] = 0; v[5] = 0; + vp[0] = 0; vp[1] = 0; vp[2] = 0; + vp[3] = 0; vp[4] = 0; vp[5] = 0; + } + + KOKKOS_INLINE_FUNCTION + void 
operator+=(const s_EVM_FLOAT &rhs) { + evdwl += rhs.evdwl; + ecoul += rhs.ecoul; + emol += rhs.emol; + v[0] += rhs.v[0]; + v[1] += rhs.v[1]; + v[2] += rhs.v[2]; + v[3] += rhs.v[3]; + v[4] += rhs.v[4]; + v[5] += rhs.v[5]; + vp[0] += rhs.vp[0]; + vp[1] += rhs.vp[1]; + vp[2] += rhs.vp[2]; + vp[3] += rhs.vp[3]; + vp[4] += rhs.vp[4]; + vp[5] += rhs.vp[5]; + } +}; +typedef struct s_EVM_FLOAT EVM_FLOAT; + +template +struct TagDihedralCharmmCompute{}; + +*/ + +/* + 27c27 + < class DihedralCharmm : public Dihedral { + --- + > class DihedralCharmmfsw : public Dihedral { + 29,30c29,30 + < DihedralCharmm(class LAMMPS *); + < ~DihedralCharmm() override; + --- + > DihedralCharmmfsw(class LAMMPS *); + > ~DihedralCharmmfsw() override; + + */ + +template +class DihedralCharmmfswKokkos : public DihedralCharmmfsw { + public: + typedef DeviceType device_type; + typedef EVM_FLOAT value_type; + typedef ArrayTypes AT; + + DihedralCharmmfswKokkos(class LAMMPS *); + ~DihedralCharmmfswKokkos() override; + void compute(int, int) override; + void coeff(int, char **) override; + void init_style() override; + void read_restart(FILE *) override; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagDihedralCharmmCompute, const int&, EVM_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagDihedralCharmmCompute, const int&) const; + + //template + KOKKOS_INLINE_FUNCTION + void ev_tally(EVM_FLOAT &evm, const int i1, const int i2, const int i3, const int i4, + F_FLOAT &edihedral, F_FLOAT *f1, F_FLOAT *f3, F_FLOAT *f4, + const F_FLOAT &vb1x, const F_FLOAT &vb1y, const F_FLOAT &vb1z, + const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z, + const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const; + + KOKKOS_INLINE_FUNCTION + void ev_tally(EVM_FLOAT &evm, const int i, const int j, + const F_FLOAT &evdwl, const F_FLOAT &ecoul, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const; + + protected: + + class 
NeighborKokkos *neighborKK; + + typename AT::t_x_array_randomread x; + typename AT::t_int_1d_randomread atomtype; + typename AT::t_ffloat_1d_randomread q; + typename AT::t_f_array f; + typename AT::t_int_2d dihedrallist; + + typedef typename KKDevice::value KKDeviceType; + Kokkos::DualView k_eatom; + Kokkos::DualView k_vatom; + Kokkos::View > d_eatom; + Kokkos::View > d_vatom; + + Kokkos::DualView k_eatom_pair; + Kokkos::DualView k_vatom_pair; + Kokkos::View > d_eatom_pair; + Kokkos::View > d_vatom_pair; + + int nlocal,newton_bond; + int eflag,vflag; + double qqrd2e; + + Kokkos::DualView k_warning_flag; + typename Kokkos::DualView::t_dev d_warning_flag; + typename Kokkos::DualView::t_host h_warning_flag; + + typename AT::t_ffloat_2d d_lj14_1; + typename AT::t_ffloat_2d d_lj14_2; + typename AT::t_ffloat_2d d_lj14_3; + typename AT::t_ffloat_2d d_lj14_4; + + typename AT::t_ffloat_1d d_k; + typename AT::t_ffloat_1d d_multiplicity; + typename AT::t_ffloat_1d d_shift; + typename AT::t_ffloat_1d d_sin_shift; + typename AT::t_ffloat_1d d_cos_shift; + typename AT::t_ffloat_1d d_weight; + + void allocate() override; +}; + +} + +#endif +#endif + + + +/* + + 38a39,43 + > int implicit, weightflag, dihedflag; + > double cut_lj_inner14, cut_lj14, cut_coul14; + > double evdwl14_12, evdwl14_6, cut_coulinv14; + > double cut_lj_inner3inv, cut_lj_inner6inv, cut_lj3inv, cut_lj6inv; + > + 42d46 + < int implicit, weightflag; + + */ + +// nothing to do here, inherited from DihedralCharmmfsw diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp new file mode 100644 index 0000000000..88efec5fda --- /dev/null +++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp @@ -0,0 +1,941 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: 
developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + + Contributing authors: + + - Ray Shan (SNL) - original PairLJCharmmCoulLongKokkos + + - Mitch Murphy (alphataubio) - PairLJCharmmfswCoulLongKokkos update (2023/12) + + Based on serial kspace lj-fsw sections (force-switched) provided by + Robert Meissner and Lucio Colombi Ciacchi of Bremen University, Germany, + with additional assistance from Robert A. Latour, Clemson University + + ------------------------------------------------------------------------- */ + + + +/* ---------------------------------------------------------------------- + + *** DRAFT VERSION 1 (lots of comments to be removed just before merge) *** + + (1) first draft version of PairLJCharmmfswCoulLongKokkos almost exactly + same as PairLJCharmmCoulLongKokkos but with new class name + + method: track changes from serial kspace pair_lj_charmm_coul_long to + pair_lj_charmmfsw_coul_long and apply to PairLJCharmmCoulLongKokkos + + ISSUES: + + (A) charmm denom_lj_inv cache, is it to optimize code because division + is slower than multiplication ?? 
+ + + + ------------------------------------------------------------------------- */ + + +/* + 19c23 + < #include "pair_lj_charmm_coul_long.h" + --- + > #include "pair_lj_charmmfsw_coul_long.h" + + */ + +#include "pair_lj_charmmfsw_coul_long_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "memory_kokkos.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "neighbor.h" +#include "respa.h" +#include "update.h" + +#include +#include + +using namespace LAMMPS_NS; + + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +/* + 47c51 + < PairLJCharmmCoulLong::PairLJCharmmCoulLong(LAMMPS *lmp) : Pair(lmp) + --- + > PairLJCharmmfswCoulLong::PairLJCharmmfswCoulLong(LAMMPS *lmp) : Pair(lmp) + 55a60,72 + > + > // short-range/long-range flag accessed by DihedralCharmmfsw + > + > dihedflag = 1; + > + > // switch qqr2e from LAMMPS value to CHARMM value + > + > if (strcmp(update->unit_style,"real") == 0) { + > if ((comm->me == 0) && (force->qqr2e != force->qqr2e_charmm_real)) + > error->message(FLERR,"Switching to CHARMM coulomb energy" + > " conversion constant"); + > force->qqr2e = force->qqr2e_charmm_real; + > } + + */ + +// added superclass constructor to inherit from PairLJCharmmfswCoulLong + +template +PairLJCharmmfswCoulLongKokkos::PairLJCharmmfswCoulLongKokkos(LAMMPS *lmp):PairLJCharmmfswCoulLong(lmp) +{ + + // pair_lj_charmmfsw_coul_long_kokkos.cpp:112:28: error: qualified reference to 'PairLJCharmmfswCoulLong' is a constructor name rather than a type in this context + // ??? 
PairLJCharmmfswCoulLong::PairLJCharmmfswCoulLong(lmp); + + respa_enable = 0; + + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- */ + +/* + + 60c77 + < PairLJCharmmCoulLong::~PairLJCharmmCoulLong() + --- + > PairLJCharmmfswCoulLong::~PairLJCharmmfswCoulLong() + 61a79,87 + > // switch qqr2e back from CHARMM value to LAMMPS value + > + > if (update && strcmp(update->unit_style,"real") == 0) { + > if ((comm->me == 0) && (force->qqr2e == force->qqr2e_charmm_real)) + > error->message(FLERR,"Restoring original LAMMPS coulomb energy" + > " conversion constant"); + > force->qqr2e = force->qqr2e_lammps_real; + > } + > + + */ + +// added superclass constructor to inherit from PairLJCharmmfswCoulLong + +template +PairLJCharmmfswCoulLongKokkos::~PairLJCharmmfswCoulLongKokkos() +{ + + // pair_lj_charmmfsw_coul_long_kokkos.cpp:150:28: error: qualified reference to 'PairLJCharmmfswCoulLong' is a constructor name rather than a type in this context + // ??? 
PairLJCharmmfswCoulLong::PairLJCharmmfswCoulLong(); + + if (copymode) return; + + if (allocated) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_cutsq,cutsq); + } +} + +/* ---------------------------------------------------------------------- */ + +/* + 87c112 + < void PairLJCharmmCoulLong::compute(int eflag, int vflag) + --- + > void PairLJCharmmfswCoulLong::compute(int eflag, int vflag) + 90c115 + < double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + --- + > double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwl12,evdwl6,ecoul,fpair; + 92c117 + < double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + --- + > double r,rinv,r2inv,r3inv,r6inv,rsq,forcecoul,forcelj,factor_coul,factor_lj; + 94c119 + < double philj,switch1,switch2; + --- + > double switch1; + 96d120 + < double rsq; + 174,179c198,200 + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < switch2 = 12.0*rsq * (cut_ljsq-rsq) * + < (rsq-cut_lj_innersq) * denom_lj_inv; + < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + < forcelj = forcelj*switch1 + philj*switch2; + < } + --- + > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + > forcelj = forcelj*switch1; + > } + 205d225 + < evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + 207,209c227,240 + < switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < evdwl *= switch1; + --- + > r = sqrt(rsq); + > rinv = 1.0/r; + > r3inv = rinv*rinv*rinv; + > evdwl12 = lj3[itype][jtype]*cut_lj6*denom_lj12 * + > (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); + > evdwl6 = -lj4[itype][jtype]*cut_lj3*denom_lj6 * + > (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); + > evdwl = evdwl12 + evdwl6; + > } else { + > evdwl12 = r6inv*lj3[itype][jtype]*r6inv - + > lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; + > evdwl6 = -lj4[itype][jtype]*r6inv + + > lj4[itype][jtype]*cut_lj_inner3inv*cut_lj3inv; + > evdwl = 
evdwl12 + evdwl6; + + */ + +template +void PairLJCharmmfswCoulLongKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + ev_init(eflag,vflag,0); + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.view(); + } + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); + d_vatom = k_vatom.view(); + } + + atomKK->sync(execution_space,datamask_read); + k_cutsq.template sync(); + k_params.template sync(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view(); + c_x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + q = atomKK->k_q.view(); + type = atomKK->k_type.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + special_coul[0] = force->special_coul[0]; + special_coul[1] = force->special_coul[1]; + special_coul[2] = force->special_coul[2]; + special_coul[3] = force->special_coul[3]; + qqrd2e = force->qqrd2e; + newton_pair = force->newton_pair; + + // loop over neighbors of my atoms + + copymode = 1; + + EV_FLOAT ev; + if (ncoultablebits) + ev = pair_compute,CoulLongTable<1> > + (this,(NeighListKokkos*)list); + else + ev = pair_compute,CoulLongTable<0> > + (this,(NeighListKokkos*)list); + + + if (eflag) { + eng_vdwl += ev.evdwl; + eng_coul += ev.ecoul; + } + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + 
} + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + copymode = 0; +} + +/* ---------------------------------------------------------------------- + compute LJ CHARMM pair force between atoms i and j + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCharmmfswCoulLongKokkos:: +compute_fpair(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, + const int& itype, const int& jtype) const { + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT r6inv = r2inv*r2inv*r2inv; + F_FLOAT forcelj, switch1, switch2, englj; + + forcelj = r6inv * + ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv - + (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2)); + + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + switch2 = 12.0*rsq * (cut_ljsq-rsq) * (rsq-cut_lj_innersq) / denom_lj; + englj = r6inv * + ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv - + (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)); + forcelj = forcelj*switch1 + englj*switch2; + } + + return forcelj*r2inv; +} + +/* ---------------------------------------------------------------------- + compute LJ CHARMM pair potential energy between atoms i and j + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCharmmfswCoulLongKokkos:: +compute_evdwl(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, + const int& itype, const int& jtype) const { + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT r6inv = r2inv*r2inv*r2inv; + F_FLOAT englj, switch1; + + englj = r6inv * + ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv - + (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)); + + if (rsq 
> cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + englj *= switch1; + } + + return englj; + +} + +/* ---------------------------------------------------------------------- + compute coulomb pair force between atoms i and j + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCharmmfswCoulLongKokkos:: +compute_fcoul(const F_FLOAT& rsq, const int& /*i*/, const int&j, + const int& /*itype*/, const int& /*jtype*/, + const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const { + if (Specialisation::DoTable && rsq > tabinnersq) { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; + const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable]; + const F_FLOAT table = d_ftable[itable] + fraction*d_dftable[itable]; + F_FLOAT forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable]; + const F_FLOAT prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + return forcecoul/rsq; + } else { + const F_FLOAT r = sqrt(rsq); + const F_FLOAT grij = g_ewald * r; + const F_FLOAT expm2 = exp(-grij*grij); + const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij); + const F_FLOAT rinv = 1.0/r; + const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const F_FLOAT prefactor = qqrd2e * qtmp*q[j]*rinv; + F_FLOAT forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + + return forcecoul*rinv*rinv; + } +} + +/* ---------------------------------------------------------------------- + compute coulomb pair potential energy between atoms i and j + ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCharmmfswCoulLongKokkos:: 
+compute_ecoul(const F_FLOAT& rsq, const int& /*i*/, const int&j, + const int& /*itype*/, const int& /*jtype*/, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const { + if (Specialisation::DoTable && rsq > tabinnersq) { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; + const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable]; + const F_FLOAT table = d_etable[itable] + fraction*d_detable[itable]; + F_FLOAT ecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable]; + const F_FLOAT prefactor = qtmp*q[j] * table; + ecoul -= (1.0-factor_coul)*prefactor; + } + return ecoul; + } else { + const F_FLOAT r = sqrt(rsq); + const F_FLOAT grij = g_ewald * r; + const F_FLOAT expm2 = exp(-grij*grij); + const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij); + const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const F_FLOAT prefactor = qqrd2e * qtmp*q[j]/r; + F_FLOAT ecoul = prefactor * erfc; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + return ecoul; + } +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template +void PairLJCharmmfswCoulLongKokkos::allocate() +{ + PairLJCharmmfswCoulLong::allocate(); + + int n = atom->ntypes; + + memory->destroy(cutsq); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + d_cutsq = k_cutsq.template view(); + + d_cut_ljsq = typename AT::t_ffloat_2d("pair:cut_ljsq",n+1,n+1); + + d_cut_coulsq = typename AT::t_ffloat_2d("pair:cut_coulsq",n+1,n+1); + + k_params = Kokkos::DualView("PairLJCharmmCoulLong::params",n+1,n+1); + params = k_params.template view(); +} + +template +void PairLJCharmmfswCoulLongKokkos::init_tables(double cut_coul, double *cut_respa) +{ + Pair::init_tables(cut_coul,cut_respa); + + typedef typename 
ArrayTypes::t_ffloat_1d table_type; + typedef typename ArrayTypes::t_ffloat_1d host_table_type; + + int ntable = 1; + for (int i = 0; i < ncoultablebits; i++) ntable *= 2; + + + // Copy rtable and drtable + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + for (int i = 0; i < ntable; i++) { + h_table(i) = rtable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_rtable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + for (int i = 0; i < ntable; i++) { + h_table(i) = drtable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_drtable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + // Copy ftable and dftable + for (int i = 0; i < ntable; i++) { + h_table(i) = ftable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_ftable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + for (int i = 0; i < ntable; i++) { + h_table(i) = dftable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_dftable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + // Copy ctable and dctable + for (int i = 0; i < ntable; i++) { + h_table(i) = ctable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_ctable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + for (int i = 0; i < ntable; i++) { + h_table(i) = dctable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_dctable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + // Copy etable and detable + for (int i = 0; i < ntable; i++) { + h_table(i) = etable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_etable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); 
+ + for (int i = 0; i < ntable; i++) { + h_table(i) = detable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_detable = d_table; + } +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +/* + 682c733 + < void PairLJCharmmCoulLong::init_style() + --- + > void PairLJCharmmfswCoulLong::init_style() + 686c737 + < "Pair style lj/charmm/coul/long requires atom attribute q"); + --- + > "Pair style lj/charmmfsw/coul/long requires atom attribute q"); + 688c739 + < // request regular or rRESPA neighbor list + --- + > // request regular or rRESPA neighbor lists + 705a757,766 + > cut_ljinv = 1.0/cut_lj; + > cut_lj_innerinv = 1.0/cut_lj_inner; + > cut_lj3 = cut_lj * cut_lj * cut_lj; + > cut_lj3inv = cut_ljinv * cut_ljinv * cut_ljinv; + > cut_lj_inner3inv = cut_lj_innerinv * cut_lj_innerinv * cut_lj_innerinv; + > cut_lj_inner3 = cut_lj_inner * cut_lj_inner * cut_lj_inner; + > cut_lj6 = cut_ljsq * cut_ljsq * cut_ljsq; + > cut_lj6inv = cut_lj3inv * cut_lj3inv; + > cut_lj_inner6inv = cut_lj_inner3inv * cut_lj_inner3inv; + > cut_lj_inner6 = cut_lj_innersq * cut_lj_innersq * cut_lj_innersq; + 709,711c770,773 + < denom_lj = ( (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * + < (cut_ljsq-cut_lj_innersq) ); + < denom_lj_inv = 1.0 / denom_lj; + --- + > denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * + > (cut_ljsq-cut_lj_innersq); + > denom_lj12 = 1.0/(cut_lj6 - cut_lj_inner6); + > denom_lj6 = 1.0/(cut_lj3 - cut_lj_inner3); + 718,730d779 + < cut_in_off = cut_respa[0]; + < cut_in_on = cut_respa[1]; + < cut_out_on = cut_respa[2]; + < cut_out_off = cut_respa[3]; + < + < cut_in_diff = cut_in_on - cut_in_off; + < cut_out_diff = cut_out_off - cut_out_on; + < cut_in_diff_inv = 1.0 / (cut_in_diff); + < cut_out_diff_inv = 1.0 / (cut_out_diff); + < cut_in_off_sq = cut_in_off*cut_in_off; + < cut_in_on_sq = cut_in_on*cut_in_on; + < 
cut_out_on_sq = cut_out_on*cut_out_on; + < cut_out_off_sq = cut_out_off*cut_out_off; + + */ + +template +void PairLJCharmmfswCoulLongKokkos::init_style() +{ + PairLJCharmmfswCoulLong::init_style(); + + Kokkos::deep_copy(d_cut_ljsq,cut_ljsq); + Kokkos::deep_copy(d_cut_coulsq,cut_coulsq); + + // error if rRESPA with inner levels + + if (update->whichflag == 1 && utils::strmatch(update->integrate_style,"^respa")) { + int respa = 0; + if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; + if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + if (respa) + error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle"); + } + + // adjust neighbor list request for KOKKOS + + neighflag = lmp->kokkos->neighflag; + auto request = neighbor->find_request(this); + request->set_kokkos_host(std::is_same_v && + !std::is_same_v); + request->set_kokkos_device(std::is_same_v); + if (neighflag == FULL) request->enable_full(); +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +template +double PairLJCharmmfswCoulLongKokkos::init_one(int i, int j) +{ + double cutone = PairLJCharmmfswCoulLong::init_one(i,j); + + k_params.h_view(i,j).lj1 = lj1[i][j]; + k_params.h_view(i,j).lj2 = lj2[i][j]; + k_params.h_view(i,j).lj3 = lj3[i][j]; + k_params.h_view(i,j).lj4 = lj4[i][j]; + //k_params.h_view(i,j).offset = offset[i][j]; + k_params.h_view(i,j).cut_ljsq = cut_ljsq; + k_params.h_view(i,j).cut_coulsq = cut_coulsq; + + k_params.h_view(j,i) = k_params.h_view(i,j); + if (i(); + k_params.template modify(); + + return cutone; +} + +namespace LAMMPS_NS { +template class PairLJCharmmfswCoulLongKokkos; +#ifdef LMP_KOKKOS_GPU +template class PairLJCharmmfswCoulLongKokkos; +#endif +} + + + + +/* + 80d105 + < memory->destroy(offset); + 598c650 + < void PairLJCharmmCoulLong::allocate() + --- + > void 
PairLJCharmmfswCoulLong::allocate() + 622d673 + < memory->create(offset,n+1,n+1,"pair:offset"); + 631c682 + < void PairLJCharmmCoulLong::settings(int narg, char **arg) + --- + > void PairLJCharmmfswCoulLong::settings(int narg, char **arg) + 645c696 + < void PairLJCharmmCoulLong::coeff(int narg, char **arg) + --- + > void PairLJCharmmfswCoulLong::coeff(int narg, char **arg) + 752c801 + < double PairLJCharmmCoulLong::init_one(int i, int j) + --- + > double PairLJCharmmfswCoulLong::init_one(int i, int j) + 790c839 + < void PairLJCharmmCoulLong::write_restart(FILE *fp) + --- + > void PairLJCharmmfswCoulLong::write_restart(FILE *fp) + 811c860 + < void PairLJCharmmCoulLong::read_restart(FILE *fp) + --- + > void PairLJCharmmfswCoulLong::read_restart(FILE *fp) + 842c891 + < void PairLJCharmmCoulLong::write_restart_settings(FILE *fp) + --- + > void PairLJCharmmfswCoulLong::write_restart_settings(FILE *fp) + 857c906 + < void PairLJCharmmCoulLong::read_restart_settings(FILE *fp) + --- + > void PairLJCharmmfswCoulLong::read_restart_settings(FILE *fp) + 882c931 + < void PairLJCharmmCoulLong::write_data(FILE *fp) + --- + > void PairLJCharmmfswCoulLong::write_data(FILE *fp) + 893c942 + < void PairLJCharmmCoulLong::write_data_all(FILE *fp) + --- + > void PairLJCharmmfswCoulLong::write_data_all(FILE *fp) + 903c952 + < double PairLJCharmmCoulLong::single(int i, int j, int itype, int jtype, + --- + > double PairLJCharmmfswCoulLong::single(int i, int j, int itype, int jtype, + 908,909c957,958 + < double r2inv,r6inv,r,grij,expm2,t,erfc,prefactor; + < double switch1,switch2,fraction,table,forcecoul,forcelj,phicoul,philj; + --- + > double r,rinv,r2inv,r3inv,r6inv,grij,expm2,t,erfc,prefactor; + > double switch1,fraction,table,forcecoul,forcelj,phicoul,philj,philj12,philj6; + 911a961,962 + > r = sqrt(rsq); + > rinv = 1.0/r; + 939c990,991 + < r6inv = r2inv*r2inv*r2inv; + --- + > r3inv = rinv*rinv*rinv; + > r6inv = r3inv*r3inv; + 943,947c995,996 + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) 
* denom_lj_inv; + < switch2 = 12.0*rsq * (cut_ljsq-rsq) * + < (rsq-cut_lj_innersq) * denom_lj_inv; + < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + < forcelj = forcelj*switch1 + philj*switch2; + --- + > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + > forcelj = forcelj*switch1; + 965d1013 + < philj = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + 967,969c1015,1025 + < switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < philj *= switch1; + --- + > philj12 = lj3[itype][jtype]*cut_lj6*denom_lj12 * + > (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); + > philj6 = -lj4[itype][jtype]*cut_lj3*denom_lj6 * + > (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); + > philj = philj12 + philj6; + > } else { + > philj12 = r6inv*lj3[itype][jtype]*r6inv - + > lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; + > philj6 = -lj4[itype][jtype]*r6inv + + > lj4[itype][jtype]*cut_lj_inner3inv*cut_lj3inv; + > philj = philj12 + philj6; + 979c1035 + < void *PairLJCharmmCoulLong::extract(const char *str, int &dim) + --- + > void *PairLJCharmmfswCoulLong::extract(const char *str, int &dim) + 988a1045,1047 + > + > // info extracted by dihedral_charmmfsw + > + 989a1049,1051 + > if (strcmp(str,"cut_lj_inner") == 0) return (void *) &cut_lj_inner; + > if (strcmp(str,"cut_lj") == 0) return (void *) &cut_lj; + > if (strcmp(str,"dihedflag") == 0) return (void *) &dihedflag; + + + */ + +// nothing to do for all these, inherited from PairLJCharmmfswCoulLong + + + + +/* + + 226c257 + < void PairLJCharmmCoulLong::compute_inner() + --- + > void PairLJCharmmfswCoulLong::compute_inner() + 248a280,286 + > double cut_out_on = cut_respa[0]; + > double cut_out_off = cut_respa[1]; + > + > double cut_out_diff = cut_out_off - cut_out_on; + > double cut_out_on_sq = cut_out_on*cut_out_on; + > double cut_out_off_sq = cut_out_off*cut_out_off; + > + 284c322 + < rsw = (sqrt(rsq) - cut_out_on)*cut_out_diff_inv; + --- + > rsw = (sqrt(rsq) - 
cut_out_on)/cut_out_diff; + 303c341 + < void PairLJCharmmCoulLong::compute_middle() + --- + > void PairLJCharmmfswCoulLong::compute_middle() + 308c346 + < double philj,switch1,switch2; + --- + > double switch1; + 326a365,376 + > double cut_in_off = cut_respa[0]; + > double cut_in_on = cut_respa[1]; + > double cut_out_on = cut_respa[2]; + > double cut_out_off = cut_respa[3]; + > + > double cut_in_diff = cut_in_on - cut_in_off; + > double cut_out_diff = cut_out_off - cut_out_on; + > double cut_in_off_sq = cut_in_off*cut_in_off; + > double cut_in_on_sq = cut_in_on*cut_in_on; + > double cut_out_on_sq = cut_out_on*cut_out_on; + > double cut_out_off_sq = cut_out_off*cut_out_off; + > + 361,365c411,412 + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < switch2 = 12.0*rsq * (cut_ljsq-rsq) * + < (rsq-cut_lj_innersq) * denom_lj_inv; + < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + < forcelj = forcelj*switch1 + philj*switch2; + --- + > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + > forcelj = forcelj*switch1; + 370c417 + < rsw = (sqrt(rsq) - cut_in_off)*cut_in_diff_inv; + --- + > rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff; + 374c421 + < rsw = (sqrt(rsq) - cut_out_on)*cut_out_diff_inv; + --- + > rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; + 393c440 + < void PairLJCharmmCoulLong::compute_outer(int eflag, int vflag) + --- + > void PairLJCharmmfswCoulLong::compute_outer(int eflag, int vflag) + 396c443 + < double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + --- + > double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwl6,evdwl12,ecoul,fpair; + 398c445 + < double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + --- + > double r,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + 400c447 + < double philj,switch1,switch2; + --- + > double switch1; + 422a470,476 + > double cut_in_off = cut_respa[2]; + > double cut_in_on = cut_respa[3]; + > + > double cut_in_diff = cut_in_on - cut_in_off; + > double 
cut_in_off_sq = cut_in_off*cut_in_off; + > double cut_in_on_sq = cut_in_on*cut_in_on; + > + 448a503 + > r6inv = r2inv*r2inv*r2inv; + 489d543 + < r6inv = r2inv*r2inv*r2inv; + 493,497c547,548 + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < switch2 = 12.0*rsq * (cut_ljsq-rsq) * + < (rsq-cut_lj_innersq) * denom_lj_inv; + < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + < forcelj = forcelj*switch1 + philj*switch2; + --- + > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + > forcelj = forcelj*switch1; + 533d583 + < r6inv = r2inv*r2inv*r2inv; + 536,538c586,598 + < switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < evdwl *= switch1; + --- + > rinv = sqrt(r2inv); + > r3inv = r2inv*rinv; + > evdwl12 = lj3[itype][jtype]*cut_lj6*denom_lj12 * + > (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); + > evdwl6 = -lj4[itype][jtype]*cut_lj3*denom_lj6 * + > (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); + > evdwl = evdwl12 + evdwl6; + > } else { + > evdwl12 = r6inv*lj3[itype][jtype]*r6inv - + > lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; + > evdwl6 = -lj4[itype][jtype]*r6inv + + > lj4[itype][jtype]*cut_lj_inner3inv*cut_lj3inv; + > evdwl = evdwl12 + evdwl6; + 561d620 + < r6inv = r2inv*r2inv*r2inv; + 565,569c624,625 + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < switch2 = 12.0*rsq * (cut_ljsq-rsq) * + < (rsq-cut_lj_innersq) * denom_lj_inv; + < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + < forcelj = forcelj*switch1 + philj*switch2; + --- + > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + > forcelj = forcelj*switch1; + 572d627 + < r6inv = r2inv*r2inv*r2inv; + 576,580c631,632 + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < switch2 = 12.0*rsq * (cut_ljsq-rsq) * + < (rsq-cut_lj_innersq) * denom_lj_inv; + < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + < forcelj = forcelj*switch1 + philj*switch2; + --- + > (cut_ljsq 
+ 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + > forcelj = forcelj*switch1; + + */ + +// kokkos doesnt support respa, so ignore compute_inner / compute_middle / compute_outer diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h new file mode 100644 index 0000000000..e9a6b5486f --- /dev/null +++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h @@ -0,0 +1,230 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + + +/* ---------------------------------------------------------------------- + + *** DRAFT VERSION 1 (lots of comments to be removed just before merge) *** + + (1) first draft version of PairLJCharmmfswCoulLongKokkos exactly + same as PairLJCharmmCoulLongKokkos but with new class name + + method: track changes from serial kspace pair_lj_charmm_coul_long to + pair_lj_charmmfsw_coul_long and apply to PairLJCharmmfswCoulLongKokkos + + % diff pair_lj_charmm_coul_long.h pair_lj_charmmfsw_coul_long.h + + +------------------------------------------------------------------------- */ + +/* + 16c16 + < PairStyle(lj/charmm/coul/long,PairLJCharmmCoulLong); + --- + > PairStyle(lj/charmmfsw/coul/long,PairLJCharmmfswCoulLong); + + */ + +#ifdef PAIR_CLASS +// clang-format off +PairStyle(lj/charmmfsw/coul/long/kk,PairLJCharmmfswCoulLongKokkos); +PairStyle(lj/charmmfsw/coul/long/kk/device,PairLJCharmmfswCoulLongKokkos); 
+PairStyle(lj/charmmfsw/coul/long/kk/host,PairLJCharmmfswCoulLongKokkos); +// clang-format on +#else + +/* + + 20,21c20,21 + < #ifndef LMP_PAIR_LJ_CHARMM_COUL_LONG_H + < #define LMP_PAIR_LJ_CHARMM_COUL_LONG_H + --- + > #ifndef LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_H + > #define LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_H + + */ + +// clang-format off +#ifndef LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_KOKKOS_H +#define LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_KOKKOS_H + +#include "pair_kokkos.h" +#include "pair_lj_charmmfsw_coul_long.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +/* + + 27c27 +< class PairLJCharmmCoulLong : public Pair { +--- +> class PairLJCharmmfswCoulLong : public Pair { + + */ + +template +class PairLJCharmmfswCoulLongKokkos : public PairLJCharmmfswCoulLong { + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + enum {COUL_FLAG=1}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + + /* + + 29,30c29,30 + < PairLJCharmmCoulLong(class LAMMPS *); + < ~PairLJCharmmCoulLong() override; + --- + > PairLJCharmmfswCoulLong(class LAMMPS *); + > ~PairLJCharmmfswCoulLong() override; + + */ + + PairLJCharmmfswCoulLongKokkos(class LAMMPS *); + ~PairLJCharmmfswCoulLongKokkos() override; + + void compute(int, int) override; + + void init_tables(double cut_coul, double *cut_respa) override; + void init_style() override; + double init_one(int, int) override; + + protected: + + /* + 52c52,54 + < double cut_lj_inner, cut_lj; + --- + > int dihedflag; + > + > double cut_lj_inner, cut_lj, cut_ljinv, cut_lj_innerinv; + 53a56,57 + > double cut_lj3inv, cut_lj_inner3inv, cut_lj3, cut_lj_inner3; + > double cut_lj6inv, cut_lj_inner6inv, cut_lj6, cut_lj_inner6; + 56,60c60 + < double cut_in_off, cut_in_on, cut_out_off, cut_out_on; + < double cut_in_diff, cut_out_diff; + < double cut_in_diff_inv, cut_out_diff_inv; + < double cut_in_off_sq, cut_in_on_sq, cut_out_off_sq, cut_out_on_sq; + < double denom_lj, denom_lj_inv; + --- + > double denom_lj, denom_lj12, denom_lj6; + + */ + + 
// almost nothing to do here, inherited from PairLJCharmmfswCoulLong + // only temporarily need cut_lj_innersq, denom_coul protected variables + // (removed from pair_lj_charmm_coul_long to pair_lj_charmmfsw_coul_long) + // to compile draft version 1, can be removed by draft version 2 + + + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, + const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const; + + Kokkos::DualView k_params; + typename Kokkos::DualView::t_dev_const_um params; + // hardwired to space for 12 atom types + params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typename AT::t_x_array_randomread x; + typename AT::t_x_array c_x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + typename AT::t_float_1d_randomread q; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + typename AT::t_efloat_1d d_eatom; + typename AT::t_virial_array d_vatom; + + int newton_pair; + + typename AT::tdual_ffloat_2d k_cutsq; + typename AT::t_ffloat_2d d_cutsq; + typename AT::t_ffloat_2d d_cut_ljsq; + typename AT::t_ffloat_2d d_cut_coulsq; + + typename AT::t_ffloat_1d_randomread + d_rtable, d_drtable, d_ftable, 
d_dftable, + d_ctable, d_dctable, d_etable, d_detable; + + int neighflag; + int nlocal,nall,eflag,vflag; + + double special_coul[4]; + double special_lj[4]; + double qqrd2e; + + void allocate() override; + + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJCharmmfswCoulLongKokkos*, + NeighListKokkos*); + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJCharmmfswCoulLongKokkos*, + NeighListKokkos*); + friend void pair_virial_fdotr_compute(PairLJCharmmfswCoulLongKokkos*); + +}; + +} + +#endif +#endif + From 3a1d3bb64d604f48e76a930e88eefd9de062b472 Mon Sep 17 00:00:00 2001 From: Mitch Murphy Date: Wed, 3 Jan 2024 02:42:15 -0500 Subject: [PATCH 030/267] second draft... 
applied changes to compute methods --- src/KOKKOS/dihedral_charmmfsw_kokkos.cpp | 95 ++++---- src/KOKKOS/dihedral_charmmfsw_kokkos.h | 28 +-- .../pair_lj_charmmfsw_coul_long_kokkos.cpp | 206 +++++++++--------- 3 files changed, 169 insertions(+), 160 deletions(-) diff --git a/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp index facb723580..b309f3d97f 100644 --- a/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp +++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp @@ -18,7 +18,7 @@ - Stan Moore (SNL) original DihedralCharmmfswKokkos - - Mitch Murphy (alphataubio) - DihedralCharmmfswKokkos update (2023/12) + - Mitch Murphy (alphataubio) - DihedralCharmmfswKokkos update (2024/01) Based on serial dihedral_charmmfsw.cpp lj-fsw sections (force-switched) provided by Robert Meissner and Lucio Colombi Ciacchi of Bremen @@ -138,21 +138,7 @@ DihedralCharmmfswKokkos::~DihedralCharmmfswKokkos() 259a269,270 > else > forcecoul = qqrd2e * q[i1] * q[i4] * (sqrt(r2inv) - r * cut_coulinv14 * cut_coulinv14); - 264,265c275,284 - < ecoul = weight[type] * forcecoul; - < evdwl = r6inv * (lj14_3[itype][jtype] * r6inv - lj14_4[itype][jtype]); - --- - > if (dihedflag) - > ecoul = weight[type] * forcecoul; - > else - > ecoul = weight[type] * qqrd2e * q[i1] * q[i4] * - > (sqrt(r2inv) + r * cut_coulinv14 * cut_coulinv14 - 2.0 * cut_coulinv14); - > evdwl14_12 = r6inv * lj14_3[itype][jtype] * r6inv - - > lj14_3[itype][jtype] * cut_lj_inner6inv * cut_lj6inv; - > evdwl14_6 = - > -lj14_4[itype][jtype] * r6inv + lj14_4[itype][jtype] * cut_lj_inner3inv * cut_lj3inv; - > evdwl = evdwl14_12 + evdwl14_6; - + */ @@ -225,15 +211,15 @@ void DihedralCharmmfswKokkos::compute(int eflag_in, int vflag_in) if (evflag) { if (newton_bond) { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ndihedrallist),*this,evm); + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ndihedrallist),*this,evm); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ndihedrallist),*this,evm); + 
Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ndihedrallist),*this,evm); } } else { if (newton_bond) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,ndihedrallist),*this); + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ndihedrallist),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,ndihedrallist),*this); + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ndihedrallist),*this); } } @@ -303,7 +289,7 @@ void DihedralCharmmfswKokkos::compute(int eflag_in, int vflag_in) template template KOKKOS_INLINE_FUNCTION -void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmCompute, const int &n, EVM_FLOAT& evm) const { +void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmfswCompute, const int &n, EVM_FLOAT& evm) const { // The f array is atomic Kokkos::View::value,Kokkos::MemoryTraits > a_f = f; @@ -480,11 +466,38 @@ void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmCompute if (dihedflag) + > ecoul = weight[type] * forcecoul; + > else + > ecoul = weight[type] * qqrd2e * q[i1] * q[i4] * + > (sqrt(r2inv) + r * cut_coulinv14 * cut_coulinv14 - 2.0 * cut_coulinv14); + > evdwl14_12 = r6inv * lj14_3[itype][jtype] * r6inv - + > lj14_3[itype][jtype] * cut_lj_inner6inv * cut_lj6inv; + > evdwl14_6 = + > -lj14_4[itype][jtype] * r6inv + lj14_4[itype][jtype] * cut_lj_inner3inv * cut_lj3inv; + > evdwl = evdwl14_12 + evdwl14_6; + */ + + const F_FLOAT r = sqrt(rsq); F_FLOAT ecoul = 0.0; F_FLOAT evdwl = 0.0; + F_FLOAT evdwl14_12, evdwl14_6; if (eflag) { - ecoul = d_weight[type] * forcecoul; - evdwl = r6inv * (d_lj14_3(itype,jtype)*r6inv - d_lj14_4(itype,jtype)); + if (dihedflag) + ecoul = d_weight[type] * forcecoul; + else + ecoul = d_weight[type] * qqrd2e * q[i1] * q[i4] * + (sqrt(r2inv) + r * cut_coulinv14 * cut_coulinv14 - 2.0 * cut_coulinv14); + evdwl14_12 = r6inv * d_lj14_3(itype,jtype) * r6inv - + d_lj14_3(itype,jtype) * cut_lj_inner6inv * cut_lj6inv; + evdwl14_6 = + -d_lj14_4(itype,jtype) * r6inv + d_lj14_4(itype,jtype) * cut_lj_inner3inv * cut_lj3inv; + 
evdwl = evdwl14_12 + evdwl14_6; evdwl *= d_weight[type]; } @@ -506,9 +519,9 @@ void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmCompute template KOKKOS_INLINE_FUNCTION -void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmCompute, const int &n) const { +void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmfswCompute, const int &n) const { EVM_FLOAT evm; - this->template operator()(TagDihedralCharmmCompute(), n, evm); + this->template operator()(TagDihedralCharmmfswCompute(), n, evm); } /* ---------------------------------------------------------------------- */ @@ -547,12 +560,12 @@ void DihedralCharmmfswKokkos::coeff(int narg, char **arg) DihedralCharmmfsw::coeff(narg, arg); int nd = atom->ndihedraltypes; - typename AT::tdual_ffloat_1d k_k("DihedralCharmm::k",nd+1); - typename AT::tdual_ffloat_1d k_multiplicity("DihedralCharmm::multiplicity",nd+1); - typename AT::tdual_ffloat_1d k_shift("DihedralCharmm::shift",nd+1); - typename AT::tdual_ffloat_1d k_cos_shift("DihedralCharmm::cos_shift",nd+1); - typename AT::tdual_ffloat_1d k_sin_shift("DihedralCharmm::sin_shift",nd+1); - typename AT::tdual_ffloat_1d k_weight("DihedralCharmm::weight",nd+1); + typename AT::tdual_ffloat_1d k_k("DihedralCharmmfsw::k",nd+1); + typename AT::tdual_ffloat_1d k_multiplicity("DihedralCharmmfsw::multiplicity",nd+1); + typename AT::tdual_ffloat_1d k_shift("DihedralCharmmfsw::shift",nd+1); + typename AT::tdual_ffloat_1d k_cos_shift("DihedralCharmmfsw::cos_shift",nd+1); + typename AT::tdual_ffloat_1d k_sin_shift("DihedralCharmmfsw::sin_shift",nd+1); + typename AT::tdual_ffloat_1d k_weight("DihedralCharmmfsw::weight",nd+1); d_k = k_k.template view(); d_multiplicity = k_multiplicity.template view(); @@ -630,10 +643,10 @@ void DihedralCharmmfswKokkos::init_style() DihedralCharmmfsw::init_style(); int n = atom->ntypes; - DAT::tdual_ffloat_2d k_lj14_1("DihedralCharmm:lj14_1",n+1,n+1); - DAT::tdual_ffloat_2d k_lj14_2("DihedralCharmm:lj14_2",n+1,n+1); - DAT::tdual_ffloat_2d 
k_lj14_3("DihedralCharmm:lj14_3",n+1,n+1); - DAT::tdual_ffloat_2d k_lj14_4("DihedralCharmm:lj14_4",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_1("DihedralCharmmfsw:lj14_1",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_2("DihedralCharmmfsw:lj14_2",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_3("DihedralCharmmfsw:lj14_3",n+1,n+1); + DAT::tdual_ffloat_2d k_lj14_4("DihedralCharmmfsw:lj14_4",n+1,n+1); d_lj14_1 = k_lj14_1.template view(); d_lj14_2 = k_lj14_2.template view(); @@ -682,12 +695,12 @@ void DihedralCharmmfswKokkos::read_restart(FILE *fp) DihedralCharmmfsw::read_restart(fp); int nd = atom->ndihedraltypes; - typename AT::tdual_ffloat_1d k_k("DihedralCharmm::k",nd+1); - typename AT::tdual_ffloat_1d k_multiplicity("DihedralCharmm::multiplicity",nd+1); - typename AT::tdual_ffloat_1d k_shift("DihedralCharmm::shift",nd+1); - typename AT::tdual_ffloat_1d k_cos_shift("DihedralCharmm::cos_shift",nd+1); - typename AT::tdual_ffloat_1d k_sin_shift("DihedralCharmm::sin_shift",nd+1); - typename AT::tdual_ffloat_1d k_weight("DihedralCharmm::weight",nd+1); + typename AT::tdual_ffloat_1d k_k("DihedralCharmmfsw::k",nd+1); + typename AT::tdual_ffloat_1d k_multiplicity("DihedralCharmmfsw::multiplicity",nd+1); + typename AT::tdual_ffloat_1d k_shift("DihedralCharmmfsw::shift",nd+1); + typename AT::tdual_ffloat_1d k_cos_shift("DihedralCharmmfsw::cos_shift",nd+1); + typename AT::tdual_ffloat_1d k_sin_shift("DihedralCharmmfsw::sin_shift",nd+1); + typename AT::tdual_ffloat_1d k_weight("DihedralCharmmfsw::weight",nd+1); d_k = k_k.template view(); d_multiplicity = k_multiplicity.template view(); diff --git a/src/KOKKOS/dihedral_charmmfsw_kokkos.h b/src/KOKKOS/dihedral_charmmfsw_kokkos.h index 413945826f..8b57b28d0c 100644 --- a/src/KOKKOS/dihedral_charmmfsw_kokkos.h +++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.h @@ -78,14 +78,6 @@ DihedralStyle(charmmfsw/kk/host,DihedralCharmmfswKokkos); /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:31:8: note: previous definition is here struct 
s_EVM_FLOAT { ^ - In file included from /Users/mitch/Dropbox/lammps/lammps/src/force.cpp:18: - In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: - /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:104:8: error: redefinition of 'TagDihedralCharmmCompute' - struct TagDihedralCharmmCompute{}; - ^ - /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:70:8: note: previous definition is here - struct TagDihedralCharmmCompute{}; - ^ In file included from /Users/mitch/Dropbox/lammps/lammps/src/lammps.cpp:23: In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:65:8: error: redefinition of 's_EVM_FLOAT' @@ -94,14 +86,6 @@ DihedralStyle(charmmfsw/kk/host,DihedralCharmmfswKokkos); /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:31:8: note: previous definition is here struct s_EVM_FLOAT { ^ - In file included from /Users/mitch/Dropbox/lammps/lammps/src/lammps.cpp:23: - In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: - /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:104:8: error: redefinition of 'TagDihedralCharmmCompute' - struct TagDihedralCharmmCompute{}; - ^ - /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:70:8: note: previous definition is here - struct TagDihedralCharmmCompute{}; - ^ */ @@ -146,10 +130,12 @@ struct s_EVM_FLOAT { }; typedef struct s_EVM_FLOAT EVM_FLOAT; -template -struct TagDihedralCharmmCompute{}; + */ + +template +struct TagDihedralCharmmfswCompute{}; + -*/ /* 27c27 @@ -181,11 +167,11 @@ class DihedralCharmmfswKokkos : public DihedralCharmmfsw { template KOKKOS_INLINE_FUNCTION - void operator()(TagDihedralCharmmCompute, const int&, EVM_FLOAT&) const; + void operator()(TagDihedralCharmmfswCompute, const int&, EVM_FLOAT&) const; template 
KOKKOS_INLINE_FUNCTION - void operator()(TagDihedralCharmmCompute, const int&) const; + void operator()(TagDihedralCharmmfswCompute, const int&) const; //template KOKKOS_INLINE_FUNCTION diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp index 88efec5fda..7c1da2479f 100644 --- a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp @@ -18,7 +18,7 @@ - Ray Shan (SNL) - original PairLJCharmmCoulLongKokkos - - Mitch Murphy (alphataubio) - PairLJCharmmfswCoulLongKokkos update (2023/12) + - Mitch Murphy (alphataubio) - PairLJCharmmfswCoulLongKokkos update (2024/01) Based on serial kspace lj-fsw sections (force-switched) provided by Robert Meissner and Lucio Colombi Ciacchi of Bremen University, Germany, @@ -113,9 +113,6 @@ using namespace LAMMPS_NS; template PairLJCharmmfswCoulLongKokkos::PairLJCharmmfswCoulLongKokkos(LAMMPS *lmp):PairLJCharmmfswCoulLong(lmp) { - - // pair_lj_charmmfsw_coul_long_kokkos.cpp:112:28: error: qualified reference to 'PairLJCharmmfswCoulLong' is a constructor name rather than a type in this context - // ??? PairLJCharmmfswCoulLong::PairLJCharmmfswCoulLong(lmp); respa_enable = 0; @@ -147,15 +144,10 @@ PairLJCharmmfswCoulLongKokkos::PairLJCharmmfswCoulLongKokkos(LAMMPS */ -// added superclass constructor to inherit from PairLJCharmmfswCoulLong - template PairLJCharmmfswCoulLongKokkos::~PairLJCharmmfswCoulLongKokkos() { - // pair_lj_charmmfsw_coul_long_kokkos.cpp:150:28: error: qualified reference to 'PairLJCharmmfswCoulLong' is a constructor name rather than a type in this context - // ??? 
PairLJCharmmfswCoulLong::PairLJCharmmfswCoulLong(); - if (copymode) return; if (allocated) { @@ -186,39 +178,7 @@ PairLJCharmmfswCoulLongKokkos::~PairLJCharmmfswCoulLongKokkos() > double switch1; 96d120 < double rsq; - 174,179c198,200 - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < switch2 = 12.0*rsq * (cut_ljsq-rsq) * - < (rsq-cut_lj_innersq) * denom_lj_inv; - < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); - < forcelj = forcelj*switch1 + philj*switch2; - < } - --- - > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - > forcelj = forcelj*switch1; - > } - 205d225 - < evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); - 207,209c227,240 - < switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < evdwl *= switch1; - --- - > r = sqrt(rsq); - > rinv = 1.0/r; - > r3inv = rinv*rinv*rinv; - > evdwl12 = lj3[itype][jtype]*cut_lj6*denom_lj12 * - > (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); - > evdwl6 = -lj4[itype][jtype]*cut_lj3*denom_lj6 * - > (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); - > evdwl = evdwl12 + evdwl6; - > } else { - > evdwl12 = r6inv*lj3[itype][jtype]*r6inv - - > lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; - > evdwl6 = -lj4[itype][jtype]*r6inv + - > lj4[itype][jtype]*cut_lj_inner3inv*cut_lj3inv; - > evdwl = evdwl12 + evdwl6; - + */ template @@ -320,20 +280,32 @@ compute_fpair(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, const int& itype, const int& jtype) const { const F_FLOAT r2inv = 1.0/rsq; const F_FLOAT r6inv = r2inv*r2inv*r2inv; - F_FLOAT forcelj, switch1, switch2, englj; + F_FLOAT forcelj, switch1; forcelj = r6inv * ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv - (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2)); if (rsq > cut_lj_innersq) { + + /* + 174,179c198,200 + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < switch2 = 12.0*rsq * (cut_ljsq-rsq) * + < (rsq-cut_lj_innersq) * 
denom_lj_inv; + < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + < forcelj = forcelj*switch1 + philj*switch2; + < } + --- + > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + > forcelj = forcelj*switch1; + > } + + */ + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - switch2 = 12.0*rsq * (cut_ljsq-rsq) * (rsq-cut_lj_innersq) / denom_lj; - englj = r6inv * - ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv - - (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)); - forcelj = forcelj*switch1 + englj*switch2; + forcelj = forcelj*switch1; } return forcelj*r2inv; @@ -350,20 +322,52 @@ compute_evdwl(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, const int& itype, const int& jtype) const { const F_FLOAT r2inv = 1.0/rsq; const F_FLOAT r6inv = r2inv*r2inv*r2inv; - F_FLOAT englj, switch1; + const F_FLOAT r = sqrt(rsq); + const F_FLOAT rinv = 1.0/r; + const F_FLOAT r3inv = rinv*rinv*rinv; + F_FLOAT englj, englj12, englj6; + + /* + 205d225 + < evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + 207,209c227,240 + < switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; + < evdwl *= switch1; + --- + > r = sqrt(rsq); + > rinv = 1.0/r; + > r3inv = rinv*rinv*rinv; + > evdwl12 = lj3[itype][jtype]*cut_lj6*denom_lj12 * + > (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); + > evdwl6 = -lj4[itype][jtype]*cut_lj3*denom_lj6 * + > (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); + > evdwl = evdwl12 + evdwl6; + > } else { + > evdwl12 = r6inv*lj3[itype][jtype]*r6inv - + > lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; + > evdwl6 = -lj4[itype][jtype]*r6inv + + > lj4[itype][jtype]*cut_lj_inner3inv*cut_lj3inv; + > evdwl = evdwl12 + evdwl6; + + */ - englj = r6inv * - ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv - - (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)); if (rsq > 
cut_lj_innersq) { - switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * - (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - englj *= switch1; + englj12 = (STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*cut_lj6* + denom_lj12 * (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); + englj6 = -(STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)* + cut_lj3*denom_lj6 * (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); + englj = englj12 + englj6; + } else { + englj12 = r6inv*lj3[itype][jtype]*r6inv - + lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; + englj6 = -(STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)*r6inv + + (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)* + cut_lj_inner3inv*cut_lj3inv; + englj = englj12 + englj6; } - return englj; - } /* ---------------------------------------------------------------------- @@ -458,7 +462,7 @@ void PairLJCharmmfswCoulLongKokkos::allocate() d_cut_coulsq = typename AT::t_ffloat_2d("pair:cut_coulsq",n+1,n+1); - k_params = Kokkos::DualView("PairLJCharmmCoulLong::params",n+1,n+1); + k_params = Kokkos::DualView("PairLJCharmmfswCoulLong::params",n+1,n+1); params = k_params.template view(); } @@ -574,49 +578,11 @@ void PairLJCharmmfswCoulLongKokkos::init_tables(double cut_coul, dou < void PairLJCharmmCoulLong::init_style() --- > void PairLJCharmmfswCoulLong::init_style() - 686c737 - < "Pair style lj/charmm/coul/long requires atom attribute q"); - --- - > "Pair style lj/charmmfsw/coul/long requires atom attribute q"); - 688c739 + 688c739 < // request regular or rRESPA neighbor list --- > // request regular or rRESPA neighbor lists - 705a757,766 - > cut_ljinv = 1.0/cut_lj; - > cut_lj_innerinv = 1.0/cut_lj_inner; - > cut_lj3 = cut_lj * cut_lj * cut_lj; - > cut_lj3inv = cut_ljinv * cut_ljinv * cut_ljinv; - > cut_lj_inner3inv = cut_lj_innerinv * cut_lj_innerinv * cut_lj_innerinv; - > cut_lj_inner3 = cut_lj_inner * cut_lj_inner * cut_lj_inner; - > cut_lj6 = cut_ljsq * cut_ljsq * cut_ljsq; - > 
cut_lj6inv = cut_lj3inv * cut_lj3inv; - > cut_lj_inner6inv = cut_lj_inner3inv * cut_lj_inner3inv; - > cut_lj_inner6 = cut_lj_innersq * cut_lj_innersq * cut_lj_innersq; - 709,711c770,773 - < denom_lj = ( (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * - < (cut_ljsq-cut_lj_innersq) ); - < denom_lj_inv = 1.0 / denom_lj; - --- - > denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * - > (cut_ljsq-cut_lj_innersq); - > denom_lj12 = 1.0/(cut_lj6 - cut_lj_inner6); - > denom_lj6 = 1.0/(cut_lj3 - cut_lj_inner3); - 718,730d779 - < cut_in_off = cut_respa[0]; - < cut_in_on = cut_respa[1]; - < cut_out_on = cut_respa[2]; - < cut_out_off = cut_respa[3]; - < - < cut_in_diff = cut_in_on - cut_in_off; - < cut_out_diff = cut_out_off - cut_out_on; - < cut_in_diff_inv = 1.0 / (cut_in_diff); - < cut_out_diff_inv = 1.0 / (cut_out_diff); - < cut_in_off_sq = cut_in_off*cut_in_off; - < cut_in_on_sq = cut_in_on*cut_in_on; - < cut_out_on_sq = cut_out_on*cut_out_on; - < cut_out_off_sq = cut_out_off*cut_out_off; - + */ template @@ -689,6 +655,8 @@ template class PairLJCharmmfswCoulLongKokkos; + + /* 80d105 < memory->destroy(offset); @@ -706,6 +674,48 @@ template class PairLJCharmmfswCoulLongKokkos; < void PairLJCharmmCoulLong::coeff(int narg, char **arg) --- > void PairLJCharmmfswCoulLong::coeff(int narg, char **arg) + + 686c737 + < "Pair style lj/charmm/coul/long requires atom attribute q"); + --- + > "Pair style lj/charmmfsw/coul/long requires atom attribute q"); + + 705a757,766 + > cut_ljinv = 1.0/cut_lj; + > cut_lj_innerinv = 1.0/cut_lj_inner; + > cut_lj3 = cut_lj * cut_lj * cut_lj; + > cut_lj3inv = cut_ljinv * cut_ljinv * cut_ljinv; + > cut_lj_inner3inv = cut_lj_innerinv * cut_lj_innerinv * cut_lj_innerinv; + > cut_lj_inner3 = cut_lj_inner * cut_lj_inner * cut_lj_inner; + > cut_lj6 = cut_ljsq * cut_ljsq * cut_ljsq; + > cut_lj6inv = cut_lj3inv * cut_lj3inv; + > cut_lj_inner6inv = cut_lj_inner3inv * cut_lj_inner3inv; + > cut_lj_inner6 = cut_lj_innersq * cut_lj_innersq * 
cut_lj_innersq; + 709,711c770,773 + < denom_lj = ( (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * + < (cut_ljsq-cut_lj_innersq) ); + < denom_lj_inv = 1.0 / denom_lj; + --- + > denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * + > (cut_ljsq-cut_lj_innersq); + > denom_lj12 = 1.0/(cut_lj6 - cut_lj_inner6); + > denom_lj6 = 1.0/(cut_lj3 - cut_lj_inner3); + 718,730d779 + < cut_in_off = cut_respa[0]; + < cut_in_on = cut_respa[1]; + < cut_out_on = cut_respa[2]; + < cut_out_off = cut_respa[3]; + < + < cut_in_diff = cut_in_on - cut_in_off; + < cut_out_diff = cut_out_off - cut_out_on; + < cut_in_diff_inv = 1.0 / (cut_in_diff); + < cut_out_diff_inv = 1.0 / (cut_out_diff); + < cut_in_off_sq = cut_in_off*cut_in_off; + < cut_in_on_sq = cut_in_on*cut_in_on; + < cut_out_on_sq = cut_out_on*cut_out_on; + < cut_out_off_sq = cut_out_off*cut_out_off; + + 752c801 < double PairLJCharmmCoulLong::init_one(int i, int j) --- From 163805bc33176f58f9ed579f2fac2f7ed8734ce2 Mon Sep 17 00:00:00 2001 From: Mitch Murphy Date: Wed, 3 Jan 2024 15:18:34 -0500 Subject: [PATCH 031/267] removed scaffolding comments and fixed "(STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)" in compute_evdwl --- src/KOKKOS/dihedral_charmmfsw_kokkos.cpp | 117 ----- src/KOKKOS/dihedral_charmmfsw_kokkos.h | 138 +----- .../pair_lj_charmmfsw_coul_long_kokkos.cpp | 450 +----------------- .../pair_lj_charmmfsw_coul_long_kokkos.h | 84 +--- 4 files changed, 7 insertions(+), 782 deletions(-) diff --git a/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp index b309f3d97f..831e7d9b22 100644 --- a/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp +++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp @@ -27,29 +27,6 @@ ------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - - *** DRAFT VERSION 1 (lots of comments to be removed just before merge) *** - - (1) first draft version 
of DihedralCharmmfswKokkos exactly - same as DihedralCharmmfswKokkos but with new class name - - method: track changes from serial kspace dihedral_charmm to - dihedral_charmmfsw and apply to DihedralCharmmfswKokkos - - % diff dihedral_charmm.cpp dihedral_charmmfsw.cpp - -------------------------------------------------------------------------- */ - -/* - 18c21 - < #include "dihedral_charmm.h" - --- - > #include "dihedral_charmmfsw.h" - - */ - #include "dihedral_charmmfsw_kokkos.h" #include "atom_kokkos.h" @@ -526,15 +503,6 @@ void DihedralCharmmfswKokkos::operator()(TagDihedralCharmmfswCompute /* ---------------------------------------------------------------------- */ -/* - - 288c307 - < void DihedralCharmm::allocate() - --- - > void DihedralCharmmfsw::allocate() - - */ - template void DihedralCharmmfswKokkos::allocate() { @@ -545,15 +513,6 @@ void DihedralCharmmfswKokkos::allocate() set coeffs for one or more types ------------------------------------------------------------------------- */ -/* - - 308c327 - < void DihedralCharmm::coeff(int narg, char **arg) - --- - > void DihedralCharmmfsw::coeff(int narg, char **arg) - - */ - template void DihedralCharmmfswKokkos::coeff(int narg, char **arg) { @@ -603,40 +562,6 @@ void DihedralCharmmfswKokkos::coeff(int narg, char **arg) error check and initialize all values needed for force computation ------------------------------------------------------------------------- */ -/* - - 350c369 - < void DihedralCharmm::init_style() - --- - > void DihedralCharmmfsw::init_style() - 382a402,425 - > - > // constants for applying force switch (LJ) and force_shift (coul) - > // to 1/4 dihedral atoms to match CHARMM pairwise interactions - > - > int itmp; - > int *p_dihedflag = (int *) force->pair->extract("dihedflag", itmp); - > auto p_cutljinner = (double *) force->pair->extract("cut_lj_inner", itmp); - > auto p_cutlj = (double *) force->pair->extract("cut_lj", itmp); - > auto p_cutcoul = (double *) force->pair->extract("cut_coul", 
itmp); - > - > if (p_cutcoul == nullptr || p_cutljinner == nullptr || p_cutlj == nullptr || - > p_dihedflag == nullptr) - > error->all(FLERR, "Dihedral charmmfsw is incompatible with Pair style"); - > - > dihedflag = *p_dihedflag; - > cut_coul14 = *p_cutcoul; - > cut_lj_inner14 = *p_cutljinner; - > cut_lj14 = *p_cutlj; - > - > cut_coulinv14 = 1 / cut_coul14; - > cut_lj_inner3inv = (1 / cut_lj_inner14) * (1 / cut_lj_inner14) * (1 / cut_lj_inner14); - > cut_lj_inner6inv = cut_lj_inner3inv * cut_lj_inner3inv; - > cut_lj3inv = (1 / cut_lj14) * (1 / cut_lj14) * (1 / cut_lj14); - > cut_lj6inv = cut_lj3inv * cut_lj3inv; - - */ - template void DihedralCharmmfswKokkos::init_style() { @@ -681,14 +606,6 @@ void DihedralCharmmfswKokkos::init_style() proc 0 reads coeffs from restart file, bcasts them ------------------------------------------------------------------------- */ -/* - - 402c445 - < void DihedralCharmm::read_restart(FILE *fp) - --- - > void DihedralCharmmfsw::read_restart(FILE *fp) - - */ template void DihedralCharmmfswKokkos::read_restart(FILE *fp) { @@ -968,37 +885,3 @@ template class DihedralCharmmfswKokkos; #endif } - - -/* - - - 355c374 - < error->all(FLERR, "Dihedral style charmm must be set to same r-RESPA level as 'pair'"); - --- - > error->all(FLERR, "Dihedral style charmmfsw must be set to same r-RESPA level as 'pair'"); - 357c376 - < error->all(FLERR, "Dihedral style charmm must be set to same r-RESPA level as 'outer'"); - --- - > error->all(FLERR, "Dihedral style charmmfsw must be set to same r-RESPA level as 'outer'"); - 373c392 - < error->all(FLERR, "Dihedral charmm is incompatible with Pair style"); - --- - > error->all(FLERR, "Dihedral charmmfsw is incompatible with Pair style"); - 380c399 - < error->all(FLERR, "Dihedral charmm is incompatible with Pair style"); - --- - > error->all(FLERR, "Dihedral charmmfsw is incompatible with Pair style"); - - 389c432 - < void DihedralCharmm::write_restart(FILE *fp) - --- - > void 
DihedralCharmmfsw::write_restart(FILE *fp) - 430c473 - < void DihedralCharmm::write_data(FILE *fp) - --- - > void DihedralCharmmfsw::write_data(FILE *fp) - - */ - -// nothing to do for all these, inherited from DihedralCharmmfsw diff --git a/src/KOKKOS/dihedral_charmmfsw_kokkos.h b/src/KOKKOS/dihedral_charmmfsw_kokkos.h index 8b57b28d0c..c3842ca01d 100644 --- a/src/KOKKOS/dihedral_charmmfsw_kokkos.h +++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.h @@ -11,31 +11,6 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -/* ---------------------------------------------------------------------- - - *** DRAFT VERSION 1 (lots of comments to be removed just before merge) *** - - (1) first draft version of DihedralCharmmfswKokkos exactly - same as DihedralCharmmKokkos but with new class name - - (2) second draft version: nothing changed in header file - - method: track changes from serial kspace dihedral_charmm to - dihedral_charmmfsw and apply to DihedralCharmmKokkos - - % diff dihedral_charmm.h dihedral_charmmfsw.h - -------------------------------------------------------------------------- */ - -/* - - 16c16 - < DihedralStyle(charmm,DihedralCharmm); - --- - > DihedralStyle(charmmfsw,DihedralCharmmfsw); - - */ - #ifdef DIHEDRAL_CLASS // clang-format off DihedralStyle(charmmfsw/kk,DihedralCharmmfswKokkos); @@ -44,17 +19,6 @@ DihedralStyle(charmmfsw/kk/host,DihedralCharmmfswKokkos); // clang-format on #else -/* - - 20,21c20,21 - < #ifndef LMP_DIHEDRAL_CHARMM_H - < #define LMP_DIHEDRAL_CHARMM_H - --- - > #ifndef LMP_DIHEDRAL_CHARMMFSW_H - > #define LMP_DIHEDRAL_CHARMMFSW_H - - */ - // clang-format off #ifndef LMP_DIHEDRAL_CHARMMFSW_KOKKOS_H #define LMP_DIHEDRAL_CHARMMFSW_KOKKOS_H @@ -62,95 +26,17 @@ DihedralStyle(charmmfsw/kk/host,DihedralCharmmfswKokkos); #include "dihedral_charmmfsw.h" #include "kokkos_type.h" #include "dihedral_charmm_kokkos.h" - -/* - s_EVM_FLOAT and TagDihedralCharmmCompute 
conflict because style_dihedral.h - includes both dihedral_charmm_kokkos.h and dihedral_charmmfsw_kokkos.h - so comment out definitions in here and include dihedral_charmm_kokkos.h - in dihedral_charmmfsw_kokkos.h: - - In file included from /Users/mitch/Dropbox/lammps/lammps/src/force.cpp:18: - In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: - /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:65:8: error: redefinition of 's_EVM_FLOAT' - struct s_EVM_FLOAT { - ^ - /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:31:8: note: previous definition is here - struct s_EVM_FLOAT { - ^ - In file included from /Users/mitch/Dropbox/lammps/lammps/src/lammps.cpp:23: - In file included from /Users/mitch/Dropbox/lammps/lammps/build/styles/style_dihedral.h:4: - /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmmfsw_kokkos.h:65:8: error: redefinition of 's_EVM_FLOAT' - struct s_EVM_FLOAT { - ^ - /Users/mitch/Dropbox/lammps/lammps/src/KOKKOS/dihedral_charmm_kokkos.h:31:8: note: previous definition is here - struct s_EVM_FLOAT { - ^ - - */ - namespace LAMMPS_NS { -/* -struct s_EVM_FLOAT { - E_FLOAT evdwl; - E_FLOAT ecoul; - E_FLOAT emol; - F_FLOAT v[6]; - F_FLOAT vp[6]; - KOKKOS_INLINE_FUNCTION - s_EVM_FLOAT() { - evdwl = 0; - ecoul = 0; - emol = 0; - v[0] = 0; v[1] = 0; v[2] = 0; - v[3] = 0; v[4] = 0; v[5] = 0; - vp[0] = 0; vp[1] = 0; vp[2] = 0; - vp[3] = 0; vp[4] = 0; vp[5] = 0; - } - - KOKKOS_INLINE_FUNCTION - void operator+=(const s_EVM_FLOAT &rhs) { - evdwl += rhs.evdwl; - ecoul += rhs.ecoul; - emol += rhs.emol; - v[0] += rhs.v[0]; - v[1] += rhs.v[1]; - v[2] += rhs.v[2]; - v[3] += rhs.v[3]; - v[4] += rhs.v[4]; - v[5] += rhs.v[5]; - vp[0] += rhs.vp[0]; - vp[1] += rhs.vp[1]; - vp[2] += rhs.vp[2]; - vp[3] += rhs.vp[3]; - vp[4] += rhs.vp[4]; - vp[5] += rhs.vp[5]; - } -}; -typedef struct s_EVM_FLOAT EVM_FLOAT; - - */ +// s_EVM_FLOAT definition in here conflicted because style_dihedral.h 
+// includes both dihedral_charmm_kokkos.h and dihedral_charmmfsw_kokkos.h +// so remove definition of s_EVM_FLOAT in here and include +// dihedral_charmm_kokkos.h template struct TagDihedralCharmmfswCompute{}; - - -/* - 27c27 - < class DihedralCharmm : public Dihedral { - --- - > class DihedralCharmmfsw : public Dihedral { - 29,30c29,30 - < DihedralCharmm(class LAMMPS *); - < ~DihedralCharmm() override; - --- - > DihedralCharmmfsw(class LAMMPS *); - > ~DihedralCharmmfsw() override; - - */ - template class DihedralCharmmfswKokkos : public DihedralCharmmfsw { public: @@ -235,19 +121,3 @@ class DihedralCharmmfswKokkos : public DihedralCharmmfsw { #endif #endif - - -/* - - 38a39,43 - > int implicit, weightflag, dihedflag; - > double cut_lj_inner14, cut_lj14, cut_coul14; - > double evdwl14_12, evdwl14_6, cut_coulinv14; - > double cut_lj_inner3inv, cut_lj_inner6inv, cut_lj3inv, cut_lj6inv; - > - 42d46 - < int implicit, weightflag; - - */ - -// nothing to do here, inherited from DihedralCharmmfsw diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp index 7c1da2479f..191626fc9f 100644 --- a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp @@ -27,35 +27,6 @@ ------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - - *** DRAFT VERSION 1 (lots of comments to be removed just before merge) *** - - (1) first draft version of PairLJCharmmfswCoulLongKokkos almost exactly - same as PairLJCharmmCoulLongKokkos but with new class name - - method: track changes from serial kspace pair_lj_charmm_coul_long to - pair_lj_charmmfsw_coul_long and apply to PairLJCharmmCoulLongKokkos - - ISSUES: - - (A) charmm denom_lj_inv cache , is it to optimize code because division - is slower that multiplication ?? 
- - - - ------------------------------------------------------------------------- */ - - -/* - 19c23 - < #include "pair_lj_charmm_coul_long.h" - --- - > #include "pair_lj_charmmfsw_coul_long.h" - - */ - #include "pair_lj_charmmfsw_coul_long_kokkos.h" #include "atom_kokkos.h" @@ -86,30 +57,6 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -/* - 47c51 - < PairLJCharmmCoulLong::PairLJCharmmCoulLong(LAMMPS *lmp) : Pair(lmp) - --- - > PairLJCharmmfswCoulLong::PairLJCharmmfswCoulLong(LAMMPS *lmp) : Pair(lmp) - 55a60,72 - > - > // short-range/long-range flag accessed by DihedralCharmmfsw - > - > dihedflag = 1; - > - > // switch qqr2e from LAMMPS value to CHARMM value - > - > if (strcmp(update->unit_style,"real") == 0) { - > if ((comm->me == 0) && (force->qqr2e != force->qqr2e_charmm_real)) - > error->message(FLERR,"Switching to CHARMM coulomb energy" - > " conversion constant"); - > force->qqr2e = force->qqr2e_charmm_real; - > } - - */ - -// added superclass constructor to inherit from PairLJCharmmfswCoulLong - template PairLJCharmmfswCoulLongKokkos::PairLJCharmmfswCoulLongKokkos(LAMMPS *lmp):PairLJCharmmfswCoulLong(lmp) { @@ -125,25 +72,6 @@ PairLJCharmmfswCoulLongKokkos::PairLJCharmmfswCoulLongKokkos(LAMMPS /* ---------------------------------------------------------------------- */ -/* - - 60c77 - < PairLJCharmmCoulLong::~PairLJCharmmCoulLong() - --- - > PairLJCharmmfswCoulLong::~PairLJCharmmfswCoulLong() - 61a79,87 - > // switch qqr2e back from CHARMM value to LAMMPS value - > - > if (update && strcmp(update->unit_style,"real") == 0) { - > if ((comm->me == 0) && (force->qqr2e == force->qqr2e_charmm_real)) - > error->message(FLERR,"Restoring original LAMMPS coulomb energy" - > " conversion constant"); - > force->qqr2e = force->qqr2e_lammps_real; - > } - > - - */ - template PairLJCharmmfswCoulLongKokkos::~PairLJCharmmfswCoulLongKokkos() { @@ -159,28 +87,6 @@ 
PairLJCharmmfswCoulLongKokkos::~PairLJCharmmfswCoulLongKokkos() /* ---------------------------------------------------------------------- */ -/* - 87c112 - < void PairLJCharmmCoulLong::compute(int eflag, int vflag) - --- - > void PairLJCharmmfswCoulLong::compute(int eflag, int vflag) - 90c115 - < double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; - --- - > double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwl12,evdwl6,ecoul,fpair; - 92c117 - < double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; - --- - > double r,rinv,r2inv,r3inv,r6inv,rsq,forcecoul,forcelj,factor_coul,factor_lj; - 94c119 - < double philj,switch1,switch2; - --- - > double switch1; - 96d120 - < double rsq; - - */ - template void PairLJCharmmfswCoulLongKokkos::compute(int eflag_in, int vflag_in) { @@ -287,22 +193,6 @@ compute_fpair(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2)); if (rsq > cut_lj_innersq) { - - /* - 174,179c198,200 - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < switch2 = 12.0*rsq * (cut_ljsq-rsq) * - < (rsq-cut_lj_innersq) * denom_lj_inv; - < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); - < forcelj = forcelj*switch1 + philj*switch2; - < } - --- - > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - > forcelj = forcelj*switch1; - > } - - */ - switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; forcelj = forcelj*switch1; @@ -327,32 +217,6 @@ compute_evdwl(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, const F_FLOAT r3inv = rinv*rinv*rinv; F_FLOAT englj, englj12, englj6; - /* - 205d225 - < evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); - 207,209c227,240 - < switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < evdwl *= switch1; - --- - > r = sqrt(rsq); - > rinv = 1.0/r; - > r3inv = rinv*rinv*rinv; - > evdwl12 = 
lj3[itype][jtype]*cut_lj6*denom_lj12 * - > (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); - > evdwl6 = -lj4[itype][jtype]*cut_lj3*denom_lj6 * - > (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); - > evdwl = evdwl12 + evdwl6; - > } else { - > evdwl12 = r6inv*lj3[itype][jtype]*r6inv - - > lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; - > evdwl6 = -lj4[itype][jtype]*r6inv + - > lj4[itype][jtype]*cut_lj_inner3inv*cut_lj3inv; - > evdwl = evdwl12 + evdwl6; - - */ - - if (rsq > cut_lj_innersq) { englj12 = (STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*cut_lj6* denom_lj12 * (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); @@ -360,8 +224,8 @@ compute_evdwl(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/, cut_lj3*denom_lj6 * (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); englj = englj12 + englj6; } else { - englj12 = r6inv*lj3[itype][jtype]*r6inv - - lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; + englj12 = r6inv*(STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv - + (STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*cut_lj_inner6inv*cut_lj6inv; englj6 = -(STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)*r6inv + (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)* cut_lj_inner3inv*cut_lj3inv; @@ -573,18 +437,6 @@ void PairLJCharmmfswCoulLongKokkos::init_tables(double cut_coul, dou init specific to this pair style ------------------------------------------------------------------------- */ -/* - 682c733 - < void PairLJCharmmCoulLong::init_style() - --- - > void PairLJCharmmfswCoulLong::init_style() - 688c739 - < // request regular or rRESPA neighbor list - --- - > // request regular or rRESPA neighbor lists - - */ - template void PairLJCharmmfswCoulLongKokkos::init_style() { @@ -651,301 +503,3 @@ template class PairLJCharmmfswCoulLongKokkos; template class PairLJCharmmfswCoulLongKokkos; #endif } - - - - - - -/* - 80d105 - < memory->destroy(offset); - 598c650 - < void PairLJCharmmCoulLong::allocate() - --- - 
> void PairLJCharmmfswCoulLong::allocate() - 622d673 - < memory->create(offset,n+1,n+1,"pair:offset"); - 631c682 - < void PairLJCharmmCoulLong::settings(int narg, char **arg) - --- - > void PairLJCharmmfswCoulLong::settings(int narg, char **arg) - 645c696 - < void PairLJCharmmCoulLong::coeff(int narg, char **arg) - --- - > void PairLJCharmmfswCoulLong::coeff(int narg, char **arg) - - 686c737 - < "Pair style lj/charmm/coul/long requires atom attribute q"); - --- - > "Pair style lj/charmmfsw/coul/long requires atom attribute q"); - - 705a757,766 - > cut_ljinv = 1.0/cut_lj; - > cut_lj_innerinv = 1.0/cut_lj_inner; - > cut_lj3 = cut_lj * cut_lj * cut_lj; - > cut_lj3inv = cut_ljinv * cut_ljinv * cut_ljinv; - > cut_lj_inner3inv = cut_lj_innerinv * cut_lj_innerinv * cut_lj_innerinv; - > cut_lj_inner3 = cut_lj_inner * cut_lj_inner * cut_lj_inner; - > cut_lj6 = cut_ljsq * cut_ljsq * cut_ljsq; - > cut_lj6inv = cut_lj3inv * cut_lj3inv; - > cut_lj_inner6inv = cut_lj_inner3inv * cut_lj_inner3inv; - > cut_lj_inner6 = cut_lj_innersq * cut_lj_innersq * cut_lj_innersq; - 709,711c770,773 - < denom_lj = ( (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * - < (cut_ljsq-cut_lj_innersq) ); - < denom_lj_inv = 1.0 / denom_lj; - --- - > denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) * - > (cut_ljsq-cut_lj_innersq); - > denom_lj12 = 1.0/(cut_lj6 - cut_lj_inner6); - > denom_lj6 = 1.0/(cut_lj3 - cut_lj_inner3); - 718,730d779 - < cut_in_off = cut_respa[0]; - < cut_in_on = cut_respa[1]; - < cut_out_on = cut_respa[2]; - < cut_out_off = cut_respa[3]; - < - < cut_in_diff = cut_in_on - cut_in_off; - < cut_out_diff = cut_out_off - cut_out_on; - < cut_in_diff_inv = 1.0 / (cut_in_diff); - < cut_out_diff_inv = 1.0 / (cut_out_diff); - < cut_in_off_sq = cut_in_off*cut_in_off; - < cut_in_on_sq = cut_in_on*cut_in_on; - < cut_out_on_sq = cut_out_on*cut_out_on; - < cut_out_off_sq = cut_out_off*cut_out_off; - - - 752c801 - < double PairLJCharmmCoulLong::init_one(int i, int j) - --- - > 
double PairLJCharmmfswCoulLong::init_one(int i, int j) - 790c839 - < void PairLJCharmmCoulLong::write_restart(FILE *fp) - --- - > void PairLJCharmmfswCoulLong::write_restart(FILE *fp) - 811c860 - < void PairLJCharmmCoulLong::read_restart(FILE *fp) - --- - > void PairLJCharmmfswCoulLong::read_restart(FILE *fp) - 842c891 - < void PairLJCharmmCoulLong::write_restart_settings(FILE *fp) - --- - > void PairLJCharmmfswCoulLong::write_restart_settings(FILE *fp) - 857c906 - < void PairLJCharmmCoulLong::read_restart_settings(FILE *fp) - --- - > void PairLJCharmmfswCoulLong::read_restart_settings(FILE *fp) - 882c931 - < void PairLJCharmmCoulLong::write_data(FILE *fp) - --- - > void PairLJCharmmfswCoulLong::write_data(FILE *fp) - 893c942 - < void PairLJCharmmCoulLong::write_data_all(FILE *fp) - --- - > void PairLJCharmmfswCoulLong::write_data_all(FILE *fp) - 903c952 - < double PairLJCharmmCoulLong::single(int i, int j, int itype, int jtype, - --- - > double PairLJCharmmfswCoulLong::single(int i, int j, int itype, int jtype, - 908,909c957,958 - < double r2inv,r6inv,r,grij,expm2,t,erfc,prefactor; - < double switch1,switch2,fraction,table,forcecoul,forcelj,phicoul,philj; - --- - > double r,rinv,r2inv,r3inv,r6inv,grij,expm2,t,erfc,prefactor; - > double switch1,fraction,table,forcecoul,forcelj,phicoul,philj,philj12,philj6; - 911a961,962 - > r = sqrt(rsq); - > rinv = 1.0/r; - 939c990,991 - < r6inv = r2inv*r2inv*r2inv; - --- - > r3inv = rinv*rinv*rinv; - > r6inv = r3inv*r3inv; - 943,947c995,996 - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < switch2 = 12.0*rsq * (cut_ljsq-rsq) * - < (rsq-cut_lj_innersq) * denom_lj_inv; - < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); - < forcelj = forcelj*switch1 + philj*switch2; - --- - > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - > forcelj = forcelj*switch1; - 965d1013 - < philj = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); - 967,969c1015,1025 - < switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * - 
< (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < philj *= switch1; - --- - > philj12 = lj3[itype][jtype]*cut_lj6*denom_lj12 * - > (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); - > philj6 = -lj4[itype][jtype]*cut_lj3*denom_lj6 * - > (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); - > philj = philj12 + philj6; - > } else { - > philj12 = r6inv*lj3[itype][jtype]*r6inv - - > lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; - > philj6 = -lj4[itype][jtype]*r6inv + - > lj4[itype][jtype]*cut_lj_inner3inv*cut_lj3inv; - > philj = philj12 + philj6; - 979c1035 - < void *PairLJCharmmCoulLong::extract(const char *str, int &dim) - --- - > void *PairLJCharmmfswCoulLong::extract(const char *str, int &dim) - 988a1045,1047 - > - > // info extracted by dihedral_charmmfsw - > - 989a1049,1051 - > if (strcmp(str,"cut_lj_inner") == 0) return (void *) &cut_lj_inner; - > if (strcmp(str,"cut_lj") == 0) return (void *) &cut_lj; - > if (strcmp(str,"dihedflag") == 0) return (void *) &dihedflag; - - - */ - -// nothing to do for all these, inherited from PairLJCharmmfswCoulLong - - - - -/* - - 226c257 - < void PairLJCharmmCoulLong::compute_inner() - --- - > void PairLJCharmmfswCoulLong::compute_inner() - 248a280,286 - > double cut_out_on = cut_respa[0]; - > double cut_out_off = cut_respa[1]; - > - > double cut_out_diff = cut_out_off - cut_out_on; - > double cut_out_on_sq = cut_out_on*cut_out_on; - > double cut_out_off_sq = cut_out_off*cut_out_off; - > - 284c322 - < rsw = (sqrt(rsq) - cut_out_on)*cut_out_diff_inv; - --- - > rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; - 303c341 - < void PairLJCharmmCoulLong::compute_middle() - --- - > void PairLJCharmmfswCoulLong::compute_middle() - 308c346 - < double philj,switch1,switch2; - --- - > double switch1; - 326a365,376 - > double cut_in_off = cut_respa[0]; - > double cut_in_on = cut_respa[1]; - > double cut_out_on = cut_respa[2]; - > double cut_out_off = cut_respa[3]; - > - > double cut_in_diff = cut_in_on - cut_in_off; - > double cut_out_diff = 
cut_out_off - cut_out_on; - > double cut_in_off_sq = cut_in_off*cut_in_off; - > double cut_in_on_sq = cut_in_on*cut_in_on; - > double cut_out_on_sq = cut_out_on*cut_out_on; - > double cut_out_off_sq = cut_out_off*cut_out_off; - > - 361,365c411,412 - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < switch2 = 12.0*rsq * (cut_ljsq-rsq) * - < (rsq-cut_lj_innersq) * denom_lj_inv; - < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); - < forcelj = forcelj*switch1 + philj*switch2; - --- - > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - > forcelj = forcelj*switch1; - 370c417 - < rsw = (sqrt(rsq) - cut_in_off)*cut_in_diff_inv; - --- - > rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff; - 374c421 - < rsw = (sqrt(rsq) - cut_out_on)*cut_out_diff_inv; - --- - > rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; - 393c440 - < void PairLJCharmmCoulLong::compute_outer(int eflag, int vflag) - --- - > void PairLJCharmmfswCoulLong::compute_outer(int eflag, int vflag) - 396c443 - < double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; - --- - > double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwl6,evdwl12,ecoul,fpair; - 398c445 - < double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; - --- - > double r,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; - 400c447 - < double philj,switch1,switch2; - --- - > double switch1; - 422a470,476 - > double cut_in_off = cut_respa[2]; - > double cut_in_on = cut_respa[3]; - > - > double cut_in_diff = cut_in_on - cut_in_off; - > double cut_in_off_sq = cut_in_off*cut_in_off; - > double cut_in_on_sq = cut_in_on*cut_in_on; - > - 448a503 - > r6inv = r2inv*r2inv*r2inv; - 489d543 - < r6inv = r2inv*r2inv*r2inv; - 493,497c547,548 - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < switch2 = 12.0*rsq * (cut_ljsq-rsq) * - < (rsq-cut_lj_innersq) * denom_lj_inv; - < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); - < forcelj = forcelj*switch1 + philj*switch2; - --- - > 
(cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - > forcelj = forcelj*switch1; - 533d583 - < r6inv = r2inv*r2inv*r2inv; - 536,538c586,598 - < switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < evdwl *= switch1; - --- - > rinv = sqrt(r2inv); - > r3inv = r2inv*rinv; - > evdwl12 = lj3[itype][jtype]*cut_lj6*denom_lj12 * - > (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv); - > evdwl6 = -lj4[itype][jtype]*cut_lj3*denom_lj6 * - > (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv); - > evdwl = evdwl12 + evdwl6; - > } else { - > evdwl12 = r6inv*lj3[itype][jtype]*r6inv - - > lj3[itype][jtype]*cut_lj_inner6inv*cut_lj6inv; - > evdwl6 = -lj4[itype][jtype]*r6inv + - > lj4[itype][jtype]*cut_lj_inner3inv*cut_lj3inv; - > evdwl = evdwl12 + evdwl6; - 561d620 - < r6inv = r2inv*r2inv*r2inv; - 565,569c624,625 - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < switch2 = 12.0*rsq * (cut_ljsq-rsq) * - < (rsq-cut_lj_innersq) * denom_lj_inv; - < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); - < forcelj = forcelj*switch1 + philj*switch2; - --- - > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - > forcelj = forcelj*switch1; - 572d627 - < r6inv = r2inv*r2inv*r2inv; - 576,580c631,632 - < (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj_inv; - < switch2 = 12.0*rsq * (cut_ljsq-rsq) * - < (rsq-cut_lj_innersq) * denom_lj_inv; - < philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); - < forcelj = forcelj*switch1 + philj*switch2; - --- - > (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - > forcelj = forcelj*switch1; - - */ - -// kokkos doesnt support respa, so ignore compute_inner / compute_middle / compute_outer diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h index e9a6b5486f..8fdb8543ed 100644 --- a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h +++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h @@ -11,29 +11,6 @@ See the 
README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - - *** DRAFT VERSION 1 (lots of comments to be removed just before merge) *** - - (1) first draft version of PairLJCharmmfswCoulLongKokkos exactly - same as PairLJCharmmCoulLongKokkos but with new class name - - method: track changes from serial kspace pair_lj_charmm_coul_long to - pair_lj_charmmfsw_coul_long and apply to PairLJCharmmfswCoulLongKokkos - - % diff pair_lj_charmm_coul_long.h pair_lj_charmmfsw_coul_long.h - - -------------------------------------------------------------------------- */ - -/* - 16c16 - < PairStyle(lj/charmm/coul/long,PairLJCharmmCoulLong); - --- - > PairStyle(lj/charmmfsw/coul/long,PairLJCharmmfswCoulLong); - - */ #ifdef PAIR_CLASS // clang-format off @@ -43,17 +20,6 @@ PairStyle(lj/charmmfsw/coul/long/kk/host,PairLJCharmmfswCoulLongKokkos #ifndef LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_H - > #define LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_H - - */ - // clang-format off #ifndef LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_KOKKOS_H #define LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_KOKKOS_H @@ -64,15 +30,6 @@ PairStyle(lj/charmmfsw/coul/long/kk/host,PairLJCharmmfswCoulLongKokkos class PairLJCharmmfswCoulLong : public Pair { - - */ - template class PairLJCharmmfswCoulLongKokkos : public PairLJCharmmfswCoulLong { public: @@ -80,18 +37,7 @@ class PairLJCharmmfswCoulLongKokkos : public PairLJCharmmfswCoulLong { enum {COUL_FLAG=1}; typedef DeviceType device_type; typedef ArrayTypes AT; - - /* - - 29,30c29,30 - < PairLJCharmmCoulLong(class LAMMPS *); - < ~PairLJCharmmCoulLong() override; - --- - > PairLJCharmmfswCoulLong(class LAMMPS *); - > ~PairLJCharmmfswCoulLong() override; - - */ - + PairLJCharmmfswCoulLongKokkos(class LAMMPS *); ~PairLJCharmmfswCoulLongKokkos() override; @@ -103,34 +49,6 @@ class PairLJCharmmfswCoulLongKokkos : public PairLJCharmmfswCoulLong { protected: - /* - 
52c52,54 - < double cut_lj_inner, cut_lj; - --- - > int dihedflag; - > - > double cut_lj_inner, cut_lj, cut_ljinv, cut_lj_innerinv; - 53a56,57 - > double cut_lj3inv, cut_lj_inner3inv, cut_lj3, cut_lj_inner3; - > double cut_lj6inv, cut_lj_inner6inv, cut_lj6, cut_lj_inner6; - 56,60c60 - < double cut_in_off, cut_in_on, cut_out_off, cut_out_on; - < double cut_in_diff, cut_out_diff; - < double cut_in_diff_inv, cut_out_diff_inv; - < double cut_in_off_sq, cut_in_on_sq, cut_out_off_sq, cut_out_on_sq; - < double denom_lj, denom_lj_inv; - --- - > double denom_lj, denom_lj12, denom_lj6; - - */ - - // almost nothing to do here, inherited from PairLJCharmmfswCoulLong - // only temporarily need cut_lj_innersq, denom_coul protected variables - // (removed from pair_lj_charmm_coul_long to pair_lj_charmmfsw_coul_long) - // to compile draft version 1, can be removed by draft version 2 - - - template KOKKOS_INLINE_FUNCTION F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, From 600eaf837b5d911bea54d224b6a3616accc2935d Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Wed, 3 Jan 2024 16:20:04 -0500 Subject: [PATCH 032/267] update preferred contact info --- src/REACTION/README | 3 ++- src/REACTION/fix_bond_react.cpp | 2 +- src/REACTION/fix_bond_react.h | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/REACTION/README b/src/REACTION/README index 99a5d604ec..b9199d6d47 100644 --- a/src/REACTION/README +++ b/src/REACTION/README @@ -25,4 +25,5 @@ The REACTER methodology is detailed in: https://doi.org/10.1021/acs.macromol.0c02012 This package was created by Jacob Gissinger -(jacob.r.gissinger@gmail.com) at the NASA Langley Research Center. +(jgissing@stevens.edu) while at the NASA Langley Research Center +and Stevens Institute of Technology. 
diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index 10a7023e17..e704160e93 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -13,7 +13,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- -Contributing Author: Jacob Gissinger (jacob.r.gissinger@gmail.com) +Contributing Author: Jacob Gissinger (jgissing@stevens.edu) ------------------------------------------------------------------------- */ #include "fix_bond_react.h" diff --git a/src/REACTION/fix_bond_react.h b/src/REACTION/fix_bond_react.h index 534261e11d..3d56c2fc7b 100644 --- a/src/REACTION/fix_bond_react.h +++ b/src/REACTION/fix_bond_react.h @@ -12,7 +12,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing Author: Jacob Gissinger (jacob.r.gissinger@gmail.com) + Contributing Author: Jacob Gissinger (jgissing@stevens.edu) ------------------------------------------------------------------------- */ #ifdef FIX_CLASS From a6b00a60b208b17cd25fe6ff2a1d7c0000718bc0 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Wed, 3 Jan 2024 20:31:22 -0500 Subject: [PATCH 033/267] additional check/warning for valid templates --- src/REACTION/fix_bond_react.cpp | 35 +++++++++++++++++++++++++++++---- src/REACTION/fix_bond_react.h | 2 +- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index e704160e93..00292438ec 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -2681,16 +2681,43 @@ void FixBondReact::find_landlocked_atoms(int myrxn) } // also, if atoms change number of bonds, but aren't landlocked, that could be bad + int warnflag = 0; if (comm->me == 0) for (int i = 0; i < 
twomol->natoms; i++) { if ((create_atoms[i][myrxn] == 0) && (twomol_nxspecial[i][0] != onemol_nxspecial[equivalences[i][1][myrxn]-1][0]) && - (landlocked_atoms[i][myrxn] == 0)) - error->warning(FLERR, "Fix bond/react: Atom affected by reaction {} is too close " - "to template edge",rxn_name[myrxn]); - break; + (landlocked_atoms[i][myrxn] == 0)) { + warnflag = 1; + break; + } } + // also, if an atom changes any of its bonds, but is not landlocked, that could be bad + int thereflag; + if (comm->me == 0) + for (int i = 0; i < twomol->natoms; i++) { + if (landlocked_atoms[i][myrxn] == 1) continue; + for (int j = 0; j < twomol_nxspecial[i][0]; j++) { + int oneneighID = equivalences[twomol_xspecial[i][j]-1][1][myrxn]; + int ii = equivalences[i][1][myrxn] - 1; + thereflag = 0; + for (int k = 0; k < onemol_nxspecial[ii][0]; k++) { + if (oneneighID == onemol_xspecial[ii][k]) { + thereflag = 1; + break; + } + } + if (thereflag == 0) { + warnflag = 1; + break; + } + } + if (warnflag == 1) break; + } + + if (comm->me == 0 && warnflag == 1) error->warning(FLERR, "Fix bond/react: Atom affected " + "by reaction {} is too close to template edge",rxn_name[myrxn]); + // finally, if a created atom is not landlocked, bad! 
for (int i = 0; i < twomol->natoms; i++) { if (create_atoms[i][myrxn] == 1 && landlocked_atoms[i][myrxn] == 0) { diff --git a/src/REACTION/fix_bond_react.h b/src/REACTION/fix_bond_react.h index 3d56c2fc7b..8c9fc9dce4 100644 --- a/src/REACTION/fix_bond_react.h +++ b/src/REACTION/fix_bond_react.h @@ -139,7 +139,7 @@ class FixBondReact : public Fix { int avail_guesses; // num of restore points available int *guess_branch; // used when there is more than two choices when guessing int **restore_pt; // contains info about restore points - tagint **restore; // contaings info about restore points + tagint **restore; // contains info about restore points int *pioneer_count; // counts pioneers int **edge; // atoms in molecule templates with incorrect valences From 13b6d40062b01a078dd86bd6fe517e15ac731178 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Wed, 3 Jan 2024 20:38:10 -0500 Subject: [PATCH 034/267] tiny_epoxy example correction not sure why this issue showed up in recent LAMMPS versions --- examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized b/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized index ea09d06893..7e0350cdb0 100644 --- a/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized +++ b/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized @@ -20,7 +20,8 @@ improper_style class2 special_bonds lj/coul 0 0 1 pair_modify tail yes mix sixthpower -read_data tiny_epoxy.data +read_data tiny_epoxy.data & + extra/special/per/atom 25 velocity all create 300.0 4928459 dist gaussian From de1f6eefd7c167e4980f4173ad1e0b65d48ea4c5 Mon Sep 17 00:00:00 2001 From: Mitch Murphy Date: Wed, 3 Jan 2024 23:00:07 -0500 Subject: [PATCH 035/267] // FIXME: // superclass destructor from KSPACE/pair_lj_charmmfsw_coul_long.cpp:81 // resets force->qqr2e = force->qqr2e_lammps_real at end of timestep 0 // causing ~E-6 
errors for steps 1,2,... everywhere in this class when // running kokkos with openmp (and probably with GPUs also). // // WORKAROUND: for now until guidance from lammps devs is to // reset it back force->qqr2e = force->qqr2e_charmm_real here. --- src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp index 191626fc9f..7701f13768 100644 --- a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp @@ -76,6 +76,17 @@ template PairLJCharmmfswCoulLongKokkos::~PairLJCharmmfswCoulLongKokkos() { + // FIXME: + // superclass destructor from KSPACE/pair_lj_charmmfsw_coul_long.cpp:81 + // resets force->qqr2e = force->qqr2e_lammps_real at end of timestep 0 + // causing ~E-6 errors for steps 1,2,... everywhere in this class when + // running kokkos with openmp (and probably with GPUs also). + // + // WORKAROUND: for now until guidance from lammps devs is to + // reset it back force->qqr2e = force->qqr2e_charmm_real here. + + force->qqr2e = force->qqr2e_charmm_real; + if (copymode) return; if (allocated) { From c065d4bac626bd7fd2912cc1bcd368078a2f587e Mon Sep 17 00:00:00 2001 From: Mitch Murphy Date: Thu, 4 Jan 2024 00:50:06 -0500 Subject: [PATCH 036/267] // FIXME: destructor from this class resets // // force->qqr2e = force->qqr2e_lammps_real // // at end of timestep 0 causing ~E-6 errors for steps 1,2,... // everywhere in pair_lj_charmmfsw_coul_long_kokkos when // running kokkos with openmp (and probably with GPUs also). // // WORKAROUND: for now until guidance from lammps devs is to // comment out this line here (commit to be reversed later). 
//force->qqr2e = force->qqr2e_lammps_real; --- src/KSPACE/pair_lj_charmmfsw_coul_long.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp b/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp index b7635c49c7..83b7293178 100644 --- a/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp +++ b/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp @@ -82,7 +82,19 @@ PairLJCharmmfswCoulLong::~PairLJCharmmfswCoulLong() if ((comm->me == 0) && (force->qqr2e == force->qqr2e_charmm_real)) error->message(FLERR,"Restoring original LAMMPS coulomb energy" " conversion constant"); - force->qqr2e = force->qqr2e_lammps_real; + + // FIXME: destructor from this class resets + // + // force->qqr2e = force->qqr2e_lammps_real + // + // at end of timestep 0 causing ~E-6 errors for steps 1,2,... + // everywhere in pair_lj_charmmfsw_coul_long_kokkos when + // running kokkos with openmp (and probably with GPUs also). + // + // WORKAROUND: for now until guidance from lammps devs is to + // comment out this line here (commit to be reversed later). + + //force->qqr2e = force->qqr2e_lammps_real; } if (copymode) return; From 4b4e796c190734090c80b2badd2023a66e08bb6c Mon Sep 17 00:00:00 2001 From: alphataubio Date: Thu, 4 Jan 2024 16:06:41 -0500 Subject: [PATCH 037/267] Revert " // FIXME:" This reverts commit de1f6eefd7c167e4980f4173ad1e0b65d48ea4c5. 
my first idea to reset back force->qqr2e = force->qqr2e_charmm_real didnt work because class destructor gets called first THEN superclass destructor gets called --- src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp index 7701f13768..191626fc9f 100644 --- a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp @@ -76,17 +76,6 @@ template PairLJCharmmfswCoulLongKokkos::~PairLJCharmmfswCoulLongKokkos() { - // FIXME: - // superclass destructor from KSPACE/pair_lj_charmmfsw_coul_long.cpp:81 - // resets force->qqr2e = force->qqr2e_lammps_real at end of timestep 0 - // causing ~E-6 errors for steps 1,2,... everywhere in this class when - // running kokkos with openmp (and probably with GPUs also). - // - // WORKAROUND: for now until guidance from lammps devs is to - // reset it back force->qqr2e = force->qqr2e_charmm_real here. 
- - force->qqr2e = force->qqr2e_charmm_real; - if (copymode) return; if (allocated) { From f9aafff9928c43f27728647ebcb5b5f245f0ad19 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 4 Jan 2024 22:45:53 -0500 Subject: [PATCH 038/267] must include fmt/ranges.h for fmt::join() --- src/KIM/kim_interactions.cpp | 2 ++ src/KIM/kim_param.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/KIM/kim_interactions.cpp b/src/KIM/kim_interactions.cpp index 1f4f84e648..ce550bf5da 100644 --- a/src/KIM/kim_interactions.cpp +++ b/src/KIM/kim_interactions.cpp @@ -70,6 +70,8 @@ #include "modify.h" #include "update.h" +#include "fmt/ranges.h" + #include #include diff --git a/src/KIM/kim_param.cpp b/src/KIM/kim_param.cpp index f72df81989..c50474fe67 100644 --- a/src/KIM/kim_param.cpp +++ b/src/KIM/kim_param.cpp @@ -68,6 +68,8 @@ #include "pair_kim.h" #include "variable.h" +#include "fmt/ranges.h" + #include #include #include From 01482e7a2e0324eabc3dc0b81809573f93ce89b4 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 4 Jan 2024 22:46:40 -0500 Subject: [PATCH 039/267] update fmtlib to version 10.2.1 --- src/fmt/args.h | 13 +- src/fmt/chrono.h | 272 ++++++++-------- src/fmt/color.h | 77 +++-- src/fmt/compile.h | 17 +- src/fmt/core.h | 647 +++++++++++++++++++------------------- src/fmt/format-inl.h | 161 +++++----- src/fmt/format.h | 718 ++++++++++++++++++++++--------------------- src/fmt/os.h | 64 ++-- src/fmt/ostream.h | 100 ++++-- src/fmt/printf.h | 34 +- src/fmt/ranges.h | 117 +++++-- src/fmt/std.h | 154 +++++++--- src/fmt/xchar.h | 26 +- src/fmtlib_os.cpp | 67 ++-- 14 files changed, 1384 insertions(+), 1083 deletions(-) diff --git a/src/fmt/args.h b/src/fmt/args.h index 2d684e7cc1..b77a2d0661 100644 --- a/src/fmt/args.h +++ b/src/fmt/args.h @@ -12,7 +12,7 @@ #include // std::unique_ptr #include -#include "core.h" +#include "format.h" // std_string_view FMT_BEGIN_NAMESPACE @@ -22,8 +22,9 @@ template struct is_reference_wrapper : std::false_type {}; template 
struct is_reference_wrapper> : std::true_type {}; -template const T& unwrap(const T& v) { return v; } -template const T& unwrap(const std::reference_wrapper& v) { +template auto unwrap(const T& v) -> const T& { return v; } +template +auto unwrap(const std::reference_wrapper& v) -> const T& { return static_cast(v); } @@ -50,7 +51,7 @@ class dynamic_arg_list { std::unique_ptr> head_; public: - template const T& push(const Arg& arg) { + template auto push(const Arg& arg) -> const T& { auto new_node = std::unique_ptr>(new typed_node(arg)); auto& value = new_node->value; new_node->next = std::move(head_); @@ -110,14 +111,14 @@ class dynamic_format_arg_store friend class basic_format_args; - unsigned long long get_types() const { + auto get_types() const -> unsigned long long { return detail::is_unpacked_bit | data_.size() | (named_info_.empty() ? 0ULL : static_cast(detail::has_named_args_bit)); } - const basic_format_arg* data() const { + auto data() const -> const basic_format_arg* { return named_info_.empty() ? 
data_.data() : data_.data() + 1; } diff --git a/src/fmt/chrono.h b/src/fmt/chrono.h index ff3e1445b9..9d54574e16 100644 --- a/src/fmt/chrono.h +++ b/src/fmt/chrono.h @@ -18,7 +18,7 @@ #include #include -#include "format.h" +#include "ostream.h" // formatbuf FMT_BEGIN_NAMESPACE @@ -72,7 +72,8 @@ template ::value && std::numeric_limits::is_signed == std::numeric_limits::is_signed)> -FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec) + -> To { ec = 0; using F = std::numeric_limits; using T = std::numeric_limits; @@ -101,7 +102,8 @@ template ::value && std::numeric_limits::is_signed != std::numeric_limits::is_signed)> -FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec) + -> To { ec = 0; using F = std::numeric_limits; using T = std::numeric_limits; @@ -133,7 +135,8 @@ FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { template ::value)> -FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec) + -> To { ec = 0; return from; } // function @@ -154,7 +157,7 @@ FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { // clang-format on template ::value)> -FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto safe_float_conversion(const From from, int& ec) -> To { ec = 0; using T = std::numeric_limits; static_assert(std::is_floating_point::value, "From must be floating"); @@ -176,7 +179,7 @@ FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { template ::value)> -FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto safe_float_conversion(const From from, int& ec) -> To { ec = 0; static_assert(std::is_floating_point::value, "From must be floating"); return from; @@ -188,8 
+191,8 @@ FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { template ::value), FMT_ENABLE_IF(std::is_integral::value)> -To safe_duration_cast(std::chrono::duration from, - int& ec) { +auto safe_duration_cast(std::chrono::duration from, + int& ec) -> To { using From = std::chrono::duration; ec = 0; // the basic idea is that we need to convert from count() in the from type @@ -240,8 +243,8 @@ To safe_duration_cast(std::chrono::duration from, template ::value), FMT_ENABLE_IF(std::is_floating_point::value)> -To safe_duration_cast(std::chrono::duration from, - int& ec) { +auto safe_duration_cast(std::chrono::duration from, + int& ec) -> To { using From = std::chrono::duration; ec = 0; if (std::isnan(from.count())) { @@ -321,12 +324,12 @@ To safe_duration_cast(std::chrono::duration from, namespace detail { template struct null {}; -inline null<> localtime_r FMT_NOMACRO(...) { return null<>(); } -inline null<> localtime_s(...) { return null<>(); } -inline null<> gmtime_r(...) { return null<>(); } -inline null<> gmtime_s(...) { return null<>(); } +inline auto localtime_r FMT_NOMACRO(...) -> null<> { return null<>(); } +inline auto localtime_s(...) -> null<> { return null<>(); } +inline auto gmtime_r(...) -> null<> { return null<>(); } +inline auto gmtime_s(...) 
-> null<> { return null<>(); } -inline const std::locale& get_classic_locale() { +inline auto get_classic_locale() -> const std::locale& { static const auto& locale = std::locale::classic(); return locale; } @@ -336,8 +339,6 @@ template struct codecvt_result { CodeUnit buf[max_size]; CodeUnit* end; }; -template -constexpr const size_t codecvt_result::max_size; template void write_codecvt(codecvt_result& out, string_view in_buf, @@ -408,8 +409,7 @@ inline void do_write(buffer& buf, const std::tm& time, auto&& format_buf = formatbuf>(buf); auto&& os = std::basic_ostream(&format_buf); os.imbue(loc); - using iterator = std::ostreambuf_iterator; - const auto& facet = std::use_facet>(loc); + const auto& facet = std::use_facet>(loc); auto end = facet.put(os, os, Char(' '), &time, format, modifier); if (end.failed()) FMT_THROW(format_error("failed to format time")); } @@ -432,6 +432,51 @@ auto write(OutputIt out, const std::tm& time, const std::locale& loc, return write_encoded_tm_str(out, string_view(buf.data(), buf.size()), loc); } +template +struct is_same_arithmetic_type + : public std::integral_constant::value && + std::is_integral::value) || + (std::is_floating_point::value && + std::is_floating_point::value)> { +}; + +template < + typename To, typename FromRep, typename FromPeriod, + FMT_ENABLE_IF(is_same_arithmetic_type::value)> +auto fmt_duration_cast(std::chrono::duration from) -> To { +#if FMT_SAFE_DURATION_CAST + // Throwing version of safe_duration_cast is only available for + // integer to integer or float to float casts. + int ec; + To to = safe_duration_cast::safe_duration_cast(from, ec); + if (ec) FMT_THROW(format_error("cannot format duration")); + return to; +#else + // Standard duration cast, may overflow. 
+ return std::chrono::duration_cast(from); +#endif +} + +template < + typename To, typename FromRep, typename FromPeriod, + FMT_ENABLE_IF(!is_same_arithmetic_type::value)> +auto fmt_duration_cast(std::chrono::duration from) -> To { + // Mixed integer <-> float cast is not supported by safe_duration_cast. + return std::chrono::duration_cast(from); +} + +template +auto to_time_t( + std::chrono::time_point time_point) + -> std::time_t { + // Cannot use std::chrono::system_clock::to_time_t since this would first + // require a cast to std::chrono::system_clock::time_point, which could + // overflow. + return fmt_duration_cast>( + time_point.time_since_epoch()) + .count(); +} } // namespace detail FMT_BEGIN_EXPORT @@ -441,29 +486,29 @@ FMT_BEGIN_EXPORT expressed in local time. Unlike ``std::localtime``, this function is thread-safe on most platforms. */ -inline std::tm localtime(std::time_t time) { +inline auto localtime(std::time_t time) -> std::tm { struct dispatcher { std::time_t time_; std::tm tm_; dispatcher(std::time_t t) : time_(t) {} - bool run() { + auto run() -> bool { using namespace fmt::detail; return handle(localtime_r(&time_, &tm_)); } - bool handle(std::tm* tm) { return tm != nullptr; } + auto handle(std::tm* tm) -> bool { return tm != nullptr; } - bool handle(detail::null<>) { + auto handle(detail::null<>) -> bool { using namespace fmt::detail; return fallback(localtime_s(&tm_, &time_)); } - bool fallback(int res) { return res == 0; } + auto fallback(int res) -> bool { return res == 0; } #if !FMT_MSC_VERSION - bool fallback(detail::null<>) { + auto fallback(detail::null<>) -> bool { using namespace fmt::detail; std::tm* tm = std::localtime(&time_); if (tm) tm_ = *tm; @@ -480,8 +525,8 @@ inline std::tm localtime(std::time_t time) { #if FMT_USE_LOCAL_TIME template inline auto localtime(std::chrono::local_time time) -> std::tm { - return localtime(std::chrono::system_clock::to_time_t( - std::chrono::current_zone()->to_sys(time))); + return localtime( + 
detail::to_time_t(std::chrono::current_zone()->to_sys(time))); } #endif @@ -490,29 +535,29 @@ inline auto localtime(std::chrono::local_time time) -> std::tm { expressed in Coordinated Universal Time (UTC). Unlike ``std::gmtime``, this function is thread-safe on most platforms. */ -inline std::tm gmtime(std::time_t time) { +inline auto gmtime(std::time_t time) -> std::tm { struct dispatcher { std::time_t time_; std::tm tm_; dispatcher(std::time_t t) : time_(t) {} - bool run() { + auto run() -> bool { using namespace fmt::detail; return handle(gmtime_r(&time_, &tm_)); } - bool handle(std::tm* tm) { return tm != nullptr; } + auto handle(std::tm* tm) -> bool { return tm != nullptr; } - bool handle(detail::null<>) { + auto handle(detail::null<>) -> bool { using namespace fmt::detail; return fallback(gmtime_s(&tm_, &time_)); } - bool fallback(int res) { return res == 0; } + auto fallback(int res) -> bool { return res == 0; } #if !FMT_MSC_VERSION - bool fallback(detail::null<>) { + auto fallback(detail::null<>) -> bool { std::tm* tm = std::gmtime(&time_); if (tm) tm_ = *tm; return tm != nullptr; @@ -525,9 +570,11 @@ inline std::tm gmtime(std::time_t time) { return gt.tm_; } -inline std::tm gmtime( - std::chrono::time_point time_point) { - return gmtime(std::chrono::system_clock::to_time_t(time_point)); +template +inline auto gmtime( + std::chrono::time_point time_point) + -> std::tm { + return gmtime(detail::to_time_t(time_point)); } namespace detail { @@ -566,7 +613,8 @@ inline void write_digit2_separated(char* buf, unsigned a, unsigned b, } } -template FMT_CONSTEXPR inline const char* get_units() { +template +FMT_CONSTEXPR inline auto get_units() -> const char* { if (std::is_same::value) return "as"; if (std::is_same::value) return "fs"; if (std::is_same::value) return "ps"; @@ -584,8 +632,9 @@ template FMT_CONSTEXPR inline const char* get_units() { if (std::is_same::value) return "Ts"; if (std::is_same::value) return "Ps"; if (std::is_same::value) return "Es"; - if 
(std::is_same>::value) return "m"; + if (std::is_same>::value) return "min"; if (std::is_same>::value) return "h"; + if (std::is_same>::value) return "d"; return nullptr; } @@ -621,9 +670,8 @@ auto write_padding(OutputIt out, pad_type pad) -> OutputIt { // Parses a put_time-like format string and invokes handler actions. template -FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, - const Char* end, - Handler&& handler) { +FMT_CONSTEXPR auto parse_chrono_format(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { if (begin == end || *begin == '}') return begin; if (*begin != '%') FMT_THROW(format_error("invalid format")); auto ptr = begin; @@ -954,25 +1002,25 @@ struct tm_format_checker : null_chrono_spec_handler { FMT_CONSTEXPR void on_tz_name() {} }; -inline const char* tm_wday_full_name(int wday) { +inline auto tm_wday_full_name(int wday) -> const char* { static constexpr const char* full_name_list[] = { "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"}; return wday >= 0 && wday <= 6 ? full_name_list[wday] : "?"; } -inline const char* tm_wday_short_name(int wday) { +inline auto tm_wday_short_name(int wday) -> const char* { static constexpr const char* short_name_list[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; return wday >= 0 && wday <= 6 ? short_name_list[wday] : "???"; } -inline const char* tm_mon_full_name(int mon) { +inline auto tm_mon_full_name(int mon) -> const char* { static constexpr const char* full_name_list[] = { "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"}; return mon >= 0 && mon <= 11 ? 
full_name_list[mon] : "?"; } -inline const char* tm_mon_short_name(int mon) { +inline auto tm_mon_short_name(int mon) -> const char* { static constexpr const char* short_name_list[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", @@ -1004,21 +1052,21 @@ inline void tzset_once() { // Converts value to Int and checks that it's in the range [0, upper). template ::value)> -inline Int to_nonnegative_int(T value, Int upper) { - FMT_ASSERT(std::is_unsigned::value || - (value >= 0 && to_unsigned(value) <= to_unsigned(upper)), - "invalid value"); - (void)upper; +inline auto to_nonnegative_int(T value, Int upper) -> Int { + if (!std::is_unsigned::value && + (value < 0 || to_unsigned(value) > to_unsigned(upper))) { + FMT_THROW(fmt::format_error("chrono value is out of range")); + } return static_cast(value); } template ::value)> -inline Int to_nonnegative_int(T value, Int upper) { +inline auto to_nonnegative_int(T value, Int upper) -> Int { if (value < 0 || value > static_cast(upper)) FMT_THROW(format_error("invalid value")); return static_cast(value); } -constexpr long long pow10(std::uint32_t n) { +constexpr auto pow10(std::uint32_t n) -> long long { return n == 0 ? 1 : 10 * pow10(n - 1); } @@ -1052,13 +1100,12 @@ void write_fractional_seconds(OutputIt& out, Duration d, int precision = -1) { std::chrono::seconds::rep>::type, std::ratio<1, detail::pow10(num_fractional_digits)>>; - const auto fractional = - d - std::chrono::duration_cast(d); + const auto fractional = d - fmt_duration_cast(d); const auto subseconds = std::chrono::treat_as_floating_point< typename subsecond_precision::rep>::value ? 
fractional.count() - : std::chrono::duration_cast(fractional).count(); + : fmt_duration_cast(fractional).count(); auto n = static_cast>(subseconds); const int num_digits = detail::count_digits(n); @@ -1109,11 +1156,11 @@ void write_floating_seconds(memory_buffer& buf, Duration duration, num_fractional_digits = 6; } - format_to(std::back_inserter(buf), FMT_STRING("{:.{}f}"), - std::fmod(val * static_cast(Duration::period::num) / - static_cast(Duration::period::den), - static_cast(60)), - num_fractional_digits); + fmt::format_to(std::back_inserter(buf), FMT_STRING("{:.{}f}"), + std::fmod(val * static_cast(Duration::period::num) / + static_cast(Duration::period::den), + static_cast(60)), + num_fractional_digits); } template (l); } - // Algorithm: - // https://en.wikipedia.org/wiki/ISO_week_date#Calculating_the_week_number_from_a_month_and_day_of_the_month_or_ordinal_date + // Algorithm: https://en.wikipedia.org/wiki/ISO_week_date. auto iso_year_weeks(long long curr_year) const noexcept -> int { const auto prev_year = curr_year - 1; const auto curr_p = @@ -1315,7 +1361,7 @@ class tm_writer { subsecs_(subsecs), tm_(tm) {} - OutputIt out() const { return out_; } + auto out() const -> OutputIt { return out_; } FMT_CONSTEXPR void on_text(const Char* begin, const Char* end) { out_ = copy_str(begin, end, out_); @@ -1579,6 +1625,7 @@ struct chrono_format_checker : null_chrono_spec_handler { template FMT_CONSTEXPR void on_text(const Char*, const Char*) {} + FMT_CONSTEXPR void on_day_of_year() {} FMT_CONSTEXPR void on_24_hour(numeric_system, pad_type) {} FMT_CONSTEXPR void on_12_hour(numeric_system, pad_type) {} FMT_CONSTEXPR void on_minute(numeric_system, pad_type) {} @@ -1597,16 +1644,16 @@ struct chrono_format_checker : null_chrono_spec_handler { template ::value&& has_isfinite::value)> -inline bool isfinite(T) { +inline auto isfinite(T) -> bool { return true; } template ::value)> -inline T mod(T x, int y) { +inline auto mod(T x, int y) -> T { return x % static_cast(y); } 
template ::value)> -inline T mod(T x, int y) { +inline auto mod(T x, int y) -> T { return std::fmod(x, static_cast(y)); } @@ -1621,49 +1668,38 @@ template struct make_unsigned_or_unchanged { using type = typename std::make_unsigned::type; }; -#if FMT_SAFE_DURATION_CAST -// throwing version of safe_duration_cast -template -To fmt_safe_duration_cast(std::chrono::duration from) { - int ec; - To to = safe_duration_cast::safe_duration_cast(from, ec); - if (ec) FMT_THROW(format_error("cannot format duration")); - return to; -} -#endif - template ::value)> -inline std::chrono::duration get_milliseconds( - std::chrono::duration d) { +inline auto get_milliseconds(std::chrono::duration d) + -> std::chrono::duration { // this may overflow and/or the result may not fit in the // target type. #if FMT_SAFE_DURATION_CAST using CommonSecondsType = typename std::common_type::type; - const auto d_as_common = fmt_safe_duration_cast(d); + const auto d_as_common = fmt_duration_cast(d); const auto d_as_whole_seconds = - fmt_safe_duration_cast(d_as_common); + fmt_duration_cast(d_as_common); // this conversion should be nonproblematic const auto diff = d_as_common - d_as_whole_seconds; const auto ms = - fmt_safe_duration_cast>(diff); + fmt_duration_cast>(diff); return ms; #else - auto s = std::chrono::duration_cast(d); - return std::chrono::duration_cast(d - s); + auto s = fmt_duration_cast(d); + return fmt_duration_cast(d - s); #endif } template ::value)> -OutputIt format_duration_value(OutputIt out, Rep val, int) { +auto format_duration_value(OutputIt out, Rep val, int) -> OutputIt { return write(out, val); } template ::value)> -OutputIt format_duration_value(OutputIt out, Rep val, int precision) { +auto format_duration_value(OutputIt out, Rep val, int precision) -> OutputIt { auto specs = format_specs(); specs.precision = precision; specs.type = precision >= 0 ? 
presentation_type::fixed_lower @@ -1672,12 +1708,12 @@ OutputIt format_duration_value(OutputIt out, Rep val, int precision) { } template -OutputIt copy_unit(string_view unit, OutputIt out, Char) { +auto copy_unit(string_view unit, OutputIt out, Char) -> OutputIt { return std::copy(unit.begin(), unit.end(), out); } template -OutputIt copy_unit(string_view unit, OutputIt out, wchar_t) { +auto copy_unit(string_view unit, OutputIt out, wchar_t) -> OutputIt { // This works when wchar_t is UTF-32 because units only contain characters // that have the same representation in UTF-16 and UTF-32. utf8_to_utf16 u(unit); @@ -1685,7 +1721,7 @@ OutputIt copy_unit(string_view unit, OutputIt out, wchar_t) { } template -OutputIt format_duration_unit(OutputIt out) { +auto format_duration_unit(OutputIt out) -> OutputIt { if (const char* unit = get_units()) return copy_unit(string_view(unit), out, Char()); *out++ = '['; @@ -1752,18 +1788,12 @@ struct chrono_formatter { // this may overflow and/or the result may not fit in the // target type. -#if FMT_SAFE_DURATION_CAST // might need checked conversion (rep!=Rep) - auto tmpval = std::chrono::duration(val); - s = fmt_safe_duration_cast(tmpval); -#else - s = std::chrono::duration_cast( - std::chrono::duration(val)); -#endif + s = fmt_duration_cast(std::chrono::duration(val)); } // returns true if nan or inf, writes to out. - bool handle_nan_inf() { + auto handle_nan_inf() -> bool { if (isfinite(val)) { return false; } @@ -1780,17 +1810,22 @@ struct chrono_formatter { return true; } - Rep hour() const { return static_cast(mod((s.count() / 3600), 24)); } + auto days() const -> Rep { return static_cast(s.count() / 86400); } + auto hour() const -> Rep { + return static_cast(mod((s.count() / 3600), 24)); + } - Rep hour12() const { + auto hour12() const -> Rep { Rep hour = static_cast(mod((s.count() / 3600), 12)); return hour <= 0 ? 
12 : hour; } - Rep minute() const { return static_cast(mod((s.count() / 60), 60)); } - Rep second() const { return static_cast(mod(s.count(), 60)); } + auto minute() const -> Rep { + return static_cast(mod((s.count() / 60), 60)); + } + auto second() const -> Rep { return static_cast(mod(s.count(), 60)); } - std::tm time() const { + auto time() const -> std::tm { auto time = std::tm(); time.tm_hour = to_nonnegative_int(hour(), 24); time.tm_min = to_nonnegative_int(minute(), 60); @@ -1858,10 +1893,14 @@ struct chrono_formatter { void on_dec0_week_of_year(numeric_system) {} void on_dec1_week_of_year(numeric_system) {} void on_iso_week_of_year(numeric_system) {} - void on_day_of_year() {} void on_day_of_month(numeric_system) {} void on_day_of_month_space(numeric_system) {} + void on_day_of_year() { + if (handle_nan_inf()) return; + write(days(), 0); + } + void on_24_hour(numeric_system ns, pad_type pad) { if (handle_nan_inf()) return; @@ -1968,7 +2007,7 @@ class weekday { weekday() = default; explicit constexpr weekday(unsigned wd) noexcept : value(static_cast(wd != 7 ? 
wd : 0)) {} - constexpr unsigned c_encoding() const noexcept { return value; } + constexpr auto c_encoding() const noexcept -> unsigned { return value; } }; class year_month_day {}; @@ -2083,25 +2122,22 @@ struct formatter, period::num != 1 || period::den != 1 || std::is_floating_point::value)) { const auto epoch = val.time_since_epoch(); - auto subsecs = std::chrono::duration_cast( - epoch - std::chrono::duration_cast(epoch)); + auto subsecs = detail::fmt_duration_cast( + epoch - detail::fmt_duration_cast(epoch)); if (subsecs.count() < 0) { auto second = - std::chrono::duration_cast(std::chrono::seconds(1)); + detail::fmt_duration_cast(std::chrono::seconds(1)); if (epoch.count() < ((Duration::min)() + second).count()) FMT_THROW(format_error("duration is too small")); subsecs += second; val -= second; } - return formatter::do_format( - gmtime(std::chrono::time_point_cast(val)), ctx, - &subsecs); + return formatter::do_format(gmtime(val), ctx, &subsecs); } - return formatter::format( - gmtime(std::chrono::time_point_cast(val)), ctx); + return formatter::format(gmtime(val), ctx); } }; @@ -2120,17 +2156,13 @@ struct formatter, Char> if (period::num != 1 || period::den != 1 || std::is_floating_point::value) { const auto epoch = val.time_since_epoch(); - const auto subsecs = std::chrono::duration_cast( - epoch - std::chrono::duration_cast(epoch)); + const auto subsecs = detail::fmt_duration_cast( + epoch - detail::fmt_duration_cast(epoch)); - return formatter::do_format( - localtime(std::chrono::time_point_cast(val)), - ctx, &subsecs); + return formatter::do_format(localtime(val), ctx, &subsecs); } - return formatter::format( - localtime(std::chrono::time_point_cast(val)), - ctx); + return formatter::format(localtime(val), ctx); } }; #endif diff --git a/src/fmt/color.h b/src/fmt/color.h index 8697e1ca0b..464519e582 100644 --- a/src/fmt/color.h +++ b/src/fmt/color.h @@ -233,7 +233,7 @@ class text_style { FMT_CONSTEXPR text_style(emphasis em = emphasis()) noexcept : 
set_foreground_color(), set_background_color(), ems(em) {} - FMT_CONSTEXPR text_style& operator|=(const text_style& rhs) { + FMT_CONSTEXPR auto operator|=(const text_style& rhs) -> text_style& { if (!set_foreground_color) { set_foreground_color = rhs.set_foreground_color; foreground_color = rhs.foreground_color; @@ -257,29 +257,29 @@ class text_style { return *this; } - friend FMT_CONSTEXPR text_style operator|(text_style lhs, - const text_style& rhs) { + friend FMT_CONSTEXPR auto operator|(text_style lhs, const text_style& rhs) + -> text_style { return lhs |= rhs; } - FMT_CONSTEXPR bool has_foreground() const noexcept { + FMT_CONSTEXPR auto has_foreground() const noexcept -> bool { return set_foreground_color; } - FMT_CONSTEXPR bool has_background() const noexcept { + FMT_CONSTEXPR auto has_background() const noexcept -> bool { return set_background_color; } - FMT_CONSTEXPR bool has_emphasis() const noexcept { + FMT_CONSTEXPR auto has_emphasis() const noexcept -> bool { return static_cast(ems) != 0; } - FMT_CONSTEXPR detail::color_type get_foreground() const noexcept { + FMT_CONSTEXPR auto get_foreground() const noexcept -> detail::color_type { FMT_ASSERT(has_foreground(), "no foreground specified for this style"); return foreground_color; } - FMT_CONSTEXPR detail::color_type get_background() const noexcept { + FMT_CONSTEXPR auto get_background() const noexcept -> detail::color_type { FMT_ASSERT(has_background(), "no background specified for this style"); return background_color; } - FMT_CONSTEXPR emphasis get_emphasis() const noexcept { + FMT_CONSTEXPR auto get_emphasis() const noexcept -> emphasis { FMT_ASSERT(has_emphasis(), "no emphasis specified for this style"); return ems; } @@ -297,9 +297,11 @@ class text_style { } } - friend FMT_CONSTEXPR text_style fg(detail::color_type foreground) noexcept; + friend FMT_CONSTEXPR auto fg(detail::color_type foreground) noexcept + -> text_style; - friend FMT_CONSTEXPR text_style bg(detail::color_type background) noexcept; 
+ friend FMT_CONSTEXPR auto bg(detail::color_type background) noexcept + -> text_style; detail::color_type foreground_color; detail::color_type background_color; @@ -309,16 +311,19 @@ class text_style { }; /** Creates a text style from the foreground (text) color. */ -FMT_CONSTEXPR inline text_style fg(detail::color_type foreground) noexcept { +FMT_CONSTEXPR inline auto fg(detail::color_type foreground) noexcept + -> text_style { return text_style(true, foreground); } /** Creates a text style from the background color. */ -FMT_CONSTEXPR inline text_style bg(detail::color_type background) noexcept { +FMT_CONSTEXPR inline auto bg(detail::color_type background) noexcept + -> text_style { return text_style(false, background); } -FMT_CONSTEXPR inline text_style operator|(emphasis lhs, emphasis rhs) noexcept { +FMT_CONSTEXPR inline auto operator|(emphasis lhs, emphasis rhs) noexcept + -> text_style { return text_style(lhs) | rhs; } @@ -384,8 +389,8 @@ template struct ansi_color_escape { } FMT_CONSTEXPR operator const Char*() const noexcept { return buffer; } - FMT_CONSTEXPR const Char* begin() const noexcept { return buffer; } - FMT_CONSTEXPR_CHAR_TRAITS const Char* end() const noexcept { + FMT_CONSTEXPR auto begin() const noexcept -> const Char* { return buffer; } + FMT_CONSTEXPR20 auto end() const noexcept -> const Char* { return buffer + std::char_traits::length(buffer); } @@ -400,25 +405,27 @@ template struct ansi_color_escape { out[2] = static_cast('0' + c % 10); out[3] = static_cast(delimiter); } - static FMT_CONSTEXPR bool has_emphasis(emphasis em, emphasis mask) noexcept { + static FMT_CONSTEXPR auto has_emphasis(emphasis em, emphasis mask) noexcept + -> bool { return static_cast(em) & static_cast(mask); } }; template -FMT_CONSTEXPR ansi_color_escape make_foreground_color( - detail::color_type foreground) noexcept { +FMT_CONSTEXPR auto make_foreground_color(detail::color_type foreground) noexcept + -> ansi_color_escape { return ansi_color_escape(foreground, 
"\x1b[38;2;"); } template -FMT_CONSTEXPR ansi_color_escape make_background_color( - detail::color_type background) noexcept { +FMT_CONSTEXPR auto make_background_color(detail::color_type background) noexcept + -> ansi_color_escape { return ansi_color_escape(background, "\x1b[48;2;"); } template -FMT_CONSTEXPR ansi_color_escape make_emphasis(emphasis em) noexcept { +FMT_CONSTEXPR auto make_emphasis(emphasis em) noexcept + -> ansi_color_escape { return ansi_color_escape(em); } @@ -427,9 +434,10 @@ template inline void reset_color(buffer& buffer) { buffer.append(reset_color.begin(), reset_color.end()); } -template struct styled_arg { +template struct styled_arg : detail::view { const T& value; text_style style; + styled_arg(const T& v, text_style s) : value(v), style(s) {} }; template @@ -510,9 +518,10 @@ void print(const text_style& ts, const S& format_str, const Args&... args) { } template > -inline std::basic_string vformat( +inline auto vformat( const text_style& ts, const S& format_str, - basic_format_args>> args) { + basic_format_args>> args) + -> std::basic_string { basic_memory_buffer buf; detail::vformat_to(buf, ts, detail::to_string_view(format_str), args); return fmt::to_string(buf); @@ -531,8 +540,8 @@ inline std::basic_string vformat( \endrst */ template > -inline std::basic_string format(const text_style& ts, const S& format_str, - const Args&... args) { +inline auto format(const text_style& ts, const S& format_str, + const Args&... 
args) -> std::basic_string { return fmt::vformat(ts, detail::to_string_view(format_str), fmt::make_format_args>(args...)); } @@ -542,9 +551,10 @@ inline std::basic_string format(const text_style& ts, const S& format_str, */ template ::value)> -OutputIt vformat_to( - OutputIt out, const text_style& ts, basic_string_view format_str, - basic_format_args>> args) { +auto vformat_to(OutputIt out, const text_style& ts, + basic_string_view format_str, + basic_format_args>> args) + -> OutputIt { auto&& buf = detail::get_buffer(out); detail::vformat_to(buf, ts, format_str, args); return detail::get_iterator(buf, out); @@ -562,9 +572,10 @@ OutputIt vformat_to( fmt::emphasis::bold | fg(fmt::color::red), "{}", 42); \endrst */ -template >::value&& - detail::is_string::value> +template < + typename OutputIt, typename S, typename... Args, + bool enable = detail::is_output_iterator>::value && + detail::is_string::value> inline auto format_to(OutputIt out, const text_style& ts, const S& format_str, Args&&... args) -> typename std::enable_if::type { diff --git a/src/fmt/compile.h b/src/fmt/compile.h index af76507f07..71fa69c67e 100644 --- a/src/fmt/compile.h +++ b/src/fmt/compile.h @@ -14,8 +14,8 @@ FMT_BEGIN_NAMESPACE namespace detail { template -FMT_CONSTEXPR inline counting_iterator copy_str(InputIt begin, InputIt end, - counting_iterator it) { +FMT_CONSTEXPR inline auto copy_str(InputIt begin, InputIt end, + counting_iterator it) -> counting_iterator { return it + (end - begin); } @@ -57,7 +57,7 @@ struct udl_compiled_string : compiled_string { #endif template -const T& first(const T& value, const Tail&...) { +auto first(const T& value, const Tail&...) -> const T& { return value; } @@ -489,18 +489,19 @@ FMT_CONSTEXPR OutputIt format_to(OutputIt out, const S&, Args&&... args) { template ::value)> -format_to_n_result format_to_n(OutputIt out, size_t n, - const S& format_str, Args&&... args) { +auto format_to_n(OutputIt out, size_t n, const S& format_str, Args&&... 
args) + -> format_to_n_result { using traits = detail::fixed_buffer_traits; auto buf = detail::iterator_buffer(out, n); - format_to(std::back_inserter(buf), format_str, std::forward(args)...); + fmt::format_to(std::back_inserter(buf), format_str, + std::forward(args)...); return {buf.out(), buf.count()}; } template ::value)> -FMT_CONSTEXPR20 size_t formatted_size(const S& format_str, - const Args&... args) { +FMT_CONSTEXPR20 auto formatted_size(const S& format_str, const Args&... args) + -> size_t { return fmt::format_to(detail::counting_iterator(), format_str, args...) .count(); } diff --git a/src/fmt/core.h b/src/fmt/core.h index 9f7de781bb..6a53b8c52c 100644 --- a/src/fmt/core.h +++ b/src/fmt/core.h @@ -8,17 +8,15 @@ #ifndef FMT_CORE_H_ #define FMT_CORE_H_ -#include // std::byte -#include // std::FILE -#include // std::strlen -#include -#include -#include // std::addressof -#include -#include +#include // std::byte +#include // std::FILE +#include // std::strlen +#include // CHAR_BIT +#include // std::string +#include // std::enable_if // The fmt library version in the form major * 10000 + minor * 100 + patch. -#define FMT_VERSION 100100 +#define FMT_VERSION 100200 #if defined(__clang__) && !defined(__ibmxl__) # define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) @@ -58,6 +56,12 @@ # define FMT_MSC_WARNING(...) 
#endif +#ifdef _GLIBCXX_RELEASE +# define FMT_GLIBCXX_RELEASE _GLIBCXX_RELEASE +#else +# define FMT_GLIBCXX_RELEASE 0 +#endif + #ifdef _MSVC_LANG # define FMT_CPLUSPLUS _MSVC_LANG #else @@ -88,6 +92,20 @@ #define FMT_HAS_CPP17_ATTRIBUTE(attribute) \ (FMT_CPLUSPLUS >= 201703L && FMT_HAS_CPP_ATTRIBUTE(attribute)) +#ifndef FMT_DEPRECATED +# if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VERSION >= 1900 +# define FMT_DEPRECATED [[deprecated]] +# else +# if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__) +# define FMT_DEPRECATED __attribute__((deprecated)) +# elif FMT_MSC_VERSION +# define FMT_DEPRECATED __declspec(deprecated) +# else +# define FMT_DEPRECATED /* deprecated */ +# endif +# endif +#endif + // Check if relaxed C++14 constexpr is supported. // GCC doesn't allow throw in constexpr until version 6 (bug 67371). #ifndef FMT_USE_CONSTEXPR @@ -105,30 +123,17 @@ # define FMT_CONSTEXPR #endif -#if ((FMT_CPLUSPLUS >= 202002L) && \ - (!defined(_GLIBCXX_RELEASE) || _GLIBCXX_RELEASE > 9)) || \ - (FMT_CPLUSPLUS >= 201709L && FMT_GCC_VERSION >= 1002) +#if (FMT_CPLUSPLUS >= 202002L || \ + (FMT_CPLUSPLUS >= 201709L && FMT_GCC_VERSION >= 1002)) && \ + ((!FMT_GLIBCXX_RELEASE || FMT_GLIBCXX_RELEASE >= 10) && \ + (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION >= 10000) && \ + (!FMT_MSC_VERSION || FMT_MSC_VERSION >= 1928)) && \ + defined(__cpp_lib_is_constant_evaluated) # define FMT_CONSTEXPR20 constexpr #else # define FMT_CONSTEXPR20 #endif -// Check if constexpr std::char_traits<>::{compare,length} are supported. -#if defined(__GLIBCXX__) -# if FMT_CPLUSPLUS >= 201703L && defined(_GLIBCXX_RELEASE) && \ - _GLIBCXX_RELEASE >= 7 // GCC 7+ libstdc++ has _GLIBCXX_RELEASE. 
-# define FMT_CONSTEXPR_CHAR_TRAITS constexpr -# endif -#elif defined(_LIBCPP_VERSION) && FMT_CPLUSPLUS >= 201703L && \ - _LIBCPP_VERSION >= 4000 -# define FMT_CONSTEXPR_CHAR_TRAITS constexpr -#elif FMT_MSC_VERSION >= 1914 && FMT_CPLUSPLUS >= 201703L -# define FMT_CONSTEXPR_CHAR_TRAITS constexpr -#endif -#ifndef FMT_CONSTEXPR_CHAR_TRAITS -# define FMT_CONSTEXPR_CHAR_TRAITS -#endif - // Check if exceptions are disabled. #ifndef FMT_EXCEPTIONS # if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || \ @@ -191,33 +196,25 @@ # define FMT_END_EXPORT #endif +#if FMT_GCC_VERSION || FMT_CLANG_VERSION +# define FMT_VISIBILITY(value) __attribute__((visibility(value))) +#else +# define FMT_VISIBILITY(value) +#endif + #if !defined(FMT_HEADER_ONLY) && defined(_WIN32) -# ifdef FMT_LIB_EXPORT +# if defined(FMT_LIB_EXPORT) # define FMT_API __declspec(dllexport) # elif defined(FMT_SHARED) # define FMT_API __declspec(dllimport) # endif -#else -# if defined(FMT_LIB_EXPORT) || defined(FMT_SHARED) -# if defined(__GNUC__) || defined(__clang__) -# define FMT_API __attribute__((visibility("default"))) -# endif -# endif +#elif defined(FMT_LIB_EXPORT) || defined(FMT_SHARED) +# define FMT_API FMT_VISIBILITY("default") #endif #ifndef FMT_API # define FMT_API #endif -// libc++ supports string_view in pre-c++17. -#if FMT_HAS_INCLUDE() && \ - (FMT_CPLUSPLUS >= 201703L || defined(_LIBCPP_VERSION)) -# include -# define FMT_USE_STRING_VIEW -#elif FMT_HAS_INCLUDE("experimental/string_view") && FMT_CPLUSPLUS >= 201402L -# include -# define FMT_USE_EXPERIMENTAL_STRING_VIEW -#endif - #ifndef FMT_UNICODE # define FMT_UNICODE !FMT_MSC_VERSION #endif @@ -228,8 +225,9 @@ __apple_build_version__ >= 14000029L) && \ FMT_CPLUSPLUS >= 202002L) || \ (defined(__cpp_consteval) && \ - (!FMT_MSC_VERSION || _MSC_FULL_VER >= 193030704)) -// consteval is broken in MSVC before VS2022 and Apple clang before 14. 
+ (!FMT_MSC_VERSION || FMT_MSC_VERSION >= 1929)) +// consteval is broken in MSVC before VS2019 version 16.10 and Apple clang +// before 14. # define FMT_CONSTEVAL consteval # define FMT_HAS_CONSTEVAL # else @@ -248,6 +246,15 @@ # endif #endif +// GCC < 5 requires this-> in decltype. +#ifndef FMT_DECLTYPE_THIS +# if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 +# define FMT_DECLTYPE_THIS this-> +# else +# define FMT_DECLTYPE_THIS +# endif +#endif + // Enable minimal optimizations for more compact code in debug mode. FMT_GCC_PRAGMA("GCC push_options") #if !defined(__OPTIMIZE__) && !defined(__NVCOMPILER) && !defined(__LCC__) && \ @@ -269,20 +276,57 @@ template using remove_const_t = typename std::remove_const::type; template using remove_cvref_t = typename std::remove_cv>::type; -template struct type_identity { using type = T; }; +template struct type_identity { + using type = T; +}; template using type_identity_t = typename type_identity::type; template using underlying_t = typename std::underlying_type::type; -// Checks whether T is a container with contiguous storage. -template struct is_contiguous : std::false_type {}; -template -struct is_contiguous> : std::true_type {}; +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 +// A workaround for gcc 4.8 to make void_t work in a SFINAE context. +template struct void_t_impl { + using type = void; +}; +template using void_t = typename void_t_impl::type; +#else +template using void_t = void; +#endif struct monostate { constexpr monostate() {} }; +// An implementation of back_insert_iterator to avoid dependency on . 
+template class back_insert_iterator { + private: + Container* container_; + + friend auto get_container(back_insert_iterator it) -> Container& { + return *it.container_; + } + + public: + using difference_type = ptrdiff_t; + FMT_UNCHECKED_ITERATOR(back_insert_iterator); + + explicit back_insert_iterator(Container& c) : container_(&c) {} + + auto operator=(const typename Container::value_type& value) + -> back_insert_iterator& { + container_->push_back(value); + return *this; + } + auto operator*() -> back_insert_iterator& { return *this; } + auto operator++() -> back_insert_iterator& { return *this; } + auto operator++(int) -> back_insert_iterator { return *this; } +}; + +template +auto back_inserter(Container& c) -> back_insert_iterator { + return {c}; +} + // An enable_if helper to be used in template parameters which results in much // shorter symbols: https://godbolt.org/z/sWw4vP. Extra parentheses are needed // to workaround a bug in MSVC 2019 (see #1140 and #1186). @@ -310,10 +354,9 @@ template FMT_CONSTEXPR void ignore_unused(const T&...) {} constexpr FMT_INLINE auto is_constant_evaluated( bool default_value = false) noexcept -> bool { // Workaround for incompatibility between libstdc++ consteval-based -// std::is_constant_evaluated() implementation and clang-14. -// https://github.com/fmtlib/fmt/issues/3247 -#if FMT_CPLUSPLUS >= 202002L && defined(_GLIBCXX_RELEASE) && \ - _GLIBCXX_RELEASE >= 12 && \ +// std::is_constant_evaluated() implementation and clang-14: +// https://github.com/fmtlib/fmt/issues/3247. 
+#if FMT_CPLUSPLUS >= 202002L && FMT_GLIBCXX_RELEASE >= 12 && \ (FMT_CLANG_VERSION >= 1400 && FMT_CLANG_VERSION < 1500) ignore_unused(default_value); return __builtin_is_constant_evaluated(); @@ -346,15 +389,6 @@ FMT_NORETURN FMT_API void assert_fail(const char* file, int line, # endif #endif -#if defined(FMT_USE_STRING_VIEW) -template using std_string_view = std::basic_string_view; -#elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW) -template -using std_string_view = std::experimental::basic_string_view; -#else -template struct std_string_view {}; -#endif - #ifdef FMT_USE_INT128 // Do nothing. #elif defined(__SIZEOF_INT128__) && !defined(__NVCC__) && \ @@ -386,6 +420,15 @@ FMT_CONSTEXPR auto to_unsigned(Int value) -> return static_cast::type>(value); } +template +struct is_string_like : std::false_type {}; + +// A heuristic to detect std::string and std::string_view. +template +struct is_string_like().find_first_of( + typename T::value_type(), 0))>> : std::true_type { +}; + FMT_CONSTEXPR inline auto is_utf8() -> bool { FMT_MSC_WARNING(suppress : 4566) constexpr unsigned char section[] = "\u00A7"; @@ -394,8 +437,33 @@ FMT_CONSTEXPR inline auto is_utf8() -> bool { return FMT_UNICODE || (sizeof(section) == 3 && uchar(section[0]) == 0xC2 && uchar(section[1]) == 0xA7); } + +template FMT_CONSTEXPR auto length(const Char* s) -> size_t { + size_t len = 0; + while (*s++) ++len; + return len; +} + +template +FMT_CONSTEXPR auto compare(const Char* s1, const Char* s2, std::size_t n) + -> int { + for (; n != 0; ++s1, ++s2, --n) { + if (*s1 < *s2) return -1; + if (*s1 > *s2) return 1; + } + return 0; +} } // namespace detail +template +using basic_string = + std::basic_string, std::allocator>; + +// Checks whether T is a container with contiguous storage. +template struct is_contiguous : std::false_type {}; +template +struct is_contiguous> : std::true_type {}; + /** An implementation of ``std::basic_string_view`` for pre-C++17. It provides a subset of the API. 
``fmt::basic_string_view`` is used for format strings even @@ -420,29 +488,25 @@ template class basic_string_view { : data_(s), size_(count) {} /** - \rst - Constructs a string reference object from a C string computing - the size with ``std::char_traits::length``. - \endrst + Constructs a string reference object from a C string. */ - FMT_CONSTEXPR_CHAR_TRAITS + FMT_CONSTEXPR20 FMT_INLINE basic_string_view(const Char* s) : data_(s), size_(detail::const_check(std::is_same::value && - !detail::is_constant_evaluated(true)) + !detail::is_constant_evaluated(false)) ? std::strlen(reinterpret_cast(s)) - : std::char_traits::length(s)) {} + : detail::length(s)) {} - /** Constructs a string reference from a ``std::basic_string`` object. */ - template - FMT_CONSTEXPR basic_string_view( - const std::basic_string& s) noexcept - : data_(s.data()), size_(s.size()) {} - - template >::value)> - FMT_CONSTEXPR basic_string_view(S s) noexcept + /** + Constructs a string reference from a ``std::basic_string`` or a + ``std::basic_string_view`` object. + */ + template ::value&& std::is_same< + typename S::value_type, Char>::value)> + FMT_CONSTEXPR basic_string_view(const S& s) noexcept : data_(s.data()), size_(s.size()) {} /** Returns a pointer to the string data. 
*/ @@ -463,30 +527,28 @@ template class basic_string_view { size_ -= n; } - FMT_CONSTEXPR_CHAR_TRAITS bool starts_with( - basic_string_view sv) const noexcept { - return size_ >= sv.size_ && - std::char_traits::compare(data_, sv.data_, sv.size_) == 0; + FMT_CONSTEXPR auto starts_with(basic_string_view sv) const noexcept + -> bool { + return size_ >= sv.size_ && detail::compare(data_, sv.data_, sv.size_) == 0; } - FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(Char c) const noexcept { - return size_ >= 1 && std::char_traits::eq(*data_, c); + FMT_CONSTEXPR auto starts_with(Char c) const noexcept -> bool { + return size_ >= 1 && *data_ == c; } - FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(const Char* s) const { + FMT_CONSTEXPR auto starts_with(const Char* s) const -> bool { return starts_with(basic_string_view(s)); } // Lexicographically compare this string reference to other. - FMT_CONSTEXPR_CHAR_TRAITS auto compare(basic_string_view other) const -> int { + FMT_CONSTEXPR auto compare(basic_string_view other) const -> int { size_t str_size = size_ < other.size_ ? size_ : other.size_; - int result = std::char_traits::compare(data_, other.data_, str_size); + int result = detail::compare(data_, other.data_, str_size); if (result == 0) result = size_ == other.size_ ? 0 : (size_ < other.size_ ? 
-1 : 1); return result; } - FMT_CONSTEXPR_CHAR_TRAITS friend auto operator==(basic_string_view lhs, - basic_string_view rhs) - -> bool { + FMT_CONSTEXPR friend auto operator==(basic_string_view lhs, + basic_string_view rhs) -> bool { return lhs.compare(rhs) == 0; } friend auto operator!=(basic_string_view lhs, basic_string_view rhs) -> bool { @@ -526,21 +588,16 @@ template ::value)> FMT_INLINE auto to_string_view(const Char* s) -> basic_string_view { return s; } -template -inline auto to_string_view(const std::basic_string& s) - -> basic_string_view { - return s; +template ::value)> +inline auto to_string_view(const S& s) + -> basic_string_view { + return s; // std::basic_string[_view] } template constexpr auto to_string_view(basic_string_view s) -> basic_string_view { return s; } -template >::value)> -inline auto to_string_view(std_string_view s) -> basic_string_view { - return s; -} template ::value)> constexpr auto to_string_view(const S& s) -> basic_string_view { @@ -609,10 +666,10 @@ FMT_TYPE_CONSTANT(const Char*, cstring_type); FMT_TYPE_CONSTANT(basic_string_view, string_type); FMT_TYPE_CONSTANT(const void*, pointer_type); -constexpr bool is_integral_type(type t) { +constexpr auto is_integral_type(type t) -> bool { return t > type::none_type && t <= type::last_integer_type; } -constexpr bool is_arithmetic_type(type t) { +constexpr auto is_arithmetic_type(type t) -> bool { return t > type::none_type && t <= type::last_numeric_type; } @@ -635,21 +692,10 @@ enum { cstring_set = set(type::cstring_type), pointer_set = set(type::pointer_type) }; - -FMT_NORETURN FMT_API void throw_format_error(const char* message); - -struct error_handler { - constexpr error_handler() = default; - - // This function is intentionally not constexpr to give a compile-time error. - FMT_NORETURN void on_error(const char* message) { - throw_format_error(message); - } -}; } // namespace detail /** Throws ``format_error`` with a given message. 
*/ -using detail::throw_format_error; +FMT_NORETURN FMT_API void throw_format_error(const char* message); /** String's character type. */ template using char_t = typename detail::char_t_impl::type; @@ -701,7 +747,7 @@ template class basic_format_parse_context { */ FMT_CONSTEXPR auto next_arg_id() -> int { if (next_arg_id_ < 0) { - detail::throw_format_error( + throw_format_error( "cannot switch from manual to automatic argument indexing"); return 0; } @@ -716,7 +762,7 @@ template class basic_format_parse_context { */ FMT_CONSTEXPR void check_arg_id(int id) { if (next_arg_id_ > 0) { - detail::throw_format_error( + throw_format_error( "cannot switch from automatic to manual argument indexing"); return; } @@ -769,35 +815,6 @@ class compile_parse_context : public basic_format_parse_context { } }; -// Extracts a reference to the container from back_insert_iterator. -template -inline auto get_container(std::back_insert_iterator it) - -> Container& { - using base = std::back_insert_iterator; - struct accessor : base { - accessor(base b) : base(b) {} - using base::container; - }; - return *accessor(it).container; -} - -template -FMT_CONSTEXPR auto copy_str(InputIt begin, InputIt end, OutputIt out) - -> OutputIt { - while (begin != end) *out++ = static_cast(*begin++); - return out; -} - -template , U>::value&& is_char::value)> -FMT_CONSTEXPR auto copy_str(T* begin, T* end, U* out) -> U* { - if (is_constant_evaluated()) return copy_str(begin, end, out); - auto size = to_unsigned(end - begin); - if (size > 0) memcpy(out, begin, size * sizeof(U)); - return out + size; -} - /** \rst A contiguous memory buffer with an optional growing ability. It is an internal @@ -810,13 +827,18 @@ template class buffer { size_t size_; size_t capacity_; + using grow_fun = void (*)(buffer& buf, size_t capacity); + grow_fun grow_; + protected: // Don't initialize ptr_ since it is not accessed to save a few cycles. 
FMT_MSC_WARNING(suppress : 26495) - buffer(size_t sz) noexcept : size_(sz), capacity_(sz) {} + FMT_CONSTEXPR buffer(grow_fun grow, size_t sz) noexcept + : size_(sz), capacity_(sz), grow_(grow) {} - FMT_CONSTEXPR20 buffer(T* p = nullptr, size_t sz = 0, size_t cap = 0) noexcept - : ptr_(p), size_(sz), capacity_(cap) {} + FMT_CONSTEXPR20 buffer(grow_fun grow, T* p = nullptr, size_t sz = 0, + size_t cap = 0) noexcept + : ptr_(p), size_(sz), capacity_(cap), grow_(grow) {} FMT_CONSTEXPR20 ~buffer() = default; buffer(buffer&&) = default; @@ -827,9 +849,6 @@ template class buffer { capacity_ = buf_capacity; } - /** Increases the buffer capacity to hold at least *capacity* elements. */ - virtual FMT_CONSTEXPR20 void grow(size_t capacity) = 0; - public: using value_type = T; using const_reference = const T&; @@ -868,7 +887,7 @@ template class buffer { // for at least one additional element either by increasing the capacity or by // flushing the buffer if it is full. FMT_CONSTEXPR20 void try_reserve(size_t new_capacity) { - if (new_capacity > capacity_) grow(new_capacity); + if (new_capacity > capacity_) grow_(*this, new_capacity); } FMT_CONSTEXPR20 void push_back(const T& value) { @@ -917,22 +936,25 @@ class iterator_buffer final : public Traits, public buffer { enum { buffer_size = 256 }; T data_[buffer_size]; - protected: - FMT_CONSTEXPR20 void grow(size_t) override { - if (this->size() == buffer_size) flush(); + static FMT_CONSTEXPR20 void grow(buffer& buf, size_t) { + if (buf.size() == buffer_size) static_cast(buf).flush(); } void flush() { auto size = this->size(); this->clear(); - out_ = copy_str(data_, data_ + this->limit(size), out_); + const T* begin = data_; + const T* end = begin + this->limit(size); + while (begin != end) *out_++ = *begin++; } public: explicit iterator_buffer(OutputIt out, size_t n = buffer_size) - : Traits(n), buffer(data_, 0, buffer_size), out_(out) {} + : Traits(n), buffer(grow, data_, 0, buffer_size), out_(out) {} 
iterator_buffer(iterator_buffer&& other) - : Traits(other), buffer(data_, 0, buffer_size), out_(other.out_) {} + : Traits(other), + buffer(grow, data_, 0, buffer_size), + out_(other.out_) {} ~iterator_buffer() { flush(); } auto out() -> OutputIt { @@ -951,9 +973,9 @@ class iterator_buffer final enum { buffer_size = 256 }; T data_[buffer_size]; - protected: - FMT_CONSTEXPR20 void grow(size_t) override { - if (this->size() == this->capacity()) flush(); + static FMT_CONSTEXPR20 void grow(buffer& buf, size_t) { + if (buf.size() == buf.capacity()) + static_cast(buf).flush(); } void flush() { @@ -967,10 +989,10 @@ class iterator_buffer final public: explicit iterator_buffer(T* out, size_t n = buffer_size) - : fixed_buffer_traits(n), buffer(out, 0, n), out_(out) {} + : fixed_buffer_traits(n), buffer(grow, out, 0, n), out_(out) {} iterator_buffer(iterator_buffer&& other) : fixed_buffer_traits(other), - buffer(std::move(other)), + buffer(static_cast(other)), out_(other.out_) { if (this->data() != out_) { this->set(data_, buffer_size); @@ -989,38 +1011,37 @@ class iterator_buffer final }; template class iterator_buffer final : public buffer { - protected: - FMT_CONSTEXPR20 void grow(size_t) override {} - public: - explicit iterator_buffer(T* out, size_t = 0) : buffer(out, 0, ~size_t()) {} + explicit iterator_buffer(T* out, size_t = 0) + : buffer([](buffer&, size_t) {}, out, 0, ~size_t()) {} auto out() -> T* { return &*this->end(); } }; // A buffer that writes to a container with the contiguous storage. 
template -class iterator_buffer, +class iterator_buffer, enable_if_t::value, typename Container::value_type>> final : public buffer { private: + using value_type = typename Container::value_type; Container& container_; - protected: - FMT_CONSTEXPR20 void grow(size_t capacity) override { - container_.resize(capacity); - this->set(&container_[0], capacity); + static FMT_CONSTEXPR20 void grow(buffer& buf, size_t capacity) { + auto& self = static_cast(buf); + self.container_.resize(capacity); + self.set(&self.container_[0], capacity); } public: explicit iterator_buffer(Container& c) - : buffer(c.size()), container_(c) {} - explicit iterator_buffer(std::back_insert_iterator out, size_t = 0) + : buffer(grow, c.size()), container_(c) {} + explicit iterator_buffer(back_insert_iterator out, size_t = 0) : iterator_buffer(get_container(out)) {} - auto out() -> std::back_insert_iterator { - return std::back_inserter(container_); + auto out() -> back_insert_iterator { + return fmt::back_inserter(container_); } }; @@ -1031,15 +1052,14 @@ template class counting_buffer final : public buffer { T data_[buffer_size]; size_t count_ = 0; - protected: - FMT_CONSTEXPR20 void grow(size_t) override { - if (this->size() != buffer_size) return; - count_ += this->size(); - this->clear(); + static FMT_CONSTEXPR20 void grow(buffer& buf, size_t) { + if (buf.size() != buffer_size) return; + static_cast(buf).count_ += buf.size(); + buf.clear(); } public: - counting_buffer() : buffer(data_, 0, buffer_size) {} + counting_buffer() : buffer(grow, data_, 0, buffer_size) {} auto count() -> size_t { return count_ + this->size(); } }; @@ -1053,7 +1073,7 @@ FMT_CONSTEXPR void basic_format_parse_context::do_check_arg_id(int id) { (!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) { using context = detail::compile_parse_context; if (id >= static_cast(this)->num_args()) - detail::throw_format_error("argument not found"); + throw_format_error("argument not found"); } } @@ -1085,18 +1105,29 @@ template using 
has_formatter = std::is_constructible>; -// An output iterator that appends to a buffer. -// It is used to reduce symbol sizes for the common case. -class appender : public std::back_insert_iterator> { - using base = std::back_insert_iterator>; +// An output iterator that appends to a buffer. It is used instead of +// back_insert_iterator to reduce symbol sizes for the common case. +class appender { + private: + detail::buffer* buffer_; + + friend auto get_container(appender app) -> detail::buffer& { + return *app.buffer_; + } public: - using std::back_insert_iterator>::back_insert_iterator; - appender(base it) noexcept : base(it) {} + using difference_type = ptrdiff_t; FMT_UNCHECKED_ITERATOR(appender); - auto operator++() noexcept -> appender& { return *this; } - auto operator++(int) noexcept -> appender { return *this; } + appender(detail::buffer& buf) : buffer_(&buf) {} + + auto operator=(char c) -> appender& { + buffer_->push_back(c); + return *this; + } + auto operator*() -> appender& { return *this; } + auto operator++() -> appender& { return *this; } + auto operator++(int) -> appender { return *this; } }; namespace detail { @@ -1119,7 +1150,7 @@ constexpr auto has_const_formatter() -> bool { template using buffer_appender = conditional_t::value, appender, - std::back_insert_iterator>>; + back_insert_iterator>>; // Maps an output iterator to a buffer. template @@ -1128,7 +1159,7 @@ auto get_buffer(OutputIt out) -> iterator_buffer { } template , Buf>::value)> -auto get_buffer(std::back_insert_iterator out) -> buffer& { +auto get_buffer(back_insert_iterator out) -> buffer& { return get_container(out); } @@ -1293,7 +1324,13 @@ template class value { template FMT_CONSTEXPR20 FMT_INLINE value(T& val) { using value_type = remove_const_t; - custom.value = const_cast(std::addressof(val)); + // T may overload operator& e.g. std::vector::reference in libc++. 
+#ifdef __cpp_if_constexpr + if constexpr (std::is_same::value) + custom.value = const_cast(&val); +#endif + if (!is_constant_evaluated()) + custom.value = const_cast(&reinterpret_cast(val)); // Get the formatter type through the context to allow different contexts // have different extension points, e.g. `formatter` for `format` and // `printf_formatter` for `printf`. @@ -1314,6 +1351,7 @@ template class value { parse_ctx.advance_to(f.parse(parse_ctx)); using qualified_type = conditional_t(), const T, T>; + // Calling format through a mutable reference is deprecated. ctx.advance_to(f.format(*static_cast(arg), ctx)); } }; @@ -1327,7 +1365,7 @@ using ulong_type = conditional_t; template struct format_as_result { template ::value || std::is_class::value)> - static auto map(U*) -> decltype(format_as(std::declval())); + static auto map(U*) -> remove_cvref_t()))>; static auto map(...) -> void; using type = decltype(map(static_cast(nullptr))); @@ -1444,7 +1482,8 @@ template struct arg_mapper { // Only map owning types because mapping views can be unsafe. 
template , FMT_ENABLE_IF(std::is_arithmetic::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T& val) -> decltype(this->map(U())) { + FMT_CONSTEXPR FMT_INLINE auto map(const T& val) + -> decltype(FMT_DECLTYPE_THIS map(U())) { return map(format_as(val)); } @@ -1468,13 +1507,14 @@ template struct arg_mapper { !is_string::value && !is_char::value && !is_named_arg::value && !std::is_arithmetic>::value)> - FMT_CONSTEXPR FMT_INLINE auto map(T& val) -> decltype(this->do_map(val)) { + FMT_CONSTEXPR FMT_INLINE auto map(T& val) + -> decltype(FMT_DECLTYPE_THIS do_map(val)) { return do_map(val); } template ::value)> FMT_CONSTEXPR FMT_INLINE auto map(const T& named_arg) - -> decltype(this->map(named_arg.value)) { + -> decltype(FMT_DECLTYPE_THIS map(named_arg.value)) { return map(named_arg.value); } @@ -1493,45 +1533,19 @@ enum { max_packed_args = 62 / packed_arg_bits }; enum : unsigned long long { is_unpacked_bit = 1ULL << 63 }; enum : unsigned long long { has_named_args_bit = 1ULL << 62 }; -template -auto copy_str(InputIt begin, InputIt end, appender out) -> appender { - get_container(out).append(begin, end); - return out; -} -template -auto copy_str(InputIt begin, InputIt end, - std::back_insert_iterator out) - -> std::back_insert_iterator { - get_container(out).append(begin, end); - return out; -} - -template -FMT_CONSTEXPR auto copy_str(R&& rng, OutputIt out) -> OutputIt { - return detail::copy_str(rng.begin(), rng.end(), out); -} - -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 -// A workaround for gcc 4.8 to make void_t work in a SFINAE context. 
-template struct void_t_impl { using type = void; }; -template using void_t = typename void_t_impl::type; -#else -template using void_t = void; -#endif - template struct is_output_iterator : std::false_type {}; +template <> struct is_output_iterator : std::true_type {}; + template struct is_output_iterator< - It, T, - void_t::iterator_category, - decltype(*std::declval() = std::declval())>> + It, T, void_t()++ = std::declval())>> : std::true_type {}; template struct is_back_insert_iterator : std::false_type {}; template -struct is_back_insert_iterator> +struct is_back_insert_iterator> : std::true_type {}; // A type-erased reference to an std::locale to avoid a heavy include. @@ -1607,8 +1621,8 @@ FMT_CONSTEXPR inline auto make_arg(T& val) -> basic_format_arg { } // namespace detail FMT_BEGIN_EXPORT -// A formatting argument. It is a trivially copyable/constructible type to -// allow storage in basic_memory_buffer. +// A formatting argument. Context is a template parameter for the compiled API +// where output can be unbuffered. template class basic_format_arg { private: detail::value value_; @@ -1618,11 +1632,6 @@ template class basic_format_arg { friend FMT_CONSTEXPR auto detail::make_arg(T& value) -> basic_format_arg; - template - friend FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis, - const basic_format_arg& arg) - -> decltype(vis(0)); - friend class basic_format_args; friend class dynamic_format_arg_store; @@ -1660,55 +1669,68 @@ template class basic_format_arg { auto is_arithmetic() const -> bool { return detail::is_arithmetic_type(type_); } + + /** + \rst + Visits an argument dispatching to the appropriate visit method based on + the argument type. For example, if the argument type is ``double`` then + ``vis(value)`` will be called with the value of type ``double``. 
+ \endrst + */ + template + FMT_CONSTEXPR auto visit(Visitor&& vis) -> decltype(vis(0)) { + switch (type_) { + case detail::type::none_type: + break; + case detail::type::int_type: + return vis(value_.int_value); + case detail::type::uint_type: + return vis(value_.uint_value); + case detail::type::long_long_type: + return vis(value_.long_long_value); + case detail::type::ulong_long_type: + return vis(value_.ulong_long_value); + case detail::type::int128_type: + return vis(detail::convert_for_visit(value_.int128_value)); + case detail::type::uint128_type: + return vis(detail::convert_for_visit(value_.uint128_value)); + case detail::type::bool_type: + return vis(value_.bool_value); + case detail::type::char_type: + return vis(value_.char_value); + case detail::type::float_type: + return vis(value_.float_value); + case detail::type::double_type: + return vis(value_.double_value); + case detail::type::long_double_type: + return vis(value_.long_double_value); + case detail::type::cstring_type: + return vis(value_.string.data); + case detail::type::string_type: + using sv = basic_string_view; + return vis(sv(value_.string.data, value_.string.size)); + case detail::type::pointer_type: + return vis(value_.pointer); + case detail::type::custom_type: + return vis(typename basic_format_arg::handle(value_.custom)); + } + return vis(monostate()); + } + + FMT_INLINE auto format_custom(const char_type* parse_begin, + typename Context::parse_context_type& parse_ctx, + Context& ctx) -> bool { + if (type_ != detail::type::custom_type) return false; + parse_ctx.advance_to(parse_begin); + value_.custom.format(value_.custom.value, parse_ctx, ctx); + return true; + } }; -/** - \rst - Visits an argument dispatching to the appropriate visit method based on - the argument type. For example, if the argument type is ``double`` then - ``vis(value)`` will be called with the value of type ``double``. - \endrst - */ -// DEPRECATED! 
template -FMT_CONSTEXPR FMT_INLINE auto visit_format_arg( +FMT_DEPRECATED FMT_CONSTEXPR FMT_INLINE auto visit_format_arg( Visitor&& vis, const basic_format_arg& arg) -> decltype(vis(0)) { - switch (arg.type_) { - case detail::type::none_type: - break; - case detail::type::int_type: - return vis(arg.value_.int_value); - case detail::type::uint_type: - return vis(arg.value_.uint_value); - case detail::type::long_long_type: - return vis(arg.value_.long_long_value); - case detail::type::ulong_long_type: - return vis(arg.value_.ulong_long_value); - case detail::type::int128_type: - return vis(detail::convert_for_visit(arg.value_.int128_value)); - case detail::type::uint128_type: - return vis(detail::convert_for_visit(arg.value_.uint128_value)); - case detail::type::bool_type: - return vis(arg.value_.bool_value); - case detail::type::char_type: - return vis(arg.value_.char_value); - case detail::type::float_type: - return vis(arg.value_.float_value); - case detail::type::double_type: - return vis(arg.value_.double_value); - case detail::type::long_double_type: - return vis(arg.value_.long_double_value); - case detail::type::cstring_type: - return vis(arg.value_.string.data); - case detail::type::string_type: - using sv = basic_string_view; - return vis(sv(arg.value_.string.data, arg.value_.string.size)); - case detail::type::pointer_type: - return vis(arg.value_.pointer); - case detail::type::custom_type: - return vis(typename basic_format_arg::handle(arg.value_.custom)); - } - return vis(monostate()); + return arg.visit(std::forward(vis)); } // Formatting context. @@ -1748,8 +1770,8 @@ template class basic_format_context { } auto args() const -> const format_args& { return args_; } - FMT_CONSTEXPR auto error_handler() -> detail::error_handler { return {}; } - void on_error(const char* message) { error_handler().on_error(message); } + // This function is intentionally not constexpr to give a compile-time error. 
+ void on_error(const char* message) { throw_format_error(message); } // Returns an iterator to the beginning of the output range. FMT_CONSTEXPR auto out() -> iterator { return out_; } @@ -1831,7 +1853,7 @@ class format_arg_store // Arguments are taken by lvalue references to avoid some lifetime issues. template constexpr auto make_format_args(T&... args) - -> format_arg_store...> { + -> format_arg_store...> { return {args...}; } @@ -2107,11 +2129,8 @@ struct dynamic_format_specs : format_specs { }; // Converts a character to ASCII. Returns '\0' on conversion failure. -template ::value)> -constexpr auto to_ascii(Char c) -> char { - return c <= 0xff ? static_cast(c) : '\0'; -} -template ::value)> +template ::value || + std::is_enum::value)> constexpr auto to_ascii(Char c) -> char { return c <= 0xff ? static_cast(c) : '\0'; } @@ -2156,11 +2175,11 @@ FMT_CONSTEXPR auto parse_nonnegative_int(const Char*& begin, const Char* end, } while (p != end && '0' <= *p && *p <= '9'); auto num_digits = p - begin; begin = p; - if (num_digits <= std::numeric_limits::digits10) - return static_cast(value); + int digits10 = static_cast(sizeof(int) * CHAR_BIT * 3 / 10); + if (num_digits <= digits10) return static_cast(value); // Check for overflow. - const unsigned max = to_unsigned((std::numeric_limits::max)()); - return num_digits == std::numeric_limits::digits10 + 1 && + unsigned max = INT_MAX; + return num_digits == digits10 + 1 && prev * 10ull + unsigned(p[-1] - '0') <= max ? 
static_cast(value) : error_value; @@ -2188,9 +2207,8 @@ FMT_CONSTEXPR auto do_parse_arg_id(const Char* begin, const Char* end, Char c = *begin; if (c >= '0' && c <= '9') { int index = 0; - constexpr int max = (std::numeric_limits::max)(); if (c != '0') - index = parse_nonnegative_int(begin, end, max); + index = parse_nonnegative_int(begin, end, INT_MAX); else ++begin; if (begin == end || (*begin != '}' && *begin != ':')) @@ -2309,9 +2327,12 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs( dynamic_format_specs& specs; type arg_type; - FMT_CONSTEXPR auto operator()(pres type, int set) -> const Char* { - if (!in(arg_type, set)) throw_format_error("invalid format specifier"); - specs.type = type; + FMT_CONSTEXPR auto operator()(pres pres_type, int set) -> const Char* { + if (!in(arg_type, set)) { + if (arg_type == type::none_type) return begin; + throw_format_error("invalid format specifier"); + } + specs.type = pres_type; return begin + 1; } } parse_presentation_type{begin, specs, arg_type}; @@ -2328,6 +2349,7 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs( case '+': case '-': case ' ': + if (arg_type == type::none_type) return begin; enter_state(state::sign, in(arg_type, sint_set | float_set)); switch (c) { case '+': @@ -2343,14 +2365,17 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs( ++begin; break; case '#': + if (arg_type == type::none_type) return begin; enter_state(state::hash, is_arithmetic_type(arg_type)); specs.alt = true; ++begin; break; case '0': enter_state(state::zero); - if (!is_arithmetic_type(arg_type)) + if (!is_arithmetic_type(arg_type)) { + if (arg_type == type::none_type) return begin; throw_format_error("format specifier requires numeric argument"); + } if (specs.align == align::none) { // Ignore 0 if align is specified for compatibility with std::format. 
specs.align = align::numeric; @@ -2372,12 +2397,14 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs( begin = parse_dynamic_spec(begin, end, specs.width, specs.width_ref, ctx); break; case '.': + if (arg_type == type::none_type) return begin; enter_state(state::precision, in(arg_type, float_set | string_set | cstring_set)); begin = parse_precision(begin, end, specs.precision, specs.precision_ref, ctx); break; case 'L': + if (arg_type == type::none_type) return begin; enter_state(state::locale, is_arithmetic_type(arg_type)); specs.localized = true; ++begin; @@ -2411,6 +2438,8 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs( case 'G': return parse_presentation_type(pres::general_upper, float_set); case 'c': + if (arg_type == type::bool_type) + throw_format_error("invalid format specifier"); return parse_presentation_type(pres::chr, integral_set); case 's': return parse_presentation_type(pres::string, @@ -2550,9 +2579,9 @@ FMT_CONSTEXPR auto parse_format_specs(ParseContext& ctx) decltype(arg_mapper().map(std::declval())), typename strip_named_arg::type>; // LAMMPS customization. Fails to compile with (some) Intel compilers -#if defined(__cpp_if_constexpr) && 0 - if constexpr (std::is_default_constructible_v< - formatter>) { +#if defined(__cpp_if_constexpr) && 1 + if constexpr (std::is_default_constructible< + formatter>::value) { return formatter().parse(ctx); } else { type_is_unformattable_for _; @@ -2675,9 +2704,11 @@ void check_format_string(S format_str) { template struct vformat_args { using type = basic_format_args< - basic_format_context>, Char>>; + basic_format_context>, Char>>; +}; +template <> struct vformat_args { + using type = format_args; }; -template <> struct vformat_args { using type = format_args; }; // Use vformat_args and avoid type_identity to keep symbols short. 
template @@ -2779,7 +2810,7 @@ using format_string = basic_format_string...>; inline auto runtime(string_view s) -> runtime_format_string<> { return {{s}}; } #endif -FMT_API auto vformat(string_view fmt, format_args args) -> std::string; +FMT_API auto vformat(string_view fmt, format_args args) -> basic_string; /** \rst @@ -2794,7 +2825,7 @@ FMT_API auto vformat(string_view fmt, format_args args) -> std::string; */ template FMT_NODISCARD FMT_INLINE auto format(format_string fmt, T&&... args) - -> std::string { + -> basic_string { return vformat(fmt, fmt::make_format_args(args...)); } @@ -2816,7 +2847,7 @@ auto vformat_to(OutputIt out, string_view fmt, format_args args) -> OutputIt { **Example**:: auto out = std::vector(); - fmt::format_to(std::back_inserter(out), "{}", 42); + fmt::format_to(fmt::back_inserter(out), "{}", 42); \endrst */ template #endif -#ifdef _WIN32 +#if defined(_WIN32) && !defined(FMT_WINDOWS_NO_WCHAR) # include // _isatty #endif @@ -36,10 +36,6 @@ FMT_FUNC void assert_fail(const char* file, int line, const char* message) { std::terminate(); } -FMT_FUNC void throw_format_error(const char* message) { - FMT_THROW(format_error(message)); -} - FMT_FUNC void format_error_code(detail::buffer& out, int error_code, string_view message) noexcept { // Report error code making sure that the output fits into @@ -58,8 +54,8 @@ FMT_FUNC void format_error_code(detail::buffer& out, int error_code, error_code_size += detail::to_unsigned(detail::count_digits(abs_value)); auto it = buffer_appender(out); if (message.size() <= inline_buffer_size - error_code_size) - format_to(it, FMT_STRING("{}{}"), message, SEP); - format_to(it, FMT_STRING("{}{}"), ERROR_STR, error_code); + fmt::format_to(it, FMT_STRING("{}{}"), message, SEP); + fmt::format_to(it, FMT_STRING("{}{}"), ERROR_STR, error_code); FMT_ASSERT(out.size() <= inline_buffer_size, ""); } @@ -73,9 +69,8 @@ FMT_FUNC void report_error(format_func func, int error_code, } // A wrapper around fwrite that throws on 
error. -inline void fwrite_fully(const void* ptr, size_t size, size_t count, - FILE* stream) { - size_t written = std::fwrite(ptr, size, count, stream); +inline void fwrite_fully(const void* ptr, size_t count, FILE* stream) { + size_t written = std::fwrite(ptr, 1, count, stream); if (written < count) FMT_THROW(system_error(errno, FMT_STRING("cannot write to file"))); } @@ -86,7 +81,7 @@ locale_ref::locale_ref(const Locale& loc) : locale_(&loc) { static_assert(std::is_same::value, ""); } -template Locale locale_ref::get() const { +template auto locale_ref::get() const -> Locale { static_assert(std::is_same::value, ""); return locale_ ? *static_cast(locale_) : std::locale(); } @@ -98,7 +93,8 @@ FMT_FUNC auto thousands_sep_impl(locale_ref loc) -> thousands_sep_result { auto thousands_sep = grouping.empty() ? Char() : facet.thousands_sep(); return {std::move(grouping), thousands_sep}; } -template FMT_FUNC Char decimal_point_impl(locale_ref loc) { +template +FMT_FUNC auto decimal_point_impl(locale_ref loc) -> Char { return std::use_facet>(loc.get()) .decimal_point(); } @@ -127,6 +123,10 @@ FMT_FUNC auto write_loc(appender out, loc_value value, } } // namespace detail +FMT_FUNC void throw_format_error(const char* message) { + FMT_THROW(format_error(message)); +} + template typename Locale::id format_facet::id; #ifndef FMT_STATIC_THOUSANDS_SEPARATOR @@ -144,24 +144,25 @@ FMT_API FMT_FUNC auto format_facet::do_put( } #endif -FMT_FUNC std::system_error vsystem_error(int error_code, string_view fmt, - format_args args) { +FMT_FUNC auto vsystem_error(int error_code, string_view fmt, format_args args) + -> std::system_error { auto ec = std::error_code(error_code, std::generic_category()); return std::system_error(ec, vformat(fmt, args)); } namespace detail { -template inline bool operator==(basic_fp x, basic_fp y) { +template +inline auto operator==(basic_fp x, basic_fp y) -> bool { return x.f == y.f && x.e == y.e; } // Compilers should be able to optimize this into the ror 
instruction. -FMT_CONSTEXPR inline uint32_t rotr(uint32_t n, uint32_t r) noexcept { +FMT_CONSTEXPR inline auto rotr(uint32_t n, uint32_t r) noexcept -> uint32_t { r &= 31; return (n >> r) | (n << (32 - r)); } -FMT_CONSTEXPR inline uint64_t rotr(uint64_t n, uint32_t r) noexcept { +FMT_CONSTEXPR inline auto rotr(uint64_t n, uint32_t r) noexcept -> uint64_t { r &= 63; return (n >> r) | (n << (64 - r)); } @@ -170,14 +171,14 @@ FMT_CONSTEXPR inline uint64_t rotr(uint64_t n, uint32_t r) noexcept { namespace dragonbox { // Computes upper 64 bits of multiplication of a 32-bit unsigned integer and a // 64-bit unsigned integer. -inline uint64_t umul96_upper64(uint32_t x, uint64_t y) noexcept { +inline auto umul96_upper64(uint32_t x, uint64_t y) noexcept -> uint64_t { return umul128_upper64(static_cast(x) << 32, y); } // Computes lower 128 bits of multiplication of a 64-bit unsigned integer and a // 128-bit unsigned integer. -inline uint128_fallback umul192_lower128(uint64_t x, - uint128_fallback y) noexcept { +inline auto umul192_lower128(uint64_t x, uint128_fallback y) noexcept + -> uint128_fallback { uint64_t high = x * y.high(); uint128_fallback high_low = umul128(x, y.low()); return {high + high_low.high(), high_low.low()}; @@ -185,12 +186,12 @@ inline uint128_fallback umul192_lower128(uint64_t x, // Computes lower 64 bits of multiplication of a 32-bit unsigned integer and a // 64-bit unsigned integer. -inline uint64_t umul96_lower64(uint32_t x, uint64_t y) noexcept { +inline auto umul96_lower64(uint32_t x, uint64_t y) noexcept -> uint64_t { return x * y; } // Various fast log computations. -inline int floor_log10_pow2_minus_log10_4_over_3(int e) noexcept { +inline auto floor_log10_pow2_minus_log10_4_over_3(int e) noexcept -> int { FMT_ASSERT(e <= 2936 && e >= -2985, "too large exponent"); return (e * 631305 - 261663) >> 21; } @@ -204,7 +205,7 @@ FMT_INLINE_VARIABLE constexpr struct { // divisible by pow(10, N). // Precondition: n <= pow(10, N + 1). 
template -bool check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept { +auto check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept -> bool { // The numbers below are chosen such that: // 1. floor(n/d) = floor(nm / 2^k) where d=10 or d=100, // 2. nm mod 2^k < m if and only if n is divisible by d, @@ -229,7 +230,7 @@ bool check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept { // Computes floor(n / pow(10, N)) for small n and N. // Precondition: n <= pow(10, N + 1). -template uint32_t small_division_by_pow10(uint32_t n) noexcept { +template auto small_division_by_pow10(uint32_t n) noexcept -> uint32_t { constexpr auto info = div_small_pow10_infos[N - 1]; FMT_ASSERT(n <= info.divisor * 10, "n is too large"); constexpr uint32_t magic_number = @@ -238,12 +239,12 @@ template uint32_t small_division_by_pow10(uint32_t n) noexcept { } // Computes floor(n / 10^(kappa + 1)) (float) -inline uint32_t divide_by_10_to_kappa_plus_1(uint32_t n) noexcept { +inline auto divide_by_10_to_kappa_plus_1(uint32_t n) noexcept -> uint32_t { // 1374389535 = ceil(2^37/100) return static_cast((static_cast(n) * 1374389535) >> 37); } // Computes floor(n / 10^(kappa + 1)) (double) -inline uint64_t divide_by_10_to_kappa_plus_1(uint64_t n) noexcept { +inline auto divide_by_10_to_kappa_plus_1(uint64_t n) noexcept -> uint64_t { // 2361183241434822607 = ceil(2^(64+7)/1000) return umul128_upper64(n, 2361183241434822607ull) >> 7; } @@ -255,7 +256,7 @@ template <> struct cache_accessor { using carrier_uint = float_info::carrier_uint; using cache_entry_type = uint64_t; - static uint64_t get_cached_power(int k) noexcept { + static auto get_cached_power(int k) noexcept -> uint64_t { FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, "k is out of range"); static constexpr const uint64_t pow10_significands[] = { @@ -297,20 +298,23 @@ template <> struct cache_accessor { bool is_integer; }; - static compute_mul_result compute_mul( - carrier_uint u, const cache_entry_type& cache) 
noexcept { + static auto compute_mul(carrier_uint u, + const cache_entry_type& cache) noexcept + -> compute_mul_result { auto r = umul96_upper64(u, cache); return {static_cast(r >> 32), static_cast(r) == 0}; } - static uint32_t compute_delta(const cache_entry_type& cache, - int beta) noexcept { + static auto compute_delta(const cache_entry_type& cache, int beta) noexcept + -> uint32_t { return static_cast(cache >> (64 - 1 - beta)); } - static compute_mul_parity_result compute_mul_parity( - carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept { + static auto compute_mul_parity(carrier_uint two_f, + const cache_entry_type& cache, + int beta) noexcept + -> compute_mul_parity_result { FMT_ASSERT(beta >= 1, ""); FMT_ASSERT(beta < 64, ""); @@ -319,22 +323,22 @@ template <> struct cache_accessor { static_cast(r >> (32 - beta)) == 0}; } - static carrier_uint compute_left_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return static_cast( (cache - (cache >> (num_significand_bits() + 2))) >> (64 - num_significand_bits() - 1 - beta)); } - static carrier_uint compute_right_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return static_cast( (cache + (cache >> (num_significand_bits() + 1))) >> (64 - num_significand_bits() - 1 - beta)); } - static carrier_uint compute_round_up_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return (static_cast( cache >> (64 - num_significand_bits() - 2 - beta)) + 1) / @@ -346,7 +350,7 @@ template <> struct cache_accessor { 
using carrier_uint = float_info::carrier_uint; using cache_entry_type = uint128_fallback; - static uint128_fallback get_cached_power(int k) noexcept { + static auto get_cached_power(int k) noexcept -> uint128_fallback { FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, "k is out of range"); @@ -985,8 +989,7 @@ template <> struct cache_accessor { {0xe0accfa875af45a7, 0x93eb1b80a33b8606}, {0x8c6c01c9498d8b88, 0xbc72f130660533c4}, {0xaf87023b9bf0ee6a, 0xeb8fad7c7f8680b5}, - { 0xdb68c2ca82ed2a05, - 0xa67398db9f6820e2 } + {0xdb68c2ca82ed2a05, 0xa67398db9f6820e2}, #else {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, @@ -1071,19 +1074,22 @@ template <> struct cache_accessor { bool is_integer; }; - static compute_mul_result compute_mul( - carrier_uint u, const cache_entry_type& cache) noexcept { + static auto compute_mul(carrier_uint u, + const cache_entry_type& cache) noexcept + -> compute_mul_result { auto r = umul192_upper128(u, cache); return {r.high(), r.low() == 0}; } - static uint32_t compute_delta(cache_entry_type const& cache, - int beta) noexcept { + static auto compute_delta(cache_entry_type const& cache, int beta) noexcept + -> uint32_t { return static_cast(cache.high() >> (64 - 1 - beta)); } - static compute_mul_parity_result compute_mul_parity( - carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept { + static auto compute_mul_parity(carrier_uint two_f, + const cache_entry_type& cache, + int beta) noexcept + -> compute_mul_parity_result { FMT_ASSERT(beta >= 1, ""); FMT_ASSERT(beta < 64, ""); @@ -1092,35 +1098,35 @@ template <> struct cache_accessor { ((r.high() << beta) | (r.low() >> (64 - beta))) == 0}; } - static carrier_uint compute_left_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return (cache.high() - (cache.high() 
>> (num_significand_bits() + 2))) >> (64 - num_significand_bits() - 1 - beta); } - static carrier_uint compute_right_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return (cache.high() + (cache.high() >> (num_significand_bits() + 1))) >> (64 - num_significand_bits() - 1 - beta); } - static carrier_uint compute_round_up_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return ((cache.high() >> (64 - num_significand_bits() - 2 - beta)) + 1) / 2; } }; -FMT_FUNC uint128_fallback get_cached_power(int k) noexcept { +FMT_FUNC auto get_cached_power(int k) noexcept -> uint128_fallback { return cache_accessor::get_cached_power(k); } // Various integer checks template -bool is_left_endpoint_integer_shorter_interval(int exponent) noexcept { +auto is_left_endpoint_integer_shorter_interval(int exponent) noexcept -> bool { const int case_shorter_interval_left_endpoint_lower_threshold = 2; const int case_shorter_interval_left_endpoint_upper_threshold = 3; return exponent >= case_shorter_interval_left_endpoint_lower_threshold && @@ -1132,7 +1138,7 @@ FMT_INLINE int remove_trailing_zeros(uint32_t& n, int s = 0) noexcept { FMT_ASSERT(n != 0, ""); // Modular inverse of 5 (mod 2^32): (mod_inv_5 * 5) mod 2^32 = 1. constexpr uint32_t mod_inv_5 = 0xcccccccd; - constexpr uint32_t mod_inv_25 = 0xc28f5c29; // = mod_inv_5 * mod_inv_5 + constexpr uint32_t mod_inv_25 = 0xc28f5c29; // = mod_inv_5 * mod_inv_5 while (true) { auto q = rotr(n * mod_inv_25, 2); @@ -1168,7 +1174,7 @@ FMT_INLINE int remove_trailing_zeros(uint64_t& n) noexcept { // If n is not divisible by 10^8, work with n itself. 
constexpr uint64_t mod_inv_5 = 0xcccccccccccccccd; - constexpr uint64_t mod_inv_25 = 0x8f5c28f5c28f5c29; // = mod_inv_5 * mod_inv_5 + constexpr uint64_t mod_inv_25 = 0x8f5c28f5c28f5c29; // mod_inv_5 * mod_inv_5 int s = 0; while (true) { @@ -1234,7 +1240,7 @@ FMT_INLINE decimal_fp shorter_interval_case(int exponent) noexcept { return ret_value; } -template decimal_fp to_decimal(T x) noexcept { +template auto to_decimal(T x) noexcept -> decimal_fp { // Step 1: integer promotion & Schubfach multiplier calculation. using carrier_uint = typename float_info::carrier_uint; @@ -1373,15 +1379,15 @@ template <> struct formatter { for (auto i = n.bigits_.size(); i > 0; --i) { auto value = n.bigits_[i - 1u]; if (first) { - out = format_to(out, FMT_STRING("{:x}"), value); + out = fmt::format_to(out, FMT_STRING("{:x}"), value); first = false; continue; } - out = format_to(out, FMT_STRING("{:08x}"), value); + out = fmt::format_to(out, FMT_STRING("{:08x}"), value); } if (n.exp_ > 0) - out = format_to(out, FMT_STRING("p{}"), - n.exp_ * detail::bigint::bigit_bits); + out = fmt::format_to(out, FMT_STRING("p{}"), + n.exp_ * detail::bigint::bigit_bits); return out; } }; @@ -1417,7 +1423,7 @@ FMT_FUNC void report_system_error(int error_code, report_error(format_system_error, error_code, message); } -FMT_FUNC std::string vformat(string_view fmt, format_args args) { +FMT_FUNC auto vformat(string_view fmt, format_args args) -> std::string { // Don't optimize the "{}" case to keep the binary size small and because it // can be better optimized in fmt::format anyway. 
auto buffer = memory_buffer(); @@ -1426,33 +1432,38 @@ FMT_FUNC std::string vformat(string_view fmt, format_args args) { } namespace detail { -#ifndef _WIN32 -FMT_FUNC bool write_console(std::FILE*, string_view) { return false; } +#if !defined(_WIN32) || defined(FMT_WINDOWS_NO_WCHAR) +FMT_FUNC auto write_console(int, string_view) -> bool { return false; } #else using dword = conditional_t; extern "C" __declspec(dllimport) int __stdcall WriteConsoleW( // void*, const void*, dword, dword*, void*); -FMT_FUNC bool write_console(std::FILE* f, string_view text) { - auto fd = _fileno(f); - if (!_isatty(fd)) return false; +FMT_FUNC bool write_console(int fd, string_view text) { auto u16 = utf8_to_utf16(text); - auto written = dword(); return WriteConsoleW(reinterpret_cast(_get_osfhandle(fd)), u16.c_str(), - static_cast(u16.size()), &written, nullptr) != 0; + static_cast(u16.size()), nullptr, nullptr) != 0; } +#endif +#ifdef _WIN32 // Print assuming legacy (non-Unicode) encoding. FMT_FUNC void vprint_mojibake(std::FILE* f, string_view fmt, format_args args) { auto buffer = memory_buffer(); - detail::vformat_to(buffer, fmt, - basic_format_args>(args)); - fwrite_fully(buffer.data(), 1, buffer.size(), f); + detail::vformat_to(buffer, fmt, args); + fwrite_fully(buffer.data(), buffer.size(), f); } #endif FMT_FUNC void print(std::FILE* f, string_view text) { - if (!write_console(f, text)) fwrite_fully(text.data(), 1, text.size(), f); +#ifdef _WIN32 + int fd = _fileno(f); + if (_isatty(fd)) { + std::fflush(f); + if (write_console(fd, text)) return; + } +#endif + fwrite_fully(text.data(), text.size(), f); } } // namespace detail diff --git a/src/fmt/format.h b/src/fmt/format.h index 87a34b972c..8cdf95b7bd 100644 --- a/src/fmt/format.h +++ b/src/fmt/format.h @@ -37,17 +37,28 @@ #include // uint32_t #include // std::memcpy #include // std::initializer_list -#include // std::numeric_limits -#include // std::uninitialized_copy -#include // std::runtime_error -#include // 
std::system_error +#include +#include // std::numeric_limits +#include // std::uninitialized_copy +#include // std::runtime_error +#include // std::system_error #ifdef __cpp_lib_bit_cast -# include // std::bitcast +# include // std::bit_cast #endif #include "core.h" +// libc++ supports string_view in pre-c++17. +#if FMT_HAS_INCLUDE() && \ + (FMT_CPLUSPLUS >= 201703L || defined(_LIBCPP_VERSION)) +# include +# define FMT_USE_STRING_VIEW +#elif FMT_HAS_INCLUDE("experimental/string_view") && FMT_CPLUSPLUS >= 201402L +# include +# define FMT_USE_EXPERIMENTAL_STRING_VIEW +#endif + #if defined __cpp_inline_variables && __cpp_inline_variables >= 201606L # define FMT_INLINE_VARIABLE inline #else @@ -65,25 +76,11 @@ # define FMT_FALLTHROUGH #endif -#ifndef FMT_DEPRECATED -# if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VERSION >= 1900 -# define FMT_DEPRECATED [[deprecated]] -# else -# if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__) -# define FMT_DEPRECATED __attribute__((deprecated)) -# elif FMT_MSC_VERSION -# define FMT_DEPRECATED __declspec(deprecated) -# else -# define FMT_DEPRECATED /* deprecated */ -# endif -# endif -#endif - #ifndef FMT_NO_UNIQUE_ADDRESS # if FMT_CPLUSPLUS >= 202002L # if FMT_HAS_CPP_ATTRIBUTE(no_unique_address) # define FMT_NO_UNIQUE_ADDRESS [[no_unique_address]] -// VS2019 v16.10 and later except clang-cl (https://reviews.llvm.org/D110485) +// VS2019 v16.10 and later except clang-cl (https://reviews.llvm.org/D110485). # elif (FMT_MSC_VERSION >= 1929) && !FMT_CLANG_VERSION # define FMT_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]] # endif @@ -93,10 +90,11 @@ # define FMT_NO_UNIQUE_ADDRESS #endif -#if FMT_GCC_VERSION || defined(__clang__) -# define FMT_VISIBILITY(value) __attribute__((visibility(value))) +// Visibility when compiled as a shared library/object. 
+#if defined(FMT_LIB_EXPORT) || defined(FMT_SHARED) +# define FMT_SO_VISIBILITY(value) FMT_VISIBILITY(value) #else -# define FMT_VISIBILITY(value) +# define FMT_SO_VISIBILITY(value) #endif #ifdef __has_builtin @@ -152,7 +150,10 @@ FMT_END_NAMESPACE #ifndef FMT_USE_USER_DEFINED_LITERALS // EDG based compilers (Intel, NVIDIA, Elbrus, etc), GCC and MSVC support UDLs. -# if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 407 || \ +// +// GCC before 4.9 requires a space in `operator"" _a` which is invalid in later +// compiler versions. +# if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 409 || \ FMT_MSC_VERSION >= 1900) && \ (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= /* UDL feature */ 480) # define FMT_USE_USER_DEFINED_LITERALS 1 @@ -272,20 +273,19 @@ inline auto ctzll(uint64_t x) -> int { FMT_END_NAMESPACE #endif +namespace std { +template <> struct iterator_traits { + using value_type = void; + using iterator_category = std::output_iterator_tag; +}; +template +struct iterator_traits> { + using value_type = void; + using iterator_category = std::output_iterator_tag; +}; +} // namespace std + FMT_BEGIN_NAMESPACE - -template struct disjunction : std::false_type {}; -template struct disjunction

: P {}; -template -struct disjunction - : conditional_t> {}; - -template struct conjunction : std::true_type {}; -template struct conjunction

: P {}; -template -struct conjunction - : conditional_t, P1> {}; - namespace detail { FMT_CONSTEXPR inline void abort_fuzzing_if(bool condition) { @@ -295,6 +295,15 @@ FMT_CONSTEXPR inline void abort_fuzzing_if(bool condition) { #endif } +#if defined(FMT_USE_STRING_VIEW) +template using std_string_view = std::basic_string_view; +#elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW) +template +using std_string_view = std::experimental::basic_string_view; +#else +template struct std_string_view {}; +#endif + template struct string_literal { static constexpr CharT value[sizeof...(C)] = {C...}; constexpr operator basic_string_view() const { @@ -307,37 +316,6 @@ template constexpr CharT string_literal::value[sizeof...(C)]; #endif -template class formatbuf : public Streambuf { - private: - using char_type = typename Streambuf::char_type; - using streamsize = decltype(std::declval().sputn(nullptr, 0)); - using int_type = typename Streambuf::int_type; - using traits_type = typename Streambuf::traits_type; - - buffer& buffer_; - - public: - explicit formatbuf(buffer& buf) : buffer_(buf) {} - - protected: - // The put area is always empty. This makes the implementation simpler and has - // the advantage that the streambuf and the buffer are always in sync and - // sputc never writes into uninitialized memory. A disadvantage is that each - // call to sputc always results in a (virtual) call to overflow. There is no - // disadvantage here for sputn since this always results in a call to xsputn. - - auto overflow(int_type ch) -> int_type override { - if (!traits_type::eq_int_type(ch, traits_type::eof())) - buffer_.push_back(static_cast(ch)); - return ch; - } - - auto xsputn(const char_type* s, streamsize count) -> streamsize override { - buffer_.append(s, s + count); - return count; - } -}; - // Implementation of std::bit_cast for pre-C++20. 
template FMT_CONSTEXPR20 auto bit_cast(const From& from) -> To { @@ -373,8 +351,8 @@ class uint128_fallback { constexpr uint128_fallback(uint64_t hi, uint64_t lo) : lo_(lo), hi_(hi) {} constexpr uint128_fallback(uint64_t value = 0) : lo_(value), hi_(0) {} - constexpr uint64_t high() const noexcept { return hi_; } - constexpr uint64_t low() const noexcept { return lo_; } + constexpr auto high() const noexcept -> uint64_t { return hi_; } + constexpr auto low() const noexcept -> uint64_t { return lo_; } template ::value)> constexpr explicit operator T() const { @@ -450,7 +428,7 @@ class uint128_fallback { hi_ &= n.hi_; } - FMT_CONSTEXPR20 uint128_fallback& operator+=(uint64_t n) noexcept { + FMT_CONSTEXPR20 auto operator+=(uint64_t n) noexcept -> uint128_fallback& { if (is_constant_evaluated()) { lo_ += n; hi_ += (lo_ < n ? 1 : 0); @@ -546,6 +524,52 @@ FMT_INLINE void assume(bool condition) { #endif } +// Extracts a reference to the container from back_insert_iterator. +template +inline auto get_container(std::back_insert_iterator it) + -> Container& { + using base = std::back_insert_iterator; + struct accessor : base { + accessor(base b) : base(b) {} + using base::container; + }; + return *accessor(it).container; +} + +template +FMT_CONSTEXPR auto copy_str(InputIt begin, InputIt end, OutputIt out) + -> OutputIt { + while (begin != end) *out++ = static_cast(*begin++); + return out; +} + +template , U>::value&& is_char::value)> +FMT_CONSTEXPR auto copy_str(T* begin, T* end, U* out) -> U* { + if (is_constant_evaluated()) return copy_str(begin, end, out); + auto size = to_unsigned(end - begin); + if (size > 0) memcpy(out, begin, size * sizeof(U)); + return out + size; +} + +template +auto copy_str(InputIt begin, InputIt end, appender out) -> appender { + get_container(out).append(begin, end); + return out; +} +template +auto copy_str(InputIt begin, InputIt end, back_insert_iterator out) + -> back_insert_iterator { + get_container(out).append(begin, end); + return out; +} 
+ +template +FMT_CONSTEXPR auto copy_str(R&& rng, OutputIt out) -> OutputIt { + return detail::copy_str(rng.begin(), rng.end(), out); +} + // An approximation of iterator_t for pre-C++20 systems. template using iterator_t = decltype(std::begin(std::declval())); @@ -740,7 +764,7 @@ inline auto compute_width(basic_string_view s) -> size_t { } // Computes approximate display width of a UTF-8 string. -FMT_CONSTEXPR inline size_t compute_width(string_view s) { +FMT_CONSTEXPR inline auto compute_width(string_view s) -> size_t { size_t num_code_points = 0; // It is not a lambda for compatibility with C++14. struct count_code_points { @@ -787,12 +811,17 @@ inline auto code_point_index(basic_string_view s, size_t n) -> size_t { // Calculates the index of the nth code point in a UTF-8 string. inline auto code_point_index(string_view s, size_t n) -> size_t { - const char* data = s.data(); - size_t num_code_points = 0; - for (size_t i = 0, size = s.size(); i != size; ++i) { - if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) return i; - } - return s.size(); + size_t result = s.size(); + const char* begin = s.begin(); + for_each_codepoint(s, [begin, &n, &result](uint32_t, string_view sv) { + if (n != 0) { + --n; + return true; + } + result = to_unsigned(sv.begin() - begin); + return false; + }); + return result; } inline auto code_point_index(basic_string_view s, size_t n) @@ -902,7 +931,7 @@ enum { inline_buffer_size = 500 }; **Example**:: auto out = fmt::memory_buffer(); - format_to(std::back_inserter(out), "The answer is {}.", 42); + fmt::format_to(std::back_inserter(out), "The answer is {}.", 42); This will append the following output to the ``out`` object: @@ -929,27 +958,29 @@ class basic_memory_buffer final : public detail::buffer { } protected: - FMT_CONSTEXPR20 void grow(size_t size) override { + static FMT_CONSTEXPR20 void grow(detail::buffer& buf, size_t size) { detail::abort_fuzzing_if(size > 5000); - const size_t max_size = 
std::allocator_traits::max_size(alloc_); - size_t old_capacity = this->capacity(); + auto& self = static_cast(buf); + const size_t max_size = + std::allocator_traits::max_size(self.alloc_); + size_t old_capacity = buf.capacity(); size_t new_capacity = old_capacity + old_capacity / 2; if (size > new_capacity) new_capacity = size; else if (new_capacity > max_size) new_capacity = size > max_size ? size : max_size; - T* old_data = this->data(); + T* old_data = buf.data(); T* new_data = - std::allocator_traits::allocate(alloc_, new_capacity); + std::allocator_traits::allocate(self.alloc_, new_capacity); // Suppress a bogus -Wstringop-overflow in gcc 13.1 (#3481). - detail::assume(this->size() <= new_capacity); + detail::assume(buf.size() <= new_capacity); // The following code doesn't throw, so the raw pointer above doesn't leak. - std::uninitialized_copy_n(old_data, this->size(), new_data); - this->set(new_data, new_capacity); + std::uninitialized_copy_n(old_data, buf.size(), new_data); + self.set(new_data, new_capacity); // deallocate must not throw according to the standard, but even if it does, // the buffer already uses the new storage and will deallocate it in // destructor. - if (old_data != store_) alloc_.deallocate(old_data, old_capacity); + if (old_data != self.store_) self.alloc_.deallocate(old_data, old_capacity); } public: @@ -958,7 +989,7 @@ class basic_memory_buffer final : public detail::buffer { FMT_CONSTEXPR20 explicit basic_memory_buffer( const Allocator& alloc = Allocator()) - : alloc_(alloc) { + : detail::buffer(grow), alloc_(alloc) { this->set(store_, SIZE); if (detail::is_constant_evaluated()) detail::fill_n(store_, SIZE, T()); } @@ -990,7 +1021,8 @@ class basic_memory_buffer final : public detail::buffer { of the other object to it. 
\endrst */ - FMT_CONSTEXPR20 basic_memory_buffer(basic_memory_buffer&& other) noexcept { + FMT_CONSTEXPR20 basic_memory_buffer(basic_memory_buffer&& other) noexcept + : detail::buffer(grow) { move(other); } @@ -1018,7 +1050,6 @@ class basic_memory_buffer final : public detail::buffer { /** Increases the buffer capacity to *new_capacity*. */ void reserve(size_t new_capacity) { this->try_reserve(new_capacity); } - // Directly append data into the buffer using detail::buffer::append; template void append(const ContiguousRange& range) { @@ -1034,7 +1065,7 @@ struct is_contiguous> : std::true_type { FMT_END_EXPORT namespace detail { -FMT_API bool write_console(std::FILE* f, string_view text); +FMT_API auto write_console(int fd, string_view text) -> bool; FMT_API void print(std::FILE*, string_view); } // namespace detail @@ -1046,7 +1077,7 @@ FMT_BEGIN_EXPORT #endif /** An error reported from a formatting function. */ -class FMT_VISIBILITY("default") format_error : public std::runtime_error { +class FMT_SO_VISIBILITY("default") format_error : public std::runtime_error { public: using std::runtime_error::runtime_error; }; @@ -1089,7 +1120,7 @@ class loc_value { loc_value(T) {} template auto visit(Visitor&& vis) -> decltype(vis(0)) { - return visit_format_arg(vis, value_); + return value_.visit(vis); } }; @@ -1153,13 +1184,13 @@ using uint32_or_64_or_128_t = template using uint64_or_128_t = conditional_t() <= 64, uint64_t, uint128_t>; -#define FMT_POWERS_OF_10(factor) \ - factor * 10, (factor)*100, (factor)*1000, (factor)*10000, (factor)*100000, \ - (factor)*1000000, (factor)*10000000, (factor)*100000000, \ - (factor)*1000000000 +#define FMT_POWERS_OF_10(factor) \ + factor * 10, (factor) * 100, (factor) * 1000, (factor) * 10000, \ + (factor) * 100000, (factor) * 1000000, (factor) * 10000000, \ + (factor) * 100000000, (factor) * 1000000000 // Converts value in the range [0, 100) to a string. 
-constexpr const char* digits2(size_t value) { +constexpr auto digits2(size_t value) -> const char* { // GCC generates slightly better code when value is pointer-size. return &"0001020304050607080910111213141516171819" "2021222324252627282930313233343536373839" @@ -1169,7 +1200,7 @@ constexpr const char* digits2(size_t value) { } // Sign is a template parameter to workaround a bug in gcc 4.8. -template constexpr Char sign(Sign s) { +template constexpr auto sign(Sign s) -> Char { #if !FMT_GCC_VERSION || FMT_GCC_VERSION >= 604 static_assert(std::is_same::value, ""); #endif @@ -1394,7 +1425,7 @@ FMT_CONSTEXPR inline auto format_uint(It out, UInt value, int num_digits, return out; } // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1). - char buffer[num_bits() / BASE_BITS + 1]; + char buffer[num_bits() / BASE_BITS + 1] = {}; format_uint(buffer, value, num_digits, upper); return detail::copy_str_noinline(buffer, buffer + num_digits, out); } @@ -1430,22 +1461,23 @@ template class to_utf8 { : "invalid utf32")); } operator string_view() const { return string_view(&buffer_[0], size()); } - size_t size() const { return buffer_.size() - 1; } - const char* c_str() const { return &buffer_[0]; } - std::string str() const { return std::string(&buffer_[0], size()); } + auto size() const -> size_t { return buffer_.size() - 1; } + auto c_str() const -> const char* { return &buffer_[0]; } + auto str() const -> std::string { return std::string(&buffer_[0], size()); } // Performs conversion returning a bool instead of throwing exception on // conversion error. This method may still throw in case of memory allocation // error. 
- bool convert(basic_string_view s, - to_utf8_error_policy policy = to_utf8_error_policy::abort) { + auto convert(basic_string_view s, + to_utf8_error_policy policy = to_utf8_error_policy::abort) + -> bool { if (!convert(buffer_, s, policy)) return false; buffer_.push_back(0); return true; } - static bool convert( - Buffer& buf, basic_string_view s, - to_utf8_error_policy policy = to_utf8_error_policy::abort) { + static auto convert(Buffer& buf, basic_string_view s, + to_utf8_error_policy policy = to_utf8_error_policy::abort) + -> bool { for (auto p = s.begin(); p != s.end(); ++p) { uint32_t c = static_cast(*p); if (sizeof(WChar) == 2 && c >= 0xd800 && c <= 0xdfff) { @@ -1481,7 +1513,7 @@ template class to_utf8 { }; // Computes 128-bit result of multiplication of two 64-bit unsigned integers. -inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept { +inline auto umul128(uint64_t x, uint64_t y) noexcept -> uint128_fallback { #if FMT_USE_INT128 auto p = static_cast(x) * static_cast(y); return {static_cast(p >> 64), static_cast(p)}; @@ -1512,19 +1544,19 @@ inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept { namespace dragonbox { // Computes floor(log10(pow(2, e))) for e in [-2620, 2620] using the method from // https://fmt.dev/papers/Dragonbox.pdf#page=28, section 6.1. -inline int floor_log10_pow2(int e) noexcept { +inline auto floor_log10_pow2(int e) noexcept -> int { FMT_ASSERT(e <= 2620 && e >= -2620, "too large exponent"); static_assert((-1 >> 1) == -1, "right shift is not arithmetic"); return (e * 315653) >> 20; } -inline int floor_log2_pow10(int e) noexcept { +inline auto floor_log2_pow10(int e) noexcept -> int { FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent"); return (e * 1741647) >> 19; } // Computes upper 64 bits of multiplication of two 64-bit unsigned integers. 
-inline uint64_t umul128_upper64(uint64_t x, uint64_t y) noexcept { +inline auto umul128_upper64(uint64_t x, uint64_t y) noexcept -> uint64_t { #if FMT_USE_INT128 auto p = static_cast(x) * static_cast(y); return static_cast(p >> 64); @@ -1537,14 +1569,14 @@ inline uint64_t umul128_upper64(uint64_t x, uint64_t y) noexcept { // Computes upper 128 bits of multiplication of a 64-bit unsigned integer and a // 128-bit unsigned integer. -inline uint128_fallback umul192_upper128(uint64_t x, - uint128_fallback y) noexcept { +inline auto umul192_upper128(uint64_t x, uint128_fallback y) noexcept + -> uint128_fallback { uint128_fallback r = umul128(x, y.high()); r += umul128_upper64(x, y.low()); return r; } -FMT_API uint128_fallback get_cached_power(int k) noexcept; +FMT_API auto get_cached_power(int k) noexcept -> uint128_fallback; // Type-specific information that Dragonbox uses. template struct float_info; @@ -1598,14 +1630,14 @@ template FMT_API auto to_decimal(T x) noexcept -> decimal_fp; } // namespace dragonbox // Returns true iff Float has the implicit bit which is not stored. -template constexpr bool has_implicit_bit() { +template constexpr auto has_implicit_bit() -> bool { // An 80-bit FP number has a 64-bit significand an no implicit bit. return std::numeric_limits::digits != 64; } // Returns the number of significand bits stored in Float. The implicit bit is // not counted since it is not stored. -template constexpr int num_significand_bits() { +template constexpr auto num_significand_bits() -> int { // std::numeric_limits may not support __float128. return is_float128() ? 112 : (std::numeric_limits::digits - @@ -1698,7 +1730,7 @@ using fp = basic_fp; // Normalizes the value converted from double and multiplied by (1 << SHIFT). template -FMT_CONSTEXPR basic_fp normalize(basic_fp value) { +FMT_CONSTEXPR auto normalize(basic_fp value) -> basic_fp { // Handle subnormals. 
const auto implicit_bit = F(1) << num_significand_bits(); const auto shifted_implicit_bit = implicit_bit << SHIFT; @@ -1715,7 +1747,7 @@ FMT_CONSTEXPR basic_fp normalize(basic_fp value) { } // Computes lhs * rhs / pow(2, 64) rounded to nearest with half-up tie breaking. -FMT_CONSTEXPR inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { +FMT_CONSTEXPR inline auto multiply(uint64_t lhs, uint64_t rhs) -> uint64_t { #if FMT_USE_INT128 auto product = static_cast<__uint128_t>(lhs) * rhs; auto f = static_cast(product >> 64); @@ -1732,33 +1764,10 @@ FMT_CONSTEXPR inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { #endif } -FMT_CONSTEXPR inline fp operator*(fp x, fp y) { +FMT_CONSTEXPR inline auto operator*(fp x, fp y) -> fp { return {multiply(x.f, y.f), x.e + y.e + 64}; } -template struct basic_data { - // For checking rounding thresholds. - // The kth entry is chosen to be the smallest integer such that the - // upper 32-bits of 10^(k+1) times it is strictly bigger than 5 * 10^k. - static constexpr uint32_t fractional_part_rounding_thresholds[8] = { - 2576980378U, // ceil(2^31 + 2^32/10^1) - 2190433321U, // ceil(2^31 + 2^32/10^2) - 2151778616U, // ceil(2^31 + 2^32/10^3) - 2147913145U, // ceil(2^31 + 2^32/10^4) - 2147526598U, // ceil(2^31 + 2^32/10^5) - 2147487943U, // ceil(2^31 + 2^32/10^6) - 2147484078U, // ceil(2^31 + 2^32/10^7) - 2147483691U // ceil(2^31 + 2^32/10^8) - }; -}; -// This is a struct rather than an alias to avoid shadowing warnings in gcc. 
-struct data : basic_data<> {}; - -#if FMT_CPLUSPLUS < 201703L -template -constexpr uint32_t basic_data::fractional_part_rounding_thresholds[]; -#endif - template () == num_bits()> using convert_float_result = conditional_t::value || doublish, double, T>; @@ -1939,15 +1948,11 @@ auto write_escaped_cp(OutputIt out, const find_escape_result& escape) *out++ = static_cast('\\'); break; default: - if (escape.cp < 0x100) { - return write_codepoint<2, Char>(out, 'x', escape.cp); - } - if (escape.cp < 0x10000) { + if (escape.cp < 0x100) return write_codepoint<2, Char>(out, 'x', escape.cp); + if (escape.cp < 0x10000) return write_codepoint<4, Char>(out, 'u', escape.cp); - } - if (escape.cp < 0x110000) { + if (escape.cp < 0x110000) return write_codepoint<8, Char>(out, 'U', escape.cp); - } for (Char escape_char : basic_string_view( escape.begin, to_unsigned(escape.end - escape.begin))) { out = write_codepoint<2, Char>(out, 'x', @@ -1977,11 +1982,13 @@ auto write_escaped_string(OutputIt out, basic_string_view str) template auto write_escaped_char(OutputIt out, Char v) -> OutputIt { + Char v_array[1] = {v}; *out++ = static_cast('\''); if ((needs_escape(static_cast(v)) && v != static_cast('"')) || v == static_cast('\'')) { - out = write_escaped_cp( - out, find_escape_result{&v, &v + 1, static_cast(v)}); + out = write_escaped_cp(out, + find_escape_result{v_array, v_array + 1, + static_cast(v)}); } else { *out++ = v; } @@ -2070,10 +2077,10 @@ template class digit_grouping { std::string::const_iterator group; int pos; }; - next_state initial_state() const { return {grouping_.begin(), 0}; } + auto initial_state() const -> next_state { return {grouping_.begin(), 0}; } // Returns the next digit group separator position. 
- int next(next_state& state) const { + auto next(next_state& state) const -> int { if (thousands_sep_.empty()) return max_value(); if (state.group == grouping_.end()) return state.pos += grouping_.back(); if (*state.group <= 0 || *state.group == max_value()) @@ -2092,9 +2099,9 @@ template class digit_grouping { digit_grouping(std::string grouping, std::basic_string sep) : grouping_(std::move(grouping)), thousands_sep_(std::move(sep)) {} - bool has_separator() const { return !thousands_sep_.empty(); } + auto has_separator() const -> bool { return !thousands_sep_.empty(); } - int count_separators(int num_digits) const { + auto count_separators(int num_digits) const -> int { int count = 0; auto state = initial_state(); while (num_digits > next(state)) ++count; @@ -2103,7 +2110,7 @@ template class digit_grouping { // Applies grouping to digits and write the output to out. template - Out apply(Out out, basic_string_view digits) const { + auto apply(Out out, basic_string_view digits) const -> Out { auto num_digits = static_cast(digits.size()); auto separators = basic_memory_buffer(); separators.push_back(0); @@ -2126,24 +2133,66 @@ template class digit_grouping { } }; +FMT_CONSTEXPR inline void prefix_append(unsigned& prefix, unsigned value) { + prefix |= prefix != 0 ? value << 8 : value; + prefix += (1u + (value > 0xff ? 1 : 0)) << 24; +} + // Writes a decimal integer with digit grouping. template auto write_int(OutputIt out, UInt value, unsigned prefix, const format_specs& specs, const digit_grouping& grouping) -> OutputIt { static_assert(std::is_same, UInt>::value, ""); - int num_digits = count_digits(value); - char digits[40]; - format_decimal(digits, value, num_digits); - unsigned size = to_unsigned((prefix != 0 ? 
1 : 0) + num_digits + - grouping.count_separators(num_digits)); + int num_digits = 0; + auto buffer = memory_buffer(); + switch (specs.type) { + case presentation_type::none: + case presentation_type::dec: { + num_digits = count_digits(value); + format_decimal(appender(buffer), value, num_digits); + break; + } + case presentation_type::hex_lower: + case presentation_type::hex_upper: { + bool upper = specs.type == presentation_type::hex_upper; + if (specs.alt) + prefix_append(prefix, unsigned(upper ? 'X' : 'x') << 8 | '0'); + num_digits = count_digits<4>(value); + format_uint<4, char>(appender(buffer), value, num_digits, upper); + break; + } + case presentation_type::bin_lower: + case presentation_type::bin_upper: { + bool upper = specs.type == presentation_type::bin_upper; + if (specs.alt) + prefix_append(prefix, unsigned(upper ? 'B' : 'b') << 8 | '0'); + num_digits = count_digits<1>(value); + format_uint<1, char>(appender(buffer), value, num_digits); + break; + } + case presentation_type::oct: { + num_digits = count_digits<3>(value); + // Octal prefix '0' is counted as a digit, so only add it if precision + // is not greater than the number of digits. + if (specs.alt && specs.precision <= num_digits && value != 0) + prefix_append(prefix, '0'); + format_uint<3, char>(appender(buffer), value, num_digits); + break; + } + case presentation_type::chr: + return write_char(out, static_cast(value), specs); + default: + throw_format_error("invalid format specifier"); + } + + unsigned size = (prefix != 0 ? 
prefix >> 24 : 0) + to_unsigned(num_digits) + + to_unsigned(grouping.count_separators(num_digits)); return write_padded( out, specs, size, size, [&](reserve_iterator it) { - if (prefix != 0) { - char sign = static_cast(prefix); - *it++ = static_cast(sign); - } - return grouping.apply(it, string_view(digits, to_unsigned(num_digits))); + for (unsigned p = prefix & 0xffffff; p != 0; p >>= 8) + *it++ = static_cast(p & 0xff); + return grouping.apply(it, string_view(buffer.data(), buffer.size())); }); } @@ -2156,11 +2205,6 @@ inline auto write_loc(OutputIt, loc_value, const format_specs&, return false; } -FMT_CONSTEXPR inline void prefix_append(unsigned& prefix, unsigned value) { - prefix |= prefix != 0 ? value << 8 : value; - prefix += (1u + (value > 0xff ? 1 : 0)) << 24; -} - template struct write_int_arg { UInt abs_value; unsigned prefix; @@ -2307,25 +2351,25 @@ class counting_iterator { FMT_CONSTEXPR counting_iterator() : count_(0) {} - FMT_CONSTEXPR size_t count() const { return count_; } + FMT_CONSTEXPR auto count() const -> size_t { return count_; } - FMT_CONSTEXPR counting_iterator& operator++() { + FMT_CONSTEXPR auto operator++() -> counting_iterator& { ++count_; return *this; } - FMT_CONSTEXPR counting_iterator operator++(int) { + FMT_CONSTEXPR auto operator++(int) -> counting_iterator { auto it = *this; ++*this; return it; } - FMT_CONSTEXPR friend counting_iterator operator+(counting_iterator it, - difference_type n) { + FMT_CONSTEXPR friend auto operator+(counting_iterator it, difference_type n) + -> counting_iterator { it.count_ += static_cast(n); return it; } - FMT_CONSTEXPR value_type operator*() const { return {}; } + FMT_CONSTEXPR auto operator*() const -> value_type { return {}; } }; template @@ -2360,9 +2404,10 @@ template FMT_CONSTEXPR auto write(OutputIt out, const Char* s, const format_specs& specs, locale_ref) -> OutputIt { - return specs.type != presentation_type::pointer - ? 
write(out, basic_string_view(s), specs, {}) - : write_ptr(out, bit_cast(s), &specs); + if (specs.type == presentation_type::pointer) + return write_ptr(out, bit_cast(s), &specs); + if (!s) throw_format_error("string pointer is null"); + return write(out, basic_string_view(s), specs, {}); } template -FMT_CONSTEXPR auto parse_float_type_spec(const format_specs& specs, - ErrorHandler&& eh = {}) +template +FMT_CONSTEXPR auto parse_float_type_spec(const format_specs& specs) -> float_specs { auto result = float_specs(); result.showpoint = specs.alt; @@ -2486,7 +2530,7 @@ FMT_CONSTEXPR auto parse_float_type_spec(const format_specs& specs, result.format = float_format::hex; break; default: - eh.on_error("invalid format specifier"); + throw_format_error("invalid format specifier"); break; } return result; @@ -2725,12 +2769,12 @@ template class fallback_digit_grouping { public: constexpr fallback_digit_grouping(locale_ref, bool) {} - constexpr bool has_separator() const { return false; } + constexpr auto has_separator() const -> bool { return false; } - constexpr int count_separators(int) const { return 0; } + constexpr auto count_separators(int) const -> int { return 0; } template - constexpr Out apply(Out out, basic_string_view) const { + constexpr auto apply(Out out, basic_string_view) const -> Out { return out; } }; @@ -2749,7 +2793,7 @@ FMT_CONSTEXPR20 auto write_float(OutputIt out, const DecimalFP& f, } } -template constexpr bool isnan(T value) { +template constexpr auto isnan(T value) -> bool { return !(value >= value); // std::isnan doesn't support __float128. 
} @@ -2762,14 +2806,14 @@ struct has_isfinite> template ::value&& has_isfinite::value)> -FMT_CONSTEXPR20 bool isfinite(T value) { +FMT_CONSTEXPR20 auto isfinite(T value) -> bool { constexpr T inf = T(std::numeric_limits::infinity()); if (is_constant_evaluated()) return !detail::isnan(value) && value < inf && value > -inf; return std::isfinite(value); } template ::value)> -FMT_CONSTEXPR bool isfinite(T value) { +FMT_CONSTEXPR auto isfinite(T value) -> bool { T inf = T(std::numeric_limits::infinity()); // std::isfinite doesn't support __float128. return !detail::isnan(value) && value < inf && value > -inf; @@ -2806,10 +2850,10 @@ class bigint { basic_memory_buffer bigits_; int exp_; - FMT_CONSTEXPR20 bigit operator[](int index) const { + FMT_CONSTEXPR20 auto operator[](int index) const -> bigit { return bigits_[to_unsigned(index)]; } - FMT_CONSTEXPR20 bigit& operator[](int index) { + FMT_CONSTEXPR20 auto operator[](int index) -> bigit& { return bigits_[to_unsigned(index)]; } @@ -2905,11 +2949,11 @@ class bigint { assign(uint64_or_128_t(n)); } - FMT_CONSTEXPR20 int num_bigits() const { + FMT_CONSTEXPR20 auto num_bigits() const -> int { return static_cast(bigits_.size()) + exp_; } - FMT_NOINLINE FMT_CONSTEXPR20 bigint& operator<<=(int shift) { + FMT_NOINLINE FMT_CONSTEXPR20 auto operator<<=(int shift) -> bigint& { FMT_ASSERT(shift >= 0, ""); exp_ += shift / bigit_bits; shift %= bigit_bits; @@ -2924,13 +2968,15 @@ class bigint { return *this; } - template FMT_CONSTEXPR20 bigint& operator*=(Int value) { + template + FMT_CONSTEXPR20 auto operator*=(Int value) -> bigint& { FMT_ASSERT(value > 0, ""); multiply(uint32_or_64_or_128_t(value)); return *this; } - friend FMT_CONSTEXPR20 int compare(const bigint& lhs, const bigint& rhs) { + friend FMT_CONSTEXPR20 auto compare(const bigint& lhs, const bigint& rhs) + -> int { int num_lhs_bigits = lhs.num_bigits(), num_rhs_bigits = rhs.num_bigits(); if (num_lhs_bigits != num_rhs_bigits) return num_lhs_bigits > num_rhs_bigits ? 
1 : -1; @@ -2947,8 +2993,9 @@ class bigint { } // Returns compare(lhs1 + lhs2, rhs). - friend FMT_CONSTEXPR20 int add_compare(const bigint& lhs1, const bigint& lhs2, - const bigint& rhs) { + friend FMT_CONSTEXPR20 auto add_compare(const bigint& lhs1, + const bigint& lhs2, const bigint& rhs) + -> int { auto minimum = [](int a, int b) { return a < b ? a : b; }; auto maximum = [](int a, int b) { return a > b ? a : b; }; int max_lhs_bigits = maximum(lhs1.num_bigits(), lhs2.num_bigits()); @@ -3029,13 +3076,13 @@ class bigint { bigits_.resize(to_unsigned(num_bigits + exp_difference)); for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j) bigits_[j] = bigits_[i]; - std::uninitialized_fill_n(bigits_.data(), exp_difference, 0); + std::uninitialized_fill_n(bigits_.data(), exp_difference, 0u); exp_ -= exp_difference; } // Divides this bignum by divisor, assigning the remainder to this and // returning the quotient. - FMT_CONSTEXPR20 int divmod_assign(const bigint& divisor) { + FMT_CONSTEXPR20 auto divmod_assign(const bigint& divisor) -> int { FMT_ASSERT(this != &divisor, ""); if (compare(*this, divisor) < 0) return 0; FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, ""); @@ -3178,8 +3225,10 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp value, } if (buf[0] == overflow) { buf[0] = '1'; - if ((flags & dragon::fixed) != 0) buf.push_back('0'); - else ++exp10; + if ((flags & dragon::fixed) != 0) + buf.push_back('0'); + else + ++exp10; } return; } @@ -3276,6 +3325,17 @@ FMT_CONSTEXPR20 void format_hexfloat(Float value, int precision, format_hexfloat(static_cast(value), precision, specs, buf); } +constexpr auto fractional_part_rounding_thresholds(int index) -> uint32_t { + // For checking rounding thresholds. + // The kth entry is chosen to be the smallest integer such that the + // upper 32-bits of 10^(k+1) times it is strictly bigger than 5 * 10^k. + // It is equal to ceil(2^31 + 2^32/10^(k + 1)). 
+ // These are stored in a string literal because we cannot have static arrays + // in constexpr functions and non-static ones are poorly optimized. + return U"\x9999999a\x828f5c29\x80418938\x80068db9\x8000a7c6\x800010c7" + U"\x800001ae\x8000002b"[index]; +} + template FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, buffer& buf) -> int { @@ -3480,12 +3540,12 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, // fractional part is strictly larger than 1/2. if (precision < 9) { uint32_t fractional_part = static_cast(prod); - should_round_up = fractional_part >= - data::fractional_part_rounding_thresholds - [8 - number_of_digits_to_print] || - ((fractional_part >> 31) & - ((digits & 1) | (second_third_subsegments != 0) | - has_more_segments)) != 0; + should_round_up = + fractional_part >= fractional_part_rounding_thresholds( + 8 - number_of_digits_to_print) || + ((fractional_part >> 31) & + ((digits & 1) | (second_third_subsegments != 0) | + has_more_segments)) != 0; } // Rounding at the subsegment boundary. // In this case, the fractional part is at least 1/2 if and only if @@ -3520,12 +3580,12 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, // of 19 digits, so in this case the third segment should be // consisting of a genuine digit from the input. uint32_t fractional_part = static_cast(prod); - should_round_up = fractional_part >= - data::fractional_part_rounding_thresholds - [8 - number_of_digits_to_print] || - ((fractional_part >> 31) & - ((digits & 1) | (third_subsegment != 0) | - has_more_segments)) != 0; + should_round_up = + fractional_part >= fractional_part_rounding_thresholds( + 8 - number_of_digits_to_print) || + ((fractional_part >> 31) & + ((digits & 1) | (third_subsegment != 0) | + has_more_segments)) != 0; } // Rounding at the subsegment boundary. 
else { @@ -3726,8 +3786,7 @@ FMT_CONSTEXPR auto write(OutputIt out, Char value) -> OutputIt { } template -FMT_CONSTEXPR_CHAR_TRAITS auto write(OutputIt out, const Char* value) - -> OutputIt { +FMT_CONSTEXPR20 auto write(OutputIt out, const Char* value) -> OutputIt { if (value) return write(out, basic_string_view(value)); throw_format_error("string pointer is null"); return out; @@ -3757,8 +3816,11 @@ template enable_if_t::value == type::custom_type, OutputIt> { + auto formatter = typename Context::template formatter_type(); + auto parse_ctx = typename Context::parse_context_type({}); + formatter.parse(parse_ctx); auto ctx = Context(out, {}, {}); - return typename Context::template formatter_type().format(value, ctx); + return formatter.format(value, ctx); } // An argument visitor that formats the argument and writes it via the output @@ -3801,62 +3863,39 @@ template struct arg_formatter { } }; -template struct custom_formatter { - basic_format_parse_context& parse_ctx; - buffer_context& ctx; - - void operator()( - typename basic_format_arg>::handle h) const { - h.format(parse_ctx, ctx); - } - template void operator()(T) const {} -}; - -template class width_checker { - public: - explicit FMT_CONSTEXPR width_checker(ErrorHandler& eh) : handler_(eh) {} - +struct width_checker { template ::value)> FMT_CONSTEXPR auto operator()(T value) -> unsigned long long { - if (is_negative(value)) handler_.on_error("negative width"); + if (is_negative(value)) throw_format_error("negative width"); return static_cast(value); } template ::value)> FMT_CONSTEXPR auto operator()(T) -> unsigned long long { - handler_.on_error("width is not integer"); + throw_format_error("width is not integer"); return 0; } - - private: - ErrorHandler& handler_; }; -template class precision_checker { - public: - explicit FMT_CONSTEXPR precision_checker(ErrorHandler& eh) : handler_(eh) {} - +struct precision_checker { template ::value)> FMT_CONSTEXPR auto operator()(T value) -> unsigned long long { - if 
(is_negative(value)) handler_.on_error("negative precision"); + if (is_negative(value)) throw_format_error("negative precision"); return static_cast(value); } template ::value)> FMT_CONSTEXPR auto operator()(T) -> unsigned long long { - handler_.on_error("precision is not integer"); + throw_format_error("precision is not integer"); return 0; } - - private: - ErrorHandler& handler_; }; -template