diff --git a/src/KOKKOS/fix_wall_lj93_kokkos.cpp b/src/KOKKOS/fix_wall_lj93_kokkos.cpp index fe75c92a8a..d855273035 100644 --- a/src/KOKKOS/fix_wall_lj93_kokkos.cpp +++ b/src/KOKKOS/fix_wall_lj93_kokkos.cpp @@ -28,8 +28,6 @@ #include "variable.h" #include "update.h" -#include - using namespace LAMMPS_NS; using MathSpecial::powint; @@ -44,7 +42,6 @@ FixWallLJ93Kokkos::FixWallLJ93Kokkos(LAMMPS *lmp, int narg, char **a execution_space = ExecutionSpaceFromDevice::space; datamask_read = X_MASK | V_MASK | MASK_MASK; datamask_modify = F_MASK; - virial_global_flag = virial_peratom_flag = 0; memoryKK->create_kokkos(k_epsilon,6,"wall_lj93:epsilon"); memoryKK->create_kokkos(k_sigma,6,"wall_lj93:sigma"); @@ -94,7 +91,7 @@ FixWallLJ93Kokkos::~FixWallLJ93Kokkos() memoryKK->destroy_kokkos(d_coeff4); memoryKK->destroy_kokkos(d_offset); - //std::cerr << "ok 2\n"; + memoryKK->destroy_kokkos(k_vatom,vatom); } @@ -120,9 +117,6 @@ void FixWallLJ93Kokkos::precompute(int m) template void FixWallLJ93Kokkos::post_force(int vflag) { - - //std::cerr << "post_force DeviceType=" << DeviceType << "\n"; - atomKK->sync(execution_space,datamask_read); atomKK->modified(execution_space,datamask_modify); @@ -130,12 +124,19 @@ void FixWallLJ93Kokkos::post_force(int vflag) v_init(vflag); + // reallocate per-atom arrays if necessary + + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"wall_lj93:vatom"); + d_vatom = k_vatom.template view(); + } + // energy intialize. // eflag is used to track whether wall energies have been communicated. eflag = 0; - //for (int m = 0; m <= nwall; m++) d_ewall(m) = 0.0; - for (int m = 0; m <= nwall; m++) k_ewall.d_view(m) = 0.0; + for (int m = 0; m <= nwall; m++) d_ewall(m) = 0.0; // coord = current position of wall // evaluate variables if necessary, wrap with clear/add @@ -144,6 +145,7 @@ void FixWallLJ93Kokkos::post_force(int vflag) if (varflag) modify->clearstep_compute(); double coord; + for (int m = 0; m < nwall; m++) { if (xstyle[m] == VARIABLE) { coord = input->variable->compute_equal(xindex[m]); @@ -168,12 +170,22 @@ void FixWallLJ93Kokkos::post_force(int vflag) } wall_particle(m, wallwhich[m], coord); + } k_ewall.template modify(); k_ewall.template sync(); if (varflag) modify->addstep_compute(update->ntimestep + 1); + + atomKK->modified(execution_space,F_MASK); + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + } @@ -194,31 +206,35 @@ void FixWallLJ93Kokkos::wall_particle(int m_in, int which, double co d_f = atomKK->k_f.template view(); d_mask = atomKK->k_mask.template view(); int nlocal = atomKK->nlocal; - double tmp[7]; dim = which / 2; side = which % 2; if (side == 0) side = -1; + double result[13]; + copymode = 1; - FixWallLJ93KokkosFunctor wp_functor(this); - Kokkos::parallel_reduce(nlocal,wp_functor,tmp); + FixWallLJ93KokkosFunctor functor(this); + Kokkos::parallel_reduce(nlocal,functor,result); copymode = 0; - //std::cerr << fmt::format("tmp[0]={} tmp[{}]={} \n",tmp[0],m+1,tmp[m+1]); - - - Kokkos::atomic_add(&(d_ewall[0]),tmp[0]); - Kokkos::atomic_add(&(d_ewall[m+1]),tmp[m+1]); - - //std::cerr << fmt::format("k_ewall.d_view[0]={} k_ewall.d_view[{}]={} \n",k_ewall.d_view[0],m+1,k_ewall.d_view[m+1]); + Kokkos::atomic_add(&(d_ewall[0]),result[0]); + Kokkos::atomic_add(&(d_ewall[m+1]),result[m+1]); + if (vflag_global) { + virial[0] += result[7]; + virial[1] += result[8]; + virial[2] += result[9]; + virial[3] += result[10]; + virial[4] += result[11]; + virial[5] += result[12]; + } } template KOKKOS_INLINE_FUNCTION -void FixWallLJ93Kokkos::wall_particle_item(int i, value_type ewall) const { +void FixWallLJ93Kokkos::wall_particle_item(int i, value_type result) const { if (d_mask(i) & groupbit) { double delta; if (side < 0) delta = d_x(i,dim) - coord; @@ -232,37 +248,46 @@ void FixWallLJ93Kokkos::wall_particle_item(int i, value_type ewall) double r10inv = r4inv*r4inv*r2inv; double fwall = side * (d_coeff1(m)*r10inv - d_coeff2(m)*r4inv); d_f(i,dim) -= fwall; - ewall[0] += d_coeff3(m)*r4inv*r4inv*rinv - d_coeff4(m)*r2inv*rinv - d_offset(m); - ewall[m+1] += fwall; + result[0] += d_coeff3(m)*r4inv*r4inv*rinv - d_coeff4(m)*r2inv*rinv - d_offset(m); + result[m+1] += fwall; + + if (evflag) { + double vn; + if (side < 0) + vn = -fwall * delta; + else + vn = fwall * delta; + v_tally(result, dim, i, vn); + } + } } /* ---------------------------------------------------------------------- - energy of wall interaction + tally virial component into global and per-atom accumulators + n = index of virial component (0-5) + i = local index of atom + vn = nth component of virial for the interaction + increment nth component of global virial by vn + increment nth component of per-atom virial by vn + this method can be used when fix computes forces in post_force() + and the force depends on a distance to some external object + e.g. fix wall/lj93: compute virial only on owned atoms ------------------------------------------------------------------------- */ -/* template -double FixWallLJ93Kokkos::compute_scalar() +KOKKOS_INLINE_FUNCTION +void FixWallLJ93Kokkos::v_tally(value_type result, int n, int i, double vn) const { - // only sum across procs one time - //std::cerr << fmt::format("k_ewall[0] = {} d_ewall[0] = {}\n", k_ewall.h_view[0], k_ewall.d_view[0] ); + if (vflag_global) + result[n+7] += vn; - k_ewall.template sync(); - - if (eflag == 0) { - MPI_Allreduce(k_ewall.h_view.data(), ewall_all, 7, MPI_DOUBLE, MPI_SUM, world); - eflag = 1; - } - - std::cerr << fmt::format("compute_scalar() = {}\n", ewall_all[0]); - return ewall_all[0]; + if (vflag_atom) + Kokkos::atomic_add(&(d_vatom(i,n)),vn); } -*/ - namespace LAMMPS_NS { template class FixWallLJ93Kokkos; diff --git a/src/KOKKOS/fix_wall_lj93_kokkos.h b/src/KOKKOS/fix_wall_lj93_kokkos.h index 26b50052fd..915dcd429a 100644 --- a/src/KOKKOS/fix_wall_lj93_kokkos.h +++ b/src/KOKKOS/fix_wall_lj93_kokkos.h @@ -28,6 +28,7 @@ FixStyle(wall/lj93/kk/host,FixWallLJ93Kokkos); namespace LAMMPS_NS { + template class FixWallLJ93Kokkos : public FixWallLJ93 { public: @@ -54,6 +55,9 @@ class FixWallLJ93Kokkos : public FixWallLJ93 { typename AT::t_f_array d_f; typename AT::t_int_1d d_mask; + DAT::tdual_virial_array k_vatom; + typename AT::t_virial_array d_vatom; + typename AT::tdual_ffloat_1d k_epsilon,k_sigma,k_cutoff; typename AT::t_ffloat_1d d_epsilon,d_sigma,d_cutoff; @@ -62,25 +66,34 @@ class FixWallLJ93Kokkos : public FixWallLJ93 { typename AT::tdual_ffloat_1d k_ewall; typename AT::t_ffloat_1d d_ewall; + KOKKOS_INLINE_FUNCTION + void v_tally(value_type, int, int, double) const; + + }; + template -struct FixWallLJ93KokkosFunctor { - typedef DeviceType device_type ; +struct FixWallLJ93KokkosFunctor { + typedef DeviceType device_type; typedef double value_type[]; const int value_count; - FixWallLJ93Kokkos c; + FixWallLJ93KokkosFunctor(FixWallLJ93Kokkos* c_ptr): - //value_count(c_ptr->m+1), c(*c_ptr) {} - value_count(7), c(*c_ptr) {} + value_count(13), c(*c_ptr) {} KOKKOS_INLINE_FUNCTION - void operator()(const int i, value_type ewall) const { - c.wall_particle_item(i,ewall); + void init(value_type result) const { + for (int i=0 ; i<13 ; i++ ) result[i] = 0.0; } -}; + KOKKOS_INLINE_FUNCTION + void operator()(const int i, value_type result) const { + c.wall_particle_item(i,result); + } + +}; } diff --git a/unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml b/unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml index 1756751e5e..a5eff76773 100644 --- a/unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml +++ b/unittest/force-styles/tests/fix-timestep-wall_lj93_const.yaml @@ -1,82 +1,83 @@ --- -lammps_version: 17 Feb 2022 -date_generated: Fri Mar 18 22:18:01 2022 +lammps_version: 27 Jun 2024 +tags: generated +date_generated: Fri Aug 2 23:56:34 2024 epsilon: 2e-14 skip_tests: prerequisites: ! | atom full fix wall/lj93 pre_commands: ! | - boundary p f p + boundary f f f post_commands: ! | fix move all nve - fix test solute wall/lj93 ylo EDGE 100.0 2.0 5.0 yhi EDGE 100.0 2.0 5.0 -# fix_modify test virial yes + fix test solute wall/lj93 xlo EDGE 10 1 9 xhi EDGE 20 2 8 ylo EDGE 30 3 7 yhi EDGE 40 4 6 zlo EDGE 50 9 1 zhi EDGE 60 8 2 + fix_modify test virial yes input_file: in.fourmol natoms: 29 -run_stress: ! |2- - 0.0000000000000000e+00 -5.0602303343219951e+01 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -global_scalar: -4.108987997282236 +run_stress: ! |- + -2.0397146716028331e+01 -2.2856621254611628e+02 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +global_scalar: -40.30685365068726 global_vector: ! |- - 2 0 -11.164431050504302 + 6 0.29980999393772645 -3.0501185308112175 10.543906177602482 -35.13125907303744 0 0 run_pos: ! |2 - 1 -2.7045470054680359e-01 2.4914748797509958e+00 -1.6696421170224640e-01 - 2 3.1002519717316468e-01 2.9626113274873114e+00 -8.5465020014164161e-01 - 3 -7.0398162067377834e-01 1.2305605729036173e+00 -6.2777311164097793e-01 - 4 -1.5818159800097509e+00 1.4837403071563420e+00 -1.2538711657744748e+00 - 5 -9.0719738875553246e-01 9.2652132637432294e-01 3.9954221603268608e-01 - 6 2.4831723728788374e-01 2.8313028009631025e-01 -1.2314233417272680e+00 - 7 3.4143527622588610e-01 -2.2646550717066182e-02 -2.5292291415442443e+00 - 8 1.1743552225802836e+00 -4.8863228494885080e-01 -6.3783432950414043e-01 - 9 1.3800524229825968e+00 -2.5274721033806635e-01 2.8353985887329919e-01 - 10 2.0510765220590312e+00 -1.4604063740279585e+00 -9.8323745081970004e-01 - 11 1.7878031944447623e+00 -1.9921863272956677e+00 -1.8890602447628950e+00 - 12 3.0063007039452563e+00 -4.9013350497696184e-01 -1.6231898107433360e+00 - 13 4.0515402959196631e+00 -8.9202011606671716e-01 -1.6400005529926165e+00 - 14 2.6066963345552767e+00 -4.1789253965578244e-01 -2.6634003608801833e+00 - 15 2.9695287185759982e+00 5.5422613165070589e-01 -1.2342022021804264e+00 - 16 2.6747029695251561e+00 -2.4124119054596429e+00 -2.3435746150366733e-02 - 17 2.2153577785319198e+00 -2.0897985186960004e+00 1.1963150794498112e+00 - 18 2.1369701704057920e+00 3.0158507413651594e+00 -3.5179348337167076e+00 - 19 1.5355837136080241e+00 2.6255292355373734e+00 -4.2353987779868740e+00 - 20 2.7727573003167976e+00 3.6923910440726924e+00 -3.9330842459576383e+00 - 21 4.9040128073837339e+00 -4.0752348170758461e+00 -3.6210314709795299e+00 - 22 4.3582355554510048e+00 -4.2126119427061379e+00 -4.4612844196307497e+00 - 23 5.7439382849366911e+00 -3.5821957939240279e+00 -3.8766361295959513e+00 - 24 2.0689243582397667e+00 3.1513346907334592e+00 3.1550389754792341e+00 - 25 1.3045351331439887e+00 3.2665125705877287e+00 2.5111855257342670e+00 - 26 2.5809237402706717e+00 4.0117602605484128e+00 3.2212060529083963e+00 - 27 -1.9611343130358911e+00 -4.3563411931365605e+00 2.1098293115526028e+00 - 28 -2.7473562684515085e+00 -4.0200819932383203e+00 1.5830052163435302e+00 - 29 -1.3126000191360121e+00 -3.5962518039484110e+00 2.2746342468738372e+00 + 1 -2.7045514660423325e-01 2.4919366600449733e+00 -1.6697231639531507e-01 + 2 3.1000084135335193e-01 2.9645912303343991e+00 -8.5463206395936475e-01 + 3 -7.0397943335818880e-01 1.2305810523260226e+00 -6.2776977494480968e-01 + 4 -1.5818284818588833e+00 1.4850714726761622e+00 -1.2538699508869366e+00 + 5 -9.0720735536750730e-01 9.2652311870881010e-01 3.9954248474779297e-01 + 6 2.4831612077180826e-01 2.8312998598620581e-01 -1.2314234208099035e+00 + 7 3.4143432145485658e-01 -2.2646550480708717e-02 -2.5292291412289485e+00 + 8 1.1743655769886039e+00 -4.8863573859247217e-01 -6.3783597964106820e-01 + 9 1.3800803748715951e+00 -2.5274721986531667e-01 2.8353956705790218e-01 + 10 2.0510864147345527e+00 -1.4605128263713651e+00 -9.8323665039331498e-01 + 11 1.7878396739201072e+00 -1.9926579680652934e+00 -1.8890577068286438e+00 + 12 3.0063280485262682e+00 -4.9013506205958607e-01 -1.6231897745485795e+00 + 13 4.0517059148826196e+00 -8.9225492586105981e-01 -1.6400002853466527e+00 + 14 2.6067556451864973e+00 -4.1789236780812949e-01 -2.6634004714578920e+00 + 15 2.9696053942689340e+00 5.5422609386529809e-01 -1.2342020619448599e+00 + 16 2.6747224655532205e+00 -2.4126238070421215e+00 -2.3433759666701557e-02 + 17 2.2153701802770058e+00 -2.0899272946638008e+00 1.1963135022288505e+00 + 18 2.1369701704036461e+00 3.0158507413784363e+00 -3.5179348337072978e+00 + 19 1.5355837135394632e+00 2.6255292354749651e+00 -4.2353987771402171e+00 + 20 2.7727573003740993e+00 3.6923910441184606e+00 -3.9330842453158126e+00 + 21 4.9040128073779838e+00 -4.0752348170721300e+00 -3.6210314709713383e+00 + 22 4.3582355554508565e+00 -4.2126119427059994e+00 -4.4612844196303154e+00 + 23 5.7439382849363732e+00 -3.5821957939239444e+00 -3.8766361295956120e+00 + 24 2.0689243582383923e+00 3.1513346907508031e+00 3.1550389751044130e+00 + 25 1.3045351331268087e+00 3.2665125705965665e+00 2.5111855257113618e+00 + 26 2.5809237402701828e+00 4.0117602605517018e+00 3.2212060528784647e+00 + 27 -1.9611343130326251e+00 -4.3563411931375260e+00 2.1098293115519637e+00 + 28 -2.7473562684514818e+00 -4.0200819932384704e+00 1.5830052163435808e+00 + 29 -1.3126000191358462e+00 -3.5962518039486921e+00 2.2746342468738376e+00 run_vel: ! |2 - 1 8.1727799799441551e-03 1.6773171346479498e-02 4.7791658662457619e-03 - 2 5.4195488166242462e-03 6.5417253116750503e-03 -1.4101336509904706e-03 - 3 -8.2223992085164246e-03 -1.2908451784809975e-02 -4.0945960825392183e-03 - 4 -3.7699737621353186e-03 -6.5732339099368641e-03 -1.1186165990462573e-03 - 5 -1.1021432529769989e-02 -9.8900600342308066e-03 -2.8408577395243402e-03 - 6 -3.9676605755577875e-02 4.6817208693894503e-02 3.7148485968758402e-02 - 7 9.1033903828574569e-04 -1.0128523335196111e-02 -5.1568251845436980e-02 - 8 7.9064705135345748e-03 -3.3507235864772382e-03 3.4557097186682206e-02 - 9 1.5644176974450778e-03 3.7365545213281916e-03 1.5047408817898655e-02 - 10 2.9201446823370270e-02 -2.9249578721348601e-02 -1.5018077430999392e-02 - 11 -4.7835961499775063e-03 -3.7481385151954837e-03 -2.3464104148495902e-03 - 12 2.2696452071679614e-03 -3.4774155893559008e-04 -3.0640770423525257e-03 - 13 2.7531740460470166e-03 5.8171061608719103e-03 -7.9467454050767680e-04 - 14 3.5246182390652257e-03 -5.7939995598917460e-03 -3.9478431188056783e-03 - 15 -1.8547943544027830e-03 -5.8554729976774897e-03 6.2938485111532208e-03 - 16 1.8681499978038456e-02 -1.3262466211006751e-02 -4.5638651456391420e-02 - 17 -1.2896269973838886e-02 9.7527665158999004e-03 3.7296535364672513e-02 - 18 -8.0065796043573602e-04 -8.6270472784857216e-04 -1.4483040597982864e-03 - 19 1.2452390821689194e-03 -2.5061097122815435e-03 7.2998631031035615e-03 - 20 3.5930057864791303e-03 3.6938851570931927e-03 3.2322732168550625e-03 - 21 -1.4689219756961087e-03 -2.7352107824530806e-04 7.0581625180889649e-04 - 22 -7.0694199165145062e-03 -4.2577148692717554e-03 2.8079117911323321e-04 - 23 6.0446963236685256e-03 -1.4000131545098763e-03 2.5819754799379711e-03 - 24 3.1926367400896563e-04 -9.9445663445895099e-04 1.4999996220927227e-04 - 25 1.3789753472275083e-04 -4.4335894812290054e-03 -8.1808138555694750e-04 - 26 2.0485904026313513e-03 2.7813358637372391e-03 4.3245727137078242e-03 - 27 4.5604120258846295e-04 -1.0305523038990200e-03 2.1188058429346654e-04 - 28 -6.2544520868040715e-03 1.4127711161841167e-03 -1.8429821879802923e-03 - 29 6.4110631528576680e-04 3.1273432717073592e-03 3.7253671106756733e-03 + 1 8.1742887450585739e-03 1.7228249520172705e-02 4.7636200154285678e-03 + 2 5.3742131209134954e-03 8.5014084989938534e-03 -1.3734731162460402e-03 + 3 -8.2161778164033632e-03 -1.2869362708201489e-02 -4.0888375199228388e-03 + 4 -3.7787634477676642e-03 -5.2533470458882917e-03 -1.1161118587284771e-03 + 5 -1.1032293389383448e-02 -9.8863340104885598e-03 -2.8403367946737107e-03 + 6 -3.9677570612003256e-02 4.6816683666629423e-02 3.7148298982540365e-02 + 7 9.0938409416312202e-04 -1.0128522535253528e-02 -5.1568254916651068e-02 + 8 7.9194167018994642e-03 -3.3569176675024141e-03 3.4554033379153554e-02 + 9 1.5923285603331428e-03 3.7365113011970019e-03 1.5046752270197748e-02 + 10 2.9208357286776593e-02 -2.9353993769823783e-02 -1.5016704917306721e-02 + 11 -4.7465540645016645e-03 -4.2165276646841865e-03 -2.3412478153019894e-03 + 12 2.2980757077101510e-03 -3.5091405216452847e-04 -3.0640046931904290e-03 + 13 2.9148511000306786e-03 5.5851722208680194e-03 -7.9412268026523897e-04 + 14 3.5836019249020573e-03 -5.7936540681715965e-03 -3.9480663628116922e-03 + 15 -1.7778657432845671e-03 -5.8555598171606280e-03 6.2941335584716009e-03 + 16 1.8700222665862708e-02 -1.3473926436394508e-02 -4.5635589324539552e-02 + 17 -1.2883485803873765e-02 9.6240189248986089e-03 3.7294040194137568e-02 + 18 -8.0065796511591945e-04 -8.6270470075116407e-04 -1.4483040404113881e-03 + 19 1.2452390066448272e-03 -2.5061097760180838e-03 7.2998639311393438e-03 + 20 3.5930058446123790e-03 3.6938852063833996e-03 3.2322738498526673e-03 + 21 -1.4689219875639078e-03 -2.7352107061007723e-04 7.0581626870114592e-04 + 22 -7.0694199167938608e-03 -4.2577148689919644e-03 2.8079118009157448e-04 + 23 6.0446963229432490e-03 -1.4000131543898697e-03 2.5819754806854374e-03 + 24 3.1926367036916678e-04 -9.9445660295952466e-04 1.4999958504844645e-04 + 25 1.3789750016399599e-04 -4.4335894629157517e-03 -8.1808143179135507e-04 + 26 2.0485903999350758e-03 2.7813358672621209e-03 4.3245726820250576e-03 + 27 4.5604120926159787e-04 -1.0305523058923018e-03 2.1188058299333266e-04 + 28 -6.2544520867115292e-03 1.4127711158545754e-03 -1.8429821878482673e-03 + 29 6.4110631563933543e-04 3.1273432711488012e-03 3.7253671106878979e-03 ...