diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index 2a0b5389c1..bfa5642fff 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -947,6 +947,7 @@ int FixQEqReaxFFKokkos::cg_solve1() "{}", loop, update->ntimestep, sqrt(sig_new)/b_norm)); return loop; +#endif } /* ---------------------------------------------------------------------- */ @@ -1108,7 +1109,7 @@ int FixQEqReaxFFKokkos::cg_solve2() #ifdef HIP_OPT_CG_SOLVE_FUSED template -void FixQEqReaxKokkos::cg_solve_fused() +int FixQEqReaxFFKokkos::cg_solve_fused() // b = b_s, x = s; { // reset converged @@ -1139,7 +1140,7 @@ void FixQEqReaxKokkos::cg_solve_fused() } // sparse_matvec( &H, x, q ); - FixQEqReaxKokkosSparse12_32Functor sparse_12_32_functor(this); + FixQEqReaxFFKokkosSparse12_32Functor sparse_12_32_functor(this); Kokkos::parallel_for(inum,sparse_12_32_functor); if (neighflag != FULL) { Kokkos::abort("Not implemented!"); @@ -1159,7 +1160,7 @@ void FixQEqReaxKokkos::cg_solve_fused() // preconditioning: d[j] = r[j] * Hdia_inv[j]; // b_norm = parallel_norm( b, nn ); F_FLOAT2 my_norm; - FixQEqReaxKokkosNorm12Functor norm12_functor(this); + FixQEqReaxFFKokkosNorm12Functor norm12_functor(this); Kokkos::parallel_reduce(inum,norm12_functor,my_norm); F_FLOAT2 norm_sqr; MPI_Allreduce( &my_norm.v, &norm_sqr.v, 2, MPI_DOUBLE, MPI_SUM, world ); @@ -1167,7 +1168,7 @@ void FixQEqReaxKokkos::cg_solve_fused() b_norm.v[1] = sqrt(norm_sqr.v[1]); F_FLOAT2 my_dot; - FixQEqReaxKokkosDot11Functor dot11_functor(this); + FixQEqReaxFFKokkosDot11Functor dot11_functor(this); Kokkos::parallel_reduce(inum,dot11_functor,my_dot); F_FLOAT2 dot_sqr; MPI_Allreduce( &my_dot.v, &dot_sqr.v, 2, MPI_DOUBLE, MPI_SUM, world ); @@ -1194,7 +1195,7 @@ void FixQEqReaxKokkos::cg_solve_fused() comm->forward_comm_fix(this, 2); // sparse_matvec( &H, d, q ); - FixQEqReaxKokkosSparse22FusedFunctor sparse22_functor(this); + FixQEqReaxFFKokkosSparse22FusedFunctor sparse22_functor(this); Kokkos::parallel_for(inum,sparse22_functor); if (neighflag != FULL) { Kokkos::abort("Not implemented!"); @@ -1213,7 +1214,7 @@ void FixQEqReaxKokkos::cg_solve_fused() // tmp = parallel_dot( d, q, nn); my_dot.init(); dot_sqr.init(); - FixQEqReaxKokkosDot22Functor dot22_functor(this); + FixQEqReaxFFKokkosDot22Functor dot22_functor(this); Kokkos::parallel_reduce(inum,dot22_functor,my_dot); MPI_Allreduce( &my_dot.v, &dot_sqr.v, 2, MPI_DOUBLE, MPI_SUM, world ); tmp = dot_sqr; @@ -1228,11 +1229,11 @@ void FixQEqReaxKokkos::cg_solve_fused() // vector_add( r, -alpha, q, nn ); my_dot.init(); dot_sqr.init(); - FixQEqReaxKokkosPrecon12Functor precon12_functor(this); + FixQEqReaxFFKokkosPrecon12Functor precon12_functor(this); Kokkos::parallel_for(inum,precon12_functor); // preconditioning: p[j] = r[j] * Hdia_inv[j]; // sig_new = parallel_dot( r, p, nn); - FixQEqReaxKokkosPreconFusedFunctor precon_functor(this); + FixQEqReaxFFKokkosPreconFusedFunctor precon_functor(this); Kokkos::parallel_reduce(inum,precon_functor,my_dot); MPI_Allreduce( &my_dot.v, &dot_sqr.v, 2, MPI_DOUBLE, MPI_SUM, world ); sig_new = dot_sqr; @@ -1243,7 +1244,7 @@ void FixQEqReaxKokkos::cg_solve_fused() beta[1] = sig_new.v[1] / sig_old.v[1]; // vector_sum( d, 1., p, beta, d, nn ); - FixQEqReaxKokkosVecSum2FusedFunctor vecsum12_functor(this); + FixQEqReaxFFKokkosVecSum2FusedFunctor vecsum12_functor(this); Kokkos::parallel_for(inum,vecsum12_functor); } @@ -1314,7 +1315,7 @@ void FixQEqReaxFFKokkos::sparse12_item(int ii) const // fused operator template KOKKOS_INLINE_FUNCTION -void FixQEqReaxKokkos::sparse12_32_item(int ii) const +void FixQEqReaxFFKokkos::sparse12_32_item(int ii) const { const int i = d_ilist[ii]; const int itype = type(i); @@ -1372,7 +1373,7 @@ void FixQEqReaxFFKokkos::operator() (TagSparseMatvec1, const membert #ifdef HIP_OPT_CG_SOLVE_FUSED template KOKKOS_INLINE_FUNCTION -void FixQEqReaxKokkos::operator() (TagSparseMatvec13, const membertype13 &team) const +void FixQEqReaxFFKokkos::operator() (TagSparseMatvec13, const membertype13 &team) const { const int i = d_ilist[team.league_rank()]; if (mask[i] & groupbit) { @@ -1418,7 +1419,7 @@ void FixQEqReaxFFKokkos::operator() (TagSparseMatvec1Vector, const m #ifdef HIP_OPT_CG_SOLVE_FUSED template KOKKOS_INLINE_FUNCTION -void FixQEqReaxKokkos::operator() (TagSparseMatvec13Vector, const membertype13vec &team) const +void FixQEqReaxFFKokkos::operator() (TagSparseMatvec13Vector, const membertype13vec &team) const { int k = team.league_rank () * team.team_size () + team.team_rank (); const int i = d_ilist[k]; @@ -1459,7 +1460,7 @@ void FixQEqReaxFFKokkos::sparse22_item(int ii) const #ifdef HIP_OPT_CG_SOLVE_FUSED template KOKKOS_INLINE_FUNCTION -void FixQEqReaxKokkos::sparse22_fused_item(int ii) const +void FixQEqReaxFFKokkos::sparse22_fused_item(int ii) const { const int i = d_ilist[ii]; const int itype = type(i); @@ -1535,7 +1536,7 @@ void FixQEqReaxFFKokkos::operator() (TagSparseMatvec2, const membert #ifdef HIP_OPT_CG_SOLVE_FUSED template KOKKOS_INLINE_FUNCTION -void FixQEqReaxKokkos::operator() (TagSparseMatvec2FusedVector, const membertype2fusedvec &team) const +void FixQEqReaxFFKokkos::operator() (TagSparseMatvec2FusedVector, const membertype2fusedvec &team) const { int k = team.league_rank () * team.team_size () + team.team_rank (); const int i = d_ilist[k]; @@ -1563,7 +1564,7 @@ void FixQEqReaxKokkos::operator() (TagSparseMatvec2FusedVector, cons #ifdef HIP_OPT_CG_SOLVE_FUSED template KOKKOS_INLINE_FUNCTION -void FixQEqReaxKokkos::operator() (TagSparseMatvec2Fused, const membertype2fused &team) const +void FixQEqReaxFFKokkos::operator() (TagSparseMatvec2Fused, const membertype2fused &team) const { const int i = d_ilist[team.league_rank()]; if (mask[i] & groupbit) { @@ -1680,7 +1681,7 @@ void FixQEqReaxFFKokkos::vecsum2_item(int ii) const #ifdef HIP_OPT_CG_SOLVE_FUSED template KOKKOS_INLINE_FUNCTION -void FixQEqReaxKokkos::vecsum2_fused_item(int ii) const +void FixQEqReaxFFKokkos::vecsum2_fused_item(int ii) const { const int i = d_ilist[ii]; if (mask[i] & groupbit) { @@ -1729,7 +1730,7 @@ double FixQEqReaxFFKokkos::norm2_item(int ii) const #ifdef HIP_OPT_CG_SOLVE_FUSED template KOKKOS_INLINE_FUNCTION -void FixQEqReaxKokkos::norm12_item(int ii, F_FLOAT2& out) const +void FixQEqReaxFFKokkos::norm12_item(int ii, F_FLOAT2& out) const { const int i = d_ilist[ii]; if (mask[i] & groupbit) { @@ -1781,7 +1782,7 @@ double FixQEqReaxFFKokkos::dot2_item(int ii) const #ifdef HIP_OPT_CG_SOLVE_FUSED template KOKKOS_INLINE_FUNCTION -void FixQEqReaxKokkos::dot11_item(int ii, F_FLOAT2& out) const +void FixQEqReaxFFKokkos::dot11_item(int ii, F_FLOAT2& out) const { const int i = d_ilist[ii]; if (mask[i] & groupbit) { @@ -1794,7 +1795,7 @@ void FixQEqReaxKokkos::dot11_item(int ii, F_FLOAT2& out) const template KOKKOS_INLINE_FUNCTION -void FixQEqReaxKokkos::dot22_item(int ii, F_FLOAT2& out) const +void FixQEqReaxFFKokkos::dot22_item(int ii, F_FLOAT2& out) const { const int i = d_ilist[ii]; if (mask[i] & groupbit) { @@ -1842,7 +1843,7 @@ void FixQEqReaxFFKokkos::precon2_item(int ii) const // fused operator template KOKKOS_INLINE_FUNCTION -void FixQEqReaxKokkos::precon12_item(int ii) const +void FixQEqReaxFFKokkos::precon12_item(int ii) const { const int i = d_ilist[ii]; if (mask[i] & groupbit) { @@ -1878,7 +1879,7 @@ double FixQEqReaxFFKokkos::precon_item(int ii) const #ifdef HIP_OPT_CG_SOLVE_FUSED template KOKKOS_INLINE_FUNCTION -void FixQEqReaxKokkos::precon_fused_item(int ii, F_FLOAT2& out) const +void FixQEqReaxFFKokkos::precon_fused_item(int ii, F_FLOAT2& out) const { const int i = d_ilist[ii]; if (mask[i] & groupbit) { diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.h b/src/KOKKOS/fix_qeq_reaxff_kokkos.h index 3b40918056..3303da574c 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.h +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.h @@ -318,10 +318,10 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { void init_hist(); void allocate_matrix(); void allocate_array(); - void cg_solve1(); - void cg_solve2(); + int cg_solve1(); + int cg_solve2(); #ifdef HIP_OPT_CG_SOLVE_FUSED - void cg_solve_fused(); + int cg_solve_fused(); #endif void calculate_q(); @@ -471,10 +471,10 @@ struct FixQEqReaxFFKokkosSparse22Functor { #ifdef HIP_OPT_CG_SOLVE_FUSED template -struct FixQEqReaxKokkosSparse22FusedFunctor { +struct FixQEqReaxFFKokkosSparse22FusedFunctor { typedef DeviceType device_type ; - FixQEqReaxKokkos c; - FixQEqReaxKokkosSparse22FusedFunctor(FixQEqReaxKokkos* c_ptr):c(*c_ptr) { + FixQEqReaxFFKokkos c; + FixQEqReaxFFKokkosSparse22FusedFunctor(FixQEqReaxFFKokkos* c_ptr):c(*c_ptr) { c.cleanup_copy(); }; KOKKOS_INLINE_FUNCTION @@ -512,10 +512,10 @@ struct FixQEqReaxFFKokkosSparse32Functor { #ifdef HIP_OPT_CG_SOLVE_FUSED template -struct FixQEqReaxKokkosSparse12_32Functor { +struct FixQEqReaxFFKokkosSparse12_32Functor { typedef DeviceType device_type ; - FixQEqReaxKokkos c; - FixQEqReaxKokkosSparse12_32Functor(FixQEqReaxKokkos* c_ptr):c(*c_ptr) { + FixQEqReaxFFKokkos c; + FixQEqReaxFFKokkosSparse12_32Functor(FixQEqReaxFFKokkos* c_ptr):c(*c_ptr) { c.cleanup_copy(); }; KOKKOS_INLINE_FUNCTION @@ -553,10 +553,10 @@ struct FixQEqReaxFFKokkosVecSum2Functor { #ifdef HIP_OPT_CG_SOLVE_FUSED template -struct FixQEqReaxKokkosVecSum2FusedFunctor { +struct FixQEqReaxFFKokkosVecSum2FusedFunctor { typedef DeviceType device_type ; - FixQEqReaxKokkos c; - FixQEqReaxKokkosVecSum2FusedFunctor(FixQEqReaxKokkos* c_ptr):c(*c_ptr) { + FixQEqReaxFFKokkos c; + FixQEqReaxFFKokkosVecSum2FusedFunctor(FixQEqReaxFFKokkos* c_ptr):c(*c_ptr) { c.cleanup_copy(); }; KOKKOS_INLINE_FUNCTION @@ -597,11 +597,11 @@ struct FixQEqReaxFFKokkosNorm2Functor { #ifdef HIP_OPT_CG_SOLVE_FUSED // fused operator template -struct FixQEqReaxKokkosNorm12Functor { +struct FixQEqReaxFFKokkosNorm12Functor { typedef DeviceType device_type ; - FixQEqReaxKokkos c; + FixQEqReaxFFKokkos c; typedef F_FLOAT2 value_type; - FixQEqReaxKokkosNorm12Functor(FixQEqReaxKokkos* c_ptr):c(*c_ptr) { + FixQEqReaxFFKokkosNorm12Functor(FixQEqReaxFFKokkos* c_ptr):c(*c_ptr) { c.cleanup_copy(); }; KOKKOS_INLINE_FUNCTION @@ -642,11 +642,11 @@ struct FixQEqReaxFFKokkosDot2Functor { #ifdef HIP_OPT_CG_SOLVE_FUSED // fused operators template -struct FixQEqReaxKokkosDot11Functor { +struct FixQEqReaxFFKokkosDot11Functor { typedef DeviceType device_type ; - FixQEqReaxKokkos c; + FixQEqReaxFFKokkos c; typedef F_FLOAT2 value_type; - FixQEqReaxKokkosDot11Functor(FixQEqReaxKokkos* c_ptr):c(*c_ptr) { + FixQEqReaxFFKokkosDot11Functor(FixQEqReaxFFKokkos* c_ptr):c(*c_ptr) { c.cleanup_copy(); }; KOKKOS_INLINE_FUNCTION @@ -656,11 +656,11 @@ struct FixQEqReaxKokkosDot11Functor { }; template -struct FixQEqReaxKokkosDot22Functor { +struct FixQEqReaxFFKokkosDot22Functor { typedef DeviceType device_type ; - FixQEqReaxKokkos c; + FixQEqReaxFFKokkos c; typedef F_FLOAT2 value_type; - FixQEqReaxKokkosDot22Functor(FixQEqReaxKokkos* c_ptr):c(*c_ptr) { + FixQEqReaxFFKokkosDot22Functor(FixQEqReaxFFKokkos* c_ptr):c(*c_ptr) { c.cleanup_copy(); }; KOKKOS_INLINE_FUNCTION @@ -698,10 +698,10 @@ struct FixQEqReaxFFKokkosPrecon2Functor { #ifdef HIP_OPT_CG_SOLVE_FUSED template -struct FixQEqReaxKokkosPrecon12Functor { +struct FixQEqReaxFFKokkosPrecon12Functor { typedef DeviceType device_type ; - FixQEqReaxKokkos c; - FixQEqReaxKokkosPrecon12Functor(FixQEqReaxKokkos* c_ptr):c(*c_ptr) { + FixQEqReaxFFKokkos c; + FixQEqReaxFFKokkosPrecon12Functor(FixQEqReaxFFKokkos* c_ptr):c(*c_ptr) { c.cleanup_copy(); }; KOKKOS_INLINE_FUNCTION @@ -727,11 +727,11 @@ struct FixQEqReaxFFKokkosPreconFunctor { #ifdef HIP_OPT_CG_SOLVE_FUSED template -struct FixQEqReaxKokkosPreconFusedFunctor { +struct FixQEqReaxFFKokkosPreconFusedFunctor { typedef DeviceType device_type ; - FixQEqReaxKokkos c; + FixQEqReaxFFKokkos c; typedef F_FLOAT2 value_type; - FixQEqReaxKokkosPreconFusedFunctor(FixQEqReaxKokkos* c_ptr):c(*c_ptr) { + FixQEqReaxFFKokkosPreconFusedFunctor(FixQEqReaxFFKokkos* c_ptr):c(*c_ptr) { c.cleanup_copy(); }; KOKKOS_INLINE_FUNCTION