diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp index 22a67ec0c4..86f610a2eb 100644 --- a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp @@ -62,10 +62,10 @@ FixACKS2ReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) : grow_arrays(atom->nmax); memoryKK->create_kokkos(k_s_hist_last,s_hist_last,2,nprev,"acks2/reax:s_hist_last"); d_s_hist_last = k_s_hist_last.template view(); + buf = new double[2*nprev]; + prev_last_rows_rank = 0; d_mfill_offset = typename AT::t_int_scalar("acks2/kk:mfill_offset"); - - comm_me_0_flag = (comm->me == 0); } /* ---------------------------------------------------------------------- */ @@ -78,6 +78,7 @@ FixACKS2ReaxFFKokkos::~FixACKS2ReaxFFKokkos() memoryKK->destroy_kokkos(k_s_hist,s_hist); memoryKK->destroy_kokkos(k_s_hist_X,s_hist_X); memoryKK->destroy_kokkos(k_s_hist_last,s_hist_last); + delete [] buf; deallocate_array(); } @@ -236,10 +237,61 @@ void FixACKS2ReaxFFKokkos::pre_force(int vflag) allocate_array(); - // get max number of neighbor - if (!allocated_flag || last_allocate < neighbor->lastcall) { + + // get max number of neighbor + allocate_matrix(); + + // last_rows_rank proc must not own zero atoms (unless no atoms total) + // otherwise some loops are no-ops and last rows contribution won't + // be computed correctly + + int flag = comm->me; + if (nn == 0) flag = MPI_MAX; + MPI_Allreduce(&flag, &last_rows_rank, 1, MPI_INT, MPI_MIN, world); + last_rows_flag = (comm->me == last_rows_rank); + + // pass along "s" array history if necessary + + if (prev_last_rows_rank != last_rows_rank) { + + MPI_Request request; + if (comm->me == last_rows_rank) + MPI_Irecv(buf,2*nprev,MPI_DOUBLE, + prev_last_rows_rank,0,world,&request); + + if (comm->me == prev_last_rows_rank) { + + // pack buffer + k_s_hist_last.template sync(); + auto h_s_hist_last = k_s_hist_last.h_view; + int n = 0; + for (int k = 0; k < nprev; k++) { + buf[n++] = h_s_hist_last(0,k); + buf[n++] = h_s_hist_last(1,k); + } + + MPI_Send(buf,2*nprev,MPI_DOUBLE,last_rows_rank,0,world); + } + + if (comm->me == last_rows_rank) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + + // unpack buffer + k_s_hist_last.template sync(); + auto h_s_hist_last = k_s_hist_last.h_view; + int n = 0; + for (int k = 0; k < nprev; k++) { + h_s_hist_last(0,k) = buf[n++]; + h_s_hist_last(1,k) = buf[n++]; + } + k_s_hist_last.template modify(); + } + } + + prev_last_rows_rank = last_rows_rank; + last_allocate = update->ntimestep; } @@ -1135,9 +1187,9 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2InitMatvec, const int } // last two rows - if (comm_me_0_flag && ii == 0) { + if (last_rows_flag && ii == 0) { for (int k = 0; k < 2; k++) { - d_b_s[2*NN+i] = 0.0; + d_b_s[2*NN+k] = 0.0; d_s[2*NN+k] = 4*(d_s_hist_last(k,0)+d_s_hist_last(k,2))-(6*d_s_hist_last(k,1)+d_s_hist_last(k,3)); } } @@ -1350,11 +1402,9 @@ void FixACKS2ReaxFFKokkos::sparse_matvec_acks2(typename AT::t_ffloat else ndup_bb = Kokkos::Experimental::create_scatter_view (d_bb); + Kokkos::deep_copy(d_bb,0.0); // can make more efficient? Kokkos::parallel_for(Kokkos::RangePolicy(0,nn),*this); - if (neighflag != FULL) - Kokkos::parallel_for(Kokkos::RangePolicy(nn,NN),*this); - if (neighflag == FULL) { int teamsize; if (execution_space == Host) teamsize = 1; @@ -1387,12 +1437,6 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2SparseMatvec1, const d_bb[i] = params(itype).eta * d_xx[i]; d_bb[NN + i] = d_X_diag[i] * d_xx[NN + i]; } - - // last two rows - if (ii == 0) { - d_bb[2*NN] = 0.0; - d_bb[2*NN + 1] = 0.0; - } } /* ---------------------------------------------------------------------- */ @@ -1511,7 +1555,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2Norm1, const int &ii, } // last two rows - if (comm_me_0_flag && ii == 0) { + if (last_rows_flag && ii == 0) { d_r[2*NN] = d_b_s[2*NN] - d_d[2*NN]; d_r[2*NN + 1] = d_b_s[2*NN + 1] - d_d[2*NN + 1]; @@ -1533,7 +1577,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2Norm2, const int &ii, } // last two rows - if (comm_me_0_flag && ii == 0) { + if (last_rows_flag && ii == 0) { lsum += d_r[2*NN] * d_r[2*NN]; lsum += d_r[2*NN + 1] * d_r[2*NN + 1]; } @@ -1552,7 +1596,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2Dot1, const int &ii, } // last two rows - if (comm_me_0_flag && ii == 0) { + if (last_rows_flag && ii == 0) { lsum += d_r_hat[2*NN] * d_r[2*NN]; lsum += d_r_hat[2*NN + 1] * d_r[2*NN + 1]; } @@ -1577,7 +1621,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2Precon1A, const int & } // last two rows - if (comm_me_0_flag && ii == 0) { + if (last_rows_flag && ii == 0) { d_q[2*NN] = d_p[2*NN] - omega*d_z[2*NN]; d_q[2*NN + 1] = d_p[2*NN + 1] - omega*d_z[2*NN + 1]; @@ -1605,7 +1649,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2Precon1B, const int & } // last two rows - if (comm_me_0_flag && ii == 0) { + if (last_rows_flag && ii == 0) { d_p[2*NN] = d_r[2*NN]; d_p[2*NN + 1] = d_r[2*NN + 1]; @@ -1627,7 +1671,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2Dot2, const int &ii, } // last two rows - if (comm_me_0_flag && ii == 0) { + if (last_rows_flag && ii == 0) { lsum += d_r_hat[2*NN] * d_z[2*NN]; lsum += d_r_hat[2*NN + 1] * d_z[2*NN + 1]; } @@ -1649,7 +1693,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2Dot3, const int &ii, } // last two rows - if (comm_me_0_flag && ii == 0) { + if (last_rows_flag && ii == 0) { d_q[2*NN] = d_r[2*NN] - alpha*d_z[2*NN]; d_q[2*NN + 1] = d_r[2*NN + 1] - alpha*d_z[2*NN + 1]; @@ -1671,7 +1715,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2Dot4, const int &ii, } // last two rows - if (comm_me_0_flag && ii == 0) { + if (last_rows_flag && ii == 0) { lsum += d_y[2*NN] * d_q[2*NN]; lsum += d_y[2*NN + 1] * d_q[2*NN + 1]; } @@ -1690,7 +1734,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2Dot5, const int &ii, } // last two rows - if (comm_me_0_flag && ii == 0) { + if (last_rows_flag && ii == 0) { lsum += d_y[2*NN] * d_y[2*NN]; lsum += d_y[2*NN + 1] * d_y[2*NN + 1]; } @@ -1709,7 +1753,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2Add, const int &ii) c } // last two rows - if (comm_me_0_flag && ii == 0) { + if (last_rows_flag && ii == 0) { d_s[2*NN] += alpha*d_d[2*NN]; d_s[2*NN + 1] += alpha*d_d[2*NN + 1]; } @@ -1728,7 +1772,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2Precon2, const int &i } // last two rows - if (comm_me_0_flag && ii == 0) { + if (last_rows_flag && ii == 0) { d_q_hat[2*NN] = d_q[2*NN]; d_q_hat[2*NN + 1] = d_q[2*NN + 1]; } @@ -1756,7 +1800,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2Norm3, const int &ii, } // last two rows - if (comm_me_0_flag && ii == 0) { + if (last_rows_flag && ii == 0) { d_g[2*NN] = alpha*d_d[2*NN] + omega*d_q_hat[2*NN]; d_g[2*NN + 1] = alpha*d_d[2*NN + 1] + omega*d_q_hat[2*NN + 1]; @@ -1790,7 +1834,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2CalculateQ1, const in } // last two rows - if (comm_me_0_flag && ii == 0) { + if (last_rows_flag && ii == 0) { for (int i = 0; i < 2; ++i) { for (int k = nprev-1; k > 0; --k) d_s_hist_last(i,k) = d_s_hist_last(i,k-1); diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.h b/src/KOKKOS/fix_acks2_reaxff_kokkos.h index 852cab141b..f28e524ebc 100644 --- a/src/KOKKOS/fix_acks2_reaxff_kokkos.h +++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.h @@ -174,7 +174,8 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF { private: int inum; int allocated_flag, last_allocate; - int need_dup,comm_me_0_flag; + int need_dup,comm_me_0_flag,prev_last_rows_rank; + double* buf; typename AT::t_int_scalar d_mfill_offset; diff --git a/src/REAXFF/fix_acks2_reaxff.cpp b/src/REAXFF/fix_acks2_reaxff.cpp index 0196b1319b..4a430c8228 100644 --- a/src/REAXFF/fix_acks2_reaxff.cpp +++ b/src/REAXFF/fix_acks2_reaxff.cpp @@ -88,6 +88,9 @@ FixACKS2ReaxFF::FixACKS2ReaxFF(LAMMPS *lmp, int narg, char **arg) : comm_forward = comm_reverse = 2; s_hist_X = s_hist_last = NULL; + + last_rows_rank = 0; + last_rows_flag = (comm->me == last_rows_rank); } /* ---------------------------------------------------------------------- */ @@ -413,7 +416,7 @@ void FixACKS2ReaxFF::init_matvec() } // last two rows - if (comm->me == 0) { + if (last_rows_flag) { for (i = 0; i < 2; i++) { b_s[2*NN+i] = 0.0; s[2*NN+i] = 4*(s_hist_last[i][0]+s_hist_last[i][2])-(6*s_hist_last[i][1]+s_hist_last[i][3]); @@ -558,7 +561,7 @@ int FixACKS2ReaxFF::BiCGStab(double *b, double *x) } } // last two rows - if (comm->me == 0) { + if (last_rows_flag) { d[2*NN] = p[2*NN]; d[2*NN + 1] = p[2*NN + 1]; } @@ -593,7 +596,7 @@ int FixACKS2ReaxFF::BiCGStab(double *b, double *x) } } // last two rows - if (comm->me == 0) { + if (last_rows_flag) { q_hat[2*NN] = q[2*NN]; q_hat[2*NN + 1] = q[2*NN + 1]; } @@ -713,7 +716,7 @@ void FixACKS2ReaxFF::calculate_Q() } } // last two rows - if (comm->me == 0) { + if (last_rows_flag) { for (int i = 0; i < 2; ++i) { for (k = nprev-1; k > 0; --k) s_hist_last[i][k] = s_hist_last[i][k-1]; @@ -851,24 +854,24 @@ void FixACKS2ReaxFF::unpack_reverse_comm(int n, int *list, double *buf) } /* ---------------------------------------------------------------------- - proc 0 broadcasts last two rows of vector to everyone else + one proc broadcasts last two rows of vector to everyone else ------------------------------------------------------------------------- */ void FixACKS2ReaxFF::more_forward_comm(double *vec) { - MPI_Bcast(&vec[2*NN],2,MPI_DOUBLE,0,world); + MPI_Bcast(&vec[2*NN],2,MPI_DOUBLE,last_rows_rank,world); } /* ---------------------------------------------------------------------- - reduce last two rows of vector and give to proc 0 + reduce last two rows of vector and give to one proc ------------------------------------------------------------------------- */ void FixACKS2ReaxFF::more_reverse_comm(double *vec) { - if (comm->me == 0) - MPI_Reduce(MPI_IN_PLACE,&vec[2*NN],2,MPI_DOUBLE,MPI_SUM,0,world); + if (last_rows_flag) + MPI_Reduce(MPI_IN_PLACE,&vec[2*NN],2,MPI_DOUBLE,MPI_SUM,last_rows_rank,world); else - MPI_Reduce(&vec[2*NN],NULL,2,MPI_DOUBLE,MPI_SUM,0,world); + MPI_Reduce(&vec[2*NN],NULL,2,MPI_DOUBLE,MPI_SUM,last_rows_rank,world); } /* ---------------------------------------------------------------------- @@ -955,7 +958,7 @@ double FixACKS2ReaxFF::parallel_norm(double *v, int n) } // last two rows - if (comm->me == 0) { + if (last_rows_flag) { my_sum += SQR(v[2*NN]); my_sum += SQR(v[2*NN + 1]); } @@ -985,7 +988,7 @@ double FixACKS2ReaxFF::parallel_dot(double *v1, double *v2, int n) } // last two rows - if (comm->me == 0) { + if (last_rows_flag) { my_dot += v1[2*NN] * v2[2*NN]; my_dot += v1[2*NN + 1] * v2[2*NN + 1]; } @@ -1015,7 +1018,7 @@ double FixACKS2ReaxFF::parallel_vector_acc(double *v, int n) } // last two rows - if (comm->me == 0) { + if (last_rows_flag) { my_acc += v[2*NN]; my_acc += v[2*NN + 1]; } @@ -1041,7 +1044,7 @@ void FixACKS2ReaxFF::vector_sum(double* dest, double c, double* v, } // last two rows - if (comm->me == 0) { + if (last_rows_flag) { dest[2*NN] = c * v[2*NN] + d * y[2*NN]; dest[2*NN + 1] = c * v[2*NN + 1] + d * y[2*NN + 1]; } @@ -1062,7 +1065,7 @@ void FixACKS2ReaxFF::vector_add(double* dest, double c, double* v, int k) } // last two rows - if (comm->me == 0) { + if (last_rows_flag) { dest[2*NN] += c * v[2*NN]; dest[2*NN + 1] += c * v[2*NN + 1]; } @@ -1084,7 +1087,7 @@ void FixACKS2ReaxFF::vector_copy(double* dest, double* v, int k) } // last two rows - if (comm->me == 0) { + if (last_rows_flag) { dest[2*NN] = v[2*NN]; dest[2*NN + 1] = v[2*NN + 1]; } diff --git a/src/REAXFF/fix_acks2_reaxff.h b/src/REAXFF/fix_acks2_reaxff.h index 71104f13dc..85ac344b4a 100644 --- a/src/REAXFF/fix_acks2_reaxff.h +++ b/src/REAXFF/fix_acks2_reaxff.h @@ -38,6 +38,8 @@ class FixACKS2ReaxFF : public FixQEqReaxFF { double* get_s() {return s;} protected: + int last_rows_rank,last_rows_flag; + double **s_hist_X,**s_hist_last; double *bcut_acks2,bond_softness,**bcut; // acks2 parameters