diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index 2a3fc7547a..a6da0306bb 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -202,6 +202,373 @@ void FixRxKokkos::rk4(const double t_stop, double *y, double *rwork, /* ---------------------------------------------------------------------- */ +template + template +void FixRxKokkos::k_rk4(const double t_stop, double *y, double *rwork, UserDataType& userData) const +{ + double *k1 = rwork; + double *k2 = k1 + nspecies; + double *k3 = k2 + nspecies; + double *k4 = k3 + nspecies; + double *yp = k4 + nspecies; + + const int numSteps = minSteps; + + const double h = t_stop / double(numSteps); + + // Run the requested steps with h. + for (int step = 0; step < numSteps; step++) + { + // k1 + k_rhs(0.0,y,k1, userData); + + // k2 + for (int ispecies = 0; ispecies < nspecies; ispecies++) + yp[ispecies] = y[ispecies] + 0.5*h*k1[ispecies]; + + k_rhs(0.0,yp,k2, userData); + + // k3 + for (int ispecies = 0; ispecies < nspecies; ispecies++) + yp[ispecies] = y[ispecies] + 0.5*h*k2[ispecies]; + + k_rhs(0.0,yp,k3, userData); + + // k4 + for (int ispecies = 0; ispecies < nspecies; ispecies++) + yp[ispecies] = y[ispecies] + h*k3[ispecies]; + + k_rhs(0.0,yp,k4, userData); + + for (int ispecies = 0; ispecies < nspecies; ispecies++) + y[ispecies] += h*(k1[ispecies]/6.0 + k2[ispecies]/3.0 + k3[ispecies]/3.0 + k4[ispecies]/6.0); + + } // end for (int step... + +} + +/* ---------------------------------------------------------------------- */ + +// f1 = dt*f(t,x) +// f2 = dt*f(t+ c20*dt,x + c21*f1) +// f3 = dt*f(t+ c30*dt,x + c31*f1 + c32*f2) +// f4 = dt*f(t+ c40*dt,x + c41*f1 + c42*f2 + c43*f3) +// f5 = dt*f(t+dt,x + c51*f1 + c52*f2 + c53*f3 + c54*f4) +// f6 = dt*f(t+ c60*dt,x + c61*f1 + c62*f2 + c63*f3 + c64*f4 + c65*f5) +// +// fifth-order runge-kutta integration +// x5 = x + b1*f1 + b3*f3 + b4*f4 + b5*f5 + b6*f6 +// fourth-order runge-kutta integration +// x = x + a1*f1 + a3*f3 + a4*f4 + a5*f5 + +template + template +void FixRxKokkos::k_rkf45_step (const int neq, const double h, double y[], double y_out[], double rwk[], UserDataType& userData) const +{ + const double c21=0.25; + const double c31=0.09375; + const double c32=0.28125; + const double c41=0.87938097405553; + const double c42=-3.2771961766045; + const double c43=3.3208921256258; + const double c51=2.0324074074074; + const double c52=-8.0; + const double c53=7.1734892787524; + const double c54=-0.20589668615984; + const double c61=-0.2962962962963; + const double c62=2.0; + const double c63=-1.3816764132554; + const double c64=0.45297270955166; + const double c65=-0.275; + const double a1=0.11574074074074; + const double a3=0.54892787524366; + const double a4=0.5353313840156; + const double a5=-0.2; + const double b1=0.11851851851852; + const double b3=0.51898635477583; + const double b4=0.50613149034201; + const double b5=-0.18; + const double b6=0.036363636363636; + + // local dependent variables (5 total) + double* f1 = &rwk[ 0]; + double* f2 = &rwk[ neq]; + double* f3 = &rwk[2*neq]; + double* f4 = &rwk[3*neq]; + double* f5 = &rwk[4*neq]; + double* f6 = &rwk[5*neq]; + + // scratch for the intermediate solution. + //double* ytmp = &rwk[6*neq]; + double* ytmp = y_out; + + // 1) + k_rhs (0.0, y, f1, userData); + + for (int k = 0; k < neq; k++){ + f1[k] *= h; + ytmp[k] = y[k] + c21 * f1[k]; + } + + // 2) + k_rhs(0.0, ytmp, f2, userData); + + for (int k = 0; k < neq; k++){ + f2[k] *= h; + ytmp[k] = y[k] + c31 * f1[k] + c32 * f2[k]; + } + + // 3) + k_rhs(0.0, ytmp, f3, userData); + + for (int k = 0; k < neq; k++) { + f3[k] *= h; + ytmp[k] = y[k] + c41 * f1[k] + c42 * f2[k] + c43 * f3[k]; + } + + // 4) + k_rhs(0.0, ytmp, f4, userData); + + for (int k = 0; k < neq; k++) { + f4[k] *= h; + ytmp[k] = y[k] + c51 * f1[k] + c52 * f2[k] + c53 * f3[k] + c54 * f4[k]; + } + + // 5) + k_rhs(0.0, ytmp, f5, userData); + + for (int k = 0; k < neq; k++) { + f5[k] *= h; + ytmp[k] = y[k] + c61*f1[k] + c62*f2[k] + c63*f3[k] + c64*f4[k] + c65*f5[k]; + } + + // 6) + k_rhs(0.0, ytmp, f6, userData); + + for (int k = 0; k < neq; k++) + { + //const double f6 = h * ydot[k]; + f6[k] *= h; + + // 5th-order solution. + const double r5 = b1*f1[k] + b3*f3[k] + b4*f4[k] + b5*f5[k] + b6*f6[k]; + + // 4th-order solution. + const double r4 = a1*f1[k] + a3*f3[k] + a4*f4[k] + a5*f5[k]; + + // Truncation error: difference between 4th and 5th-order solutions. + rwk[k] = fabs(r5 - r4); + + // Update solution. + //y_out[k] = y[k] + r5; // Local extrapolation + y_out[k] = y[k] + r4; + } + + return; +} + +template + template +int FixRxKokkos::k_rkf45_h0 + (const int neq, const double t, const double t_stop, + const double hmin, const double hmax, + double& h0, double y[], double rwk[], UserDataType& userData) const +{ + // Set lower and upper bounds on h0, and take geometric mean as first trial value. + // Exit with this value if the bounds cross each other. + + // Adjust upper bound based on ydot ... + double hg = sqrt(hmin*hmax); + + //if (hmax < hmin) + //{ + // h0 = hg; + // return; + //} + + // Start iteration to find solution to ... {WRMS norm of (h0^2 y'' / 2)} = 1 + + double *ydot = rwk; + double *y1 = ydot + neq; + double *ydot1 = y1 + neq; + + const int max_iters = 10; + bool hnew_is_ok = false; + double hnew = hg; + int iter = 0; + + // compute ydot at t=t0 + k_rhs (t, y, ydot, userData); + + while(1) + { + // Estimate y'' with finite-difference ... + + for (int k = 0; k < neq; k++) + y1[k] = y[k] + hg * ydot[k]; + + // compute y' at t1 + k_rhs (t + hg, y1, ydot1, userData); + + // Compute WRMS norm of y'' + double yddnrm = 0.0; + for (int k = 0; k < neq; k++){ + double ydd = (ydot1[k] - ydot[k]) / hg; + double wterr = ydd / (relTol * fabs( y[k] ) + absTol); + yddnrm += wterr * wterr; + } + + yddnrm = sqrt( yddnrm / double(neq) ); + + //std::cout << "iter " << _iter << " hg " << hg << " y'' " << yddnrm << std::endl; + //std::cout << "ydot " << ydot[neq-1] << std::endl; + + // should we accept this? + if (hnew_is_ok || iter == max_iters){ + hnew = hg; + if (iter == max_iters) + fprintf(stderr, "ERROR_HIN_MAX_ITERS\n"); + break; + } + + // Get the new value of h ... + hnew = (yddnrm*hmax*hmax > 2.0) ? sqrt(2.0 / yddnrm) : sqrt(hg * hmax); + + // test the stopping conditions. + double hrat = hnew / hg; + + // Accept this value ... the bias factor should bring it within range. + if ( (hrat > 0.5) && (hrat < 2.0) ) + hnew_is_ok = true; + + // If y'' is still bad after a few iterations, just accept h and give up. + if ( (iter > 1) && hrat > 2.0 ) { + hnew = hg; + hnew_is_ok = true; + } + + //printf("iter=%d, yddnrw=%e, hnew=%e, hmin=%e, hmax=%e\n", iter, yddnrm, hnew, hmin, hmax); + + hg = hnew; + iter ++; + } + + // bound and bias estimate + h0 = hnew * 0.5; + h0 = fmax(h0, hmin); + h0 = fmin(h0, hmax); + //printf("h0=%e, hmin=%e, hmax=%e\n", h0, hmin, hmax); + + return (iter + 1); +} + +template + template +void FixRxKokkos::k_rkf45(const int neq, const double t_stop, double *y, double *rwork, UserDataType& userData, CounterType& counter) const +{ + // Rounding coefficient. + const double uround = DBL_EPSILON; + + // Adaption limit (shrink or grow) + const double adaption_limit = 4.0; + + // Safety factor on the adaption. very specific but not necessary .. 0.9 is common. + const double hsafe = 0.840896415; + + // Time rounding factor. + const double tround = t_stop * uround; + + // Counters for diagnostics. + int nst = 0; // # of steps (accepted) + int nit = 0; // # of iterations total + int nfe = 0; // # of RHS evaluations + + // Min/Max step-size limits. + const double h_min = 100.0 * tround; + const double h_max = (minSteps > 0) ? t_stop / double(minSteps) : t_stop; + + // Set the initial step-size. 0 forces an internal estimate ... stable Euler step size. + double h = (minSteps > 0) ? t_stop / double(minSteps) : 0.0; + + double t = 0.0; + + if (h < h_min){ + //fprintf(stderr,"hin not implemented yet\n"); + //exit(-1); + nfe = k_rkf45_h0 (neq, t, t_stop, h_min, h_max, h, y, rwork, userData); + } + + //printf("t= %e t_stop= %e h= %e\n", t, t_stop, h); + + // Integrate until we reach the end time. + while (fabs(t - t_stop) > tround){ + double *yout = rwork; + double *eout = yout + neq; + + // Take a trial step. + k_rkf45_step (neq, h, y, yout, eout, userData); + + // Estimate the solution error. + // ... weighted 2-norm of the error. + double err2 = 0.0; + for (int k = 0; k < neq; k++){ + const double wterr = eout[k] / (relTol * fabs( y[k] ) + absTol); + err2 += wterr * wterr; + } + + double err = fmax( uround, sqrt( err2 / double(nspecies) )); + + // Accept the solution? + if (err <= 1.0 || h <= h_min){ + t += h; + nst++; + + for (int k = 0; k < neq; k++) + y[k] = yout[k]; + } + + // Adjust h for the next step. + double hfac = hsafe * sqrt( sqrt( 1.0 / err ) ); + + // Limit the adaption. + hfac = fmax( hfac, 1.0 / adaption_limit ); + hfac = fmin( hfac, adaption_limit ); + + // Apply the adaption factor... + h *= hfac; + + // Limit h. + h = fmin( h, h_max ); + h = fmax( h, h_min ); + + // Stretch h if we're within 5% ... and we didn't just fail. + if (err <= 1.0 && (t + 1.05*h) > t_stop) + h = t_stop - t; + + // And don't overshoot the end. + if (t + h > t_stop) + h = t_stop - t; + + nit++; + nfe += 6; + + if (maxIters && nit > maxIters){ + //fprintf(stderr,"atom[%d] took too many iterations in rkf45 %d %e %e\n", id, nit, t, t_stop); + counter.nFails ++; + break; + // We should set an error here so that the solution is not used! + } + + } // end while + + counter.nSteps += nst; + counter.nIters += nit; + counter.nFuncs += nfe; + + //printf("id= %d nst= %d nit= %d\n", id, nst, nit); +} +/* ---------------------------------------------------------------------- */ + // f1 = dt*f(t,x) // f2 = dt*f(t+ c20*dt,x + c21*f1) // f3 = dt*f(t+ c30*dt,x + c31*f1 + c32*f2) @@ -664,6 +1031,152 @@ int FixRxKokkos::rhs_sparse(double t, const double *y, double *dydt, /* ---------------------------------------------------------------------- */ +template + template +int FixRxKokkos::k_rhs(double t, const VectorType& y, VectorType& dydt, UserDataType& userData) const +{ + //StridedArrayType _y( const_cast( y ) ), _dydt( dydt ); + + // Use the sparse format instead. + if (useSparseKinetics) + return this->k_rhs_sparse( t, y, dydt, userData); + else + return this->k_rhs_dense ( t, y, dydt, userData); +} + +/* ---------------------------------------------------------------------- */ + +template + template +int FixRxKokkos::k_rhs_dense(double t, const VectorType& y, VectorType& dydt, UserDataType& userData) const +{ + #define rxnRateLaw (userData.rxnRateLaw) + #define kFor (userData.kFor ) + + //const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms; + //const int nspecies = atom->nspecies_dpd; + + for(int ispecies=0; ispecies + template +int FixRxKokkos::k_rhs_sparse(double t, const VectorType& y, VectorType& dydt, UserDataType& userData) const +{ + #define kFor (userData.kFor) + #define kRev (NULL) + #define rxnRateLaw (userData.rxnRateLaw) + #define conc (dydt) + #define maxReactants (this->sparseKinetics_maxReactants) + #define maxSpecies (this->sparseKinetics_maxSpecies) + #define nuk (this->d_kineticsData.nuk) + #define nu (this->d_kineticsData.nu) + #define inu (this->d_kineticsData.inu) + #define isIntegral(idx) ( SparseKinetics_enableIntegralReactions \ + && this->d_kineticsData.isIntegral(idx) ) + + for (int k = 0; k < nspecies; ++k) + conc[k] = y[k] / VDPD; + + // Construct the reaction rate laws + for (int i = 0; i < nreactions; ++i) + { + double rxnRateLawForward; + if (isIntegral(i)){ + rxnRateLawForward = kFor[i] * powint( conc[ nuk(i,0) ], inu(i,0) ); + for (int kk = 1; kk < maxReactants; ++kk){ + const int k = nuk(i,kk); + if (k == SparseKinetics_invalidIndex) break; + //if (k != SparseKinetics_invalidIndex) + rxnRateLawForward *= powint( conc[k], inu(i,kk) ); + } + } else { + rxnRateLawForward = kFor[i] * pow( conc[ nuk(i,0) ], nu(i,0) ); + for (int kk = 1; kk < maxReactants; ++kk){ + const int k = nuk(i,kk); + if (k == SparseKinetics_invalidIndex) break; + //if (k != SparseKinetics_invalidIndex) + rxnRateLawForward *= pow( conc[k], nu(i,kk) ); + } + } + + rxnRateLaw[i] = rxnRateLawForward; + } + + // Construct the reaction rates for each species from the + // Stoichiometric matrix and ROP vector. + for (int k = 0; k < nspecies; ++k) + dydt[k] = 0.0; + + for (int i = 0; i < nreactions; ++i){ + // Reactants ... + dydt[ nuk(i,0) ] -= nu(i,0) * rxnRateLaw[i]; + for (int kk = 1; kk < maxReactants; ++kk){ + const int k = nuk(i,kk); + if (k == SparseKinetics_invalidIndex) break; + //if (k != SparseKinetics_invalidIndex) + dydt[k] -= nu(i,kk) * rxnRateLaw[i]; + } + + // Products ... + dydt[ nuk(i,maxReactants) ] += nu(i,maxReactants) * rxnRateLaw[i]; + for (int kk = maxReactants+1; kk < maxSpecies; ++kk){ + const int k = nuk(i,kk); + if (k == SparseKinetics_invalidIndex) break; + //if (k != SparseKinetics_invalidIndex) + dydt[k] += nu(i,kk) * rxnRateLaw[i]; + } + } + + // Add in the volume factor to convert to the proper units. + for (int k = 0; k < nspecies; ++k) + dydt[k] *= VDPD; + + #undef kFor + #undef kRev + #undef rxnRateLaw + #undef conc + #undef maxReactants + #undef maxSpecies + #undef nuk + #undef nu + #undef inu + #undef isIntegral + //#undef invalidIndex + + return 0; +} + +/* ---------------------------------------------------------------------- */ + /*template template KOKKOS_INLINE_FUNCTION @@ -907,6 +1420,10 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF userData.kFor = new double[nreactions]; userData.rxnRateLaw = new double[nreactions]; + UserRHSDataKokkos<1> userDataKokkos; + userDataKokkos.kFor.m_data = userData.kFor; + userDataKokkos.rxnRateLaw.m_data = userData.rxnRateLaw; + CounterType counter_i; const double theta = (localTempFlag) ? d_dpdThetaLocal(i) : d_dpdTheta(i); @@ -935,7 +1452,8 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF // Solver the ODE system. if (odeIntegrationFlag == ODE_LAMMPS_RK4) { - rk4(t_stop, y, rwork, &userData); + //rk4(t_stop, y, rwork, &userData); + k_rk4(t_stop, y, rwork, userDataKokkos); } else if (odeIntegrationFlag == ODE_LAMMPS_RKF45) { diff --git a/src/KOKKOS/fix_rx_kokkos.h b/src/KOKKOS/fix_rx_kokkos.h index 4a11ac9fb9..e36d606525 100644 --- a/src/KOKKOS/fix_rx_kokkos.h +++ b/src/KOKKOS/fix_rx_kokkos.h @@ -76,12 +76,43 @@ class FixRxKokkos : public FixRX { PairDPDfdtEnergyKokkos* pairDPDEKK; double VDPD; + template + struct StridedArrayType + { + typedef T value_type; + enum { Stride = stride }; + + value_type *m_data; + + StridedArrayType() : m_data(NULL) {} + StridedArrayType(value_type *ptr) : m_data(ptr) {} + + inline value_type& operator()(const int idx) { return m_data[Stride*idx]; } + inline const value_type& operator()(const int idx) const { return m_data[Stride*idx]; } + inline value_type& operator[](const int idx) { return m_data[Stride*idx]; } + inline const value_type& operator[](const int idx) const { return m_data[Stride*idx]; } + }; + + template + struct UserRHSDataKokkos + { + StridedArrayType kFor; + StridedArrayType rxnRateLaw; + }; + void solve_reactions(const int vflag, const bool isPreForce = true); - int rhs(double, const double *, double *, void *) const; + int rhs (double, const double *, double *, void *) const; int rhs_dense (double, const double *, double *, void *) const; int rhs_sparse(double, const double *, double *, void *) const; + template + int k_rhs (double, const VectorType&, VectorType&, UserDataType& ) const; + template + int k_rhs_dense (double, const VectorType&, VectorType&, UserDataType& ) const; + template + int k_rhs_sparse(double, const VectorType&, VectorType&, UserDataType& ) const; + //!< Classic Runge-Kutta 4th-order stepper. void rk4(const double t_stop, double *y, double *rwork, void *v_params) const; @@ -97,6 +128,25 @@ class FixRxKokkos : public FixRX { const double hmin, const double hmax, double& h0, double y[], double rwk[], void *v_params) const; + //!< Classic Runge-Kutta 4th-order stepper. + template + void k_rk4(const double t_stop, double *y, double *rwork, UserDataType& userData) const; + + //!< Runge-Kutta-Fehlberg ODE Solver. + template + void k_rkf45(const int neq, const double t_stop, double *y, double *rwork, UserDataType& userData, CounterType& counter) const; + + //!< Runge-Kutta-Fehlberg ODE stepper function. + template + void k_rkf45_step (const int neq, const double h, double y[], double y_out[], + double rwk[], UserDataType& userData) const; + + //!< Initial step size estimation for the Runge-Kutta-Fehlberg ODE solver. + template + int k_rkf45_h0 (const int neq, const double t, const double t_stop, + const double hmin, const double hmax, + double& h0, double y[], double rwk[], UserDataType& userData) const; + //!< ODE Solver diagnostics. void odeDiagnostics(void);