Minor fixes to align w.r.t upstream
Change-Id: I4d8bbfe286c986f5bc603041b04f272b3f537476
This commit is contained in:
@ -947,6 +947,7 @@ int FixQEqReaxFFKokkos<DeviceType>::cg_solve1()
|
||||
"{}", loop, update->ntimestep,
|
||||
sqrt(sig_new)/b_norm));
|
||||
return loop;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -1108,7 +1109,7 @@ int FixQEqReaxFFKokkos<DeviceType>::cg_solve2()
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
|
||||
template<class DeviceType>
|
||||
void FixQEqReaxKokkos<DeviceType>::cg_solve_fused()
|
||||
int FixQEqReaxFFKokkos<DeviceType>::cg_solve_fused()
|
||||
// b = b_s, x = s;
|
||||
{
|
||||
// reset converged
|
||||
@ -1139,7 +1140,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve_fused()
|
||||
}
|
||||
|
||||
// sparse_matvec( &H, x, q );
|
||||
FixQEqReaxKokkosSparse12_32Functor<DeviceType> sparse_12_32_functor(this);
|
||||
FixQEqReaxFFKokkosSparse12_32Functor<DeviceType> sparse_12_32_functor(this);
|
||||
Kokkos::parallel_for(inum,sparse_12_32_functor);
|
||||
if (neighflag != FULL) {
|
||||
Kokkos::abort("Not implemented!");
|
||||
@ -1159,7 +1160,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve_fused()
|
||||
// preconditioning: d[j] = r[j] * Hdia_inv[j];
|
||||
// b_norm = parallel_norm( b, nn );
|
||||
F_FLOAT2 my_norm;
|
||||
FixQEqReaxKokkosNorm12Functor<DeviceType> norm12_functor(this);
|
||||
FixQEqReaxFFKokkosNorm12Functor<DeviceType> norm12_functor(this);
|
||||
Kokkos::parallel_reduce(inum,norm12_functor,my_norm);
|
||||
F_FLOAT2 norm_sqr;
|
||||
MPI_Allreduce( &my_norm.v, &norm_sqr.v, 2, MPI_DOUBLE, MPI_SUM, world );
|
||||
@ -1167,7 +1168,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve_fused()
|
||||
b_norm.v[1] = sqrt(norm_sqr.v[1]);
|
||||
|
||||
F_FLOAT2 my_dot;
|
||||
FixQEqReaxKokkosDot11Functor<DeviceType> dot11_functor(this);
|
||||
FixQEqReaxFFKokkosDot11Functor<DeviceType> dot11_functor(this);
|
||||
Kokkos::parallel_reduce(inum,dot11_functor,my_dot);
|
||||
F_FLOAT2 dot_sqr;
|
||||
MPI_Allreduce( &my_dot.v, &dot_sqr.v, 2, MPI_DOUBLE, MPI_SUM, world );
|
||||
@ -1194,7 +1195,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve_fused()
|
||||
comm->forward_comm_fix(this, 2);
|
||||
|
||||
// sparse_matvec( &H, d, q );
|
||||
FixQEqReaxKokkosSparse22FusedFunctor<DeviceType> sparse22_functor(this);
|
||||
FixQEqReaxFFKokkosSparse22FusedFunctor<DeviceType> sparse22_functor(this);
|
||||
Kokkos::parallel_for(inum,sparse22_functor);
|
||||
if (neighflag != FULL) {
|
||||
Kokkos::abort("Not implemented!");
|
||||
@ -1213,7 +1214,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve_fused()
|
||||
// tmp = parallel_dot( d, q, nn);
|
||||
my_dot.init();
|
||||
dot_sqr.init();
|
||||
FixQEqReaxKokkosDot22Functor<DeviceType> dot22_functor(this);
|
||||
FixQEqReaxFFKokkosDot22Functor<DeviceType> dot22_functor(this);
|
||||
Kokkos::parallel_reduce(inum,dot22_functor,my_dot);
|
||||
MPI_Allreduce( &my_dot.v, &dot_sqr.v, 2, MPI_DOUBLE, MPI_SUM, world );
|
||||
tmp = dot_sqr;
|
||||
@ -1228,11 +1229,11 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve_fused()
|
||||
// vector_add( r, -alpha, q, nn );
|
||||
my_dot.init();
|
||||
dot_sqr.init();
|
||||
FixQEqReaxKokkosPrecon12Functor<DeviceType> precon12_functor(this);
|
||||
FixQEqReaxFFKokkosPrecon12Functor<DeviceType> precon12_functor(this);
|
||||
Kokkos::parallel_for(inum,precon12_functor);
|
||||
// preconditioning: p[j] = r[j] * Hdia_inv[j];
|
||||
// sig_new = parallel_dot( r, p, nn);
|
||||
FixQEqReaxKokkosPreconFusedFunctor<DeviceType> precon_functor(this);
|
||||
FixQEqReaxFFKokkosPreconFusedFunctor<DeviceType> precon_functor(this);
|
||||
Kokkos::parallel_reduce(inum,precon_functor,my_dot);
|
||||
MPI_Allreduce( &my_dot.v, &dot_sqr.v, 2, MPI_DOUBLE, MPI_SUM, world );
|
||||
sig_new = dot_sqr;
|
||||
@ -1243,7 +1244,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve_fused()
|
||||
beta[1] = sig_new.v[1] / sig_old.v[1];
|
||||
|
||||
// vector_sum( d, 1., p, beta, d, nn );
|
||||
FixQEqReaxKokkosVecSum2FusedFunctor<DeviceType> vecsum12_functor(this);
|
||||
FixQEqReaxFFKokkosVecSum2FusedFunctor<DeviceType> vecsum12_functor(this);
|
||||
Kokkos::parallel_for(inum,vecsum12_functor);
|
||||
}
|
||||
|
||||
@ -1314,7 +1315,7 @@ void FixQEqReaxFFKokkos<DeviceType>::sparse12_item(int ii) const
|
||||
// fused operator
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxKokkos<DeviceType>::sparse12_32_item(int ii) const
|
||||
void FixQEqReaxFFKokkos<DeviceType>::sparse12_32_item(int ii) const
|
||||
{
|
||||
const int i = d_ilist[ii];
|
||||
const int itype = type(i);
|
||||
@ -1372,7 +1373,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator() (TagSparseMatvec1, const membert
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxKokkos<DeviceType>::operator() (TagSparseMatvec13, const membertype13 &team) const
|
||||
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagSparseMatvec13, const membertype13 &team) const
|
||||
{
|
||||
const int i = d_ilist[team.league_rank()];
|
||||
if (mask[i] & groupbit) {
|
||||
@ -1418,7 +1419,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator() (TagSparseMatvec1Vector, const m
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxKokkos<DeviceType>::operator() (TagSparseMatvec13Vector, const membertype13vec &team) const
|
||||
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagSparseMatvec13Vector, const membertype13vec &team) const
|
||||
{
|
||||
int k = team.league_rank () * team.team_size () + team.team_rank ();
|
||||
const int i = d_ilist[k];
|
||||
@ -1459,7 +1460,7 @@ void FixQEqReaxFFKokkos<DeviceType>::sparse22_item(int ii) const
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxKokkos<DeviceType>::sparse22_fused_item(int ii) const
|
||||
void FixQEqReaxFFKokkos<DeviceType>::sparse22_fused_item(int ii) const
|
||||
{
|
||||
const int i = d_ilist[ii];
|
||||
const int itype = type(i);
|
||||
@ -1535,7 +1536,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator() (TagSparseMatvec2, const membert
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxKokkos<DeviceType>::operator() (TagSparseMatvec2FusedVector, const membertype2fusedvec &team) const
|
||||
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagSparseMatvec2FusedVector, const membertype2fusedvec &team) const
|
||||
{
|
||||
int k = team.league_rank () * team.team_size () + team.team_rank ();
|
||||
const int i = d_ilist[k];
|
||||
@ -1563,7 +1564,7 @@ void FixQEqReaxKokkos<DeviceType>::operator() (TagSparseMatvec2FusedVector, cons
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxKokkos<DeviceType>::operator() (TagSparseMatvec2Fused, const membertype2fused &team) const
|
||||
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagSparseMatvec2Fused, const membertype2fused &team) const
|
||||
{
|
||||
const int i = d_ilist[team.league_rank()];
|
||||
if (mask[i] & groupbit) {
|
||||
@ -1680,7 +1681,7 @@ void FixQEqReaxFFKokkos<DeviceType>::vecsum2_item(int ii) const
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxKokkos<DeviceType>::vecsum2_fused_item(int ii) const
|
||||
void FixQEqReaxFFKokkos<DeviceType>::vecsum2_fused_item(int ii) const
|
||||
{
|
||||
const int i = d_ilist[ii];
|
||||
if (mask[i] & groupbit) {
|
||||
@ -1729,7 +1730,7 @@ double FixQEqReaxFFKokkos<DeviceType>::norm2_item(int ii) const
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxKokkos<DeviceType>::norm12_item(int ii, F_FLOAT2& out) const
|
||||
void FixQEqReaxFFKokkos<DeviceType>::norm12_item(int ii, F_FLOAT2& out) const
|
||||
{
|
||||
const int i = d_ilist[ii];
|
||||
if (mask[i] & groupbit) {
|
||||
@ -1781,7 +1782,7 @@ double FixQEqReaxFFKokkos<DeviceType>::dot2_item(int ii) const
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxKokkos<DeviceType>::dot11_item(int ii, F_FLOAT2& out) const
|
||||
void FixQEqReaxFFKokkos<DeviceType>::dot11_item(int ii, F_FLOAT2& out) const
|
||||
{
|
||||
const int i = d_ilist[ii];
|
||||
if (mask[i] & groupbit) {
|
||||
@ -1794,7 +1795,7 @@ void FixQEqReaxKokkos<DeviceType>::dot11_item(int ii, F_FLOAT2& out) const
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxKokkos<DeviceType>::dot22_item(int ii, F_FLOAT2& out) const
|
||||
void FixQEqReaxFFKokkos<DeviceType>::dot22_item(int ii, F_FLOAT2& out) const
|
||||
{
|
||||
const int i = d_ilist[ii];
|
||||
if (mask[i] & groupbit) {
|
||||
@ -1842,7 +1843,7 @@ void FixQEqReaxFFKokkos<DeviceType>::precon2_item(int ii) const
|
||||
// fused operator
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxKokkos<DeviceType>::precon12_item(int ii) const
|
||||
void FixQEqReaxFFKokkos<DeviceType>::precon12_item(int ii) const
|
||||
{
|
||||
const int i = d_ilist[ii];
|
||||
if (mask[i] & groupbit) {
|
||||
@ -1878,7 +1879,7 @@ double FixQEqReaxFFKokkos<DeviceType>::precon_item(int ii) const
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxKokkos<DeviceType>::precon_fused_item(int ii, F_FLOAT2& out) const
|
||||
void FixQEqReaxFFKokkos<DeviceType>::precon_fused_item(int ii, F_FLOAT2& out) const
|
||||
{
|
||||
const int i = d_ilist[ii];
|
||||
if (mask[i] & groupbit) {
|
||||
|
||||
@ -318,10 +318,10 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
|
||||
void init_hist();
|
||||
void allocate_matrix();
|
||||
void allocate_array();
|
||||
void cg_solve1();
|
||||
void cg_solve2();
|
||||
int cg_solve1();
|
||||
int cg_solve2();
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
void cg_solve_fused();
|
||||
int cg_solve_fused();
|
||||
#endif
|
||||
void calculate_q();
|
||||
|
||||
@ -471,10 +471,10 @@ struct FixQEqReaxFFKokkosSparse22Functor {
|
||||
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
template <class DeviceType>
|
||||
struct FixQEqReaxKokkosSparse22FusedFunctor {
|
||||
struct FixQEqReaxFFKokkosSparse22FusedFunctor {
|
||||
typedef DeviceType device_type ;
|
||||
FixQEqReaxKokkos<DeviceType> c;
|
||||
FixQEqReaxKokkosSparse22FusedFunctor(FixQEqReaxKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
FixQEqReaxFFKokkos<DeviceType> c;
|
||||
FixQEqReaxFFKokkosSparse22FusedFunctor(FixQEqReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
c.cleanup_copy();
|
||||
};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -512,10 +512,10 @@ struct FixQEqReaxFFKokkosSparse32Functor {
|
||||
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
template <class DeviceType>
|
||||
struct FixQEqReaxKokkosSparse12_32Functor {
|
||||
struct FixQEqReaxFFKokkosSparse12_32Functor {
|
||||
typedef DeviceType device_type ;
|
||||
FixQEqReaxKokkos<DeviceType> c;
|
||||
FixQEqReaxKokkosSparse12_32Functor(FixQEqReaxKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
FixQEqReaxFFKokkos<DeviceType> c;
|
||||
FixQEqReaxFFKokkosSparse12_32Functor(FixQEqReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
c.cleanup_copy();
|
||||
};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -553,10 +553,10 @@ struct FixQEqReaxFFKokkosVecSum2Functor {
|
||||
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
template <class DeviceType>
|
||||
struct FixQEqReaxKokkosVecSum2FusedFunctor {
|
||||
struct FixQEqReaxFFKokkosVecSum2FusedFunctor {
|
||||
typedef DeviceType device_type ;
|
||||
FixQEqReaxKokkos<DeviceType> c;
|
||||
FixQEqReaxKokkosVecSum2FusedFunctor(FixQEqReaxKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
FixQEqReaxFFKokkos<DeviceType> c;
|
||||
FixQEqReaxFFKokkosVecSum2FusedFunctor(FixQEqReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
c.cleanup_copy();
|
||||
};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -597,11 +597,11 @@ struct FixQEqReaxFFKokkosNorm2Functor {
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
// fused operator
|
||||
template <class DeviceType>
|
||||
struct FixQEqReaxKokkosNorm12Functor {
|
||||
struct FixQEqReaxFFKokkosNorm12Functor {
|
||||
typedef DeviceType device_type ;
|
||||
FixQEqReaxKokkos<DeviceType> c;
|
||||
FixQEqReaxFFKokkos<DeviceType> c;
|
||||
typedef F_FLOAT2 value_type;
|
||||
FixQEqReaxKokkosNorm12Functor(FixQEqReaxKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
FixQEqReaxFFKokkosNorm12Functor(FixQEqReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
c.cleanup_copy();
|
||||
};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -642,11 +642,11 @@ struct FixQEqReaxFFKokkosDot2Functor {
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
// fused operators
|
||||
template <class DeviceType>
|
||||
struct FixQEqReaxKokkosDot11Functor {
|
||||
struct FixQEqReaxFFKokkosDot11Functor {
|
||||
typedef DeviceType device_type ;
|
||||
FixQEqReaxKokkos<DeviceType> c;
|
||||
FixQEqReaxFFKokkos<DeviceType> c;
|
||||
typedef F_FLOAT2 value_type;
|
||||
FixQEqReaxKokkosDot11Functor(FixQEqReaxKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
FixQEqReaxFFKokkosDot11Functor(FixQEqReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
c.cleanup_copy();
|
||||
};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -656,11 +656,11 @@ struct FixQEqReaxKokkosDot11Functor {
|
||||
};
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixQEqReaxKokkosDot22Functor {
|
||||
struct FixQEqReaxFFKokkosDot22Functor {
|
||||
typedef DeviceType device_type ;
|
||||
FixQEqReaxKokkos<DeviceType> c;
|
||||
FixQEqReaxFFKokkos<DeviceType> c;
|
||||
typedef F_FLOAT2 value_type;
|
||||
FixQEqReaxKokkosDot22Functor(FixQEqReaxKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
FixQEqReaxFFKokkosDot22Functor(FixQEqReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
c.cleanup_copy();
|
||||
};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -698,10 +698,10 @@ struct FixQEqReaxFFKokkosPrecon2Functor {
|
||||
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
template <class DeviceType>
|
||||
struct FixQEqReaxKokkosPrecon12Functor {
|
||||
struct FixQEqReaxFFKokkosPrecon12Functor {
|
||||
typedef DeviceType device_type ;
|
||||
FixQEqReaxKokkos<DeviceType> c;
|
||||
FixQEqReaxKokkosPrecon12Functor(FixQEqReaxKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
FixQEqReaxFFKokkos<DeviceType> c;
|
||||
FixQEqReaxFFKokkosPrecon12Functor(FixQEqReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
c.cleanup_copy();
|
||||
};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -727,11 +727,11 @@ struct FixQEqReaxFFKokkosPreconFunctor {
|
||||
|
||||
#ifdef HIP_OPT_CG_SOLVE_FUSED
|
||||
template <class DeviceType>
|
||||
struct FixQEqReaxKokkosPreconFusedFunctor {
|
||||
struct FixQEqReaxFFKokkosPreconFusedFunctor {
|
||||
typedef DeviceType device_type ;
|
||||
FixQEqReaxKokkos<DeviceType> c;
|
||||
FixQEqReaxFFKokkos<DeviceType> c;
|
||||
typedef F_FLOAT2 value_type;
|
||||
FixQEqReaxKokkosPreconFusedFunctor(FixQEqReaxKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
FixQEqReaxFFKokkosPreconFusedFunctor(FixQEqReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
c.cleanup_copy();
|
||||
};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
||||
Reference in New Issue
Block a user