This might work on CUDA
This commit is contained in:
@ -111,6 +111,10 @@ FixCMAPKokkos<DeviceType>::FixCMAPKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
k_d1cmapgrid.template sync<DeviceType>();
|
||||
k_d2cmapgrid.template sync<DeviceType>();
|
||||
k_d12cmapgrid.template sync<DeviceType>();
|
||||
|
||||
d_count = typename AT::t_int_scalar("fix_cmap:count");
|
||||
h_count = Kokkos::create_mirror_view(d_count);
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -673,8 +677,7 @@ int FixCMAPKokkos<DeviceType>::pack_exchange_kokkos(
|
||||
auto d_copylist = k_copylist.template view<DeviceType>();
|
||||
auto d_exchange_sendlist = k_exchange_sendlist.template view<DeviceType>();
|
||||
|
||||
int n;
|
||||
copymode = 1;
|
||||
Kokkos::deep_copy(d_count,0);
|
||||
|
||||
auto l_num_crossterm = d_num_crossterm;
|
||||
auto l_crossterm_type = d_crossterm_type;
|
||||
@ -683,31 +686,42 @@ int FixCMAPKokkos<DeviceType>::pack_exchange_kokkos(
|
||||
auto l_crossterm_atom3 = d_crossterm_atom3;
|
||||
auto l_crossterm_atom4 = d_crossterm_atom4;
|
||||
auto l_crossterm_atom5 = d_crossterm_atom5;
|
||||
//auto l_nsend = nsend;
|
||||
|
||||
copymode = 1;
|
||||
|
||||
Kokkos::parallel_scan(nsend, KOKKOS_LAMBDA(const int &mysend, int &offset, const bool &final) {
|
||||
|
||||
const int i = d_exchange_sendlist(mysend);
|
||||
|
||||
if (!final) offset += l_num_crossterm(i);
|
||||
if (!final) offset += (1+l_num_crossterm(i)*6);
|
||||
else {
|
||||
|
||||
int m = nsend + offset;
|
||||
d_buf(mysend) = m;
|
||||
d_buf(m++) = static_cast<double> (l_num_crossterm(i));
|
||||
for (int k = 0; k < l_num_crossterm(i); k++) {
|
||||
d_buf(m++) = static_cast<double> (l_crossterm_type(i,k));
|
||||
d_buf(m++) = static_cast<double> (l_crossterm_atom1(i,k));
|
||||
d_buf(m++) = static_cast<double> (l_crossterm_atom2(i,k));
|
||||
d_buf(m++) = static_cast<double> (l_crossterm_atom3(i,k));
|
||||
d_buf(m++) = static_cast<double> (l_crossterm_atom4(i,k));
|
||||
d_buf(m++) = static_cast<double> (l_crossterm_atom5(i,k));
|
||||
Kokkos::printf(" *** ok 1 ... i %i m %i l_num_crossterm(i) %i k %i d_buf[] %f %f %f %f %f %f %f\n", i, m, l_num_crossterm(i), k, d_buf(m-7), d_buf(m-6), d_buf(m-5), d_buf(m-4), d_buf(m-3), d_buf(m-2), d_buf(m-1));
|
||||
d_buf(mysend) = d_ubuf(m).d;
|
||||
d_buf(m++) = d_ubuf(l_num_crossterm(i)).d;
|
||||
|
||||
if( l_num_crossterm(i) > 0 ) {
|
||||
|
||||
for (int k = 0; k < l_num_crossterm(i); k++) {
|
||||
|
||||
d_buf(m++) = d_ubuf(l_crossterm_type(i,k)).d;
|
||||
d_buf(m++) = d_ubuf(l_crossterm_atom1(i,k)).d;
|
||||
d_buf(m++) = d_ubuf(l_crossterm_atom2(i,k)).d;
|
||||
d_buf(m++) = d_ubuf(l_crossterm_atom3(i,k)).d;
|
||||
d_buf(m++) = d_ubuf(l_crossterm_atom4(i,k)).d;
|
||||
d_buf(m++) = d_ubuf(l_crossterm_atom5(i,k)).d;
|
||||
|
||||
Kokkos::printf(" *** pack_exchange_kokkos() ... mysend %i i %i l_nsend %i offset %i m %i l_num_crossterm(i) %i l_crossterm_type(i,k) %i k %i d_buf[] %i %i %i %i %i %i %i\n", mysend, i, nsend, offset, m, l_num_crossterm(i), l_crossterm_type(i,k), k, d_ubuf(d_buf(m-7)).i, d_ubuf(d_buf(m-6)).i, d_ubuf(d_buf(m-5)).i, d_ubuf(d_buf(m-4)).i, d_ubuf(d_buf(m-3)).i, d_ubuf(d_buf(m-2)).i, d_ubuf(d_buf(m-1)).i);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if (mysend == nsend-1) d_count() = m;
|
||||
offset = m - nsend;
|
||||
|
||||
const int j = d_copylist(mysend);
|
||||
if (j > -1) {
|
||||
|
||||
// Kokkos::printf(" *** ok 2 ... i %i k %i\n", i, k);
|
||||
|
||||
l_num_crossterm(i) = l_num_crossterm(j);
|
||||
for (int k = 0; k < l_num_crossterm(i); k++) {
|
||||
l_crossterm_type(i,k) = l_crossterm_type(j,k);
|
||||
@ -718,14 +732,8 @@ int FixCMAPKokkos<DeviceType>::pack_exchange_kokkos(
|
||||
l_crossterm_atom5(i,k) = l_crossterm_atom5(j,k);
|
||||
}
|
||||
}
|
||||
|
||||
for( int k=0 ; k<100 ; k++ )
|
||||
Kokkos::printf(" *** pack_exchange_kokkos() ... mysend %i d_buf(%i) %f\n", mysend, k, d_buf(k));
|
||||
|
||||
}
|
||||
},n);
|
||||
|
||||
Kokkos::printf(" *** ok 3 ... n %i \n", n);
|
||||
});
|
||||
|
||||
copymode = 0;
|
||||
|
||||
@ -741,7 +749,8 @@ int FixCMAPKokkos<DeviceType>::pack_exchange_kokkos(
|
||||
k_crossterm_atom4.template modify<DeviceType>();
|
||||
k_crossterm_atom5.template modify<DeviceType>();
|
||||
|
||||
return n;
|
||||
Kokkos::deep_copy(h_count,d_count);
|
||||
return h_count();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -770,8 +779,6 @@ void FixCMAPKokkos<DeviceType>::unpack_exchange_kokkos(
|
||||
k_crossterm_atom4.template sync<DeviceType>();
|
||||
k_crossterm_atom5.template sync<DeviceType>();
|
||||
|
||||
copymode = 1;
|
||||
|
||||
auto l_num_crossterm = d_num_crossterm;
|
||||
auto l_crossterm_type = d_crossterm_type;
|
||||
auto l_crossterm_atom1 = d_crossterm_atom1;
|
||||
@ -780,32 +787,28 @@ void FixCMAPKokkos<DeviceType>::unpack_exchange_kokkos(
|
||||
auto l_crossterm_atom4 = d_crossterm_atom4;
|
||||
auto l_crossterm_atom5 = d_crossterm_atom5;
|
||||
|
||||
copymode = 1;
|
||||
|
||||
Kokkos::parallel_for(nrecv, KOKKOS_LAMBDA(const int &i) {
|
||||
int index = d_indices(i);
|
||||
|
||||
Kokkos::printf(" *** unpack_exchange_kokkos() ... nrecv %i nrecv1 %i nextrarecv1 %i i %i index %i\n", nrecv, nrecv1, nextrarecv1, i, index);
|
||||
|
||||
for( int k=0 ; k<100 ; k++ )
|
||||
Kokkos::printf(" *** unpack_exchange_kokkos() ... i %i d_buf(%i) %f\n", i, k, d_buf(k));
|
||||
|
||||
if (index > -1) {
|
||||
|
||||
int m = d_buf(i);
|
||||
// if (i >= nrecv1) m = nextrarecv1 + d_buf[nextrarecv1 + i - nrecv1];
|
||||
|
||||
l_num_crossterm(index) = static_cast<int> (d_buf(m++));
|
||||
|
||||
Kokkos::printf(" *** unpack_exchange_kokkos() ... m %i l_num_crossterm(index) %i \n", m, l_num_crossterm(index));
|
||||
|
||||
int m = d_ubuf(d_buf(i)).i;
|
||||
if (i >= nrecv1) m = nextrarecv1 + d_ubuf(d_buf(nextrarecv1 + i - nrecv1)).i;
|
||||
|
||||
l_num_crossterm(index) = static_cast<int> (d_ubuf(d_buf(m++)).i);
|
||||
|
||||
for (int k = 0; k < l_num_crossterm(index); k++) {
|
||||
l_crossterm_type(index,k) = static_cast<int> (d_buf(m++));
|
||||
l_crossterm_atom1(index,k) = static_cast<tagint> (d_buf(m++));
|
||||
l_crossterm_atom2(index,k) = static_cast<tagint> (d_buf(m++));
|
||||
l_crossterm_atom3(index,k) = static_cast<tagint> (d_buf(m++));
|
||||
l_crossterm_atom4(index,k) = static_cast<tagint> (d_buf(m++));
|
||||
l_crossterm_atom5(index,k) = static_cast<tagint> (d_buf(m++));
|
||||
l_crossterm_type(index,k) = static_cast<int> (d_ubuf(d_buf(m++)).i);
|
||||
l_crossterm_atom1(index,k) = static_cast<tagint> (d_ubuf(d_buf(m++)).i);
|
||||
l_crossterm_atom2(index,k) = static_cast<tagint> (d_ubuf(d_buf(m++)).i);
|
||||
l_crossterm_atom3(index,k) = static_cast<tagint> (d_ubuf(d_buf(m++)).i);
|
||||
l_crossterm_atom4(index,k) = static_cast<tagint> (d_ubuf(d_buf(m++)).i);
|
||||
l_crossterm_atom5(index,k) = static_cast<tagint> (d_ubuf(d_buf(m++)).i);
|
||||
|
||||
Kokkos::printf(" *** unpack_exchange_kokkos() ... m %i l_num_crossterm(index) %i l_crossterm_type %i l_crossterm_atom1-5 %i %i %i %i %i \n", m, l_num_crossterm(index), l_crossterm_type(index,k), l_crossterm_atom1(index,k), l_crossterm_atom2(index,k), l_crossterm_atom3(index,k), l_crossterm_atom4(index,k), l_crossterm_atom5(index,k));
|
||||
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@ -81,6 +81,9 @@ class FixCMAPKokkos : public FixCMAP, public KokkosBase {
|
||||
DAT::tdual_int_1d k_map_array;
|
||||
dual_hash_type k_map_hash;
|
||||
|
||||
typename AT::t_int_scalar d_count;
|
||||
HAT::t_int_scalar h_count;
|
||||
|
||||
DAT::tdual_int_1d k_num_crossterm;
|
||||
typename AT::t_int_1d d_num_crossterm;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user