Merge branch 'amoeba' into amoeba-gpu

This commit is contained in:
Trung Nguyen
2022-04-22 16:10:24 -05:00
4933 changed files with 388163 additions and 506968 deletions

View File

@@ -39,7 +39,7 @@ bool Neighbor::init(NeighborShared *shared, const int inum,
const int block_cell_2d, const int block_cell_id,
const int block_nbor_build, const int threads_per_atom,
const int simd_size, const bool time_device,
const std::string compile_flags, const bool ilist_map) {
const std::string &compile_flags, const bool ilist_map) {
clear();
_ilist_map = ilist_map;
@@ -113,7 +113,7 @@ bool Neighbor::init(NeighborShared *shared, const int inum,
if (!success)
return false;
if (_use_packing==false) {
if (!_use_packing) {
#ifndef LAL_USE_OLD_NEIGHBOR
_shared->compile_kernels(devi, gpu_nbor, compile_flags+
" -DMAX_SUBGROUPS_PER_BLOCK="+toa(_block_nbor_build/_simd_size));
@@ -153,7 +153,7 @@ void Neighbor::alloc(bool &success) {
int nt=_max_atoms+_max_host;
if (_max_nbors)
_max_nbors = ((_max_nbors-1)/_threads_per_atom+1)*_threads_per_atom;
if (_use_packing==false || _gpu_nbor>0) {
if (!_use_packing || _gpu_nbor>0) {
if (_max_nbors)
success=success &&
(dev_nbor.alloc((_max_nbors+2)*_max_atoms,*dev)==UCL_SUCCESS);
@@ -166,7 +166,7 @@ void Neighbor::alloc(bool &success) {
_c_bytes=dev_nbor.row_bytes();
if (_alloc_packed) {
if (_use_packing==false) {
if (!_use_packing) {
dev_packed_begin.clear();
success=success && (dev_packed_begin.alloc(_max_atoms,*dev,
_packed_permissions)==UCL_SUCCESS);
@@ -373,7 +373,7 @@ void Neighbor::get_host(const int inum, int *ilist, int *numj,
time_nbor.stop();
if (_use_packing==false) {
if (!_use_packing) {
time_kernel.start();
int GX=static_cast<int>(ceil(static_cast<double>(inum)*_threads_per_atom/
block_size));
@@ -450,7 +450,7 @@ void Neighbor::get_host3(const int inum, const int nlist, int *ilist, int *numj,
}
time_nbor.stop();
if (_use_packing==false) {
if (!_use_packing) {
time_kernel.start();
int GX=static_cast<int>(ceil(static_cast<double>(inum)*_threads_per_atom/
block_size));
@@ -564,7 +564,7 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
#endif
}
const numtyp cutoff_cast=static_cast<numtyp>(_cutoff);
const auto cutoff_cast=static_cast<numtyp>(_cutoff);
if (_maxspecial>0) {
time_nbor.start();
@@ -713,11 +713,11 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
const int bin_stencil_size = bin_stencil_stride * bin_stencil_stride;
if (bin_stencil_size > _host_bin_stencil.numel())
_host_bin_stencil.alloc(bin_stencil_size,*dev);
for (int s = 0; s<bin_stencil_size; s++) {
const int nbory = s % bin_stencil_stride - cells_in_cutoff;
const int nborz = s / bin_stencil_stride - cells_in_cutoff;
_host_bin_stencil[s] = nbory*ncellx + nborz*ncellx*ncelly;
}
for (int s = 0; s<bin_stencil_size; s++) {
const int nbory = s % bin_stencil_stride - cells_in_cutoff;
const int nborz = s / bin_stencil_stride - cells_in_cutoff;
_host_bin_stencil[s] = nbory*ncellx + nborz*ncellx*ncelly;
}
_bin_stencil.update_device(_host_bin_stencil,bin_stencil_size);
}
#endif
@@ -747,12 +747,12 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
// If binning on GPU, do this now
if (_gpu_nbor==1) {
mn = _max_nbors;
const numtyp i_cell_size=static_cast<numtyp>(1.0/_cell_size);
const auto i_cell_size=static_cast<numtyp>(1.0/_cell_size);
const int neigh_block=_block_cell_id;
const int GX=(int)ceil((float)nall/neigh_block);
const numtyp sublo0=static_cast<numtyp>(sublo[0]);
const numtyp sublo1=static_cast<numtyp>(sublo[1]);
const numtyp sublo2=static_cast<numtyp>(sublo[2]);
const int GX=(int)ceil((double)nall/neigh_block);
const auto sublo0=static_cast<numtyp>(sublo[0]);
const auto sublo1=static_cast<numtyp>(sublo[1]);
const auto sublo2=static_cast<numtyp>(sublo[2]);
_shared->k_cell_id.set_size(GX,neigh_block);
_shared->k_cell_id.run(&atom.x, &atom.dev_cell_id,
&atom.dev_particle_id, &sublo0, &sublo1,