git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@8693 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -51,10 +51,14 @@ bool AtomT::alloc(const int nall) {
|
||||
bool success=true;
|
||||
|
||||
// Ignore host/device transfers?
|
||||
bool cpuview=false;
|
||||
if (dev->device_type()==UCL_CPU)
|
||||
cpuview=true;
|
||||
|
||||
_host_view=false;
|
||||
if (dev->shared_memory()) {
|
||||
_host_view=true;
|
||||
#ifdef GPU_CAST
|
||||
assert(0==1);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Allocate storage for CUDPP sort
|
||||
#ifdef USE_CUDPP
|
||||
if (_gpu_nbor==1) {
|
||||
@ -64,63 +68,101 @@ bool AtomT::alloc(const int nall) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// -------------------------- Host allocations
|
||||
// Get a host write only buffer
|
||||
#ifdef GPU_CAST
|
||||
success=success && (host_x_cast.alloc(_max_atoms*3,*dev,
|
||||
UCL_WRITE_OPTIMIZED)==UCL_SUCCESS);
|
||||
success=success && (host_type_cast.alloc(_max_atoms,*dev,
|
||||
UCL_WRITE_OPTIMIZED)==UCL_SUCCESS);
|
||||
#else
|
||||
success=success && (host_x.alloc(_max_atoms*4,*dev,
|
||||
UCL_WRITE_OPTIMIZED)==UCL_SUCCESS);
|
||||
#endif
|
||||
// Buffer for casting only if different precisions
|
||||
if (_charge)
|
||||
success=success && (host_q.alloc(_max_atoms,*dev,
|
||||
UCL_WRITE_OPTIMIZED)==UCL_SUCCESS);
|
||||
// Buffer for casting only if different precisions
|
||||
if (_rot)
|
||||
success=success && (host_quat.alloc(_max_atoms*4,*dev,
|
||||
UCL_WRITE_OPTIMIZED)==UCL_SUCCESS);
|
||||
|
||||
|
||||
// --------------------------- Device allocations
|
||||
int gpu_bytes=0;
|
||||
if (cpuview) {
|
||||
#ifdef GPU_CAST
|
||||
assert(0==1);
|
||||
#else
|
||||
dev_x.view(host_x);
|
||||
#endif
|
||||
if (_rot)
|
||||
dev_quat.view(host_quat);
|
||||
if (_charge)
|
||||
dev_q.view(host_q);
|
||||
} else {
|
||||
#ifdef GPU_CAST
|
||||
success=success && (UCL_SUCCESS==dev_x.alloc(_max_atoms*4,*dev));
|
||||
success=success && (UCL_SUCCESS==
|
||||
dev_x_cast.alloc(_max_atoms*3,*dev,UCL_READ_ONLY));
|
||||
success=success && (UCL_SUCCESS==
|
||||
dev_type_cast.alloc(_max_atoms,*dev,UCL_READ_ONLY));
|
||||
gpu_bytes+=dev_x_cast.row_bytes()+dev_type_cast.row_bytes();
|
||||
#else
|
||||
success=success && (UCL_SUCCESS==
|
||||
dev_x.alloc(_max_atoms*4,*dev,UCL_READ_ONLY));
|
||||
#endif
|
||||
if (_charge) {
|
||||
success=success && (dev_q.alloc(_max_atoms,*dev,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=dev_q.row_bytes();
|
||||
success=success && (x.alloc(_max_atoms*4,*dev,UCL_WRITE_OPTIMIZED,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
#ifdef GPU_CAST
|
||||
success=success && (x_cast.alloc(_max_atoms*3,*dev,UCL_READ_ONLY)==
|
||||
UCL_SUCCESS);
|
||||
success=success && (type_cast.alloc(_max_atoms,*dev,UCL_READ_ONLY)==
|
||||
UCL_SUCCESS);
|
||||
gpu_bytes+=x_cast.device.row_bytes()+type_cast.device.row_bytes();
|
||||
#endif
|
||||
|
||||
if (_charge && _host_view==false) {
|
||||
success=success && (q.alloc(_max_atoms,*dev,UCL_WRITE_OPTIMIZED,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=q.device.row_bytes();
|
||||
}
|
||||
if (_rot && _host_view==false) {
|
||||
success=success && (quat.alloc(_max_atoms*4,*dev,UCL_WRITE_OPTIMIZED,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=quat.device.row_bytes();
|
||||
}
|
||||
|
||||
if (_gpu_nbor>0) {
|
||||
if (_bonds) {
|
||||
success=success && (dev_tag.alloc(_max_atoms,*dev)==UCL_SUCCESS);
|
||||
gpu_bytes+=dev_tag.row_bytes();
|
||||
}
|
||||
if (_rot) {
|
||||
success=success && (dev_quat.alloc(_max_atoms*4,*dev,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=dev_quat.row_bytes();
|
||||
if (_gpu_nbor==1) {
|
||||
success=success && (dev_cell_id.alloc(_max_atoms,*dev)==UCL_SUCCESS);
|
||||
gpu_bytes+=dev_cell_id.row_bytes();
|
||||
} else {
|
||||
success=success && (host_particle_id.alloc(_max_atoms,*dev)==UCL_SUCCESS);
|
||||
success=success &&
|
||||
(host_cell_id.alloc(_max_atoms,*dev,UCL_NOT_PINNED)==UCL_SUCCESS);
|
||||
}
|
||||
if (_gpu_nbor==2 && _host_view)
|
||||
dev_particle_id.view(host_particle_id);
|
||||
else
|
||||
success=success && (dev_particle_id.alloc(_max_atoms,*dev)==UCL_SUCCESS);
|
||||
gpu_bytes+=dev_particle_id.row_bytes();
|
||||
}
|
||||
|
||||
gpu_bytes+=x.device.row_bytes();
|
||||
if (gpu_bytes>_max_gpu_bytes)
|
||||
_max_gpu_bytes=gpu_bytes;
|
||||
|
||||
_allocated=true;
|
||||
return success;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
bool AtomT::add_fields(const bool charge, const bool rot,
|
||||
const int gpu_nbor, const bool bonds) {
|
||||
bool success=true;
|
||||
// Ignore host/device transfers?
|
||||
int gpu_bytes=0;
|
||||
|
||||
if (charge && _charge==false) {
|
||||
_charge=true;
|
||||
_other=true;
|
||||
if (_host_view==false) {
|
||||
success=success && (q.alloc(_max_atoms,*dev,UCL_WRITE_OPTIMIZED,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=q.device.row_bytes();
|
||||
}
|
||||
}
|
||||
if (_gpu_nbor>0) {
|
||||
|
||||
if (rot && _rot==false) {
|
||||
_rot=true;
|
||||
_other=true;
|
||||
if (_host_view==false) {
|
||||
success=success && (quat.alloc(_max_atoms*4,*dev,UCL_WRITE_OPTIMIZED,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=quat.device.row_bytes();
|
||||
}
|
||||
}
|
||||
|
||||
if (bonds && _bonds==false) {
|
||||
_bonds=true;
|
||||
if (_bonds && _gpu_nbor>0) {
|
||||
success=success && (dev_tag.alloc(_max_atoms,*dev)==UCL_SUCCESS);
|
||||
gpu_bytes+=dev_tag.row_bytes();
|
||||
}
|
||||
}
|
||||
|
||||
if (gpu_nbor>0 && _gpu_nbor==0) {
|
||||
_gpu_nbor=gpu_nbor;
|
||||
#ifdef USE_CUDPP
|
||||
if (_gpu_nbor==1) {
|
||||
CUDPPResult result = cudppPlan(&sort_plan, sort_config, _max_atoms, 1, 0);
|
||||
if (CUDPP_SUCCESS != result)
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
success=success && (dev_particle_id.alloc(_max_atoms,*dev)==UCL_SUCCESS);
|
||||
gpu_bytes+=dev_particle_id.row_bytes();
|
||||
if (_bonds) {
|
||||
@ -137,43 +179,9 @@ bool AtomT::alloc(const int nall) {
|
||||
}
|
||||
}
|
||||
|
||||
gpu_bytes+=dev_x.row_bytes();
|
||||
if (gpu_bytes>_max_gpu_bytes)
|
||||
_max_gpu_bytes=gpu_bytes;
|
||||
|
||||
_allocated=true;
|
||||
return success;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
bool AtomT::add_fields(const bool charge, const bool rot,
|
||||
const int gpu_nbor, const bool bonds) {
|
||||
bool realloc=false;
|
||||
if (charge && _charge==false) {
|
||||
_charge=true;
|
||||
realloc=true;
|
||||
}
|
||||
if (rot && _rot==false) {
|
||||
_rot=true;
|
||||
realloc=true;
|
||||
}
|
||||
if (gpu_nbor>0 && _gpu_nbor==0) {
|
||||
_gpu_nbor=gpu_nbor;
|
||||
realloc=true;
|
||||
}
|
||||
if (bonds && _bonds==false) {
|
||||
_bonds=true;
|
||||
realloc=true;
|
||||
}
|
||||
if (realloc) {
|
||||
_other=_charge || _rot;
|
||||
int max_atoms=_max_atoms;
|
||||
clear_resize();
|
||||
return alloc(max_atoms);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
bool AtomT::init(const int nall, const bool charge, const bool rot,
|
||||
UCL_Device &devi, const int gpu_nbor, const bool bonds) {
|
||||
@ -219,27 +227,18 @@ void AtomT::clear_resize() {
|
||||
return;
|
||||
_allocated=false;
|
||||
|
||||
dev_x.clear();
|
||||
if (_charge) {
|
||||
dev_q.clear();
|
||||
host_q.clear();
|
||||
}
|
||||
if (_rot) {
|
||||
dev_quat.clear();
|
||||
host_quat.clear();
|
||||
}
|
||||
#ifndef GPU_CAST
|
||||
host_x.clear();
|
||||
#else
|
||||
host_x_cast.clear();
|
||||
host_type_cast.clear();
|
||||
#endif
|
||||
x.clear();
|
||||
if (_charge)
|
||||
q.clear();
|
||||
if (_rot)
|
||||
quat.clear();
|
||||
|
||||
dev_cell_id.clear();
|
||||
dev_particle_id.clear();
|
||||
dev_tag.clear();
|
||||
#ifdef GPU_CAST
|
||||
dev_x_cast.clear();
|
||||
dev_type_cast.clear();
|
||||
x_cast.clear();
|
||||
type_cast.clear();
|
||||
#endif
|
||||
|
||||
#ifdef USE_CUDPP
|
||||
@ -279,8 +278,7 @@ double AtomT::host_memory_usage() const {
|
||||
atom_bytes+=1;
|
||||
if (_rot)
|
||||
atom_bytes+=4;
|
||||
return _max_atoms*atom_bytes*sizeof(numtyp)+
|
||||
sizeof(Atom<numtyp,acctyp>);
|
||||
return _max_atoms*atom_bytes*sizeof(numtyp)+sizeof(Atom<numtyp,acctyp>);
|
||||
}
|
||||
|
||||
// Sort arrays for neighbor list calculation
|
||||
@ -292,16 +290,18 @@ void AtomT::sort_neighbor(const int num_atoms) {
|
||||
8*sizeof(unsigned), num_atoms);
|
||||
if (CUDPP_SUCCESS != result) {
|
||||
printf("Error in cudppSort\n");
|
||||
NVD_GERYON_EXIT;
|
||||
UCL_GERYON_EXIT;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef GPU_CAST
|
||||
#ifdef USE_OPENCL
|
||||
#if defined(USE_OPENCL)
|
||||
#include "atom_cl.h"
|
||||
#elif defined(USE_CUDART)
|
||||
const char *atom=0;
|
||||
#else
|
||||
#include "atom_ptx.h"
|
||||
#include "atom_cubin.h"
|
||||
#endif
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
@ -316,3 +316,4 @@ void AtomT::compile_kernels(UCL_Device &dev) {
|
||||
#endif
|
||||
|
||||
template class Atom<PRECISION,ACC_PRECISION>;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user