reformat for increased readability
This commit is contained in:
@ -23,7 +23,7 @@ namespace LAMMPS_AL {
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
AnswerT::Answer() : _allocated(false),_eflag(false),_vflag(false),
|
||||
_inum(0),_ilist(nullptr),_newton(false) {
|
||||
_inum(0),_ilist(nullptr),_newton(false) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
|
||||
@ -127,9 +127,8 @@ class Answer {
|
||||
/// Add forces and torques from the GPU into a LAMMPS pointer
|
||||
void get_answers(double **f, double **tor);
|
||||
|
||||
inline double get_answers(double **f, double **tor, double *eatom,
|
||||
double **vatom, double *virial, double &ecoul,
|
||||
int &error_flag_in) {
|
||||
inline double get_answers(double **f, double **tor, double *eatom, double **vatom,
|
||||
double *virial, double &ecoul, int &error_flag_in) {
|
||||
double ta=MPI_Wtime();
|
||||
time_answer.sync_stop();
|
||||
_time_cpu_idle+=MPI_Wtime()-ta;
|
||||
|
||||
@ -984,18 +984,16 @@ int DeviceT::compile_kernels() {
|
||||
_max_bio_shared_types=gpu_lib_data[17];
|
||||
_pppm_max_spline=gpu_lib_data[18];
|
||||
|
||||
if (static_cast<size_t>(_block_pair)>gpu->group_size_dim(0) ||
|
||||
static_cast<size_t>(_block_bio_pair)>gpu->group_size_dim(0) ||
|
||||
static_cast<size_t>(_block_ellipse)>gpu->group_size_dim(0) ||
|
||||
static_cast<size_t>(_pppm_block)>gpu->group_size_dim(0) ||
|
||||
static_cast<size_t>(_block_nbor_build)>gpu->group_size_dim(0) ||
|
||||
static_cast<size_t>(_block_cell_2d)>gpu->group_size_dim(0) ||
|
||||
static_cast<size_t>(_block_cell_2d)>gpu->group_size_dim(1) ||
|
||||
static_cast<size_t>(_block_cell_id)>gpu->group_size_dim(0) ||
|
||||
static_cast<size_t>(_max_shared_types*_max_shared_types*
|
||||
sizeof(numtyp)*17 > gpu->slm_size()) ||
|
||||
static_cast<size_t>(_max_bio_shared_types*2*sizeof(numtyp) >
|
||||
gpu->slm_size()))
|
||||
if (static_cast<size_t>(_block_pair) > gpu->group_size_dim(0) ||
|
||||
static_cast<size_t>(_block_bio_pair) > gpu->group_size_dim(0) ||
|
||||
static_cast<size_t>(_block_ellipse) > gpu->group_size_dim(0) ||
|
||||
static_cast<size_t>(_pppm_block) > gpu->group_size_dim(0) ||
|
||||
static_cast<size_t>(_block_nbor_build) > gpu->group_size_dim(0) ||
|
||||
static_cast<size_t>(_block_cell_2d) > gpu->group_size_dim(0) ||
|
||||
static_cast<size_t>(_block_cell_2d) > gpu->group_size_dim(1) ||
|
||||
static_cast<size_t>(_block_cell_id) > gpu->group_size_dim(0) ||
|
||||
static_cast<size_t>(_max_shared_types*_max_shared_types*sizeof(numtyp)*17 > gpu->slm_size()) ||
|
||||
static_cast<size_t>(_max_bio_shared_types*2*sizeof(numtyp) > gpu->slm_size()))
|
||||
return -13;
|
||||
|
||||
if (_block_pair % _simd_size != 0 || _block_bio_pair % _simd_size != 0 ||
|
||||
@ -1071,9 +1069,8 @@ void lmp_clear_device() {
|
||||
global_device.clear_device();
|
||||
}
|
||||
|
||||
double lmp_gpu_forces(double **f, double **tor, double *eatom,
|
||||
double **vatom, double *virial, double &ecoul,
|
||||
int &error_flag) {
|
||||
double lmp_gpu_forces(double **f, double **tor, double *eatom, double **vatom,
|
||||
double *virial, double &ecoul, int &error_flag) {
|
||||
return global_device.fix_gpu(f,tor,eatom,vatom,virial,ecoul,error_flag);
|
||||
}
|
||||
|
||||
|
||||
@ -163,17 +163,15 @@ class Device {
|
||||
{ ans_queue.push(ans); }
|
||||
|
||||
/// Add "answers" (force,energies,etc.) into LAMMPS structures
|
||||
inline double fix_gpu(double **f, double **tor, double *eatom,
|
||||
double **vatom, double *virial, double &ecoul,
|
||||
int &error_flag) {
|
||||
inline double fix_gpu(double **f, double **tor, double *eatom, double **vatom,
|
||||
double *virial, double &ecoul, int &error_flag) {
|
||||
error_flag=0;
|
||||
atom.data_unavail();
|
||||
if (ans_queue.empty()==false) {
|
||||
stop_host_timer();
|
||||
double evdw=0.0;
|
||||
while (ans_queue.empty()==false) {
|
||||
evdw+=ans_queue.front()->get_answers(f,tor,eatom,vatom,virial,ecoul,
|
||||
error_flag);
|
||||
evdw += ans_queue.front()->get_answers(f,tor,eatom,vatom,virial,ecoul,error_flag);
|
||||
ans_queue.pop();
|
||||
}
|
||||
return evdw;
|
||||
|
||||
@ -50,12 +50,10 @@ extern int lmp_init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
|
||||
const int ocl_platform, char *device_type_flags,
|
||||
const int block_pair);
|
||||
extern void lmp_clear_device();
|
||||
extern double lmp_gpu_forces(double **f, double **tor, double *eatom,
|
||||
double **vatom, double *virial, double &ecoul,
|
||||
int &err_flag);
|
||||
extern double lmp_gpu_update_bin_size(const double subx, const double suby,
|
||||
const double subz, const int nlocal,
|
||||
const double cut);
|
||||
extern double lmp_gpu_forces(double **f, double **tor, double *eatom, double **vatom,
|
||||
double *virial, double &ecoul, int &err_flag);
|
||||
extern double lmp_gpu_update_bin_size(const double subx, const double suby, const double subz,
|
||||
const int nlocal, const double cut);
|
||||
|
||||
static const char cite_gpu_package[] =
|
||||
"GPU package (short-range, long-range and three-body potentials):\n\n"
|
||||
@ -327,13 +325,11 @@ void FixGPU::post_force(int /* vflag */)
|
||||
double lvirial[6];
|
||||
for (int i = 0; i < 6; i++) lvirial[i] = 0.0;
|
||||
int err_flag;
|
||||
double my_eng = lmp_gpu_forces(atom->f, atom->torque, force->pair->eatom,
|
||||
force->pair->vatom, lvirial,
|
||||
force->pair->eng_coul, err_flag);
|
||||
double my_eng = lmp_gpu_forces(atom->f, atom->torque, force->pair->eatom, force->pair->vatom,
|
||||
lvirial, force->pair->eng_coul, err_flag);
|
||||
if (err_flag) {
|
||||
if (err_flag==1)
|
||||
error->one(FLERR,
|
||||
"Too many neighbors on GPU. Use neigh_modify one to increase limit.");
|
||||
error->one(FLERR,"Too many neighbors on GPU. Use neigh_modify one to increase limit.");
|
||||
}
|
||||
|
||||
force->pair->eng_vdwl += my_eng;
|
||||
|
||||
Reference in New Issue
Block a user