reformat for increased readability
This commit is contained in:
@ -127,9 +127,8 @@ class Answer {
|
|||||||
/// Add forces and torques from the GPU into a LAMMPS pointer
|
/// Add forces and torques from the GPU into a LAMMPS pointer
|
||||||
void get_answers(double **f, double **tor);
|
void get_answers(double **f, double **tor);
|
||||||
|
|
||||||
inline double get_answers(double **f, double **tor, double *eatom,
|
inline double get_answers(double **f, double **tor, double *eatom, double **vatom,
|
||||||
double **vatom, double *virial, double &ecoul,
|
double *virial, double &ecoul, int &error_flag_in) {
|
||||||
int &error_flag_in) {
|
|
||||||
double ta=MPI_Wtime();
|
double ta=MPI_Wtime();
|
||||||
time_answer.sync_stop();
|
time_answer.sync_stop();
|
||||||
_time_cpu_idle+=MPI_Wtime()-ta;
|
_time_cpu_idle+=MPI_Wtime()-ta;
|
||||||
|
|||||||
@ -992,10 +992,8 @@ int DeviceT::compile_kernels() {
|
|||||||
static_cast<size_t>(_block_cell_2d) > gpu->group_size_dim(0) ||
|
static_cast<size_t>(_block_cell_2d) > gpu->group_size_dim(0) ||
|
||||||
static_cast<size_t>(_block_cell_2d) > gpu->group_size_dim(1) ||
|
static_cast<size_t>(_block_cell_2d) > gpu->group_size_dim(1) ||
|
||||||
static_cast<size_t>(_block_cell_id) > gpu->group_size_dim(0) ||
|
static_cast<size_t>(_block_cell_id) > gpu->group_size_dim(0) ||
|
||||||
static_cast<size_t>(_max_shared_types*_max_shared_types*
|
static_cast<size_t>(_max_shared_types*_max_shared_types*sizeof(numtyp)*17 > gpu->slm_size()) ||
|
||||||
sizeof(numtyp)*17 > gpu->slm_size()) ||
|
static_cast<size_t>(_max_bio_shared_types*2*sizeof(numtyp) > gpu->slm_size()))
|
||||||
static_cast<size_t>(_max_bio_shared_types*2*sizeof(numtyp) >
|
|
||||||
gpu->slm_size()))
|
|
||||||
return -13;
|
return -13;
|
||||||
|
|
||||||
if (_block_pair % _simd_size != 0 || _block_bio_pair % _simd_size != 0 ||
|
if (_block_pair % _simd_size != 0 || _block_bio_pair % _simd_size != 0 ||
|
||||||
@ -1071,9 +1069,8 @@ void lmp_clear_device() {
|
|||||||
global_device.clear_device();
|
global_device.clear_device();
|
||||||
}
|
}
|
||||||
|
|
||||||
double lmp_gpu_forces(double **f, double **tor, double *eatom,
|
double lmp_gpu_forces(double **f, double **tor, double *eatom, double **vatom,
|
||||||
double **vatom, double *virial, double &ecoul,
|
double *virial, double &ecoul, int &error_flag) {
|
||||||
int &error_flag) {
|
|
||||||
return global_device.fix_gpu(f,tor,eatom,vatom,virial,ecoul,error_flag);
|
return global_device.fix_gpu(f,tor,eatom,vatom,virial,ecoul,error_flag);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -163,17 +163,15 @@ class Device {
|
|||||||
{ ans_queue.push(ans); }
|
{ ans_queue.push(ans); }
|
||||||
|
|
||||||
/// Add "answers" (force,energies,etc.) into LAMMPS structures
|
/// Add "answers" (force,energies,etc.) into LAMMPS structures
|
||||||
inline double fix_gpu(double **f, double **tor, double *eatom,
|
inline double fix_gpu(double **f, double **tor, double *eatom, double **vatom,
|
||||||
double **vatom, double *virial, double &ecoul,
|
double *virial, double &ecoul, int &error_flag) {
|
||||||
int &error_flag) {
|
|
||||||
error_flag=0;
|
error_flag=0;
|
||||||
atom.data_unavail();
|
atom.data_unavail();
|
||||||
if (ans_queue.empty()==false) {
|
if (ans_queue.empty()==false) {
|
||||||
stop_host_timer();
|
stop_host_timer();
|
||||||
double evdw=0.0;
|
double evdw=0.0;
|
||||||
while (ans_queue.empty()==false) {
|
while (ans_queue.empty()==false) {
|
||||||
evdw+=ans_queue.front()->get_answers(f,tor,eatom,vatom,virial,ecoul,
|
evdw += ans_queue.front()->get_answers(f,tor,eatom,vatom,virial,ecoul,error_flag);
|
||||||
error_flag);
|
|
||||||
ans_queue.pop();
|
ans_queue.pop();
|
||||||
}
|
}
|
||||||
return evdw;
|
return evdw;
|
||||||
|
|||||||
@ -50,12 +50,10 @@ extern int lmp_init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
|
|||||||
const int ocl_platform, char *device_type_flags,
|
const int ocl_platform, char *device_type_flags,
|
||||||
const int block_pair);
|
const int block_pair);
|
||||||
extern void lmp_clear_device();
|
extern void lmp_clear_device();
|
||||||
extern double lmp_gpu_forces(double **f, double **tor, double *eatom,
|
extern double lmp_gpu_forces(double **f, double **tor, double *eatom, double **vatom,
|
||||||
double **vatom, double *virial, double &ecoul,
|
double *virial, double &ecoul, int &err_flag);
|
||||||
int &err_flag);
|
extern double lmp_gpu_update_bin_size(const double subx, const double suby, const double subz,
|
||||||
extern double lmp_gpu_update_bin_size(const double subx, const double suby,
|
const int nlocal, const double cut);
|
||||||
const double subz, const int nlocal,
|
|
||||||
const double cut);
|
|
||||||
|
|
||||||
static const char cite_gpu_package[] =
|
static const char cite_gpu_package[] =
|
||||||
"GPU package (short-range, long-range and three-body potentials):\n\n"
|
"GPU package (short-range, long-range and three-body potentials):\n\n"
|
||||||
@ -327,13 +325,11 @@ void FixGPU::post_force(int /* vflag */)
|
|||||||
double lvirial[6];
|
double lvirial[6];
|
||||||
for (int i = 0; i < 6; i++) lvirial[i] = 0.0;
|
for (int i = 0; i < 6; i++) lvirial[i] = 0.0;
|
||||||
int err_flag;
|
int err_flag;
|
||||||
double my_eng = lmp_gpu_forces(atom->f, atom->torque, force->pair->eatom,
|
double my_eng = lmp_gpu_forces(atom->f, atom->torque, force->pair->eatom, force->pair->vatom,
|
||||||
force->pair->vatom, lvirial,
|
lvirial, force->pair->eng_coul, err_flag);
|
||||||
force->pair->eng_coul, err_flag);
|
|
||||||
if (err_flag) {
|
if (err_flag) {
|
||||||
if (err_flag==1)
|
if (err_flag==1)
|
||||||
error->one(FLERR,
|
error->one(FLERR,"Too many neighbors on GPU. Use neigh_modify one to increase limit.");
|
||||||
"Too many neighbors on GPU. Use neigh_modify one to increase limit.");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
force->pair->eng_vdwl += my_eng;
|
force->pair->eng_vdwl += my_eng;
|
||||||
|
|||||||
Reference in New Issue
Block a user