Fix segmentation faults for subdomains with no work. Change the fp data copy so that only local atoms are copied from device to host and only ghost atoms are copied from host to device.
/***************************************************************************
                                  lal_eam.cpp
                             -------------------
                  W. Michael Brown, Trung Dac Nguyen (ORNL)

  Class for acceleration of the eam pair style.

 __________________________________________________________________________
    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
 __________________________________________________________________________

    begin                :
    email                : brownw@ornl.gov nguyentd@ornl.gov
 ***************************************************************************/

#ifdef USE_OPENCL
#include "eam_cl.h"
#else
#include "eam_ptx.h"
#endif

#include "lal_eam.h"
#include <cassert>

using namespace LAMMPS_AL;
#define EAMT EAM<numtyp, acctyp>

#define MIN(A,B) ((A) < (B) ? (A) : (B))
#define MAX(A,B) ((A) > (B) ? (A) : (B))

extern Device<PRECISION,ACC_PRECISION> device;

template <class numtyp, class acctyp>
EAMT::EAM() : BaseAtomic<numtyp,acctyp>(),
  _compiled_energy(false), _allocated(false) {
}

template <class numtyp, class acctyp>
EAMT::~EAM() {
  clear();
}

template <class numtyp, class acctyp>
int EAMT::init(const int ntypes, double host_cutforcesq,
               int **host_type2rhor, int **host_type2z2r, int *host_type2frho,
               double ***host_rhor_spline, double ***host_z2r_spline,
               double ***host_frho_spline,
               double rdr, double rdrho, int nrhor, int nrho,
               int nz2r, int nfrho, int nr,
               const int nlocal, const int nall, const int max_nbors,
               const int maxspecial, const double cell_size,
               const double gpu_split, FILE *_screen)
{
  int success;
  success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
                            _screen,eam);

  if (success!=0)
    return success;

  // allocate fp
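  // The fp array holds the per-atom derivative of the embedding energy,
  // which is exchanged with the host between the two kernel passes.  It is
  // sized at 110% of nall (with a fallback size when nall is zero); on a
  // CPU device, dev_fp is simply a view of host_fp, so no separate device
  // buffer or transfer is needed.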
  bool cpuview=false;
  if (this->ucl_device->device_type()==UCL_CPU)
    cpuview=true;

  int ef_nall=nall;
  if (ef_nall==0)
    ef_nall=2000;

  _max_fp_size=static_cast<int>(static_cast<double>(ef_nall)*1.10);
  host_fp.alloc(_max_fp_size,*(this->ucl_device));
  if (cpuview)
    dev_fp.view(host_fp);
  else
    dev_fp.alloc(_max_fp_size,*(this->ucl_device),UCL_WRITE_ONLY);
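
  // Retrieve the kernels for the first (density/embedding energy) pass and
  // bind dev_fp to the fp texture referenced by the pair kernels.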
  k_energy.set_function(*(this->pair_program),"kernel_energy");
  k_energy_fast.set_function(*(this->pair_program),"kernel_energy_fast");
  fp_tex.get_texture(*(this->pair_program),"fp_tex");
  fp_tex.bind_float(dev_fp,1);
  _compiled_energy = true;

  // Initialize timers for selected GPU
  time_pair2.init(*(this->ucl_device));
  time_pair2.zero();

  time_fp1.init(*(this->ucl_device));
  time_fp1.zero();

  time_fp2.init(*(this->ucl_device));
  time_fp2.zero();

  // If atom type constants fit in shared memory use fast kernel
  int lj_types=ntypes;
  shared_types=false;

  int max_shared_types=this->device->max_shared_types();
  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
    lj_types=max_shared_types;
    shared_types=true;
  }

  _ntypes=lj_types;
  _cutforcesq=host_cutforcesq;
  _rdr=rdr;
  _rdrho = rdrho;
  _nrhor=nrhor;
  _nrho=nrho;
  _nz2r=nz2r;
  _nfrho=nfrho;
  _nr=nr;

  UCL_H_Vec<int2> dview_type(lj_types*lj_types,*(this->ucl_device),
                             UCL_WRITE_OPTIMIZED);

  for (int i=0; i<lj_types*lj_types; i++) {
    dview_type[i].x=0; dview_type[i].y=0;
  }

  // pack type2rhor and type2z2r
  type2rhor_z2r.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);

  for (int i=0; i<ntypes; i++) {
    for (int j=0; j<ntypes; j++) {
      dview_type[i*lj_types+j].x=host_type2rhor[i][j];
      dview_type[i*lj_types+j].y=host_type2z2r[i][j];
    }
  }

  ucl_copy(type2rhor_z2r,dview_type,false);

  // pack type2frho
  UCL_H_Vec<int> dview_type2frho(lj_types,*(this->ucl_device),
                                 UCL_WRITE_OPTIMIZED);

  type2frho.alloc(lj_types,*(this->ucl_device),UCL_READ_ONLY);
  for (int i=0; i<ntypes; i++)
    dview_type2frho[i]=host_type2frho[i];
  ucl_copy(type2frho,dview_type2frho,false);
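
  // Each spline table stores seven coefficients per knot.  They are split
  // across two numtyp4 textures: *_spline1 holds coefficients 0-2 (with the
  // unused w component zeroed) and *_spline2 holds coefficients 3-6, so the
  // kernels can fetch each half with a single texture read.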
  // pack frho_spline
  UCL_H_Vec<numtyp4> dview_frho_spline(nfrho*(nrho+1),*(this->ucl_device),
                                       UCL_WRITE_OPTIMIZED);

  for (int ix=0; ix<nfrho; ix++)
    for (int iy=0; iy<nrho+1; iy++) {
      dview_frho_spline[ix*(nrho+1)+iy].x=host_frho_spline[ix][iy][0];
      dview_frho_spline[ix*(nrho+1)+iy].y=host_frho_spline[ix][iy][1];
      dview_frho_spline[ix*(nrho+1)+iy].z=host_frho_spline[ix][iy][2];
      dview_frho_spline[ix*(nrho+1)+iy].w=0;
    }

  frho_spline1.alloc(nfrho*(nrho+1),*(this->ucl_device),UCL_READ_ONLY);
  ucl_copy(frho_spline1,dview_frho_spline,false);
  frho_spline1_tex.get_texture(*(this->pair_program),"frho_sp1_tex");
  frho_spline1_tex.bind_float(frho_spline1,4);

  for (int ix=0; ix<nfrho; ix++)
    for (int iy=0; iy<nrho+1; iy++) {
      dview_frho_spline[ix*(nrho+1)+iy].x=host_frho_spline[ix][iy][3];
      dview_frho_spline[ix*(nrho+1)+iy].y=host_frho_spline[ix][iy][4];
      dview_frho_spline[ix*(nrho+1)+iy].z=host_frho_spline[ix][iy][5];
      dview_frho_spline[ix*(nrho+1)+iy].w=host_frho_spline[ix][iy][6];
    }

  frho_spline2.alloc(nfrho*(nrho+1),*(this->ucl_device),UCL_READ_ONLY);
  ucl_copy(frho_spline2,dview_frho_spline,false);
  frho_spline2_tex.get_texture(*(this->pair_program),"frho_sp2_tex");
  frho_spline2_tex.bind_float(frho_spline2,4);

  // pack rhor_spline
  UCL_H_Vec<numtyp4> dview_rhor_spline(nrhor*(nr+1),*(this->ucl_device),
                                       UCL_WRITE_OPTIMIZED);

  for (int ix=0; ix<nrhor; ix++)
    for (int iy=0; iy<nr+1; iy++) {
      dview_rhor_spline[ix*(nr+1)+iy].x=host_rhor_spline[ix][iy][0];
      dview_rhor_spline[ix*(nr+1)+iy].y=host_rhor_spline[ix][iy][1];
      dview_rhor_spline[ix*(nr+1)+iy].z=host_rhor_spline[ix][iy][2];
      dview_rhor_spline[ix*(nr+1)+iy].w=(numtyp)0;
    }

  rhor_spline1.alloc(nrhor*(nr+1),*(this->ucl_device),UCL_READ_ONLY);
  ucl_copy(rhor_spline1,dview_rhor_spline,false);
  rhor_spline1_tex.get_texture(*(this->pair_program),"rhor_sp1_tex");
  rhor_spline1_tex.bind_float(rhor_spline1,4);

  for (int ix=0; ix<nrhor; ix++)
    for (int iy=0; iy<nr+1; iy++) {
      dview_rhor_spline[ix*(nr+1)+iy].x=host_rhor_spline[ix][iy][3];
      dview_rhor_spline[ix*(nr+1)+iy].y=host_rhor_spline[ix][iy][4];
      dview_rhor_spline[ix*(nr+1)+iy].z=host_rhor_spline[ix][iy][5];
      dview_rhor_spline[ix*(nr+1)+iy].w=host_rhor_spline[ix][iy][6];
    }

  rhor_spline2.alloc(nrhor*(nr+1),*(this->ucl_device),UCL_READ_ONLY);
  ucl_copy(rhor_spline2,dview_rhor_spline,false);
  rhor_spline2_tex.get_texture(*(this->pair_program),"rhor_sp2_tex");
  rhor_spline2_tex.bind_float(rhor_spline2,4);

  // pack z2r_spline
  UCL_H_Vec<numtyp4> dview_z2r_spline(nz2r*(nr+1),*(this->ucl_device),
                                      UCL_WRITE_OPTIMIZED);

  for (int ix=0; ix<nz2r; ix++)
    for (int iy=0; iy<nr+1; iy++) {
      dview_z2r_spline[ix*(nr+1)+iy].x=host_z2r_spline[ix][iy][0];
      dview_z2r_spline[ix*(nr+1)+iy].y=host_z2r_spline[ix][iy][1];
      dview_z2r_spline[ix*(nr+1)+iy].z=host_z2r_spline[ix][iy][2];
      dview_z2r_spline[ix*(nr+1)+iy].w=(numtyp)0;
    }

  z2r_spline1.alloc(nz2r*(nr+1),*(this->ucl_device),UCL_READ_ONLY);
  ucl_copy(z2r_spline1,dview_z2r_spline,false);
  z2r_spline1_tex.get_texture(*(this->pair_program),"z2r_sp1_tex");
  z2r_spline1_tex.bind_float(z2r_spline1,4);

  for (int ix=0; ix<nz2r; ix++)
    for (int iy=0; iy<nr+1; iy++) {
      dview_z2r_spline[ix*(nr+1)+iy].x=host_z2r_spline[ix][iy][3];
      dview_z2r_spline[ix*(nr+1)+iy].y=host_z2r_spline[ix][iy][4];
      dview_z2r_spline[ix*(nr+1)+iy].z=host_z2r_spline[ix][iy][5];
      dview_z2r_spline[ix*(nr+1)+iy].w=host_z2r_spline[ix][iy][6];
    }

  z2r_spline2.alloc(nz2r*(nr+1),*(this->ucl_device),UCL_READ_ONLY);
  ucl_copy(z2r_spline2,dview_z2r_spline,false);
  z2r_spline2_tex.get_texture(*(this->pair_program),"z2r_sp2_tex");
  z2r_spline2_tex.bind_float(z2r_spline2,4);
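
  // Track the device memory consumed by the tables and the fp array; this
  // total is used for the library's memory-usage reporting.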
  _allocated=true;
  this->_max_bytes=type2rhor_z2r.row_bytes()
        + type2frho.row_bytes()
        + rhor_spline1.row_bytes()
        + rhor_spline2.row_bytes()
        + frho_spline1.row_bytes()
        + frho_spline2.row_bytes()
        + z2r_spline1.row_bytes()
        + z2r_spline2.row_bytes()
        + dev_fp.row_bytes();
  return 0;
}

template <class numtyp, class acctyp>
void EAMT::clear() {
  if (!_allocated)
    return;
  _allocated=false;

  type2rhor_z2r.clear();
  type2frho.clear();
  rhor_spline1.clear();
  rhor_spline2.clear();
  frho_spline1.clear();
  frho_spline2.clear();
  z2r_spline1.clear();
  z2r_spline2.clear();

  host_fp.clear();
  dev_fp.clear();

  time_pair2.clear();
  time_fp1.clear();
  time_fp2.clear();

  if (_compiled_energy) {
    k_energy_fast.clear();
    k_energy.clear();
    _compiled_energy=false;
  }

  this->clear_atomic();
}

template <class numtyp, class acctyp>
double EAMT::host_memory_usage() const {
  return this->host_memory_usage_atomic()+sizeof(EAM<numtyp,acctyp>);
}
// ---------------------------------------------------------------------------
// Copy nbor list from host if necessary, then compute per-atom densities,
// embedding energies, and fp (first pass)
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void EAMT::compute(const int f_ago, const int inum_full, const int nlocal,
                   const int nall, double **host_x, int *host_type,
                   int *ilist, int *numj, int **firstneigh,
                   const bool eflag, const bool vflag,
                   const bool eatom, const bool vatom,
                   int &host_start, const double cpu_time,
                   bool &success, void **fp_ptr) {
  this->acc_timers();

  if (this->device->time_device()) {
    // Put time from the second part to the total time_pair
    this->time_pair.add_time_to_total(time_pair2.time());

    // Add transfer time from device -> host after part 1
    this->atom->add_transfer_time(time_fp1.time());

    // Add transfer time from host -> device before part 2
    this->atom->add_transfer_time(time_fp2.time());
  }

  // ------------------- Resize FP Array for EAM --------------------

  if (nall>_max_fp_size) {
    dev_fp.clear();
    host_fp.clear();

    _max_fp_size=static_cast<int>(static_cast<double>(nall)*1.10);
    host_fp.alloc(_max_fp_size,*(this->ucl_device));
    if (this->ucl_device->device_type()==UCL_CPU)
      dev_fp.view(host_fp);
    else
      dev_fp.alloc(_max_fp_size,*(this->ucl_device));

    fp_tex.bind_float(dev_fp,1);
  }
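
  // Hand the host-side fp buffer back to the pair style so it can be used
  // for forward communication of fp between subdomains.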
  *fp_ptr=host_fp.begin();

  // ----------------------------------------------------------------
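
  // Subdomains with no work (no local atoms) return early here: the kernel
  // launches and the fp device-to-host copy are skipped, which avoids the
  // segmentation faults for empty subdomains.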
  if (inum_full==0) {
    host_start=0;
    // Make sure textures are correct if realloc by a different hybrid style
    this->resize_atom(0,nall,success);
    this->zero_timers();
    return;
  }
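
  // Load balance between host and device; inum is the number of atoms
  // assigned to the device for this step.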
  int ago=this->hd_balancer.ago_first(f_ago);
  int inum=this->hd_balancer.balance(ago,inum_full,cpu_time);
  this->ans->inum(inum);
  host_start=inum;

  // -----------------------------------------------------------------

  if (ago==0) {
    this->reset_nbors(nall, inum, ilist, numj, firstneigh, success);
    if (!success)
      return;
  }

  this->atom->cast_x_data(host_x,host_type);
  this->atom->add_x_data(host_x,host_type);

  loop(eflag,vflag);

  // copy fp from device to host for comm
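  // Only the nlocal owned atoms are copied device -> host; fp for ghost
  // atoms is filled in by LAMMPS forward communication on the host.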
  _nlocal=nlocal;
  time_fp1.start();
  ucl_copy(host_fp,dev_fp,nlocal,false);
  time_fp1.stop();
}

// ---------------------------------------------------------------------------
// Reneighbor on GPU and then compute per-atom densities
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int** EAMT::compute(const int ago, const int inum_full,
                    const int nall, double **host_x, int *host_type,
                    double *sublo, double *subhi, int *tag,
                    int **nspecial, int **special, const bool eflag,
                    const bool vflag, const bool eatom,
                    const bool vatom, int &host_start,
                    int **ilist, int **jnum,
                    const double cpu_time, bool &success,
                    int &inum, void **fp_ptr) {
  this->acc_timers();

  if (this->device->time_device()) {
    // Put time from the second part to the total time_pair
    this->time_pair.add_time_to_total(time_pair2.time());

    // Add transfer time from device -> host after part 1
    this->atom->add_transfer_time(time_fp1.time());

    // Add transfer time from host -> device before part 2
    this->atom->add_transfer_time(time_fp2.time());
  }

  // ------------------- Resize FP Array for EAM --------------------

  if (nall>_max_fp_size) {
    dev_fp.clear();
    host_fp.clear();

    _max_fp_size=static_cast<int>(static_cast<double>(nall)*1.10);
    host_fp.alloc(_max_fp_size,*(this->ucl_device));
    if (this->ucl_device->device_type()==UCL_CPU)
      dev_fp.view(host_fp);
    else
      dev_fp.alloc(_max_fp_size,*(this->ucl_device));

    fp_tex.bind_float(dev_fp,1);
  }
  *fp_ptr=host_fp.begin();

  // -----------------------------------------------------------------
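
  // As above, an empty subdomain returns early so that no kernels are
  // launched and no fp copy is attempted.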
  if (inum_full==0) {
    host_start=0;
    // Make sure textures are correct if realloc by a different hybrid style
    this->resize_atom(0,nall,success);
    this->zero_timers();
    return NULL;
  }

  // load balance, returning the atom count on the device (inum)
  this->hd_balancer.balance(cpu_time);
  inum=this->hd_balancer.get_gpu_count(ago,inum_full);
  this->ans->inum(inum);
  host_start=inum;

  // Build neighbor list on GPU if necessary
  if (ago==0) {
    this->build_nbor_list(inum, inum_full-inum, nall, host_x, host_type,
                          sublo, subhi, tag, nspecial, special, success);
    if (!success)
      return NULL;
  } else {
    this->atom->cast_x_data(host_x,host_type);
    this->atom->add_x_data(host_x,host_type);
  }
  *ilist=this->nbor->host_ilist.begin();
  *jnum=this->nbor->host_acc.begin();

  loop(eflag,vflag);

  // copy fp from device to host for comm
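  // In this path inum_full is the number of local atoms, so again only the
  // locally owned fp values are copied back to the host.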
  _nlocal=inum_full;
  time_fp1.start();
  ucl_copy(host_fp,dev_fp,inum_full,false);
  time_fp1.stop();

  return this->nbor->host_jlist.begin()-host_start;
}
// ---------------------------------------------------------------------------
// Second pass: copy the communicated fp values to the device, then calculate
// forces, energies, and virials
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void EAMT::compute2(int *ilist, const bool eflag, const bool vflag,
                    const bool eatom, const bool vatom) {
  if (this->ans->inum()==0)
    return;

  this->hd_balancer.start_timer();
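
  // Push the host fp values, now including ghost atoms updated by LAMMPS
  // communication, back to the device before the force kernel runs
  // (host -> device transfer timed as part 2).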
  time_fp2.start();
  this->add_fp_data();
  time_fp2.stop();

  loop2(eflag,vflag);
  if (ilist == NULL)
    this->ans->copy_answers(eflag,vflag,eatom,vatom);
  else
    this->ans->copy_answers(eflag,vflag,eatom,vatom, ilist);

  this->device->add_ans_object(this->ans);
  this->hd_balancer.stop_timer();
}
// ---------------------------------------------------------------------------
// Calculate per-atom densities, embedding energies, and fp (first pass)
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void EAMT::loop(const bool _eflag, const bool _vflag) {
  // Compute the block size and grid size to keep all cores busy
  const int BX=this->block_size();
  int eflag, vflag;
  if (_eflag)
    eflag=1;
  else
    eflag=0;

  if (_vflag)
    vflag=1;
  else
    vflag=0;

  int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
                               (BX/this->_threads_per_atom)));

  int ainum=this->ans->inum();
  int nbor_pitch=this->nbor->nbor_pitch();
  this->time_pair.start();

  if (shared_types) {
    this->k_energy_fast.set_size(GX,BX);
    this->k_energy_fast.run(&this->atom->dev_x.begin(),
                            &type2rhor_z2r.begin(), &type2frho.begin(),
                            &rhor_spline2.begin(),
                            &frho_spline1.begin(),&frho_spline2.begin(),
                            &this->nbor->dev_nbor.begin(),
                            &this->_nbor_data->begin(),
                            &dev_fp.begin(),
                            &this->ans->dev_engv.begin(),
                            &eflag, &ainum,
                            &nbor_pitch,
                            &_ntypes, &_cutforcesq,
                            &_rdr, &_rdrho,
                            &_nrho, &_nr,
                            &this->_threads_per_atom);
  } else {
    this->k_energy.set_size(GX,BX);
    this->k_energy.run(&this->atom->dev_x.begin(),
                       &type2rhor_z2r.begin(), &type2frho.begin(),
                       &rhor_spline2.begin(),
                       &frho_spline1.begin(),&frho_spline2.begin(),
                       &this->nbor->dev_nbor.begin(),
                       &this->_nbor_data->begin(),
                       &dev_fp.begin(),
                       &this->ans->dev_engv.begin(),
                       &eflag, &ainum,
                       &nbor_pitch,
                       &_ntypes, &_cutforcesq,
                       &_rdr, &_rdrho,
                       &_nrho, &_nr,
                       &this->_threads_per_atom);
  }

  this->time_pair.stop();
}
// ---------------------------------------------------------------------------
// Calculate energies, forces, and virials (second pass)
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void EAMT::loop2(const bool _eflag, const bool _vflag) {
  // Compute the block size and grid size to keep all cores busy
  const int BX=this->block_size();
  int eflag, vflag;
  if (_eflag)
    eflag=1;
  else
    eflag=0;

  if (_vflag)
    vflag=1;
  else
    vflag=0;

  int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
                               (BX/this->_threads_per_atom)));

  int ainum=this->ans->inum();
  int nbor_pitch=this->nbor->nbor_pitch();
  this->time_pair2.start();

  if (shared_types) {
    this->k_pair_fast.set_size(GX,BX);
    this->k_pair_fast.run(&this->atom->dev_x.begin(), &dev_fp.begin(),
                          &type2rhor_z2r.begin(),
                          &rhor_spline1.begin(),
                          &z2r_spline1.begin(),
                          &z2r_spline2.begin(),
                          &this->nbor->dev_nbor.begin(),
                          &this->_nbor_data->begin(),
                          &this->ans->dev_ans.begin(),
                          &this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
                          &nbor_pitch, &_cutforcesq, &_rdr, &_nr,
                          &this->_threads_per_atom);
  } else {
    this->k_pair.set_size(GX,BX);
    this->k_pair.run(&this->atom->dev_x.begin(), &dev_fp.begin(),
                     &type2rhor_z2r.begin(),
                     &rhor_spline1.begin(),
                     &z2r_spline1.begin(),
                     &z2r_spline2.begin(),
                     &this->nbor->dev_nbor.begin(),
                     &this->_nbor_data->begin(),
                     &this->ans->dev_ans.begin(),
                     &this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
                     &nbor_pitch, &_ntypes, &_cutforcesq, &_rdr, &_nr,
                     &this->_threads_per_atom);
  }

  this->time_pair2.stop();
}

template class EAM<PRECISION,ACC_PRECISION>;