git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14265 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2015-11-18 18:23:45 +00:00
parent e3c4db746c
commit f6c76f4623
9 changed files with 568 additions and 582 deletions

View File

@ -9,7 +9,7 @@
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________ __________________________________________________________________________
begin : begin :
email : brownw@ornl.gov email : brownw@ornl.gov
***************************************************************************/ ***************************************************************************/
@ -49,14 +49,18 @@ class Answer {
inline void inum(const int n) { _inum=n; } inline void inum(const int n) { _inum=n; }
/// Return the maximum number of atoms that can be stored currently /// Return the maximum number of atoms that can be stored currently
inline int max_inum() const { return _max_local; } inline int max_inum() const { return _max_local; }
/// Return the number of fields used for energy and virial
inline int ev_fields(const int mode) const {
return (mode == 1) ? _ev_fields : _e_fields;
}
/// Memory usage per atom in this class /// Memory usage per atom in this class
int bytes_per_atom() const; int bytes_per_atom() const;
/// Clear any previous data and set up for a new LAMMPS run /// Clear any previous data and set up for a new LAMMPS run
/** \param rot True if atom storage needs quaternions **/ /** \param rot True if atom storage needs quaternions **/
bool init(const int inum, const bool charge, const bool rot, UCL_Device &dev); bool init(const int inum, const bool charge, const bool rot, UCL_Device &dev);
/// Check if we have enough device storage and realloc if not /// Check if we have enough device storage and realloc if not
inline void resize(const int inum, bool &success) { inline void resize(const int inum, bool &success) {
_inum=inum; _inum=inum;
@ -67,14 +71,14 @@ class Answer {
_gpu_bytes=engv.device.row_bytes()+force.device.row_bytes(); _gpu_bytes=engv.device.row_bytes()+force.device.row_bytes();
} }
} }
/// If already initialized by another LAMMPS style, add fields as necessary /// If already initialized by another LAMMPS style, add fields as necessary
/** \param rot True if atom storage needs quaternions **/ /** \param rot True if atom storage needs quaternions **/
bool add_fields(const bool charge, const bool rot); bool add_fields(const bool charge, const bool rot);
/// Free all memory on host and device /// Free all memory on host and device
void clear(); void clear();
/// Return the total amount of host memory used by class in bytes /// Return the total amount of host memory used by class in bytes
double host_memory_usage() const; double host_memory_usage() const;
@ -92,12 +96,12 @@ class Answer {
inline double transfer_time() { inline double transfer_time() {
return time_answer.total_seconds(); return time_answer.total_seconds();
} }
/// Return the total time for data cast/pack /// Return the total time for data cast/pack
inline double cast_time() { return _time_cast; } inline double cast_time() { return _time_cast; }
/// Return number of bytes used on device /// Return number of bytes used on device
inline double gpu_bytes() { return _gpu_bytes; } inline double gpu_bytes() { return _gpu_bytes; }
// -------------------------COPY FROM GPU ------------------------------- // -------------------------COPY FROM GPU -------------------------------
@ -108,7 +112,7 @@ class Answer {
/// Copy answers from device into read buffer asynchronously /// Copy answers from device into read buffer asynchronously
void copy_answers(const bool eflag, const bool vflag, void copy_answers(const bool eflag, const bool vflag,
const bool ef_atom, const bool vf_atom, int *ilist); const bool ef_atom, const bool vf_atom, int *ilist);
/// Copy energy and virial data into LAMMPS memory /// Copy energy and virial data into LAMMPS memory
double energy_virial(double *eatom, double **vatom, double *virial); double energy_virial(double *eatom, double **vatom, double *virial);
@ -119,7 +123,7 @@ class Answer {
/// Add forces and torques from the GPU into a LAMMPS pointer /// Add forces and torques from the GPU into a LAMMPS pointer
void get_answers(double **f, double **tor); void get_answers(double **f, double **tor);
inline double get_answers(double **f, double **tor, double *eatom, inline double get_answers(double **f, double **tor, double *eatom,
double **vatom, double *virial, double &ecoul) { double **vatom, double *virial, double &ecoul) {
double ta=MPI_Wtime(); double ta=MPI_Wtime();
time_answer.sync_stop(); time_answer.sync_stop();
@ -130,7 +134,7 @@ class Answer {
_time_cast+=MPI_Wtime()-ts; _time_cast+=MPI_Wtime()-ts;
return evdw; return evdw;
} }
/// Return the time the CPU was idle waiting for GPU /// Return the time the CPU was idle waiting for GPU
inline double cpu_idle_time() { return _time_cpu_idle; } inline double cpu_idle_time() { return _time_cpu_idle; }
@ -143,23 +147,23 @@ class Answer {
UCL_Vector<acctyp,acctyp> force; UCL_Vector<acctyp,acctyp> force;
/// Energy and virial per-atom storage /// Energy and virial per-atom storage
UCL_Vector<acctyp,acctyp> engv; UCL_Vector<acctyp,acctyp> engv;
/// Device timers /// Device timers
UCL_Timer time_answer; UCL_Timer time_answer;
/// Geryon device /// Geryon device
UCL_Device *dev; UCL_Device *dev;
private: private:
bool alloc(const int inum); bool alloc(const int inum);
bool _allocated, _eflag, _vflag, _ef_atom, _vf_atom, _rot, _charge, _other; bool _allocated, _eflag, _vflag, _ef_atom, _vf_atom, _rot, _charge, _other;
int _max_local, _inum, _e_fields, _ev_fields, _ans_fields; int _max_local, _inum, _e_fields, _ev_fields, _ans_fields;
int *_ilist; int *_ilist;
double _time_cast, _time_cpu_idle; double _time_cast, _time_cpu_idle;
double _gpu_bytes; double _gpu_bytes;
bool _newton; bool _newton;
}; };

View File

@ -194,7 +194,7 @@ inline int BaseThreeT::build_nbor_list(const int inum, const int host_inum,
resize_atom(inum,nall,success); resize_atom(inum,nall,success);
resize_local(nall,host_inum,nbor->max_nbors(),success); resize_local(nall,host_inum,nbor->max_nbors(),success);
if (!success) if (!success)
return 1; return 0;
atom->cast_copy_x(host_x,host_type); atom->cast_copy_x(host_x,host_type);
int mn; int mn;

View File

@ -13,8 +13,8 @@
email : brownw@ornl.gov email : brownw@ornl.gov
***************************************************************************/ ***************************************************************************/
#ifndef LAL_BASE_ATOMIC_H #ifndef LAL_BASE_THREE_H
#define LAL_BASE_ATOMIC_H #define LAL_BASE_THREE_H
#include "lal_device.h" #include "lal_device.h"
#include "lal_balance.h" #include "lal_balance.h"
@ -28,7 +28,7 @@
#include "geryon/nvd_texture.h" #include "geryon/nvd_texture.h"
#endif #endif
#define THREE_CONCURRENT //#define THREE_CONCURRENT
namespace LAMMPS_AL { namespace LAMMPS_AL {

View File

@ -37,7 +37,7 @@ texture<int4> sw3_tex;
#define THIRD (numtyp)0.66666667 #define THIRD (numtyp)0.66666667
#define THREE_CONCURRENT //#define THREE_CONCURRENT
#if (ARCH < 300) #if (ARCH < 300)

View File

@ -33,10 +33,10 @@ TersoffT::Tersoff() : BaseThree<numtyp,acctyp>(), _allocated(false) {
} }
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
TersoffT::~Tersoff() { TersoffT::~Tersoff() {
clear(); clear();
} }
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
int TersoffT::bytes_per_atom(const int max_nbors) const { int TersoffT::bytes_per_atom(const int max_nbors) const {
return this->bytes_per_atom_atomic(max_nbors); return this->bytes_per_atom_atomic(max_nbors);
@ -45,11 +45,11 @@ int TersoffT::bytes_per_atom(const int max_nbors) const {
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
int TersoffT::init(const int ntypes, const int nlocal, const int nall, const int max_nbors, int TersoffT::init(const int ntypes, const int nlocal, const int nall, const int max_nbors,
const double cell_size, const double gpu_split, FILE *_screen, const double cell_size, const double gpu_split, FILE *_screen,
int* host_map, const int nelements, int*** host_elem2param, const int nparams, int* host_map, const int nelements, int*** host_elem2param, const int nparams,
const double* lam1, const double* lam2, const double* lam3,const double* powermint, const double* lam1, const double* lam2, const double* lam3,const double* powermint,
const double* biga, const double* bigb, const double* bigr, const double* bigd, const double* biga, const double* bigb, const double* bigr, const double* bigd,
const double* c1, const double* c2, const double* c3, const double* c4, const double* c1, const double* c2, const double* c3, const double* c4,
const double* c, const double* d, const double* h, const double* gamma, const double* c, const double* d, const double* h, const double* gamma,
const double* beta, const double* powern, const double* host_cutsq) const double* beta, const double* powern, const double* host_cutsq)
{ {
int success; int success;
@ -62,11 +62,7 @@ int TersoffT::init(const int ntypes, const int nlocal, const int nall, const int
int ef_nall=nall; int ef_nall=nall;
if (ef_nall==0) if (ef_nall==0)
ef_nall=2000; ef_nall=2000;
_zetaij.alloc(ef_nall*max_nbors,*(this->ucl_device),UCL_READ_WRITE);
_max_zij_size=static_cast<int>(static_cast<double>(ef_nall)*1.10);
_zetaij.alloc(_max_zij_size*max_nbors,*(this->ucl_device),UCL_READ_WRITE);
zeta_tex.get_texture(*(this->pair_program),"zeta_tex");
zeta_tex.bind_float(_zetaij,1);
k_zeta.set_function(*(this->pair_program),"k_tersoff_zeta"); k_zeta.set_function(*(this->pair_program),"k_tersoff_zeta");
@ -87,74 +83,74 @@ int TersoffT::init(const int ntypes, const int nlocal, const int nall, const int
UCL_WRITE_ONLY); UCL_WRITE_ONLY);
for (int i=0; i<nparams; i++) { for (int i=0; i<nparams; i++) {
dview[i].x=(numtyp)0; dview[i].x=(numtyp)0;
dview[i].y=(numtyp)0; dview[i].y=(numtyp)0;
dview[i].z=(numtyp)0; dview[i].z=(numtyp)0;
dview[i].w=(numtyp)0; dview[i].w=(numtyp)0;
} }
// pack coefficients into arrays // pack coefficients into arrays
ts1.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY); ts1.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
for (int i=0; i<nparams; i++) { for (int i=0; i<nparams; i++) {
dview[i].x=static_cast<numtyp>(lam1[i]); dview[i].x=static_cast<numtyp>(lam1[i]);
dview[i].y=static_cast<numtyp>(lam2[i]); dview[i].y=static_cast<numtyp>(lam2[i]);
dview[i].z=static_cast<numtyp>(lam3[i]); dview[i].z=static_cast<numtyp>(lam3[i]);
dview[i].w=static_cast<numtyp>(powermint[i]); dview[i].w=static_cast<numtyp>(powermint[i]);
} }
ucl_copy(ts1,dview,false); ucl_copy(ts1,dview,false);
ts1_tex.get_texture(*(this->pair_program),"ts1_tex"); ts1_tex.get_texture(*(this->pair_program),"ts1_tex");
ts1_tex.bind_float(ts1,4); ts1_tex.bind_float(ts1,4);
ts2.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY); ts2.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
for (int i=0; i<nparams; i++) { for (int i=0; i<nparams; i++) {
dview[i].x=static_cast<numtyp>(biga[i]); dview[i].x=static_cast<numtyp>(biga[i]);
dview[i].y=static_cast<numtyp>(bigb[i]); dview[i].y=static_cast<numtyp>(bigb[i]);
dview[i].z=static_cast<numtyp>(bigr[i]); dview[i].z=static_cast<numtyp>(bigr[i]);
dview[i].w=static_cast<numtyp>(bigd[i]); dview[i].w=static_cast<numtyp>(bigd[i]);
} }
ucl_copy(ts2,dview,false); ucl_copy(ts2,dview,false);
ts2_tex.get_texture(*(this->pair_program),"ts2_tex"); ts2_tex.get_texture(*(this->pair_program),"ts2_tex");
ts2_tex.bind_float(ts2,4); ts2_tex.bind_float(ts2,4);
ts3.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY); ts3.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
for (int i=0; i<nparams; i++) { for (int i=0; i<nparams; i++) {
dview[i].x=static_cast<numtyp>(c1[i]); dview[i].x=static_cast<numtyp>(c1[i]);
dview[i].y=static_cast<numtyp>(c2[i]); dview[i].y=static_cast<numtyp>(c2[i]);
dview[i].z=static_cast<numtyp>(c3[i]); dview[i].z=static_cast<numtyp>(c3[i]);
dview[i].w=static_cast<numtyp>(c4[i]); dview[i].w=static_cast<numtyp>(c4[i]);
} }
ucl_copy(ts3,dview,false); ucl_copy(ts3,dview,false);
ts3_tex.get_texture(*(this->pair_program),"ts3_tex"); ts3_tex.get_texture(*(this->pair_program),"ts3_tex");
ts3_tex.bind_float(ts3,4); ts3_tex.bind_float(ts3,4);
ts4.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY); ts4.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
for (int i=0; i<nparams; i++) { for (int i=0; i<nparams; i++) {
dview[i].x=static_cast<numtyp>(c[i]); dview[i].x=static_cast<numtyp>(c[i]);
dview[i].y=static_cast<numtyp>(d[i]); dview[i].y=static_cast<numtyp>(d[i]);
dview[i].z=static_cast<numtyp>(h[i]); dview[i].z=static_cast<numtyp>(h[i]);
dview[i].w=static_cast<numtyp>(gamma[i]); dview[i].w=static_cast<numtyp>(gamma[i]);
} }
ucl_copy(ts4,dview,false); ucl_copy(ts4,dview,false);
ts4_tex.get_texture(*(this->pair_program),"ts4_tex"); ts4_tex.get_texture(*(this->pair_program),"ts4_tex");
ts4_tex.bind_float(ts4,4); ts4_tex.bind_float(ts4,4);
ts5.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY); ts5.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
for (int i=0; i<nparams; i++) { for (int i=0; i<nparams; i++) {
dview[i].x=static_cast<numtyp>(beta[i]); dview[i].x=static_cast<numtyp>(beta[i]);
dview[i].y=static_cast<numtyp>(powern[i]); dview[i].y=static_cast<numtyp>(powern[i]);
dview[i].z=(numtyp)0; dview[i].z=(numtyp)0;
dview[i].w=(numtyp)0; dview[i].w=(numtyp)0;
} }
ucl_copy(ts5,dview,false); ucl_copy(ts5,dview,false);
ts5_tex.get_texture(*(this->pair_program),"ts5_tex"); ts5_tex.get_texture(*(this->pair_program),"ts5_tex");
ts5_tex.bind_float(ts5,4); ts5_tex.bind_float(ts5,4);
@ -227,11 +223,11 @@ double TersoffT::host_memory_usage() const {
// Copy nbor list from host if necessary and then calculate forces, virials,.. // Copy nbor list from host if necessary and then calculate forces, virials,..
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
void TersoffT::compute(const int f_ago, const int nlocal, const int nall, void TersoffT::compute(const int f_ago, const int nlocal, const int nall,
const int nlist, double **host_x, int *host_type, const int nlist, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, const bool vatom, int &host_start,
const double cpu_time, bool &success) { const double cpu_time, bool &success) {
this->acc_timers(); this->acc_timers();
if (nlist==0) { if (nlist==0) {
@ -254,6 +250,7 @@ void TersoffT::compute(const int f_ago, const int nlocal, const int nall,
this->reset_nbors(nall, inum, nlist, ilist, numj, firstneigh, success); this->reset_nbors(nall, inum, nlist, ilist, numj, firstneigh, success);
if (!success) if (!success)
return; return;
_max_nbors = this->nbor->max_nbor_loop(nlist,numj,ilist);
} }
this->atom->cast_x_data(host_x,host_type); this->atom->cast_x_data(host_x,host_type);
@ -261,24 +258,28 @@ void TersoffT::compute(const int f_ago, const int nlocal, const int nall,
this->atom->add_x_data(host_x,host_type); this->atom->add_x_data(host_x,host_type);
// re-allocate zetaij if necessary // re-allocate zetaij if necessary
if (nall>_max_zij_size) { if (nall*_max_nbors > _zetaij.cols()) {
_max_nbors = this->nbor->max_nbor_loop(nlist,numj,ilist); int _nmax=static_cast<int>(static_cast<double>(nall)*1.10);
_max_zij_size=static_cast<int>(static_cast<double>(nall)*1.10); _zetaij.resize(_max_nbors*_nmax);
_zetaij.resize(_max_nbors*_max_zij_size);
zeta_tex.bind_float(_zetaij,1);
} }
int _eflag;
if (eflag)
_eflag=1;
else
_eflag=0;
int ainum=nall; int ainum=nall;
int nbor_pitch=this->nbor->nbor_pitch(); int nbor_pitch=this->nbor->nbor_pitch();
int BX=this->block_pair(); int BX=this->block_pair();
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/ int GX=static_cast<int>(ceil(static_cast<double>(ainum)/
(BX/this->_threads_per_atom))); (BX/(JTHREADS*KTHREADS))));
this->k_zeta.set_size(GX,BX); this->k_zeta.set_size(GX,BX);
this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq, this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &cutsq,
&map, &elem2param, &_nelements, &_nparams, &_zetaij, &map, &elem2param, &_nelements, &_nparams, &_zetaij,
&this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->nbor->dev_nbor, &this->_nbor_data->begin(),
&nall, &ainum, &nbor_pitch, &this->_threads_per_atom); &_eflag, &nall, &ainum, &nbor_pitch, &this->_threads_per_atom);
int evatom=0; int evatom=0;
if (eatom || vatom) if (eatom || vatom)
@ -303,7 +304,7 @@ template <class numtyp, class acctyp>
int ** TersoffT::compute(const int ago, const int inum_full, int ** TersoffT::compute(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type, const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag, int **nspecial, tagint **special, const bool eflag,
const bool vflag, const bool eatom, const bool vflag, const bool eatom,
const bool vatom, int &host_start, const bool vatom, int &host_start,
int **ilist, int **jnum, int **ilist, int **jnum,
@ -317,7 +318,7 @@ int ** TersoffT::compute(const int ago, const int inum_full,
this->zero_timers(); this->zero_timers();
return NULL; return NULL;
} }
this->hd_balancer.balance(cpu_time); this->hd_balancer.balance(cpu_time);
int inum=this->hd_balancer.get_gpu_count(ago,inum_full); int inum=this->hd_balancer.get_gpu_count(ago,inum_full);
this->ans->inum(inum); this->ans->inum(inum);
@ -325,7 +326,7 @@ int ** TersoffT::compute(const int ago, const int inum_full,
this->ans2->inum(inum); this->ans2->inum(inum);
#endif #endif
host_start=inum; host_start=inum;
// Build neighbor list on GPU if necessary // Build neighbor list on GPU if necessary
if (ago==0) { if (ago==0) {
_max_nbors = this->build_nbor_list(inum, inum_full-inum, nall, host_x, host_type, _max_nbors = this->build_nbor_list(inum, inum_full-inum, nall, host_x, host_type,
@ -342,23 +343,28 @@ int ** TersoffT::compute(const int ago, const int inum_full,
*jnum=this->nbor->host_acc.begin(); *jnum=this->nbor->host_acc.begin();
// re-allocate zetaij if necessary // re-allocate zetaij if necessary
if (nall>_max_zij_size) { if (nall*_max_nbors > _zetaij.cols()) {
_max_zij_size=static_cast<int>(static_cast<double>(nall)*1.10); int _nmax=static_cast<int>(static_cast<double>(nall)*1.10);
_zetaij.resize(_max_nbors*_max_zij_size); _zetaij.resize(_max_nbors*_nmax);
zeta_tex.bind_float(_zetaij,1);
} }
int _eflag;
if (eflag)
_eflag=1;
else
_eflag=0;
int ainum=nall; int ainum=nall;
int nbor_pitch=this->nbor->nbor_pitch(); int nbor_pitch=this->nbor->nbor_pitch();
int BX=this->block_pair(); int BX=this->block_pair();
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/ int GX=static_cast<int>(ceil(static_cast<double>(ainum)/
(BX/this->_threads_per_atom))); (BX/(JTHREADS*KTHREADS))));
this->k_zeta.set_size(GX,BX); this->k_zeta.set_size(GX,BX);
this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq, &map, this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &cutsq,
&elem2param, &_nelements, &_nparams, &_zetaij, &map, &elem2param, &_nelements, &_nparams, &_zetaij,
&this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->nbor->dev_nbor, &this->_nbor_data->begin(),
&nall, &ainum, &nbor_pitch, &this->_threads_per_atom); &_eflag, &nall, &ainum, &nbor_pitch, &this->_threads_per_atom);
int evatom=0; int evatom=0;
if (eatom || vatom) if (eatom || vatom)
@ -374,7 +380,7 @@ int ** TersoffT::compute(const int ago, const int inum_full,
this->device->add_ans_object(this->ans2); this->device->add_ans_object(this->ans2);
#endif #endif
this->hd_balancer.stop_timer(); this->hd_balancer.stop_timer();
return this->nbor->host_jlist.begin()-host_start; return this->nbor->host_jlist.begin()-host_start;
} }
@ -403,21 +409,21 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) {
this->time_pair.start(); this->time_pair.start();
this->k_pair.set_size(GX,BX); this->k_pair.set_size(GX,BX);
this->k_pair.run(&this->atom->x, &ts1, &ts2, &cutsq, this->k_pair.run(&this->atom->x, &ts1, &ts2, &cutsq,
&map, &elem2param, &_nelements, &_nparams, &map, &elem2param, &_nelements, &_nparams,
&this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->ans->force, &this->ans->engv, &this->ans->force, &this->ans->engv,
&eflag, &vflag, &ainum, &nbor_pitch, &eflag, &vflag, &ainum, &nbor_pitch,
&this->_threads_per_atom); &this->_threads_per_atom);
BX=this->block_size(); BX=this->block_size();
GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/ GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
(BX/(KTHREADS*JTHREADS)))); (BX/(KTHREADS*JTHREADS))));
this->k_three_center.set_size(GX,BX); this->k_three_center.set_size(GX,BX);
this->k_three_center.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &cutsq, this->k_three_center.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
&map, &elem2param, &_nelements, &_nparams, &_zetaij, &map, &elem2param, &_nelements, &_nparams, &_zetaij,
&this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum,
&nbor_pitch, &this->_threads_per_atom, &evatom); &nbor_pitch, &this->_threads_per_atom, &evatom);
Answer<numtyp,acctyp> *end_ans; Answer<numtyp,acctyp> *end_ans;
@ -428,20 +434,19 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) {
#endif #endif
if (evatom!=0) { if (evatom!=0) {
this->k_three_end_vatom.set_size(GX,BX); this->k_three_end_vatom.set_size(GX,BX);
this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &cutsq, this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
&map, &elem2param, &_nelements, &_nparams, &_zetaij, &map, &elem2param, &_nelements, &_nparams, &_zetaij,
&this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->nbor->dev_nbor, &this->_nbor_data->begin(),
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
&nbor_pitch, &this->_threads_per_atom); &nbor_pitch, &this->_threads_per_atom);
} else { } else {
this->k_three_end.set_size(GX,BX); this->k_three_end.set_size(GX,BX);
this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &cutsq, this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
&map, &elem2param, &_nelements, &_nparams, &_zetaij, &map, &elem2param, &_nelements, &_nparams, &_zetaij,
&this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->nbor->dev_nbor, &this->_nbor_data->begin(),
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
&nbor_pitch, &this->_threads_per_atom); &nbor_pitch, &this->_threads_per_atom);
} }
this->time_pair.stop(); this->time_pair.stop();

File diff suppressed because it is too large Load Diff

View File

@ -30,25 +30,25 @@ class Tersoff : public BaseThree<numtyp, acctyp> {
/** \param max_nbors initial number of rows in the neighbor matrix /** \param max_nbors initial number of rows in the neighbor matrix
* \param cell_size cutoff + skin * \param cell_size cutoff + skin
* \param gpu_split fraction of particles handled by device * \param gpu_split fraction of particles handled by device
* *
* Returns: * Returns:
* - 0 if successful * - 0 if successful
* - -1 if fix gpu not found * - -1 if fix gpu not found
* - -3 if there is an out of memory error * - -3 if there is an out of memory error
* - -4 if the GPU library was not compiled for GPU * - -4 if the GPU library was not compiled for GPU
* - -5 Double precision is not supported on card **/ * - -5 Double precision is not supported on card **/
int init(const int ntypes, const int nlocal, const int nall, const int max_nbors, int init(const int ntypes, const int nlocal, const int nall, const int max_nbors,
const double cell_size, const double gpu_split, FILE *screen, const double cell_size, const double gpu_split, FILE *screen,
int* host_map, const int nelements, int*** host_elem2param, const int nparams, int* host_map, const int nelements, int*** host_elem2param, const int nparams,
const double* lam1, const double* lam2, const double* lam3, const double* lam1, const double* lam2, const double* lam3,
const double* powermint, const double* biga, const double* bigb, const double* powermint, const double* biga, const double* bigb,
const double* bigr, const double* bigd, const double* c1, const double* c2, const double* bigr, const double* bigd, const double* c1, const double* c2,
const double* c3, const double* c4, const double* c, const double* d, const double* c3, const double* c4, const double* c, const double* d,
const double* h, const double* gamma, const double* beta, const double* h, const double* gamma, const double* beta,
const double* powern, const double* cutsq); const double* powern, const double* cutsq);
/// Pair loop with host neighboring /// Pair loop with host neighboring
void compute(const int f_ago, const int inum_full, const int nall, void compute(const int f_ago, const int inum_full, const int nall,
const int nlist, double **host_x, int *host_type, const int nlist, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, const bool vflag, const bool eatom, const bool vatom,
@ -58,10 +58,10 @@ class Tersoff : public BaseThree<numtyp, acctyp> {
int ** compute(const int ago, const int inum_full, int ** compute(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type, double *sublo, const int nall, double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom, int &host_start,
int **ilist, int **numj, const double cpu_time, bool &success); int **ilist, int **numj, const double cpu_time, bool &success);
/// Clear all host and device data /// Clear all host and device data
/** \note This is called at the beginning of the init() routine **/ /** \note This is called at the beginning of the init() routine **/
void clear(); void clear();
@ -77,7 +77,7 @@ class Tersoff : public BaseThree<numtyp, acctyp> {
/// If atom type constants fit in shared memory, use fast kernels /// If atom type constants fit in shared memory, use fast kernels
bool shared_types; bool shared_types;
/// Number of atom types /// Number of atom types
int _lj_types; int _lj_types;
/// ts1.x = lam1, ts1.y = lam2, ts1.z = lam3, ts1.w = powermint /// ts1.x = lam1, ts1.y = lam2, ts1.z = lam3, ts1.w = powermint
@ -97,13 +97,15 @@ class Tersoff : public BaseThree<numtyp, acctyp> {
UCL_D_Vec<int> map; UCL_D_Vec<int> map;
int _nparams,_nelements; int _nparams,_nelements;
/// Per-atom arrays /// Per-atom arrays:
UCL_D_Vec<numtyp> _zetaij; /// zetaij.x = force, zetaij.y = prefactor, zetaij.z = evdwl,
/// zetaij.w = zetaij
UCL_D_Vec<numtyp4> _zetaij;
UCL_Kernel k_zeta; UCL_Kernel k_zeta;
UCL_Texture ts1_tex, ts2_tex, ts3_tex, ts4_tex, ts5_tex, zeta_tex; UCL_Texture ts1_tex, ts2_tex, ts3_tex, ts4_tex, ts5_tex;
int _max_zij_size, _max_nbors; int _max_nbors;
private: private:
bool _allocated; bool _allocated;

View File

@ -27,15 +27,15 @@ static Tersoff<PRECISION,ACC_PRECISION> TSMF;
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Allocate memory on host and device and copy constants to device // Allocate memory on host and device and copy constants to device
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
int tersoff_gpu_init(const int ntypes, const int inum, const int nall, const int max_nbors, int tersoff_gpu_init(const int ntypes, const int inum, const int nall, const int max_nbors,
const double cell_size, int &gpu_mode, FILE *screen, const double cell_size, int &gpu_mode, FILE *screen,
int* host_map, const int nelements, int*** host_elem2param, const int nparams, int* host_map, const int nelements, int*** host_elem2param, const int nparams,
const double* ts_lam1, const double* ts_lam2, const double* ts_lam3, const double* ts_lam1, const double* ts_lam2, const double* ts_lam3,
const double* ts_powermint, const double* ts_biga, const double* ts_bigb, const double* ts_powermint, const double* ts_biga, const double* ts_bigb,
const double* ts_bigr, const double* ts_bigd, const double* ts_bigr, const double* ts_bigd,
const double* ts_c1, const double* ts_c2, const double* ts_c3, const double* ts_c4, const double* ts_c1, const double* ts_c2, const double* ts_c3, const double* ts_c4,
const double* ts_c, const double* ts_d, const double* ts_h, const double* ts_c, const double* ts_d, const double* ts_h,
const double* ts_gamma, const double* ts_beta, const double* ts_gamma, const double* ts_beta,
const double* ts_powern, const double* ts_cutsq) { const double* ts_powern, const double* ts_cutsq) {
TSMF.clear(); TSMF.clear();
gpu_mode=TSMF.device->gpu_mode(); gpu_mode=TSMF.device->gpu_mode();
@ -47,13 +47,9 @@ int tersoff_gpu_init(const int ntypes, const int inum, const int nall, const int
int procs_per_gpu=TSMF.device->procs_per_gpu(); int procs_per_gpu=TSMF.device->procs_per_gpu();
// disable host/device split for now // disable host/device split for now
if (gpu_split != 1.0) if (gpu_split != 1.0)
return -8; return -8;
// disable multiple threads per atom for now
if (TSMF.device->threads_per_atom() != 1)
return -10;
TSMF.device->init_message(screen,"tersoff/gpu",first_gpu,last_gpu); TSMF.device->init_message(screen,"tersoff/gpu",first_gpu,last_gpu);
bool message=false; bool message=false;
@ -69,9 +65,9 @@ int tersoff_gpu_init(const int ntypes, const int inum, const int nall, const int
if (world_me==0) if (world_me==0)
init_ok=TSMF.init(ntypes, inum, nall, 300, cell_size, gpu_split, screen, init_ok=TSMF.init(ntypes, inum, nall, 300, cell_size, gpu_split, screen,
host_map, nelements, host_elem2param, nparams, host_map, nelements, host_elem2param, nparams,
ts_lam1, ts_lam2, ts_lam3, ts_powermint, ts_lam1, ts_lam2, ts_lam3, ts_powermint,
ts_biga, ts_bigb, ts_bigr, ts_bigd, ts_biga, ts_bigb, ts_bigr, ts_bigd,
ts_c1, ts_c2, ts_c3, ts_c4, ts_c, ts_d, ts_h, ts_c1, ts_c2, ts_c3, ts_c4, ts_c, ts_d, ts_h,
ts_gamma, ts_beta, ts_powern, ts_cutsq); ts_gamma, ts_beta, ts_powern, ts_cutsq);
TSMF.device->world_barrier(); TSMF.device->world_barrier();
@ -90,13 +86,13 @@ int tersoff_gpu_init(const int ntypes, const int inum, const int nall, const int
if (gpu_rank==i && world_me!=0) if (gpu_rank==i && world_me!=0)
init_ok=TSMF.init(ntypes, inum, nall, 300, cell_size, gpu_split, screen, init_ok=TSMF.init(ntypes, inum, nall, 300, cell_size, gpu_split, screen,
host_map, nelements, host_elem2param, nparams, host_map, nelements, host_elem2param, nparams,
ts_lam1, ts_lam2, ts_lam3, ts_powermint, ts_lam1, ts_lam2, ts_lam3, ts_powermint,
ts_biga, ts_bigb, ts_bigr, ts_bigd, ts_biga, ts_bigb, ts_bigr, ts_bigd,
ts_c1, ts_c2, ts_c3, ts_c4, ts_c, ts_d, ts_h, ts_c1, ts_c2, ts_c3, ts_c4, ts_c, ts_d, ts_h,
ts_gamma, ts_beta, ts_powern, ts_cutsq); ts_gamma, ts_beta, ts_powern, ts_cutsq);
TSMF.device->gpu_barrier(); TSMF.device->gpu_barrier();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }
if (message) if (message)
@ -121,12 +117,12 @@ int ** tersoff_gpu_compute_n(const int ago, const int inum_full,
return TSMF.compute(ago, inum_full, nall, host_x, host_type, sublo, return TSMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
subhi, tag, nspecial, special, eflag, vflag, eatom, subhi, tag, nspecial, special, eflag, vflag, eatom,
vatom, host_start, ilist, jnum, cpu_time, success); vatom, host_start, ilist, jnum, cpu_time, success);
} }
void tersoff_gpu_compute(const int ago, const int nlocal, const int nall, void tersoff_gpu_compute(const int ago, const int nlocal, const int nall,
const int nlist, double **host_x, int *host_type, const int nlist, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, const bool vflag, const bool eatom, const bool vatom,
int &host_start, const double cpu_time, bool &success) { int &host_start, const double cpu_time, bool &success) {
TSMF.compute(ago,nlocal,nall,nlist,host_x,host_type,ilist,numj, TSMF.compute(ago,nlocal,nall,nlist,host_x,host_type,ilist,numj,
firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success); firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success);

View File

@ -1,7 +1,7 @@
/// ************************************************************************** /// **************************************************************************
// tersoff_extra.h // tersoff_extra.h
// ------------------- // -------------------
// Trung Dac Nguyen // Trung Dac Nguyen
// //
// Device code for Tersoff math routines // Device code for Tersoff math routines
// //
@ -26,7 +26,7 @@
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
// Dot product of two 3-component vectors.
//   x, y : input vectors (read only)
//   returns x[0]*y[0] + x[1]*y[1] + x[2]*y[2]
ucl_inline numtyp vec3_dot(const numtyp x[3], const numtyp y[3])
{
  return (x[0]*y[0] + x[1]*y[1] + x[2]*y[2]);
}
@ -36,12 +36,12 @@ ucl_inline void vec3_add(const numtyp x[3], const numtyp y[3], numtyp z[3])
z[0] = x[0]+y[0]; z[1] = x[1]+y[1]; z[2] = x[2]+y[2]; z[0] = x[0]+y[0]; z[1] = x[1]+y[1]; z[2] = x[2]+y[2];
} }
// Scale a 3-component vector by a scalar: y = k*x.
//   k : scale factor
//   x : input vector (read only)
//   y : output vector; each component is written from the matching
//       component of x
ucl_inline void vec3_scale(const numtyp k, const numtyp x[3], numtyp y[3])
{
  y[0] = k*x[0];
  y[1] = k*x[1];
  y[2] = k*x[2];
}
ucl_inline void vec3_scaleadd(const numtyp k, const numtyp x[3], ucl_inline void vec3_scaleadd(const numtyp k, const numtyp x[3],
const numtyp y[3], numtyp z[3]) const numtyp y[3], numtyp z[3])
{ {
z[0] = k*x[0]+y[0]; z[1] = k*x[1]+y[1]; z[2] = k*x[2]+y[2]; z[0] = k*x[0]+y[0]; z[1] = k*x[1]+y[1]; z[2] = k*x[2]+y[2];
@ -51,14 +51,14 @@ ucl_inline void vec3_scaleadd(const numtyp k, const numtyp x[3],
// Angular term of the Tersoff potential:
//   g = gamma * (1 + c^2/d^2 - c^2 / (d^2 + (h - costheta)^2))
//   costheta    : cosine of the angle the term is evaluated at
//   param_c     : parameter c (squared internally)
//   param_d     : parameter d (squared internally)
//   param_h     : parameter h
//   param_gamma : overall prefactor gamma
// Reciprocals go through ucl_recip rather than a plain division.
ucl_inline numtyp ters_gijk(const numtyp costheta,
                            const numtyp param_c,
                            const numtyp param_d,
                            const numtyp param_h,
                            const numtyp param_gamma)
{
  const numtyp ters_c = param_c * param_c;
  const numtyp ters_d = param_d * param_d;
  const numtyp hcth = param_h - costheta;

  return param_gamma*((numtyp)1.0 + ters_c*ucl_recip(ters_d) -
                      ters_c *ucl_recip(ters_d + hcth*hcth));
}
@ -66,9 +66,9 @@ ucl_inline numtyp ters_gijk(const numtyp costheta,
ucl_inline numtyp ters_gijk_d(const numtyp costheta, ucl_inline numtyp ters_gijk_d(const numtyp costheta,
const numtyp param_c, const numtyp param_c,
const numtyp param_d, const numtyp param_d,
const numtyp param_h, const numtyp param_h,
const numtyp param_gamma) const numtyp param_gamma)
{ {
const numtyp ters_c = param_c * param_c; const numtyp ters_c = param_c * param_c;
const numtyp ters_d = param_d * param_d; const numtyp ters_d = param_d * param_d;
@ -80,12 +80,12 @@ ucl_inline numtyp ters_gijk_d(const numtyp costheta,
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
ucl_inline void costheta_d(const numtyp rij_hat[3], ucl_inline void costheta_d(const numtyp rij_hat[3],
const numtyp rij, const numtyp rij,
const numtyp rik_hat[3], const numtyp rik_hat[3],
const numtyp rik, const numtyp rik,
numtyp *dri, numtyp *dri,
numtyp *drj, numtyp *drj,
numtyp *drk) numtyp *drk)
{ {
// first element is derivative wrt Ri, second wrt Rj, third wrt Rk // first element is derivative wrt Ri, second wrt Rj, third wrt Rk
@ -131,85 +131,87 @@ ucl_inline numtyp ters_fa(const numtyp r,
const numtyp param_lam2) const numtyp param_lam2)
{ {
if (r > param_bigr + param_bigd) return (numtyp)0.0; if (r > param_bigr + param_bigd) return (numtyp)0.0;
return -param_bigb * ucl_exp(-param_lam2 * r) * return -param_bigb * ucl_exp(-param_lam2 * r) *
ters_fc(r,param_bigr,param_bigd); ters_fc(r,param_bigr,param_bigd);
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
// Radial derivative companion of ters_fa:
//   B * exp(-lam2*r) * (lam2 * fc(r) - fc_d(r))
//   r          : distance
//   param_bigb : prefactor B
//   param_bigr : cutoff parameter R (forwarded to ters_fc / ters_fc_d)
//   param_bigd : cutoff parameter D (forwarded to ters_fc / ters_fc_d)
//   param_lam2 : exponential decay constant
// Returns 0 when r > R + D.
// NOTE(review): this equals d/dr of ters_fa provided ters_fc_d is the
// derivative of ters_fc; both are defined outside this view -- confirm.
ucl_inline numtyp ters_fa_d(const numtyp r,
                            const numtyp param_bigb,
                            const numtyp param_bigr,
                            const numtyp param_bigd,
                            const numtyp param_lam2)
{
  if (r > param_bigr + param_bigd) return (numtyp)0.0;
  return param_bigb * ucl_exp(-param_lam2 * r) * (param_lam2 *
    ters_fc(r,param_bigr,param_bigd) - ters_fc_d(r,param_bigr,param_bigd));
}
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
// Bond-order term b_ij as a function of zeta.
// With t = beta*zeta and n = powern, the general form (last return) is
//   b = (1 + t^n)^(-1/(2n))
// The earlier branches substitute cheaper expressions, selected by the
// thresholds c1..c4 in the order tested below:
//   t > c1 : t^(-1/2)
//   t > c2 : (1 - t^(-n)/(2n)) * t^(-1/2)
//   t < c4 : 1
//   t < c3 : 1 - t^n/(2n)
// NOTE(review): presumably c1 > c2 > c3 > c4 so the branches cover disjoint
// large-t / small-t regimes; the thresholds are set by the caller -- confirm.
ucl_inline numtyp ters_bij(const numtyp zeta,
                           const numtyp param_beta,
                           const numtyp param_powern,
                           const numtyp param_c1,
                           const numtyp param_c2,
                           const numtyp param_c3,
                           const numtyp param_c4)
{
  numtyp tmp = param_beta * zeta;
  if (tmp > param_c1) return ucl_rsqrt(tmp);
  if (tmp > param_c2)
    return ((numtyp)1.0 - ucl_powr(tmp,-param_powern) /
            ((numtyp)2.0*param_powern))*ucl_rsqrt(tmp);
  if (tmp < param_c4) return (numtyp)1.0;
  if (tmp < param_c3)
    return (numtyp)1.0 - ucl_powr(tmp,param_powern)/((numtyp)2.0*param_powern);
  return ucl_powr((numtyp)1.0 + ucl_powr(tmp,param_powern),
                  (numtyp)-1.0/((numtyp)2.0*param_powern));
}
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
// Derivative of the bond-order term b_ij with respect to zeta.
// With t = beta*zeta and n = powern, each branch differentiates the
// expression ters_bij uses in the same regime:
//   t > c1 : beta * (-1/2) t^(-3/2)
//   t > c2 : beta * (-1/2) t^(-3/2) * (1 - (1 + 1/(2n)) t^(-n))
//   t < c4 : 0
//   t < c3 : -1/2 * beta * t^(n-1)
//   else   : -1/2 * (1 + t^n)^(-1 - 1/(2n)) * t^n / zeta
// The last branch divides by zeta; it is only reached when t >= c3.
// NOTE(review): that keeps zeta away from 0 only if c3 > 0 -- confirm.
ucl_inline numtyp ters_bij_d(const numtyp zeta,
                             const numtyp param_beta,
                             const numtyp param_powern,
                             const numtyp param_c1,
                             const numtyp param_c2,
                             const numtyp param_c3,
                             const numtyp param_c4)
{
  numtyp tmp = param_beta * zeta;
  if (tmp > param_c1)
    return param_beta * (numtyp)-0.5*ucl_powr(tmp,(numtyp)-1.5);
  if (tmp > param_c2)
    return param_beta * ((numtyp)-0.5*ucl_powr(tmp,(numtyp)-1.5) *
           // error in negligible 2nd term fixed 9/30/2015
           // previous (incorrect) form carried an extra factor of 0.5:
           // (1.0 - 0.5*(1.0 + 1.0/(2.0*param->powern)) *
           ((numtyp)1.0 - ((numtyp)1.0 + (numtyp)1.0 /((numtyp)2.0 * param_powern)) *
           ucl_powr(tmp,-param_powern)));
  if (tmp < param_c4) return (numtyp)0.0;
  if (tmp < param_c3)
    return (numtyp)-0.5*param_beta * ucl_powr(tmp,param_powern-(numtyp)1.0);

  numtyp tmp_n = ucl_powr(tmp,param_powern);
  return (numtyp)-0.5 * ucl_powr((numtyp)1.0+tmp_n, (numtyp) -
    (numtyp)1.0-((numtyp)1.0 / ((numtyp)2.0 * param_powern)))*tmp_n / zeta;
}
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
ucl_inline void ters_zetaterm_d(const numtyp prefactor, ucl_inline void ters_zetaterm_d(const numtyp prefactor,
const numtyp rij_hat[3], const numtyp rij_hat[3],
const numtyp rij, const numtyp rij,
const numtyp rik_hat[3], const numtyp rik_hat[3],
const numtyp rik, const numtyp rik,
const numtyp param_bigr, const numtyp param_bigr,
const numtyp param_bigd, const numtyp param_bigd,
const numtyp param_powermint, const numtyp param_powermint,
const numtyp param_lam3, const numtyp param_lam3,
const numtyp param_c, const numtyp param_c,
const numtyp param_d, const numtyp param_d,
const numtyp param_h, const numtyp param_h,
const numtyp param_gamma, const numtyp param_gamma,
numtyp dri[3], numtyp dri[3],
numtyp drj[3], numtyp drj[3],
@ -229,7 +231,7 @@ ucl_inline void ters_zetaterm_d(const numtyp prefactor,
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0; else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
else ex_delr = ucl_exp(tmp); else ex_delr = ucl_exp(tmp);
if ((int)param_powermint == 3) if ((int)param_powermint == 3)
ex_delr_d = (numtyp)3.0*param_lam3*t*t*ex_delr; ex_delr_d = (numtyp)3.0*param_lam3*t*t*ex_delr;
else ex_delr_d = param_lam3 * ex_delr; else ex_delr_d = param_lam3 * ex_delr;
@ -269,17 +271,17 @@ ucl_inline void ters_zetaterm_d(const numtyp prefactor,
} }
ucl_inline void ters_zetaterm_d_fi(const numtyp prefactor, ucl_inline void ters_zetaterm_d_fi(const numtyp prefactor,
const numtyp rij_hat[3], const numtyp rij_hat[3],
const numtyp rij, const numtyp rij,
const numtyp rik_hat[3], const numtyp rik_hat[3],
const numtyp rik, const numtyp rik,
const numtyp param_bigr, const numtyp param_bigr,
const numtyp param_bigd, const numtyp param_bigd,
const numtyp param_powermint, const numtyp param_powermint,
const numtyp param_lam3, const numtyp param_lam3,
const numtyp param_c, const numtyp param_c,
const numtyp param_d, const numtyp param_d,
const numtyp param_h, const numtyp param_h,
const numtyp param_gamma, const numtyp param_gamma,
numtyp dri[3]) numtyp dri[3])
{ {
@ -297,7 +299,7 @@ ucl_inline void ters_zetaterm_d_fi(const numtyp prefactor,
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0; else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
else ex_delr = ucl_exp(tmp); else ex_delr = ucl_exp(tmp);
if ((int)param_powermint == 3) if ((int)param_powermint == 3)
ex_delr_d = (numtyp)3.0*param_lam3*t*t*ex_delr; ex_delr_d = (numtyp)3.0*param_lam3*t*t*ex_delr;
else ex_delr_d = param_lam3 * ex_delr; else ex_delr_d = param_lam3 * ex_delr;
@ -319,17 +321,17 @@ ucl_inline void ters_zetaterm_d_fi(const numtyp prefactor,
} }
ucl_inline void ters_zetaterm_d_fj(const numtyp prefactor, ucl_inline void ters_zetaterm_d_fj(const numtyp prefactor,
const numtyp rij_hat[3], const numtyp rij_hat[3],
const numtyp rij, const numtyp rij,
const numtyp rik_hat[3], const numtyp rik_hat[3],
const numtyp rik, const numtyp rik,
const numtyp param_bigr, const numtyp param_bigr,
const numtyp param_bigd, const numtyp param_bigd,
const numtyp param_powermint, const numtyp param_powermint,
const numtyp param_lam3, const numtyp param_lam3,
const numtyp param_c, const numtyp param_c,
const numtyp param_d, const numtyp param_d,
const numtyp param_h, const numtyp param_h,
const numtyp param_gamma, const numtyp param_gamma,
numtyp drj[3]) numtyp drj[3])
{ {
@ -346,7 +348,7 @@ ucl_inline void ters_zetaterm_d_fj(const numtyp prefactor,
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0; else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
else ex_delr = ucl_exp(tmp); else ex_delr = ucl_exp(tmp);
if ((int)param_powermint == 3) if ((int)param_powermint == 3)
ex_delr_d = (numtyp)3.0*param_lam3*t*t*ex_delr; ex_delr_d = (numtyp)3.0*param_lam3*t*t*ex_delr;
else ex_delr_d = param_lam3 * ex_delr; else ex_delr_d = param_lam3 * ex_delr;
@ -365,17 +367,17 @@ ucl_inline void ters_zetaterm_d_fj(const numtyp prefactor,
} }
ucl_inline void ters_zetaterm_d_fk(const numtyp prefactor, ucl_inline void ters_zetaterm_d_fk(const numtyp prefactor,
const numtyp rij_hat[3], const numtyp rij_hat[3],
const numtyp rij, const numtyp rij,
const numtyp rik_hat[3], const numtyp rik_hat[3],
const numtyp rik, const numtyp rik,
const numtyp param_bigr, const numtyp param_bigr,
const numtyp param_bigd, const numtyp param_bigd,
const numtyp param_powermint, const numtyp param_powermint,
const numtyp param_lam3, const numtyp param_lam3,
const numtyp param_c, const numtyp param_c,
const numtyp param_d, const numtyp param_d,
const numtyp param_h, const numtyp param_h,
const numtyp param_gamma, const numtyp param_gamma,
numtyp drk[3]) numtyp drk[3])
{ {
@ -393,7 +395,7 @@ ucl_inline void ters_zetaterm_d_fk(const numtyp prefactor,
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0; else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
else ex_delr = ucl_exp(tmp); else ex_delr = ucl_exp(tmp);
if ((int)param_powermint == 3) if ((int)param_powermint == 3)
ex_delr_d = (numtyp)3.0*param_lam3*t*t*ex_delr; ex_delr_d = (numtyp)3.0*param_lam3*t*t*ex_delr;
else ex_delr_d = param_lam3 * ex_delr; else ex_delr_d = param_lam3 * ex_delr;
@ -419,20 +421,20 @@ ucl_inline void repulsive(const numtyp param_bigr,
const numtyp param_bigd, const numtyp param_bigd,
const numtyp param_lam1, const numtyp param_lam1,
const numtyp param_biga, const numtyp param_biga,
const numtyp rsq, const numtyp rsq,
const int eflag, const int eflag,
numtyp *ans) numtyp *ans)
{ {
numtyp r,tmp_fc,tmp_fc_d,tmp_exp; numtyp r,tmp_fc,tmp_fc_d,tmp_exp;
r = ucl_sqrt(rsq); r = ucl_sqrt(rsq);
tmp_fc = ters_fc(r,param_bigr,param_bigd); tmp_fc = ters_fc(r,param_bigr,param_bigd);
tmp_fc_d = ters_fc_d(r,param_bigr,param_bigd); tmp_fc_d = ters_fc_d(r,param_bigr,param_bigd);
tmp_exp = ucl_exp(-param_lam1 * r); tmp_exp = ucl_exp(-param_lam1 * r);
// fforce // fforce
ans[0] = -param_biga*tmp_exp*(tmp_fc_d - tmp_fc*param_lam1)*ucl_recip(r); ans[0] = -param_biga*tmp_exp*(tmp_fc_d - tmp_fc*param_lam1)*ucl_recip(r);
// eng // eng
if (eflag) ans[1] = tmp_fc * param_biga * tmp_exp; if (eflag) ans[1] = tmp_fc * param_biga * tmp_exp;
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -441,7 +443,7 @@ ucl_inline numtyp zeta(const numtyp param_powermint,
const numtyp param_bigr, const numtyp param_bigr,
const numtyp param_bigd, const numtyp param_bigd,
const numtyp param_c, const numtyp param_c,
const numtyp param_d, const numtyp param_d,
const numtyp param_h, const numtyp param_h,
const numtyp param_gamma, const numtyp param_gamma,
const numtyp rsqij, const numtyp rsqij,
@ -464,23 +466,23 @@ ucl_inline numtyp zeta(const numtyp param_powermint,
else if (arg < (numtyp)-69.0776) ex_delr = (numtyp)0.0; else if (arg < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
else ex_delr = ucl_exp(arg); else ex_delr = ucl_exp(arg);
return ters_fc(rik,param_bigr,param_bigd) * return ters_fc(rik,param_bigr,param_bigd) *
ters_gijk(costheta,param_c, param_d, param_h, param_gamma) * ex_delr; ters_gijk(costheta,param_c, param_d, param_h, param_gamma) * ex_delr;
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
ucl_inline void force_zeta(const numtyp param_bigb, ucl_inline void force_zeta(const numtyp param_bigb,
const numtyp param_bigr, const numtyp param_bigr,
const numtyp param_bigd, const numtyp param_bigd,
const numtyp param_lam2, const numtyp param_lam2,
const numtyp param_beta, const numtyp param_beta,
const numtyp param_powern, const numtyp param_powern,
const numtyp param_c1, const numtyp param_c1,
const numtyp param_c2, const numtyp param_c2,
const numtyp param_c3, const numtyp param_c3,
const numtyp param_c4, const numtyp param_c4,
const numtyp rsq, const numtyp rsq,
const numtyp zeta_ij, const numtyp zeta_ij,
const int eflag, const int eflag,
numtyp fpfeng[4]) numtyp fpfeng[4])
@ -494,7 +496,7 @@ ucl_inline void force_zeta(const numtyp param_bigb,
param_c1,param_c2, param_c3, param_c4); param_c1,param_c2, param_c3, param_c4);
fpfeng[0] = (numtyp)0.5*bij*fa_d * ucl_recip(r); // fforce fpfeng[0] = (numtyp)0.5*bij*fa_d * ucl_recip(r); // fforce
fpfeng[1] = (numtyp)-0.5*fa * ters_bij_d(zeta_ij,param_beta, param_powern, fpfeng[1] = (numtyp)-0.5*fa * ters_bij_d(zeta_ij,param_beta, param_powern,
param_c1,param_c2, param_c3, param_c4); // prefactor param_c1,param_c2, param_c3, param_c4); // prefactor
if (eflag) fpfeng[2] = (numtyp)0.5*bij*fa; // eng if (eflag) fpfeng[2] = (numtyp)0.5*bij*fa; // eng
} }
@ -504,23 +506,23 @@ ucl_inline void force_zeta(const numtyp param_bigb,
use param_ijk cutoff for rik test use param_ijk cutoff for rik test
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
ucl_inline void attractive(const numtyp param_bigr, ucl_inline void attractive(const numtyp param_bigr,
const numtyp param_bigd, const numtyp param_bigd,
const numtyp param_powermint, const numtyp param_powermint,
const numtyp param_lam3, const numtyp param_lam3,
const numtyp param_c, const numtyp param_c,
const numtyp param_d, const numtyp param_d,
const numtyp param_h, const numtyp param_h,
const numtyp param_gamma, const numtyp param_gamma,
const numtyp prefactor, const numtyp prefactor,
const numtyp rij, const numtyp rij,
const numtyp rijinv, const numtyp rijinv,
const numtyp rik, const numtyp rik,
const numtyp rikinv, const numtyp rikinv,
const numtyp delrij[3], const numtyp delrij[3],
const numtyp delrik[3], const numtyp delrik[3],
numtyp fi[3], numtyp fi[3],
numtyp fj[3], numtyp fj[3],
numtyp fk[3]) numtyp fk[3])
{ {
numtyp rij_hat[3],rik_hat[3]; numtyp rij_hat[3],rik_hat[3];
@ -531,20 +533,20 @@ ucl_inline void attractive(const numtyp param_bigr,
param_c, param_d, param_h, param_gamma, fi, fj, fk); param_c, param_d, param_h, param_gamma, fi, fj, fk);
} }
ucl_inline void attractive_fi(const numtyp param_bigr, ucl_inline void attractive_fi(const numtyp param_bigr,
const numtyp param_bigd, const numtyp param_bigd,
const numtyp param_powermint, const numtyp param_powermint,
const numtyp param_lam3, const numtyp param_lam3,
const numtyp param_c, const numtyp param_c,
const numtyp param_d, const numtyp param_d,
const numtyp param_h, const numtyp param_h,
const numtyp param_gamma, const numtyp param_gamma,
const numtyp prefactor, const numtyp prefactor,
const numtyp rij, const numtyp rij,
const numtyp rijinv, const numtyp rijinv,
const numtyp rik, const numtyp rik,
const numtyp rikinv, const numtyp rikinv,
const numtyp delrij[3], const numtyp delrij[3],
const numtyp delrik[3], const numtyp delrik[3],
numtyp fi[3]) numtyp fi[3])
{ {
@ -556,20 +558,20 @@ ucl_inline void attractive_fi(const numtyp param_bigr,
param_c, param_d, param_h, param_gamma, fi); param_c, param_d, param_h, param_gamma, fi);
} }
ucl_inline void attractive_fj(const numtyp param_bigr, ucl_inline void attractive_fj(const numtyp param_bigr,
const numtyp param_bigd, const numtyp param_bigd,
const numtyp param_powermint, const numtyp param_powermint,
const numtyp param_lam3, const numtyp param_lam3,
const numtyp param_c, const numtyp param_c,
const numtyp param_d, const numtyp param_d,
const numtyp param_h, const numtyp param_h,
const numtyp param_gamma, const numtyp param_gamma,
const numtyp prefactor, const numtyp prefactor,
const numtyp rij, const numtyp rij,
const numtyp rijinv, const numtyp rijinv,
const numtyp rik, const numtyp rik,
const numtyp rikinv, const numtyp rikinv,
const numtyp delrij[3], const numtyp delrij[3],
const numtyp delrik[3], const numtyp delrik[3],
numtyp fj[3]) numtyp fj[3])
{ {
@ -581,20 +583,20 @@ ucl_inline void attractive_fj(const numtyp param_bigr,
param_c, param_d, param_h, param_gamma, fj); param_c, param_d, param_h, param_gamma, fj);
} }
ucl_inline void attractive_fk(const numtyp param_bigr, ucl_inline void attractive_fk(const numtyp param_bigr,
const numtyp param_bigd, const numtyp param_bigd,
const numtyp param_powermint, const numtyp param_powermint,
const numtyp param_lam3, const numtyp param_lam3,
const numtyp param_c, const numtyp param_c,
const numtyp param_d, const numtyp param_d,
const numtyp param_h, const numtyp param_h,
const numtyp param_gamma, const numtyp param_gamma,
const numtyp prefactor, const numtyp prefactor,
const numtyp rij, const numtyp rij,
const numtyp rijinv, const numtyp rijinv,
const numtyp rik, const numtyp rik,
const numtyp rikinv, const numtyp rikinv,
const numtyp delrij[3], const numtyp delrij[3],
const numtyp delrik[3], const numtyp delrik[3],
numtyp fk[3]) numtyp fk[3])
{ {