git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14807 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -70,6 +70,8 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
|
||||
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
||||
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
|
||||
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
|
||||
$(OBJ_DIR)/lal_tersoff_zbl.o $(OBJ_DIR)/lal_tersoff_zbl_ext.o \
|
||||
$(OBJ_DIR)/lal_tersoff_mod.o $(OBJ_DIR)/lal_tersoff_mod_ext.o \
|
||||
$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
|
||||
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
|
||||
$(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
|
||||
@ -122,6 +124,8 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
||||
$(OBJ_DIR)/lj_gromacs.cubin $(OBJ_DIR)/lj_gromacs_cubin.h \
|
||||
$(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd_cubin.h \
|
||||
$(OBJ_DIR)/tersoff.cubin $(OBJ_DIR)/tersoff_cubin.h \
|
||||
$(OBJ_DIR)/tersoff_zbl.cubin $(OBJ_DIR)/tersoff_zbl_cubin.h \
|
||||
$(OBJ_DIR)/tersoff_mod.cubin $(OBJ_DIR)/tersoff_mod_cubin.h \
|
||||
$(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \
|
||||
$(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h \
|
||||
$(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl_cubin.h \
|
||||
@ -705,6 +709,30 @@ $(OBJ_DIR)/lal_tersoff.o: $(ALL_H) lal_tersoff.h lal_tersoff.cpp $(OBJ_DIR)/ters
|
||||
$(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h
|
||||
$(CUDR) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/tersoff_zbl.cubin: lal_tersoff_zbl.cu lal_precision.h lal_tersoff_zbl_extra.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_tersoff_zbl.cu
|
||||
|
||||
$(OBJ_DIR)/tersoff_zbl_cubin.h: $(OBJ_DIR)/tersoff_zbl.cubin $(OBJ_DIR)/tersoff_zbl.cubin
|
||||
$(BIN2C) -c -n tersoff_zbl $(OBJ_DIR)/tersoff_zbl.cubin > $(OBJ_DIR)/tersoff_zbl_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff_zbl.o: $(ALL_H) lal_tersoff_zbl.h lal_tersoff_zbl.cpp $(OBJ_DIR)/tersoff_zbl_cubin.h $(OBJ_DIR)/lal_base_three.o
|
||||
$(CUDR) -o $@ -c lal_tersoff_zbl.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff_zbl_ext.o: $(ALL_H) lal_tersoff_zbl.h lal_tersoff_zbl_ext.cpp lal_base_three.h
|
||||
$(CUDR) -o $@ -c lal_tersoff_zbl_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/tersoff_mod.cubin: lal_tersoff_mod.cu lal_precision.h lal_tersoff_mod_extra.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_tersoff_mod.cu
|
||||
|
||||
$(OBJ_DIR)/tersoff_mod_cubin.h: $(OBJ_DIR)/tersoff_mod.cubin $(OBJ_DIR)/tersoff_mod.cubin
|
||||
$(BIN2C) -c -n tersoff_mod $(OBJ_DIR)/tersoff_mod.cubin > $(OBJ_DIR)/tersoff_mod_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff_mod.o: $(ALL_H) lal_tersoff_mod.h lal_tersoff_mod.cpp $(OBJ_DIR)/tersoff_mod_cubin.h $(OBJ_DIR)/lal_base_three.o
|
||||
$(CUDR) -o $@ -c lal_tersoff_mod.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff_mod_ext.o: $(ALL_H) lal_tersoff_mod.h lal_tersoff_mod_ext.cpp lal_base_three.h
|
||||
$(CUDR) -o $@ -c lal_tersoff_mod_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/coul.cubin: lal_coul.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul.cu
|
||||
|
||||
|
||||
@ -59,6 +59,8 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
|
||||
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
||||
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
|
||||
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
|
||||
$(OBJ_DIR)/lal_tersoff_zbl.o $(OBJ_DIR)/lal_tersoff_zbl_ext.o \
|
||||
$(OBJ_DIR)/lal_tersoff_mod.o $(OBJ_DIR)/lal_tersoff_mod_ext.o \
|
||||
$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
|
||||
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
|
||||
$(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
|
||||
@ -87,7 +89,8 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
|
||||
$(OBJ_DIR)/soft_cl.h $(OBJ_DIR)/lj_coul_msm_cl.h \
|
||||
$(OBJ_DIR)/lj_gromacs_cl.h $(OBJ_DIR)/dpd_cl.h \
|
||||
$(OBJ_DIR)/lj_gauss_cl.h $(OBJ_DIR)/dzugutov_cl.h \
|
||||
$(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/coul_cl.h \
|
||||
$(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/tersoff_zbl_cl.h \
|
||||
$(OBJ_DIR)/tersoff_mod_cl.h $(OBJ_DIR)/coul_cl.h \
|
||||
$(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/zbl_cl.h \
|
||||
$(OBJ_DIR)/lj_cubic_cl.h
|
||||
|
||||
@ -510,6 +513,24 @@ $(OBJ_DIR)/lal_tersoff.o: $(ALL_H) lal_tersoff.h lal_tersoff.cpp $(OBJ_DIR)/ter
|
||||
$(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h
|
||||
$(OCL) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/tersoff_zbl_cl.h: lal_tersoff_zbl.cu lal_tersoff_zbl_extra.h $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh tersoff_zbl $(PRE1_H) lal_tersoff_zbl_extra.h lal_tersoff_zbl.cu $(OBJ_DIR)/tersoff_zbl_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff_zbl.o: $(ALL_H) lal_tersoff_zbl.h lal_tersoff_zbl.cpp $(OBJ_DIR)/tersoff_zbl_cl.h $(OBJ_DIR)/tersoff_zbl_cl.h $(OBJ_DIR)/lal_base_three.o
|
||||
$(OCL) -o $@ -c lal_tersoff_zbl.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff_zbl_ext.o: $(ALL_H) lal_tersoff_zbl.h lal_tersoff_zbl_ext.cpp lal_base_three.h
|
||||
$(OCL) -o $@ -c lal_tersoff_zbl_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/tersoff_mod_cl.h: lal_tersoff_mod.cu lal_tersoff_mod_extra.h $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh tersoff_mod $(PRE1_H) lal_tersoff_mod_extra.h lal_tersoff_mod.cu $(OBJ_DIR)/tersoff_mod_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff_mod.o: $(ALL_H) lal_tersoff_mod.h lal_tersoff_mod.cpp $(OBJ_DIR)/tersoff_mod_cl.h $(OBJ_DIR)/tersoff_mod_cl.h $(OBJ_DIR)/lal_base_three.o
|
||||
$(OCL) -o $@ -c lal_tersoff_mod.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff_mod_ext.o: $(ALL_H) lal_tersoff_mod.h lal_tersoff_mod_ext.cpp lal_base_three.h
|
||||
$(OCL) -o $@ -c lal_tersoff_mod_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/coul_cl.h: lal_coul.cu $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh coul $(PRE1_H) lal_coul.cu $(OBJ_DIR)/coul_cl.h;
|
||||
|
||||
|
||||
@ -142,7 +142,7 @@ int SWT::init(const int ntypes, const int nlocal, const int nall, const int max_
|
||||
ucl_copy(elem2param,dview_elem2param,false);
|
||||
|
||||
UCL_H_Vec<int> dview_map(lj_types, *(this->ucl_device), UCL_WRITE_ONLY);
|
||||
for (int i = 0; i < lj_types; i++)
|
||||
for (int i = 0; i < ntypes; i++)
|
||||
dview_map[i] = host_map[i];
|
||||
|
||||
map.alloc(lj_types,*(this->ucl_device), UCL_READ_ONLY);
|
||||
@ -196,6 +196,8 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
||||
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
|
||||
(BX/this->_threads_per_atom)));
|
||||
|
||||
// this->_nbor_data == nbor->dev_packed for gpu_nbor == 0 and tpa == 1
|
||||
// this->_nbor_data == nbor->dev_nbor for gpu_nbor == 1
|
||||
int ainum=this->ans->inum();
|
||||
int nbor_pitch=this->nbor->nbor_pitch();
|
||||
this->time_pair.start();
|
||||
|
||||
@ -35,7 +35,7 @@ texture<int4> sw3_tex;
|
||||
#define sw3_tex sw3
|
||||
#endif
|
||||
|
||||
#define THIRD (numtyp)0.66666667
|
||||
#define THIRD (numtyp)0.66666666666666666667
|
||||
|
||||
//#define THREE_CONCURRENT
|
||||
|
||||
|
||||
@ -178,7 +178,7 @@ int TersoffT::init(const int ntypes, const int nlocal, const int nall, const int
|
||||
ucl_copy(elem2param,dview_elem2param,false);
|
||||
|
||||
UCL_H_Vec<int> dview_map(lj_types, *(this->ucl_device), UCL_WRITE_ONLY);
|
||||
for (int i = 0; i < lj_types; i++)
|
||||
for (int i = 0; i < ntypes; i++)
|
||||
dview_map[i] = host_map[i];
|
||||
|
||||
map.alloc(lj_types,*(this->ucl_device), UCL_READ_ONLY);
|
||||
|
||||
@ -43,7 +43,7 @@ texture<int4> ts5_tex;
|
||||
|
||||
//#define THREE_CONCURRENT
|
||||
|
||||
#define THIRD (numtyp)0.66666667
|
||||
#define TWOTHIRD (numtyp)0.66666666666666666667
|
||||
|
||||
#define zeta_idx(nbor_mem, packed_mem, nbor_pitch, n_stride, t_per_atom, \
|
||||
i, nbor_j, offset_j, idx) \
|
||||
@ -108,6 +108,7 @@ texture<int4> ts5_tex;
|
||||
|
||||
#define store_zeta(z, tid, t_per_atom, offset) \
|
||||
if (t_per_atom>1) { \
|
||||
__local acctyp red_acc[BLOCK_PAIR]; \
|
||||
red_acc[tid]=z; \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
if (offset < s) { \
|
||||
@ -180,7 +181,7 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
|
||||
const __global int *restrict map,
|
||||
const __global int *restrict elem2param,
|
||||
const int nelements, const int nparams,
|
||||
__global numtyp4 * zetaij,
|
||||
__global acctyp4 * zetaij,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const int eflag, const int nall, const int inum,
|
||||
@ -205,9 +206,7 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
|
||||
ts5[tid]=ts5_in[tid];
|
||||
}
|
||||
|
||||
numtyp z = (numtyp)0;
|
||||
__local numtyp red_acc[BLOCK_PAIR];
|
||||
if (tid<BLOCK_PAIR) red_acc[tid] = (numtyp)0;
|
||||
acctyp z = (acctyp)0;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
@ -245,7 +244,7 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
|
||||
if (rsq1 > cutsq[ijparam]) continue;
|
||||
|
||||
// compute zeta_ij
|
||||
z = (numtyp)0;
|
||||
z = (acctyp)0;
|
||||
|
||||
int nbor_k = nborj_start-offset_j+offset_k;
|
||||
for ( ; nbor_k < nbor_end; nbor_k+=n_stride) {
|
||||
@ -310,7 +309,7 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
|
||||
force_zeta(ijparam_bigb, ijparam_bigr, ijparam_bigd, ijparam_lam2,
|
||||
ijparam_beta, ijparam_powern, ijparam_c1, ijparam_c2, ijparam_c3,
|
||||
ijparam_c4, rsq1, z, eflag, fpfeng);
|
||||
numtyp4 zij;
|
||||
acctyp4 zij;
|
||||
zij.x = fpfeng[0];
|
||||
zij.y = fpfeng[1];
|
||||
zij.z = fpfeng[2];
|
||||
@ -426,7 +425,7 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_,
|
||||
const __global int *restrict map,
|
||||
const __global int *restrict elem2param,
|
||||
const int nelements, const int nparams,
|
||||
const __global numtyp4 *restrict zetaij,
|
||||
const __global acctyp4 *restrict zetaij,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
@ -501,7 +500,7 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_,
|
||||
int idx;
|
||||
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
|
||||
i, nbor_j, offset_j, idx);
|
||||
numtyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex);
|
||||
acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex);
|
||||
numtyp force = zeta_ij.x*tpainv;
|
||||
numtyp prefactor = zeta_ij.y;
|
||||
f.x += delr1[0]*force;
|
||||
@ -568,7 +567,7 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_,
|
||||
if (vflag>0) {
|
||||
acctyp v[6];
|
||||
numtyp pre = (numtyp)2.0;
|
||||
if (evatom==1) pre = THIRD;
|
||||
if (evatom==1) pre = TWOTHIRD;
|
||||
v[0] = pre*(delr1[0]*fj[0] + delr2[0]*fk[0]);
|
||||
v[1] = pre*(delr1[1]*fj[1] + delr2[1]*fk[1]);
|
||||
v[2] = pre*(delr1[2]*fj[2] + delr2[2]*fk[2]);
|
||||
@ -595,7 +594,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
|
||||
const __global int *restrict map,
|
||||
const __global int *restrict elem2param,
|
||||
const int nelements, const int nparams,
|
||||
const __global numtyp4 *restrict zetaij,
|
||||
const __global acctyp4 *restrict zetaij,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
@ -710,7 +709,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
|
||||
int idx;
|
||||
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
|
||||
j, ijnum, offset_kf, idx);
|
||||
numtyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
|
||||
acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
|
||||
numtyp force = zeta_ji.x*tpainv;
|
||||
numtyp prefactor_ji = zeta_ji.y;
|
||||
f.x += delr1[0]*force;
|
||||
@ -776,7 +775,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
|
||||
int idx;
|
||||
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
|
||||
j, nbor_k, offset_k, idx);
|
||||
numtyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
|
||||
acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
|
||||
numtyp prefactor_jk = zeta_jk.y;
|
||||
int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
|
||||
ts1_param = ts1[jkiparam]; //fetch4(ts1_jkiparam,jkiparam,ts1_tex);
|
||||
@ -816,7 +815,7 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
const __global int *restrict map,
|
||||
const __global int *restrict elem2param,
|
||||
const int nelements, const int nparams,
|
||||
const __global numtyp4 *restrict zetaij,
|
||||
const __global acctyp4 *restrict zetaij,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
@ -931,9 +930,9 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
int idx;
|
||||
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
|
||||
j, ijnum, offset_kf, idx);
|
||||
numtyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
|
||||
acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
|
||||
numtyp force = zeta_ji.x*tpainv;
|
||||
numtyp prefactor = zeta_ji.y;
|
||||
numtyp prefactor_ji = zeta_ji.y;
|
||||
f.x += delr1[0]*force;
|
||||
f.y += delr1[1]*force;
|
||||
f.z += delr1[2]*force;
|
||||
@ -987,24 +986,24 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
h = ts4_param.z;
|
||||
gamma = ts4_param.w;
|
||||
attractive(bigr, bigd, powermint, lam3, c, d, h, gamma,
|
||||
prefactor, r1, r1inv, r2, r2inv, mdelr1, delr2, fi, fj, fk);
|
||||
prefactor_ji, r1, r1inv, r2, r2inv, mdelr1, delr2, fi, fj, fk);
|
||||
f.x += fj[0];
|
||||
f.y += fj[1];
|
||||
f.z += fj[2];
|
||||
|
||||
virial[0] += THIRD*(mdelr1[0]*fj[0] + delr2[0]*fk[0]);
|
||||
virial[1] += THIRD*(mdelr1[1]*fj[1] + delr2[1]*fk[1]);
|
||||
virial[2] += THIRD*(mdelr1[2]*fj[2] + delr2[2]*fk[2]);
|
||||
virial[3] += THIRD*(mdelr1[0]*fj[1] + delr2[0]*fk[1]);
|
||||
virial[4] += THIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]);
|
||||
virial[5] += THIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]);
|
||||
virial[0] += TWOTHIRD*(mdelr1[0]*fj[0] + delr2[0]*fk[0]);
|
||||
virial[1] += TWOTHIRD*(mdelr1[1]*fj[1] + delr2[1]*fk[1]);
|
||||
virial[2] += TWOTHIRD*(mdelr1[2]*fj[2] + delr2[2]*fk[2]);
|
||||
virial[3] += TWOTHIRD*(mdelr1[0]*fj[1] + delr2[0]*fk[1]);
|
||||
virial[4] += TWOTHIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]);
|
||||
virial[5] += TWOTHIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]);
|
||||
|
||||
//int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
|
||||
//int idx = kk*n_stride + j*t_per_atom + offset_k;
|
||||
int idx;
|
||||
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
|
||||
j, nbor_k, offset_k, idx);
|
||||
numtyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
|
||||
acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
|
||||
numtyp prefactor_jk = zeta_jk.y;
|
||||
|
||||
int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
|
||||
@ -1025,12 +1024,13 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
f.y += fk[1];
|
||||
f.z += fk[2];
|
||||
|
||||
virial[0] += THIRD*(delr2[0]*fj[0] + mdelr1[0]*fk[0]);
|
||||
virial[1] += THIRD*(delr2[1]*fj[1] + mdelr1[1]*fk[1]);
|
||||
virial[2] += THIRD*(delr2[2]*fj[2] + mdelr1[2]*fk[2]);
|
||||
virial[3] += THIRD*(delr2[0]*fj[1] + mdelr1[0]*fk[1]);
|
||||
virial[4] += THIRD*(delr2[0]*fj[2] + mdelr1[0]*fk[2]);
|
||||
virial[5] += THIRD*(delr2[1]*fj[2] + mdelr1[1]*fk[2]);
|
||||
virial[0] += TWOTHIRD*(delr2[0]*fj[0] + mdelr1[0]*fk[0]);
|
||||
virial[1] += TWOTHIRD*(delr2[1]*fj[1] + mdelr1[1]*fk[1]);
|
||||
virial[2] += TWOTHIRD*(delr2[2]*fj[2] + mdelr1[2]*fk[2]);
|
||||
virial[3] += TWOTHIRD*(delr2[0]*fj[1] + mdelr1[0]*fk[1]);
|
||||
virial[4] += TWOTHIRD*(delr2[0]*fj[2] + mdelr1[0]*fk[2]);
|
||||
virial[5] += TWOTHIRD*(delr2[1]*fj[2] + mdelr1[1]*fk[2]);
|
||||
|
||||
}
|
||||
} // for nbor
|
||||
|
||||
|
||||
@ -100,7 +100,7 @@ class Tersoff : public BaseThree<numtyp, acctyp> {
|
||||
/// Per-atom arrays:
|
||||
/// zetaij.x = force, zetaij.y = prefactor, zetaij.z = evdwl,
|
||||
/// zetaij.w = zetaij
|
||||
UCL_D_Vec<numtyp4> _zetaij;
|
||||
UCL_D_Vec<acctyp4> _zetaij;
|
||||
|
||||
UCL_Kernel k_zeta;
|
||||
UCL_Texture ts1_tex, ts2_tex, ts3_tex, ts4_tex, ts5_tex;
|
||||
|
||||
@ -227,7 +227,7 @@ ucl_inline void ters_zetaterm_d(const numtyp prefactor,
|
||||
if ((int)param_powermint == 3) tmp = t*t*t;
|
||||
else tmp = t;
|
||||
|
||||
if (tmp > (numtyp)69.0776) ex_delr = (acctyp)1.e30;
|
||||
if (tmp > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
|
||||
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
|
||||
else ex_delr = ucl_exp(tmp);
|
||||
|
||||
@ -295,7 +295,7 @@ ucl_inline void ters_zetaterm_d_fi(const numtyp prefactor,
|
||||
if ((int)param_powermint == 3) tmp = t*t*t;
|
||||
else tmp = t;
|
||||
|
||||
if (tmp > (numtyp)69.0776) ex_delr = (acctyp)1.e30;
|
||||
if (tmp > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
|
||||
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
|
||||
else ex_delr = ucl_exp(tmp);
|
||||
|
||||
@ -344,7 +344,7 @@ ucl_inline void ters_zetaterm_d_fj(const numtyp prefactor,
|
||||
if ((int)param_powermint == 3) tmp = t*t*t;
|
||||
else tmp = t;
|
||||
|
||||
if (tmp > (numtyp)69.0776) ex_delr = (acctyp)1.e30;
|
||||
if (tmp > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
|
||||
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
|
||||
else ex_delr = ucl_exp(tmp);
|
||||
|
||||
@ -391,7 +391,7 @@ ucl_inline void ters_zetaterm_d_fk(const numtyp prefactor,
|
||||
if ((int)param_powermint == 3) tmp = t*t*t;
|
||||
else tmp = t;
|
||||
|
||||
if (tmp > (numtyp)69.0776) ex_delr = (acctyp)1.e30;
|
||||
if (tmp > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
|
||||
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
|
||||
else ex_delr = ucl_exp(tmp);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user