git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14807 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -70,6 +70,8 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
|
|||||||
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
||||||
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
|
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
|
||||||
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
|
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
|
||||||
|
$(OBJ_DIR)/lal_tersoff_zbl.o $(OBJ_DIR)/lal_tersoff_zbl_ext.o \
|
||||||
|
$(OBJ_DIR)/lal_tersoff_mod.o $(OBJ_DIR)/lal_tersoff_mod_ext.o \
|
||||||
$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
|
$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
|
||||||
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
|
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
|
||||||
$(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
|
$(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
|
||||||
@ -122,6 +124,8 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
|||||||
$(OBJ_DIR)/lj_gromacs.cubin $(OBJ_DIR)/lj_gromacs_cubin.h \
|
$(OBJ_DIR)/lj_gromacs.cubin $(OBJ_DIR)/lj_gromacs_cubin.h \
|
||||||
$(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd_cubin.h \
|
$(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd_cubin.h \
|
||||||
$(OBJ_DIR)/tersoff.cubin $(OBJ_DIR)/tersoff_cubin.h \
|
$(OBJ_DIR)/tersoff.cubin $(OBJ_DIR)/tersoff_cubin.h \
|
||||||
|
$(OBJ_DIR)/tersoff_zbl.cubin $(OBJ_DIR)/tersoff_zbl_cubin.h \
|
||||||
|
$(OBJ_DIR)/tersoff_mod.cubin $(OBJ_DIR)/tersoff_mod_cubin.h \
|
||||||
$(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \
|
$(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \
|
||||||
$(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h \
|
$(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h \
|
||||||
$(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl_cubin.h \
|
$(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl_cubin.h \
|
||||||
@ -705,6 +709,30 @@ $(OBJ_DIR)/lal_tersoff.o: $(ALL_H) lal_tersoff.h lal_tersoff.cpp $(OBJ_DIR)/ters
|
|||||||
$(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h
|
$(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h
|
||||||
$(CUDR) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR)
|
$(CUDR) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/tersoff_zbl.cubin: lal_tersoff_zbl.cu lal_precision.h lal_tersoff_zbl_extra.h lal_preprocessor.h
|
||||||
|
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_tersoff_zbl.cu
|
||||||
|
|
||||||
|
$(OBJ_DIR)/tersoff_zbl_cubin.h: $(OBJ_DIR)/tersoff_zbl.cubin $(OBJ_DIR)/tersoff_zbl.cubin
|
||||||
|
$(BIN2C) -c -n tersoff_zbl $(OBJ_DIR)/tersoff_zbl.cubin > $(OBJ_DIR)/tersoff_zbl_cubin.h
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_tersoff_zbl.o: $(ALL_H) lal_tersoff_zbl.h lal_tersoff_zbl.cpp $(OBJ_DIR)/tersoff_zbl_cubin.h $(OBJ_DIR)/lal_base_three.o
|
||||||
|
$(CUDR) -o $@ -c lal_tersoff_zbl.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_tersoff_zbl_ext.o: $(ALL_H) lal_tersoff_zbl.h lal_tersoff_zbl_ext.cpp lal_base_three.h
|
||||||
|
$(CUDR) -o $@ -c lal_tersoff_zbl_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/tersoff_mod.cubin: lal_tersoff_mod.cu lal_precision.h lal_tersoff_mod_extra.h lal_preprocessor.h
|
||||||
|
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_tersoff_mod.cu
|
||||||
|
|
||||||
|
$(OBJ_DIR)/tersoff_mod_cubin.h: $(OBJ_DIR)/tersoff_mod.cubin $(OBJ_DIR)/tersoff_mod.cubin
|
||||||
|
$(BIN2C) -c -n tersoff_mod $(OBJ_DIR)/tersoff_mod.cubin > $(OBJ_DIR)/tersoff_mod_cubin.h
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_tersoff_mod.o: $(ALL_H) lal_tersoff_mod.h lal_tersoff_mod.cpp $(OBJ_DIR)/tersoff_mod_cubin.h $(OBJ_DIR)/lal_base_three.o
|
||||||
|
$(CUDR) -o $@ -c lal_tersoff_mod.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_tersoff_mod_ext.o: $(ALL_H) lal_tersoff_mod.h lal_tersoff_mod_ext.cpp lal_base_three.h
|
||||||
|
$(CUDR) -o $@ -c lal_tersoff_mod_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
$(OBJ_DIR)/coul.cubin: lal_coul.cu lal_precision.h lal_preprocessor.h
|
$(OBJ_DIR)/coul.cubin: lal_coul.cu lal_precision.h lal_preprocessor.h
|
||||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul.cu
|
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul.cu
|
||||||
|
|
||||||
|
|||||||
@ -59,6 +59,8 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
|
|||||||
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
||||||
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
|
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
|
||||||
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
|
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
|
||||||
|
$(OBJ_DIR)/lal_tersoff_zbl.o $(OBJ_DIR)/lal_tersoff_zbl_ext.o \
|
||||||
|
$(OBJ_DIR)/lal_tersoff_mod.o $(OBJ_DIR)/lal_tersoff_mod_ext.o \
|
||||||
$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
|
$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
|
||||||
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
|
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
|
||||||
$(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
|
$(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
|
||||||
@ -87,7 +89,8 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
|
|||||||
$(OBJ_DIR)/soft_cl.h $(OBJ_DIR)/lj_coul_msm_cl.h \
|
$(OBJ_DIR)/soft_cl.h $(OBJ_DIR)/lj_coul_msm_cl.h \
|
||||||
$(OBJ_DIR)/lj_gromacs_cl.h $(OBJ_DIR)/dpd_cl.h \
|
$(OBJ_DIR)/lj_gromacs_cl.h $(OBJ_DIR)/dpd_cl.h \
|
||||||
$(OBJ_DIR)/lj_gauss_cl.h $(OBJ_DIR)/dzugutov_cl.h \
|
$(OBJ_DIR)/lj_gauss_cl.h $(OBJ_DIR)/dzugutov_cl.h \
|
||||||
$(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/coul_cl.h \
|
$(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/tersoff_zbl_cl.h \
|
||||||
|
$(OBJ_DIR)/tersoff_mod_cl.h $(OBJ_DIR)/coul_cl.h \
|
||||||
$(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/zbl_cl.h \
|
$(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/zbl_cl.h \
|
||||||
$(OBJ_DIR)/lj_cubic_cl.h
|
$(OBJ_DIR)/lj_cubic_cl.h
|
||||||
|
|
||||||
@ -510,6 +513,24 @@ $(OBJ_DIR)/lal_tersoff.o: $(ALL_H) lal_tersoff.h lal_tersoff.cpp $(OBJ_DIR)/ter
|
|||||||
$(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h
|
$(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h
|
||||||
$(OCL) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR)
|
$(OCL) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/tersoff_zbl_cl.h: lal_tersoff_zbl.cu lal_tersoff_zbl_extra.h $(PRE1_H)
|
||||||
|
$(BSH) ./geryon/file_to_cstr.sh tersoff_zbl $(PRE1_H) lal_tersoff_zbl_extra.h lal_tersoff_zbl.cu $(OBJ_DIR)/tersoff_zbl_cl.h;
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_tersoff_zbl.o: $(ALL_H) lal_tersoff_zbl.h lal_tersoff_zbl.cpp $(OBJ_DIR)/tersoff_zbl_cl.h $(OBJ_DIR)/tersoff_zbl_cl.h $(OBJ_DIR)/lal_base_three.o
|
||||||
|
$(OCL) -o $@ -c lal_tersoff_zbl.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_tersoff_zbl_ext.o: $(ALL_H) lal_tersoff_zbl.h lal_tersoff_zbl_ext.cpp lal_base_three.h
|
||||||
|
$(OCL) -o $@ -c lal_tersoff_zbl_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/tersoff_mod_cl.h: lal_tersoff_mod.cu lal_tersoff_mod_extra.h $(PRE1_H)
|
||||||
|
$(BSH) ./geryon/file_to_cstr.sh tersoff_mod $(PRE1_H) lal_tersoff_mod_extra.h lal_tersoff_mod.cu $(OBJ_DIR)/tersoff_mod_cl.h;
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_tersoff_mod.o: $(ALL_H) lal_tersoff_mod.h lal_tersoff_mod.cpp $(OBJ_DIR)/tersoff_mod_cl.h $(OBJ_DIR)/tersoff_mod_cl.h $(OBJ_DIR)/lal_base_three.o
|
||||||
|
$(OCL) -o $@ -c lal_tersoff_mod.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_tersoff_mod_ext.o: $(ALL_H) lal_tersoff_mod.h lal_tersoff_mod_ext.cpp lal_base_three.h
|
||||||
|
$(OCL) -o $@ -c lal_tersoff_mod_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
$(OBJ_DIR)/coul_cl.h: lal_coul.cu $(PRE1_H)
|
$(OBJ_DIR)/coul_cl.h: lal_coul.cu $(PRE1_H)
|
||||||
$(BSH) ./geryon/file_to_cstr.sh coul $(PRE1_H) lal_coul.cu $(OBJ_DIR)/coul_cl.h;
|
$(BSH) ./geryon/file_to_cstr.sh coul $(PRE1_H) lal_coul.cu $(OBJ_DIR)/coul_cl.h;
|
||||||
|
|
||||||
|
|||||||
@ -142,7 +142,7 @@ int SWT::init(const int ntypes, const int nlocal, const int nall, const int max_
|
|||||||
ucl_copy(elem2param,dview_elem2param,false);
|
ucl_copy(elem2param,dview_elem2param,false);
|
||||||
|
|
||||||
UCL_H_Vec<int> dview_map(lj_types, *(this->ucl_device), UCL_WRITE_ONLY);
|
UCL_H_Vec<int> dview_map(lj_types, *(this->ucl_device), UCL_WRITE_ONLY);
|
||||||
for (int i = 0; i < lj_types; i++)
|
for (int i = 0; i < ntypes; i++)
|
||||||
dview_map[i] = host_map[i];
|
dview_map[i] = host_map[i];
|
||||||
|
|
||||||
map.alloc(lj_types,*(this->ucl_device), UCL_READ_ONLY);
|
map.alloc(lj_types,*(this->ucl_device), UCL_READ_ONLY);
|
||||||
@ -196,13 +196,15 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
|||||||
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
|
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
|
||||||
(BX/this->_threads_per_atom)));
|
(BX/this->_threads_per_atom)));
|
||||||
|
|
||||||
|
// this->_nbor_data == nbor->dev_packed for gpu_nbor == 0 and tpa == 1
|
||||||
|
// this->_nbor_data == nbor->dev_nbor for gpu_nbor == 1
|
||||||
int ainum=this->ans->inum();
|
int ainum=this->ans->inum();
|
||||||
int nbor_pitch=this->nbor->nbor_pitch();
|
int nbor_pitch=this->nbor->nbor_pitch();
|
||||||
this->time_pair.start();
|
this->time_pair.start();
|
||||||
this->k_pair.set_size(GX,BX);
|
this->k_pair.set_size(GX,BX);
|
||||||
this->k_pair.run(&this->atom->x, &sw1, &sw2, &sw3,
|
this->k_pair.run(&this->atom->x, &sw1, &sw2, &sw3,
|
||||||
&map, &elem2param, &_nelements,
|
&map, &elem2param, &_nelements,
|
||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
&this->ans->force, &this->ans->engv,
|
&this->ans->force, &this->ans->engv,
|
||||||
&eflag, &vflag, &ainum, &nbor_pitch,
|
&eflag, &vflag, &ainum, &nbor_pitch,
|
||||||
&this->_threads_per_atom);
|
&this->_threads_per_atom);
|
||||||
|
|||||||
@ -35,7 +35,7 @@ texture<int4> sw3_tex;
|
|||||||
#define sw3_tex sw3
|
#define sw3_tex sw3
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define THIRD (numtyp)0.66666667
|
#define THIRD (numtyp)0.66666666666666666667
|
||||||
|
|
||||||
//#define THREE_CONCURRENT
|
//#define THREE_CONCURRENT
|
||||||
|
|
||||||
|
|||||||
@ -178,7 +178,7 @@ int TersoffT::init(const int ntypes, const int nlocal, const int nall, const int
|
|||||||
ucl_copy(elem2param,dview_elem2param,false);
|
ucl_copy(elem2param,dview_elem2param,false);
|
||||||
|
|
||||||
UCL_H_Vec<int> dview_map(lj_types, *(this->ucl_device), UCL_WRITE_ONLY);
|
UCL_H_Vec<int> dview_map(lj_types, *(this->ucl_device), UCL_WRITE_ONLY);
|
||||||
for (int i = 0; i < lj_types; i++)
|
for (int i = 0; i < ntypes; i++)
|
||||||
dview_map[i] = host_map[i];
|
dview_map[i] = host_map[i];
|
||||||
|
|
||||||
map.alloc(lj_types,*(this->ucl_device), UCL_READ_ONLY);
|
map.alloc(lj_types,*(this->ucl_device), UCL_READ_ONLY);
|
||||||
|
|||||||
@ -43,7 +43,7 @@ texture<int4> ts5_tex;
|
|||||||
|
|
||||||
//#define THREE_CONCURRENT
|
//#define THREE_CONCURRENT
|
||||||
|
|
||||||
#define THIRD (numtyp)0.66666667
|
#define TWOTHIRD (numtyp)0.66666666666666666667
|
||||||
|
|
||||||
#define zeta_idx(nbor_mem, packed_mem, nbor_pitch, n_stride, t_per_atom, \
|
#define zeta_idx(nbor_mem, packed_mem, nbor_pitch, n_stride, t_per_atom, \
|
||||||
i, nbor_j, offset_j, idx) \
|
i, nbor_j, offset_j, idx) \
|
||||||
@ -108,6 +108,7 @@ texture<int4> ts5_tex;
|
|||||||
|
|
||||||
#define store_zeta(z, tid, t_per_atom, offset) \
|
#define store_zeta(z, tid, t_per_atom, offset) \
|
||||||
if (t_per_atom>1) { \
|
if (t_per_atom>1) { \
|
||||||
|
__local acctyp red_acc[BLOCK_PAIR]; \
|
||||||
red_acc[tid]=z; \
|
red_acc[tid]=z; \
|
||||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||||
if (offset < s) { \
|
if (offset < s) { \
|
||||||
@ -180,7 +181,7 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
|
|||||||
const __global int *restrict map,
|
const __global int *restrict map,
|
||||||
const __global int *restrict elem2param,
|
const __global int *restrict elem2param,
|
||||||
const int nelements, const int nparams,
|
const int nelements, const int nparams,
|
||||||
__global numtyp4 * zetaij,
|
__global acctyp4 * zetaij,
|
||||||
const __global int * dev_nbor,
|
const __global int * dev_nbor,
|
||||||
const __global int * dev_packed,
|
const __global int * dev_packed,
|
||||||
const int eflag, const int nall, const int inum,
|
const int eflag, const int nall, const int inum,
|
||||||
@ -205,9 +206,7 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
|
|||||||
ts5[tid]=ts5_in[tid];
|
ts5[tid]=ts5_in[tid];
|
||||||
}
|
}
|
||||||
|
|
||||||
numtyp z = (numtyp)0;
|
acctyp z = (acctyp)0;
|
||||||
__local numtyp red_acc[BLOCK_PAIR];
|
|
||||||
if (tid<BLOCK_PAIR) red_acc[tid] = (numtyp)0;
|
|
||||||
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
@ -245,7 +244,7 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
|
|||||||
if (rsq1 > cutsq[ijparam]) continue;
|
if (rsq1 > cutsq[ijparam]) continue;
|
||||||
|
|
||||||
// compute zeta_ij
|
// compute zeta_ij
|
||||||
z = (numtyp)0;
|
z = (acctyp)0;
|
||||||
|
|
||||||
int nbor_k = nborj_start-offset_j+offset_k;
|
int nbor_k = nborj_start-offset_j+offset_k;
|
||||||
for ( ; nbor_k < nbor_end; nbor_k+=n_stride) {
|
for ( ; nbor_k < nbor_end; nbor_k+=n_stride) {
|
||||||
@ -310,7 +309,7 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
|
|||||||
force_zeta(ijparam_bigb, ijparam_bigr, ijparam_bigd, ijparam_lam2,
|
force_zeta(ijparam_bigb, ijparam_bigr, ijparam_bigd, ijparam_lam2,
|
||||||
ijparam_beta, ijparam_powern, ijparam_c1, ijparam_c2, ijparam_c3,
|
ijparam_beta, ijparam_powern, ijparam_c1, ijparam_c2, ijparam_c3,
|
||||||
ijparam_c4, rsq1, z, eflag, fpfeng);
|
ijparam_c4, rsq1, z, eflag, fpfeng);
|
||||||
numtyp4 zij;
|
acctyp4 zij;
|
||||||
zij.x = fpfeng[0];
|
zij.x = fpfeng[0];
|
||||||
zij.y = fpfeng[1];
|
zij.y = fpfeng[1];
|
||||||
zij.z = fpfeng[2];
|
zij.z = fpfeng[2];
|
||||||
@ -426,7 +425,7 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_,
|
|||||||
const __global int *restrict map,
|
const __global int *restrict map,
|
||||||
const __global int *restrict elem2param,
|
const __global int *restrict elem2param,
|
||||||
const int nelements, const int nparams,
|
const int nelements, const int nparams,
|
||||||
const __global numtyp4 *restrict zetaij,
|
const __global acctyp4 *restrict zetaij,
|
||||||
const __global int * dev_nbor,
|
const __global int * dev_nbor,
|
||||||
const __global int * dev_packed,
|
const __global int * dev_packed,
|
||||||
__global acctyp4 *restrict ans,
|
__global acctyp4 *restrict ans,
|
||||||
@ -501,7 +500,7 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_,
|
|||||||
int idx;
|
int idx;
|
||||||
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
|
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
|
||||||
i, nbor_j, offset_j, idx);
|
i, nbor_j, offset_j, idx);
|
||||||
numtyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex);
|
acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex);
|
||||||
numtyp force = zeta_ij.x*tpainv;
|
numtyp force = zeta_ij.x*tpainv;
|
||||||
numtyp prefactor = zeta_ij.y;
|
numtyp prefactor = zeta_ij.y;
|
||||||
f.x += delr1[0]*force;
|
f.x += delr1[0]*force;
|
||||||
@ -568,7 +567,7 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_,
|
|||||||
if (vflag>0) {
|
if (vflag>0) {
|
||||||
acctyp v[6];
|
acctyp v[6];
|
||||||
numtyp pre = (numtyp)2.0;
|
numtyp pre = (numtyp)2.0;
|
||||||
if (evatom==1) pre = THIRD;
|
if (evatom==1) pre = TWOTHIRD;
|
||||||
v[0] = pre*(delr1[0]*fj[0] + delr2[0]*fk[0]);
|
v[0] = pre*(delr1[0]*fj[0] + delr2[0]*fk[0]);
|
||||||
v[1] = pre*(delr1[1]*fj[1] + delr2[1]*fk[1]);
|
v[1] = pre*(delr1[1]*fj[1] + delr2[1]*fk[1]);
|
||||||
v[2] = pre*(delr1[2]*fj[2] + delr2[2]*fk[2]);
|
v[2] = pre*(delr1[2]*fj[2] + delr2[2]*fk[2]);
|
||||||
@ -595,7 +594,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
|
|||||||
const __global int *restrict map,
|
const __global int *restrict map,
|
||||||
const __global int *restrict elem2param,
|
const __global int *restrict elem2param,
|
||||||
const int nelements, const int nparams,
|
const int nelements, const int nparams,
|
||||||
const __global numtyp4 *restrict zetaij,
|
const __global acctyp4 *restrict zetaij,
|
||||||
const __global int * dev_nbor,
|
const __global int * dev_nbor,
|
||||||
const __global int * dev_packed,
|
const __global int * dev_packed,
|
||||||
__global acctyp4 *restrict ans,
|
__global acctyp4 *restrict ans,
|
||||||
@ -710,7 +709,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
|
|||||||
int idx;
|
int idx;
|
||||||
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
|
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
|
||||||
j, ijnum, offset_kf, idx);
|
j, ijnum, offset_kf, idx);
|
||||||
numtyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
|
acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
|
||||||
numtyp force = zeta_ji.x*tpainv;
|
numtyp force = zeta_ji.x*tpainv;
|
||||||
numtyp prefactor_ji = zeta_ji.y;
|
numtyp prefactor_ji = zeta_ji.y;
|
||||||
f.x += delr1[0]*force;
|
f.x += delr1[0]*force;
|
||||||
@ -776,7 +775,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
|
|||||||
int idx;
|
int idx;
|
||||||
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
|
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
|
||||||
j, nbor_k, offset_k, idx);
|
j, nbor_k, offset_k, idx);
|
||||||
numtyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
|
acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
|
||||||
numtyp prefactor_jk = zeta_jk.y;
|
numtyp prefactor_jk = zeta_jk.y;
|
||||||
int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
|
int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
|
||||||
ts1_param = ts1[jkiparam]; //fetch4(ts1_jkiparam,jkiparam,ts1_tex);
|
ts1_param = ts1[jkiparam]; //fetch4(ts1_jkiparam,jkiparam,ts1_tex);
|
||||||
@ -816,7 +815,7 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||||||
const __global int *restrict map,
|
const __global int *restrict map,
|
||||||
const __global int *restrict elem2param,
|
const __global int *restrict elem2param,
|
||||||
const int nelements, const int nparams,
|
const int nelements, const int nparams,
|
||||||
const __global numtyp4 *restrict zetaij,
|
const __global acctyp4 *restrict zetaij,
|
||||||
const __global int * dev_nbor,
|
const __global int * dev_nbor,
|
||||||
const __global int * dev_packed,
|
const __global int * dev_packed,
|
||||||
__global acctyp4 *restrict ans,
|
__global acctyp4 *restrict ans,
|
||||||
@ -931,9 +930,9 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||||||
int idx;
|
int idx;
|
||||||
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
|
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
|
||||||
j, ijnum, offset_kf, idx);
|
j, ijnum, offset_kf, idx);
|
||||||
numtyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
|
acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
|
||||||
numtyp force = zeta_ji.x*tpainv;
|
numtyp force = zeta_ji.x*tpainv;
|
||||||
numtyp prefactor = zeta_ji.y;
|
numtyp prefactor_ji = zeta_ji.y;
|
||||||
f.x += delr1[0]*force;
|
f.x += delr1[0]*force;
|
||||||
f.y += delr1[1]*force;
|
f.y += delr1[1]*force;
|
||||||
f.z += delr1[2]*force;
|
f.z += delr1[2]*force;
|
||||||
@ -987,24 +986,24 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||||||
h = ts4_param.z;
|
h = ts4_param.z;
|
||||||
gamma = ts4_param.w;
|
gamma = ts4_param.w;
|
||||||
attractive(bigr, bigd, powermint, lam3, c, d, h, gamma,
|
attractive(bigr, bigd, powermint, lam3, c, d, h, gamma,
|
||||||
prefactor, r1, r1inv, r2, r2inv, mdelr1, delr2, fi, fj, fk);
|
prefactor_ji, r1, r1inv, r2, r2inv, mdelr1, delr2, fi, fj, fk);
|
||||||
f.x += fj[0];
|
f.x += fj[0];
|
||||||
f.y += fj[1];
|
f.y += fj[1];
|
||||||
f.z += fj[2];
|
f.z += fj[2];
|
||||||
|
|
||||||
virial[0] += THIRD*(mdelr1[0]*fj[0] + delr2[0]*fk[0]);
|
virial[0] += TWOTHIRD*(mdelr1[0]*fj[0] + delr2[0]*fk[0]);
|
||||||
virial[1] += THIRD*(mdelr1[1]*fj[1] + delr2[1]*fk[1]);
|
virial[1] += TWOTHIRD*(mdelr1[1]*fj[1] + delr2[1]*fk[1]);
|
||||||
virial[2] += THIRD*(mdelr1[2]*fj[2] + delr2[2]*fk[2]);
|
virial[2] += TWOTHIRD*(mdelr1[2]*fj[2] + delr2[2]*fk[2]);
|
||||||
virial[3] += THIRD*(mdelr1[0]*fj[1] + delr2[0]*fk[1]);
|
virial[3] += TWOTHIRD*(mdelr1[0]*fj[1] + delr2[0]*fk[1]);
|
||||||
virial[4] += THIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]);
|
virial[4] += TWOTHIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]);
|
||||||
virial[5] += THIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]);
|
virial[5] += TWOTHIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]);
|
||||||
|
|
||||||
//int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
|
//int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
|
||||||
//int idx = kk*n_stride + j*t_per_atom + offset_k;
|
//int idx = kk*n_stride + j*t_per_atom + offset_k;
|
||||||
int idx;
|
int idx;
|
||||||
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
|
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
|
||||||
j, nbor_k, offset_k, idx);
|
j, nbor_k, offset_k, idx);
|
||||||
numtyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
|
acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
|
||||||
numtyp prefactor_jk = zeta_jk.y;
|
numtyp prefactor_jk = zeta_jk.y;
|
||||||
|
|
||||||
int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
|
int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
|
||||||
@ -1025,12 +1024,13 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||||||
f.y += fk[1];
|
f.y += fk[1];
|
||||||
f.z += fk[2];
|
f.z += fk[2];
|
||||||
|
|
||||||
virial[0] += THIRD*(delr2[0]*fj[0] + mdelr1[0]*fk[0]);
|
virial[0] += TWOTHIRD*(delr2[0]*fj[0] + mdelr1[0]*fk[0]);
|
||||||
virial[1] += THIRD*(delr2[1]*fj[1] + mdelr1[1]*fk[1]);
|
virial[1] += TWOTHIRD*(delr2[1]*fj[1] + mdelr1[1]*fk[1]);
|
||||||
virial[2] += THIRD*(delr2[2]*fj[2] + mdelr1[2]*fk[2]);
|
virial[2] += TWOTHIRD*(delr2[2]*fj[2] + mdelr1[2]*fk[2]);
|
||||||
virial[3] += THIRD*(delr2[0]*fj[1] + mdelr1[0]*fk[1]);
|
virial[3] += TWOTHIRD*(delr2[0]*fj[1] + mdelr1[0]*fk[1]);
|
||||||
virial[4] += THIRD*(delr2[0]*fj[2] + mdelr1[0]*fk[2]);
|
virial[4] += TWOTHIRD*(delr2[0]*fj[2] + mdelr1[0]*fk[2]);
|
||||||
virial[5] += THIRD*(delr2[1]*fj[2] + mdelr1[1]*fk[2]);
|
virial[5] += TWOTHIRD*(delr2[1]*fj[2] + mdelr1[1]*fk[2]);
|
||||||
|
|
||||||
}
|
}
|
||||||
} // for nbor
|
} // for nbor
|
||||||
|
|
||||||
|
|||||||
@ -100,7 +100,7 @@ class Tersoff : public BaseThree<numtyp, acctyp> {
|
|||||||
/// Per-atom arrays:
|
/// Per-atom arrays:
|
||||||
/// zetaij.x = force, zetaij.y = prefactor, zetaij.z = evdwl,
|
/// zetaij.x = force, zetaij.y = prefactor, zetaij.z = evdwl,
|
||||||
/// zetaij.w = zetaij
|
/// zetaij.w = zetaij
|
||||||
UCL_D_Vec<numtyp4> _zetaij;
|
UCL_D_Vec<acctyp4> _zetaij;
|
||||||
|
|
||||||
UCL_Kernel k_zeta;
|
UCL_Kernel k_zeta;
|
||||||
UCL_Texture ts1_tex, ts2_tex, ts3_tex, ts4_tex, ts5_tex;
|
UCL_Texture ts1_tex, ts2_tex, ts3_tex, ts4_tex, ts5_tex;
|
||||||
|
|||||||
@ -227,7 +227,7 @@ ucl_inline void ters_zetaterm_d(const numtyp prefactor,
|
|||||||
if ((int)param_powermint == 3) tmp = t*t*t;
|
if ((int)param_powermint == 3) tmp = t*t*t;
|
||||||
else tmp = t;
|
else tmp = t;
|
||||||
|
|
||||||
if (tmp > (numtyp)69.0776) ex_delr = (acctyp)1.e30;
|
if (tmp > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
|
||||||
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
|
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
|
||||||
else ex_delr = ucl_exp(tmp);
|
else ex_delr = ucl_exp(tmp);
|
||||||
|
|
||||||
@ -295,7 +295,7 @@ ucl_inline void ters_zetaterm_d_fi(const numtyp prefactor,
|
|||||||
if ((int)param_powermint == 3) tmp = t*t*t;
|
if ((int)param_powermint == 3) tmp = t*t*t;
|
||||||
else tmp = t;
|
else tmp = t;
|
||||||
|
|
||||||
if (tmp > (numtyp)69.0776) ex_delr = (acctyp)1.e30;
|
if (tmp > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
|
||||||
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
|
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
|
||||||
else ex_delr = ucl_exp(tmp);
|
else ex_delr = ucl_exp(tmp);
|
||||||
|
|
||||||
@ -344,7 +344,7 @@ ucl_inline void ters_zetaterm_d_fj(const numtyp prefactor,
|
|||||||
if ((int)param_powermint == 3) tmp = t*t*t;
|
if ((int)param_powermint == 3) tmp = t*t*t;
|
||||||
else tmp = t;
|
else tmp = t;
|
||||||
|
|
||||||
if (tmp > (numtyp)69.0776) ex_delr = (acctyp)1.e30;
|
if (tmp > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
|
||||||
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
|
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
|
||||||
else ex_delr = ucl_exp(tmp);
|
else ex_delr = ucl_exp(tmp);
|
||||||
|
|
||||||
@ -391,7 +391,7 @@ ucl_inline void ters_zetaterm_d_fk(const numtyp prefactor,
|
|||||||
if ((int)param_powermint == 3) tmp = t*t*t;
|
if ((int)param_powermint == 3) tmp = t*t*t;
|
||||||
else tmp = t;
|
else tmp = t;
|
||||||
|
|
||||||
if (tmp > (numtyp)69.0776) ex_delr = (acctyp)1.e30;
|
if (tmp > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
|
||||||
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
|
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
|
||||||
else ex_delr = ucl_exp(tmp);
|
else ex_delr = ucl_exp(tmp);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user