Added udirect2b and umutual2b for hippo
This commit is contained in:
@ -172,6 +172,7 @@ elseif(GPU_API STREQUAL "OPENCL")
|
|||||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu
|
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu
|
||||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu
|
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu
|
||||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu
|
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu
|
||||||
|
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_hippo.cu
|
||||||
)
|
)
|
||||||
|
|
||||||
foreach(GPU_KERNEL ${GPU_LIB_CU})
|
foreach(GPU_KERNEL ${GPU_LIB_CU})
|
||||||
@ -188,6 +189,7 @@ elseif(GPU_API STREQUAL "OPENCL")
|
|||||||
GenerateOpenCLHeader(tersoff ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu)
|
GenerateOpenCLHeader(tersoff ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu)
|
||||||
GenerateOpenCLHeader(tersoff_zbl ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu)
|
GenerateOpenCLHeader(tersoff_zbl ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu)
|
||||||
GenerateOpenCLHeader(tersoff_mod ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu)
|
GenerateOpenCLHeader(tersoff_mod ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu)
|
||||||
|
GenerateOpenCLHeader(hippo ${CMAKE_CURRENT_BINARY_DIR}/gpu/hippo_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_hippo_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_hippo.cu)
|
||||||
|
|
||||||
list(APPEND GPU_LIB_SOURCES
|
list(APPEND GPU_LIB_SOURCES
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h
|
${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h
|
||||||
@ -197,6 +199,7 @@ elseif(GPU_API STREQUAL "OPENCL")
|
|||||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h
|
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h
|
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h
|
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/gpu/hippo_cl.h
|
||||||
)
|
)
|
||||||
|
|
||||||
add_library(gpu STATIC ${GPU_LIB_SOURCES})
|
add_library(gpu STATIC ${GPU_LIB_SOURCES})
|
||||||
|
|||||||
@ -74,6 +74,9 @@ $(OBJ_DIR)/tersoff_mod_cl.h: lal_tersoff_mod.cu $(PRE1_H) lal_tersoff_mod_extra.
|
|||||||
$(OBJ_DIR)/tersoff_zbl_cl.h: lal_tersoff_zbl.cu $(PRE1_H) lal_tersoff_zbl_extra.h
|
$(OBJ_DIR)/tersoff_zbl_cl.h: lal_tersoff_zbl.cu $(PRE1_H) lal_tersoff_zbl_extra.h
|
||||||
$(BSH) ./geryon/file_to_cstr.sh tersoff_zbl $(PRE1_H) lal_tersoff_zbl_extra.h lal_tersoff_zbl.cu $(OBJ_DIR)/tersoff_zbl_cl.h;
|
$(BSH) ./geryon/file_to_cstr.sh tersoff_zbl $(PRE1_H) lal_tersoff_zbl_extra.h lal_tersoff_zbl.cu $(OBJ_DIR)/tersoff_zbl_cl.h;
|
||||||
|
|
||||||
|
$(OBJ_DIR)/hippo_cl.h: lal_hippo.cu $(PRE1_H) lal_hippo_extra.h
|
||||||
|
$(BSH) ./geryon/file_to_cstr.sh hippo $(PRE1_H) lal_hippo_extra.h lal_hippo.cu $(OBJ_DIR)/hippo_cl.h;
|
||||||
|
|
||||||
$(OBJ_DIR)/%_cl.h: lal_%.cu $(PRE1_H)
|
$(OBJ_DIR)/%_cl.h: lal_%.cu $(PRE1_H)
|
||||||
$(BSH) ./geryon/file_to_cstr.sh $* $(PRE1_H) $< $@;
|
$(BSH) ./geryon/file_to_cstr.sh $* $(PRE1_H) $< $@;
|
||||||
|
|
||||||
|
|||||||
@ -1064,7 +1064,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
|
|||||||
bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
|
bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
|
||||||
bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
|
bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
|
||||||
|
|
||||||
numtyp tdipdip[6]; // the following tdipdip is incorrect!! needs work to store tdipdip
|
numtyp tdipdip[6];
|
||||||
tdipdip[0] = -bcn[0] + bcn[1]*xr*xr;
|
tdipdip[0] = -bcn[0] + bcn[1]*xr*xr;
|
||||||
tdipdip[1] = bcn[1]*xr*yr;
|
tdipdip[1] = bcn[1]*xr*yr;
|
||||||
tdipdip[2] = bcn[1]*xr*zr;
|
tdipdip[2] = bcn[1]*xr*zr;
|
||||||
@ -1233,10 +1233,10 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
|
|||||||
numtyp r = ucl_sqrt(r2);
|
numtyp r = ucl_sqrt(r2);
|
||||||
|
|
||||||
const numtyp4 pol1j = polar1[j];
|
const numtyp4 pol1j = polar1[j];
|
||||||
numtyp ck = polar1[j].x; // rpole[j][0];
|
numtyp ck = pol1j.x; // rpole[j][0];
|
||||||
numtyp dkx = polar1[j].y; // rpole[j][1];
|
numtyp dkx = pol1j.y; // rpole[j][1];
|
||||||
numtyp dky = polar1[j].z; // rpole[j][2];
|
numtyp dky = pol1j.z; // rpole[j][2];
|
||||||
numtyp dkz = polar1[j].w; // rpole[j][3];
|
numtyp dkz = pol1j.w; // rpole[j][3];
|
||||||
const numtyp4 pol2j = polar2[j];
|
const numtyp4 pol2j = polar2[j];
|
||||||
numtyp qkxx = pol2j.x; // rpole[j][4];
|
numtyp qkxx = pol2j.x; // rpole[j][4];
|
||||||
numtyp qkxy = pol2j.y; // rpole[j][5];
|
numtyp qkxy = pol2j.y; // rpole[j][5];
|
||||||
|
|||||||
@ -489,6 +489,81 @@ int HippoT::multipole_real(const int eflag, const int vflag) {
|
|||||||
return GX;
|
return GX;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Reneighbor on GPU if necessary, and then compute the direct real space part
|
||||||
|
// of the permanent field
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
template <class numtyp, class acctyp>
|
||||||
|
int** HippoT::compute_udirect2b(const int ago, const int inum_full,
|
||||||
|
const int nall, double **host_x,
|
||||||
|
int *host_type, int *host_amtype,
|
||||||
|
int *host_amgroup, double **host_rpole,
|
||||||
|
double **host_uind, double **host_uinp,
|
||||||
|
double* host_pval,
|
||||||
|
double *sublo, double *subhi, tagint *tag,
|
||||||
|
int **nspecial, tagint **special,
|
||||||
|
int *nspecial15, tagint **special15,
|
||||||
|
const bool eflag_in, const bool vflag_in,
|
||||||
|
const bool eatom, const bool vatom,
|
||||||
|
int &host_start, int **ilist, int **jnum,
|
||||||
|
const double cpu_time, bool &success,
|
||||||
|
const double aewald, const double off2_polar,
|
||||||
|
double *host_q, double *boxlo, double *prd,
|
||||||
|
void** fieldp_ptr) {
|
||||||
|
this->acc_timers();
|
||||||
|
int eflag, vflag;
|
||||||
|
if (eatom) eflag=2;
|
||||||
|
else if (eflag_in) eflag=1;
|
||||||
|
else eflag=0;
|
||||||
|
if (vatom) vflag=2;
|
||||||
|
else if (vflag_in) vflag=1;
|
||||||
|
else vflag=0;
|
||||||
|
|
||||||
|
#ifdef LAL_NO_BLOCK_REDUCE
|
||||||
|
if (eflag) eflag=2;
|
||||||
|
if (vflag) vflag=2;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
this->set_kernel(eflag,vflag);
|
||||||
|
|
||||||
|
// reallocate per-atom arrays, transfer data from the host
|
||||||
|
// and build the neighbor lists if needed
|
||||||
|
|
||||||
|
int** firstneigh = nullptr;
|
||||||
|
firstneigh = precompute(ago, inum_full, nall, host_x, host_type,
|
||||||
|
host_amtype, host_amgroup, host_rpole,
|
||||||
|
host_uind, host_uinp, host_pval, sublo, subhi, tag,
|
||||||
|
nspecial, special, nspecial15, special15,
|
||||||
|
eflag_in, vflag_in, eatom, vatom,
|
||||||
|
host_start, ilist, jnum, cpu_time,
|
||||||
|
success, host_q, boxlo, prd);
|
||||||
|
|
||||||
|
// ------------------- Resize _fieldp array ------------------------
|
||||||
|
|
||||||
|
if (inum_full>this->_max_fieldp_size) {
|
||||||
|
this->_max_fieldp_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
|
||||||
|
this->_fieldp.resize(this->_max_fieldp_size*8);
|
||||||
|
}
|
||||||
|
*fieldp_ptr=this->_fieldp.host.begin();
|
||||||
|
|
||||||
|
this->_off2_polar = off2_polar;
|
||||||
|
this->_aewald = aewald;
|
||||||
|
const int red_blocks=udirect2b(eflag,vflag);
|
||||||
|
|
||||||
|
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
||||||
|
|
||||||
|
this->_fieldp.update_host(this->_max_fieldp_size*8,false);
|
||||||
|
/*
|
||||||
|
printf("GPU lib: _fieldp size = %d: max fieldp size = %d\n",
|
||||||
|
this->_fieldp.cols(), _max_fieldp_size);
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
numtyp4* p = (numtyp4*)(&this->_fieldp[4*i]);
|
||||||
|
printf("i = %d; field = %f %f %f\n", i, p->x, p->y, p->z);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
return firstneigh; //nbor->host_jlist.begin()-host_start;
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Calculate the real-space permanent field, returning field and fieldp
|
// Calculate the real-space permanent field, returning field and fieldp
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@ -518,7 +593,8 @@ int HippoT::udirect2b(const int eflag, const int vflag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
this->k_udirect2b.set_size(GX,BX);
|
this->k_udirect2b.set_size(GX,BX);
|
||||||
this->k_udirect2b.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar,
|
this->k_udirect2b.run(&this->atom->x, &this->atom->extra,
|
||||||
|
&coeff_amtype, &coeff_amclass, &sp_polar,
|
||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
&this->dev_short_nbor,
|
&this->dev_short_nbor,
|
||||||
&this->_fieldp, &ainum, &_nall, &nbor_pitch,
|
&this->_fieldp, &ainum, &_nall, &nbor_pitch,
|
||||||
@ -529,6 +605,80 @@ int HippoT::udirect2b(const int eflag, const int vflag) {
|
|||||||
return GX;
|
return GX;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Reneighbor on GPU if necessary, and then compute the direct real space part
|
||||||
|
// of the induced field
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
template <class numtyp, class acctyp>
|
||||||
|
int** HippoT::compute_umutual2b(const int ago, const int inum_full,
|
||||||
|
const int nall, double **host_x,
|
||||||
|
int *host_type, int *host_amtype,
|
||||||
|
int *host_amgroup, double **host_rpole,
|
||||||
|
double **host_uind, double **host_uinp, double *host_pval,
|
||||||
|
double *sublo, double *subhi, tagint *tag,
|
||||||
|
int **nspecial, tagint **special,
|
||||||
|
int *nspecial15, tagint **special15,
|
||||||
|
const bool eflag_in, const bool vflag_in,
|
||||||
|
const bool eatom, const bool vatom,
|
||||||
|
int &host_start, int **ilist, int **jnum,
|
||||||
|
const double cpu_time, bool &success,
|
||||||
|
const double aewald, const double off2_polar,
|
||||||
|
double *host_q, double *boxlo, double *prd,
|
||||||
|
void** fieldp_ptr) {
|
||||||
|
this->acc_timers();
|
||||||
|
int eflag, vflag;
|
||||||
|
if (eatom) eflag=2;
|
||||||
|
else if (eflag_in) eflag=1;
|
||||||
|
else eflag=0;
|
||||||
|
if (vatom) vflag=2;
|
||||||
|
else if (vflag_in) vflag=1;
|
||||||
|
else vflag=0;
|
||||||
|
|
||||||
|
#ifdef LAL_NO_BLOCK_REDUCE
|
||||||
|
if (eflag) eflag=2;
|
||||||
|
if (vflag) vflag=2;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
this->set_kernel(eflag,vflag);
|
||||||
|
|
||||||
|
// reallocate per-atom arrays, transfer extra data from the host
|
||||||
|
// and build the neighbor lists if needed
|
||||||
|
|
||||||
|
int** firstneigh = nullptr;
|
||||||
|
firstneigh = precompute(ago, inum_full, nall, host_x, host_type,
|
||||||
|
host_amtype, host_amgroup, host_rpole,
|
||||||
|
host_uind, host_uinp, host_pval, sublo, subhi, tag,
|
||||||
|
nspecial, special, nspecial15, special15,
|
||||||
|
eflag_in, vflag_in, eatom, vatom,
|
||||||
|
host_start, ilist, jnum, cpu_time,
|
||||||
|
success, host_q, boxlo, prd);
|
||||||
|
|
||||||
|
// ------------------- Resize _fieldp array ------------------------
|
||||||
|
|
||||||
|
if (inum_full>this->_max_fieldp_size) {
|
||||||
|
this->_max_fieldp_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
|
||||||
|
this->_fieldp.resize(this->_max_fieldp_size*8);
|
||||||
|
}
|
||||||
|
*fieldp_ptr=this->_fieldp.host.begin();
|
||||||
|
|
||||||
|
this->_off2_polar = off2_polar;
|
||||||
|
this->_aewald = aewald;
|
||||||
|
const int red_blocks=umutual2b(eflag,vflag);
|
||||||
|
|
||||||
|
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
||||||
|
|
||||||
|
this->_fieldp.update_host(this->_max_fieldp_size*8,false);
|
||||||
|
/*
|
||||||
|
printf("GPU lib: _fieldp size = %d: max fieldp size = %d\n",
|
||||||
|
this->_fieldp.cols(), _max_fieldp_size);
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
numtyp4* p = (numtyp4*)(&this->_fieldp[4*i]);
|
||||||
|
printf("i = %d; field = %f %f %f\n", i, p->x, p->y, p->z);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
return firstneigh; //nbor->host_jlist.begin()-host_start;
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Calculate the real-space induced field, returning field and fieldp
|
// Calculate the real-space induced field, returning field and fieldp
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@ -558,7 +708,8 @@ int HippoT::umutual2b(const int eflag, const int vflag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
this->k_umutual2b.set_size(GX,BX);
|
this->k_umutual2b.set_size(GX,BX);
|
||||||
this->k_umutual2b.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar,
|
this->k_umutual2b.run(&this->atom->x, &this->atom->extra,
|
||||||
|
&coeff_amtype, &coeff_amclass, &sp_polar,
|
||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
&this->dev_short_nbor, &this->_fieldp, &ainum, &_nall,
|
&this->dev_short_nbor, &this->_fieldp, &ainum, &_nall,
|
||||||
&nbor_pitch, &this->_threads_per_atom, &this->_aewald,
|
&nbor_pitch, &this->_threads_per_atom, &this->_aewald,
|
||||||
|
|||||||
@ -16,7 +16,6 @@
|
|||||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "lal_hippo_extra.h"
|
#include "lal_hippo_extra.h"
|
||||||
//#include "lal_aux_fun1.h"
|
|
||||||
#ifdef LAMMPS_SMALLBIG
|
#ifdef LAMMPS_SMALLBIG
|
||||||
#define tagint int
|
#define tagint int
|
||||||
#endif
|
#endif
|
||||||
@ -985,10 +984,10 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
|||||||
numtyp qiyz = pol3i.x; // rpole[i][9];
|
numtyp qiyz = pol3i.x; // rpole[i][9];
|
||||||
numtyp qizz = pol3i.y; // rpole[i][12];
|
numtyp qizz = pol3i.y; // rpole[i][12];
|
||||||
itype = pol3i.z; // amtype[i];
|
itype = pol3i.z; // amtype[i];
|
||||||
iclass = coeff_amtype[itype].w; // amtype2class[itype];
|
|
||||||
|
|
||||||
numtyp corei = coeff_amclass[itype].z; // pcore[iclass];
|
iclass = coeff_amtype[itype].w; // amtype2class[itype];
|
||||||
numtyp alphai = coeff_amclass[itype].w; // palpha[iclass];
|
numtyp corei = coeff_amclass[iclass].z; // pcore[iclass];
|
||||||
|
numtyp alphai = coeff_amclass[iclass].w; // palpha[iclass];
|
||||||
numtyp vali = polar6[i].x;
|
numtyp vali = polar6[i].x;
|
||||||
|
|
||||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||||
@ -1028,8 +1027,8 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
|||||||
const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
|
const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
|
||||||
numtyp factor_mpole = sp_pol.w; // sp_mpole[sbmask15(jextra)];
|
numtyp factor_mpole = sp_pol.w; // sp_mpole[sbmask15(jextra)];
|
||||||
|
|
||||||
numtyp corek = coeff_amclass[jtype].z; // pcore[jclass];
|
numtyp corek = coeff_amclass[jclass].z; // pcore[jclass];
|
||||||
numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass];
|
numtyp alphak = coeff_amclass[jclass].w; // palpha[jclass];
|
||||||
numtyp valk = polar6[j].x;
|
numtyp valk = polar6[j].x;
|
||||||
|
|
||||||
// intermediates involving moments and separation distance
|
// intermediates involving moments and separation distance
|
||||||
@ -1250,7 +1249,8 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
|||||||
|
|
||||||
__kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
|
__kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp *restrict extra,
|
||||||
const __global numtyp4 *restrict coeff,
|
const __global numtyp4 *restrict coeff_amtype,
|
||||||
|
const __global numtyp4 *restrict coeff_amclass,
|
||||||
const __global numtyp4 *restrict sp_polar,
|
const __global numtyp4 *restrict sp_polar,
|
||||||
const __global int *dev_nbor,
|
const __global int *dev_nbor,
|
||||||
const __global int *dev_packed,
|
const __global int *dev_packed,
|
||||||
@ -1273,6 +1273,7 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
|
|||||||
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
||||||
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
||||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
||||||
|
numtyp4* polar6 = (numtyp4*)(&extra[20*nall]);
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
int numj, nbor, nbor_end;
|
int numj, nbor, nbor_end;
|
||||||
@ -1309,13 +1310,11 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
|
|||||||
numtyp qizz = pol3i.y; // rpole[i][12];
|
numtyp qizz = pol3i.y; // rpole[i][12];
|
||||||
int itype = pol3i.z; // amtype[i];
|
int itype = pol3i.z; // amtype[i];
|
||||||
int igroup = pol3i.w; // amgroup[i];
|
int igroup = pol3i.w; // amgroup[i];
|
||||||
|
int iclass = coeff_amtype[itype].w; // amtype2class[itype];
|
||||||
|
|
||||||
// debug:
|
numtyp corei = coeff_amclass[iclass].z; // pcore[iclass];
|
||||||
// xi__ = ix; xi__.w = itype;
|
numtyp alphai = coeff_amclass[iclass].w; // palpha[iclass];
|
||||||
|
numtyp vali = polar6[i].x;
|
||||||
numtyp pdi = coeff[itype].x;
|
|
||||||
numtyp pti = coeff[itype].y;
|
|
||||||
numtyp ddi = coeff[itype].z;
|
|
||||||
|
|
||||||
numtyp aesq2 = (numtyp)2.0 * aewald*aewald;
|
numtyp aesq2 = (numtyp)2.0 * aewald*aewald;
|
||||||
numtyp aesq2n = (numtyp)0.0;
|
numtyp aesq2n = (numtyp)0.0;
|
||||||
@ -1360,15 +1359,21 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
|
|||||||
numtyp qkzz = pol3j.y; // rpole[j][12];
|
numtyp qkzz = pol3j.y; // rpole[j][12];
|
||||||
int jtype = pol3j.z; // amtype[j];
|
int jtype = pol3j.z; // amtype[j];
|
||||||
int jgroup = pol3j.w; // amgroup[j];
|
int jgroup = pol3j.w; // amgroup[j];
|
||||||
|
int jclass = coeff_amtype[jtype].w; // amtype2class[jtype];
|
||||||
|
|
||||||
numtyp factor_dscale, factor_pscale;
|
numtyp corek = coeff_amclass[jclass].z; // pcore[jclass];
|
||||||
|
numtyp alphak = coeff_amclass[jclass].w; // palpha[jclass];
|
||||||
|
numtyp valk = polar6[j].x;
|
||||||
|
|
||||||
|
numtyp factor_wscale, factor_dscale, factor_pscale, factor_uscale;
|
||||||
const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
|
const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
|
||||||
|
factor_wscale = sp_pol.x; // special_polar_wscale[sbmask15(jextra)];
|
||||||
if (igroup == jgroup) {
|
if (igroup == jgroup) {
|
||||||
factor_pscale = sp_pol.y; // sp_polar_piscale[sbmask15(jextra)];
|
factor_dscale = factor_pscale = sp_pol.y; // special_polar_piscale[sbmask15(jextra)];
|
||||||
factor_dscale = polar_dscale;
|
factor_uscale = polar_uscale;
|
||||||
} else {
|
} else {
|
||||||
factor_pscale = sp_pol.z; // sp_polar_pscale[sbmask15(jextra)];
|
factor_dscale = factor_pscale = sp_pol.z; // special_polar_pscale[sbmask15(jextra)];
|
||||||
factor_dscale = (numtyp)1.0;
|
factor_uscale = (numtyp)1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// intermediates involving moments and separation distance
|
// intermediates involving moments and separation distance
|
||||||
@ -1400,50 +1405,42 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
|
|||||||
bn[m] = (bfac*bn[m-1]+aefac*exp2a) * r2inv;
|
bn[m] = (bfac*bn[m-1]+aefac*exp2a) * r2inv;
|
||||||
}
|
}
|
||||||
|
|
||||||
// find the field components for Thole polarization damping
|
// find the field components for charge penetration damping
|
||||||
|
numtyp dmpi[7],dmpk[7];
|
||||||
numtyp scale3 = (numtyp)1.0;
|
dampdir(r,alphai,alphak,dmpi,dmpk);
|
||||||
numtyp scale5 = (numtyp)1.0;
|
|
||||||
numtyp scale7 = (numtyp)1.0;
|
|
||||||
numtyp damp = pdi * coeff[jtype].x; // pdamp[jtype]
|
|
||||||
if (damp != (numtyp)0.0) {
|
|
||||||
numtyp pgamma = MIN(ddi,coeff[jtype].z); // dirdamp[jtype]
|
|
||||||
if (pgamma != (numtyp)0.0) {
|
|
||||||
damp = pgamma * ucl_powr(r/damp,(numtyp)1.5);
|
|
||||||
if (damp < (numtyp)50.0) {
|
|
||||||
numtyp expdamp = ucl_exp(-damp) ;
|
|
||||||
scale3 = (numtyp)1.0 - expdamp ;
|
|
||||||
scale5 = (numtyp)1.0 - expdamp*((numtyp)1.0+(numtyp)0.5*damp);
|
|
||||||
scale7 = (numtyp)1.0 - expdamp*((numtyp)1.0+(numtyp)0.65*damp + (numtyp)0.15*damp*damp);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
pgamma = MIN(pti,coeff[jtype].y); // thole[jtype]
|
|
||||||
damp = pgamma * ucl_powr(r/damp,(numtyp)3.0);
|
|
||||||
if (damp < (numtyp)50.0) {
|
|
||||||
numtyp expdamp = ucl_exp(-damp);
|
|
||||||
scale3 = (numtyp)1.0 - expdamp;
|
|
||||||
scale5 = (numtyp)1.0 - expdamp*((numtyp)1.0+damp);
|
|
||||||
scale7 = (numtyp)1.0 - expdamp*((numtyp)1.0+damp + (numtyp)0.6*damp*damp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else { // damp == 0: ???
|
|
||||||
}
|
|
||||||
|
|
||||||
numtyp scalek = factor_dscale;
|
numtyp scalek = factor_dscale;
|
||||||
bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
|
numtyp rr3i = bn[1] - ((numtyp)1.0-scalek*dmpi[2])*rr3;
|
||||||
bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
|
numtyp rr5i = bn[2] - ((numtyp)1.0-scalek*dmpi[4])*rr5;
|
||||||
bcn[2] = bn[3] - ((numtyp)1.0-scalek*scale7)*rr7;
|
numtyp rr7i = bn[3] - ((numtyp)1.0-scalek*dmpi[6])*rr7;
|
||||||
fid[0] = -xr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkx + (numtyp)2.0*bcn[1]*qkx;
|
numtyp rr3k = bn[1] - ((numtyp)1.0-scalek*dmpk[2])*rr3;
|
||||||
fid[1] = -yr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dky + (numtyp)2.0*bcn[1]*qky;
|
numtyp rr5k = bn[2] - ((numtyp)1.0-scalek*dmpk[4])*rr5;
|
||||||
fid[2] = -zr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkz + (numtyp)2.0*bcn[1]*qkz;
|
numtyp rr7k = bn[3] - ((numtyp)1.0-scalek*dmpk[6])*rr7;
|
||||||
|
rr3 = bn[1] - ((numtyp)1.0-scalek)*rr3;
|
||||||
|
fid[0] = -xr*(rr3*corek + rr3k*valk - rr5k*dkr + rr7k*qkr) -
|
||||||
|
rr3k*dkx + (numtyp)2.0*rr5k*qkx;
|
||||||
|
fid[1] = -yr*(rr3*corek + rr3k*valk - rr5k*dkr + rr7k*qkr) -
|
||||||
|
rr3k*dky + (numtyp)2.0*rr5k*qky;
|
||||||
|
fid[2] = -zr*(rr3*corek + rr3k*valk - rr5k*dkr + rr7k*qkr) -
|
||||||
|
rr3k*dkz + (numtyp)2.0*rr5k*qkz;
|
||||||
|
|
||||||
scalek = factor_pscale;
|
scalek = factor_pscale;
|
||||||
bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
|
rr3 = r2inv * rr1;
|
||||||
bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
|
rr3i = bn[1] - ((numtyp)1.0-scalek*dmpi[2])*rr3;
|
||||||
bcn[2] = bn[3] - ((numtyp)1.0-scalek*scale7)*rr7;
|
rr5i = bn[2] - ((numtyp)1.0-scalek*dmpi[4])*rr5;
|
||||||
fip[0] = -xr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkx + (numtyp)2.0*bcn[1]*qkx;
|
rr7i = bn[3] - ((numtyp)1.0-scalek*dmpi[6])*rr7;
|
||||||
fip[1] = -yr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dky + (numtyp)2.0*bcn[1]*qky;
|
rr3k = bn[1] - ((numtyp)1.0-scalek*dmpk[2])*rr3;
|
||||||
fip[2] = -zr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkz + (numtyp)2.0*bcn[1]*qkz;
|
rr5k = bn[2] - ((numtyp)1.0-scalek*dmpk[4])*rr5;
|
||||||
|
rr7k = bn[3] - ((numtyp)1.0-scalek*dmpk[6])*rr7;
|
||||||
|
rr3 = bn[1] - ((numtyp)1.0-scalek)*rr3;
|
||||||
|
fip[0] = -xr*(rr3*corek + rr3k*valk - rr5k*dkr + rr7k*qkr) -
|
||||||
|
rr3k*dkx + (numtyp)2.0*rr5k*qkx;
|
||||||
|
fip[1] = -yr*(rr3*corek + rr3k*valk - rr5k*dkr + rr7k*qkr) -
|
||||||
|
rr3k*dky + (numtyp)2.0*rr5k*qky;
|
||||||
|
fip[2] = -zr*(rr3*corek + rr3k*valk - rr5k*dkr + rr7k*qkr) -
|
||||||
|
rr3k*dkz + (numtyp)2.0*rr5k*qkz;
|
||||||
|
|
||||||
|
// find terms needed later to compute mutual polarization
|
||||||
|
|
||||||
_fieldp[0] += fid[0];
|
_fieldp[0] += fid[0];
|
||||||
_fieldp[1] += fid[1];
|
_fieldp[1] += fid[1];
|
||||||
@ -1468,7 +1465,8 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
|
|||||||
|
|
||||||
__kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
|
__kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp *restrict extra,
|
||||||
const __global numtyp4 *restrict coeff,
|
const __global numtyp4 *restrict coeff_amtype,
|
||||||
|
const __global numtyp4 *restrict coeff_amclass,
|
||||||
const __global numtyp4 *restrict sp_polar,
|
const __global numtyp4 *restrict sp_polar,
|
||||||
const __global int *dev_nbor,
|
const __global int *dev_nbor,
|
||||||
const __global int *dev_packed,
|
const __global int *dev_packed,
|
||||||
@ -1491,6 +1489,7 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
|
|||||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
||||||
numtyp4* polar4 = (numtyp4*)(&extra[12*nall]);
|
numtyp4* polar4 = (numtyp4*)(&extra[12*nall]);
|
||||||
numtyp4* polar5 = (numtyp4*)(&extra[16*nall]);
|
numtyp4* polar5 = (numtyp4*)(&extra[16*nall]);
|
||||||
|
numtyp4* polar6 = (numtyp4*)(&extra[20*nall]);
|
||||||
|
|
||||||
//numtyp4 xi__;
|
//numtyp4 xi__;
|
||||||
|
|
||||||
@ -1519,8 +1518,10 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
|
|||||||
itype = polar3[i].z; // amtype[i];
|
itype = polar3[i].z; // amtype[i];
|
||||||
igroup = polar3[i].w; // amgroup[i];
|
igroup = polar3[i].w; // amgroup[i];
|
||||||
|
|
||||||
numtyp pdi = coeff[itype].x;
|
int iclass = coeff_amtype[itype].w; // amtype2class[itype];
|
||||||
numtyp pti = coeff[itype].y;
|
numtyp corei = coeff_amclass[iclass].z; // pcore[iclass];
|
||||||
|
numtyp alphai = coeff_amclass[iclass].w; // palpha[iclass];
|
||||||
|
numtyp vali = polar6[i].x;
|
||||||
|
|
||||||
numtyp aesq2 = (numtyp)2.0 * aewald*aewald;
|
numtyp aesq2 = (numtyp)2.0 * aewald*aewald;
|
||||||
numtyp aesq2n = (numtyp)0.0;
|
numtyp aesq2n = (numtyp)0.0;
|
||||||
@ -1561,9 +1562,21 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
|
|||||||
numtyp ukyp = pol5j.y; // uinp[j][1];
|
numtyp ukyp = pol5j.y; // uinp[j][1];
|
||||||
numtyp ukzp = pol5j.z; // uinp[j][2];
|
numtyp ukzp = pol5j.z; // uinp[j][2];
|
||||||
|
|
||||||
numtyp factor_uscale;
|
int jclass = coeff_amtype[jtype].w; // amtype2class[jtype];
|
||||||
if (igroup == jgroup) factor_uscale = polar_uscale;
|
numtyp corek = coeff_amclass[jclass].z; // pcore[jclass];
|
||||||
else factor_uscale = (numtyp)1.0;
|
numtyp alphak = coeff_amclass[jclass].w; // palpha[jclass];
|
||||||
|
numtyp valk = polar6[j].x;
|
||||||
|
|
||||||
|
numtyp factor_wscale, factor_dscale, factor_pscale, factor_uscale;
|
||||||
|
const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
|
||||||
|
factor_wscale = sp_pol.x; // special_polar_wscale[sbmask15(jextra)];
|
||||||
|
if (igroup == jgroup) {
|
||||||
|
factor_dscale = factor_pscale = sp_pol.y; // special_polar_piscale[sbmask15(jextra)];
|
||||||
|
factor_uscale = polar_uscale;
|
||||||
|
} else {
|
||||||
|
factor_dscale = factor_pscale = sp_pol.z; // special_polar_pscale[sbmask15(jextra)];
|
||||||
|
factor_uscale = (numtyp)1.0;
|
||||||
|
}
|
||||||
|
|
||||||
// calculate the real space Ewald error function terms
|
// calculate the real space Ewald error function terms
|
||||||
|
|
||||||
@ -1583,32 +1596,20 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
|
|||||||
|
|
||||||
// find terms needed later to compute mutual polarization
|
// find terms needed later to compute mutual polarization
|
||||||
// if (poltyp != DIRECT)
|
// if (poltyp != DIRECT)
|
||||||
numtyp scale3 = (numtyp)1.0;
|
numtyp dmpik[5];
|
||||||
numtyp scale5 = (numtyp)1.0;
|
dampmut(r,alphai,alphak,dmpik);
|
||||||
numtyp damp = pdi * coeff[jtype].x; // pdamp[jtype]
|
numtyp scalek = factor_wscale;
|
||||||
if (damp != (numtyp)0.0) {
|
rr3 = r2inv * rr1;
|
||||||
numtyp pgamma = MIN(pti,coeff[jtype].y); // thole[jtype]
|
numtyp rr3ik = bn[1] - ((numtyp)1.0-scalek*dmpik[2])*rr3;
|
||||||
damp = pgamma * ucl_powr(r/damp,(numtyp)3.0);
|
numtyp rr5ik = bn[2] - ((numtyp)1.0-scalek*dmpik[4])*rr5;
|
||||||
if (damp < (numtyp)50.0) {
|
|
||||||
numtyp expdamp = ucl_exp(-damp);
|
|
||||||
scale3 = (numtyp)1.0 - expdamp;
|
|
||||||
scale5 = (numtyp)1.0 - expdamp*((numtyp)1.0+damp);
|
|
||||||
}
|
|
||||||
|
|
||||||
} else { // damp == 0: ???
|
numtyp tdipdip[6];
|
||||||
}
|
tdipdip[0] = -rr3ik + rr5ik*xr*xr;
|
||||||
|
tdipdip[1] = rr5ik*xr*yr;
|
||||||
numtyp scalek = factor_uscale;
|
tdipdip[2] = rr5ik*xr*zr;
|
||||||
bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
|
tdipdip[3] = -rr3ik + rr5ik*yr*yr;
|
||||||
bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
|
tdipdip[4] = rr5ik*yr*zr;
|
||||||
|
tdipdip[5] = -rr3ik + rr5ik*zr*zr;
|
||||||
numtyp tdipdip[6]; // the following tdipdip is incorrect!! needs work to store tdipdip
|
|
||||||
tdipdip[0] = -bcn[0] + bcn[1]*xr*xr;
|
|
||||||
tdipdip[1] = bcn[1]*xr*yr;
|
|
||||||
tdipdip[2] = bcn[1]*xr*zr;
|
|
||||||
tdipdip[3] = -bcn[0] + bcn[1]*yr*yr;
|
|
||||||
tdipdip[4] = bcn[1]*yr*zr;
|
|
||||||
tdipdip[5] = -bcn[0] + bcn[1]*zr*zr;
|
|
||||||
//if (i==0 && j == 10)
|
//if (i==0 && j == 10)
|
||||||
// printf("i = %d: j = %d: tdipdip %f %f %f %f %f %f\n",
|
// printf("i = %d: j = %d: tdipdip %f %f %f %f %f %f\n",
|
||||||
// i, j,tdipdip[0],tdipdip[1],tdipdip[2],tdipdip[3],tdipdip[4],tdipdip[5]);
|
// i, j,tdipdip[0],tdipdip[1],tdipdip[2],tdipdip[3],tdipdip[4],tdipdip[5]);
|
||||||
|
|||||||
@ -90,6 +90,40 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
|
|||||||
const double aewald, const double felec, const double off2_mpole, double *charge,
|
const double aewald, const double felec, const double off2_mpole, double *charge,
|
||||||
double *boxlo, double *prd, void **tep_ptr);
|
double *boxlo, double *prd, void **tep_ptr);
|
||||||
|
|
||||||
|
/// Compute the real space part of the permanent field (udirect2b) with device neighboring
|
||||||
|
virtual int** compute_udirect2b(const int ago, const int inum_full,
|
||||||
|
const int nall, double **host_x,
|
||||||
|
int *host_type, int *host_amtype,
|
||||||
|
int *host_amgroup, double **host_rpole,
|
||||||
|
double **host_uind, double **host_uinp, double* host_pval,
|
||||||
|
double *sublo, double *subhi, tagint *tag,
|
||||||
|
int **nspecial, tagint **special,
|
||||||
|
int *nspecial15, tagint **special15,
|
||||||
|
const bool eflag_in, const bool vflag_in,
|
||||||
|
const bool eatom, const bool vatom,
|
||||||
|
int &host_start, int **ilist, int **jnum,
|
||||||
|
const double cpu_time, bool &success,
|
||||||
|
const double aewald, const double off2_polar,
|
||||||
|
double *host_q, double *boxlo, double *prd,
|
||||||
|
void** fieldp_ptr);
|
||||||
|
|
||||||
|
/// Compute the real space part of the induced field (umutual2b) with device neighboring
|
||||||
|
virtual int** compute_umutual2b(const int ago, const int inum_full,
|
||||||
|
const int nall, double **host_x,
|
||||||
|
int *host_type, int *host_amtype,
|
||||||
|
int *host_amgroup, double **host_rpole,
|
||||||
|
double **host_uind, double **host_uinp, double *host_pval,
|
||||||
|
double *sublo, double *subhi, tagint *tag,
|
||||||
|
int **nspecial, tagint **special,
|
||||||
|
int *nspecial15, tagint **special15,
|
||||||
|
const bool eflag_in, const bool vflag_in,
|
||||||
|
const bool eatom, const bool vatom,
|
||||||
|
int &host_start, int **ilist, int **jnum,
|
||||||
|
const double cpu_time, bool &success,
|
||||||
|
const double aewald, const double off2_polar,
|
||||||
|
double *host_q, double *boxlo, double *prd,
|
||||||
|
void** fieldp_ptr);
|
||||||
|
|
||||||
/// Compute polar real-space with device neighboring
|
/// Compute polar real-space with device neighboring
|
||||||
virtual int** compute_polar_real(const int ago, const int inum_full, const int nall,
|
virtual int** compute_polar_real(const int ago, const int inum_full, const int nall,
|
||||||
double **host_x, int *host_type, int *host_amtype,
|
double **host_x, int *host_type, int *host_amtype,
|
||||||
|
|||||||
@ -157,7 +157,7 @@ int** hippo_gpu_compute_multipole_real(const int ago, const int inum_full,
|
|||||||
int** hippo_gpu_compute_udirect2b(const int ago, const int inum_full,
|
int** hippo_gpu_compute_udirect2b(const int ago, const int inum_full,
|
||||||
const int nall, double **host_x, int *host_type,
|
const int nall, double **host_x, int *host_type,
|
||||||
int *host_amtype, int *host_amgroup, double **host_rpole,
|
int *host_amtype, int *host_amgroup, double **host_rpole,
|
||||||
double **host_uind, double **host_uinp,
|
double **host_uind, double **host_uinp, double *host_pval,
|
||||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||||
tagint **special, int *nspecial15, tagint** special15,
|
tagint **special, int *nspecial15, tagint** special15,
|
||||||
const bool eflag, const bool vflag, const bool eatom,
|
const bool eflag, const bool vflag, const bool eatom,
|
||||||
@ -166,7 +166,7 @@ int** hippo_gpu_compute_udirect2b(const int ago, const int inum_full,
|
|||||||
bool &success, const double aewald, const double off2, double *host_q,
|
bool &success, const double aewald, const double off2, double *host_q,
|
||||||
double *boxlo, double *prd, void **fieldp_ptr) {
|
double *boxlo, double *prd, void **fieldp_ptr) {
|
||||||
return HIPPOMF.compute_udirect2b(ago, inum_full, nall, host_x, host_type,
|
return HIPPOMF.compute_udirect2b(ago, inum_full, nall, host_x, host_type,
|
||||||
host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
|
host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval,
|
||||||
sublo, subhi, tag, nspecial, special, nspecial15, special15,
|
sublo, subhi, tag, nspecial, special, nspecial15, special15,
|
||||||
eflag, vflag, eatom, vatom, host_start, ilist, jnum,
|
eflag, vflag, eatom, vatom, host_start, ilist, jnum,
|
||||||
cpu_time, success, aewald, off2, host_q, boxlo, prd, fieldp_ptr);
|
cpu_time, success, aewald, off2, host_q, boxlo, prd, fieldp_ptr);
|
||||||
@ -175,7 +175,7 @@ int** hippo_gpu_compute_udirect2b(const int ago, const int inum_full,
|
|||||||
int** hippo_gpu_compute_umutual2b(const int ago, const int inum_full,
|
int** hippo_gpu_compute_umutual2b(const int ago, const int inum_full,
|
||||||
const int nall, double **host_x, int *host_type,
|
const int nall, double **host_x, int *host_type,
|
||||||
int *host_amtype, int *host_amgroup, double **host_rpole,
|
int *host_amtype, int *host_amgroup, double **host_rpole,
|
||||||
double **host_uind, double **host_uinp,
|
double **host_uind, double **host_uinp, double *host_pval,
|
||||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||||
tagint **special, int *nspecial15, tagint** special15,
|
tagint **special, int *nspecial15, tagint** special15,
|
||||||
const bool eflag, const bool vflag,
|
const bool eflag, const bool vflag,
|
||||||
@ -184,7 +184,7 @@ int** hippo_gpu_compute_umutual2b(const int ago, const int inum_full,
|
|||||||
bool &success, const double aewald, const double off2, double *host_q,
|
bool &success, const double aewald, const double off2, double *host_q,
|
||||||
double *boxlo, double *prd, void **fieldp_ptr) {
|
double *boxlo, double *prd, void **fieldp_ptr) {
|
||||||
return HIPPOMF.compute_umutual2b(ago, inum_full, nall, host_x, host_type,
|
return HIPPOMF.compute_umutual2b(ago, inum_full, nall, host_x, host_type,
|
||||||
host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
|
host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval,
|
||||||
sublo, subhi, tag, nspecial, special, nspecial15, special15,
|
sublo, subhi, tag, nspecial, special, nspecial15, special15,
|
||||||
eflag, vflag, eatom, vatom, host_start, ilist, jnum,
|
eflag, vflag, eatom, vatom, host_start, ilist, jnum,
|
||||||
cpu_time, success, aewald, off2, host_q, boxlo, prd, fieldp_ptr);
|
cpu_time, success, aewald, off2, host_q, boxlo, prd, fieldp_ptr);
|
||||||
|
|||||||
@ -59,7 +59,7 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
|
|||||||
// compute tolerance value for damping exponents
|
// compute tolerance value for damping exponents
|
||||||
|
|
||||||
eps = (numtyp)0.001;
|
eps = (numtyp)0.001;
|
||||||
diff = dmpi-dmpk;
|
diff = dmpi-dmpk; // fabs(dmpi-dmpk)
|
||||||
if (diff < (numtyp)0) diff = -diff;
|
if (diff < (numtyp)0) diff = -diff;
|
||||||
|
|
||||||
// treat the case where alpha damping exponents are equal
|
// treat the case where alpha damping exponents are equal
|
||||||
@ -322,6 +322,109 @@ ucl_inline void damppole(const numtyp r, const int rorder,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
dampdir = direct field damping coefficents
|
||||||
|
dampdir generates coefficients for the direct field damping
|
||||||
|
function for powers of the interatomic distance
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
ucl_inline void dampdir(numtyp r, numtyp alphai, numtyp alphak, numtyp *dmpi, numtyp *dmpk)
|
||||||
|
{
|
||||||
|
numtyp eps,diff;
|
||||||
|
numtyp expi,expk;
|
||||||
|
numtyp dampi,dampk;
|
||||||
|
numtyp dampi2,dampk2;
|
||||||
|
numtyp dampi3,dampk3;
|
||||||
|
numtyp dampi4,dampk4;
|
||||||
|
|
||||||
|
// compute tolerance and exponential damping factors
|
||||||
|
|
||||||
|
eps = (numtyp)0.001;
|
||||||
|
diff = alphai-alphak; // fabs(alphai-alphak);
|
||||||
|
if (diff < (numtyp)0) diff = -diff;
|
||||||
|
dampi = alphai * r;
|
||||||
|
dampk = alphak * r;
|
||||||
|
expi = ucl_exp(-dampi);
|
||||||
|
expk = ucl_exp(-dampk);
|
||||||
|
|
||||||
|
// core-valence charge penetration damping for Gordon f1 (HIPPO)
|
||||||
|
|
||||||
|
dampi2 = dampi * dampi;
|
||||||
|
dampi3 = dampi * dampi2;
|
||||||
|
dampi4 = dampi2 * dampi2;
|
||||||
|
dmpi[2] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2)*expi;
|
||||||
|
dmpi[4] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0)*expi;
|
||||||
|
dmpi[6] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 + dampi4/(numtyp)30.0)*expi;
|
||||||
|
if (diff < eps) {
|
||||||
|
dmpk[2] = dmpi[2];
|
||||||
|
dmpk[4] = dmpi[4];
|
||||||
|
dmpk[6] = dmpi[6];
|
||||||
|
} else {
|
||||||
|
dampk2 = dampk * dampk;
|
||||||
|
dampk3 = dampk * dampk2;
|
||||||
|
dampk4 = dampk2 * dampk2;
|
||||||
|
dmpk[2] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2)*expk;
|
||||||
|
dmpk[4] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0)*expk;
|
||||||
|
dmpk[6] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 + dampk4/30.0)*expk;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
dampmut = mutual field damping coefficents
|
||||||
|
dampmut generates coefficients for the mutual field damping
|
||||||
|
function for powers of the interatomic distance
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
ucl_inline void dampmut(numtyp r, numtyp alphai, numtyp alphak, numtyp dmpik[5])
|
||||||
|
{
|
||||||
|
numtyp termi,termk;
|
||||||
|
numtyp termi2,termk2;
|
||||||
|
numtyp alphai2,alphak2;
|
||||||
|
numtyp eps,diff;
|
||||||
|
numtyp expi,expk;
|
||||||
|
numtyp dampi,dampk;
|
||||||
|
numtyp dampi2,dampi3;
|
||||||
|
numtyp dampi4,dampi5;
|
||||||
|
numtyp dampk2,dampk3;
|
||||||
|
|
||||||
|
// compute tolerance and exponential damping factors
|
||||||
|
|
||||||
|
eps = (numtyp)0.001;
|
||||||
|
diff = alphai-alphak; // fabs(alphai-alphak);
|
||||||
|
if (diff < (numtyp)0) diff = -diff;
|
||||||
|
dampi = alphai * r;
|
||||||
|
dampk = alphak * r;
|
||||||
|
expi = ucl_exp(-dampi);
|
||||||
|
expk = ucl_exp(-dampk);
|
||||||
|
|
||||||
|
// valence-valence charge penetration damping for Gordon f1 (HIPPO)
|
||||||
|
|
||||||
|
dampi2 = dampi * dampi;
|
||||||
|
dampi3 = dampi * dampi2;
|
||||||
|
if (diff < eps) {
|
||||||
|
dampi4 = dampi2 * dampi2;
|
||||||
|
dampi5 = dampi2 * dampi3;
|
||||||
|
dmpik[2] = 1.0 - (1.0 + dampi + 0.5*dampi2 +
|
||||||
|
7.0*dampi3/48.0 + dampi4/48.0)*expi;
|
||||||
|
dmpik[4] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0 +
|
||||||
|
dampi4/24.0 + dampi5/144.0)*expi;
|
||||||
|
} else {
|
||||||
|
dampk2 = dampk * dampk;
|
||||||
|
dampk3 = dampk * dampk2;
|
||||||
|
alphai2 = alphai * alphai;
|
||||||
|
alphak2 = alphak * alphak;
|
||||||
|
termi = alphak2 / (alphak2-alphai2);
|
||||||
|
termk = alphai2 / (alphai2-alphak2);
|
||||||
|
termi2 = termi * termi;
|
||||||
|
termk2 = termk * termk;
|
||||||
|
dmpik[2] = 1.0 - termi2*(1.0+dampi+0.5*dampi2)*expi -
|
||||||
|
termk2*(1.0+dampk+0.5*dampk2)*expk -
|
||||||
|
2.0*termi2*termk*(1.0+dampi)*expi - 2.0*termk2*termi*(1.0+dampk)*expk;
|
||||||
|
dmpik[4] = 1.0 - termi2*(1.0+dampi+0.5*dampi2 + dampi3/6.0)*expi -
|
||||||
|
termk2*(1.0+dampk+0.5*dampk2 + dampk3/6.00)*expk -
|
||||||
|
2.0*termi2*termk *(1.0+dampi+dampi2/3.0)*expi -
|
||||||
|
2.0*termk2*termi *(1.0+dampk+dampk2/3.0)*expk;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -1900,7 +1900,7 @@ void PairAmoeba::dampmut(double r, double alphai, double alphak, double *dmpik)
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void PairAmoeba::dampdir(double r, double alphai, double alphak,
|
void PairAmoeba::dampdir(double r, double alphai, double alphak,
|
||||||
double *dmpi, double *dmpk)
|
double dmpi[7], double dmpk[7])
|
||||||
{
|
{
|
||||||
double eps,diff;
|
double eps,diff;
|
||||||
double expi,expk;
|
double expi,expk;
|
||||||
|
|||||||
@ -89,7 +89,7 @@ int ** hippo_gpu_compute_multipole_real(const int ago, const int inum, const int
|
|||||||
int ** hippo_gpu_compute_udirect2b(const int ago, const int inum, const int nall,
|
int ** hippo_gpu_compute_udirect2b(const int ago, const int inum, const int nall,
|
||||||
double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
|
double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
|
||||||
double **host_rpole, double **host_uind, double **host_uinp,
|
double **host_rpole, double **host_uind, double **host_uinp,
|
||||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||||
tagint **special, int* nspecial15, tagint** special15,
|
tagint **special, int* nspecial15, tagint** special15,
|
||||||
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
|
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
|
||||||
int &host_start, int **ilist, int **jnum, const double cpu_time,
|
int &host_start, int **ilist, int **jnum, const double cpu_time,
|
||||||
@ -98,7 +98,7 @@ int ** hippo_gpu_compute_udirect2b(const int ago, const int inum, const int nall
|
|||||||
|
|
||||||
int ** hippo_gpu_compute_umutual2b(const int ago, const int inum, const int nall,
|
int ** hippo_gpu_compute_umutual2b(const int ago, const int inum, const int nall,
|
||||||
double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
|
double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
|
||||||
double **host_rpole, double **host_uind, double **host_uinp,
|
double **host_rpole, double **host_uind, double **host_uinp, double *host_pval,
|
||||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||||
tagint **special, int* nspecial15, tagint** special15,
|
tagint **special, int* nspecial15, tagint** special15,
|
||||||
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
|
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
|
||||||
@ -136,8 +136,8 @@ PairHippoGPU::PairHippoGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE)
|
|||||||
gpu_repulsion_ready = false; // true for HIPPO when ready
|
gpu_repulsion_ready = false; // true for HIPPO when ready
|
||||||
gpu_dispersion_real_ready = true; // true for HIPPO when ready
|
gpu_dispersion_real_ready = true; // true for HIPPO when ready
|
||||||
gpu_multipole_real_ready = true;
|
gpu_multipole_real_ready = true;
|
||||||
gpu_udirect2b_ready = false;
|
gpu_udirect2b_ready = true;
|
||||||
gpu_umutual2b_ready = false;
|
gpu_umutual2b_ready = true;
|
||||||
gpu_polar_real_ready = true;
|
gpu_polar_real_ready = true;
|
||||||
|
|
||||||
GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
|
GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
|
||||||
@ -791,7 +791,7 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp)
|
|||||||
|
|
||||||
firstneigh = hippo_gpu_compute_udirect2b(neighbor->ago, inum, nall, atom->x,
|
firstneigh = hippo_gpu_compute_udirect2b(neighbor->ago, inum, nall, atom->x,
|
||||||
atom->type, amtype, amgroup, rpole,
|
atom->type, amtype, amgroup, rpole,
|
||||||
uind, uinp, sublo, subhi, atom->tag,
|
uind, uinp, pval, sublo, subhi, atom->tag,
|
||||||
atom->nspecial, atom->special,
|
atom->nspecial, atom->special,
|
||||||
atom->nspecial15, atom->special15,
|
atom->nspecial15, atom->special15,
|
||||||
eflag, vflag, eflag_atom, vflag_atom,
|
eflag, vflag, eflag_atom, vflag_atom,
|
||||||
@ -1016,7 +1016,7 @@ void PairHippoGPU::umutual2b(double **field, double **fieldp)
|
|||||||
|
|
||||||
firstneigh = hippo_gpu_compute_umutual2b(neighbor->ago, inum, nall, atom->x,
|
firstneigh = hippo_gpu_compute_umutual2b(neighbor->ago, inum, nall, atom->x,
|
||||||
atom->type, amtype, amgroup, rpole,
|
atom->type, amtype, amgroup, rpole,
|
||||||
uind, uinp, sublo, subhi, atom->tag,
|
uind, uinp, pval, sublo, subhi, atom->tag,
|
||||||
atom->nspecial, atom->special,
|
atom->nspecial, atom->special,
|
||||||
atom->nspecial15, atom->special15,
|
atom->nspecial15, atom->special15,
|
||||||
eflag, vflag, eflag_atom, vflag_atom,
|
eflag, vflag, eflag_atom, vflag_atom,
|
||||||
|
|||||||
Reference in New Issue
Block a user