diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake index 2b6977005d..cf5bcd2ea2 100644 --- a/cmake/Modules/Packages/GPU.cmake +++ b/cmake/Modules/Packages/GPU.cmake @@ -172,6 +172,7 @@ elseif(GPU_API STREQUAL "OPENCL") ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu + ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_hippo.cu ) foreach(GPU_KERNEL ${GPU_LIB_CU}) @@ -188,6 +189,7 @@ elseif(GPU_API STREQUAL "OPENCL") GenerateOpenCLHeader(tersoff ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu) GenerateOpenCLHeader(tersoff_zbl ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu) GenerateOpenCLHeader(tersoff_mod ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu) + GenerateOpenCLHeader(hippo ${CMAKE_CURRENT_BINARY_DIR}/gpu/hippo_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_hippo_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_hippo.cu) list(APPEND GPU_LIB_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h @@ -197,6 +199,7 @@ elseif(GPU_API STREQUAL "OPENCL") ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h + ${CMAKE_CURRENT_BINARY_DIR}/gpu/hippo_cl.h ) add_library(gpu STATIC ${GPU_LIB_SOURCES}) diff --git a/lib/gpu/Opencl.makefile b/lib/gpu/Opencl.makefile index 2ff98827d4..64a2161f85 100644 --- a/lib/gpu/Opencl.makefile +++ b/lib/gpu/Opencl.makefile @@ -74,6 +74,9 @@ $(OBJ_DIR)/tersoff_mod_cl.h: lal_tersoff_mod.cu $(PRE1_H) lal_tersoff_mod_extra. $(OBJ_DIR)/tersoff_zbl_cl.h: lal_tersoff_zbl.cu $(PRE1_H) lal_tersoff_zbl_extra.h $(BSH) ./geryon/file_to_cstr.sh tersoff_zbl $(PRE1_H) lal_tersoff_zbl_extra.h lal_tersoff_zbl.cu $(OBJ_DIR)/tersoff_zbl_cl.h; +$(OBJ_DIR)/hippo_cl.h: lal_hippo.cu $(PRE1_H) lal_hippo_extra.h + $(BSH) ./geryon/file_to_cstr.sh hippo $(PRE1_H) lal_hippo_extra.h lal_hippo.cu $(OBJ_DIR)/hippo_cl.h; + $(OBJ_DIR)/%_cl.h: lal_%.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh $* $(PRE1_H) $< $@; diff --git a/lib/gpu/lal_amoeba.cu b/lib/gpu/lal_amoeba.cu index e4d129214a..1deb3e3bb5 100644 --- a/lib/gpu/lal_amoeba.cu +++ b/lib/gpu/lal_amoeba.cu @@ -1064,7 +1064,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_, bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3; bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5; - numtyp tdipdip[6]; // the following tdipdip is incorrect!! needs work to store tdipdip + numtyp tdipdip[6]; tdipdip[0] = -bcn[0] + bcn[1]*xr*xr; tdipdip[1] = bcn[1]*xr*yr; tdipdip[2] = bcn[1]*xr*zr; @@ -1233,10 +1233,10 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_, numtyp r = ucl_sqrt(r2); const numtyp4 pol1j = polar1[j]; - numtyp ck = polar1[j].x; // rpole[j][0]; - numtyp dkx = polar1[j].y; // rpole[j][1]; - numtyp dky = polar1[j].z; // rpole[j][2]; - numtyp dkz = polar1[j].w; // rpole[j][3]; + numtyp ck = pol1j.x; // rpole[j][0]; + numtyp dkx = pol1j.y; // rpole[j][1]; + numtyp dky = pol1j.z; // rpole[j][2]; + numtyp dkz = pol1j.w; // rpole[j][3]; const numtyp4 pol2j = polar2[j]; numtyp qkxx = pol2j.x; // rpole[j][4]; numtyp qkxy = pol2j.y; // rpole[j][5]; diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp index d31370be73..caf910863f 100644 --- a/lib/gpu/lal_hippo.cpp +++ b/lib/gpu/lal_hippo.cpp @@ -489,6 +489,81 @@ int HippoT::multipole_real(const int eflag, const int vflag) { return GX; } +// --------------------------------------------------------------------------- +// Reneighbor on GPU if necessary, and then compute the direct real space part +// of the permanent field +// --------------------------------------------------------------------------- +template +int** HippoT::compute_udirect2b(const int ago, const int inum_full, + const int nall, double **host_x, + int *host_type, int *host_amtype, + int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, + double* host_pval, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, + int *nspecial15, tagint **special15, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, + int &host_start, int **ilist, int **jnum, + const double cpu_time, bool &success, + const double aewald, const double off2_polar, + double *host_q, double *boxlo, double *prd, + void** fieldp_ptr) { + this->acc_timers(); + int eflag, vflag; + if (eatom) eflag=2; + else if (eflag_in) eflag=1; + else eflag=0; + if (vatom) vflag=2; + else if (vflag_in) vflag=1; + else vflag=0; + + #ifdef LAL_NO_BLOCK_REDUCE + if (eflag) eflag=2; + if (vflag) vflag=2; + #endif + + this->set_kernel(eflag,vflag); + + // reallocate per-atom arrays, transfer data from the host + // and build the neighbor lists if needed + + int** firstneigh = nullptr; + firstneigh = precompute(ago, inum_full, nall, host_x, host_type, + host_amtype, host_amgroup, host_rpole, + host_uind, host_uinp, host_pval, sublo, subhi, tag, + nspecial, special, nspecial15, special15, + eflag_in, vflag_in, eatom, vatom, + host_start, ilist, jnum, cpu_time, + success, host_q, boxlo, prd); + + // ------------------- Resize _fieldp array ------------------------ + + if (inum_full>this->_max_fieldp_size) { + this->_max_fieldp_size=static_cast(static_cast(inum_full)*1.10); + this->_fieldp.resize(this->_max_fieldp_size*8); + } + *fieldp_ptr=this->_fieldp.host.begin(); + + this->_off2_polar = off2_polar; + this->_aewald = aewald; + const int red_blocks=udirect2b(eflag,vflag); + + // copy field and fieldp from device to host (_fieldp store both arrays, one after another) + + this->_fieldp.update_host(this->_max_fieldp_size*8,false); +/* + printf("GPU lib: _fieldp size = %d: max fieldp size = %d\n", + this->_fieldp.cols(), _max_fieldp_size); + for (int i = 0; i < 10; i++) { + numtyp4* p = (numtyp4*)(&this->_fieldp[4*i]); + printf("i = %d; field = %f %f %f\n", i, p->x, p->y, p->z); + } +*/ + return firstneigh; //nbor->host_jlist.begin()-host_start; +} + // --------------------------------------------------------------------------- // Calculate the real-space permanent field, returning field and fieldp // --------------------------------------------------------------------------- @@ -518,7 +593,8 @@ int HippoT::udirect2b(const int eflag, const int vflag) { } this->k_udirect2b.set_size(GX,BX); - this->k_udirect2b.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar, + this->k_udirect2b.run(&this->atom->x, &this->atom->extra, + &coeff_amtype, &coeff_amclass, &sp_polar, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->dev_short_nbor, &this->_fieldp, &ainum, &_nall, &nbor_pitch, @@ -529,6 +605,80 @@ int HippoT::udirect2b(const int eflag, const int vflag) { return GX; } +// --------------------------------------------------------------------------- +// Reneighbor on GPU if necessary, and then compute the direct real space part +// of the induced field +// --------------------------------------------------------------------------- +template +int** HippoT::compute_umutual2b(const int ago, const int inum_full, + const int nall, double **host_x, + int *host_type, int *host_amtype, + int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, double *host_pval, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, + int *nspecial15, tagint **special15, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, + int &host_start, int **ilist, int **jnum, + const double cpu_time, bool &success, + const double aewald, const double off2_polar, + double *host_q, double *boxlo, double *prd, + void** fieldp_ptr) { + this->acc_timers(); + int eflag, vflag; + if (eatom) eflag=2; + else if (eflag_in) eflag=1; + else eflag=0; + if (vatom) vflag=2; + else if (vflag_in) vflag=1; + else vflag=0; + + #ifdef LAL_NO_BLOCK_REDUCE + if (eflag) eflag=2; + if (vflag) vflag=2; + #endif + + this->set_kernel(eflag,vflag); + + // reallocate per-atom arrays, transfer extra data from the host + // and build the neighbor lists if needed + + int** firstneigh = nullptr; + firstneigh = precompute(ago, inum_full, nall, host_x, host_type, + host_amtype, host_amgroup, host_rpole, + host_uind, host_uinp, host_pval, sublo, subhi, tag, + nspecial, special, nspecial15, special15, + eflag_in, vflag_in, eatom, vatom, + host_start, ilist, jnum, cpu_time, + success, host_q, boxlo, prd); + + // ------------------- Resize _fieldp array ------------------------ + + if (inum_full>this->_max_fieldp_size) { + this->_max_fieldp_size=static_cast(static_cast(inum_full)*1.10); + this->_fieldp.resize(this->_max_fieldp_size*8); + } + *fieldp_ptr=this->_fieldp.host.begin(); + + this->_off2_polar = off2_polar; + this->_aewald = aewald; + const int red_blocks=umutual2b(eflag,vflag); + + // copy field and fieldp from device to host (_fieldp store both arrays, one after another) + + this->_fieldp.update_host(this->_max_fieldp_size*8,false); +/* + printf("GPU lib: _fieldp size = %d: max fieldp size = %d\n", + this->_fieldp.cols(), _max_fieldp_size); + for (int i = 0; i < 10; i++) { + numtyp4* p = (numtyp4*)(&this->_fieldp[4*i]); + printf("i = %d; field = %f %f %f\n", i, p->x, p->y, p->z); + } +*/ + return firstneigh; //nbor->host_jlist.begin()-host_start; +} + // --------------------------------------------------------------------------- // Calculate the real-space induced field, returning field and fieldp // --------------------------------------------------------------------------- @@ -558,7 +708,8 @@ int HippoT::umutual2b(const int eflag, const int vflag) { } this->k_umutual2b.set_size(GX,BX); - this->k_umutual2b.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar, + this->k_umutual2b.run(&this->atom->x, &this->atom->extra, + &coeff_amtype, &coeff_amclass, &sp_polar, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->dev_short_nbor, &this->_fieldp, &ainum, &_nall, &nbor_pitch, &this->_threads_per_atom, &this->_aewald, diff --git a/lib/gpu/lal_hippo.cu b/lib/gpu/lal_hippo.cu index 45361ed1fb..487e852baf 100644 --- a/lib/gpu/lal_hippo.cu +++ b/lib/gpu/lal_hippo.cu @@ -16,7 +16,6 @@ #if defined(NV_KERNEL) || defined(USE_HIP) #include #include "lal_hippo_extra.h" -//#include "lal_aux_fun1.h" #ifdef LAMMPS_SMALLBIG #define tagint int #endif @@ -985,10 +984,10 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_, numtyp qiyz = pol3i.x; // rpole[i][9]; numtyp qizz = pol3i.y; // rpole[i][12]; itype = pol3i.z; // amtype[i]; - iclass = coeff_amtype[itype].w; // amtype2class[itype]; - numtyp corei = coeff_amclass[itype].z; // pcore[iclass]; - numtyp alphai = coeff_amclass[itype].w; // palpha[iclass]; + iclass = coeff_amtype[itype].w; // amtype2class[itype]; + numtyp corei = coeff_amclass[iclass].z; // pcore[iclass]; + numtyp alphai = coeff_amclass[iclass].w; // palpha[iclass]; numtyp vali = polar6[i].x; for ( ; nbor { const double aewald, const double felec, const double off2_mpole, double *charge, double *boxlo, double *prd, void **tep_ptr); + /// Compute the real space part of the permanent field (udirect2b) with device neighboring + virtual int** compute_udirect2b(const int ago, const int inum_full, + const int nall, double **host_x, + int *host_type, int *host_amtype, + int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, double* host_pval, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, + int *nspecial15, tagint **special15, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, + int &host_start, int **ilist, int **jnum, + const double cpu_time, bool &success, + const double aewald, const double off2_polar, + double *host_q, double *boxlo, double *prd, + void** fieldp_ptr); + + /// Compute the real space part of the induced field (umutual2b) with device neighboring + virtual int** compute_umutual2b(const int ago, const int inum_full, + const int nall, double **host_x, + int *host_type, int *host_amtype, + int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, double *host_pval, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, + int *nspecial15, tagint **special15, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, + int &host_start, int **ilist, int **jnum, + const double cpu_time, bool &success, + const double aewald, const double off2_polar, + double *host_q, double *boxlo, double *prd, + void** fieldp_ptr); + /// Compute polar real-space with device neighboring virtual int** compute_polar_real(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *host_amtype, diff --git a/lib/gpu/lal_hippo_ext.cpp b/lib/gpu/lal_hippo_ext.cpp index 1851c3aba3..16b697d88f 100644 --- a/lib/gpu/lal_hippo_ext.cpp +++ b/lib/gpu/lal_hippo_ext.cpp @@ -157,7 +157,7 @@ int** hippo_gpu_compute_multipole_real(const int ago, const int inum_full, int** hippo_gpu_compute_udirect2b(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, - double **host_uind, double **host_uinp, + double **host_uind, double **host_uinp, double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, int *nspecial15, tagint** special15, const bool eflag, const bool vflag, const bool eatom, @@ -166,7 +166,7 @@ int** hippo_gpu_compute_udirect2b(const int ago, const int inum_full, bool &success, const double aewald, const double off2, double *host_q, double *boxlo, double *prd, void **fieldp_ptr) { return HIPPOMF.compute_udirect2b(ago, inum_full, nall, host_x, host_type, - host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, + host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval, sublo, subhi, tag, nspecial, special, nspecial15, special15, eflag, vflag, eatom, vatom, host_start, ilist, jnum, cpu_time, success, aewald, off2, host_q, boxlo, prd, fieldp_ptr); @@ -175,7 +175,7 @@ int** hippo_gpu_compute_udirect2b(const int ago, const int inum_full, int** hippo_gpu_compute_umutual2b(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, - double **host_uind, double **host_uinp, + double **host_uind, double **host_uinp, double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, int *nspecial15, tagint** special15, const bool eflag, const bool vflag, @@ -184,7 +184,7 @@ int** hippo_gpu_compute_umutual2b(const int ago, const int inum_full, bool &success, const double aewald, const double off2, double *host_q, double *boxlo, double *prd, void **fieldp_ptr) { return HIPPOMF.compute_umutual2b(ago, inum_full, nall, host_x, host_type, - host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, + host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval, sublo, subhi, tag, nspecial, special, nspecial15, special15, eflag, vflag, eatom, vatom, host_start, ilist, jnum, cpu_time, success, aewald, off2, host_q, boxlo, prd, fieldp_ptr); diff --git a/lib/gpu/lal_hippo_extra.h b/lib/gpu/lal_hippo_extra.h index a06ac4425c..cacee4ae72 100644 --- a/lib/gpu/lal_hippo_extra.h +++ b/lib/gpu/lal_hippo_extra.h @@ -59,7 +59,7 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1, // compute tolerance value for damping exponents eps = (numtyp)0.001; - diff = dmpi-dmpk; + diff = dmpi-dmpk; // fabs(dmpi-dmpk) if (diff < (numtyp)0) diff = -diff; // treat the case where alpha damping exponents are equal @@ -322,6 +322,109 @@ ucl_inline void damppole(const numtyp r, const int rorder, } } +/* ---------------------------------------------------------------------- + dampdir = direct field damping coefficents + dampdir generates coefficients for the direct field damping + function for powers of the interatomic distance +------------------------------------------------------------------------- */ +ucl_inline void dampdir(numtyp r, numtyp alphai, numtyp alphak, numtyp *dmpi, numtyp *dmpk) +{ + numtyp eps,diff; + numtyp expi,expk; + numtyp dampi,dampk; + numtyp dampi2,dampk2; + numtyp dampi3,dampk3; + numtyp dampi4,dampk4; + + // compute tolerance and exponential damping factors + + eps = (numtyp)0.001; + diff = alphai-alphak; // fabs(alphai-alphak); + if (diff < (numtyp)0) diff = -diff; + dampi = alphai * r; + dampk = alphak * r; + expi = ucl_exp(-dampi); + expk = ucl_exp(-dampk); + + // core-valence charge penetration damping for Gordon f1 (HIPPO) + + dampi2 = dampi * dampi; + dampi3 = dampi * dampi2; + dampi4 = dampi2 * dampi2; + dmpi[2] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2)*expi; + dmpi[4] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0)*expi; + dmpi[6] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 + dampi4/(numtyp)30.0)*expi; + if (diff < eps) { + dmpk[2] = dmpi[2]; + dmpk[4] = dmpi[4]; + dmpk[6] = dmpi[6]; + } else { + dampk2 = dampk * dampk; + dampk3 = dampk * dampk2; + dampk4 = dampk2 * dampk2; + dmpk[2] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2)*expk; + dmpk[4] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0)*expk; + dmpk[6] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 + dampk4/30.0)*expk; + } +} + +/* ---------------------------------------------------------------------- + dampmut = mutual field damping coefficents + dampmut generates coefficients for the mutual field damping + function for powers of the interatomic distance +------------------------------------------------------------------------- */ + +ucl_inline void dampmut(numtyp r, numtyp alphai, numtyp alphak, numtyp dmpik[5]) +{ + numtyp termi,termk; + numtyp termi2,termk2; + numtyp alphai2,alphak2; + numtyp eps,diff; + numtyp expi,expk; + numtyp dampi,dampk; + numtyp dampi2,dampi3; + numtyp dampi4,dampi5; + numtyp dampk2,dampk3; + + // compute tolerance and exponential damping factors + + eps = (numtyp)0.001; + diff = alphai-alphak; // fabs(alphai-alphak); + if (diff < (numtyp)0) diff = -diff; + dampi = alphai * r; + dampk = alphak * r; + expi = ucl_exp(-dampi); + expk = ucl_exp(-dampk); + + // valence-valence charge penetration damping for Gordon f1 (HIPPO) + + dampi2 = dampi * dampi; + dampi3 = dampi * dampi2; + if (diff < eps) { + dampi4 = dampi2 * dampi2; + dampi5 = dampi2 * dampi3; + dmpik[2] = 1.0 - (1.0 + dampi + 0.5*dampi2 + + 7.0*dampi3/48.0 + dampi4/48.0)*expi; + dmpik[4] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0 + + dampi4/24.0 + dampi5/144.0)*expi; + } else { + dampk2 = dampk * dampk; + dampk3 = dampk * dampk2; + alphai2 = alphai * alphai; + alphak2 = alphak * alphak; + termi = alphak2 / (alphak2-alphai2); + termk = alphai2 / (alphai2-alphak2); + termi2 = termi * termi; + termk2 = termk * termk; + dmpik[2] = 1.0 - termi2*(1.0+dampi+0.5*dampi2)*expi - + termk2*(1.0+dampk+0.5*dampk2)*expk - + 2.0*termi2*termk*(1.0+dampi)*expi - 2.0*termk2*termi*(1.0+dampk)*expk; + dmpik[4] = 1.0 - termi2*(1.0+dampi+0.5*dampi2 + dampi3/6.0)*expi - + termk2*(1.0+dampk+0.5*dampk2 + dampk3/6.00)*expk - + 2.0*termi2*termk *(1.0+dampi+dampi2/3.0)*expi - + 2.0*termk2*termi *(1.0+dampk+dampk2/3.0)*expk; + } +} #endif diff --git a/src/AMOEBA/amoeba_induce.cpp b/src/AMOEBA/amoeba_induce.cpp index 5b855abdd0..617eb89fcd 100644 --- a/src/AMOEBA/amoeba_induce.cpp +++ b/src/AMOEBA/amoeba_induce.cpp @@ -1900,7 +1900,7 @@ void PairAmoeba::dampmut(double r, double alphai, double alphak, double *dmpik) ------------------------------------------------------------------------- */ void PairAmoeba::dampdir(double r, double alphai, double alphak, - double *dmpi, double *dmpk) + double dmpi[7], double dmpk[7]) { double eps,diff; double expi,expk; diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp index 1067969c7b..f4cbf28561 100644 --- a/src/GPU/pair_hippo_gpu.cpp +++ b/src/GPU/pair_hippo_gpu.cpp @@ -89,7 +89,7 @@ int ** hippo_gpu_compute_multipole_real(const int ago, const int inum, const int int ** hippo_gpu_compute_udirect2b(const int ago, const int inum, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, - double *sublo, double *subhi, tagint *tag, int **nspecial, + double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, int* nspecial15, tagint** special15, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, @@ -98,7 +98,7 @@ int ** hippo_gpu_compute_udirect2b(const int ago, const int inum, const int nall int ** hippo_gpu_compute_umutual2b(const int ago, const int inum, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, - double **host_rpole, double **host_uind, double **host_uinp, + double **host_rpole, double **host_uind, double **host_uinp, double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, int* nspecial15, tagint** special15, const bool eflag, const bool vflag, const bool eatom, const bool vatom, @@ -136,8 +136,8 @@ PairHippoGPU::PairHippoGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE) gpu_repulsion_ready = false; // true for HIPPO when ready gpu_dispersion_real_ready = true; // true for HIPPO when ready gpu_multipole_real_ready = true; - gpu_udirect2b_ready = false; - gpu_umutual2b_ready = false; + gpu_udirect2b_ready = true; + gpu_umutual2b_ready = true; gpu_polar_real_ready = true; GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); @@ -791,7 +791,7 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp) firstneigh = hippo_gpu_compute_udirect2b(neighbor->ago, inum, nall, atom->x, atom->type, amtype, amgroup, rpole, - uind, uinp, sublo, subhi, atom->tag, + uind, uinp, pval, sublo, subhi, atom->tag, atom->nspecial, atom->special, atom->nspecial15, atom->special15, eflag, vflag, eflag_atom, vflag_atom, @@ -1015,14 +1015,14 @@ void PairHippoGPU::umutual2b(double **field, double **fieldp) else choose(POLAR); firstneigh = hippo_gpu_compute_umutual2b(neighbor->ago, inum, nall, atom->x, - atom->type, amtype, amgroup, rpole, - uind, uinp, sublo, subhi, atom->tag, - atom->nspecial, atom->special, - atom->nspecial15, atom->special15, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success,aewald, off2, atom->q, - domain->boxlo, domain->prd, &fieldp_pinned); + atom->type, amtype, amgroup, rpole, + uind, uinp, pval, sublo, subhi, atom->tag, + atom->nspecial, atom->special, + atom->nspecial15, atom->special15, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success,aewald, off2, atom->q, + domain->boxlo, domain->prd, &fieldp_pinned); if (!success) error->one(FLERR,"Insufficient memory on accelerator");