From 62ecf98cda4d1bd970b7bf1b5e8f1a09c388d009 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Fri, 16 Sep 2022 14:47:16 -0500 Subject: [PATCH] Enabled fphi_uind in hippo/gpu, really need to refactor hippo and amoeba in the GPU lib to remove kernel duplicates --- lib/gpu/Nvidia.makefile | 26 +-- lib/gpu/lal_amoeba.cpp | 3 +- lib/gpu/lal_amoeba.cu | 2 +- lib/gpu/lal_base_amoeba.cpp | 24 +-- lib/gpu/lal_base_amoeba.h | 10 +- lib/gpu/lal_hippo.cpp | 3 +- lib/gpu/lal_hippo.cu | 301 ++++++++++++++++++++++++++++++++++ lib/gpu/lal_hippo_ext.cpp | 14 ++ src/GPU/pair_amoeba_gpu.cpp | 2 +- src/GPU/pair_hippo_gpu.cpp | 311 ++++++++++++++++++++++++++++++++---- src/GPU/pair_hippo_gpu.h | 6 + 11 files changed, 626 insertions(+), 76 deletions(-) diff --git a/lib/gpu/Nvidia.makefile b/lib/gpu/Nvidia.makefile index c52246b06b..5f50486e28 100644 --- a/lib/gpu/Nvidia.makefile +++ b/lib/gpu/Nvidia.makefile @@ -68,31 +68,7 @@ $(OBJ_DIR)/%_cubin.h: lal_%.cu $(PRE1_H) # host code compilation -$(OBJ_DIR)/lal_answer.o: lal_answer.cpp $(HOST_H) - $(CUDR) -o $@ -c lal_answer.cpp -I$(OBJ_DIR) - -$(OBJ_DIR)/lal_dpd_tstat_ext.o: lal_dpd_tstat_ext.cpp lal_dpd.h $(HOST_H) - $(CUDR) -o $@ -c lal_dpd_tstat_ext.cpp -I$(OBJ_DIR) - -$(OBJ_DIR)/lal_eam_alloy_ext.o: lal_eam_alloy_ext.cpp lal_eam.h $(HOST_H) - $(CUDR) -o $@ -c lal_eam_alloy_ext.cpp -I$(OBJ_DIR) - -$(OBJ_DIR)/lal_eam_fs_ext.o: lal_eam_fs_ext.cpp lal_eam.h $(HOST_H) - $(CUDR) -o $@ -c lal_eam_fs_ext.cpp -I$(OBJ_DIR) - -$(OBJ_DIR)/lal_neighbor.o: lal_neighbor.cpp $(HOST_H) - $(CUDR) -o $@ -c lal_neighbor.cpp -I$(OBJ_DIR) - -$(OBJ_DIR)/lal_neighbor_shared.o: lal_neighbor_shared.cpp $(HOST_H) - $(CUDR) -o $@ -c lal_neighbor_shared.cpp -I$(OBJ_DIR) - -$(OBJ_DIR)/lal_%_ext.o: lal_%_ext.cpp lal_%.h $(HOST_H) - $(CUDR) -o $@ -c $< -I$(OBJ_DIR) - -$(OBJ_DIR)/lal_base_%.o: lal_base_%.cpp $(HOST_H) - $(CUDR) -o $@ -c $< -I$(OBJ_DIR) - -$(OBJ_DIR)/lal_%.o: lal_%.cpp %_cubin.h $(HOST_H) +$(OBJ_DIR)/lal_%.o: lal_%.cpp $(CUHS) $(HOST_H) $(CUDR) -o $@ -c $< -I$(OBJ_DIR) #ifdef CUDPP_OPT diff --git a/lib/gpu/lal_amoeba.cpp b/lib/gpu/lal_amoeba.cpp index 48316e9b6e..02870ea861 100644 --- a/lib/gpu/lal_amoeba.cpp +++ b/lib/gpu/lal_amoeba.cpp @@ -64,7 +64,8 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const int max_amclass, cell_size,gpu_split,_screen,amoeba, "k_amoeba_multipole", "k_amoeba_udirect2b", "k_amoeba_umutual2b", "k_amoeba_polar", - "k_amoeba_short_nbor", "k_amoeba_special15"); + "k_amoeba_fphi_uind", "k_amoeba_short_nbor", + "k_amoeba_special15"); if (success!=0) return success; diff --git a/lib/gpu/lal_amoeba.cu b/lib/gpu/lal_amoeba.cu index d391279f5d..66926721cb 100644 --- a/lib/gpu/lal_amoeba.cu +++ b/lib/gpu/lal_amoeba.cu @@ -1626,7 +1626,7 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_, fphi_uind extracts the induced dipole potential from the particle mesh Ewald grid ------------------------------------------------------------------------- */ -__kernel void k_fphi_uind(const __global numtyp4 *restrict thetai1, +__kernel void k_amoeba_fphi_uind(const __global numtyp4 *restrict thetai1, const __global numtyp4 *restrict thetai2, const __global numtyp4 *restrict thetai3, const __global int *restrict igrid, diff --git a/lib/gpu/lal_base_amoeba.cpp b/lib/gpu/lal_base_amoeba.cpp index 3ee0517dfb..eac704fbfc 100644 --- a/lib/gpu/lal_base_amoeba.cpp +++ b/lib/gpu/lal_base_amoeba.cpp @@ -65,6 +65,7 @@ int BaseAmoebaT::init_atomic(const int nlocal, const int nall, const char *k_name_udirect2b, const char *k_name_umutual2b, const char *k_name_polar, + const char *k_name_fphi_uind, const char *k_name_short_nbor, const char* k_name_special15) { screen=_screen; @@ -100,7 +101,7 @@ int BaseAmoebaT::init_atomic(const int nlocal, const int nall, _block_bio_size=device->block_bio_pair(); compile_kernels(*ucl_device,pair_program,k_name_multipole, k_name_udirect2b, k_name_umutual2b,k_name_polar, - k_name_short_nbor, k_name_special15); + k_name_fphi_uind, k_name_short_nbor, k_name_special15); if (_threads_per_atom>1 && gpu_nbor==0) { nbor->packing(true); @@ -934,6 +935,7 @@ void BaseAmoebaT::compile_kernels(UCL_Device &dev, const void *pair_str, const char *kname_udirect2b, const char *kname_umutual2b, const char *kname_polar, + const char *kname_fphi_uind, const char *kname_short_nbor, const char* kname_special15) { if (_compiled) @@ -942,17 +944,17 @@ void BaseAmoebaT::compile_kernels(UCL_Device &dev, const void *pair_str, if (pair_program) delete pair_program; pair_program=new UCL_Program(dev); std::string oclstring = device->compile_string()+" -DEVFLAG=1"; - pair_program->load_string(pair_str,oclstring.c_str(),nullptr,screen); + pair_program->load_string(pair_str, oclstring.c_str(),nullptr, screen); - k_multipole.set_function(*pair_program,kname_multipole); - k_udirect2b.set_function(*pair_program,kname_udirect2b); - k_umutual2b.set_function(*pair_program,kname_umutual2b); - k_polar.set_function(*pair_program,kname_polar); - k_fphi_uind.set_function(*pair_program,"k_fphi_uind"); - k_short_nbor.set_function(*pair_program,kname_short_nbor); - k_special15.set_function(*pair_program,kname_special15); - pos_tex.get_texture(*pair_program,"pos_tex"); - q_tex.get_texture(*pair_program,"q_tex"); + k_multipole.set_function(*pair_program, kname_multipole); + k_udirect2b.set_function(*pair_program, kname_udirect2b); + k_umutual2b.set_function(*pair_program, kname_umutual2b); + k_polar.set_function(*pair_program, kname_polar); + k_fphi_uind.set_function(*pair_program, kname_fphi_uind); + k_short_nbor.set_function(*pair_program, kname_short_nbor); + k_special15.set_function(*pair_program, kname_special15); + pos_tex.get_texture(*pair_program, "pos_tex"); + q_tex.get_texture(*pair_program, "q_tex"); _compiled=true; diff --git a/lib/gpu/lal_base_amoeba.h b/lib/gpu/lal_base_amoeba.h index 802b6962b7..5aeb729993 100644 --- a/lib/gpu/lal_base_amoeba.h +++ b/lib/gpu/lal_base_amoeba.h @@ -62,9 +62,10 @@ class BaseAmoeba { int init_atomic(const int nlocal, const int nall, const int max_nbors, const int maxspecial, const int maxspecial15, const double cell_size, const double gpu_split, FILE *screen, const void *pair_program, - const char *kname_multipole, - const char *kname_udirect2b, const char *kname_umutual2b, - const char *kname_polar, const char *kname_short_nbor, const char* kname_special15); + const char *kname_multipole, const char *kname_udirect2b, + const char *kname_umutual2b, const char *kname_polar, + const char *kname_fphi_uind, const char *kname_short_nbor, + const char* kname_special15); /// Estimate the overhead for GPU context changes and CPU driver void estimate_gpu_overhead(const int add_kernels=0); @@ -309,7 +310,8 @@ class BaseAmoeba { void compile_kernels(UCL_Device &dev, const void *pair_string, const char *kname_multipole, const char *kname_udirect2b, const char *kname_umutual2b, const char *kname_polar, - const char *kname_short_nbor, const char* kname_special15); + const char *kname_fphi_uind, const char *kname_short_nbor, + const char* kname_special15); virtual int multipole_real(const int eflag, const int vflag) = 0; virtual int udirect2b(const int eflag, const int vflag) = 0; diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp index 79a8772c3e..9917ab91a2 100644 --- a/lib/gpu/lal_hippo.cpp +++ b/lib/gpu/lal_hippo.cpp @@ -67,7 +67,8 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass, cell_size,gpu_split,_screen,hippo, "k_hippo_multipole", "k_hippo_udirect2b", "k_hippo_umutual2b", "k_hippo_polar", - "k_hippo_short_nbor", "k_hippo_special15"); + "k_hippo_fphi_uind", "k_hippo_short_nbor", + "k_hippo_special15"); if (success!=0) return success; diff --git a/lib/gpu/lal_hippo.cu b/lib/gpu/lal_hippo.cu index be8d2c0701..dde8f9bfd5 100644 --- a/lib/gpu/lal_hippo.cu +++ b/lib/gpu/lal_hippo.cu @@ -2045,6 +2045,307 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_, offset,eflag,vflag,ans,engv,NUM_BLOCKS_X); } +/* ---------------------------------------------------------------------- + fphi_uind = induced potential from grid + fphi_uind extracts the induced dipole potential from the particle mesh Ewald grid +------------------------------------------------------------------------- */ + +__kernel void k_hippo_fphi_uind(const __global numtyp4 *restrict thetai1, + const __global numtyp4 *restrict thetai2, + const __global numtyp4 *restrict thetai3, + const __global int *restrict igrid, + const __global numtyp *restrict grid, + __global numtyp *restrict fdip_phi1, + __global numtyp *restrict fdip_phi2, + __global numtyp *restrict fdip_sum_phi, + const int bsorder, const int inum, + const int nzlo_out, const int nylo_out, + const int nxlo_out, const int ngridxy, + const int ngridx) +{ + //int tid, ii, offset, i, n_stride; + //atom_info(t_per_atom,ii,tid,offset); + + + int tid=THREAD_ID_X; + int ii=tid+BLOCK_ID_X*BLOCK_SIZE_X; + + if (iimodify, lmp->error); } @@ -198,6 +213,16 @@ void PairHippoGPU::init_style() tq_single = false; else tq_single = true; + + // replace with the gpu counterpart + + if (gpu_umutual1_ready) { + if (use_ewald && ic_kspace) { + delete ic_kspace; + ic_kspace = + new AmoebaConvolutionGPU(lmp,this,nefft1,nefft2,nefft3,bsporder,INDUCE_GRIDC); + } + } } /* ---------------------------------------------------------------------- @@ -392,6 +417,8 @@ void PairHippoGPU::induce() int debug = 1; + first_induce_iteration = true; + // set cutoffs, taper coeffs, and PME params // create qfac here, free at end of polar() @@ -403,8 +430,6 @@ void PairHippoGPU::induce() // owned atoms - double **x = atom->x; - double **f = atom->f; int nlocal = atom->nlocal; // zero out the induced dipoles at each site @@ -996,37 +1021,60 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp) int i,j; double term; + double time0,time1,time2; + // zero field,fieldp for owned and ghost atoms int nlocal = atom->nlocal; int nall = nlocal + atom->nghost; - for (i = 0; i < nall; i++) { - for (j = 0; j < 3; j++) { + memset(&field[0][0], 0, 3*nall *sizeof(double)); + memset(&fieldp[0][0], 0, 3*nall *sizeof(double)); + +/* + for (int i = 0; i < nall; i++) { + for (int j = 0; j < 3; j++) { field[i][j] = 0.0; fieldp[i][j] = 0.0; } } - +*/ + // get the real space portion of the mutual field first + MPI_Barrier(world); + time0 = MPI_Wtime(); + if (polar_rspace_flag) umutual2b(field,fieldp); + time1 = MPI_Wtime(); // get the reciprocal space part of the mutual field if (polar_kspace_flag) umutual1(field,fieldp); + time2 = MPI_Wtime(); // add the self-energy portion of the mutual field term = (4.0/3.0) * aewald*aewald*aewald / MY_PIS; + for (int i = 0; i < nlocal; i++) { + field[i][0] += term*uind[i][0]; + field[i][1] += term*uind[i][1]; + field[i][2] += term*uind[i][2]; + } + for (int i = 0; i < nlocal; i++) { + fieldp[i][0] += term*uinp[i][0]; + fieldp[i][1] += term*uinp[i][1]; + fieldp[i][2] += term*uinp[i][2]; + } +/* for (i = 0; i < nlocal; i++) { for (j = 0; j < 3; j++) { field[i][j] += term*uind[i][j]; fieldp[i][j] += term*uinp[i][j]; } } - - // accumulate the field and fieldp values from real-space portion from umutual2b() on the GPU +*/ + // accumulate the field and fieldp values from the real-space portion from umutual2b() on the GPU // field and fieldp may already have some nonzero values from kspace (umutual1 and self) hippo_gpu_update_fieldp(&fieldp_pinned); @@ -1049,6 +1097,228 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp) fieldp[i][1] += fieldp_ptr[idx+1]; fieldp[i][2] += fieldp_ptr[idx+2]; } + + // accumulate timing information + + time_mutual_rspace += time1 - time0; + time_mutual_kspace += time2 - time1; +} + +/* ---------------------------------------------------------------------- + umutual1 = Ewald recip mutual induced field + umutual1 computes the reciprocal space contribution of the + induced atomic dipole moments to the field +------------------------------------------------------------------------- */ + +void PairHippoGPU::umutual1(double **field, double **fieldp) +{ + int m,n; + int nxlo,nxhi,nylo,nyhi,nzlo,nzhi; + double term; + double a[3][3]; // indices not flipped vs Fortran + + // return if the Ewald coefficient is zero + + if (aewald < 1.0e-6) return; + + // convert Cartesian dipoles to fractional coordinates + + for (int j = 0; j < 3; j++) { + a[0][j] = nfft1 * recip[0][j]; + a[1][j] = nfft2 * recip[1][j]; + a[2][j] = nfft3 * recip[2][j]; + } + + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + fuind[i][0] = a[0][0]*uind[i][0] + a[0][1]*uind[i][1] + a[0][2]*uind[i][2]; + fuind[i][1] = a[1][0]*uind[i][0] + a[1][1]*uind[i][1] + a[1][2]*uind[i][2]; + fuind[i][2] = a[2][0]*uind[i][0] + a[2][1]*uind[i][1] + a[2][2]*uind[i][2]; + } + + for (int i = 0; i < nlocal; i++) { + fuinp[i][0] = a[0][0]*uinp[i][0] + a[0][1]*uinp[i][1] + a[0][2]*uinp[i][2]; + fuinp[i][1] = a[1][0]*uinp[i][0] + a[1][1]*uinp[i][1] + a[1][2]*uinp[i][2]; + fuinp[i][2] = a[2][0]*uinp[i][0] + a[2][1]*uinp[i][1] + a[2][2]*uinp[i][2]; + } +/* + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + fuind[i][j] = a[j][0]*uind[i][0] + a[j][1]*uind[i][1] + a[j][2]*uind[i][2]; + fuinp[i][j] = a[j][0]*uinp[i][0] + a[j][1]*uinp[i][1] + a[j][2]*uinp[i][2]; + } + } +*/ + // gridpre = my portion of 4d grid in brick decomp w/ ghost values + + double ****gridpre = (double ****) ic_kspace->zero(); + + // map 2 values to grid + + grid_uind(fuind,fuinp,gridpre); + + // pre-convolution operations including forward FFT + // gridfft = my portion of complex 3d grid in FFT decomposition + + double *gridfft = ic_kspace->pre_convolution(); + + // --------------------- + // convolution operation + // --------------------- + + nxlo = ic_kspace->nxlo_fft; + nxhi = ic_kspace->nxhi_fft; + nylo = ic_kspace->nylo_fft; + nyhi = ic_kspace->nyhi_fft; + nzlo = ic_kspace->nzlo_fft; + nzhi = ic_kspace->nzhi_fft; + + // use qfac values stored in udirect1() + + m = n = 0; + for (int k = nzlo; k <= nzhi; k++) { + for (int j = nylo; j <= nyhi; j++) { + for (int i = nxlo; i <= nxhi; i++) { + term = qfac[m++]; + gridfft[n] *= term; + gridfft[n+1] *= term; + n += 2; + } + } + } + + // post-convolution operations including backward FFT + // gridppost = my portion of 4d grid in brick decomp w/ ghost values + + double ****gridpost = (double ****) ic_kspace->post_convolution(); + + // get potential + double time0, time1; + + MPI_Barrier(world); + time0 = MPI_Wtime(); + + fphi_uind(gridpost,fdip_phi1,fdip_phi2,fdip_sum_phi); + + time1 = MPI_Wtime(); + time_fphi_uind += (time1 - time0); + + // store fractional reciprocal potentials for OPT method + + if (poltyp == OPT) { + for (int i = 0; i < nlocal; i++) { + for (int j = 0; j < 10; j++) { + fopt[i][optlevel][j] = fdip_phi1[i][j]; + foptp[i][optlevel][j] = fdip_phi2[i][j]; + } + } + } + + // convert the dipole fields from fractional to Cartesian + + for (int i = 0; i < 3; i++) { + a[0][i] = nfft1 * recip[0][i]; + a[1][i] = nfft2 * recip[1][i]; + a[2][i] = nfft3 * recip[2][i]; + } + + for (int i = 0; i < nlocal; i++) { + double dfx = a[0][0]*fdip_phi1[i][1] + + a[0][1]*fdip_phi1[i][2] + a[0][2]*fdip_phi1[i][3]; + double dfy = a[1][0]*fdip_phi1[i][1] + + a[1][1]*fdip_phi1[i][2] + a[1][2]*fdip_phi1[i][3]; + double dfz = a[2][0]*fdip_phi1[i][1] + + a[2][1]*fdip_phi1[i][2] + a[2][2]*fdip_phi1[i][3]; + field[i][0] -= dfx; + field[i][1] -= dfy; + field[i][2] -= dfz; + } + + for (int i = 0; i < nlocal; i++) { + double dfx = a[0][0]*fdip_phi2[i][1] + + a[0][1]*fdip_phi2[i][2] + a[0][2]*fdip_phi2[i][3]; + double dfy = a[1][0]*fdip_phi2[i][1] + + a[1][1]*fdip_phi2[i][2] + a[1][2]*fdip_phi2[i][3]; + double dfz = a[2][0]*fdip_phi2[i][1] + + a[2][1]*fdip_phi2[i][2] + a[2][2]*fdip_phi2[i][3]; + fieldp[i][0] -= dfx; + fieldp[i][1] -= dfy; + fieldp[i][2] -= dfz; + } +/* + for (int i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + dipfield1[i][j] = a[j][0]*fdip_phi1[i][1] + + a[j][1]*fdip_phi1[i][2] + a[j][2]*fdip_phi1[i][3]; + dipfield2[i][j] = a[j][0]*fdip_phi2[i][1] + + a[j][1]*fdip_phi2[i][2] + a[j][2]*fdip_phi2[i][3]; + } + } + + // increment the field at each multipole site + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + field[i][j] -= dipfield1[i][j]; + fieldp[i][j] -= dipfield2[i][j]; + } + } +*/ +} + +/* ---------------------------------------------------------------------- + fphi_uind = induced potential from grid + fphi_uind extracts the induced dipole potential from the particle mesh Ewald grid +------------------------------------------------------------------------- */ + +void PairHippoGPU::fphi_uind(double ****grid, double **fdip_phi1, + double **fdip_phi2, double **fdip_sum_phi) +{ + if (!gpu_fphi_uind_ready) { + PairAmoeba::fphi_uind(grid, fdip_phi1, fdip_phi2, fdip_sum_phi); + return; + } + + void* fdip_phi1_pinned = nullptr; + void* fdip_phi2_pinned = nullptr; + void* fdip_sum_phi_pinned = nullptr; + hippo_gpu_fphi_uind(atom->nlocal, bsorder, thetai1, + thetai2, thetai3, igrid, grid, + &fdip_phi1_pinned, &fdip_phi2_pinned, + &fdip_sum_phi_pinned, + ic_kspace->nzlo_out, ic_kspace->nzhi_out, + ic_kspace->nylo_out, ic_kspace->nyhi_out, + ic_kspace->nxlo_out, ic_kspace->nxhi_out, + first_induce_iteration); + + int nlocal = atom->nlocal; + double *_fdip_phi1_ptr = (double *)fdip_phi1_pinned; + for (int i = 0; i < nlocal; i++) { + int n = i; + for (int m = 0; m < 10; m++) { + fdip_phi1[i][m] = _fdip_phi1_ptr[n]; + n += nlocal; + } + } + + double *_fdip_phi2_ptr = (double *)fdip_phi2_pinned; + for (int i = 0; i < nlocal; i++) { + int n = i; + for (int m = 0; m < 10; m++) { + fdip_phi2[i][m] = _fdip_phi2_ptr[n]; + n += nlocal; + } + } + + double *_fdip_sum_phi_ptr = (double *)fdip_sum_phi_pinned; + for (int i = 0; i < nlocal; i++) { + int n = i; + for (int m = 0; m < 20; m++) { + fdip_sum_phi[i][m] = _fdip_sum_phi_ptr[n]; + n += nlocal; + } + } } /* ---------------------------------------------------------------------- @@ -1089,29 +1359,6 @@ void PairHippoGPU::umutual2b(double **field, double **fieldp) double *pval = atom->dvector[index_pval]; hippo_gpu_compute_umutual2b(amtype, amgroup, rpole, uind, uinp, pval, aewald, off2, &fieldp_pinned); -/* - // accumulate the field and fieldp values from the GPU lib - // field and fieldp may already have some nonzero values from kspace (umutual1) - - int nlocal = atom->nlocal; - double *field_ptr = (double *)fieldp_pinned; - - for (int i = 0; i < nlocal; i++) { - int idx = 4*i; - field[i][0] += field_ptr[idx]; - field[i][1] += field_ptr[idx+1]; - field[i][2] += field_ptr[idx+2]; - } - - double* fieldp_ptr = (double *)fieldp_pinned; - fieldp_ptr += 4*inum; - for (int i = 0; i < nlocal; i++) { - int idx = 4*i; - fieldp[i][0] += fieldp_ptr[idx]; - fieldp[i][1] += fieldp_ptr[idx+1]; - fieldp[i][2] += fieldp_ptr[idx+2]; - } -*/ } /* ---------------------------------------------------------------------- diff --git a/src/GPU/pair_hippo_gpu.h b/src/GPU/pair_hippo_gpu.h index 1ed1c3299d..742fbfb119 100644 --- a/src/GPU/pair_hippo_gpu.h +++ b/src/GPU/pair_hippo_gpu.h @@ -39,6 +39,8 @@ class PairHippoGPU : public PairAmoeba { virtual void dispersion_real(); virtual void multipole_real(); virtual void udirect2b(double **, double **); + virtual void umutual1(double **, double **); + virtual void fphi_uind(double ****, double **, double **, double **); virtual void umutual2b(double **, double **); virtual void ufield0c(double **, double **); virtual void polar_real(); @@ -55,9 +57,13 @@ class PairHippoGPU : public PairAmoeba { bool gpu_dispersion_real_ready; bool gpu_multipole_real_ready; bool gpu_udirect2b_ready; + bool gpu_umutual1_ready; + bool gpu_fphi_uind_ready; bool gpu_umutual2b_ready; bool gpu_polar_real_ready; + bool first_induce_iteration; + void udirect2b_cpu(); template