diff --git a/lib/gpu/atomic_gpu_memory.cpp b/lib/gpu/atomic_gpu_memory.cpp index fbf5262089..5279223f20 100644 --- a/lib/gpu/atomic_gpu_memory.cpp +++ b/lib/gpu/atomic_gpu_memory.cpp @@ -68,6 +68,9 @@ bool AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall, _block_size=ucl_device->group_size(); compile_kernels(*ucl_device,pair_program); + // Initialize host-device load balancer + hd_balancer.init(device,gpu_nbor,gpu_split); + // Initialize timers for the selected GPU time_pair.init(*ucl_device); time_pair.zero(); @@ -76,9 +79,6 @@ bool AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall, _max_an_bytes=ans->gpu_bytes()+nbor->gpu_bytes(); - // Initialize host-device load balancer - hd_balancer.init(device,gpu_nbor,gpu_split); - return true; } @@ -204,52 +204,6 @@ void AtomicGPUMemoryT::compute(const int f_ago, const int inum_full, hd_balancer.stop_timer(); } -// --------------------------------------------------------------------------- -// Reneighbor on GPU if necessary and then compute forces, virials, energies -// --------------------------------------------------------------------------- -template -int * AtomicGPUMemoryT::compute(const int ago, const int inum_full, - const int nall, double **host_x, int *host_type, - double *boxlo, double *boxhi, int *tag, - int **nspecial, int **special, const bool eflag, - const bool vflag, const bool eatom, - const bool vatom, int &host_start, - const double cpu_time, bool &success) { - acc_timers(); - if (inum_full==0) { - host_start=0; - // Make sure textures are correct if realloc by a different hybrid style - resize_atom(0,nall,success); - zero_timers(); - return NULL; - } - - hd_balancer.balance(cpu_time); - int inum=hd_balancer.get_gpu_count(ago,inum_full); - ans->inum(inum); - host_start=inum; - - // Build neighbor list on GPU if necessary - if (ago==0) { - build_nbor_list(inum, inum_full-inum, nall, host_x, host_type, - boxlo, boxhi, tag, nspecial, special, success); - if (!success) - return NULL; - hd_balancer.start_timer(); - } else { - atom->cast_x_data(host_x,host_type); - hd_balancer.start_timer(); - atom->add_x_data(host_x,host_type); - } - - loop(eflag,vflag); - ans->copy_answers(eflag,vflag,eatom,vatom); - device->add_ans_object(ans); - hd_balancer.stop_timer(); - - return nbor->host_nbor.begin(); -} - // --------------------------------------------------------------------------- // Reneighbor on GPU if necessary and then compute forces, virials, energies // --------------------------------------------------------------------------- diff --git a/lib/gpu/charge_gpu_memory.cpp b/lib/gpu/charge_gpu_memory.cpp index a0c2d2f5d7..47b200ce23 100644 --- a/lib/gpu/charge_gpu_memory.cpp +++ b/lib/gpu/charge_gpu_memory.cpp @@ -212,12 +212,13 @@ void ChargeGPUMemoryT::compute(const int f_ago, const int inum_full, // Reneighbor on GPU if necessary and then compute forces, virials, energies // --------------------------------------------------------------------------- template -int * ChargeGPUMemoryT::compute(const int ago, const int inum_full, +int** ChargeGPUMemoryT::compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *boxlo, double *boxhi, int *tag, int **nspecial, int **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q) { acc_timers(); @@ -249,13 +250,15 @@ int * ChargeGPUMemoryT::compute(const int ago, const int inum_full, atom->add_x_data(host_x,host_type); } atom->add_q_data(); + *ilist=nbor->host_ilist.begin(); + *jnum=nbor->host_acc.begin(); loop(eflag,vflag); ans->copy_answers(eflag,vflag,eatom,vatom); device->add_ans_object(ans); hd_balancer.stop_timer(); - return nbor->host_nbor.begin(); + return nbor->host_jlist.begin()-host_start; } template diff --git a/lib/gpu/charge_gpu_memory.h b/lib/gpu/charge_gpu_memory.h index ef8d8b0281..b9d2911c1e 100644 --- a/lib/gpu/charge_gpu_memory.h +++ b/lib/gpu/charge_gpu_memory.h @@ -127,12 +127,13 @@ class ChargeGPUMemory { const double cpu_time, bool &success, double *charge); /// Pair loop with device neighboring - int * compute(const int ago, const int inum_full, const int nall, + int** compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *boxlo, double *boxhi, int *tag, int **nspecial, int **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, - const double cpu_time, bool &success, double *charge); + int **ilist, int **numj, const double cpu_time, bool &success, + double *charge); // -------------------------- DEVICE DATA ------------------------- diff --git a/lib/gpu/cmm_cut_gpu.cpp b/lib/gpu/cmm_cut_gpu.cpp index cc299c4dad..f8ffd29c55 100644 --- a/lib/gpu/cmm_cut_gpu.cpp +++ b/lib/gpu/cmm_cut_gpu.cpp @@ -89,6 +89,7 @@ bool cmm_gpu_init(const int ntypes, double **cutsq, int **cg_types, } if (message) fprintf(screen,"\n"); + CMMMF.estimate_gpu_overhead(); return true; } @@ -96,15 +97,16 @@ void cmm_gpu_clear() { CMMMF.clear(); } -int * cmm_gpu_compute_n(const int ago, const int inum_full, +int** cmm_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *boxlo, double *boxhi, int *tag, int **nspecial, int **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, - const double cpu_time, bool &success) { + int **ilist, int **jnum, const double cpu_time, + bool &success) { return CMMMF.compute(ago, inum_full, nall, host_x, host_type, boxlo, boxhi, tag, nspecial, special, eflag, vflag, eatom, - vatom, host_start, cpu_time, success); + vatom, host_start, ilist, jnum, cpu_time, success); } void cmm_gpu_compute(const int ago, const int inum_full, const int nall, diff --git a/lib/gpu/cmmc_long_gpu.cpp b/lib/gpu/cmmc_long_gpu.cpp index cab192b5aa..752c281af9 100644 --- a/lib/gpu/cmmc_long_gpu.cpp +++ b/lib/gpu/cmmc_long_gpu.cpp @@ -94,6 +94,7 @@ bool cmml_gpu_init(const int ntypes, double **cutsq, int **cg_type, } if (message) fprintf(screen,"\n"); + CMMLMF.estimate_gpu_overhead(); return true; } @@ -101,15 +102,17 @@ void cmml_gpu_clear() { CMMLMF.clear(); } -int * cmml_gpu_compute_n(const int ago, const int inum_full, +int** cmml_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *boxlo, double *boxhi, int *tag, int **nspecial, int **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, - const double cpu_time, bool &success, double *host_q) { + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q) { return CMMLMF.compute(ago, inum_full, nall, host_x, host_type, boxlo, boxhi, tag, nspecial, special, eflag, vflag, eatom, - vatom, host_start, cpu_time, success, host_q); + vatom, host_start, ilist, jnum, cpu_time, success, + host_q); } void cmml_gpu_compute(const int ago, const int inum_full, const int nall, diff --git a/lib/gpu/cmmc_msm_gpu.cpp b/lib/gpu/cmmc_msm_gpu.cpp index 6b3babbb7c..66e7bd4b04 100644 --- a/lib/gpu/cmmc_msm_gpu.cpp +++ b/lib/gpu/cmmc_msm_gpu.cpp @@ -94,6 +94,7 @@ bool cmmm_gpu_init(const int ntypes, double **cutsq, int **cg_type, } if (message) fprintf(screen,"\n"); + CMMMMF.estimate_gpu_overhead(); return true; } @@ -101,15 +102,17 @@ void cmmm_gpu_clear() { CMMMMF.clear(); } -int * cmmm_gpu_compute_n(const int ago, const int inum_full, +int** cmmm_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *boxlo, double *boxhi, int *tag, int **nspecial, int **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, - const double cpu_time, bool &success, double *host_q) { + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q) { return CMMMMF.compute(ago, inum_full, nall, host_x, host_type, boxlo, boxhi, tag, nspecial, special, eflag, vflag, eatom, - vatom, host_start, cpu_time, success, host_q); + vatom, host_start, ilist, jnum, cpu_time, success, + host_q); } void cmmm_gpu_compute(const int ago, const int inum_full, const int nall, diff --git a/lib/gpu/crml_gpu.cpp b/lib/gpu/crml_gpu.cpp index 347febe729..ea15346747 100644 --- a/lib/gpu/crml_gpu.cpp +++ b/lib/gpu/crml_gpu.cpp @@ -99,6 +99,7 @@ bool crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1, } if (message) fprintf(screen,"\n"); + CRMLMF.estimate_gpu_overhead(); return true; } @@ -106,15 +107,17 @@ void crml_gpu_clear() { CRMLMF.clear(); } -int * crml_gpu_compute_n(const int ago, const int inum_full, +int** crml_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *boxlo, double *boxhi, int *tag, int **nspecial, int **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, - const double cpu_time, bool &success, double *host_q) { + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q) { return CRMLMF.compute(ago, inum_full, nall, host_x, host_type, boxlo, boxhi, tag, nspecial, special, eflag, vflag, eatom, - vatom, host_start, cpu_time, success, host_q); + vatom, host_start, ilist, jnum, cpu_time, success, + host_q); } void crml_gpu_compute(const int ago, const int inum_full, diff --git a/lib/gpu/gb_gpu.cpp b/lib/gpu/gb_gpu.cpp index 7afe4ff262..c13a5ae638 100644 --- a/lib/gpu/gb_gpu.cpp +++ b/lib/gpu/gb_gpu.cpp @@ -114,6 +114,7 @@ bool gb_gpu_init(const int ntypes, const double gamma, } if (message) fprintf(screen,"\n"); + GBMF.estimate_gpu_overhead(); return true; } @@ -326,14 +327,14 @@ void _gb_gpu_gayberne(GBMT &gbm, const bool _eflag, const bool _vflag) { // Reneighbor on GPU if necessary and then compute forces, torques, energies // --------------------------------------------------------------------------- template -inline int * _gb_gpu_compute_n(gbmtyp &gbm, const int ago, - const int inum_full, const int nall, - double **host_x, int *host_type, - double *boxlo, double *boxhi, const bool eflag, - const bool vflag, const bool eatom, +inline int** _gb_gpu_compute_n(gbmtyp &gbm, const int ago, + const int inum_full, const int nall, + double **host_x, int *host_type, + double *boxlo, double *boxhi, const bool eflag, + const bool vflag, const bool eatom, const bool vatom, int &host_start, - const double cpu_time, bool &success, - double **host_quat) { + int **ilist, int **jnum, const double cpu_time, + bool &success, double **host_quat) { gbm.acc_timers(); if (inum_full==0) { host_start=0; @@ -363,23 +364,25 @@ inline int * _gb_gpu_compute_n(gbmtyp &gbm, const int ago, } gbm.atom->add_quat_data(); + *ilist=gbm.nbor->host_ilist.begin(); + *jnum=gbm.nbor->host_acc.begin(); _gb_gpu_gayberne(gbm,eflag,vflag); gbm.ans->copy_answers(eflag,vflag,eatom,vatom); gbm.device->add_ans_object(gbm.ans); gbm.hd_balancer.stop_timer(); - return gbm.nbor->host_nbor.begin(); + return gbm.nbor->host_jlist.begin()-host_start; } -int * gb_gpu_compute_n(const int ago, const int inum_full, const int nall, +int** gb_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *boxlo, double *boxhi, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, - const double cpu_time, bool &success, - double **host_quat) { + int **ilist, int **jnum, const double cpu_time, + bool &success, double **host_quat) { return _gb_gpu_compute_n(GBMF, ago, inum_full, nall, host_x, host_type, boxlo, - boxhi, eflag, vflag, eatom, vatom, host_start, - cpu_time, success, host_quat); + boxhi, eflag, vflag, eatom, vatom, host_start, ilist, + jnum, cpu_time, success, host_quat); } // --------------------------------------------------------------------------- diff --git a/lib/gpu/gb_gpu_memory.cpp b/lib/gpu/gb_gpu_memory.cpp index 61078b8122..6592bc1e40 100644 --- a/lib/gpu/gb_gpu_memory.cpp +++ b/lib/gpu/gb_gpu_memory.cpp @@ -198,6 +198,11 @@ bool GB_GPU_MemoryT::init(const int ntypes, const double gamma, return (host_olist.alloc(nbor->max_atoms(),*ucl_device)==UCL_SUCCESS); } +template +void GB_GPU_MemoryT::estimate_gpu_overhead() { + device->estimate_gpu_overhead(2,_gpu_overhead,_driver_overhead); +} + template void GB_GPU_MemoryT::clear() { if (!_allocated) @@ -213,7 +218,7 @@ void GB_GPU_MemoryT::clear() { // Output any timing information acc_timers(); - double single[6], times[6]; + double single[9], times[9]; single[0]=atom->transfer_time()+ans->transfer_time(); single[1]=nbor->time_nbor.total_seconds(); @@ -225,6 +230,9 @@ void GB_GPU_MemoryT::clear() { else single[4]=0; single[5]=atom->cast_time()+ans->cast_time(); + single[6]=_gpu_overhead; + single[7]=_driver_overhead; + single[8]=ans->cpu_idle_time(); MPI_Reduce(single,times,6,MPI_DOUBLE,MPI_SUM,0,device->replica()); double avg_split=hd_balancer.all_avg_split(); @@ -259,10 +267,19 @@ void GB_GPU_MemoryT::clear() { fprintf(screen,"Force calc: %.4f s.\n",times[3]/replica_size); fprintf(screen,"LJ calc: %.4f s.\n",times[4]/replica_size); } + fprintf(screen,"GPU Overhead: %.4f s.\n",times[6]/replica_size); fprintf(screen,"Average split: %.4f.\n",avg_split); fprintf(screen,"Max Mem / Proc: %.2f MB.\n",max_mb); + fprintf(screen,"CPU Driver_Time: %.4f s.\n",times[7]/replica_size); + fprintf(screen,"CPU Idle_Time: %.4f s.\n",times[8]/replica_size); fprintf(screen,"-------------------------------------"); fprintf(screen,"--------------------------------\n\n"); + + + fprintf(screen,"Average split: %.4f.\n",avg_split); + fprintf(screen,"Max Mem / Proc: %.2f MB.\n",max_mb); + + } _max_bytes=0.0; diff --git a/lib/gpu/gb_gpu_memory.h b/lib/gpu/gb_gpu_memory.h index b9c0ce8c53..edba318ace 100644 --- a/lib/gpu/gb_gpu_memory.h +++ b/lib/gpu/gb_gpu_memory.h @@ -46,6 +46,9 @@ class GB_GPU_Memory { const int max_nbors, const double cell_size, const double gpu_split, FILE *screen); + /// Estimate the overhead for GPU context changes and CPU driver + void estimate_gpu_overhead(); + /// Check if there is enough storage for atom arrays and realloc if not /** \param success set to false if insufficient memory **/ inline void resize_atom(const int inum, const int nall, bool &success) { @@ -194,6 +197,7 @@ class GB_GPU_Memory { bool _allocated, _compiled; int _block_size; double _max_bytes; + double _gpu_overhead, _driver_overhead; void compile_kernels(UCL_Device &dev); }; diff --git a/lib/gpu/lj96_cut_gpu.cpp b/lib/gpu/lj96_cut_gpu.cpp index fe5df7826e..9b2a555ac4 100644 --- a/lib/gpu/lj96_cut_gpu.cpp +++ b/lib/gpu/lj96_cut_gpu.cpp @@ -88,6 +88,7 @@ bool lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1, } if (message) fprintf(screen,"\n"); + LJ96MF.estimate_gpu_overhead(); return true; } @@ -95,15 +96,16 @@ void lj96_gpu_clear() { LJ96MF.clear(); } -int * lj96_gpu_compute_n(const int ago, const int inum_full, +int** lj96_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *boxlo, double *boxhi, int *tag, int **nspecial, int **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, - const double cpu_time, bool &success) { + int **ilist, int **jnum, const double cpu_time, + bool &success) { return LJ96MF.compute(ago, inum_full, nall, host_x, host_type, boxlo, boxhi, tag, nspecial, special, eflag, vflag, eatom, - vatom, host_start, cpu_time, success); + vatom, host_start, ilist, jnum, cpu_time, success); } void lj96_gpu_compute(const int ago, const int inum_full, const int nall, diff --git a/lib/gpu/lj_expand_gpu.cpp b/lib/gpu/lj_expand_gpu.cpp index 26c42546af..cbfb215dac 100644 --- a/lib/gpu/lj_expand_gpu.cpp +++ b/lib/gpu/lj_expand_gpu.cpp @@ -88,6 +88,7 @@ bool lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1, } if (message) fprintf(screen,"\n"); + LJEMF.estimate_gpu_overhead(); return true; } @@ -95,15 +96,16 @@ void lje_gpu_clear() { LJEMF.clear(); } -int * lje_gpu_compute_n(const int ago, const int inum_full, +int** lje_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *boxlo, double *boxhi, int *tag, int **nspecial, int **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, - const double cpu_time, bool &success) { + int **ilist, int **jnum, const double cpu_time, + bool &success) { return LJEMF.compute(ago, inum_full, nall, host_x, host_type, boxlo, boxhi, tag, nspecial, special, eflag, vflag, eatom, - vatom, host_start, cpu_time, success); + vatom, host_start, ilist, jnum, cpu_time, success); } void lje_gpu_compute(const int ago, const int inum_full, const int nall, diff --git a/lib/gpu/ljc_cut_gpu.cpp b/lib/gpu/ljc_cut_gpu.cpp index 209bebeb76..1dbf06f3e6 100644 --- a/lib/gpu/ljc_cut_gpu.cpp +++ b/lib/gpu/ljc_cut_gpu.cpp @@ -93,6 +93,7 @@ bool ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1, } if (message) fprintf(screen,"\n"); + LJCMF.estimate_gpu_overhead(); return true; } @@ -100,15 +101,17 @@ void ljc_gpu_clear() { LJCMF.clear(); } -int * ljc_gpu_compute_n(const int ago, const int inum_full, +int** ljc_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *boxlo, double *boxhi, int *tag, int **nspecial, int **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, - const double cpu_time, bool &success, double *host_q) { + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q) { return LJCMF.compute(ago, inum_full, nall, host_x, host_type, boxlo, boxhi, tag, nspecial, special, eflag, vflag, eatom, - vatom, host_start, cpu_time, success, host_q); + vatom, host_start, ilist, jnum, cpu_time, success, + host_q); } void ljc_gpu_compute(const int ago, const int inum_full, const int nall, diff --git a/lib/gpu/ljcl_cut_gpu.cpp b/lib/gpu/ljcl_cut_gpu.cpp index 570bf0326d..23b45114db 100644 --- a/lib/gpu/ljcl_cut_gpu.cpp +++ b/lib/gpu/ljcl_cut_gpu.cpp @@ -94,6 +94,7 @@ bool ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, } if (message) fprintf(screen,"\n"); + LJCLMF.estimate_gpu_overhead(); return true; } @@ -101,15 +102,17 @@ void ljcl_gpu_clear() { LJCLMF.clear(); } -int * ljcl_gpu_compute_n(const int ago, const int inum_full, +int** ljcl_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *boxlo, double *boxhi, int *tag, int **nspecial, int **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, - const double cpu_time, bool &success, double *host_q) { + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q) { return LJCLMF.compute(ago, inum_full, nall, host_x, host_type, boxlo, boxhi, tag, nspecial, special, eflag, vflag, eatom, - vatom, host_start, cpu_time, success, host_q); + vatom, host_start, ilist, jnum, cpu_time, success, + host_q); } void ljcl_gpu_compute(const int ago, const int inum_full, const int nall, diff --git a/lib/gpu/morse_gpu.cpp b/lib/gpu/morse_gpu.cpp index 1a3d3b0182..bcdb63c4a8 100644 --- a/lib/gpu/morse_gpu.cpp +++ b/lib/gpu/morse_gpu.cpp @@ -89,6 +89,7 @@ bool mor_gpu_init(const int ntypes, double **cutsq, } if (message) fprintf(screen,"\n"); + MORMF.estimate_gpu_overhead(); return true; } @@ -96,15 +97,16 @@ void mor_gpu_clear() { MORMF.clear(); } -int * mor_gpu_compute_n(const int ago, const int inum_full, +int** mor_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *boxlo, double *boxhi, int *tag, int **nspecial, int **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, - const double cpu_time, bool &success) { + int **ilist, int **jnum, const double cpu_time, + bool &success) { return MORMF.compute(ago, inum_full, nall, host_x, host_type, boxlo, boxhi, tag, nspecial, special, eflag, vflag, eatom, - vatom, host_start, cpu_time, success); + vatom, host_start, ilist, jnum, cpu_time, success); } void mor_gpu_compute(const int ago, const int inum_full, const int nall, diff --git a/lib/gpu/pair_gpu_device.cpp b/lib/gpu/pair_gpu_device.cpp index 30f3dda0be..837e5f52c6 100644 --- a/lib/gpu/pair_gpu_device.cpp +++ b/lib/gpu/pair_gpu_device.cpp @@ -246,9 +246,9 @@ template void PairGPUDeviceT::estimate_gpu_overhead(const int kernel_calls, double &gpu_overhead, double &gpu_driver_overhead) { - UCL_H_Vec *host_data_in, *host_data_out; - UCL_D_Vec *dev_data_in, *dev_data_out, *kernel_data; - UCL_Timer *timers_in, *timers_out, *timers_kernel; + UCL_H_Vec *host_data_in=NULL, *host_data_out=NULL; + UCL_D_Vec *dev_data_in=NULL, *dev_data_out=NULL, *kernel_data=NULL; + UCL_Timer *timers_in=NULL, *timers_out=NULL, *timers_kernel=NULL; UCL_Timer over_timer(*gpu); if (_data_in_estimate>0) { @@ -291,7 +291,6 @@ void PairGPUDeviceT::estimate_gpu_overhead(const int kernel_calls, for (int i=0; i<10; i++) { gpu->sync(); gpu_barrier(); - double driver_t=MPI_Wtime(); over_timer.start(); gpu->sync(); gpu_barrier(); diff --git a/src/GPU/pair_cg_cmm_coul_long_gpu.cpp b/src/GPU/pair_cg_cmm_coul_long_gpu.cpp index 4e8169df1b..c5e7bdb503 100644 --- a/src/GPU/pair_cg_cmm_coul_long_gpu.cpp +++ b/src/GPU/pair_cg_cmm_coul_long_gpu.cpp @@ -105,12 +105,12 @@ void PairCGCMMCoulLongGPU::compute(int eflag, int vflag) int *ilist, *numneigh, **firstneigh; if (gpu_mode == GPU_NEIGH) { inum = atom->nlocal; - gpulist = cmml_gpu_compute_n(neighbor->ago, inum, nall, atom->x, - atom->type, domain->sublo, domain->subhi, - atom->tag, atom->nspecial, atom->special, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success, atom->q); + firstneigh = cmml_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + atom->tag, atom->nspecial, atom->special, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, atom->q); } else { inum = list->inum; ilist = list->ilist; diff --git a/src/GPU/pair_cg_cmm_coul_msm_gpu.cpp b/src/GPU/pair_cg_cmm_coul_msm_gpu.cpp index 073c1a2d3f..566da1830c 100644 --- a/src/GPU/pair_cg_cmm_coul_msm_gpu.cpp +++ b/src/GPU/pair_cg_cmm_coul_msm_gpu.cpp @@ -98,12 +98,12 @@ void PairCGCMMCoulMSMGPU::compute(int eflag, int vflag) int *ilist, *numneigh, **firstneigh; if (gpu_mode == GPU_NEIGH) { inum = atom->nlocal; - gpulist = cmmm_gpu_compute_n(neighbor->ago, inum, nall, atom->x, - atom->type, domain->sublo, domain->subhi, - atom->tag, atom->nspecial, atom->special, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success, atom->q); + firstneigh = cmmm_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + atom->tag, atom->nspecial, atom->special, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, atom->q); } else { inum = list->inum; ilist = list->ilist; diff --git a/src/GPU/pair_cg_cmm_gpu.cpp b/src/GPU/pair_cg_cmm_gpu.cpp index 219cf00ae2..6fd4670abd 100644 --- a/src/GPU/pair_cg_cmm_gpu.cpp +++ b/src/GPU/pair_cg_cmm_gpu.cpp @@ -94,11 +94,12 @@ void PairCGCMMGPU::compute(int eflag, int vflag) int *ilist, *numneigh, **firstneigh; if (gpu_mode == GPU_NEIGH) { inum = atom->nlocal; - gpulist = cmm_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, - domain->sublo, domain->subhi, atom->tag, - atom->nspecial, atom->special, eflag, vflag, - eflag_atom, vflag_atom, host_start, &ilist, - &numneigh, cpu_time, success); + firstneigh = cmm_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + atom->tag, atom->nspecial, atom->special, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success); } else { inum = list->inum; ilist = list->ilist; diff --git a/src/GPU/pair_gayberne_gpu.cpp b/src/GPU/pair_gayberne_gpu.cpp index 94e4c36ce2..c199d1ddde 100644 --- a/src/GPU/pair_gayberne_gpu.cpp +++ b/src/GPU/pair_gayberne_gpu.cpp @@ -98,11 +98,11 @@ void PairGayBerneGPU::compute(int eflag, int vflag) int *ilist, *numneigh, **firstneigh; if (gpu_mode == GPU_NEIGH) { inum = atom->nlocal; - gpulist = gb_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, - domain->sublo, domain->subhi, eflag, vflag, - eflag_atom, vflag_atom, host_start, - &ilist, &numneigh, cpu_time, success, - atom->quat); + firstneigh = gb_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, atom->quat); } else { inum = list->inum; ilist = list->ilist; diff --git a/src/GPU/pair_lj96_cut_gpu.cpp b/src/GPU/pair_lj96_cut_gpu.cpp index 925c4c4b0d..1f47d32582 100644 --- a/src/GPU/pair_lj96_cut_gpu.cpp +++ b/src/GPU/pair_lj96_cut_gpu.cpp @@ -93,12 +93,12 @@ void PairLJ96CutGPU::compute(int eflag, int vflag) int *ilist, *numneigh, **firstneigh; if (gpu_mode == GPU_NEIGH) { inum = atom->nlocal; - gpulist = lj96_gpu_compute_n(neighbor->ago, inum, nall, atom->x, - atom->type, domain->sublo, domain->subhi, - atom->tag, atom->nspecial, atom->special, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success); + firstneigh = lj96_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + atom->tag, atom->nspecial, atom->special, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success); } else { inum = list->inum; ilist = list->ilist; diff --git a/src/GPU/pair_lj_charmm_coul_long_gpu.cpp b/src/GPU/pair_lj_charmm_coul_long_gpu.cpp index 6bae770eed..e516925008 100644 --- a/src/GPU/pair_lj_charmm_coul_long_gpu.cpp +++ b/src/GPU/pair_lj_charmm_coul_long_gpu.cpp @@ -108,11 +108,12 @@ void PairLJCharmmCoulLongGPU::compute(int eflag, int vflag) int *ilist, *numneigh, **firstneigh; if (gpu_mode == GPU_NEIGH) { inum = atom->nlocal; - gpulist = crml_gpu_compute_n(neighbor->ago, inum, nall, atom->x, - atom->type, domain->sublo, domain->subhi, - atom->tag, atom->nspecial, atom->special, - eflag, vflag, eflag_atom, vflag_atom, - host_start, cpu_time, success, atom->q); + firstneigh = crml_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + atom->tag, atom->nspecial, atom->special, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, atom->q); } else { inum = list->inum; ilist = list->ilist; diff --git a/src/GPU/pair_lj_cut_coul_cut_gpu.cpp b/src/GPU/pair_lj_cut_coul_cut_gpu.cpp index 47b8b9065e..db061bd4d8 100644 --- a/src/GPU/pair_lj_cut_coul_cut_gpu.cpp +++ b/src/GPU/pair_lj_cut_coul_cut_gpu.cpp @@ -96,12 +96,12 @@ void PairLJCutCoulCutGPU::compute(int eflag, int vflag) int *ilist, *numneigh, **firstneigh; if (gpu_mode == GPU_NEIGH) { inum = atom->nlocal; - gpulist = ljc_gpu_compute_n(neighbor->ago, inum, nall, atom->x, - atom->type, domain->sublo, domain->subhi, - atom->tag, atom->nspecial, atom->special, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success, atom->q); + firstneigh = ljc_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + atom->tag, atom->nspecial, atom->special, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, atom->q); } else { inum = list->inum; ilist = list->ilist; diff --git a/src/GPU/pair_lj_cut_coul_long_gpu.cpp b/src/GPU/pair_lj_cut_coul_long_gpu.cpp index 9417b4206b..c03cd87d61 100644 --- a/src/GPU/pair_lj_cut_coul_long_gpu.cpp +++ b/src/GPU/pair_lj_cut_coul_long_gpu.cpp @@ -106,12 +106,12 @@ void PairLJCutCoulLongGPU::compute(int eflag, int vflag) int *ilist, *numneigh, **firstneigh; if (gpu_mode == GPU_NEIGH) { inum = atom->nlocal; - gpulist = ljcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, - atom->type, domain->sublo, domain->subhi, - atom->tag, atom->nspecial, atom->special, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success, atom->q); + firstneigh = ljcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + atom->tag, atom->nspecial, atom->special, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, atom->q); } else { inum = list->inum; ilist = list->ilist; diff --git a/src/GPU/pair_lj_cut_tgpu.cpp b/src/GPU/pair_lj_cut_tgpu.cpp index 75885c231e..004d145c33 100644 --- a/src/GPU/pair_lj_cut_tgpu.cpp +++ b/src/GPU/pair_lj_cut_tgpu.cpp @@ -98,12 +98,12 @@ void PairLJCutTGPU::compute(int eflag, int vflag) int *ilist, *numneigh, **firstneigh; if (gpu_mode == GPU_NEIGH) { inum = atom->nlocal; - gpulist = ljl_gpu_compute_n(neighbor->ago, inum, nall, - atom->x, atom->type, domain->sublo, - domain->subhi, atom->tag, atom->nspecial, - atom->special, eflag, vflag, eflag_atom, - vflag_atom, host_start, &ilist, &numneigh, - cpu_time, success); + firstneigh = ljl_gpu_compute_n(neighbor->ago, inum, nall, + atom->x, atom->type, domain->sublo, + domain->subhi, atom->tag, atom->nspecial, + atom->special, eflag, vflag, eflag_atom, + vflag_atom, host_start, &ilist, &numneigh, + cpu_time, success); } else { inum = list->inum; ilist = list->ilist; diff --git a/src/GPU/pair_lj_expand_gpu.cpp b/src/GPU/pair_lj_expand_gpu.cpp index 75558b0a21..b2839b4839 100644 --- a/src/GPU/pair_lj_expand_gpu.cpp +++ b/src/GPU/pair_lj_expand_gpu.cpp @@ -94,12 +94,12 @@ void PairLJExpandGPU::compute(int eflag, int vflag) int *ilist, *numneigh, **firstneigh; if (gpu_mode == GPU_NEIGH) { inum = atom->nlocal; - gpulist = lje_gpu_compute_n(neighbor->ago, inum, nall, atom->x, - atom->type, domain->sublo, domain->subhi, - atom->tag, atom->nspecial, atom->special, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success); + firstneigh = lje_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + atom->tag, atom->nspecial, atom->special, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success); } else { inum = list->inum; ilist = list->ilist; diff --git a/src/GPU/pair_morse_gpu.cpp b/src/GPU/pair_morse_gpu.cpp index c3ac27c8f1..e77d5bd29b 100644 --- a/src/GPU/pair_morse_gpu.cpp +++ b/src/GPU/pair_morse_gpu.cpp @@ -92,12 +92,12 @@ void PairMorseGPU::compute(int eflag, int vflag) int *ilist, *numneigh, **firstneigh; if (gpu_mode == GPU_NEIGH) { inum = atom->nlocal; - gpulist = mor_gpu_compute_n(neighbor->ago, inum, nall, - atom->x, atom->type, domain->sublo, - domain->subhi, atom->tag, atom->nspecial, - atom->special, eflag, vflag, eflag_atom, - vflag_atom, host_start, &ilist, &numneigh, - cpu_time, success); + firstneigh = mor_gpu_compute_n(neighbor->ago, inum, nall, + atom->x, atom->type, domain->sublo, + domain->subhi, atom->tag, atom->nspecial, + atom->special, eflag, vflag, eflag_atom, + vflag_atom, host_start, &ilist, &numneigh, + cpu_time, success); } else { inum = list->inum; ilist = list->ilist; diff --git a/src/GPU/pair_morse_gpu.h b/src/GPU/pair_morse_gpu.h index f146682a97..ee9ba576d5 100644 --- a/src/GPU/pair_morse_gpu.h +++ b/src/GPU/pair_morse_gpu.h @@ -29,6 +29,7 @@ class PairMorseGPU : public PairMorse { PairMorseGPU(LAMMPS *lmp); ~PairMorseGPU(); void cpu_compute(int, int, int, int, int *, int *, int **); + void compute(int, int); void init_style(); double memory_usage();