From 86f7f6cd42a81143c5b72e71882bf0bb07ad8af8 Mon Sep 17 00:00:00 2001 From: "W. Michael Brown" Date: Sun, 3 Apr 2011 11:51:26 -0400 Subject: [PATCH] Adding better error output to LAMMPS routines. --- lib/gpu/atomic_gpu_memory.cpp | 20 ++++---- lib/gpu/atomic_gpu_memory.h | 17 +++++-- lib/gpu/charge_gpu_memory.cpp | 20 ++++---- lib/gpu/charge_gpu_memory.h | 17 +++++-- lib/gpu/cmm_cut_gpu.cpp | 43 ++++++++-------- lib/gpu/cmm_cut_gpu_memory.cpp | 25 +++++----- lib/gpu/cmm_cut_gpu_memory.h | 21 +++++--- lib/gpu/cmmc_long_gpu.cpp | 52 +++++++++----------- lib/gpu/cmmc_long_gpu_memory.cpp | 33 +++++++------ lib/gpu/cmmc_long_gpu_memory.h | 25 ++++++---- lib/gpu/cmmc_msm_gpu.cpp | 53 ++++++++++---------- lib/gpu/cmmc_msm_gpu_memory.cpp | 33 +++++++------ lib/gpu/cmmc_msm_gpu_memory.h | 25 ++++++---- lib/gpu/crml_gpu.cpp | 62 +++++++++++------------- lib/gpu/crml_gpu_memory.cpp | 11 +++-- lib/gpu/crml_gpu_memory.h | 29 ++++++----- lib/gpu/gb_gpu.cpp | 51 +++++++++---------- lib/gpu/gb_gpu_memory.cpp | 34 +++++++------ lib/gpu/gb_gpu_memory.h | 27 +++++++---- lib/gpu/geryon/nvc_device.h | 4 +- lib/gpu/geryon/nvd_device.h | 16 ++++-- lib/gpu/geryon/ucl_d_mat.h | 40 ++++++++------- lib/gpu/geryon/ucl_d_vec.h | 33 ++++++++----- lib/gpu/geryon/ucl_h_mat.h | 24 ++++++--- lib/gpu/geryon/ucl_h_vec.h | 38 ++++++++++----- lib/gpu/lj96_cut_gpu.cpp | 41 ++++++++-------- lib/gpu/lj96_cut_gpu_memory.cpp | 11 +++-- lib/gpu/lj96_cut_gpu_memory.h | 21 +++++--- lib/gpu/lj_cut_gpu.cpp | 41 +++++++--------- lib/gpu/lj_cut_gpu_memory.cpp | 25 +++++----- lib/gpu/lj_cut_gpu_memory.h | 21 +++++--- lib/gpu/lj_expand_gpu.cpp | 42 ++++++++-------- lib/gpu/lj_expand_gpu_memory.cpp | 25 +++++----- lib/gpu/lj_expand_gpu_memory.h | 21 +++++--- lib/gpu/ljc_cut_gpu.cpp | 50 +++++++++---------- lib/gpu/ljc_cut_gpu_memory.cpp | 29 ++++++----- lib/gpu/ljc_cut_gpu_memory.h | 25 ++++++---- lib/gpu/ljcl_cut_gpu.cpp | 52 +++++++++----------- lib/gpu/ljcl_cut_gpu_memory.cpp | 11 +++-- lib/gpu/ljcl_cut_gpu_memory.h | 25 ++++++---- lib/gpu/morse_gpu.cpp | 43 ++++++++-------- lib/gpu/morse_gpu_memory.cpp | 25 +++++----- lib/gpu/morse_gpu_memory.h | 21 +++++--- lib/gpu/pair_gpu_ans.h | 1 - lib/gpu/pair_gpu_device.cpp | 52 ++++++++++++-------- lib/gpu/pair_gpu_device.h | 27 ++++++++--- lib/gpu/pair_gpu_nbor.cpp | 7 +++ lib/gpu/pppm_gpu_memory.cpp | 5 +- lib/gpu/pppm_l_gpu.cpp | 7 ++- src/GPU/Install.sh | 2 + src/GPU/fix_gpu.cpp | 5 +- src/GPU/gpu_extra.h | 47 ++++++++++++++++++ src/GPU/pair_cg_cmm_coul_long_gpu.cpp | 32 ++++++------ src/GPU/pair_cg_cmm_coul_msm_gpu.cpp | 32 ++++++------ src/GPU/pair_cg_cmm_gpu.cpp | 24 ++++----- src/GPU/pair_gayberne_gpu.cpp | 30 ++++++------ src/GPU/pair_lj96_cut_gpu.cpp | 22 ++++----- src/GPU/pair_lj_charmm_coul_long_gpu.cpp | 38 +++++++-------- src/GPU/pair_lj_cut_coul_cut_gpu.cpp | 28 +++++------ src/GPU/pair_lj_cut_coul_long_gpu.cpp | 22 ++++----- src/GPU/pair_lj_cut_gpu.cpp | 22 ++++----- src/GPU/pair_lj_cut_tgpu.cpp | 16 +++--- src/GPU/pair_lj_expand_gpu.cpp | 13 ++--- src/GPU/pair_morse_gpu.cpp | 22 ++++----- src/GPU/pppm_gpu.cpp | 2 +- src/GPU/pppm_gpu_double.cpp | 20 ++------ src/GPU/pppm_gpu_single.cpp | 20 ++------ 67 files changed, 983 insertions(+), 795 deletions(-) create mode 100755 src/GPU/gpu_extra.h diff --git a/lib/gpu/atomic_gpu_memory.cpp b/lib/gpu/atomic_gpu_memory.cpp index 8e06c36413..2e5d91b993 100644 --- a/lib/gpu/atomic_gpu_memory.cpp +++ b/lib/gpu/atomic_gpu_memory.cpp @@ -40,11 +40,11 @@ int AtomicGPUMemoryT::bytes_per_atom_atomic(const int max_nbors) const { } template -bool AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall, - const int max_nbors, const int maxspecial, - const double cell_size, - const double gpu_split, FILE *_screen, - const char *pair_program) { +int AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, + const double gpu_split, FILE *_screen, + const char *pair_program) { nbor_time_avail=false; screen=_screen; @@ -57,9 +57,11 @@ bool AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall, if (host_nlocal>0) _gpu_host=1; - if (!device->init(*ans,false,false,nlocal,host_nlocal,nall,nbor,maxspecial, - _gpu_host,max_nbors,cell_size,false)) - return false; + int success=device->init(*ans,false,false,nlocal,host_nlocal,nall,nbor, + maxspecial,_gpu_host,max_nbors,cell_size,false); + if (success!=0) + return success; + ucl_device=device->gpu; atom=&device->atom; @@ -79,7 +81,7 @@ bool AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall, _max_an_bytes=ans->gpu_bytes()+nbor->gpu_bytes(); - return true; + return 0; } template diff --git a/lib/gpu/atomic_gpu_memory.h b/lib/gpu/atomic_gpu_memory.h index 08728e9c75..c7117c0288 100644 --- a/lib/gpu/atomic_gpu_memory.h +++ b/lib/gpu/atomic_gpu_memory.h @@ -39,11 +39,18 @@ class AtomicGPUMemory { /// Clear any previous data and set up for a new LAMMPS run /** \param max_nbors initial number of rows in the neighbor matrix * \param cell_size cutoff + skin - * \param gpu_split fraction of particles handled by device **/ - bool init_atomic(const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *screen, - const char *pair_program); + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init_atomic(const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen, + const char *pair_program); /// Estimate the overhead for GPU context changes and CPU driver void estimate_gpu_overhead(); diff --git a/lib/gpu/charge_gpu_memory.cpp b/lib/gpu/charge_gpu_memory.cpp index 35891b5854..e78282064c 100644 --- a/lib/gpu/charge_gpu_memory.cpp +++ b/lib/gpu/charge_gpu_memory.cpp @@ -40,11 +40,11 @@ int ChargeGPUMemoryT::bytes_per_atom_atomic(const int max_nbors) const { } template -bool ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall, - const int max_nbors, const int maxspecial, - const double cell_size, - const double gpu_split, FILE *_screen, - const char *pair_program) { +int ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, + const double gpu_split, FILE *_screen, + const char *pair_program) { nbor_time_avail=false; screen=_screen; @@ -57,9 +57,11 @@ bool ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall, if (host_nlocal>0) _gpu_host=1; - if (!device->init(*ans,true,false,nlocal,host_nlocal,nall,nbor,maxspecial, - _gpu_host,max_nbors,cell_size,false)) - return false; + int success=device->init(*ans,true,false,nlocal,host_nlocal,nall,nbor, + maxspecial,_gpu_host,max_nbors,cell_size,false); + if (success!=0) + return success; + ucl_device=device->gpu; atom=&device->atom; @@ -80,7 +82,7 @@ bool ChargeGPUMemoryT::init_atomic(const int nlocal, const int nall, _max_an_bytes=ans->gpu_bytes()+nbor->gpu_bytes(); - return true; + return success; } template diff --git a/lib/gpu/charge_gpu_memory.h b/lib/gpu/charge_gpu_memory.h index c75eed072f..4dc33fd2dc 100644 --- a/lib/gpu/charge_gpu_memory.h +++ b/lib/gpu/charge_gpu_memory.h @@ -39,11 +39,18 @@ class ChargeGPUMemory { /// Clear any previous data and set up for a new LAMMPS run /** \param max_nbors initial number of rows in the neighbor matrix * \param cell_size cutoff + skin - * \param gpu_split fraction of particles handled by device **/ - bool init_atomic(const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *screen, - const char *pair_program); + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init_atomic(const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen, + const char *pair_program); /// Estimate the overhead for GPU context changes and CPU driver void estimate_gpu_overhead(); diff --git a/lib/gpu/cmm_cut_gpu.cpp b/lib/gpu/cmm_cut_gpu.cpp index 3070513dc9..7be958615a 100644 --- a/lib/gpu/cmm_cut_gpu.cpp +++ b/lib/gpu/cmm_cut_gpu.cpp @@ -28,12 +28,12 @@ static CMM_GPU_Memory CMMMF; // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -bool cmm_gpu_init(const int ntypes, double **cutsq, int **cg_types, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **offset, double *special_lj, - const int inum, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, int &gpu_mode, - FILE *screen) { +int cmm_gpu_init(const int ntypes, double **cutsq, int **cg_types, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **offset, double *special_lj, + const int inum, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, int &gpu_mode, + FILE *screen) { CMMMF.clear(); gpu_mode=CMMMF.device->gpu_mode(); double gpu_split=CMMMF.device->particle_split(); @@ -54,13 +54,11 @@ bool cmm_gpu_init(const int ntypes, double **cutsq, int **cg_types, fflush(screen); } - if (world_me==0) { - bool init_ok=CMMMF.init(ntypes,cutsq,cg_types,host_lj1,host_lj2,host_lj3, - host_lj4, offset, special_lj, inum, nall, 300, - maxspecial, cell_size, gpu_split, screen); - if (!init_ok) - return false; - } + int init_ok=0; + if (world_me==0) + init_ok=CMMMF.init(ntypes,cutsq,cg_types,host_lj1,host_lj2,host_lj3, + host_lj4, offset, special_lj, inum, nall, 300, + maxspecial, cell_size, gpu_split, screen); CMMMF.device->world_barrier(); if (message) @@ -75,22 +73,21 @@ bool cmm_gpu_init(const int ntypes, double **cutsq, int **cg_types, last_gpu,i); fflush(screen); } - if (gpu_rank==i && world_me!=0) { - bool init_ok=CMMMF.init(ntypes,cutsq,cg_types,host_lj1,host_lj2,host_lj3, - host_lj4, offset, special_lj, inum, nall, 300, - maxspecial, cell_size, gpu_split, - screen); - if (!init_ok) - return false; - } + if (gpu_rank==i && world_me!=0) + init_ok=CMMMF.init(ntypes,cutsq,cg_types,host_lj1,host_lj2,host_lj3, + host_lj4, offset, special_lj, inum, nall, 300, + maxspecial, cell_size, gpu_split, screen); + CMMMF.device->gpu_barrier(); if (message) fprintf(screen,"Done.\n"); } if (message) fprintf(screen,"\n"); - CMMMF.estimate_gpu_overhead(); - return true; + + if (init_ok==0) + CMMMF.estimate_gpu_overhead(); + return init_ok; } void cmm_gpu_clear() { diff --git a/lib/gpu/cmm_cut_gpu_memory.cpp b/lib/gpu/cmm_cut_gpu_memory.cpp index 53f219274c..f82390097a 100644 --- a/lib/gpu/cmm_cut_gpu_memory.cpp +++ b/lib/gpu/cmm_cut_gpu_memory.cpp @@ -42,16 +42,19 @@ int CMM_GPU_MemoryT::bytes_per_atom(const int max_nbors) const { } template -bool CMM_GPU_MemoryT::init(const int ntypes, double **host_cutsq, - int **host_cg_type, double **host_lj1, - double **host_lj2, double **host_lj3, - double **host_lj4, double **host_offset, - double *host_special_lj, const int nlocal, - const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *_screen) { - this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, - _screen,cmm_cut_gpu_kernel); +int CMM_GPU_MemoryT::init(const int ntypes, double **host_cutsq, + int **host_cg_type, double **host_lj1, + double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, + double *host_special_lj, const int nlocal, + const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *_screen) { + int success; + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, + _screen,cmm_cut_gpu_kernel); + if (success!=0) + return success; // If atom type constants fit in shared memory use fast kernel int cmm_types=ntypes; @@ -84,7 +87,7 @@ bool CMM_GPU_MemoryT::init(const int ntypes, double **host_cutsq, _allocated=true; this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+sp_lj.row_bytes(); - return true; + return 0; } template diff --git a/lib/gpu/cmm_cut_gpu_memory.h b/lib/gpu/cmm_cut_gpu_memory.h index 8099d5b9c4..fff90e477d 100644 --- a/lib/gpu/cmm_cut_gpu_memory.h +++ b/lib/gpu/cmm_cut_gpu_memory.h @@ -29,13 +29,20 @@ class CMM_GPU_Memory : public AtomicGPUMemory { /// Clear any previous data and set up for a new LAMMPS run /** \param max_nbors initial number of rows in the neighbor matrix * \param cell_size cutoff + skin - * \param gpu_split fraction of particles handled by device **/ - bool init(const int ntypes, double **host_cutsq, int **host_cg_type, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **host_offset, double *host_special_lj, - const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *screen); + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, int **host_cg_type, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen); /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ diff --git a/lib/gpu/cmmc_long_gpu.cpp b/lib/gpu/cmmc_long_gpu.cpp index 7b100df2d3..a6f3d090af 100644 --- a/lib/gpu/cmmc_long_gpu.cpp +++ b/lib/gpu/cmmc_long_gpu.cpp @@ -28,14 +28,14 @@ static CMML_GPU_Memory CMMLMF; // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -bool cmml_gpu_init(const int ntypes, double **cutsq, int **cg_type, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **offset, double *special_lj, - const int inum, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, int &gpu_mode, - FILE *screen, double **host_cut_ljsq, double host_cut_coulsq, - double *host_special_coul, const double qqrd2e, - const double g_ewald) { +int cmml_gpu_init(const int ntypes, double **cutsq, int **cg_type, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **offset, double *special_lj, + const int inum, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, int &gpu_mode, + FILE *screen, double **host_cut_ljsq, double host_cut_coulsq, + double *host_special_coul, const double qqrd2e, + const double g_ewald) { CMMLMF.clear(); gpu_mode=CMMLMF.device->gpu_mode(); double gpu_split=CMMLMF.device->particle_split(); @@ -56,15 +56,12 @@ bool cmml_gpu_init(const int ntypes, double **cutsq, int **cg_type, fflush(screen); } - if (world_me==0) { - bool init_ok=CMMLMF.init(ntypes, cutsq, cg_type, host_lj1, host_lj2, - host_lj3, host_lj4, offset, special_lj, inum, - nall, 300, maxspecial, cell_size, gpu_split, - screen, host_cut_ljsq, host_cut_coulsq, - host_special_coul, qqrd2e,g_ewald); - if (!init_ok) - return false; - } + int init_ok=0; + if (world_me==0) + init_ok=CMMLMF.init(ntypes, cutsq, cg_type, host_lj1, host_lj2, host_lj3, + host_lj4, offset, special_lj, inum, nall, 300, + maxspecial, cell_size, gpu_split, screen, host_cut_ljsq, + host_cut_coulsq, host_special_coul, qqrd2e,g_ewald); CMMLMF.device->world_barrier(); if (message) @@ -79,23 +76,22 @@ bool cmml_gpu_init(const int ntypes, double **cutsq, int **cg_type, last_gpu,i); fflush(screen); } - if (gpu_rank==i && world_me!=0) { - bool init_ok=CMMLMF.init(ntypes, cutsq, cg_type, host_lj1, host_lj2, - host_lj3, host_lj4, offset, special_lj, inum, - nall, 300, maxspecial, cell_size, gpu_split, - screen, host_cut_ljsq, host_cut_coulsq, - host_special_coul, qqrd2e, g_ewald); - if (!init_ok) - return false; - } + if (gpu_rank==i && world_me!=0) + init_ok=CMMLMF.init(ntypes, cutsq, cg_type, host_lj1, host_lj2, host_lj3, + host_lj4, offset, special_lj, inum, nall, 300, + maxspecial, cell_size, gpu_split, screen, + host_cut_ljsq, host_cut_coulsq, host_special_coul, + qqrd2e, g_ewald); CMMLMF.device->gpu_barrier(); if (message) fprintf(screen,"Done.\n"); } if (message) fprintf(screen,"\n"); - CMMLMF.estimate_gpu_overhead(); - return true; + + if (init_ok==0) + CMMLMF.estimate_gpu_overhead(); + return init_ok; } void cmml_gpu_clear() { diff --git a/lib/gpu/cmmc_long_gpu_memory.cpp b/lib/gpu/cmmc_long_gpu_memory.cpp index 2091122832..020bbde549 100644 --- a/lib/gpu/cmmc_long_gpu_memory.cpp +++ b/lib/gpu/cmmc_long_gpu_memory.cpp @@ -43,20 +43,23 @@ int CMML_GPU_MemoryT::bytes_per_atom(const int max_nbors) const { } template -bool CMML_GPU_MemoryT::init(const int ntypes, double **host_cutsq, - int **host_cg_type, double **host_lj1, - double **host_lj2, double **host_lj3, - double **host_lj4, double **host_offset, - double *host_special_lj, const int nlocal, - const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *_screen, - double **host_cut_ljsq, - const double host_cut_coulsq, - double *host_special_coul, const double qqrd2e, - const double g_ewald) { - this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, - _screen,cmmc_long_gpu_kernel); +int CMML_GPU_MemoryT::init(const int ntypes, double **host_cutsq, + int **host_cg_type, double **host_lj1, + double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, + double *host_special_lj, const int nlocal, + const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *_screen, + double **host_cut_ljsq, + const double host_cut_coulsq, + double *host_special_coul, const double qqrd2e, + const double g_ewald) { + int success; + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, + _screen,cmmc_long_gpu_kernel); + if (success!=0) + return success; // If atom type constants fit in shared memory use fast kernel int lj_types=ntypes; @@ -95,7 +98,7 @@ bool CMML_GPU_MemoryT::init(const int ntypes, double **host_cutsq, _allocated=true; this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+sp_lj.row_bytes(); - return true; + return 0; } template diff --git a/lib/gpu/cmmc_long_gpu_memory.h b/lib/gpu/cmmc_long_gpu_memory.h index 8192c78249..45090368a5 100644 --- a/lib/gpu/cmmc_long_gpu_memory.h +++ b/lib/gpu/cmmc_long_gpu_memory.h @@ -29,15 +29,22 @@ class CMML_GPU_Memory : public ChargeGPUMemory { /// Clear any previous data and set up for a new LAMMPS run /** \param max_nbors initial number of rows in the neighbor matrix * \param cell_size cutoff + skin - * \param gpu_split fraction of particles handled by device **/ - bool init(const int ntypes, double **host_cutsq, int ** cg_type, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **host_offset, double *host_special_lj, - const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *screen, double **host_cut_ljsq, - const double host_cut_coulsq, double *host_special_coul, - const double qqrd2e, const double g_ewald); + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, int ** cg_type, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen, double **host_cut_ljsq, + const double host_cut_coulsq, double *host_special_coul, + const double qqrd2e, const double g_ewald); /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ diff --git a/lib/gpu/cmmc_msm_gpu.cpp b/lib/gpu/cmmc_msm_gpu.cpp index df0a226a73..cfa6c50453 100644 --- a/lib/gpu/cmmc_msm_gpu.cpp +++ b/lib/gpu/cmmc_msm_gpu.cpp @@ -28,14 +28,14 @@ static CMMM_GPU_Memory CMMMMF; // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -bool cmmm_gpu_init(const int ntypes, double **cutsq, int **cg_type, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **offset, double *special_lj, - const int inum, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, int &gpu_mode, - FILE *screen, double **host_cut_ljsq, double host_cut_coulsq, - double *host_special_coul, const double qqrd2e, - const int smooth) { +int cmmm_gpu_init(const int ntypes, double **cutsq, int **cg_type, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **offset, double *special_lj, + const int inum, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, int &gpu_mode, + FILE *screen, double **host_cut_ljsq, double host_cut_coulsq, + double *host_special_coul, const double qqrd2e, + const int smooth) { CMMMMF.clear(); gpu_mode=CMMMMF.device->gpu_mode(); double gpu_split=CMMMMF.device->particle_split(); @@ -56,15 +56,12 @@ bool cmmm_gpu_init(const int ntypes, double **cutsq, int **cg_type, fflush(screen); } - if (world_me==0) { - bool init_ok=CMMMMF.init(ntypes, cutsq, cg_type, host_lj1, host_lj2, - host_lj3, host_lj4, offset, special_lj, inum, - nall, 300, maxspecial, cell_size, gpu_split, - screen, host_cut_ljsq, host_cut_coulsq, - host_special_coul, qqrd2e,smooth); - if (!init_ok) - return false; - } + int init_ok=0; + if (world_me==0) + init_ok=CMMMMF.init(ntypes, cutsq, cg_type, host_lj1, host_lj2, host_lj3, + host_lj4, offset, special_lj, inum, nall, 300, + maxspecial, cell_size, gpu_split, screen, host_cut_ljsq, + host_cut_coulsq, host_special_coul, qqrd2e,smooth); CMMMMF.device->world_barrier(); if (message) @@ -79,23 +76,23 @@ bool cmmm_gpu_init(const int ntypes, double **cutsq, int **cg_type, last_gpu,i); fflush(screen); } - if (gpu_rank==i && world_me!=0) { - bool init_ok=CMMMMF.init(ntypes, cutsq, cg_type, host_lj1, host_lj2, - host_lj3, host_lj4, offset, special_lj, inum, - nall, 300, maxspecial, cell_size, gpu_split, - screen, host_cut_ljsq, host_cut_coulsq, - host_special_coul, qqrd2e,smooth); - if (!init_ok) - return false; - } + if (gpu_rank==i && world_me!=0) + init_ok=CMMMMF.init(ntypes, cutsq, cg_type, host_lj1, host_lj2, host_lj3, + host_lj4, offset, special_lj, inum, nall, 300, + maxspecial, cell_size, gpu_split, screen, + host_cut_ljsq, host_cut_coulsq, host_special_coul, + qqrd2e,smooth); + CMMMMF.device->gpu_barrier(); if (message) fprintf(screen,"Done.\n"); } if (message) fprintf(screen,"\n"); - CMMMMF.estimate_gpu_overhead(); - return true; + + if (init_ok==0) + CMMMMF.estimate_gpu_overhead(); + return init_ok; } void cmmm_gpu_clear() { diff --git a/lib/gpu/cmmc_msm_gpu_memory.cpp b/lib/gpu/cmmc_msm_gpu_memory.cpp index 7675005348..aa170a476d 100644 --- a/lib/gpu/cmmc_msm_gpu_memory.cpp +++ b/lib/gpu/cmmc_msm_gpu_memory.cpp @@ -43,20 +43,23 @@ int CMMM_GPU_MemoryT::bytes_per_atom(const int max_nbors) const { } template -bool CMMM_GPU_MemoryT::init(const int ntypes, double **host_cutsq, - int **host_cg_type, double **host_lj1, - double **host_lj2, double **host_lj3, - double **host_lj4, double **host_offset, - double *host_special_lj, const int nlocal, - const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *_screen, - double **host_cut_ljsq, - const double host_cut_coulsq, - double *host_special_coul, const double qqrd2e, - const int smooth) { - this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, - _screen,cmmc_msm_gpu_kernel); +int CMMM_GPU_MemoryT::init(const int ntypes, double **host_cutsq, + int **host_cg_type, double **host_lj1, + double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, + double *host_special_lj, const int nlocal, + const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *_screen, + double **host_cut_ljsq, + const double host_cut_coulsq, + double *host_special_coul, const double qqrd2e, + const int smooth) { + int success; + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, + _screen,cmmc_msm_gpu_kernel); + if (success!=0) + return success; // If atom type constants fit in shared memory use fast kernel int lj_types=ntypes; @@ -95,7 +98,7 @@ bool CMMM_GPU_MemoryT::init(const int ntypes, double **host_cutsq, _allocated=true; this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+sp_lj.row_bytes(); - return true; + return 0; } template diff --git a/lib/gpu/cmmc_msm_gpu_memory.h b/lib/gpu/cmmc_msm_gpu_memory.h index ddaef5944d..9b5205f702 100644 --- a/lib/gpu/cmmc_msm_gpu_memory.h +++ b/lib/gpu/cmmc_msm_gpu_memory.h @@ -29,15 +29,22 @@ class CMMM_GPU_Memory : public ChargeGPUMemory { /// Clear any previous data and set up for a new LAMMPS run /** \param max_nbors initial number of rows in the neighbor matrix * \param cell_size cutoff + skin - * \param gpu_split fraction of particles handled by device **/ - bool init(const int ntypes, double **host_cutsq, int ** cg_type, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **host_offset, double *host_special_lj, - const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *screen, double **host_cut_ljsq, - const double host_cut_coulsq, double *host_special_coul, - const double qqrd2e, const int smooth); + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, int ** cg_type, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen, double **host_cut_ljsq, + const double host_cut_coulsq, double *host_special_coul, + const double qqrd2e, const int smooth); /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ diff --git a/lib/gpu/crml_gpu.cpp b/lib/gpu/crml_gpu.cpp index f55d73390d..1e59562ed5 100644 --- a/lib/gpu/crml_gpu.cpp +++ b/lib/gpu/crml_gpu.cpp @@ -28,16 +28,16 @@ static CRML_GPU_Memory CRMLMF; // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -bool crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, double *special_lj, const int inum, - const int nall, const int max_nbors, const int maxspecial, - const double cell_size, int &gpu_mode, FILE *screen, - double host_cut_ljsq, double host_cut_coulsq, - double *host_special_coul, const double qqrd2e, - const double g_ewald, const double cut_lj_innersq, - const double denom_lj, double **epsilon, - double **sigma, const bool mix_arithmetic) { +int crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int inum, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen, + double host_cut_ljsq, double host_cut_coulsq, + double *host_special_coul, const double qqrd2e, + const double g_ewald, const double cut_lj_innersq, + const double denom_lj, double **epsilon, + double **sigma, const bool mix_arithmetic) { CRMLMF.clear(); gpu_mode=CRMLMF.device->gpu_mode(); double gpu_split=CRMLMF.device->particle_split(); @@ -58,16 +58,13 @@ bool crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1, fflush(screen); } - if (world_me==0) { - bool init_ok=CRMLMF.init(ntypes, cut_bothsq, host_lj1, host_lj2, host_lj3, - host_lj4, offset, special_lj, inum, nall, 300, - maxspecial, cell_size, gpu_split, screen, - host_cut_ljsq, host_cut_coulsq, host_special_coul, - qqrd2e, g_ewald, cut_lj_innersq, denom_lj, - epsilon,sigma,mix_arithmetic); - if (!init_ok) - return false; - } + int init_ok=0; + if (world_me==0) + CRMLMF.init(ntypes, cut_bothsq, host_lj1, host_lj2, host_lj3, host_lj4, + offset, special_lj, inum, nall, 300, maxspecial, cell_size, + gpu_split, screen, host_cut_ljsq, host_cut_coulsq, + host_special_coul, qqrd2e, g_ewald, cut_lj_innersq, denom_lj, + epsilon,sigma,mix_arithmetic); CRMLMF.device->world_barrier(); if (message) @@ -82,25 +79,24 @@ bool crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1, last_gpu,i); fflush(screen); } - if (gpu_rank==i && world_me!=0) { - bool init_ok=CRMLMF.init(ntypes, cut_bothsq, host_lj1, host_lj2, host_lj3, - host_lj4, offset, special_lj, inum, nall, 300, - maxspecial, cell_size, gpu_split, - screen, host_cut_ljsq, host_cut_coulsq, - host_special_coul, qqrd2e, g_ewald, - cut_lj_innersq, denom_lj, epsilon, sigma, - mix_arithmetic); - if (!init_ok) - return false; - } + if (gpu_rank==i && world_me!=0) + init_ok=CRMLMF.init(ntypes, cut_bothsq, host_lj1, host_lj2, host_lj3, + host_lj4, offset, special_lj, inum, nall, 300, + maxspecial, cell_size, gpu_split, screen, + host_cut_ljsq, host_cut_coulsq, host_special_coul, + qqrd2e, g_ewald, cut_lj_innersq, denom_lj, epsilon, + sigma, mix_arithmetic); + CRMLMF.device->gpu_barrier(); if (message) fprintf(screen,"Done.\n"); } if (message) fprintf(screen,"\n"); - CRMLMF.estimate_gpu_overhead(); - return true; + + if (init_ok==0) + CRMLMF.estimate_gpu_overhead(); + return init_ok; } void crml_gpu_clear() { diff --git a/lib/gpu/crml_gpu_memory.cpp b/lib/gpu/crml_gpu_memory.cpp index f051a349f9..3b6a07ee7e 100644 --- a/lib/gpu/crml_gpu_memory.cpp +++ b/lib/gpu/crml_gpu_memory.cpp @@ -43,7 +43,7 @@ int CRML_GPU_MemoryT::bytes_per_atom(const int max_nbors) const { } template -bool CRML_GPU_MemoryT::init(const int ntypes, +int CRML_GPU_MemoryT::init(const int ntypes, double host_cut_bothsq, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **host_offset, @@ -56,8 +56,11 @@ bool CRML_GPU_MemoryT::init(const int ntypes, const double g_ewald, const double cut_lj_innersq, const double denom_lj, double **epsilon, double **sigma, const bool mix_arithmetic) { - this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, - _screen,crml_gpu_kernel); + int success; + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, + _screen,crml_gpu_kernel); + if (success!=0) + return success; // If atom type constants fit in shared memory use fast kernel int lj_types=ntypes; @@ -99,7 +102,7 @@ bool CRML_GPU_MemoryT::init(const int ntypes, _allocated=true; this->_max_bytes=lj1.row_bytes()+ljd.row_bytes()+sp_lj.row_bytes(); - return true; + return 0; } template diff --git a/lib/gpu/crml_gpu_memory.h b/lib/gpu/crml_gpu_memory.h index 5520cd3a17..a474d5982d 100644 --- a/lib/gpu/crml_gpu_memory.h +++ b/lib/gpu/crml_gpu_memory.h @@ -29,17 +29,24 @@ class CRML_GPU_Memory : public ChargeGPUMemory { /// Clear any previous data and set up for a new LAMMPS run /** \param max_nbors initial number of rows in the neighbor matrix * \param cell_size cutoff + skin - * \param gpu_split fraction of particles handled by device **/ - bool init(const int ntypes, double host_cut_bothsq, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **host_offset, double *host_special_lj, - const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *screen, double host_cut_ljsq, - const double host_cut_coulsq, double *host_special_coul, - const double qqrd2e, const double g_ewald, - const double cut_lj_innersq, const double denom_lj, - double **epsilon, double **sigma, const bool mix_arithmetic); + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double host_cut_bothsq, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen, double host_cut_ljsq, + const double host_cut_coulsq, double *host_special_coul, + const double qqrd2e, const double g_ewald, + const double cut_lj_innersq, const double denom_lj, + double **epsilon, double **sigma, const bool mix_arithmetic); /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ diff --git a/lib/gpu/gb_gpu.cpp b/lib/gpu/gb_gpu.cpp index 45ad59d667..4593ba5452 100644 --- a/lib/gpu/gb_gpu.cpp +++ b/lib/gpu/gb_gpu.cpp @@ -49,14 +49,14 @@ void gb_gpu_pack_nbors(GBMT &gbm, const int GX, const int BX, const int start, // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -bool gb_gpu_init(const int ntypes, const double gamma, - const double upsilon, const double mu, double **shape, - double **well, double **cutsq, double **sigma, - double **epsilon, double *host_lshape, int **form, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **offset, double *special_lj, - const int inum, const int nall, const int max_nbors, - const double cell_size, int &gpu_mode, FILE *screen) { +int gb_gpu_init(const int ntypes, const double gamma, + const double upsilon, const double mu, double **shape, + double **well, double **cutsq, double **sigma, + double **epsilon, double *host_lshape, int **form, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **offset, double *special_lj, + const int inum, const int nall, const int max_nbors, + const double cell_size, int &gpu_mode, FILE *screen) { GBMF.clear(); gpu_mode=GBMF.device->gpu_mode(); double gpu_split=GBMF.device->particle_split(); @@ -77,14 +77,12 @@ bool gb_gpu_init(const int ntypes, const double gamma, fflush(screen); } - if (world_me==0) { - bool init_ok=GBMF.init(ntypes, gamma, upsilon, mu, shape, well, cutsq, - sigma, epsilon, host_lshape, form, host_lj1, - host_lj2, host_lj3, host_lj4, offset, special_lj, - inum, nall, max_nbors, cell_size, gpu_split, screen); - if (!init_ok) - return false; - } + int init_ok=0; + if (world_me==0) + init_ok=GBMF.init(ntypes, gamma, upsilon, mu, shape, well, cutsq, + sigma, epsilon, host_lshape, form, host_lj1, + host_lj2, host_lj3, host_lj4, offset, special_lj, + inum, nall, max_nbors, cell_size, gpu_split, screen); GBMF.device->world_barrier(); if (message) @@ -99,23 +97,22 @@ bool gb_gpu_init(const int ntypes, const double gamma, last_gpu,i); fflush(screen); } - if (gpu_rank==i && world_me!=0) { - bool init_ok=GBMF.init(ntypes, gamma, upsilon, mu, shape, well, cutsq, - sigma, epsilon, host_lshape, form, host_lj1, - host_lj2, host_lj3, host_lj4, offset, special_lj, - inum, nall, max_nbors, cell_size, gpu_split, - screen); - if (!init_ok) - return false; - } + if (gpu_rank==i && world_me!=0) + init_ok=GBMF.init(ntypes, gamma, upsilon, mu, shape, well, cutsq, sigma, + epsilon, host_lshape, form, host_lj1, host_lj2, + host_lj3, host_lj4, offset, special_lj, inum, nall, + max_nbors, cell_size, gpu_split, screen); + GBMF.device->gpu_barrier(); if (message) fprintf(screen,"Done.\n"); } if (message) fprintf(screen,"\n"); - GBMF.estimate_gpu_overhead(); - return true; + + if (init_ok==0) + GBMF.estimate_gpu_overhead(); + return init_ok; } // --------------------------------------------------------------------------- diff --git a/lib/gpu/gb_gpu_memory.cpp b/lib/gpu/gb_gpu_memory.cpp index 338751f34c..a22e582725 100644 --- a/lib/gpu/gb_gpu_memory.cpp +++ b/lib/gpu/gb_gpu_memory.cpp @@ -50,17 +50,17 @@ int GB_GPU_MemoryT::bytes_per_atom(const int max_nbors) const { } template -bool GB_GPU_MemoryT::init(const int ntypes, const double gamma, - const double upsilon, const double mu, - double **host_shape, double **host_well, - double **host_cutsq, double **host_sigma, - double **host_epsilon, double *host_lshape, - int **h_form, double **host_lj1, double **host_lj2, - double **host_lj3, double **host_lj4, - double **host_offset, const double *host_special_lj, - const int nlocal, const int nall, - const int max_nbors, const double cell_size, - const double gpu_split, FILE *_screen) { +int GB_GPU_MemoryT::init(const int ntypes, const double gamma, + const double upsilon, const double mu, + double **host_shape, double **host_well, + double **host_cutsq, double **host_sigma, + double **host_epsilon, double *host_lshape, + int **h_form, double **host_lj1, double **host_lj2, + double **host_lj3, double **host_lj4, + double **host_offset, const double *host_special_lj, + const int nlocal, const int nall, + const int max_nbors, const double cell_size, + const double gpu_split, FILE *_screen) { nbor_time_avail=false; screen=_screen; @@ -73,9 +73,11 @@ bool GB_GPU_MemoryT::init(const int ntypes, const double gamma, if (host_nlocal>0) _gpu_host=1; - if (!device->init(*ans,false,true,nlocal,host_nlocal,nall,nbor,0, - _gpu_host,max_nbors,cell_size,true)) - return false; + int success=device->init(*ans,false,true,nlocal,host_nlocal,nall,nbor,0, + _gpu_host,max_nbors,cell_size,true); + if (success!=0) + return success; + ucl_device=device->gpu; atom=&device->atom; @@ -195,7 +197,9 @@ bool GB_GPU_MemoryT::init(const int ntypes, const double gamma, _max_bytes=ans->gpu_bytes()+nbor->gpu_bytes(); // Memory for ilist ordered by particle type - return (host_olist.alloc(nbor->max_atoms(),*ucl_device)==UCL_SUCCESS); + if (host_olist.alloc(nbor->max_atoms(),*ucl_device)==UCL_SUCCESS) + return 0; + else return -3; } template diff --git a/lib/gpu/gb_gpu_memory.h b/lib/gpu/gb_gpu_memory.h index edba318ace..f47f9ff758 100644 --- a/lib/gpu/gb_gpu_memory.h +++ b/lib/gpu/gb_gpu_memory.h @@ -35,16 +35,23 @@ class GB_GPU_Memory { * \param max_nbors initial number of rows in the neighbor matrix * \param cell_size cutoff + skin * \param gpu_split fraction of particles handled by device - * \return false if there is not sufficient memory or device init prob **/ - bool init(const int ntypes, const double gamma, - const double upsilon, const double mu, double **host_shape, - double **host_well, double **host_cutsq, double **host_sigma, - double **host_epsilon, double *host_lshape, int **h_form, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **host_offset, - const double *host_special_lj, const int nlocal, const int nall, - const int max_nbors, const double cell_size, - const double gpu_split, FILE *screen); + * \return false if there is not sufficient memory or device init prob + * + * Returns: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, const double gamma, + const double upsilon, const double mu, double **host_shape, + double **host_well, double **host_cutsq, double **host_sigma, + double **host_epsilon, double *host_lshape, int **h_form, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, + const double *host_special_lj, const int nlocal, const int nall, + const int max_nbors, const double cell_size, + const double gpu_split, FILE *screen); /// Estimate the overhead for GPU context changes and CPU driver void estimate_gpu_overhead(); diff --git a/lib/gpu/geryon/nvc_device.h b/lib/gpu/geryon/nvc_device.h index ed445716f6..6a232986ff 100644 --- a/lib/gpu/geryon/nvc_device.h +++ b/lib/gpu/geryon/nvc_device.h @@ -167,6 +167,7 @@ class UCL_Device { int _device, _num_devices; std::vector _properties; std::vector _cq; + std::vector _device_ids; }; // Grabs the properties for all devices @@ -178,6 +179,7 @@ inline UCL_Device::UCL_Device() { if (deviceProp.major == 9999 && deviceProp.minor == 9999) break; _properties.push_back(deviceProp); + _device_ids.push_back(dev); } _device=-1; _cq.push_back(cudaStream_t()); @@ -194,7 +196,7 @@ inline void UCL_Device::set(int num) { return; for (int i=1; i LJ96MF; // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -bool lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, double *special_lj, const int inum, - const int nall, const int max_nbors, const int maxspecial, - const double cell_size, int &gpu_mode, FILE *screen) { +int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int inum, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen) { LJ96MF.clear(); gpu_mode=LJ96MF.device->gpu_mode(); double gpu_split=LJ96MF.device->particle_split(); @@ -53,13 +53,11 @@ bool lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1, fflush(screen); } - if (world_me==0) { - bool init_ok=LJ96MF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, - host_lj4, offset, special_lj, inum, nall, 300, - maxspecial, cell_size, gpu_split, screen); - if (!init_ok) - return false; - } + int init_ok=0; + if (world_me==0) + init_ok=LJ96MF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, + host_lj4, offset, special_lj, inum, nall, 300, + maxspecial, cell_size, gpu_split, screen); LJ96MF.device->world_barrier(); if (message) @@ -74,22 +72,21 @@ bool lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1, last_gpu,i); fflush(screen); } - if (gpu_rank==i && world_me!=0) { - bool init_ok=LJ96MF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, - host_lj4, offset, special_lj, inum, - nall, 300, maxspecial, cell_size, gpu_split, - screen); - if (!init_ok) - return false; - } + if (gpu_rank==i && world_me!=0) + init_ok=LJ96MF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, + offset, special_lj, inum, nall, 300, maxspecial, + cell_size, gpu_split, screen); + LJ96MF.device->gpu_barrier(); if (message) fprintf(screen,"Done.\n"); } if (message) fprintf(screen,"\n"); - LJ96MF.estimate_gpu_overhead(); - return true; + + if (init_ok==0) + LJ96MF.estimate_gpu_overhead(); + return init_ok; } void lj96_gpu_clear() { diff --git a/lib/gpu/lj96_cut_gpu_memory.cpp b/lib/gpu/lj96_cut_gpu_memory.cpp index c30c5d8caf..8f0a9bae0c 100644 --- a/lib/gpu/lj96_cut_gpu_memory.cpp +++ b/lib/gpu/lj96_cut_gpu_memory.cpp @@ -42,7 +42,7 @@ int LJ96_GPU_MemoryT::bytes_per_atom(const int max_nbors) const { } template -bool LJ96_GPU_MemoryT::init(const int ntypes, +int LJ96_GPU_MemoryT::init(const int ntypes, double **host_cutsq, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **host_offset, @@ -50,8 +50,11 @@ bool LJ96_GPU_MemoryT::init(const int ntypes, const int nall, const int max_nbors, const int maxspecial, const double cell_size, const double gpu_split, FILE *_screen) { - this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, - _screen,lj96_cut_gpu_kernel); + int success; + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, + _screen,lj96_cut_gpu_kernel); + if (success!=0) + return success; // If atom type constants fit in shared memory use fast kernel int lj_types=ntypes; @@ -84,7 +87,7 @@ bool LJ96_GPU_MemoryT::init(const int ntypes, _allocated=true; this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+sp_lj.row_bytes(); - return true; + return 0; } template diff --git a/lib/gpu/lj96_cut_gpu_memory.h b/lib/gpu/lj96_cut_gpu_memory.h index 483ef05570..fe0a0b1665 100644 --- a/lib/gpu/lj96_cut_gpu_memory.h +++ b/lib/gpu/lj96_cut_gpu_memory.h @@ -29,13 +29,20 @@ class LJ96_GPU_Memory : public AtomicGPUMemory { /// Clear any previous data and set up for a new LAMMPS run /** \param max_nbors initial number of rows in the neighbor matrix * \param cell_size cutoff + skin - * \param gpu_split fraction of particles handled by device **/ - bool init(const int ntypes, double **host_cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **host_offset, double *host_special_lj, - const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *screen); + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **host_offset, double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen); /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ diff --git a/lib/gpu/lj_cut_gpu.cpp b/lib/gpu/lj_cut_gpu.cpp index a40d0a7102..aef085f7c9 100644 --- a/lib/gpu/lj_cut_gpu.cpp +++ b/lib/gpu/lj_cut_gpu.cpp @@ -28,12 +28,11 @@ static LJL_GPU_Memory LJLMF; // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -bool ljl_gpu_init(const int ntypes, double **cutsq, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **offset, double *special_lj, - const int inum, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, int &gpu_mode, - FILE *screen) { +int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int inum, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen) { LJLMF.clear(); gpu_mode=LJLMF.device->gpu_mode(); double gpu_split=LJLMF.device->particle_split(); @@ -54,13 +53,11 @@ bool ljl_gpu_init(const int ntypes, double **cutsq, fflush(screen); } - if (world_me==0) { - bool init_ok=LJLMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, - host_lj4, offset, special_lj, inum, nall, 300, - maxspecial, cell_size, gpu_split, screen); - if (!init_ok) - return false; - } + int init_ok=0; + if (world_me==0) + init_ok=LJLMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, + host_lj4, offset, special_lj, inum, nall, 300, + maxspecial, cell_size, gpu_split, screen); LJLMF.device->world_barrier(); if (message) @@ -75,23 +72,21 @@ bool ljl_gpu_init(const int ntypes, double **cutsq, last_gpu,i); fflush(screen); } - if (gpu_rank==i && world_me!=0) { - bool init_ok=LJLMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, - host_lj4, offset, special_lj, inum, nall, 300, - maxspecial, cell_size, gpu_split, - screen); - if (!init_ok) - return false; - } + if (gpu_rank==i && world_me!=0) + init_ok=LJLMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, + offset, special_lj, inum, nall, 300, maxspecial, + cell_size, gpu_split, screen); + LJLMF.device->gpu_barrier(); if (message) fprintf(screen,"Done.\n"); } if (message) fprintf(screen,"\n"); - LJLMF.estimate_gpu_overhead(); - return true; + if (init_ok==0) + LJLMF.estimate_gpu_overhead(); + return init_ok; } void ljl_gpu_clear() { diff --git a/lib/gpu/lj_cut_gpu_memory.cpp b/lib/gpu/lj_cut_gpu_memory.cpp index 27a759fec1..32911e22a0 100644 --- a/lib/gpu/lj_cut_gpu_memory.cpp +++ b/lib/gpu/lj_cut_gpu_memory.cpp @@ -42,16 +42,19 @@ int LJL_GPU_MemoryT::bytes_per_atom(const int max_nbors) const { } template -bool LJL_GPU_MemoryT::init(const int ntypes, - double **host_cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, - double **host_lj4, double **host_offset, - double *host_special_lj, const int nlocal, - const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *_screen) { - this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, - _screen,lj_cut_gpu_kernel); +int LJL_GPU_MemoryT::init(const int ntypes, + double **host_cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, + double *host_special_lj, const int nlocal, + const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *_screen) { + int success; + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, + _screen,lj_cut_gpu_kernel); + if (success!=0) + return success; // If atom type constants fit in shared memory use fast kernel int lj_types=ntypes; @@ -84,7 +87,7 @@ bool LJL_GPU_MemoryT::init(const int ntypes, _allocated=true; this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+sp_lj.row_bytes(); - return true; + return 0; } template diff --git a/lib/gpu/lj_cut_gpu_memory.h b/lib/gpu/lj_cut_gpu_memory.h index 123b739649..4b86b133a1 100644 --- a/lib/gpu/lj_cut_gpu_memory.h +++ b/lib/gpu/lj_cut_gpu_memory.h @@ -29,13 +29,20 @@ class LJL_GPU_Memory : public AtomicGPUMemory { /// Clear any previous data and set up for a new LAMMPS run /** \param max_nbors initial number of rows in the neighbor matrix * \param cell_size cutoff + skin - * \param gpu_split fraction of particles handled by device **/ - bool init(const int ntypes, double **host_cutsq, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **host_offset, double *host_special_lj, - const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *screen); + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen); /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ diff --git a/lib/gpu/lj_expand_gpu.cpp b/lib/gpu/lj_expand_gpu.cpp index 00ae841d94..fd0392f3fc 100644 --- a/lib/gpu/lj_expand_gpu.cpp +++ b/lib/gpu/lj_expand_gpu.cpp @@ -28,12 +28,12 @@ static LJE_GPU_Memory LJEMF; // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -bool lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, double **shift, double *special_lj, - const int inum, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, int &gpu_mode, - FILE *screen) { +int lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double **shift, double *special_lj, + const int inum, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, int &gpu_mode, + FILE *screen) { LJEMF.clear(); gpu_mode=LJEMF.device->gpu_mode(); double gpu_split=LJEMF.device->particle_split(); @@ -54,13 +54,11 @@ bool lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1, fflush(screen); } - if (world_me==0) { - bool init_ok=LJEMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, - host_lj4, offset, shift, special_lj, inum, nall, 300, - maxspecial, cell_size, gpu_split, screen); - if (!init_ok) - return false; - } + int init_ok=0; + if (world_me==0) + init_ok=LJEMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, + host_lj4, offset, shift, special_lj, inum, nall, 300, + maxspecial, cell_size, gpu_split, screen); LJEMF.device->world_barrier(); if (message) @@ -75,21 +73,21 @@ bool lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1, last_gpu,i); fflush(screen); } - if (gpu_rank==i && world_me!=0) { - bool init_ok=LJEMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, - host_lj4, offset, shift, special_lj, inum, nall, - 300,maxspecial, cell_size, gpu_split,screen); - if (!init_ok) - return false; - } + if (gpu_rank==i && world_me!=0) + init_ok=LJEMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, + offset, shift, special_lj, inum, nall, 300, maxspecial, + cell_size, gpu_split,screen); + LJEMF.device->world_barrier(); if (message) fprintf(screen,"Done.\n"); } if (message) fprintf(screen,"\n"); - LJEMF.estimate_gpu_overhead(); - return true; + + if (init_ok==0) + LJEMF.estimate_gpu_overhead(); + return init_ok; } void lje_gpu_clear() { diff --git a/lib/gpu/lj_expand_gpu_memory.cpp b/lib/gpu/lj_expand_gpu_memory.cpp index fe05b4fc2f..22eccab165 100644 --- a/lib/gpu/lj_expand_gpu_memory.cpp +++ b/lib/gpu/lj_expand_gpu_memory.cpp @@ -42,16 +42,19 @@ int LJE_GPU_MemoryT::bytes_per_atom(const int max_nbors) const { } template -bool LJE_GPU_MemoryT::init(const int ntypes, double **host_cutsq, - double **host_lj1, double **host_lj2, - double **host_lj3, double **host_lj4, - double **host_offset, double **host_shift, - double *host_special_lj, const int nlocal, - const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *_screen) { - this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, - _screen,lj_expand_gpu_kernel); +int LJE_GPU_MemoryT::init(const int ntypes, double **host_cutsq, + double **host_lj1, double **host_lj2, + double **host_lj3, double **host_lj4, + double **host_offset, double **host_shift, + double *host_special_lj, const int nlocal, + const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *_screen) { + int success; + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, + _screen,lj_expand_gpu_kernel); + if (success!=0) + return success; // If atom type constants fit in shared memory use fast kernel int lj_types=ntypes; @@ -84,7 +87,7 @@ bool LJE_GPU_MemoryT::init(const int ntypes, double **host_cutsq, _allocated=true; this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+sp_lj.row_bytes(); - return true; + return 0; } template diff --git a/lib/gpu/lj_expand_gpu_memory.h b/lib/gpu/lj_expand_gpu_memory.h index 516af0c93d..a04b11efd1 100644 --- a/lib/gpu/lj_expand_gpu_memory.h +++ b/lib/gpu/lj_expand_gpu_memory.h @@ -29,13 +29,20 @@ class LJE_GPU_Memory : public AtomicGPUMemory { /// Clear any previous data and set up for a new LAMMPS run /** \param max_nbors initial number of rows in the neighbor matrix * \param cell_size cutoff + skin - * \param gpu_split fraction of particles handled by device **/ - bool init(const int ntypes, double **host_cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **host_offset, double **host_shift, double *host_special_lj, - const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *screen); + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **host_offset, double **host_shift, double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen); /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ diff --git a/lib/gpu/ljc_cut_gpu.cpp b/lib/gpu/ljc_cut_gpu.cpp index 6703c072d9..de6f4f3e62 100644 --- a/lib/gpu/ljc_cut_gpu.cpp +++ b/lib/gpu/ljc_cut_gpu.cpp @@ -28,13 +28,13 @@ static LJC_GPU_Memory LJCMF; // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -bool ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, double *special_lj, const int inum, - const int nall, const int max_nbors, const int maxspecial, - const double cell_size, int &gpu_mode, FILE *screen, - double **host_cut_ljsq, double **host_cut_coulsq, - double *host_special_coul, const double qqrd2e) { +int ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int inum, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen, + double **host_cut_ljsq, double **host_cut_coulsq, + double *host_special_coul, const double qqrd2e) { LJCMF.clear(); gpu_mode=LJCMF.device->gpu_mode(); double gpu_split=LJCMF.device->particle_split(); @@ -55,15 +55,12 @@ bool ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1, fflush(screen); } - if (world_me==0) { - bool init_ok=LJCMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, - host_lj4, offset, special_lj, inum, nall, 300, - maxspecial, cell_size, gpu_split, screen, - host_cut_ljsq, host_cut_coulsq, host_special_coul, - qqrd2e); - if (!init_ok) - return false; - } + int init_ok=0; + if (world_me==0) + init_ok=LJCMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, + host_lj4, offset, special_lj, inum, nall, 300, + maxspecial, cell_size, gpu_split, screen, host_cut_ljsq, + host_cut_coulsq, host_special_coul, qqrd2e); LJCMF.device->world_barrier(); if (message) @@ -78,23 +75,22 @@ bool ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1, last_gpu,i); fflush(screen); } - if (gpu_rank==i && world_me!=0) { - bool init_ok=LJCMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, - host_lj4, offset, special_lj, inum, nall, 300, - maxspecial, cell_size, gpu_split, - screen, host_cut_ljsq, host_cut_coulsq, - host_special_coul, qqrd2e); - if (!init_ok) - return false; - } + if (gpu_rank==i && world_me!=0) + init_ok=LJCMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, + offset, special_lj, inum, nall, 300, maxspecial, + cell_size, gpu_split, screen, host_cut_ljsq, + host_cut_coulsq, host_special_coul, qqrd2e); + LJCMF.device->gpu_barrier(); if (message) fprintf(screen,"Done.\n"); } if (message) fprintf(screen,"\n"); - LJCMF.estimate_gpu_overhead(); - return true; + + if (init_ok==0) + LJCMF.estimate_gpu_overhead(); + return init_ok; } void ljc_gpu_clear() { diff --git a/lib/gpu/ljc_cut_gpu_memory.cpp b/lib/gpu/ljc_cut_gpu_memory.cpp index b662b653fe..7387c34aff 100644 --- a/lib/gpu/ljc_cut_gpu_memory.cpp +++ b/lib/gpu/ljc_cut_gpu_memory.cpp @@ -43,18 +43,21 @@ int LJC_GPU_MemoryT::bytes_per_atom(const int max_nbors) const { } template -bool LJC_GPU_MemoryT::init(const int ntypes, - double **host_cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, - double **host_lj4, double **host_offset, - double *host_special_lj, const int nlocal, - const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *_screen, - double **host_cut_ljsq, double **host_cut_coulsq, - double *host_special_coul, const double qqrd2e) { - this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, - _screen,ljc_cut_gpu_kernel); +int LJC_GPU_MemoryT::init(const int ntypes, + double **host_cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, + double *host_special_lj, const int nlocal, + const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *_screen, + double **host_cut_ljsq, double **host_cut_coulsq, + double *host_special_coul, const double qqrd2e) { + int success; + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, + _screen,ljc_cut_gpu_kernel); + if (success!=0) + return success; // If atom type constants fit in shared memory use fast kernel int lj_types=ntypes; @@ -95,7 +98,7 @@ bool LJC_GPU_MemoryT::init(const int ntypes, _allocated=true; this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+cutsq.row_bytes()+ sp_lj.row_bytes(); - return true; + return 0; } template diff --git a/lib/gpu/ljc_cut_gpu_memory.h b/lib/gpu/ljc_cut_gpu_memory.h index 4dedce957a..552f9d9881 100644 --- a/lib/gpu/ljc_cut_gpu_memory.h +++ b/lib/gpu/ljc_cut_gpu_memory.h @@ -29,15 +29,22 @@ class LJC_GPU_Memory : public ChargeGPUMemory { /// Clear any previous data and set up for a new LAMMPS run /** \param max_nbors initial number of rows in the neighbor matrix * \param cell_size cutoff + skin - * \param gpu_split fraction of particles handled by device **/ - bool init(const int ntypes, double **host_cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **host_offset, double *host_special_lj, - const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *screen, double **host_cut_ljsq, - double **host_cut_coulsq, double *host_special_coul, - const double qqrd2e); + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **host_offset, double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen, double **host_cut_ljsq, + double **host_cut_coulsq, double *host_special_coul, + const double qqrd2e); /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ diff --git a/lib/gpu/ljcl_cut_gpu.cpp b/lib/gpu/ljcl_cut_gpu.cpp index 04cdab81d9..167f41b374 100644 --- a/lib/gpu/ljcl_cut_gpu.cpp +++ b/lib/gpu/ljcl_cut_gpu.cpp @@ -28,14 +28,14 @@ static LJCL_GPU_Memory LJCLMF; // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -bool ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, double *special_lj, const int inum, - const int nall, const int max_nbors, const int maxspecial, - const double cell_size, int &gpu_mode, FILE *screen, - double **host_cut_ljsq, double host_cut_coulsq, - double *host_special_coul, const double qqrd2e, - const double g_ewald) { +int ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int inum, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen, + double **host_cut_ljsq, double host_cut_coulsq, + double *host_special_coul, const double qqrd2e, + const double g_ewald) { LJCLMF.clear(); gpu_mode=LJCLMF.device->gpu_mode(); double gpu_split=LJCLMF.device->particle_split(); @@ -56,15 +56,12 @@ bool ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, fflush(screen); } - if (world_me==0) { - bool init_ok=LJCLMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, - host_lj4, offset, special_lj, inum, nall, 300, - maxspecial, cell_size, gpu_split, screen, - host_cut_ljsq, host_cut_coulsq, host_special_coul, - qqrd2e,g_ewald); - if (!init_ok) - return false; - } + int init_ok=0; + if (world_me==0) + init_ok=LJCLMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, + offset, special_lj, inum, nall, 300, maxspecial, + cell_size, gpu_split, screen, host_cut_ljsq, + host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); LJCLMF.device->world_barrier(); if (message) @@ -79,23 +76,22 @@ bool ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, last_gpu,i); fflush(screen); } - if (gpu_rank==i && world_me!=0) { - bool init_ok=LJCLMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, - host_lj4, offset, special_lj, inum, nall, 300, - maxspecial, cell_size, gpu_split, - screen, host_cut_ljsq, host_cut_coulsq, - host_special_coul, qqrd2e, g_ewald); - if (!init_ok) - return false; - } + if (gpu_rank==i && world_me!=0) + init_ok=LJCLMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, + offset, special_lj, inum, nall, 300, maxspecial, + cell_size, gpu_split, screen, host_cut_ljsq, + host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); + LJCLMF.device->gpu_barrier(); if (message) fprintf(screen,"Done.\n"); } if (message) fprintf(screen,"\n"); - LJCLMF.estimate_gpu_overhead(); - return true; + + if (init_ok==0) + LJCLMF.estimate_gpu_overhead(); + return init_ok; } void ljcl_gpu_clear() { diff --git a/lib/gpu/ljcl_cut_gpu_memory.cpp b/lib/gpu/ljcl_cut_gpu_memory.cpp index 1f638c030c..ca7ac7d21f 100644 --- a/lib/gpu/ljcl_cut_gpu_memory.cpp +++ b/lib/gpu/ljcl_cut_gpu_memory.cpp @@ -43,7 +43,7 @@ int LJCL_GPU_MemoryT::bytes_per_atom(const int max_nbors) const { } template -bool LJCL_GPU_MemoryT::init(const int ntypes, +int LJCL_GPU_MemoryT::init(const int ntypes, double **host_cutsq, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **host_offset, @@ -54,8 +54,11 @@ bool LJCL_GPU_MemoryT::init(const int ntypes, double **host_cut_ljsq, const double host_cut_coulsq, double *host_special_coul, const double qqrd2e, const double g_ewald) { - this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, - _screen,ljcl_cut_gpu_kernel); + int success; + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, + _screen,ljcl_cut_gpu_kernel); + if (success!=0) + return success; // If atom type constants fit in shared memory use fast kernel int lj_types=ntypes; @@ -94,7 +97,7 @@ bool LJCL_GPU_MemoryT::init(const int ntypes, _allocated=true; this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+sp_lj.row_bytes(); - return true; + return 0; } template diff --git a/lib/gpu/ljcl_cut_gpu_memory.h b/lib/gpu/ljcl_cut_gpu_memory.h index 056ba0e41f..fae4c07040 100644 --- a/lib/gpu/ljcl_cut_gpu_memory.h +++ b/lib/gpu/ljcl_cut_gpu_memory.h @@ -29,15 +29,22 @@ class LJCL_GPU_Memory : public ChargeGPUMemory { /// Clear any previous data and set up for a new LAMMPS run /** \param max_nbors initial number of rows in the neighbor matrix * \param cell_size cutoff + skin - * \param gpu_split fraction of particles handled by device **/ - bool init(const int ntypes, double **host_cutsq, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **host_offset, double *host_special_lj, - const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *screen, double **host_cut_ljsq, - const double host_cut_coulsq, double *host_special_coul, - const double qqrd2e, const double g_ewald); + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen, double **host_cut_ljsq, + const double host_cut_coulsq, double *host_special_coul, + const double qqrd2e, const double g_ewald); /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ diff --git a/lib/gpu/morse_gpu.cpp b/lib/gpu/morse_gpu.cpp index ea86b4ef2a..96636128db 100644 --- a/lib/gpu/morse_gpu.cpp +++ b/lib/gpu/morse_gpu.cpp @@ -28,12 +28,12 @@ static MOR_GPU_Memory MORMF; // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -bool mor_gpu_init(const int ntypes, double **cutsq, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **offset, double *special_lj, - const int inum, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, int &gpu_mode, - FILE *screen) { +int mor_gpu_init(const int ntypes, double **cutsq, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **offset, double *special_lj, + const int inum, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, int &gpu_mode, + FILE *screen) { MORMF.clear(); gpu_mode=MORMF.device->gpu_mode(); double gpu_split=MORMF.device->particle_split(); @@ -54,13 +54,11 @@ bool mor_gpu_init(const int ntypes, double **cutsq, fflush(screen); } - if (world_me==0) { - bool init_ok=MORMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, - host_lj4, offset, special_lj, inum, nall, 300, - maxspecial, cell_size, gpu_split, screen); - if (!init_ok) - return false; - } + int init_ok=0; + if (world_me==0) + init_ok=MORMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, + host_lj4, offset, special_lj, inum, nall, 300, + maxspecial, cell_size, gpu_split, screen); MORMF.device->world_barrier(); if (message) @@ -75,22 +73,21 @@ bool mor_gpu_init(const int ntypes, double **cutsq, last_gpu,i); fflush(screen); } - if (gpu_rank==i && world_me!=0) { - bool init_ok=MORMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, - host_lj4, offset, special_lj, inum, nall, 300, - maxspecial, cell_size, gpu_split, - screen); - if (!init_ok) - return false; - } + if (gpu_rank==i && world_me!=0) + init_ok=MORMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, + offset, special_lj, inum, nall, 300, maxspecial, + cell_size, gpu_split, screen); + MORMF.device->gpu_barrier(); if (message) fprintf(screen,"Done.\n"); } if (message) fprintf(screen,"\n"); - MORMF.estimate_gpu_overhead(); - return true; + + if (init_ok==0) + MORMF.estimate_gpu_overhead(); + return init_ok; } void mor_gpu_clear() { diff --git a/lib/gpu/morse_gpu_memory.cpp b/lib/gpu/morse_gpu_memory.cpp index 037341f23c..28f980cdb6 100644 --- a/lib/gpu/morse_gpu_memory.cpp +++ b/lib/gpu/morse_gpu_memory.cpp @@ -42,16 +42,19 @@ int MOR_GPU_MemoryT::bytes_per_atom(const int max_nbors) const { } template -bool MOR_GPU_MemoryT::init(const int ntypes, - double **host_cutsq, double **host_morse1, - double **host_r0, double **host_alpha, - double **host_d0, double **host_offset, - double *host_special_lj, const int nlocal, - const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *_screen) { - this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, - _screen,morse_gpu_kernel); +int MOR_GPU_MemoryT::init(const int ntypes, + double **host_cutsq, double **host_morse1, + double **host_r0, double **host_alpha, + double **host_d0, double **host_offset, + double *host_special_lj, const int nlocal, + const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *_screen) { + int success; + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, + _screen,morse_gpu_kernel); + if (success!=0) + return success; // If atom type constants fit in shared memory use fast kernel int types=ntypes; @@ -83,7 +86,7 @@ bool MOR_GPU_MemoryT::init(const int ntypes, _allocated=true; this->_max_bytes=mor1.row_bytes()+mor2.row_bytes()+sp_lj.row_bytes(); - return true; + return 0; } template diff --git a/lib/gpu/morse_gpu_memory.h b/lib/gpu/morse_gpu_memory.h index b7480dbb84..a7bbaa1f9f 100644 --- a/lib/gpu/morse_gpu_memory.h +++ b/lib/gpu/morse_gpu_memory.h @@ -29,13 +29,20 @@ class MOR_GPU_Memory : public AtomicGPUMemory { /// Clear any previous data and set up for a new LAMMPS run /** \param max_nbors initial number of rows in the neighbor matrix * \param cell_size cutoff + skin - * \param gpu_split fraction of particles handled by device **/ - bool init(const int ntypes, double **host_cutsq, - double **host_morse1, double **host_r0, double **host_alpha, - double **host_d0, double **host_offset, double *host_special_lj, - const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, - const double gpu_split, FILE *screen); + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, + double **host_morse1, double **host_r0, double **host_alpha, + double **host_d0, double **host_offset, double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen); /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ diff --git a/lib/gpu/pair_gpu_ans.h b/lib/gpu/pair_gpu_ans.h index ae737fe7c0..ccd1f40b2d 100644 --- a/lib/gpu/pair_gpu_ans.h +++ b/lib/gpu/pair_gpu_ans.h @@ -29,7 +29,6 @@ using namespace ucl_opencl; #else -#include "cudpp.h" #include "geryon/nvd_timer.h" #include "geryon/nvd_mat.h" using namespace ucl_cudadr; diff --git a/lib/gpu/pair_gpu_device.cpp b/lib/gpu/pair_gpu_device.cpp index 83d20a51c9..5c68cf73d0 100644 --- a/lib/gpu/pair_gpu_device.cpp +++ b/lib/gpu/pair_gpu_device.cpp @@ -130,15 +130,22 @@ bool PairGPUDeviceT::init_device(MPI_Comm world, MPI_Comm replica, return true; } + /** Success will be: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + template -bool PairGPUDeviceT::init(PairGPUAns &ans, const bool charge, - const bool rot, const int nlocal, - const int host_nlocal, const int nall, - PairGPUNbor *nbor, const int maxspecial, - const int gpu_host, const int max_nbors, - const double cell_size, const bool pre_cut) { +int PairGPUDeviceT::init(PairGPUAns &ans, const bool charge, + const bool rot, const int nlocal, + const int host_nlocal, const int nall, + PairGPUNbor *nbor, const int maxspecial, + const int gpu_host, const int max_nbors, + const double cell_size, const bool pre_cut) { if (!_device_init) - return false; + return -1; // Counts of data transfers for timing overhead estimates _data_in_estimate=0; @@ -156,7 +163,7 @@ bool PairGPUDeviceT::init(PairGPUAns &ans, const bool charge, if (_init_count==0) { // Initialize atom and nbor data if (!atom.init(nall,charge,rot,*gpu,gpu_nbor,gpu_nbor && maxspecial>0)) - return false; + return -3; compile_kernels(); _data_in_estimate++; if (charge) @@ -168,40 +175,42 @@ bool PairGPUDeviceT::init(PairGPUAns &ans, const bool charge, _data_in_estimate++; if (atom.quat()==false && rot) _data_in_estimate++; - atom.add_fields(charge,rot,gpu_nbor,gpu_nbor && maxspecial); + if (!atom.add_fields(charge,rot,gpu_nbor,gpu_nbor && maxspecial)) + return -3; } if (!ans.init(ef_nlocal,charge,rot,*gpu)) - return false; + return -3; if (!nbor->init(&_nbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial, *gpu,gpu_nbor,gpu_host,pre_cut)) - return false; + return -3; nbor->cell_size(cell_size); _init_count++; - return true; + return 0; } template -bool PairGPUDeviceT::init(PairGPUAns &ans, const int nlocal, - const int nall) { +int PairGPUDeviceT::init(PairGPUAns &ans, const int nlocal, + const int nall) { if (!_device_init) - return false; + return -1; if (_init_count==0) { // Initialize atom and nbor data if (!atom.init(nall,true,false,*gpu,false,false)) - return false; + return -3; compile_kernels(); } else - atom.add_fields(true,false,false,false); + if (!atom.add_fields(true,false,false,false)) + return -3; if (!ans.init(nlocal,true,false,*gpu)) - return false; + return -3; _init_count++; - return true; + return 0; } template @@ -238,7 +247,10 @@ void PairGPUDeviceT::init_message(FILE *screen, const char *name, fprintf(screen,"-------------------------------------"); fprintf(screen,"-------------------------------------\n"); - for (int i=first_gpu; i<=last_gpu; i++) { + int last=last_gpu+1; + if (last>gpu->num_devices()) + last=gpu->num_devices(); + for (int i=first_gpu; iname(i)+", "+toa(gpu->cores(i))+" cores, "+fs+ toa(gpu->gigabytes(i))+" GB, "+toa(gpu->clock_rate(i))+ " GHZ ("; diff --git a/lib/gpu/pair_gpu_device.h b/lib/gpu/pair_gpu_device.h index 77882d7f3f..6f40ef160e 100644 --- a/lib/gpu/pair_gpu_device.h +++ b/lib/gpu/pair_gpu_device.h @@ -56,16 +56,29 @@ class PairGPUDevice { * \param max_nbors Initial number of rows in the neighbor matrix * \param cell_size cutoff+skin * \param pre_cut True if cutoff test will be performed in separate kernel - * than the force kernel **/ - bool init(PairGPUAns &a, const bool charge, const bool rot, - const int nlocal, const int host_nlocal, const int nall, - PairGPUNbor *nbor, const int maxspecial, const int gpu_host, - const int max_nbors, const double cell_size, const bool pre_cut); + * than the force kernel + * Returns: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(PairGPUAns &a, const bool charge, const bool rot, + const int nlocal, const int host_nlocal, const int nall, + PairGPUNbor *nbor, const int maxspecial, const int gpu_host, + const int max_nbors, const double cell_size, const bool pre_cut); /// Initialize the device for Atom storage only /** \param nlocal Total number of local particles to allocate memory for - * \param nall Total number of local+ghost particles **/ - bool init(PairGPUAns &ans, const int nlocal, const int nall); + * \param nall Total number of local+ghost particles + * + * Returns: + * - 0 if successfull + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(PairGPUAns &ans, const int nlocal, const int nall); /// Output a message for pair_style acceleration with device stats void init_message(FILE *screen, const char *name, diff --git a/lib/gpu/pair_gpu_nbor.cpp b/lib/gpu/pair_gpu_nbor.cpp index 182541260e..aed1da0a12 100644 --- a/lib/gpu/pair_gpu_nbor.cpp +++ b/lib/gpu/pair_gpu_nbor.cpp @@ -75,6 +75,9 @@ bool PairGPUNbor::init(PairGPUNborShared *shared, const int inum, success=success && (host_packed.alloc(2*IJ_SIZE,*dev, UCL_WRITE_OPTIMIZED)==UCL_SUCCESS); alloc(success); + if (!success) + return false; + if (_use_packing==false) _shared->compile_kernels(devi,gpu_nbor); @@ -115,10 +118,14 @@ void PairGPUNbor::alloc(bool &success) { success=success && (dev_host_numj.alloc(_max_host,*dev, UCL_WRITE_ONLY)==UCL_SUCCESS); success=success && (host_ilist.alloc(nt,*dev,UCL_NOT_PINNED)==UCL_SUCCESS); + if (!success) + return; for (int i=0; iinit(*ans,nlocal,nall)) { - flag=-2; + flag=device->init(*ans,nlocal,nall); + if (flag!=0) return 0; - } if (sizeof(grdtyp)==sizeof(double) && device->double_precision()==false) { flag=-5; return 0; diff --git a/lib/gpu/pppm_l_gpu.cpp b/lib/gpu/pppm_l_gpu.cpp index 9396fd0bf5..200d2f1685 100644 --- a/lib/gpu/pppm_l_gpu.cpp +++ b/lib/gpu/pppm_l_gpu.cpp @@ -58,11 +58,10 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall, success=0; grdtyp * host_brick=NULL; - if (world_me==0) { + if (world_me==0) host_brick=pppm.init(nlocal,nall,screen,order,nxlo_out,nylo_out,nzlo_out, nxhi_out,nyhi_out,nzhi_out,rho_coeff,vd_brick, slab_volfactor,nx_pppm,ny_pppm,nz_pppm,success); - } pppm.device->world_barrier(); if (message) @@ -77,12 +76,12 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall, last_gpu,i); fflush(screen); } - if (gpu_rank==i && world_me!=0) { + if (gpu_rank==i && world_me!=0) host_brick=pppm.init(nlocal,nall,screen,order,nxlo_out,nylo_out, nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff, vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm, success); - } + pppm.device->gpu_barrier(); if (message) fprintf(screen,"Done.\n"); diff --git a/src/GPU/Install.sh b/src/GPU/Install.sh index 9541f82782..83c1f61aa1 100644 --- a/src/GPU/Install.sh +++ b/src/GPU/Install.sh @@ -67,6 +67,7 @@ if (test $1 = 1) then cp fix_gpu.cpp .. cp fix_gpu.h .. + cp gpu_extra.h .. cp pair_omp_gpu.cpp .. cp pair_omp_gpu.h .. @@ -114,6 +115,7 @@ elif (test $1 = 0) then rm ../pair_cg_cmm_coul_msm.h rm ../pair_cg_cmm_coul_msm_gpu.h rm ../fix_gpu.h + rm ../gpu_extra.h rm ../pair_omp_gpu.h fi diff --git a/src/GPU/fix_gpu.cpp b/src/GPU/fix_gpu.cpp index 8c7b1837f6..626eb24292 100644 --- a/src/GPU/fix_gpu.cpp +++ b/src/GPU/fix_gpu.cpp @@ -24,6 +24,7 @@ #include "modify.h" #include "domain.h" #include "universe.h" +#include "gpu_extra.h" using namespace LAMMPS_NS; @@ -82,9 +83,11 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : error->all("No OpenMP support compiled in."); #endif + int gpu_flag = 0; if (!lmp_init_device(universe->uworld,world,first_gpu,last_gpu,_gpu_mode, _particle_split,nthreads)) - error->one("Could not find or initialize a specified accelerator device."); + gpu_flag = -2; + GPU_EXTRA::check_flag(gpu_flag,error,world); } /* ---------------------------------------------------------------------- */ diff --git a/src/GPU/gpu_extra.h b/src/GPU/gpu_extra.h new file mode 100755 index 0000000000..806757575a --- /dev/null +++ b/src/GPU/gpu_extra.h @@ -0,0 +1,47 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mike Brown (ORNL) +------------------------------------------------------------------------- */ + +#ifndef LMP_GPU_EXTRA_H +#define LMP_GPU_EXTRA_H + +#include "error.h" + +namespace GPU_EXTRA { + + inline void check_flag(int error_flag, LAMMPS_NS::Error *error, + MPI_Comm &world) { + int all_success; + MPI_Allreduce(&error_flag, &all_success, 1, MPI_INT, MPI_MIN, world); + if (all_success != 0) { + if (all_success == -1) + error->all("Could not find fix gpu"); + else if (all_success == -2) + error->all("Could not find/initialize a specified accelerator device."); + else if (all_success == -3) + error->all("Out of memory on GPU."); + else if (all_success == -4) + error->all("GPU library not compiled for this GPU."); + else if (all_success == -5) + error->all("Double precision is not supported on this GPU."); + else + error->all("Unknown error in GPU library."); + } + } + +} + +#endif diff --git a/src/GPU/pair_cg_cmm_coul_long_gpu.cpp b/src/GPU/pair_cg_cmm_coul_long_gpu.cpp index d25f5860cb..b01400dfdc 100644 --- a/src/GPU/pair_cg_cmm_coul_long_gpu.cpp +++ b/src/GPU/pair_cg_cmm_coul_long_gpu.cpp @@ -35,6 +35,7 @@ #include "domain.h" #include "string.h" #include "kspace.h" +#include "gpu_extra.h" #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) @@ -49,14 +50,14 @@ // External functions from cuda library for atom decomposition -bool cmml_gpu_init(const int ntypes, double **cutsq, int **cg_type, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **offset, double *special_lj, - const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, int &gpu_mode, - FILE *screen, double **host_cut_ljsq, double host_cut_coulsq, - double *host_special_coul, const double qqrd2e, - const double g_ewald); +int cmml_gpu_init(const int ntypes, double **cutsq, int **cg_type, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **offset, double *special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, int &gpu_mode, + FILE *screen, double **host_cut_ljsq, double host_cut_coulsq, + double *host_special_coul, const double qqrd2e, + const double g_ewald); void cmml_gpu_clear(); int ** cmml_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x, int *host_type, double *sublo, @@ -177,14 +178,13 @@ void PairCGCMMCoulLongGPU::init_style() int maxspecial=0; if (atom->molecular) maxspecial=atom->maxspecial; - bool init_ok = cmml_gpu_init(atom->ntypes+1, cutsq, cg_type, lj1, lj2, lj3, - lj4, offset, force->special_lj, atom->nlocal, - atom->nlocal+atom->nghost, 300, maxspecial, - cell_size, gpu_mode, screen, cut_ljsq, - cut_coulsq_global, force->special_coul, - force->qqrd2e, g_ewald); - if (!init_ok) - error->one("Insufficient memory on accelerator (or no fix gpu).\n"); + int success = cmml_gpu_init(atom->ntypes+1, cutsq, cg_type, lj1, lj2, lj3, + lj4, offset, force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen, cut_ljsq, + cut_coulsq_global, force->special_coul, + force->qqrd2e, g_ewald); + GPU_EXTRA::check_flag(success,error,world); if (gpu_mode != GPU_NEIGH) { int irequest = neighbor->request(this); diff --git a/src/GPU/pair_cg_cmm_coul_msm_gpu.cpp b/src/GPU/pair_cg_cmm_coul_msm_gpu.cpp index 8ca92c4227..b3f934946c 100644 --- a/src/GPU/pair_cg_cmm_coul_msm_gpu.cpp +++ b/src/GPU/pair_cg_cmm_coul_msm_gpu.cpp @@ -34,6 +34,7 @@ #include "domain.h" #include "string.h" #include "kspace.h" +#include "gpu_extra.h" #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) @@ -41,14 +42,14 @@ enum {C3=0,C4=1}; // External functions from cuda library for atom decomposition -bool cmmm_gpu_init(const int ntypes, double **cutsq, int **cg_type, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **offset, double *special_lj, - const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, int &gpu_mode, - FILE *screen, double **host_cut_ljsq, double host_cut_coulsq, - double *host_special_coul, const double qqrd2e, - const int smooth); +int cmmm_gpu_init(const int ntypes, double **cutsq, int **cg_type, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **offset, double *special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, int &gpu_mode, + FILE *screen, double **host_cut_ljsq, double host_cut_coulsq, + double *host_special_coul, const double qqrd2e, + const int smooth); void cmmm_gpu_clear(); int ** cmmm_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x, int *host_type, double *sublo, @@ -157,14 +158,13 @@ void PairCGCMMCoulMSMGPU::init_style() int maxspecial=0; if (atom->molecular) maxspecial=atom->maxspecial; - bool init_ok = cmmm_gpu_init(atom->ntypes+1, cutsq, cg_type, lj1, lj2, lj3, - lj4, offset, force->special_lj, atom->nlocal, - atom->nlocal+atom->nghost, 300, maxspecial, - cell_size, gpu_mode, screen, cut_ljsq, - cut_coulsq_global, force->special_coul, - force->qqrd2e,_smooth); - if (!init_ok) - error->one("Insufficient memory on accelerator (or no fix gpu).\n"); + int success = cmmm_gpu_init(atom->ntypes+1, cutsq, cg_type, lj1, lj2, lj3, + lj4, offset, force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen, cut_ljsq, + cut_coulsq_global, force->special_coul, + force->qqrd2e,_smooth); + GPU_EXTRA::check_flag(success,error,world); if (gpu_mode != GPU_NEIGH) { int irequest = neighbor->request(this); diff --git a/src/GPU/pair_cg_cmm_gpu.cpp b/src/GPU/pair_cg_cmm_gpu.cpp index 158845846f..a252303712 100644 --- a/src/GPU/pair_cg_cmm_gpu.cpp +++ b/src/GPU/pair_cg_cmm_gpu.cpp @@ -34,18 +34,19 @@ #include "update.h" #include "domain.h" #include "string.h" +#include "gpu_extra.h" #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) // External functions from cuda library for atom decomposition -bool cmm_gpu_init(const int ntypes, double **cutsq, int **cg_types, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **offset, double *special_lj, - const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, int &gpu_mode, - FILE *screen); +int cmm_gpu_init(const int ntypes, double **cutsq, int **cg_types, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **offset, double *special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, int &gpu_mode, + FILE *screen); void cmm_gpu_clear(); int ** cmm_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x, int *host_type, double *sublo, @@ -150,12 +151,11 @@ void PairCGCMMGPU::init_style() int maxspecial=0; if (atom->molecular) maxspecial=atom->maxspecial; - bool init_ok = cmm_gpu_init(atom->ntypes+1,cutsq,cg_type,lj1,lj2,lj3,lj4, - offset, force->special_lj, atom->nlocal, - atom->nlocal+atom->nghost, 300, maxspecial, - cell_size, gpu_mode, screen); - if (!init_ok) - error->one("Insufficient memory on accelerator (or no fix gpu).\n"); + int success = cmm_gpu_init(atom->ntypes+1,cutsq,cg_type,lj1,lj2,lj3,lj4, + offset, force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen); + GPU_EXTRA::check_flag(success,error,world); if (gpu_mode != GPU_NEIGH) { int irequest = neighbor->request(this); diff --git a/src/GPU/pair_gayberne_gpu.cpp b/src/GPU/pair_gayberne_gpu.cpp index 66673cedd6..85a20faf7f 100644 --- a/src/GPU/pair_gayberne_gpu.cpp +++ b/src/GPU/pair_gayberne_gpu.cpp @@ -35,20 +35,21 @@ #include "domain.h" #include "update.h" #include "string.h" +#include "gpu_extra.h" #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) // External functions from cuda library for atom decomposition -bool gb_gpu_init(const int ntypes, const double gamma, const double upsilon, - const double mu, double **shape, double **well, double **cutsq, - double **sigma, double **epsilon, double *host_lshape, - int **form, double **host_lj1, double **host_lj2, - double **host_lj3, double **host_lj4, double **offset, - double *special_lj, const int nlocal, const int nall, - const int max_nbors, const double cell_size, - int &gpu_mode, FILE *screen); +int gb_gpu_init(const int ntypes, const double gamma, const double upsilon, + const double mu, double **shape, double **well, double **cutsq, + double **sigma, double **epsilon, double *host_lshape, + int **form, double **host_lj1, double **host_lj2, + double **host_lj3, double **host_lj4, double **offset, + double *special_lj, const int nlocal, const int nall, + const int max_nbors, const double cell_size, + int &gpu_mode, FILE *screen); void gb_gpu_clear(); int ** gb_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x, int *host_type, double *sublo, @@ -165,13 +166,12 @@ void PairGayBerneGPU::init_style() double cell_size = sqrt(maxcut) + neighbor->skin; - bool init_ok = gb_gpu_init(atom->ntypes+1, gamma, upsilon, mu, - shape, well, cutsq, sigma, epsilon, lshape, form, - lj1, lj2, lj3, lj4, offset, force->special_lj, - atom->nlocal, atom->nlocal+atom->nghost, 300, - cell_size, gpu_mode, screen); - if (!init_ok) - error->one("Insufficient memory on accelerator (or no fix gpu)."); + int success = gb_gpu_init(atom->ntypes+1, gamma, upsilon, mu, + shape, well, cutsq, sigma, epsilon, lshape, form, + lj1, lj2, lj3, lj4, offset, force->special_lj, + atom->nlocal, atom->nlocal+atom->nghost, 300, + cell_size, gpu_mode, screen); + GPU_EXTRA::check_flag(success,error,world); if (gpu_mode != GPU_NEIGH) { int irequest = neighbor->request(this); diff --git a/src/GPU/pair_lj96_cut_gpu.cpp b/src/GPU/pair_lj96_cut_gpu.cpp index e00fe0a2be..f83646704a 100644 --- a/src/GPU/pair_lj96_cut_gpu.cpp +++ b/src/GPU/pair_lj96_cut_gpu.cpp @@ -34,17 +34,18 @@ #include "update.h" #include "domain.h" #include "string.h" +#include "gpu_extra.h" #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) // External functions from cuda library for atom decomposition -bool lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, double *special_lj, const int nlocal, - const int nall, const int max_nbors, const int maxspecial, - const double cell_size, int &gpu_mode, FILE *screen); +int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen); void lj96_gpu_clear(); int ** lj96_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x, int *host_type, double *sublo, @@ -149,12 +150,11 @@ void PairLJ96CutGPU::init_style() int maxspecial=0; if (atom->molecular) maxspecial=atom->maxspecial; - bool init_ok = lj96_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, - offset, force->special_lj, atom->nlocal, - atom->nlocal+atom->nghost, 300, maxspecial, - cell_size, gpu_mode, screen); - if (!init_ok) - error->one("Insufficient memory on accelerator (or no fix gpu).\n"); + int success = lj96_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, + offset, force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen); + GPU_EXTRA::check_flag(success,error,world); if (gpu_mode != GPU_NEIGH) { int irequest = neighbor->request(this); diff --git a/src/GPU/pair_lj_charmm_coul_long_gpu.cpp b/src/GPU/pair_lj_charmm_coul_long_gpu.cpp index 851225d07c..802c21a2e7 100644 --- a/src/GPU/pair_lj_charmm_coul_long_gpu.cpp +++ b/src/GPU/pair_lj_charmm_coul_long_gpu.cpp @@ -35,6 +35,7 @@ #include "domain.h" #include "string.h" #include "kspace.h" +#include "gpu_extra.h" #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) @@ -49,16 +50,16 @@ // External functions from cuda library for atom decomposition -bool crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, double *special_lj, const int nlocal, - const int nall, const int max_nbors, const int maxspecial, - const double cell_size, int &gpu_mode, FILE *screen, - double host_cut_ljsq, double host_cut_coulsq, - double *host_special_coul, const double qqrd2e, - const double g_ewald, const double cut_lj_innersq, - const double denom_lj, double **epsilon, double **sigma, - const bool mix_arithmetic); +int crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen, + double host_cut_ljsq, double host_cut_coulsq, + double *host_special_coul, const double qqrd2e, + const double g_ewald, const double cut_lj_innersq, + const double denom_lj, double **epsilon, double **sigma, + const bool mix_arithmetic); void crml_gpu_clear(); int ** crml_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x, int *host_type, @@ -192,15 +193,14 @@ void PairLJCharmmCoulLongGPU::init_style() arithmetic = false; } - bool init_ok = crml_gpu_init(atom->ntypes+1, cut_bothsq, lj1, lj2, lj3, lj4, - offset, force->special_lj, atom->nlocal, - atom->nlocal+atom->nghost, 300, maxspecial, - cell_size, gpu_mode, screen, cut_ljsq, - cut_coulsq, force->special_coul, force->qqrd2e, - g_ewald, cut_lj_innersq,denom_lj,epsilon,sigma, - arithmetic); - if (!init_ok) - error->one("Insufficient memory on accelerator (or no fix gpu).\n"); + int success = crml_gpu_init(atom->ntypes+1, cut_bothsq, lj1, lj2, lj3, lj4, + offset, force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen, cut_ljsq, + cut_coulsq, force->special_coul, force->qqrd2e, + g_ewald, cut_lj_innersq,denom_lj,epsilon,sigma, + arithmetic); + GPU_EXTRA::check_flag(success,error,world); if (gpu_mode != GPU_NEIGH) { int irequest = neighbor->request(this); diff --git a/src/GPU/pair_lj_cut_coul_cut_gpu.cpp b/src/GPU/pair_lj_cut_coul_cut_gpu.cpp index 7001f61f7a..c960081341 100644 --- a/src/GPU/pair_lj_cut_coul_cut_gpu.cpp +++ b/src/GPU/pair_lj_cut_coul_cut_gpu.cpp @@ -34,19 +34,20 @@ #include "update.h" #include "domain.h" #include "string.h" +#include "gpu_extra.h" #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) // External functions from cuda library for atom decomposition -bool ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, double *special_lj, const int nlocal, - const int nall, const int max_nbors, const int maxspecial, - const double cell_size, int &gpu_mode, FILE *screen, - double **host_cut_ljsq, double **host_cut_coulsq, - double *host_special_coul, const double qqrd2e); +int ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen, + double **host_cut_ljsq, double **host_cut_coulsq, + double *host_special_coul, const double qqrd2e); void ljc_gpu_clear(); int ** ljc_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x, int *host_type, @@ -157,13 +158,12 @@ void PairLJCutCoulCutGPU::init_style() int maxspecial=0; if (atom->molecular) maxspecial=atom->maxspecial; - bool init_ok = ljc_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, - offset, force->special_lj, atom->nlocal, - atom->nlocal+atom->nghost, 300, maxspecial, - cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq, - force->special_coul, force->qqrd2e); - if (!init_ok) - error->one("Insufficient memory on accelerator (or no fix gpu).\n"); + int success = ljc_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, + offset, force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq, + force->special_coul, force->qqrd2e); + GPU_EXTRA::check_flag(success,error,world); if (gpu_mode != GPU_NEIGH) { int irequest = neighbor->request(this); diff --git a/src/GPU/pair_lj_cut_coul_long_gpu.cpp b/src/GPU/pair_lj_cut_coul_long_gpu.cpp index 656006a3de..ac8f983828 100644 --- a/src/GPU/pair_lj_cut_coul_long_gpu.cpp +++ b/src/GPU/pair_lj_cut_coul_long_gpu.cpp @@ -35,6 +35,7 @@ #include "domain.h" #include "string.h" #include "kspace.h" +#include "gpu_extra.h" #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) @@ -49,14 +50,14 @@ // External functions from cuda library for atom decomposition -bool ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, double *special_lj, const int nlocal, - const int nall, const int max_nbors, const int maxspecial, - const double cell_size, int &gpu_mode, FILE *screen, - double **host_cut_ljsq, double host_cut_coulsq, - double *host_special_coul, const double qqrd2e, - const double g_ewald); +int ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen, + double **host_cut_ljsq, double host_cut_coulsq, + double *host_special_coul, const double qqrd2e, + const double g_ewald); void ljcl_gpu_clear(); int ** ljcl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x, int *host_type, @@ -180,13 +181,12 @@ void PairLJCutCoulLongGPU::init_style() int maxspecial=0; if (atom->molecular) maxspecial=atom->maxspecial; - bool init_ok = ljcl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, + int success = ljcl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj, atom->nlocal, atom->nlocal+atom->nghost, 300, maxspecial, cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, g_ewald); - if (!init_ok) - error->one("Insufficient memory on accelerator (or no fix gpu).\n"); + GPU_EXTRA::check_flag(success,error,world); if (gpu_mode != GPU_NEIGH) { int irequest = neighbor->request(this); diff --git a/src/GPU/pair_lj_cut_gpu.cpp b/src/GPU/pair_lj_cut_gpu.cpp index 55017144d5..cb84a889e9 100644 --- a/src/GPU/pair_lj_cut_gpu.cpp +++ b/src/GPU/pair_lj_cut_gpu.cpp @@ -34,17 +34,18 @@ #include "update.h" #include "domain.h" #include "string.h" +#include "gpu_extra.h" #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) // External functions from cuda library for atom decomposition -bool ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, double *special_lj, const int nlocal, - const int nall, const int max_nbors, const int maxspecial, - const double cell_size, int &gpu_mode, FILE *screen); +int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen); void ljl_gpu_clear(); int ** ljl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x, int *host_type, @@ -149,12 +150,11 @@ void PairLJCutGPU::init_style() int maxspecial=0; if (atom->molecular) maxspecial=atom->maxspecial; - bool init_ok = ljl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, - offset, force->special_lj, atom->nlocal, - atom->nlocal+atom->nghost, 300, maxspecial, - cell_size, gpu_mode, screen); - if (!init_ok) - error->one("Insufficient memory on accelerator (or no fix gpu).\n"); + int success = ljl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, + offset, force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen); + GPU_EXTRA::check_flag(success,error,world); if (gpu_mode != GPU_NEIGH) { int irequest = neighbor->request(this); diff --git a/src/GPU/pair_lj_cut_tgpu.cpp b/src/GPU/pair_lj_cut_tgpu.cpp index 8f4871742c..31a2d95a71 100644 --- a/src/GPU/pair_lj_cut_tgpu.cpp +++ b/src/GPU/pair_lj_cut_tgpu.cpp @@ -33,17 +33,18 @@ #include "update.h" #include "domain.h" #include "string.h" +#include "gpu_extra.h" #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) // External functions from cuda library for atom decomposition -bool ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, double *special_lj, const int nlocal, - const int nall, const int max_nbors, const int maxspecial, - const double cell_size, int &gpu_mode, FILE *screen); +int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen); void ljl_gpu_clear(); int ** ljl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x, int *host_type, @@ -161,12 +162,11 @@ void PairLJCutTGPU::init_style() int maxspecial=0; if (atom->molecular) maxspecial=atom->maxspecial; - bool init_ok = ljl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, + int success = ljl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj, atom->nlocal, atom->nlocal+atom->nghost, 300, maxspecial, cell_size, gpu_mode, screen); - if (!init_ok) - error->one("Insufficient memory on accelerator (or no fix gpu).\n"); + GPU_EXTRA::check_flag(success,error,world); if (gpu_mode != GPU_NEIGH) { int irequest = neighbor->request(this); diff --git a/src/GPU/pair_lj_expand_gpu.cpp b/src/GPU/pair_lj_expand_gpu.cpp index ec6306546e..a55f558072 100644 --- a/src/GPU/pair_lj_expand_gpu.cpp +++ b/src/GPU/pair_lj_expand_gpu.cpp @@ -34,18 +34,19 @@ #include "update.h" #include "domain.h" #include "string.h" +#include "gpu_extra.h" #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) // External functions from cuda library for atom decomposition -bool lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, double **shift, double *special_lj, - const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, int &gpu_mode, - FILE *screen); +int lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double **shift, double *special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, int &gpu_mode, + FILE *screen); void lje_gpu_clear(); int ** lje_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x, int *host_type, double *sublo, diff --git a/src/GPU/pair_morse_gpu.cpp b/src/GPU/pair_morse_gpu.cpp index df65203f68..35b1b99eb0 100644 --- a/src/GPU/pair_morse_gpu.cpp +++ b/src/GPU/pair_morse_gpu.cpp @@ -34,17 +34,18 @@ #include "update.h" #include "domain.h" #include "string.h" +#include "gpu_extra.h" #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) // External functions from cuda library for atom decomposition -bool mor_gpu_init(const int ntypes, double **cutsq, double **host_morse1, - double **host_r0, double **host_alpha, double **host_d0, - double **offset, double *special_lj, const int nlocal, - const int nall, const int max_nbors, const int maxspecial, - const double cell_size, int &gpu_mode, FILE *screen); +int mor_gpu_init(const int ntypes, double **cutsq, double **host_morse1, + double **host_r0, double **host_alpha, double **host_d0, + double **offset, double *special_lj, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen); void mor_gpu_clear(); int ** mor_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x, int *host_type, @@ -146,12 +147,11 @@ void PairMorseGPU::init_style() int maxspecial=0; if (atom->molecular) maxspecial=atom->maxspecial; - bool init_ok = mor_gpu_init(atom->ntypes+1, cutsq, morse1, r0, alpha, d0, - offset, force->special_lj, atom->nlocal, - atom->nlocal+atom->nghost, 300, maxspecial, - cell_size, gpu_mode, screen); - if (!init_ok) - error->one("Insufficient memory on accelerator (or no fix gpu).\n"); + int success = mor_gpu_init(atom->ntypes+1, cutsq, morse1, r0, alpha, d0, + offset, force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen); + GPU_EXTRA::check_flag(success,error,world); if (gpu_mode != GPU_NEIGH) { int irequest = neighbor->request(this); diff --git a/src/GPU/pppm_gpu.cpp b/src/GPU/pppm_gpu.cpp index af445e6e8b..d447419ce7 100644 --- a/src/GPU/pppm_gpu.cpp +++ b/src/GPU/pppm_gpu.cpp @@ -12,7 +12,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) + Contributing authors: Mike Brown (ORNL) ------------------------------------------------------------------------- */ #include "mpi.h" diff --git a/src/GPU/pppm_gpu_double.cpp b/src/GPU/pppm_gpu_double.cpp index 33a6bf8d0f..4ddc7e5ad5 100644 --- a/src/GPU/pppm_gpu_double.cpp +++ b/src/GPU/pppm_gpu_double.cpp @@ -12,7 +12,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) + Contributing authors: Mike Brown (ORNL) ------------------------------------------------------------------------- */ #include "mpi.h" @@ -34,6 +34,7 @@ #include "remap_wrap.h" #include "memory.h" #include "error.h" +#include "gpu_extra.h" #define grdtyp double @@ -102,22 +103,7 @@ void PPPMGPUDouble::init() slab_volfactor, nx_pppm, ny_pppm, nz_pppm, success); - int all_success; - MPI_Allreduce(&success, &all_success, 1, MPI_INT, MPI_MIN, world); - if (all_success != 0) { - if (all_success == -1) - error->all("Could not find fix gpu"); - else if (all_success == -2) - error->all("At least one node could not find specified GPU."); - else if (all_success == -3) - error->all("Out of memory on GPU."); - else if (all_success == -4) - error->all("GPU library not compiled for this GPU."); - else if (all_success == -5) - error->all("Double precision is not supported on this GPU."); - else - error->all("Unknown error in GPU library."); - } + GPU_EXTRA::check_flag(success,error,world); density_brick = create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out, diff --git a/src/GPU/pppm_gpu_single.cpp b/src/GPU/pppm_gpu_single.cpp index b57ec0abbd..cee6b438bf 100644 --- a/src/GPU/pppm_gpu_single.cpp +++ b/src/GPU/pppm_gpu_single.cpp @@ -12,7 +12,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) + Contributing authors: Mike Brown (ORNL) ------------------------------------------------------------------------- */ #include "mpi.h" @@ -34,6 +34,7 @@ #include "remap_wrap.h" #include "memory.h" #include "error.h" +#include "gpu_extra.h" #define grdtyp float @@ -101,22 +102,7 @@ void PPPMGPUSingle::init() nyhi_out, nzhi_out, rho_coeff, &data, slab_volfactor,nx_pppm,ny_pppm,nz_pppm,success); - int all_success; - MPI_Allreduce(&success, &all_success, 1, MPI_INT, MPI_MIN, world); - if (all_success != 0) { - if (all_success == -1) - error->all("Could not find fix gpu"); - else if (all_success == -2) - error->all("At least one node could not find specified GPU."); - else if (all_success == -3) - error->all("Out of memory on GPU."); - else if (all_success == -4) - error->all("GPU library not compiled for this GPU."); - else if (all_success == -5) - error->all("Double precision is not supported on this GPU."); - else - error->all("Unknown error in GPU library."); - } + GPU_EXTRA::check_flag(success,error,world); density_brick = create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,