From 92cd2a69bf2da40853f1236b0b393a173b2db5bc Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 11 Nov 2010 12:31:36 -0500 Subject: [PATCH] do GPU device memory accounting with doubles using (signed) int will overflow at 2GB, switching to unsigned has risk of hiding overflows and using long long is not as portable as double precision floating point. --- lib/gpu/atomic_gpu_memory.h | 3 ++- lib/gpu/charge_gpu_memory.h | 3 ++- lib/gpu/pair_gpu_atom.h | 4 ++-- lib/gpu/pair_gpu_device.cpp | 10 +++++----- lib/gpu/pair_gpu_device.h | 2 +- lib/gpu/pair_gpu_nbor.cpp | 7 ++++--- lib/gpu/pair_gpu_nbor.h | 11 ++++++----- 7 files changed, 22 insertions(+), 18 deletions(-) diff --git a/lib/gpu/atomic_gpu_memory.h b/lib/gpu/atomic_gpu_memory.h index 7767228c8a..91003f5c0d 100644 --- a/lib/gpu/atomic_gpu_memory.h +++ b/lib/gpu/atomic_gpu_memory.h @@ -167,7 +167,8 @@ class AtomicGPUMemory { protected: bool _compiled; - int _block_size, _max_bytes, _max_an_bytes; + int _block_size; + double _max_bytes, _max_an_bytes; void compile_kernels(UCL_Device &dev, const char *pair_string); diff --git a/lib/gpu/charge_gpu_memory.h b/lib/gpu/charge_gpu_memory.h index 4e6e4d506d..c53f897118 100644 --- a/lib/gpu/charge_gpu_memory.h +++ b/lib/gpu/charge_gpu_memory.h @@ -171,7 +171,8 @@ class ChargeGPUMemory { protected: bool _compiled; - int _block_size, _max_bytes, _max_an_bytes; + int _block_size; + double _max_bytes, _max_an_bytes; void compile_kernels(UCL_Device &dev, const char *pair_string); diff --git a/lib/gpu/pair_gpu_atom.h b/lib/gpu/pair_gpu_atom.h index b20f1d67f6..7cec73f98c 100644 --- a/lib/gpu/pair_gpu_atom.h +++ b/lib/gpu/pair_gpu_atom.h @@ -311,7 +311,7 @@ class PairGPUAtom { } /// Return number of bytes used on device - inline int gpu_bytes() { return _gpu_bytes; } + inline double gpu_bytes() { return _gpu_bytes; } // -------------------------COPY FROM GPU ------------------------------- @@ -394,7 +394,7 @@ class PairGPUAtom { int *_ilist; double _time_cast; - int 
_gpu_bytes; + double _gpu_bytes; #ifndef USE_OPENCL CUDPPConfiguration sort_config; diff --git a/lib/gpu/pair_gpu_device.cpp b/lib/gpu/pair_gpu_device.cpp index ecd56e09dc..0262b3dcd6 100644 --- a/lib/gpu/pair_gpu_device.cpp +++ b/lib/gpu/pair_gpu_device.cpp @@ -166,7 +166,7 @@ void PairGPUDeviceT::init_message(FILE *screen, const char *name, template void PairGPUDeviceT::output_times(UCL_Timer &time_pair, const double avg_split, - const int max_bytes, FILE *screen) { + const double max_bytes, FILE *screen) { double single[5], times[5]; single[0]=atom.transfer_time(); @@ -177,10 +177,10 @@ void PairGPUDeviceT::output_times(UCL_Timer &time_pair, const double avg_split, MPI_Reduce(single,times,5,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); - int my_max_bytes=max_bytes; - int mpi_max_bytes; - MPI_Reduce(&my_max_bytes,&mpi_max_bytes,1,MPI_INT,MPI_MAX,0,MPI_COMM_WORLD); - double max_mb=mpi_max_bytes/(1024*1024); + double my_max_bytes=max_bytes; + double mpi_max_bytes; + MPI_Reduce(&my_max_bytes,&mpi_max_bytes,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD); + double max_mb=mpi_max_bytes/(1024.0*1024.0); if (world_me()==0) if (screen && times[3]>0.0) { diff --git a/lib/gpu/pair_gpu_device.h b/lib/gpu/pair_gpu_device.h index 1275dbe96f..e7a78328d9 100644 --- a/lib/gpu/pair_gpu_device.h +++ b/lib/gpu/pair_gpu_device.h @@ -61,7 +61,7 @@ class PairGPUDevice { /// Output a message with timing information void output_times(UCL_Timer &time_pair, const double avg_split, - const int max_bytes, FILE *screen); + const double max_bytes, FILE *screen); /// Clear all memory on host and device associated with atom and nbor data void clear(); diff --git a/lib/gpu/pair_gpu_nbor.cpp b/lib/gpu/pair_gpu_nbor.cpp index 10e996d559..63048b7560 100644 --- a/lib/gpu/pair_gpu_nbor.cpp +++ b/lib/gpu/pair_gpu_nbor.cpp @@ -98,7 +98,7 @@ void PairGPUNbor::alloc(bool &success) { success=success && (host_acc.alloc((_max_atoms+_max_host)*2,*dev, UCL_WRITE_OPTIMIZED)==UCL_SUCCESS); - _c_bytes+=dev_nbor.row_bytes(); + 
_c_bytes=dev_nbor.row_bytes(); if (_alloc_packed) { dev_packed.clear(); success=success && (dev_packed.alloc((_max_nbors+2)*_max_atoms,*dev, @@ -133,8 +133,9 @@ void PairGPUNbor::alloc(bool &success) { } void PairGPUNbor::clear() { - _gpu_bytes=0; - _cell_bytes=0; + _gpu_bytes=0.0; + _cell_bytes=0.0; + _c_bytes=0.0; if (_allocated) { _allocated=false; diff --git a/lib/gpu/pair_gpu_nbor.h b/lib/gpu/pair_gpu_nbor.h index 565dc5ddc0..403bd7aed4 100644 --- a/lib/gpu/pair_gpu_nbor.h +++ b/lib/gpu/pair_gpu_nbor.h @@ -146,11 +146,12 @@ class PairGPUNbor { bool &success, int &max_nbors); /// Return the number of bytes used on device - inline int gpu_bytes() { + inline double gpu_bytes() { + double res = _gpu_bytes + _c_bytes + _cell_bytes; if (_gpu_nbor==false) - return _gpu_bytes+_c_bytes+2*IJ_SIZE*sizeof(int)+_cell_bytes; - else - return _gpu_bytes+_c_bytes+_cell_bytes; + res += 2*IJ_SIZE*sizeof(int); + + return res; } // ------------------------------- Data ------------------------------- @@ -198,7 +199,7 @@ class PairGPUNbor { bool _gpu_nbor, _gpu_host, _alloc_packed; double _cell_size; - int _gpu_bytes, _cell_bytes, _c_bytes; + double _gpu_bytes, _c_bytes, _cell_bytes; void alloc(bool &success); };