diff --git a/lib/gpu/atomic_gpu_memory.h b/lib/gpu/atomic_gpu_memory.h index 7767228c8a..91003f5c0d 100644 --- a/lib/gpu/atomic_gpu_memory.h +++ b/lib/gpu/atomic_gpu_memory.h @@ -167,7 +167,8 @@ class AtomicGPUMemory { protected: bool _compiled; - int _block_size, _max_bytes, _max_an_bytes; + int _block_size; + double _max_bytes, _max_an_bytes; void compile_kernels(UCL_Device &dev, const char *pair_string); diff --git a/lib/gpu/charge_gpu_memory.h b/lib/gpu/charge_gpu_memory.h index 4e6e4d506d..c53f897118 100644 --- a/lib/gpu/charge_gpu_memory.h +++ b/lib/gpu/charge_gpu_memory.h @@ -171,7 +171,8 @@ class ChargeGPUMemory { protected: bool _compiled; - int _block_size, _max_bytes, _max_an_bytes; + int _block_size; + double _max_bytes, _max_an_bytes; void compile_kernels(UCL_Device &dev, const char *pair_string); diff --git a/lib/gpu/pair_gpu_atom.h b/lib/gpu/pair_gpu_atom.h index b20f1d67f6..7cec73f98c 100644 --- a/lib/gpu/pair_gpu_atom.h +++ b/lib/gpu/pair_gpu_atom.h @@ -311,7 +311,7 @@ class PairGPUAtom { } /// Return number of bytes used on device - inline int gpu_bytes() { return _gpu_bytes; } + inline double gpu_bytes() { return _gpu_bytes; } // -------------------------COPY FROM GPU ------------------------------- @@ -394,7 +394,7 @@ class PairGPUAtom { int *_ilist; double _time_cast; - int _gpu_bytes; + double _gpu_bytes; #ifndef USE_OPENCL CUDPPConfiguration sort_config; diff --git a/lib/gpu/pair_gpu_device.cpp b/lib/gpu/pair_gpu_device.cpp index ecd56e09dc..0262b3dcd6 100644 --- a/lib/gpu/pair_gpu_device.cpp +++ b/lib/gpu/pair_gpu_device.cpp @@ -166,7 +166,7 @@ void PairGPUDeviceT::init_message(FILE *screen, const char *name, template void PairGPUDeviceT::output_times(UCL_Timer &time_pair, const double avg_split, - const int max_bytes, FILE *screen) { + const double max_bytes, FILE *screen) { double single[5], times[5]; single[0]=atom.transfer_time(); @@ -177,10 +177,10 @@ void PairGPUDeviceT::output_times(UCL_Timer &time_pair, const double avg_split, MPI_Reduce(single,times,5,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); - int my_max_bytes=max_bytes; - int mpi_max_bytes; - MPI_Reduce(&my_max_bytes,&mpi_max_bytes,1,MPI_INT,MPI_MAX,0,MPI_COMM_WORLD); - double max_mb=mpi_max_bytes/(1024*1024); + double my_max_bytes=max_bytes; + double mpi_max_bytes; + MPI_Reduce(&my_max_bytes,&mpi_max_bytes,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD); + double max_mb=mpi_max_bytes/(1024.0*1024.0); if (world_me()==0) if (screen && times[3]>0.0) { diff --git a/lib/gpu/pair_gpu_device.h b/lib/gpu/pair_gpu_device.h index 1275dbe96f..e7a78328d9 100644 --- a/lib/gpu/pair_gpu_device.h +++ b/lib/gpu/pair_gpu_device.h @@ -61,7 +61,7 @@ class PairGPUDevice { /// Output a message with timing information void output_times(UCL_Timer &time_pair, const double avg_split, - const int max_bytes, FILE *screen); + const double max_bytes, FILE *screen); /// Clear all memory on host and device associated with atom and nbor data void clear(); diff --git a/lib/gpu/pair_gpu_nbor.cpp b/lib/gpu/pair_gpu_nbor.cpp index 10e996d559..63048b7560 100644 --- a/lib/gpu/pair_gpu_nbor.cpp +++ b/lib/gpu/pair_gpu_nbor.cpp @@ -98,7 +98,7 @@ void PairGPUNbor::alloc(bool &success) { success=success && (host_acc.alloc((_max_atoms+_max_host)*2,*dev, UCL_WRITE_OPTIMIZED)==UCL_SUCCESS); - _c_bytes+=dev_nbor.row_bytes(); + _c_bytes=dev_nbor.row_bytes(); if (_alloc_packed) { dev_packed.clear(); success=success && (dev_packed.alloc((_max_nbors+2)*_max_atoms,*dev, @@ -133,8 +133,9 @@ void PairGPUNbor::alloc(bool &success) { } void PairGPUNbor::clear() { - _gpu_bytes=0; - _cell_bytes=0; + _gpu_bytes=0.0; + _cell_bytes=0.0; + _c_bytes=0.0; if (_allocated) { _allocated=false; diff --git a/lib/gpu/pair_gpu_nbor.h b/lib/gpu/pair_gpu_nbor.h index 565dc5ddc0..403bd7aed4 100644 --- a/lib/gpu/pair_gpu_nbor.h +++ b/lib/gpu/pair_gpu_nbor.h @@ -146,11 +146,12 @@ class PairGPUNbor { bool &success, int &max_nbors); /// Return the number of bytes used on device - inline int gpu_bytes() { + inline double gpu_bytes() { + double res = _gpu_bytes + _c_bytes + _cell_bytes; if (_gpu_nbor==false) - return _gpu_bytes+_c_bytes+2*IJ_SIZE*sizeof(int)+_cell_bytes; - else - return _gpu_bytes+_c_bytes+_cell_bytes; + res += 2*IJ_SIZE*sizeof(int); + + return res; } // ------------------------------- Data ------------------------------- @@ -198,7 +199,7 @@ class PairGPUNbor { bool _gpu_nbor, _gpu_host, _alloc_packed; double _cell_size; - int _gpu_bytes, _cell_bytes, _c_bytes; + double _gpu_bytes, _c_bytes, _cell_bytes; void alloc(bool &success); };