diff --git a/lib/cuda/Makefile.defaults b/lib/cuda/Makefile.defaults index 590435446c..16445cca18 100644 --- a/lib/cuda/Makefile.defaults +++ b/lib/cuda/Makefile.defaults @@ -1,6 +1,6 @@ #precision setting: 1 single, 2 double, 4 mixed -precision ?= 2 +precision ?= 4 #verbose setting: 0 no, 1 yes verbose ?= 1 diff --git a/lib/cuda/Makefile.lammps b/lib/cuda/Makefile.lammps index a488404f31..cb98690689 100644 --- a/lib/cuda/Makefile.lammps +++ b/lib/cuda/Makefile.lammps @@ -1,5 +1,5 @@ # Settings that the LAMMPS build will import when this package library is used -CUDA_FLAGS := -I/usr/local/cuda/include -DUNIX -DFFT_CUFFT -DCUDA_PRECISION=2 -DCUDA_ARCH=20 +CUDA_FLAGS := -I/usr/local/cuda/include -DUNIX -DFFT_CUFFT -DCUDA_PRECISION=1 -DX_PRECISION=2 -DV_PRECISION=2 -DCUDA_ARCH=20 CUDA_USRLIB_CONDITIONAL := -L/usr/local/cuda/lib -L/usr/local/cuda/lib64 -lcufft user-cuda_SYSINC = ${CUDA_FLAGS} diff --git a/lib/cuda/cuda_wrapper.cu b/lib/cuda/cuda_wrapper.cu index c8bda6ecc3..051d37d56c 100644 --- a/lib/cuda/cuda_wrapper.cu +++ b/lib/cuda/cuda_wrapper.cu @@ -254,7 +254,7 @@ void cuda_check_error(char* comment) printf("ERROR-CUDA %s %s\n", comment, cudaGetErrorString(cudaGetLastError())); } -int CudaWrapper_CheckMemUseage() +int CudaWrapper_CheckMemUsage() { size_t free, total; cudaMemGetInfo(&free, &total); diff --git a/lib/cuda/cuda_wrapper_cu.h b/lib/cuda/cuda_wrapper_cu.h index 5bcfaffd44..3f574463f9 100644 --- a/lib/cuda/cuda_wrapper_cu.h +++ b/lib/cuda/cuda_wrapper_cu.h @@ -36,7 +36,7 @@ extern "C" void CudaWrapper_CopyData(void* dev_dest, void* dev_source, unsigned extern "C" void* CudaWrapper_AllocPinnedHostData(unsigned nbytes, bool mapped = false, bool writeCombind = false); extern "C" void CudaWrapper_FreePinnedHostData(void* dev_data); extern "C" void cuda_check_error(char* comment); -extern "C" int CudaWrapper_CheckMemUseage(); +extern "C" int CudaWrapper_CheckMemUsage(); extern "C" double CudaWrapper_CheckUploadTime(bool reset = false); extern "C" double CudaWrapper_CheckDownloadTime(bool reset = false); extern "C" double CudaWrapper_CheckCPUBufUploadTime(bool reset = false); diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 0859062345..76000bd07b 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -492,6 +492,7 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer &ans, const double driver_overhead, const int threads_per_atom, FILE *screen) { double single[9], times[9]; + int post_final=0; single[0]=atom.transfer_time()+ans.transfer_time(); single[1]=nbor.time_nbor.total_seconds()+nbor.time_hybrid1.total_seconds()+ @@ -504,6 +505,9 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer &ans, single[7]=ans.cpu_idle_time(); single[8]=nbor.bin_time(); + MPI_Finalized(&post_final); + if (post_final) return; + MPI_Reduce(single,times,9,MPI_DOUBLE,MPI_SUM,0,_comm_replica); double my_max_bytes=max_bytes+atom.max_gpu_bytes();