diff --git a/lib/gpu/crml_gpu_kernel2.cu b/lib/gpu/crml_gpu_kernel2.cu index 731c50078c..5febfc77f2 100644 --- a/lib/gpu/crml_gpu_kernel2.cu +++ b/lib/gpu/crml_gpu_kernel2.cu @@ -280,6 +280,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp2 *ljd_in, f[tid].z=(acctyp)0; for (int o=0; o<6; o++) virial[tid][o]=(acctyp)0; + __syncthreads(); if (ii0) { for (int v=0; v<6; v++) { - *ap1=virial[tid][i]; + *ap1=virial[tid][v]; ap1+=inum; } } diff --git a/lib/gpu/crml_gpu_memory2.cpp b/lib/gpu/crml_gpu_memory2.cpp index fa2b5fe840..764fa5f48c 100644 --- a/lib/gpu/crml_gpu_memory2.cpp +++ b/lib/gpu/crml_gpu_memory2.cpp @@ -127,7 +127,7 @@ double CRML_GPU_Memory2T::host_memory_usage() const { // --------------------------------------------------------------------------- template void CRML_GPU_Memory2T::loop(const bool _eflag, const bool _vflag) { - const int threads_per_atom=32; + const int threads_per_atom=16; // Compute the block size and grid size to keep all cores busy const int BX=this->block_size();