diff --git a/lib/gpu/lal_atom.cpp b/lib/gpu/lal_atom.cpp
index 03f3b477c9..bf27334578 100644
--- a/lib/gpu/lal_atom.cpp
+++ b/lib/gpu/lal_atom.cpp
@@ -124,7 +124,7 @@ bool AtomT::alloc(const int nall) {
                                 UCL_READ_ONLY)==UCL_SUCCESS);
     gpu_bytes+=v.device.row_bytes();
   }
-  if (_extra_fields>0 && !_host_view) {
+  if (_extra_fields>0) {
     success=success && (extra.alloc(_max_atoms*_extra_fields,*dev,UCL_WRITE_ONLY,
                                     UCL_READ_ONLY)==UCL_SUCCESS);
     gpu_bytes+=extra.device.row_bytes();
diff --git a/lib/gpu/lal_atom.h b/lib/gpu/lal_atom.h
index cfd4368948..f4b23822f8 100644
--- a/lib/gpu/lal_atom.h
+++ b/lib/gpu/lal_atom.h
@@ -470,18 +470,13 @@ class Atom {
   inline void cast_extra_data(cpytyp *host_ptr) {
     if (_extra_avail==false) {
       double t=MPI_Wtime();
-      if (_host_view) {
-        extra.host.view((numtyp*)host_ptr,_nall*_extra_fields,*dev);
-        extra.device.view(extra.host);
-      } else if (sizeof(numtyp)==sizeof(double))
-        memcpy(extra.host.begin(),host_ptr,_nall*_extra_fields*sizeof(numtyp));
-      else
-        #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
-        #pragma omp parallel for simd schedule(static)
-        #elif (LAL_USE_OMP_SIMD == 1)
-        #pragma omp simd
-        #endif
-        for (int i=0; i<_nall*_extra_fields; i++) extra[i]=host_ptr[i];
+      #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
+      #pragma omp parallel for simd schedule(static)
+      #elif (LAL_USE_OMP_SIMD == 1)
+      #pragma omp simd
+      #endif
+      for (int i=0; i<_nall*_extra_fields; i++)
+        extra[i]=host_ptr[i];
       _time_cast+=MPI_Wtime()-t;
     }
   }
diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp
index 1dbe1a0c40..e54d16266c 100644
--- a/lib/gpu/lal_device.cpp
+++ b/lib/gpu/lal_device.cpp
@@ -490,7 +490,7 @@ int DeviceT::init(Answer &ans, const bool charge,
     _data_in_estimate++;
   if (!atom.velocity() && vel)
     _data_in_estimate++;
-  if (atom.using_extra()==false && extra_fields>0)
+  if (atom.using_extra() && extra_fields>0)
     _data_in_estimate++;
   if (!atom.add_fields(charge,rot,gpu_nbor,gpu_nbor>0 && maxspecial,vel,extra_fields))
     return -3;