Fixed a bug where extra was nullptr when _host_view is true: extra is now always allocated

(Note that BaseAmoeba has its own cast_extra_data(), which does not know whether extra has been allocated properly; extra is allocated properly when _host_view is false, for example on dedicated GPUs.)
This commit is contained in:
Trung Nguyen
2023-01-18 20:04:45 -06:00
parent f86375c992
commit eddd3d6f25
3 changed files with 9 additions and 14 deletions

View File

@ -124,7 +124,7 @@ bool AtomT::alloc(const int nall) {
UCL_READ_ONLY)==UCL_SUCCESS);
gpu_bytes+=v.device.row_bytes();
}
if (_extra_fields>0 && !_host_view) {
if (_extra_fields>0) {
success=success && (extra.alloc(_max_atoms*_extra_fields,*dev,UCL_WRITE_ONLY,
UCL_READ_ONLY)==UCL_SUCCESS);
gpu_bytes+=extra.device.row_bytes();

View File

@ -470,18 +470,13 @@ class Atom {
inline void cast_extra_data(cpytyp *host_ptr) {
if (_extra_avail==false) {
double t=MPI_Wtime();
if (_host_view) {
extra.host.view((numtyp*)host_ptr,_nall*_extra_fields,*dev);
extra.device.view(extra.host);
} else if (sizeof(numtyp)==sizeof(double))
memcpy(extra.host.begin(),host_ptr,_nall*_extra_fields*sizeof(numtyp));
else
#if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
#pragma omp parallel for simd schedule(static)
#elif (LAL_USE_OMP_SIMD == 1)
#pragma omp simd
#endif
for (int i=0; i<_nall*_extra_fields; i++) extra[i]=host_ptr[i];
for (int i=0; i<_nall*_extra_fields; i++)
extra[i]=host_ptr[i];
_time_cast+=MPI_Wtime()-t;
}
}

View File

@ -490,7 +490,7 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
_data_in_estimate++;
if (!atom.velocity() && vel)
_data_in_estimate++;
if (atom.using_extra()==false && extra_fields>0)
if (atom.using_extra() && extra_fields>0)
_data_in_estimate++;
if (!atom.add_fields(charge,rot,gpu_nbor,gpu_nbor>0 && maxspecial,vel,extra_fields))
return -3;