Updated all code to use full neighbor lists for all host force
computations. Changed the load balancing scheme. Added timing estimates for GPU and driver overhead and for CPU idle time.
This commit is contained in:
@ -68,6 +68,9 @@ bool AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall,
|
||||
_block_size=ucl_device->group_size();
|
||||
compile_kernels(*ucl_device,pair_program);
|
||||
|
||||
// Initialize host-device load balancer
|
||||
hd_balancer.init(device,gpu_nbor,gpu_split);
|
||||
|
||||
// Initialize timers for the selected GPU
|
||||
time_pair.init(*ucl_device);
|
||||
time_pair.zero();
|
||||
@ -76,9 +79,6 @@ bool AtomicGPUMemoryT::init_atomic(const int nlocal, const int nall,
|
||||
|
||||
_max_an_bytes=ans->gpu_bytes()+nbor->gpu_bytes();
|
||||
|
||||
// Initialize host-device load balancer
|
||||
hd_balancer.init(device,gpu_nbor,gpu_split);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -204,52 +204,6 @@ void AtomicGPUMemoryT::compute(const int f_ago, const int inum_full,
|
||||
hd_balancer.stop_timer();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Reneighbor on GPU if necessary and then compute forces, virials, energies
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
int * AtomicGPUMemoryT::compute(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *boxlo, double *boxhi, int *tag,
|
||||
int **nspecial, int **special, const bool eflag,
|
||||
const bool vflag, const bool eatom,
|
||||
const bool vatom, int &host_start,
|
||||
const double cpu_time, bool &success) {
|
||||
acc_timers();
|
||||
if (inum_full==0) {
|
||||
host_start=0;
|
||||
// Make sure textures are correct if realloc by a different hybrid style
|
||||
resize_atom(0,nall,success);
|
||||
zero_timers();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
hd_balancer.balance(cpu_time);
|
||||
int inum=hd_balancer.get_gpu_count(ago,inum_full);
|
||||
ans->inum(inum);
|
||||
host_start=inum;
|
||||
|
||||
// Build neighbor list on GPU if necessary
|
||||
if (ago==0) {
|
||||
build_nbor_list(inum, inum_full-inum, nall, host_x, host_type,
|
||||
boxlo, boxhi, tag, nspecial, special, success);
|
||||
if (!success)
|
||||
return NULL;
|
||||
hd_balancer.start_timer();
|
||||
} else {
|
||||
atom->cast_x_data(host_x,host_type);
|
||||
hd_balancer.start_timer();
|
||||
atom->add_x_data(host_x,host_type);
|
||||
}
|
||||
|
||||
loop(eflag,vflag);
|
||||
ans->copy_answers(eflag,vflag,eatom,vatom);
|
||||
device->add_ans_object(ans);
|
||||
hd_balancer.stop_timer();
|
||||
|
||||
return nbor->host_nbor.begin();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Reneighbor on GPU if necessary and then compute forces, virials, energies
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user