Changing erroneous BIO_PAIR in kernels to BLOCK_PAIR. Default threads per atom is now 1 for all but charged pair styles.

This commit is contained in:
W. Michael Brown
2011-04-15 20:57:03 -04:00
parent 95c322ef03
commit 58bf89fda3
28 changed files with 56 additions and 1867 deletions

View File

@ -52,6 +52,7 @@ int PairGPUDeviceT::init_device(MPI_Comm world, MPI_Comm replica,
omp_set_num_threads(nthreads);
#endif
_threads_per_atom=t_per_atom;
_threads_per_charge=t_per_atom;
if (_device_init)
return 0;
@ -381,7 +382,8 @@ void PairGPUDeviceT::output_times(UCL_Timer &time_pair,
PairGPUNbor &nbor, const double avg_split,
const double max_bytes,
const double gpu_overhead,
const double driver_overhead, FILE *screen) {
const double driver_overhead,
const int threads_per_atom, FILE *screen) {
double single[8], times[8];
single[0]=atom.transfer_time()+ans.transfer_time();
@ -420,7 +422,7 @@ void PairGPUDeviceT::output_times(UCL_Timer &time_pair,
}
fprintf(screen,"GPU Overhead: %.4f s.\n",times[5]/_replica_size);
fprintf(screen,"Average split: %.4f.\n",avg_split);
fprintf(screen,"Threads / atom: %d.\n",_threads_per_atom);
fprintf(screen,"Threads / atom: %d.\n",threads_per_atom);
fprintf(screen,"Max Mem / Proc: %.2f MB.\n",max_mb);
fprintf(screen,"CPU Driver_Time: %.4f s.\n",times[6]/_replica_size);
fprintf(screen,"CPU Idle_Time: %.4f s.\n",times[7]/_replica_size);
@ -534,8 +536,8 @@ int PairGPUDeviceT::compile_kernels() {
k_info.set_function(*dev_program,"kernel_info");
_compiled=true;
UCL_H_Vec<int> h_gpu_lib_data(13,*gpu,UCL_NOT_PINNED);
UCL_D_Vec<int> d_gpu_lib_data(13,*gpu);
UCL_H_Vec<int> h_gpu_lib_data(14,*gpu,UCL_NOT_PINNED);
UCL_D_Vec<int> d_gpu_lib_data(14,*gpu);
k_info.set_size(1,1);
k_info.run(&d_gpu_lib_data.begin());
ucl_copy(h_gpu_lib_data,d_gpu_lib_data,false);
@ -549,6 +551,8 @@ int PairGPUDeviceT::compile_kernels() {
_warp_size=h_gpu_lib_data[2];
if (_threads_per_atom<1)
_threads_per_atom=h_gpu_lib_data[3];
if (_threads_per_charge<1)
_threads_per_charge=h_gpu_lib_data[13];
_pppm_max_spline=h_gpu_lib_data[4];
_pppm_block=h_gpu_lib_data[5];
_block_pair=h_gpu_lib_data[6];
@ -567,6 +571,10 @@ int PairGPUDeviceT::compile_kernels() {
_threads_per_atom=_warp_size;
if (_warp_size%_threads_per_atom!=0)
_threads_per_atom=1;
if (_threads_per_charge>_warp_size)
_threads_per_charge=_warp_size;
if (_warp_size%_threads_per_charge!=0)
_threads_per_charge=1;
return flag;
}