Merge 'gpu_hip_port' into master

This commit is contained in:
Vsevak
2020-01-28 20:09:40 +03:00
parent 5eef3b1828
commit 66c5fa2abd
91 changed files with 2290 additions and 312 deletions

View File

@ -15,6 +15,11 @@
#include "lal_atom.h"
#ifdef USE_HIP_DEVICE_SORT
#include <hip/hip_runtime.h>
#include <hipcub/hipcub.hpp>
#endif
namespace LAMMPS_AL {
#define AtomT Atom<numtyp,acctyp>
@ -70,6 +75,26 @@ bool AtomT::alloc(const int nall) {
}
#endif
#ifdef USE_HIP_DEVICE_SORT
if (_gpu_nbor==1) {
size_t temp_storage_bytes = 0;
if(hipSuccess != hipcub::DeviceRadixSort::SortPairs(nullptr, temp_storage_bytes, sort_out_keys, sort_out_keys, sort_out_values, sort_out_values, _max_atoms))
return false;
if(sort_out_size < _max_atoms){
if (sort_out_keys ) hipFree(sort_out_keys);
if (sort_out_values) hipFree(sort_out_values);
hipMalloc(&sort_out_keys , _max_atoms * sizeof(unsigned));
hipMalloc(&sort_out_values, _max_atoms * sizeof(int ));
sort_out_size = _max_atoms;
}
if(temp_storage_bytes > sort_temp_storage_size){
if(sort_temp_storage) hipFree(sort_temp_storage);
hipMalloc(&sort_temp_storage, temp_storage_bytes);
sort_temp_storage_size = temp_storage_bytes;
}
}
#endif
// --------------------------- Device allocations
int gpu_bytes=0;
success=success && (x.alloc(_max_atoms*4,*dev,UCL_WRITE_ONLY,
@ -184,6 +209,27 @@ bool AtomT::add_fields(const bool charge, const bool rot,
return false;
}
#endif
#ifdef USE_HIP_DEVICE_SORT
if (_gpu_nbor==1) {
size_t temp_storage_bytes = 0;
if(hipSuccess != hipcub::DeviceRadixSort::SortPairs(nullptr, temp_storage_bytes, sort_out_keys, sort_out_keys, sort_out_values, sort_out_values, _max_atoms))
return false;
if(sort_out_size < _max_atoms){
if (sort_out_keys ) hipFree(sort_out_keys);
if (sort_out_values) hipFree(sort_out_values);
hipMalloc(&sort_out_keys , _max_atoms * sizeof(unsigned));
hipMalloc(&sort_out_values, _max_atoms * sizeof(int ));
sort_out_size = _max_atoms;
}
if(temp_storage_bytes > sort_temp_storage_size){
if(sort_temp_storage) hipFree(sort_temp_storage);
hipMalloc(&sort_temp_storage, temp_storage_bytes);
sort_temp_storage_size = temp_storage_bytes;
}
}
#endif
success=success && (dev_particle_id.alloc(_max_atoms,*dev,
UCL_READ_ONLY)==UCL_SUCCESS);
gpu_bytes+=dev_particle_id.row_bytes();
@ -275,6 +321,19 @@ void AtomT::clear_resize() {
if (_gpu_nbor==1) cudppDestroyPlan(sort_plan);
#endif
#ifdef USE_HIP_DEVICE_SORT
if (_gpu_nbor==1) {
if(sort_out_keys) hipFree(sort_out_keys);
if(sort_out_values) hipFree(sort_out_values);
if(sort_temp_storage) hipFree(sort_temp_storage);
sort_out_keys = nullptr;
sort_out_values = nullptr;
sort_temp_storage = nullptr;
sort_temp_storage_size = 0;
sort_out_size = 0;
}
#endif
if (_gpu_nbor==2) {
host_particle_id.clear();
host_cell_id.clear();
@ -326,6 +385,22 @@ void AtomT::sort_neighbor(const int num_atoms) {
UCL_GERYON_EXIT;
}
#endif
#ifdef USE_HIP_DEVICE_SORT
if(sort_out_size < num_atoms){
printf("AtomT::sort_neighbor: invalid temp buffer size\n");
UCL_GERYON_EXIT;
}
if(hipSuccess != hipcub::DeviceRadixSort::SortPairs(sort_temp_storage, sort_temp_storage_size, (unsigned *)dev_cell_id.begin(), sort_out_keys, (int *)dev_particle_id.begin(), sort_out_values, num_atoms)){
printf("AtomT::sort_neighbor: DeviceRadixSort error\n");
UCL_GERYON_EXIT;
}
if(hipSuccess != hipMemcpy((unsigned *)dev_cell_id.begin(), sort_out_keys , num_atoms*sizeof(unsigned), hipMemcpyDeviceToDevice) ||
hipSuccess != hipMemcpy((int *) dev_particle_id.begin(), sort_out_values, num_atoms*sizeof(int ), hipMemcpyDeviceToDevice)){
printf("AtomT::sort_neighbor: copy output error\n");
UCL_GERYON_EXIT;
}
#endif
}
#ifdef GPU_CAST