Merge 'gpu_hip_port' into master
This commit is contained in:
@ -15,6 +15,11 @@
|
||||
|
||||
#include "lal_atom.h"
|
||||
|
||||
#ifdef USE_HIP_DEVICE_SORT
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hipcub/hipcub.hpp>
|
||||
#endif
|
||||
|
||||
namespace LAMMPS_AL {
|
||||
#define AtomT Atom<numtyp,acctyp>
|
||||
|
||||
@ -70,6 +75,26 @@ bool AtomT::alloc(const int nall) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_HIP_DEVICE_SORT
|
||||
if (_gpu_nbor==1) {
|
||||
size_t temp_storage_bytes = 0;
|
||||
if(hipSuccess != hipcub::DeviceRadixSort::SortPairs(nullptr, temp_storage_bytes, sort_out_keys, sort_out_keys, sort_out_values, sort_out_values, _max_atoms))
|
||||
return false;
|
||||
if(sort_out_size < _max_atoms){
|
||||
if (sort_out_keys ) hipFree(sort_out_keys);
|
||||
if (sort_out_values) hipFree(sort_out_values);
|
||||
hipMalloc(&sort_out_keys , _max_atoms * sizeof(unsigned));
|
||||
hipMalloc(&sort_out_values, _max_atoms * sizeof(int ));
|
||||
sort_out_size = _max_atoms;
|
||||
}
|
||||
if(temp_storage_bytes > sort_temp_storage_size){
|
||||
if(sort_temp_storage) hipFree(sort_temp_storage);
|
||||
hipMalloc(&sort_temp_storage, temp_storage_bytes);
|
||||
sort_temp_storage_size = temp_storage_bytes;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// --------------------------- Device allocations
|
||||
int gpu_bytes=0;
|
||||
success=success && (x.alloc(_max_atoms*4,*dev,UCL_WRITE_ONLY,
|
||||
@ -184,6 +209,27 @@ bool AtomT::add_fields(const bool charge, const bool rot,
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_HIP_DEVICE_SORT
|
||||
if (_gpu_nbor==1) {
|
||||
size_t temp_storage_bytes = 0;
|
||||
if(hipSuccess != hipcub::DeviceRadixSort::SortPairs(nullptr, temp_storage_bytes, sort_out_keys, sort_out_keys, sort_out_values, sort_out_values, _max_atoms))
|
||||
return false;
|
||||
if(sort_out_size < _max_atoms){
|
||||
if (sort_out_keys ) hipFree(sort_out_keys);
|
||||
if (sort_out_values) hipFree(sort_out_values);
|
||||
hipMalloc(&sort_out_keys , _max_atoms * sizeof(unsigned));
|
||||
hipMalloc(&sort_out_values, _max_atoms * sizeof(int ));
|
||||
sort_out_size = _max_atoms;
|
||||
}
|
||||
if(temp_storage_bytes > sort_temp_storage_size){
|
||||
if(sort_temp_storage) hipFree(sort_temp_storage);
|
||||
hipMalloc(&sort_temp_storage, temp_storage_bytes);
|
||||
sort_temp_storage_size = temp_storage_bytes;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
success=success && (dev_particle_id.alloc(_max_atoms,*dev,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=dev_particle_id.row_bytes();
|
||||
@ -275,6 +321,19 @@ void AtomT::clear_resize() {
|
||||
if (_gpu_nbor==1) cudppDestroyPlan(sort_plan);
|
||||
#endif
|
||||
|
||||
#ifdef USE_HIP_DEVICE_SORT
|
||||
if (_gpu_nbor==1) {
|
||||
if(sort_out_keys) hipFree(sort_out_keys);
|
||||
if(sort_out_values) hipFree(sort_out_values);
|
||||
if(sort_temp_storage) hipFree(sort_temp_storage);
|
||||
sort_out_keys = nullptr;
|
||||
sort_out_values = nullptr;
|
||||
sort_temp_storage = nullptr;
|
||||
sort_temp_storage_size = 0;
|
||||
sort_out_size = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (_gpu_nbor==2) {
|
||||
host_particle_id.clear();
|
||||
host_cell_id.clear();
|
||||
@ -326,6 +385,22 @@ void AtomT::sort_neighbor(const int num_atoms) {
|
||||
UCL_GERYON_EXIT;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_HIP_DEVICE_SORT
|
||||
if(sort_out_size < num_atoms){
|
||||
printf("AtomT::sort_neighbor: invalid temp buffer size\n");
|
||||
UCL_GERYON_EXIT;
|
||||
}
|
||||
if(hipSuccess != hipcub::DeviceRadixSort::SortPairs(sort_temp_storage, sort_temp_storage_size, (unsigned *)dev_cell_id.begin(), sort_out_keys, (int *)dev_particle_id.begin(), sort_out_values, num_atoms)){
|
||||
printf("AtomT::sort_neighbor: DeviceRadixSort error\n");
|
||||
UCL_GERYON_EXIT;
|
||||
}
|
||||
if(hipSuccess != hipMemcpy((unsigned *)dev_cell_id.begin(), sort_out_keys , num_atoms*sizeof(unsigned), hipMemcpyDeviceToDevice) ||
|
||||
hipSuccess != hipMemcpy((int *) dev_particle_id.begin(), sort_out_values, num_atoms*sizeof(int ), hipMemcpyDeviceToDevice)){
|
||||
printf("AtomT::sort_neighbor: copy output error\n");
|
||||
UCL_GERYON_EXIT;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef GPU_CAST
|
||||
|
||||
Reference in New Issue
Block a user