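Use the available introspection API (Info::has_accelerator_feature) to get the accumulator data type instead of returning its size through an extra output argument of the GPU init functions. Rename the flag from tq_single to acc_float.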

This commit is contained in:
Axel Kohlmeyer
2023-01-25 05:22:49 -05:00
parent e068b14969
commit 722e583b59
6 changed files with 26 additions and 31 deletions

View File

@ -41,8 +41,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas
const int nlocal, const int nall, const int max_nbors, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const int maxspecial15, const int maxspecial, const int maxspecial15,
const double cell_size, int &gpu_mode, FILE *screen, const double cell_size, int &gpu_mode, FILE *screen,
const double polar_dscale, const double polar_uscale, const double polar_dscale, const double polar_uscale) {
int& tep_size) {
AMOEBAMF.clear(); AMOEBAMF.clear();
gpu_mode=AMOEBAMF.device->gpu_mode(); gpu_mode=AMOEBAMF.device->gpu_mode();
double gpu_split=AMOEBAMF.device->particle_split(); double gpu_split=AMOEBAMF.device->particle_split();
@ -52,8 +51,6 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas
int gpu_rank=AMOEBAMF.device->gpu_rank(); int gpu_rank=AMOEBAMF.device->gpu_rank();
int procs_per_gpu=AMOEBAMF.device->procs_per_gpu(); int procs_per_gpu=AMOEBAMF.device->procs_per_gpu();
tep_size=sizeof(ACC_PRECISION); // tep_size=sizeof(PRECISION);
AMOEBAMF.device->init_message(screen,"amoeba",first_gpu,last_gpu); AMOEBAMF.device->init_message(screen,"amoeba",first_gpu,last_gpu);
bool message=false; bool message=false;

View File

@ -42,8 +42,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
const int nlocal, const int nall, const int max_nbors, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const int maxspecial15, const int maxspecial, const int maxspecial15,
const double cell_size, int &gpu_mode, FILE *screen, const double cell_size, int &gpu_mode, FILE *screen,
const double polar_dscale, const double polar_uscale, const double polar_dscale, const double polar_uscale) {
int& tep_size) {
HIPPOMF.clear(); HIPPOMF.clear();
gpu_mode=HIPPOMF.device->gpu_mode(); gpu_mode=HIPPOMF.device->gpu_mode();
double gpu_split=HIPPOMF.device->particle_split(); double gpu_split=HIPPOMF.device->particle_split();
@ -53,8 +52,6 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
int gpu_rank=HIPPOMF.device->gpu_rank(); int gpu_rank=HIPPOMF.device->gpu_rank();
int procs_per_gpu=HIPPOMF.device->procs_per_gpu(); int procs_per_gpu=HIPPOMF.device->procs_per_gpu();
tep_size=sizeof(ACC_PRECISION); // tep_size=sizeof(PRECISION);
HIPPOMF.device->init_message(screen,"HIPPO",first_gpu,last_gpu); HIPPOMF.device->init_message(screen,"HIPPO",first_gpu,last_gpu);
bool message=false; bool message=false;

View File

@ -26,6 +26,7 @@
#include "fix_store_peratom.h" #include "fix_store_peratom.h"
#include "force.h" #include "force.h"
#include "gpu_extra.h" #include "gpu_extra.h"
#include "info.h"
#include "math_const.h" #include "math_const.h"
#include "memory.h" #include "memory.h"
#include "my_page.h" #include "my_page.h"
@ -66,7 +67,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas
const int nlocal, const int nall, const int max_nbors, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const int maxspecial15, const int maxspecial, const int maxspecial15,
const double cell_size, int &gpu_mode, FILE *screen, const double cell_size, int &gpu_mode, FILE *screen,
const double polar_dscale, const double polar_uscale, int& tq_size); const double polar_dscale, const double polar_uscale);
void amoeba_gpu_clear(); void amoeba_gpu_clear();
int** amoeba_gpu_precompute(const int ago, const int inum_full, const int nall, int** amoeba_gpu_precompute(const int ago, const int inum_full, const int nall,
@ -188,7 +189,6 @@ void PairAmoebaGPU::init_style()
maxspecial15=atom->maxspecial15; maxspecial15=atom->maxspecial15;
} }
int tq_size;
int mnf = 5e-2 * neighbor->oneatom; int mnf = 5e-2 * neighbor->oneatom;
int success = amoeba_gpu_init(atom->ntypes+1, max_amtype, max_amclass, int success = amoeba_gpu_init(atom->ntypes+1, max_amtype, max_amclass,
pdamp, thole, dirdamp, amtype2class, special_hal, pdamp, thole, dirdamp, amtype2class, special_hal,
@ -197,13 +197,13 @@ void PairAmoebaGPU::init_style()
special_polar_pscale, csix, adisp, atom->nlocal, special_polar_pscale, csix, adisp, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial, atom->nlocal+atom->nghost, mnf, maxspecial,
maxspecial15, cell_size, gpu_mode, screen, maxspecial15, cell_size, gpu_mode, screen,
polar_dscale, polar_uscale, tq_size); polar_dscale, polar_uscale);
GPU_EXTRA::check_flag(success,error,world); GPU_EXTRA::check_flag(success,error,world);
if (gpu_mode == GPU_FORCE) if (gpu_mode == GPU_FORCE)
error->all(FLERR,"Pair style amoeba/gpu does not support neigh no for now"); error->all(FLERR,"Pair style amoeba/gpu does not support neigh no for now");
tq_single = (tq_size == sizeof(float)); acc_float = Info::has_accelerator_feature("GPU", "precision", "single");
// replace with the gpu counterpart // replace with the gpu counterpart
@ -284,7 +284,7 @@ void PairAmoebaGPU::multipole_real()
// reference to the tep array from GPU lib // reference to the tep array from GPU lib
if (tq_single) { if (acc_float) {
auto *tq_ptr = (float *)tq_pinned; auto *tq_ptr = (float *)tq_pinned;
compute_force_from_torque<float>(tq_ptr, f, virmpole); // fmpole compute_force_from_torque<float>(tq_ptr, f, virmpole); // fmpole
} else { } else {
@ -732,7 +732,7 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp)
// field and fieldp may already have some nonzero values from kspace (udirect1) // field and fieldp may already have some nonzero values from kspace (udirect1)
int nlocal = atom->nlocal; int nlocal = atom->nlocal;
if (tq_single) { if (acc_float) {
auto field_ptr = (float *)fieldp_pinned; auto field_ptr = (float *)fieldp_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
@ -963,7 +963,7 @@ void PairAmoebaGPU::ufield0c(double **field, double **fieldp)
amoeba_gpu_update_fieldp(&fieldp_pinned); amoeba_gpu_update_fieldp(&fieldp_pinned);
int inum = atom->nlocal; int inum = atom->nlocal;
if (tq_single) { if (acc_float) {
auto field_ptr = (float *)fieldp_pinned; auto field_ptr = (float *)fieldp_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
@ -1161,7 +1161,7 @@ void PairAmoebaGPU::fphi_uind(FFT_SCALAR ****grid, double **fdip_phi1,
&fdip_sum_phi_pinned); &fdip_sum_phi_pinned);
int nlocal = atom->nlocal; int nlocal = atom->nlocal;
if (tq_single) { if (acc_float) {
auto _fdip_phi1_ptr = (float *)fdip_phi1_pinned; auto _fdip_phi1_ptr = (float *)fdip_phi1_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int n = i; int n = i;
@ -1296,7 +1296,7 @@ void PairAmoebaGPU::polar_real()
// reference to the tep array from GPU lib // reference to the tep array from GPU lib
if (tq_single) { if (acc_float) {
auto *tep_ptr = (float *)tq_pinned; auto *tep_ptr = (float *)tq_pinned;
compute_force_from_torque<float>(tep_ptr, f, virpolar); // fpolar compute_force_from_torque<float>(tep_ptr, f, virpolar); // fpolar
} else { } else {
@ -1492,7 +1492,7 @@ void PairAmoebaGPU::polar_kspace()
} else { } else {
void* fphi_pinned = nullptr; void* fphi_pinned = nullptr;
amoeba_gpu_fphi_mpole(gridpost, &fphi_pinned, felec); amoeba_gpu_fphi_mpole(gridpost, &fphi_pinned, felec);
if (tq_single) { if (acc_float) {
auto _fphi_ptr = (float *)fphi_pinned; auto _fphi_ptr = (float *)fphi_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = i; int idx = i;

View File

@ -49,7 +49,7 @@ class PairAmoebaGPU : public PairAmoeba {
double cpu_time; double cpu_time;
void *tq_pinned; void *tq_pinned;
void *fieldp_pinned; void *fieldp_pinned;
bool tq_single; bool acc_float;
bool gpu_hal_ready; bool gpu_hal_ready;
bool gpu_repulsion_ready; bool gpu_repulsion_ready;

View File

@ -26,6 +26,7 @@
#include "fix_store_peratom.h" #include "fix_store_peratom.h"
#include "force.h" #include "force.h"
#include "gpu_extra.h" #include "gpu_extra.h"
#include "info.h"
#include "math_const.h" #include "math_const.h"
#include "memory.h" #include "memory.h"
#include "my_page.h" #include "my_page.h"
@ -67,7 +68,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
const int nlocal, const int nall, const int max_nbors, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const int maxspecial15, const int maxspecial, const int maxspecial15,
const double cell_size, int &gpu_mode, FILE *screen, const double cell_size, int &gpu_mode, FILE *screen,
const double polar_dscale, const double polar_uscale, int& tq_size); const double polar_dscale, const double polar_uscale);
void hippo_gpu_clear(); void hippo_gpu_clear();
int** hippo_gpu_precompute(const int ago, const int inum_full, const int nall, int** hippo_gpu_precompute(const int ago, const int inum_full, const int nall,
@ -205,7 +206,6 @@ void PairHippoGPU::init_style()
maxspecial15=atom->maxspecial15; maxspecial15=atom->maxspecial15;
} }
int tq_size;
int mnf = 5e-2 * neighbor->oneatom; int mnf = 5e-2 * neighbor->oneatom;
int success = hippo_gpu_init(atom->ntypes+1, max_amtype, max_amclass, int success = hippo_gpu_init(atom->ntypes+1, max_amtype, max_amclass,
pdamp, thole, dirdamp, amtype2class, pdamp, thole, dirdamp, amtype2class,
@ -215,12 +215,13 @@ void PairHippoGPU::init_style()
csix, adisp, pcore, palpha, csix, adisp, pcore, palpha,
atom->nlocal, atom->nlocal+atom->nghost, mnf, atom->nlocal, atom->nlocal+atom->nghost, mnf,
maxspecial, maxspecial15, cell_size, gpu_mode, maxspecial, maxspecial15, cell_size, gpu_mode,
screen, polar_dscale, polar_uscale, tq_size); screen, polar_dscale, polar_uscale);
GPU_EXTRA::check_flag(success,error,world); GPU_EXTRA::check_flag(success,error,world);
if (gpu_mode == GPU_FORCE) error->all(FLERR,"Pair style hippo/gpu does not support neigh no for now"); if (gpu_mode == GPU_FORCE)
error->all(FLERR,"Pair style hippo/gpu does not support neigh no for now");
tq_single = (tq_size == sizeof(float)); acc_float = Info::has_accelerator_feature("GPU", "precision", "single");
// replace with the gpu counterpart // replace with the gpu counterpart
@ -296,7 +297,7 @@ void PairHippoGPU::repulsion()
// reference to the tep array from GPU lib // reference to the tep array from GPU lib
if (tq_single) { if (acc_float) {
auto *tq_ptr = (float *)tq_pinned; auto *tq_ptr = (float *)tq_pinned;
compute_force_from_torque<float>(tq_ptr, f, virrepulse); // frepulse compute_force_from_torque<float>(tq_ptr, f, virrepulse); // frepulse
} else { } else {
@ -396,7 +397,7 @@ void PairHippoGPU::multipole_real()
// reference to the tep array from GPU lib // reference to the tep array from GPU lib
if (tq_single) { if (acc_float) {
auto *tq_ptr = (float *)tq_pinned; auto *tq_ptr = (float *)tq_pinned;
compute_force_from_torque<float>(tq_ptr, f, virmpole); // fmpole compute_force_from_torque<float>(tq_ptr, f, virmpole); // fmpole
} else { } else {
@ -845,7 +846,7 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp)
// field and fieldp may already have some nonzero values from kspace (udirect1) // field and fieldp may already have some nonzero values from kspace (udirect1)
int nlocal = atom->nlocal; int nlocal = atom->nlocal;
if (tq_single) { if (acc_float) {
auto field_ptr = (float *)fieldp_pinned; auto field_ptr = (float *)fieldp_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
@ -1073,7 +1074,7 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp)
hippo_gpu_update_fieldp(&fieldp_pinned); hippo_gpu_update_fieldp(&fieldp_pinned);
int inum = atom->nlocal; int inum = atom->nlocal;
if (tq_single) { if (acc_float) {
auto *field_ptr = (float *)fieldp_pinned; auto *field_ptr = (float *)fieldp_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
@ -1279,7 +1280,7 @@ void PairHippoGPU::fphi_uind(FFT_SCALAR ****grid, double **fdip_phi1,
&fdip_sum_phi_pinned); &fdip_sum_phi_pinned);
int nlocal = atom->nlocal; int nlocal = atom->nlocal;
if (tq_single) { if (acc_float) {
auto _fdip_phi1_ptr = (float *)fdip_phi1_pinned; auto _fdip_phi1_ptr = (float *)fdip_phi1_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int n = i; int n = i;
@ -1416,7 +1417,7 @@ void PairHippoGPU::polar_real()
// reference to the tep array from GPU lib // reference to the tep array from GPU lib
if (tq_single) { if (acc_float) {
auto *tep_ptr = (float *)tq_pinned; auto *tep_ptr = (float *)tq_pinned;
compute_force_from_torque<float>(tep_ptr, f, virpolar); // fpolar compute_force_from_torque<float>(tep_ptr, f, virpolar); // fpolar
} else { } else {

View File

@ -50,7 +50,7 @@ class PairHippoGPU : public PairAmoeba {
double cpu_time; double cpu_time;
void *tq_pinned; void *tq_pinned;
void *fieldp_pinned; void *fieldp_pinned;
bool tq_single; bool acc_float;
bool gpu_hal_ready; bool gpu_hal_ready;
bool gpu_repulsion_ready; bool gpu_repulsion_ready;