From 722e583b591633736e2beaa3b4a29809d190efc8 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 25 Jan 2023 05:22:49 -0500 Subject: [PATCH] use available introspection API to get accumulator data type. update name of flag. --- lib/gpu/lal_amoeba_ext.cpp | 5 +---- lib/gpu/lal_hippo_ext.cpp | 5 +---- src/GPU/pair_amoeba_gpu.cpp | 20 ++++++++++---------- src/GPU/pair_amoeba_gpu.h | 2 +- src/GPU/pair_hippo_gpu.cpp | 23 ++++++++++++----------- src/GPU/pair_hippo_gpu.h | 2 +- 6 files changed, 26 insertions(+), 31 deletions(-) diff --git a/lib/gpu/lal_amoeba_ext.cpp b/lib/gpu/lal_amoeba_ext.cpp index fe3d4a26d8..995dfbe95f 100644 --- a/lib/gpu/lal_amoeba_ext.cpp +++ b/lib/gpu/lal_amoeba_ext.cpp @@ -41,8 +41,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas const int nlocal, const int nall, const int max_nbors, const int maxspecial, const int maxspecial15, const double cell_size, int &gpu_mode, FILE *screen, - const double polar_dscale, const double polar_uscale, - int& tep_size) { + const double polar_dscale, const double polar_uscale) { AMOEBAMF.clear(); gpu_mode=AMOEBAMF.device->gpu_mode(); double gpu_split=AMOEBAMF.device->particle_split(); @@ -52,8 +51,6 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas int gpu_rank=AMOEBAMF.device->gpu_rank(); int procs_per_gpu=AMOEBAMF.device->procs_per_gpu(); - tep_size=sizeof(ACC_PRECISION); // tep_size=sizeof(PRECISION); - AMOEBAMF.device->init_message(screen,"amoeba",first_gpu,last_gpu); bool message=false; diff --git a/lib/gpu/lal_hippo_ext.cpp b/lib/gpu/lal_hippo_ext.cpp index b5ac42744a..0cb00387ca 100644 --- a/lib/gpu/lal_hippo_ext.cpp +++ b/lib/gpu/lal_hippo_ext.cpp @@ -42,8 +42,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass const int nlocal, const int nall, const int max_nbors, const int maxspecial, const int maxspecial15, const double cell_size, int &gpu_mode, FILE *screen, - const double polar_dscale, const double polar_uscale, - int& tep_size) { + const double polar_dscale, const double polar_uscale) { HIPPOMF.clear(); gpu_mode=HIPPOMF.device->gpu_mode(); double gpu_split=HIPPOMF.device->particle_split(); @@ -53,8 +52,6 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass int gpu_rank=HIPPOMF.device->gpu_rank(); int procs_per_gpu=HIPPOMF.device->procs_per_gpu(); - tep_size=sizeof(ACC_PRECISION); // tep_size=sizeof(PRECISION); - HIPPOMF.device->init_message(screen,"HIPPO",first_gpu,last_gpu); bool message=false; diff --git a/src/GPU/pair_amoeba_gpu.cpp b/src/GPU/pair_amoeba_gpu.cpp index 941050cf04..fd423486fd 100644 --- a/src/GPU/pair_amoeba_gpu.cpp +++ b/src/GPU/pair_amoeba_gpu.cpp @@ -26,6 +26,7 @@ #include "fix_store_peratom.h" #include "force.h" #include "gpu_extra.h" +#include "info.h" #include "math_const.h" #include "memory.h" #include "my_page.h" @@ -66,7 +67,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas const int nlocal, const int nall, const int max_nbors, const int maxspecial, const int maxspecial15, const double cell_size, int &gpu_mode, FILE *screen, - const double polar_dscale, const double polar_uscale, int& tq_size); + const double polar_dscale, const double polar_uscale); void amoeba_gpu_clear(); int** amoeba_gpu_precompute(const int ago, const int inum_full, const int nall, @@ -188,7 +189,6 @@ void PairAmoebaGPU::init_style() maxspecial15=atom->maxspecial15; } - int tq_size; int mnf = 5e-2 * neighbor->oneatom; int success = amoeba_gpu_init(atom->ntypes+1, max_amtype, max_amclass, pdamp, thole, dirdamp, amtype2class, special_hal, @@ -197,13 +197,13 @@ void PairAmoebaGPU::init_style() special_polar_pscale, csix, adisp, atom->nlocal, atom->nlocal+atom->nghost, mnf, maxspecial, maxspecial15, cell_size, gpu_mode, screen, - polar_dscale, polar_uscale, tq_size); + polar_dscale, polar_uscale); GPU_EXTRA::check_flag(success,error,world); if (gpu_mode == GPU_FORCE) error->all(FLERR,"Pair style amoeba/gpu does not support neigh no for now"); - tq_single = (tq_size == sizeof(float)); + acc_float = Info::has_accelerator_feature("GPU", "precision", "single"); // replace with the gpu counterpart @@ -284,7 +284,7 @@ void PairAmoebaGPU::multipole_real() // reference to the tep array from GPU lib - if (tq_single) { + if (acc_float) { auto *tq_ptr = (float *)tq_pinned; compute_force_from_torque(tq_ptr, f, virmpole); // fmpole } else { @@ -732,7 +732,7 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp) // field and fieldp may already have some nonzero values from kspace (udirect1) int nlocal = atom->nlocal; - if (tq_single) { + if (acc_float) { auto field_ptr = (float *)fieldp_pinned; for (int i = 0; i < nlocal; i++) { @@ -963,7 +963,7 @@ void PairAmoebaGPU::ufield0c(double **field, double **fieldp) amoeba_gpu_update_fieldp(&fieldp_pinned); int inum = atom->nlocal; - if (tq_single) { + if (acc_float) { auto field_ptr = (float *)fieldp_pinned; for (int i = 0; i < nlocal; i++) { @@ -1161,7 +1161,7 @@ void PairAmoebaGPU::fphi_uind(FFT_SCALAR ****grid, double **fdip_phi1, &fdip_sum_phi_pinned); int nlocal = atom->nlocal; - if (tq_single) { + if (acc_float) { auto _fdip_phi1_ptr = (float *)fdip_phi1_pinned; for (int i = 0; i < nlocal; i++) { int n = i; @@ -1296,7 +1296,7 @@ void PairAmoebaGPU::polar_real() // reference to the tep array from GPU lib - if (tq_single) { + if (acc_float) { auto *tep_ptr = (float *)tq_pinned; compute_force_from_torque(tep_ptr, f, virpolar); // fpolar } else { @@ -1492,7 +1492,7 @@ void PairAmoebaGPU::polar_kspace() } else { void* fphi_pinned = nullptr; amoeba_gpu_fphi_mpole(gridpost, &fphi_pinned, felec); - if (tq_single) { + if (acc_float) { auto _fphi_ptr = (float *)fphi_pinned; for (int i = 0; i < nlocal; i++) { int idx = i; diff --git a/src/GPU/pair_amoeba_gpu.h b/src/GPU/pair_amoeba_gpu.h index c9b9b73a58..be53f7ef50 100644 --- a/src/GPU/pair_amoeba_gpu.h +++ b/src/GPU/pair_amoeba_gpu.h @@ -49,7 +49,7 @@ class PairAmoebaGPU : public PairAmoeba { double cpu_time; void *tq_pinned; void *fieldp_pinned; - bool tq_single; + bool acc_float; bool gpu_hal_ready; bool gpu_repulsion_ready; diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp index 5956f1bc11..9d286d5db7 100644 --- a/src/GPU/pair_hippo_gpu.cpp +++ b/src/GPU/pair_hippo_gpu.cpp @@ -26,6 +26,7 @@ #include "fix_store_peratom.h" #include "force.h" #include "gpu_extra.h" +#include "info.h" #include "math_const.h" #include "memory.h" #include "my_page.h" @@ -67,7 +68,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass const int nlocal, const int nall, const int max_nbors, const int maxspecial, const int maxspecial15, const double cell_size, int &gpu_mode, FILE *screen, - const double polar_dscale, const double polar_uscale, int& tq_size); + const double polar_dscale, const double polar_uscale); void hippo_gpu_clear(); int** hippo_gpu_precompute(const int ago, const int inum_full, const int nall, @@ -205,7 +206,6 @@ void PairHippoGPU::init_style() maxspecial15=atom->maxspecial15; } - int tq_size; int mnf = 5e-2 * neighbor->oneatom; int success = hippo_gpu_init(atom->ntypes+1, max_amtype, max_amclass, pdamp, thole, dirdamp, amtype2class, @@ -215,12 +215,13 @@ void PairHippoGPU::init_style() csix, adisp, pcore, palpha, atom->nlocal, atom->nlocal+atom->nghost, mnf, maxspecial, maxspecial15, cell_size, gpu_mode, - screen, polar_dscale, polar_uscale, tq_size); + screen, polar_dscale, polar_uscale); GPU_EXTRA::check_flag(success,error,world); - if (gpu_mode == GPU_FORCE) error->all(FLERR,"Pair style hippo/gpu does not support neigh no for now"); + if (gpu_mode == GPU_FORCE) + error->all(FLERR,"Pair style hippo/gpu does not support neigh no for now"); - tq_single = (tq_size == sizeof(float)); + acc_float = Info::has_accelerator_feature("GPU", "precision", "single"); // replace with the gpu counterpart @@ -296,7 +297,7 @@ void PairHippoGPU::repulsion() // reference to the tep array from GPU lib - if (tq_single) { + if (acc_float) { auto *tq_ptr = (float *)tq_pinned; compute_force_from_torque(tq_ptr, f, virrepulse); // frepulse } else { @@ -396,7 +397,7 @@ void PairHippoGPU::multipole_real() // reference to the tep array from GPU lib - if (tq_single) { + if (acc_float) { auto *tq_ptr = (float *)tq_pinned; compute_force_from_torque(tq_ptr, f, virmpole); // fmpole } else { @@ -845,7 +846,7 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp) // field and fieldp may already have some nonzero values from kspace (udirect1) int nlocal = atom->nlocal; - if (tq_single) { + if (acc_float) { auto field_ptr = (float *)fieldp_pinned; for (int i = 0; i < nlocal; i++) { @@ -1073,7 +1074,7 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp) hippo_gpu_update_fieldp(&fieldp_pinned); int inum = atom->nlocal; - if (tq_single) { + if (acc_float) { auto *field_ptr = (float *)fieldp_pinned; for (int i = 0; i < nlocal; i++) { @@ -1279,7 +1280,7 @@ void PairHippoGPU::fphi_uind(FFT_SCALAR ****grid, double **fdip_phi1, &fdip_sum_phi_pinned); int nlocal = atom->nlocal; - if (tq_single) { + if (acc_float) { auto _fdip_phi1_ptr = (float *)fdip_phi1_pinned; for (int i = 0; i < nlocal; i++) { int n = i; @@ -1416,7 +1417,7 @@ void PairHippoGPU::polar_real() // reference to the tep array from GPU lib - if (tq_single) { + if (acc_float) { auto *tep_ptr = (float *)tq_pinned; compute_force_from_torque(tep_ptr, f, virpolar); // fpolar } else { diff --git a/src/GPU/pair_hippo_gpu.h b/src/GPU/pair_hippo_gpu.h index 7955c97470..d160446d77 100644 --- a/src/GPU/pair_hippo_gpu.h +++ b/src/GPU/pair_hippo_gpu.h @@ -50,7 +50,7 @@ class PairHippoGPU : public PairAmoeba { double cpu_time; void *tq_pinned; void *fieldp_pinned; - bool tq_single; + bool acc_float; bool gpu_hal_ready; bool gpu_repulsion_ready;