use available introspection API to get accumulator data type. update name of flag.

2023-01-25 05:22:49 -05:00
parent e068b14969
commit 722e583b59
6 changed files with 26 additions and 31 deletions
--- a/lib/gpu/lal_amoeba_ext.cpp
+++ b/lib/gpu/lal_amoeba_ext.cpp
@ -41,8 +41,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas
                    const int nlocal, const int nall, const int max_nbors,
                    const int maxspecial, const int maxspecial15,
                    const double cell_size, int &gpu_mode, FILE *screen,
-                    const double polar_dscale, const double polar_uscale,
-                    int& tep_size) {
+                    const double polar_dscale, const double polar_uscale) {
  AMOEBAMF.clear();
  gpu_mode=AMOEBAMF.device->gpu_mode();
  double gpu_split=AMOEBAMF.device->particle_split();
@ -52,8 +51,6 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas
  int gpu_rank=AMOEBAMF.device->gpu_rank();
  int procs_per_gpu=AMOEBAMF.device->procs_per_gpu();

-  tep_size=sizeof(ACC_PRECISION); // tep_size=sizeof(PRECISION);
-
  AMOEBAMF.device->init_message(screen,"amoeba",first_gpu,last_gpu);

  bool message=false;
--- a/lib/gpu/lal_hippo_ext.cpp
+++ b/lib/gpu/lal_hippo_ext.cpp
@ -42,8 +42,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
                    const int nlocal, const int nall, const int max_nbors,
                    const int maxspecial, const int maxspecial15,
                    const double cell_size, int &gpu_mode, FILE *screen,
-                    const double polar_dscale, const double polar_uscale,
-                    int& tep_size) {
+                   const double polar_dscale, const double polar_uscale) {
  HIPPOMF.clear();
  gpu_mode=HIPPOMF.device->gpu_mode();
  double gpu_split=HIPPOMF.device->particle_split();
@ -53,8 +52,6 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
  int gpu_rank=HIPPOMF.device->gpu_rank();
  int procs_per_gpu=HIPPOMF.device->procs_per_gpu();

-  tep_size=sizeof(ACC_PRECISION); // tep_size=sizeof(PRECISION);
-
  HIPPOMF.device->init_message(screen,"HIPPO",first_gpu,last_gpu);

  bool message=false;
--- a/src/GPU/pair_amoeba_gpu.cpp
+++ b/src/GPU/pair_amoeba_gpu.cpp
@ -26,6 +26,7 @@
 #include "fix_store_peratom.h"
 #include "force.h"
 #include "gpu_extra.h"
+#include "info.h"
 #include "math_const.h"
 #include "memory.h"
 #include "my_page.h"
@ -66,7 +67,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas
                    const int nlocal, const int nall, const int max_nbors,
                    const int maxspecial, const int maxspecial15,
                    const double cell_size, int &gpu_mode, FILE *screen,
-                    const double polar_dscale, const double polar_uscale, int& tq_size);
+                    const double polar_dscale, const double polar_uscale);
 void amoeba_gpu_clear();

 int** amoeba_gpu_precompute(const int ago, const int inum_full, const int nall,
@ -188,7 +189,6 @@ void PairAmoebaGPU::init_style()
    maxspecial15=atom->maxspecial15;
  }

-  int tq_size;
  int mnf = 5e-2 * neighbor->oneatom;
  int success = amoeba_gpu_init(atom->ntypes+1, max_amtype, max_amclass,
                                pdamp, thole, dirdamp, amtype2class, special_hal,
@ -197,13 +197,13 @@ void PairAmoebaGPU::init_style()
                                special_polar_pscale, csix, adisp, atom->nlocal,
                                atom->nlocal+atom->nghost, mnf, maxspecial,
                                maxspecial15, cell_size, gpu_mode, screen,
-                                polar_dscale, polar_uscale, tq_size);
+                                polar_dscale, polar_uscale);
  GPU_EXTRA::check_flag(success,error,world);

  if (gpu_mode == GPU_FORCE)
    error->all(FLERR,"Pair style amoeba/gpu does not support neigh no for now");

-  tq_single = (tq_size == sizeof(float));
+  acc_float = Info::has_accelerator_feature("GPU", "precision", "single");

  // replace with the gpu counterpart

@ -284,7 +284,7 @@ void PairAmoebaGPU::multipole_real()

  // reference to the tep array from GPU lib

-  if (tq_single) {
+  if (acc_float) {
    auto *tq_ptr = (float *)tq_pinned;
    compute_force_from_torque<float>(tq_ptr, f, virmpole); // fmpole
  } else {
@ -732,7 +732,7 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp)
  //   field and fieldp may already have some nonzero values from kspace (udirect1)

  int nlocal = atom->nlocal;
-  if (tq_single) {
+  if (acc_float) {
    auto field_ptr = (float *)fieldp_pinned;

    for (int i = 0; i < nlocal; i++) {
@ -963,7 +963,7 @@ void PairAmoebaGPU::ufield0c(double **field, double **fieldp)
  amoeba_gpu_update_fieldp(&fieldp_pinned);

  int inum = atom->nlocal;
-  if (tq_single) {
+  if (acc_float) {
    auto field_ptr = (float *)fieldp_pinned;

    for (int i = 0; i < nlocal; i++) {
@ -1161,7 +1161,7 @@ void PairAmoebaGPU::fphi_uind(FFT_SCALAR ****grid, double **fdip_phi1,
                       &fdip_sum_phi_pinned);

  int nlocal = atom->nlocal;
-  if (tq_single) {
+  if (acc_float) {
    auto _fdip_phi1_ptr = (float *)fdip_phi1_pinned;
    for (int i = 0; i < nlocal; i++) {
      int n = i;
@ -1296,7 +1296,7 @@ void PairAmoebaGPU::polar_real()

  // reference to the tep array from GPU lib

-  if (tq_single) {
+  if (acc_float) {
    auto *tep_ptr = (float *)tq_pinned;
    compute_force_from_torque<float>(tep_ptr, f, virpolar); // fpolar
  } else {
@ -1492,7 +1492,7 @@ void PairAmoebaGPU::polar_kspace()
    } else {
      void* fphi_pinned = nullptr;
      amoeba_gpu_fphi_mpole(gridpost, &fphi_pinned, felec);
-      if (tq_single) {
+      if (acc_float) {
        auto _fphi_ptr = (float *)fphi_pinned;
        for (int i = 0; i < nlocal; i++) {
          int idx = i;
--- a/src/GPU/pair_amoeba_gpu.h
+++ b/src/GPU/pair_amoeba_gpu.h
@ -49,7 +49,7 @@ class PairAmoebaGPU : public PairAmoeba {
  double cpu_time;
  void *tq_pinned;
  void *fieldp_pinned;
-  bool tq_single;
+  bool acc_float;

  bool gpu_hal_ready;
  bool gpu_repulsion_ready;
--- a/src/GPU/pair_hippo_gpu.cpp
+++ b/src/GPU/pair_hippo_gpu.cpp
@ -26,6 +26,7 @@
 #include "fix_store_peratom.h"
 #include "force.h"
 #include "gpu_extra.h"
+#include "info.h"
 #include "math_const.h"
 #include "memory.h"
 #include "my_page.h"
@ -67,7 +68,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
                    const int nlocal, const int nall, const int max_nbors,
                    const int maxspecial, const int maxspecial15,
                    const double cell_size, int &gpu_mode, FILE *screen,
-                    const double polar_dscale, const double polar_uscale, int& tq_size);
+                    const double polar_dscale, const double polar_uscale);
 void hippo_gpu_clear();

 int** hippo_gpu_precompute(const int ago, const int inum_full, const int nall,
@ -205,7 +206,6 @@ void PairHippoGPU::init_style()
    maxspecial15=atom->maxspecial15;
  }

-  int tq_size;
  int mnf = 5e-2 * neighbor->oneatom;
  int success = hippo_gpu_init(atom->ntypes+1, max_amtype, max_amclass,
                               pdamp, thole, dirdamp, amtype2class,
@ -215,12 +215,13 @@ void PairHippoGPU::init_style()
                               csix, adisp, pcore, palpha,
                               atom->nlocal, atom->nlocal+atom->nghost, mnf,
                               maxspecial, maxspecial15, cell_size, gpu_mode,
-                               screen, polar_dscale, polar_uscale, tq_size);
+                               screen, polar_dscale, polar_uscale);
  GPU_EXTRA::check_flag(success,error,world);

-  if (gpu_mode == GPU_FORCE) error->all(FLERR,"Pair style hippo/gpu does not support neigh no for now");
+  if (gpu_mode == GPU_FORCE)
+    error->all(FLERR,"Pair style hippo/gpu does not support neigh no for now");

-  tq_single = (tq_size == sizeof(float));
+  acc_float = Info::has_accelerator_feature("GPU", "precision", "single");

  // replace with the gpu counterpart

@ -296,7 +297,7 @@ void PairHippoGPU::repulsion()

  // reference to the tep array from GPU lib

-  if (tq_single) {
+  if (acc_float) {
    auto *tq_ptr = (float *)tq_pinned;
    compute_force_from_torque<float>(tq_ptr, f, virrepulse); // frepulse
  } else {
@ -396,7 +397,7 @@ void PairHippoGPU::multipole_real()

  // reference to the tep array from GPU lib

-  if (tq_single) {
+  if (acc_float) {
    auto *tq_ptr = (float *)tq_pinned;
    compute_force_from_torque<float>(tq_ptr, f, virmpole); // fmpole
  } else {
@ -845,7 +846,7 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp)
  //   field and fieldp may already have some nonzero values from kspace (udirect1)

  int nlocal = atom->nlocal;
-  if (tq_single) {
+  if (acc_float) {
    auto field_ptr = (float *)fieldp_pinned;

    for (int i = 0; i < nlocal; i++) {
@ -1073,7 +1074,7 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp)
  hippo_gpu_update_fieldp(&fieldp_pinned);
  int inum = atom->nlocal;

-  if (tq_single) {
+  if (acc_float) {
    auto *field_ptr = (float *)fieldp_pinned;

    for (int i = 0; i < nlocal; i++) {
@ -1279,7 +1280,7 @@ void PairHippoGPU::fphi_uind(FFT_SCALAR ****grid, double **fdip_phi1,
                      &fdip_sum_phi_pinned);

  int nlocal = atom->nlocal;
-  if (tq_single) {
+  if (acc_float) {
    auto _fdip_phi1_ptr = (float *)fdip_phi1_pinned;
    for (int i = 0; i < nlocal; i++) {
      int n = i;
@ -1416,7 +1417,7 @@ void PairHippoGPU::polar_real()

  // reference to the tep array from GPU lib

-  if (tq_single) {
+  if (acc_float) {
    auto *tep_ptr = (float *)tq_pinned;
    compute_force_from_torque<float>(tep_ptr, f, virpolar); // fpolar
  } else {
--- a/src/GPU/pair_hippo_gpu.h
+++ b/src/GPU/pair_hippo_gpu.h
@ -50,7 +50,7 @@ class PairHippoGPU : public PairAmoeba {
  double cpu_time;
  void *tq_pinned;
  void *fieldp_pinned;
-  bool tq_single;
+  bool acc_float;

  bool gpu_hal_ready;
  bool gpu_repulsion_ready;