Removed max_cus() from Device; callers now use device->gpu->cus() instead

2022-10-07 15:50:30 -05:00
parent 6b9e83fe20
commit 00f46120c7
5 changed files with 9 additions and 12 deletions
--- a/lib/gpu/lal_amoeba.cpp
+++ b/lib/gpu/lal_amoeba.cpp
@@ -278,10 +278,10 @@ int AmoebaT::polar_real(const int eflag, const int vflag) {
  int nbor_pitch=this->nbor->nbor_pitch();

  // Compute the block size and grid size to keep all cores busy
-  const int max_cus = this->device->max_cus();
+  const int cus = this->device->gpu->cus();
  int BX=this->block_size();
  int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
-  while (GX < max_cus) {
+  while (GX < cus) {
    BX /= 2;
    GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
  }
--- a/lib/gpu/lal_base_amoeba.cpp
+++ b/lib/gpu/lal_base_amoeba.cpp
@@ -727,10 +727,10 @@ int BaseAmoebaT::fphi_uind() {
    return 0;

  // Compute the block size and grid size to keep all cores busy
-  const int max_cus = device->max_cus();
+  const int cus = device->gpu->cus();
  int BX=block_size();
  int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
-  while (GX < max_cus) {
+  while (GX < cus) {
    BX /= 2;
    GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
  }
@@ -793,10 +793,10 @@ int BaseAmoebaT::fphi_mpole() {
  int nbor_pitch=nbor->nbor_pitch();

  // Compute the block size and grid size to keep all cores busy
-  const int max_cus = device->max_cus();
+  const int cus = device->gpu->cus();
  int BX=block_size();
  int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
-  while (GX < max_cus) {
+  while (GX < cus) {
    BX /= 2;
    GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
  }
--- a/lib/gpu/lal_device.cpp
+++ b/lib/gpu/lal_device.cpp
@@ -214,7 +214,6 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
      }
    }
    _first_device = _last_device = best_device;
-    _max_cus = best_cus;
    type = gpu->device_type(_first_device);

    if (ndevices > 0) {
--- a/lib/gpu/lal_device.h
+++ b/lib/gpu/lal_device.h
@@ -241,8 +241,6 @@ class Device {
  inline int shuffle_avail() const { return _shuffle_avail; }
  /// For OpenCL, 0 if fast-math options disabled, 1 enabled
  inline int fast_math() const { return _fast_math; }
-  /// return the max number of CUs among the devices
-  inline int max_cus() const { return _max_cus; }

  /// Return the number of threads per atom for pair styles
  inline int threads_per_atom() const { return _threads_per_atom; }
@@ -326,7 +324,7 @@ class Device {

 private:
  std::queue<Answer<numtyp,acctyp> *> ans_queue;
-  int _init_count, _max_cus;
+  int _init_count;
  bool _device_init, _host_timer_started, _time_device;
  MPI_Comm _comm_world, _comm_replica, _comm_gpu;
  int _procs_per_gpu, _gpu_rank, _world_me, _world_size, _replica_me,
--- a/lib/gpu/lal_hippo.cpp
+++ b/lib/gpu/lal_hippo.cpp
@@ -619,10 +619,10 @@ int HippoT::polar_real(const int eflag, const int vflag) {
  int nbor_pitch=this->nbor->nbor_pitch();

  // Compute the block size and grid size to keep all cores busy
-  const int max_cus = this->device->max_cus();
+  const int cus = this->device->gpu->cus();
  int BX=this->block_size();
  int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
-  while (GX < max_cus) {
+  while (GX < cus) {
    BX /= 2;
    GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
  }