From 00f46120c79f841dcecf78d75e7498bf7a3fc708 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Fri, 7 Oct 2022 15:50:30 -0500 Subject: [PATCH] Removed max_cus() from Device, used device->gpu->cus() instead --- lib/gpu/lal_amoeba.cpp | 4 ++-- lib/gpu/lal_base_amoeba.cpp | 8 ++++---- lib/gpu/lal_device.cpp | 1 - lib/gpu/lal_device.h | 4 +--- lib/gpu/lal_hippo.cpp | 4 ++-- 5 files changed, 9 insertions(+), 12 deletions(-) diff --git a/lib/gpu/lal_amoeba.cpp b/lib/gpu/lal_amoeba.cpp index b61d7595af..1c0aa77706 100644 --- a/lib/gpu/lal_amoeba.cpp +++ b/lib/gpu/lal_amoeba.cpp @@ -278,10 +278,10 @@ int AmoebaT::polar_real(const int eflag, const int vflag) { int nbor_pitch=this->nbor->nbor_pitch(); // Compute the block size and grid size to keep all cores busy - const int max_cus = this->device->max_cus(); + const int cus = this->device->gpu->cus(); int BX=this->block_size(); int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom))); - while (GX < max_cus) { + while (GX < cus) { BX /= 2; GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom))); } diff --git a/lib/gpu/lal_base_amoeba.cpp b/lib/gpu/lal_base_amoeba.cpp index 3b2381f211..8e4e8faf83 100644 --- a/lib/gpu/lal_base_amoeba.cpp +++ b/lib/gpu/lal_base_amoeba.cpp @@ -727,10 +727,10 @@ int BaseAmoebaT::fphi_uind() { return 0; // Compute the block size and grid size to keep all cores busy - const int max_cus = device->max_cus(); + const int cus = device->gpu->cus(); int BX=block_size(); int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX)); - while (GX < max_cus) { + while (GX < cus) { BX /= 2; GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX)); } @@ -793,10 +793,10 @@ int BaseAmoebaT::fphi_mpole() { int nbor_pitch=nbor->nbor_pitch(); // Compute the block size and grid size to keep all cores busy - const int max_cus = device->max_cus(); + const int cus = device->gpu->cus(); int BX=block_size(); int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX)); - while (GX < max_cus) { + while (GX < cus) { BX /= 2; 
GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX)); } diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 89ae503a97..039970a0d3 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -214,7 +214,6 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu, } } _first_device = _last_device = best_device; - _max_cus = best_cus; type = gpu->device_type(_first_device); if (ndevices > 0) { diff --git a/lib/gpu/lal_device.h b/lib/gpu/lal_device.h index 7def4b7f82..74f802a096 100644 --- a/lib/gpu/lal_device.h +++ b/lib/gpu/lal_device.h @@ -241,8 +241,6 @@ class Device { inline int shuffle_avail() const { return _shuffle_avail; } /// For OpenCL, 0 if fast-math options disabled, 1 enabled inline int fast_math() const { return _fast_math; } - /// return the max number of CUs among the devices - inline int max_cus() const { return _max_cus; } /// Return the number of threads per atom for pair styles inline int threads_per_atom() const { return _threads_per_atom; } @@ -326,7 +324,7 @@ class Device { private: std::queue<Answer<numtyp,acctyp> *> ans_queue; - int _init_count, _max_cus; + int _init_count; bool _device_init, _host_timer_started, _time_device; MPI_Comm _comm_world, _comm_replica, _comm_gpu; int _procs_per_gpu, _gpu_rank, _world_me, _world_size, _replica_me, diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp index d8ef3e9a44..f20a0cfd62 100644 --- a/lib/gpu/lal_hippo.cpp +++ b/lib/gpu/lal_hippo.cpp @@ -619,10 +619,10 @@ int HippoT::polar_real(const int eflag, const int vflag) { int nbor_pitch=this->nbor->nbor_pitch(); // Compute the block size and grid size to keep all cores busy - const int max_cus = this->device->max_cus(); + const int cus = this->device->gpu->cus(); int BX=this->block_size(); int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom))); - while (GX < max_cus) { + while (GX < cus) { BX /= 2; GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom))); }