Update Kokkos library in LAMMPS to v3.5.0

2021-11-04 12:45:59 -06:00
parent 515ef7bece
commit 564098e629
396 changed files with 21892 additions and 8508 deletions
--- a/lib/kokkos/algorithms/CMakeLists.txt
+++ b/lib/kokkos/algorithms/CMakeLists.txt
@ -5,9 +5,7 @@ KOKKOS_SUBPACKAGE(Algorithms)
 IF (NOT Kokkos_INSTALL_TESTING)
  ADD_SUBDIRECTORY(src)
 ENDIF()
-IF(NOT (KOKKOS_ENABLE_OPENMPTARGET
-        AND (KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR
-             KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)))
+IF(NOT (KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC))
  KOKKOS_ADD_TEST_DIRECTORIES(unit_tests)
 ENDIF()

--- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp
+++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp
@ -447,6 +447,25 @@ struct rand<Generator, unsigned long long> {
  }
 };

+#if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
+template <class Generator>
+struct rand<Generator, Kokkos::Experimental::half_t> {
+  using half = Kokkos::Experimental::half_t;
+  KOKKOS_INLINE_FUNCTION
+  static half max() { return half(1.0); }
+  KOKKOS_INLINE_FUNCTION
+  static half draw(Generator& gen) { return half(gen.frand()); }
+  KOKKOS_INLINE_FUNCTION
+  static half draw(Generator& gen, const half& range) {
+    return half(gen.frand(float(range)));
+  }
+  KOKKOS_INLINE_FUNCTION
+  static half draw(Generator& gen, const half& start, const half& end) {
+    return half(gen.frand(float(start), float(end)));
+  }
+};
+#endif  // defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
+
 template <class Generator>
 struct rand<Generator, float> {
  KOKKOS_INLINE_FUNCTION
@ -600,7 +619,7 @@ struct Random_XorShift1024_UseCArrayState<Kokkos::Experimental::OpenMPTarget>

 template <class ExecutionSpace>
 struct Random_UniqueIndex {
-  using locks_view_type = View<int*, ExecutionSpace>;
+  using locks_view_type = View<int**, ExecutionSpace>;
  KOKKOS_FUNCTION
  static int get_state_idx(const locks_view_type) {
 #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
@ -615,7 +634,7 @@ struct Random_UniqueIndex {
 #ifdef KOKKOS_ENABLE_CUDA
 template <>
 struct Random_UniqueIndex<Kokkos::Cuda> {
-  using locks_view_type = View<int*, Kokkos::Cuda>;
+  using locks_view_type = View<int**, Kokkos::Cuda>;
  KOKKOS_FUNCTION
  static int get_state_idx(const locks_view_type& locks_) {
 #ifdef __CUDA_ARCH__
@ -625,7 +644,7 @@ struct Random_UniqueIndex<Kokkos::Cuda> {
                 blockDim.x * blockDim.y * blockDim.z +
             i_offset) %
            locks_.extent(0);
-    while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
+    while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) {
      i += blockDim.x * blockDim.y * blockDim.z;
      if (i >= static_cast<int>(locks_.extent(0))) {
        i = i_offset;
@ -643,7 +662,7 @@ struct Random_UniqueIndex<Kokkos::Cuda> {
 #ifdef KOKKOS_ENABLE_HIP
 template <>
 struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
-  using locks_view_type = View<int*, Kokkos::Experimental::HIP>;
+  using locks_view_type = View<int**, Kokkos::Experimental::HIP>;
  KOKKOS_FUNCTION
  static int get_state_idx(const locks_view_type& locks_) {
 #ifdef __HIP_DEVICE_COMPILE__
@ -653,7 +672,7 @@ struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
                 blockDim.x * blockDim.y * blockDim.z +
             i_offset) %
            locks_.extent(0);
-    while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
+    while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) {
      i += blockDim.x * blockDim.y * blockDim.z;
      if (i >= static_cast<int>(locks_.extent(0))) {
        i = i_offset;
@ -671,15 +690,15 @@ struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
 #ifdef KOKKOS_ENABLE_SYCL
 template <>
 struct Random_UniqueIndex<Kokkos::Experimental::SYCL> {
-  using locks_view_type = View<int*, Kokkos::Experimental::SYCL>;
+  using locks_view_type = View<int**, Kokkos::Experimental::SYCL>;
  KOKKOS_FUNCTION
  static int get_state_idx(const locks_view_type& locks_) {
-#ifdef KOKKOS_ARCH_INTEL_GEN
+#ifdef KOKKOS_ARCH_INTEL_GPU
    int i = Kokkos::Impl::clock_tic() % locks_.extent(0);
 #else
    int i = 0;
 #endif
-    while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
+    while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) {
      i = (i + 1) % static_cast<int>(locks_.extent(0));
    }
    return i;
@ -690,14 +709,14 @@ struct Random_UniqueIndex<Kokkos::Experimental::SYCL> {
 #ifdef KOKKOS_ENABLE_OPENMPTARGET
 template <>
 struct Random_UniqueIndex<Kokkos::Experimental::OpenMPTarget> {
-  using locks_view_type = View<int*, Kokkos::Experimental::OpenMPTarget>;
+  using locks_view_type = View<int**, Kokkos::Experimental::OpenMPTarget>;
  KOKKOS_FUNCTION
  static int get_state_idx(const locks_view_type& locks) {
    const int team_size = omp_get_num_threads();
    int i               = omp_get_team_num() * team_size + omp_get_thread_num();
    const int lock_size = locks.extent_int(0);

-    while (Kokkos::atomic_compare_exchange(&locks(i), 0, 1)) {
+    while (Kokkos::atomic_compare_exchange(&locks(i, 0), 0, 1)) {
      i = (i + 1) % lock_size;
    }
    return i;
@ -856,18 +875,22 @@ template <class DeviceType = Kokkos::DefaultExecutionSpace>
 class Random_XorShift64_Pool {
 private:
  using execution_space = typename DeviceType::execution_space;
-  using locks_type      = View<int*, execution_space>;
-  using state_data_type = View<uint64_t*, DeviceType>;
+  using locks_type      = View<int**, execution_space>;
+  using state_data_type = View<uint64_t**, DeviceType>;
  locks_type locks_;
  state_data_type state_;
  int num_states_;
+  int padding_;

 public:
  using generator_type = Random_XorShift64<DeviceType>;
  using device_type    = DeviceType;

  KOKKOS_INLINE_FUNCTION
-  Random_XorShift64_Pool() { num_states_ = 0; }
+  Random_XorShift64_Pool() {
+    num_states_ = 0;
+    padding_    = 0;
+  }
  Random_XorShift64_Pool(uint64_t seed) {
    num_states_ = 0;

@ -883,16 +906,22 @@ class Random_XorShift64_Pool {
    locks_      = src.locks_;
    state_      = src.state_;
    num_states_ = src.num_states_;
+    padding_    = src.padding_;
    return *this;
  }

  void init(uint64_t seed, int num_states) {
    if (seed == 0) seed = uint64_t(1318319);
-
+    // I only want to pad on CPU like archs (less than 1000 threads). 64 is a
+    // magic number, or random number I just wanted something not too large and
+    // not too small. 64 sounded fine.
+    padding_    = num_states < 1000 ? 64 : 1;
    num_states_ = num_states;

-    locks_ = locks_type("Kokkos::Random_XorShift64::locks", num_states_);
-    state_ = state_data_type("Kokkos::Random_XorShift64::state", num_states_);
+    locks_ =
+        locks_type("Kokkos::Random_XorShift64::locks", num_states, padding_);
+    state_ = state_data_type("Kokkos::Random_XorShift64::state", num_states_,
+                             padding_);

    typename state_data_type::HostMirror h_state = create_mirror_view(state_);
    typename locks_type::HostMirror h_lock       = create_mirror_view(locks_);
@ -902,15 +931,15 @@ class Random_XorShift64_Pool {
        gen(seed, 0);
    for (int i = 0; i < 17; i++) gen.rand();
    for (int i = 0; i < num_states_; i++) {
-      int n1     = gen.rand();
-      int n2     = gen.rand();
-      int n3     = gen.rand();
-      int n4     = gen.rand();
-      h_state(i) = (((static_cast<uint64_t>(n1)) & 0xffff) << 00) |
-                   (((static_cast<uint64_t>(n2)) & 0xffff) << 16) |
-                   (((static_cast<uint64_t>(n3)) & 0xffff) << 32) |
-                   (((static_cast<uint64_t>(n4)) & 0xffff) << 48);
-      h_lock(i) = 0;
+      int n1        = gen.rand();
+      int n2        = gen.rand();
+      int n3        = gen.rand();
+      int n4        = gen.rand();
+      h_state(i, 0) = (((static_cast<uint64_t>(n1)) & 0xffff) << 00) |
+                      (((static_cast<uint64_t>(n2)) & 0xffff) << 16) |
+                      (((static_cast<uint64_t>(n3)) & 0xffff) << 32) |
+                      (((static_cast<uint64_t>(n4)) & 0xffff) << 48);
+      h_lock(i, 0) = 0;
    }
    deep_copy(state_, h_state);
    deep_copy(locks_, h_lock);
@ -920,19 +949,19 @@ class Random_XorShift64_Pool {
  Random_XorShift64<DeviceType> get_state() const {
    const int i =
        Impl::Random_UniqueIndex<execution_space>::get_state_idx(locks_);
-    return Random_XorShift64<DeviceType>(state_(i), i);
+    return Random_XorShift64<DeviceType>(state_(i, 0), i);
  }

  // NOTE: state_idx MUST be unique and less than num_states
  KOKKOS_INLINE_FUNCTION
  Random_XorShift64<DeviceType> get_state(const int state_idx) const {
-    return Random_XorShift64<DeviceType>(state_(state_idx), state_idx);
+    return Random_XorShift64<DeviceType>(state_(state_idx, 0), state_idx);
  }

  KOKKOS_INLINE_FUNCTION
  void free_state(const Random_XorShift64<DeviceType>& state) const {
-    state_(state.state_idx_) = state.state_;
-    locks_(state.state_idx_) = 0;
+    state_(state.state_idx_, 0) = state.state_;
+    locks_(state.state_idx_, 0) = 0;
  }
 };

@ -1092,14 +1121,15 @@ template <class DeviceType = Kokkos::DefaultExecutionSpace>
 class Random_XorShift1024_Pool {
 private:
  using execution_space = typename DeviceType::execution_space;
-  using locks_type      = View<int*, execution_space>;
-  using int_view_type   = View<int*, DeviceType>;
+  using locks_type      = View<int**, execution_space>;
+  using int_view_type   = View<int**, DeviceType>;
  using state_data_type = View<uint64_t * [16], DeviceType>;

  locks_type locks_;
  state_data_type state_;
  int_view_type p_;
  int num_states_;
+  int padding_;
  friend class Random_XorShift1024<DeviceType>;

 public:
@ -1129,15 +1159,21 @@ class Random_XorShift1024_Pool {
    state_      = src.state_;
    p_          = src.p_;
    num_states_ = src.num_states_;
+    padding_    = src.padding_;
    return *this;
  }

  inline void init(uint64_t seed, int num_states) {
    if (seed == 0) seed = uint64_t(1318319);
+    // I only want to pad on CPU like archs (less than 1000 threads). 64 is a
+    // magic number, or random number I just wanted something not too large and
+    // not too small. 64 sounded fine.
+    padding_    = num_states < 1000 ? 64 : 1;
    num_states_ = num_states;
-    locks_      = locks_type("Kokkos::Random_XorShift1024::locks", num_states_);
+    locks_ =
+        locks_type("Kokkos::Random_XorShift1024::locks", num_states_, padding_);
    state_ = state_data_type("Kokkos::Random_XorShift1024::state", num_states_);
-    p_     = int_view_type("Kokkos::Random_XorShift1024::p", num_states_);
+    p_ = int_view_type("Kokkos::Random_XorShift1024::p", num_states_, padding_);

    typename state_data_type::HostMirror h_state = create_mirror_view(state_);
    typename locks_type::HostMirror h_lock       = create_mirror_view(locks_);
@ -1158,8 +1194,8 @@ class Random_XorShift1024_Pool {
                        (((static_cast<uint64_t>(n3)) & 0xffff) << 32) |
                        (((static_cast<uint64_t>(n4)) & 0xffff) << 48);
      }
-      h_p(i)    = 0;
-      h_lock(i) = 0;
+      h_p(i, 0)    = 0;
+      h_lock(i, 0) = 0;
    }
    deep_copy(state_, h_state);
    deep_copy(locks_, h_lock);
@ -1169,20 +1205,20 @@ class Random_XorShift1024_Pool {
  Random_XorShift1024<DeviceType> get_state() const {
    const int i =
        Impl::Random_UniqueIndex<execution_space>::get_state_idx(locks_);
-    return Random_XorShift1024<DeviceType>(state_, p_(i), i);
+    return Random_XorShift1024<DeviceType>(state_, p_(i, 0), i);
  };

  // NOTE: state_idx MUST be unique and less than num_states
  KOKKOS_INLINE_FUNCTION
  Random_XorShift1024<DeviceType> get_state(const int state_idx) const {
-    return Random_XorShift1024<DeviceType>(state_, p_(state_idx), state_idx);
+    return Random_XorShift1024<DeviceType>(state_, p_(state_idx, 0), state_idx);
  }

  KOKKOS_INLINE_FUNCTION
  void free_state(const Random_XorShift1024<DeviceType>& state) const {
    for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i];
-    p_(state.state_idx_)     = state.p_;
-    locks_(state.state_idx_) = 0;
+    p_(state.state_idx_, 0)     = state.p_;
+    locks_(state.state_idx_, 0) = 0;
  }
 };

--- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp
+++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp
@ -319,7 +319,7 @@ class BinSort {
                   Kokkos::RangePolicy<execution_space>(0, len), functor);
    }

-    execution_space().fence();
+    execution_space().fence("Kokkos::Sort: fence after sorting");
  }

  template <class ValuesViewType>
@ -492,7 +492,8 @@ bool try_std_sort(ViewType view) {
                      view.stride_3(), view.stride_4(), view.stride_5(),
                      view.stride_6(), view.stride_7()};
  possible         = possible &&
-             std::is_same<typename ViewType::memory_space, HostSpace>::value;
+             SpaceAccessibility<HostSpace,
+                                typename ViewType::memory_space>::accessible;
  possible = possible && (ViewType::Rank == 1);
  possible = possible && (stride[0] == 1);
  if (possible) {
--- a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp
+++ b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp
@ -47,7 +47,7 @@
 #include <iostream>
 #include <cstdlib>
 #include <cstdio>
-#include <impl/Kokkos_Timer.hpp>
+#include <Kokkos_Timer.hpp>
 #include <Kokkos_Core.hpp>
 #include <Kokkos_Random.hpp>
 #include <cmath>
@ -198,11 +198,50 @@ struct test_random_functor {
          static_cast<uint64_t>(1.0 * HIST_DIM3D * tmp2 / theMax);
      const uint64_t ind3_3d =
          static_cast<uint64_t>(1.0 * HIST_DIM3D * tmp3 / theMax);
-
+// Workaround Intel 17 compiler bug which sometimes add random
+// instruction alignment which makes the lock instruction
+// illegal. Seems to be mostly just for unsigned int atomics.
+// Looking at the assembly the compiler
+// appears to insert cache line alignment for the instruction.
+// Isn't restricted to specific archs. Seen it on SNB and SKX, but for
+// different code. Another occurrence was with Desul atomics in
+// a different unit test. This one here happens without desul atomics.
+// Inserting an assembly nop instruction changes the alignment and
+// works round this.
+//
+// 17.0.4 for 64bit Random works with 1/1/1/2/1
+// 17.0.4 for 1024bit Random works with 1/1/1/1/1
+#ifdef KOKKOS_COMPILER_INTEL
+#if (KOKKOS_COMPILER_INTEL < 1800)
+      asm volatile("nop\n");
+#endif
+#endif
      atomic_fetch_add(&density_1d(ind1_1d), 1);
+#ifdef KOKKOS_COMPILER_INTEL
+#if (KOKKOS_COMPILER_INTEL < 1800)
+      asm volatile("nop\n");
+#endif
+#endif
      atomic_fetch_add(&density_1d(ind2_1d), 1);
+#ifdef KOKKOS_COMPILER_INTEL
+#if (KOKKOS_COMPILER_INTEL < 1800)
+      asm volatile("nop\n");
+#endif
+#endif
      atomic_fetch_add(&density_1d(ind3_1d), 1);
+#ifdef KOKKOS_COMPILER_INTEL
+#if (KOKKOS_COMPILER_INTEL < 1800)
+      if (std::is_same<rnd_type, Kokkos::Random_XorShift64<device_type>>::value)
+        asm volatile("nop\n");
+      asm volatile("nop\n");
+#endif
+#endif
      atomic_fetch_add(&density_3d(ind1_3d, ind2_3d, ind3_3d), 1);
+#ifdef KOKKOS_COMPILER_INTEL
+#if (KOKKOS_COMPILER_INTEL < 1800)
+      asm volatile("nop\n");
+#endif
+#endif
    }
    rand_pool.free_state(rand_gen);
  }
@ -338,9 +377,11 @@ struct test_random_scalar {
      using functor_type =
          test_histogram1d_functor<typename RandomGenerator::device_type>;
      parallel_reduce(HIST_DIM1D, functor_type(density_1d, num_draws), result);
-
-      double tolerance   = 6 * std::sqrt(1.0 / HIST_DIM1D);
-      double mean_expect = 1.0 * num_draws * 3 / HIST_DIM1D;
+      double mean_eps_expect       = 0.0001;
+      double variance_eps_expect   = 0.07;
+      double covariance_eps_expect = 0.06;
+      double tolerance             = 6 * std::sqrt(1.0 / HIST_DIM1D);
+      double mean_expect           = 1.0 * num_draws * 3 / HIST_DIM1D;
      double variance_expect =
          1.0 * num_draws * 3 / HIST_DIM1D * (1.0 - 1.0 / HIST_DIM1D);
      double covariance_expect = -1.0 * num_draws * 3 / HIST_DIM1D / HIST_DIM1D;
@ -349,11 +390,26 @@ struct test_random_scalar {
          variance_expect / (result.variance / HIST_DIM1D) - 1.0;
      double covariance_eps =
          (result.covariance / HIST_DIM1D - covariance_expect) / mean_expect;
-      pass_hist1d_mean = ((-0.0001 < mean_eps) && (0.0001 > mean_eps)) ? 1 : 0;
-      pass_hist1d_var =
-          ((-0.07 < variance_eps) && (0.07 > variance_eps)) ? 1 : 0;
-      pass_hist1d_covar =
-          ((-0.06 < covariance_eps) && (0.06 > covariance_eps)) ? 1 : 0;
+
+#if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
+      if (std::is_same<Scalar, Kokkos::Experimental::half_t>::value) {
+        mean_eps_expect       = 0.0003;
+        variance_eps_expect   = 1.0;
+        covariance_eps_expect = 5.0e4;
+      }
+#endif
+
+      pass_hist1d_mean =
+          ((-mean_eps_expect < mean_eps) && (mean_eps_expect > mean_eps)) ? 1
+                                                                          : 0;
+      pass_hist1d_var = ((-variance_eps_expect < variance_eps) &&
+                         (variance_eps_expect > variance_eps))
+                            ? 1
+                            : 0;
+      pass_hist1d_covar = ((-covariance_eps_expect < covariance_eps) &&
+                           (covariance_eps_expect > covariance_eps))
+                              ? 1
+                              : 0;

      cout << "Density 1D: " << mean_eps << " " << variance_eps << " "
           << (result.covariance / HIST_DIM1D / HIST_DIM1D) << " || "
@ -371,8 +427,9 @@ struct test_random_scalar {
          test_histogram3d_functor<typename RandomGenerator::device_type>;
      parallel_reduce(HIST_DIM1D, functor_type(density_3d, num_draws), result);

-      double tolerance   = 6 * std::sqrt(1.0 / HIST_DIM1D);
-      double mean_expect = 1.0 * num_draws / HIST_DIM1D;
+      double variance_factor = 1.2;
+      double tolerance       = 6 * std::sqrt(1.0 / HIST_DIM1D);
+      double mean_expect     = 1.0 * num_draws / HIST_DIM1D;
      double variance_expect =
          1.0 * num_draws / HIST_DIM1D * (1.0 - 1.0 / HIST_DIM1D);
      double covariance_expect = -1.0 * num_draws / HIST_DIM1D / HIST_DIM1D;
@ -381,15 +438,23 @@ struct test_random_scalar {
          variance_expect / (result.variance / HIST_DIM1D) - 1.0;
      double covariance_eps =
          (result.covariance / HIST_DIM1D - covariance_expect) / mean_expect;
+
+#if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
+      if (std::is_same<Scalar, Kokkos::Experimental::half_t>::value) {
+        variance_factor = 7;
+      }
+#endif
+
      pass_hist3d_mean =
          ((-tolerance < mean_eps) && (tolerance > mean_eps)) ? 1 : 0;
-      pass_hist3d_var = ((-1.2 * tolerance < variance_eps) &&
-                         (1.2 * tolerance > variance_eps))
+      pass_hist3d_var = ((-variance_factor * tolerance < variance_eps) &&
+                         (variance_factor * tolerance > variance_eps))
                            ? 1
                            : 0;
-      pass_hist3d_covar =
-          ((-tolerance < covariance_eps) && (tolerance > covariance_eps)) ? 1
-                                                                          : 0;
+      pass_hist3d_covar = ((-variance_factor * tolerance < covariance_eps) &&
+                           (variance_factor * tolerance > covariance_eps))
+                              ? 1
+                              : 0;

      cout << "Density 3D: " << mean_eps << " " << variance_eps << " "
           << result.covariance / HIST_DIM1D / HIST_DIM1D << " || " << tolerance
@ -471,6 +536,21 @@ void test_random(unsigned int num_draws) {
  deep_copy(density_1d, 0);
  deep_copy(density_3d, 0);

+  cout << "Test Scalar=half" << endl;
+  test_random_scalar<RandomGenerator, Kokkos::Experimental::half_t> test_half(
+      density_1d, density_3d, pool, num_draws);
+  ASSERT_EQ(test_half.pass_mean, 1);
+  ASSERT_EQ(test_half.pass_var, 1);
+  ASSERT_EQ(test_half.pass_covar, 1);
+  ASSERT_EQ(test_half.pass_hist1d_mean, 1);
+  ASSERT_EQ(test_half.pass_hist1d_var, 1);
+  ASSERT_EQ(test_half.pass_hist1d_covar, 1);
+  ASSERT_EQ(test_half.pass_hist3d_mean, 1);
+  ASSERT_EQ(test_half.pass_hist3d_var, 1);
+  ASSERT_EQ(test_half.pass_hist3d_covar, 1);
+  deep_copy(density_1d, 0);
+  deep_copy(density_3d, 0);
+
  cout << "Test Scalar=float" << endl;
  test_random_scalar<RandomGenerator, float> test_float(density_1d, density_3d,
                                                        pool, num_draws);