GPU Package: Switching back to timer disabling with multiple MPI tasks per GPU. Logic added to prevent mem leak.

2022-09-28 21:02:16 -07:00
parent be98b5a168
commit 6e34d21b24
6 changed files with 55 additions and 15 deletions
--- a/lib/gpu/lal_device.cpp
+++ b/lib/gpu/lal_device.cpp
@ -265,15 +265,13 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
  // Time on the device only if 1 proc per gpu
  _time_device=true;

-#if 0
-  // XXX: the following setting triggers a memory leak with OpenCL and MPI
-  //      setting _time_device=true for all processes doesn't seem to be a
-  //      problem with either (no segfault, no (large) memory leak.
-  //      thus keeping this disabled for now. may need to review later.
-  //      2018-07-23 <akohlmey@gmail.com>
+  // Previous source of OCL memory leak when time_device=false
+  // - Logic added to release OCL events when timers are not invoked
  if (_procs_per_gpu>1)
    _time_device=false;
-#endif
+
+  if (!_time_device && _particle_split > 0)
+    gpu->configure_profiling(false);

  // Set up a per device communicator
  MPI_Comm_split(node_comm,my_gpu,0,&_comm_gpu);
@ -715,7 +713,9 @@ void DeviceT::estimate_gpu_overhead(const int kernel_calls,
      dev_data_out[0].flush();
    #endif
    driver_time=MPI_Wtime()-driver_time;
-    double time=over_timer.seconds();
+    double time=0.0;
+    if (_time_device)
+      time=over_timer.seconds();

    if (time_device()) {
      for (int i=0; i<_data_in_estimate; i++)