diff --git a/lib/gpu/lal_base_atomic.cpp b/lib/gpu/lal_base_atomic.cpp index 4aadd3754c..eb4dae636f 100644 --- a/lib/gpu/lal_base_atomic.cpp +++ b/lib/gpu/lal_base_atomic.cpp @@ -25,12 +25,16 @@ BaseAtomicT::BaseAtomic() : _compiled(false), _max_bytes(0) { device=&global_device; ans=new Answer(); nbor=new Neighbor(); + pair_program=NULL; } template BaseAtomicT::~BaseAtomic() { delete ans; delete nbor; + if (pair_program) delete pair_program; + k_pair_fast.clear(); + k_pair.clear(); } template @@ -109,19 +113,11 @@ void BaseAtomicT::clear_atomic() { device->output_times(time_pair,*ans,*nbor,avg_split,_max_bytes+_max_an_bytes, _gpu_overhead,_driver_overhead,_threads_per_atom,screen); - if (_compiled) { - k_pair_fast.clear(); - k_pair.clear(); - delete pair_program; - _compiled=false; - } - time_pair.clear(); hd_balancer.clear(); nbor->clear(); ans->clear(); - device->clear(); } // --------------------------------------------------------------------------- @@ -276,6 +272,7 @@ void BaseAtomicT::compile_kernels(UCL_Device &dev, const void *pair_str, return; std::string s_fast=std::string(kname)+"_fast"; + if (pair_program) delete pair_program; pair_program=new UCL_Program(dev); pair_program->load_string(pair_str,device->compile_string().c_str()); k_pair_fast.set_function(*pair_program,s_fast.c_str()); diff --git a/lib/gpu/lal_base_charge.cpp b/lib/gpu/lal_base_charge.cpp index 760e759201..17f30a7047 100644 --- a/lib/gpu/lal_base_charge.cpp +++ b/lib/gpu/lal_base_charge.cpp @@ -25,12 +25,16 @@ BaseChargeT::BaseCharge() : _compiled(false), _max_bytes(0) { device=&global_device; ans=new Answer(); nbor=new Neighbor(); + pair_program=NULL; } template BaseChargeT::~BaseCharge() { delete ans; delete nbor; + if (pair_program) delete pair_program; + k_pair_fast.clear(); + k_pair.clear(); } template @@ -111,19 +115,11 @@ void BaseChargeT::clear_atomic() { device->output_times(time_pair,*ans,*nbor,avg_split,_max_bytes+_max_an_bytes, _gpu_overhead,_driver_overhead,_threads_per_atom,screen); - if (_compiled) { - k_pair_fast.clear(); - k_pair.clear(); - delete pair_program; - _compiled=false; - } - time_pair.clear(); hd_balancer.clear(); nbor->clear(); ans->clear(); - device->clear(); } // --------------------------------------------------------------------------- @@ -291,6 +287,7 @@ void BaseChargeT::compile_kernels(UCL_Device &dev, const void *pair_str, return; std::string s_fast=std::string(kname)+"_fast"; + if (pair_program) delete pair_program; pair_program=new UCL_Program(dev); pair_program->load_string(pair_str,device->compile_string().c_str()); k_pair_fast.set_function(*pair_program,s_fast.c_str()); diff --git a/lib/gpu/lal_base_dipole.cpp b/lib/gpu/lal_base_dipole.cpp index 56dcaf8e12..a4b0bf3c37 100644 --- a/lib/gpu/lal_base_dipole.cpp +++ b/lib/gpu/lal_base_dipole.cpp @@ -25,12 +25,16 @@ BaseDipoleT::BaseDipole() : _compiled(false), _max_bytes(0) { device=&global_device; ans=new Answer(); nbor=new Neighbor(); + pair_program=NULL; } template BaseDipoleT::~BaseDipole() { delete ans; delete nbor; + if (pair_program) delete pair_program; + k_pair_fast.clear(); + k_pair.clear(); } template @@ -113,19 +117,11 @@ void BaseDipoleT::clear_atomic() { device->output_times(time_pair,*ans,*nbor,avg_split,_max_bytes+_max_an_bytes, _gpu_overhead,_driver_overhead,_threads_per_atom,screen); - if (_compiled) { - k_pair_fast.clear(); - k_pair.clear(); - delete pair_program; - _compiled=false; - } - time_pair.clear(); hd_balancer.clear(); nbor->clear(); ans->clear(); - device->clear(); } // --------------------------------------------------------------------------- @@ -299,6 +295,7 @@ void BaseDipoleT::compile_kernels(UCL_Device &dev, const void *pair_str, return; std::string s_fast=std::string(kname)+"_fast"; + if (pair_program) delete pair_program; pair_program=new UCL_Program(dev); pair_program->load_string(pair_str,device->compile_string().c_str()); k_pair_fast.set_function(*pair_program,s_fast.c_str()); diff --git a/lib/gpu/lal_base_dpd.cpp b/lib/gpu/lal_base_dpd.cpp index 66c8cf09e9..bf4533ad1a 100644 --- a/lib/gpu/lal_base_dpd.cpp +++ b/lib/gpu/lal_base_dpd.cpp @@ -25,12 +25,16 @@ BaseDPDT::BaseDPD() : _compiled(false), _max_bytes(0) { device=&global_device; ans=new Answer(); nbor=new Neighbor(); + pair_program=NULL; } template BaseDPDT::~BaseDPD() { delete ans; delete nbor; + if (pair_program) delete pair_program; + k_pair_fast.clear(); + k_pair.clear(); } template @@ -112,19 +116,11 @@ void BaseDPDT::clear_atomic() { device->output_times(time_pair,*ans,*nbor,avg_split,_max_bytes+_max_an_bytes, _gpu_overhead,_driver_overhead,_threads_per_atom,screen); - if (_compiled) { - k_pair_fast.clear(); - k_pair.clear(); - delete pair_program; - _compiled=false; - } - time_pair.clear(); hd_balancer.clear(); nbor->clear(); ans->clear(); - device->clear(); } // --------------------------------------------------------------------------- @@ -297,6 +293,7 @@ void BaseDPDT::compile_kernels(UCL_Device &dev, const void *pair_str, return; std::string s_fast=std::string(kname)+"_fast"; + if (pair_program) delete pair_program; pair_program=new UCL_Program(dev); pair_program->load_string(pair_str,device->compile_string().c_str()); k_pair_fast.set_function(*pair_program,s_fast.c_str()); diff --git a/lib/gpu/lal_base_ellipsoid.cpp b/lib/gpu/lal_base_ellipsoid.cpp index b8d0b7a666..dc32383264 100644 --- a/lib/gpu/lal_base_ellipsoid.cpp +++ b/lib/gpu/lal_base_ellipsoid.cpp @@ -33,12 +33,25 @@ BaseEllipsoidT::BaseEllipsoid() : _compiled(false), _max_bytes(0) { device=&global_device; ans=new Answer(); nbor=new Neighbor(); + nbor_program=NULL; + ellipsoid_program=NULL; + lj_program=NULL; } template BaseEllipsoidT::~BaseEllipsoid() { delete ans; delete nbor; + k_nbor_fast.clear(); + k_nbor.clear(); + k_ellipsoid.clear(); + k_ellipsoid_sphere.clear(); + k_sphere_ellipsoid.clear(); + k_lj_fast.clear(); + k_lj.clear(); + if (nbor_program) delete nbor_program; + if (ellipsoid_program) delete ellipsoid_program; + if (lj_program) delete lj_program; } template @@ -146,20 +159,6 @@ void BaseEllipsoidT::clear_base() { output_times(); host_olist.clear(); - if (_compiled) { - k_nbor_fast.clear(); - k_nbor.clear(); - k_ellipsoid.clear(); - k_ellipsoid_sphere.clear(); - k_sphere_ellipsoid.clear(); - k_lj_fast.clear(); - k_lj.clear(); - delete nbor_program; - delete ellipsoid_program; - delete lj_program; - _compiled=false; - } - time_nbor1.clear(); time_ellipsoid.clear(); time_nbor2.clear(); @@ -171,7 +170,6 @@ void BaseEllipsoidT::clear_base() { nbor->clear(); ans->clear(); - device->clear(); } template @@ -437,6 +435,7 @@ int** BaseEllipsoidT::compute(const int ago, const int inum_full, const int nall ans->copy_answers(eflag,vflag,eatom,vatom); device->add_ans_object(ans); hd_balancer.stop_timer(); + return nbor->host_jlist.begin()-host_start; } @@ -462,18 +461,21 @@ void BaseEllipsoidT::compile_kernels(UCL_Device &dev, std::string flags=device->compile_string(); + if (nbor_program) delete nbor_program; nbor_program=new UCL_Program(dev); nbor_program->load_string(ellipsoid_nbor,flags.c_str()); k_nbor_fast.set_function(*nbor_program,"kernel_nbor_fast"); k_nbor.set_function(*nbor_program,"kernel_nbor"); neigh_tex.get_texture(*nbor_program,"pos_tex"); + if (ellipsoid_program) delete ellipsoid_program; ellipsoid_program=new UCL_Program(dev); ellipsoid_program->load_string(ellipsoid_string,flags.c_str()); k_ellipsoid.set_function(*ellipsoid_program,kname); pos_tex.get_texture(*ellipsoid_program,"pos_tex"); quat_tex.get_texture(*ellipsoid_program,"quat_tex"); + if (lj_program) delete lj_program; lj_program=new UCL_Program(dev); lj_program->load_string(lj_string,flags.c_str()); k_sphere_ellipsoid.set_function(*lj_program,s_sphere_ellipsoid.c_str()); diff --git a/lib/gpu/lal_base_three.cpp b/lib/gpu/lal_base_three.cpp index dc5678dd24..1715fc3074 100644 --- a/lib/gpu/lal_base_three.cpp +++ b/lib/gpu/lal_base_three.cpp @@ -27,6 +27,7 @@ BaseThreeT::BaseThree() : _compiled(false), _max_bytes(0) { #ifdef THREE_CONCURRENT ans2=new Answer(); #endif + pair_program=NULL; } template @@ -36,6 +37,12 @@ BaseThreeT::~BaseThree() { #ifdef THREE_CONCURRENT delete ans2; #endif + if (pair_program) delete pair_program; + k_three_center.clear(); + k_three_end.clear(); + k_three_end_vatom.clear(); + k_pair.clear(); + k_short_nbor.clear(); } template @@ -139,16 +146,6 @@ void BaseThreeT::clear_atomic() { device->output_times(time_pair,*ans,*nbor,avg_split,_max_bytes+_max_an_bytes, _gpu_overhead,_driver_overhead,_threads_per_atom,screen); - if (_compiled) { - k_three_center.clear(); - k_three_end.clear(); - k_three_end_vatom.clear(); - k_pair.clear(); - k_short_nbor.clear(); - delete pair_program; - _compiled=false; - } - time_pair.clear(); hd_balancer.clear(); @@ -161,7 +158,6 @@ void BaseThreeT::clear_atomic() { // ucl_device will clean up the command queue in its destructor // ucl_device->pop_command_queue(); #endif - device->clear(); } // --------------------------------------------------------------------------- @@ -378,7 +374,7 @@ void BaseThreeT::compile_kernels(UCL_Device &dev, const void *pair_str, return; std::string vatom_name=std::string(three_end)+"_vatom"; - + if (pair_program) delete pair_program; pair_program=new UCL_Program(dev); pair_program->load_string(pair_str,device->compile_string().c_str()); k_three_center.set_function(*pair_program,three_center);