From 26c7358a849eaadf956252c2e6a63a6e31f8440b Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Thu, 7 Dec 2023 10:24:25 -0600 Subject: [PATCH] Working on sph_lj kernels --- lib/gpu/lal_sph_lj.cpp | 212 ++++++++++++++++++ lib/gpu/lal_sph_lj.cu | 432 +++++++++++++++++++++++++++++++++++++ lib/gpu/lal_sph_lj.h | 88 ++++++++ lib/gpu/lal_sph_lj_ext.cpp | 132 ++++++++++++ 4 files changed, 864 insertions(+) create mode 100644 lib/gpu/lal_sph_lj.cpp create mode 100644 lib/gpu/lal_sph_lj.cu create mode 100644 lib/gpu/lal_sph_lj.h create mode 100644 lib/gpu/lal_sph_lj_ext.cpp diff --git a/lib/gpu/lal_sph_lj.cpp b/lib/gpu/lal_sph_lj.cpp new file mode 100644 index 0000000000..8367a81b1e --- /dev/null +++ b/lib/gpu/lal_sph_lj.cpp @@ -0,0 +1,212 @@ +/*************************************************************************** + sph_lj.cpp + ------------------- + Trung Dac Nguyen (U Chicago) + + Class for acceleration of the sph_lj pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : September 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#if defined(USE_OPENCL) +#include "sph_lj_cl.h" +#elif defined(USE_CUDART) +const char *sph_lj=0; +#else +#include "sph_lj_cubin.h" +#endif + +#include "lal_sph_lj.h" +#include +namespace LAMMPS_AL { +#define SPHLJT SPHLJ + +extern Device device; + +template +SPHLJT::SPHLJ() : BaseDPD(), _allocated(false) { + _max_drhoE_size = 0; +} + +template +SPHLJT::~SPHLJ() { + clear(); +} + +template +int SPHLJT::bytes_per_atom(const int max_nbors) const { + return this->bytes_per_atom_atomic(max_nbors); +} + +template +int SPHLJT::init(const int ntypes, + double **host_cutsq, double **host_viscosity, + double *host_special_lj, + const int nlocal, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, + const double gpu_split, FILE *_screen) { + const int max_shared_types=this->device->max_shared_types(); + + int onetype=0; + #ifdef USE_OPENCL + if (maxspecial==0) + for (int i=1; i0) { + if (onetype>0) + onetype=-1; + else if (onetype==0) + onetype=i*max_shared_types+j; + } + if (onetype<0) onetype=0; + #endif + + int success; + int extra_fields = 4; // round up to accomodate quadruples of numtyp values + // rho, cv, mass + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size, + gpu_split,_screen,sph_lj,"k_sph_lj",onetype,extra_fields); + if (success!=0) + return success; + + // If atom type constants fit in shared memory use fast kernel + int lj_types=ntypes; + shared_types=false; + if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) { + lj_types=max_shared_types; + shared_types=true; + } + _lj_types=lj_types; + + // Allocate a host write buffer for data initialization + UCL_H_Vec host_write(lj_types*lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; iucl_device),UCL_READ_ONLY); + this->atom->type_pack2(ntypes,lj_types,coeff,host_write,host_viscosity, + host_cutsq); + + UCL_H_Vec dview; + sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY); + dview.view(host_special_lj,4,*(this->ucl_device)); + ucl_copy(sp_lj,dview,false); + + // allocate per-atom array Q + + int ef_nall=nall; + if (ef_nall==0) + ef_nall=2000; + + _max_drhoE_size=static_cast(static_cast(ef_nall)*1.10); + drhoE.alloc(_max_drhoE_size,*(this->ucl_device),UCL_READ_WRITE,UCL_READ_WRITE); + + _allocated=true; + this->_max_bytes=coeff.row_bytes()+drhoE.row_bytes()+sp_lj.row_bytes(); + return 0; +} + +template +void SPHLJT::clear() { + if (!_allocated) + return; + _allocated=false; + + coeff.clear(); + drhoE.clear(); + sp_lj.clear(); + this->clear_atomic(); +} + +template +double SPHLJT::host_memory_usage() const { + return this->host_memory_usage_atomic()+sizeof(SPHLJ); +} + +template +void SPHLJT::update_drhoE(void **drhoE_ptr) { + *drhoE_ptr=drhoE.host.begin(); + drhoE.update_host(_max_drhoE_size,false); +} + +// --------------------------------------------------------------------------- +// Calculate energies, forces, and torques +// --------------------------------------------------------------------------- +template +int SPHLJT::loop(const int eflag, const int vflag) { + + int nall = this->atom->nall(); + + // Resize drhoE array if necessary + if (nall > _max_drhoE_size) { + _max_drhoE_size=static_cast(static_cast(nall)*1.10); + drhoE.resize(_max_drhoE_size); + } + + // signal that we need to transfer extra data from the host + + this->atom->extra_data_unavail(); + + numtyp4 *pextra=reinterpret_cast(&(this->atom->extra[0])); + + int n = 0; + int nstride = 1; + for (int i = 0; i < nall; i++) { + int idx = n+i*nstride; + numtyp4 v; + v.x = rho[i]; + v.y = cv[i]; + v.z = mass[i]; + v.w = 0; + pextra[idx] = v; + } + this->atom->add_extra_data(); + + // Compute the block size and grid size to keep all cores busy + const int BX=this->block_size(); + int GX=static_cast(ceil(static_cast(this->ans->inum())/ + (BX/this->_threads_per_atom))); + + + int ainum=this->ans->inum(); + int nbor_pitch=this->nbor->nbor_pitch(); + this->time_pair.start(); + if (shared_types) { + this->k_pair_sel->set_size(GX,BX); + this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &sp_lj, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &drhoE, &eflag, &vflag, + &ainum, &nbor_pitch, &this->atom->v, &this->_threads_per_atom); + } else { + this->k_pair.set_size(GX,BX); + this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, + &_lj_types, &sp_lj, &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &drhoE, &eflag, &vflag, + &ainum, &nbor_pitch, &this->atom->v, &this->_threads_per_atom); + } + + this->time_pair.stop(); + return GX; +} + +// --------------------------------------------------------------------------- +// Get the extra data pointers from host +// --------------------------------------------------------------------------- + +template +void SPHLJT::get_extra_data(double *host_rho, double *host_cv, double* host_mass) { + rho = host_rho; + cv = host_cv; + mass = host_mass; +} + +template class SPHLJ; +} diff --git a/lib/gpu/lal_sph_lj.cu b/lib/gpu/lal_sph_lj.cu new file mode 100644 index 0000000000..c6fe071399 --- /dev/null +++ b/lib/gpu/lal_sph_lj.cu @@ -0,0 +1,432 @@ +// ************************************************************************** +// edpd.cu +// ------------------- +// Trung Dac Nguyen (U Chicago) +// +// Device code for acceleration of the edpd pair style +// +// __________________________________________________________________________ +// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) +// __________________________________________________________________________ +// +// begin : September 2023 +// email : ndactrung@gmail.com +// *************************************************************************** + +#if defined(NV_KERNEL) || defined(USE_HIP) +#include "lal_aux_fun1.h" +#ifndef _DOUBLE_DOUBLE +_texture( pos_tex,float4); +_texture( vel_tex,float4); +#else +_texture_2d( pos_tex,int4); +_texture_2d( vel_tex,int4); +#endif +#else +#define pos_tex x_ +#define vel_tex v_ +#endif + +#if (SHUFFLE_AVAIL == 0) + +#define store_drhoE(drhoI, deltaE, ii, inum, tid, t_per_atom, offset, \ + drhoE) \ + if (t_per_atom>1) { \ + simdsync(); \ + simd_reduce_add2(t_per_atom, red_acc, offset, tid, rhoEi, deltaE); \ + } \ + if (offset==0 && ii1) { \ + simd_reduce_add2(t_per_atom,drhoI,deltaE); \ + } \ + if (offset==0 && ii tag2) { + tag1 = jtag; tag2 = itag; + } + + numtyp randnum = (numtyp)0.0; + saru(tag1, tag2, seed, timestep, randnum); + + numtyp T_ij=(numtyp)0.5*(Ti+Tj); + numtyp4 T_pow; + T_pow.x = T_ij - (numtyp)1.0; + T_pow.y = T_pow.x*T_pow.x; + T_pow.z = T_pow.x*T_pow.y; + T_pow.w = T_pow.x*T_pow.z; + + numtyp coeff2x = coeff2[mtype].x; //power[itype][jtype] + numtyp coeff2y = coeff2[mtype].y; //kappa[itype][jtype] + numtyp coeff2z = coeff2[mtype].z; //powerT[itype][jtype] + numtyp coeff2w = coeff2[mtype].w; //cutT[itype][jtype] + numtyp power_d = coeff2x; + if (power_flag) { + numtyp factor = (numtyp)1.0; + factor += sc[mtype].x*T_pow.x + sc[mtype].y*T_pow.y + + sc[mtype].z*T_pow.z + sc[mtype].w*T_pow.w; + power_d *= factor; + } + + power_d = MAX((numtyp)0.01,power_d); + numtyp wc = (numtyp)1.0 - r/coeffz; // cut[itype][jtype] + wc = MAX((numtyp)0.0,MIN((numtyp)1.0,wc)); + numtyp wr = ucl_pow(wc, (numtyp)0.5*power_d); + + numtyp kboltz = (numtyp)1.0; + numtyp GammaIJ = coeffy; // gamma[itype][jtype] + numtyp SigmaIJ = (numtyp)4.0*GammaIJ*kboltz*Ti*Tj/(Ti+Tj); + SigmaIJ = ucl_sqrt(SigmaIJ); + + numtyp force = coeffx*T_ij*wc; // a0[itype][jtype] + force -= GammaIJ *wr*wr *dot*rinv; + force += SigmaIJ * wr *randnum * dtinvsqrt; + force *= factor_dpd*rinv; + + f.x+=delx*force; + f.y+=dely*force; + f.z+=delz*force; + + // heat transfer + + if (r < coeff2w) { + numtyp wrT = (numtyp)1.0 - r/coeff2w; + wrT = MAX((numtyp)0.0,MIN((numtyp)1.0,wrT)); + wrT = ucl_pow(wrT, (numtyp)0.5*coeff2z); // powerT[itype][jtype] + numtyp randnumT = (numtyp)0; + saru(tag1, tag2, seed+tag1+tag2, timestep, randnumT); // randomT->gaussian(); + randnumT = MAX((numtyp)-5.0,MIN(randnum,(numtyp)5.0)); + + numtyp kappaT = coeff2y; // kappa[itype][jtype] + if (kappa_flag) { + numtyp factor = (numtyp)1.0; + factor += kc[mtype].x*T_pow.x + kc[mtype].y*T_pow.y + + kc[mtype].z*T_pow.z + kc[mtype].w*T_pow.w; + kappaT *= factor; + } + + numtyp kij = cvi*cvj*kappaT * T_ij*T_ij; + numtyp alphaij = ucl_sqrt((numtyp)2.0*kboltz*kij); + + numtyp dQc = kij * wrT*wrT * (Tj - Ti)/(Ti*Tj); + numtyp dQd = wr*wr*( GammaIJ * vijeij*vijeij - SigmaIJ*SigmaIJ/mass_itype ) - SigmaIJ * wr *vijeij *randnum; + dQd /= (cvi+cvj); + numtyp dQr = alphaij * wrT * dtinvsqrt * randnumT; + Qi += (dQc + dQd + dQr ); + } + + if (EVFLAG && eflag) { + numtyp e = (numtyp)0.5*coeffx*T_ij*coeffz * wc*wc; + energy+=factor_dpd*e; + } + if (EVFLAG && vflag) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; + } + } + } // for nbor + } // if ii + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, + ans,engv); + store_drhoE(Qi,ii,inum,tid,t_per_atom,offset,Q); +} + +__kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict extra, + const __global numtyp2 *restrict coeff_in, + const __global numtyp *restrict sp_lj_in, + const __global int * dev_nbor, + const __global int * dev_packed, + __global acctyp3 *restrict ans, + __global acctyp *restrict engv, + __global acctyp *restrict drhoE, + const int eflag, const int vflag, + const int inum, const int nbor_pitch, + const __global numtyp4 *restrict v_, + const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + #ifndef ONETYPE + __local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp sp_lj[4]; + if (tid<4) { + sp_lj[tid]=sp_lj_in[tid]; + } + if (tid tag2) { + tag1 = jtag; tag2 = itag; + } + numtyp randnum = (numtyp)0.0; + saru(tag1, tag2, seed, timestep, randnum); + + numtyp T_ij=(numtyp)0.5*(Ti+Tj); + numtyp4 T_pow; + T_pow.x = T_ij - (numtyp)1.0; + T_pow.y = T_pow.x*T_pow.x; + T_pow.z = T_pow.x*T_pow.y; + T_pow.w = T_pow.x*T_pow.z; + + numtyp power_d = coeff2x; // power[itype][jtype] + if (power_flag) { + numtyp factor = (numtyp)1.0; + factor += scx*T_pow.x + scy*T_pow.y + scz*T_pow.z + scw*T_pow.w; + power_d *= factor; + } + + power_d = MAX((numtyp)0.01,power_d); + numtyp wc = (numtyp)1.0 - r/coeffz; // cut[itype][jtype] + wc = MAX((numtyp)0.0,MIN((numtyp)1.0,wc)); + numtyp wr = ucl_pow((numtyp)wc, (numtyp)0.5*power_d); + + numtyp kboltz = (numtyp)1.0; + numtyp GammaIJ = coeffy; // gamma[itype][jtype] + numtyp SigmaIJ = (numtyp)4.0*GammaIJ*kboltz*Ti*Tj/(Ti+Tj); + SigmaIJ = ucl_sqrt(SigmaIJ); + + numtyp force = coeffx*T_ij*wc; // a0[itype][jtype] + force -= GammaIJ *wr*wr *dot*rinv; + force += SigmaIJ* wr *randnum * dtinvsqrt; + #ifndef ONETYPE + force *= factor_dpd*rinv; + #else + force *= rinv; + #endif + + f.x+=delx*force; + f.y+=dely*force; + f.z+=delz*force; + + // heat transfer + + if (r < coeff2w) { + numtyp wrT = (numtyp)1.0 - r/coeff2w; + wrT = MAX((numtyp)0.0,MIN((numtyp)1.0,wrT)); + wrT = ucl_pow(wrT, (numtyp)0.5*coeff2z); // powerT[itype][jtype] + numtyp randnumT = (numtyp)0; + saru(tag1, tag2, seed+tag1+tag2, timestep, randnumT); // randomT->gaussian(); + randnumT = MAX((numtyp)-5.0,MIN(randnum,(numtyp)5.0)); + + numtyp kappaT = coeff2y; // kappa[itype][jtype] + if (kappa_flag) { + numtyp factor = (numtyp)1.0; + factor += kcx*T_pow.x + kcy*T_pow.y + kcz*T_pow.z + kcw*T_pow.w; + kappaT *= factor; + } + + numtyp kij = cvi*cvj*kappaT * T_ij*T_ij; + numtyp alphaij = ucl_sqrt((numtyp)2.0*kboltz*kij); + + numtyp dQc = kij * wrT*wrT * (Tj - Ti )/(Ti*Tj); + numtyp dQd = wr*wr*( GammaIJ * vijeij*vijeij - SigmaIJ*SigmaIJ/mass_itype ) - SigmaIJ * wr *vijeij *randnum; + dQd /= (cvi+cvj); + numtyp dQr = alphaij * wrT * dtinvsqrt * randnumT; + Qi += (dQc + dQd + dQr ); + } + + if (EVFLAG && eflag) { + numtyp e = (numtyp)0.5*coeffx*T_ij*coeffz * wc*wc; + #ifndef ONETYPE + energy+=factor_dpd*e; + #else + energy+=e; + #endif + } + if (EVFLAG && vflag) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; + } + + } + } // for nbor + } // if ii + + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); + store_drhoE(Qi,ii,inum,tid,t_per_atom,offset,Q); +} + diff --git a/lib/gpu/lal_sph_lj.h b/lib/gpu/lal_sph_lj.h new file mode 100644 index 0000000000..5e26d019b2 --- /dev/null +++ b/lib/gpu/lal_sph_lj.h @@ -0,0 +1,88 @@ +/*************************************************************************** + sph_lj.h + ------------------- + Trung Dac Nguyen (U Chicago) + + Class for acceleration of the sph lj pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : December 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#ifndef LAL_SPH_LJ_H +#define LAL_SPH_LJ_H + +#include "lal_base_dpd.h" + +namespace LAMMPS_AL { + +template +class SPHLJ : public BaseDPD { + public: + SPHLJ(); + ~SPHLJ(); + + /// Clear any previous data and set up for a new LAMMPS run + /** \param max_nbors initial number of rows in the neighbor matrix + * \param cell_size cutoff + skin + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successful + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, double **host_viscosity, + double *host_special_lj, const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, const double gpu_split, + FILE *screen); + + /// Clear all host and device data + /** \note This is called at the beginning of the init() routine **/ + void clear(); + + /// Returns memory usage on device per atom + int bytes_per_atom(const int max_nbors) const; + + /// Total host memory used by library for pair style + double host_memory_usage() const; + + void get_extra_data(double *host_rho, double *host_cv, double* host_mass); + + /// copy drho and desph from device to host + void update_drhoE(void **drhoE_ptr); + + // --------------------------- TYPE DATA -------------------------- + + /// coeff.x = viscosity, coeff.y = cutsq + UCL_D_Vec coeff; + + /// Special LJ values + UCL_D_Vec sp_lj; + + /// If atom type constants fit in shared memory, use fast kernels + bool shared_types; + + /// Number of atom types + int _lj_types; + + /// Per-atom arrays + UCL_Vector drhoE; + int _max_drhoE_size; + + /// pointer to host data + double *rho, *cv, *mass; + + private: + bool _allocated; + int loop(const int eflag, const int vflag); +}; + +} + +#endif diff --git a/lib/gpu/lal_sph_lj_ext.cpp b/lib/gpu/lal_sph_lj_ext.cpp new file mode 100644 index 0000000000..7abaa10805 --- /dev/null +++ b/lib/gpu/lal_sph_lj_ext.cpp @@ -0,0 +1,132 @@ +/*************************************************************************** + sph_lj_ext.cpp + ------------------- + Trung Dac Nguyen (U Chicago) + + Functions for LAMMPS access to sph lj acceleration routines. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : December 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#include +#include +#include + +#include "lal_sph_lj.h" + +using namespace std; +using namespace LAMMPS_AL; + +static SPHLJ SPHLJMF; + +// --------------------------------------------------------------------------- +// Allocate memory on host and device and copy constants to device +// --------------------------------------------------------------------------- +int sph_lj_gpu_init(const int ntypes, double **cutsq, double **host_viscosity, + double *special_lj, const int inum, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen) { + SPHLJMF.clear(); + gpu_mode=SPHLJMF.device->gpu_mode(); + double gpu_split=SPHLJMF.device->particle_split(); + int first_gpu=SPHLJMF.device->first_device(); + int last_gpu=SPHLJMF.device->last_device(); + int world_me=SPHLJMF.device->world_me(); + int gpu_rank=SPHLJMF.device->gpu_rank(); + int procs_per_gpu=SPHLJMF.device->procs_per_gpu(); + + SPHLJMF.device->init_message(screen,"sph_lj",first_gpu,last_gpu); + + bool message=false; + if (SPHLJMF.device->replica_me()==0 && screen) + message=true; + + if (message) { + fprintf(screen,"Initializing Device and compiling on process 0..."); + fflush(screen); + } + + int init_ok=0; + if (world_me==0) + init_ok=SPHLJMF.init(ntypes, cutsq, host_viscosity, special_lj, + inum, nall, max_nbors, maxspecial, + cell_size, gpu_split, screen); + + SPHLJMF.device->world_barrier(); + if (message) + fprintf(screen,"Done.\n"); + + for (int i=0; iserialize_init(); + if (message) + fprintf(screen,"Done.\n"); + } + if (message) + fprintf(screen,"\n"); + + if (init_ok==0) + SPHLJMF.estimate_gpu_overhead(); + return init_ok; +} + +void sph_lj_gpu_clear() { + SPHLJMF.clear(); +} + +int ** sph_lj_gpu_compute_n(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, bool &success, + double **host_v, const double dtinvsqrt, + const int seed, const int timestep, + double *boxlo, double *prd) { + return SPHLJMF.compute(ago, inum_full, nall, host_x, host_type, sublo, + subhi, tag, nspecial, special, eflag, vflag, eatom, + vatom, host_start, ilist, jnum, cpu_time, success, + host_v, dtinvsqrt, seed, timestep, boxlo, prd); +} + +void sph_lj_gpu_compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, tagint *tag, + double **host_v, const double dtinvsqrt, + const int seed, const int timestep, + const int nlocal, double *boxlo, double *prd) { + SPHLJMF.compute(ago, inum_full, nall, host_x, host_type, ilist, numj, + firstneigh, eflag, vflag, eatom, vatom, host_start, cpu_time, success, + tag, host_v, dtinvsqrt, seed, timestep, nlocal, boxlo, prd); +} + +void sph_lj_gpu_get_extra_data(double *host_rho, double *host_cv, double *host_mass) { + SPHLJMF.get_extra_data(host_rho, host_cv, host_mass); +} + +void sph_lj_gpu_update_drhoE(void **drhoE_ptr) { + SPHLJMF.update_drhoE(drhoE_ptr); +} + +double sph_lj_gpu_bytes() { + return SPHLJMF.host_memory_usage(); +}