// lammps/lib/gpu/lal_gran_hooke_ext.cpp

/***************************************************************************
 *                             gran_hooke_ext.cpp
 *                              -------------------
 *                             Trung Dac Nguyen (ORNL)
 *
 *   Functions for LAMMPS access to gran/hooke acceleration routines.
 *
 * __________________________________________________________________________
 *    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
 * __________________________________________________________________________
 *
 *    begin                :
 *    email                : nguyentd@ornl.gov
 ***************************************************************************/

#include <iostream>
#include <cassert>
#include <cmath>

#include "lal_gran_hooke.h"

using namespace std;
using namespace LAMMPS_AL;

// Single file-scope GranHooke instance, specialized on the PRECISION and
// ACC_PRECISION macros; every gran_hooke_gpu_* function in this file
// forwards to it.
static GranHooke<PRECISION,ACC_PRECISION> GHM;

// ---------------------------------------------------------------------------
// Initialize the file-scope GranHooke object for the gran/hooke style.
//
// The call sequence is staged:
//   1. the process whose world_me() is 0 calls GHM.init(), after which every
//      process calls world_barrier();
//   2. for each slot in [0, procs_per_gpu) the process whose gpu_rank()
//      equals that slot (and whose world_me() is not 0) calls GHM.init(),
//      and every process calls serialize_init() once per slot.
//
// Progress text is written to screen only when replica_me() is 0 and screen
// is non-null.  gpu_mode is overwritten with the value reported by the
// device.  cutsq is accepted but not referenced in this function.
//
// Returns the value produced by GHM.init() on this process, or 0 if init()
// was never invoked here.  When the result is 0, estimate_gpu_overhead() is
// also called before returning.
// ---------------------------------------------------------------------------
int gran_hooke_gpu_init(const int ntypes, double **cutsq,
                        const double host_k_n, const double host_k_t,
                        const double host_gamman, const double host_gammat,
                        const double host_xmu, const double host_dt,
                        const int host_dampflag,
                        double *special_lj, int *host_mask,
                        const int inum, const int nall, const int max_nbors,
                        const int maxspecial, const double cell_size,
                        int &gpu_mode, FILE *screen) {
  GHM.clear();

  // Gather the device layout once; the values are reused below.
  gpu_mode=GHM.device->gpu_mode();
  double split=GHM.device->particle_split();
  int dev_first=GHM.device->first_device();
  int dev_last=GHM.device->last_device();
  const int my_world_rank=GHM.device->world_me();
  const int my_gpu_rank=GHM.device->gpu_rank();
  const int ranks_per_gpu=GHM.device->procs_per_gpu();

  GHM.device->init_message(screen,"gran/hooke",dev_first,dev_last);

  // Only one process per replica reports progress, and only if it has a
  // stream to write to.
  const bool verbose=(GHM.device->replica_me()==0 && screen);

  if (verbose) {
    fprintf(screen,"Initializing Device and compiling on process 0...");
    fflush(screen);
  }

  // Stage 1: world rank 0 initializes before anyone else.
  int status=0;
  if (my_world_rank==0) {
    status=GHM.init(ntypes, host_k_n, host_k_t, host_gamman, host_gammat,
                    host_xmu, host_dt, host_dampflag, special_lj, host_mask,
                    inum, nall, max_nbors, maxspecial, cell_size, split,
                    screen);
  }

  GHM.device->world_barrier();
  if (verbose) {
    fprintf(screen,"Done.\n");
  }

  // Stage 2: remaining processes initialize one GPU-rank slot at a time.
  for (int slot=0; slot<ranks_per_gpu; ++slot) {
    if (verbose) {
      if (dev_first==dev_last) {
        fprintf(screen,"Initializing Device %d on core %d...",dev_first,slot);
      } else {
        fprintf(screen,"Initializing Devices %d-%d on core %d...",dev_first,
                dev_last,slot);
      }
      fflush(screen);
    }

    // World rank 0 already initialized in stage 1, so it is skipped here.
    if (my_world_rank!=0 && my_gpu_rank==slot) {
      status=GHM.init(ntypes, host_k_n, host_k_t, host_gamman, host_gammat,
                      host_xmu, host_dt, host_dampflag, special_lj, host_mask,
                      inum, nall, max_nbors, maxspecial, cell_size, split,
                      screen);
    }

    GHM.device->serialize_init();
    if (verbose) {
      fprintf(screen,"Done.\n");
    }
  }
  if (verbose) {
    fprintf(screen,"\n");
  }

  if (status==0) {
    GHM.estimate_gpu_overhead();
  }
  return status;
}

// ---------------------------------------------------------------------------
// Invoke clear() on the file-scope GranHooke instance.
// NOTE(review): presumably this releases host/device storage held by the
// instance -- confirm against GranHooke::clear().
// ---------------------------------------------------------------------------
void gran_hooke_gpu_clear()
{
  GHM.clear();
}

// ---------------------------------------------------------------------------
// Forward one compute request to the GranHooke::compute overload that takes
// the sub-domain bounds (sublo/subhi), the special-neighbor data
// (nspecial/special) and the ilist/jnum output pointers, and hand its
// return value back to the caller unchanged.
// NOTE(review): presumably the returned int** is the neighbor list produced
// by the library -- confirm against GranHooke::compute().
// ---------------------------------------------------------------------------
int ** gran_hooke_gpu_compute_n(const int ago, const int inum_full,
                                const int nall, double **host_x,
                                int *host_type, double *sublo, double *subhi,
                                tagint *tag, int **nspecial, tagint **special,
                                const bool eflag, const bool vflag,
                                const bool eatom, const bool vatom,
                                int &host_start, int **ilist, int **jnum,
                                const double cpu_time, bool &success,
                                double **host_v, double *host_rad,
                                double **host_omega, double *host_rmass,
                                int limit_damping,
                                double *boxlo, double *prd) {
  int **result = GHM.compute(ago, inum_full, nall, host_x, host_type,
                             sublo, subhi, tag, nspecial, special,
                             eflag, vflag, eatom, vatom,
                             host_start, ilist, jnum, cpu_time, success,
                             host_v, host_rad, host_omega, host_rmass,
                             limit_damping, boxlo, prd);
  return result;
}

// ---------------------------------------------------------------------------
// Forward one compute request to the GranHooke::compute overload that takes
// caller-supplied neighbor data (ilist, numj, firstneigh) together with
// nlocal.  Nothing is returned; success and host_start are passed through by
// reference to the underlying call.
// ---------------------------------------------------------------------------
void gran_hooke_gpu_compute(const int ago, const int inum_full,
                            const int nall, double **host_x, int *host_type,
                            int *ilist, int *numj, int **firstneigh,
                            const bool eflag, const bool vflag,
                            const bool eatom, const bool vatom,
                            int &host_start, const double cpu_time,
                            bool &success, tagint *tag,
                            double **host_v, double *host_rad,
                            double **host_omega,  double *host_rmass,
                            int limit_damping,
                            const int nlocal, double *boxlo, double *prd) {
  GHM.compute(ago, inum_full, nall, host_x, host_type,
              ilist, numj, firstneigh,
              eflag, vflag, eatom, vatom,
              host_start, cpu_time, success, tag,
              host_v, host_rad, host_omega, host_rmass,
              limit_damping, nlocal, boxlo, prd);
}

// ---------------------------------------------------------------------------
// Report the value of host_memory_usage() for the file-scope GranHooke
// instance.
// ---------------------------------------------------------------------------
double gran_hooke_gpu_bytes()
{
  const double host_bytes = GHM.host_memory_usage();
  return host_bytes;
}