Files
lammps/lib/gpu/pair_gpu_device.h
pscrozi 4366bfffd3 Getting rid of extra CR characters at ends of lines.
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@5285 f3b2605a-c512-4ea7-a41b-209d697bcdaa
2010-11-23 19:52:03 +00:00

141 lines
5.1 KiB
C++

/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
#ifndef PAIR_GPU_DEVICE_H
#define PAIR_GPU_DEVICE_H
#include "pair_gpu_atom.h"
#include "pair_gpu_nbor.h"
#include "mpi.h"
#include <sstream>
#include "stdio.h"
#include <string>
/// Per-process handle on an accelerator device plus the atom and neighbor
/// storage that live alongside it.
/** Holds the Geryon device pointer, a per-device MPI communicator, the
  * atom/neighbor containers, and bookkeeping (ranks, device range, mode,
  * particle split, host timer).  Member functions declared here without a
  * body are defined outside this header.
  *
  * NOTE(review): \c gpu is a raw pointer and the class declares a destructor
  * but no copy control; ownership of \c gpu is not visible from this header,
  * so copying an instance should be treated as unsupported until confirmed. **/
template <class numtyp, class acctyp>
class PairGPUDevice {
 public:
  PairGPUDevice();
  ~PairGPUDevice();

  /// Initialize the device for use by this process
  /** Sets up a per-device MPI communicator for load balancing and initializes
    * the device (>=first_gpu and <=last_gpu) that this proc will be using
    * \param first_gpu Index of the first device that may be used
    * \param last_gpu Index of the last device that may be used
    * \param gpu_mode Acceleration mode: GPU_FORCE or GPU_NEIGH
    * \param particle_split Particle split defined in fix
    * \return presumably true on success -- TODO confirm against definition **/
  bool init_device(const int first_gpu, const int last_gpu,
                   const int gpu_mode, const double particle_split);

  /// Initialize the device for Atom and Neighbor storage
  /** \param charge presumably true if per-atom charges need to be stored
    *               -- TODO confirm against the atom storage class
    * \param rot True if quaternions need to be stored
    * \param nlocal Total number of local particles to allocate memory for
    * \param host_nlocal Initial number of host particles to allocate memory for
    * \param nall Total number of local+ghost particles
    * \param maxspecial presumably the maximum number of special neighbors
    *                   per atom -- TODO confirm against the neighbor class
    * \param gpu_nbor True if neighboring is performed on device
    * \param gpu_host 0 if host will not perform force calculations,
    *                 1 if gpu_nbor is true, and host needs a half nbor list,
    *                 2 if gpu_nbor is true, and host needs a full nbor list
    * \param max_nbors Initial number of rows in the neighbor matrix
    * \param cell_size cutoff+skin
    * \param pre_cut True if cutoff test will be performed in separate kernel
    *                than the force kernel
    * \return presumably true on success -- TODO confirm against definition **/
  bool init(const bool charge, const bool rot, const int nlocal,
            const int host_nlocal, const int nall, const int maxspecial,
            const bool gpu_nbor, const int gpu_host, const int max_nbors,
            const double cell_size, const bool pre_cut);

  /// Output a message for pair_style acceleration with device stats
  void init_message(FILE *screen, const char *name,
                    const int first_gpu, const int last_gpu);

  /// Output a message with timing information
  void output_times(UCL_Timer &time_pair, const double avg_split,
                    const double max_bytes, FILE *screen);

  /// Clear all memory on host and device associated with atom and nbor data
  void clear();

  /// Clear all memory on host and device
  void clear_device();

  /// Start timer on host
  /** Records the current MPI_Wtime() value; pair with stop_host_timer(). **/
  inline void start_host_timer() { _cpu_full=MPI_Wtime(); }

  /// Stop timer on host
  /** Replaces the recorded start time with the time elapsed since
    * start_host_timer() was called. **/
  inline void stop_host_timer() { _cpu_full=MPI_Wtime()-_cpu_full; }

  /// Return host time
  /** The value is an elapsed time only after stop_host_timer(); between
    * start_host_timer() and stop_host_timer() it is the raw start timestamp. **/
  inline double host_time() const { return _cpu_full; }

  /// Return host memory usage in bytes
  double host_memory_usage() const;

  /// Return the number of procs sharing a device (size of device communicator)
  inline int procs_per_gpu() const { return _procs_per_gpu; }
  /// Return my rank in the device communicator
  inline int gpu_rank() const { return _gpu_rank; }
  /// My rank within all processes
  inline int world_me() const { return _world_me; }
  /// Total number of processes
  inline int world_size() const { return _world_size; }
  /// Return the 'mode' for acceleration: GPU_FORCE or GPU_NEIGH
  inline int gpu_mode() const { return _gpu_mode; }
  /// Index of first device used by a node
  inline int first_device() const { return _first_device; }
  /// Index of last device used by a node
  inline int last_device() const { return _last_device; }
  /// Particle split defined in fix
  inline double particle_split() const { return _particle_split; }
  /// Return the initialization count for the device
  inline int init_count() const { return _init_count; }

  // -------------------------- DEVICE DATA -------------------------

  /// Geryon Device
  UCL_Device *gpu;
  /// Device communicator
  MPI_Comm gpu_comm;

  /// Acceleration modes reported by gpu_mode()
  enum{GPU_FORCE, GPU_NEIGH};

  // --------------------------- ATOM DATA --------------------------

  /// Atom Data
  PairGPUAtom<numtyp,acctyp> atom;

  // --------------------------- NBOR DATA ----------------------------

  /// Neighbor Data
  PairGPUNbor nbor;

 private:
  int _init_count;
  bool _device_init;
  int _procs_per_gpu, _gpu_rank, _world_me, _world_size;
  int _gpu_mode, _first_device, _last_device;
  double _particle_split;
  /// Start timestamp while the host timer runs; elapsed time once stopped
  double _cpu_full;

  /// Convert a streamable value to a string
  /** Uses an output string stream with precision 2 (the precision setting
    * affects how floating-point values are written).  Static because it
    * touches no instance state, so const members can use it too. **/
  template <class t>
  static inline std::string toa(const t& in) {
    std::ostringstream o;
    o.precision(2);
    o << in;
    return o.str();
  }
};
#endif