Updating lib/gpu to version 2.
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@3785 f3b2605a-c512-4ea7-a41b-209d697bcdaa
lib/gpu/Makefile.cyg (new file, 72 lines)
@@ -0,0 +1,72 @@
+# /* ----------------------------------------------------------------------
+#    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+#    http://lammps.sandia.gov, Sandia National Laboratories
+#    Steve Plimpton, sjplimp@sandia.gov
+#
+#    Copyright (2003) Sandia Corporation.  Under the terms of Contract
+#    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+#    certain rights in this software.  This software is distributed under
+#    the GNU General Public License.
+#
+#    See the README file in the top-level LAMMPS directory.
+# ------------------------------------------------------------------------- */
+#
+# /* ----------------------------------------------------------------------
+#    Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
+#                          Peng Wang (Nvidia), penwang@nvidia.com
+#                          Paul Crozier (SNL), pscrozi@sandia.gov
+# ------------------------------------------------------------------------- */
+
+BIN_DIR = .
+OBJ_DIR = .
+AR = ar
+CUDA_CPP = /cygdrive/c/CUDA/bin/nvcc -I/cygdrive/c/CUDA/include -O3 -DWINDLL -DUNIX -Xptxas -v --use_fast_math
+CUDA_ARCH = -arch=sm_13
+CUDA_PREC = -D_SINGLE_SINGLE
+CUDA_LINK = -L/cygdrive/c/CUDA/lib -lcudart $(CUDA_LIB)
+
+CUDA = $(CUDA_CPP) $(CUDA_ARCH) $(CUDA_PREC)
+
+CUDA_LIB = $(OBJ_DIR)/gpu.dll
+
+# Headers for CUDA Stuff
+NVC_H  = nvc_macros.h nvc_device.h nvc_timer.h nvc_memory.h nvc_traits.h
+# Headers for Pair Stuff
+PAIR_H = pair_gpu_texture.h pair_gpu_atom.h pair_gpu_nbor.h pair_gpu_cell.h
+# Dependencies for the Texture Tar
+TAR_H  = $(NVC_H) $(PAIR_H) pair_gpu_atom.cu lj_gpu_memory.h lj_gpu_memory.cu \
+         lj_gpu_kernel.h lj_gpu.cu gb_gpu_memory.h gb_gpu_memory.cu \
+         gb_gpu_extra.h gb_gpu_kernel.h gb_gpu.cu
+
+ALL_H = $(NVC_H) $(PAIR_H)
+
+EXECS = $(BIN_DIR)/nvc_get_devices
+OBJS  = $(OBJ_DIR)/nvc_device.obj $(OBJ_DIR)/pair_gpu_nbor.obj \
+        $(OBJ_DIR)/pair_tex_tar.obj $(OBJ_DIR)/pair_gpu_cell.obj
+
+all: $(CUDA_LIB) $(EXECS)
+
+$(OBJ_DIR)/nvc_device.obj : nvc_device.cu $(NVC_H)
+	$(CUDA) -o $@ -c nvc_device.cu
+
+$(OBJ_DIR)/pair_gpu_nbor.obj: pair_gpu_nbor.cu pair_gpu_texture.h pair_gpu_nbor.h $(NVC_H)
+	$(CUDA) -o $@ -c pair_gpu_nbor.cu
+
+$(OBJ_DIR)/pair_tex_tar.obj: $(TAR_H)
+	$(CUDA) -o $@ -c pair_tex_tar.cu
+
+$(OBJ_DIR)/pair_gpu_cell.obj: pair_gpu_cell.cu pair_gpu_cell.h lj_gpu_memory.h
+	$(CUDA) -o $@ -c pair_gpu_cell.cu
+
+$(BIN_DIR)/nvc_get_devices: nvc_get_devices.cu $(NVC_H) $(OBJ_DIR)/nvc_device.obj
+	$(CUDA) -o $@ nvc_get_devices.cu $(CUDALNK) $(OBJ_DIR)/nvc_device.obj
+
+$(CUDA_LIB): $(OBJS) $(TAR_H)
+	$(CUDA) -o $@ -shared $(OBJS)
+
+clean:
+	rm -rf $(EXECS) $(CUDA_LIB) $(OBJS) *.exe *.exp *.lib *.dll *.linkinfo
+
+veryclean: clean
+	rm -rf *~ *.linkinfo
Makefile
@@ -1,25 +1,29 @@
-#***************************************************************************
-#                                  Makefile
-#                             -------------------
-#                               W. Michael Brown
-#
-# _________________________________________________________________________
-#                Build for the LAMMPS GPU Force Library
-#
-# _________________________________________________________________________
-#
-#    begin                : Tue June 23 2009
-#    copyright            : (C) 2009 by W. Michael Brown
-#    email                : wmbrown@sandia.gov
-# ***************************************************************************/
+# /* ----------------------------------------------------------------------
+#    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+#    http://lammps.sandia.gov, Sandia National Laboratories
+#    Steve Plimpton, sjplimp@sandia.gov
+#
+#    Copyright (2003) Sandia Corporation.  Under the terms of Contract
+#    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+#    certain rights in this software.  This software is distributed under
+#    the GNU General Public License.
+#
+#    See the README file in the top-level LAMMPS directory.
+# ------------------------------------------------------------------------- */
+#
+# /* ----------------------------------------------------------------------
+#    Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
+#                          Peng Wang (Nvidia), penwang@nvidia.com
+#                          Paul Crozier (SNL), pscrozi@sandia.gov
+# ------------------------------------------------------------------------- */

 BIN_DIR = .
 OBJ_DIR = .
 AR = ar
 CUDA_CPP = nvcc -I/usr/local/cuda/include -DUNIX -O3 -Xptxas -v --use_fast_math
-CUDA_ARCH = -maxrregcount 128 #-arch=sm_13
+CUDA_ARCH = -arch=sm_13
 CUDA_PREC = -D_SINGLE_SINGLE
-CUDA_LINK = -L/usr/local/cuda/lib64 -lcudart $(CUDA_LIB)
+CUDA_LINK = -L/usr/local/cuda/lib -lcudart $(CUDA_LIB)

 CUDA = $(CUDA_CPP) $(CUDA_ARCH) $(CUDA_PREC)

@@ -28,7 +32,7 @@ CUDA_LIB = $(OBJ_DIR)/libgpu.a
 # Headers for CUDA Stuff
 NVC_H  = nvc_macros.h nvc_device.h nvc_timer.h nvc_memory.h nvc_traits.h
 # Headers for Pair Stuff
-PAIR_H = pair_gpu_texture.h pair_gpu_atom.h pair_gpu_nbor.h
+PAIR_H = pair_gpu_texture.h pair_gpu_atom.h pair_gpu_nbor.h pair_gpu_cell.h
 # Dependencies for the Texture Tar
 TAR_H  = $(NVC_H) $(PAIR_H) pair_gpu_atom.cu lj_gpu_memory.h lj_gpu_memory.cu \
          lj_gpu_kernel.h lj_gpu.cu gb_gpu_memory.h gb_gpu_memory.cu \

@@ -38,7 +42,7 @@ ALL_H = $(NVC_H) $(PAIR_H)

 EXECS = $(BIN_DIR)/nvc_get_devices
 OBJS  = $(OBJ_DIR)/nvc_device.o $(OBJ_DIR)/pair_gpu_nbor.cu_o \
-        $(OBJ_DIR)/pair_tex_tar.cu_o
+        $(OBJ_DIR)/pair_tex_tar.cu_o $(OBJ_DIR)/pair_gpu_cell.cu_o

 all: $(CUDA_LIB) $(EXECS)

@@ -51,6 +55,9 @@ $(OBJ_DIR)/pair_gpu_nbor.cu_o: pair_gpu_nbor.cu pair_gpu_texture.h pair_gpu_nbor
 $(OBJ_DIR)/pair_tex_tar.cu_o: $(TAR_H)
 	$(CUDA) -o $@ -c pair_tex_tar.cu

+$(OBJ_DIR)/pair_gpu_cell.cu_o: pair_gpu_cell.cu pair_gpu_cell.h lj_gpu_memory.h
+	$(CUDA) -o $@ -c pair_gpu_cell.cu
+
 $(BIN_DIR)/nvc_get_devices: nvc_get_devices.cu $(NVC_H) $(OBJ_DIR)/nvc_device.o
 	$(CUDA) -o $@ nvc_get_devices.cu $(CUDALNK) $(OBJ_DIR)/nvc_device.o
README
@@ -1,29 +1,25 @@
-/***************************************************************************
-                                   README
-                             -------------------
-                               W. Michael Brown
-
-                 README for building LAMMPS GPU Library
-
- __________________________________________________________________________
-    This file is part of the LAMMPS GPU Library
- __________________________________________________________________________
-
-    begin                : Thu Jun 25 2009
-    copyright            : (C) 2009 by W. Michael Brown
-    email                : wmbrown@sandia.gov
- ***************************************************************************/
-
-/* -----------------------------------------------------------------------
-   Copyright (2009) Sandia Corporation.  Under the terms of Contract
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
-  ----------------------------------------------------------------------- */
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
+                         Peng Wang (Nvidia), penwang@nvidia.com
+                         Paul Crozier (SNL), pscrozi@sandia.gov
+------------------------------------------------------------------------- */

 GENERAL NOTES

-This library, pair_gpu_lib.a, provides routines for GPGPU acceleration
+This library, libgpu.a, provides routines for GPU acceleration
 of LAMMPS pair styles. Currently, only CUDA enabled GPUs are
 supported. Compilation of this library requires installing the CUDA
 GPU driver and CUDA toolkit for your operating system. In addition to

@@ -33,14 +29,14 @@ devices on your system.

 NOTE: Installation of the CUDA SDK is not required.

-Current pair styles supporting GPU Accelartion:
+Current pair styles supporting GPU acceleration:

 1. lj/cut/gpu
 2. gayberne/gpu

 MULTIPLE LAMMPS PROCESSES

-When using GPGPU acceleration, you are restricted to one physical GPU
+When using GPU acceleration, you are restricted to one physical GPU
 per LAMMPS process. This can be multiple GPUs on a single node or
 across multiple nodes. Intructions on GPU assignment can be found in
 the LAMMPS documentation.
@@ -66,6 +62,9 @@ the CUDA_PREC variable:

 CUDA_PREC = -D_DOUBLE_DOUBLE  # Double precision for all calculations
 CUDA_PREC = -D_SINGLE_DOUBLE  # Accumulation of forces, etc. in double

+NOTE: For the lj/cut pair style, only single precision will be used, even
+      if double precision is specified.
+
 NOTE: Double precision is only supported on certain GPUS (with
       compute capability>=1.3).
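These flags choose the floating-point types the whole library is compiled
with. As a rough sketch (an assumed mapping, not quoted from this commit),
the -D_* precision defines would translate into the PRECISION/ACC_PRECISION
template arguments that appear in calls such as
_gb_gpu_gayberne<PRECISION,ACC_PRECISION> later in this diff:

    /* Illustrative sketch only: one plausible macro-to-typedef mapping for
       the precision flags; the library's real headers may differ. */
    #ifdef _SINGLE_SINGLE
    typedef float  PRECISION;      /* store and compute in single          */
    typedef float  ACC_PRECISION;  /* accumulate forces/energies in single */
    #endif
    #ifdef _SINGLE_DOUBLE
    typedef float  PRECISION;      /* compute in single ...                */
    typedef double ACC_PRECISION;  /* ... but accumulate in double         */
    #endif
    #ifdef _DOUBLE_DOUBLE
    typedef double PRECISION;      /* double precision everywhere          */
    typedef double ACC_PRECISION;
    #endif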
@@ -74,14 +73,14 @@ NOTE: For Tesla and other graphics cards with compute capability>=1.3,

 NOTE: The gayberne/gpu pair style will only be installed if the ASPHERE
 package has been installed before installing the GPU package in LAMMPS.

 GPU MEMORY

-Upon initialization of the pair style, the library will reserve memory
-for 64K atoms per GPU or 70% of each cards GPU memory, whichever value
+Upon initialization of the gayberne/gpu pair style, the library will reserve
+memory for 64K atoms per GPU or 70% of each cards GPU memory, whichever value
 is limiting. The value of 70% can be changed by editing the
-PERCENT_GPU_MEMORY definition in the source file. The value of 64K
-cannot be increased and is the maximum number of atoms allowed per
+PERCENT_GPU_MEMORY definition in the source file. For gayberne/gpu, the value
+of 64K cannot be increased and is the maximum number of atoms allowed per
 GPU. Using the 'neigh_modify one' modifier in your LAMMPS input script
 can help to increase maximum number of atoms per GPU for cards with
 limited memory.
gb_gpu.cu
@@ -1,27 +1,21 @@
-/***************************************************************************
-                                 gb_gpu.cu
-                             -------------------
-                               W. Michael Brown
-
-  Gay-Berne anisotropic potential GPU calcultation
-
-   *** Force decomposition by Atom Version ***
-
- __________________________________________________________________________
-    This file is part of the LAMMPS GPU Library
- __________________________________________________________________________
-
-    begin                : Tue Jun 23 2009
-    copyright            : (C) 2009 by W. Michael Brown
-    email                : wmbrown@sandia.gov
- ***************************************************************************/
-
-/* -----------------------------------------------------------------------
-   Copyright (2009) Sandia Corporation.  Under the terms of Contract
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
-  ----------------------------------------------------------------------- */
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
+                         Peng Wang (Nvidia), penwang@nvidia.com
+                         Paul Crozier (SNL), pscrozi@sandia.gov
+------------------------------------------------------------------------- */

 #include <iostream>
 #include <cassert>
@@ -190,30 +184,30 @@ inline string gb_gpu_toa(const t& in) {
 // ---------------------------------------------------------------------------
 // Return string with GPU info
 // ---------------------------------------------------------------------------
-string gb_gpu_name(const int id, const int max_nbors) {
-  string name=GBMF[0].gpu.name(id)+", "+
+EXTERN void gb_gpu_name(const int id, const int max_nbors, char * name) {
+  string sname=GBMF[0].gpu.name(id)+", "+
               gb_gpu_toa(GBMF[0].gpu.cores(id))+" cores, "+
               gb_gpu_toa(GBMF[0].gpu.gigabytes(id))+" GB, "+
               gb_gpu_toa(GBMF[0].gpu.clock_rate(id))+" GHZ, "+
               gb_gpu_toa(GBMF[0].get_max_atoms(GBMF[0].gpu.bytes(id),
                                                max_nbors))+" Atoms";
-  return name;
+  strcpy(name,sname.c_str());
 }

 // ---------------------------------------------------------------------------
 // Allocate memory on host and device and copy constants to device
 // ---------------------------------------------------------------------------
-bool gb_gpu_init(int &ij_size, const int ntypes, const double gamma,
+EXTERN bool gb_gpu_init(int &ij_size, const int ntypes, const double gamma,
                  const double upsilon, const double mu, double **shape,
                  double **well, double **cutsq, double **sigma,
                  double **epsilon, double *host_lshape, int **form,
                  double **host_lj1, double **host_lj2, double **host_lj3,
                  double **host_lj4, double **offset, double *special_lj,
                  const int max_nbors, const int thread, const int gpu_id) {
   assert(thread<MAX_GPU_THREADS);

   GBMF[thread].gpu.init();

   if (GBMF[thread].gpu.num_devices()==0)
     return false;
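The gb_gpu_name change in the hunk above swaps a returned std::string for a
caller-supplied char buffer, so the routine can be exported through a plain
C ABI (the EXTERN macro defined in lj_gpu.cu below). A minimal caller-side
sketch, with a hypothetical helper that is not part of this commit:

    #include <cstdio>

    void gb_gpu_name(const int id, const int max_nbors, char *name);

    void print_gpu_info(int id) {
      char name[256];              // buffer is owned by the caller now
      gb_gpu_name(id, 128, name);  // 128 = assumed max-neighbor value
      std::printf("%s\n", name);
    }

Note that strcpy places the length contract on the caller: the buffer must
be large enough for the formatted device summary.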
@@ -227,7 +221,7 @@ bool gb_gpu_init(int &ij_size, const int ntypes, const double gamma,
 // ---------------------------------------------------------------------------
 // Clear memory on host and device
 // ---------------------------------------------------------------------------
-void gb_gpu_clear(const int thread) {
+EXTERN void gb_gpu_clear(const int thread) {
   GBMF[thread].clear();
 }

@@ -262,7 +256,7 @@ inline void _gb_gpu_atom(PairGPUAtom<numtyp,acctyp> &atom, double **host_x,
   atom.time_atom.stop();
 }

-void gb_gpu_atom(double **host_x, double **host_quat,
+EXTERN void gb_gpu_atom(double **host_x, double **host_quat,
                  const int *host_type, const bool rebuild, const int thread) {
   _gb_gpu_atom(GBMF[thread].atom, host_x, host_quat, host_type, rebuild,
                GBMF[thread].pair_stream);

@@ -327,7 +321,7 @@ int * _gb_gpu_reset_nbors(gbmtyp &gbm, const int nall, const int nlocal,
   return ilist;
 }

-int * gb_gpu_reset_nbors(const int nall, const int nlocal, const int inum,
+EXTERN int * gb_gpu_reset_nbors(const int nall, const int nlocal, const int inum,
                          int *ilist, const int *numj, const int *type,
                          const int thread, bool &success) {
   return _gb_gpu_reset_nbors(GBMF[thread],nall,nlocal,inum,ilist,numj,type,

@@ -340,7 +334,7 @@ int * gb_gpu_reset_nbors(const int nall, const int nlocal, const int inum,
 // ---------------------------------------------------------------------------
 template <class gbmtyp>
 void _gb_gpu_nbors(gbmtyp &gbm, const int *ij, const int num_ij,
                    const bool eflag) {
   gbm.nbor.time_nbor.add_to_total();
   // CUDA_SAFE_CALL(cudaStreamSynchronize(gbm.pair_stream)); // Not if timed

@@ -350,8 +344,8 @@ void _gb_gpu_nbors(gbmtyp &gbm, const int *ij, const int num_ij,
   gbm.nbor.time_nbor.stop();
 }

-void gb_gpu_nbors(const int *ij, const int num_ij, const bool eflag,
+EXTERN void gb_gpu_nbors(const int *ij, const int num_ij, const bool eflag,
                   const int thread) {
   _gb_gpu_nbors(GBMF[thread],ij,num_ij,eflag);
 }

@@ -453,7 +447,7 @@ void _gb_gpu_gayberne(GBMT &gbm, const bool eflag, const bool vflag,
   }
 }

-void gb_gpu_gayberne(const bool eflag, const bool vflag, const bool rebuild,
+EXTERN void gb_gpu_gayberne(const bool eflag, const bool vflag, const bool rebuild,
                      const int thread) {
   _gb_gpu_gayberne<PRECISION,ACC_PRECISION>(GBMF[thread],eflag,vflag,rebuild);
 }

@@ -490,7 +484,7 @@ double _gb_gpu_forces(GBMT &gbm, double **f, double **tor, const int *ilist,
   return evdw;
 }

-double gb_gpu_forces(double **f, double **tor, const int *ilist,
+EXTERN double gb_gpu_forces(double **f, double **tor, const int *ilist,
                      const bool eflag, const bool vflag, const bool eflag_atom,
                      const bool vflag_atom, double *eatom, double **vatom,
                      double *virial, const int thread) {

@@ -499,7 +493,7 @@ double gb_gpu_forces(double **f, double **tor, const int *ilist,
                      vflag_atom,eatom,vatom,virial);
 }

-void gb_gpu_time(const int i) {
+EXTERN void gb_gpu_time(const int i) {
   cout.precision(4);
   cout << "Atom copy:     " << GBMF[i].atom.time_atom.total_seconds()
        << " s.\n"

@@ -515,10 +509,10 @@ void gb_gpu_time(const int i) {
        << " s.\n";
 }

-int gb_gpu_num_devices() {
+EXTERN int gb_gpu_num_devices() {
   return GBMF[0].gpu.num_devices();
 }

-double gb_gpu_bytes() {
+EXTERN double gb_gpu_bytes() {
   return GBMF[0].host_memory_usage();
 }
gb_gpu_extra.h
@@ -1,25 +1,21 @@
-/***************************************************************************
-                               gb_gpu_extra.h
-                             -------------------
-                               W. Michael Brown
-
-  Inline GPU kernel routines ala math_extra for the CPU.
-
- __________________________________________________________________________
-    This file is part of the LAMMPS GPU Library
- __________________________________________________________________________
-
-    begin                : Tue Jun 23 2009
-    copyright            : (C) 2009 by W. Michael Brown
-    email                : wmbrown@sandia.gov
- ***************************************************************************/
-
-/* -----------------------------------------------------------------------
-   Copyright (2009) Sandia Corporation.  Under the terms of Contract
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
-  ----------------------------------------------------------------------- */
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
+                         Peng Wang (Nvidia), penwang@nvidia.com
+                         Paul Crozier (SNL), pscrozi@sandia.gov
+------------------------------------------------------------------------- */

 #ifndef GB_GPU_EXTRA_H
 #define GB_GPU_EXTRA_H
gb_gpu_kernel.cu
@@ -1,26 +1,21 @@
-/***************************************************************************
-                               gb_gpu_kernel.cu
-                             -------------------
-                               W. Michael Brown
-
-  Routines that actually perform the force/torque computation
-
-   *** Force Decomposition by Atom Version ***
-
- __________________________________________________________________________
-    This file is part of the LAMMPS GPU Library
- __________________________________________________________________________
-
-    begin                : Tue Jun 23 2009
-    copyright            : (C) 2009 by W. Michael Brown
-    email                : wmbrown@sandia.gov
- ***************************************************************************/
-
-/* -----------------------------------------------------------------------
-   Copyright (2009) Sandia Corporation.  Under the terms of Contract
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
-  ----------------------------------------------------------------------- */
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
+                         Peng Wang (Nvidia), penwang@nvidia.com
+                         Paul Crozier (SNL), pscrozi@sandia.gov
+------------------------------------------------------------------------- */

 #ifndef GB_GPU_KERNEL
 #define GB_GPU_KERNEL
gb_gpu_memory.cu
@@ -1,25 +1,21 @@
-/***************************************************************************
-                              gb_gpu_memory.cu
-                             -------------------
-                               W. Michael Brown
-
-  Global variables for GPU Gayberne Library
-
- __________________________________________________________________________
-    This file is part of the LAMMPS GPU Library
- __________________________________________________________________________
-
-    begin                : Thu Jun 25 2009
-    copyright            : (C) 2009 by W. Michael Brown
-    email                : wmbrown@sandia.gov
- ***************************************************************************/
-
-/* -----------------------------------------------------------------------
-   Copyright (2009) Sandia Corporation.  Under the terms of Contract
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
-  ----------------------------------------------------------------------- */
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
+                         Peng Wang (Nvidia), penwang@nvidia.com
+                         Paul Crozier (SNL), pscrozi@sandia.gov
+------------------------------------------------------------------------- */

 #include "gb_gpu_memory.h"
 #define GB_GPU_MemoryT GB_GPU_Memory<numtyp, acctyp>
gb_gpu_memory.h
@@ -1,25 +1,21 @@
-/***************************************************************************
-                               gb_gpu_memory.h
-                             -------------------
-                               W. Michael Brown
-
-  Global variables for GPU Gayberne Library
-
- __________________________________________________________________________
-    This file is part of the LAMMPS GPU Library
- __________________________________________________________________________
-
-    begin                : Thu Jun 25 2009
-    copyright            : (C) 2009 by W. Michael Brown
-    email                : wmbrown@sandia.gov
- ***************************************************************************/
-
-/* -----------------------------------------------------------------------
-   Copyright (2009) Sandia Corporation.  Under the terms of Contract
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
-  ----------------------------------------------------------------------- */
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
+                         Peng Wang (Nvidia), penwang@nvidia.com
+                         Paul Crozier (SNL), pscrozi@sandia.gov
+------------------------------------------------------------------------- */

 #ifndef GB_GPU_MEMORY_H
 #define GB_GPU_MEMORY_H
lj_gpu.cu
@@ -1,25 +1,21 @@
-/***************************************************************************
-                                 lj_gpu.cu
-                             -------------------
-                               W. Michael Brown
-
-  Lennard-Jones potential GPU calcultation
-
- __________________________________________________________________________
-    This file is part of the LAMMPS GPU Library
- __________________________________________________________________________
-
-    begin                : Tue Aug 4 2009
-    copyright            : (C) 2009 by W. Michael Brown
-    email                : wmbrown@sandia.gov
- ***************************************************************************/
-
-/* -----------------------------------------------------------------------
-   Copyright (2009) Sandia Corporation.  Under the terms of Contract
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
-  ----------------------------------------------------------------------- */
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
+                         Peng Wang (Nvidia), penwang@nvidia.com
+                         Paul Crozier (SNL), pscrozi@sandia.gov
+------------------------------------------------------------------------- */

 #include <iostream>
 #include <cassert>
@@ -27,18 +23,39 @@
 #include "nvc_timer.h"
 #include "nvc_device.h"
 #include "pair_gpu_texture.h"
+#include "pair_gpu_cell.h"
 #include "lj_gpu_memory.cu"
 #include "lj_gpu_kernel.h"

+#ifdef WINDLL
+#include <windows.h>
+BOOL APIENTRY DllMain(HANDLE hModule, DWORD dwReason, LPVOID lpReserved)
+{
+  return TRUE;
+}
+#endif
+
+#ifdef WINDLL
+#define EXTERN extern "C" __declspec(dllexport)
+#else
+#define EXTERN
+#endif
+
 using namespace std;

 static LJ_GPU_Memory<PRECISION,ACC_PRECISION> LJMF;
 #define LJMT LJ_GPU_Memory<numtyp,acctyp>

+static float kernelTime = 0.0;
+static int ncell1D;
+static float *energy, *d_energy;
+static float3 *d_force, *f_temp, *v_temp, *d_virial;
+static cell_list cell_list_gpu;
+
 // ---------------------------------------------------------------------------
 // Convert something to a string
 // ---------------------------------------------------------------------------
 #include <sstream>

 template <class t>
 inline string lj_gpu_toa(const t& in) {
   ostringstream o;
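The WINDLL block added above is what lets the same source build either the
static libgpu.a or the gpu.dll target from Makefile.cyg: DllMain gives
Windows a no-op entry point, and EXTERN marks each public function as a
C-linkage DLL export. The extern "C" part matters because it suppresses C++
name mangling, so the exported symbols keep predictable names. A sketch of
the effect on one declaration (illustrative only):

    // With -DWINDLL:   extern "C" __declspec(dllexport) int lj_gpu_num_devices();
    // Without WINDLL:  int lj_gpu_num_devices();
    EXTERN int lj_gpu_num_devices();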
@@ -50,113 +67,54 @@ inline string lj_gpu_toa(const t& in) {
 // ---------------------------------------------------------------------------
 // Return string with GPU info
 // ---------------------------------------------------------------------------
-string lj_gpu_name(const int id, const int max_nbors) {
-  string name=LJMF.gpu.name(id)+", "+
+EXTERN void lj_gpu_name(const int id, const int max_nbors, char * name) {
+  string sname=LJMF.gpu.name(id)+", "+
               lj_gpu_toa(LJMF.gpu.cores(id))+" cores, "+
               lj_gpu_toa(LJMF.gpu.gigabytes(id))+" GB, "+
-              lj_gpu_toa(LJMF.gpu.clock_rate(id))+" GHZ, "+
-              lj_gpu_toa(LJMF.get_max_atoms(LJMF.gpu.bytes(id),
-                                            max_nbors))+" Atoms";
-  return name;
+              lj_gpu_toa(LJMF.gpu.clock_rate(id))+" GHZ";
+  strcpy(name,sname.c_str());
 }

 // ---------------------------------------------------------------------------
 // Allocate memory on host and device and copy constants to device
 // ---------------------------------------------------------------------------
-bool lj_gpu_init(int &ij_size, const int ntypes, double **cutsq,double **sigma,
-                 double **epsilon, double **host_lj1, double **host_lj2,
-                 double **host_lj3, double **host_lj4, double **offset,
-                 double *special_lj, const int max_nbors, const int gpu_id) {
+EXTERN bool lj_gpu_init(int &ij_size, const int ntypes, double **cutsq,double **sigma,
+                        double **epsilon, double **host_lj1, double **host_lj2,
+                        double **host_lj3, double **host_lj4, double **offset,
+                        double *special_lj, double *boxlo, double *boxhi,
+                        double cell_size, double skin,
+                        const int max_nbors, const int gpu_id) {
   LJMF.gpu.init();

   if (LJMF.gpu.num_devices()==0)
     return false;

   ij_size=IJ_SIZE;
-  return LJMF.init(ij_size, ntypes, cutsq, sigma, epsilon, host_lj1, host_lj2,
-                   host_lj3, host_lj4, offset, special_lj, max_nbors, gpu_id);
+
+  bool ret = LJMF.init(ij_size, ntypes, cutsq, sigma, epsilon, host_lj1, host_lj2,
+                       host_lj3, host_lj4, offset, special_lj, max_nbors, gpu_id);
+
+  ncell1D = ceil(((boxhi[0] - boxlo[0]) + 2.0*cell_size) / cell_size);
+
+  init_cell_list_const(cell_size, skin, boxlo, boxhi);
+
+  return ret;
 }

 // ---------------------------------------------------------------------------
 // Clear memory on host and device
 // ---------------------------------------------------------------------------
-void lj_gpu_clear() {
+EXTERN void lj_gpu_clear() {
+  free(energy);
+  free(v_temp);
+  cudaFreeHost(f_temp);
+  cudaFree(d_force);
+  cudaFree(d_energy);
+  cudaFree(d_virial);
+  clear_cell_list(cell_list_gpu);
+
   LJMF.clear();
 }

-// ---------------------------------------------------------------------------
-// copy atom positions and optionally types to device
-// ---------------------------------------------------------------------------
-template <class numtyp, class acctyp>
-inline void _lj_gpu_atom(PairGPUAtom<numtyp,acctyp> &atom, double **host_x,
-                         const int *host_type, const bool rebuild,
-                         cudaStream_t &stream) {
-  atom.time_atom.start();
-  atom.reset_write_buffer();
-
-  // First row of dev_x is x position, second is y, third is z
-  atom.add_atom_data(host_x[0],3);
-  atom.add_atom_data(host_x[0]+1,3);
-  atom.add_atom_data(host_x[0]+2,3);
-
-  int csize=3;
-
-  // If a rebuild occured, copy type data
-  if (rebuild) {
-    atom.add_atom_data(host_type);
-    csize++;
-  }
-
-  atom.copy_atom_data(csize,stream);
-  atom.time_atom.stop();
-}
-
-void lj_gpu_atom(double **host_x, const int *host_type, const bool rebuild) {
-  _lj_gpu_atom(LJMF.atom, host_x, host_type, rebuild, LJMF.pair_stream);
-}
-
-// ---------------------------------------------------------------------------
-// Signal that we need to transfer a new neighbor list
-// ---------------------------------------------------------------------------
-template <class LJMTyp>
-bool _lj_gpu_reset_nbors(LJMTyp &ljm, const int nall, const int inum,
-                         int *ilist, const int *numj) {
-  if (nall>ljm.max_atoms)
-    return false;
-
-  ljm.nbor.time_nbor.start();
-
-  ljm.atom.nall(nall);
-  ljm.atom.inum(inum);
-  ljm.nbor.reset(inum,ilist,numj,ljm.pair_stream);
-
-  ljm.nbor.time_nbor.stop();
-  return true;
-}
-
-bool lj_gpu_reset_nbors(const int nall, const int inum, int *ilist,
-                        const int *numj) {
-  return _lj_gpu_reset_nbors(LJMF,nall,inum,ilist,numj);
-}
-
-// ---------------------------------------------------------------------------
-// Copy a set of ij_size ij interactions to device and compute energies,
-// forces, and torques for those interactions
-// ---------------------------------------------------------------------------
-template <class LJMTyp>
-void _lj_gpu_nbors(LJMTyp &ljm, const int *ij, const int num_ij) {
-  ljm.nbor.time_nbor.add_to_total();
-
-  // CUDA_SAFE_CALL(cudaStreamSynchronize(ljm.pair_stream)); // Not if timed
-
-  memcpy(ljm.nbor.host_ij.begin(),ij,num_ij*sizeof(int));
-  ljm.nbor.time_nbor.start();
-  ljm.nbor.add(num_ij,ljm.pair_stream);
-  ljm.nbor.time_nbor.stop();
-}
-
-void lj_gpu_nbors(const int *ij, const int num_ij) {
-  _lj_gpu_nbors(LJMF,ij,num_ij);
-}
-
 // ---------------------------------------------------------------------------
 // Calculate energies and forces for all ij interactions
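To make the cell-grid sizing in lj_gpu_init concrete, here is a worked
example with made-up numbers (not taken from this commit):

    // boxlo[0] = 0.0, boxhi[0] = 30.0, cell_size = 5.0
    // ncell1D = ceil((30.0 - 0.0 + 2.0*5.0) / 5.0) = 8
    // i.e. 6 interior cells plus one padding cell on each side;
    // _lj_gpu_cell below then works with ncell = ncell1D^3 = 512 cells.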
@@ -169,6 +127,7 @@ void _lj_gpu(LJMT &ljm, const bool eflag, const bool vflag, const bool rebuild){
   int GX=static_cast<int>(ceil(static_cast<double>(ljm.atom.inum())/BX));

   ljm.time_pair.start();
   if (ljm.shared_types)
     kernel_lj_fast<numtyp,acctyp><<<GX,BX,0,ljm.pair_stream>>>
       (ljm.special_lj.begin(), ljm.nbor.dev_nbor.begin(),
@@ -181,47 +140,279 @@ void _lj_gpu(LJMT &ljm, const bool eflag, const bool vflag, const bool rebuild){
        ljm.nbor.ij.begin(), ljm.nbor.dev_nbor.row_size(),
        ljm.atom.ans.begin(), ljm.atom.ans.row_size(), eflag,
        vflag, ljm.atom.inum(), ljm.atom.nall());
   ljm.time_pair.stop();
 }

-void lj_gpu(const bool eflag, const bool vflag, const bool rebuild) {
-  _lj_gpu<PRECISION,ACC_PRECISION>(LJMF,eflag,vflag,rebuild);
-}
-
-// ---------------------------------------------------------------------------
-// Get energies and forces to host
-// ---------------------------------------------------------------------------
-template<class numtyp, class acctyp>
-double _lj_gpu_forces(LJMT &ljm, double **f, const int *ilist,
-                      const bool eflag, const bool vflag, const bool eflag_atom,
-                      const bool vflag_atom, double *eatom, double **vatom,
-                      double *virial) {
-  double evdw;
-
-  ljm.atom.time_answer.start();
-  ljm.atom.copy_answers(eflag,vflag,ljm.pair_stream);
-
-  ljm.atom.time_atom.add_to_total();
-  ljm.nbor.time_nbor.add_to_total();
-  ljm.time_pair.add_to_total();
-  CUDA_SAFE_CALL(cudaStreamSynchronize(ljm.pair_stream));
-
-  evdw=ljm.atom.energy_virial(ilist,eflag_atom,vflag_atom,eatom,vatom,virial);
-  ljm.atom.add_forces(ilist,f);
-  ljm.atom.time_answer.stop();
-  ljm.atom.time_answer.add_to_total();
-  return evdw;
-}
-
-double lj_gpu_forces(double **f, const int *ilist, const bool eflag,
-                     const bool vflag, const bool eflag_atom,
-                     const bool vflag_atom, double *eatom, double **vatom,
-                     double *virial) {
-  return _lj_gpu_forces<PRECISION,ACC_PRECISION>
-    (LJMF,f,ilist,eflag,vflag,eflag_atom,vflag_atom,eatom,vatom,virial);
-}
+EXTERN void lj_gpu(const bool eflag, const bool vflag, const bool rebuild) {
+  _lj_gpu<PRECISION,ACC_PRECISION>(LJMF, eflag,vflag,rebuild);
+}
+
+template <class numtyp, class acctyp>
+double _lj_gpu_cell(LJMT &ljm, double **force, double *virial,
+                    double **host_x, int *host_type, const int inum,
+                    const int nall, const int ago, const bool eflag, const bool vflag,
+                    const double *boxlo, const double *boxhi)
+{
+  ljm.atom.nall(nall);
+  ljm.atom.inum(inum);
+
+  ljm.nbor.time_nbor.start();
+  ljm.nbor.time_nbor.stop();
+
+  double evdwl=0.0;
+
+  static int buffer = CELL_SIZE;
+  static int ncell = (int)pow((float)ncell1D,3);
+
+  static int first_call = 1;
+
+  // allocate memory on CPU and GPU
+  if (first_call) {
+    energy = (float*) malloc(inum*sizeof(float));
+    v_temp = (float3*)malloc(inum*2*sizeof(float3));
+    cudaMallocHost((void**)&f_temp, inum*sizeof(float3));
+
+    cudaMalloc((void**)&d_force, inum*sizeof(float3));
+    cudaMalloc((void**)&d_energy, inum*sizeof(float));
+    cudaMalloc((void**)&d_virial, inum*3*sizeof(float3));
+
+    init_cell_list(cell_list_gpu, nall, ncell, buffer);
+
+    first_call = 0;
+  }
+
+  if (!first_call && ago == 0) {
+    free(energy);
+    free(v_temp);
+    cudaFreeHost(f_temp);
+    cudaFree(d_force);
+    cudaFree(d_energy);
+    cudaFree(d_virial);
+
+    energy = (float*) malloc(inum*sizeof(float));
+    v_temp = (float3*)malloc(inum*2*sizeof(float3));
+    cudaMallocHost((void**)&f_temp, inum*sizeof(float3));
+
+    cudaMalloc((void**)&d_force, inum*sizeof(float3));
+    cudaMalloc((void**)&d_energy, inum*sizeof(float));
+    cudaMalloc((void**)&d_virial, inum*3*sizeof(float3));
+
+    clear_cell_list(cell_list_gpu);
+    init_cell_list(cell_list_gpu, nall, ncell, buffer);
+  }
+
+  // build cell-list on GPU
+  ljm.atom.time_atom.start();
+  build_cell_list(host_x[0], host_type, cell_list_gpu,
+                  ncell, ncell1D, buffer, inum, nall, ago);
+  ljm.atom.time_atom.stop();
+
+  ljm.time_pair.start();
+
+#ifdef TIMING
+  cudaEvent_t start, stop;
+  cudaEventCreate(&start);
+  cudaEventCreate(&stop);
+  cudaEventRecord(start, 0);
+#endif
+
+  // call the cell-list force kernel
+  const int BX=BLOCK_1D;
+  dim3 GX(ncell1D, ncell1D*ncell1D);
+  if (eflag == 0 && vflag == 0) {
+    kernel_lj_cell<false,false><<<GX, BX, 0>>>
+      (d_force, d_energy, d_virial,
+       cell_list_gpu.pos,
+       cell_list_gpu.idx,
+       cell_list_gpu.type,
+       cell_list_gpu.natom,
+       inum, nall, ncell);
+  } else {
+    kernel_lj_cell<true,true><<<GX, BX, 3*sizeof(float)*MAX_SHARED_TYPES*MAX_SHARED_TYPES>>>
+      (d_force, d_energy, d_virial,
+       cell_list_gpu.pos,
+       cell_list_gpu.idx,
+       cell_list_gpu.type,
+       cell_list_gpu.natom,
+       inum, nall, ncell);
+  }
+
+#ifdef TIMING
+  cudaEventRecord(stop, 0);
+  cudaEventSynchronize(stop);
+  float kTime;
+  cudaEventElapsedTime(&kTime, start, stop);
+  kernelTime += kTime;
+  printf("kernelTime = %f, eflag=%d, vflag=%d\n", kTime, eflag, vflag);
+  cudaEventDestroy(start);
+  cudaEventDestroy(stop);
+#endif
+
+  // copy results from GPU to CPU
+  cudaMemcpy(f_temp, d_force, inum*sizeof(float3), cudaMemcpyDeviceToHost);
+  if (eflag) {
+    cudaMemcpy(energy, d_energy, inum*sizeof(float), cudaMemcpyDeviceToHost);
+    for (int i = 0; i < inum; i++) {
+      evdwl += energy[i];
+    }
+    evdwl *= 0.5f;
+  }
+  if (vflag) {
+    cudaMemcpy(v_temp, d_virial, inum*2*sizeof(float3), cudaMemcpyDeviceToHost);
+    for (int i = 0; i < inum; i++) {
+      virial[0] += v_temp[2*i].x;
+      virial[1] += v_temp[2*i].y;
+      virial[2] += v_temp[2*i].z;
+      virial[3] += v_temp[2*i+1].x;
+      virial[4] += v_temp[2*i+1].y;
+      virial[5] += v_temp[2*i+1].z;
+    }
+    for (int i = 0; i < 6; i++)
+      virial[i] *= 0.5f;
+  }
+
+  for (int i = 0; i < inum; i++) {
+    force[i][0] += f_temp[i].x;
+    force[i][1] += f_temp[i].y;
+    force[i][2] += f_temp[i].z;
+  }
+
+  ljm.time_pair.stop();
+
+  ljm.atom.time_atom.add_to_total();
+  ljm.nbor.time_nbor.add_to_total();
+  ljm.time_pair.add_to_total();
+
+  return evdwl;
+}
+
+EXTERN double lj_gpu_cell(double **force, double *virial, double **host_x, int *host_type, const int inum, const int nall,
+                          const int ago, const bool eflag, const bool vflag,
+                          const double *boxlo, const double *boxhi)
+{
+  return _lj_gpu_cell<PRECISION,ACC_PRECISION>(LJMF, force, virial, host_x, host_type, inum, nall,
+                                               ago, eflag, vflag, boxlo, boxhi);
+}
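The copy-back section of _lj_gpu_cell above reads the six independent
components of the symmetric per-atom virial out of two float3 slots per
atom. The packing it implies (component names are illustrative, not quoted
from the kernel source):

    // v_temp[2*i]   -> virial[0], virial[1], virial[2]   (e.g. Wxx, Wyy, Wzz)
    // v_temp[2*i+1] -> virial[3], virial[4], virial[5]   (e.g. Wxy, Wxz, Wyz)
    // The trailing *= 0.5 removes pair double counting, since each
    // interaction is accumulated on both partners (same reason as evdwl *= 0.5f).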
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
double _lj_gpu_n2(LJMT &ljm, double **force, double *virial,
                  double **host_x, int *host_type, const int inum, const int nall, const bool eflag, const bool vflag,
                  const double *boxlo, const double *boxhi)
{
  ljm.atom.nall(nall);
  ljm.atom.inum(inum);

  ljm.nbor.time_nbor.start();
  ljm.nbor.time_nbor.stop();

  double evdwl=0.0;

#ifdef NOUSE
  static int first_call = 1;

  if (first_call) {
    energy = (float*) malloc(inum*sizeof(float));
    v_temp = (float3*) malloc(inum*2*sizeof(float3));
    cudaMallocHost((void**)&f_temp, inum*sizeof(float3));
    cudaMallocHost((void**)&pos_temp, nall*sizeof(float3));
    cudaMalloc((void**)&d_force, inum*sizeof(float3));
    cudaMalloc((void**)&d_energy, inum*sizeof(float));
    cudaMalloc((void**)&d_virial, inum*3*sizeof(float3));
    cudaMalloc((void**)&d_pos, nall*sizeof(float3));
    cudaMalloc((void**)&d_type, nall*sizeof(int));
    first_call = 0;
  }

  ljm.atom.time_atom.start();
  double *atom_pos = host_x[0];
  for (int i = 0; i < 3*nall; i+=3) {
    pos_temp[i/3] = make_float3(atom_pos[i], atom_pos[i+1], atom_pos[i+2]);
  }
  cudaMemcpy(d_pos, pos_temp, nall*sizeof(float3), cudaMemcpyHostToDevice);
  cudaMemcpy(d_type, host_type, nall*sizeof(int), cudaMemcpyHostToDevice);

  ljm.atom.time_atom.stop();

  ljm.time_pair.start();

  // Compute the block size and grid size to keep all cores busy
  const int BX=BLOCK_1D;
  dim3 GX(static_cast<int>(ceil(static_cast<double>(inum)/BX)));

#ifdef TIMING
  cudaEvent_t start, stop;
  cudaEventCreate(&start);
  cudaEventCreate(&stop);
  cudaEventRecord(start, 0);
#endif

  // N^2 force kernel
  kernel_lj_n2<numtyp, acctyp><<<GX, BX>>>(d_force, d_energy, d_virial,
                                           d_pos, d_type, eflag, vflag, inum, nall);

#ifdef TIMING
  cudaEventRecord(stop, 0);
  cudaEventSynchronize(stop);
  float kTime;
  cudaEventElapsedTime(&kTime, start, stop);
  kernelTime += kTime;
  printf("kernelTime = %f, eflag=%d, vflag=%d\n", kTime, eflag, vflag);
  cudaEventDestroy(start);
  cudaEventDestroy(stop);
#endif

  // copy results from GPU to CPU
  cudaMemcpy(f_temp, d_force, inum*sizeof(float3), cudaMemcpyDeviceToHost);
  if (eflag) {
    cudaMemcpy(energy, d_energy, inum*sizeof(float), cudaMemcpyDeviceToHost);
    for (int i = 0; i < inum; i++) {
      evdwl += energy[i];
    }
    evdwl *= 0.5f;
  }
  if (vflag) {
    cudaMemcpy(v_temp, d_virial, inum*2*sizeof(float3), cudaMemcpyDeviceToHost);
    for (int i = 0; i < inum; i++) {
      virial[0] += v_temp[2*i].x;
      virial[1] += v_temp[2*i].y;
      virial[2] += v_temp[2*i].z;
      virial[3] += v_temp[2*i+1].x;
      virial[4] += v_temp[2*i+1].y;
      virial[5] += v_temp[2*i+1].z;
    }
    for (int i = 0; i < 6; i++)
      virial[i] *= 0.5f;
  }

  for (int i = 0; i < inum; i++) {
    force[i][0] += f_temp[i].x;
    force[i][1] += f_temp[i].y;
    force[i][2] += f_temp[i].z;
  }
#endif
  ljm.time_pair.stop();

  ljm.atom.time_atom.add_to_total();
  ljm.nbor.time_nbor.add_to_total();
  ljm.time_pair.add_to_total();

  return evdwl;
}

EXTERN double lj_gpu_n2(double **force, double *virial, double **host_x, int *host_type, const int inum, const int nall,
                        const bool eflag, const bool vflag,
                        const double *boxlo, const double *boxhi)
{
  return _lj_gpu_n2<PRECISION,ACC_PRECISION>(LJMF, force, virial, host_x, host_type, inum, nall,
                                             eflag, vflag, boxlo, boxhi);
}

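The library now exposes two force paths: lj_gpu_cell, the cell-list path, and lj_gpu_n2, a brute-force O(N^2) reference useful for validation on small systems. Note that in this revision the working body of _lj_gpu_n2 is fenced behind #ifdef NOUSE, so unless that macro is defined the function reduces to timer bookkeeping and returns 0.0 (and time_pair.start() sits inside the fenced block while time_pair.stop() does not). A caller could select between the two paths like this (sketch; the use_n2 switch is hypothetical):

    double evdwl;
    if (use_n2)   // hypothetical debug switch for the brute-force reference
      evdwl = lj_gpu_n2(f, virial, x, type, inum, nall,
                        eflag, vflag, boxlo, boxhi);
    else
      evdwl = lj_gpu_cell(f, virial, x, type, inum, nall,
                          ago, eflag, vflag, boxlo, boxhi);
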
EXTERN void lj_gpu_time() {
  cout.precision(4);
  cout << "Atom copy: " << LJMF.atom.time_atom.total_seconds() << " s.\n";
  cout << "Neighbor copy: " << LJMF.nbor.time_nbor.total_seconds() << " s.\n";
@ -229,10 +420,10 @@ void lj_gpu_time() {
  cout << "Answer copy: " << LJMF.atom.time_answer.total_seconds() << " s.\n";
}

EXTERN int lj_gpu_num_devices() {
  return LJMF.gpu.num_devices();
}

EXTERN double lj_gpu_bytes() {
  return LJMF.host_memory_usage();
}

lib/gpu/lj_gpu_kernel.cu
@ -1,29 +1,220 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef LJ_GPU_KERNEL
#define LJ_GPU_KERNEL

/* Cell list version of LJ kernel */
template<bool eflag, bool vflag>
__global__ void kernel_lj_cell(float3 *force3,
                               float *energy, float3 *virial,
                               float3 *cell_list, unsigned int *cell_idx,
                               int *cell_type, int *cell_atom,
                               const int inum, const int nall, const int ncell)
{
  // calculate 3D block idx from 2d block
  int bx = blockIdx.x;
  int by = blockIdx.y % gridDim.x;
  int bz = blockIdx.y / gridDim.x;

  int tid = threadIdx.x;

  // compute cell idx from 3D block idx
  int cid = bx + INT_MUL(by, gridDim.x) + INT_MUL(bz, gridDim.x*gridDim.x);

  __shared__ int typeSh[CELL_SIZE];
  __shared__ float posSh[CELL_SIZE*3];
  __shared__ float cutsqSh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __shared__ float lj1Sh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __shared__ float lj2Sh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];

  extern __shared__ float smem[];

  __shared__ float *lj3Sh;
  __shared__ float *lj4Sh;
  __shared__ float *offsetSh;

  // load force parameters into shared memory
  for (int i = tid; i < MAX_SHARED_TYPES*MAX_SHARED_TYPES; i += BLOCK_1D) {
    int itype = i/MAX_SHARED_TYPES;
    int jtype = i%MAX_SHARED_TYPES;
    cutsqSh[i] = _cutsq_<float>(itype,jtype);
    lj1Sh[i]   = _lj1_<float>(itype,jtype).x;
    lj2Sh[i]   = _lj1_<float>(itype,jtype).y;
  }

  // Only allocate shared memory when needed,
  // this reduces shared memory limitation on occupancy
  if (eflag || vflag) {
    lj3Sh = smem;
    lj4Sh = lj3Sh + MAX_SHARED_TYPES*MAX_SHARED_TYPES;
    offsetSh = lj4Sh + MAX_SHARED_TYPES*MAX_SHARED_TYPES;
    for (int i = tid; i < MAX_SHARED_TYPES*MAX_SHARED_TYPES; i += BLOCK_1D) {
      int itype = i/MAX_SHARED_TYPES;
      int jtype = i%MAX_SHARED_TYPES;
      lj3Sh[i] = _lj3_<float>(itype,jtype).x+0.01;
      lj4Sh[i] = _lj3_<float>(itype,jtype).y;
      offsetSh[i]= _offset_<float>(itype,jtype);
    }
  }

  __syncthreads();

  int nborz0 = max(bz-1,0), nborz1 = min(bz+1, gridDim.x-1),
      nbory0 = max(by-1,0), nbory1 = min(by+1, gridDim.x-1),
      nborx0 = max(bx-1,0), nborx1 = min(bx+1, gridDim.x-1);

  for (int ii = 0; ii < ceil((float)(cell_atom[cid])/BLOCK_1D); ii++) {
    float3 f = {0.0f, 0.0f, 0.0f};
    float ener = 0.0f;
    float3 v0 = {0.0f, 0.0f, 0.0f}, v1 = {0.0f, 0.0f, 0.0f};
    int itype;
    float ix, iy, iz;
    int i = tid + ii*BLOCK_1D;
    unsigned int answer_pos = cell_idx[cid*CELL_SIZE+i];

    // load current cell atom position and type into sMem
    for (int j = tid; j < cell_atom[cid]; j += BLOCK_1D) {
      int pid = cid*CELL_SIZE + j;
      float3 pos = cell_list[pid];
      posSh[j            ] = pos.x;
      posSh[j+  CELL_SIZE] = pos.y;
      posSh[j+2*CELL_SIZE] = pos.z;
      typeSh[j]            = cell_type[pid];
    }
    __syncthreads();
    if (answer_pos < inum) {
      itype = typeSh[i];
      ix = posSh[i            ];
      iy = posSh[i+  CELL_SIZE];
      iz = posSh[i+2*CELL_SIZE];

      // compute force from current cell
      for (int j = 0; j < cell_atom[cid]; j++) {
        if (j == i) continue;
        float delx = ix - posSh[j            ];
        float dely = iy - posSh[j+  CELL_SIZE];
        float delz = iz - posSh[j+2*CELL_SIZE];
        int jtype = typeSh[j];
        int mtype = itype + jtype*MAX_SHARED_TYPES;
        float r2inv = delx*delx + dely*dely + delz*delz;

        if (r2inv < cutsqSh[mtype]) {
          r2inv = 1.0f/r2inv;
          float r6inv = r2inv * r2inv * r2inv;
          float force = r2inv*r6inv*(lj1Sh[mtype]*r6inv - lj2Sh[mtype]);
          f.x += delx * force;
          f.y += dely * force;
          f.z += delz * force;

          if (eflag) {
            float e = r6inv*(lj3Sh[mtype]*r6inv - lj4Sh[mtype]);
            ener += (e - offsetSh[mtype]);
          }

          if (vflag) {
            v0.x += delx*delx*force;
            v0.y += dely*dely*force;
            v0.z += delz*delz*force;
            v1.x += delx*dely*force;
            v1.y += delx*delz*force;
            v1.z += dely*delz*force;
          }
        }
      }
    }
    __syncthreads();

    // compute force from neighboring cells
    for (int nborz = nborz0; nborz <= nborz1; nborz++) {
      for (int nbory = nbory0; nbory <= nbory1; nbory++) {
        for (int nborx = nborx0; nborx <= nborx1; nborx++) {
          if (nborz == bz && nbory == by && nborx == bx) continue;

          // compute cell id
          int cid_nbor = nborx + INT_MUL(nbory,gridDim.x) +
                         INT_MUL(nborz,gridDim.x*gridDim.x);

          // load neighbor cell position and type into smem
          for (int j = tid; j < cell_atom[cid_nbor]; j += BLOCK_1D) {
            int pid = INT_MUL(cid_nbor,CELL_SIZE) + j;
            float3 pos = cell_list[pid];
            posSh[j            ] = pos.x;
            posSh[j+  CELL_SIZE] = pos.y;
            posSh[j+2*CELL_SIZE] = pos.z;
            typeSh[j]            = cell_type[pid];
          }
          __syncthreads();
          // compute force
          if (answer_pos < inum) {
            for (int j = 0; j < cell_atom[cid_nbor]; j++) {
              float delx = ix - posSh[j            ];
              float dely = iy - posSh[j+  CELL_SIZE];
              float delz = iz - posSh[j+2*CELL_SIZE];
              int jtype = typeSh[j];
              int mtype = itype + jtype*MAX_SHARED_TYPES;
              float r2inv = delx*delx + dely*dely + delz*delz;

              if (r2inv < cutsqSh[mtype]) {
                r2inv = 1.0f/r2inv;
                float r6inv = r2inv * r2inv * r2inv;
                float force = r2inv*r6inv*(lj1Sh[mtype]*r6inv - lj2Sh[mtype]);
                f.x += delx * force;
                f.y += dely * force;
                f.z += delz * force;

                if (eflag) {
                  float e=r6inv*(lj3Sh[mtype]*r6inv - lj4Sh[mtype]);
                  ener += (e-offsetSh[mtype]);
                }
                if (vflag) {
                  v0.x += delx*delx*force;
                  v0.y += dely*dely*force;
                  v0.z += delz*delz*force;
                  v1.x += delx*dely*force;
                  v1.y += delx*delz*force;
                  v1.z += dely*delz*force;
                }
              }
            }
          }
          __syncthreads();
        }
      }
    }

    if (answer_pos < inum) {
      force3[answer_pos] = f;
      if (eflag)
        energy[answer_pos] = ener;
      if (vflag) {
        virial[2*answer_pos] = v0;
        virial[2*answer_pos+1] = v1;
      }
    }
  }
}

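The kernel above runs one thread block per cell; the y dimension of the 2D launch grid packs the remaining two axes of the 3D cell grid, which the kernel unpacks via by = blockIdx.y % gridDim.x and bz = blockIdx.y / gridDim.x. The actual launch site lives in _lj_gpu_cell (not shown in this hunk); a sketch consistent with the index math and the eflag||vflag shared-memory branch above, with d_force/d_energy/d_virial as assumed device buffers:

    dim3 block(BLOCK_1D);
    dim3 grid(ncell1D, ncell1D*ncell1D);   // x -> bx; y packs (by, bz)
    // energy+virial instantiation: the lj3/lj4/offset tables then live in
    // dynamic shared memory, per the eflag||vflag branch in the kernel
    size_t smem = 3*MAX_SHARED_TYPES*MAX_SHARED_TYPES*sizeof(float);
    kernel_lj_cell<true,true><<<grid, block, smem>>>(d_force, d_energy, d_virial,
        cell_list_gpu.pos, cell_list_gpu.idx, cell_list_gpu.type,
        cell_list_gpu.natom, inum, nall, ncell);
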
/* Neighbor list version of LJ kernel */
template<class numtyp, class acctyp>
__global__ void kernel_lj(const numtyp *special_lj, const int *dev_nbor,
                          const int *dev_ij, const int nbor_pitch, acctyp *ans,
@ -36,7 +227,6 @@ __global__ void kernel_lj(const numtyp *special_lj, const int *dev_nbor,
  if (ii<4)
    sp_lj[ii]=special_lj[ii];
  ii+=INT_MUL(blockIdx.x,blockDim.x);

  if (ii<inum) {

@ -158,7 +348,6 @@ __global__ void kernel_lj_fast(const numtyp *special_lj, const int *dev_nbor,
    }
  }
  ii+=INT_MUL(blockIdx.x,blockDim.x);

  if (ii<inum) {

@ -184,9 +373,11 @@ __global__ void kernel_lj_fast(const numtyp *special_lj, const int *dev_nbor,
    int itype=INT_MUL(MAX_SHARED_TYPES,_x_<numtyp>(i,3));

    numtyp factor_lj;

    for ( ; list<list_end; list++) {

      int j = *list;

      if (j < nall)
        factor_lj = 1.0;
      else {
@ -247,4 +438,116 @@ __global__ void kernel_lj_fast(const numtyp *special_lj, const int *dev_nbor,
  } // if ii
}

/* Brute force O(N^2) version of LJ kernel */
template<class numtyp, class acctyp>
__global__ void kernel_lj_n2(float3 *force3,
                             float *energy, float3 *virial,
                             float3 *pos, int *type,
                             const bool eflag, const bool vflag, const int inum, const int nall)
{
  int gid = threadIdx.x + INT_MUL(blockIdx.x, blockDim.x);
  int tid = threadIdx.x;
  __shared__ float posSh[BLOCK_1D*3];
  __shared__ int typeSh[BLOCK_1D];
  __shared__ numtyp cutsqSh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __shared__ numtyp lj1Sh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __shared__ numtyp lj2Sh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __shared__ numtyp lj3Sh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __shared__ numtyp lj4Sh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __shared__ numtyp offsetSh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];

  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
    int itype=tid/MAX_SHARED_TYPES;
    int jtype=tid%MAX_SHARED_TYPES;
    cutsqSh[tid]=_cutsq_<numtyp>(itype,jtype);
    lj1Sh[tid]=_lj1_<numtyp>(itype,jtype).x;
    lj2Sh[tid]=_lj1_<numtyp>(itype,jtype).y;
    lj3Sh[tid]=_lj3_<numtyp>(itype,jtype).x;
    lj4Sh[tid]=_lj3_<numtyp>(itype,jtype).y;
    offsetSh[tid]=_offset_<numtyp>(itype,jtype);
  }
  __syncthreads();

  float3 f = {0.0f, 0.0f, 0.0f};
  float ener = 0.0f;
  float3 v0 = {0.0f, 0.0f, 0.0f}, v1 = {0.0f, 0.0f, 0.0f};

  int itype, jtype;
  int mtype;

  numtyp ix, iy, iz;

  if (gid < inum) {
    ix = pos[gid].x;
    iy = pos[gid].y;
    iz = pos[gid].z;
    itype = type[gid];
  }

  int pid = tid;
  int nIter = ceil((float)nall/BLOCK_1D);
  for (int jj = 0; jj < nIter; jj++, pid += BLOCK_1D) {

    if (pid < nall) {
      posSh[tid           ] = pos[pid].x;
      posSh[tid+  BLOCK_1D] = pos[pid].y;
      posSh[tid+2*BLOCK_1D] = pos[pid].z;
      typeSh[tid]           = type[pid];
    }
    __syncthreads();

    if (gid < inum) {
      int pid_j = jj*BLOCK_1D;

      for (int j = 0; j < BLOCK_1D; j++, pid_j++) {
        if (jj == blockIdx.x && tid == j) continue;
        if (pid_j < nall) {
          numtyp delx = ix - posSh[j           ];
          numtyp dely = iy - posSh[j+  BLOCK_1D];
          numtyp delz = iz - posSh[j+2*BLOCK_1D];
          jtype = typeSh[j];
          mtype = itype + jtype*MAX_SHARED_TYPES;
          numtyp r2inv = delx * delx + dely * dely + delz * delz;

          if (r2inv < cutsqSh[mtype]) {
            r2inv = (numtyp)1.0/r2inv;
            numtyp r6inv = r2inv * r2inv * r2inv;
            numtyp force = r2inv*r6inv*(lj1Sh[mtype]*r6inv - lj2Sh[mtype]);
            f.x += delx * force;
            f.y += dely * force;
            f.z += delz * force;

            if (eflag) {
              numtyp e = r6inv*(lj3Sh[mtype]*r6inv - lj4Sh[mtype]);
              ener += (e-offsetSh[mtype]);
            }
            if (vflag) {
              v0.x += delx*delx*force;
              v0.y += dely*dely*force;
              v0.z += delz*delz*force;
              v1.x += delx*dely*force;
              v1.y += delx*delz*force;
              v1.z += dely*delz*force;
            }
          }
        }
      }
    }

    __syncthreads();
  }

  if (gid < inum) {
    if (eflag)
      energy[gid] = ener;
    if (vflag) {
      virial[2*gid  ] = v0;
      virial[2*gid+1] = v1;
    }
    force3[gid] = f;
  }

}

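The launch configuration for this kernel appears in _lj_gpu_n2: one thread per owned atom, BLOCK_1D threads per block. That is also what makes the self-interaction skip correct, as a worked check shows:

    // Why the skip above is safe: the launch uses blockDim.x == BLOCK_1D, so
    //   gid   = blockIdx.x*BLOCK_1D + tid   (atom owned by this thread)
    //   pid_j = jj*BLOCK_1D + j             (atom read from the current tile)
    // and pid_j == gid exactly when jj == blockIdx.x && j == tid,
    // which is the branch tested in the inner loop.
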
#endif

lib/gpu/lj_gpu_memory.cu
@ -1,25 +1,21 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include "lj_gpu_memory.h"
#define LJ_GPU_MemoryT LJ_GPU_Memory<numtyp, acctyp>

lib/gpu/lj_gpu_memory.h
@ -1,25 +1,21 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef LJ_GPU_MEMORY_H
#define LJ_GPU_MEMORY_H
@ -30,8 +26,10 @@
#include "pair_gpu_nbor.h"

#define BLOCK_1D 64
#define CELL_SIZE 64
#define MAX_SHARED_TYPES 8
#define PERCENT_GPU_MEMORY 0.7
#define BIG_NUMBER 100000000

template <class numtyp, class acctyp>
class LJ_GPU_Memory {
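CELL_SIZE (atom slots per cell) and BLOCK_1D (threads per block) are both 64, and the cell kernels stride shared arrays sized by one with loops stepped by the other, so the two constants are effectively coupled. If one wanted to pin that assumption down at compile time, a classic pre-static_assert guard would do (illustrative sketch, not in the original):

    // Fails to compile if the two constants drift apart.
    typedef char cell_size_must_match_block_1d[(CELL_SIZE == BLOCK_1D) ? 1 : -1];
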
lib/gpu/nvc_device.cu
@ -1,25 +1,21 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include <stdlib.h>
#include <stdio.h>

lib/gpu/nvc_device.h
@ -1,25 +1,21 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef NVC_DEVICE
#define NVC_DEVICE

lib/gpu/nvc_get_devices.h
@ -1,25 +1,21 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include "nvc_device.h"

lib/gpu/nvc_macros.h
@ -1,3 +1,22 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef NVC_MACROS_H
#define NVC_MACROS_H

lib/gpu/nvc_memory.h
@ -1,25 +1,21 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef NVC_MEMORY_H
#define NVC_MEMORY_H

lib/gpu/nvc_timer.h
@ -1,25 +1,21 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef NVC_TIMER_H
#define NVC_TIMER_H

lib/gpu/nvc_texture_traits.h
@ -1,25 +1,21 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef NVC_TEXTURE_TRAITS_H
#define NVC_TEXTURE_TRAITS_H

lib/gpu/pair_gpu_atom.cu
@ -1,25 +1,21 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include "pair_gpu_texture.h"
#include "pair_gpu_atom.h"

lib/gpu/pair_gpu_atom.h
@ -1,25 +1,21 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef PAIR_GPU_ATOM_H
#define PAIR_GPU_ATOM_H

454
lib/gpu/pair_gpu_cell.cu
Normal file
@ -0,0 +1,454 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include "lj_gpu_memory.h"
#include "pair_gpu_cell.h"

static __constant__ float d_boxlo[3];
static __constant__ float d_boxhi[3];
static __constant__ float d_cell_size[1];
static __constant__ float d_skin[1];

void init_cell_list_const(double cell_size, double skin,
                          double *boxlo, double *boxhi)
{
  float cell_size1 = cell_size;
  float skin1 = skin;
  float boxlo1[3], boxhi1[3];
  for (int i = 0; i < 3; i++) {
    boxlo1[i] = boxlo[i];
    boxhi1[i] = boxhi[i];
  }

  cudaMemcpyToSymbol(d_cell_size, &cell_size1, sizeof(float),
                     0, cudaMemcpyHostToDevice);
  cudaMemcpyToSymbol(d_boxlo, boxlo1, 3*sizeof(float),
                     0, cudaMemcpyHostToDevice);
  cudaMemcpyToSymbol(d_boxhi, boxhi1, 3*sizeof(float),
                     0, cudaMemcpyHostToDevice);
  cudaMemcpyToSymbol(d_skin, &skin1, sizeof(float),
                     0, cudaMemcpyHostToDevice);
}
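A sketch of how this constant-memory setup might be called from the host once the simulation box is known; the numeric values are made up for illustration:

    // Illustrative setup call (box bounds and sizes are example values).
    double boxlo[3] = {0.0, 0.0, 0.0}, boxhi[3] = {20.0, 20.0, 20.0};
    double skin = 0.3;               // neighbor skin
    double cell_size = 2.5 + skin;   // e.g. force cutoff plus skin
    init_cell_list_const(cell_size, skin, boxlo, boxhi);
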
__global__ void kernel_set_cell_list(unsigned int *cell_idx)
{
  unsigned int gid = threadIdx.x + blockIdx.x*blockDim.x;
  cell_idx[gid] = BIG_NUMBER;
}

// build the cell list
__global__ void kernel_build_cell_list(float3 *cell_list,
                                       unsigned int *cell_idx,
                                       int *cell_type,
                                       int *cell_atom,
                                       float3 *pos,
                                       int *type,
                                       const int inum,
                                       const int nall)
{
  unsigned int gid = threadIdx.x + blockIdx.x*blockDim.x;
  float cSize = d_cell_size[0];
  int ncell1D = ceil(((d_boxhi[0] - d_boxlo[0]) + 2.0f*cSize) / cSize);

  if (gid < nall) {
    float3 p = pos[gid];
    p.x = fmaxf(p.x, d_boxlo[0]-cSize);
    p.x = fminf(p.x, d_boxhi[0]+cSize);
    p.y = fmaxf(p.y, d_boxlo[1]-cSize);
    p.y = fminf(p.y, d_boxhi[1]+cSize);
    p.z = fmaxf(p.z, d_boxlo[2]-cSize);
    p.z = fminf(p.z, d_boxhi[2]+cSize);

    int cell_id = (int)(p.x/cSize + 1.0) + (int)(p.y/cSize + 1.0) * ncell1D
      + (int)(p.z/cSize + 1.0) * ncell1D * ncell1D;

    int atom_pos = atomicAdd(&cell_atom[cell_id], 1);
    int pid = cell_id*CELL_SIZE + atom_pos;

    cell_list[pid] = pos[gid];
    cell_type[pid] = type[gid];
    cell_idx [pid] = gid;

  }
}
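The grid is padded with one layer of ghost cells on each side, which is why positions are clamped to [boxlo - cell_size, boxhi + cell_size] and why the +1.0 offsets appear in the cell-id computation. The same arithmetic is useful on the host for sizing allocations; a sketch mirroring the kernel:

    // Host-side mirror of the kernel's cell geometry. Note the kernel derives
    // ncell1D from the x extent only, i.e. it assumes a cubic box.
    int ncell1D = (int) ceil(((boxhi[0] - boxlo[0]) + 2.0*cell_size) / cell_size);
    int ncell   = ncell1D*ncell1D*ncell1D;   // total cells incl. ghost layers
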
__global__ void kernel_test_rebuild(float3 *cell_list, int *cell_atom, int *rebuild)
{

  float cSize = d_cell_size[0];
  int ncell1D = ceil(((d_boxhi[0] - d_boxlo[0]) + 2.0f*cSize) / cSize);

  // calculate 3D block idx from 2d block
  int bx = blockIdx.x;
  int by = blockIdx.y % gridDim.x;
  int bz = blockIdx.y / gridDim.x;

  int tid = threadIdx.x;

  // compute cell idx from 3D block idx
  int cid = bx + INT_MUL(by, gridDim.x) + INT_MUL(bz, gridDim.x*gridDim.x);
  int pbase = INT_MUL(cid,CELL_SIZE); // atom position id in cell list

  float skin = d_skin[0];
  float lowx = d_boxlo[0] + (bx-1)*cSize - 0.5*skin;
  float hix  = lowx + cSize + skin;
  float lowy = d_boxlo[1] + (by-1)*cSize - 0.5*skin;
  float hiy  = lowy + cSize + skin;
  float lowz = d_boxlo[2] + (bz-1)*cSize - 0.5*skin;
  float hiz  = lowz + cSize + skin;

  for (int i = tid; i < cell_atom[cid]; i += BLOCK_1D) {
    int pid = pbase + i;
    float3 p = cell_list[pid];
    p.x = fmaxf(p.x, d_boxlo[0]-cSize);
    p.x = fminf(p.x, d_boxhi[0]+cSize);
    p.y = fmaxf(p.y, d_boxlo[1]-cSize);
    p.y = fminf(p.y, d_boxhi[1]+cSize);
    p.z = fmaxf(p.z, d_boxlo[2]-cSize);
    p.z = fminf(p.z, d_boxhi[2]+cSize);

    if (p.x < lowx || p.x > hix || p.y < lowy || p.y > hiy || p.z < lowz || p.z > hiz) {
      *rebuild = 1;
    }
  }

}

__global__ void kernel_test_overflow(int *cell_atom, int *overflow, const int ncell)
{
  unsigned int gid = threadIdx.x + blockIdx.x*blockDim.x;

  if (gid < ncell) {
    if (cell_atom[gid] > CELL_SIZE)
      *overflow = 1;
  }
}

__global__ void kernel_copy_list(float3 *cell_list, unsigned int *cell_idx, int *cell_atom, float3 *pos)
{
  // calculate 3D block idx from 2d block
  int bx = blockIdx.x;
  int by = blockIdx.y % gridDim.x;
  int bz = blockIdx.y / gridDim.x;

  int tid = threadIdx.x;

  // compute cell idx from 3D block idx
  int cid = bx + INT_MUL(by, gridDim.x) + INT_MUL(bz, gridDim.x*gridDim.x);
  int pbase = INT_MUL(cid,CELL_SIZE); // atom position id in cell list

  for (int i = tid; i < cell_atom[cid]; i += BLOCK_1D) {
    int pid = pbase + i;
    cell_list[pid] = pos[cell_idx[pid]];
  }

}

__global__ void radixSortBlocks(unsigned int *keys, float3 *values1, int *values2, unsigned int nbits, unsigned int startbit);

void sortBlocks(unsigned int *keys, float3 *values1, int *values2, const int size)
{
  int i = 0;
  const unsigned int bitSize = sizeof(unsigned int)*8;
  const unsigned int bitStep = 4;
  const int gSize = size/BLOCK_1D;
  while (bitSize > i*bitStep) {
    radixSortBlocks<<<gSize, BLOCK_1D, 2*BLOCK_1D*sizeof(unsigned int)>>>(keys, values1, values2, bitStep, i*bitStep);
    i++;
  }
}
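This is a least-significant-digit radix sort in 4-bit digits over 32-bit keys, so the loop issues 32/4 = 8 kernel launches. Each launch sorts independently within a BLOCK_1D-sized thread block; assuming the buffer argument equals CELL_SIZE (== BLOCK_1D), as the rest of this file does, that is exactly one sorting block per cell:

    // Pass count: 32 key bits / 4 bits per pass = 8 radixSortBlocks launches.
    // With size == ncell*buffer and buffer == BLOCK_1D, gSize == ncell,
    // i.e. one block per cell, matching the per-cell ordering the force
    // kernel relies on.
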
#ifdef __DEVICE_EMULATION__
#define __SYNC __syncthreads();
#else
#define __SYNC
#endif

#define WARP_SIZE 32

template<class T, int maxlevel>
__device__ T scanwarp(T val, T* sData)
{
  // The following is the same as 2 * RadixSort::WARP_SIZE * warpId + threadInWarp =
  // 64*(threadIdx.x >> 5) + (threadIdx.x & (RadixSort::WARP_SIZE - 1))
  int idx = 2 * threadIdx.x - (threadIdx.x & (WARP_SIZE - 1));
  sData[idx] = 0;
  idx += WARP_SIZE;
  sData[idx] = val; __SYNC

#ifdef __DEVICE_EMULATION__
  T t = sData[idx - 1];  __SYNC
  sData[idx] += t;       __SYNC
  t = sData[idx - 2];    __SYNC
  sData[idx] += t;       __SYNC
  t = sData[idx - 4];    __SYNC
  sData[idx] += t;       __SYNC
  t = sData[idx - 8];    __SYNC
  sData[idx] += t;       __SYNC
  t = sData[idx - 16];   __SYNC
  sData[idx] += t;       __SYNC
#else
  if (0 <= maxlevel) { sData[idx] += sData[idx - 1]; } __SYNC
  if (1 <= maxlevel) { sData[idx] += sData[idx - 2]; } __SYNC
  if (2 <= maxlevel) { sData[idx] += sData[idx - 4]; } __SYNC
  if (3 <= maxlevel) { sData[idx] += sData[idx - 8]; } __SYNC
  if (4 <= maxlevel) { sData[idx] += sData[idx -16]; } __SYNC
#endif

  return sData[idx] - val;  // convert inclusive -> exclusive
}

__device__ unsigned int scan(unsigned int idata)
{
  extern __shared__ unsigned int ptr[];

  unsigned int idx = threadIdx.x;

  unsigned int val = idata;

  val = scanwarp<unsigned int, 4>(val, ptr);
  __syncthreads();

  if ((idx & (WARP_SIZE - 1)) == WARP_SIZE - 1)
  {
    ptr[idx >> 5] = val + idata;
  }
  __syncthreads();

#ifndef __DEVICE_EMULATION__
  if (idx < WARP_SIZE)
#endif
  {
    ptr[idx] = scanwarp<unsigned int, 2>(ptr[idx], ptr);
  }
  __syncthreads();

  val += ptr[idx >> 5];

  return val;
}

__device__ unsigned int rank(unsigned int preds)
{
  unsigned int address = scan(preds);

  __shared__ unsigned int numtrue;
  if (threadIdx.x == BLOCK_1D - 1)
  {
    numtrue = address + preds;
  }
  __syncthreads();

  unsigned int rank;
  unsigned int idx = threadIdx.x;
  rank = (preds) ? address : numtrue + idx - address;

  return rank;
}

__device__ void radixSortBlock(unsigned int *key, float3 *value1, int *value2, unsigned int nbits, unsigned int startbit)
{
  extern __shared__ unsigned int sMem1[];
  __shared__ float sMem2[BLOCK_1D];
  __shared__ int sMem3[BLOCK_1D];

  int tid = threadIdx.x;

  for(unsigned int shift = startbit; shift < (startbit + nbits); ++shift) {
    unsigned int lsb;
    lsb = !(((*key) >> shift) & 0x1);

    unsigned int r;

    r = rank(lsb);

    // This arithmetic strides the ranks across 4 CTA_SIZE regions
    sMem1[r] = *key;
    __syncthreads();

    // The above allows us to read without 4-way bank conflicts:
    *key = sMem1[tid];
    __syncthreads();

    sMem2[r] = (*value1).x;
    __syncthreads();
    (*value1).x = sMem2[tid];
    __syncthreads();

    sMem2[r] = (*value1).y;
    __syncthreads();
    (*value1).y = sMem2[tid];
    __syncthreads();

    sMem2[r] = (*value1).z;
    __syncthreads();
    (*value1).z = sMem2[tid];
    __syncthreads();

    sMem3[r] = *value2;
    __syncthreads();
    *value2 = sMem3[tid];
    __syncthreads();

  }

}

__global__ void radixSortBlocks(unsigned int *keys, float3 *values1, int *values2, unsigned int nbits, unsigned int startbit)
{

  extern __shared__ unsigned int sMem[];

  int gid = threadIdx.x + blockIdx.x * blockDim.x;
  unsigned int key;
  float3 value1;
  int value2;
  key = keys[gid];
  value1 = values1[gid];
  value2 = values2[gid];
  __syncthreads();

  radixSortBlock(&key, &value1, &value2, nbits, startbit);

  keys[gid] = key;
  values1[gid] = value1;
  values2[gid] = value2;
}
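rank() implements the stable split at the heart of each radix pass: threads whose predicate is true are packed to the front in order, and the rest follow, also in order. With address the exclusive prefix sum of the predicate bits and numtrue their total (captured by the last thread), a worked 4-thread example:

    // preds   = {1, 0, 1, 0}   (in radixSortBlock, preds is the COMPLEMENT of
    //                           the key's digit bit, so 0-digits come first)
    // address = {0, 1, 1, 2}   (exclusive prefix sum of preds)
    // numtrue = address[3] + preds[3] = 2
    // rank    = preds ? address : numtrue + idx - address
    //         = {0, 2, 1, 3}   -> true-predicate threads land in slots 0..1,
    //                             the others in 2..3, both groups stable,
    //                             giving an ascending sort over all passes.
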
static float3 *d_pos, *pos_temp;
static int *d_type;
static int *d_overflow, *d_rebuild;

void init_cell_list(cell_list &cell_list_gpu,
                    const int nall,
                    const int ncell,
                    const int buffer)
{
  cudaMalloc((void**)&(cell_list_gpu.pos),   ncell*buffer*sizeof(float3));
  cudaMalloc((void**)&(cell_list_gpu.idx),   ncell*buffer*sizeof(unsigned int));
  cudaMalloc((void**)&(cell_list_gpu.type),  ncell*buffer*sizeof(int));
  cudaMalloc((void**)&(cell_list_gpu.natom), ncell*sizeof(int));

  cudaMallocHost((void**)&pos_temp, nall*sizeof(float3));
  cudaMalloc((void**)&d_pos, nall*sizeof(float3));
  cudaMalloc((void**)&d_type, nall*sizeof(int));
  cudaMalloc((void**)&d_overflow, sizeof(int));
  cudaMalloc((void**)&d_rebuild, sizeof(int));

  cudaMemset(cell_list_gpu.natom, 0, ncell*sizeof(int));
  cudaMemset(cell_list_gpu.pos, 0, ncell*buffer*sizeof(float3));
}

void clear_cell_list(cell_list &cell_list_gpu)
{
  cudaFree(cell_list_gpu.pos);
  cudaFree(cell_list_gpu.idx);
  cudaFree(cell_list_gpu.natom);
  cudaFree(cell_list_gpu.type);

  cudaFreeHost(pos_temp);
  cudaFree(d_pos);
  cudaFree(d_type);
  cudaFree(d_overflow);
  cudaFree(d_rebuild);
}
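A host-side sketch of allocating and releasing the list with the geometry computed as in kernel_build_cell_list; passing CELL_SIZE as the buffer argument matches the fixed per-cell slot stride the kernels use:

    cell_list cell_list_gpu;   // struct declared in pair_gpu_cell.h
    int ncell1D = (int) ceil(((boxhi[0] - boxlo[0]) + 2.0*cell_size) / cell_size);
    int ncell   = ncell1D*ncell1D*ncell1D;
    init_cell_list(cell_list_gpu, nall, ncell, CELL_SIZE);
    // ... timestep loop using build_cell_list ...
    clear_cell_list(cell_list_gpu);
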
void build_cell_list(double *atom_pos, int *atom_type,
|
||||||
|
cell_list &cell_list_gpu,
|
||||||
|
const int ncell, const int ncell1D, const int buffer,
|
||||||
|
const int inum, const int nall, const int ago)
|
||||||
|
{
|
||||||
|
cudaMemset(d_overflow, 0, sizeof(int));
|
||||||
|
cudaMemset(d_rebuild, 0, sizeof(int));
|
||||||
|
|
||||||
|
// copy position and type to GPU
|
||||||
|
for (int i = 0; i < 3*nall; i+=3) {
|
||||||
|
pos_temp[i/3] = make_float3(atom_pos[i], atom_pos[i+1], atom_pos[i+2]);
|
||||||
|
}
|
||||||
|
cudaMemcpy(d_pos, pos_temp, nall*sizeof(float3), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_type, atom_type, nall*sizeof(int), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
static int first_build = 1;
|
||||||
|
int rebuild = 0;
|
||||||
|
|
||||||
|
// copy the last built cell-list and test whether it needs to be rebuilt
|
||||||
|
if (!first_build) {
|
||||||
|
dim3 block(BLOCK_1D);
|
||||||
|
dim3 grid(ncell1D, ncell1D*ncell1D);
|
||||||
|
kernel_copy_list<<<grid, block>>>(cell_list_gpu.pos,
|
||||||
|
cell_list_gpu.idx,
|
||||||
|
cell_list_gpu.natom, d_pos);
|
||||||
|
cudaMemset(d_rebuild, 0, sizeof(int));
|
||||||
|
int *temp = (int*)malloc(sizeof(int)*ncell);
|
||||||
|
kernel_test_rebuild<<<grid, block>>>(cell_list_gpu.pos,
|
||||||
|
cell_list_gpu.natom,
|
||||||
|
d_rebuild);
|
||||||
|
cudaMemcpy(&rebuild, d_rebuild, sizeof(int), cudaMemcpyDeviceToHost);
|
||||||
|
}
  if (ago == 0) rebuild = 1;

  // build the cell list, either for the first time or from scratch after
  // a rebuild was triggered
  if (first_build || rebuild) {
    first_build = 0;
    cudaMemset(cell_list_gpu.natom, 0, ncell*sizeof(int));
    // initialize d_cell_idx for the sorting routine
    kernel_set_cell_list<<<ncell, buffer>>>(cell_list_gpu.idx);

    // bin every atom into its cell
    dim3 blockDim(128);
    dim3 gridDim(static_cast<int>(ceil(static_cast<double>(nall)/blockDim.x)));
    kernel_build_cell_list<<<gridDim, blockDim>>>(cell_list_gpu.pos,
                                                  cell_list_gpu.idx,
                                                  cell_list_gpu.type,
                                                  cell_list_gpu.natom,
                                                  d_pos, d_type, inum, nall);

    // check cell list overflow: some cell received more atoms than fit
    int overflow;
    int gDimCell = static_cast<int>(ceil(static_cast<double>(ncell)/BLOCK_1D));
    kernel_test_overflow<<<gDimCell, BLOCK_1D>>>(cell_list_gpu.natom,
                                                 d_overflow, ncell);
    cudaMemcpy(&overflow, d_overflow, sizeof(int), cudaMemcpyDeviceToHost);
    if (overflow > 0) {
      printf("\n\nBLOCK_1D too small for cell list, please increase it!\n\n");
      exit(1);
    }

    // sort atoms in every cell by atom index so per-cell sums are always
    // accumulated in the same order, avoiding floating-point associativity
    // problems between rebuilds
    sortBlocks(cell_list_gpu.idx, cell_list_gpu.pos,
               cell_list_gpu.type, ncell*buffer);

    cudaThreadSynchronize();
  }
}
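
// The index sort above matters because float addition is not associative:
// a different atom ordering after each rebuild would make accumulated
// forces drift run to run. A two-line host-code illustration of the
// underlying effect (not from this file):
//
//   #include <cstdio>
//   int main() {
//     float a = 1.0e8f, b = -1.0e8f, c = 1.0f;
//     printf("%g vs %g\n", (a + b) + c, a + (b + c));  // prints 1 vs 0
//     return 0;
//   }
//
// (a + b) + c cancels first and keeps c exactly; a + (b + c) rounds c away
// against the large magnitude of b, so the two groupings differ by 1.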
62
lib/gpu/pair_gpu_cell.h
Normal file
@ -0,0 +1,62 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef PAIR_GPU_CELL_H
#define PAIR_GPU_CELL_H

typedef struct {
  float3 *pos;        // per-cell packed atom positions
  unsigned int *idx;  // per-cell atom indices
  int *type;          // per-cell atom types
  int *natom;         // number of atoms in each cell
} cell_list;

__global__ void kernel_set_cell_list(unsigned int *cell_idx);
__global__ void kernel_build_cell_list(float3 *cell_list,
                                       unsigned int *cell_idx,
                                       int *cell_type,
                                       int *cell_atom,
                                       float3 *pos,
                                       int *type,
                                       const int inum,
                                       const int nall);
__global__ void kernel_test_rebuild(float3 *cell_list, int *cell_atom, int *rebuild);
__global__ void kernel_copy_list(float3 *cell_list,
                                 unsigned int *cell_idx,
                                 int *cell_atom,
                                 float3 *pos);
__global__ void kernel_test_overflow(int *cell_atom, int *overflow, const int ncell);
void sortBlocks(unsigned int *keys, float3 *values1, int *values2, const int size);

void init_cell_list_const(double cell_size, double skin,
                          double *boxlo, double *boxhi);
void init_cell_list(cell_list &cell_list_gpu,
                    const int nall,
                    const int ncell,
                    const int buffer);

void build_cell_list(double *atom_pos, int *atom_type,
                     cell_list &cell_list_gpu,
                     const int ncell, const int ncell1D, const int buffer,
                     const int inum, const int nall, const int ago);

void clear_cell_list(cell_list &cell_list_gpu);

#endif
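
// Taken together, the header exposes a small lifecycle: allocate once,
// rebuild as the neighbor "ago" counter dictates, free at teardown. A
// hedged sketch of a caller follows; the names match the declarations
// above, but the concrete values (cells per edge, atoms per cell) are
// illustrative only, not defaults from this library.
//
//   // hypothetical driver, assuming positions in a double[3*nall] array
//   void cell_list_demo(double *x, int *type, int inum, int nall, int ago) {
//     const int ncell1D = 10;                  // cells per box edge (example)
//     const int ncell = ncell1D*ncell1D*ncell1D;
//     const int buffer = 64;                   // max atoms per cell (example)
//     cell_list cl;
//     init_cell_list(cl, nall, ncell, buffer);
//     build_cell_list(x, type, cl, ncell, ncell1D, buffer, inum, nall, ago);
//     // ... launch pair kernels that consume cl.pos / cl.idx / cl.type ...
//     clear_cell_list(cl);
//   }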
lib/gpu/pair_gpu_nbor.cu
@ -1,25 +1,21 @@
-/***************************************************************************
-                               pair_gpu_nbor.cu
-                             -------------------
-                               W. Michael Brown
-
-  Neighbor memory operations for LAMMPS GPU Library
-
- __________________________________________________________________________
-    This file is part of the LAMMPS GPU Library
- __________________________________________________________________________
-
-    begin                : Tue Aug 4 2009
-    copyright            : (C) 2009 by W. Michael Brown
-    email                : wmbrown@sandia.gov
- ***************************************************************************/
-
-/* -----------------------------------------------------------------------
-   Copyright (2009) Sandia Corporation.  Under the terms of Contract
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
-   ----------------------------------------------------------------------- */
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
+                         Peng Wang (Nvidia), penwang@nvidia.com
+                         Paul Crozier (SNL), pscrozi@sandia.gov
+------------------------------------------------------------------------- */
 
 #include "pair_gpu_nbor.h"
lib/gpu/pair_gpu_nbor.h
@ -1,25 +1,21 @@
-/***************************************************************************
-                               pair_gpu_nbor.h
-                             -------------------
-                               W. Michael Brown
-
-  Neighbor memory operations for LAMMPS GPU Library
-
- __________________________________________________________________________
-    This file is part of the LAMMPS GPU Library
- __________________________________________________________________________
-
-    begin                : Tue Aug 4 2009
-    copyright            : (C) 2009 by W. Michael Brown
-    email                : wmbrown@sandia.gov
- ***************************************************************************/
-
-/* -----------------------------------------------------------------------
-   Copyright (2009) Sandia Corporation.  Under the terms of Contract
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
-   ----------------------------------------------------------------------- */
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
+                         Peng Wang (Nvidia), penwang@nvidia.com
+                         Paul Crozier (SNL), pscrozi@sandia.gov
+------------------------------------------------------------------------- */
 
 #ifndef PAIR_GPU_NBOR_H
 #define PAIR_GPU_NBOR_H
lib/gpu/pair_gpu_texture.h
@ -1,25 +1,21 @@
-/***************************************************************************
-                              pair_gpu_texture.h
-                             -------------------
-                               W. Michael Brown
-
-  Tricks for templating textures
-
- __________________________________________________________________________
-    This file is part of the LAMMPS GPU Library
- __________________________________________________________________________
-
-    begin                : Tue Jun 23 2009
-    copyright            : (C) 2009 by W. Michael Brown
-    email                : wmbrown@sandia.gov
- ***************************************************************************/
-
-/* -----------------------------------------------------------------------
-   Copyright (2009) Sandia Corporation.  Under the terms of Contract
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
-   ----------------------------------------------------------------------- */
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
+                         Peng Wang (Nvidia), penwang@nvidia.com
+                         Paul Crozier (SNL), pscrozi@sandia.gov
+------------------------------------------------------------------------- */
 
 #include "nvc_traits.h"
 #include "nvc_memory.h"
lib/gpu/pair_tex_tar.cu
@ -1,26 +1,21 @@
-/***************************************************************************
-                               pair_tex_tar.cu
-                             -------------------
-                               W. Michael Brown
-
-  "Tar" of header and source files that need texture reference definitions
-  within file scope.
-
- __________________________________________________________________________
-    This file is part of the LAMMPS GPU Library
- __________________________________________________________________________
-
-    begin                : Tue Jun 23 2009
-    copyright            : (C) 2009 by W. Michael Brown
-    email                : wmbrown@sandia.gov
- ***************************************************************************/
-
-/* -----------------------------------------------------------------------
-   Copyright (2009) Sandia Corporation.  Under the terms of Contract
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
-   ----------------------------------------------------------------------- */
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
+                         Peng Wang (Nvidia), penwang@nvidia.com
+                         Paul Crozier (SNL), pscrozi@sandia.gov
+------------------------------------------------------------------------- */
 
 #include "pair_gpu_atom.cu"
 #include "lj_gpu.cu"