git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6136 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -20,7 +20,11 @@
|
|||||||
CUDA_HOME = /usr/local/cuda
|
CUDA_HOME = /usr/local/cuda
|
||||||
NVCC = nvcc
|
NVCC = nvcc
|
||||||
|
|
||||||
|
# newer CUDA
|
||||||
CUDA_ARCH = -arch=sm_13
|
CUDA_ARCH = -arch=sm_13
|
||||||
|
# older CUDA
|
||||||
|
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
|
||||||
|
|
||||||
CUDA_PRECISION = -D_SINGLE_SINGLE
|
CUDA_PRECISION = -D_SINGLE_SINGLE
|
||||||
CUDA_INCLUDE = -I$(CUDA_HOME)/include
|
CUDA_INCLUDE = -I$(CUDA_HOME)/include
|
||||||
CUDA_LIB = -L$(CUDA_HOME)/lib64
|
CUDA_LIB = -L$(CUDA_HOME)/lib64
|
||||||
|
|||||||
@ -33,13 +33,17 @@ NOTE: Installation of the CUDA SDK is not required.
|
|||||||
|
|
||||||
Current pair styles supporting GPU acceleration:
|
Current pair styles supporting GPU acceleration:
|
||||||
|
|
||||||
1. lj/cut/gpu
|
1. lj/cut
|
||||||
2. lj/cut/coul/cut/gpu
|
2. lj96/cut
|
||||||
3. lj/cut/coul/long/gpu
|
3. lj/expand
|
||||||
4. lj96/cut/gpu
|
4. lj/cut/coul/cut
|
||||||
5. gayberne/gpu
|
5. lj/cut/coul/long
|
||||||
6. cmm/cg/gpu
|
6. lj/charmm/coul/long
|
||||||
7. cmm/cg/coul/long/gpu
|
7. morse
|
||||||
|
8. cg/cmm
|
||||||
|
9. cg/cmm/coul/long
|
||||||
|
10. gayberne
|
||||||
|
11. pppm
|
||||||
|
|
||||||
MULTIPLE LAMMPS PROCESSES
|
MULTIPLE LAMMPS PROCESSES
|
||||||
|
|
||||||
@ -52,12 +56,12 @@ LAMMPS user manual for details on running with GPU acceleration.
|
|||||||
|
|
||||||
BUILDING AND PRECISION MODES
|
BUILDING AND PRECISION MODES
|
||||||
|
|
||||||
To build, edit the CUDA_ARCH, CUDA_PRECISION, CUDA_HOME, NVCC, CUDA_INCLUD,
|
To build, edit the CUDA_ARCH, CUDA_PRECISION, CUDA_HOME variables in one of
|
||||||
CUDA_LIB and CUDA_OPTS variables in one of the Makefiles. CUDA_ARCH should
|
the Makefiles. CUDA_ARCH should be set based on the compute capability of
|
||||||
be set based on the compute capability of your GPU. This can be verified by
|
your GPU. This can be verified by running the nvc_get_devices executable after
|
||||||
running the nvc_get_devices executable after the build is complete.
|
the build is complete. Additionally, the GPU package must be installed and
|
||||||
Additionally, the GPU package must be installed and compiled for LAMMPS.
|
compiled for LAMMPS. This may require editing the gpu_SYSPATH variable in the
|
||||||
This may require editing the gpu_SYSPATH variable in the LAMMPS makefile.
|
LAMMPS makefile.
|
||||||
|
|
||||||
Please note that the GPU library accesses the CUDA driver library directly,
|
Please note that the GPU library accesses the CUDA driver library directly,
|
||||||
so it needs to be linked not only to the CUDA runtime library (libcudart.so)
|
so it needs to be linked not only to the CUDA runtime library (libcudart.so)
|
||||||
@ -74,6 +78,10 @@ the CUDA_PRECISION variable:
|
|||||||
CUDA_PREC = -D_DOUBLE_DOUBLE # Double precision for all calculations
|
CUDA_PREC = -D_DOUBLE_DOUBLE # Double precision for all calculations
|
||||||
CUDA_PREC = -D_SINGLE_DOUBLE # Accumulation of forces, etc. in double
|
CUDA_PREC = -D_SINGLE_DOUBLE # Accumulation of forces, etc. in double
|
||||||
|
|
||||||
|
NOTE: PPPM acceleration can only be run on GPUs with compute capability>=1.1.
|
||||||
|
You will get the error "GPU library not compiled for this accelerator."
|
||||||
|
when attempting to run PPPM on a GPU with compute capability 1.0.
|
||||||
|
|
||||||
NOTE: Double precision is only supported on certain GPUs (with
|
NOTE: Double precision is only supported on certain GPUs (with
|
||||||
compute capability>=1.3).
|
compute capability>=1.3).
|
||||||
|
|
||||||
@ -83,15 +91,17 @@ NOTE: For Tesla and other graphics cards with compute capability>=1.3,
|
|||||||
NOTE: For Fermi, make sure that -arch=sm_20 is set on the CUDA_ARCH line.
|
NOTE: For Fermi, make sure that -arch=sm_20 is set on the CUDA_ARCH line.
|
||||||
|
|
||||||
NOTE: The gayberne/gpu pair style will only be installed if the ASPHERE
|
NOTE: The gayberne/gpu pair style will only be installed if the ASPHERE
|
||||||
package has been installed before installing the GPU package in LAMMPS.
|
package has been installed.
|
||||||
|
|
||||||
NOTE: The cg/cmm/gpu and cg/cmm/coul/long/gpu pair styles will only be
|
NOTE: The cg/cmm/gpu and cg/cmm/coul/long/gpu pair styles will only be
|
||||||
installed if the USER-CG-CMM package has been installed before
|
installed if the USER-CG-CMM package has been installed.
|
||||||
installing the GPU package in LAMMPS.
|
|
||||||
|
|
||||||
NOTE: The lj/cut/coul/long/gpu and cg/cmm/coul/long/gpu style will only be
|
NOTE: The lj/cut/coul/long/gpu, cg/cmm/coul/long/gpu, pppm/gpu/single, and
|
||||||
installed if the KSPACE package has been installed before installing
|
pppm/gpu/double styles will only be installed if the KSPACE package has
|
||||||
the GPU package in LAMMPS.
|
been installed.
|
||||||
|
|
||||||
|
NOTE: The lj/charmm/coul/long will only be installed if the MOLECULE package
|
||||||
|
has been installed.
|
||||||
|
|
||||||
EXAMPLE BUILD PROCESS
|
EXAMPLE BUILD PROCESS
|
||||||
|
|
||||||
@ -105,7 +115,3 @@ make yes-asphere
|
|||||||
make yes-kspace
|
make yes-kspace
|
||||||
make yes-gpu
|
make yes-gpu
|
||||||
make linux
|
make linux
|
||||||
|
|
||||||
------------------------------------------------------------------------
|
|
||||||
Last merge with gpulammps: r561 on 2010-11-12
|
|
||||||
------------------------------------------------------------------------
|
|
||||||
|
|||||||
@ -18,30 +18,6 @@
|
|||||||
#ifndef CMM_GPU_KERNEL
|
#ifndef CMM_GPU_KERNEL
|
||||||
#define CMM_GPU_KERNEL
|
#define CMM_GPU_KERNEL
|
||||||
|
|
||||||
#ifdef _DOUBLE_DOUBLE
|
|
||||||
#define numtyp double
|
|
||||||
#define numtyp2 double2
|
|
||||||
#define numtyp4 double4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef _SINGLE_DOUBLE
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef numtyp
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp float
|
|
||||||
#define acctyp4 float4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef NV_KERNEL
|
#ifdef NV_KERNEL
|
||||||
|
|
||||||
#include "nv_kernel_def.h"
|
#include "nv_kernel_def.h"
|
||||||
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef _DOUBLE_DOUBLE
|
||||||
|
#define numtyp double
|
||||||
|
#define numtyp2 double2
|
||||||
|
#define numtyp4 double4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _SINGLE_DOUBLE
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef numtyp
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp float
|
||||||
|
#define acctyp4 float4
|
||||||
|
#endif
|
||||||
|
|
||||||
#define SBBITS 30
|
#define SBBITS 30
|
||||||
#define NEIGHMASK 0x3FFFFFFF
|
#define NEIGHMASK 0x3FFFFFFF
|
||||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||||
|
|||||||
@ -18,38 +18,6 @@
|
|||||||
#ifndef CMML_GPU_KERNEL
|
#ifndef CMML_GPU_KERNEL
|
||||||
#define CMML_GPU_KERNEL
|
#define CMML_GPU_KERNEL
|
||||||
|
|
||||||
#ifdef _DOUBLE_DOUBLE
|
|
||||||
#define numtyp double
|
|
||||||
#define numtyp2 double2
|
|
||||||
#define numtyp4 double4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef _SINGLE_DOUBLE
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef numtyp
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp float
|
|
||||||
#define acctyp4 float4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define EWALD_F (numtyp)1.12837917
|
|
||||||
#define EWALD_P (numtyp)0.3275911
|
|
||||||
#define A1 (numtyp)0.254829592
|
|
||||||
#define A2 (numtyp)-0.284496736
|
|
||||||
#define A3 (numtyp)1.421413741
|
|
||||||
#define A4 (numtyp)-1.453152027
|
|
||||||
#define A5 (numtyp)1.061405429
|
|
||||||
|
|
||||||
#ifdef NV_KERNEL
|
#ifdef NV_KERNEL
|
||||||
|
|
||||||
#include "nv_kernel_def.h"
|
#include "nv_kernel_def.h"
|
||||||
@ -93,6 +61,38 @@ __inline float fetch_q(const int& i, const float *q)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef _DOUBLE_DOUBLE
|
||||||
|
#define numtyp double
|
||||||
|
#define numtyp2 double2
|
||||||
|
#define numtyp4 double4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _SINGLE_DOUBLE
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef numtyp
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp float
|
||||||
|
#define acctyp4 float4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define EWALD_F (numtyp)1.12837917
|
||||||
|
#define EWALD_P (numtyp)0.3275911
|
||||||
|
#define A1 (numtyp)0.254829592
|
||||||
|
#define A2 (numtyp)-0.284496736
|
||||||
|
#define A3 (numtyp)1.421413741
|
||||||
|
#define A4 (numtyp)-1.453152027
|
||||||
|
#define A5 (numtyp)1.061405429
|
||||||
|
|
||||||
#define SBBITS 30
|
#define SBBITS 30
|
||||||
#define NEIGHMASK 0x3FFFFFFF
|
#define NEIGHMASK 0x3FFFFFFF
|
||||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||||
|
|||||||
@ -18,40 +18,6 @@
|
|||||||
#ifndef CRML_GPU_KERNEL
|
#ifndef CRML_GPU_KERNEL
|
||||||
#define CRML_GPU_KERNEL
|
#define CRML_GPU_KERNEL
|
||||||
|
|
||||||
#define MAX_BIO_SHARED_TYPES 128
|
|
||||||
|
|
||||||
#ifdef _DOUBLE_DOUBLE
|
|
||||||
#define numtyp double
|
|
||||||
#define numtyp2 double2
|
|
||||||
#define numtyp4 double4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef _SINGLE_DOUBLE
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef numtyp
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp float
|
|
||||||
#define acctyp4 float4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define EWALD_F (numtyp)1.12837917
|
|
||||||
#define EWALD_P (numtyp)0.3275911
|
|
||||||
#define A1 (numtyp)0.254829592
|
|
||||||
#define A2 (numtyp)-0.284496736
|
|
||||||
#define A3 (numtyp)1.421413741
|
|
||||||
#define A4 (numtyp)-1.453152027
|
|
||||||
#define A5 (numtyp)1.061405429
|
|
||||||
|
|
||||||
#ifdef NV_KERNEL
|
#ifdef NV_KERNEL
|
||||||
|
|
||||||
#include "nv_kernel_def.h"
|
#include "nv_kernel_def.h"
|
||||||
@ -94,6 +60,40 @@ __inline float fetch_q(const int& i, const float *q)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define MAX_BIO_SHARED_TYPES 128
|
||||||
|
|
||||||
|
#ifdef _DOUBLE_DOUBLE
|
||||||
|
#define numtyp double
|
||||||
|
#define numtyp2 double2
|
||||||
|
#define numtyp4 double4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _SINGLE_DOUBLE
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef numtyp
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp float
|
||||||
|
#define acctyp4 float4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define EWALD_F (numtyp)1.12837917
|
||||||
|
#define EWALD_P (numtyp)0.3275911
|
||||||
|
#define A1 (numtyp)0.254829592
|
||||||
|
#define A2 (numtyp)-0.284496736
|
||||||
|
#define A3 (numtyp)1.421413741
|
||||||
|
#define A4 (numtyp)-1.453152027
|
||||||
|
#define A5 (numtyp)1.061405429
|
||||||
|
|
||||||
#define SBBITS 30
|
#define SBBITS 30
|
||||||
#define NEIGHMASK 0x3FFFFFFF
|
#define NEIGHMASK 0x3FFFFFFF
|
||||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||||
|
|||||||
@ -18,16 +18,6 @@
|
|||||||
#ifndef PAIR_GPU_KERNEL_H
|
#ifndef PAIR_GPU_KERNEL_H
|
||||||
#define PAIR_GPU_KERNEL_H
|
#define PAIR_GPU_KERNEL_H
|
||||||
|
|
||||||
#ifdef _DOUBLE_DOUBLE
|
|
||||||
#define numtyp double
|
|
||||||
#define numtyp2 double2
|
|
||||||
#define numtyp4 double4
|
|
||||||
#else
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef NV_KERNEL
|
#ifdef NV_KERNEL
|
||||||
|
|
||||||
#include "nv_kernel_def.h"
|
#include "nv_kernel_def.h"
|
||||||
@ -44,6 +34,16 @@
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef _DOUBLE_DOUBLE
|
||||||
|
#define numtyp double
|
||||||
|
#define numtyp2 double2
|
||||||
|
#define numtyp4 double4
|
||||||
|
#else
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#endif
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Unpack neighbors from dev_ij array into dev_nbor matrix for coalesced access
|
// Unpack neighbors from dev_ij array into dev_nbor matrix for coalesced access
|
||||||
// -- Only unpack neighbors matching the specified inclusive range of forms
|
// -- Only unpack neighbors matching the specified inclusive range of forms
|
||||||
|
|||||||
@ -33,6 +33,14 @@
|
|||||||
#define MEM_THREADS 32
|
#define MEM_THREADS 32
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CUDA_PRE_THREE
|
||||||
|
struct __builtin_align__(16) _double4
|
||||||
|
{
|
||||||
|
double x, y, z, w;
|
||||||
|
};
|
||||||
|
typedef struct _double4 double4;
|
||||||
|
#endif
|
||||||
|
|
||||||
#define GLOBAL_ID_X threadIdx.x+mul24(blockIdx.x,blockDim.x)
|
#define GLOBAL_ID_X threadIdx.x+mul24(blockIdx.x,blockDim.x)
|
||||||
#define GLOBAL_ID_Y threadIdx.y+mul24(blockIdx.y,blockDim.y)
|
#define GLOBAL_ID_Y threadIdx.y+mul24(blockIdx.y,blockDim.y)
|
||||||
#define GLOBAL_SIZE_X mul24(gridDim.x,blockDim.x);
|
#define GLOBAL_SIZE_X mul24(gridDim.x,blockDim.x);
|
||||||
|
|||||||
@ -18,30 +18,6 @@
|
|||||||
#ifndef LJ96_GPU_KERNEL
|
#ifndef LJ96_GPU_KERNEL
|
||||||
#define LJ96_GPU_KERNEL
|
#define LJ96_GPU_KERNEL
|
||||||
|
|
||||||
#ifdef _DOUBLE_DOUBLE
|
|
||||||
#define numtyp double
|
|
||||||
#define numtyp2 double2
|
|
||||||
#define numtyp4 double4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef _SINGLE_DOUBLE
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef numtyp
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp float
|
|
||||||
#define acctyp4 float4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef NV_KERNEL
|
#ifdef NV_KERNEL
|
||||||
|
|
||||||
#include "nv_kernel_def.h"
|
#include "nv_kernel_def.h"
|
||||||
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef _DOUBLE_DOUBLE
|
||||||
|
#define numtyp double
|
||||||
|
#define numtyp2 double2
|
||||||
|
#define numtyp4 double4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _SINGLE_DOUBLE
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef numtyp
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp float
|
||||||
|
#define acctyp4 float4
|
||||||
|
#endif
|
||||||
|
|
||||||
#define SBBITS 30
|
#define SBBITS 30
|
||||||
#define NEIGHMASK 0x3FFFFFFF
|
#define NEIGHMASK 0x3FFFFFFF
|
||||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||||
|
|||||||
@ -18,30 +18,6 @@
|
|||||||
#ifndef LJ_GPU_KERNEL
|
#ifndef LJ_GPU_KERNEL
|
||||||
#define LJ_GPU_KERNEL
|
#define LJ_GPU_KERNEL
|
||||||
|
|
||||||
#ifdef _DOUBLE_DOUBLE
|
|
||||||
#define numtyp double
|
|
||||||
#define numtyp2 double2
|
|
||||||
#define numtyp4 double4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef _SINGLE_DOUBLE
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef numtyp
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp float
|
|
||||||
#define acctyp4 float4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef NV_KERNEL
|
#ifdef NV_KERNEL
|
||||||
|
|
||||||
#include "nv_kernel_def.h"
|
#include "nv_kernel_def.h"
|
||||||
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef _DOUBLE_DOUBLE
|
||||||
|
#define numtyp double
|
||||||
|
#define numtyp2 double2
|
||||||
|
#define numtyp4 double4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _SINGLE_DOUBLE
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef numtyp
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp float
|
||||||
|
#define acctyp4 float4
|
||||||
|
#endif
|
||||||
|
|
||||||
#define SBBITS 30
|
#define SBBITS 30
|
||||||
#define NEIGHMASK 0x3FFFFFFF
|
#define NEIGHMASK 0x3FFFFFFF
|
||||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||||
|
|||||||
@ -18,30 +18,6 @@
|
|||||||
#ifndef LJE_GPU_KERNEL
|
#ifndef LJE_GPU_KERNEL
|
||||||
#define LJE_GPU_KERNEL
|
#define LJE_GPU_KERNEL
|
||||||
|
|
||||||
#ifdef _DOUBLE_DOUBLE
|
|
||||||
#define numtyp double
|
|
||||||
#define numtyp2 double2
|
|
||||||
#define numtyp4 double4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef _SINGLE_DOUBLE
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef numtyp
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp float
|
|
||||||
#define acctyp4 float4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef NV_KERNEL
|
#ifdef NV_KERNEL
|
||||||
|
|
||||||
#include "nv_kernel_def.h"
|
#include "nv_kernel_def.h"
|
||||||
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef _DOUBLE_DOUBLE
|
||||||
|
#define numtyp double
|
||||||
|
#define numtyp2 double2
|
||||||
|
#define numtyp4 double4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _SINGLE_DOUBLE
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef numtyp
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp float
|
||||||
|
#define acctyp4 float4
|
||||||
|
#endif
|
||||||
|
|
||||||
#define SBBITS 30
|
#define SBBITS 30
|
||||||
#define NEIGHMASK 0x3FFFFFFF
|
#define NEIGHMASK 0x3FFFFFFF
|
||||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||||
|
|||||||
@ -18,30 +18,6 @@
|
|||||||
#ifndef LJC_GPU_KERNEL
|
#ifndef LJC_GPU_KERNEL
|
||||||
#define LJC_GPU_KERNEL
|
#define LJC_GPU_KERNEL
|
||||||
|
|
||||||
#ifdef _DOUBLE_DOUBLE
|
|
||||||
#define numtyp double
|
|
||||||
#define numtyp2 double2
|
|
||||||
#define numtyp4 double4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef _SINGLE_DOUBLE
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef numtyp
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp float
|
|
||||||
#define acctyp4 float4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef NV_KERNEL
|
#ifdef NV_KERNEL
|
||||||
|
|
||||||
#include "nv_kernel_def.h"
|
#include "nv_kernel_def.h"
|
||||||
@ -85,6 +61,30 @@ __inline float fetch_q(const int& i, const float *q)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef _DOUBLE_DOUBLE
|
||||||
|
#define numtyp double
|
||||||
|
#define numtyp2 double2
|
||||||
|
#define numtyp4 double4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _SINGLE_DOUBLE
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef numtyp
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp float
|
||||||
|
#define acctyp4 float4
|
||||||
|
#endif
|
||||||
|
|
||||||
#define SBBITS 30
|
#define SBBITS 30
|
||||||
#define NEIGHMASK 0x3FFFFFFF
|
#define NEIGHMASK 0x3FFFFFFF
|
||||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||||
|
|||||||
@ -18,38 +18,6 @@
|
|||||||
#ifndef LJCL_GPU_KERNEL
|
#ifndef LJCL_GPU_KERNEL
|
||||||
#define LJCL_GPU_KERNEL
|
#define LJCL_GPU_KERNEL
|
||||||
|
|
||||||
#ifdef _DOUBLE_DOUBLE
|
|
||||||
#define numtyp double
|
|
||||||
#define numtyp2 double2
|
|
||||||
#define numtyp4 double4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef _SINGLE_DOUBLE
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef numtyp
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp float
|
|
||||||
#define acctyp4 float4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define EWALD_F (numtyp)1.12837917
|
|
||||||
#define EWALD_P (numtyp)0.3275911
|
|
||||||
#define A1 (numtyp)0.254829592
|
|
||||||
#define A2 (numtyp)-0.284496736
|
|
||||||
#define A3 (numtyp)1.421413741
|
|
||||||
#define A4 (numtyp)-1.453152027
|
|
||||||
#define A5 (numtyp)1.061405429
|
|
||||||
|
|
||||||
#ifdef NV_KERNEL
|
#ifdef NV_KERNEL
|
||||||
|
|
||||||
#include "nv_kernel_def.h"
|
#include "nv_kernel_def.h"
|
||||||
@ -93,6 +61,38 @@ __inline float fetch_q(const int& i, const float *q)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef _DOUBLE_DOUBLE
|
||||||
|
#define numtyp double
|
||||||
|
#define numtyp2 double2
|
||||||
|
#define numtyp4 double4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _SINGLE_DOUBLE
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef numtyp
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp float
|
||||||
|
#define acctyp4 float4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define EWALD_F (numtyp)1.12837917
|
||||||
|
#define EWALD_P (numtyp)0.3275911
|
||||||
|
#define A1 (numtyp)0.254829592
|
||||||
|
#define A2 (numtyp)-0.284496736
|
||||||
|
#define A3 (numtyp)1.421413741
|
||||||
|
#define A4 (numtyp)-1.453152027
|
||||||
|
#define A5 (numtyp)1.061405429
|
||||||
|
|
||||||
#define SBBITS 30
|
#define SBBITS 30
|
||||||
#define NEIGHMASK 0x3FFFFFFF
|
#define NEIGHMASK 0x3FFFFFFF
|
||||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||||
|
|||||||
@ -18,30 +18,6 @@
|
|||||||
#ifndef MORSE_GPU_KERNEL
|
#ifndef MORSE_GPU_KERNEL
|
||||||
#define MORSE_GPU_KERNEL
|
#define MORSE_GPU_KERNEL
|
||||||
|
|
||||||
#ifdef _DOUBLE_DOUBLE
|
|
||||||
#define numtyp double
|
|
||||||
#define numtyp2 double2
|
|
||||||
#define numtyp4 double4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef _SINGLE_DOUBLE
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef numtyp
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp2 float2
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp float
|
|
||||||
#define acctyp4 float4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef NV_KERNEL
|
#ifdef NV_KERNEL
|
||||||
|
|
||||||
#include "nv_kernel_def.h"
|
#include "nv_kernel_def.h"
|
||||||
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef _DOUBLE_DOUBLE
|
||||||
|
#define numtyp double
|
||||||
|
#define numtyp2 double2
|
||||||
|
#define numtyp4 double4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _SINGLE_DOUBLE
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef numtyp
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp2 float2
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp float
|
||||||
|
#define acctyp4 float4
|
||||||
|
#endif
|
||||||
|
|
||||||
#define SBBITS 30
|
#define SBBITS 30
|
||||||
#define NEIGHMASK 0x3FFFFFFF
|
#define NEIGHMASK 0x3FFFFFFF
|
||||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||||
|
|||||||
@ -15,6 +15,13 @@
|
|||||||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifdef NV_KERNEL
|
||||||
|
#include "geryon/ucl_nv_kernel.h"
|
||||||
|
#else
|
||||||
|
#pragma OPENCL EXTENSION cl_khr_fp64: enable
|
||||||
|
#define GLOBAL_ID_X get_global_id(0)
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef _DOUBLE_DOUBLE
|
#ifdef _DOUBLE_DOUBLE
|
||||||
#define numtyp double
|
#define numtyp double
|
||||||
#define numtyp4 double4
|
#define numtyp4 double4
|
||||||
@ -23,13 +30,6 @@
|
|||||||
#define numtyp4 float4
|
#define numtyp4 float4
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef NV_KERNEL
|
|
||||||
#include "geryon/ucl_nv_kernel.h"
|
|
||||||
#else
|
|
||||||
#pragma OPENCL EXTENSION cl_khr_fp64: enable
|
|
||||||
#define GLOBAL_ID_X get_global_id(0)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
__kernel void kernel_cast_x(__global numtyp4 *x_type, __global double *x,
|
__kernel void kernel_cast_x(__global numtyp4 *x_type, __global double *x,
|
||||||
__global int *type, const int nall) {
|
__global int *type, const int nall) {
|
||||||
int ii=GLOBAL_ID_X;
|
int ii=GLOBAL_ID_X;
|
||||||
|
|||||||
@ -549,8 +549,9 @@ int PairGPUDeviceT::compile_kernels() {
|
|||||||
k_info.run(&d_gpu_lib_data.begin());
|
k_info.run(&d_gpu_lib_data.begin());
|
||||||
ucl_copy(h_gpu_lib_data,d_gpu_lib_data,false);
|
ucl_copy(h_gpu_lib_data,d_gpu_lib_data,false);
|
||||||
|
|
||||||
|
_ptx_arch=static_cast<double>(h_gpu_lib_data[0])/100.0;
|
||||||
#ifndef USE_OPENCL
|
#ifndef USE_OPENCL
|
||||||
if (static_cast<double>(h_gpu_lib_data[0])/100.0>gpu->arch())
|
if (_ptx_arch>gpu->arch())
|
||||||
return -4;
|
return -4;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@ -226,6 +226,8 @@ class PairGPUDevice {
|
|||||||
inline int block_bio_pair() const { return _block_bio_pair; }
|
inline int block_bio_pair() const { return _block_bio_pair; }
|
||||||
/// Return the maximum number of atom types for shared mem with "bio" styles
|
/// Return the maximum number of atom types for shared mem with "bio" styles
|
||||||
inline int max_bio_shared_types() const { return _max_bio_shared_types; }
|
inline int max_bio_shared_types() const { return _max_bio_shared_types; }
|
||||||
|
/// Architecture gpu code compiled for (returns 0 for OpenCL)
|
||||||
|
inline double ptx_arch() const { return _ptx_arch; }
|
||||||
|
|
||||||
// -------------------- SHARED DEVICE ROUTINES --------------------
|
// -------------------- SHARED DEVICE ROUTINES --------------------
|
||||||
// Perform asynchronous zero of integer array
|
// Perform asynchronous zero of integer array
|
||||||
@ -281,6 +283,7 @@ class PairGPUDevice {
|
|||||||
int _gpu_mode, _first_device, _last_device, _nthreads;
|
int _gpu_mode, _first_device, _last_device, _nthreads;
|
||||||
double _particle_split;
|
double _particle_split;
|
||||||
double _cpu_full;
|
double _cpu_full;
|
||||||
|
double _ptx_arch;
|
||||||
|
|
||||||
int _num_mem_threads, _warp_size, _threads_per_atom, _threads_per_charge;
|
int _num_mem_threads, _warp_size, _threads_per_atom, _threads_per_charge;
|
||||||
int _pppm_max_spline, _pppm_block;
|
int _pppm_max_spline, _pppm_block;
|
||||||
|
|||||||
@ -18,27 +18,6 @@
|
|||||||
#ifndef PPPM_GPU_KERNEL
|
#ifndef PPPM_GPU_KERNEL
|
||||||
#define PPPM_GPU_KERNEL
|
#define PPPM_GPU_KERNEL
|
||||||
|
|
||||||
#ifdef _DOUBLE_DOUBLE
|
|
||||||
#define numtyp double
|
|
||||||
#define numtyp4 double4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef _SINGLE_DOUBLE
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp double
|
|
||||||
#define acctyp4 double4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef numtyp
|
|
||||||
#define numtyp float
|
|
||||||
#define numtyp4 float4
|
|
||||||
#define acctyp float
|
|
||||||
#define acctyp4 float4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef NV_KERNEL
|
#ifdef NV_KERNEL
|
||||||
|
|
||||||
#include "geryon/ucl_nv_kernel.h"
|
#include "geryon/ucl_nv_kernel.h"
|
||||||
@ -67,6 +46,12 @@ __inline float fetch_q(const int& i, const float *q)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Allow PPPM to compile without atomics for NVIDIA 1.0 cards, error
|
||||||
|
// generated at runtime with use of pppm/gpu
|
||||||
|
#if (__CUDA_ARCH__ < 110)
|
||||||
|
#define atom_add(x,y) 0
|
||||||
|
#endif
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#pragma OPENCL EXTENSION cl_khr_fp64: enable
|
#pragma OPENCL EXTENSION cl_khr_fp64: enable
|
||||||
@ -85,6 +70,27 @@ __inline float fetch_q(const int& i, const float *q)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef _DOUBLE_DOUBLE
|
||||||
|
#define numtyp double
|
||||||
|
#define numtyp4 double4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _SINGLE_DOUBLE
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp double
|
||||||
|
#define acctyp4 double4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef numtyp
|
||||||
|
#define numtyp float
|
||||||
|
#define numtyp4 float4
|
||||||
|
#define acctyp float
|
||||||
|
#define acctyp4 float4
|
||||||
|
#endif
|
||||||
|
|
||||||
// Maximum order for spline
|
// Maximum order for spline
|
||||||
#define PPPM_MAX_SPLINE 8
|
#define PPPM_MAX_SPLINE 8
|
||||||
// Thread block size for PPPM kernels
|
// Thread block size for PPPM kernels
|
||||||
|
|||||||
@ -66,6 +66,10 @@ grdtyp * PPPMGPUMemoryT::init(const int nlocal, const int nall, FILE *_screen,
|
|||||||
flag=-5;
|
flag=-5;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
if (device->ptx_arch()>0.0 && device->ptx_arch()<1.1) {
|
||||||
|
flag=-4;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
ucl_device=device->gpu;
|
ucl_device=device->gpu;
|
||||||
atom=&device->atom;
|
atom=&device->atom;
|
||||||
|
|||||||
Reference in New Issue
Block a user