Updated the coul_soft and coul_long_soft kernels with forces as acctyp3
This commit is contained in:
@ -14,7 +14,7 @@
|
|||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
|
|
||||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||||
#include <stdio.h>
|
|
||||||
#include "lal_aux_fun1.h"
|
#include "lal_aux_fun1.h"
|
||||||
#ifndef _DOUBLE_DOUBLE
|
#ifndef _DOUBLE_DOUBLE
|
||||||
_texture( pos_tex,float4);
|
_texture( pos_tex,float4);
|
||||||
@ -36,7 +36,7 @@ __kernel void k_lj_coul_long_soft(const __global numtyp4 *restrict x_,
|
|||||||
const __global numtyp *restrict sp_lj_in,
|
const __global numtyp *restrict sp_lj_in,
|
||||||
const __global int *dev_nbor,
|
const __global int *dev_nbor,
|
||||||
const __global int *dev_packed,
|
const __global int *dev_packed,
|
||||||
__global acctyp4 *restrict ans,
|
__global acctyp3 *restrict ans,
|
||||||
__global acctyp *restrict engv,
|
__global acctyp *restrict engv,
|
||||||
const int eflag, const int vflag, const int inum,
|
const int eflag, const int vflag, const int inum,
|
||||||
const int nbor_pitch,
|
const int nbor_pitch,
|
||||||
@ -59,7 +59,7 @@ __kernel void k_lj_coul_long_soft(const __global numtyp4 *restrict x_,
|
|||||||
sp_lj[6]=sp_lj_in[6];
|
sp_lj[6]=sp_lj_in[6];
|
||||||
sp_lj[7]=sp_lj_in[7];
|
sp_lj[7]=sp_lj_in[7];
|
||||||
|
|
||||||
acctyp4 f;
|
acctyp3 f;
|
||||||
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
||||||
acctyp energy, e_coul, virial[6];
|
acctyp energy, e_coul, virial[6];
|
||||||
if (EVFLAG) {
|
if (EVFLAG) {
|
||||||
@ -79,6 +79,7 @@ __kernel void k_lj_coul_long_soft(const __global numtyp4 *restrict x_,
|
|||||||
int itype=ix.w;
|
int itype=ix.w;
|
||||||
|
|
||||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||||
|
ucl_prefetch(dev_packed+nbor+n_stride);
|
||||||
int j=dev_packed[nbor];
|
int j=dev_packed[nbor];
|
||||||
|
|
||||||
numtyp factor_lj, factor_coul;
|
numtyp factor_lj, factor_coul;
|
||||||
@ -163,7 +164,7 @@ __kernel void k_lj_coul_long_soft_fast(const __global numtyp4 *restrict x_,
|
|||||||
const __global numtyp *restrict sp_lj_in,
|
const __global numtyp *restrict sp_lj_in,
|
||||||
const __global int *dev_nbor,
|
const __global int *dev_nbor,
|
||||||
const __global int *dev_packed,
|
const __global int *dev_packed,
|
||||||
__global acctyp4 *restrict ans,
|
__global acctyp3 *restrict ans,
|
||||||
__global acctyp *restrict engv,
|
__global acctyp *restrict engv,
|
||||||
const int eflag, const int vflag,
|
const int eflag, const int vflag,
|
||||||
const int inum, const int nbor_pitch,
|
const int inum, const int nbor_pitch,
|
||||||
@ -187,7 +188,7 @@ __kernel void k_lj_coul_long_soft_fast(const __global numtyp4 *restrict x_,
|
|||||||
lj3[tid]=lj3_in[tid];
|
lj3[tid]=lj3_in[tid];
|
||||||
}
|
}
|
||||||
|
|
||||||
acctyp4 f;
|
acctyp3 f;
|
||||||
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
||||||
acctyp energy, e_coul, virial[6];
|
acctyp energy, e_coul, virial[6];
|
||||||
if (EVFLAG) {
|
if (EVFLAG) {
|
||||||
@ -210,6 +211,7 @@ __kernel void k_lj_coul_long_soft_fast(const __global numtyp4 *restrict x_,
|
|||||||
int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
|
int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
|
||||||
|
|
||||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||||
|
ucl_prefetch(dev_packed+nbor+n_stride);
|
||||||
int j=dev_packed[nbor];
|
int j=dev_packed[nbor];
|
||||||
|
|
||||||
numtyp factor_lj, factor_coul;
|
numtyp factor_lj, factor_coul;
|
||||||
|
|||||||
@ -36,7 +36,7 @@ __kernel void k_lj_coul_soft(const __global numtyp4 *restrict x_,
|
|||||||
const __global numtyp *restrict sp_lj_in,
|
const __global numtyp *restrict sp_lj_in,
|
||||||
const __global int *dev_nbor,
|
const __global int *dev_nbor,
|
||||||
const __global int *dev_packed,
|
const __global int *dev_packed,
|
||||||
__global acctyp4 *restrict ans,
|
__global acctyp3 *restrict ans,
|
||||||
__global acctyp *restrict engv,
|
__global acctyp *restrict engv,
|
||||||
const int eflag, const int vflag, const int inum,
|
const int eflag, const int vflag, const int inum,
|
||||||
const int nbor_pitch,
|
const int nbor_pitch,
|
||||||
@ -59,7 +59,7 @@ __kernel void k_lj_coul_soft(const __global numtyp4 *restrict x_,
|
|||||||
sp_lj[6]=sp_lj_in[6];
|
sp_lj[6]=sp_lj_in[6];
|
||||||
sp_lj[7]=sp_lj_in[7];
|
sp_lj[7]=sp_lj_in[7];
|
||||||
|
|
||||||
acctyp4 f;
|
acctyp3 f;
|
||||||
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
||||||
acctyp energy, e_coul, virial[6];
|
acctyp energy, e_coul, virial[6];
|
||||||
if (EVFLAG) {
|
if (EVFLAG) {
|
||||||
@ -79,6 +79,7 @@ __kernel void k_lj_coul_soft(const __global numtyp4 *restrict x_,
|
|||||||
int itype=ix.w;
|
int itype=ix.w;
|
||||||
|
|
||||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||||
|
ucl_prefetch(dev_packed+nbor+n_stride);
|
||||||
int j=dev_packed[nbor];
|
int j=dev_packed[nbor];
|
||||||
|
|
||||||
numtyp factor_lj, factor_coul;
|
numtyp factor_lj, factor_coul;
|
||||||
@ -155,7 +156,7 @@ __kernel void k_lj_coul_soft_fast(const __global numtyp4 *restrict x_,
|
|||||||
const __global numtyp *restrict sp_lj_in,
|
const __global numtyp *restrict sp_lj_in,
|
||||||
const __global int *dev_nbor,
|
const __global int *dev_nbor,
|
||||||
const __global int *dev_packed,
|
const __global int *dev_packed,
|
||||||
__global acctyp4 *restrict ans,
|
__global acctyp3 *restrict ans,
|
||||||
__global acctyp *restrict engv,
|
__global acctyp *restrict engv,
|
||||||
const int eflag, const int vflag, const int inum,
|
const int eflag, const int vflag, const int inum,
|
||||||
const int nbor_pitch,
|
const int nbor_pitch,
|
||||||
@ -181,7 +182,7 @@ __kernel void k_lj_coul_soft_fast(const __global numtyp4 *restrict x_,
|
|||||||
lj3[tid]=lj3_in[tid];
|
lj3[tid]=lj3_in[tid];
|
||||||
}
|
}
|
||||||
|
|
||||||
acctyp4 f;
|
acctyp3 f;
|
||||||
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
||||||
acctyp energy, e_coul, virial[6];
|
acctyp energy, e_coul, virial[6];
|
||||||
if (EVFLAG) {
|
if (EVFLAG) {
|
||||||
|
|||||||
Reference in New Issue
Block a user