From 8e36364f5ceaa166915ce553aa6b4f8e201fbab8 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Thu, 7 Apr 2016 21:04:44 +0000
Subject: [PATCH 01/12] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@14805
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/ASPHERE/pair_line_lj.cpp | 4 +-
src/DEPEND/fastdep.c | 85 ++++++---
src/GRANULAR/pair_gran_hertz_history.cpp | 2 -
src/GRANULAR/pair_gran_hooke.cpp | 2 -
src/GRANULAR/pair_gran_hooke_history.cpp | 2 -
src/KOKKOS/Install.sh | 8 +
src/KOKKOS/fix_nh_kokkos.cpp | 1 -
src/MANYBODY/pair_airebo.cpp | 1 -
src/MANYBODY/pair_bop.cpp | 8 +-
src/MANYBODY/pair_eim.cpp | 54 +++---
src/MANYBODY/pair_polymorphic.cpp | 2 +
src/MEAM/pair_meam.cpp | 2 +-
src/Make.py | 216 ++++++++++++++++++-----
src/Makefile | 4 +-
src/QEQ/fix_qeq_fire.cpp | 9 +-
src/REPLICA/temper.cpp | 31 +++-
src/STUBS/Makefile.mingw32-cross | 2 +-
src/STUBS/Makefile.mingw64-cross | 2 +-
src/USER-OMP/pair_airebo_omp.cpp | 1 -
src/compute_chunk_atom.cpp | 2 +-
src/domain.cpp | 13 +-
src/dump_image.cpp | 1 -
src/fix_ave_time.cpp | 3 +-
src/math_const.h | 1 +
src/read_data.cpp | 2 +-
src/set.cpp | 1 -
26 files changed, 314 insertions(+), 145 deletions(-)
diff --git a/src/ASPHERE/pair_line_lj.cpp b/src/ASPHERE/pair_line_lj.cpp
index 737aefa18d..aa3493ef47 100644
--- a/src/ASPHERE/pair_line_lj.cpp
+++ b/src/ASPHERE/pair_line_lj.cpp
@@ -69,11 +69,11 @@ PairLineLJ::~PairLineLJ()
void PairLineLJ::compute(int eflag, int vflag)
{
- int i,j,ii,jj,inum,jnum,itype,jtype,tmp;
+ int i,j,ii,jj,inum,jnum,itype,jtype;
int ni,nj,npi,npj,ifirst,jfirst;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r2inv,r6inv,term1,term2,sig,sig3,forcelj;
- double xi[2],xj[2],fi[2],fj[2],dxi,dxj,dyi,dyj;
+ double xi[2],xj[2],fi[2],dxi,dxj,dyi,dyj;
int *ilist,*jlist,*numneigh,**firstneigh;
evdwl = 0.0;
diff --git a/src/DEPEND/fastdep.c b/src/DEPEND/fastdep.c
index 5400e03034..2f4927abce 100644
--- a/src/DEPEND/fastdep.c
+++ b/src/DEPEND/fastdep.c
@@ -33,7 +33,24 @@
#include
#include
-const char version[] = "2.0";
+const char version[] = "2.1";
+
+/* store list of accepted extensions for object targets */
+static const char *extensions[] = { ".cpp", ".c", ".cu" };
+static const int numextensions = sizeof(extensions)/sizeof(const char *);
+
+/* strdup() is not part of ANSI C. provide a replacement for portability */
+static char *my_strdup(const char *src)
+{
+ size_t len; /* strlen() returns size_t; an int could truncate or go negative */
+ char *ptr;
+
+ if (src == NULL) return NULL; /* NULL in, NULL out */
+ len = strlen(src);
+ ptr = (char *)malloc(len+1); /* +1 for the terminating '\0' */
+ if (ptr) memcpy(ptr,src,len+1); /* on allocation failure NULL is returned */
+ return ptr; /* caller owns the copy and must free() it */
+}
/************************************************************************
* utility functions
@@ -198,7 +215,7 @@ static void llist_append(llist_t *ll, const char *key)
llnode_t *tmp;
if ((ll == NULL) || (key == NULL)) return;
- ll->tail->key = strdup(key);
+ ll->tail->key = my_strdup(key);
ll->count ++;
tmp = (llnode_t *)malloc(sizeof(llnode_t));
tmp->key = NULL;
@@ -286,7 +303,7 @@ static void set_add(set_t *s, const char *key)
tmp = tmp->next;
}
s->count ++;
- tmp->key = strdup(key);
+ tmp->key = my_strdup(key);
tmp->next = (llnode_t *)malloc(sizeof(llnode_t));
tmp = tmp->next;
tmp->key = NULL;
@@ -375,7 +392,7 @@ static void map_add(map_t *m, const char *key, const char *val)
/* add new entry to map */
if (tmp->next == NULL) {
m->count ++;
- tmp->key = strdup(key);
+ tmp->key = my_strdup(key);
tmp->val = set_init(50); /* XXX: chosen arbitrarily */
tmp->next = (mapnode_t *)malloc(sizeof(mapnode_t));
tmp->next->key = NULL;
@@ -547,42 +564,52 @@ static void do_depend(llnode_t *head, map_t *deps)
set_t *incl;
const char *source;
char *target, *ptr;
- int i,num;
+ int i,num,ext;
tmp = head;
while (tmp->next != NULL) {
source = tmp->key;
target = strrchr(source,'/');
if (target == NULL) {
- target = strdup(source);
+ target = my_strdup(source);
} else {
- target = strdup(target+1);
+ target = my_strdup(target+1);
}
+ ext = 0;
ptr = strrchr(target,'.');
if (ptr != NULL) {
- ptr[1] = 'o';
- ptr[2] = '\0';
- }
- fputs(target,stdout);
- fputs(" : ",stdout);
- fputs(source,stdout);
- free((void *)target);
-
- incl = set_init(50);
- add_depend(source,incl,deps);
-
- num = incl->nbuckets;
- for (i = 0; i < num; ++i) {
- lnk = incl->buckets + i;
- while (lnk->next != NULL) {
- fputc(' ',stdout);
- fputs(lnk->key,stdout);
- lnk = lnk->next;
+ for (i = 0; i < numextensions; ++i) {
+ if (strcmp(ptr,extensions[i]) == 0) ++ext;
+ }
+ if (ext > 0) {
+ ptr[1] = 'o';
+ ptr[2] = '\0';
}
}
- fputc('\n',stdout);
- set_free(incl);
+
+ if (ext > 0) {
+ fputs(target,stdout);
+ fputs(" : ",stdout);
+ fputs(source,stdout);
+
+ incl = set_init(50);
+ add_depend(source,incl,deps);
+
+ num = incl->nbuckets;
+ for (i = 0; i < num; ++i) {
+ lnk = incl->buckets + i;
+ while (lnk->next != NULL) {
+ fputc(' ',stdout);
+ fputs(lnk->key,stdout);
+ lnk = lnk->next;
+ }
+ }
+ fputc('\n',stdout);
+ set_free(incl);
+ }
+
+ free((void *)target);
tmp = tmp->next;
}
}
@@ -601,6 +628,8 @@ int main(int argc, char **argv)
fprintf(stderr,"FastDep v%s for LAMMPS\n"
"Usage: %s [-I ...] -- [ ...]\n",
version,argv[0]);
+ fprintf(stderr,"Supported extensions: %d, %s, %s, %s\n",numextensions,
+ extensions[0], extensions[1], extensions[2]); /* keep in sync with extensions[] */
return 1;
}
@@ -631,7 +660,7 @@ int main(int argc, char **argv)
}
} else if (strcmp(*argv,"--") == 0) {
break;
- } // ignore all unrecognized arguments before '--'.
+ } /* ignore all unrecognized arguments before '--'. */
}
src = llist_init();
diff --git a/src/GRANULAR/pair_gran_hertz_history.cpp b/src/GRANULAR/pair_gran_hertz_history.cpp
index b32f136726..e14dc7110f 100644
--- a/src/GRANULAR/pair_gran_hertz_history.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history.cpp
@@ -87,7 +87,6 @@ void PairGranHertzHistory::compute(int eflag, int vflag)
double **torque = atom->torque;
double *radius = atom->radius;
double *rmass = atom->rmass;
- int *type = atom->type;
int *mask = atom->mask;
int nlocal = atom->nlocal;
@@ -367,7 +366,6 @@ double PairGranHertzHistory::single(int i, int j, int itype, int jtype,
// if I or J is frozen, meff is other particle
double *rmass = atom->rmass;
- int *type = atom->type;
int *mask = atom->mask;
mi = rmass[i];
diff --git a/src/GRANULAR/pair_gran_hooke.cpp b/src/GRANULAR/pair_gran_hooke.cpp
index b5305c309d..9ff23a7553 100644
--- a/src/GRANULAR/pair_gran_hooke.cpp
+++ b/src/GRANULAR/pair_gran_hooke.cpp
@@ -81,7 +81,6 @@ void PairGranHooke::compute(int eflag, int vflag)
double **torque = atom->torque;
double *radius = atom->radius;
double *rmass = atom->rmass;
- int *type = atom->type;
int *mask = atom->mask;
int nlocal = atom->nlocal;
int newton_pair = force->newton_pair;
@@ -285,7 +284,6 @@ double PairGranHooke::single(int i, int j, int itype, int jtype, double rsq,
// if I or J is frozen, meff is other particle
double *rmass = atom->rmass;
- int *type = atom->type;
int *mask = atom->mask;
mi = rmass[i];
diff --git a/src/GRANULAR/pair_gran_hooke_history.cpp b/src/GRANULAR/pair_gran_hooke_history.cpp
index c5cca38290..b8b0381d58 100644
--- a/src/GRANULAR/pair_gran_hooke_history.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history.cpp
@@ -129,7 +129,6 @@ void PairGranHookeHistory::compute(int eflag, int vflag)
double **torque = atom->torque;
double *radius = atom->radius;
double *rmass = atom->rmass;
- int *type = atom->type;
int *mask = atom->mask;
int nlocal = atom->nlocal;
@@ -670,7 +669,6 @@ double PairGranHookeHistory::single(int i, int j, int itype, int jtype,
// if I or J is frozen, meff is other particle
double *rmass = atom->rmass;
- int *type = atom->type;
int *mask = atom->mask;
mi = rmass[i];
diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh
index ad450592be..af77bcd206 100644
--- a/src/KOKKOS/Install.sh
+++ b/src/KOKKOS/Install.sh
@@ -189,6 +189,14 @@ if (test $1 = 1) then
sed -i -e '5 i \include ..\/..\/lib\/kokkos\/Makefile.kokkos' ../Makefile.package.settings
fi
+ # comb/omp triggers a persistent bug in nvcc. deleting it.
+ rm -f ../*_comb_omp.*
+
+elif (test $1 = 2) then
+
+ # comb/omp triggers a persistent bug in nvcc. deleting it.
+ rm -f ../*_comb_omp.*
+
elif (test $1 = 0) then
if (test -e ../Makefile.package) then
diff --git a/src/KOKKOS/fix_nh_kokkos.cpp b/src/KOKKOS/fix_nh_kokkos.cpp
index 28b7ff55b6..8cc06fc52c 100755
--- a/src/KOKKOS/fix_nh_kokkos.cpp
+++ b/src/KOKKOS/fix_nh_kokkos.cpp
@@ -291,7 +291,6 @@ void FixNHKokkos::final_integrate()
template
void FixNHKokkos::remap()
{
- int i;
double oldlo,oldhi;
double expfac;
diff --git a/src/MANYBODY/pair_airebo.cpp b/src/MANYBODY/pair_airebo.cpp
index 076848ddf0..12f39bf931 100644
--- a/src/MANYBODY/pair_airebo.cpp
+++ b/src/MANYBODY/pair_airebo.cpp
@@ -2251,7 +2251,6 @@ double PairAIREBO::bondorderLJ(int i, int j, double rij[3], double rijmag,
ril[1] = rij[1]+rjl[1];
ril[2] = rij[2]+rjl[2];
ril2 = (ril[0]*ril[0])+(ril[1]*ril[1])+(ril[2]*ril[2]);
- rijrjl = 2.0*rijmag*rjlmag;
rjl2 = rjlmag*rjlmag;
costmp = 0.5*(rij2+rjl2-ril2)/rijmag/rjlmag;
tspijl = Sp2(costmp,thmin,thmax,dtsijl);
diff --git a/src/MANYBODY/pair_bop.cpp b/src/MANYBODY/pair_bop.cpp
index 037f893ea5..219acc50af 100644
--- a/src/MANYBODY/pair_bop.cpp
+++ b/src/MANYBODY/pair_bop.cpp
@@ -993,7 +993,6 @@ void PairBOP::theta()
int *ilist;
int *iilist;
int **firstneigh;
- int maxn,maxt;
double rj2,rk2,rsq,ps;
double rj1k1,rj2k2;
double **x = atom->x;
@@ -1015,8 +1014,6 @@ void PairBOP::theta()
itype = map[type[i]]+1;
iilist=firstneigh[i];
- maxt=0;
- maxn=0;
nlisti=BOP_total[i];
for(jj=0;jjmaxn) maxn=maxt;
}
for (ii = 0; ii < nall; ii++) {
n=0;
diff --git a/src/MANYBODY/pair_eim.cpp b/src/MANYBODY/pair_eim.cpp
index c25f3d92e6..a2807eb78c 100644
--- a/src/MANYBODY/pair_eim.cpp
+++ b/src/MANYBODY/pair_eim.cpp
@@ -921,13 +921,13 @@ int PairEIM::grabsingle(FILE *fptr, int i)
pch1 = strstr(pch1,"element:");
if (pch1 != NULL) {
pch2 = strtok(NULL, " \t\n\r\f");
- if (pch2 != NULL) data = strtok (NULL, "?");
- if (strcmp(pch2,elements[i]) == 0) {
- sscanf(data,"%d %lg %lg %lg %lg %lg %lg",&setfl->ielement[i],
- &setfl->mass[i],&setfl->negativity[i],&setfl->ra[i],
- &setfl->ri[i],&setfl->Ec[i],&setfl->q0[i]);
- } else {
- pch2 = NULL;
+ if (pch2 != NULL) {
+ data = strtok (NULL, "?");
+ if (strcmp(pch2,elements[i]) == 0) {
+ sscanf(data,"%d %lg %lg %lg %lg %lg %lg",&setfl->ielement[i],
+ &setfl->mass[i],&setfl->negativity[i],&setfl->ra[i],
+ &setfl->ri[i],&setfl->Ec[i],&setfl->q0[i]);
+ } else pch2 = NULL;
}
}
}
@@ -960,25 +960,27 @@ int PairEIM::grabpair(FILE *fptr, int i, int j)
pch2 = strtok (NULL, " \t\n\r\f");
if (pch2 != NULL) pch3 = strtok (NULL, " \t\n\r\f");
if (pch3 != NULL) data = strtok (NULL, "?");
- if ((strcmp(pch2,elements[i]) == 0 &&
- strcmp(pch3,elements[j]) == 0) ||
- (strcmp(pch2,elements[j]) == 0 &&
- strcmp(pch3,elements[i]) == 0)) {
- sscanf(data,"%lg %lg %lg %lg %lg",
- &setfl->rcutphiA[ij],&setfl->rcutphiR[ij],
- &setfl->Eb[ij],&setfl->r0[ij],&setfl->alpha[ij]);
- fgets(line,MAXLINE,fptr);
- sscanf(line,"%lg %lg %lg %lg %lg",
- &setfl->beta[ij],&setfl->rcutq[ij],&setfl->Asigma[ij],
- &setfl->rq[ij],&setfl->rcutsigma[ij]);
- fgets(line,MAXLINE,fptr);
- sscanf(line,"%lg %lg %lg %d",
- &setfl->Ac[ij],&setfl->zeta[ij],&setfl->rs[ij],
- &setfl->tp[ij]);
- } else {
- pch1 = NULL;
- pch2 = NULL;
- pch3 = NULL;
+ if ((pch2 != NULL) && (pch3 != NULL)) {
+ if ((strcmp(pch2,elements[i]) == 0 &&
+ strcmp(pch3,elements[j]) == 0) ||
+ (strcmp(pch2,elements[j]) == 0 &&
+ strcmp(pch3,elements[i]) == 0)) {
+ sscanf(data,"%lg %lg %lg %lg %lg",
+ &setfl->rcutphiA[ij],&setfl->rcutphiR[ij],
+ &setfl->Eb[ij],&setfl->r0[ij],&setfl->alpha[ij]);
+ fgets(line,MAXLINE,fptr);
+ sscanf(line,"%lg %lg %lg %lg %lg",
+ &setfl->beta[ij],&setfl->rcutq[ij],&setfl->Asigma[ij],
+ &setfl->rq[ij],&setfl->rcutsigma[ij]);
+ fgets(line,MAXLINE,fptr);
+ sscanf(line,"%lg %lg %lg %d",
+ &setfl->Ac[ij],&setfl->zeta[ij],&setfl->rs[ij],
+ &setfl->tp[ij]);
+ } else {
+ pch1 = NULL;
+ pch2 = NULL;
+ pch3 = NULL;
+ }
}
}
}
diff --git a/src/MANYBODY/pair_polymorphic.cpp b/src/MANYBODY/pair_polymorphic.cpp
index ad53789737..2429dd0638 100755
--- a/src/MANYBODY/pair_polymorphic.cpp
+++ b/src/MANYBODY/pair_polymorphic.cpp
@@ -631,6 +631,8 @@ void PairPolymorphic::read_file(char *file)
if (ptr) maxX = atof(ptr);
if (ptr == NULL)
error->all(FLERR,"Potential file incompatible with this pair style version");
+ if ((ng == 0) || (nr == 0) || (nx == 0))
+ error->all(FLERR,"Error reading potential file header");
npair = nelements*(nelements+1)/2;
ntriple = nelements*nelements*nelements;
diff --git a/src/MEAM/pair_meam.cpp b/src/MEAM/pair_meam.cpp
index a350c66a96..2d14ffa0cf 100644
--- a/src/MEAM/pair_meam.cpp
+++ b/src/MEAM/pair_meam.cpp
@@ -555,7 +555,7 @@ void PairMEAM::read_files(char *globalfile, char *userfile)
for (i = 0; i < nelements; i++)
if (strcmp(words[0],elements[i]) == 0) break;
- if (i == nelements) continue;
+ if (i >= nelements) continue;
// skip if element already appeared
diff --git a/src/Make.py b/src/Make.py
index d8be53f13b..6ab41af99a 100755
--- a/src/Make.py
+++ b/src/Make.py
@@ -15,14 +15,14 @@ import sys,os,commands,re,copy,subprocess
# setargs = makefile settings
# actionargs = allowed actions (also lib-dir and machine)
-abbrevs = "adhjmoprsv"
+abbrevs = "adhjmoprsvz"
switchclasses = ("actions","dir","help","jmake","makefile",
- "output","packages","redo","settings","verbose")
+ "output","packages","redo","settings","verbose","zoutput")
libclasses = ("atc","awpmd","colvars","cuda","gpu","h5md",
"meam","poems","python","qmmm","reax","voronoi")
buildclasses = ("intel","kokkos")
-makeclasses = ("cc","mpi","fft","jpg","png")
+makeclasses = ("cc","flags","mpi","fft","jpg","png")
setargs = ("gzip","#gzip","ffmpeg","#ffmpeg","smallbig","bigbig","smallsmall")
actionargs = ("lib-all","file","clean","exe")
@@ -137,27 +137,37 @@ class Actions:
lib-all builds all auxiliary libs needed by installed packages
lib-dir builds a specific lib whether package installed or not
dir is any dir in lib directory (atc, cuda, meam, etc) except linalg
- (2) file = create src/MAKE/MINE/Makefile.auto
- use -m switch for Makefile.machine to start from,
- else use existing Makefile.auto
- adds settings needed for installed accelerator packages
- existing Makefile.auto is NOT changed unless "file" action is specified
+ (2) file = create a new src/MAKE/MINE/Makefile.auto
+ if file not specified, existing Makefile.auto is NOT changed
+ except by -m switch, which will copy Makefile.machine to Makefile.auto
+ note that exe action can add an -m switch, as described below
+ if file is specified, new Makefile.auto is created
+ if "-m machine" specified (or added by exe),
+ start with existing Makefile.machine, else existing Makefile.auto
+ if "-m none" specified, start Makefile.auto from scratch
+ must use -cc and -mpi switches to specify compiler and MPI
+ settings for these switches will alter Makefile.auto
+ -s, -intel, -kokkos, -cc, -mpi, -fft, -jpg, -png
+ if these accelerator packages are installed, they induce settings
+ that will alter Makefile.auto: opt, user-omp, user-intel, kokkos
+ use -z switch to copy final Makefile.auto to new filename
(3) clean = invoke "make clean-auto" to insure clean build on current files
useful if compiler flags have changed
(4) exe or machine = build LAMMPS
machine can be any existing Makefile.machine suffix
- machine is converted to "exe" action, as well as:
+ machine is converted to "exe" action, and additionally:
"-m machine" is added if -m switch is not specified
"-o machine" is added if -o switch is not specified
if either "-m" or "-o" are specified, they are not overridden
does not invoke any lib builds, since libs could be previously built
- exe always builds using src/MAKE/MINE/Makefile.auto
- if file action also specified, it creates Makefile.auto
+ exe ALWAYS builds using src/MAKE/MINE/Makefile.auto
+ if file action also specified, it creates a new Makefile.auto
else if -m switch specified,
existing Makefile.machine is copied to create Makefile.auto
else Makefile.auto must already exist and is not changed
- produces src/lmp_auto, or error message if unsuccessful
+ build produces src/lmp_auto, or error message if unsuccessful
use -o switch to copy src/lmp_auto to new filename
+ use -z switch to copy src/MAKE/MINE/Makefile.auto to new filename
"""
def check(self):
@@ -177,7 +187,7 @@ class Actions:
cleans.append(one)
elif one == "exe":
exes.append(one)
- # one action can be unknown in case is a machine (checked in setup)
+ # one action can be unknown, must be a machine (checked in setup)
else:
exes.append(one)
if len(set(libs)) != len(libs) or \
@@ -236,9 +246,9 @@ class Actions:
def file(self,caller):
- # if caller = "file", create from mpi or read from makefile.machine or auto
- # if caller = "exe" and "file" action already invoked, read from auto
- # if caller = "exe" and no "file" action, read from makefile.machine or auto
+ # if caller="file", create from mpi or read from Makefile.machine or auto
+ # if caller="exe" and "file" action already invoked, read from auto
+ # if caller="exe" and no "file" action, read from Makefile.machine or auto
if caller == "file":
if makefile and makefile.machine == "none":
@@ -279,7 +289,7 @@ class Actions:
make.addvar("CC","-cxx=%s" % wrapper)
make.addvar("LINK","-cxx=%s" % wrapper)
elif "-lmpi" in txt:
- make.addvar("OMPI_CXX",wrapper,"cc")
+ make.addvar("export OMPI_CXX",wrapper,"cc")
precompiler = "env OMPI_CXX=%s " % wrapper
else: error("Could not add MPI wrapper compiler, " +
"did not recognize OpenMPI or MPICH")
@@ -287,8 +297,20 @@ class Actions:
make.addvar("CCFLAGS","-O3")
make.setvar("LINKFLAGS","-g")
make.addvar("LINKFLAGS","-O")
+
+ # add CC and LINK flags
-# add MPI settings
+ if flags:
+ for flag in flags.CC:
+ flag = "-" + flag
+ if flag[:2] == "-O": make.delvar("CCFLAGS","-O*")
+ make.addvar("CCFLAGS",flag)
+ for flag in flags.LINK:
+ flag = "-" + flag
+ if flag[:2] == "-O": make.delvar("LINKFLAGS","-O*")
+ make.addvar("LINKFLAGS",flag)
+
+ # add MPI settings
if mpi:
make.delvar("MPI_INC","*")
@@ -397,7 +419,7 @@ class Actions:
make.addvar("KOKKOS_DEVICES","OpenMP","lmp")
make.addvar("KOKKOS_ARCH","KNC","lmp")
- # add LMP settings
+ # add LMP_INC ifdef settings
if settings:
list = settings.inlist
@@ -465,12 +487,13 @@ class Actions:
# set self.stubs if Makefile.auto uses STUBS lib in MPI settings
- if "-lmpi_stubs" in make.getvar("MPI_LIB"): self.stubs = 1
+ if make.getvar("MPI_LIB") and "-lmpi_stubs" in make.getvar("MPI_LIB"):
+ self.stubs = 1
else: self.stubs = 0
# write out Makefile.auto
# unless caller = "exe" and "file" action already invoked
-
+
if caller == "file" or "file" not in self.alist:
make.write("%s/MAKE/MINE/Makefile.auto" % dir.src,1)
print "Created src/MAKE/MINE/Makefile.auto"
@@ -510,13 +533,23 @@ class Actions:
print txt
error('Unsuccessful "make stubs"')
print "Created src/STUBS/libmpi_stubs.a"
- if jmake: str = "cd %s; make -j %d auto" % (dir.src,jmake.n)
- else: str = "cd %s; make auto" % dir.src
+
+ # special hack for shannon GPU cluster
+ # must use "srun make" if on it and building w/ GPU package, else just make
+ # this is b/c Cuda libs are not all available on host
+
+ make = "make"
+ if "shannon" in os.environ.get("HOST","") and packages.final["gpu"]: # HOST may be unset
+ make = "srun make"
+
+ if jmake: str = "cd %s; %s -j %d auto" % (dir.src,make,jmake.n)
+ else: str = "cd %s; %s auto" % (dir.src,make)
# if verbose, print output as build proceeds, else only print if fails
if verbose: subprocess.call(str,shell=True)
else:
+ print str
try: subprocess.check_output(str,stderr=subprocess.STDOUT,shell=True)
except Exception as e: print e.output
@@ -574,8 +607,8 @@ Syntax: Make.py switch args ...
list one or more actions, in any order
machine is a Makefile.machine suffix
one-letter switches:
- -d (dir), -j (jmake), -m (makefile), -o (output),
- -p (packages), -r (redo), -s (settings), -v (verbose)
+ -d (dir), -j (jmake), -m (makefile), -o (output), -p (packages),
+ -r (redo), -s (settings), -v (verbose), -z (makefile output)
switches for libs:
-atc, -awpmd, -colvars, -cuda, -gpu, -h5md,
-meam, -poems, -python, -qmmm, -reax, -voronoi
@@ -898,9 +931,13 @@ class Settings:
def help(self):
return """
-s set1 set2 ...
- possible settings = gzip smallbig bigbig smallsmall
- add each setting as LAMMPS setting to created Makefile.auto
- if -s not specified, no settings are changed in Makefile.auto
+ possible settings = gzip #gzip ffmpeg #ffmpeg smallbig bigbig smallsmall
+ alter LAMMPS ifdef settings in Makefile.auto
+ only happens if new Makefile.auto is created by use of "file" action
+ gzip and #gzip turn on/off LAMMPS_GZIP setting
+ ffmpeg and #ffmpeg turn on/off LAMMPS_FFMPEG setting
+ smallbig, bigbig, smallsmall turn on LAMMPS_SMALLBIG, etc
+ and turn off other two
"""
def check(self):
@@ -924,6 +961,23 @@ class Verbose:
def check(self):
if len(self.inlist): error("-v args are invalid")
+# zoutput switch for making copy of final Makefile.auto
+
+class Zoutput:
+ def __init__(self,list):
+ self.inlist = copy.copy(list) # shallow copy of the constructor's list argument
+
+ def help(self):
+ return """
+-z machine
+ copy created/used src/MAKE/MINE/Makefile.auto to Makefile.machine in same dir
+ this can be used to preserve the machine makefile
+"""
+
+ def check(self):
+ if len(self.inlist) != 1: error("-z args are invalid") # exactly one arg is accepted
+ self.machine = self.inlist[0] # used as the suffix of the copied Makefile.machine
+
# ----------------------------------------------------------------
# lib classes, one per LAMMPS auxiliary lib
# ----------------------------------------------------------------
@@ -1087,15 +1141,15 @@ class CUDA:
def __init__(self,list):
self.inlist = copy.copy(list)
self.mode = "double"
- self.arch = "31"
+ self.arch = "35"
def help(self):
return """
--cuda mode=double arch=31
+-cuda mode=double arch=35
all args are optional and can be in any order
mode = double or mixed or single (def = double)
- arch = M (def = 31)
- M = 31 for Kepler
+ arch = M (def = 35)
+ M = 31,35,37,etc for Kepler
M = 20 for CC2.0 (GF100/110, e.g. C2050,GTX580,GTX470)
M = 21 for CC2.1 (GF104/114, e.g. GTX560, GTX460, GTX450)
M = 13 for CC1.3 (GF200, e.g. C1060, GTX285)
@@ -1144,16 +1198,18 @@ class GPU:
def __init__(self,list):
self.inlist = copy.copy(list)
self.make = "linux.double"
- self.lammpsflag = self.modeflag = self.archflag = 0
+ self.lammpsflag = self.modeflag = self.archflag = self.homeflag = 0
def help(self):
return """
--gpu make=suffix lammps=suffix2 mode=double arch=N
+-gpu make=suffix lammps=suffix2 mode=double arch=N home=path
all args are optional and can be in any order
make = use Makefile.suffix (def = linux.double)
lammps = use Makefile.lammps.suffix2 (def = EXTRAMAKE in makefile)
mode = double or mixed or single (def = CUDA_PREC in makefile)
- arch = 31 (Kepler) or 21 (Fermi) (def = CUDA_ARCH in makefile)
+ arch = 3x (x = digit for Kepler) or 2x (x = digit for Fermi)
+ (def = CUDA_ARCH in makefile)
+ home = path to Cuda, e.g. /usr/local/cuda (def = CUDA_HOME in makefile)
"""
def check(self):
@@ -1172,6 +1228,9 @@ class GPU:
elif words[0] == "arch":
self.arch = words[1]
self.archflag = 1
+ elif words[0] == "home":
+ self.home = words[1]
+ self.homeflag = 1
else: error("-gpu args are invalid")
if self.modeflag and (self.mode != "double" and
self.mode != "mixed" and
@@ -1192,13 +1251,22 @@ class GPU:
make.setvar("CUDA_PRECISION","-D_SINGLE_SINGLE")
if self.archflag:
make.setvar("CUDA_ARCH","-arch=sm_%s" % self.arch)
+ if self.homeflag:
+ make.setvar("CUDA_HOME",self.home)
if self.lammpsflag:
make.setvar("EXTRAMAKE","Makefile.lammps.%s" % self.lammps)
make.write("%s/Makefile.auto" % libdir)
- commands.getoutput("cd %s; make -f Makefile.auto clean" % libdir)
- if jmake: str = "cd %s; make -j %d -f Makefile.auto" % (libdir,jmake.n)
- else: str = "cd %s; make -f Makefile.auto" % libdir
+ # special hack for shannon GPU cluster
+ # must use "srun make" if on it, else just make
+ # this is b/c Cuda libs are not all available on host
+
+ make = "make"
+ if "shannon" in os.environ.get("HOST",""): make = "srun make" # HOST may be unset
+
+ commands.getoutput("cd %s; %s -f Makefile.auto clean" % (libdir,make))
+ if jmake: str = "cd %s; %s -j %d -f Makefile.auto" % (libdir,make,jmake.n)
+ else: str = "cd %s; %s -f Makefile.auto" % (libdir,make)
# if verbose, print output as build proceeds, else only print if fails
@@ -1574,14 +1642,15 @@ class Kokkos:
mode is not optional, arch is optional
mode = omp or cuda or phi (def = KOKKOS_DEVICES setting in Makefile )
build Kokkos package for omp or cuda or phi
- set KOKKOS_DEVICES to "OpenMP" (omp, phi) or "Cuda, OpenMP" (cuda)
- arch = 31 (Kepler) or 21 (Fermi) (def = -arch setting in Makefile)
+ sets KOKKOS_DEVICES to "OpenMP" (omp, phi) or "Cuda, OpenMP" (cuda)
+ arch = number like 35 (Kepler) or 21 (Fermi)
+ sets KOKKOS_ARCH to appropriate value
"""
def check(self):
if self.inlist != None and len(self.inlist) == 0:
error("-kokkos args are invalid")
-
+
if self.inlist == None: return
if len(self.inlist) < 1: error("-kokkos args are invalid")
self.mode = self.inlist[0]
@@ -1596,7 +1666,7 @@ class Kokkos:
else: error("-kokkos args are invalid")
# ----------------------------------------------------------------
-# makefile classes for CC, MPI, JPG, PNG, FFT settings
+# makefile classes for CC, FLAGS, MPI, JPG, PNG, FFT settings
# ----------------------------------------------------------------
# Cc class
@@ -1610,7 +1680,8 @@ class Cc:
def help(self):
return """
-cc compiler wrap=wcompiler
- change CC setting in makefile
+ alter CC setting in Makefile.auto
+ only happens if new Makefile.auto is created by use of "file" action
compiler is required, all other args are optional
compiler = any string with g++ or icc or icpc
or mpi (or mpicxx, mpiCC, mpiicpc, etc)
@@ -1644,6 +1715,41 @@ class Cc:
self.wrap = words[1]
else: error("-cc args are invalid")
+# Flags class
+
+class Flags:
+ def __init__(self,list):
+ self.inlist = copy.copy(list) # shallow copy of the constructor's list argument
+ self.CC = [] # flags listed after a "CC" keyword, stored without leading "-"
+ self.LINK = [] # flags listed after a "LINK" keyword, stored without leading "-"
+
+ def help(self):
+ return """
+-flags flag f1 f2 ... flag f1 f2 ...
+ alter CCFLAGS or LINKFLAGS settings in Makefile.auto
+ only happens if new Makefile.auto is created by use of "file" action
+ flag = CC or LINK
+ one or both can be specified
+ f1,f2,etc = flag to add or replace
+ "-" char will be prepended to each
+ for example: g, O3, xHost, "fp-model fast=2"
+ will become: -g, -O3, -xHost, -fp-model fast=2
+ for -O,-O2,-O3,etc: existing -O* will first be removed
+"""
+
+ def check(self):
+ if len(self.inlist) < 1: error("-flags args are invalid")
+ self.CC = [] # NOTE(review): __init__ already sets CC and LINK to []; this reset looks redundant
+ self.LINK = []
+ mode = "" # empty until a CC or LINK keyword has been seen
+ for one in self.inlist:
+ if one == "CC": mode = "CC" # following args go to self.CC
+ elif one == "LINK": mode = "LINK" # following args go to self.LINK
+ else:
+ if not mode: error("-flags args are invalid") # a flag appeared before any CC/LINK keyword
+ if mode == "CC": self.CC.append(one)
+ elif mode == "LINK": self.LINK.append(one)
+
# Mpi class
class Mpi:
@@ -1654,7 +1760,8 @@ class Mpi:
def help(self):
return """
-mpi style dir=path
- change MPI settings in makefile
+ alter MPI settings in Makefile.auto
+ only happens if new Makefile.auto is created by use of "file" action
style is required, all other args are optional
style = mpi or mpich or ompi or serial
mpi = no MPI settings (assume compiler is MPI wrapper)
@@ -1687,9 +1794,10 @@ class Fft:
def help(self):
return """
-fft mode lib=libname dir=homedir idir=incdir ldir=libdir
- change FFT settings in makefile
+ alter FFT settings in Makefile.auto
+ only happens if new Makefile.auto is created by use of "file" action
mode is required, all other args are optional
- removes all current FFT variable settings
+ first removes all current FFT variable settings
mode = none or fftw or fftw3 or ...
adds -DFFT_MODE setting
lib = name of FFT library to link with (def is libname = mode)
@@ -1727,6 +1835,8 @@ class Jpg:
def help(self):
return """
-jpg flag dir=homedir idir=incdir ldir=libdir
+ alter JPG settings in Makefile.auto
+ only happens if new Makefile.auto is created by use of "file" action
change JPG settings in makefile
all args are optional, flag must come first if specified
flag = yes or no (def = yes)
@@ -1764,7 +1874,8 @@ class Png:
def help(self):
return """
-png flag dir=homedir idir=incdir ldir=libdir
- change PNG settings in makefile
+ alter PNG settings in Makefile.auto
+ only happens if new Makefile.auto is created by use of "file" action
all args are optional, flag must come first if specified
flag = yes or no (def = yes)
include or exclude PNG support
@@ -2143,13 +2254,22 @@ while 1:
packages.uninstall()
- # create output file if requested and exe action performed
+ # create copy of executable if requested, and exe action performed
if output and actions and "exe" in actions.alist:
txt = "cp %s/lmp_auto %s/lmp_%s" % (dir.src,dir.cwd,output.machine)
commands.getoutput(txt)
print "Created lmp_%s in %s" % (output.machine,dir.cwd)
+ # create copy of Makefile.auto if requested, and file or exe action performed
+
+ if zoutput and actions and \
+ ("file" in actions.alist or "exe" in actions.alist):
+ txt = "cp %s/MAKE/MINE/Makefile.auto %s/MAKE/MINE/Makefile.%s" % \
+ (dir.src,dir.src,zoutput.machine)
+ commands.getoutput(txt)
+ print "Created Makefile.%s in %s/MAKE/MINE" % (zoutput.machine,dir.src)
+
# write current Make.py command to src/Make.py.last
fp = open("%s/Make.py.last" % dir.src,'w')
diff --git a/src/Makefile b/src/Makefile
index a0d9cc822d..8938c2e72a 100755
--- a/src/Makefile
+++ b/src/Makefile
@@ -205,8 +205,8 @@ install-python:
tar:
@cd STUBS; $(MAKE) clean
@cd ..; tar cvzf src/$(ROOT)_src.tar.gz \
- src/Make* src/Package.sh src/Depend.sh \
- src/MAKE src/*.cpp src/*.h src/STUBS \
+ src/Make* src/Package.sh src/Depend.sh src/Install.sh \
+ src/MAKE src/DEPEND src/*.cpp src/*.h src/STUBS \
$(patsubst %,src/%,$(PACKAGEUC)) $(patsubst %,src/%,$(PACKUSERUC)) \
--exclude=*/.svn
@cd STUBS; $(MAKE)
diff --git a/src/QEQ/fix_qeq_fire.cpp b/src/QEQ/fix_qeq_fire.cpp
index af9ca65eef..2404ae536d 100644
--- a/src/QEQ/fix_qeq_fire.cpp
+++ b/src/QEQ/fix_qeq_fire.cpp
@@ -107,17 +107,16 @@ void FixQEqFire::init()
void FixQEqFire::pre_force(int vflag)
{
int inum, *ilist;
- int i,ii,iloop,loopmax;
- int *mask = atom->mask;
+ int i,ii,iloop;
double *q = atom->q;
double vmax,vdotf,vdotfall,vdotv,vdotvall,fdotf,fdotfall;
double scale1,scale2;
double dtvone,dtv;
- double enegtot,enegchk,enegmax;
+ double enegtot,enegchk;
double alpha = qdamp;
double dt, dtmax;
- double enegchkall,enegmaxall;
+ double enegchkall;
bigint ntimestep = update->ntimestep;
bigint last_negative = 0;
@@ -225,7 +224,7 @@ void FixQEqFire::pre_force(int vflag)
if (comm->me == 0) {
if (iloop == maxiter) {
char str[128];
- sprintf(str,"Charges did not converge at step "BIGINT_FORMAT
+ sprintf(str,"Charges did not converge at step " BIGINT_FORMAT
": %lg",update->ntimestep,enegchk);
error->warning(FLERR,str);
}
diff --git a/src/REPLICA/temper.cpp b/src/REPLICA/temper.cpp
index 17c2c30fca..5b6c310911 100644
--- a/src/REPLICA/temper.cpp
+++ b/src/REPLICA/temper.cpp
@@ -93,13 +93,38 @@ void Temper::command(int narg, char **arg)
if (nswaps*nevery != nsteps)
error->universe_all(FLERR,"Non integer # of swaps in temper command");
- // fix style must be appropriate for temperature control
+ // fix style must be appropriate for temperature control, i.e. it needs
+ // to provide a working Fix::reset_target() and must not change the volume.
if ((strcmp(modify->fix[whichfix]->style,"nvt") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"nvt/asphere") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"nvt/asphere/omp") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"nvt/body") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"nvt/eff") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"nvt/intel") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"nvt/kk") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"nvt/kk/host") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"nvt/kk/device") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"nvt/omp") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"nvt/sphere") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"nvt/sphere/omp") != 0) &&
(strcmp(modify->fix[whichfix]->style,"langevin") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"langevin/drude") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"langevin/eff") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"gld") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"gle") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"rigid/nvt") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"rigid/nvt/small") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"rigid/nvt/omp") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"rigid/nvt/small/omp") != 0) &&
(strcmp(modify->fix[whichfix]->style,"temp/berendsen") != 0) &&
- (strcmp(modify->fix[whichfix]->style,"temp/rescale") != 0))
- error->universe_all(FLERR,"Tempering temperature fix is not valid");
+ (strcmp(modify->fix[whichfix]->style,"temp/berendsen/cuda") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"temp/csvr") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"temp/csld") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"temp/rescale") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"temp/rescale/cuda") != 0) &&
+ (strcmp(modify->fix[whichfix]->style,"temp/rescale/eff") != 0))
+ error->universe_all(FLERR,"Tempering temperature fix is not supported");
// setup for long tempering run
diff --git a/src/STUBS/Makefile.mingw32-cross b/src/STUBS/Makefile.mingw32-cross
index 8bc6d62135..4144954ec7 100644
--- a/src/STUBS/Makefile.mingw32-cross
+++ b/src/STUBS/Makefile.mingw32-cross
@@ -16,7 +16,7 @@ OBJ = $(SRC:%.c=%_mingw32.o)
# System-specific settings
CC = i686-w64-mingw32-gcc
-CCFLAGS = -O2 -Wall -march=i686 -mtune=generic -mfpmath=387 -mpc64
+CCFLAGS = -O2 -Wall -march=i686 -mtune=generic -mfpmath=387 -mpc64 -I.
ARCHIVE = i686-w64-mingw32-ar
ARCHFLAG = rs
diff --git a/src/STUBS/Makefile.mingw64-cross b/src/STUBS/Makefile.mingw64-cross
index 2ef8d7fd6e..70b971f262 100644
--- a/src/STUBS/Makefile.mingw64-cross
+++ b/src/STUBS/Makefile.mingw64-cross
@@ -16,7 +16,7 @@ OBJ = $(SRC:%.c=%_mingw64.o)
# System-specific settings
CC = x86_64-w64-mingw32-gcc
-CCFLAGS = -O2 -Wall -march=core2 -mtune=core2 -msse2 -mpc64
+CCFLAGS = -O2 -Wall -march=core2 -mtune=core2 -msse2 -mpc64 -I.
ARCHIVE = x86_64-w64-mingw32-ar
ARCHFLAG = rs
diff --git a/src/USER-OMP/pair_airebo_omp.cpp b/src/USER-OMP/pair_airebo_omp.cpp
index fc6ba17f1c..eeb3e134f6 100644
--- a/src/USER-OMP/pair_airebo_omp.cpp
+++ b/src/USER-OMP/pair_airebo_omp.cpp
@@ -1097,7 +1097,6 @@ double PairAIREBOOMP::bondorderLJ_thr(int i, int j, double rij[3], double rijmag
ril[1] = rij[1]+rjl[1];
ril[2] = rij[2]+rjl[2];
ril2 = (ril[0]*ril[0])+(ril[1]*ril[1])+(ril[2]*ril[2]);
- rijrjl = 2.0*rijmag*rjlmag;
rjl2 = rjlmag*rjlmag;
costmp = 0.5*(rij2+rjl2-ril2)/rijmag/rjlmag;
tspijl = Sp2(costmp,thmin,thmax,dtsijl);
diff --git a/src/compute_chunk_atom.cpp b/src/compute_chunk_atom.cpp
index 3a9f043ed9..5412817d59 100644
--- a/src/compute_chunk_atom.cpp
+++ b/src/compute_chunk_atom.cpp
@@ -398,7 +398,7 @@ ComputeChunkAtom::ComputeChunkAtom(LAMMPS *lmp, int narg, char **arg) :
double scale;
if (which == BIN1D || which == BIN2D || which == BIN3D ||
which == BINCYLINDER) {
- if (which == BIN1D || BINCYLINDER) ndim = 1;
+ if (which == BIN1D || which == BINCYLINDER) ndim = 1;
if (which == BIN2D) ndim = 2;
if (which == BIN3D) ndim = 3;
for (int idim = 0; idim < ndim; idim++) {
diff --git a/src/domain.cpp b/src/domain.cpp
index ee6d2c1bc5..f47be0c6d5 100644
--- a/src/domain.cpp
+++ b/src/domain.cpp
@@ -503,13 +503,12 @@ void Domain::pbc()
double *coord;
int n3 = 3*nlocal;
- if (x) {
- coord = &x[0][0];
- int flag = 0;
- for (i = 0; i < n3; i++)
- if (!ISFINITE(*coord++)) flag = 1;
- if (flag) error->one(FLERR,"Non-numeric atom coords - simulation unstable");
- }
+ coord = &x[0][0]; // note: x is always initialized to at least one element.
+ int flag = 0;
+ for (i = 0; i < n3; i++)
+ if (!ISFINITE(*coord++)) flag = 1;
+ if (flag) error->one(FLERR,"Non-numeric atom coords - simulation unstable");
+
// setup for PBC checks
if (triclinic == 0) {
diff --git a/src/dump_image.cpp b/src/dump_image.cpp
index de0db88777..2ab9e84467 100644
--- a/src/dump_image.cpp
+++ b/src/dump_image.cpp
@@ -850,7 +850,6 @@ void DumpImage::create_image()
if (bodyflag) {
Body *bptr = avec_body->bptr;
- double **x = atom->x;
int *body = atom->body;
m = 0;
diff --git a/src/fix_ave_time.cpp b/src/fix_ave_time.cpp
index 3537e36d54..588aca4738 100644
--- a/src/fix_ave_time.cpp
+++ b/src/fix_ave_time.cpp
@@ -23,7 +23,6 @@
#include "force.h"
#include "modify.h"
#include "compute.h"
-#include "group.h"
#include "input.h"
#include "variable.h"
#include "memory.h"
@@ -55,6 +54,8 @@ FixAveTime::FixAveTime(LAMMPS *lmp, int narg, char **arg) :
global_freq = nfreq;
+ dynamic_group_allow = 1;
+
// scan values to count them
// then read options so know mode = SCALAR/VECTOR before re-reading values
diff --git a/src/math_const.h b/src/math_const.h
index c0b81bdf8a..d94f783aa5 100644
--- a/src/math_const.h
+++ b/src/math_const.h
@@ -25,6 +25,7 @@ namespace MathConst {
static const double MY_PI2 = 1.57079632679489661923; // pi/2
static const double MY_PI4 = 0.78539816339744830962; // pi/4
static const double MY_PIS = 1.77245385090551602729; // sqrt(pi)
+ static const double MY_ISPI4 = 1.12837916709551257389; // 1/sqrt(pi/4)
static const double MY_SQRT2 = 1.41421356237309504880; // sqrt(2)
static const double MY_CBRT2 = 1.25992104989487316476; // 2*(1/3)
}
diff --git a/src/read_data.cpp b/src/read_data.cpp
index 5a7484dea8..4b790f343c 100644
--- a/src/read_data.cpp
+++ b/src/read_data.cpp
@@ -1468,7 +1468,7 @@ void ReadData::bonus(bigint nbonus, AtomVec *ptr, const char *type)
void ReadData::bodies(int firstpass)
{
- int i,m,nchunk,nline,nmax,ninteger,ndouble,nword,ncount,onebody,tmp;
+ int m,nchunk,nline,nmax,ninteger,ndouble,nword,ncount,onebody,tmp;
char *eof;
int mapflag = 0;
diff --git a/src/set.cpp b/src/set.cpp
index a0bd6c2b65..37a0e815c4 100644
--- a/src/set.cpp
+++ b/src/set.cpp
@@ -917,7 +917,6 @@ void Set::setrandom(int keyword)
} else if (keyword == THETA_RANDOM) {
int nlocal = atom->nlocal;
- double theta;
for (i = 0; i < nlocal; i++) {
if (select[i]) {
if (atom->line[i] < 0)
From 1e180da83007d97c696b6b1ee40c7d726e2f99e4 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Thu, 7 Apr 2016 21:05:09 +0000
Subject: [PATCH 02/12] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@14806
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/GPU/Install.sh | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/src/GPU/Install.sh b/src/GPU/Install.sh
index e4f901c329..0ff436e95d 100644
--- a/src/GPU/Install.sh
+++ b/src/GPU/Install.sh
@@ -113,6 +113,10 @@ action pair_table_gpu.cpp pair_table.cpp
action pair_table_gpu.h pair_table.cpp
action pair_tersoff_gpu.cpp pair_tersoff.cpp
action pair_tersoff_gpu.h pair_tersoff.cpp
+action pair_tersoff_mod_gpu.cpp pair_tersoff_mod.cpp
+action pair_tersoff_mod_gpu.h pair_tersoff_mod.cpp
+action pair_tersoff_zbl_gpu.cpp pair_tersoff_zbl.cpp
+action pair_tersoff_zbl_gpu.h pair_tersoff_zbl.cpp
action pair_yukawa_colloid_gpu.cpp pair_yukawa_colloid.cpp
action pair_yukawa_colloid_gpu.h pair_yukawa_colloid.cpp
action pair_yukawa_gpu.cpp pair_yukawa.cpp
From 67a4004f23533fce1af932a7547c51c3cdd296fe Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Thu, 7 Apr 2016 21:05:19 +0000
Subject: [PATCH 03/12] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@14807
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
lib/gpu/Nvidia.makefile | 28 +++++++++++++++++
lib/gpu/Opencl.makefile | 23 +++++++++++++-
lib/gpu/lal_sw.cpp | 6 ++--
lib/gpu/lal_sw.cu | 2 +-
lib/gpu/lal_tersoff.cpp | 2 +-
lib/gpu/lal_tersoff.cu | 60 ++++++++++++++++++-------------------
lib/gpu/lal_tersoff.h | 2 +-
lib/gpu/lal_tersoff_extra.h | 8 ++---
8 files changed, 91 insertions(+), 40 deletions(-)
diff --git a/lib/gpu/Nvidia.makefile b/lib/gpu/Nvidia.makefile
index 004b387649..18efbda55a 100644
--- a/lib/gpu/Nvidia.makefile
+++ b/lib/gpu/Nvidia.makefile
@@ -70,6 +70,8 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
+ $(OBJ_DIR)/lal_tersoff_zbl.o $(OBJ_DIR)/lal_tersoff_zbl_ext.o \
+ $(OBJ_DIR)/lal_tersoff_mod.o $(OBJ_DIR)/lal_tersoff_mod_ext.o \
$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
$(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
@@ -122,6 +124,8 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
$(OBJ_DIR)/lj_gromacs.cubin $(OBJ_DIR)/lj_gromacs_cubin.h \
$(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd_cubin.h \
$(OBJ_DIR)/tersoff.cubin $(OBJ_DIR)/tersoff_cubin.h \
+ $(OBJ_DIR)/tersoff_zbl.cubin $(OBJ_DIR)/tersoff_zbl_cubin.h \
+ $(OBJ_DIR)/tersoff_mod.cubin $(OBJ_DIR)/tersoff_mod_cubin.h \
$(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \
$(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h \
$(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl_cubin.h \
@@ -705,6 +709,30 @@ $(OBJ_DIR)/lal_tersoff.o: $(ALL_H) lal_tersoff.h lal_tersoff.cpp $(OBJ_DIR)/ters
$(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h
$(CUDR) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR)
+$(OBJ_DIR)/tersoff_zbl.cubin: lal_tersoff_zbl.cu lal_precision.h lal_tersoff_zbl_extra.h lal_preprocessor.h
+ $(CUDA) --cubin -DNV_KERNEL -o $@ lal_tersoff_zbl.cu
+
+$(OBJ_DIR)/tersoff_zbl_cubin.h: $(OBJ_DIR)/tersoff_zbl.cubin $(OBJ_DIR)/tersoff_zbl.cubin
+ $(BIN2C) -c -n tersoff_zbl $(OBJ_DIR)/tersoff_zbl.cubin > $(OBJ_DIR)/tersoff_zbl_cubin.h
+
+$(OBJ_DIR)/lal_tersoff_zbl.o: $(ALL_H) lal_tersoff_zbl.h lal_tersoff_zbl.cpp $(OBJ_DIR)/tersoff_zbl_cubin.h $(OBJ_DIR)/lal_base_three.o
+ $(CUDR) -o $@ -c lal_tersoff_zbl.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_tersoff_zbl_ext.o: $(ALL_H) lal_tersoff_zbl.h lal_tersoff_zbl_ext.cpp lal_base_three.h
+ $(CUDR) -o $@ -c lal_tersoff_zbl_ext.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/tersoff_mod.cubin: lal_tersoff_mod.cu lal_precision.h lal_tersoff_mod_extra.h lal_preprocessor.h
+ $(CUDA) --cubin -DNV_KERNEL -o $@ lal_tersoff_mod.cu
+
+$(OBJ_DIR)/tersoff_mod_cubin.h: $(OBJ_DIR)/tersoff_mod.cubin $(OBJ_DIR)/tersoff_mod.cubin
+ $(BIN2C) -c -n tersoff_mod $(OBJ_DIR)/tersoff_mod.cubin > $(OBJ_DIR)/tersoff_mod_cubin.h
+
+$(OBJ_DIR)/lal_tersoff_mod.o: $(ALL_H) lal_tersoff_mod.h lal_tersoff_mod.cpp $(OBJ_DIR)/tersoff_mod_cubin.h $(OBJ_DIR)/lal_base_three.o
+ $(CUDR) -o $@ -c lal_tersoff_mod.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_tersoff_mod_ext.o: $(ALL_H) lal_tersoff_mod.h lal_tersoff_mod_ext.cpp lal_base_three.h
+ $(CUDR) -o $@ -c lal_tersoff_mod_ext.cpp -I$(OBJ_DIR)
+
$(OBJ_DIR)/coul.cubin: lal_coul.cu lal_precision.h lal_preprocessor.h
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul.cu
diff --git a/lib/gpu/Opencl.makefile b/lib/gpu/Opencl.makefile
index d7bae0f494..b33a392242 100644
--- a/lib/gpu/Opencl.makefile
+++ b/lib/gpu/Opencl.makefile
@@ -59,6 +59,8 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
+ $(OBJ_DIR)/lal_tersoff_zbl.o $(OBJ_DIR)/lal_tersoff_zbl_ext.o \
+ $(OBJ_DIR)/lal_tersoff_mod.o $(OBJ_DIR)/lal_tersoff_mod_ext.o \
$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
$(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
@@ -87,7 +89,8 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
$(OBJ_DIR)/soft_cl.h $(OBJ_DIR)/lj_coul_msm_cl.h \
$(OBJ_DIR)/lj_gromacs_cl.h $(OBJ_DIR)/dpd_cl.h \
$(OBJ_DIR)/lj_gauss_cl.h $(OBJ_DIR)/dzugutov_cl.h \
- $(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/coul_cl.h \
+ $(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/tersoff_zbl_cl.h \
+ $(OBJ_DIR)/tersoff_mod_cl.h $(OBJ_DIR)/coul_cl.h \
$(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/zbl_cl.h \
$(OBJ_DIR)/lj_cubic_cl.h
@@ -510,6 +513,24 @@ $(OBJ_DIR)/lal_tersoff.o: $(ALL_H) lal_tersoff.h lal_tersoff.cpp $(OBJ_DIR)/ter
$(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h
$(OCL) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR)
+$(OBJ_DIR)/tersoff_zbl_cl.h: lal_tersoff_zbl.cu lal_tersoff_zbl_extra.h $(PRE1_H)
+ $(BSH) ./geryon/file_to_cstr.sh tersoff_zbl $(PRE1_H) lal_tersoff_zbl_extra.h lal_tersoff_zbl.cu $(OBJ_DIR)/tersoff_zbl_cl.h;
+
+$(OBJ_DIR)/lal_tersoff_zbl.o: $(ALL_H) lal_tersoff_zbl.h lal_tersoff_zbl.cpp $(OBJ_DIR)/tersoff_zbl_cl.h $(OBJ_DIR)/tersoff_zbl_cl.h $(OBJ_DIR)/lal_base_three.o
+ $(OCL) -o $@ -c lal_tersoff_zbl.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_tersoff_zbl_ext.o: $(ALL_H) lal_tersoff_zbl.h lal_tersoff_zbl_ext.cpp lal_base_three.h
+ $(OCL) -o $@ -c lal_tersoff_zbl_ext.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/tersoff_mod_cl.h: lal_tersoff_mod.cu lal_tersoff_mod_extra.h $(PRE1_H)
+ $(BSH) ./geryon/file_to_cstr.sh tersoff_mod $(PRE1_H) lal_tersoff_mod_extra.h lal_tersoff_mod.cu $(OBJ_DIR)/tersoff_mod_cl.h;
+
+$(OBJ_DIR)/lal_tersoff_mod.o: $(ALL_H) lal_tersoff_mod.h lal_tersoff_mod.cpp $(OBJ_DIR)/tersoff_mod_cl.h $(OBJ_DIR)/tersoff_mod_cl.h $(OBJ_DIR)/lal_base_three.o
+ $(OCL) -o $@ -c lal_tersoff_mod.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_tersoff_mod_ext.o: $(ALL_H) lal_tersoff_mod.h lal_tersoff_mod_ext.cpp lal_base_three.h
+ $(OCL) -o $@ -c lal_tersoff_mod_ext.cpp -I$(OBJ_DIR)
+
$(OBJ_DIR)/coul_cl.h: lal_coul.cu $(PRE1_H)
$(BSH) ./geryon/file_to_cstr.sh coul $(PRE1_H) lal_coul.cu $(OBJ_DIR)/coul_cl.h;
diff --git a/lib/gpu/lal_sw.cpp b/lib/gpu/lal_sw.cpp
index f14b0a3438..1f68616b0e 100644
--- a/lib/gpu/lal_sw.cpp
+++ b/lib/gpu/lal_sw.cpp
@@ -142,7 +142,7 @@ int SWT::init(const int ntypes, const int nlocal, const int nall, const int max_
ucl_copy(elem2param,dview_elem2param,false);
UCL_H_Vec dview_map(lj_types, *(this->ucl_device), UCL_WRITE_ONLY);
- for (int i = 0; i < lj_types; i++)
+ for (int i = 0; i < ntypes; i++)
dview_map[i] = host_map[i];
map.alloc(lj_types,*(this->ucl_device), UCL_READ_ONLY);
@@ -196,13 +196,15 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) {
int GX=static_cast(ceil(static_cast(this->ans->inum())/
(BX/this->_threads_per_atom)));
+ // this->_nbor_data == nbor->dev_packed for gpu_nbor == 0 and tpa == 1
+ // this->_nbor_data == nbor->dev_nbor for gpu_nbor == 1
int ainum=this->ans->inum();
int nbor_pitch=this->nbor->nbor_pitch();
this->time_pair.start();
this->k_pair.set_size(GX,BX);
this->k_pair.run(&this->atom->x, &sw1, &sw2, &sw3,
&map, &elem2param, &_nelements,
- &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+ &this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->ans->force, &this->ans->engv,
&eflag, &vflag, &ainum, &nbor_pitch,
&this->_threads_per_atom);
diff --git a/lib/gpu/lal_sw.cu b/lib/gpu/lal_sw.cu
index 4492e5f60a..1e358fb6f7 100644
--- a/lib/gpu/lal_sw.cu
+++ b/lib/gpu/lal_sw.cu
@@ -35,7 +35,7 @@ texture sw3_tex;
#define sw3_tex sw3
#endif
-#define THIRD (numtyp)0.66666667
+#define THIRD (numtyp)0.66666666666666666667
//#define THREE_CONCURRENT
diff --git a/lib/gpu/lal_tersoff.cpp b/lib/gpu/lal_tersoff.cpp
index fc7ebc4f08..bc89c53765 100644
--- a/lib/gpu/lal_tersoff.cpp
+++ b/lib/gpu/lal_tersoff.cpp
@@ -178,7 +178,7 @@ int TersoffT::init(const int ntypes, const int nlocal, const int nall, const int
ucl_copy(elem2param,dview_elem2param,false);
UCL_H_Vec dview_map(lj_types, *(this->ucl_device), UCL_WRITE_ONLY);
- for (int i = 0; i < lj_types; i++)
+ for (int i = 0; i < ntypes; i++)
dview_map[i] = host_map[i];
map.alloc(lj_types,*(this->ucl_device), UCL_READ_ONLY);
diff --git a/lib/gpu/lal_tersoff.cu b/lib/gpu/lal_tersoff.cu
index 50202c0ee7..e98a454f58 100644
--- a/lib/gpu/lal_tersoff.cu
+++ b/lib/gpu/lal_tersoff.cu
@@ -43,7 +43,7 @@ texture ts5_tex;
//#define THREE_CONCURRENT
-#define THIRD (numtyp)0.66666667
+#define TWOTHIRD (numtyp)0.66666666666666666667
#define zeta_idx(nbor_mem, packed_mem, nbor_pitch, n_stride, t_per_atom, \
i, nbor_j, offset_j, idx) \
@@ -108,6 +108,7 @@ texture ts5_tex;
#define store_zeta(z, tid, t_per_atom, offset) \
if (t_per_atom>1) { \
+ __local acctyp red_acc[BLOCK_PAIR]; \
red_acc[tid]=z; \
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
if (offset < s) { \
@@ -180,7 +181,7 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
const __global int *restrict map,
const __global int *restrict elem2param,
const int nelements, const int nparams,
- __global numtyp4 * zetaij,
+ __global acctyp4 * zetaij,
const __global int * dev_nbor,
const __global int * dev_packed,
const int eflag, const int nall, const int inum,
@@ -205,9 +206,7 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
ts5[tid]=ts5_in[tid];
}
- numtyp z = (numtyp)0;
- __local numtyp red_acc[BLOCK_PAIR];
- if (tid cutsq[ijparam]) continue;
// compute zeta_ij
- z = (numtyp)0;
+ z = (acctyp)0;
int nbor_k = nborj_start-offset_j+offset_k;
for ( ; nbor_k < nbor_end; nbor_k+=n_stride) {
@@ -310,7 +309,7 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
force_zeta(ijparam_bigb, ijparam_bigr, ijparam_bigd, ijparam_lam2,
ijparam_beta, ijparam_powern, ijparam_c1, ijparam_c2, ijparam_c3,
ijparam_c4, rsq1, z, eflag, fpfeng);
- numtyp4 zij;
+ acctyp4 zij;
zij.x = fpfeng[0];
zij.y = fpfeng[1];
zij.z = fpfeng[2];
@@ -426,7 +425,7 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_,
const __global int *restrict map,
const __global int *restrict elem2param,
const int nelements, const int nparams,
- const __global numtyp4 *restrict zetaij,
+ const __global acctyp4 *restrict zetaij,
const __global int * dev_nbor,
const __global int * dev_packed,
__global acctyp4 *restrict ans,
@@ -501,7 +500,7 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_,
int idx;
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
i, nbor_j, offset_j, idx);
- numtyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex);
+ acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex);
numtyp force = zeta_ij.x*tpainv;
numtyp prefactor = zeta_ij.y;
f.x += delr1[0]*force;
@@ -568,7 +567,7 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_,
if (vflag>0) {
acctyp v[6];
numtyp pre = (numtyp)2.0;
- if (evatom==1) pre = THIRD;
+ if (evatom==1) pre = TWOTHIRD;
v[0] = pre*(delr1[0]*fj[0] + delr2[0]*fk[0]);
v[1] = pre*(delr1[1]*fj[1] + delr2[1]*fk[1]);
v[2] = pre*(delr1[2]*fj[2] + delr2[2]*fk[2]);
@@ -595,7 +594,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
const __global int *restrict map,
const __global int *restrict elem2param,
const int nelements, const int nparams,
- const __global numtyp4 *restrict zetaij,
+ const __global acctyp4 *restrict zetaij,
const __global int * dev_nbor,
const __global int * dev_packed,
__global acctyp4 *restrict ans,
@@ -710,7 +709,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
int idx;
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
j, ijnum, offset_kf, idx);
- numtyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
+ acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
numtyp force = zeta_ji.x*tpainv;
numtyp prefactor_ji = zeta_ji.y;
f.x += delr1[0]*force;
@@ -776,7 +775,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
int idx;
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
j, nbor_k, offset_k, idx);
- numtyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
+ acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
numtyp prefactor_jk = zeta_jk.y;
int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
ts1_param = ts1[jkiparam]; //fetch4(ts1_jkiparam,jkiparam,ts1_tex);
@@ -816,7 +815,7 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
const __global int *restrict map,
const __global int *restrict elem2param,
const int nelements, const int nparams,
- const __global numtyp4 *restrict zetaij,
+ const __global acctyp4 *restrict zetaij,
const __global int * dev_nbor,
const __global int * dev_packed,
__global acctyp4 *restrict ans,
@@ -931,9 +930,9 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
int idx;
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
j, ijnum, offset_kf, idx);
- numtyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
+ acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
numtyp force = zeta_ji.x*tpainv;
- numtyp prefactor = zeta_ji.y;
+ numtyp prefactor_ji = zeta_ji.y;
f.x += delr1[0]*force;
f.y += delr1[1]*force;
f.z += delr1[2]*force;
@@ -987,24 +986,24 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
h = ts4_param.z;
gamma = ts4_param.w;
attractive(bigr, bigd, powermint, lam3, c, d, h, gamma,
- prefactor, r1, r1inv, r2, r2inv, mdelr1, delr2, fi, fj, fk);
+ prefactor_ji, r1, r1inv, r2, r2inv, mdelr1, delr2, fi, fj, fk);
f.x += fj[0];
f.y += fj[1];
f.z += fj[2];
- virial[0] += THIRD*(mdelr1[0]*fj[0] + delr2[0]*fk[0]);
- virial[1] += THIRD*(mdelr1[1]*fj[1] + delr2[1]*fk[1]);
- virial[2] += THIRD*(mdelr1[2]*fj[2] + delr2[2]*fk[2]);
- virial[3] += THIRD*(mdelr1[0]*fj[1] + delr2[0]*fk[1]);
- virial[4] += THIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]);
- virial[5] += THIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]);
+ virial[0] += TWOTHIRD*(mdelr1[0]*fj[0] + delr2[0]*fk[0]);
+ virial[1] += TWOTHIRD*(mdelr1[1]*fj[1] + delr2[1]*fk[1]);
+ virial[2] += TWOTHIRD*(mdelr1[2]*fj[2] + delr2[2]*fk[2]);
+ virial[3] += TWOTHIRD*(mdelr1[0]*fj[1] + delr2[0]*fk[1]);
+ virial[4] += TWOTHIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]);
+ virial[5] += TWOTHIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]);
//int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
//int idx = kk*n_stride + j*t_per_atom + offset_k;
int idx;
zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
j, nbor_k, offset_k, idx);
- numtyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
+ acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
numtyp prefactor_jk = zeta_jk.y;
int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
@@ -1025,12 +1024,13 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
f.y += fk[1];
f.z += fk[2];
- virial[0] += THIRD*(delr2[0]*fj[0] + mdelr1[0]*fk[0]);
- virial[1] += THIRD*(delr2[1]*fj[1] + mdelr1[1]*fk[1]);
- virial[2] += THIRD*(delr2[2]*fj[2] + mdelr1[2]*fk[2]);
- virial[3] += THIRD*(delr2[0]*fj[1] + mdelr1[0]*fk[1]);
- virial[4] += THIRD*(delr2[0]*fj[2] + mdelr1[0]*fk[2]);
- virial[5] += THIRD*(delr2[1]*fj[2] + mdelr1[1]*fk[2]);
+ virial[0] += TWOTHIRD*(delr2[0]*fj[0] + mdelr1[0]*fk[0]);
+ virial[1] += TWOTHIRD*(delr2[1]*fj[1] + mdelr1[1]*fk[1]);
+ virial[2] += TWOTHIRD*(delr2[2]*fj[2] + mdelr1[2]*fk[2]);
+ virial[3] += TWOTHIRD*(delr2[0]*fj[1] + mdelr1[0]*fk[1]);
+ virial[4] += TWOTHIRD*(delr2[0]*fj[2] + mdelr1[0]*fk[2]);
+ virial[5] += TWOTHIRD*(delr2[1]*fj[2] + mdelr1[1]*fk[2]);
+
}
} // for nbor
diff --git a/lib/gpu/lal_tersoff.h b/lib/gpu/lal_tersoff.h
index beae6f5e08..c72ebd7286 100644
--- a/lib/gpu/lal_tersoff.h
+++ b/lib/gpu/lal_tersoff.h
@@ -100,7 +100,7 @@ class Tersoff : public BaseThree {
/// Per-atom arrays:
/// zetaij.x = force, zetaij.y = prefactor, zetaij.z = evdwl,
/// zetaij.w = zetaij
- UCL_D_Vec _zetaij;
+ UCL_D_Vec _zetaij;
UCL_Kernel k_zeta;
UCL_Texture ts1_tex, ts2_tex, ts3_tex, ts4_tex, ts5_tex;
diff --git a/lib/gpu/lal_tersoff_extra.h b/lib/gpu/lal_tersoff_extra.h
index 672a767783..21a0315f71 100644
--- a/lib/gpu/lal_tersoff_extra.h
+++ b/lib/gpu/lal_tersoff_extra.h
@@ -227,7 +227,7 @@ ucl_inline void ters_zetaterm_d(const numtyp prefactor,
if ((int)param_powermint == 3) tmp = t*t*t;
else tmp = t;
- if (tmp > (numtyp)69.0776) ex_delr = (acctyp)1.e30;
+ if (tmp > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
else ex_delr = ucl_exp(tmp);
@@ -295,7 +295,7 @@ ucl_inline void ters_zetaterm_d_fi(const numtyp prefactor,
if ((int)param_powermint == 3) tmp = t*t*t;
else tmp = t;
- if (tmp > (numtyp)69.0776) ex_delr = (acctyp)1.e30;
+ if (tmp > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
else ex_delr = ucl_exp(tmp);
@@ -344,7 +344,7 @@ ucl_inline void ters_zetaterm_d_fj(const numtyp prefactor,
if ((int)param_powermint == 3) tmp = t*t*t;
else tmp = t;
- if (tmp > (numtyp)69.0776) ex_delr = (acctyp)1.e30;
+ if (tmp > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
else ex_delr = ucl_exp(tmp);
@@ -391,7 +391,7 @@ ucl_inline void ters_zetaterm_d_fk(const numtyp prefactor,
if ((int)param_powermint == 3) tmp = t*t*t;
else tmp = t;
- if (tmp > (numtyp)69.0776) ex_delr = (acctyp)1.e30;
+ if (tmp > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
else if (tmp < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
else ex_delr = ucl_exp(tmp);
From 008896a77d6cb6ea989ef81ef712c7ce0329c49f Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Thu, 7 Apr 2016 21:10:37 +0000
Subject: [PATCH 04/12] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@14808
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
lib/gpu/lal_tersoff_mod.cpp | 456 +++++++++++++
lib/gpu/lal_tersoff_mod.cu | 1070 +++++++++++++++++++++++++++++++
lib/gpu/lal_tersoff_mod.h | 118 ++++
lib/gpu/lal_tersoff_mod_ext.cpp | 135 ++++
lib/gpu/lal_tersoff_mod_extra.h | 627 ++++++++++++++++++
lib/gpu/lal_tersoff_zbl.cpp | 482 ++++++++++++++
lib/gpu/lal_tersoff_zbl.cu | 1065 ++++++++++++++++++++++++++++++
lib/gpu/lal_tersoff_zbl.h | 123 ++++
lib/gpu/lal_tersoff_zbl_ext.cpp | 146 +++++
lib/gpu/lal_tersoff_zbl_extra.h | 690 ++++++++++++++++++++
10 files changed, 4912 insertions(+)
create mode 100644 lib/gpu/lal_tersoff_mod.cpp
create mode 100644 lib/gpu/lal_tersoff_mod.cu
create mode 100644 lib/gpu/lal_tersoff_mod.h
create mode 100644 lib/gpu/lal_tersoff_mod_ext.cpp
create mode 100644 lib/gpu/lal_tersoff_mod_extra.h
create mode 100644 lib/gpu/lal_tersoff_zbl.cpp
create mode 100644 lib/gpu/lal_tersoff_zbl.cu
create mode 100644 lib/gpu/lal_tersoff_zbl.h
create mode 100644 lib/gpu/lal_tersoff_zbl_ext.cpp
create mode 100644 lib/gpu/lal_tersoff_zbl_extra.h
diff --git a/lib/gpu/lal_tersoff_mod.cpp b/lib/gpu/lal_tersoff_mod.cpp
new file mode 100644
index 0000000000..bfcc9c3bd3
--- /dev/null
+++ b/lib/gpu/lal_tersoff_mod.cpp
@@ -0,0 +1,456 @@
+/***************************************************************************
+ tersoff_mod.cpp
+ -------------------
+ Trung Dac Nguyen
+
+ Class for acceleration of the tersoff/mod pair style.
+
+ __________________________________________________________________________
+ This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+ begin :
+ email : ndactrung@gmail.com
+ ***************************************************************************/
+
+#if defined(USE_OPENCL)
+#include "tersoff_mod_cl.h"
+#elif defined(USE_CUDART)
+const char *tersoff_mod=0;
+#else
+#include "tersoff_mod_cubin.h"
+#endif
+
+#include "lal_tersoff_mod.h"
+#include
+using namespace LAMMPS_AL;
+#define TersoffMT TersoffMod
+
+extern Device device;
+
+template
+TersoffMT::TersoffMod() : BaseThree(), _allocated(false) {
+}
+
+template
+TersoffMT::~TersoffMod() {
+ clear();
+}
+
+template
+int TersoffMT::bytes_per_atom(const int max_nbors) const {
+ return this->bytes_per_atom_atomic(max_nbors);
+}
+
+template
+int TersoffMT::init(const int ntypes, const int nlocal, const int nall, const int max_nbors,
+ const double cell_size, const double gpu_split, FILE *_screen,
+ int* host_map, const int nelements, int*** host_elem2param, const int nparams,
+ const double* lam1, const double* lam2, const double* lam3,const double* powermint,
+ const double* biga, const double* bigb, const double* bigr, const double* bigd,
+ const double* c1, const double* c2, const double* c3, const double* c4,
+ const double* c5, const double* h, const double* beta, const double* powern,
+ const double* powern_del, const double* ca1, const double* host_cutsq)
+{
+ int success;
+ success=this->init_three(nlocal,nall,max_nbors,0,cell_size,gpu_split,
+ _screen,tersoff_mod,"k_tersoff_mod_repulsive",
+ "k_tersoff_mod_three_center", "k_tersoff_mod_three_end");
+ if (success!=0)
+ return success;
+
+ int ef_nall=nall;
+ if (ef_nall==0)
+ ef_nall=2000;
+ _zetaij.alloc(ef_nall*max_nbors,*(this->ucl_device),UCL_READ_WRITE);
+
+ k_zeta.set_function(*(this->pair_program),"k_tersoff_mod_zeta");
+
+ // If atom type constants fit in shared memory use fast kernel
+ int lj_types=ntypes;
+ shared_types=false;
+ int max_shared_types=this->device->max_shared_types();
+ if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
+ lj_types=max_shared_types;
+ shared_types=true;
+ }
+ _lj_types=lj_types;
+
+ _nparams = nparams;
+ _nelements = nelements;
+
+ UCL_H_Vec dview(nparams,*(this->ucl_device),
+ UCL_WRITE_ONLY);
+
+ for (int i=0; iucl_device),UCL_READ_ONLY);
+
+ for (int i=0; i(lam1[i]);
+ dview[i].y=static_cast(lam2[i]);
+ dview[i].z=static_cast(lam3[i]);
+ dview[i].w=static_cast(powermint[i]);
+ }
+
+ ucl_copy(ts1,dview,false);
+ ts1_tex.get_texture(*(this->pair_program),"ts1_tex");
+ ts1_tex.bind_float(ts1,4);
+
+ ts2.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
+
+ for (int i=0; i(biga[i]);
+ dview[i].y=static_cast(bigb[i]);
+ dview[i].z=static_cast(bigr[i]);
+ dview[i].w=static_cast(bigd[i]);
+ }
+
+ ucl_copy(ts2,dview,false);
+ ts2_tex.get_texture(*(this->pair_program),"ts2_tex");
+ ts2_tex.bind_float(ts2,4);
+
+ ts3.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
+
+ for (int i=0; i(beta[i]);
+ dview[i].y=static_cast(powern[i]);
+ dview[i].z=static_cast(powern_del[i]);
+ dview[i].w=static_cast(ca1[i]);
+ }
+
+ ucl_copy(ts3,dview,false);
+ ts3_tex.get_texture(*(this->pair_program),"ts3_tex");
+ ts3_tex.bind_float(ts3,4);
+
+ ts4.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
+
+ for (int i=0; i(c1[i]);
+ dview[i].y=static_cast(c2[i]);
+ dview[i].z=static_cast(c3[i]);
+ dview[i].w=static_cast(c4[i]);
+ }
+
+ ucl_copy(ts4,dview,false);
+ ts4_tex.get_texture(*(this->pair_program),"ts4_tex");
+ ts4_tex.bind_float(ts4,4);
+
+ ts5.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
+
+ for (int i=0; i(c5[i]);
+ dview[i].y=static_cast(h[i]);
+ dview[i].z=(numtyp)0;
+ dview[i].w=(numtyp)0;
+ }
+
+ ucl_copy(ts5,dview,false);
+ ts5_tex.get_texture(*(this->pair_program),"ts5_tex");
+ ts5_tex.bind_float(ts5,4);
+
+ UCL_H_Vec cutsq_view(nparams,*(this->ucl_device),
+ UCL_WRITE_ONLY);
+ for (int i=0; i(host_cutsq[i]);
+ cutsq.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
+ ucl_copy(cutsq,cutsq_view,false);
+
+ UCL_H_Vec dview_elem2param(nelements*nelements*nelements,
+ *(this->ucl_device), UCL_WRITE_ONLY);
+
+ elem2param.alloc(nelements*nelements*nelements,*(this->ucl_device),
+ UCL_READ_ONLY);
+
+ for (int i = 0; i < nelements; i++)
+ for (int j = 0; j < nelements; j++)
+ for (int k = 0; k < nelements; k++) {
+ int idx = i*nelements*nelements+j*nelements+k;
+ dview_elem2param[idx] = host_elem2param[i][j][k];
+ }
+
+ ucl_copy(elem2param,dview_elem2param,false);
+
+ UCL_H_Vec dview_map(lj_types, *(this->ucl_device), UCL_WRITE_ONLY);
+ for (int i = 0; i < ntypes; i++)
+ dview_map[i] = host_map[i];
+
+ map.alloc(lj_types,*(this->ucl_device), UCL_READ_ONLY);
+ ucl_copy(map,dview_map,false);
+
+ _allocated=true;
+ this->_max_bytes=ts1.row_bytes()+ts2.row_bytes()+ts3.row_bytes()+
+ ts4.row_bytes()+cutsq.row_bytes()+
+ map.row_bytes()+elem2param.row_bytes()+_zetaij.row_bytes();
+ return 0;
+}
+
+template
+void TersoffMT::clear() {
+ if (!_allocated)
+ return;
+ _allocated=false;
+
+ ts1.clear();
+ ts2.clear();
+ ts3.clear();
+ ts4.clear();
+ ts5.clear();
+ cutsq.clear();
+ map.clear();
+ elem2param.clear();
+ _zetaij.clear();
+
+ k_zeta.clear();
+
+ this->clear_atomic();
+}
+
+template
+double TersoffMT::host_memory_usage() const {
+ return this->host_memory_usage_atomic()+sizeof(TersoffMod);
+}
+
+#define KTHREADS this->_threads_per_atom
+#define JTHREADS this->_threads_per_atom
+// ---------------------------------------------------------------------------
+// Copy nbor list from host if necessary and then calculate forces, virials,..
+// ---------------------------------------------------------------------------
+template
+void TersoffMT::compute(const int f_ago, const int nlocal, const int nall,
+ const int nlist, double **host_x, int *host_type,
+ int *ilist, int *numj, int **firstneigh,
+ const bool eflag, const bool vflag, const bool eatom,
+ const bool vatom, int &host_start,
+ const double cpu_time, bool &success) {
+ this->acc_timers();
+ if (nlist==0) {
+ host_start=0;
+ // Make sure textures are correct if reallocated by a different hybrid style
+ this->resize_atom(0,nall,success);
+ this->zero_timers();
+ return;
+ }
+
+ int ago=this->hd_balancer.ago_first(f_ago);
+ int inum=this->hd_balancer.balance(ago,nlocal,cpu_time);
+ this->ans->inum(inum);
+ #ifdef THREE_CONCURRENT
+ this->ans2->inum(inum);
+ #endif
+ host_start=inum;
+
+ if (ago==0) {
+ this->reset_nbors(nall, inum, nlist, ilist, numj, firstneigh, success);
+ if (!success)
+ return;
+ _max_nbors = this->nbor->max_nbor_loop(nlist,numj,ilist);
+ }
+
+ this->atom->cast_x_data(host_x,host_type);
+ this->hd_balancer.start_timer();
+ this->atom->add_x_data(host_x,host_type);
+
+ // re-allocate zetaij if necessary
+ if (nall*_max_nbors > _zetaij.cols()) {
+ int _nmax=static_cast(static_cast(nall)*1.10);
+ _zetaij.resize(_max_nbors*_nmax);
+ }
+
+ int _eflag;
+ if (eflag)
+ _eflag=1;
+ else
+ _eflag=0;
+
+ int ainum=nall;
+ int nbor_pitch=this->nbor->nbor_pitch();
+ int BX=this->block_pair();
+ int GX=static_cast(ceil(static_cast(ainum)/
+ (BX/(JTHREADS*KTHREADS))));
+
+ this->k_zeta.set_size(GX,BX);
+ this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &cutsq,
+ &map, &elem2param, &_nelements, &_nparams, &_zetaij,
+ &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+ &_eflag, &nall, &ainum, &nbor_pitch, &this->_threads_per_atom);
+
+ int evatom=0;
+ if (eatom || vatom)
+ evatom=1;
+ #ifdef THREE_CONCURRENT
+ this->ucl_device->sync();
+ #endif
+ loop(eflag,vflag,evatom);
+ this->ans->copy_answers(eflag,vflag,eatom,vatom,ilist);
+ this->device->add_ans_object(this->ans);
+ #ifdef THREE_CONCURRENT
+ this->ans2->copy_answers(eflag,vflag,eatom,vatom,ilist);
+ this->device->add_ans_object(this->ans2);
+ #endif
+ this->hd_balancer.stop_timer();
+}
+
+// ---------------------------------------------------------------------------
+// Device-neighbor path: rebuild the nbor list on the GPU when ago==0, run the zeta kernel, then loop() and queue the answers
+// ---------------------------------------------------------------------------
+template
+int ** TersoffMT::compute(const int ago, const int inum_full,
+ const int nall, double **host_x, int *host_type,
+ double *sublo, double *subhi, tagint *tag,
+ int **nspecial, tagint **special, const bool eflag,
+ const bool vflag, const bool eatom,
+ const bool vatom, int &host_start,
+ int **ilist, int **jnum,
+ const double cpu_time, bool &success) {
+ this->acc_timers();
+
+ if (inum_full==0) {  // no atoms: report host_start=0 and return NULL without launching any kernel
+ host_start=0;
+ // Make sure textures are correct if realloc by a different hybrid style
+ this->resize_atom(0,nall,success);
+ this->zero_timers();
+ return NULL;
+ }
+
+ this->hd_balancer.balance(cpu_time);
+ int inum=this->hd_balancer.get_gpu_count(ago,inum_full);  // inum is handed to the answer objects and reported back as host_start
+ this->ans->inum(inum);
+ #ifdef THREE_CONCURRENT
+ this->ans2->inum(inum);
+ #endif
+ host_start=inum;
+
+ // Build neighbor list on GPU if necessary
+ if (ago==0) {
+ _max_nbors = this->build_nbor_list(inum, inum_full-inum, nall, host_x, host_type,
+ sublo, subhi, tag, nspecial, special, success);  // return value is cached to size zetaij below
+ if (!success)
+ return NULL;
+ this->hd_balancer.start_timer();
+ } else {  // list is reused: only positions/types are cast and added again
+ this->atom->cast_x_data(host_x,host_type);
+ this->hd_balancer.start_timer();
+ this->atom->add_x_data(host_x,host_type);
+ }
+ *ilist=this->nbor->host_ilist.begin();  // hand the caller the host-side ilist ...
+ *jnum=this->nbor->host_acc.begin();  // ... and per-atom count arrays held by the neighbor object
+
+ // re-allocate zetaij if necessary
+ if (nall*_max_nbors > _zetaij.cols()) {  // need _max_nbors slots for each of the nall atoms
+ int _nmax=static_cast(static_cast(nall)*1.10);  // grow with 10% headroom in the atom count
+ _zetaij.resize(_max_nbors*_nmax);
+ }
+
+ int _eflag;  // integer copy of the bool flag for the kernel argument list
+ if (eflag)
+ _eflag=1;
+ else
+ _eflag=0;
+
+ int ainum=nall;  // the zeta kernel is sized and run over all nall atoms, not just inum
+ int nbor_pitch=this->nbor->nbor_pitch();
+ int BX=this->block_pair();
+ int GX=static_cast(ceil(static_cast(ainum)/
+ (BX/(JTHREADS*KTHREADS))));  // atoms per block = BX / (threads_per_atom squared)
+
+ this->k_zeta.set_size(GX,BX);
+ this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &cutsq,
+ &map, &elem2param, &_nelements, &_nparams, &_zetaij,
+ &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+ &_eflag, &nall, &ainum, &nbor_pitch, &this->_threads_per_atom);
+
+ int evatom=0;  // 1 when any per-atom quantity (energy or virial) was requested
+ if (eatom || vatom)
+ evatom=1;
+ #ifdef THREE_CONCURRENT
+ this->ucl_device->sync();  // NOTE(review): presumably ensures zetaij is complete before the three-body kernels read it - confirm
+ #endif
+ loop(eflag,vflag,evatom);
+ this->ans->copy_answers(eflag,vflag,eatom,vatom);
+ this->device->add_ans_object(this->ans);
+ #ifdef THREE_CONCURRENT
+ this->ans2->copy_answers(eflag,vflag,eatom,vatom);
+ this->device->add_ans_object(this->ans2);
+ #endif
+ this->hd_balancer.stop_timer();
+
+ return this->nbor->host_jlist.begin()-host_start;  // pointer shifted back by host_start (= inum); NOTE(review): presumably so indexing by atom index >= inum hits the host-handled rows - confirm
+}
+
+// ---------------------------------------------------------------------------
+// Launch the pair kernel and the three-body kernels that write the force and engv answer buffers
+// ---------------------------------------------------------------------------
+template
+void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) {
+ // Compute the block size and grid size to keep all cores busy
+ int BX=this->block_pair();
+ int eflag, vflag;  // integer copies of the bool flags for the kernel argument lists
+ if (_eflag)
+ eflag=1;
+ else
+ eflag=0;
+
+ if (_vflag)
+ vflag=1;
+ else
+ vflag=0;
+
+ int ainum=this->ans->inum();  // atom count stored in the answer object by compute()
+ int nbor_pitch=this->nbor->nbor_pitch();
+ int GX=static_cast(ceil(static_cast(this->ans->inum())/
+ (BX/this->_threads_per_atom)));  // pair kernel: threads_per_atom threads per atom
+
+ this->time_pair.start();  // one timer spans all kernels launched below
+ this->k_pair.set_size(GX,BX);
+ this->k_pair.run(&this->atom->x, &ts1, &ts2, &cutsq,
+ &map, &elem2param, &_nelements, &_nparams,
+ &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+ &this->ans->force, &this->ans->engv,
+ &eflag, &vflag, &ainum, &nbor_pitch,
+ &this->_threads_per_atom);
+
+ BX=this->block_size();  // the three-body launches use block_size() instead of block_pair()
+ GX=static_cast(ceil(static_cast(this->ans->inum())/
+ (BX/(KTHREADS*JTHREADS))));  // three-body kernels: threads_per_atom squared threads per atom
+ this->k_three_center.set_size(GX,BX);
+ this->k_three_center.run(&this->atom->x, &ts1, &ts2, &ts4, &ts5, &cutsq,
+ &map, &elem2param, &_nelements, &_nparams, &_zetaij,
+ &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+ &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum,
+ &nbor_pitch, &this->_threads_per_atom, &evatom);
+
+ Answer *end_ans;  // answer object the three_end kernels write into
+ #ifdef THREE_CONCURRENT
+ end_ans=this->ans2;
+ #else
+ end_ans=this->ans;
+ #endif
+ if (evatom!=0) {  // per-atom output requested: run the vatom variant of the end kernel
+ this->k_three_end_vatom.set_size(GX,BX);
+ this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &ts5, &cutsq,
+ &map, &elem2param, &_nelements, &_nparams, &_zetaij,
+ &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+ &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
+ &nbor_pitch, &this->_threads_per_atom);
+
+ } else {
+ this->k_three_end.set_size(GX,BX);
+ this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &ts5, &cutsq,
+ &map, &elem2param, &_nelements, &_nparams, &_zetaij,
+ &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+ &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
+ &nbor_pitch, &this->_threads_per_atom);
+ }
+
+ this->time_pair.stop();
+}
+
+template class TersoffMod;
+
diff --git a/lib/gpu/lal_tersoff_mod.cu b/lib/gpu/lal_tersoff_mod.cu
new file mode 100644
index 0000000000..ba4ad32005
--- /dev/null
+++ b/lib/gpu/lal_tersoff_mod.cu
@@ -0,0 +1,1070 @@
+// **************************************************************************
+// tersoff_mod.cu
+// -------------------
+// Trung Dac Nguyen
+//
+// Device code for acceleration of the tersoff/mod pair style
+//
+// __________________________________________________________________________
+// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+// begin :
+// email : ndactrung@gmail.com
+// ***************************************************************************/
+
+#ifdef NV_KERNEL  // NV_KERNEL build: the *_tex names are declared as texture objects
+#include "lal_tersoff_mod_extra.h"
+
+#ifndef _DOUBLE_DOUBLE  // texture declarations used when _DOUBLE_DOUBLE is not defined
+texture pos_tex;
+texture ts1_tex;
+texture ts2_tex;
+texture ts3_tex;
+texture ts4_tex;
+texture ts5_tex;
+#else  // texture declarations used when _DOUBLE_DOUBLE is defined
+texture pos_tex;
+texture ts1_tex;
+texture ts2_tex;
+texture ts3_tex;
+texture ts4_tex;
+texture ts5_tex;
+#endif
+
+#else  // other builds: the *_tex names are plain aliases (pos_tex -> x_, tsN_tex -> tsN)
+#define pos_tex x_
+#define ts1_tex ts1
+#define ts2_tex ts2
+#define ts3_tex ts3
+#define ts4_tex ts4
+#define ts5_tex ts5
+#endif
+
+//#define THREE_CONCURRENT
+
+#define TWOTHIRD (numtyp)0.66666666666666666667
+// zeta_idx: set idx to the zetaij slot for neighbor entry nbor_j of atom i; recomputed from the strided layout when nbor_mem==packed_mem, else nbor_j itself
+#define zeta_idx(nbor_mem, packed_mem, nbor_pitch, n_stride, t_per_atom, \
+ i, nbor_j, offset_j, idx) \
+ if (nbor_mem==packed_mem) { \
+ int jj = (nbor_j-offset_j-2*nbor_pitch)/n_stride; \
+ idx = jj*n_stride + i*t_per_atom + offset_j; \
+ } else { \
+ idx = nbor_j; \
+ }
+
+#if (ARCH < 300)
+// store_answers_p (ARCH < 300): reduce f/energy/virial across a thread group through __local red_acc, then the offset==0 thread ADDS them into ans/engv (energy and virial scaled by 0.5)
+#define store_answers_p(f, energy, virial, ii, inum, tid, t_per_atom, \
+ offset, eflag, vflag, ans, engv) \
+ if (t_per_atom>1) { \
+ __local acctyp red_acc[6][BLOCK_PAIR]; \
+ red_acc[0][tid]=f.x; \
+ red_acc[1][tid]=f.y; \
+ red_acc[2][tid]=f.z; \
+ red_acc[3][tid]=energy; \
+ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
+ if (offset < s) { \
+ for (int r=0; r<4; r++) \
+ red_acc[r][tid] += red_acc[r][tid+s]; \
+ } \
+ } \
+ f.x=red_acc[0][tid]; \
+ f.y=red_acc[1][tid]; \
+ f.z=red_acc[2][tid]; \
+ energy=red_acc[3][tid]; \
+ if (vflag>0) { \
+ for (int r=0; r<6; r++) \
+ red_acc[r][tid]=virial[r]; \
+ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
+ if (offset < s) { \
+ for (int r=0; r<6; r++) \
+ red_acc[r][tid] += red_acc[r][tid+s]; \
+ } \
+ } \
+ for (int r=0; r<6; r++) \
+ virial[r]=red_acc[r][tid]; \
+ } \
+ } \
+ if (offset==0) { \
+ int ei=ii; \
+ if (eflag>0) { \
+ engv[ei]+=energy*(acctyp)0.5; \
+ ei+=inum; \
+ } \
+ if (vflag>0) { \
+ for (int i=0; i<6; i++) { \
+ engv[ei]+=virial[i]*(acctyp)0.5; \
+ ei+=inum; \
+ } \
+ } \
+ acctyp4 old=ans[ii]; \
+ old.x+=f.x; \
+ old.y+=f.y; \
+ old.z+=f.z; \
+ ans[ii]=old; \
+ }
+// store_zeta (ARCH < 300): halving-stride sum of z over a thread group through __local red_acc; each thread reads back red_acc[tid] (assumes t_per_atom is a power of two - TODO confirm)
+#define store_zeta(z, tid, t_per_atom, offset) \
+ if (t_per_atom>1) { \
+ __local acctyp red_acc[BLOCK_PAIR]; \
+ red_acc[tid]=z; \
+ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
+ if (offset < s) { \
+ red_acc[tid] += red_acc[tid+s]; \
+ } \
+ } \
+ z=red_acc[tid]; \
+ }
+
+#else
+// store_answers_p (ARCH >= 300): same accumulation into ans/engv as the __local version, but the group reduction uses shfl_xor exchanges instead of shared memory
+#define store_answers_p(f, energy, virial, ii, inum, tid, t_per_atom, \
+ offset, eflag, vflag, ans, engv) \
+ if (t_per_atom>1) { \
+ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
+ f.x += shfl_xor(f.x, s, t_per_atom); \
+ f.y += shfl_xor(f.y, s, t_per_atom); \
+ f.z += shfl_xor(f.z, s, t_per_atom); \
+ energy += shfl_xor(energy, s, t_per_atom); \
+ } \
+ if (vflag>0) { \
+ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
+ for (int r=0; r<6; r++) \
+ virial[r] += shfl_xor(virial[r], s, t_per_atom); \
+ } \
+ } \
+ } \
+ if (offset==0) { \
+ int ei=ii; \
+ if (eflag>0) { \
+ engv[ei]+=energy*(acctyp)0.5; \
+ ei+=inum; \
+ } \
+ if (vflag>0) { \
+ for (int i=0; i<6; i++) { \
+ engv[ei]+=virial[i]*(acctyp)0.5; \
+ ei+=inum; \
+ } \
+ } \
+ acctyp4 old=ans[ii]; \
+ old.x+=f.x; \
+ old.y+=f.y; \
+ old.z+=f.z; \
+ ans[ii]=old; \
+ }
+// store_zeta (ARCH >= 300): accumulate z across the thread group with shfl_xor at halving strides; tid and offset are unused in this variant
+#define store_zeta(z, tid, t_per_atom, offset) \
+ if (t_per_atom>1) { \
+ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
+ z += shfl_xor(z, s, t_per_atom); \
+ } \
+ }
+
+#endif
+
+// Tersoff is currently used for 3 elements at most: 3*3*3 = 27 entries,
+// while the block size should never be less than 32.
+// SHARED_SIZE = 32 for now to reduce the pressure on the shared memory per block;
+// it must be increased if more than 3 elements are to be supported in the future.
+
+#define SHARED_SIZE 32
+
+__kernel void k_tersoff_mod_zeta(const __global numtyp4 *restrict x_,
+ const __global numtyp4 *restrict ts1_in,
+ const __global numtyp4 *restrict ts2_in,
+ const __global numtyp4 *restrict ts3_in,
+ const __global numtyp4 *restrict ts4_in,
+ const __global numtyp4 *restrict ts5_in,
+ const __global numtyp *restrict cutsq,
+ const __global int *restrict map,
+ const __global int *restrict elem2param,
+ const int nelements, const int nparams,
+ __global acctyp4 * zetaij,
+ const __global int * dev_nbor,
+ const __global int * dev_packed,
+ const int eflag, const int nall, const int inum,
+ const int nbor_pitch, const int t_per_atom) {
+ __local int tpa_sq,n_stride;
+ tpa_sq = fast_mul(t_per_atom,t_per_atom);
+
+ int tid, ii, offset;
+ atom_info(tpa_sq,ii,tid,offset);
+
+ // must be increased if there will be more than 3 elements in the future.
+ __local numtyp4 ts1[SHARED_SIZE];
+ __local numtyp4 ts2[SHARED_SIZE];
+ __local numtyp4 ts3[SHARED_SIZE];
+ __local numtyp4 ts4[SHARED_SIZE];
+ __local numtyp4 ts5[SHARED_SIZE];
+ if (tid cutsq[ijparam]) continue;
+
+ // compute zeta_ij
+ z = (numtyp)0;
+
+ int nbor_k = nborj_start-offset_j+offset_k;
+ for ( ; nbor_k < nbor_end; nbor_k+=n_stride) {
+ int k=dev_packed[nbor_k];
+ k &= NEIGHMASK;
+
+ if (k == j) continue;
+
+ numtyp4 kx; fetch4(kx,k,pos_tex); //x_[k];
+ int ktype=kx.w;
+ ktype=map[ktype];
+ int ijkparam=elem2param[itype*nelements*nelements+jtype*nelements+ktype];
+
+ // Compute rik
+ delr2.x = kx.x-ix.x;
+ delr2.y = kx.y-ix.y;
+ delr2.z = kx.z-ix.z;
+ numtyp rsq2 = delr2.x*delr2.x+delr2.y*delr2.y+delr2.z*delr2.z;
+
+ if (rsq2 > cutsq[ijkparam]) continue;
+
+ numtyp4 ts1_ijkparam = ts1[ijkparam]; //fetch4(ts1_ijkparam,ijkparam,ts1_tex);
+ numtyp ijkparam_lam3 = ts1_ijkparam.z;
+ numtyp ijkparam_powermint = ts1_ijkparam.w;
+ numtyp4 ts2_ijkparam = ts2[ijkparam]; //fetch4(ts2_ijkparam,ijkparam,ts2_tex);
+ numtyp ijkparam_bigr = ts2_ijkparam.z;
+ numtyp ijkparam_bigd = ts2_ijkparam.w;
+ numtyp4 ts4_ijkparam = ts4[ijkparam]; //fetch4(ts4_ijkparam,ijkparam,ts4_tex);
+ numtyp ijkparam_c1 = ts4_ijkparam.x;
+ numtyp ijkparam_c2 = ts4_ijkparam.y;
+ numtyp ijkparam_c3 = ts4_ijkparam.z;
+ numtyp ijkparam_c4 = ts4_ijkparam.w;
+ numtyp4 ts5_ijkparam = ts5[ijkparam]; //fetch4(ts4_ijkparam,ijkparam,ts4_tex);
+ numtyp ijkparam_c5 = ts5_ijkparam.x;
+ numtyp ijkparam_h = ts5_ijkparam.y;
+ z += zeta(ijkparam_powermint, ijkparam_lam3, ijkparam_bigr, ijkparam_bigd,
+ ijkparam_h, ijkparam_c1, ijkparam_c2, ijkparam_c3, ijkparam_c4,
+ ijkparam_c5, rsq1, rsq2, delr1, delr2);
+ }
+
+ //int jj = (nbor_j-offset_j-2*nbor_pitch)/n_stride;
+ //int idx = jj*n_stride + i*t_per_atom + offset_j;
+ int idx;
+ zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
+ i, nbor_j, offset_j, idx);
+ store_zeta(z, tid, t_per_atom, offset_k);
+
+ numtyp4 ts1_ijparam = ts1[ijparam]; //fetch4(ts1_ijparam,ijparam,ts1_tex);
+ numtyp ijparam_lam2 = ts1_ijparam.y;
+ numtyp4 ts2_ijparam = ts2[ijparam]; //fetch4(ts2_ijparam,ijparam,ts2_tex);
+ numtyp ijparam_bigb = ts2_ijparam.y;
+ numtyp ijparam_bigr = ts2_ijparam.z;
+ numtyp ijparam_bigd = ts2_ijparam.w;
+ numtyp4 ts3_ijparam = ts3[ijparam]; //fetch4(ts3_ijparam,ijparam,ts3_tex);
+ numtyp ijparam_beta = ts3_ijparam.x;
+ numtyp ijparam_powern = ts3_ijparam.y;
+ numtyp ijparam_powern_del = ts3_ijparam.z;
+ numtyp ijparam_ca1 = ts3_ijparam.w;
+ numtyp ijparam_ca4 = ucl_recip(ts3_ijparam.w);
+
+ if (offset_k == 0) {
+ numtyp fpfeng[4];
+ force_zeta(ijparam_bigb, ijparam_bigr, ijparam_bigd, ijparam_lam2,
+ ijparam_beta, ijparam_powern, ijparam_powern_del, ijparam_ca1,
+ ijparam_ca4, rsq1, z, eflag, fpfeng);
+ acctyp4 zij;
+ zij.x = fpfeng[0];
+ zij.y = fpfeng[1];
+ zij.z = fpfeng[2];
+ zij.w = z;
+ zetaij[idx] = zij;
+ }
+
+ } // for nbor
+ } // if ii
+}
+
+__kernel void k_tersoff_mod_repulsive(const __global numtyp4 *restrict x_,
+ const __global numtyp4 *restrict ts1_in,
+ const __global numtyp4 *restrict ts2_in,
+ const __global numtyp *restrict cutsq,
+ const __global int *restrict map,
+ const __global int *restrict elem2param,
+ const int nelements, const int nparams,
+ const __global int * dev_nbor,
+ const __global int * dev_packed,
+ __global acctyp4 *restrict ans,
+ __global acctyp *restrict engv,
+ const int eflag, const int vflag,
+ const int inum, const int nbor_pitch,
+ const int t_per_atom) {
+ __local int n_stride;
+ int tid, ii, offset;
+ atom_info(t_per_atom,ii,tid,offset);
+
+ __local numtyp4 ts1[SHARED_SIZE];
+ __local numtyp4 ts2[SHARED_SIZE];
+ if (tid0)
+ energy+=feng[1];
+ if (vflag>0) {
+ virial[0] += delx*delx*force;
+ virial[1] += dely*dely*force;
+ virial[2] += delz*delz*force;
+ virial[3] += delx*dely*force;
+ virial[4] += delx*delz*force;
+ virial[5] += dely*delz*force;
+ }
+ }
+ } // for nbor
+
+ store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
+ ans,engv);
+ } // if ii
+
+}
+
+__kernel void k_tersoff_mod_three_center(const __global numtyp4 *restrict x_,
+ const __global numtyp4 *restrict ts1_in,
+ const __global numtyp4 *restrict ts2_in,
+ const __global numtyp4 *restrict ts4_in,
+ const __global numtyp4 *restrict ts5_in,
+ const __global numtyp *restrict cutsq,
+ const __global int *restrict map,
+ const __global int *restrict elem2param,
+ const int nelements, const int nparams,
+ const __global acctyp4 *restrict zetaij,
+ const __global int * dev_nbor,
+ const __global int * dev_packed,
+ __global acctyp4 *restrict ans,
+ __global acctyp *restrict engv,
+ const int eflag, const int vflag,
+ const int inum, const int nbor_pitch,
+ const int t_per_atom, const int evatom) {
+ __local int tpa_sq, n_stride;
+ tpa_sq=fast_mul(t_per_atom,t_per_atom);
+ numtyp lam3, powermint, bigr, bigd, c1, c2, c3, c4, c5, h;
+
+ int tid, ii, offset;
+ atom_info(tpa_sq,ii,tid,offset); // offset ranges from 0 to tpa_sq-1
+
+ __local numtyp4 ts1[SHARED_SIZE];
+ __local numtyp4 ts2[SHARED_SIZE];
+ __local numtyp4 ts4[SHARED_SIZE];
+ __local numtyp4 ts5[SHARED_SIZE];
+ if (tid cutsq[ijparam]) continue;
+ numtyp r1 = ucl_sqrt(rsq1);
+ numtyp r1inv = ucl_rsqrt(rsq1);
+
+ // look up for zeta_ij
+
+ //int jj = (nbor_j-offset_j-2*nbor_pitch) / n_stride;
+ //int idx = jj*n_stride + i*t_per_atom + offset_j;
+ int idx;
+ zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
+ i, nbor_j, offset_j, idx);
+ acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex);
+ numtyp force = zeta_ij.x*tpainv;
+ numtyp prefactor = zeta_ij.y;
+ f.x += delr1[0]*force;
+ f.y += delr1[1]*force;
+ f.z += delr1[2]*force;
+
+ if (eflag>0) {
+ energy+=zeta_ij.z*tpainv;
+ }
+ if (vflag>0) {
+ numtyp mforce = -force;
+ virial[0] += delr1[0]*delr1[0]*mforce;
+ virial[1] += delr1[1]*delr1[1]*mforce;
+ virial[2] += delr1[2]*delr1[2]*mforce;
+ virial[3] += delr1[0]*delr1[1]*mforce;
+ virial[4] += delr1[0]*delr1[2]*mforce;
+ virial[5] += delr1[1]*delr1[2]*mforce;
+ }
+
+ int nbor_k=nborj_start-offset_j+offset_k;
+ for ( ; nbor_k cutsq[ijkparam]) continue;
+ numtyp r2 = ucl_sqrt(rsq2);
+ numtyp r2inv = ucl_rsqrt(rsq2);
+
+ numtyp fi[3], fj[3], fk[3];
+ numtyp4 ts1_ijkparam = ts1[ijkparam]; //fetch4(ts1_ijkparam,ijkparam,ts1_tex);
+ lam3 = ts1_ijkparam.z;
+ powermint = ts1_ijkparam.w;
+ numtyp4 ts2_ijkparam = ts2[ijkparam]; //fetch4(ts2_ijkparam,ijkparam,ts2_tex);
+ bigr = ts2_ijkparam.z;
+ bigd = ts2_ijkparam.w;
+ numtyp4 ts4_ijkparam = ts4[ijkparam]; //fetch4(ts4_ijkparam,ijkparam,ts4_tex);
+ c1 = ts4_ijkparam.x;
+ c2 = ts4_ijkparam.y;
+ c3 = ts4_ijkparam.z;
+ c4 = ts4_ijkparam.w;
+ numtyp4 ts5_ijkparam = ts5[ijkparam]; //fetch4(ts5_ijkparam,ijkparam,ts5_tex);
+ c5 = ts5_ijkparam.x;
+ h = ts5_ijkparam.y;
+ if (vflag>0)
+ attractive(bigr, bigd, powermint, lam3, h, c1, c2, c3, c4, c5,
+ prefactor, r1, r1inv, r2, r2inv, delr1, delr2, fi, fj, fk);
+ else
+ attractive_fi(bigr, bigd, powermint, lam3, h, c1, c2, c3, c4, c5,
+ prefactor, r1, r1inv, r2, r2inv, delr1, delr2, fi);
+ f.x += fi[0];
+ f.y += fi[1];
+ f.z += fi[2];
+
+ if (vflag>0) {
+ acctyp v[6];
+ numtyp pre = (numtyp)2.0;
+ if (evatom==1) pre = TWOTHIRD;
+ v[0] = pre*(delr1[0]*fj[0] + delr2[0]*fk[0]);
+ v[1] = pre*(delr1[1]*fj[1] + delr2[1]*fk[1]);
+ v[2] = pre*(delr1[2]*fj[2] + delr2[2]*fk[2]);
+ v[3] = pre*(delr1[0]*fj[1] + delr2[0]*fk[1]);
+ v[4] = pre*(delr1[0]*fj[2] + delr2[0]*fk[2]);
+ v[5] = pre*(delr1[1]*fj[2] + delr2[1]*fk[2]);
+
+ virial[0] += v[0]; virial[1] += v[1]; virial[2] += v[2];
+ virial[3] += v[3]; virial[4] += v[4]; virial[5] += v[5];
+ }
+ } // nbor_k
+ } // for nbor_j
+
+ store_answers_p(f,energy,virial,ii,inum,tid,tpa_sq,
+ offset,eflag,vflag,ans,engv);
+ } // if ii
+}
+
+__kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
+ const __global numtyp4 *restrict ts1_in,
+ const __global numtyp4 *restrict ts2_in,
+ const __global numtyp4 *restrict ts4_in,
+ const __global numtyp4 *restrict ts5_in,
+ const __global numtyp *restrict cutsq,
+ const __global int *restrict map,
+ const __global int *restrict elem2param,
+ const int nelements, const int nparams,
+ const __global acctyp4 *restrict zetaij,
+ const __global int * dev_nbor,
+ const __global int * dev_packed,
+ __global acctyp4 *restrict ans,
+ __global acctyp *restrict engv,
+ const int eflag, const int vflag,
+ const int inum, const int nbor_pitch,
+ const int t_per_atom) {
+ __local int tpa_sq, n_stride;
+ tpa_sq=fast_mul(t_per_atom,t_per_atom);
+ numtyp lam3, powermint, bigr, bigd, c1, c2, c3, c4, c5, h;
+
+ int tid, ii, offset;
+ atom_info(tpa_sq,ii,tid,offset);
+
+ __local numtyp4 ts1[SHARED_SIZE];
+ __local numtyp4 ts2[SHARED_SIZE];
+ __local numtyp4 ts4[SHARED_SIZE];
+ __local numtyp4 ts5[SHARED_SIZE];
+ if (tid cutsq[ijparam]) continue;
+
+ numtyp mdelr1[3];
+ mdelr1[0] = -delr1[0];
+ mdelr1[1] = -delr1[1];
+ mdelr1[2] = -delr1[2];
+
+ int nbor_k=j+nbor_pitch;
+ int numk=dev_nbor[nbor_k];
+ if (dev_nbor==dev_packed) {
+ nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
+ k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
+ nbor_k+=offset_k;
+ } else {
+ nbor_k+=nbor_pitch;
+ nbor_k=dev_nbor[nbor_k];
+ k_end=nbor_k+numk;
+ nbor_k+=offset_k;
+ }
+ int nbork_start = nbor_k;
+
+ // look up for zeta_ji: find i in the j's neighbor list
+ int m = tid / t_per_atom;
+ int ijnum = -1;
+ for ( ; nbor_k= 0) {
+ offset_kf = offset_k;
+ } else {
+ ijnum = red_acc[2*m+0];
+ offset_kf = red_acc[2*m+1];
+ }
+
+ //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride;
+ //int idx = iix*n_stride + j*t_per_atom + offset_kf;
+ int idx;
+ zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
+ j, ijnum, offset_kf, idx);
+ acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
+ numtyp force = zeta_ji.x*tpainv;
+ numtyp prefactor_ji = zeta_ji.y;
+ f.x += delr1[0]*force;
+ f.y += delr1[1]*force;
+ f.z += delr1[2]*force;
+
+ if (eflag>0) {
+ energy+=zeta_ji.z*tpainv;
+ }
+ if (vflag>0) {
+ numtyp mforce = -force;
+ virial[0] += mdelr1[0]*mdelr1[0]*mforce;
+ virial[1] += mdelr1[1]*mdelr1[1]*mforce;
+ virial[2] += mdelr1[2]*mdelr1[2]*mforce;
+ virial[3] += mdelr1[0]*mdelr1[1]*mforce;
+ virial[4] += mdelr1[0]*mdelr1[2]*mforce;
+ virial[5] += mdelr1[1]*mdelr1[2]*mforce;
+ }
+
+ // attractive forces
+ for (nbor_k = nbork_start ; nbor_k cutsq[jikparam]) continue;
+ numtyp r2 = ucl_sqrt(rsq2);
+ numtyp r2inv = ucl_rsqrt(rsq2);
+ numtyp4 ts1_param, ts2_param, ts4_param, ts5_param;
+ numtyp fi[3];
+
+ ts1_param = ts1[jikparam]; //fetch4(ts1_jikparam,jikparam,ts1_tex);
+ lam3 = ts1_param.z;
+ powermint = ts1_param.w;
+ ts2_param = ts2[jikparam]; //fetch4(ts2_jikparam,jikparam,ts2_tex);
+ bigr = ts2_param.z;
+ bigd = ts2_param.w;
+ ts4_param = ts4[jikparam]; //fetch4(ts4_jikparam,jikparam,ts4_tex);
+ c1 = ts4_param.x;
+ c2 = ts4_param.y;
+ c3 = ts4_param.z;
+ c4 = ts4_param.w;
+ ts5_param = ts5[jikparam]; //fetch4(ts5_jikparam,jikparam,ts5_tex);
+ c5 = ts5_param.x;
+ h = ts5_param.y;
+ attractive_fj(bigr, bigd, powermint, lam3, h, c1, c2, c3, c4, c5,
+ prefactor_ji, r1, r1inv, r2, r2inv, mdelr1, delr2, fi);
+ f.x += fi[0];
+ f.y += fi[1];
+ f.z += fi[2];
+
+ //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
+ //int idx = kk*n_stride + j*t_per_atom + offset_k;
+ int idx;
+ zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
+ j, nbor_k, offset_k, idx);
+ acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
+ numtyp prefactor_jk = zeta_jk.y;
+ int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
+ ts1_param = ts1[jkiparam]; //fetch4(ts1_jkiparam,jkiparam,ts1_tex);
+ lam3 = ts1_param.z;
+ powermint = ts1_param.w;
+ ts2_param = ts2[jkiparam]; //fetch4(ts2_jkiparam,jkiparam,ts2_tex);
+ bigr = ts2_param.z;
+ bigd = ts2_param.w;
+ ts4_param = ts4[jkiparam]; //fetch4(ts4_jkiparam,jkiparam,ts4_tex);
+ c1 = ts4_param.x;
+ c2 = ts4_param.y;
+ c3 = ts4_param.z;
+ c4 = ts4_param.w;
+ ts5_param = ts5[jkiparam]; //fetch4(ts5_ikiparam,jkiparam,ts5_tex);
+ c5 = ts5_param.x;
+ h = ts5_param.y;
+ attractive_fk(bigr, bigd, powermint, lam3, h, c1, c2, c3, c4, c5,
+ prefactor_jk, r2, r2inv, r1, r1inv, delr2, mdelr1, fi);
+ f.x += fi[0];
+ f.y += fi[1];
+ f.z += fi[2];
+ } // for nbor_k
+ } // for nbor_j
+
+ #ifdef THREE_CONCURRENT
+ store_answers(f,energy,virial,ii,inum,tid,tpa_sq,offset,
+ eflag,vflag,ans,engv);
+ #else
+ store_answers_p(f,energy,virial,ii,inum,tid,tpa_sq,offset,
+ eflag,vflag,ans,engv);
+ #endif
+ } // if ii
+}
+
+__kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
+ const __global numtyp4 *restrict ts1_in,
+ const __global numtyp4 *restrict ts2_in,
+ const __global numtyp4 *restrict ts4_in,
+ const __global numtyp4 *restrict ts5_in,
+ const __global numtyp *restrict cutsq,
+ const __global int *restrict map,
+ const __global int *restrict elem2param,
+ const int nelements, const int nparams,
+ const __global acctyp4 *restrict zetaij,
+ const __global int * dev_nbor,
+ const __global int * dev_packed,
+ __global acctyp4 *restrict ans,
+ __global acctyp *restrict engv,
+ const int eflag, const int vflag,
+ const int inum, const int nbor_pitch,
+ const int t_per_atom) {
+ __local int tpa_sq, n_stride;
+ tpa_sq=fast_mul(t_per_atom,t_per_atom);
+ numtyp lam3, powermint, bigr, bigd, c1, c2, c3, c4, c5, h;
+
+ int tid, ii, offset;
+ atom_info(tpa_sq,ii,tid,offset);
+
+ __local numtyp4 ts1[SHARED_SIZE];
+ __local numtyp4 ts2[SHARED_SIZE];
+ __local numtyp4 ts4[SHARED_SIZE];
+ __local numtyp4 ts5[SHARED_SIZE];
+ if (tid cutsq[ijparam]) continue;
+
+ numtyp mdelr1[3];
+ mdelr1[0] = -delr1[0];
+ mdelr1[1] = -delr1[1];
+ mdelr1[2] = -delr1[2];
+
+ int nbor_k=j+nbor_pitch;
+ int numk=dev_nbor[nbor_k];
+ if (dev_nbor==dev_packed) {
+ nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
+ k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
+ nbor_k+=offset_k;
+ } else {
+ nbor_k+=nbor_pitch;
+ nbor_k=dev_nbor[nbor_k];
+ k_end=nbor_k+numk;
+ nbor_k+=offset_k;
+ }
+ int nbork_start = nbor_k;
+
+ // look up for zeta_ji
+ int m = tid / t_per_atom;
+ int ijnum = -1;
+ for ( ; nbor_k= 0) {
+ offset_kf = offset_k;
+ } else {
+ ijnum = red_acc[2*m+0];
+ offset_kf = red_acc[2*m+1];
+ }
+
+ //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride;
+ //int idx = iix*n_stride + j*t_per_atom + offset_kf;
+ int idx;
+ zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
+ j, ijnum, offset_kf, idx);
+ acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
+ numtyp force = zeta_ji.x*tpainv;
+ numtyp prefactor_ji = zeta_ji.y;
+ f.x += delr1[0]*force;
+ f.y += delr1[1]*force;
+ f.z += delr1[2]*force;
+
+ if (eflag>0) {
+ energy+=zeta_ji.z*tpainv;
+ }
+ if (vflag>0) {
+ numtyp mforce = -force;
+ virial[0] += mdelr1[0]*mdelr1[0]*mforce;
+ virial[1] += mdelr1[1]*mdelr1[1]*mforce;
+ virial[2] += mdelr1[2]*mdelr1[2]*mforce;
+ virial[3] += mdelr1[0]*mdelr1[1]*mforce;
+ virial[4] += mdelr1[0]*mdelr1[2]*mforce;
+ virial[5] += mdelr1[1]*mdelr1[2]*mforce;
+ }
+
+ // attractive forces
+ for (nbor_k = nbork_start; nbor_k cutsq[jikparam]) continue;
+ numtyp r2 = ucl_sqrt(rsq2);
+ numtyp r2inv = ucl_rsqrt(rsq2);
+
+ numtyp fi[3], fj[3], fk[3];
+ numtyp4 ts1_param, ts2_param, ts4_param, ts5_param;
+ ts1_param = ts1[jikparam]; //fetch4(ts1_jikparam,jikparam,ts1_tex);
+ lam3 = ts1_param.z;
+ powermint = ts1_param.w;
+ ts2_param = ts2[jikparam]; //fetch4(ts2_jikparam,jikparam,ts2_tex);
+ bigr = ts2_param.z;
+ bigd = ts2_param.w;
+ ts4_param = ts4[jikparam]; //fetch4(ts4_jikparam,jikparam,ts4_tex);
+ c1 = ts4_param.x;
+ c2 = ts4_param.y;
+ c3 = ts4_param.z;
+ c4 = ts4_param.w;
+ ts5_param = ts5[jikparam]; //fetch4(ts5_jijparam,jikparam,ts5_tex);
+ c5 = ts5_param.x;
+ h = ts5_param.y;
+ attractive(bigr, bigd, powermint, lam3, h, c1, c2, c3, c4, c5,
+ prefactor_ji, r1, r1inv, r2, r2inv, mdelr1, delr2, fi, fj, fk);
+ f.x += fj[0];
+ f.y += fj[1];
+ f.z += fj[2];
+
+ virial[0] += TWOTHIRD*(mdelr1[0]*fj[0] + delr2[0]*fk[0]);
+ virial[1] += TWOTHIRD*(mdelr1[1]*fj[1] + delr2[1]*fk[1]);
+ virial[2] += TWOTHIRD*(mdelr1[2]*fj[2] + delr2[2]*fk[2]);
+ virial[3] += TWOTHIRD*(mdelr1[0]*fj[1] + delr2[0]*fk[1]);
+ virial[4] += TWOTHIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]);
+ virial[5] += TWOTHIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]);
+
+ //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
+ //int idx = kk*n_stride + j*t_per_atom + offset_k;
+ int idx;
+ zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
+ j, nbor_k, offset_k, idx);
+ acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
+ numtyp prefactor_jk = zeta_jk.y;
+
+ int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
+ ts1_param = ts1[jkiparam]; //fetch4(ts1_jkiparam,jkiparam,ts1_tex);
+ lam3 = ts1_param.z;
+ powermint = ts1_param.w;
+ ts2_param = ts2[jkiparam]; //fetch4(ts2_jkiparam,jkiparam,ts2_tex);
+ bigr = ts2_param.z;
+ bigd = ts2_param.w;
+ ts4_param = ts4[jkiparam]; //fetch4(ts4_jkiparam,jkiparam,ts4_tex);
+ c1 = ts4_param.x;
+ c2 = ts4_param.y;
+ c3 = ts4_param.z;
+ c4 = ts4_param.w;
+ ts5_param = ts5[jkiparam]; //fetch4(ts5_ikiparam,jkiparam,ts5_tex);
+ c5 = ts5_param.x;
+ h = ts5_param.y;
+ attractive(bigr, bigd, powermint, lam3, h, c1, c2, c3, c4, c5,
+ prefactor_jk, r2, r2inv, r1, r1inv, delr2, mdelr1, fi, fj, fk);
+ f.x += fk[0];
+ f.y += fk[1];
+ f.z += fk[2];
+
+ virial[0] += TWOTHIRD*(delr2[0]*fj[0] + mdelr1[0]*fk[0]);
+ virial[1] += TWOTHIRD*(delr2[1]*fj[1] + mdelr1[1]*fk[1]);
+ virial[2] += TWOTHIRD*(delr2[2]*fj[2] + mdelr1[2]*fk[2]);
+ virial[3] += TWOTHIRD*(delr2[0]*fj[1] + mdelr1[0]*fk[1]);
+ virial[4] += TWOTHIRD*(delr2[0]*fj[2] + mdelr1[0]*fk[2]);
+ virial[5] += TWOTHIRD*(delr2[1]*fj[2] + mdelr1[1]*fk[2]);
+ }
+ } // for nbor
+
+ #ifdef THREE_CONCURRENT
+ store_answers(f,energy,virial,ii,inum,tid,tpa_sq,offset,
+ eflag,vflag,ans,engv);
+ #else
+ store_answers_p(f,energy,virial,ii,inum,tid,tpa_sq,offset,
+ eflag,vflag,ans,engv);
+ #endif
+ } // if ii
+}
+
diff --git a/lib/gpu/lal_tersoff_mod.h b/lib/gpu/lal_tersoff_mod.h
new file mode 100644
index 0000000000..9a05c66009
--- /dev/null
+++ b/lib/gpu/lal_tersoff_mod.h
@@ -0,0 +1,118 @@
+/***************************************************************************
+ tersoff_mod.h
+ -------------------
+ Trung Dac Nguyen
+
+ Class for acceleration of the tersoff/mod pair style.
+
+ __________________________________________________________________________
+ This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+ begin :
+ email : ndactrung@gmail.com
+ ***************************************************************************/
+
+#ifndef LAL_TERSOFF_MOD_H
+#define LAL_TERSOFF_MOD_H
+
+#include "lal_base_three.h"
+
+namespace LAMMPS_AL {
+
+template
+class TersoffMod : public BaseThree {
+ public:
+ TersoffMod();
+ ~TersoffMod();
+
+ /// Clear any previous data and set up for a new LAMMPS run for generic systems
+ /** \param max_nbors initial number of rows in the neighbor matrix
+ * \param cell_size cutoff + skin
+ * \param gpu_split fraction of particles handled by device
+ *
+ * Returns:
+ * - 0 if successful
+ * - -1 if fix gpu not found
+ * - -3 if there is an out of memory error
+ * - -4 if the GPU library was not compiled for GPU
+ * - -5 if double precision is not supported on card **/
+ int init(const int ntypes, const int nlocal, const int nall, const int max_nbors,
+ const double cell_size, const double gpu_split, FILE *screen,
+ int* host_map, const int nelements, int*** host_elem2param, const int nparams,
+ const double* lam1, const double* lam2, const double* lam3,
+ const double* powermint, const double* biga, const double* bigb,
+ const double* bigr, const double* bigd, const double* c1, const double* c2,
+ const double* c3, const double* c4, const double* c5,
+ const double* h, const double* beta, const double* powern,
+ const double* powern_del, const double* ca1, const double* cutsq);
+
+ /// Pair loop with host neighboring (neighbor list supplied via ilist/numj/firstneigh)
+ void compute(const int f_ago, const int inum_full, const int nall,
+ const int nlist, double **host_x, int *host_type,
+ int *ilist, int *numj, int **firstneigh, const bool eflag,
+ const bool vflag, const bool eatom, const bool vatom,
+ int &host_start, const double cpu_time, bool &success);
+
+ /// Pair loop with device neighboring (*ilist and *numj are set to host-side arrays on return)
+ int ** compute(const int ago, const int inum_full,
+ const int nall, double **host_x, int *host_type, double *sublo,
+ double *subhi, tagint *tag, int **nspecial,
+ tagint **special, const bool eflag, const bool vflag,
+ const bool eatom, const bool vatom, int &host_start,
+ int **ilist, int **numj, const double cpu_time, bool &success);
+
+ /// Clear all host and device data
+ /** \note This is called at the beginning of the init() routine **/
+ void clear();
+
+ /// Returns memory usage on device per atom
+ int bytes_per_atom(const int max_nbors) const;
+
+ /// Total host memory used by library for pair style
+ double host_memory_usage() const;
+
+ // --------------------------- TYPE DATA --------------------------
+
+ /// If atom type constants fit in shared memory, use fast kernels
+ bool shared_types;
+
+ /// Number of atom types
+ int _lj_types;
+
+ /// ts1.x = lam1, ts1.y = lam2, ts1.z = lam3, ts1.w = powermint
+ UCL_D_Vec ts1;
+ /// ts2.x = biga, ts2.y = bigb, ts2.z = bigr, ts2.w = bigd
+ UCL_D_Vec ts2;
+ /// ts3.x = beta, ts3.y = powern, ts3.z = powern_del, ts3.w = ca1
+ UCL_D_Vec ts3;
+ /// ts4.x = c1, ts4.y = c2, ts4.z = c3, ts4.w = c4
+ UCL_D_Vec ts4;
+ /// ts5.x = c5, ts5.y = h
+ UCL_D_Vec ts5;
+
+ UCL_D_Vec cutsq;  // squared cutoffs; the kernels index it by parameter-set id
+
+ UCL_D_Vec elem2param;  // flattened (i,j,k) element triple -> parameter-set id
+ UCL_D_Vec map;  // atom type -> element index, as used by the kernels
+ int _nparams,_nelements;
+
+ /// One entry per atom-neighbor slot (compute() sizes it as _max_nbors * ~1.1*nall):
+ /// zetaij.x = force, zetaij.y = prefactor, zetaij.z = evdwl,
+ /// zetaij.w = zetaij
+ UCL_D_Vec _zetaij;
+
+ UCL_Kernel k_zeta;  // zeta kernel, launched from compute() before loop()
+ UCL_Texture ts1_tex, ts2_tex, ts3_tex, ts4_tex, ts5_tex;
+
+ int _max_nbors;  // refreshed in compute() whenever ago==0
+
+ private:
+ bool _allocated;  // set by init(), cleared by clear()
+ void loop(const bool _eflag, const bool _vflag, const int evatom);  // launches the pair and three-body kernels
+};
+
+}
+
+#endif
+
diff --git a/lib/gpu/lal_tersoff_mod_ext.cpp b/lib/gpu/lal_tersoff_mod_ext.cpp
new file mode 100644
index 0000000000..7817e7d08d
--- /dev/null
+++ b/lib/gpu/lal_tersoff_mod_ext.cpp
@@ -0,0 +1,135 @@
+/***************************************************************************
+ tersoff_mod_ext.cpp
+ -------------------
+ Trung Dac Nguyen
+
+ Functions for LAMMPS access to tersoff acceleration routines.
+
+ __________________________________________________________________________
+ This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+ begin : Thu April 17, 2014
+ email : ndactrung@gmail.com
+ ***************************************************************************/
+
+#include
+#include
+#include
+
+#include "lal_tersoff_mod.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+static TersoffMod TSMMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+// ---------------------------------------------------------------------------
+// Sets up the file-scope TSMMF object. gpu_mode is overwritten with the value
+// reported by TSMMF.device->gpu_mode(). Returns -8 when the device particle
+// split is not 1.0; otherwise returns init_ok, which starts at 0 and is
+// replaced by the result of TSMMF.init() where that call is made.
+int tersoff_mod_gpu_init(const int ntypes, const int inum, const int nall,
+ const int max_nbors, const double cell_size, int &gpu_mode, FILE *screen,
+ int* host_map, const int nelements, int*** host_elem2param, const int nparams,
+ const double* ts_lam1, const double* ts_lam2, const double* ts_lam3,
+ const double* ts_powermint, const double* ts_biga, const double* ts_bigb,
+ const double* ts_bigr, const double* ts_bigd, const double* ts_c1,
+ const double* ts_c2, const double* ts_c3, const double* ts_c4,
+ const double* ts_c5, const double* ts_h, const double* ts_beta,
+ const double* ts_powern, const double* ts_powern_del,
+ const double* ts_ca1, const double* ts_cutsq) {
+ // Drop any state left from a previous initialization before querying the device
+ TSMMF.clear();
+ gpu_mode=TSMMF.device->gpu_mode();
+ double gpu_split=TSMMF.device->particle_split();
+ int first_gpu=TSMMF.device->first_device();
+ int last_gpu=TSMMF.device->last_device();
+ int world_me=TSMMF.device->world_me();
+ int gpu_rank=TSMMF.device->gpu_rank();
+ int procs_per_gpu=TSMMF.device->procs_per_gpu();
+
+ // disable host/device split for now
+ if (gpu_split != 1.0)
+ return -8;
+
+ TSMMF.device->init_message(screen,"tersoff/mod/gpu",first_gpu,last_gpu);
+
+ // Progress messages are printed only when replica_me()==0 and screen is non-null
+ bool message=false;
+ if (TSMMF.device->replica_me()==0 && screen)
+ message=true;
+
+ if (message) {
+ fprintf(screen,"Initializing Device and compiling on process 0...");
+ fflush(screen);
+ }
+
+ // The process with world_me==0 initializes first; all processes then meet
+ // at world_barrier().
+ // NOTE(review): the literal 300 is passed to init() and the max_nbors
+ // parameter of this function is not referenced in the visible body --
+ // confirm that this is intended.
+ int init_ok=0;
+ if (world_me==0)
+ init_ok=TSMMF.init(ntypes, inum, nall, 300, cell_size, gpu_split, screen,
+ host_map, nelements, host_elem2param, nparams,
+ ts_lam1, ts_lam2, ts_lam3, ts_powermint,
+ ts_biga, ts_bigb, ts_bigr, ts_bigd, ts_c1, ts_c2,
+ ts_c3, ts_c4, ts_c5, ts_h, ts_beta, ts_powern,
+ ts_powern_del, ts_ca1, ts_cutsq);
+
+ TSMMF.device->world_barrier();
+ if (message)
+ fprintf(screen,"Done.\n");
+
+ // NOTE(review): the next line appears truncated in this copy of the patch
+ // (the loop condition and the start of the loop body are missing); restore
+ // it from the upstream file before applying. gpu_rank, procs_per_gpu,
+ // first_gpu and last_gpu are presumably used in the missing text.
+ for (int i=0; igpu_barrier();
+ if (message)
+ fprintf(screen,"Done.\n");
+ }
+ if (message)
+ fprintf(screen,"\n");
+
+ // The overhead estimate is only taken when init_ok is still 0
+ if (init_ok==0)
+ TSMMF.estimate_gpu_overhead();
+ return init_ok;
+}
+
+// ---------------------------------------------------------------------------
+// Release whatever the file-scope TSMMF object currently holds
+// ---------------------------------------------------------------------------
+void tersoff_mod_gpu_clear()
+{
+  TSMMF.clear();
+}
+
+// ---------------------------------------------------------------------------
+// Forward to the TSMMF.compute() overload that takes the sub-domain bounds,
+// tags and special lists, and hand its return value back to the caller
+// ---------------------------------------------------------------------------
+int ** tersoff_mod_gpu_compute_n(const int ago, const int inum_full,
+                                 const int nall, double **host_x,
+                                 int *host_type, double *sublo, double *subhi,
+                                 tagint *tag, int **nspecial, tagint **special,
+                                 const bool eflag, const bool vflag,
+                                 const bool eatom, const bool vatom,
+                                 int &host_start, int **ilist, int **jnum,
+                                 const double cpu_time, bool &success)
+{
+  int **firstneigh = TSMMF.compute(ago, inum_full, nall, host_x, host_type,
+                                   sublo, subhi, tag, nspecial, special,
+                                   eflag, vflag, eatom, vatom, host_start,
+                                   ilist, jnum, cpu_time, success);
+  return firstneigh;
+}
+
+// ---------------------------------------------------------------------------
+// Forward to the TSMMF.compute() overload that takes a caller-supplied
+// neighbor list (ilist, numj, firstneigh); nothing is returned
+// ---------------------------------------------------------------------------
+void tersoff_mod_gpu_compute(const int ago, const int nlocal, const int nall,
+                             const int nlist, double **host_x, int *host_type,
+                             int *ilist, int *numj, int **firstneigh,
+                             const bool eflag, const bool vflag,
+                             const bool eatom, const bool vatom,
+                             int &host_start, const double cpu_time,
+                             bool &success)
+{
+  TSMMF.compute(ago, nlocal, nall, nlist, host_x, host_type, ilist, numj,
+                firstneigh, eflag, vflag, eatom, vatom, host_start, cpu_time,
+                success);
+}
+
+// ---------------------------------------------------------------------------
+// Report the value of TSMMF.host_memory_usage()
+// ---------------------------------------------------------------------------
+double tersoff_mod_gpu_bytes()
+{
+  const double host_bytes = TSMMF.host_memory_usage();
+  return host_bytes;
+}
+
+
diff --git a/lib/gpu/lal_tersoff_mod_extra.h b/lib/gpu/lal_tersoff_mod_extra.h
new file mode 100644
index 0000000000..370aceb634
--- /dev/null
+++ b/lib/gpu/lal_tersoff_mod_extra.h
@@ -0,0 +1,627 @@
+// **************************************************************************
+// tersoff_mod_extra.h
+// -------------------
+// Trung Dac Nguyen
+//
+// Device code for Tersoff math routines
+//
+// __________________________________________________________________________
+// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+// begin :
+// email : ndactrung@gmail.com
+// **************************************************************************
+
+#ifndef LAL_TERSOFF_MOD_EXTRA_H
+#define LAL_TERSOFF_MOD_EXTRA_H
+
+#ifdef NV_KERNEL
+#include "lal_aux_fun1.h"
+#else
+#endif
+
+#define MY_PI2 (numtyp)1.57079632679489661923
+#define MY_PI4 (numtyp)0.78539816339744830962
+
+/* ---------------------------------------------------------------------- */
+
+// Dot product of the 3-component vectors x and y.
+ucl_inline numtyp vec3_dot(const numtyp x[3], const numtyp y[3])
+{
+  const numtyp p0 = x[0]*y[0];
+  const numtyp p1 = x[1]*y[1];
+  const numtyp p2 = x[2]*y[2];
+  // summed left to right, as in a single x0*y0 + x1*y1 + x2*y2 expression
+  return (p0 + p1 + p2);
+}
+
+// Component-wise sum: z = x + y. Each output component depends only on the
+// matching input components, so z may be the same array as x or y.
+ucl_inline void vec3_add(const numtyp x[3], const numtyp y[3], numtyp z[3])
+{
+  for (int d = 0; d < 3; d++)
+    z[d] = x[d]+y[d];
+}
+
+// Scalar multiple: y = k * x. Each output component depends only on the
+// matching input component, so y may be the same array as x.
+ucl_inline void vec3_scale(const numtyp k, const numtyp x[3], numtyp y[3])
+{
+  for (int d = 0; d < 3; d++)
+    y[d] = k*x[d];
+}
+
+// Scaled add: z = k * x + y. Each output component depends only on the
+// matching input components, so z may be the same array as x or y.
+ucl_inline void vec3_scaleadd(const numtyp k, const numtyp x[3],
+                              const numtyp y[3], numtyp z[3])
+{
+  for (int d = 0; d < 3; d++)
+    z[d] = k*x[d]+y[d];
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Angular term:
+//   c1 + (c2*(h-cos)^2 / (c3 + (h-cos)^2)) * (1 + c4*exp(-c5*(h-cos)^2))
+ucl_inline numtyp ters_gijk_mod(const numtyp costheta,
+                                const numtyp param_c1,
+                                const numtyp param_c2,
+                                const numtyp param_c3,
+                                const numtyp param_c4,
+                                const numtyp param_c5,
+                                const numtyp param_h)
+{
+  const numtyp hc_sq = (param_h - costheta)*(param_h - costheta);
+  const numtyp rational = param_c2*hc_sq/(param_c3 + hc_sq);
+  const numtyp envelope = (numtyp)1.0 + param_c4*ucl_exp(-param_c5*hc_sq);
+  return param_c1 + rational * envelope;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Companion of ters_gijk_mod(): evaluates
+//   -2*c2*g1*((1 + c4*g2)*(1 + g1*(cos - h)) - (h-cos)^2*c4*c5*g2)
+// with g1 = (h-cos)/(c3 + (h-cos)^2) and g2 = exp(-c5*(h-cos)^2).
+ucl_inline numtyp ters_gijk_d_mod(const numtyp costheta,
+                                  const numtyp param_c2,
+                                  const numtyp param_c3,
+                                  const numtyp param_c4,
+                                  const numtyp param_c5,
+                                  const numtyp param_h)
+{
+  const numtyp hc_sq = (param_h - costheta)*(param_h - costheta);
+  const numtyp ratio = (param_h - costheta)/(param_c3 + hc_sq);
+  const numtyp gauss = ucl_exp(-param_c5*hc_sq);
+
+  const numtyp lead = (numtyp)-2.0*param_c2*ratio;
+  const numtyp product = (1 + param_c4*gauss) *
+                         (1 + ratio*(costheta - param_h));
+  const numtyp correction = hc_sq*param_c4*param_c5*gauss;
+  return lead*(product - correction);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Fills dri, drj and drk with the derivative of cos(theta) with respect to
+// Ri, Rj and Rk, where cos(theta) = rij_hat . rik_hat.
+ucl_inline void costheta_d(const numtyp rij_hat[3],
+                           const numtyp rij,
+                           const numtyp rik_hat[3],
+                           const numtyp rik,
+                           numtyp *dri,
+                           numtyp *drj,
+                           numtyp *drk)
+{
+  const numtyp c = vec3_dot(rij_hat,rik_hat);
+  const numtyp inv_rij = ucl_recip(rij);
+  const numtyp inv_rik = ucl_recip(rik);
+
+  // wrt Rj: (rik_hat - c*rij_hat) / rij
+  vec3_scaleadd(-c,rij_hat,rik_hat,drj);
+  vec3_scale(inv_rij,drj,drj);
+
+  // wrt Rk: (rij_hat - c*rik_hat) / rik
+  vec3_scaleadd(-c,rik_hat,rij_hat,drk);
+  vec3_scale(inv_rik,drk,drk);
+
+  // wrt Ri: -(drj + drk)
+  vec3_add(drj,drk,dri);
+  vec3_scale((numtyp)-1.0,dri,dri);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Cutoff function: 1 below bigr-bigd, 0 above bigr+bigd, and in between
+//   0.5*(1 - 1.125*sin(pi/2*(r-bigr)/bigd) - 0.125*sin(3*pi/2*(r-bigr)/bigd))
+ucl_inline numtyp ters_fc(const numtyp r,
+                          const numtyp param_bigr,
+                          const numtyp param_bigd)
+{
+  if (r < param_bigr-param_bigd) {
+    return (numtyp)1.0;
+  } else if (r > param_bigr+param_bigd) {
+    return (numtyp)0.0;
+  }
+
+  const numtyp s1 = sin(MY_PI2*(r - param_bigr)/param_bigd);
+  const numtyp s3 = sin(3*MY_PI2*(r - param_bigr)/param_bigd);
+  return (numtyp)0.5*((numtyp)1.0 - (numtyp)1.125*s1 - (numtyp)0.125*s3);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Derivative of ters_fc() with respect to r: 0 outside
+// [bigr-bigd, bigr+bigd], otherwise
+//   -(0.375*pi/4/bigd)*(3*cos(pi/2*(r-bigr)/bigd) + cos(3*pi/2*(r-bigr)/bigd))
+ucl_inline numtyp ters_fc_d(const numtyp r,
+                            const numtyp param_bigr,
+                            const numtyp param_bigd)
+{
+  if (r < param_bigr-param_bigd || r > param_bigr+param_bigd)
+    return (numtyp)0.0;
+
+  const numtyp c1 = cos(MY_PI2*(r - param_bigr)/param_bigd);
+  const numtyp c3 = cos((numtyp)3*MY_PI2*(r - param_bigr)/param_bigd);
+  return -((numtyp)0.375*MY_PI4/param_bigd) * ((numtyp)3*c1 + c3);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Returns -bigb * exp(-lam2*r) * fc(r), or 0 once r exceeds bigr+bigd.
+ucl_inline numtyp ters_fa(const numtyp r,
+                          const numtyp param_bigb,
+                          const numtyp param_bigr,
+                          const numtyp param_bigd,
+                          const numtyp param_lam2)
+{
+  if (r > param_bigr + param_bigd)
+    return (numtyp)0.0;
+
+  const numtyp decay = ucl_exp(-param_lam2 * r);
+  const numtyp cut = ters_fc(r,param_bigr,param_bigd);
+  return -param_bigb * decay * cut;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Returns bigb * exp(-lam2*r) * (lam2*fc(r) - fc'(r)), or 0 once r exceeds
+// bigr+bigd.
+ucl_inline numtyp ters_fa_d(const numtyp r,
+                            const numtyp param_bigb,
+                            const numtyp param_bigr,
+                            const numtyp param_bigd,
+                            const numtyp param_lam2)
+{
+  if (r > param_bigr + param_bigd)
+    return (numtyp)0.0;
+
+  const numtyp decay = ucl_exp(-param_lam2 * r);
+  const numtyp cut = ters_fc(r,param_bigr,param_bigd);
+  const numtyp cut_d = ters_fc_d(r,param_bigr,param_bigd);
+  return param_bigb * decay * (param_lam2 * cut - cut_d);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Evaluates, with bz = beta*zeta:
+//   bz > ca1 : bz^(-powern/(2*powern_del))
+//   bz < ca4 : 1
+//   else     : (1 + bz^powern)^(-1/(2*powern_del))
+// The ca1 test is made first, as in the branch order below.
+ucl_inline numtyp ters_bij(const numtyp zeta,
+                           const numtyp param_beta,
+                           const numtyp param_powern,
+                           const numtyp param_powern_del,
+                           const numtyp param_ca1,
+                           const numtyp param_ca4)
+{
+  const numtyp bz = param_beta * zeta;
+
+  if (bz > param_ca1) {
+    return ucl_powr(bz, -param_powern/((numtyp)2.0*param_powern_del));
+  } else if (bz < param_ca4) {
+    return (numtyp)1.0;
+  }
+
+  const numtyp base = (numtyp)1.0 + ucl_powr(bz,param_powern);
+  const numtyp expnt = (numtyp)-1.0/((numtyp)2.0*param_powern_del);
+  return ucl_powr(base, expnt);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Companion of ters_bij() using the same three ranges of bz = beta*zeta
+// (ca1 test first, then ca4); the middle range returns 0 and the other two
+// divide by zeta.
+ucl_inline numtyp ters_bij_d(const numtyp zeta,
+                             const numtyp param_beta,
+                             const numtyp param_powern,
+                             const numtyp param_powern_del,
+                             const numtyp param_ca1,
+                             const numtyp param_ca4)
+{
+  const numtyp bz = param_beta * zeta;
+
+  if (bz > param_ca1) {
+    const numtyp lead = (numtyp)-0.5*(param_powern/param_powern_del);
+    const numtyp pw = ucl_powr(bz,(numtyp)-0.5*(param_powern/param_powern_del));
+    return lead * pw / zeta;
+  }
+  if (bz < param_ca4)
+    return (numtyp)0.0;
+
+  const numtyp bz_n = ucl_powr(bz,param_powern);
+  const numtyp lead = (numtyp)-0.5 *(param_powern/param_powern_del);
+  const numtyp pw = ucl_powr((numtyp)1.0+bz_n,
+                             (numtyp)-1.0-((numtyp)1.0 /
+                             ((numtyp)2.0*param_powern_del)));
+  return lead * pw * bz_n / zeta;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Writes to dri, drj and drk the derivatives of one
+// fc(rik)*gijk(cos_theta)*ex_delr term with respect to Ri, Rj and Rk, each
+// multiplied by prefactor.
+ucl_inline void ters_zetaterm_d(const numtyp prefactor,
+                                const numtyp rij_hat[3],
+                                const numtyp rij,
+                                const numtyp rik_hat[3],
+                                const numtyp rik,
+                                const numtyp param_bigr,
+                                const numtyp param_bigd,
+                                const numtyp param_powermint,
+                                const numtyp param_lam3,
+                                const numtyp param_h,
+                                const numtyp param_c1,
+                                const numtyp param_c2,
+                                const numtyp param_c3,
+                                const numtyp param_c4,
+                                const numtyp param_c5,
+                                numtyp dri[3],
+                                numtyp drj[3],
+                                numtyp drk[3])
+{
+  numtyp dcos_i[3],dcos_j[3],dcos_k[3];
+
+  // cutoff on rik and its derivative
+  const numtyp cut = ters_fc(rik,param_bigr,param_bigd);
+  const numtyp cut_d = ters_fc_d(rik,param_bigr,param_bigd);
+
+  // exponent is lam3*(rij-rik), cubed when powermint truncates to 3
+  const int cubic = ((int)param_powermint == 3);
+  const numtyp t = param_lam3*(rij-rik);
+  const numtyp arg = cubic ? t*t*t : t;
+
+  // exponential, replaced by 1e30 / 0 outside +/-69.0776
+  numtyp expo;
+  if (arg > (numtyp)69.0776) expo = (numtyp)1.e30;
+  else if (arg < (numtyp)-69.0776) expo = (numtyp)0.0;
+  else expo = ucl_exp(arg);
+
+  const numtyp expo_d = cubic ? (numtyp)3.0*param_lam3*t*t*expo
+                              : param_lam3 * expo;
+
+  // angular term, its derivative, and the gradients of cos(theta)
+  const numtyp cos_theta = vec3_dot(rij_hat,rik_hat);
+  const numtyp g = ters_gijk_mod(cos_theta,param_c1,param_c2,param_c3,
+                                 param_c4,param_c5,param_h);
+  const numtyp g_d = ters_gijk_d_mod(cos_theta,param_c2,param_c3,param_c4,
+                                     param_c5,param_h);
+  costheta_d(rij_hat,rij,rik_hat,rik,dcos_i,dcos_j,dcos_k);
+
+  // derivative wrt Ri:
+  //   -cut_d*g*expo*rik_hat + cut*g_d*expo*dcos_i
+  //   + cut*g*expo_d*(rik_hat - rij_hat)
+  vec3_scale(-cut_d*g*expo,rik_hat,dri);
+  vec3_scaleadd(cut*g_d*expo,dcos_i,dri,dri);
+  vec3_scaleadd(cut*g*expo_d,rik_hat,dri,dri);
+  vec3_scaleadd(-cut*g*expo_d,rij_hat,dri,dri);
+  vec3_scale(prefactor,dri,dri);
+
+  // derivative wrt Rj:
+  //   cut*g_d*expo*dcos_j + cut*g*expo_d*rij_hat
+  vec3_scale(cut*g_d*expo,dcos_j,drj);
+  vec3_scaleadd(cut*g*expo_d,rij_hat,drj,drj);
+  vec3_scale(prefactor,drj,drj);
+
+  // derivative wrt Rk:
+  //   cut_d*g*expo*rik_hat + cut*g_d*expo*dcos_k - cut*g*expo_d*rik_hat
+  vec3_scale(cut_d*g*expo,rik_hat,drk);
+  vec3_scaleadd(cut*g_d*expo,dcos_k,drk,drk);
+  vec3_scaleadd(-cut*g*expo_d,rik_hat,drk,drk);
+  vec3_scale(prefactor,drk,drk);
+}
+
+// Ri-only variant of ters_zetaterm_d(): writes to dri the derivative of one
+// fc(rik)*gijk(cos_theta)*ex_delr term with respect to Ri, times prefactor.
+ucl_inline void ters_zetaterm_d_fi(const numtyp prefactor,
+                                   const numtyp rij_hat[3],
+                                   const numtyp rij,
+                                   const numtyp rik_hat[3],
+                                   const numtyp rik,
+                                   const numtyp param_bigr,
+                                   const numtyp param_bigd,
+                                   const numtyp param_powermint,
+                                   const numtyp param_lam3,
+                                   const numtyp param_h,
+                                   const numtyp param_c1,
+                                   const numtyp param_c2,
+                                   const numtyp param_c3,
+                                   const numtyp param_c4,
+                                   const numtyp param_c5,
+                                   numtyp dri[3])
+{
+  // dcos_j and dcos_k are filled by costheta_d() but not used afterwards
+  numtyp dcos_i[3],dcos_j[3],dcos_k[3];
+
+  // cutoff on rik and its derivative
+  const numtyp cut = ters_fc(rik,param_bigr,param_bigd);
+  const numtyp cut_d = ters_fc_d(rik,param_bigr,param_bigd);
+
+  // exponent is lam3*(rij-rik), cubed when powermint truncates to 3
+  const int cubic = ((int)param_powermint == 3);
+  const numtyp t = param_lam3*(rij-rik);
+  const numtyp arg = cubic ? t*t*t : t;
+
+  // exponential, replaced by 1e30 / 0 outside +/-69.0776
+  numtyp expo;
+  if (arg > (numtyp)69.0776) expo = (numtyp)1.e30;
+  else if (arg < (numtyp)-69.0776) expo = (numtyp)0.0;
+  else expo = ucl_exp(arg);
+
+  const numtyp expo_d = cubic ? (numtyp)3.0*param_lam3*t*t*expo
+                              : param_lam3 * expo;
+
+  const numtyp cos_theta = vec3_dot(rij_hat,rik_hat);
+  const numtyp g = ters_gijk_mod(cos_theta,param_c1,param_c2,param_c3,
+                                 param_c4,param_c5,param_h);
+  const numtyp g_d = ters_gijk_d_mod(cos_theta,param_c2,param_c3,param_c4,
+                                     param_c5,param_h);
+  costheta_d(rij_hat,rij,rik_hat,rik,dcos_i,dcos_j,dcos_k);
+
+  // derivative wrt Ri:
+  //   -cut_d*g*expo*rik_hat + cut*g_d*expo*dcos_i
+  //   + cut*g*expo_d*(rik_hat - rij_hat)
+  vec3_scale(-cut_d*g*expo,rik_hat,dri);
+  vec3_scaleadd(cut*g_d*expo,dcos_i,dri,dri);
+  vec3_scaleadd(cut*g*expo_d,rik_hat,dri,dri);
+  vec3_scaleadd(-cut*g*expo_d,rij_hat,dri,dri);
+  vec3_scale(prefactor,dri,dri);
+}
+
+// Rj-only variant of ters_zetaterm_d(): writes to drj the derivative of one
+// fc(rik)*gijk(cos_theta)*ex_delr term with respect to Rj, times prefactor.
+// The derivative of the cutoff function does not enter this component.
+ucl_inline void ters_zetaterm_d_fj(const numtyp prefactor,
+                                   const numtyp rij_hat[3],
+                                   const numtyp rij,
+                                   const numtyp rik_hat[3],
+                                   const numtyp rik,
+                                   const numtyp param_bigr,
+                                   const numtyp param_bigd,
+                                   const numtyp param_powermint,
+                                   const numtyp param_lam3,
+                                   const numtyp param_h,
+                                   const numtyp param_c1,
+                                   const numtyp param_c2,
+                                   const numtyp param_c3,
+                                   const numtyp param_c4,
+                                   const numtyp param_c5,
+                                   numtyp drj[3])
+{
+  // dcos_i and dcos_k are filled by costheta_d() but not used afterwards
+  numtyp dcos_i[3],dcos_j[3],dcos_k[3];
+
+  const numtyp cut = ters_fc(rik,param_bigr,param_bigd);
+
+  // exponent is lam3*(rij-rik), cubed when powermint truncates to 3
+  const int cubic = ((int)param_powermint == 3);
+  const numtyp t = param_lam3*(rij-rik);
+  const numtyp arg = cubic ? t*t*t : t;
+
+  // exponential, replaced by 1e30 / 0 outside +/-69.0776
+  numtyp expo;
+  if (arg > (numtyp)69.0776) expo = (numtyp)1.e30;
+  else if (arg < (numtyp)-69.0776) expo = (numtyp)0.0;
+  else expo = ucl_exp(arg);
+
+  const numtyp expo_d = cubic ? (numtyp)3.0*param_lam3*t*t*expo
+                              : param_lam3 * expo;
+
+  const numtyp cos_theta = vec3_dot(rij_hat,rik_hat);
+  const numtyp g = ters_gijk_mod(cos_theta,param_c1,param_c2,param_c3,
+                                 param_c4,param_c5,param_h);
+  const numtyp g_d = ters_gijk_d_mod(cos_theta,param_c2,param_c3,param_c4,
+                                     param_c5,param_h);
+  costheta_d(rij_hat,rij,rik_hat,rik,dcos_i,dcos_j,dcos_k);
+
+  // derivative wrt Rj:
+  //   cut*g_d*expo*dcos_j + cut*g*expo_d*rij_hat
+  vec3_scale(cut*g_d*expo,dcos_j,drj);
+  vec3_scaleadd(cut*g*expo_d,rij_hat,drj,drj);
+  vec3_scale(prefactor,drj,drj);
+}
+
+// Rk-only variant of ters_zetaterm_d(): writes to drk the derivative of one
+// fc(rik)*gijk(cos_theta)*ex_delr term with respect to Rk, times prefactor.
+ucl_inline void ters_zetaterm_d_fk(const numtyp prefactor,
+                                   const numtyp rij_hat[3],
+                                   const numtyp rij,
+                                   const numtyp rik_hat[3],
+                                   const numtyp rik,
+                                   const numtyp param_bigr,
+                                   const numtyp param_bigd,
+                                   const numtyp param_powermint,
+                                   const numtyp param_lam3,
+                                   const numtyp param_h,
+                                   const numtyp param_c1,
+                                   const numtyp param_c2,
+                                   const numtyp param_c3,
+                                   const numtyp param_c4,
+                                   const numtyp param_c5,
+                                   numtyp drk[3])
+{
+  // dcos_i and dcos_j are filled by costheta_d() but not used afterwards
+  numtyp dcos_i[3],dcos_j[3],dcos_k[3];
+
+  // cutoff on rik and its derivative
+  const numtyp cut = ters_fc(rik,param_bigr,param_bigd);
+  const numtyp cut_d = ters_fc_d(rik,param_bigr,param_bigd);
+
+  // exponent is lam3*(rij-rik), cubed when powermint truncates to 3
+  const int cubic = ((int)param_powermint == 3);
+  const numtyp t = param_lam3*(rij-rik);
+  const numtyp arg = cubic ? t*t*t : t;
+
+  // exponential, replaced by 1e30 / 0 outside +/-69.0776
+  numtyp expo;
+  if (arg > (numtyp)69.0776) expo = (numtyp)1.e30;
+  else if (arg < (numtyp)-69.0776) expo = (numtyp)0.0;
+  else expo = ucl_exp(arg);
+
+  const numtyp expo_d = cubic ? (numtyp)3.0*param_lam3*t*t*expo
+                              : param_lam3 * expo;
+
+  const numtyp cos_theta = vec3_dot(rij_hat,rik_hat);
+  const numtyp g = ters_gijk_mod(cos_theta,param_c1,param_c2,param_c3,
+                                 param_c4,param_c5,param_h);
+  const numtyp g_d = ters_gijk_d_mod(cos_theta,param_c2,param_c3,param_c4,
+                                     param_c5,param_h);
+  costheta_d(rij_hat,rij,rik_hat,rik,dcos_i,dcos_j,dcos_k);
+
+  // derivative wrt Rk:
+  //   cut_d*g*expo*rik_hat + cut*g_d*expo*dcos_k - cut*g*expo_d*rik_hat
+  vec3_scale(cut_d*g*expo,rik_hat,drk);
+  vec3_scaleadd(cut*g_d*expo,dcos_k,drk,drk);
+  vec3_scaleadd(-cut*g*expo_d,rik_hat,drk,drk);
+  vec3_scale(prefactor,drk,drk);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Repulsive pair term at squared distance rsq.
+//   ans[0] = -biga*exp(-lam1*r)*(fc'(r) - fc(r)*lam1)/r   (force factor)
+//   ans[1] = fc(r)*biga*exp(-lam1*r)                      (energy)
+// ans[1] is written only when eflag is non-zero.
+ucl_inline void repulsive(const numtyp param_bigr,
+                          const numtyp param_bigd,
+                          const numtyp param_lam1,
+                          const numtyp param_biga,
+                          const numtyp rsq,
+                          const int eflag,
+                          numtyp *ans)
+{
+  const numtyp r = ucl_sqrt(rsq);
+  const numtyp cut = ters_fc(r,param_bigr,param_bigd);
+  const numtyp cut_d = ters_fc_d(r,param_bigr,param_bigd);
+  const numtyp decay = ucl_exp(-param_lam1 * r);
+
+  // fforce
+  ans[0] = -param_biga*decay*(cut_d - cut*param_lam1)*ucl_recip(r);
+
+  // eng
+  if (eflag) {
+    ans[1] = cut * param_biga * decay;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Returns fc(rik) * gijk(cos_theta) * ex_delr for one (i,j,k) triplet, where
+// cos_theta comes from the x/y/z components of delrij and delrik and ex_delr
+// is exp(lam3*(rij-rik)) (argument cubed when powermint truncates to 3),
+// replaced by 1e30 / 0 when the argument is outside +/-69.0776.
+ucl_inline numtyp zeta(const numtyp param_powermint,
+                       const numtyp param_lam3,
+                       const numtyp param_bigr,
+                       const numtyp param_bigd,
+                       const numtyp param_h,
+                       const numtyp param_c1,
+                       const numtyp param_c2,
+                       const numtyp param_c3,
+                       const numtyp param_c4,
+                       const numtyp param_c5,
+                       const numtyp rsqij,
+                       const numtyp rsqik,
+                       const numtyp4 delrij,
+                       const numtyp4 delrik)
+{
+  const numtyp rij = ucl_sqrt(rsqij);
+  const numtyp rik = ucl_sqrt(rsqik);
+  const numtyp costheta = (delrij.x*delrik.x + delrij.y*delrik.y +
+                           delrij.z*delrik.z) / (rij*rik);
+
+  const numtyp t = param_lam3*(rij-rik);
+  const numtyp arg = ((int)param_powermint == 3) ? t*t*t : t;
+
+  numtyp expo;
+  if (arg > (numtyp)69.0776) expo = (numtyp)1.e30;
+  else if (arg < (numtyp)-69.0776) expo = (numtyp)0.0;
+  else expo = ucl_exp(arg);
+
+  const numtyp cut = ters_fc(rik,param_bigr,param_bigd);
+  const numtyp g = ters_gijk_mod(costheta,param_c1,param_c2,param_c3,
+                                 param_c4,param_c5,param_h);
+  return cut * g * expo;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Pair quantities that depend on the accumulated zeta_ij:
+//   fpfeng[0] = 0.5*bij*fa'(r)/r        (force factor)
+//   fpfeng[1] = -0.5*fa(r)*bij'(zeta)   (prefactor)
+//   fpfeng[2] = 0.5*bij*fa(r)           (energy, only when eflag is non-zero)
+// fpfeng[3] is not written here.
+ucl_inline void force_zeta(const numtyp param_bigb,
+                           const numtyp param_bigr,
+                           const numtyp param_bigd,
+                           const numtyp param_lam2,
+                           const numtyp param_beta,
+                           const numtyp param_powern,
+                           const numtyp param_powern_del,
+                           const numtyp param_ca1,
+                           const numtyp param_ca4,
+                           const numtyp rsq,
+                           const numtyp zeta_ij,
+                           const int eflag,
+                           numtyp fpfeng[4])
+{
+  const numtyp r = ucl_sqrt(rsq);
+  const numtyp attr = ters_fa(r,param_bigb,param_bigr,param_bigd,param_lam2);
+  const numtyp attr_d = ters_fa_d(r,param_bigb,param_bigr,param_bigd,
+                                  param_lam2);
+  const numtyp order = ters_bij(zeta_ij,param_beta,param_powern,
+                                param_powern_del,param_ca1,param_ca4);
+  const numtyp order_d = ters_bij_d(zeta_ij,param_beta, param_powern,
+                                    param_powern_del,param_ca1,param_ca4);
+
+  fpfeng[0] = (numtyp)0.5*order*attr_d * ucl_recip(r); // fforce
+  fpfeng[1] = (numtyp)-0.5*attr * order_d;             // prefactor
+  if (eflag) {
+    fpfeng[2] = (numtyp)0.5*order*attr;                // eng
+  }
+}
+
+/* ----------------------------------------------------------------------
+ attractive term
+ use param_ij cutoff for rij test
+ use param_ijk cutoff for rik test
+------------------------------------------------------------------------- */
+
+// Scales delrij and delrik by rijinv and rikinv, then lets
+// ters_zetaterm_d() fill fi, fj and fk.
+ucl_inline void attractive(const numtyp param_bigr,
+                           const numtyp param_bigd,
+                           const numtyp param_powermint,
+                           const numtyp param_lam3,
+                           const numtyp param_h,
+                           const numtyp param_c1,
+                           const numtyp param_c2,
+                           const numtyp param_c3,
+                           const numtyp param_c4,
+                           const numtyp param_c5,
+                           const numtyp prefactor,
+                           const numtyp rij,
+                           const numtyp rijinv,
+                           const numtyp rik,
+                           const numtyp rikinv,
+                           const numtyp delrij[3],
+                           const numtyp delrik[3],
+                           numtyp fi[3],
+                           numtyp fj[3],
+                           numtyp fk[3])
+{
+  numtyp unit_ij[3];
+  numtyp unit_ik[3];
+
+  vec3_scale(rijinv,delrij,unit_ij);
+  vec3_scale(rikinv,delrik,unit_ik);
+
+  ters_zetaterm_d(prefactor,unit_ij,rij,unit_ik,rik,
+                  param_bigr, param_bigd, param_powermint, param_lam3,
+                  param_h, param_c1, param_c2, param_c3, param_c4, param_c5,
+                  fi, fj, fk);
+}
+
+// Scales delrij and delrik by rijinv and rikinv, then lets
+// ters_zetaterm_d_fi() fill fi.
+ucl_inline void attractive_fi(const numtyp param_bigr,
+                              const numtyp param_bigd,
+                              const numtyp param_powermint,
+                              const numtyp param_lam3,
+                              const numtyp param_h,
+                              const numtyp param_c1,
+                              const numtyp param_c2,
+                              const numtyp param_c3,
+                              const numtyp param_c4,
+                              const numtyp param_c5,
+                              const numtyp prefactor,
+                              const numtyp rij,
+                              const numtyp rijinv,
+                              const numtyp rik,
+                              const numtyp rikinv,
+                              const numtyp delrij[3],
+                              const numtyp delrik[3],
+                              numtyp fi[3])
+{
+  numtyp unit_ij[3];
+  numtyp unit_ik[3];
+
+  vec3_scale(rijinv,delrij,unit_ij);
+  vec3_scale(rikinv,delrik,unit_ik);
+
+  ters_zetaterm_d_fi(prefactor,unit_ij,rij,unit_ik,rik,
+                     param_bigr, param_bigd, param_powermint, param_lam3,
+                     param_h, param_c1, param_c2, param_c3, param_c4, param_c5,
+                     fi);
+}
+
+// Scales delrij and delrik by rijinv and rikinv, then lets
+// ters_zetaterm_d_fj() fill fj.
+ucl_inline void attractive_fj(const numtyp param_bigr,
+                              const numtyp param_bigd,
+                              const numtyp param_powermint,
+                              const numtyp param_lam3,
+                              const numtyp param_h,
+                              const numtyp param_c1,
+                              const numtyp param_c2,
+                              const numtyp param_c3,
+                              const numtyp param_c4,
+                              const numtyp param_c5,
+                              const numtyp prefactor,
+                              const numtyp rij,
+                              const numtyp rijinv,
+                              const numtyp rik,
+                              const numtyp rikinv,
+                              const numtyp delrij[3],
+                              const numtyp delrik[3],
+                              numtyp fj[3])
+{
+  numtyp unit_ij[3];
+  numtyp unit_ik[3];
+
+  vec3_scale(rijinv,delrij,unit_ij);
+  vec3_scale(rikinv,delrik,unit_ik);
+
+  ters_zetaterm_d_fj(prefactor,unit_ij,rij,unit_ik,rik,
+                     param_bigr, param_bigd, param_powermint, param_lam3,
+                     param_h, param_c1, param_c2, param_c3, param_c4, param_c5,
+                     fj);
+}
+
+// Scales delrij and delrik by rijinv and rikinv, then lets
+// ters_zetaterm_d_fk() fill fk.
+ucl_inline void attractive_fk(const numtyp param_bigr,
+                              const numtyp param_bigd,
+                              const numtyp param_powermint,
+                              const numtyp param_lam3,
+                              const numtyp param_h,
+                              const numtyp param_c1,
+                              const numtyp param_c2,
+                              const numtyp param_c3,
+                              const numtyp param_c4,
+                              const numtyp param_c5,
+                              const numtyp prefactor,
+                              const numtyp rij,
+                              const numtyp rijinv,
+                              const numtyp rik,
+                              const numtyp rikinv,
+                              const numtyp delrij[3],
+                              const numtyp delrik[3],
+                              numtyp fk[3])
+{
+  numtyp unit_ij[3];
+  numtyp unit_ik[3];
+
+  vec3_scale(rijinv,delrij,unit_ij);
+  vec3_scale(rikinv,delrik,unit_ik);
+
+  ters_zetaterm_d_fk(prefactor,unit_ij,rij,unit_ik,rik,
+                     param_bigr, param_bigd, param_powermint, param_lam3,
+                     param_h, param_c1, param_c2, param_c3, param_c4, param_c5,
+                     fk);
+}
+
+
+#endif
+
+
diff --git a/lib/gpu/lal_tersoff_zbl.cpp b/lib/gpu/lal_tersoff_zbl.cpp
new file mode 100644
index 0000000000..57688f53ab
--- /dev/null
+++ b/lib/gpu/lal_tersoff_zbl.cpp
@@ -0,0 +1,482 @@
+/***************************************************************************
+ tersoff_zbl.cpp
+ -------------------
+ Trung Dac Nguyen
+
+ Class for acceleration of the tersoff/zbl pair style.
+
+ __________________________________________________________________________
+ This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+ begin :
+ email : ndactrung@gmail.com
+ ***************************************************************************/
+
+#if defined(USE_OPENCL)
+#include "tersoff_zbl_cl.h"
+#elif defined(USE_CUDART)
+const char *tersoff_zbl=0;
+#else
+#include "tersoff_zbl_cubin.h"
+#endif
+
+#include "lal_tersoff_zbl.h"
+#include
+using namespace LAMMPS_AL;
+#define TersoffZT TersoffZBL
+
+extern Device device;
+
+// Constructor: default-constructs the BaseThree base and marks the object as
+// not yet allocated, so clear() is a no-op until init() succeeds.
+// NOTE(review): the template parameter lists on these lines appear to have
+// been stripped in this copy of the patch.
+template
+TersoffZT::TersoffZBL() : BaseThree(), _allocated(false) {
+}
+
+// Destructor: releases everything through clear().
+// NOTE(review): the template parameter list on the next line appears to have
+// been stripped in this copy of the patch.
+template
+TersoffZT::~TersoffZBL() {
+ clear();
+}
+
+// Returns the value of bytes_per_atom_atomic(max_nbors) unchanged.
+// NOTE(review): the template parameter list on the next line appears to have
+// been stripped in this copy of the patch.
+template
+int TersoffZT::bytes_per_atom(const int max_nbors) const {
+ return this->bytes_per_atom_atomic(max_nbors);
+}
+
+// Initializes the three-body base with the tersoff/zbl kernels, then allocates
+// the device vectors ts1..ts6, cutsq, elem2param, map and _zetaij, fills them
+// from the host arrays and binds the ts*_tex textures.
+// Returns the non-zero value from init_three() if that call fails, else 0.
+// NOTE(review): the template parameter list, the element types of the
+// UCL_H_Vec/static_cast expressions and several "for" headers appear to have
+// been stripped in this copy of the patch; restore them from the upstream
+// file before applying.
+template
+int TersoffZT::init(const int ntypes, const int nlocal, const int nall,
+ const int max_nbors, const double cell_size,
+ const double gpu_split, FILE *_screen, int* host_map,
+ const int nelements, int*** host_elem2param,
+ const int nparams, const double* lam1, const double* lam2,
+ const double* lam3, const double* powermint,
+ const double* biga, const double* bigb, const double* bigr,
+ const double* bigd, const double* c1, const double* c2,
+ const double* c3, const double* c4, const double* c,
+ const double* d, const double* h, const double* gamma,
+ const double* beta, const double* powern, const double* Z_i,
+ const double* Z_j, const double* ZBLcut,
+ const double* ZBLexpscale, const double global_e,
+ const double global_a_0, const double global_epsilon_0,
+ const double* host_cutsq)
+{
+ int success;
+ success=this->init_three(nlocal,nall,max_nbors,0,cell_size,gpu_split,
+ _screen,tersoff_zbl,"k_tersoff_zbl_repulsive",
+ "k_tersoff_zbl_three_center", "k_tersoff_zbl_three_end");
+ if (success!=0)
+ return success;
+
+ // Size _zetaij for max_nbors entries per atom; 2000 atoms are assumed when
+ // nall is 0
+ int ef_nall=nall;
+ if (ef_nall==0)
+ ef_nall=2000;
+ _zetaij.alloc(ef_nall*max_nbors,*(this->ucl_device),UCL_READ_WRITE);
+
+ k_zeta.set_function(*(this->pair_program),"k_tersoff_zbl_zeta");
+
+ // If atom type constants fit in shared memory use fast kernel
+ int lj_types=ntypes;
+ shared_types=false;
+ int max_shared_types=this->device->max_shared_types();
+ if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
+ lj_types=max_shared_types;
+ shared_types=true;
+ }
+ _lj_types=lj_types;
+
+ _nparams = nparams;
+ _nelements = nelements;
+
+ // Host staging buffer with nparams entries, reused for ts1..ts6
+ UCL_H_Vec dview(nparams,*(this->ucl_device),
+ UCL_WRITE_ONLY);
+
+ // NOTE(review): the next line is truncated; only the tail of an alloc call
+ // is left
+ for (int i=0; iucl_device),UCL_READ_ONLY);
+
+ // ts1 = (lam1, lam2, lam3, powermint)
+ for (int i=0; i(lam1[i]);
+ dview[i].y=static_cast(lam2[i]);
+ dview[i].z=static_cast(lam3[i]);
+ dview[i].w=static_cast(powermint[i]);
+ }
+
+ ucl_copy(ts1,dview,false);
+ ts1_tex.get_texture(*(this->pair_program),"ts1_tex");
+ ts1_tex.bind_float(ts1,4);
+
+ ts2.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
+
+ // ts2 = (biga, bigb, bigr, bigd)
+ for (int i=0; i(biga[i]);
+ dview[i].y=static_cast(bigb[i]);
+ dview[i].z=static_cast(bigr[i]);
+ dview[i].w=static_cast(bigd[i]);
+ }
+
+ ucl_copy(ts2,dview,false);
+ ts2_tex.get_texture(*(this->pair_program),"ts2_tex");
+ ts2_tex.bind_float(ts2,4);
+
+ ts3.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
+
+ // ts3 = (c1, c2, c3, c4)
+ for (int i=0; i(c1[i]);
+ dview[i].y=static_cast(c2[i]);
+ dview[i].z=static_cast(c3[i]);
+ dview[i].w=static_cast(c4[i]);
+ }
+
+ ucl_copy(ts3,dview,false);
+ ts3_tex.get_texture(*(this->pair_program),"ts3_tex");
+ ts3_tex.bind_float(ts3,4);
+
+ ts4.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
+
+ // ts4 = (c, d, h, gamma)
+ for (int i=0; i(c[i]);
+ dview[i].y=static_cast(d[i]);
+ dview[i].z=static_cast(h[i]);
+ dview[i].w=static_cast(gamma[i]);
+ }
+
+ ucl_copy(ts4,dview,false);
+ ts4_tex.get_texture(*(this->pair_program),"ts4_tex");
+ ts4_tex.bind_float(ts4,4);
+
+ ts5.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
+
+ // ts5 = (beta, powern, 0, 0)
+ for (int i=0; i(beta[i]);
+ dview[i].y=static_cast(powern[i]);
+ dview[i].z=(numtyp)0;
+ dview[i].w=(numtyp)0;
+ }
+
+ ucl_copy(ts5,dview,false);
+ ts5_tex.get_texture(*(this->pair_program),"ts5_tex");
+ ts5_tex.bind_float(ts5,4);
+
+ ts6.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
+
+ // ts6 = (Z_i, Z_j, ZBLcut, ZBLexpscale)
+ for (int i=0; i(Z_i[i]);
+ dview[i].y=static_cast(Z_j[i]);
+ dview[i].z=static_cast(ZBLcut[i]);
+ dview[i].w=static_cast(ZBLexpscale[i]);
+ }
+
+ ucl_copy(ts6,dview,false);
+ ts6_tex.get_texture(*(this->pair_program),"ts6_tex");
+ ts6_tex.bind_float(ts6,4);
+
+ // Per-parameter squared cutoffs taken from host_cutsq
+ UCL_H_Vec cutsq_view(nparams,*(this->ucl_device),
+ UCL_WRITE_ONLY);
+ for (int i=0; i(host_cutsq[i]);
+ cutsq.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
+ ucl_copy(cutsq,cutsq_view,false);
+
+ UCL_H_Vec dview_elem2param(nelements*nelements*nelements,
+ *(this->ucl_device), UCL_WRITE_ONLY);
+
+ elem2param.alloc(nelements*nelements*nelements,*(this->ucl_device),
+ UCL_READ_ONLY);
+
+ // Flatten host_elem2param[i][j][k] to index (i*nelements + j)*nelements + k
+ for (int i = 0; i < nelements; i++)
+ for (int j = 0; j < nelements; j++)
+ for (int k = 0; k < nelements; k++) {
+ int idx = i*nelements*nelements+j*nelements+k;
+ dview_elem2param[idx] = host_elem2param[i][j][k];
+ }
+
+ ucl_copy(elem2param,dview_elem2param,false);
+
+ // Only the first ntypes entries of dview_map are written although the
+ // buffer holds lj_types entries (lj_types can exceed ntypes when
+ // shared_types is set).
+ UCL_H_Vec dview_map(lj_types, *(this->ucl_device), UCL_WRITE_ONLY);
+ for (int i = 0; i < ntypes; i++)
+ dview_map[i] = host_map[i];
+
+ map.alloc(lj_types,*(this->ucl_device), UCL_READ_ONLY);
+ ucl_copy(map,dview_map,false);
+
+ _global_e = global_e;
+ _global_a_0 = global_a_0;
+ _global_epsilon_0 = global_epsilon_0;
+
+ _allocated=true;
+ // NOTE(review): ts6 is allocated above but its row_bytes() is not part of
+ // this sum -- confirm whether that omission is intentional.
+ this->_max_bytes=ts1.row_bytes()+ts2.row_bytes()+ts3.row_bytes()+
+ ts4.row_bytes()+ts5.row_bytes()+cutsq.row_bytes()+
+ map.row_bytes()+elem2param.row_bytes()+_zetaij.row_bytes();
+ return 0;
+}
+
+// Releases the vectors allocated by init(), the zeta kernel and the base-class
+// data. Does nothing when _allocated is false, so repeated calls are safe.
+// NOTE(review): the template parameter list on the next line appears to have
+// been stripped in this copy of the patch.
+template
+void TersoffZT::clear() {
+ if (!_allocated)
+ return;
+ // Reset the flag first so a second call returns immediately
+ _allocated=false;
+
+ ts1.clear();
+ ts2.clear();
+ ts3.clear();
+ ts4.clear();
+ ts5.clear();
+ ts6.clear();
+ cutsq.clear();
+ map.clear();
+ elem2param.clear();
+ _zetaij.clear();
+
+ k_zeta.clear();
+
+ this->clear_atomic();
+}
+
+// Returns host_memory_usage_atomic() plus the size of this object.
+// NOTE(review): the template parameter list and the template arguments inside
+// sizeof() appear to have been stripped in this copy of the patch.
+template
+double TersoffZT::host_memory_usage() const {
+ return this->host_memory_usage_atomic()+sizeof(TersoffZBL);
+}
+
+#define KTHREADS this->_threads_per_atom
+#define JTHREADS this->_threads_per_atom
+// ---------------------------------------------------------------------------
+// Copy nbor list from host if necessary and then calculate forces, virials,..
+// ---------------------------------------------------------------------------
+// Force computation driven by a caller-supplied neighbor list.
+// host_start receives the atom count returned by hd_balancer.balance() (0 when
+// nlist is 0). success is passed to resize_atom()/reset_nbors(); the function
+// returns early when reset_nbors() leaves it false.
+// NOTE(review): the template parameter list and the static_cast template
+// arguments appear to have been stripped in this copy of the patch.
+template
+void TersoffZT::compute(const int f_ago, const int nlocal, const int nall,
+ const int nlist, double **host_x, int *host_type,
+ int *ilist, int *numj, int **firstneigh,
+ const bool eflag, const bool vflag, const bool eatom,
+ const bool vatom, int &host_start,
+ const double cpu_time, bool &success) {
+ this->acc_timers();
+ if (nlist==0) {
+ host_start=0;
+ // Make sure textures are correct if realloc by a different hybrid style
+ this->resize_atom(0,nall,success);
+ this->zero_timers();
+ return;
+ }
+
+ int ago=this->hd_balancer.ago_first(f_ago);
+ int inum=this->hd_balancer.balance(ago,nlocal,cpu_time);
+ this->ans->inum(inum);
+ #ifdef THREE_CONCURRENT
+ this->ans2->inum(inum);
+ #endif
+ host_start=inum;
+
+ // ago==0: reload the neighbor list and refresh _max_nbors from it
+ if (ago==0) {
+ this->reset_nbors(nall, inum, nlist, ilist, numj, firstneigh, success);
+ if (!success)
+ return;
+ _max_nbors = this->nbor->max_nbor_loop(nlist,numj,ilist);
+ }
+
+ this->atom->cast_x_data(host_x,host_type);
+ this->hd_balancer.start_timer();
+ this->atom->add_x_data(host_x,host_type);
+
+ // re-allocate zetaij if necessary
+ // NOTE(review): nall*_max_nbors is formed before any widening is visible
+ // here; if both operands are int, confirm it cannot overflow for the
+ // largest expected systems.
+ if (nall*_max_nbors > _zetaij.cols()) {
+ int _nmax=static_cast(static_cast(nall)*1.10);
+ _zetaij.resize(_max_nbors*_nmax);
+ }
+
+ int _eflag;
+ if (eflag)
+ _eflag=1;
+ else
+ _eflag=0;
+
+ // The zeta kernel grid is sized from ainum, which is set to nall
+ int ainum=nall;
+ int nbor_pitch=this->nbor->nbor_pitch();
+ int BX=this->block_pair();
+ int GX=static_cast(ceil(static_cast(ainum)/
+ (BX/(JTHREADS*KTHREADS))));
+
+ this->k_zeta.set_size(GX,BX);
+ this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &ts6, &cutsq,
+ &map, &elem2param, &_nelements, &_nparams, &_zetaij,
+ &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+ &_eflag, &nall, &ainum, &nbor_pitch, &this->_threads_per_atom);
+
+ // evatom is 1 when either per-atom energy or per-atom virial is requested
+ int evatom=0;
+ if (eatom || vatom)
+ evatom=1;
+ #ifdef THREE_CONCURRENT
+ this->ucl_device->sync();
+ #endif
+ loop(eflag,vflag,evatom);
+ this->ans->copy_answers(eflag,vflag,eatom,vatom,ilist);
+ this->device->add_ans_object(this->ans);
+ #ifdef THREE_CONCURRENT
+ this->ans2->copy_answers(eflag,vflag,eatom,vatom,ilist);
+ this->device->add_ans_object(this->ans2);
+ #endif
+ this->hd_balancer.stop_timer();
+}
+
+// ---------------------------------------------------------------------------
+// Device neighboring: rebuild the nbor list on the GPU if necessary, then compute forces, virials, energies
+// ---------------------------------------------------------------------------
+template // NOTE(review): template parameter list appears to be missing here - confirm against upstream
+int ** TersoffZT::compute(const int ago, const int inum_full,
+ const int nall, double **host_x, int *host_type,
+ double *sublo, double *subhi, tagint *tag,
+ int **nspecial, tagint **special, const bool eflag,
+ const bool vflag, const bool eatom,
+ const bool vatom, int &host_start,
+ int **ilist, int **jnum,
+ const double cpu_time, bool &success) {
+ this->acc_timers();
+
+ if (inum_full==0) { // no atoms: return NULL before any kernel is launched
+ host_start=0;
+ // Make sure textures are correct if realloc by a different hybrid style
+ this->resize_atom(0,nall,success);
+ this->zero_timers();
+ return NULL;
+ }
+
+ this->hd_balancer.balance(cpu_time);
+ int inum=this->hd_balancer.get_gpu_count(ago,inum_full); // inum is whatever the balancer returns
+ this->ans->inum(inum);
+ #ifdef THREE_CONCURRENT
+ this->ans2->inum(inum);
+ #endif
+ host_start=inum; // handed back to the caller through the reference argument
+
+ // Build neighbor list on GPU if necessary
+ if (ago==0) {
+ _max_nbors = this->build_nbor_list(inum, inum_full-inum, nall, host_x, host_type,
+ sublo, subhi, tag, nspecial, special, success);
+ if (!success)
+ return NULL;
+ this->hd_balancer.start_timer();
+ } else { // no rebuild: only the positions/types are sent again
+ this->atom->cast_x_data(host_x,host_type);
+ this->hd_balancer.start_timer();
+ this->atom->add_x_data(host_x,host_type);
+ }
+ *ilist=this->nbor->host_ilist.begin(); // output arguments point at the neighbor object's host arrays
+ *jnum=this->nbor->host_acc.begin();
+
+ // grow _zetaij when nall*_max_nbors exceeds its current cols()
+ if (nall*_max_nbors > _zetaij.cols()) {
+ int _nmax=static_cast(static_cast(nall)*1.10); // 1.10*nall; NOTE(review): cast target types appear stripped - confirm
+ _zetaij.resize(_max_nbors*_nmax);
+ }
+
+ int _eflag; // int copy of eflag, passed to the zeta kernel
+ if (eflag)
+ _eflag=1;
+ else
+ _eflag=0;
+
+ int ainum=nall; // the zeta kernel grid is sized from nall, not inum
+ int nbor_pitch=this->nbor->nbor_pitch();
+ int BX=this->block_pair();
+ int GX=static_cast(ceil(static_cast(ainum)/
+ (BX/(JTHREADS*KTHREADS)))); // NOTE(review): cast target types appear stripped - confirm
+
+ this->k_zeta.set_size(GX,BX);
+ this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &ts6, &cutsq,
+ &map, &elem2param, &_nelements, &_nparams, &_zetaij,
+ &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+ &_eflag, &nall, &ainum, &nbor_pitch, &this->_threads_per_atom);
+
+ int evatom=0; // becomes 1 when eatom or vatom is set
+ if (eatom || vatom)
+ evatom=1;
+ #ifdef THREE_CONCURRENT
+ this->ucl_device->sync();
+ #endif
+ loop(eflag,vflag,evatom); // launches the pair and three-body kernels
+ this->ans->copy_answers(eflag,vflag,eatom,vatom);
+ this->device->add_ans_object(this->ans);
+ #ifdef THREE_CONCURRENT
+ this->ans2->copy_answers(eflag,vflag,eatom,vatom);
+ this->device->add_ans_object(this->ans2);
+ #endif
+ this->hd_balancer.stop_timer();
+
+ return this->nbor->host_jlist.begin()-host_start; // host_jlist pointer shifted back by host_start entries
+}
+
+// ---------------------------------------------------------------------------
+// Calculate energies, forces, and virials (pair kernel, then the three-body kernels)
+// ---------------------------------------------------------------------------
+template // NOTE(review): template parameter list appears to be missing here - confirm against upstream
+void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) {
+ // Compute the block size and grid size to keep all cores busy
+ int BX=this->block_pair();
+ int eflag, vflag; // int copies of the bool flags, passed to the kernels
+ if (_eflag)
+ eflag=1;
+ else
+ eflag=0;
+
+ if (_vflag)
+ vflag=1;
+ else
+ vflag=0;
+
+ int ainum=this->ans->inum(); // these kernels are sized from ans->inum()
+ int nbor_pitch=this->nbor->nbor_pitch();
+ int GX=static_cast(ceil(static_cast(this->ans->inum())/
+ (BX/this->_threads_per_atom))); // NOTE(review): cast target types appear stripped - confirm
+
+ this->time_pair.start();
+ this->k_pair.set_size(GX,BX);
+ this->k_pair.run(&this->atom->x, &ts1, &ts2, &ts6,
+ &_global_e, &_global_a_0, &_global_epsilon_0, &cutsq,
+ &map, &elem2param, &_nelements, &_nparams,
+ &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+ &this->ans->force, &this->ans->engv,
+ &eflag, &vflag, &ainum, &nbor_pitch,
+ &this->_threads_per_atom);
+
+ BX=this->block_size(); // the three-body kernels use block_size() rather than block_pair()
+ GX=static_cast(ceil(static_cast(this->ans->inum())/
+ (BX/(KTHREADS*JTHREADS)))); // NOTE(review): cast target types appear stripped - confirm
+ this->k_three_center.set_size(GX,BX);
+ this->k_three_center.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
+ &map, &elem2param, &_nelements, &_nparams, &_zetaij,
+ &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+ &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum,
+ &nbor_pitch, &this->_threads_per_atom, &evatom);
+
+ Answer *end_ans; // NOTE(review): template arguments of Answer appear stripped - confirm
+ #ifdef THREE_CONCURRENT
+ end_ans=this->ans2; // the three_end kernels write to the second answer object
+ #else
+ end_ans=this->ans;
+ #endif
+ if (evatom!=0) { // evatom set: use the three_end_vatom kernel
+ this->k_three_end_vatom.set_size(GX,BX);
+ this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
+ &map, &elem2param, &_nelements, &_nparams, &_zetaij,
+ &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+ &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
+ &nbor_pitch, &this->_threads_per_atom);
+
+ } else { // otherwise use the plain three_end kernel
+ this->k_three_end.set_size(GX,BX);
+ this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
+ &map, &elem2param, &_nelements, &_nparams, &_zetaij,
+ &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+ &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
+ &nbor_pitch, &this->_threads_per_atom);
+ }
+
+ this->time_pair.stop();
+}
+
+template class TersoffZBL; // explicit instantiation; NOTE(review): template arguments appear stripped - confirm against upstream
+
diff --git a/lib/gpu/lal_tersoff_zbl.cu b/lib/gpu/lal_tersoff_zbl.cu
new file mode 100644
index 0000000000..0d6c5a38d6
--- /dev/null
+++ b/lib/gpu/lal_tersoff_zbl.cu
@@ -0,0 +1,1065 @@
+// **************************************************************************
+// tersoff_zbl.cu
+// -------------------
+// Trung Dac Nguyen
+//
+// Device code for acceleration of the tersoff/zbl pair style
+//
+// __________________________________________________________________________
+// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+// begin :
+// email : ndactrung@gmail.com
+// ***************************************************************************/
+
+#ifdef NV_KERNEL
+#include "lal_tersoff_zbl_extra.h"
+
+#ifndef _DOUBLE_DOUBLE
+texture pos_tex; // NOTE(review): texture template arguments appear stripped in this section - confirm against upstream
+texture ts1_tex;
+texture ts2_tex;
+texture ts3_tex;
+texture ts4_tex;
+texture ts5_tex;
+texture ts6_tex;
+#else // _DOUBLE_DOUBLE defined
+texture pos_tex;
+texture ts1_tex;
+texture ts2_tex;
+texture ts3_tex;
+texture ts4_tex;
+texture ts5_tex;
+texture ts6_tex;
+#endif
+
+#else // NV_KERNEL not defined: the *_tex names become aliases of plain identifiers
+#define pos_tex x_
+#define ts1_tex ts1
+#define ts2_tex ts2
+#define ts3_tex ts3
+#define ts4_tex ts4
+#define ts5_tex ts5
+#define ts6_tex ts6
+#endif
+
+//#define THREE_CONCURRENT
+
+#define TWOTHIRD (numtyp)0.66666666666666666667
+
+#define zeta_idx(nbor_mem, packed_mem, nbor_pitch, n_stride, t_per_atom, \
+ i, nbor_j, offset_j, idx) /* writes the zetaij index for (i, nbor_j) into idx */ \
+ if (nbor_mem==packed_mem) { /* both neighbor pointers refer to the same buffer */ \
+ int jj = (nbor_j-offset_j-2*nbor_pitch)/n_stride; \
+ idx = jj*n_stride + i*t_per_atom + offset_j; \
+ } else { /* separate buffers: the neighbor position itself is used as the index */ \
+ idx = nbor_j; \
+ }
+
+#if (ARCH < 300) // this branch reduces through a __local scratch array
+
+#define store_answers_p(f, energy, virial, ii, inum, tid, t_per_atom, \
+ offset, eflag, vflag, ans, engv) \
+ if (t_per_atom>1) { /* more than one thread per atom: reduce the partial sums first */ \
+ __local acctyp red_acc[6][BLOCK_PAIR]; \
+ red_acc[0][tid]=f.x; \
+ red_acc[1][tid]=f.y; \
+ red_acc[2][tid]=f.z; \
+ red_acc[3][tid]=energy; \
+ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
+ if (offset < s) { \
+ for (int r=0; r<4; r++) \
+ red_acc[r][tid] += red_acc[r][tid+s]; \
+ } \
+ } \
+ f.x=red_acc[0][tid]; \
+ f.y=red_acc[1][tid]; \
+ f.z=red_acc[2][tid]; \
+ energy=red_acc[3][tid]; \
+ if (vflag>0) { /* the same scratch array is reused for the six virial terms */ \
+ for (int r=0; r<6; r++) \
+ red_acc[r][tid]=virial[r]; \
+ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
+ if (offset < s) { \
+ for (int r=0; r<6; r++) \
+ red_acc[r][tid] += red_acc[r][tid+s]; \
+ } \
+ } \
+ for (int r=0; r<6; r++) \
+ virial[r]=red_acc[r][tid]; \
+ } \
+ } \
+ if (offset==0) { /* only the offset==0 thread adds into engv and ans */ \
+ int ei=ii; \
+ if (eflag>0) { \
+ engv[ei]+=energy*(acctyp)0.5; /* energy is halved when accumulated */ \
+ ei+=inum; /* successive engv entries for one atom are inum apart */ \
+ } \
+ if (vflag>0) { \
+ for (int i=0; i<6; i++) { \
+ engv[ei]+=virial[i]*(acctyp)0.5; \
+ ei+=inum; \
+ } \
+ } \
+ acctyp4 old=ans[ii]; /* forces are added to the value already in ans[ii] */ \
+ old.x+=f.x; \
+ old.y+=f.y; \
+ old.z+=f.z; \
+ ans[ii]=old; \
+ }
+
+#define store_zeta(z, tid, t_per_atom, offset) \
+ if (t_per_atom>1) { /* reduce z over t_per_atom entries via the scratch array */ \
+ __local acctyp red_acc[BLOCK_PAIR]; \
+ red_acc[tid]=z; \
+ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
+ if (offset < s) { \
+ red_acc[tid] += red_acc[tid+s]; \
+ } \
+ } \
+ z=red_acc[tid]; \
+ }
+
+#else // ARCH >= 300: this branch reduces with shfl_xor instead of a scratch array
+
+#define store_answers_p(f, energy, virial, ii, inum, tid, t_per_atom, \
+ offset, eflag, vflag, ans, engv) \
+ if (t_per_atom>1) { /* more than one thread per atom: reduce the partial sums first */ \
+ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
+ f.x += shfl_xor(f.x, s, t_per_atom); \
+ f.y += shfl_xor(f.y, s, t_per_atom); \
+ f.z += shfl_xor(f.z, s, t_per_atom); \
+ energy += shfl_xor(energy, s, t_per_atom); \
+ } \
+ if (vflag>0) { \
+ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
+ for (int r=0; r<6; r++) \
+ virial[r] += shfl_xor(virial[r], s, t_per_atom); \
+ } \
+ } \
+ } \
+ if (offset==0) { /* only the offset==0 thread adds into engv and ans */ \
+ int ei=ii; \
+ if (eflag>0) { \
+ engv[ei]+=energy*(acctyp)0.5; /* energy is halved when accumulated */ \
+ ei+=inum; /* successive engv entries for one atom are inum apart */ \
+ } \
+ if (vflag>0) { \
+ for (int i=0; i<6; i++) { \
+ engv[ei]+=virial[i]*(acctyp)0.5; \
+ ei+=inum; \
+ } \
+ } \
+ acctyp4 old=ans[ii]; /* forces are added to the value already in ans[ii] */ \
+ old.x+=f.x; \
+ old.y+=f.y; \
+ old.z+=f.z; \
+ ans[ii]=old; \
+ }
+
+#define store_zeta(z, tid, t_per_atom, offset) \
+ if (t_per_atom>1) { /* reduce z over t_per_atom lanes */ \
+ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
+ z += shfl_xor(z, s, t_per_atom); \
+ } \
+ }
+
+#endif // ARCH < 300
+
+// Tersoff is currently used with at most 3 elements, i.e. 3*3*3 = 27 parameter
+// entries, and the block size should never be less than 32.
+// SHARED_SIZE is kept at 32 for now to limit shared-memory use per block;
+// it must be increased if more than 3 elements are ever supported.
+
+#define SHARED_SIZE 32
+
+__kernel void k_tersoff_zbl_zeta(const __global numtyp4 *restrict x_,
+ const __global numtyp4 *restrict ts1_in,
+ const __global numtyp4 *restrict ts2_in,
+ const __global numtyp4 *restrict ts3_in,
+ const __global numtyp4 *restrict ts4_in,
+ const __global numtyp4 *restrict ts5_in,
+ const __global numtyp4 *restrict ts6_in,
+ const __global numtyp *restrict cutsq,
+ const __global int *restrict map,
+ const __global int *restrict elem2param,
+ const int nelements, const int nparams,
+ __global acctyp4 * zetaij,
+ const __global int * dev_nbor,
+ const __global int * dev_packed,
+ const int eflag, const int nall, const int inum,
+ const int nbor_pitch, const int t_per_atom) {
+ __local int tpa_sq,n_stride;
+ tpa_sq = fast_mul(t_per_atom,t_per_atom); // t_per_atom squared
+
+ int tid, ii, offset;
+ atom_info(tpa_sq,ii,tid,offset);
+
+ // SHARED_SIZE must be increased if there will be more than 3 elements in the future.
+ __local numtyp4 ts1[SHARED_SIZE];
+ __local numtyp4 ts2[SHARED_SIZE];
+ __local numtyp4 ts3[SHARED_SIZE];
+ __local numtyp4 ts4[SHARED_SIZE];
+ __local numtyp4 ts5[SHARED_SIZE];
+ __local numtyp4 ts6[SHARED_SIZE];
+ if (tid cutsq[ijparam]) continue; // NOTE(review): this line looks truncated (text after tid is missing) - restore from upstream
+
+ // compute zeta_ij
+ z = (acctyp)0;
+
+ int nbor_k = nborj_start-offset_j+offset_k;
+ for ( ; nbor_k < nbor_end; nbor_k+=n_stride) {
+ int k=dev_packed[nbor_k];
+ k &= NEIGHMASK;
+
+ if (k == j) continue; // k must differ from j
+
+ numtyp4 kx; fetch4(kx,k,pos_tex); //x_[k];
+ int ktype=kx.w; // the w component carries the atom type
+ ktype=map[ktype];
+ int ijkparam=elem2param[itype*nelements*nelements+jtype*nelements+ktype];
+
+ // Compute rik
+ delr2.x = kx.x-ix.x;
+ delr2.y = kx.y-ix.y;
+ delr2.z = kx.z-ix.z;
+ numtyp rsq2 = delr2.x*delr2.x+delr2.y*delr2.y+delr2.z*delr2.z;
+
+ if (rsq2 > cutsq[ijkparam]) continue; // k is outside the cutoff for this parameter set
+
+ numtyp4 ts1_ijkparam = ts1[ijkparam]; //fetch4(ts1_ijkparam,ijkparam,ts1_tex);
+ numtyp ijkparam_lam3 = ts1_ijkparam.z;
+ numtyp ijkparam_powermint = ts1_ijkparam.w;
+ numtyp4 ts2_ijkparam = ts2[ijkparam]; //fetch4(ts2_ijkparam,ijkparam,ts2_tex);
+ numtyp ijkparam_bigr = ts2_ijkparam.z;
+ numtyp ijkparam_bigd = ts2_ijkparam.w;
+ numtyp4 ts4_ijkparam = ts4[ijkparam]; //fetch4(ts4_ijkparam,ijkparam,ts4_tex);
+ numtyp ijkparam_c = ts4_ijkparam.x;
+ numtyp ijkparam_d = ts4_ijkparam.y;
+ numtyp ijkparam_h = ts4_ijkparam.z;
+ numtyp ijkparam_gamma = ts4_ijkparam.w;
+ z += zeta(ijkparam_powermint, ijkparam_lam3, ijkparam_bigr, ijkparam_bigd,
+ ijkparam_c, ijkparam_d, ijkparam_h, ijkparam_gamma,
+ rsq1, rsq2, delr1, delr2);
+ }
+
+ //int jj = (nbor_j-offset_j-2*nbor_pitch)/n_stride;
+ //int idx = jj*n_stride + i*t_per_atom + offset_j;
+ int idx;
+ zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
+ i, nbor_j, offset_j, idx);
+ store_zeta(z, tid, t_per_atom, offset_k); // sums the per-thread partial z values
+
+ numtyp4 ts1_ijparam = ts1[ijparam]; //fetch4(ts1_ijparam,ijparam,ts1_tex);
+ numtyp ijparam_lam2 = ts1_ijparam.y;
+ numtyp4 ts2_ijparam = ts2[ijparam]; //fetch4(ts2_ijparam,ijparam,ts2_tex);
+ numtyp ijparam_bigb = ts2_ijparam.y;
+ numtyp ijparam_bigr = ts2_ijparam.z;
+ numtyp ijparam_bigd = ts2_ijparam.w;
+ numtyp4 ts3_ijparam = ts3[ijparam]; //fetch4(ts3_ijparam,ijparam,ts3_tex);
+ numtyp ijparam_c1 = ts3_ijparam.x;
+ numtyp ijparam_c2 = ts3_ijparam.y;
+ numtyp ijparam_c3 = ts3_ijparam.z;
+ numtyp ijparam_c4 = ts3_ijparam.w;
+ numtyp4 ts5_ijparam = ts5[ijparam]; //fetch4(ts5_ijparam,ijparam,ts5_tex);
+ numtyp ijparam_beta = ts5_ijparam.x;
+ numtyp ijparam_powern = ts5_ijparam.y;
+ numtyp4 ts6_ijparam = ts6[ijparam]; //fetch4(ts6_ijparam,ijparam,ts6_tex);
+ numtyp ijparam_ZBLcut = ts6_ijparam.z;
+ numtyp ijparam_ZBLexpscale = ts6_ijparam.w;
+
+ if (offset_k == 0) { // only the offset_k==0 thread writes zetaij[idx]
+ numtyp fpfeng[4];
+ force_zeta(ijparam_bigb, ijparam_bigr, ijparam_bigd, ijparam_lam2,
+ ijparam_beta, ijparam_powern, ijparam_c1, ijparam_c2, ijparam_c3,
+ ijparam_c4, ijparam_ZBLcut, ijparam_ZBLexpscale, rsq1, z, eflag, fpfeng);
+ acctyp4 zij;
+ zij.x = fpfeng[0]; // later read back as the force term
+ zij.y = fpfeng[1]; // later read back as the prefactor
+ zij.z = fpfeng[2]; // later added to the energy when eflag is set
+ zij.w = z;
+ zetaij[idx] = zij;
+ }
+
+ } // for nbor
+ } // if ii
+}
+
+__kernel void k_tersoff_zbl_repulsive(const __global numtyp4 *restrict x_,
+ const __global numtyp4 *restrict ts1_in,
+ const __global numtyp4 *restrict ts2_in,
+ const __global numtyp4 *restrict ts6_in,
+ const numtyp global_e, const numtyp global_a_0,
+ const numtyp global_epsilon_0,
+ const __global numtyp *restrict cutsq,
+ const __global int *restrict map,
+ const __global int *restrict elem2param,
+ const int nelements, const int nparams,
+ const __global int * dev_nbor,
+ const __global int * dev_packed,
+ __global acctyp4 *restrict ans,
+ __global acctyp *restrict engv,
+ const int eflag, const int vflag,
+ const int inum, const int nbor_pitch,
+ const int t_per_atom) {
+ __local int n_stride;
+ int tid, ii, offset;
+ atom_info(t_per_atom,ii,tid,offset); // this kernel uses t_per_atom directly, not its square
+
+ __local numtyp4 ts1[SHARED_SIZE];
+ __local numtyp4 ts2[SHARED_SIZE];
+ __local numtyp4 ts6[SHARED_SIZE];
+ if (tid0) // NOTE(review): this line looks truncated (text after tid is missing) - restore from upstream
+ energy+=feng[1];
+ if (vflag>0) { // six virial components from the pair separation and force
+ virial[0] += delx*delx*force;
+ virial[1] += dely*dely*force;
+ virial[2] += delz*delz*force;
+ virial[3] += delx*dely*force;
+ virial[4] += delx*delz*force;
+ virial[5] += dely*delz*force;
+ }
+ }
+ } // for nbor
+
+ store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
+ ans,engv);
+ } // if ii
+
+}
+
+__kernel void k_tersoff_zbl_three_center(const __global numtyp4 *restrict x_,
+ const __global numtyp4 *restrict ts1_in,
+ const __global numtyp4 *restrict ts2_in,
+ const __global numtyp4 *restrict ts4_in,
+ const __global numtyp *restrict cutsq,
+ const __global int *restrict map,
+ const __global int *restrict elem2param,
+ const int nelements, const int nparams,
+ const __global acctyp4 *restrict zetaij,
+ const __global int * dev_nbor,
+ const __global int * dev_packed,
+ __global acctyp4 *restrict ans,
+ __global acctyp *restrict engv,
+ const int eflag, const int vflag,
+ const int inum, const int nbor_pitch,
+ const int t_per_atom, const int evatom) {
+ __local int tpa_sq, n_stride;
+ tpa_sq=fast_mul(t_per_atom,t_per_atom); // t_per_atom squared
+ numtyp lam3, powermint, bigr, bigd, c, d, h, gamma;
+
+ int tid, ii, offset;
+ atom_info(tpa_sq,ii,tid,offset); // offset ranges from 0 to tpa_sq-1
+
+ __local numtyp4 ts1[SHARED_SIZE];
+ __local numtyp4 ts2[SHARED_SIZE];
+ __local numtyp4 ts4[SHARED_SIZE];
+ if (tid cutsq[ijparam]) continue; // NOTE(review): this line looks truncated (text after tid is missing) - restore from upstream
+ numtyp r1 = ucl_sqrt(rsq1);
+ numtyp r1inv = ucl_rsqrt(rsq1);
+
+ // look up for zeta_ij
+
+ //int jj = (nbor_j-offset_j-2*nbor_pitch) / n_stride;
+ //int idx = jj*n_stride + i*t_per_atom + offset_j;
+ int idx;
+ zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
+ i, nbor_j, offset_j, idx);
+ acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex);
+ numtyp force = zeta_ij.x*tpainv; // stored force term scaled by tpainv
+ numtyp prefactor = zeta_ij.y;
+ f.x += delr1[0]*force;
+ f.y += delr1[1]*force;
+ f.z += delr1[2]*force;
+
+ if (eflag>0) {
+ energy+=zeta_ij.z*tpainv;
+ }
+ if (vflag>0) {
+ numtyp mforce = -force;
+ virial[0] += delr1[0]*delr1[0]*mforce;
+ virial[1] += delr1[1]*delr1[1]*mforce;
+ virial[2] += delr1[2]*delr1[2]*mforce;
+ virial[3] += delr1[0]*delr1[1]*mforce;
+ virial[4] += delr1[0]*delr1[2]*mforce;
+ virial[5] += delr1[1]*delr1[2]*mforce;
+ }
+
+ int nbor_k=nborj_start-offset_j+offset_k;
+ for ( ; nbor_k cutsq[ijkparam]) continue; // NOTE(review): this line looks truncated (loop header and body start missing) - restore from upstream
+ numtyp r2 = ucl_sqrt(rsq2);
+ numtyp r2inv = ucl_rsqrt(rsq2);
+
+ numtyp fi[3], fj[3], fk[3];
+ numtyp4 ts1_ijkparam = ts1[ijkparam]; //fetch4(ts1_ijkparam,ijkparam,ts1_tex);
+ lam3 = ts1_ijkparam.z;
+ powermint = ts1_ijkparam.w;
+ numtyp4 ts2_ijkparam = ts2[ijkparam]; //fetch4(ts2_ijkparam,ijkparam,ts2_tex);
+ bigr = ts2_ijkparam.z;
+ bigd = ts2_ijkparam.w;
+ numtyp4 ts4_ijkparam = ts4[ijkparam]; //fetch4(ts4_ijkparam,ijkparam,ts4_tex);
+ c = ts4_ijkparam.x;
+ d = ts4_ijkparam.y;
+ h = ts4_ijkparam.z;
+ gamma = ts4_ijkparam.w;
+ if (vflag>0) // fj and fk are only requested when the virial is accumulated
+ attractive(bigr, bigd, powermint, lam3, c, d, h, gamma,
+ prefactor, r1, r1inv, r2, r2inv, delr1, delr2, fi, fj, fk);
+ else
+ attractive_fi(bigr, bigd, powermint, lam3, c, d, h, gamma,
+ prefactor, r1, r1inv, r2, r2inv, delr1, delr2, fi);
+ f.x += fi[0];
+ f.y += fi[1];
+ f.z += fi[2];
+
+ if (vflag>0) {
+ acctyp v[6];
+ numtyp pre = (numtyp)2.0;
+ if (evatom==1) pre = TWOTHIRD; // the weight is 2/3 instead of 2 when evatom is 1
+ v[0] = pre*(delr1[0]*fj[0] + delr2[0]*fk[0]);
+ v[1] = pre*(delr1[1]*fj[1] + delr2[1]*fk[1]);
+ v[2] = pre*(delr1[2]*fj[2] + delr2[2]*fk[2]);
+ v[3] = pre*(delr1[0]*fj[1] + delr2[0]*fk[1]);
+ v[4] = pre*(delr1[0]*fj[2] + delr2[0]*fk[2]);
+ v[5] = pre*(delr1[1]*fj[2] + delr2[1]*fk[2]);
+
+ virial[0] += v[0]; virial[1] += v[1]; virial[2] += v[2];
+ virial[3] += v[3]; virial[4] += v[4]; virial[5] += v[5];
+ }
+ } // nbor_k
+ } // for nbor_j
+
+ store_answers_p(f,energy,virial,ii,inum,tid,tpa_sq,
+ offset,eflag,vflag,ans,engv);
+ } // if ii
+}
+
+__kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
+ const __global numtyp4 *restrict ts1_in,
+ const __global numtyp4 *restrict ts2_in,
+ const __global numtyp4 *restrict ts4_in,
+ const __global numtyp *restrict cutsq,
+ const __global int *restrict map,
+ const __global int *restrict elem2param,
+ const int nelements, const int nparams,
+ const __global acctyp4 *restrict zetaij,
+ const __global int * dev_nbor,
+ const __global int * dev_packed,
+ __global acctyp4 *restrict ans,
+ __global acctyp *restrict engv,
+ const int eflag, const int vflag,
+ const int inum, const int nbor_pitch,
+ const int t_per_atom) {
+ __local int tpa_sq, n_stride;
+ tpa_sq=fast_mul(t_per_atom,t_per_atom); // t_per_atom squared
+ numtyp lam3, powermint, bigr, bigd, c, d, h, gamma;
+
+ int tid, ii, offset;
+ atom_info(tpa_sq,ii,tid,offset);
+
+ __local numtyp4 ts1[SHARED_SIZE];
+ __local numtyp4 ts2[SHARED_SIZE];
+ __local numtyp4 ts4[SHARED_SIZE];
+ if (tid cutsq[ijparam]) continue; // NOTE(review): this line looks truncated (text after tid is missing) - restore from upstream
+
+ numtyp mdelr1[3]; // negated copy of delr1
+ mdelr1[0] = -delr1[0];
+ mdelr1[1] = -delr1[1];
+ mdelr1[2] = -delr1[2];
+
+ int nbor_k=j+nbor_pitch;
+ int numk=dev_nbor[nbor_k]; // number of neighbors recorded for j
+ if (dev_nbor==dev_packed) { // both neighbor pointers refer to the same buffer
+ nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
+ k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
+ nbor_k+=offset_k;
+ } else { // separate buffers: dev_nbor holds the start position of j's entries
+ nbor_k+=nbor_pitch;
+ nbor_k=dev_nbor[nbor_k];
+ k_end=nbor_k+numk;
+ nbor_k+=offset_k;
+ }
+ int nbork_start = nbor_k; // remembered so the k loop further down can restart here
+
+ // look up for zeta_ji: find i in the j's neighbor list
+ int m = tid / t_per_atom;
+ int ijnum = -1;
+ for ( ; nbor_k= 0) { // NOTE(review): this line looks truncated (search loop body missing) - restore from upstream
+ offset_kf = offset_k;
+ } else {
+ ijnum = red_acc[2*m+0];
+ offset_kf = red_acc[2*m+1];
+ }
+
+ //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride;
+ //int idx = iix*n_stride + j*t_per_atom + offset_kf;
+ int idx;
+ zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
+ j, ijnum, offset_kf, idx);
+ acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
+ numtyp force = zeta_ji.x*tpainv; // stored force term scaled by tpainv
+ numtyp prefactor_ji = zeta_ji.y;
+ f.x += delr1[0]*force;
+ f.y += delr1[1]*force;
+ f.z += delr1[2]*force;
+
+ if (eflag>0) {
+ energy+=zeta_ji.z*tpainv;
+ }
+ if (vflag>0) {
+ numtyp mforce = -force;
+ virial[0] += mdelr1[0]*mdelr1[0]*mforce;
+ virial[1] += mdelr1[1]*mdelr1[1]*mforce;
+ virial[2] += mdelr1[2]*mdelr1[2]*mforce;
+ virial[3] += mdelr1[0]*mdelr1[1]*mforce;
+ virial[4] += mdelr1[0]*mdelr1[2]*mforce;
+ virial[5] += mdelr1[1]*mdelr1[2]*mforce;
+ }
+
+ // attractive forces
+ for (nbor_k = nbork_start ; nbor_k cutsq[jikparam]) continue; // NOTE(review): this line looks truncated (loop header and body start missing) - restore from upstream
+ numtyp r2 = ucl_sqrt(rsq2);
+ numtyp r2inv = ucl_rsqrt(rsq2);
+ numtyp4 ts1_param, ts2_param, ts4_param;
+ numtyp fi[3];
+
+ ts1_param = ts1[jikparam]; //fetch4(ts1_jikparam,jikparam,ts1_tex);
+ lam3 = ts1_param.z;
+ powermint = ts1_param.w;
+ ts2_param = ts2[jikparam]; //fetch4(ts2_jikparam,jikparam,ts2_tex);
+ bigr = ts2_param.z;
+ bigd = ts2_param.w;
+ ts4_param = ts4[jikparam]; //fetch4(ts4_jikparam,jikparam,ts4_tex);
+ c = ts4_param.x;
+ d = ts4_param.y;
+ h = ts4_param.z;
+ gamma = ts4_param.w;
+ attractive_fj(bigr, bigd, powermint, lam3, c, d, h, gamma,
+ prefactor_ji, r1, r1inv, r2, r2inv, mdelr1, delr2, fi); // first contribution: jik parameters, prefactor_ji
+ f.x += fi[0];
+ f.y += fi[1];
+ f.z += fi[2];
+
+ //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
+ //int idx = kk*n_stride + j*t_per_atom + offset_k;
+ int idx;
+ zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
+ j, nbor_k, offset_k, idx);
+ acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
+ numtyp prefactor_jk = zeta_jk.y;
+ int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
+ ts1_param = ts1[jkiparam]; //fetch4(ts1_jkiparam,jkiparam,ts1_tex);
+ lam3 = ts1_param.z;
+ powermint = ts1_param.w;
+ ts2_param = ts2[jkiparam]; //fetch4(ts2_jkiparam,jkiparam,ts2_tex);
+ bigr = ts2_param.z;
+ bigd = ts2_param.w;
+ ts4_param = ts4[jkiparam]; //fetch4(ts4_jkiparam,jkiparam,ts4_tex);
+ c = ts4_param.x;
+ d = ts4_param.y;
+ h = ts4_param.z;
+ gamma = ts4_param.w;
+ attractive_fk(bigr, bigd, powermint, lam3, c, d, h, gamma,
+ prefactor_jk, r2, r2inv, r1, r1inv, delr2, mdelr1, fi); // second contribution: jki parameters, prefactor_jk, r1/r2 roles swapped
+ f.x += fi[0];
+ f.y += fi[1];
+ f.z += fi[2];
+ } // for nbor_k
+ } // for nbor_j
+
+ #ifdef THREE_CONCURRENT
+ store_answers(f,energy,virial,ii,inum,tid,tpa_sq,offset,
+ eflag,vflag,ans,engv);
+ #else
+ store_answers_p(f,energy,virial,ii,inum,tid,tpa_sq,offset,
+ eflag,vflag,ans,engv);
+ #endif
+ } // if ii
+}
+
+__kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_,
+ const __global numtyp4 *restrict ts1_in,
+ const __global numtyp4 *restrict ts2_in,
+ const __global numtyp4 *restrict ts4_in,
+ const __global numtyp *restrict cutsq,
+ const __global int *restrict map,
+ const __global int *restrict elem2param,
+ const int nelements, const int nparams,
+ const __global acctyp4 *restrict zetaij,
+ const __global int * dev_nbor,
+ const __global int * dev_packed,
+ __global acctyp4 *restrict ans,
+ __global acctyp *restrict engv,
+ const int eflag, const int vflag,
+ const int inum, const int nbor_pitch,
+ const int t_per_atom) {
+ __local int tpa_sq, n_stride;
+ tpa_sq=fast_mul(t_per_atom,t_per_atom); // t_per_atom squared
+ numtyp lam3, powermint, bigr, bigd, c, d, h, gamma;
+
+ int tid, ii, offset;
+ atom_info(tpa_sq,ii,tid,offset);
+
+ __local numtyp4 ts1[SHARED_SIZE];
+ __local numtyp4 ts2[SHARED_SIZE];
+ __local numtyp4 ts4[SHARED_SIZE];
+ if (tid cutsq[ijparam]) continue; // NOTE(review): this line looks truncated (text after tid is missing) - restore from upstream
+
+ numtyp mdelr1[3]; // negated copy of delr1
+ mdelr1[0] = -delr1[0];
+ mdelr1[1] = -delr1[1];
+ mdelr1[2] = -delr1[2];
+
+ int nbor_k=j+nbor_pitch;
+ int numk=dev_nbor[nbor_k]; // number of neighbors recorded for j
+ if (dev_nbor==dev_packed) { // both neighbor pointers refer to the same buffer
+ nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
+ k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
+ nbor_k+=offset_k;
+ } else { // separate buffers: dev_nbor holds the start position of j's entries
+ nbor_k+=nbor_pitch;
+ nbor_k=dev_nbor[nbor_k];
+ k_end=nbor_k+numk;
+ nbor_k+=offset_k;
+ }
+ int nbork_start = nbor_k; // remembered so the k loop further down can restart here
+
+ // look up for zeta_ji
+ int m = tid / t_per_atom;
+ int ijnum = -1;
+ for ( ; nbor_k= 0) { // NOTE(review): this line looks truncated (search loop body missing) - restore from upstream
+ offset_kf = offset_k;
+ } else {
+ ijnum = red_acc[2*m+0];
+ offset_kf = red_acc[2*m+1];
+ }
+
+ //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride;
+ //int idx = iix*n_stride + j*t_per_atom + offset_kf;
+ int idx;
+ zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
+ j, ijnum, offset_kf, idx);
+ acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
+ numtyp force = zeta_ji.x*tpainv; // stored force term scaled by tpainv
+ numtyp prefactor_ji = zeta_ji.y;
+ f.x += delr1[0]*force;
+ f.y += delr1[1]*force;
+ f.z += delr1[2]*force;
+
+ if (eflag>0) {
+ energy+=zeta_ji.z*tpainv;
+ }
+ if (vflag>0) {
+ numtyp mforce = -force;
+ virial[0] += mdelr1[0]*mdelr1[0]*mforce;
+ virial[1] += mdelr1[1]*mdelr1[1]*mforce;
+ virial[2] += mdelr1[2]*mdelr1[2]*mforce;
+ virial[3] += mdelr1[0]*mdelr1[1]*mforce;
+ virial[4] += mdelr1[0]*mdelr1[2]*mforce;
+ virial[5] += mdelr1[1]*mdelr1[2]*mforce;
+ }
+
+ // attractive forces
+ for (nbor_k = nbork_start; nbor_k cutsq[jikparam]) continue; // NOTE(review): this line looks truncated (loop header and body start missing) - restore from upstream
+ numtyp r2 = ucl_sqrt(rsq2);
+ numtyp r2inv = ucl_rsqrt(rsq2);
+
+ numtyp fi[3], fj[3], fk[3];
+ numtyp4 ts1_param, ts2_param, ts4_param;
+ ts1_param = ts1[jikparam]; //fetch4(ts1_jikparam,jikparam,ts1_tex);
+ lam3 = ts1_param.z;
+ powermint = ts1_param.w;
+ ts2_param = ts2[jikparam]; //fetch4(ts2_jikparam,jikparam,ts2_tex);
+ bigr = ts2_param.z;
+ bigd = ts2_param.w;
+ ts4_param = ts4[jikparam]; //fetch4(ts4_jikparam,jikparam,ts4_tex);
+ c = ts4_param.x;
+ d = ts4_param.y;
+ h = ts4_param.z;
+ gamma = ts4_param.w;
+ attractive(bigr, bigd, powermint, lam3, c, d, h, gamma,
+ prefactor_ji, r1, r1inv, r2, r2inv, mdelr1, delr2, fi, fj, fk); // first contribution: jik parameters, prefactor_ji
+ f.x += fj[0]; // only the fj output is added to this atom's force here
+ f.y += fj[1];
+ f.z += fj[2];
+
+ virial[0] += TWOTHIRD*(mdelr1[0]*fj[0] + delr2[0]*fk[0]); // accumulated without a vflag test in this kernel
+ virial[1] += TWOTHIRD*(mdelr1[1]*fj[1] + delr2[1]*fk[1]);
+ virial[2] += TWOTHIRD*(mdelr1[2]*fj[2] + delr2[2]*fk[2]);
+ virial[3] += TWOTHIRD*(mdelr1[0]*fj[1] + delr2[0]*fk[1]);
+ virial[4] += TWOTHIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]);
+ virial[5] += TWOTHIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]);
+
+ //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
+ //int idx = kk*n_stride + j*t_per_atom + offset_k;
+ int idx;
+ zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
+ j, nbor_k, offset_k, idx);
+ acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
+ numtyp prefactor_jk = zeta_jk.y;
+
+ int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
+ ts1_param = ts1[jkiparam]; //fetch4(ts1_jkiparam,jkiparam,ts1_tex);
+ lam3 = ts1_param.z;
+ powermint = ts1_param.w;
+ ts2_param = ts2[jkiparam]; //fetch4(ts2_jkiparam,jkiparam,ts2_tex);
+ bigr = ts2_param.z;
+ bigd = ts2_param.w;
+ ts4_param = ts4[jkiparam]; //fetch4(ts4_jkiparam,jkiparam,ts4_tex);
+ c = ts4_param.x;
+ d = ts4_param.y;
+ h = ts4_param.z;
+ gamma = ts4_param.w;
+ attractive(bigr, bigd, powermint, lam3, c, d, h, gamma,
+ prefactor_jk, r2, r2inv, r1, r1inv, delr2, mdelr1, fi, fj, fk); // second contribution: jki parameters, prefactor_jk, r1/r2 roles swapped
+ f.x += fk[0]; // only the fk output is added to this atom's force here
+ f.y += fk[1];
+ f.z += fk[2];
+
+ virial[0] += TWOTHIRD*(delr2[0]*fj[0] + mdelr1[0]*fk[0]);
+ virial[1] += TWOTHIRD*(delr2[1]*fj[1] + mdelr1[1]*fk[1]);
+ virial[2] += TWOTHIRD*(delr2[2]*fj[2] + mdelr1[2]*fk[2]);
+ virial[3] += TWOTHIRD*(delr2[0]*fj[1] + mdelr1[0]*fk[1]);
+ virial[4] += TWOTHIRD*(delr2[0]*fj[2] + mdelr1[0]*fk[2]);
+ virial[5] += TWOTHIRD*(delr2[1]*fj[2] + mdelr1[1]*fk[2]);
+ }
+ } // for nbor
+
+ #ifdef THREE_CONCURRENT
+ store_answers(f,energy,virial,ii,inum,tid,tpa_sq,offset,
+ eflag,vflag,ans,engv);
+ #else
+ store_answers_p(f,energy,virial,ii,inum,tid,tpa_sq,offset,
+ eflag,vflag,ans,engv);
+ #endif
+ } // if ii
+}
+
diff --git a/lib/gpu/lal_tersoff_zbl.h b/lib/gpu/lal_tersoff_zbl.h
new file mode 100644
index 0000000000..cc0b848845
--- /dev/null
+++ b/lib/gpu/lal_tersoff_zbl.h
@@ -0,0 +1,123 @@
+/***************************************************************************
+ tersoff_zbl.h
+ -------------------
+ Trung Dac Nguyen
+
+ Class for acceleration of the tersoff/zbl pair style.
+
+ __________________________________________________________________________
+ This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+ begin :
+ email : ndactrung@gmail.com
+ ***************************************************************************/
+
+#ifndef LAL_TERSOFF_ZBL_H
+#define LAL_TERSOFF_ZBL_H
+
+#include "lal_base_three.h"
+
+namespace LAMMPS_AL {
+
+template // NOTE(review): template parameter list appears to be missing here - confirm against upstream
+class TersoffZBL : public BaseThree { // NOTE(review): BaseThree template arguments appear stripped - confirm
+ public:
+ TersoffZBL();
+ ~TersoffZBL();
+
+ /// Clear any previous data and set up for a new LAMMPS run for generic systems
+ /** \param max_nbors initial number of rows in the neighbor matrix
+ * \param cell_size cutoff + skin
+ * \param gpu_split fraction of particles handled by device
+ *
+ * Returns:
+ * - 0 if successful
+ * - -1 if fix gpu not found
+ * - -3 if there is an out of memory error
+ * - -4 if the GPU library was not compiled for GPU
+ * - -5 if double precision is not supported on card **/
+ int init(const int ntypes, const int nlocal, const int nall, const int max_nbors,
+ const double cell_size, const double gpu_split, FILE *screen,
+ int* host_map, const int nelements, int*** host_elem2param, const int nparams,
+ const double* lam1, const double* lam2, const double* lam3,
+ const double* powermint, const double* biga, const double* bigb,
+ const double* bigr, const double* bigd, const double* c1, const double* c2,
+ const double* c3, const double* c4, const double* c, const double* d,
+ const double* h, const double* gamma, const double* beta,
+ const double* powern, const double* Z_i, const double* Z_j,
+ const double* ZBLcut, const double* ZBLexpscale, const double global_e,
+ const double global_a_0, const double global_epsilon_0, const double* cutsq);
+
+ /// Pair loop with host neighboring (neighbor list passed in via ilist/numj/firstneigh)
+ void compute(const int f_ago, const int inum_full, const int nall,
+ const int nlist, double **host_x, int *host_type,
+ int *ilist, int *numj, int **firstneigh, const bool eflag,
+ const bool vflag, const bool eatom, const bool vatom,
+ int &host_start, const double cpu_time, bool &success);
+
+ /// Pair loop with device neighboring (ilist/numj are outputs; returns an int** neighbor pointer)
+ int ** compute(const int ago, const int inum_full,
+ const int nall, double **host_x, int *host_type, double *sublo,
+ double *subhi, tagint *tag, int **nspecial,
+ tagint **special, const bool eflag, const bool vflag,
+ const bool eatom, const bool vatom, int &host_start,
+ int **ilist, int **numj, const double cpu_time, bool &success);
+
+ /// Clear all host and device data
+ /** \note This is called at the beginning of the init() routine **/
+ void clear();
+
+ /// Returns memory usage on device per atom
+ int bytes_per_atom(const int max_nbors) const;
+
+ /// Total host memory used by library for pair style
+ double host_memory_usage() const;
+
+ // --------------------------- TYPE DATA --------------------------
+
+ /// If atom type constants fit in shared memory, use fast kernels
+ bool shared_types;
+
+ /// Number of atom types
+ int _lj_types;
+
+ /// ts1.x = lam1, ts1.y = lam2, ts1.z = lam3, ts1.w = powermint
+ UCL_D_Vec ts1; // NOTE(review): element types of the UCL_D_Vec members appear stripped - confirm
+ /// ts2.x = biga, ts2.y = bigb, ts2.z = bigr, ts2.w = bigd
+ UCL_D_Vec ts2;
+ /// ts3.x = c1, ts3.y = c2, ts3.z = c3, ts3.w = c4
+ UCL_D_Vec ts3;
+ /// ts4.x = c, ts4.y = d, ts4.z = h, ts4.w = gamma
+ UCL_D_Vec ts4;
+ /// ts5.x = beta, ts5.y = powern
+ UCL_D_Vec ts5;
+ /// ts6.x = Z_i, ts6.y = Z_j, ts6.z = ZBLcut, ts6.w = ZBLexpscale
+ UCL_D_Vec ts6;
+
+ UCL_D_Vec cutsq; // squared cutoffs, indexed by parameter id in the kernels
+
+ UCL_D_Vec elem2param; // (itype,jtype,ktype) -> parameter id table used by the kernels
+ UCL_D_Vec map; // applied to the atom type in the kernels before the elem2param lookup
+ int _nparams,_nelements;
+
+ /// Per-pair values written by the zeta kernel:
+ /// zetaij.x = force, zetaij.y = prefactor, zetaij.z = evdwl,
+ /// zetaij.w = zetaij
+ UCL_D_Vec _zetaij;
+
+ UCL_Kernel k_zeta;
+ UCL_Texture ts1_tex, ts2_tex, ts3_tex, ts4_tex, ts5_tex, ts6_tex;
+
+ int _max_nbors;
+ numtyp _global_e,_global_a_0,_global_epsilon_0;
+
+ private:
+ bool _allocated;
+ void loop(const bool _eflag, const bool _vflag, const int evatom);
+};
+
+}
+
+#endif
+
diff --git a/lib/gpu/lal_tersoff_zbl_ext.cpp b/lib/gpu/lal_tersoff_zbl_ext.cpp
new file mode 100644
index 0000000000..fce240f8fe
--- /dev/null
+++ b/lib/gpu/lal_tersoff_zbl_ext.cpp
@@ -0,0 +1,146 @@
+/***************************************************************************
+ tersoff_zbl_ext.cpp
+ -------------------
+ Trung Dac Nguyen
+
+ Functions for LAMMPS access to tersoff/zbl acceleration routines.
+
+ __________________________________________________________________________
+ This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+ begin :
+ email : ndactrung@gmail.com
+ ***************************************************************************/
+
+#include
+#include
+#include
+
+#include "lal_tersoff_zbl.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+static TersoffZBL TSZMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+// ---------------------------------------------------------------------------
+ int tersoff_zbl_gpu_init(const int ntypes, const int inum, const int nall,
+                          const int max_nbors, const double cell_size, int &gpu_mode,
+                          FILE *screen, int* host_map, const int nelements,
+                          int*** host_elem2param, const int nparams,
+                          const double* ts_lam1, const double* ts_lam2,
+                          const double* ts_lam3, const double* ts_powermint,
+                          const double* ts_biga, const double* ts_bigb,
+                          const double* ts_bigr, const double* ts_bigd,
+                          const double* ts_c1, const double* ts_c2,
+                          const double* ts_c3, const double* ts_c4,
+                          const double* ts_c, const double* ts_d, const double* ts_h,
+                          const double* ts_gamma, const double* ts_beta,
+                          const double* ts_powern, const double* ts_Z_i,
+                          const double* ts_Z_j, const double* ts_ZBLcut,
+                          const double* ts_ZBLexpscale, const double global_e,
+                          const double global_a_0, const double global_epsilon_0,
+                          const double* ts_cutsq) {
+   TSZMF.clear();  // clear all host and device data before (re)initializing
+   gpu_mode=TSZMF.device->gpu_mode();  // reported back to the caller
+   double gpu_split=TSZMF.device->particle_split();
+   int first_gpu=TSZMF.device->first_device();
+   int last_gpu=TSZMF.device->last_device();
+   int world_me=TSZMF.device->world_me();
+   int gpu_rank=TSZMF.device->gpu_rank();
+   int procs_per_gpu=TSZMF.device->procs_per_gpu();
+
+   // disable host/device split for now: any split other than 1.0 returns -8
+   if (gpu_split != 1.0)
+     return -8;
+
+   TSZMF.device->init_message(screen,"tersoff/zbl/gpu",first_gpu,last_gpu);
+
+   bool message=false;  // only replica 0 with a valid screen prints progress
+   if (TSZMF.device->replica_me()==0 && screen)
+     message=true;
+
+   if (message) {
+     fprintf(screen,"Initializing Device and compiling on process 0...");
+     fflush(screen);
+   }
+   // NOTE(review): max_nbors is not forwarded; the literal 300 is passed instead - confirm
+   int init_ok=0;
+   if (world_me==0)
+     init_ok=TSZMF.init(ntypes, inum, nall, 300, cell_size, gpu_split, screen,
+                        host_map, nelements, host_elem2param, nparams,
+                        ts_lam1, ts_lam2, ts_lam3, ts_powermint,
+                        ts_biga, ts_bigb, ts_bigr, ts_bigd,
+                        ts_c1, ts_c2, ts_c3, ts_c4, ts_c, ts_d, ts_h,
+                        ts_gamma, ts_beta, ts_powern, ts_Z_i, ts_Z_j,
+                        ts_ZBLcut, ts_ZBLexpscale, global_e, global_a_0,
+                        global_epsilon_0, ts_cutsq);
+
+   TSZMF.device->world_barrier();
+   if (message)
+     fprintf(screen,"Done.\n");
+   // every process except world rank 0 initializes in turn, one gpu_rank per pass
+   for (int i=0; i<procs_per_gpu; i++) {
+     if (message) {
+       if (last_gpu-first_gpu==0)
+         fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+       else
+         fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                 last_gpu,i);
+       fflush(screen);
+     }
+     if (gpu_rank==i && world_me!=0)
+       init_ok=TSZMF.init(ntypes, inum, nall, 300, cell_size, gpu_split, screen,
+                          host_map, nelements, host_elem2param, nparams,
+                          ts_lam1, ts_lam2, ts_lam3, ts_powermint,
+                          ts_biga, ts_bigb, ts_bigr, ts_bigd,
+                          ts_c1, ts_c2, ts_c3, ts_c4, ts_c, ts_d, ts_h,
+                          ts_gamma, ts_beta, ts_powern, ts_Z_i, ts_Z_j,
+                          ts_ZBLcut, ts_ZBLexpscale, global_e, global_a_0,
+                          global_epsilon_0, ts_cutsq);
+
+     TSZMF.device->gpu_barrier();
+     if (message)
+       fprintf(screen,"Done.\n");
+   }
+   if (message)
+     fprintf(screen,"\n");
+   // returns the status from TSZMF.init(); overhead is estimated only when it is 0
+   if (init_ok==0)
+     TSZMF.estimate_gpu_overhead();
+   return init_ok;
+ }
+
+ void tersoff_zbl_gpu_clear() { // clears the file-scope TersoffZBL instance (TSZMF)
+ TSZMF.clear();
+ }
+
+ int ** tersoff_zbl_gpu_compute_n(const int ago, const int inum_full,
+ const int nall, double **host_x, int *host_type,
+ double *sublo, double *subhi, tagint *tag, int **nspecial,
+ tagint **special, const bool eflag, const bool vflag,
+ const bool eatom, const bool vatom, int &host_start,
+ int **ilist, int **jnum, const double cpu_time,
+ bool &success) { // thin wrapper: every argument is forwarded unchanged
+ return TSZMF.compute(ago, inum_full, nall, host_x, host_type, sublo, // compute() overload taking sublo/subhi/tag/special
+ subhi, tag, nspecial, special, eflag, vflag, eatom,
+ vatom, host_start, ilist, jnum, cpu_time, success); // its int** result is returned as-is
+ }
+
+ void tersoff_zbl_gpu_compute(const int ago, const int nlocal, const int nall,
+ const int nlist, double **host_x, int *host_type,
+ int *ilist, int *numj, int **firstneigh, const bool eflag,
+ const bool vflag, const bool eatom, const bool vatom,
+ int &host_start, const double cpu_time, bool &success) { // thin wrapper around TSZMF.compute()
+ TSZMF.compute(ago,nlocal,nall,nlist,host_x,host_type,ilist,numj, // caller supplies ilist/numj/firstneigh
+ firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success); // nothing is returned to the caller
+ }
+
+ double tersoff_zbl_gpu_bytes() { // returns TSZMF.host_memory_usage() unchanged
+ return TSZMF.host_memory_usage();
+ }
+
+
diff --git a/lib/gpu/lal_tersoff_zbl_extra.h b/lib/gpu/lal_tersoff_zbl_extra.h
new file mode 100644
index 0000000000..79afb4de82
--- /dev/null
+++ b/lib/gpu/lal_tersoff_zbl_extra.h
@@ -0,0 +1,690 @@
+/// **************************************************************************
+// tersoff_zbl_extra.h
+// -------------------
+// Trung Dac Nguyen
+//
+// Device code for Tersoff/ZBL math routines
+//
+// __________________________________________________________________________
+// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+// begin :
+// email : ndactrung@gmail.com
+// ***************************************************************************/*
+
+#ifndef LAL_TERSOFF_ZBL_EXTRA_H
+#define LAL_TERSOFF_ZBL_EXTRA_H
+
+#ifdef NV_KERNEL
+#include "lal_aux_fun1.h"
+#else
+#endif
+
+#define MY_PI (numtyp)3.14159265358979323846
+#define MY_PI2 (numtyp)1.57079632679489661923
+#define MY_PI4 (numtyp)0.78539816339744830962
+
+/* ---------------------------------------------------------------------- */
+
+ ucl_inline numtyp vec3_dot(const numtyp x[3], const numtyp y[3])
+ {
+   // dot product of two 3-component vectors
+   const numtyp dot = x[0]*y[0] + x[1]*y[1] + x[2]*y[2];
+   return dot;
+ }
+
+ ucl_inline void vec3_add(const numtyp x[3], const numtyp y[3], numtyp z[3])
+ {
+   // z = x + y, element by element
+   for (int d = 0; d < 3; d++)
+     z[d] = x[d]+y[d];
+ }
+
+ ucl_inline void vec3_scale(const numtyp k, const numtyp x[3], numtyp y[3])
+ {
+   // y = k * x, element by element
+   for (int d = 0; d < 3; d++)
+     y[d] = k*x[d];
+ }
+
+ ucl_inline void vec3_scaleadd(const numtyp k, const numtyp x[3],
+                               const numtyp y[3], numtyp z[3])
+ {
+   // z = k * x + y, element by element
+   for (int d = 0; d < 3; d++)
+     z[d] = k*x[d]+y[d];
+ }
+
+/* ---------------------------------------------------------------------- */
+
+ ucl_inline numtyp ters_gijk(const numtyp costheta,
+                             const numtyp param_c,
+                             const numtyp param_d,
+                             const numtyp param_h,
+                             const numtyp param_gamma)
+ {
+   // gamma * (1 + c^2/d^2 - c^2/(d^2 + (h - costheta)^2))
+   const numtyp c_sq = param_c * param_c;
+   const numtyp d_sq = param_d * param_d;
+   const numtyp h_minus_cos = param_h - costheta;
+   const numtyp bracket = (numtyp)1.0 + c_sq*ucl_recip(d_sq) -
+                          c_sq *ucl_recip(d_sq + h_minus_cos*h_minus_cos);
+   return param_gamma*bracket;
+ }
+
+/* ---------------------------------------------------------------------- */
+
+ ucl_inline numtyp ters_gijk_d(const numtyp costheta,
+                               const numtyp param_c,
+                               const numtyp param_d,
+                               const numtyp param_h,
+                               const numtyp param_gamma)
+ {
+   // gamma * (-2 c^2 (h - costheta)) / (d^2 + (h - costheta)^2)^2
+   const numtyp c_sq = param_c * param_c;
+   const numtyp d_sq = param_d * param_d;
+   const numtyp h_minus_cos = param_h - costheta;
+   const numtyp scaled = (numtyp)-2.0 * c_sq * h_minus_cos;
+   // reciprocal of the denominator; applied twice to square it
+   const numtyp inv_sum = ucl_recip(d_sq + h_minus_cos*h_minus_cos);
+   return param_gamma*scaled*inv_sum*inv_sum;
+ }
+
+/* ---------------------------------------------------------------------- */
+
+ ucl_inline void costheta_d(const numtyp rij_hat[3],
+                            const numtyp rij,
+                            const numtyp rik_hat[3],
+                            const numtyp rik,
+                            numtyp *dri,
+                            numtyp *drj,
+                            numtyp *drk)
+ {
+   // outputs: dri = derivative wrt Ri, drj = wrt Rj, drk = wrt Rk
+   const numtyp c = vec3_dot(rij_hat,rik_hat);
+   const numtyp inv_rij = ucl_recip(rij);
+   const numtyp inv_rik = ucl_recip(rik);
+
+   // drj = (rik_hat - c*rij_hat) / rij
+   vec3_scaleadd(-c,rij_hat,rik_hat,drj);
+   vec3_scale(inv_rij,drj,drj);
+
+   // drk = (rij_hat - c*rik_hat) / rik
+   vec3_scaleadd(-c,rik_hat,rij_hat,drk);
+   vec3_scale(inv_rik,drk,drk);
+
+   // dri = -(drj + drk); must come last because it reads drj and drk
+   vec3_add(drj,drk,dri);
+   vec3_scale((numtyp)-1.0,dri,dri);
+ }
+
+/* ---------------------------------------------------------------------- */
+
+ ucl_inline numtyp ters_fc(const numtyp r,
+                           const numtyp param_bigr,
+                           const numtyp param_bigd)
+ {
+   // cutoff: 1 below R-D, 0 above R+D, sine-shaped switch in between
+   numtyp fc;
+   if (r < param_bigr-param_bigd)
+     fc = (numtyp)1.0;
+   else if (r > param_bigr+param_bigd)
+     fc = (numtyp)0.0;
+   else
+     fc = (numtyp)0.5*((numtyp)1.0 - sin(MY_PI2*(r - param_bigr)/param_bigd));
+   return fc;
+ }
+
+/* ---------------------------------------------------------------------- */
+
+ ucl_inline numtyp ters_fc_d(const numtyp r,
+                             const numtyp param_bigr,
+                             const numtyp param_bigd)
+ {
+   // derivative of the cutoff: zero on both flat sides of the switching region
+   if (r < param_bigr-param_bigd || r > param_bigr+param_bigd)
+     return (numtyp)0.0;
+   return -(MY_PI4/param_bigd) * cos(MY_PI2*(r - param_bigr)/param_bigd);
+ }
+
+/* ---------------------------------------------------------------------- */
+
+ ucl_inline numtyp F_fermi(const numtyp r, const numtyp param_ZBLcut,
+                           const numtyp param_ZBLexpscale)
+ {
+   // Fermi-like switch: 1 / (1 + exp(-ZBLexpscale*(r - ZBLcut)))
+   const numtyp ex = ucl_exp(-param_ZBLexpscale*(r-param_ZBLcut));
+   return ucl_recip((numtyp)1.0+ex);
+ }
+
+/* ---------------------------------------------------------------------- */
+
+ ucl_inline numtyp F_fermi_d(const numtyp r, const numtyp param_ZBLcut,
+                             const numtyp param_ZBLexpscale)
+ {
+   // derivative of F_fermi wrt r: ZBLexpscale * e / (1 + e)^2
+   const numtyp ex = ucl_exp(-param_ZBLexpscale*(r-param_ZBLcut));
+   const numtyp denom = (numtyp)1.0 + ex;
+   return param_ZBLexpscale*ex*ucl_recip(denom*denom);
+ }
+
+/* ---------------------------------------------------------------------- */
+
+ ucl_inline numtyp ters_fa(const numtyp r,
+                           const numtyp param_bigb,
+                           const numtyp param_bigr,
+                           const numtyp param_bigd,
+                           const numtyp param_lam2,
+                           const numtyp param_ZBLcut,
+                           const numtyp param_ZBLexpscale)
+ {
+   // -B * exp(-lam2*r) * fc(r) * F_fermi(r); zero beyond R+D
+   if (r > param_bigr + param_bigd) return (numtyp)0.0;
+
+   const numtyp fc = ters_fc(r,param_bigr,param_bigd);
+   const numtyp fermi = F_fermi(r,param_ZBLcut,param_ZBLexpscale);
+   return -param_bigb * ucl_exp(-param_lam2 * r) * fc*fermi;
+ }
+
+/* ---------------------------------------------------------------------- */
+
+ ucl_inline numtyp ters_fa_d(const numtyp r,
+                             const numtyp param_bigb,
+                             const numtyp param_bigr,
+                             const numtyp param_bigd,
+                             const numtyp param_lam2,
+                             const numtyp param_ZBLcut,
+                             const numtyp param_ZBLexpscale)
+ {
+   // derivative of ters_fa wrt r; zero beyond R+D
+   if (r > param_bigr + param_bigd) return (numtyp)0.0;
+
+   const numtyp fc = ters_fc(r,param_bigr,param_bigd);
+   const numtyp fc_d = ters_fc_d(r,param_bigr,param_bigd);
+   const numtyp fermi = F_fermi(r,param_ZBLcut,param_ZBLexpscale);
+   const numtyp fermi_d = F_fermi_d(r,param_ZBLcut,param_ZBLexpscale);
+
+   // product rule over exp(-lam2*r), fc(r) and F_fermi(r)
+   return param_bigb * ucl_exp(-param_lam2 * r) *
+     (param_lam2 * fc * fermi -
+      fc_d * fermi -
+      fc * fermi_d);
+ }
+
+/* ---------------------------------------------------------------------- */
+
+ ucl_inline numtyp ters_bij(const numtyp zeta,
+                            const numtyp param_beta,
+                            const numtyp param_powern,
+                            const numtyp param_c1,
+                            const numtyp param_c2,
+                            const numtyp param_c3,
+                            const numtyp param_c4)
+ {
+   // bond-order term; c1..c4 select which approximation of
+   // (1 + (beta*zeta)^n)^(-1/(2n)) is evaluated
+   const numtyp bz = param_beta * zeta;
+   numtyp bij;
+   if (bz > param_c1) {
+     bij = ucl_rsqrt(bz);
+   } else if (bz > param_c2) {
+     bij = ((numtyp)1.0 - ucl_powr(bz,-param_powern) /
+            ((numtyp)2.0*param_powern))*ucl_rsqrt(bz);
+   } else if (bz < param_c4) {
+     bij = (numtyp)1.0;
+   } else if (bz < param_c3) {
+     bij = (numtyp)1.0 - ucl_powr(bz,param_powern)/((numtyp)2.0*param_powern);
+   } else {
+     bij = ucl_powr((numtyp)1.0 + ucl_powr(bz,param_powern),
+                    (numtyp)-1.0/((numtyp)2.0*param_powern));
+   }
+   return bij;
+ }
+
+/* ---------------------------------------------------------------------- */
+
+ ucl_inline numtyp ters_bij_d(const numtyp zeta,
+                              const numtyp param_beta,
+                              const numtyp param_powern,
+                              const numtyp param_c1,
+                              const numtyp param_c2,
+                              const numtyp param_c3,
+                              const numtyp param_c4)
+ {
+   // derivative of ters_bij wrt zeta, using the same c1..c4 branch selection
+   const numtyp bz = param_beta * zeta;
+   numtyp bij_d;
+   if (bz > param_c1) {
+     bij_d = param_beta * (numtyp)-0.5*ucl_powr(bz,(numtyp)-1.5);
+   } else if (bz > param_c2) {
+     // error in negligible 2nd term fixed 9/30/2015
+     // (previously: 1.0 - 0.5*(1.0 + 1.0/(2.0*powern)) * ...)
+     bij_d = param_beta * ((numtyp)-0.5*ucl_powr(bz,(numtyp)-1.5) *
+             ((numtyp)1.0 - ((numtyp)1.0 + (numtyp)1.0 /((numtyp)2.0 * param_powern)) *
+              ucl_powr(bz,-param_powern)));
+   } else if (bz < param_c4) {
+     bij_d = (numtyp)0.0;
+   } else if (bz < param_c3) {
+     bij_d = (numtyp)-0.5*param_beta * ucl_powr(bz,param_powern-(numtyp)1.0);
+   } else {
+     // general case; the exponent is -1 - 1/(2n)
+     const numtyp bz_n = ucl_powr(bz,param_powern);
+     bij_d = (numtyp)-0.5 * ucl_powr((numtyp)1.0+bz_n, (numtyp) -
+             (numtyp)1.0-((numtyp)1.0 / ((numtyp)2.0 * param_powern)))*bz_n / zeta;
+   }
+   return bij_d;
+ }
+
+/* ---------------------------------------------------------------------- */
+
+ ucl_inline void ters_zetaterm_d(const numtyp prefactor,
+                                 const numtyp rij_hat[3],
+                                 const numtyp rij,
+                                 const numtyp rik_hat[3],
+                                 const numtyp rik,
+                                 const numtyp param_bigr,
+                                 const numtyp param_bigd,
+                                 const numtyp param_powermint,
+                                 const numtyp param_lam3,
+                                 const numtyp param_c,
+                                 const numtyp param_d,
+                                 const numtyp param_h,
+                                 const numtyp param_gamma,
+                                 numtyp dri[3],
+                                 numtyp drj[3],
+                                 numtyp drk[3])
+ {
+   // cutoff on rik and its derivative
+   const numtyp fc = ters_fc(rik,param_bigr,param_bigd);
+   const numtyp dfc = ters_fc_d(rik,param_bigr,param_bigd);
+
+   // exponent argument: lam3*(rij-rik), cubed when powermint == 3
+   const int cubic = ((int)param_powermint == 3);
+   const numtyp t = param_lam3*(rij-rik);
+   const numtyp arg = cubic ? t*t*t : t;
+
+   // exp(arg), replaced by 1e30 above 69.0776 and by 0 below -69.0776
+   numtyp ex_delr;
+   if (arg > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
+   else if (arg < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
+   else ex_delr = ucl_exp(arg);
+
+   // derivative of the exponential factor
+   numtyp ex_delr_d;
+   if (cubic) ex_delr_d = (numtyp)3.0*param_lam3*t*t*ex_delr;
+   else ex_delr_d = param_lam3 * ex_delr;
+
+   // angular term, its derivative, and the gradients of cos(theta)
+   const numtyp cos_theta = vec3_dot(rij_hat,rik_hat);
+   const numtyp gijk = ters_gijk(cos_theta,param_c,param_d,param_h,param_gamma);
+   const numtyp gijk_d = ters_gijk_d(cos_theta,param_c,param_d,param_h,param_gamma);
+   numtyp dcosdri[3],dcosdrj[3],dcosdrk[3];
+   costheta_d(rij_hat,rij,rik_hat,rik,dcosdri,dcosdrj,dcosdrk);
+
+   // scalar weights shared by the three gradients
+   const numtyp w_cut = dfc*gijk*ex_delr;
+   const numtyp w_ang = fc*gijk_d*ex_delr;
+   const numtyp w_exp = fc*gijk*ex_delr_d;
+
+   // dri = prefactor*(-w_cut*rik_hat + w_ang*dcosdri + w_exp*(rik_hat - rij_hat))
+   vec3_scale(-w_cut,rik_hat,dri);
+   vec3_scaleadd(w_ang,dcosdri,dri,dri);
+   vec3_scaleadd(w_exp,rik_hat,dri,dri);
+   vec3_scaleadd(-w_exp,rij_hat,dri,dri);
+   vec3_scale(prefactor,dri,dri);
+
+   // drj = prefactor*(w_ang*dcosdrj + w_exp*rij_hat)
+   vec3_scale(w_ang,dcosdrj,drj);
+   vec3_scaleadd(w_exp,rij_hat,drj,drj);
+   vec3_scale(prefactor,drj,drj);
+
+   // drk = prefactor*(w_cut*rik_hat + w_ang*dcosdrk - w_exp*rik_hat)
+   vec3_scale(w_cut,rik_hat,drk);
+   vec3_scaleadd(w_ang,dcosdrk,drk,drk);
+   vec3_scaleadd(-w_exp,rik_hat,drk,drk);
+   vec3_scale(prefactor,drk,drk);
+ }
+
+ ucl_inline void ters_zetaterm_d_fi(const numtyp prefactor,
+                                    const numtyp rij_hat[3],
+                                    const numtyp rij,
+                                    const numtyp rik_hat[3],
+                                    const numtyp rik,
+                                    const numtyp param_bigr,
+                                    const numtyp param_bigd,
+                                    const numtyp param_powermint,
+                                    const numtyp param_lam3,
+                                    const numtyp param_c,
+                                    const numtyp param_d,
+                                    const numtyp param_h,
+                                    const numtyp param_gamma,
+                                    numtyp dri[3])
+ {
+   // cutoff on rik and its derivative
+   const numtyp fc = ters_fc(rik,param_bigr,param_bigd);
+   const numtyp dfc = ters_fc_d(rik,param_bigr,param_bigd);
+
+   // exponent argument: lam3*(rij-rik), cubed when powermint == 3
+   const int cubic = ((int)param_powermint == 3);
+   const numtyp t = param_lam3*(rij-rik);
+   const numtyp arg = cubic ? t*t*t : t;
+
+   // exp(arg), replaced by 1e30 above 69.0776 and by 0 below -69.0776
+   numtyp ex_delr;
+   if (arg > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
+   else if (arg < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
+   else ex_delr = ucl_exp(arg);
+
+   // derivative of the exponential factor
+   numtyp ex_delr_d;
+   if (cubic) ex_delr_d = (numtyp)3.0*param_lam3*t*t*ex_delr;
+   else ex_delr_d = param_lam3 * ex_delr;
+
+   // angular term, its derivative, and the gradients of cos(theta)
+   const numtyp cos_theta = vec3_dot(rij_hat,rik_hat);
+   const numtyp gijk = ters_gijk(cos_theta,param_c,param_d,param_h,param_gamma);
+   const numtyp gijk_d = ters_gijk_d(cos_theta,param_c,param_d,param_h,param_gamma);
+   numtyp dcosdri[3],dcosdrj[3],dcosdrk[3];
+   costheta_d(rij_hat,rij,rik_hat,rik,dcosdri,dcosdrj,dcosdrk);
+
+   // scalar weights of the three contributions
+   const numtyp w_cut = dfc*gijk*ex_delr;
+   const numtyp w_ang = fc*gijk_d*ex_delr;
+   const numtyp w_exp = fc*gijk*ex_delr_d;
+
+   // dri = prefactor*(-w_cut*rik_hat + w_ang*dcosdri + w_exp*(rik_hat - rij_hat))
+   vec3_scale(-w_cut,rik_hat,dri);
+   vec3_scaleadd(w_ang,dcosdri,dri,dri);
+   vec3_scaleadd(w_exp,rik_hat,dri,dri);
+   vec3_scaleadd(-w_exp,rij_hat,dri,dri);
+   vec3_scale(prefactor,dri,dri);
+ }
+
+ ucl_inline void ters_zetaterm_d_fj(const numtyp prefactor,
+                                    const numtyp rij_hat[3],
+                                    const numtyp rij,
+                                    const numtyp rik_hat[3],
+                                    const numtyp rik,
+                                    const numtyp param_bigr,
+                                    const numtyp param_bigd,
+                                    const numtyp param_powermint,
+                                    const numtyp param_lam3,
+                                    const numtyp param_c,
+                                    const numtyp param_d,
+                                    const numtyp param_h,
+                                    const numtyp param_gamma,
+                                    numtyp drj[3])
+ {
+   // cutoff on rik (its derivative does not enter the Rj gradient)
+   const numtyp fc = ters_fc(rik,param_bigr,param_bigd);
+
+   // exponent argument: lam3*(rij-rik), cubed when powermint == 3
+   const int cubic = ((int)param_powermint == 3);
+   const numtyp t = param_lam3*(rij-rik);
+   const numtyp arg = cubic ? t*t*t : t;
+
+   // exp(arg), replaced by 1e30 above 69.0776 and by 0 below -69.0776
+   numtyp ex_delr;
+   if (arg > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
+   else if (arg < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
+   else ex_delr = ucl_exp(arg);
+
+   // derivative of the exponential factor
+   numtyp ex_delr_d;
+   if (cubic) ex_delr_d = (numtyp)3.0*param_lam3*t*t*ex_delr;
+   else ex_delr_d = param_lam3 * ex_delr;
+
+   // angular term, its derivative, and the gradients of cos(theta)
+   const numtyp cos_theta = vec3_dot(rij_hat,rik_hat);
+   const numtyp gijk = ters_gijk(cos_theta,param_c,param_d,param_h,param_gamma);
+   const numtyp gijk_d = ters_gijk_d(cos_theta,param_c,param_d,param_h,param_gamma);
+   numtyp dcosdri[3],dcosdrj[3],dcosdrk[3];
+   costheta_d(rij_hat,rij,rik_hat,rik,dcosdri,dcosdrj,dcosdrk);
+
+   // scalar weights of the two contributions
+   const numtyp w_ang = fc*gijk_d*ex_delr;
+   const numtyp w_exp = fc*gijk*ex_delr_d;
+
+   // drj = prefactor*(w_ang*dcosdrj + w_exp*rij_hat)
+   vec3_scale(w_ang,dcosdrj,drj);
+   vec3_scaleadd(w_exp,rij_hat,drj,drj);
+   vec3_scale(prefactor,drj,drj);
+ }
+
+ ucl_inline void ters_zetaterm_d_fk(const numtyp prefactor,
+                                    const numtyp rij_hat[3],
+                                    const numtyp rij,
+                                    const numtyp rik_hat[3],
+                                    const numtyp rik,
+                                    const numtyp param_bigr,
+                                    const numtyp param_bigd,
+                                    const numtyp param_powermint,
+                                    const numtyp param_lam3,
+                                    const numtyp param_c,
+                                    const numtyp param_d,
+                                    const numtyp param_h,
+                                    const numtyp param_gamma,
+                                    numtyp drk[3])
+ {
+   // cutoff on rik and its derivative
+   const numtyp fc = ters_fc(rik,param_bigr,param_bigd);
+   const numtyp dfc = ters_fc_d(rik,param_bigr,param_bigd);
+
+   // exponent argument: lam3*(rij-rik), cubed when powermint == 3
+   const int cubic = ((int)param_powermint == 3);
+   const numtyp t = param_lam3*(rij-rik);
+   const numtyp arg = cubic ? t*t*t : t;
+
+   // exp(arg), replaced by 1e30 above 69.0776 and by 0 below -69.0776
+   numtyp ex_delr;
+   if (arg > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
+   else if (arg < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
+   else ex_delr = ucl_exp(arg);
+
+   // derivative of the exponential factor
+   numtyp ex_delr_d;
+   if (cubic) ex_delr_d = (numtyp)3.0*param_lam3*t*t*ex_delr;
+   else ex_delr_d = param_lam3 * ex_delr;
+
+   // angular term, its derivative, and the gradients of cos(theta)
+   const numtyp cos_theta = vec3_dot(rij_hat,rik_hat);
+   const numtyp gijk = ters_gijk(cos_theta,param_c,param_d,param_h,param_gamma);
+   const numtyp gijk_d = ters_gijk_d(cos_theta,param_c,param_d,param_h,param_gamma);
+   numtyp dcosdri[3],dcosdrj[3],dcosdrk[3];
+   costheta_d(rij_hat,rij,rik_hat,rik,dcosdri,dcosdrj,dcosdrk);
+
+   // scalar weights of the three contributions
+   const numtyp w_cut = dfc*gijk*ex_delr;
+   const numtyp w_ang = fc*gijk_d*ex_delr;
+   const numtyp w_exp = fc*gijk*ex_delr_d;
+
+   // drk = prefactor*(w_cut*rik_hat + w_ang*dcosdrk - w_exp*rik_hat)
+   vec3_scale(w_cut,rik_hat,drk);
+   vec3_scaleadd(w_ang,dcosdrk,drk,drk);
+   vec3_scaleadd(-w_exp,rik_hat,drk,drk);
+   vec3_scale(prefactor,drk,drk);
+ }
+
+/* ---------------------------------------------------------------------- */
+
+ ucl_inline void repulsive(const numtyp param_bigr,
+                           const numtyp param_bigd,
+                           const numtyp param_lam1,
+                           const numtyp param_biga,
+                           const numtyp param_Z_i,
+                           const numtyp param_Z_j,
+                           const numtyp param_ZBLcut,
+                           const numtyp param_ZBLexpscale,
+                           const numtyp global_e,
+                           const numtyp global_a_0,
+                           const numtyp global_epsilon_0,
+                           const numtyp rsq,
+                           const int eflag,
+                           numtyp *ans)
+ {
+   const numtyp r = ucl_sqrt(rsq);
+
+   // Tersoff repulsive portion: A*exp(-lam1*r)*fc(r) and its r-derivative
+   const numtyp fc = ters_fc(r,param_bigr,param_bigd);
+   const numtyp fc_d = ters_fc_d(r,param_bigr,param_bigd);
+   const numtyp ex = ucl_exp(-param_lam1 * r);
+   const numtyp fforce_ters = param_biga * ex * (fc_d - fc*param_lam1);
+   const numtyp eng_ters = fc * param_biga * ex;
+
+   // ZBL repulsive portion: screened Coulomb term premult*phi(r/a_ij)/r
+   const numtyp esq = global_e*global_e;
+   const numtyp a_ij = ((numtyp)0.8854*global_a_0) /
+     (ucl_powr(param_Z_i,(numtyp)0.23) + ucl_powr(param_Z_j,(numtyp)0.23));
+   const numtyp premult = (param_Z_i * param_Z_j * esq)/((numtyp)4.0*MY_PI*global_epsilon_0);
+   const numtyp r_ov_a = r/a_ij;
+
+   // the four exponentials of the screening function, reused for phi and dphi
+   const numtyp t1 = (numtyp)0.1818*ucl_exp((numtyp)-3.2*r_ov_a);
+   const numtyp t2 = (numtyp)0.5099*ucl_exp((numtyp)-0.9423*r_ov_a);
+   const numtyp t3 = (numtyp)0.2802*ucl_exp((numtyp)-0.4029*r_ov_a);
+   const numtyp t4 = (numtyp)0.02817*ucl_exp((numtyp)-0.2016*r_ov_a);
+   const numtyp phi = t1 + t2 + t3 + t4;
+   const numtyp dphi_sum = (numtyp)-3.2*t1 - (numtyp)0.9423*t2 - (numtyp)0.4029*t3 -
+                           (numtyp)0.2016*t4;
+   const numtyp dphi = dphi_sum * ucl_recip(a_ij);
+
+   const numtyp rinv = ucl_recip(r);
+   const numtyp fforce_ZBL = premult*(-phi)/rsq + premult*dphi*rinv;
+   const numtyp eng_ZBL = premult*rinv*phi;
+
+   // combine two parts with smoothing by Fermi-like function
+   const numtyp f = F_fermi(r,param_ZBLcut,param_ZBLexpscale);
+   const numtyp f_d = F_fermi_d(r,param_ZBLcut,param_ZBLexpscale);
+
+   // ans[0] = fforce
+   ans[0] = -(-f_d * eng_ZBL + ((numtyp)1.0 - f)*fforce_ZBL + f_d*eng_ters +
+              f*fforce_ters) * rinv;
+
+   // ans[1] = eng, written only when eflag is set
+   if (eflag) ans[1] = ((numtyp)1.0 - f)*eng_ZBL + f*eng_ters;
+ }
+
+/* ---------------------------------------------------------------------- */
+
+ ucl_inline numtyp zeta(const numtyp param_powermint,
+                        const numtyp param_lam3,
+                        const numtyp param_bigr,
+                        const numtyp param_bigd,
+                        const numtyp param_c,
+                        const numtyp param_d,
+                        const numtyp param_h,
+                        const numtyp param_gamma,
+                        const numtyp rsqij,
+                        const numtyp rsqik,
+                        const numtyp4 delrij,
+                        const numtyp4 delrik)
+ {
+   // distances and the cosine of the angle between delrij and delrik
+   const numtyp rij = ucl_sqrt(rsqij);
+   const numtyp rik = ucl_sqrt(rsqik);
+   const numtyp costheta = (delrij.x*delrik.x + delrij.y*delrik.y +
+                            delrij.z*delrik.z) / (rij*rik);
+
+   // exponent argument: lam3*(rij-rik), cubed when powermint == 3
+   const numtyp t = param_lam3*(rij-rik);
+   const numtyp arg = ((int)param_powermint == 3) ? t*t*t : t;
+
+   // exp(arg), replaced by 1e30 above 69.0776 and by 0 below -69.0776
+   numtyp ex_delr;
+   if (arg > (numtyp)69.0776) ex_delr = (numtyp)1.e30;
+   else if (arg < (numtyp)-69.0776) ex_delr = (numtyp)0.0;
+   else ex_delr = ucl_exp(arg);
+
+   // one k-neighbor's contribution: fc(rik) * g(theta) * exponential
+   const numtyp fc = ters_fc(rik,param_bigr,param_bigd);
+   const numtyp gijk = ters_gijk(costheta,param_c, param_d, param_h, param_gamma);
+   return fc * gijk * ex_delr;
+ }
+
+/* ---------------------------------------------------------------------- */
+
+ ucl_inline void force_zeta(const numtyp param_bigb,
+                            const numtyp param_bigr,
+                            const numtyp param_bigd,
+                            const numtyp param_lam2,
+                            const numtyp param_beta,
+                            const numtyp param_powern,
+                            const numtyp param_c1,
+                            const numtyp param_c2,
+                            const numtyp param_c3,
+                            const numtyp param_c4,
+                            const numtyp param_ZBLcut,
+                            const numtyp param_ZBLexpscale,
+                            const numtyp rsq,
+                            const numtyp zeta_ij,
+                            const int eflag,
+                            numtyp fpfeng[4])
+ {
+   const numtyp r = ucl_sqrt(rsq);
+
+   // attractive pair term and its r-derivative
+   const numtyp fa = ters_fa(r,param_bigb,param_bigr,param_bigd,param_lam2,
+                             param_ZBLcut,param_ZBLexpscale);
+   const numtyp fa_d = ters_fa_d(r,param_bigb,param_bigr,param_bigd,param_lam2,
+                                 param_ZBLcut,param_ZBLexpscale);
+
+   // bond order and its derivative wrt zeta
+   const numtyp bij = ters_bij(zeta_ij,param_beta,param_powern,
+                               param_c1,param_c2, param_c3, param_c4);
+   const numtyp bij_d = ters_bij_d(zeta_ij,param_beta, param_powern,
+                                   param_c1,param_c2, param_c3, param_c4);
+
+   // fpfeng[0] = fforce, [1] = prefactor, [2] = eng (only if eflag); [3] is not written
+   fpfeng[0] = (numtyp)0.5*bij*fa_d * ucl_recip(r);
+   fpfeng[1] = (numtyp)-0.5*fa * bij_d;
+   if (eflag) fpfeng[2] = (numtyp)0.5*bij*fa;
+ }
+
+/* ----------------------------------------------------------------------
+ attractive term
+ use param_ij cutoff for rij test
+ use param_ijk cutoff for rik test
+------------------------------------------------------------------------- */
+
+ ucl_inline void attractive(const numtyp param_bigr,
+                            const numtyp param_bigd,
+                            const numtyp param_powermint,
+                            const numtyp param_lam3,
+                            const numtyp param_c,
+                            const numtyp param_d,
+                            const numtyp param_h,
+                            const numtyp param_gamma,
+                            const numtyp prefactor,
+                            const numtyp rij,
+                            const numtyp rijinv,
+                            const numtyp rik,
+                            const numtyp rikinv,
+                            const numtyp delrij[3],
+                            const numtyp delrik[3],
+                            numtyp fi[3],
+                            numtyp fj[3],
+                            numtyp fk[3])
+ {
+   // scale the displacement vectors by the supplied inverse lengths
+   numtyp uij[3],uik[3];
+   vec3_scale(rijinv,delrij,uij);
+   vec3_scale(rikinv,delrik,uik);
+
+   // fi, fj, fk receive the three gradients of the zeta term
+   ters_zetaterm_d(prefactor,uij,rij,uik,rik,
+                   param_bigr, param_bigd, param_powermint, param_lam3,
+                   param_c, param_d, param_h, param_gamma, fi, fj, fk);
+ }
+
+ ucl_inline void attractive_fi(const numtyp param_bigr,
+                               const numtyp param_bigd,
+                               const numtyp param_powermint,
+                               const numtyp param_lam3,
+                               const numtyp param_c,
+                               const numtyp param_d,
+                               const numtyp param_h,
+                               const numtyp param_gamma,
+                               const numtyp prefactor,
+                               const numtyp rij,
+                               const numtyp rijinv,
+                               const numtyp rik,
+                               const numtyp rikinv,
+                               const numtyp delrij[3],
+                               const numtyp delrik[3],
+                               numtyp fi[3])
+ {
+   // scale the displacement vectors by the supplied inverse lengths
+   numtyp uij[3],uik[3];
+   vec3_scale(rijinv,delrij,uij);
+   vec3_scale(rikinv,delrik,uik);
+
+   // only the gradient wrt Ri is produced
+   ters_zetaterm_d_fi(prefactor,uij,rij,uik,rik,
+                      param_bigr, param_bigd, param_powermint, param_lam3,
+                      param_c, param_d, param_h, param_gamma, fi);
+ }
+
+ ucl_inline void attractive_fj(const numtyp param_bigr,
+                               const numtyp param_bigd,
+                               const numtyp param_powermint,
+                               const numtyp param_lam3,
+                               const numtyp param_c,
+                               const numtyp param_d,
+                               const numtyp param_h,
+                               const numtyp param_gamma,
+                               const numtyp prefactor,
+                               const numtyp rij,
+                               const numtyp rijinv,
+                               const numtyp rik,
+                               const numtyp rikinv,
+                               const numtyp delrij[3],
+                               const numtyp delrik[3],
+                               numtyp fj[3])
+ {
+   // scale the displacement vectors by the supplied inverse lengths
+   numtyp uij[3],uik[3];
+   vec3_scale(rijinv,delrij,uij);
+   vec3_scale(rikinv,delrik,uik);
+
+   // only the gradient wrt Rj is produced
+   ters_zetaterm_d_fj(prefactor,uij,rij,uik,rik,
+                      param_bigr, param_bigd, param_powermint, param_lam3,
+                      param_c, param_d, param_h, param_gamma, fj);
+ }
+
+ ucl_inline void attractive_fk(const numtyp param_bigr,
+                               const numtyp param_bigd,
+                               const numtyp param_powermint,
+                               const numtyp param_lam3,
+                               const numtyp param_c,
+                               const numtyp param_d,
+                               const numtyp param_h,
+                               const numtyp param_gamma,
+                               const numtyp prefactor,
+                               const numtyp rij,
+                               const numtyp rijinv,
+                               const numtyp rik,
+                               const numtyp rikinv,
+                               const numtyp delrij[3],
+                               const numtyp delrik[3],
+                               numtyp fk[3])
+ {
+   // scale the displacement vectors by the supplied inverse lengths
+   numtyp uij[3],uik[3];
+   vec3_scale(rijinv,delrij,uij);
+   vec3_scale(rikinv,delrik,uik);
+
+   // only the gradient wrt Rk is produced
+   ters_zetaterm_d_fk(prefactor,uij,rij,uik,rik,
+                      param_bigr, param_bigd, param_powermint, param_lam3,
+                      param_c, param_d, param_h, param_gamma, fk);
+ }
+
+
+#endif
+
+
From 13c55490099890791a2bed3aff92ced293a53da8 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Thu, 7 Apr 2016 21:12:44 +0000
Subject: [PATCH 05/12] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@14809
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/bond.cpp | 71 ++++++++++++++++++++++++++++++++++++++++++++++
src/bond.h | 2 ++
src/input.cpp | 12 ++++++++
src/input.h | 1 +
src/pair.cpp | 8 +++---
src/pair_table.cpp | 27 +++++++++++++-----
6 files changed, 110 insertions(+), 11 deletions(-)
diff --git a/src/bond.cpp b/src/bond.cpp
index 443816418f..5c2622281d 100644
--- a/src/bond.cpp
+++ b/src/bond.cpp
@@ -16,6 +16,7 @@
#include "atom.h"
#include "comm.h"
#include "force.h"
+#include "neighbor.h"
#include "suffix.h"
#include "atom_masks.h"
#include "memory.h"
@@ -23,6 +24,8 @@
using namespace LAMMPS_NS;
+enum{NONE,LINEAR,SPLINE};
+
/* -----------------------------------------------------------------------
set bond contribution to Vdwl energy to 0.0
a particular bond style can override this
@@ -212,6 +215,74 @@ void Bond::ev_tally(int i, int j, int nlocal, int newton_bond,
}
}
+/* ----------------------------------------------------------------------
+ write a table of bond potential energy/force vs distance to a file
+------------------------------------------------------------------------- */
+
+ void Bond::write_file(int narg, char **arg)
+ {
+   if (narg != 6 && narg !=8) error->all(FLERR,"Illegal bond_write command");
+   // arg[0..5] = bond type, N, inner, outer, file name, table keyword
+   // arg[6..7] = optional atom types passed on to single() (default 0 0)
+
+   int itype = 0;
+   int jtype = 0;
+   if (narg == 8) {
+     itype = force->inumeric(FLERR,arg[6]);
+     jtype = force->inumeric(FLERR,arg[7]);
+     if (itype < 1 || itype > atom->ntypes || jtype < 1 || jtype > atom->ntypes)
+       error->all(FLERR,"Invalid atom types in bond_write command");
+   }
+
+   int btype = force->inumeric(FLERR,arg[0]);
+   int n = force->inumeric(FLERR,arg[1]);
+   double inner = force->numeric(FLERR,arg[2]);
+   double outer = force->numeric(FLERR,arg[3]);
+   if (inner <= 0.0 || inner >= outer)
+     error->all(FLERR,"Invalid rlo/rhi values in bond_write command");
+   // the spacing below divides by n-1, so at least two table points are required
+   if (n < 2) error->all(FLERR,"Invalid # of table points in bond_write command");
+   double r0 = equilibrium_distance(btype);
+
+   // open file in append mode (rank 0 only)
+   // the header in bond_style table format is written further below
+
+   int me;
+   MPI_Comm_rank(world,&me);
+   FILE *fp = NULL;
+   if (me == 0) {
+     fp = fopen(arg[4],"a");
+     if (fp == NULL) error->one(FLERR,"Cannot open bond_write file");
+   }
+
+   // initialize potentials before evaluating bond potential
+   // ensures all bond coeffs are set and force constants
+   // also initialize neighbor so that neighbor requests are processed
+   // NOTE: might be safest to just do lmp->init()
+
+   force->init();
+   neighbor->init();
+
+   if (me == 0) {
+     double r,e,f;
+
+     // evaluate energy and force at each of N distances
+     // note that Bond::single() takes r**2 and returns f/r.
+
+     fprintf(fp,"# Bond potential %s for bond type %d: i,r,energy,force\n",
+             force->bond_style,btype);
+     fprintf(fp,"\n%s\nN %d EQ %.15g\n\n",arg[5],n,r0);
+
+     const double dr = (outer-inner) / static_cast<double>(n-1);
+     for (int i = 0; i < n; i++) {
+       r = inner + dr * static_cast<double>(i);
+       e = single(btype,r*r,itype,jtype,f);
+       fprintf(fp,"%d %.15g %.15g %.15g\n",i+1,r,e,f*r);
+     }
+     fclose(fp);
+   }
+ }
+
/* ---------------------------------------------------------------------- */
double Bond::memory_usage()
diff --git a/src/bond.h b/src/bond.h
index 6841e9362d..d455cda204 100644
--- a/src/bond.h
+++ b/src/bond.h
@@ -54,6 +54,8 @@ class Bond : protected Pointers {
virtual unsigned int data_mask() {return datamask;}
virtual unsigned int data_mask_ext() {return datamask_ext;}
+ void write_file(int, char**);
+
protected:
int suffix_flag; // suffix compatibility flag
diff --git a/src/input.cpp b/src/input.cpp
index 3ceb906911..bcb64effe9 100644
--- a/src/input.cpp
+++ b/src/input.cpp
@@ -644,6 +644,7 @@ int Input::execute_command()
else if (!strcmp(command,"atom_style")) atom_style();
else if (!strcmp(command,"bond_coeff")) bond_coeff();
else if (!strcmp(command,"bond_style")) bond_style();
+ else if (!strcmp(command,"bond_write")) bond_write();
else if (!strcmp(command,"boundary")) boundary();
else if (!strcmp(command,"box")) box();
else if (!strcmp(command,"comm_modify")) comm_modify();
@@ -1281,6 +1282,17 @@ void Input::bond_style()
/* ---------------------------------------------------------------------- */
+void Input::bond_write()
+{
+  // needs an atom style that allows bonds and an already defined bond style
+  if (atom->avec->bonds_allow == 0)
+    error->all(FLERR,"Bond_write command when no bonds allowed");
+
+  if (force->bond != NULL) force->bond->write_file(narg,arg);
+  else error->all(FLERR,"Bond_write command before bond_style is defined");
+}
+
+/* ---------------------------------------------------------------------- */
+
void Input::boundary()
{
if (domain->box_exist)
diff --git a/src/input.h b/src/input.h
index 9bdedabe14..ccda3f49b1 100644
--- a/src/input.h
+++ b/src/input.h
@@ -86,6 +86,7 @@ class Input : protected Pointers {
void atom_style();
void bond_coeff();
void bond_style();
+ void bond_write();
void boundary();
void box();
void comm_modify();
diff --git a/src/pair.cpp b/src/pair.cpp
index 89c5bea380..e3c4a1aa36 100644
--- a/src/pair.cpp
+++ b/src/pair.cpp
@@ -1574,9 +1574,9 @@ void Pair::write_file(int narg, char **arg)
fprintf(fp,"# Pair potential %s for atom types %d %d: i,r,energy,force\n",
force->pair_style,itype,jtype);
if (style == RLINEAR)
- fprintf(fp,"\n%s\nN %d R %g %g\n\n",arg[7],n,inner,outer);
+ fprintf(fp,"\n%s\nN %d R %.15g %.15g\n\n",arg[7],n,inner,outer);
if (style == RSQ)
- fprintf(fp,"\n%s\nN %d RSQ %g %g\n\n",arg[7],n,inner,outer);
+ fprintf(fp,"\n%s\nN %d RSQ %.15g %.15g\n\n",arg[7],n,inner,outer);
}
// initialize potentials before evaluating pair potential
@@ -1618,7 +1618,7 @@ void Pair::write_file(int narg, char **arg)
init_bitmap(inner,outer,n,masklo,maskhi,nmask,nshiftbits);
int ntable = 1 << n;
if (me == 0)
- fprintf(fp,"\n%s\nN %d BITMAP %g %g\n\n",arg[7],ntable,inner,outer);
+ fprintf(fp,"\n%s\nN %d BITMAP %.15g %.15g\n\n",arg[7],ntable,inner,outer);
n = ntable;
}
@@ -1647,7 +1647,7 @@ void Pair::write_file(int narg, char **arg)
e = single(0,1,itype,jtype,rsq,1.0,1.0,f);
f *= r;
} else e = f = 0.0;
- if (me == 0) fprintf(fp,"%d %g %g %g\n",i+1,r,e,f);
+ if (me == 0) fprintf(fp,"%d %.15g %.15g %.15g\n",i+1,r,e,f);
}
// restore original vecs that were swapped in for
diff --git a/src/pair_table.cpp b/src/pair_table.cpp
index 1013bd3677..71b4d49b5f 100644
--- a/src/pair_table.cpp
+++ b/src/pair_table.cpp
@@ -395,13 +395,16 @@ void PairTable::read_table(Table *tb, char *file, char *keyword)
union_int_float_t rsq_lookup;
int rerror = 0;
+ int cerror = 0;
fgets(line,MAXLINE,fp);
for (int i = 0; i < tb->ninput; i++) {
- fgets(line,MAXLINE,fp);
- sscanf(line,"%d %lg %lg %lg",&itmp,&rfile,&tb->efile[i],&tb->ffile[i]);
- rnew = rfile;
+ if (NULL == fgets(line,MAXLINE,fp))
+ error->one(FLERR,"Premature end of file in pair table");
+ if (4 != sscanf(line,"%d %lg %lg %lg",
+ &itmp,&rfile,&tb->efile[i],&tb->ffile[i])) ++cerror;
+ rnew = rfile;
if (tb->rflag == RLINEAR)
rnew = tb->rlo + (tb->rhi - tb->rlo)*i/(tb->ninput-1);
else if (tb->rflag == RSQ) {
@@ -419,6 +422,7 @@ void PairTable::read_table(Table *tb, char *file, char *keyword)
}
if (tb->rflag && fabs(rnew-rfile)/rfile > EPSILONR) rerror++;
+
tb->rfile[i] = rnew;
}
@@ -452,8 +456,8 @@ void PairTable::read_table(Table *tb, char *file, char *keyword)
if (ferror) {
char str[128];
- sprintf(str,"%d force values in table are inconsistent with -dE/dr; "
- "should only be mistakenly flagged at inflection points",ferror);
+ sprintf(str,"%d of %d force values in table are inconsistent with -dE/dr.\n"
+ " Should only be flagged at inflection points",ferror,tb->ninput);
error->warning(FLERR,str);
}
@@ -461,8 +465,17 @@ void PairTable::read_table(Table *tb, char *file, char *keyword)
if (rerror) {
char str[128];
- sprintf(str,"%d distance values in table differ signifcantly "
- "from re-computed values",rerror);
+ sprintf(str,"%d of %d distance values in table with relative error\n"
+ " over %g to re-computed values",rerror,tb->ninput,EPSILONR);
+ error->warning(FLERR,str);
+ }
+
+ // warn if data was read incompletely, e.g. columns were missing
+
+ if (cerror) {
+ char str[128];
+ sprintf(str,"%d of %d lines in table were incomplete\n"
+ " or could not be parsed completely",cerror,tb->ninput);
error->warning(FLERR,str);
}
}
From d8a711e50ea14de5f87bad21531eea0993dc6515 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Thu, 7 Apr 2016 21:13:52 +0000
Subject: [PATCH 06/12] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@14810
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
doc/Section_commands.html | 4 +-
doc/Section_commands.txt | 4 +-
doc/_images/fix_nh1.jpg | Bin 31088 -> 68243 bytes
doc/_sources/Manual.txt | 4 +-
doc/_sources/Section_commands.txt | 4 +-
doc/_sources/bond_table.txt | 2 +-
doc/_sources/lattice.txt | 8 +-
doc/_sources/pair_colloid.txt | 2 +-
doc/_sources/pair_dpd.txt | 2 +-
doc/_sources/pair_table.txt | 2 +-
doc/_sources/pair_tersoff_mod.txt | 3 +
doc/_sources/pair_tersoff_zbl.txt | 3 +
doc/_sources/pair_write.txt | 1 +
doc/_sources/temper.txt | 6 +-
doc/bond_table.html | 2 +-
doc/bond_table.txt | 2 +-
doc/bond_write.html | 248 ++++++++++++++++++++++++++++++
doc/bond_write.txt | 64 ++++++++
doc/genindex.html | 8 +-
doc/lattice.html | 8 +-
doc/pair_colloid.html | 4 +-
doc/pair_colloid.txt | 2 +-
doc/pair_dpd.html | 2 +-
doc/pair_dpd.txt | 2 +-
doc/pair_table.html | 2 +-
doc/pair_table.txt | 2 +-
doc/pair_tersoff_mod.html | 3 +
doc/pair_tersoff_mod.txt | 1 +
doc/pair_tersoff_zbl.html | 3 +
doc/pair_tersoff_zbl.txt | 1 +
doc/pair_write.html | 3 +-
doc/pair_write.txt | 1 +
doc/searchindex.js | 2 +-
doc/temper.html | 6 +-
doc/temper.txt | 6 +-
35 files changed, 379 insertions(+), 38 deletions(-)
create mode 100644 doc/bond_write.html
create mode 100644 doc/bond_write.txt
diff --git a/doc/Section_commands.html b/doc/Section_commands.html
index 6f1ec8745b..a0b5680239 100644
--- a/doc/Section_commands.html
+++ b/doc/Section_commands.html
@@ -1157,9 +1157,9 @@ KOKKOS, o = USER-OMP, t = OPT.
| sw (cgkio) |
table (gko) |
tersoff (cgkio) |
-tersoff/mod (ko) |
+tersoff/mod (gko) |
-| tersoff/zbl (ko) |
+
| tersoff/zbl (gko) |
tip4p/cut (o) |
tip4p/long (o) |
tri/lj |
diff --git a/doc/Section_commands.txt b/doc/Section_commands.txt
index 977993ee4b..d0f5e34bad 100644
--- a/doc/Section_commands.txt
+++ b/doc/Section_commands.txt
@@ -905,8 +905,8 @@ KOKKOS, o = USER-OMP, t = OPT.
"sw (cgkio)"_pair_sw.html,
"table (gko)"_pair_table.html,
"tersoff (cgkio)"_pair_tersoff.html,
-"tersoff/mod (ko)"_pair_tersoff_mod.html,
-"tersoff/zbl (ko)"_pair_tersoff_zbl.html,
+"tersoff/mod (gko)"_pair_tersoff_mod.html,
+"tersoff/zbl (gko)"_pair_tersoff_zbl.html,
"tip4p/cut (o)"_pair_coul.html,
"tip4p/long (o)"_pair_coul.html,
"tri/lj"_pair_tri_lj.html,
diff --git a/doc/_images/fix_nh1.jpg b/doc/_images/fix_nh1.jpg
index db59dbc43e6f922be085465f9837738c468297e5..af07ea3a4b3a197d6b415937d1325a7c2a5a4a00 100644
GIT binary patch
literal 68243
zcmbTd2Ut_V);79B=v8_r^j?%IH6YSNjG%M{Y0`v%bcIBbj)0(ofP&H#Y0`Ud3M$ei
z2qd6_(h^bFnh@^xoco<~{_j5jz5o4&%+8zHGkedPvgTcDhNF*1c;MI>b1QQI27>{X
z&>wKL%IaigYU+O84rOk2)(pA<0IZg0{e#go#{eKGIQ+Vu#VK(oXBTmX_W%vR1keK~
z0Kmg5EX3x5`9%NIK4uyhXVlYJd|6*
z+b7Hm!U+(*78M@yH=PJ!9?!tPn1=Q*z7ACo!UBJ>??18fKl1z&8~?@LLBZZop1*Yt
z@ecC-i-#bb5*gtGVfa}HCq?@EL_v54!qV3wg8U&2LYODW$0G~?Xj%WJhx>T>LHIa?
zS+8HTGlj4L05G!p{s;E_4;=0j4b>9>Owl1R*ZqC{!o}si6vR*H>Yfz0^ohFW6CSQ|
z-oq=<`BRB>C{wed{QUKTAwiSmOS^I>pwzi78D%AY{3jeP+|CQ_i9sKRv
zf9BY;{YRg{_}>0I?cZhporW#}fRn$Vwz>E3G|zkhXiWqF0o=dSq@Mx+M*;w}O#FvD
ze1FG_UwC+ko|;-zRFtZ}kC*D-2K`s~|IF~Ood0|9AMaEB`@Vm*o%m^=s~!>8!o~kK
zsu%hiI^w!`Scr$0kGRTzAH@ITg8xyi|EPzuozGRD>psEIs~n(F<{#_}b$770f4Dz7
zSlmDOf7iqR!(#tYhrjTj;~E0oWmJI2T@^Ss!2`gz-vYGUECBpc5p)OkuYNnrtp#zu!4uA&`1jGO-
zKps#5)BznpA20^YfHMFJxCl4{Zh#ly2Lu7vfhZsrxD6x$>A*eUAy5Dm17$!ZPzy8w
ztw0yh4~zigzz1L!SOoCE7hn@00{Z|NQe+G;HW)Wd5Qc!sz?5L>u#+$&m>KLW%ns%Z
zy8`ot1;Zj>v9LR^bXX3o0QL-439E;-!}?)wV3V)~7yGzm1BG>>RXXsT&iX!>c!X=Z8gG@CTP-~gNj&Igx(tH4jf
zP2niG3;ZhlIvfK}gXh9a;I;4$_$YiDj)!l-f78;_a?^^_s?h4wTGHCndeMf`VrcKu
z7SO(+ZJ`~aou(zw5^1S)Y;+=YN_2X3R&-8uzI0J^Np!h%6?DyX!*sKBU+DJf>FN3C
z<>+|vZ@TxTRR9b=MV(qlp~c{9Z@Wigd8budjZePJRq
zb27^@8!}&H4q(2`{D`@hd6@YV^BxNeizJIKi!F;k%Pp2iEU#Ezvk+K*vvRR2vYN79
zVvS(UWPQ%s$GX6}$HvAc%Vx~x!WPb!$yUiW$hOP|vU9VmvRkoxvt!u{*<0AB*mpRX
zIAl0XINUgHaO83{a!hb+9b-Bsd(8Bh$1%*Y!ei~n=8yg2BgR{~cl*C5v#H=J9F+mzdj`wn**_b~St9(o=*9xI*zo^+lXp7%UNUM^lu
zUI*T2-a_7PUOXS1PnOS$?;2kgUjyG9-yeQaeiMEl{#5>2{we+g0bv1S0Uv=hfjWU%
zfj@%cf@Xq&g4u$tf=fcQLW)ARLQz6RLc>B^!aTzI!d}8@!VSVW5gHLi5j&BaBBdg4
zMSh8jiCTz;h~|q9if)PVi5ZIph&>SN5&MSVMi?Rd5f2c(i0|UO;wIwP#2<+di4!G6
zBrGK&B%VpUmmo{ZN!m-sOV&v)Nij>El)5VQKx#mWD2SCHG05O&%#9DqkW$qd>24QXx>`iNXg(8bvKdKgB}D
z2_+gOZ6$xDCrXpbbjrHQ!OA7d^D3+=CMuCCRVoBkKGn0T@v5z=J8IHuE^66oqiWRS
zTF0*)FFU??g6qVY6Y(cHPW(_;RQFOZRG-mc(=gY-YP4yPG?g@cG>bGpYH@4XXeDb6
zXdP^co_qU$5UbtR^-mboiexUwK
z{S5;-10RDjgEd1*Ll47ehIk`!qbo*FjqpebqzAGDxoRwB>}_0Oyl$dk5@1qovUBSA
zsq3ejPaT-*nZ}v+ora&bIGuL-y&0F8y;*_Tvbm(WuX&X@(L&4OrbVA6ou##9w&lE)
zsFjyhr4{jv&Y8F~L)NU;7px1cSI;V*y?(aChQ{WM&3&6s=VZwL4)KYd3m<`-1z0mlyUgp1PQIanWAhKEi&$fy3dFL#4xk<7vkn
zN4%4oQ=HSdvyijDbDIl;i-Svr%P&_m*N3iOE@@p#x-@rL?sD|yH*P|1*W9|@Iov(m
z8?Vq^alG>K3dIBEQR4B-)5`O)C(+B)E6;1w+t~Yo_ctFypB$g{tAV0-pySU30uv7sMEJC8#}^D>yKC2rY(=Mo)$)g(QXG
zL-j%*h7zw^U#|#*gn9hd(5CpOyie6g%}=GKd8dt~A5YIsr`+|rJC<=G<59*@
z=GDyiS=w1e_vr5h-6UU!c&EHk3}A*K0bKj{bZ`hxTv;Rv^c%^&r|=WAD>w~YblW}$t|TX
zjV%3I=1?|TezLr>LZ~9W;_!L!bNmat7ekdgm6b0=US?I%R7F&MtG-nIq2_c=Tdhj%
zvpT-I^jE;Eh*#h1uhh>qST_tbo@}gbl4*L}%-x*U0&9tBA-4Lp;@h0tCfm=n4|M2v
zGc6@^E<6~&F{6}w@m0vw0+S1&@pK+*)xTl>YqM6
zJu+i8Gd_E6c5=>s?&JKWdHhH3j~ffu7D%{A9C`89C%R8*OUIUSmqnIKRuoq1@Y?up
z!fC?W)eEbOYhG)cpTj;=z9f8Q`&(rA6UID+fO{@rUqH<>0bpa|Z`jc1H2EXKUCri~#5pT4G9)
z8gk*gt(Mv>|1(HJO9`xTweQJxE^Th*I=)vCkuf%s@5%d$oee_kU#&~*qc!mZ+EMr-Oq`8T}*o8g>LR=2@XtjaLiO5dp2NXc)#CGbIM3M
zTHlJqaZ5{ICGgaIHDM-v`_!0E0qzg(BA1z_or1*VUW{?
z>rB|rj05Uc#eA>$<*hhjGu}IdUZ|l-d3Oy=3A>D7`l(s1BY??@*5zxVKbB{{Kw|
z$h>r_+6^x4t;Ybr{zF|Jek1Mog!^IFRnU}t(nl&3YqTQMQ#1lZ6|1Sg7Cf7b*J0{=u
z7~fT|TTdgT=ijdo&lQ7;Y6EqkB7XuUuo^SodN8lOOi$ujMs!<$&J?Vrx%FYijO+DV
zm0z3%baC7frN47anA9J)L8%HB4|st-O>FaU;;Ob3evDGXk#*gI|#AG#a~
zOD@_ugZ5MClJ5F0=C_HZMSR{NqRv)!Ar~`CZt1+JdxG`kSR6s&tcRCRf@sqCpfz!K
zEoxx#;hmu#L9$0zb4PQY?7YL&&X`2UtG2Vp-lj^UI&^K%jojaO3CNeE{X4!ik|UWH
z;)~e=^d|29ViA?Hm{RfX?&;}%a;2pqUQ_Dl%HXf{IObCv_?VF{W$putHq;9B*UxcC{%5q~d-hf^}qgTT#g
z9$RC&|BAe~N$#sji9`!Ra!QvE$%tyGG>GAuR@I8LBh576aEn7z;}XnF`nCdinsdit
zT3?Ywl>6dlUop{cWg*HOtRov~KVwg=LWTNjC6!2AHnnb#Kb1u{6Y@rK1Y{By&h*OF
z-Wa%L*pl`R8iV0k$wgchXbxI9~Vc;KNt94O3c)>SAIJL
zz@+DVQ@R|nt7-{49qU_EDuN_djx94hNETbAR6KlXNz7|ei(r0znu}Y^#|)d@Djy63
zzWLR$9>(_u#|iI$0WVj&50`v5VkXVsiBK>=u3_l|a+JP{anF_<0nWiY#mdB2ho5y6
ze%J`p54?(={{g%TzmENnH6LitO148z_={<6k^Roy`A~tc+&J|wSWt(5S
z^ZapH2Hlft=4q7Pu!3#9y_l7h#eDky2#D6%4wCC@i?tS}TudJMT7O?v@lh{BtmUlHE!(b0`QnNXesb-yoObY&q
z+7|fg;GMG+ob6&Z5N{~}n^g<#K4F5wtF|9=f7N1Z1~;Tk|Zy-0ic_8;Q{@0dlME^ZmgChKD+Ty%yHh0mcyD9ZjSbYn#Ul})hC*31NIYMdgh278g@5h5*o@3r4m?Mw^V5b$yys^Tcx#7|xh_xtm=yBTHvuydG
z5P4DXs>aePhvb-o1$}8%UlLU<`DnebqX11YGS_T25V2E|%bMw^puCea3|
zxq?#hKUTFe{b>{D1NgowSkMV@JbUrKOtiYh=81*TK`hY`o4Fzwp_UmbXA9ckx8*K^
zCiq{`$C^6smRqD~Rb9|TDA!*y4SL?M-k1=lN6Jf$`J=P{gTyh6WW|j39t*~Ly0asA
zW1a3>-E?PlI&7JGEAx5S!}Y$*nzBGEmsYYTZA_I58J%8&U#ImKW+HKbRw;Z2XDSI&gg3F#b5`eKT3D%lh)?C*hr
z>0}Aa%z;Ob0%jc7xv~FF>12@m;16AG$K$_izFKkl1{_PPM60BHpM6XHXt6fGEeti)
z7HCL37|=m7o^_`+P^zKWPp$T(+XcF&pk?DECnIPt>|Hh`x1S#xgZaBwe25((6Wq54
z*Ny;Cxp<%#7kK*c-ZRi4*C908ag^v1YxAM`G)Xl}y3;$?Cv%lP#v}FNhhx8QS_e-i
z)|KueeX%%=ek}3AE-q{s!DJw()!aKmMl<(QBj5<2
z9h^lH;*S8dS~|8q?q_BXYkfU}r-w^A>#J7L?YQf2;wpmWBvc~c@(H^BqGIM(2W}xx
z1@0A6SuSu;pl09hC9}uIO}Z@$ck{2_0s|}2##IovV2Mgxb`;5`S$x020V|iQ0Wfzb
z|AVLA?dNH{2M9{<*G*tS86G5&IDrdWb>A)|0fzhOQ7_y;IDUqUGWQ3aR9$i6()aVK
z{M_zKqbI^7W9qAIj)e((c3+^fw31IvkdI^gC5;TgI;)&j;dSEM0@1R$A59lr-&I`Vw=JiDco}GtTK(x3g904BKga&YZEiNnlDsi|l`-R)a4c?Jz%@^^x8cWK|
zBl6~eI_E7XU$``I;~nqXN&{lMo{+WZHIO`nVECz)!e=B_G&XPhY8E1WH{bEiJGHj4
zPg;A=TNeABdV#n>)9IaYTcW?t<0(y={J$cH#$i3uU&(zzZa;{G%0h=PK8f73Y_$x3
zBJ}Y@+kCTrsvT|+mF(Hbq#FFS_4c_bo-ro85Tym0KIM>_(x!Nkc`-x1Ohl-)43)n1f{OizWxGc}
zU|sx;o#Rvu$rwYYwT`p2NwyyuXM(nDM;H%Nw8K|kD1UOv($wB
zS{Yoy8TJNpd|Vtn0(g*KHKv0(9D?8YI!3a!d%L%4CKrUFYlj9ywdd5*5xhq1(d%my!@33lRrs*fAgC6coO`c7UA6eSRmqF4!Jy5=7xA&yp!v!r
zsS1=dhdR_k7MKxi^T8HGeP}-%KJS6YdV)b=_>&Pq+|F&F`83#hZWe*+m{AEn|BV{g
zJpKF3{lb_WbIX}GT7&hUVfzdeu0TTGh+2|6JyisBU1Lb_1#R(Ji7V_sSD4h2>aI?s
zn>n}wt3>Dz9CB_7u3H9q9d8N|&-R9B$L9j+1+=l)b4UY;9Hxb@EUxd{koa%y)*>5X
z&2L9w83Rd;W)XzA8z1>!%>;ctYsE6E3fJ_xH1gxAxNdDAl}V5sx`cu3omU4*T|R4z
z_ES`GziAX9OXQo`>FfS3ztUqIL*MXI>ea}di77u)LvH1m#c8-dE+(FPgCykl;@goe
zsQwps=L$H6l;Wh;7?PEBlCs{mnV_pLv#SfaE|k#v3Z#Y_BTE0%?uxC0>YH)c5Nkpf
zqz>mDU}0;nb?HdXh(I-fV%RKWQha%)?06r(Lo+6kb{^&}EYioB!r_)dro)V2#13!w
z`edG9s
z$jVkb5%*oI*aVcrzw=#Pu0$)HNc!skh5Eie?$-8R(9W}@i=OHw=A+AXlVgBL3=ik#Lt>Yl_N)n%*(zmzv1KW>URYgnhqwxp|On
zvHdPa{Fu=rKui+_l@@I3O|4ul<~}rxcb)z`y^F_fJ<44+7E_P>44<_$+Q6X`~0ks)n+NKm1CV!Ig{1X_m~lncbHRU`#^7HZ1T^+&Mg
z_LW#4;^C(bna}DR
zuzpqF?fReh%Rc@fTH>v9{SCR#{Bzb*Hn(adh5_`PFds5r5hCsx0TVoqGDO9`*sjYXCCtA!Z+YRe^WwQ{|!@=+Sud*&Zi`~c>T
zrH=Wox;@9+T)1>tIowiM3l=&S+r1)w5<7jT|Bt`ZTJv;STf)PRHYWaJgJ+*jLc{b<
zZ?lF90l(d;EI#-{i~WFUiXhoLmVR2cc+?$GEy)_1o3lJkRUrnw9Wb=8DGa7fXL{gv
zdTJ+=KF#^mj&5Bm6#*VtfR%Vd?x7=qLyINI-IyerAXJeb7rgkaySYd!SK<7DqlE**
zhqOx4O+s)MX_SCWGvFn$uWT1E+|?|s;l6O^w`#5vMdr?n0di?&w5T^X474bCz
z2gNO;+Y!X{eN|%l(wElc*uX%YEUJpN?}@YsV)@-?60bgKH^Tpr8_`XDz8biWWlhF#C(p#xnpgQ?~yq6aNn!-j&bnrStMfd%}D0hP|~0)
z=ubd0Q?B)bw+W93h0Y%&z^Ip6U$aDB=a|WS_|YkgeF6(zS}pCx*52N+PQ&JM=rMlDk1}p}ClaWX@406%O=T{O
zXh-_m=-WryFgSn&!lUL(oziC|$Tmxlvik)XY@4TB;zM@pbyu3|C_H!e?}11%cc2jq
zSVu}nO-G>9kDI&z-3iNu+MRq4RV!6C;;mntomVff>24#-G`_nT`lByBQRa(N1@J8b
zGPdLf$yp1SwKbiA)*e#U-~@X>&O&t0*Y~)371HM1j5?iv
zh3ibE#Klbarz*8x-L08{x?D~^=QlR`AI{v3XT-#~ScbTAYwo+L}
z*AVd?_pw(gY=^Z8MQTLty)O%XRLN5|@9sJ;u)4m|ff`w^4y`BflMnU5_xLl(P2gQ!pgM&}q-2GZPeM3^vskdUyKUp3p
z4tj80-_2g
z66&5$tBh}PyjV03u0D6u^ZW8rFLNcd4#ganeL1Yf;`5FGI3=zsfcrbHwuWHsdf8$B
zT1?)J#HZR(Iog15!B^b%kQ48KI5{((^RWNZ=uU?nd`*?1&bc=uMtWhG0ilV0cSgTr
znhG?>N?`FZ@s1hSA*v)U%4rdYTgG)iY3>|Wwp;2ga(?}zB&0fU*M;&4yi8^TtM(ZV
zTa5GrwT}P*(yD%_#9|SO88UNA?PuxJBVfRS5abrg7UL5oRv+ru=sfo0jkFr;yU!{4
z&P)&5fY+qw6uzo8bBOBEl8A_HGol6obteLmbR$(8qu1CzQ(OBBMf(Q*;*zu5_Y>*%
z-ZSCgTiAhFi@iBJ{`t7@E>*R4Xh00cY5d|6@YYbP;a$wK5HH^=poo61bbjZ5#sUTp^^
zp2$_webfP5rH&w4=C@19{+kHVpKJ`4hlQ~!kW*yDB`ZO4Zl@+9eUpRJ-PJ-&d2Uj5
ztzbdtc=qnXO29iInX>B4wQmSemY>4ILKWjDDqWiEvLWc#0pMnzn`|%9@h(Nn5Vy=G
z?9|}HZ6-PZG+LxeN%Zx}m5Yg#wda{&z54NNj!7>
zMPl>efw~ei2_XzE-8Sd;KMpgQ1dJO3N)pEza6AGk
zRZ2t+v9aKhX}eVZ4KcP}qUcJhUFx~0x_Wgn3i!BGH?E=h@|W~2U9I1DNYDFwn@}UYpz`PdI&)d!
zFdw6c9jFFX2tjAYPfdA}EE0t6+UC$xPI4X5g)W@c^vjmFXza)}ll5=lIL+5oBkCfu
zwfJ`7eljTWB32S)AeMh61*LDgiBC2MSq$U;nC=J4?*uiG+vjjf=;lvL;!ENhE{i?C
zeCz?yWD1Y=uU>z2su9iMBLgkn;}mnXRkf6>f-T*}ceZ@8w)u7&yt9!_wMC2P1XzWb
z)KU-~TBN0-d=3hyE0vw8HQ6#=R1UAR-7VC2Zzx_rh=2H=5wE>*6peFj2=HTCuaYVp
ztXIqw>wMTwWu(qwMGb<=FvG_vr?4qvl9_D$PKB#qGs+9y*`3Tj`{!B*N>4LVeY?A^
zGw&3hFH}6P^rjbIZ6tcwhQz;gZ!nx6MDkGk_j`C|Rere|qEC@{+y$HT+nbbLOMmqv
zaPa-u-wmT@xn;0TdB_9%c|Z{fC81Y~2a7*Sxey^mLW$)zK=2D8AS2CV#%yALq&%u>
zOvQms-*?AaKT#Y|(k~qTp!ZK77Hh3Q*&6t=iFNxK4Mf@U3O^TRBi@$!4}|tO
zqc}-n`5FhoxvfUax0Ui}g5pIlUAfPw3~0@S6z2pEiAQhQtQO%+DG*x_@g@s=&kV=(bM}|
zbDB?t9g+-9hyH9?YpOht)*B4ZWARk=$~>Xxyr^g)Gk$lBG%hz*{o<(@64Jct-Lsl2
zpd2E}KyWR{os4z_ZE6C0Ib=Ide)63Bb+ePjUOYq_*B`*}ilI9$#$5_B=-4%D+}5Z}
zo%kbwpK@wi<>!;I)sg2lv1aY@lGO&?f%Y9UL8(6QQ0-Rjr8azIU9CJ&tF+cT9OQ*R
zNKpBOq^0PSo)3*z?(!}C>}9pCu!y1IiPT|_P+D}ld!^(VifgBqeRhYihoy|cadZ`a$BZsN0i6)>2!o^
zW$Xz1dlvJ`7VYsnO*#;<8?5azJpwF($j(@TNeT8xQ8&YW;Njf}_l3351DBmv(!^TU
z(Cgsv-^%n4*F+u9bxaek=E#vx$?0(~zK`EGNhP$E`Wg!;9g1(qK^Gijc__Vab{*5~%N?5nN$6*??D
zwkH_E9$wfl$J~hZJWPyrBtyN|?`YsfJ{8SC;43EW^}n+uFHoMm%_RGqwpV?K>y~n?8njkDq<%n(^?GHcib;ZS;v%6n9itk{
zObV{SuL$@#;JWAc(W$kXpx3=meq81ay4+0JIm`HDT7iXhV5D{HuTv=m%!aP)O`w_F
z7C+$#NGwunosE?Ws-@V@Xo*_eHnk9COMXbBE970H)Gsw!-25_R<$15H)=t4AC%AWb
zg%|6;dk4|@{O&KrLe{?Phgt)V)pu^&U4n$b%TcGNpAyR-MCQKg4_j5}nH6#5w|n2J
zaac6`)r4!9<&uP$PxSM_tOYq}Xi&Hg_SdEitjS!l`lN&5-9!X$lfW<3T|KvFt8rYo
zFBH2UCkmY#`143F-OpGX5QiHk?<89M_T=jm%d5fQikoF2Hzu7INoQ*T{|<>atboK~
zL{sDn98-6;BdqhAGEOvW_zQH_yFTh!x*o}M!c3)sX4PH)%MZj9)&mR5tEKiRQD>0c
z5Dh8D?HNx()hHxf?ry2s?`E7hOkNWe`>c>|_sZ%`E?b6ls5QLnndmKNZ~Y;xA;ksE
zU*kwDCWi68e@^Tq^fZzbm(dH$b1M$hKjSP=O*321f-sTRDRdN~iII%u#`tgl$<
zc`+~-DAEJiSL0xH-Cb&Ri_vw1E|Ha`ukX7AIMiP@hAgtl59;>@Ux;V=!9R0=3iI16
zYuJVH{mjEh!Qp5|iX6$}c2_7#!0vRI9d-z9?GhgK;wCCjMo=ZtK=T1+Lk6dTpnEGzjwtjGWAM$Im
zh!V+b3?uF-$j0H~t!bjyQ13C#-NY3hO=Rkg_mzQlegzSoiwiYd>K~2s<|oe{nE5)x
z|J+~dkjrIv=Mu3$YDFhmFU%nWW)NR_@nCxzVSwq-;6X!MNgNdiQ5gE`wo*C
z``VGy-?O_4bH9Jr*&9-nPGsJ-69MAlAmzJpSoG{L4T~@8Wx^l=i66i5i!l-t9|%o4
z(=3kO5a(Jsa|urunA~=+o=VcnCIwQNL-B|^xQStTttIYN)n^qH{xsb#kLqtdLPhIm
zrXE&H%rNqsvgpgyyj6dljrN-hXsA~=!qUy1E^>Jlvl)u>F)3s;keE^`kH
z9VAnvg4MK68X&LcMZH?wfvs~Iq_BqG>3+Za5PkyBb(|`e`|2>GM=ZKfnrNuOvORZi_*a%8_A5;%knp1A9#0}ewSkbGDfjM1S;
zk>m#StB}bzemb<=7-=jIK))~#fAi*{uuaF4WAGn@u_8RDG2$9LyFh6b%>|A-A3o|0
z;#ep)2VF^CL&e9!kr`GmanX6_=IhpGlWKwj%f@4iBpP#M?CX`(U5CD1tGv2-DPJ$SP28j%YJq5AxoVq2Hqi`#`ZfS{jxaZMkmq=kasYwU~)f3
z;RD6cC-myI;QO}I>uueyy7pA=PC1Xgw3XQ7Iqh_Eyo7gekpq`E(jy100(P;e!zxM`
zNn==y4IIbU_zs;yu)V9w+Cs8_`u3-J_(^f%PWTlrrUHUgb86A5c8|ZB>8K{Qoe}m2
zMM_@FOF;VWvY6FS{7LkMgl^fJ4;!tk7PQT$)f*Mz-|eA2nm(JDVv51RKIRB0X@qD4
zh2;p?bZiEr&bQ5WA-y81oZmcOy0kc|JlAmCwiM
z*REK=4NN+8Y$RvpyHFx@Yy|glaVYW2KaY)=UG?>(xn*^d2eyVJ?yW44sc92;jsPOG
z$kkZBY>Ntg5PEsxsmy>E_!EvyNEw)2*W>1(Pv2W7z1!V@5}vZ
zbr!omj-`l>C()UR%uoJ=6%&xW{>ysAo+mZYr+%=Ro|)Y)?P}~uY_$kuO%s*A@JaoZ
z)TVn8mKbp^<*#*%CCQ~q^>q-{Qh&1Dn7yr_IHtu|)1PLhn?VX+Ir#=|)Q%{9_#1NA
zFkA>EL0`0qB!;9jolD>mFs#Rryfvj6Bb#&Kg;`jOw7vdHP$?mL;-qe$IHL5{XZWT&
zCs;>iR$9Ym7Pqzztw6fCfxU*<*stA7+jMB;&I?wJYBvUqV_&Qc?YVyR{}b2zD}O;G
zw7%fWx{UjYl#}c$!LVCOdkwq$!X)GtG;8TJz)6ztDNrGhY#gg~_*CWRBDu;&w<&R+
zB3R{girA6ToL(3mtP*_sohrD_ez%j=xq*K@(b`-drzi=Zw8a^WKv3x}~R*sHyXZb~Hdmd~p5*3M6bb&~mmJ&@siKL2)|
zUtZqok2r(ZZ-0zzer0fowlh}^Hr7P?ax5bE`wmO7fmvBc0KMJHekk#4InikCjB~8b
z;gjkB?Yt!Fo1LdEmyz4|EQgZ@4(P*=ZTUFW>g;4KVethB;`yrLMYI)|N~lXO5=`%d
z=rx7~OhYk)^QYp!>|7h#?0nNxpFb(Owd0^Grv6>vn#Z2Jv-Ix^`wrl{wc-Q>(G|Nm
z*&`M!L9!SYWjWtP^y|nQEU362gsoVTO=7hqevgW*lvbA-&*!q6Yzi=F;o{3$!Qyk=
z>rq)fG@C0zJ;$kY^-dcr+`WEvT%)Th)`NNxKin3;Gh+fv%1wLMQ>sG4xNIcH^>8%6vH^lBcM8oNjBHPo8imVw}8iOks>|E
zZ)_MewtsR1$3Wq(8|cv}(3O1!9i1~B#K`n!z~0`oFu3<{DV|&9da*|6WA-vAKM?Cf
zTG>w|^$x5+25%pVCXmVrqAH`+R%_E&<$q$26$&m6<4#Ohd$xBZZ?ndh#XEbq4m{Pr
z71V!50j+uj+>w~vhz|#FR@b*&>evSi~>**hyIR7a0&8hu+Lrc-c
zMa}cn1u>(!S6Gb4V^V0w3-keW{jl8A{qAhP+9`%LU>cHfNmN%&
zqKnL$9{icU!>ogzTUl{Kmk#6p2e~^QrwCZSB#<`eo8Y9k7P8>EFAW3p-trVre3HE66
zz_QByQ-wph9|Ea}g0bcOF+qI%)%eY?NsgJ;omWUzdA90YPdVLs-ENZbHuxHGiU}A&
z{Jy)2%B1LfsOFdX<08I1$$e&6^W|OPXVC?&SDPR
zd@ny@g&n-(B_qQ_VQcGKPhL`-NZ2$4VRRdY-#xb3BQX;j
zwklBji!Dwi9BdZ8d^tSDEfCF7$}}U>{AA{ZN4}k3j?>_IhRqDy4_(&J!4j6#SlydJ
zaiTSETf#KC7&uYf4|24nN~Nll*q_Ej?yBT{Gf-|VD-GMCHd2GOJ1oJdx<}t6poRmV
zNZAlq7OQ=xzBINI_X}gR%GS0PKbeJ9CT86{k>Gr*(>upm2nkAMW)si}f80}J)x03P
zN+=Nb6Jl~yY3IdnLOq?TFVfkP)5@)CWz=a_{1(H!q{gNABD;Nc)z33bV|kImh=gKV
z10Q5^j|#MU4R+ACUQ3m3iHZ`IF^ga=yBkF-w2mdL_ck6J@)k2dW`p|?;L^R^HL;(3
z_{s=@8F$2pHiE;|Zo}1JPfX2E6>iJP-H3O-&VF8da)Fn1P)UK6Ins(#mf1Xa)zM1r
z*I{GW!3nJI?jI#14v1&;Qk$<)3=VsY*kYYYPqLDGgrV)%_l-5*gT0y?TH2SCw|MHA
zPrT=KqVl5m?GKx&{Fp@}zL{U>h5N&E7@6c#-EsO3&BswbFE$S3cQV+%WK3Cv29Qf*
zmFu7BTM3HHSAKxGKkm_;K_w#b-;3!CLX%OvnD==?UO{mc8hq`B{gDE{Z*wZ%9<95E
zn0!guAU%AHQHHcor2#^44@RupS`V$HKX>=XX2arkY$lKL)6Y>-8^%V2!?GSDKV-w`
z*5R6aDw6x%1VxHUHyQkE{8tO}^gG@z8;MNbNPB-o@Q~dYi~8NWUwhy)Nc<~zej^_0
zYb+$Xe{W_b(Ui*Tco!XBRpZ|G$DkFpie}y@D61b0itZ@AEYQsI$ZWVP2UlB+zIXX_
z`m1_!*8c2afeP|k-GbCAa-ci#e2^>x5sle1b0u*WZgGusD~J8|$a~BArhY&8@?zv?
zUo8@L>m3of#y^Nykn3Ne4?Y5Xb{YJzJY7oVi_tb1*|(Z(sQz)bmnv;@UG>)s%6=Gd
z##9B&h2}kZC>>p@A_+ty5m2wcKpew*6fd?8E^{7sHer)`*g=Lhc`f}6?$wJ?^PYSK
z%L+2-XS*ye=jK&tIsDEqeM}=Eduq}wwHCA3+hPO}JK7P_^U=B#EGw0TVldxj_^dZ=
zi=t7z^u0C8*SfW}qkw>xG)q~$Y?$j6>XUlA<9}x}cp)63Tb@b4e;JbaF
z*f@;kKwiZ%Z~V#F$#}T{mY#_-9X67tQ>m_V(|1jkZB1Ji+zl3wH~I3KAoQ80iZ0|n
z#r&}17Y8VQFuKBs?LXhc``w6_G?9qm+T0%<4qO#gy4XZI?f?8t{ToG>r8k<6iKt!m
zBprEgE`|GJ#_X-bD@402oI4)1C5-RIm9C-Udw2s^Sbwoeotcld#rLv|Vq(oo{qyP<
zkAUa6?{@ElcAl_O!*E(n)IKYqQ9wF-r)d86wfWad!|K-@44)N!pFqL~v{xA!mObiecIO
z3nUi?cH@F*bx_z)1ELvEV8C2`jNPRcpGC$T)HD_%l}wNpxK&KoC4qNmLdArYN2{G~
z_)uNZ8FIii7)WCxd}a(#bvSMX{rh*{Q;yrcS0l1UdWm79+X^oOn7ulPsDT>z;G9Q{
z^Oequ;0wZ};IW+=@%}nm-p{^1?!W0}Ux3PET0`u}HoN>En_z
zl~SblJkFwCi;`xCTnoFP^vVc|pJIbVG(kRdPSBZv;)&*7&DcqgrNXDbHrCI54rF1;
z?QPr3FLKSdd7~zE1uyydq4F}r>gEI(hz0d740PCZZ-2t^Z&eao)}0D?(~rAu7PKD|
zc+zpzT6odvQMRb!S8PYXDDrmiv0ncg?CQbR1PP960h8DKW5WoGH<3Kio;zRv
z$hA&4k-XQ)_IAQgbKhOJM&7UYHcM-T(XXkgSW?tq?(PqLTCTuUve_a|KuEVk5Ew+A<|4xeLx`8;I0R&Gtg7t}T
zpBv--i!JjiKSjC5wA;z2=OgWkbta&VTi1sUsRf35upe9Ih||;|zFtlYV=Vt{mkDGu
zFq3wkR0iw|Nek81*P&K71G{BBG8JSxx-31XmBpvGFD5I7%6b{Qox0cv+!!52;F1U2
z8^2JqJl3Ai*F_W`lNrKAW(8ma*0N=;2`8Y;A%$k#v0Fd$&R4dT*377Ee)M*
zjZ?DH`P<@2ftCr0O9f{CI4f{Ptmf|)k_JfU6DF;L<)w({3=NCgtSI*3?~K-HP{nSN
zk=RLRB9=;b1pKO6+FakoaZs7liPrlbB;V8~RJxeLEHp3Tm7mQ(hI%sLvis06(1qzi
z7wRRd0*4YS?$J_L{G@s90z3T_6965W;xIfc?%k*cArCRl5%6dBQ!MXcWsewUvhn+g
zSXL4yV>MOtVa6`s2>SS9DhsX3sNlzse}p!S_@R?l^CK(Ce;m`O9Gf^)Dn+B#ST+3c
z!r2b%Lf6+n%r`!tvVUd5P{b&Xk5|InVy1kCszv*q!5Zu*M@0LL(D<)3xu-O1Mquxy
zSDY0;8*2#9Wu4g#CRiUE6T)>y(z
zSvos~l-jQbm>A7c%{KQ^^p$3*oJ4LN>$&3zCA9KKpY1p3n_Sb*S6_eJNg2#ZetN>o
zN^1(_BSTa0+}L}KTnKhxv7RbK&VNUa=r3xoCRp*qU97ricXocKUp}LFgsiUCqpc<|
z@h97VQNxi{0v6j*bm6wbs1V4Mv$txh&k>IM>&$e^g&L2qwCE)lnA5t=nRTNAsn
zlF*i;6Rl>PKVY51COm=CG_QSoo7qGlpwc^!$UrgyFFlQbkrJxO5yapjt9az`R$7g&
zTKzXMj~IPP$L&R%>Opar_nC#SoYvlpCAPRc>@2K2tI_TV*)`_6;68q^P3!tgYwg4O
zjuiAmTMphuk$s2rC7I^cliDntn8orwEn4uqcytHwt-2Vp`Ae>?d9B;a+6W>{4&rDj
z&;TnO>e}Kz3<1e>Nth%Htr2qy81CnJLg!5!>T(;-_9w!rPv5q573t)4bedOU=e;{~
zOfteRj0i-XW%R~_w?7CnTgx}mvDl`>WU3V@=&&EczYFyX<{rhc&Fh*}yP8BsNYros
zu?`5&v0zIuSxweW$>>}sHLjhOq`aoDgPawVXXA)WgG!in*@c0f_KPd4?2e%B@DqzW*yv1
z1c40|QsMz>ek9#X|I9onWegoES{ematBE(dx-&Q(xvtN=R~+-^e`+hP+jsbz;ctc7
z@JbSFv@ui29jPgQBJd~m<$>Hmt?3gsqU(MyD+6zLemcHVUFFkMnG}BcPdoO)BJH&UbuJ+G8Xq
zeAasU7lg-Z1I_F&R#D`h91!?$Lc6LZ`GZhazNY~DyMS|BxEEZ{eJ~fmY&BvJO01xt
zqc7rH=N1bh4*p>fq03p6f4yaKVzAh7MoR9EqDkjP_ekluw3SD3ZVvr!9tTeWaOY$8
zBXtb$B!X?8>Wp$=`UPB&%5%2~UOoWz1Os&I*TCI2ht5E2gN{gPUt1%8Ll@JAq$+Lc
z&rkvyO`3+1EE)ls>c@0m>RDPA{VY(uA^fEETlet1u0K&hNo6V7E)+!4HF*4QA8#F^hB%7G2PcHM{a&9PiCP{|i{=TT}W
z@_>D2fnfYep@xG{o)xOmxD4&bwy_SN*`ZVcVPZtzo+L17+;nl3R(&q_!TwvUR2%$VF(JBXC7{G!e
z=&yegFd*WEti!S)^tZHJ#_N?iO|SV{hj9wnXs6%xM@|jjV(386cPkMy
zPf$4lr)s}-fwmS9EqX$gs5J8S4LrFVT(z`vm6zAVE8~2xd|Jw#t^AAH}
zDSZ`=k(dn;Jtc~AP+
zw_u`(p@ge1W1B`?Pr_c3P-*I!N$&i!8Xy4))D(8S{BGam;eV7K-|vdUyNtcuEE2t$
zm!oBGZ*K!q|3jQ@8J1tVB)MxpRW+@2G95krn+1CrMVJqCHbW88=bimkb~04KktK@K+d*
zGylj@xf+h!?S6VIq~n7;$B(|DXpkqy_zBgPxRYH)pABCZoNryT3}&ZZ1;SoS1oy4W
zw{pXS`Be7N=l@`ufnsMb`j|Cx8Osf_zJYgx
zxZy-XZHfE(-q53sl$vIfUcppr+pGS8nF-IWhu2|-_c&YYl
z8JFhdU#A^sdcpLjdip=5*Z=Ex#2#@HIHxj@$wgo0r$)BTVWd~7rf;2gE4sVL?bRB!
zPG$-dw2}j3^m`#Q#e{2Z?4Qf!-?*ls%Ro7`?kzfpAw|a(xqyBnI}A-YlBnq6pqMzzgq88_W|JkI7tQdh40n7?x%c$1@){PuR%qjZJ<-9K-xjMc;
zh9N=E;VuZkeI1Hn6xHktWxif4bSiesc;yP@;-sMvVhZ1=~;eY?$kt>bPYd>mN@tVKOW4EG^yV2!U5k7v*|cpZR0rY{a>?Q?GE
z)ZEr8;z`_9|DHApK1tE*E~nnmr+{$U8AjPR*l@_ElS)b)I^dHTvJ*2hq9NIaGkbAFqC3GgN@_DV{S8JZxzhbl&f1u?--v9-#WFujhc0J-tzi=qc1GBjEwlK7mOoH_PQv#IA24$7
zNH@u?Ler>wE6&mNJ0FDayQU3nnQTcFhlnfx9(%`?a_GE|!tIZE@M6U16Q#-vSg%ZG
z7qEAl>+i#wNQH*ffSQ7+2S26nRFy`08k{n~`FdOwT{vfS`$JU>^fK7==zwy!Wo8y7
z?!;5%say*hjiS`hX{u8>03vyVJK1_*Ty1qz6;%pqqg}P=yRAAYZ~F4FQ)T?-m6=FW
zd~F^;rbq&w2j2m!iq)?X9?fNN&kfgkDcXImJMkN>UoXWYl`41f!17zm-rfFyX?!Ql9#g27_EF#IDMk$imX-DaSb
zgIh?sovC+M%2v3US2AOUrqrR|huWABk89vcLR5oK*iS2JK*2l!>BSK%=C8bNt^uJy
z;O{8TD8CQ6Yis^9CN+l40oqE6`TinI4M%(6%PB^+u0q-!c6dRlNnqQ2+vmwtgo<}q
zO)Klgp&G@j3QLcBw{&X0q>;1@-EqdoJQ{z#gSXkAhU(LT-ctd=qh{6&^o}IoqRJ2Y
zSpl2oFwC>O*JQew{U7DD^IFl36esopb8{+JJ*^*pL6`;IL}lHIFK^?(Z{IV
zzC={4z6j|su3N2Il)EbX<+rJC2bxtT{-;KQIbNUp5)m4s`oZi|(N72C9C+ASb4%B(o~cDl1?&l>#!
z(#LbIsEruBJWJdI>8sz7IwY&dy5i*kesKEFWo**Dhc2*a2fG`uRzky*WS;q4`Y4g~
zM|jG?8;raA9vLVCx_wl_
zEFoH*a8H`ifr{h(r;UOmUg3zz`663wowg+*r?9Z#p0n5ae;r`Jz!G~8C_cy}V+W&s
z>0;xA@KlNxLY)+m@Q}vYM?W#6Ygh%d@5qg8{xi$kbg3g_olTLL!JS>hhvEE{x#P^B
zu%Ak2ufdXL(IjX5ipgexNm%Gh__;t8!eOF{mqk*hr%0}2yY<{|cM9w3qvV7W3fmx4
zCwR9bkiv`{6Prw{d-zA!U{lEwj?g2Zd9IWOP9`G^;5Y0wt*?ow7gzjQyIJtF>}kzi
z25l?}jQvZo9)RrLjt&vZ2sH0%1dF@=={ijN9k}5z|9gB3
z-oD@|{|ZWk6h46ZQ0TJZ0Hi&$kAgmi@a`ttN{Y_8f}6^U-=p6P8Mq21fo#3{5BUw5
zN-cF07$F@9XXmNAJ*p`TAVTJYUS0$POLJmhv~Vw!>DV
zd~eI@yUrAi6|r;SW%4x)9D88y0M*6@zQrFnf;CYL)Ek{DJky3v)K`VO*a6yguA@&3
z{O>nQ7+Pc^FK(@_jWxag#CJB>(4$;qpNA^={Xlje8oBFy>&~sP5%EN27vW3b
zwvvZfE&e)Gb8c>Vb4`^;6CdS$(^dHN?oc?0qgt(4nbI&QZ>tlCt0Y>}j7D-^>mL7C
zz8@})oZrI%^Igp?Z9%ULQ}5AZ*NnaV@O~%GGp+m;QF%|aMzsGlj1)8HdTl*Dr`nYr
z%cgtMB;Y5O6*&au#BjoehmK5krb#12tC$LMe(A#H*l%>6)`St8!3PxzHO?z+$YqYm
z879ulpS`jTm4CwKzrNt%oObwq&PxEEY=sxs*;Fg1fczNR?^XpL(VEV3KZM7
zIC+SFI@z02T+F1D_B_ILF?b%AkXMsthYGCj>dmQEQ3irh5tp#)*iLt&Y~y~$)W$+}S2vmQCZi*N+DxmbYp87cp9J!MESO0&G`g`-&KKTvl4^3K9{68j1*`17Hf(d_pvp2o
zo!yKm!F2GnX--`t!g;Q-+aQognOlY?D*mYC=B|AyQ|`kN#PtB8s`=I{OQBYtw(AZS
zM9fF?`l>)Vfw>D*xNp83AVQM#r4}{B4@rZ<3yUP+E?C$ImK3?U=$Jr{bgi$?1W9-A
zWuZf&`(w0&GoZq)nIL(gF_jv%`A0hSD#55DvDRkw<_k;=Elr9e8D`SC?_bsYYfTcK-ooxogB!g)tsTPz
z+ZQQYMBEAlwP7$-fn1w7$264Rdtr$BO;=8r=-%viQcaE1MF7}Soh3?`9GK85k$gy;
z$BDY&;CmS1iqexnwXY}td%eTB;EM6JyepHZXB!;dBTWl$fA}z(o$C<}xC~wP&Cvbp
zY{faJ*cJ$JY)d)nKl&I}iaTJxsrz8)EMG%LT3ua5EZga+<40iJ&<_P)Og<76ltF`3
z;xr}M02y-lEKxsK@8>E!bAnU^kE8Rs*q$k{ER^nwx1TX)w&LYJ0Mm$vgV$y%x;);X
zcixd@!NmSY7q*RO*a>;@`=eav75~GNaCiKGSa@pn$~AihmuSrehj!2yE>a-Z!@(N0%UIlpM*JECe|&z)7v>BEDI;T
z=y39e<(Cpat6pZCPum}x9xgL!oVrde1+b0u75y{d!#shZ`E;!*eExU=T@^xV{u{t4
zFp_-h+U)Kxw!8|bHqW4+qte)SlIHB9cJ<{3d=F;KPP2ldY6Gj~h+%Y7BbCpS$U3lx
z7&Ly<;EOvfc%dMaclwFL1Hzj`
z6QH=(W4o)gw$d`;)U`#O0E?gM2w%>5AH5f2CH&IL&LWIS*jVeoR}FfFg8PMtCO|
zoPro&bMvi=%$x4nZrKRWDp%uToYGY?a>y(ED7tEk>r0HP)v-Eoh@`PTZ&6+CFi_zb9b_N?>lSEu}EWpqz7WTiSE;Y6jQmv2@i>(Ug{_fb$_mAN#~@6qAWC#ow>w*D<~8K$$OQNrXt
zQ>}^Xh;M^Ea9*^T{lPlHJ$S>6>vDp(w*6_aN5J?Iae7MEoahmcfz0Y(gs037t;@~s
z-0hgxco(SXpB=Es@j|b+)k)fsGrP0w?M<^`7KX21|4!aUD6I+=z{89jg>nQTe98Mx
zV*07vgPJ$%?wf^0)U16Vo@r}sk9T%S{^S{-qT!?a{5Bth-m#Q8bF5fHVQ|=~Tm2mu
z70a|YFOG}cKCx&x_U0p-ZrrV@|B%Nm=1GO|sn(j_fAL)C(nr=t26TUn1lW9GeL`PD
zHw~9lLgCP>B;}sX;yq5PQWr)Q*f*Nl71J{rZkK;CQkib%WB;Z#$AF&w*?49`TKn8y+4z9*Y}eo01MO;$vtklYjlX1|&^0a1g^L
zag)6mAzF>PM;u0R{?iO1awA>RE!3d3r!}858Y$uw{~_GrMwxhOlHj#w7pX!US*Ppw
z#(7Ntr(?&z+&j9p4oI{Fb*q&`VtWBhxv2)OGJ@fn<98=P)73}ke?!;dybQFPZ%dibbOAdtz`f>UK@;I%G
z>b5y~0--wpDR&Uh6_zkXwJKZ3nS0lKQ#|%0Ejp8|8M=o1m;~L%CIv_rM)j1e-W<|u3
z6CM3OrRUunDmZNKA-?q&mi3{VO`!S6g7c-tFa>c1Gv8F>HjriLzAFE$zVue^Da*b!
z+c$3AQ>^8GU1=O9-T7&8QlP#$EC4Tp^=;%wbAXlH$-oliSwOpWuk{=KJSIc0y{Nt=
z`R2F7Q8TCKimyT?wTzoHFVQ&vHn=SIZO3k^{z+jHoIHJsusIv|=O!o}2hHIcwE!cE
zBOtyRJOt8=AbnzWP;Tu
z)#X%^xf`%%+zf72hn7FKu4p14>A5Q^^Q`Ew>#t9Y!UW-#DkohHJMkR?HEQQr^KuRFP@7vIh>8)NqgITclWn&FY%zayJq*c!*kkeJL?
z;RZ~{&BS8vc9hFI^BQ|U<+3n&@_X@P4QUmz6(e?~XWx
z_i|jiW@r7}iz4xNaS3i=nZbJS7=0SeOFskmC176ZV?#+;o78J$?%zGn@yq!~sZzT6
z7v1wtuDbTV)CD$tFNk_NX|@D1Nap=_%lW@Pmnxk$93JOQ58FNcbm2D(7!(E5$^63$
zpn6@GsUApmXq^wPR~Kjh&A!_@E~nDyYg+CLii=Kr71;>Hr!)FUGq9h&$uc20ieLi%
z5b(H6!d@c%G;(tN%xt$EVNU>;I-z!}`N6d7x$AFE$4rlAG>Q)r-L*_i?7Y3jk(bE#
zBpFHeO%2=mxRTxWRBsK?#wjqZW3i>ex~17=sp;Aju$z{83L?5E>NYx
zowx%b5A=XKD05<1RU!G-fW9jEgG2Mz{8Bj*OXG_XWP6tA4-WIlU@?j87&F$`(aX(o
zVB7w<5ct)EB~+gdQgj7q9Hg_89nv0l7VH*fPj6ZuC>TU~dWGvg%W&yH+qOCenoHf#
z-4J1WHpMz^>oAFBh6cg#$&j|_y?&l#?1{ELCIC%8`3?nHPTs5ETTs%3Eh{c?9(0`3
zaUm++Ks;NA7aBe-8N1~1;{WP^_J3#-%e1Akbm4VH@OUDCaS_>v
z|M?Kx&O^)7fzYn$>UmiHuN}{$6yMaH1136-sG!D^^l3@0m1vSVp{hQD2#s?N(QaGN
zIG>z&i)?s&%$4PKV7>JESWUB5fxv6Go?tp>D#bnk(iB1EE7-Kk>=suf!4_~LGwQE)
zKmV+1eK&V5qhBFug)-GP^&GV*{|mGw6y9lL-La;yA!Gq3f0!>Ul(5I`>>O}*K72^?
z#i&eAp$Sc+D`(@@-kBFXu(DkET8C7)TE_2!4G&2_BxlaDu4mmhXY=(S%H?&p5k_>7
z*1Z$PJ3+3SRVMWe0rM6n?r&yP?kTmwR?J<}?Lz~-1!m>-K9y-b9pTF@xO2+pt=;<-
z@eU(tThe5vI#jG}loZrfu|52u=DCyGNXRFlk82c>-H)?rzxx`WW4(*vNGQ1LOJbye
z_K=!!(ztUXNPnzf(#k8tnN)o{%CA)S8;tMl>Yih^89la_YSH3;HRwK1&31c9oiprM
zXvLa~TXd06NAdeSkFjh^QE}D;iM~$`qaLh0$p~?9mem5;#B|um`X#bSVk45@-~C+2
ztVX(l`^fdTor__PPJv2yGA?=ACE(cNc2t{(b>E-*oM}Wp@r5orPUlD`ZS3_M1wORG
z$(qkUgz_%EdYNiDSA3sK?Fi%%
z9*5D|cCmLrwE5VfeKT1(j;JC8Y~E@mImBzY`Gg!#D0sEa7bzDVDkW9c=PQhkIup`b
zSuS__O@kBxwcA1Ap&q9d{DM4@6apwDq5VE*$%HN@XAk(u^=AS1MaFI{!nTeVeo>Md
z8$!Wu>%3!%&hZE&
zu`5Ui^%x;;kw6c(GcV~=c#>0zM=zF(epb9O&RI65PJL)rcwjaWLk>Hr;0pdC?r~Yh
z{EC!<)QE`BT?6?cggBjhj8?A;q39tOQR3LUcou}=6aq!aNdF6Qb#E
zzFH0FdZ1lai!)IA^q@;!v)v4)9z@N9)%x`plxp1OsYemYK_{&>^zIjm_9SrvYFpe7
z>PU8X^rc8zDS>a4yHy%+9fkf6)9f-PHpla>j2lVC*tIu| |u`br11qhKeq3Md}I
z-9tivRAMm&%?>shuiDAc5;w$!c-{M2TLSF^bIQ5jG~%
z@!Y?#=Xf~i(-_8Cx|~yQWm&?cLrY1W@@4|Gb7qyN)$PV6*Y%O-&4cb38_*+UcqO%Q%oKwz&Im|fYc*b
zpeNg~D<%iIvp;ULRyD&cFOi!(He}A$h@W)!A98RbG8dYcPF!F`_4joygD@uOv!&Q~k
zI**)*uWET~w&}pYSDBS1a_q$Ap9;qR8Yb<%RH#6!%40ky!fewjMfh7K5Zv$V{jurw
z`sJ2X7Z-SulNatq%N^bf0lttJ_Qqiesg2+v5Y+Ub3aWulj|P;x>ucZeJgC5=gt*P<
zb6*~Y|HE*iVY$OEzeBPAeD?0WiRV^|FgC`!KzPdVwQl3pPYO;qC7ZLQO3C$AC3ls>UwOxi#!T7|GknTY0Cf~+K}Yeb?;wK&
z3f02KE*1iv>e>A+*TlrI6UR97Bpzkcb{eYz*#$De4mEvOtP2nBh7
z@u;E`*a!JCIK)jLu^tv-PKFWo;u;YOuU7|$VY9EqgQafFEd}|W4c1&YDU4xpYzT=-
zg!q61Bp=ufrRY#WUeykf2xCFa=^DODm!MuAri2YhSkXtZZ6
zKiN6s4F!r944O=ESZ>VOuUKKa#8-VqAh5=;79KHp6o#p<#2ccOe=fim9KBs*?vHK{
zoR$ojOr=Uyfs42QQC0k=xp<(J$-=J88*EFbfwWfm&zELrv#tJRS-8#K_!vuARS^zw
z7pCRtpFyybhxWHGH|~s+W!->C?$sF`D^&HseV?|@Fr+mPTHU&4EQxR)+y^$tQXq(Jk`Ij3t~6x(nT`JO11p
zb5LXeO)}N7cb!1Lb9@X7Bt@(YsNMLI(U6&pl!8~Te8}}!_`>kPy8VTVA3TBulSYnERT(4N&bBTXJe
z0^i7-{TRk6y0SZ_dK0$m>y&DdtgCYST8E~wed#%qx$2XSjH7;%K!)d#+QAQ6Bi0kL
zGJBv05)J=hNJ6(3^wvhJ4Hw;7HVgr|*QH<<-*Np~e#iAPVuXlqhUnE#EI$uQ*z_wBPU&+~^}ESdgXUWxC!5(g>RJ8gE}V4s*{f3V5=+9_EOwTR?|YdulRK5AeqS(&eTVe5abB1lA$
zu7Rqkr}Wd{UL=RG16f1;M+1Wp-hUXppxgoW1t_0R;gjDrh3q5vzC~OdOX1yw@4BzR
z6!M6{>V^RBkT@K2vgpq;A{*iJDd>B{nT0DBEyK^mD
zZ?M1tbDd$ReD;fvJTvk(%LrA7lK@HTmLm4Z9#qkC0v8}@Ajgab+5^8CqF@0F>V=B&
zS6{X6z4(*edew}$nxlTJ>Mq+)VTcX#BUX~CMX{l>kunE^pJ@3U4(Myl1cD4uJbn0HetRsE<(=qVr%kUyyXiiAdr>Z)n?}emR3G+%1x0rqIfH7(R321x
z&`K~;t`KfRwPgJXVvpws%5j#HDCzp-(ug}~
z@4~Z9SEW@olrQS*Q@y*b_^HMV9Ix(#zfX-;7UBY0-^9naS|^^@932((@FQgc@+CWX
z5XYbj<{n(2l?GP$Kjc<%?gsicMx~zU7#H2Jpyhi>Z;s}Cb-TK4u<8cd&IHA9$O*=!
zy<+w~<)HTp;X%qzf3@ZmKtE5~t^b_M+!-5qT_B%DUWCO
zn({j3YF9^3b9cH=>;^k?wq=@XLq+gx%kuA%alyufQ*EcLn&wKH?)O;@s{S3jb<#xDc#tIn5n^$I7pf4Hj5-xNf`W6;RIs8
z4_w%`xze-@<9B+JvE{ofE1K(EFlP@OxFWnp!TxHs{!U?EQ`5fpR+C+2;{bZIu
z{us)C4XMJ3Fq-i^rlZ6(l>3T4N#>AUOWk#Maars9`7|rn)H|yIKlS<#XePKqk*(hC
z0pIirFhZKqap;+A!=JDUDerH7mbx9@`L5~sh!MrEo3Y3CjV}KU@VTzh7x?Kbc#?%m
zoCkl{cwu8$I%=TFpw4ydvDg=zR^OJMf&dbe>xyOXi4W(QzW%Gl`0rF!|L?f;AZON9
zr`18ijYrGkPU`FSL~_AF&Rm1KlA3ec8x-5u1JWN{dvpBtEk~tMUfDwamN79+C8610
zsGS~gE*Ku@(9l#lP{2HPLwAmjqy@r|!sR|e
zaYgk#!UmvIaL+SCUrz
zE}dg&z2%*EJH6D$_S*5nXN2<-W*Q%y5FsS{iogW)CJ(2p_=ssewa1=ie{Iy(c7QsR
z%)mI8ox-pk(2(5V1TQven?qBg_TyOop-bcjl8Qkm$&dhnRhKI#0_KJJmb8l{FA5luSkZ1
zt3@7YY50cb*3~1G6D(_v90vC?#a@y!;v<<(rHya>!vIn7
zOQhYN1fd{OFm!M?_`nosA)^mY0<>r)aKRqm4u!e1cNf~ZIFaU8YvnuHDs9Wx3@&m@
zU7$~0jA3S#DA*x`m(>FI5*a!O%L&qzsX@RVse0f$G1(jNBv{DWkpoLA7I1-eug+D!
zN-F*Oiav$C#(vspEwIYz4OEBYPaK`IfbxLK2Lw_~5iGPf*gN<%6ej{kRv;=j6tN~z
z^iu2YxkGcOpaYl0ix&}7UZ}AbQ_3^8-a`c
z4`@BA0j~s5WY(cYsK_lRho5&kTS@e%8sc^#=b#ZPxVh#xV^7{Qor<2NT%tySaBl!4
zPD~{eDS_KQSeGE5F4{E9wQi^AcEuM>NZ)rKku*E$I+z^F9#}Uu$*)?@m%>O6TxAC@
z1-8Co%f5aq({Z9Q(B8DYRq}0-OZ8v{fZ;0Hm^9`@y9}*cl8c@pLc7z$nhswr5MkiE
z`AGwL4@9tBCp?7+-=6W#86R|Kn<*Z-e4NT=v5t5#TBJD#MLjkqCLaLDE)r~lE!T2D(s%EA`xp+C|7lC+6jw(1qiQ^x(^-T8kQ
z3#nk=(6Q0XH9cq?OhN^V`Y)Lgy2C_C8`^b0-+xISz-mWqP
z5r=HJQT#CHQg1lvY6+$_eJtxA{xQROu1CvQbGCcJ_~)N``(=rS52~VLCZC}*|6lP)
zIHZ?nWxx_B2Z^lvAo*r!k7tdl4U`bxf9YwR{w-cv1QqaO@iBJ`zR}518^W9-a6Klt
zy%?eT;AJshVV~#Vh?e^s0$h^XusD2-&t?*!k3AA2B4-q~yBS7wrp
z+v_D@)+WQu=C}GY&7jWab}v&pzS~5-$~Nip;0z?H5%dr$2JP!vu(e;s3i?nuNUsot
z8*`2-4)S%nXImA768mxO&+N*Y=33>BF;fI;gSM4`oV5+On}Gqk8pNPgmUQTr1qWaz
z-(2na4?|62;%n?+FrWbH<+V2Vl8S`;_86(wz&qhSlEa8|W9%=ALOOIn?X{?Mo4@j&
z67FE@QlG!lj?I=See-;B^Yw7d3YT9L8_1rnvLFv<%jy?6VRla
z!Rer2LU5zUWTUEOGf?BV$4O_uqKLHCK!ZCJ{pvd*57RnqpxI|fvrT^*kMZ_*dNV*^
z2vEQqJBttj1c@pM$a4#uBV*T~F@6HBaAe|0-mQQYZuO7<8#v7onwKG^r+_Y6%uB;+ISHBL$~8{}>rs
zR2|Jh*k#^K5igGX(pj>Gw#`_FnToA_3gJvSF~Hj-K!of@@S%VLFE(BGp#-vs;*-{efM}F3$Ps}}q52J?C7wLWdz-+m>tZ^`in-FOE$)qT@@#*y
zq#CTJlUTHfeE$RzKnJ%`_ZY0h31y!rmWHZ-5I+{Q#|p&8&f%oRx_4x*Mk
zd-2?@9z5_U2jDTeU_&yMV8w~|a2Z^Dq+Xz<5F)+sv$-JIt*)Ms^-|O+=3dG-j64zt
zVQ)nNmm-LbiDA$rq(GcgI{=z;Si-`wR;%B&iZIU(x?q^_`qkRtGmh*hkRAof#h6H1
z2T0mLGok{}fJ*QV*xyA9=?jdL8VSnjwZNnu`S@b;?k%$dhfWXqg?VMJQuEb5jrugA
zp%*K1+lT?#fMB2%&?V?&be>EDS_uLygjxJwF<&r~%&ao>33_tfPRu9SAg@<3Lf_IU
zjd0Hv=epOzi)DAwP;-MJw*El~JoRj$yB+LBqr
zNS(9UzW2w!$bp-V4as+Rgc1(tC-_qN_eK`hWvzHtwbC$B@YE}LvTxd^ccvokg$}))
zbrQQUowMax{f+4yZF>DU#ftP4e%w5>`UBP5NnSQ
zBv3L>$PU!DqIO&YuWlU(y3E#>bd>M5NV!HDpZjCATp&`hpg6J%7HY>eNhKhOIe#md
zl|JE&d>zJpg~dDyE3lnq7${i5b&J2)LVFJv-Pi=NZucmbxwNC;eNgFB({VrNS`Svek7Yv?cMRtX+Cw<;w_c*C%nvX25D?(H=F!(cp1
zVgixLt?YjoM(6Pef8ZEWZGFj7H;~NQkGf-Gr7}>2U9#B>VO32}?@YTbc`?`Z3N1nH
zIl>#*2?0Iw9RFe1RA+1=y`!^#Cvg%YGIl^VI=#vK=q79D;ukHdz_6b#D6%^aCJnJs@Z{u<%g@)frW$;4L*9J;
zH>G(s*Wcz4M<*HfS2T|Pbp(g~gf{hW4T=E25cVycD>WR1lkLlxKFOu5SheQ6hb;OL
zBF@D?jlX}gNhd`Z8cQDby#i?Vl
z8(IS+f183&-sn+H`pwPg)dBk9YF(_T5n8T5PZ;Z1SAq^W)6kQsK6ecN-%1pKkruXB
zhGzqyA`3Va(|4gG{`X)~i}O2!-qm~*6D)7KnF0S0cV_nV(}9;@+m;(esKIaovzs2N
zVyt~w7OFE@@3617-pXRptA$iC?jKU)Xqv1EnyJc}1}QI@HcOuwP(8WI;)ds;&%0{_
zCKOlXB($Y*M}wjrUE7Nn4olVJx1N?-{D5DjE>bj_Es`0e-@e6aKd~GII
zwbAz{b!bs^HU)5?kCULO`n=ekiOm%}7aX=(-MeOfla%qceEH)wcZJfi4ez%L_hhnI
zdbIc|_E(`tcXcovOXm6L$O;DPnF
zm`{1Y;7}N6S}nsBZ#@Hh#=OV&46&@A@YIvv=^U>};s@^FQormsF~`#!I`4W?|L-;fIY@l`COc@7
z2M-|obP+0T1Hz;-q0_|_j*FQSi~EgJ+YCVU0cRIrh64XHlvdw>6odzFOeSLFUB_Q&
zrDKT-8JB*&8{N$Kxjq~%#i!P-Y7#76jQ@-yKfy7bg$q-K!4=4pfP(R?pbumKnFu0;
z3+B9-srv}`<1H#QPUTw5U;oTzdmpky#w0*aNokGdkucMd<;$JFz=tiG=psa*|#3G7^ZAa3gIbPhK%ZkN@r`&JWB8JGmlJM=;`
z@uUY@=}39%HK2`zz5zs&9Ef??C%Q8A+sN~6?XB~_72bCL`EgG=-{2I+HR5u7UD^|J
zOU>yi*6))8JWurb0U6@sGK2^Tn~0DqXXol4Fq*ydrtO5CZATu*>re64&wV*q1HRU7
zpqHz`O6&!y&lDRRHNk(NOupBJ?e~ZkyK(4#aiQ+1OhHmx?M&b=%6a!|YZ`SI241`B
zHf5!sZIwBn3lt~pd@5Og~8RV>0SuRp=a~ROX!~A|nrK2n}_HHJu`*cLTi)VkaX&pQv=fYDrzx1KwB`=Yg`tx^R0df78k=5|&s_-*K
zaHp{bOP2d;Ll3|L{u_STlb&8pV5N&sNd3c*U5gsQH_MUnslT~B=|YJ^GyxB@t>{#9
zvPgSNJAu2&xz=kdf{fUFruLW8AG
z{~qU8q(+;N``OvnOb+mDW!M7I%Yb|5;?-B7B2HUw%`75$!}^}3#e~V;U?%!UB$wnV
z8X6%%3}%8OEDt@Fi&*kliv71RTJJygro3AT_}rBH_?x>w*w#}+x-EJB$&5~c#eb17J((a
z&=>DGx31?7Vfx-q61-=MYWA4SLp||v$>Mns@-^h|E`n{6e13$bo(iS}1!&$&;1=(v
zbImpwjNVbiJq$g^qAh;*-gA=aVK3_%N|LSzhaR{C+Mpvi1QPaLg$oYt&ku}#9pK=S
z7EX?3V7w`uy{QJb3jw5KU}M#-G3_A{0@nSrf94+jxY(
z<==DyicE?7^p8~wKO+@`#JEPY@RhZl$8Ckh6+U7Kn;y%>BapUCDxcwoNj%nfLdBNM
zlN7i$`#ZGEzdUTcB^VYdFgw8Et7im1Mv;n4#apZ%s!;zfs>&(SMHzM&{nnP-j|g{Ad(*j{B0XO
z1*}0WK32HZWEzAqqTInc>TS36riF?NX>h@z$gVXcaz#i}dO~WbQ0nKQbaH~d{FsZ>
zh80z4lL%U`EjiG|l=q2;!<4;*gl!S~4Hfq2{B85dvJ6v|1@X5p*SzJ5F3o(BNZYw^
z-!8KkOR`l-F69MdOc_YqdrU^t*=er;h}$gG6Uu4$3ch`G0DJ6B4z+>}V+sZyB&F03
z=VyI$Xcm5IA^2gDQy_rcE$Sdy9d4ITli;m)kQH
zp3G6T9~{*eYIeMd{<^9<%Jd-TI9~kn$w8)X|JnjZWIhd(hi*^GBSyQqBGw?>)yU-|FF=II+wUDf9ru$d?pI3Y}EG9cO1>C
z^yY6unRhC1eGj(G&csVayw@4M$^QOJ5d(FP7N`Dlvk|xiJJ=?lYf@Wtg{v5ntP)xW
z&NkfJ4Qiciv*P1>7u$O9Zj||$rKxTp<;zCHn^I6!xuHX;S8!Rp+dQl68?9q(gG)cE
zyKzJF#btiA7(0eAouoe2BEXQ&{suV?J_6WOk4@Bj$kDPcNK7A5`Yt(mA3kZjibTwTLAyie($)~?{PeNsFOISkf*@wY9js6
zM5zmjUHV*C3-_0;Brmn$uDNnzSu`_J_foP=c@WhG9k%O*|T!NG^l2?
z?oF#e`ssh-LHNUCx>M!s-6&kJ@YdNlMPoW`aK3d!-v^Z!N`DTQ}rJqB@V!OOWn2)scqk2
zoSjgVD*2J@vBp3H=ZtgY()gl+=Bj-M#N`fMu@OAbRs8*;tu$DjT_9^8By6pZ5F!eN
zskh1Mu?+TNj|J`D{TT`OA|lmr5%CVDnuGgJH0Gvje>K0ItQoBgvNA&47*os^ROvK_
z43?%Fh9B!iA}}h~O#!gh1Am11rM;)Fp^bFyviJH)k>+j5$PVB;;c%5q*WOc)W9W$}V*d#Yk
z1|Z0xib>z5hP)@xH!E%hwH|kG7$Pc_*=g>^4{*t+Ql{;=R>l9-$cO!_k#BQ$#A;$E
z#U(g+t5(rj_05YUz9E%{(_roV=Y(wK?@(TJw+~B(mSYKEp=LO6-XAv-Vwlbl01G9)
zQ^6mn`W~C`+xqFZQ?Bi2)zXXof(>K6!gL*P_kcAC;EbYkWzsyyWUrXVGCk6c{!d2fk=twM%j?LNMO;$62RWvN;5J7
z#_qN%kM@#&1=)%pElQ!tn^VBg7zUxlYV%$v^85ZGrV%ir(RABf##>I6=YQW1{dCY=
zIS)>DMI_8(>!YeDTRlq$S^5ER=)JU@kr1w!@2y1v^1(-Y{RK)tW~t0vdUNhf&nYvN
z4w8xyIFw^*Y0n6)3^6I9Bla$xjaJ-Mke>?ohVy#RFnu1XZ?Oe3ntk_$OJ3{7x%vhj
zaeK!1$$PRiSDDf@=|;6T%5?w>Kx{}J)C(Lk%j>XdQe6j
zm>R*5!s5{}X!hnnO(FuAym$VkAI01YPPl)d)kDtR)gZuAAz$q1g_enVK9``*&nFas
zUD>%Dp>rF1RdgkwKVb2)M$r5yDH8pyHh)Bqhv?)^;Q2KP59iIGz-qa87r}3Ab1oB!
zHzApK!e2lWJNd`d`^IScx}<{@H-+XInlxA`&AB95^kp`cC1ZlB&nUd#^r*%Bz_Z&H
zw&PpEKYe;Vq^3hf8ak(+*|ubVb9FISc-QtNuH&s;rlk46XO7hD{eXMG8an3-#sa8s
zcm`C#7
zjG%Uk&D{%>wZ}6mzaLwNzDvq%e{4B!BH(Dyo!0t&R})2|s8eqPf2-T*?#Wmy3N*%0
z{>>_a-AJ52pkMGR?amA`H1Gk{IPXV=rn%XOYT@rUl<@Xs?RUyCJT+JrcM|%weQa8`AyNf4vq7}9O4If`xO`NbDAg!7@}F+
zFG2J@M&p7GVCJQ$kC?AmdPu%r*Yf_1#7Q^by{PxqZq92^!h)KZA5~N3<^~F4BEAEGtVO!pNX}1etF2zEM
zGlAD@;7RO_5qa~@R{DO{;-2mX6Yqj@AkSOcC>v+b%>4QcoPc0n1s1prHucSvByr(5d|*rt!<`~xv{hM>Pm!vAOFFqV;7T307uq~oX2
z2ZLBph8j9yd=c$CNXQ`sz}AtQ@0fdYe>^UaZ0mj!5BnIt{Qh4`bt74%t<5-!t<(=E
zn^5dJ=w1E;7un$E>f9~)DE-j&RE~6QJ1{B;Je^t)o#VyfJ1#~BAZj_nX`^9i4i+aS
zW(gNg$xmmvG0VTBKMYkevFk+u-uMldTVKE~|13=TVJ!1K3M|~Z+Y2;Jsst^Iyu9b6
z10X6RbsDC7#_q%0?8!sk@pq*%&o_qWU#?dk98Djjg9|Zy@*UphxK888@9~RUGz4rQ
z`f#NKW}TSpx880C9X)Y2#=*kTTY8oU55}Eh;~AGoOVTtjkqyc^PE#kMqjxNUVdcZ$
zHfodGt~2+ZN_e_4HR(E=Ym0j@?Nt8n7hH~-TR7;AeFV7z0v)g4l0w<0+R(P%SP|*y
zxFrsh{p%qTXk_sv`6ar6e
zLUwF{3u>?tFO5WXq}UF?pg4wEeoqmU^4NoY`Wh&s9KWq4Rapk+UK2BxxZ6qPW_jB$
z>vZu6fIg)kl%_HpjrQ81xrb0131fqSn%1<&Gu7$24-7>$Qw-*lj#{n6A`WhF;o*-!e)b7mBBwk}wFHU23}_iP6n?{_Z;*~w%dOMe5pU(*
zHOOkTG-O{A@(<|zOT xUw%&0s)TaR?v~<
zLwDO?@uf=*iV5gzX~Df1p_-_J$i}~eb0v`Ny{$tPejXx|`^Qz5r&U`Aj&DN?p@i^_VsJ?g@CNCau
zKA)4e5Z<+mrm4_qVE{Maz|%53=`Nu7aY~MoTErchC6X2PxqI|1@`(N1h`n1`V%OYY
z(C)o2_WgOW+I^r@bpB8WPDHP@2WRHyUeh9oiiu4d=fAE*77l-{dV*=eHyk0!FNqtid!U<;k=(&ki@a;@MlX5T?0){|`F6JmgHX?>KABa4X?OaR
zrLS8-WSi+nx(WQ&RDIfY@$M9Z)eiO${KD~j%IPH;eimeB(YON`DX>B#{shJRf@%u(|-lD}jC--K~tpCu4YamSj+VKx^`5)kEAJaf!T{Q0eAh
zKwk^nIQCLIvGL)EO46CB`OW+rM@*LWA5+(SOBUt1lfM+OB<;N
z)|k-`=SBu}VMLlPx8mk$H*E#2UjNwT6UiJZACi3;C~c5)-+l7-$l((LHMT`4J)oUe
zV(0;S^a{QfhP*k80Dd$UgEh1|m!G&)oY7N)GN1?%eU3fV8pmBdKjqS0;WB5Q#+wv1
zs@zducO`!gdmT0}b(vY+q@Jh)c>9uud?Z9%)Ts_KBJth_?Mqe8Rv`y&kI9VYuH*{a
zaLnvF{(@>8^1A#nHP-hgWDLmgPtmri_n4UqO|x4~P+3$U1@Vl4<(~J|7in`SYp=eV
z6!oFXQ~Uc!KZoG)w{2N9GM+Ir%XjO={r3>-@5%b}R4?XJmJCB@GWh0nHE109*pR1n
zvtL-LO@5j&2vJtTYjU>==nd6u0`%`ZYiYdpR@79uH+4SzFx~7;(N%KL<#rNErbaXK
z_HAP=HUj4Vzyg7uBjF-z1hx|hhrr2;elSLwJ?4w2Oq2%W{tBzC9w@Q=(EoezgMo(D
zb&!1u8?q6Wco=3hI2r3fZha)0e|F+Gl+XF=7vIV+2U_yB1k)k+YtC|Vv1_kgI#*}@
znzvF8QdxOYEwWHjH#
zsD-4L6z#B0&@xO~C{78rJMV957mQtQ&7MJi@iGi5b%s?aICa6u0k9+~7qIe=p;YSH
za~HRITXD-z@1#qi4r)idJ9o?Ws>0AzG`CDzRJ?F9<2%^*jyo&1GzC~fsF_y;LV#9`
zm=5Irw57Br)W5w?&){X$nNvGp><@MAAIRe+RyUdo{R{1vJ>#Xn>J$f#q~CQx35#IH
zvH-gJqpYWM^?yp3lmCiK|LU^tABcLFj+ZAio58FhMbYn9J*w561I|
zYry^@3bdiRKN%gsz5~C@Wlh$Ie9L~62#qOH8%ITys#ty;(QaOIYb@-jEOE*)IJsJ3
zdtr95h=-E*lp@t#V3CLt4L?|19MEF7rXzm4&UdqVIn68moP1@2MrI_x7bBE*`W0&e
zjL^G|hb9Ty=q7XozgqOw&o)!cmv?hFCXdt)o|yN#b@H3E?B`!FhJXdJrQMsZ=#PQG
zeR*pz3GqFQFb&NVag$h%7MgheVG4B`+hg!88*1y!ZgD
zX=&qlE$GOGtq;;KAzKOXUiT{*2a!$}XlP`Ccz@viInc`t*4}Wa%ks@~S3+)NpLTRl
z3OyBirMY%y?>`v8%P^XXZfvJ6%7huyVgCx<4cA(2MkhYVUBn2~2VVW5;o%(FARo@_
zo|1C+AdMsOV`IN!-mmbWhFjVc3x1P*Fnr|nJjxErVmq~VYO?KDOgBA7k9Zq&I6NpD
z$M(%y>2Xh)>+hvGYw8ktWy76V3!{1Tnotli9bx1a7OuD7NSvY|Hd5iIdY22Q>ZRfG
z^{Sh-$b|O}PfE4|ezn(;HJx7ZLQRWwIyN2e}I|D5Hus*hIt61ocfm^`Q!)
zf0yR@pKp!bU*DEQsF`2Y
zf!(9K8nUSnYB3}4{KX8de01A1=wsZAnDF?s8`q%IdyH?iKS!9a;P_?RXqH5M_WnjD
zCQ9sQk$(F%)0~jXA!gp;5Qa)cV3~wrY^K5Ns!wu(6~i;R1e{J{(yeb7-{M;zT$o|H+u$XDeVY$z){IX>ri
zr{hBVt2}~W>1~N@GfM>z$!NH-U#*bku$RM)=P~B@na8K?*qpI~feh17yp0F^QR@TM
zgrvpK1Nk9UIP0+Mq60>zhNrAWGGs$C8;%T`x?Qp38pe<xa=4-V7R
zDGAh5v<*W5GPcqJDd{7opKGrQ9xN;g5PTBs^K|g5aZ7!TEyr7~3emmO9)ElOhRoDz
zuzp`G)mZ>+<&kdi0o^FIiMf04mUCYvFI|2(vpj4iVRBt^!G!HL+fnud(ldZzrB21R
zGn=s2L0XVN-1im7FeOh85n!@@vk!%Kttkheo4wx&JEgvtA7*k}Uhle0UpPC^W4;Hx
zKfwjuU}S)bxOM08%;MCnOmprJRPF7t^)qx?dgG!2we_v|*Y6S=
zNrkRZo`6L)S`>AeUrxDOsB|VbFyhu!&1$XK;`f^=qYtZnluShf{({x;Irafq9eTa{
z1rU6vkd~lGU`?>9?eJ7}8sVeeov1QaK~
znFXD4tf{5e;B|WO&nRy4N(TC-n_lxXOrrk%+6QG5>W{c_w`O(tHB!ip=3;s{<;2G$
z%5BOj8iT*a)~iW>A;r^hu$O_hBBqn)r(|sc`eraE;{;%Fq>*f)CLE14bXVJ#i16oM
zOeIgZYRollGYe$OY&I)XTgWA7P&;?~WASyeBH;7a7^letN$8sS)gmJrCX9(GMtuE|
zJIH@Pzl^P4`ZB<^+ND<$IM+C68e;KHi%{A`
zB^c(8=IIn>+@0-IM9O`~Q*@(WjK3+KL7kB-`(3oQ?Dl=i!RwGtM$YWl{@CWS?><=98&zX
zMLkCwt3h#tBqH*!d;{0sTc8H|PdqDwk%)ebz{_~4$k~F^?HxI`4S^qZwW}-5TRjyj
z5$}ikj*!9=;RRv<4g@!8gEH!|N?l!22s$eIAyK
zCWNIJYU6*x4~=l>e`Zyc*j5a;)L+QwRXyh<$dNx9f7m+Va@+u0H|)YCW9j@ZCSQM%
z49&g76%{&ds9QyAiHy3i^M9`OT@&DP175&?2JvWR
zD&Q!_VmgA$7i_6E24FPwY44hI-kidApLeQW`W{Z)$(kJC@xDIfApgRUmF@%vbhu*^
z9?bTh;luB4xR7&fvC|Awx-#ci)kX~ByL-ufXn5*|TG%K7r~t3ucs4!k$`$x)dwYdphi??99iVhglzr
z1ZQ`kDKG(sJgHjIVOHB#a31YV(;=OG*W5APELC`R-AbXLaB>9t>^@P8J2e^NlevvB
z`8%T*^LL=)O(SU32)fCR7EXzRO3$-Zlqbyh>3G0E)qbA!H9{}Ac*4{A&hFjC_tgu}
zE9>U11nE~o{uBkR&i>?O{G_%24ZQEcW`Uw$FeV)gCA2zd#$rGv45uc-+qlOSowTxp3vU@F#-Ff<6;_nQp
zHIKt}G;7e3fR2QI_wN)Wf6DPZ%6eV1XkPy=!=|mh;G>)6ykq$0jl`3uukT@uIirHW
zidK8G&N}n=%mSGDR%$`y^PC@T$I4$aG$wxY2+Ll@MZU0cor7NGdluki)c$zS>B~|p
zdLxps^+la(O4;hiCS^!Wbx4$2g^4zvHmz8Aezf{k=0QonMWu%WY&nV$Awvv{`zc*2
z08l#?`B+>s(7)l1J2?-PB)~@Q=6IDpmi{a&rdW|CM0!4mM{294!o@KbCOK%
zE?aONJurY%Qad8IqG5h%;pu)UrLVlZz)*Dvgkfk*loInf+8b!7p(L?p!j|Aq557s&
z9g*p#NDX%*zA8888NU0zr3H@L7cLE4P2j!S^7+z3kNUqGXp9nK0odl?c^fc}QDk5H
zXZZm`i34k$JPl9vG}c_bx2Pqfiqgb6)lA_vt{1fv$uu7p-z-_UPgV)Aa#bl+c(td@
z*TC@YI>oHKukhH6hJ3z5L-VyzWvH$}-toX#%hh*&MjsPJQ`it5D1O=$VGL)0pN;E?
zBquXNZTV;;)~z#I?Jr6Xv}_M3*nC*h(BqFc2UM@iY#er64j`lFdd`|fG%A9g@2#!5
ze83RpWgWMQ*dI$OJP~+eEkeiPZOfvO%PSS$Eer0SZv}HDIJ}%NUm0g;qj=cd6fumU
zf=w3VwR_0bz&Y>CfaW154SuZYC1aQ}
z%8~iBQaH=H}v5)r61dOye@Sz
z9W%@fjkI@l%F333T?}95QMhE>()cDrqp#jtrDnoB)dGZiP7pj|W>LB{2g)C~_j`LH
zNZxu5)vnKw!0nSoe$}Cjs@p5wj|cebCrDgWA6
z8|m50XOCJu@tauub166LgGCx8BTi{oUC
zq8rsyOUD_U#fKh|&Ria8x}^5$PY`Hf)vV|TrZ
z@@fSH7yehsNqWyN3)(WfxE85?@qO@4{uJl*>+7L%qK*%1ZcXA==i3qzHe~I;XJ)gt
zQn19uT)@!9rZB=tt-eigPT$r7>Wej9jd@(x_OP+c#Sinzv{n{>F6~q(IfKQ$LY}1e
z>+B!tg9#{3qpnvPC_z!P`Pt7RfY~WEWG;*K35Q|v5l9P8MBOepH|rye{Wn(=EuU`x
za{_sBl<=oya>vpoD74W0oSu-0Xi1=S)~2KPTZ21SNE_&Q82?UiENBj%AwUzvF8%0~
zCD%L+EdFsn(aH@NJ>)5Qa%{()b6A;-);(U|S66ey!DJS4qYyY_g5D-WWP~b0Yoz<5
z%mGc2>@x_58XkQGCU!bvNb}s76B36K@gE7Qnf!>{$QeECV
zmLcg(gOl1zIj>JO+aJc5VV{D2uT!Fn#B+{#tr3k)2rKblUSnJ%81j{WAoBs>P^q8o
zahHV8je)(6NnBK;q-}jU@{Wv+tzlSm#Z||et7%KMN8EX3FLYi~*)dQ4_SzNw6CZwn
zY?Z@LUzefz4?eC)?Vm)jO5^)JwqxTm8q>nO5qkVyBgp44Y{+eDS1V<=FVM}7
za;%Cl##I_q^O6V^cfnPUj+_p7TAUO$#+hE}QubUa#rCj)<)%f_GODwKwW1Ruw?wGi&Awh(@`+gQ8#x((@@Ubg&vxB(3`-|>@KH-
zzrMNs;(*+_%Ods0Y&)E_3~^69tDfw6o>oN&hB9nF8}Ye+b#d`W2+EHOEu9@#3o3I9
zO(v-Mr2mS{v68-Q?Gt$Sz#XH3oI4G*IEO!`;LNyARs_5gZ`hnT^%V1|VMwJfOOj@z
zn5>m(7)W&YCN)gloHRUhQ@y5I&1-=3Gb58L3G%Vgd}PR)lTYYU-T%9HtODtW
zvN$r|l(At|IvrE@+pGgD$!MoSAaE>2@2w?X%j
z%UFpW1cUi1mOYxz2{eCjBAiulOL;SyhWLL2LI2A~=ih!h|F`c(LQ`N2E}+$1V1k(D
z8mM&hKz}$2og4RXFZtjYz+-;?*Foa}9(pCt
zykJqcKqqOz#Onr+X}XDgY=(d2Uz@!DcsYGQw`{GWNx}TAi6R)Y41O)eKpC47C0!>8
zO=75q-fL+=2&3T3;c3^lPxAyu^4+v{Yq+w%@2+@aG{&|6^}J2Rs1JW1G%P4drA{S!
zNa*yn)RF1^N_gSf0MD1NSNHL2Y+5sQ79*bEVI
z%fTN0yO00RpWrdhI{ww=2U^^acQ(Wol)kt699c)E
z6bfhbY6G(p3xq~wJ|3y#pu2Z=G)-?b^VG=Ky^RT1`Tont`P+eOIsTo4?3*pK=G`j?
zpl44!_^;dYU$^I>-HB+C0}!r5FYV-_qdBD^di!2JyWG;+8G|Ddxi^DMwN7b9|B4$R
zR!x)S^?qd8k8=&~>8?K|1>>fuv<%QbeOWSCjD+GBc4n4%QDVEd7pnVrkE3zi>TMVYo^CJVxSz7f7GeK53KVDILyu%o%o67VkqB4U7~A!EgfjZ+g!A!eJUmI
zS>&T|{FB2(w~@1x9WAZA3fU(Wl5g~mg#4~4te9WhLy14QKwfZNFmE)~Qj |