diff --git a/src/Depend.sh b/src/Depend.sh index 520d9ae2bf..0962dace51 100644 --- a/src/Depend.sh +++ b/src/Depend.sh @@ -126,4 +126,5 @@ fi if (test $1 = "USER-REAXC") then depend KOKKOS + depend USER-OMP fi diff --git a/src/USER-OMP/fix_qeq_reax_omp.cpp b/src/USER-OMP/fix_qeq_reax_omp.cpp new file mode 100644 index 0000000000..c8e5e3cb93 --- /dev/null +++ b/src/USER-OMP/fix_qeq_reax_omp.cpp @@ -0,0 +1,1300 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Hasan Metin Aktulga, Purdue University + (now at Lawrence Berkeley National Laboratory, hmaktulga@lbl.gov) + + Hybrid and sub-group capabilities: Ray Shan (Sandia) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "fix_qeq_reax_omp.h" +#include "pair_reaxc_omp.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "update.h" +#include "force.h" +#include "group.h" +#include "pair.h" +#include "respa.h" +#include "memory.h" +#include "citeme.h" +#include "error.h" +#include "reaxc_defs.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +#define EV_TO_KCAL_PER_MOL 14.4 +//#define DANGER_ZONE 0.95 +//#define LOOSE_ZONE 0.7 +#define SQR(x) ((x)*(x)) +#define CUBE(x) ((x)*(x)*(x)) +#define MIN_NBRS 100 + +/* ---------------------------------------------------------------------- */ + +FixQEqReaxOMP::FixQEqReaxOMP(LAMMPS *lmp, int narg, char **arg) : + FixQEqReax(lmp, narg, arg) +{ + if (narg < 8 || narg > 9) error->all(FLERR,"Illegal fix qeq/reax/omp command"); + + nevery = force->inumeric(FLERR,arg[3]); + swa = force->numeric(FLERR,arg[4]); + swb = force->numeric(FLERR,arg[5]); + tolerance = force->numeric(FLERR,arg[6]); + + // dual CG support + dual_enabled = 0; + if(narg == 9) + if(strcmp(arg[8],"dual") == 0) dual_enabled = 1; + else error->all(FLERR,"Unknown fix qeq/reax argument in location 9"); + + // perform initial allocation of atom-based arrays + // register with Atom class + + s_hist = t_hist = NULL; + grow_arrays(atom->nmax); + atom->add_callback(0); + for( int i = 0; i < atom->nmax; i++ ) + for (int j = 0; j < nprev; ++j ) + s_hist[i][j] = t_hist[i][j] = 0; + + reaxc = NULL; + reaxc = (PairReaxCOMP *) force->pair_match("reax/c/omp",1); + + b_temp = NULL; + + // ASPC: Kolafa, J. Comp. Chem., 25(3), 335 (2003) + do_aspc = 0; + aspc_order = 1; + aspc_order_max = nprev - 2; // Must be consistent with nprev to store history: nprev = aspc_order + 2 + aspc_omega = 0.0; + aspc_b = NULL; +} + +FixQEqReaxOMP::~FixQEqReaxOMP() +{ + memory->destroy(b_temp); +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::post_constructor() +{ + pertype_parameters(pertype_option); +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::pertype_parameters(char *arg) +{ + if (strcmp(arg,"reax/c") == 0) { + reaxflag = 1; + Pair *pair = force->pair_match("reax/c",0); + if (pair == NULL) error->all(FLERR,"No pair reax/c for fix qeq/reax/omp"); + int tmp; + chi = (double *) pair->extract("chi",tmp); + eta = (double *) pair->extract("eta",tmp); + gamma = (double *) pair->extract("gamma",tmp); + if (chi == NULL || eta == NULL || gamma == NULL) + error->all(FLERR, + "Fix qeq/reax/omp could not extract params from pair reax/c"); + return; + } + + int i,itype,ntypes; + double v1,v2,v3; + FILE *pf; + + reaxflag = 0; + ntypes = atom->ntypes; + + memory->create(chi,ntypes+1,"qeq/reax:chi"); + memory->create(eta,ntypes+1,"qeq/reax:eta"); + memory->create(gamma,ntypes+1,"qeq/reax:gamma"); + + if (comm->me == 0) { + if ((pf = fopen(arg,"r")) == NULL) + error->one(FLERR,"Fix qeq/reax/omp parameter file could not be found"); + + for (i = 1; i <= ntypes && !feof(pf); i++) { + fscanf(pf,"%d %lg %lg %lg",&itype,&v1,&v2,&v3); + if (itype < 1 || itype > ntypes) + error->one(FLERR,"Fix qeq/reax/omp invalid atom type in param file"); + chi[itype] = v1; + eta[itype] = v2; + gamma[itype] = v3; + } + if (i <= ntypes) error->one(FLERR,"Invalid param file for fix qeq/reax/omp"); + fclose(pf); + } + + MPI_Bcast(&chi[1],ntypes,MPI_DOUBLE,0,world); + MPI_Bcast(&eta[1],ntypes,MPI_DOUBLE,0,world); + MPI_Bcast(&gamma[1],ntypes,MPI_DOUBLE,0,world); +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::allocate_storage() +{ + FixQEqReax::allocate_storage(); + + // dual CG support + int size = nmax; + if(dual_enabled) size*= 2; + memory->create(b_temp, comm->nthreads, size, "qeq/reax/omp:b_temp"); +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::deallocate_storage() +{ + memory->destroy(b_temp); + + FixQEqReax::deallocate_storage(); +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::allocate_matrix() +{ + int i,ii,inum,m; + int *ilist, *numneigh; + + int mincap; + double safezone; + + if( reaxflag ) { + mincap = reaxc->system->mincap; + safezone = reaxc->system->safezone; + } else { + mincap = MIN_CAP; + safezone = SAFE_ZONE; + } + + n = atom->nlocal; + n_cap = MAX( (int)(n * safezone), mincap ); + + // determine the total space for the H matrix + + if (reaxc) { + inum = reaxc->list->inum; + ilist = reaxc->list->ilist; + numneigh = reaxc->list->numneigh; + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + } + + m = 0; + for( ii = 0; ii < inum; ii++ ) { + i = ilist[ii]; + m += numneigh[i]; + } + m_cap = MAX( (int)(m * safezone), mincap * MIN_NBRS ); + + H.n = n_cap; + H.m = m_cap; + memory->create(H.firstnbr,n_cap,"qeq:H.firstnbr"); + memory->create(H.numnbrs,n_cap,"qeq:H.numnbrs"); + memory->create(H.jlist,m_cap,"qeq:H.jlist"); + memory->create(H.val,m_cap,"qeq:H.val"); +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::init() +{ + if (!atom->q_flag) error->all(FLERR,"Fix qeq/reax/omp requires atom attribute q"); + + ngroup = group->count(igroup); + if (ngroup == 0) error->all(FLERR,"Fix qeq/reax group has no atoms"); + + if (!force->pair_match("reax/c/omp",1)) + error->all(FLERR,"Must use pair_style reax/c/omp with fix qeq/reax/omp"); + + // need a half neighbor list w/ Newton off and ghost neighbors + // built whenever re-neighboring occurs + + int irequest = neighbor->request(this,instance_me); + neighbor->requests[irequest]->pair = 0; + neighbor->requests[irequest]->fix = 1; + neighbor->requests[irequest]->newton = 2; + neighbor->requests[irequest]->ghost = 1; + + init_shielding(); + init_taper(); + + if (strstr(update->integrate_style,"respa")) + nlevels_respa = ((Respa *) update->integrate)->nlevels; + + // APSC setup + if(do_aspc) { + memory->create(aspc_b, aspc_order_max+2, "qeq/reax/aspc_b"); + + // Calculate damping factor + double o = double(aspc_order); + aspc_omega = (o+2.0) / (2*o+3.0); + + // Calculate B coefficients + double c = (4.0 * o + 6.0) / (o + 3.0); + aspc_b[0] = c; + + double n = 1.0; + double d = 4.0; + double s = -1.0; + double f = 2.0; + + for(int i=1; iall(FLERR,"Early Termination"); + } +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::compute_H() +{ + int inum, *ilist, *numneigh, **firstneigh; + double SMALL = 0.0001; + + int *type = atom->type; + tagint * tag = atom->tag; + double **x = atom->x; + int *mask = atom->mask; + + if(reaxc) { + inum = reaxc->list->inum; + ilist = reaxc->list->ilist; + numneigh = reaxc->list->numneigh; + firstneigh = reaxc->list->firstneigh; + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + } + int ai, num_nbrs; + + // sumscan of the number of neighbors per atom to determine the offsets + // most likely, we are overallocating. desirable to work on this part + // to reduce the memory footprint of the far_nbrs list. + + num_nbrs = 0; + + for (int itr_i = 0; itr_i < inum; ++itr_i) { + ai = ilist[itr_i]; + H.firstnbr[ai] = num_nbrs; + num_nbrs += numneigh[ai]; + } + + // fill in the H matrix + +#pragma omp parallel default(shared) + { + int i, j, ii, jj, mfill, jnum, flag; + int *jlist; + double dx, dy, dz, r_sqr; + + mfill = 0; + + //#pragma omp for schedule(dynamic,50) +#pragma omp for schedule(guided) + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + if(mask[i] & groupbit) { + jlist = firstneigh[i]; + jnum = numneigh[i]; + mfill = H.firstnbr[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + + dx = x[j][0] - x[i][0]; + dy = x[j][1] - x[i][1]; + dz = x[j][2] - x[i][2]; + r_sqr = SQR(dx) + SQR(dy) + SQR(dz); + + flag = 0; + if (r_sqr <= SQR(swb)) { + if (j < n) flag = 1; + else if (tag[i] < tag[j]) flag = 1; + else if (tag[i] == tag[j]) { + if (dz > SMALL) flag = 1; + else if (fabs(dz) < SMALL) { + if (dy > SMALL) flag = 1; + else if (fabs(dy) < SMALL && dx > SMALL) flag = 1; + } + } + } + + if( flag ) { + H.jlist[mfill] = j; + H.val[mfill] = calculate_H( sqrt(r_sqr), shld[type[i]][type[j]] ); + mfill++; + } + } + + H.numnbrs[i] = mfill - H.firstnbr[i]; + } + } + + if (mfill >= H.m) { + char str[128]; + sprintf(str,"H matrix size has been exceeded: mfill=%d H.m=%d\n", + mfill, H.m ); + error->warning(FLERR,str); + error->all(FLERR,"Fix qeq/reax/omp has insufficient QEq matrix size"); + } + } // omp + +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::init_storage() +{ + int NN; + + if(reaxc) NN = reaxc->list->inum + reaxc->list->gnum; + else NN = list->inum + list->gnum; + +#pragma omp parallel for schedule(static) + for (int i = 0; i < NN; i++) { + Hdia_inv[i] = 1. / eta[atom->type[i]]; + b_s[i] = -chi[atom->type[i]]; + b_t[i] = -1.0; + b_prc[i] = 0; + b_prm[i] = 0; + s[i] = t[i] = 0; + } +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::pre_force(int vflag) +{ + +#ifdef OMP_TIMING + double endTimeBase, startTimeBase, funcstartTimeBase; + funcstartTimeBase = MPI_Wtime(); +#endif + + double t_start, t_end; + + if (update->ntimestep % nevery) return; + if( comm->me == 0 ) t_start = MPI_Wtime(); + + n = atom->nlocal; + N = atom->nlocal + atom->nghost; + + // grow arrays if necessary + // need to be atom->nmax in length + + if( atom->nmax > nmax ) reallocate_storage(); + if( n > n_cap*DANGER_ZONE || m_fill > m_cap*DANGER_ZONE ) + reallocate_matrix(); + +#ifdef OMP_TIMING + startTimeBase = MPI_Wtime(); +#endif + + init_matvec(); + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEINITMVINDEX] += (endTimeBase-startTimeBase); + startTimeBase = endTimeBase; +#endif + + if(dual_enabled) matvecs = dual_CG(b_s, b_t, s, t); // OMP_TIMING inside dual_CG + else { + + matvecs_s = CG(b_s, s); // CG on s - parallel + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTECG1INDEX] += (endTimeBase-startTimeBase); + ompTimingCount[COMPUTECG1INDEX]++; + ompTimingCGCount[COMPUTECG1INDEX]+= matvecs_s; + startTimeBase = endTimeBase; +#endif + + matvecs_t = CG(b_t, t); // CG on t - parallel + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTECG2INDEX] += (endTimeBase-startTimeBase); + ompTimingCount[COMPUTECG2INDEX]++; + ompTimingCGCount[COMPUTECG2INDEX]+= matvecs_t; + startTimeBase = endTimeBase; +#endif + + } // if(dual_enabled) + + // if(comm->me == 0) fprintf(stdout,"matvecs= %i %i\n",matvecs_s,matvecs_t); + +#ifdef OMP_TIMING + startTimeBase = MPI_Wtime(); +#endif + + calculate_Q(); + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTECALCQINDEX] += (endTimeBase-startTimeBase); +#endif + + if( comm->me == 0 ) { + t_end = MPI_Wtime(); + qeq_time = t_end - t_start; + } + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEQEQINDEX] += (endTimeBase-funcstartTimeBase); +#endif +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::init_matvec() +{ +#ifdef OMP_TIMING + long endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + /* fill-in H matrix */ + compute_H(); + + int nn,i; + int *ilist; + + if(reaxc) { + nn = reaxc->list->inum; + ilist = reaxc->list->ilist; + } else { + nn = list->inum; + ilist = list->ilist; + } + + // Should really be more careful with initialization and first (aspc_order+2) MD steps + if(do_aspc) { + + double m_aspc_omega = 1.0 - aspc_omega; +#pragma omp parallel for schedule(dynamic,50) private(i) + for(int ii = 0; ii < nn; ++ii ) { + i = ilist[ii]; + if(atom->mask[i] & groupbit) { + + /* init pre-conditioner for H and init solution vectors */ + Hdia_inv[i] = 1. / eta[ atom->type[i] ]; + b_s[i] = -chi[ atom->type[i] ]; + b_t[i] = -1.0; + + // Predictor Step + double tp = 0.0; + double sp = 0.0; + for(int j=0; jmask[i] & groupbit) { + + /* init pre-conditioner for H and init solution vectors */ + Hdia_inv[i] = 1. / eta[ atom->type[i] ]; + b_s[i] = -chi[ atom->type[i] ]; + b_t[i] = -1.0; + + /* linear extrapolation for s & t from previous solutions */ + //s[i] = 2 * s_hist[i][0] - s_hist[i][1]; + //t[i] = 2 * t_hist[i][0] - t_hist[i][1]; + + /* quadratic extrapolation for s & t from previous solutions */ + //s[i] = s_hist[i][2] + 3 * ( s_hist[i][0] - s_hist[i][1] ); + t[i] = t_hist[i][2] + 3 * ( t_hist[i][0] - t_hist[i][1] ); + + /* cubic extrapolation for s & t from previous solutions */ + s[i] = 4*(s_hist[i][0]+s_hist[i][2])-(6*s_hist[i][1]+s_hist[i][3]); + //t[i] = 4*(t_hist[i][0]+t_hist[i][2])-(6*t_hist[i][1]+t_hist[i][3]); + } + } + } + + pack_flag = 2; + comm->forward_comm_fix(this); //Dist_vector( s ); + pack_flag = 3; + comm->forward_comm_fix(this); //Dist_vector( t ); + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEMVCOMPINDEX] += (long) (endTimeBase-startTimeBase); +#endif +} + +/* ---------------------------------------------------------------------- */ + +int FixQEqReaxOMP::CG( double *b, double *x ) +{ + int i, ii, j, imax; + double tmp, alpha, beta, b_norm; + double sig_old, sig_new; + + double my_buf[2], buf[2]; + + int nn, jj; + int *ilist; + if (reaxc) { + nn = reaxc->list->inum; + ilist = reaxc->list->ilist; + } else { + nn = list->inum; + ilist = list->ilist; + } + + imax = 200; + + pack_flag = 1; + sparse_matvec( &H, x, q ); + comm->reverse_comm_fix( this ); //Coll_Vector( q ); + + double tmp1, tmp2; + tmp1 = tmp2 = 0.0; + +#pragma omp parallel for schedule(dynamic,50) private(i) reduction(+:tmp1,tmp2) + for( jj = 0; jj < nn; ++jj ) { + i = ilist[jj]; + if (atom->mask[i] & groupbit) { + r[i] = b[i] - q[i]; + d[i] = r[i] * Hdia_inv[i]; //pre-condition + + tmp1 += b[i] * b[i]; + tmp2 += r[i] * d[i]; + } + } + + my_buf[0] = tmp1; + my_buf[1] = tmp2; + + MPI_Allreduce(&my_buf, &buf, 2, MPI_DOUBLE, MPI_SUM, world); + + b_norm = sqrt(buf[0]); + sig_new = buf[1]; + + for( i = 1; i < imax && sqrt(sig_new) / b_norm > tolerance; ++i ) { + comm->forward_comm_fix(this); //Dist_vector( d ); + sparse_matvec( &H, d, q ); + comm->reverse_comm_fix(this); //Coll_vector( q ); + + tmp1 = 0.0; +#pragma omp parallel + { + +#pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1) + for( jj = 0; jj < nn; jj++) { + ii = ilist[jj]; + if(atom->mask[ii] & groupbit) tmp1 += d[ii] * q[ii]; + } + +#pragma omp barrier +#pragma omp master + { + MPI_Allreduce(&tmp1, &tmp2, 1, MPI_DOUBLE, MPI_SUM, world); + + alpha = sig_new / tmp2; + tmp1 = 0.0; + } + +#pragma omp barrier +#pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1) + for( jj = 0; jj < nn; jj++) { + ii = ilist[jj]; + if(atom->mask[ii] & groupbit) { + x[ii] += alpha * d[ii]; + r[ii] -= alpha * q[ii]; + + // pre-conditioning + p[ii] = r[ii] * Hdia_inv[ii]; + tmp1 += r[ii] * p[ii]; + } + } + } // omp parallel + + sig_old = sig_new; + + MPI_Allreduce(&tmp1, &tmp2, 1, MPI_DOUBLE, MPI_SUM, world); + + sig_new = tmp2; + beta = sig_new / sig_old; + +#pragma omp for schedule(dynamic,50) private(ii) + for( jj = 0; jj < nn; jj++) { + ii = ilist[jj]; + if(atom->mask[ii] & groupbit) d[ii] = p[ii] + beta * d[ii]; + } + } + + if (i >= imax && comm->me == 0) { + char str[128]; + sprintf(str,"Fix qeq/reax CG convergence failed after %d iterations " + "at " BIGINT_FORMAT " step",i,update->ntimestep); + error->warning(FLERR,str); + } + + return i; +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::sparse_matvec( sparse_matrix *A, double *x, double *b ) +{ +#pragma omp parallel default(shared) + { + int i, j, itr_j; + int nn, NN, ii; + int *ilist; + int nthreads = comm->nthreads; + int tid = omp_get_thread_num(); + + if(reaxc) { + nn = reaxc->list->inum; + NN = reaxc->list->inum + reaxc->list->gnum; + ilist = reaxc->list->ilist; + } else { + nn = list->inum; + NN = list->inum + list->gnum; + ilist = list->ilist; + } + +#pragma omp for schedule(dynamic,50) + for (ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if(atom->mask[i] & groupbit) b[i] = eta[ atom->type[i] ] * x[i]; + } + +#pragma omp for schedule(dynamic,50) + for (ii = nn; ii < NN; ++ii) { + i = ilist[ii]; + if(atom->mask[i] & groupbit) b[i] = 0; + } + +#pragma omp for schedule(dynamic,50) + for (i = 0; i < NN; ++i) + for(int t=0; tmask[i] & groupbit) { + for (itr_j=A->firstnbr[i]; itr_jfirstnbr[i]+A->numnbrs[i]; itr_j++) { + j = A->jlist[itr_j]; + b[i] += A->val[itr_j] * x[j]; + + b_temp[tid][j] += A->val[itr_j] * x[i]; + } + } + } + + // Wait till b_temp accumulated +#pragma omp barrier + +#pragma omp for schedule(dynamic,50) + for (i = 0; i < NN; ++i) + for (int t = 0; t < nthreads; ++t) b[i] += b_temp[t][i]; + + } //end omp parallel +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::calculate_Q() +{ + int i; + double *q = atom->q; + + int nn; + int *ilist; + + if (reaxc) { + nn = reaxc->list->inum; + ilist = reaxc->list->ilist; + } else { + nn = list->inum; + ilist = list->ilist; + } + + double tmp1, tmp2; + tmp1 = tmp2 = 0.0; +#pragma omp parallel for schedule(dynamic,50) private(i) reduction(+:tmp1,tmp2) + for(int ii = 0; ii < nn; ii++) { + i = ilist[ii]; + if(atom->mask[i] & groupbit) { + tmp1 += s[i]; + tmp2 += t[i]; + } + } + + double my_buf[2], buf[2]; + buf[0] = 0.0; + buf[1] = 0.0; + + my_buf[0] = tmp1; + my_buf[1] = tmp2; + + MPI_Allreduce(&my_buf,&buf,2,MPI_DOUBLE,MPI_SUM,world); + + double u = buf[0] / buf[1]; + +#pragma omp parallel for schedule(static) private(i) + for (int ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if(atom->mask[i] & groupbit) { + q[i] = s[i] - u * t[i]; + + // backup s & t + for (int k = 4; k > 0; --k) { + s_hist[i][k] = s_hist[i][k-1]; + t_hist[i][k] = t_hist[i][k-1]; + } + s_hist[i][0] = s[i]; + t_hist[i][0] = t[i]; + } + } + + pack_flag = 4; + comm->forward_comm_fix( this ); //Dist_vector( atom->q ); +} + +/* ---------------------------------------------------------------------- */ + +// double FixQEqReaxOMP::parallel_norm( double *v, int n ) +// { +// int i; +// double my_sum, norm_sqr; + +// int *ilist; + +// if (reaxc) ilist = reaxc->list->ilist; +// else ilist = list->ilist; + +// my_sum = 0.0; +// norm_sqr = 0.0; + +// #pragma omp parallel for schedule(static) private(i) reduction(+:my_sum) +// for (int ii = 0; ii < n; ++ii) { +// i = ilist[ii]; +// if(atom->mask[i] & groupbit) my_sum += SQR( v[i] ); +// } + +// MPI_Allreduce( &my_sum, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); + +// return sqrt( norm_sqr ); +// } + +// /* ---------------------------------------------------------------------- */ + +// double FixQEqReaxOMP::parallel_dot( double *v1, double *v2, int n ) +// { +// int i; +// double my_dot, res; + +// int *ilist; + +// if (reaxc) ilist = reaxc->list->ilist; +// else ilist = list->ilist; + +// my_dot = 0.0; +// res = 0.0; + +// #pragma omp parallel for schedule(static) private(i) reduction(+:my_dot) +// for (int ii = 0; ii < n; ++ii) { +// i = ilist[ii]; +// if(atom->mask[i] & groupbit) my_dot += v1[i] * v2[i]; +// } + +// MPI_Allreduce( &my_dot, &res, 1, MPI_DOUBLE, MPI_SUM, world ); + +// return res; +// } + +// /* ---------------------------------------------------------------------- */ + +// double FixQEqReaxOMP::parallel_vector_acc( double *v, int n ) +// { +// int i; +// double my_acc, res; + +// int *ilist; + +// if (reaxc) ilist = reaxc->list->ilist; +// else ilist = list->ilist; + +// my_acc = 0.0; +// res = 0.0; + +// #pragma omp parallel for schedule(static) private(i) reduction(+:my_acc) +// for (int ii = 0; ii < n; ++ii) { +// i = ilist[ii]; +// if(atom->mask[i] & groupbit) my_acc += v[i]; +// } + +// MPI_Allreduce( &my_acc, &res, 1, MPI_DOUBLE, MPI_SUM, world ); + +// return res; +// } + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::vector_sum( double* dest, double c, double* v, + double d, double* y, int k ) +{ + int i; + int *ilist; + + if (reaxc) ilist = reaxc->list->ilist; + else ilist = list->ilist; + +#pragma omp parallel for schedule(static) private(i) + for (int ii=0; iimask[i] & groupbit) dest[i] = c * v[i] + d * y[i]; + } +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::vector_add( double* dest, double c, double* v, int k ) +{ + int i; + int *ilist; + + if (reaxc) ilist = reaxc->list->ilist; + else ilist = list->ilist; + +#pragma omp parallel for schedule(static) private(i) + for (int ii=0; iimask[i] & groupbit) dest[i] += c * v[i]; + } +} + +/* ---------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- */ +/* dual CG support */ +/* ---------------------------------------------------------------------- */ + +int FixQEqReaxOMP::dual_CG( double *b1, double *b2, double *x1, double *x2 ) +{ + +#ifdef OMP_TIMING + double endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + int i, j, imax; + double tmp, alpha_s, alpha_t, beta_s, beta_t, b_norm_s, b_norm_t; + double sig_old_s, sig_old_t, sig_new_s, sig_new_t; + + double my_buf[4], buf[4]; + + int nn, ii, jj; + int *ilist; + if (reaxc) { + nn = reaxc->list->inum; + ilist = reaxc->list->ilist; + } else { + nn = list->inum; + ilist = list->ilist; + } + + imax = 200; + + pack_flag = 5; // forward 2x d and reverse 2x q + dual_sparse_matvec( &H, x1, x2, q ); + comm->reverse_comm_fix( this ); //Coll_Vector( q ); + + double tmp1, tmp2, tmp3, tmp4; + tmp1 = tmp2 = tmp3 = tmp4 = 0.0; + +#pragma omp parallel for schedule(dynamic,50) private(i) reduction(+:tmp1,tmp2,tmp3,tmp4) + for( jj = 0; jj < nn; ++jj ) { + i = ilist[jj]; + if (atom->mask[i] & groupbit) { + int indxI = 2 * i; + r[indxI ] = b1[i] - q[indxI ]; + r[indxI+1] = b2[i] - q[indxI+1]; + + d[indxI ] = r[indxI ] * Hdia_inv[i]; //pre-condition + d[indxI+1] = r[indxI+1] * Hdia_inv[i]; + + tmp1 += b1[i] * b1[i]; + tmp2 += b2[i] * b2[i]; + + tmp3 += r[indxI ] * d[indxI ]; + tmp4 += r[indxI+1] * d[indxI+1]; + } + } + + my_buf[0] = tmp1; + my_buf[1] = tmp2; + my_buf[2] = tmp3; + my_buf[3] = tmp4; + + MPI_Allreduce(&my_buf, &buf, 4, MPI_DOUBLE, MPI_SUM, world); + + b_norm_s = sqrt(buf[0]); + b_norm_t = sqrt(buf[1]); + + sig_new_s = buf[2]; + sig_new_t = buf[3]; + + for( i = 1; i < imax; ++i ) { + comm->forward_comm_fix(this); //Dist_vector( d ); + dual_sparse_matvec( &H, d, q ); + comm->reverse_comm_fix(this); //Coll_vector( q ); + + tmp1 = tmp2 = 0.0; +#pragma omp parallel + { + +#pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1,tmp2) + for( jj = 0; jj < nn; jj++) { + ii = ilist[jj]; + if(atom->mask[ii] & groupbit) { + int indxI = 2 * ii; + tmp1 += d[indxI ] * q[indxI ]; + tmp2 += d[indxI+1] * q[indxI+1]; + } + } + +#pragma omp barrier +#pragma omp master + { + my_buf[0] = tmp1; + my_buf[1] = tmp2; + + MPI_Allreduce(&my_buf, &buf, 2, MPI_DOUBLE, MPI_SUM, world); + + alpha_s = sig_new_s / buf[0]; + alpha_t = sig_new_t / buf[1]; + + tmp1 = tmp2 = 0.0; + } + +#pragma omp barrier +#pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1,tmp2) + for( jj = 0; jj < nn; jj++) { + ii = ilist[jj]; + if(atom->mask[ii] & groupbit) { + int indxI = 2 * ii; + x1[ii] += alpha_s * d[indxI ]; + x2[ii] += alpha_t * d[indxI+1]; + + r[indxI ] -= alpha_s * q[indxI ]; + r[indxI+1] -= alpha_t * q[indxI+1]; + + // pre-conditioning + p[indxI ] = r[indxI ] * Hdia_inv[ii]; + p[indxI+1] = r[indxI+1] * Hdia_inv[ii]; + + tmp1 += r[indxI ] * p[indxI ]; + tmp2 += r[indxI+1] * p[indxI+1]; + } + } + } // omp parallel + + my_buf[0] = tmp1; + my_buf[1] = tmp2; + + sig_old_s = sig_new_s; + sig_old_t = sig_new_t; + + MPI_Allreduce(&my_buf, &buf, 2, MPI_DOUBLE, MPI_SUM, world); + + sig_new_s = buf[0]; + sig_new_t = buf[1]; + + if( sqrt(sig_new_s)/b_norm_s <= tolerance || sqrt(sig_new_t)/b_norm_t <= tolerance) break; + + beta_s = sig_new_s / sig_old_s; + beta_t = sig_new_t / sig_old_t; + +#pragma omp for schedule(dynamic,50) private(ii) + for( jj = 0; jj < nn; jj++) { + ii = ilist[jj]; + if(atom->mask[ii] & groupbit) { + int indxI = 2 * ii; + + d[indxI ] = p[indxI ] + beta_s * d[indxI ]; + d[indxI+1] = p[indxI+1] + beta_t * d[indxI+1]; + } + } + } + + i++; + matvecs_s = matvecs_t = i; // The plus one makes consistent with count from CG() + matvecs = i; + + // Timing info for iterating s&t together +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTECG1INDEX] += (endTimeBase-startTimeBase); + ompTimingCount[COMPUTECG1INDEX]++; + ompTimingCGCount[COMPUTECG1INDEX]+= i; + startTimeBase = endTimeBase; +#endif + + // If necessary, converge other system + if(sqrt(sig_new_s)/b_norm_s > tolerance) { + pack_flag = 2; + comm->forward_comm_fix(this); // x1 => s + + i+= CG(b1, x1); + matvecs_s = i; + } + else if(sqrt(sig_new_t)/b_norm_t > tolerance) { + pack_flag = 3; + comm->forward_comm_fix(this); // x2 => t + + i+= CG(b2, x2); + matvecs_t = i; + } + + // Timing info for remainder of s or t +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTECG2INDEX] += (endTimeBase-startTimeBase); + ompTimingCount[COMPUTECG2INDEX]++; + ompTimingCGCount[COMPUTECG2INDEX]+= i - matvecs; + startTimeBase = endTimeBase; +#endif + + if ( i >= imax && comm->me == 0) { + char str[128]; + sprintf(str,"Fix qeq/reax CG convergence failed after %d iterations " + "at " BIGINT_FORMAT " step",i,update->ntimestep); + error->warning(FLERR,str); + } + + return i; +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::dual_sparse_matvec( sparse_matrix *A, double *x1, double *x2, double *b ) +{ +#pragma omp parallel default(shared) + { + int i, j, itr_j; + int nn, NN, ii; + int *ilist; + int indxI, indxJ; + + int nthreads = comm->nthreads; + int tid = omp_get_thread_num(); + + if (reaxc) { + nn = reaxc->list->inum; + NN = reaxc->list->inum + reaxc->list->gnum; + ilist = reaxc->list->ilist; + } else { + nn = list->inum; + NN = list->inum + list->gnum; + ilist = list->ilist; + } + +#pragma omp for schedule(dynamic,50) + for( ii = 0; ii < nn; ++ii ) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + indxI = 2 * i; + b[indxI ] = eta[ atom->type[i] ] * x1[i]; + b[indxI+1] = eta[ atom->type[i] ] * x2[i]; + } + } + +#pragma omp for schedule(dynamic,50) + for( ii = nn; ii < NN; ++ii ) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + indxI = 2 * i; + b[indxI] = 0; + b[indxI+1] = 0; + } + } + +#pragma omp for schedule(dynamic,50) + for (i = 0; i < NN; ++i) { + indxI = 2 * i; + for(int t=0; tmask[i] & groupbit) { + indxI = 2 * i; + for( itr_j=A->firstnbr[i]; itr_jfirstnbr[i]+A->numnbrs[i]; itr_j++) { + j = A->jlist[itr_j]; + indxJ = 2 * j; + b[indxI ] += A->val[itr_j] * x1[j]; + b[indxI+1] += A->val[itr_j] * x2[j]; + + b_temp[tid][indxJ ] += A->val[itr_j] * x1[i]; + b_temp[tid][indxJ+1] += A->val[itr_j] * x2[i]; + } + } + } + + // Wait till b_temp accumulated +#pragma omp barrier +#pragma omp for schedule(dynamic,50) + for (i = 0; i < NN; ++i) { + indxI = 2 * i; + for (int t = 0; t < nthreads; ++t) { + b[indxI ] += b_temp[t][indxI ]; + b[indxI+1] += b_temp[t][indxI+1]; + } + } + + } // omp parallel +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::dual_sparse_matvec( sparse_matrix *A, double *x, double *b ) +{ +#pragma omp parallel default(shared) + { + int i, j, itr_j; + int nn, NN, ii; + int *ilist; + int indxI, indxJ; + + int nthreads = comm->nthreads; + int tid = omp_get_thread_num(); + + if (reaxc) { + nn = reaxc->list->inum; + NN = reaxc->list->inum + reaxc->list->gnum; + ilist = reaxc->list->ilist; + } else { + nn = list->inum; + NN = list->inum + list->gnum; + ilist = list->ilist; + } + +#pragma omp for schedule(dynamic,50) + for( ii = 0; ii < nn; ++ii ) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + indxI = 2 * i; + b[indxI ] = eta[ atom->type[i] ] * x[indxI ]; + b[indxI+1] = eta[ atom->type[i] ] * x[indxI+1]; + } + } + +#pragma omp for schedule(dynamic,50) + for( ii = nn; ii < NN; ++ii ) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + indxI = 2 * i; + b[indxI] = 0; + b[indxI+1] = 0; + } + } + +#pragma omp for schedule(dynamic,50) + for (i = 0; i < NN; ++i) { + indxI = 2 * i; + for(int t=0; tmask[i] & groupbit) { + indxI = 2 * i; + for( itr_j=A->firstnbr[i]; itr_jfirstnbr[i]+A->numnbrs[i]; itr_j++) { + j = A->jlist[itr_j]; + indxJ = 2 * j; + b[indxI ] += A->val[itr_j] * x[indxJ ]; + b[indxI+1] += A->val[itr_j] * x[indxJ+1]; + + b_temp[tid][indxJ ] += A->val[itr_j] * x[indxI ]; + b_temp[tid][indxJ+1] += A->val[itr_j] * x[indxI+1]; + } + } + } + + // Wait till b_temp accumulated +#pragma omp barrier +#pragma omp for schedule(dynamic,50) + for (i = 0; i < NN; ++i) { + indxI = 2 * i; + for (int t = 0; t < nthreads; ++t) { + b[indxI ] += b_temp[t][indxI ]; + b[indxI+1] += b_temp[t][indxI+1]; + } + } + + } // omp parallel +} diff --git a/src/USER-OMP/fix_qeq_reax_omp.h b/src/USER-OMP/fix_qeq_reax_omp.h new file mode 100644 index 0000000000..c06185ca2e --- /dev/null +++ b/src/USER-OMP/fix_qeq_reax_omp.h @@ -0,0 +1,84 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Hasan Metin Aktulga, Purdue University + (now at Lawrence Berkeley National Laboratory, hmaktulga@lbl.gov) + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(qeq/reax/omp,FixQEqReaxOMP) + +#else + +#ifndef LMP_FIX_QEQ_REAX_OMP_H +#define LMP_FIX_QEQ_REAX_OMP_H + +#include "fix_qeq_reax.h" + +namespace LAMMPS_NS { + +class FixQEqReaxOMP : public FixQEqReax { + + public: + FixQEqReaxOMP(class LAMMPS *, int, char **); + ~FixQEqReaxOMP(); + virtual void init(); + virtual void init_storage(); + virtual void pre_force(int); + virtual void post_constructor(); + + protected: + double **b_temp; + + class PairReaxCOMP *reaxc; + + int do_aspc; + int aspc_order, aspc_order_max; + double aspc_omega; + double * aspc_b; + + virtual void pertype_parameters(char*); + virtual void allocate_storage(); + virtual void deallocate_storage(); + virtual void allocate_matrix(); + virtual void init_matvec(); + virtual void compute_H(); + + virtual int CG(double*,double*); + virtual void sparse_matvec(sparse_matrix*,double*,double*); + virtual void calculate_Q(); + + /* virtual double parallel_norm( double*, int ); */ + /* virtual double parallel_dot( double*, double*, int ); */ + /* virtual double parallel_vector_acc( double*, int ); */ + + virtual void vector_sum(double*,double,double*,double,double*,int); + virtual void vector_add(double*, double, double*,int); + + // dual CG support + virtual int dual_CG(double*,double*,double*,double*); + virtual void dual_sparse_matvec(sparse_matrix*,double*,double*,double*); + virtual void dual_sparse_matvec(sparse_matrix*,double*,double*); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/fix_reaxc_species_omp.cpp b/src/USER-OMP/fix_reaxc_species_omp.cpp new file mode 100644 index 0000000000..e9ac388252 --- /dev/null +++ b/src/USER-OMP/fix_reaxc_species_omp.cpp @@ -0,0 +1,83 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Ray Shan (Sandia, tnshan@sandia.gov) + Oleg Sergeev (VNIIA, sergeev@vniia.ru) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "stdlib.h" +#include "math.h" +#include "atom.h" +#include "string.h" +#include "fix_ave_atom.h" +#include "fix_reaxc_species_omp.h" +#include "domain.h" +#include "update.h" +#include "pair_reaxc_omp.h" +#include "modify.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "comm.h" +#include "force.h" +#include "compute.h" +#include "input.h" +#include "variable.h" +#include "memory.h" +#include "error.h" +#include "reaxc_list.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +/* ---------------------------------------------------------------------- */ + +FixReaxCSpeciesOMP::FixReaxCSpeciesOMP(LAMMPS *lmp, int narg, char **arg) : + FixReaxCSpecies(lmp, narg, arg) +{ +} + +/* ---------------------------------------------------------------------- */ + +void FixReaxCSpeciesOMP::init() +{ + if (atom->tag_enable == 0) + error->all(FLERR,"Cannot use fix reax/c/species unless atoms have IDs"); + + reaxc = (PairReaxCOMP *) force->pair_match("reax/c/omp",1); + if (reaxc == NULL) error->all(FLERR,"Cannot use fix reax/c/species/omp without " + "pair_style reax/c/omp"); + + reaxc->fixspecies_flag = 1; + nvalid = update->ntimestep+nfreq; + + // check if this fix has been called twice + int count = 0; + for (int i = 0; i < modify->nfix; i++) + if (strcmp(modify->fix[i]->style,"reax/c/species/omp") == 0) count++; + if (count > 1 && comm->me == 0) + error->warning(FLERR,"More than one fix reax/c/species"); + + if (!setupflag) { + // create a compute to store properties + create_compute(); + + // create a fix to point to fix_ave_atom for averaging stored properties + create_fix(); + + setupflag = 1; + } + +} diff --git a/src/USER-OMP/fix_reaxc_species_omp.h b/src/USER-OMP/fix_reaxc_species_omp.h new file mode 100644 index 0000000000..006bb3e87a --- /dev/null +++ b/src/USER-OMP/fix_reaxc_species_omp.h @@ -0,0 +1,44 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(reax/c/species/omp,FixReaxCSpeciesOMP) + +#else + +#ifndef LMP_FIX_REAXC_SPECIES_OMP_H +#define LMP_FIX_REAXC_SPECIES_OMP_H + +#include "pair_reaxc_omp.h" +#include "fix_reaxc_species.h" + +#define BUFLEN 1000 + +namespace LAMMPS_NS { + + class FixReaxCSpeciesOMP : public FixReaxCSpecies { + + public: + FixReaxCSpeciesOMP(class LAMMPS *, int, char **); + ~FixReaxCSpeciesOMP(){}; + virtual void init(); + + private: + class PairReaxCOMP *reaxc; + + }; +} + +#endif +#endif diff --git a/src/USER-OMP/pair_reaxc_omp.cpp b/src/USER-OMP/pair_reaxc_omp.cpp new file mode 100644 index 0000000000..c7a6c27f48 --- /dev/null +++ b/src/USER-OMP/pair_reaxc_omp.cpp @@ -0,0 +1,584 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Hasan Metin Aktulga, Purdue University + (now at Lawrence Berkeley National Laboratory, hmaktulga@lbl.gov) + Per-atom energy/virial added by Ray Shan (Sandia) + Fix reax/c/bonds and fix reax/c/species for pair_style reax/c added by + Ray Shan (Sandia) +------------------------------------------------------------------------- */ + +#include "pair_reaxc_omp.h" +#include "atom.h" +#include "update.h" +#include "force.h" +#include "comm.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "modify.h" +#include "fix.h" +#include "fix_reaxc.h" +#include "citeme.h" +#include "memory.h" +#include "error.h" + +#include "reaxc_types.h" +#include "reaxc_allocate.h" +#include "reaxc_control.h" +#include "reaxc_ffield.h" +#include "reaxc_forces_omp.h" +#include "reaxc_init_md_omp.h" +#include "reaxc_io_tools.h" +#include "reaxc_list.h" +#include "reaxc_lookup.h" +#include "reaxc_reset_tools.h" +#include "reaxc_tool_box.h" +#include "reaxc_traj.h" +#include "reaxc_vector.h" +#include "fix_reaxc_bonds.h" + +using namespace LAMMPS_NS; + +#ifdef OMP_TIMING +double ompTimingData[LASTTIMINGINDEX]; +int ompTimingCount[LASTTIMINGINDEX]; +int ompTimingCGCount[LASTTIMINGINDEX]; +#endif + +/* ---------------------------------------------------------------------- */ + +PairReaxCOMP::PairReaxCOMP(LAMMPS *lmp) : PairReaxC(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + system->pair_ptr = this; + + num_nbrs_offset = NULL; + +#ifdef OMP_TIMING + for (int i=0;inum_intrs; ++i) + sfree(bonds->select.bond_list[i].bo_data.CdboReduction, "CdboReduction"); + + memory->destroy(num_nbrs_offset); + +#ifdef OMP_TIMING + int myrank; + + MPI_Comm_rank(mpi_data->world,&myrank); + + // Write screen output + if (myrank == 0 && screen) { + fprintf(screen,"\n\nWrite_Lists took %11.3lf seconds", ompTimingData[COMPUTEWLINDEX]); + + fprintf(screen,"\n\nCompute_Forces took %11.3lf seconds:", ompTimingData[COMPUTEINDEX]); + fprintf(screen,"\n ->Initial Forces: %11.3lf seconds", ompTimingData[COMPUTEIFINDEX]); + fprintf(screen,"\n ->Bond Order: %11.3lf seconds", ompTimingData[COMPUTEBOINDEX]); + fprintf(screen,"\n ->Atom Energy: %11.3lf seconds", ompTimingData[COMPUTEATOMENERGYINDEX]); + fprintf(screen,"\n ->Bond: %11.3lf seconds", ompTimingData[COMPUTEBONDSINDEX]); + fprintf(screen,"\n ->Hydrogen bonds: %11.3lf seconds", ompTimingData[COMPUTEHBONDSINDEX]); + fprintf(screen,"\n ->Torsion Angles: %11.3lf seconds", ompTimingData[COMPUTETORSIONANGLESBOINDEX]); + fprintf(screen,"\n ->Valence Angles: %11.3lf seconds", ompTimingData[COMPUTEVALENCEANGLESBOINDEX]); + fprintf(screen,"\n ->Non-Bonded For: %11.3lf seconds", ompTimingData[COMPUTENBFINDEX]); + fprintf(screen,"\n ->Total Forces: %11.3lf seconds", ompTimingData[COMPUTETFINDEX]); + + fprintf(screen,"\n\nfixQEQ: %11.3lf seconds", ompTimingData[COMPUTEQEQINDEX]); + fprintf(screen,"\n ->QEQ init: %11.3lf seconds", ompTimingData[COMPUTEINITMVINDEX]); + + double avg = double(ompTimingCGCount[COMPUTECG1INDEX]) / double(ompTimingCount[COMPUTECG1INDEX]); + fprintf(screen,"\n ->QEQ CG1: %11.3lf seconds with %4.1lf iterations on average.", ompTimingData[COMPUTECG1INDEX], avg); + + avg = double(ompTimingCGCount[COMPUTECG2INDEX]) / double(ompTimingCount[COMPUTECG2INDEX]); + fprintf(screen,"\n ->QEQ CG2: %11.3lf seconds with %4.1lf iterations on average.", ompTimingData[COMPUTECG2INDEX], avg); + fprintf(screen,"\n ->QEQ CalcQ: %11.3lf seconds\n", ompTimingData[COMPUTECALCQINDEX]); + } + + // Write logfile output + if (myrank == 0 && logfile) { + fprintf(logfile,"\n\nWrite_Lists took %11.3lf seconds", ompTimingData[COMPUTEWLINDEX]); + + fprintf(logfile,"\n\nCompute_Forces took %11.3lf seconds:", ompTimingData[COMPUTEINDEX]); + fprintf(logfile,"\n ->Initial Forces: %11.3lf seconds", ompTimingData[COMPUTEIFINDEX]); + fprintf(logfile,"\n ->Bond Order: %11.3lf seconds", ompTimingData[COMPUTEBOINDEX]); + fprintf(logfile,"\n ->Atom Energy: %11.3lf seconds", ompTimingData[COMPUTEATOMENERGYINDEX]); + fprintf(logfile,"\n ->Bond: %11.3lf seconds", ompTimingData[COMPUTEBONDSINDEX]); + fprintf(logfile,"\n ->Hydrogen bonds: %11.3lf seconds", ompTimingData[COMPUTEHBONDSINDEX]); + fprintf(logfile,"\n ->Torsion Angles: %11.3lf seconds", ompTimingData[COMPUTETORSIONANGLESBOINDEX]); + fprintf(logfile,"\n ->Valence Angles: %11.3lf seconds", ompTimingData[COMPUTEVALENCEANGLESBOINDEX]); + fprintf(logfile,"\n ->Non-Bonded For: %11.3lf seconds", ompTimingData[COMPUTENBFINDEX]); + fprintf(logfile,"\n ->Total Forces: %11.3lf seconds", ompTimingData[COMPUTETFINDEX]); + + fprintf(logfile,"\n\nfixQEQ: %11.3lf seconds", ompTimingData[COMPUTEQEQINDEX]); + fprintf(logfile,"\n ->QEQ init: %11.3lf seconds", ompTimingData[COMPUTEINITMVINDEX]); + + double avg = double(ompTimingCGCount[COMPUTECG1INDEX]) / double(ompTimingCount[COMPUTECG1INDEX]); + fprintf(logfile,"\n ->QEQ CG1: %11.3lf seconds with %4.1lf iterations on average.", ompTimingData[COMPUTECG1INDEX], avg); + + avg = double(ompTimingCGCount[COMPUTECG2INDEX]) / double(ompTimingCount[COMPUTECG2INDEX]); + fprintf(logfile,"\n ->QEQ CG2: %11.3lf seconds with %4.1lf iterations on average.", ompTimingData[COMPUTECG2INDEX], avg); + fprintf(logfile,"\n ->QEQ CalcQ: %11.3lf seconds\n", ompTimingData[COMPUTECALCQINDEX]); + } +#endif +} + +/* ---------------------------------------------------------------------- */ + +void PairReaxCOMP::compute(int eflag, int vflag) +{ + double evdwl,ecoul; + double t_start, t_end; + + // communicate num_bonds once every reneighboring + // 2 num arrays stored by fix, grab ptr to them + + if (neighbor->ago == 0) comm->forward_comm_fix(fix_reax); + int *num_bonds = fix_reax->num_bonds; + int *num_hbonds = fix_reax->num_hbonds; + + evdwl = ecoul = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else ev_unset(); + + if (vflag_global) control->virial = 1; + else control->virial = 0; + + system->n = atom->nlocal; // my atoms + system->N = atom->nlocal + atom->nghost; // mine + ghosts + system->bigN = static_cast (atom->natoms); // all atoms in the system + + system->big_box.V = 0; + system->big_box.box_norms[0] = 0; + system->big_box.box_norms[1] = 0; + system->big_box.box_norms[2] = 0; + if( comm->me == 0 ) t_start = MPI_Wtime(); + // setup data structures + + setup(); + + Reset( system, control, data, workspace, &lists, world ); + + // Why not update workspace like in MPI-only code? + // Using the MPI-only way messes up the hb energy + //workspace->realloc.num_far = write_reax_lists(); + write_reax_lists(); + + // timing for filling in the reax lists + if( comm->me == 0 ) { + t_end = MPI_Wtime(); + data->timing.nbrs = t_end - t_start; + } + + // forces + +#ifdef OMP_TIMING + double startTimeBase,endTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + Compute_ForcesOMP(system,control,data,workspace,&lists,out_control,mpi_data); + read_reax_forces(vflag); + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEINDEX] += (endTimeBase-startTimeBase); +#endif + +#pragma omp parallel for schedule(static) + for(int k = 0; k < system->N; ++k) { + num_bonds[k] = system->my_atoms[k].num_bonds; + num_hbonds[k] = system->my_atoms[k].num_hbonds; + } + + // energies and pressure + + if (eflag_global) { + evdwl += data->my_en.e_bond; + evdwl += data->my_en.e_ov; + evdwl += data->my_en.e_un; + evdwl += data->my_en.e_lp; + evdwl += data->my_en.e_ang; + evdwl += data->my_en.e_pen; + evdwl += data->my_en.e_coa; + evdwl += data->my_en.e_hb; + evdwl += data->my_en.e_tor; + evdwl += data->my_en.e_con; + evdwl += data->my_en.e_vdW; + + ecoul += data->my_en.e_ele; + ecoul += data->my_en.e_pol; + + // Store the different parts of the energy + // in a list for output by compute pair command + + pvector[0] = data->my_en.e_bond; + pvector[1] = data->my_en.e_ov + data->my_en.e_un; + pvector[2] = data->my_en.e_lp; + pvector[3] = 0.0; + pvector[4] = data->my_en.e_ang; + pvector[5] = data->my_en.e_pen; + pvector[6] = data->my_en.e_coa; + pvector[7] = data->my_en.e_hb; + pvector[8] = data->my_en.e_tor; + pvector[9] = data->my_en.e_con; + pvector[10] = data->my_en.e_vdW; + pvector[11] = data->my_en.e_ele; + pvector[12] = 0.0; + pvector[13] = data->my_en.e_pol; + } + + if (vflag_fdotr) virial_fdotr_compute(); + +// Set internal timestep counter to that of LAMMPS + + data->step = update->ntimestep; + + Output_Results( system, control, data, &lists, out_control, mpi_data ); + + // populate tmpid and tmpbo arrays for fix reax/c/species + int i, j; + + if(fixspecies_flag) { + if (system->N > nmax) { + memory->destroy(tmpid); + memory->destroy(tmpbo); + nmax = system->N; + memory->create(tmpid,nmax,MAXSPECBOND,"pair:tmpid"); + memory->create(tmpbo,nmax,MAXSPECBOND,"pair:tmpbo"); + } + +#pragma omp parallel for collapse(2) schedule(static) default(shared) + for (i = 0; i < system->N; i ++) + for (j = 0; j < MAXSPECBOND; j ++) { + tmpbo[i][j] = 0.0; + tmpid[i][j] = 0; + } + + FindBond(); + } +} + +/* ---------------------------------------------------------------------- */ + +void PairReaxCOMP::init_style( ) +{ + if (!atom->q_flag) + error->all(FLERR,"Pair reax/c/omp requires atom attribute q"); + + // firstwarn = 1; + + int iqeq = modify->find_fix_by_style("qeq/reax/omp"); + if (iqeq < 0 && qeqflag == 1) + error->all(FLERR,"Pair reax/c/omp requires use of fix qeq/reax/omp"); + + system->n = atom->nlocal; // my atoms + system->N = atom->nlocal + atom->nghost; // mine + ghosts + system->bigN = static_cast (atom->natoms); // all atoms in the system + system->wsize = comm->nprocs; + + system->big_box.V = 0; + system->big_box.box_norms[0] = 0; + system->big_box.box_norms[1] = 0; + system->big_box.box_norms[2] = 0; + + if (atom->tag_enable == 0) + error->all(FLERR,"Pair style reax/c/omp requires atom IDs"); + if (force->newton_pair == 0) + error->all(FLERR,"Pair style reax/c/omp requires newton pair on"); + + // need a half neighbor list w/ Newton off and ghost neighbors + // built whenever re-neighboring occurs + + int irequest = neighbor->request(this,instance_me); + neighbor->requests[irequest]->newton = 2; + neighbor->requests[irequest]->ghost = 1; + + cutmax = MAX3(control->nonb_cut, control->hbond_cut, 2*control->bond_cut); + + for( int i = 0; i < LIST_N; ++i ) + lists[i].allocated = 0; + + if (fix_reax == NULL) { + char **fixarg = new char*[3]; + fixarg[0] = (char *) "REAXC"; + fixarg[1] = (char *) "all"; + fixarg[2] = (char *) "REAXC"; + modify->add_fix(3,fixarg); + delete [] fixarg; + fix_reax = (FixReaxC *) modify->fix[modify->nfix-1]; + } + +#pragma omp parallel + { control->nthreads = omp_get_num_threads(); } +} + +/* ---------------------------------------------------------------------- */ + +void PairReaxCOMP::setup( ) +{ + int oldN; + int mincap = system->mincap; + double safezone = system->safezone; + + system->n = atom->nlocal; // my atoms + system->N = atom->nlocal + atom->nghost; // mine + ghosts + oldN = system->N; + system->bigN = static_cast (atom->natoms); // all atoms in the system + + if (system->N > nmax) { + memory->destroy(num_nbrs_offset); + // Don't update nmax here. It is updated at end of compute(). + memory->create(num_nbrs_offset, system->N, "pair:num_nbrs_offset"); + } + + if (setup_flag == 0) { + + setup_flag = 1; + + int *num_bonds = fix_reax->num_bonds; + int *num_hbonds = fix_reax->num_hbonds; + + control->vlist_cut = neighbor->cutneighmax; + + // determine the local and total capacity + + system->local_cap = MAX( (int)(system->n * safezone), mincap ); + system->total_cap = MAX( (int)(system->N * safezone), mincap ); + + // initialize my data structures + + PreAllocate_Space( system, control, workspace, world ); + write_reax_atoms(); + + int num_nbrs = estimate_reax_lists(); + if(!Make_List(system->total_cap, num_nbrs, TYP_FAR_NEIGHBOR, + lists+FAR_NBRS, world)) + error->all(FLERR,"Pair reax/c problem in far neighbor list"); + + write_reax_lists(); + + InitializeOMP( system, control, data, workspace, &lists, out_control, + mpi_data, world ); + + for( int k = 0; k < system->N; ++k ) { + num_bonds[k] = system->my_atoms[k].num_bonds; + num_hbonds[k] = system->my_atoms[k].num_hbonds; + } + + } else { + + // fill in reax datastructures + + write_reax_atoms(); + + // reset the bond list info for new atoms + + for(int k = oldN; k < system->N; ++k) + Set_End_Index( k, Start_Index( k, lists+BONDS ), lists+BONDS ); + + // estimate far neighbor list size + // Not present in MPI-only version + workspace->realloc.num_far = estimate_reax_lists(); + + // check if I need to shrink/extend my data-structs + + ReAllocate( system, control, data, workspace, &lists, mpi_data ); + } +} + +/* ---------------------------------------------------------------------- */ + +void PairReaxCOMP::write_reax_atoms() +{ + int *num_bonds = fix_reax->num_bonds; + int *num_hbonds = fix_reax->num_hbonds; + + if (system->N > system->total_cap) + error->all(FLERR,"Too many ghost atoms"); + +#pragma omp parallel for schedule(static) default(shared) + for( int i = 0; i < system->N; ++i ){ + system->my_atoms[i].orig_id = atom->tag[i]; + system->my_atoms[i].type = map[atom->type[i]]; + system->my_atoms[i].x[0] = atom->x[i][0]; + system->my_atoms[i].x[1] = atom->x[i][1]; + system->my_atoms[i].x[2] = atom->x[i][2]; + system->my_atoms[i].q = atom->q[i]; + system->my_atoms[i].num_bonds = num_bonds[i]; + system->my_atoms[i].num_hbonds = num_hbonds[i]; + } +} + +/* ---------------------------------------------------------------------- */ + +int PairReaxCOMP::estimate_reax_lists() +{ + int i; + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int numall = list->inum + list->gnum; + int mincap = system->mincap; + + // for good performance in the OpenMP implementation, each thread needs + // to know where to place the neighbors of the atoms it is responsible for. + // The sumscan values for the list->numneigh will be used to determine the + // neighbor offset of each atom. Note that this may cause some significant + // memory overhead if delayed neighboring is used - so it may be desirable + // to work on this part to reduce the memory footprint of the far_nbrs list. + + int num_nbrs = 0; + + for (int itr_i = 0; itr_i < numall; ++itr_i) { + i = ilist[itr_i]; + num_nbrs += numneigh[i]; + } + + int new_estimate = MAX (num_nbrs, mincap*MIN_NBRS); + + return new_estimate; +} + +/* ---------------------------------------------------------------------- */ + +int PairReaxCOMP::write_reax_lists() +{ +#ifdef OMP_TIMING + double startTimeBase, endTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + int itr_i, itr_j, i, j, num_mynbrs; + int *jlist; + double d_sqr, dist, cutoff_sqr; + rvec dvec; + + double **x = atom->x; + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + reax_list *far_nbrs = lists + FAR_NBRS; + far_neighbor_data *far_list = far_nbrs->select.far_nbr_list; + + int num_nbrs = 0; + int inum = list->inum; + int gnum = list->gnum; + int numall = inum + gnum; + + // sumscan of the number of neighbors per atom to determine the offsets + // most likely, we are overallocating. desirable to work on this part + // to reduce the memory footprint of the far_nbrs list. + + num_nbrs = 0; + + for (itr_i = 0; itr_i < numall; ++itr_i) { + i = ilist[itr_i]; + num_nbrs_offset[i] = num_nbrs; + num_nbrs += numneigh[i]; + } + +//#pragma omp parallel for schedule(guided) default(shared) +#pragma omp parallel for schedule(dynamic,50) default(shared) \ + private(itr_i, itr_j, i, j, jlist, cutoff_sqr, num_mynbrs, d_sqr, dvec, dist) + for (itr_i = 0; itr_i < numall; ++itr_i) { + i = ilist[itr_i]; + jlist = firstneigh[i]; + Set_Start_Index( i, num_nbrs_offset[i], far_nbrs ); + + if (i < inum) + cutoff_sqr = control->nonb_cut*control->nonb_cut; + else + cutoff_sqr = control->bond_cut*control->bond_cut; + + num_mynbrs = 0; + + for (itr_j = 0; itr_j < numneigh[i]; ++itr_j) { + j = jlist[itr_j]; + j &= NEIGHMASK; + get_distance( x[j], x[i], &d_sqr, &dvec ); + + if (d_sqr <= cutoff_sqr) { + dist = sqrt( d_sqr ); + set_far_nbr( &far_list[num_nbrs_offset[i] + num_mynbrs], j, dist, dvec ); + ++num_mynbrs; + } + } + Set_End_Index( i, num_nbrs_offset[i] + num_mynbrs, far_nbrs ); + } + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEWLINDEX] += (endTimeBase-startTimeBase); +#endif + + return num_nbrs; +} + +/* ---------------------------------------------------------------------- */ + +void PairReaxCOMP::read_reax_forces(int vflag) +{ +#pragma omp parallel for schedule(static) default(shared) + for( int i = 0; i < system->N; ++i ) { + system->my_atoms[i].f[0] = workspace->f[i][0]; + system->my_atoms[i].f[1] = workspace->f[i][1]; + system->my_atoms[i].f[2] = workspace->f[i][2]; + + atom->f[i][0] = -workspace->f[i][0]; + atom->f[i][1] = -workspace->f[i][1]; + atom->f[i][2] = -workspace->f[i][2]; + } +} + +/* ---------------------------------------------------------------------- */ + +void PairReaxCOMP::FindBond() +{ + int i, ii, j, pj, jtag, nj, jtmp, jj; + double bo_tmp, bo_cut, rij, rsq; + + bond_data *bo_ij; + bo_cut = 0.10; + +#pragma omp parallel for schedule(static) default(shared) \ + private(i, nj, pj, bo_ij, j, bo_tmp) + for (i = 0; i < system->n; i++) { + nj = 0; + for( pj = Start_Index(i, lists); pj < End_Index(i, lists); ++pj ) { + bo_ij = &( lists->select.bond_list[pj] ); + j = bo_ij->nbr; + if (j < i) continue; + + bo_tmp = bo_ij->bo_data.BO; + + if (bo_tmp >= bo_cut ) { + tmpid[i][nj] = j; + tmpbo[i][nj] = bo_tmp; + nj ++; + if (nj > MAXSPECBOND) error->all(FLERR,"Increase MAXSPECBOND in fix_reaxc_species.h"); + } + } + } +} + diff --git a/src/USER-OMP/pair_reaxc_omp.h b/src/USER-OMP/pair_reaxc_omp.h new file mode 100644 index 0000000000..5d5c7e145b --- /dev/null +++ b/src/USER-OMP/pair_reaxc_omp.h @@ -0,0 +1,113 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Hasan Metin Aktulga, Purdue University + (now at Lawrence Berkeley National Laboratory, hmaktulga@lbl.gov) + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(reax/c/omp,PairReaxCOMP) + +#else + +#ifndef LMP_PAIR_REAXC_OMP_H +#define LMP_PAIR_REAXC_OMP_H + +#include "pair_reaxc.h" +#include "thr_omp.h" +#include "suffix.h" + +namespace LAMMPS_NS { + +class PairReaxCOMP : public PairReaxC, public ThrOMP { + public: + PairReaxCOMP(class LAMMPS *); + ~PairReaxCOMP(); + virtual void compute(int, int); + virtual void init_style(); + + inline FixOMP *getFixOMP() { + return fix; + }; + + inline void ev_setup_thr_proxy(int eflagparm, int vflagparm, int nallparm, + double *eatomparm, double **vatomparm, ThrData *thrparm) { + ev_setup_thr(eflagparm, vflagparm, nallparm, eatomparm, vatomparm, thrparm); + }; + + // reduce per thread data as needed + inline void reduce_thr_proxy(void * const styleparm, const int eflagparm, + const int vflagparm, ThrData * const thrparm) { + reduce_thr(styleparm, eflagparm, vflagparm, thrparm); + } + + inline void ev_tally_thr_proxy(Pair * const pairparm, const int iparm, const int jparm, + const int nlocalparm, const int newton_pairparm, + const double evdwlparm, const double ecoulparm, + const double fpairparm, const double delxparm, + const double delyparm, const double delzparm, + ThrData * const thrparm) { + ev_tally_thr(pairparm, iparm, jparm, nlocalparm, newton_pairparm, + evdwlparm, ecoulparm, fpairparm, delxparm, delyparm, delzparm, thrparm); + } + + inline void ev_tally_xyz_thr_proxy(Pair * const pairparm, const int iparm, const int jparm, + const int nlocalparm, const int newton_pairparm, + const double evdwlparm, const double ecoulparm, + const double fxparm, const double fyparm, const double fzparm, + const double delxparm, const double delyparm, + const double delzparm, ThrData * const thrparm) { + ev_tally_xyz_thr(pairparm, iparm, jparm, nlocalparm, newton_pairparm, + evdwlparm, ecoulparm, fxparm, fyparm, fzparm, + delxparm, delyparm, delzparm, thrparm); + } + + inline void ev_tally3_thr_proxy(Pair * const pairparm,int i, int j, int k, + double evdwl, double ecoul, double *fj, double *fk, + double *drji, double *drki, ThrData * const thrparm) { + ev_tally3_thr(pairparm, i, j, k, evdwl, ecoul, fj, fk, drji, drki, thrparm); + } + + protected: + virtual void setup(); + virtual void write_reax_atoms(); + virtual int estimate_reax_lists(); + virtual int write_reax_lists(); + virtual void read_reax_forces(int); + virtual void FindBond(); + + // work array used in write_reax_lists() + int * num_nbrs_offset; +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Too many ghost atoms + +Number of ghost atoms has increased too much during simulation and has exceeded +the size of reax/c arrays. Increase safe_zone and min_cap in pair_style reax/c +command + +*/ diff --git a/src/USER-OMP/reaxc_bond_orders_omp.cpp b/src/USER-OMP/reaxc_bond_orders_omp.cpp new file mode 100644 index 0000000000..1d32cbd34f --- /dev/null +++ b/src/USER-OMP/reaxc_bond_orders_omp.cpp @@ -0,0 +1,711 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#include +#include "pair_reaxc_omp.h" +#include "reaxc_types.h" +#include "reaxc_bond_orders_omp.h" +#include "reaxc_list.h" +#include "reaxc_vector.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void Add_dBond_to_ForcesOMP( reax_system *system, int i, int pj, + storage *workspace, reax_list **lists ) { + reax_list *bonds = (*lists) + BONDS; + bond_data *nbr_j, *nbr_k; + bond_order_data *bo_ij, *bo_ji; + dbond_coefficients coef; + int pk, k, j; + + PairReaxCOMP *pair_reax_ptr = static_cast(system->pair_ptr); + + int tid = omp_get_thread_num(); + ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + long reductionOffset = (system->N * tid); + + /* Virial Tallying variables */ + double f_scaler; + rvec fi_tmp, fj_tmp, fk_tmp, delij, delji, delki, delkj, temp; + + /* Initializations */ + nbr_j = &(bonds->select.bond_list[pj]); + j = nbr_j->nbr; + bo_ij = &(nbr_j->bo_data); + bo_ji = &(bonds->select.bond_list[ nbr_j->sym_index ].bo_data); + + double c = bo_ij->Cdbo + bo_ji->Cdbo; + coef.C1dbo = bo_ij->C1dbo * c; + coef.C2dbo = bo_ij->C2dbo * c; + coef.C3dbo = bo_ij->C3dbo * c; + + c = bo_ij->Cdbopi + bo_ji->Cdbopi; + coef.C1dbopi = bo_ij->C1dbopi * c; + coef.C2dbopi = bo_ij->C2dbopi * c; + coef.C3dbopi = bo_ij->C3dbopi * c; + coef.C4dbopi = bo_ij->C4dbopi * c; + + c = bo_ij->Cdbopi2 + bo_ji->Cdbopi2; + coef.C1dbopi2 = bo_ij->C1dbopi2 * c; + coef.C2dbopi2 = bo_ij->C2dbopi2 * c; + coef.C3dbopi2 = bo_ij->C3dbopi2 * c; + coef.C4dbopi2 = bo_ij->C4dbopi2 * c; + + c = workspace->CdDelta[i] + workspace->CdDelta[j]; + coef.C1dDelta = bo_ij->C1dbo * c; + coef.C2dDelta = bo_ij->C2dbo * c; + coef.C3dDelta = bo_ij->C3dbo * c; + + // The same "c" refactoring here can be replicated below in Add_dBond_to_Forces_NPTOMP(), but + // I'd prefer to wait for a test to verify changes before doing so (just to be safe). + + // forces on i + // rvec_Scale( temp, coef.C1dbo, bo_ij->dBOp ); + // rvec_ScaledAdd( temp, coef.C2dbo, workspace->dDeltap_self[i] ); + // rvec_ScaledAdd( temp, coef.C1dDelta, bo_ij->dBOp ); + // rvec_ScaledAdd( temp, coef.C2dDelta, workspace->dDeltap_self[i] ); + // rvec_ScaledAdd( temp, coef.C1dbopi, bo_ij->dln_BOp_pi ); + // rvec_ScaledAdd( temp, coef.C2dbopi, bo_ij->dBOp ); + // rvec_ScaledAdd( temp, coef.C3dbopi, workspace->dDeltap_self[i]); + // rvec_ScaledAdd( temp, coef.C1dbopi2, bo_ij->dln_BOp_pi2 ); + // rvec_ScaledAdd( temp, coef.C2dbopi2, bo_ij->dBOp ); + // rvec_ScaledAdd( temp, coef.C3dbopi2, workspace->dDeltap_self[i] ); + + c = (coef.C1dbo + coef.C1dDelta + coef.C2dbopi + coef.C2dbopi2); + rvec_Scale( temp, c, bo_ij->dBOp ); + + c = (coef.C2dbo + coef.C2dDelta + coef.C3dbopi + coef.C3dbopi2); + rvec_ScaledAdd( temp, c, workspace->dDeltap_self[i] ); + + rvec_ScaledAdd( temp, coef.C1dbopi, bo_ij->dln_BOp_pi ); + rvec_ScaledAdd( temp, coef.C1dbopi2, bo_ij->dln_BOp_pi2 ); + + rvec_Add(workspace->forceReduction[reductionOffset+i],temp ); + + if( system->pair_ptr->vflag_atom) { + rvec_Scale(fi_tmp, -1.0, temp); + rvec_ScaledSum( delij, 1., system->my_atoms[i].x,-1., system->my_atoms[j].x ); + + pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,i,j,system->N,0,0,0, + fi_tmp[0],fi_tmp[1],fi_tmp[2], + delij[0],delij[1],delij[2],thr); + } + + // forces on j + // rvec_Scale( temp, -coef.C1dbo, bo_ij->dBOp ); + // rvec_ScaledAdd( temp, coef.C3dbo, workspace->dDeltap_self[j] ); + // rvec_ScaledAdd( temp, -coef.C1dDelta, bo_ij->dBOp ); + // rvec_ScaledAdd( temp, coef.C3dDelta, workspace->dDeltap_self[j]); + // rvec_ScaledAdd( temp, -coef.C1dbopi, bo_ij->dln_BOp_pi ); + // rvec_ScaledAdd( temp, -coef.C2dbopi, bo_ij->dBOp ); + // rvec_ScaledAdd( temp, coef.C4dbopi, workspace->dDeltap_self[j]); + // rvec_ScaledAdd( temp, -coef.C1dbopi2, bo_ij->dln_BOp_pi2 ); + // rvec_ScaledAdd( temp, -coef.C2dbopi2, bo_ij->dBOp ); + // rvec_ScaledAdd( temp, coef.C4dbopi2, workspace->dDeltap_self[j]); + + + c = -(coef.C1dbo + coef.C1dDelta + coef.C2dbopi + coef.C2dbopi2); + rvec_Scale( temp, c, bo_ij->dBOp ); + + c = (coef.C3dbo + coef.C3dDelta + coef.C4dbopi + coef.C4dbopi2); + rvec_ScaledAdd( temp, c, workspace->dDeltap_self[j] ); + + rvec_ScaledAdd( temp, -coef.C1dbopi, bo_ij->dln_BOp_pi ); + rvec_ScaledAdd( temp, -coef.C1dbopi2, bo_ij->dln_BOp_pi2 ); + + + rvec_Add(workspace->forceReduction[reductionOffset+j],temp ); + + if( system->pair_ptr->vflag_atom) { + rvec_Scale(fj_tmp, -1.0, temp); + rvec_ScaledSum( delji, 1., system->my_atoms[j].x,-1., system->my_atoms[i].x ); + + pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,j,i,system->N,0,0,0, + fj_tmp[0],fj_tmp[1],fj_tmp[2], + delji[0],delji[1],delji[2],thr); + } + + // forces on k: i neighbor + for( pk = Start_Index(i, bonds); pk < End_Index(i, bonds); ++pk ) { + nbr_k = &(bonds->select.bond_list[pk]); + k = nbr_k->nbr; + + // rvec_Scale( temp, -coef.C2dbo, nbr_k->bo_data.dBOp); + // rvec_ScaledAdd( temp, -coef.C2dDelta, nbr_k->bo_data.dBOp); + // rvec_ScaledAdd( temp, -coef.C3dbopi, nbr_k->bo_data.dBOp); + // rvec_ScaledAdd( temp, -coef.C3dbopi2, nbr_k->bo_data.dBOp); + + const double c = -(coef.C2dbo + coef.C2dDelta + coef.C3dbopi + coef.C3dbopi2); + rvec_Scale(temp, c, nbr_k->bo_data.dBOp); + + rvec_Add(workspace->forceReduction[reductionOffset+k],temp ); + + if( system->pair_ptr->vflag_atom ) { + rvec_Scale(fk_tmp, -1.0, temp); + rvec_ScaledSum(delki,1.,system->my_atoms[k].x,-1.,system->my_atoms[i].x); + + pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,k,i,system->N,0,0,0, + fk_tmp[0],fk_tmp[1],fk_tmp[2], + delki[0],delki[1],delki[2],thr); + rvec_ScaledSum(delkj,1.,system->my_atoms[k].x,-1.,system->my_atoms[j].x); + + pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,k,j,system->N,0,0,0, + fk_tmp[0],fk_tmp[1],fk_tmp[2], + delkj[0],delkj[1],delkj[2],thr); + } + } + + // forces on k: j neighbor + for( pk = Start_Index(j, bonds); pk < End_Index(j, bonds); ++pk ) { + nbr_k = &(bonds->select.bond_list[pk]); + k = nbr_k->nbr; + + // rvec_Scale( temp, -coef.C3dbo, nbr_k->bo_data.dBOp ); + // rvec_ScaledAdd( temp, -coef.C3dDelta, nbr_k->bo_data.dBOp); + // rvec_ScaledAdd( temp, -coef.C4dbopi, nbr_k->bo_data.dBOp); + // rvec_ScaledAdd( temp, -coef.C4dbopi2, nbr_k->bo_data.dBOp); + + const double c = -(coef.C3dbo + coef.C3dDelta + coef.C4dbopi + coef.C4dbopi2); + rvec_Scale(temp, c, nbr_k->bo_data.dBOp); + + rvec_Add(workspace->forceReduction[reductionOffset+k],temp ); + + if( system->pair_ptr->vflag_atom ) { + rvec_Scale(fk_tmp, -1.0, temp); + rvec_ScaledSum(delki,1.,system->my_atoms[k].x,-1.,system->my_atoms[i].x); + + pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,k,i,system->N,0,0,0, + fk_tmp[0],fk_tmp[1],fk_tmp[2], + delki[0],delki[1],delki[2],thr); + + rvec_ScaledSum(delkj,1.,system->my_atoms[k].x,-1.,system->my_atoms[j].x); + + pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,k,j,system->N,0,0,0, + fk_tmp[0],fk_tmp[1],fk_tmp[2], + delkj[0],delkj[1],delkj[2],thr); + } + } +} + +/* ---------------------------------------------------------------------- */ + +void Add_dBond_to_Forces_NPTOMP( reax_system *system, int i, int pj, simulation_data *data, + storage *workspace, reax_list **lists ) { + reax_list *bonds = (*lists) + BONDS; + bond_data *nbr_j, *nbr_k; + bond_order_data *bo_ij, *bo_ji; + dbond_coefficients coef; + rvec temp, ext_press; + ivec rel_box; + int pk, k, j; + + PairReaxCOMP *pair_reax_ptr = static_cast(system->pair_ptr); + + int tid = omp_get_thread_num(); + ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + long reductionOffset = (system->N * tid); + + /* Initializations */ + nbr_j = &(bonds->select.bond_list[pj]); + j = nbr_j->nbr; + bo_ij = &(nbr_j->bo_data); + bo_ji = &(bonds->select.bond_list[ nbr_j->sym_index ].bo_data); + + coef.C1dbo = bo_ij->C1dbo * (bo_ij->Cdbo + bo_ji->Cdbo); + coef.C2dbo = bo_ij->C2dbo * (bo_ij->Cdbo + bo_ji->Cdbo); + coef.C3dbo = bo_ij->C3dbo * (bo_ij->Cdbo + bo_ji->Cdbo); + + coef.C1dbopi = bo_ij->C1dbopi * (bo_ij->Cdbopi + bo_ji->Cdbopi); + coef.C2dbopi = bo_ij->C2dbopi * (bo_ij->Cdbopi + bo_ji->Cdbopi); + coef.C3dbopi = bo_ij->C3dbopi * (bo_ij->Cdbopi + bo_ji->Cdbopi); + coef.C4dbopi = bo_ij->C4dbopi * (bo_ij->Cdbopi + bo_ji->Cdbopi); + + coef.C1dbopi2 = bo_ij->C1dbopi2 * (bo_ij->Cdbopi2 + bo_ji->Cdbopi2); + coef.C2dbopi2 = bo_ij->C2dbopi2 * (bo_ij->Cdbopi2 + bo_ji->Cdbopi2); + coef.C3dbopi2 = bo_ij->C3dbopi2 * (bo_ij->Cdbopi2 + bo_ji->Cdbopi2); + coef.C4dbopi2 = bo_ij->C4dbopi2 * (bo_ij->Cdbopi2 + bo_ji->Cdbopi2); + + coef.C1dDelta = bo_ij->C1dbo * (workspace->CdDelta[i]+workspace->CdDelta[j]); + coef.C2dDelta = bo_ij->C2dbo * (workspace->CdDelta[i]+workspace->CdDelta[j]); + coef.C3dDelta = bo_ij->C3dbo * (workspace->CdDelta[i]+workspace->CdDelta[j]); + + + /************************************ + * forces related to atom i * + * first neighbors of atom i * + ************************************/ + for( pk = Start_Index(i, bonds); pk < End_Index(i, bonds); ++pk ) { + nbr_k = &(bonds->select.bond_list[pk]); + k = nbr_k->nbr; + + rvec_Scale(temp, -coef.C2dbo, nbr_k->bo_data.dBOp); /*2nd, dBO*/ + rvec_ScaledAdd(temp, -coef.C2dDelta, nbr_k->bo_data.dBOp);/*dDelta*/ + rvec_ScaledAdd(temp, -coef.C3dbopi, nbr_k->bo_data.dBOp); /*3rd, dBOpi*/ + rvec_ScaledAdd(temp, -coef.C3dbopi2, nbr_k->bo_data.dBOp);/*3rd, dBOpi2*/ + + /* force */ + rvec_Add(workspace->forceReduction[reductionOffset+k],temp ); + + /* pressure */ + rvec_iMultiply( ext_press, nbr_k->rel_box, temp ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + } + + /* then atom i itself */ + rvec_Scale( temp, coef.C1dbo, bo_ij->dBOp ); /*1st,dBO*/ + rvec_ScaledAdd( temp, coef.C2dbo, workspace->dDeltap_self[i] ); /*2nd,dBO*/ + rvec_ScaledAdd( temp, coef.C1dDelta, bo_ij->dBOp ); /*1st,dBO*/ + rvec_ScaledAdd( temp, coef.C2dDelta, workspace->dDeltap_self[i] );/*2nd,dBO*/ + rvec_ScaledAdd( temp, coef.C1dbopi, bo_ij->dln_BOp_pi ); /*1st,dBOpi*/ + rvec_ScaledAdd( temp, coef.C2dbopi, bo_ij->dBOp ); /*2nd,dBOpi*/ + rvec_ScaledAdd( temp, coef.C3dbopi, workspace->dDeltap_self[i]);/*3rd,dBOpi*/ + + rvec_ScaledAdd( temp, coef.C1dbopi2, bo_ij->dln_BOp_pi2 ); /*1st,dBO_pi2*/ + rvec_ScaledAdd( temp, coef.C2dbopi2, bo_ij->dBOp ); /*2nd,dBO_pi2*/ + rvec_ScaledAdd( temp, coef.C3dbopi2, workspace->dDeltap_self[i] );/*3rd*/ + + /* force */ + rvec_Add(workspace->forceReduction[reductionOffset+i],temp ); + + for( pk = Start_Index(j, bonds); pk < End_Index(j, bonds); ++pk ) { + nbr_k = &(bonds->select.bond_list[pk]); + k = nbr_k->nbr; + + rvec_Scale( temp, -coef.C3dbo, nbr_k->bo_data.dBOp ); /*3rd,dBO*/ + rvec_ScaledAdd( temp, -coef.C3dDelta, nbr_k->bo_data.dBOp);/*dDelta*/ + rvec_ScaledAdd( temp, -coef.C4dbopi, nbr_k->bo_data.dBOp); /*4th,dBOpi*/ + rvec_ScaledAdd( temp, -coef.C4dbopi2, nbr_k->bo_data.dBOp);/*4th,dBOpi2*/ + + /* force */ + rvec_Add(workspace->forceReduction[reductionOffset+k],temp ); + + /* pressure */ + if( k != i ) { + ivec_Sum( rel_box, nbr_k->rel_box, nbr_j->rel_box ); //rel_box(k, i) + rvec_iMultiply( ext_press, rel_box, temp ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + } + } + + /* then atom j itself */ + rvec_Scale( temp, -coef.C1dbo, bo_ij->dBOp ); /*1st, dBO*/ + rvec_ScaledAdd( temp, coef.C3dbo, workspace->dDeltap_self[j] ); /*2nd, dBO*/ + rvec_ScaledAdd( temp, -coef.C1dDelta, bo_ij->dBOp ); /*1st, dBO*/ + rvec_ScaledAdd( temp, coef.C3dDelta, workspace->dDeltap_self[j]);/*2nd, dBO*/ + + rvec_ScaledAdd( temp, -coef.C1dbopi, bo_ij->dln_BOp_pi ); /*1st,dBOpi*/ + rvec_ScaledAdd( temp, -coef.C2dbopi, bo_ij->dBOp ); /*2nd,dBOpi*/ + rvec_ScaledAdd( temp, coef.C4dbopi, workspace->dDeltap_self[j]);/*3rd,dBOpi*/ + + rvec_ScaledAdd( temp, -coef.C1dbopi2, bo_ij->dln_BOp_pi2 ); /*1st,dBOpi2*/ + rvec_ScaledAdd( temp, -coef.C2dbopi2, bo_ij->dBOp ); /*2nd,dBOpi2*/ + rvec_ScaledAdd( temp,coef.C4dbopi2,workspace->dDeltap_self[j]);/*3rd,dBOpi2*/ + + /* force */ + rvec_Add(workspace->forceReduction[reductionOffset+j],temp ); + + /* pressure */ + rvec_iMultiply( ext_press, nbr_j->rel_box, temp ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); +} + +/* ---------------------------------------------------------------------- */ + +int BOp_OMP( storage *workspace, reax_list *bonds, double bo_cut, + int i, int btop_i, far_neighbor_data *nbr_pj, + single_body_parameters *sbp_i, single_body_parameters *sbp_j, + two_body_parameters *twbp, + int btop_j, double C12, double C34, double C56, double BO, double BO_s, double BO_pi, double BO_pi2) { + int j; + double rr2; + double Cln_BOp_s, Cln_BOp_pi, Cln_BOp_pi2; + bond_data *ibond, *jbond; + bond_order_data *bo_ij, *bo_ji; + + j = nbr_pj->nbr; + rr2 = 1.0 / SQR(nbr_pj->d); + + // Top portion of BOp() moved to reaxc_forces_omp.cpp::Init_Forces_noQEq_OMP() + + /* Initially BO values are the uncorrected ones, page 1 */ + + /****** bonds i-j and j-i ******/ + ibond = &( bonds->select.bond_list[btop_i] ); + jbond = &( bonds->select.bond_list[btop_j] ); + + ibond->nbr = j; + jbond->nbr = i; + ibond->d = nbr_pj->d; + jbond->d = nbr_pj->d; + rvec_Copy( ibond->dvec, nbr_pj->dvec ); + rvec_Scale( jbond->dvec, -1, nbr_pj->dvec ); + ivec_Copy( ibond->rel_box, nbr_pj->rel_box ); + ivec_Scale( jbond->rel_box, -1, nbr_pj->rel_box ); + ibond->dbond_index = btop_i; + jbond->dbond_index = btop_i; + ibond->sym_index = btop_j; + jbond->sym_index = btop_i; + + bo_ij = &( ibond->bo_data ); + bo_ji = &( jbond->bo_data ); + bo_ji->BO = bo_ij->BO = BO; + bo_ji->BO_s = bo_ij->BO_s = BO_s; + bo_ji->BO_pi = bo_ij->BO_pi = BO_pi; + bo_ji->BO_pi2 = bo_ij->BO_pi2 = BO_pi2; + + /* Bond Order page2-3, derivative of total bond order prime */ + Cln_BOp_s = twbp->p_bo2 * C12 * rr2; + Cln_BOp_pi = twbp->p_bo4 * C34 * rr2; + Cln_BOp_pi2 = twbp->p_bo6 * C56 * rr2; + + /* Only dln_BOp_xx wrt. dr_i is stored here, note that + dln_BOp_xx/dr_i = -dln_BOp_xx/dr_j and all others are 0 */ + rvec_Scale(bo_ij->dln_BOp_s,-bo_ij->BO_s*Cln_BOp_s,ibond->dvec); + rvec_Scale(bo_ij->dln_BOp_pi,-bo_ij->BO_pi*Cln_BOp_pi,ibond->dvec); + rvec_Scale(bo_ij->dln_BOp_pi2, + -bo_ij->BO_pi2*Cln_BOp_pi2,ibond->dvec); + rvec_Scale(bo_ji->dln_BOp_s, -1., bo_ij->dln_BOp_s); + rvec_Scale(bo_ji->dln_BOp_pi, -1., bo_ij->dln_BOp_pi ); + rvec_Scale(bo_ji->dln_BOp_pi2, -1., bo_ij->dln_BOp_pi2 ); + + rvec_Scale( bo_ij->dBOp, + -(bo_ij->BO_s * Cln_BOp_s + + bo_ij->BO_pi * Cln_BOp_pi + + bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec ); + rvec_Scale( bo_ji->dBOp, -1., bo_ij->dBOp ); + + bo_ij->BO_s -= bo_cut; + bo_ij->BO -= bo_cut; + bo_ji->BO_s -= bo_cut; + bo_ji->BO -= bo_cut; + + bo_ij->Cdbo = bo_ij->Cdbopi = bo_ij->Cdbopi2 = 0.0; + bo_ji->Cdbo = bo_ji->Cdbopi = bo_ji->Cdbopi2 = 0.0; + + return 1; +} + +/* ---------------------------------------------------------------------- */ + +void BOOMP( reax_system *system, control_params *control, simulation_data *data, + storage *workspace, reax_list **lists, output_controls *out_control ) +{ +#ifdef OMP_TIMING + double endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + double p_lp1 = system->reax_param.gp.l[15]; + int num_bonds = 0; + double p_boc1 = system->reax_param.gp.l[0]; + double p_boc2 = system->reax_param.gp.l[1]; + reax_list *bonds = (*lists) + BONDS; + int natoms = system->N; + int nthreads = control->nthreads; + +#pragma omp parallel default(shared) + { + int i, j, pj, type_i, type_j; + int start_i, end_i, sym_index; + double val_i, Deltap_i, Deltap_boc_i; + double val_j, Deltap_j, Deltap_boc_j; + double f1, f2, f3, f4, f5, f4f5, exp_f4, exp_f5; + double exp_p1i, exp_p2i, exp_p1j, exp_p2j, explp1; + double temp, u1_ij, u1_ji, Cf1A_ij, Cf1B_ij, Cf1_ij, Cf1_ji; + double Cf45_ij, Cf45_ji; //u_ij, u_ji + double A0_ij, A1_ij, A2_ij, A2_ji, A3_ij, A3_ji; + single_body_parameters *sbp_i, *sbp_j; + two_body_parameters *twbp; + bond_order_data *bo_ij, *bo_ji; + + int tid = omp_get_thread_num(); + + /* Calculate Deltaprime, Deltaprime_boc values */ +#pragma omp for schedule(static) + for (i = 0; i < system->N; ++i) { + type_i = system->my_atoms[i].type; + if(type_i < 0) continue; + sbp_i = &(system->reax_param.sbp[type_i]); + workspace->Deltap[i] = workspace->total_bond_order[i] - sbp_i->valency; + workspace->Deltap_boc[i] = + workspace->total_bond_order[i] - sbp_i->valency_val; + + workspace->total_bond_order[i] = 0; + } + + // Wait till initialization complete +#pragma omp barrier + + /* Corrected Bond Order calculations */ +//#pragma omp for schedule(dynamic,50) +#pragma omp for schedule(guided) + for (i = 0; i < system->N; ++i) { + type_i = system->my_atoms[i].type; + if(type_i < 0) continue; + sbp_i = &(system->reax_param.sbp[type_i]); + val_i = sbp_i->valency; + Deltap_i = workspace->Deltap[i]; + Deltap_boc_i = workspace->Deltap_boc[i]; + start_i = Start_Index(i, bonds); + end_i = End_Index(i, bonds); + + for (pj = start_i; pj < end_i; ++pj) { + j = bonds->select.bond_list[pj].nbr; + type_j = system->my_atoms[j].type; + if(type_j < 0) continue; + bo_ij = &( bonds->select.bond_list[pj].bo_data ); + + if( i < j || workspace->bond_mark[j] > 3) { + twbp = &( system->reax_param.tbp[type_i][type_j] ); + + if( twbp->ovc < 0.001 && twbp->v13cor < 0.001 ) { + bo_ij->C1dbo = 1.000000; + bo_ij->C2dbo = 0.000000; + bo_ij->C3dbo = 0.000000; + + bo_ij->C1dbopi = bo_ij->BO_pi; + bo_ij->C2dbopi = 0.000000; + bo_ij->C3dbopi = 0.000000; + bo_ij->C4dbopi = 0.000000; + + bo_ij->C1dbopi2 = bo_ij->BO_pi2; + bo_ij->C2dbopi2 = 0.000000; + bo_ij->C3dbopi2 = 0.000000; + bo_ij->C4dbopi2 = 0.000000; + + } + else { + val_j = system->reax_param.sbp[type_j].valency; + Deltap_j = workspace->Deltap[j]; + Deltap_boc_j = workspace->Deltap_boc[j]; + + /* on page 1 */ + if( twbp->ovc >= 0.001 ) { + /* Correction for overcoordination */ + exp_p1i = exp( -p_boc1 * Deltap_i ); + exp_p2i = exp( -p_boc2 * Deltap_i ); + exp_p1j = exp( -p_boc1 * Deltap_j ); + exp_p2j = exp( -p_boc2 * Deltap_j ); + + f2 = exp_p1i + exp_p1j; + f3 = -1.0 / p_boc2 * log( 0.5 * ( exp_p2i + exp_p2j ) ); + f1 = 0.5 * ( ( val_i + f2 )/( val_i + f2 + f3 ) + + ( val_j + f2 )/( val_j + f2 + f3 ) ); + + /* Now come the derivates */ + /* Bond Order pages 5-7, derivative of f1 */ + temp = f2 + f3; + u1_ij = val_i + temp; + u1_ji = val_j + temp; + Cf1A_ij = 0.5 * f3 * (1.0 / SQR( u1_ij ) + + 1.0 / SQR( u1_ji )); + Cf1B_ij = -0.5 * (( u1_ij - f3 ) / SQR( u1_ij ) + + ( u1_ji - f3 ) / SQR( u1_ji )); + + Cf1_ij = 0.50 * ( -p_boc1 * exp_p1i / u1_ij - + ((val_i+f2) / SQR(u1_ij)) * + ( -p_boc1 * exp_p1i + + exp_p2i / ( exp_p2i + exp_p2j ) ) + + -p_boc1 * exp_p1i / u1_ji - + ((val_j+f2) / SQR(u1_ji)) * + ( -p_boc1 * exp_p1i + + exp_p2i / ( exp_p2i + exp_p2j ) )); + + + Cf1_ji = -Cf1A_ij * p_boc1 * exp_p1j + + Cf1B_ij * exp_p2j / ( exp_p2i + exp_p2j ); + } + else { + /* No overcoordination correction! */ + f1 = 1.0; + Cf1_ij = Cf1_ji = 0.0; + } + + if( twbp->v13cor >= 0.001 ) { + /* Correction for 1-3 bond orders */ + exp_f4 =exp(-(twbp->p_boc4 * SQR( bo_ij->BO ) - + Deltap_boc_i) * twbp->p_boc3 + twbp->p_boc5); + exp_f5 =exp(-(twbp->p_boc4 * SQR( bo_ij->BO ) - + Deltap_boc_j) * twbp->p_boc3 + twbp->p_boc5); + + f4 = 1. / (1. + exp_f4); + f5 = 1. / (1. + exp_f5); + f4f5 = f4 * f5; + + /* Bond Order pages 8-9, derivative of f4 and f5 */ + Cf45_ij = -f4 * exp_f4; + Cf45_ji = -f5 * exp_f5; + } + else { + f4 = f5 = f4f5 = 1.0; + Cf45_ij = Cf45_ji = 0.0; + } + + /* Bond Order page 10, derivative of total bond order */ + A0_ij = f1 * f4f5; + A1_ij = -2 * twbp->p_boc3 * twbp->p_boc4 * bo_ij->BO * + (Cf45_ij + Cf45_ji); + A2_ij = Cf1_ij / f1 + twbp->p_boc3 * Cf45_ij; + A2_ji = Cf1_ji / f1 + twbp->p_boc3 * Cf45_ji; + A3_ij = A2_ij + Cf1_ij / f1; + A3_ji = A2_ji + Cf1_ji / f1; + + /* find corrected bond orders and their derivative coef */ + bo_ij->BO = bo_ij->BO * A0_ij; + bo_ij->BO_pi = bo_ij->BO_pi * A0_ij *f1; + bo_ij->BO_pi2= bo_ij->BO_pi2* A0_ij *f1; + bo_ij->BO_s = bo_ij->BO - ( bo_ij->BO_pi + bo_ij->BO_pi2 ); + + bo_ij->C1dbo = A0_ij + bo_ij->BO * A1_ij; + bo_ij->C2dbo = bo_ij->BO * A2_ij; + bo_ij->C3dbo = bo_ij->BO * A2_ji; + + bo_ij->C1dbopi = f1*f1*f4*f5; + bo_ij->C2dbopi = bo_ij->BO_pi * A1_ij; + bo_ij->C3dbopi = bo_ij->BO_pi * A3_ij; + bo_ij->C4dbopi = bo_ij->BO_pi * A3_ji; + + bo_ij->C1dbopi2 = f1*f1*f4*f5; + bo_ij->C2dbopi2 = bo_ij->BO_pi2 * A1_ij; + bo_ij->C3dbopi2 = bo_ij->BO_pi2 * A3_ij; + bo_ij->C4dbopi2 = bo_ij->BO_pi2 * A3_ji; + } + + /* neglect bonds that are < 1e-10 */ + if( bo_ij->BO < 1e-10 ) + bo_ij->BO = 0.0; + if( bo_ij->BO_s < 1e-10 ) + bo_ij->BO_s = 0.0; + if( bo_ij->BO_pi < 1e-10 ) + bo_ij->BO_pi = 0.0; + if( bo_ij->BO_pi2 < 1e-10 ) + bo_ij->BO_pi2 = 0.0; + + workspace->total_bond_order[i] += bo_ij->BO; //now keeps total_BO + } + // else { + // /* We only need to update bond orders from bo_ji + // everything else is set in uncorrected_bo calculations */ + // sym_index = bonds->select.bond_list[pj].sym_index; + // bo_ji = &(bonds->select.bond_list[ sym_index ].bo_data); + // bo_ij->BO = bo_ji->BO; + // bo_ij->BO_s = bo_ji->BO_s; + // bo_ij->BO_pi = bo_ji->BO_pi; + // bo_ij->BO_pi2 = bo_ji->BO_pi2; + + // workspace->total_bond_order[i] += bo_ij->BO;// now keeps total_BO + // } + } + + } + + // Wait for bo_ij to be updated +#pragma omp barrier + + // Try to combine the following for-loop back into the for-loop above + /*-------------------------*/ +#pragma omp for schedule(guided) + for (i = 0; i < system->N; ++i) { + type_i = system->my_atoms[i].type; + if(type_i < 0) continue; + start_i = Start_Index(i, bonds); + end_i = End_Index(i, bonds); + + for (pj = start_i; pj < end_i; ++pj) { + j = bonds->select.bond_list[pj].nbr; + type_j = system->my_atoms[j].type; + if(type_j < 0) continue; + + if( i < j || workspace->bond_mark[j] > 3) { + // Computed in previous for-loop + } else { + /* We only need to update bond orders from bo_ji + everything else is set in uncorrected_bo calculations */ + sym_index = bonds->select.bond_list[pj].sym_index; + + bo_ij = &( bonds->select.bond_list[pj].bo_data ); + bo_ji = &(bonds->select.bond_list[ sym_index ].bo_data); + bo_ij->BO = bo_ji->BO; + bo_ij->BO_s = bo_ji->BO_s; + bo_ij->BO_pi = bo_ji->BO_pi; + bo_ij->BO_pi2 = bo_ji->BO_pi2; + + workspace->total_bond_order[i] += bo_ij->BO;// now keeps total_BO + } + } + + } + + /*-------------------------*/ + + // Need to wait for total_bond_order to be accumulated. +#pragma omp barrier + + /* Calculate some helper variables that are used at many places + throughout force calculations */ +#pragma omp for schedule(guided) + for(j = 0; j < system->N; ++j ) { + type_j = system->my_atoms[j].type; + if(type_j < 0) continue; + sbp_j = &(system->reax_param.sbp[ type_j ]); + + workspace->Delta[j] = workspace->total_bond_order[j] - sbp_j->valency; + workspace->Delta_e[j] = workspace->total_bond_order[j] - sbp_j->valency_e; + workspace->Delta_boc[j] = workspace->total_bond_order[j] - + sbp_j->valency_boc; + workspace->Delta_val[j] = workspace->total_bond_order[j] - + sbp_j->valency_val; + + workspace->vlpex[j] = workspace->Delta_e[j] - + 2.0 * (int)(workspace->Delta_e[j]/2.0); + explp1 = exp(-p_lp1 * SQR(2.0 + workspace->vlpex[j])); + workspace->nlp[j] = explp1 - (int)(workspace->Delta_e[j] / 2.0); + workspace->Delta_lp[j] = sbp_j->nlp_opt - workspace->nlp[j]; + workspace->Clp[j] = 2.0 * p_lp1 * explp1 * (2.0 + workspace->vlpex[j]); + workspace->dDelta_lp[j] = workspace->Clp[j]; + + if( sbp_j->mass > 21.0 ) { + workspace->nlp_temp[j] = 0.5 * (sbp_j->valency_e - sbp_j->valency); + workspace->Delta_lp_temp[j] = sbp_j->nlp_opt - workspace->nlp_temp[j]; + workspace->dDelta_lp_temp[j] = 0.; + } + else { + workspace->nlp_temp[j] = workspace->nlp[j]; + workspace->Delta_lp_temp[j] = sbp_j->nlp_opt - workspace->nlp_temp[j]; + workspace->dDelta_lp_temp[j] = workspace->Clp[j]; + } + } + + } // parallel region + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEBOINDEX] += (endTimeBase-startTimeBase); + +#endif +} diff --git a/src/USER-OMP/reaxc_bond_orders_omp.h b/src/USER-OMP/reaxc_bond_orders_omp.h new file mode 100644 index 0000000000..272309cadd --- /dev/null +++ b/src/USER-OMP/reaxc_bond_orders_omp.h @@ -0,0 +1,43 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#ifndef __BOND_ORDERS_OMP_H_ +#define __BOND_ORDERS_OMP_H_ + +#include "reaxc_types.h" +#include "reaxc_bond_orders.h" + +void Add_dBond_to_ForcesOMP( reax_system*, int, int, storage*, reax_list** ); +void Add_dBond_to_Forces_NPTOMP( reax_system *system, int, int, simulation_data*, + storage*, reax_list** ); + +int BOp_OMP(storage*, reax_list*, double, int, int, far_neighbor_data*, + single_body_parameters*, single_body_parameters*, two_body_parameters*, + int, double, double, double, double, double, double, double); + +void BOOMP( reax_system*, control_params*, simulation_data*, + storage*, reax_list**, output_controls* ); +#endif diff --git a/src/USER-OMP/reaxc_bonds_omp.cpp b/src/USER-OMP/reaxc_bonds_omp.cpp new file mode 100644 index 0000000000..4dc666b3ee --- /dev/null +++ b/src/USER-OMP/reaxc_bonds_omp.cpp @@ -0,0 +1,174 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#include "pair_reaxc_omp.h" +#include + +#include "reaxc_bonds_omp.h" +#include "reaxc_bond_orders_omp.h" +#include "reaxc_list.h" +#include "reaxc_tool_box.h" +#include "reaxc_vector.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void BondsOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, reax_list **lists, + output_controls *out_control ) +{ +#ifdef OMP_TIMING + double endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + int natoms = system->n; + int nthreads = control->nthreads; + reax_list *bonds = (*lists) + BONDS; + double gp3 = system->reax_param.gp.l[3]; + double gp4 = system->reax_param.gp.l[4]; + double gp7 = system->reax_param.gp.l[7]; + double gp10 = system->reax_param.gp.l[10]; + double gp37 = (int) system->reax_param.gp.l[37]; + double total_Ebond = 0.0; + +#pragma omp parallel default(shared) reduction(+: total_Ebond) + { + int i, j, pj; + int start_i, end_i; + int type_i, type_j; + double ebond, ebond_thr=0.0, pow_BOs_be2, exp_be12, CEbo; + double gp3, gp4, gp7, gp10, gp37; + double exphu, exphua1, exphub1, exphuov, hulpov, estriph, estriph_thr=0.0; + double decobdbo, decobdboua, decobdboub; + single_body_parameters *sbp_i, *sbp_j; + two_body_parameters *twbp; + bond_order_data *bo_ij; + int tid = omp_get_thread_num(); + long reductionOffset = (system->N * tid); + + class PairReaxCOMP *pair_reax_ptr; + pair_reax_ptr = static_cast(system->pair_ptr); + class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, natoms, + system->pair_ptr->eatom, system->pair_ptr->vatom, thr); + +#pragma omp for schedule(guided) + for (i = 0; i < natoms; ++i) { + start_i = Start_Index(i, bonds); + end_i = End_Index(i, bonds); + + for (pj = start_i; pj < end_i; ++pj) { + j = bonds->select.bond_list[pj].nbr; + + if( system->my_atoms[i].orig_id > system->my_atoms[j].orig_id ) continue; + + if( system->my_atoms[i].orig_id == system->my_atoms[j].orig_id ) { + if (system->my_atoms[j].x[2] < system->my_atoms[i].x[2]) continue; + if (system->my_atoms[j].x[2] == system->my_atoms[i].x[2] && + system->my_atoms[j].x[1] < system->my_atoms[i].x[1]) continue; + if (system->my_atoms[j].x[2] == system->my_atoms[i].x[2] && + system->my_atoms[j].x[1] == system->my_atoms[i].x[1] && + system->my_atoms[j].x[0] < system->my_atoms[i].x[0]) continue; + } + + /* set the pointers */ + type_i = system->my_atoms[i].type; + type_j = system->my_atoms[j].type; + sbp_i = &( system->reax_param.sbp[type_i] ); + sbp_j = &( system->reax_param.sbp[type_j] ); + twbp = &( system->reax_param.tbp[type_i][type_j] ); + bo_ij = &( bonds->select.bond_list[pj].bo_data ); + + /* calculate the constants */ + pow_BOs_be2 = pow( bo_ij->BO_s, twbp->p_be2 ); + exp_be12 = exp( twbp->p_be1 * ( 1.0 - pow_BOs_be2 ) ); + CEbo = -twbp->De_s * exp_be12 * + ( 1.0 - twbp->p_be1 * twbp->p_be2 * pow_BOs_be2 ); + + /* calculate the Bond Energy */ + total_Ebond += ebond = + -twbp->De_s * bo_ij->BO_s * exp_be12 + -twbp->De_p * bo_ij->BO_pi + -twbp->De_pp * bo_ij->BO_pi2; + + /* tally into per-atom energy */ + if (system->pair_ptr->evflag) + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, natoms, 1, + ebond, 0.0, 0.0, 0.0, 0.0, 0.0, thr); + + /* calculate derivatives of Bond Orders */ + bo_ij->Cdbo += CEbo; + bo_ij->Cdbopi -= (CEbo + twbp->De_p); + bo_ij->Cdbopi2 -= (CEbo + twbp->De_pp); + + /* Stabilisation terminal triple bond */ + if (bo_ij->BO >= 1.00) { + if (gp37 == 2 || + (sbp_i->mass == 12.0000 && sbp_j->mass == 15.9990) || + (sbp_j->mass == 12.0000 && sbp_i->mass == 15.9990)) { + exphu = exp( -gp7 * SQR(bo_ij->BO - 2.50) ); + exphua1 = exp(-gp3 * (workspace->total_bond_order[i]-bo_ij->BO)); + exphub1 = exp(-gp3 * (workspace->total_bond_order[j]-bo_ij->BO)); + exphuov = exp(gp4 * (workspace->Delta[i] + workspace->Delta[j])); + hulpov = 1.0 / (1.0 + 25.0 * exphuov); + + estriph = gp10 * exphu * hulpov * (exphua1 + exphub1); + total_Ebond += estriph; + + decobdbo = gp10 * exphu * hulpov * (exphua1 + exphub1) * + ( gp3 - 2.0 * gp7 * (bo_ij->BO-2.50) ); + decobdboua = -gp10 * exphu * hulpov * + (gp3*exphua1 + 25.0*gp4*exphuov*hulpov*(exphua1+exphub1)); + decobdboub = -gp10 * exphu * hulpov * + (gp3*exphub1 + 25.0*gp4*exphuov*hulpov*(exphua1+exphub1)); + + /* tally into per-atom energy */ + if (system->pair_ptr->evflag) + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, natoms, 1, + estriph, 0.0, 0.0, 0.0, 0.0, 0.0, thr); + + bo_ij->Cdbo += decobdbo; + workspace->CdDelta[i] += decobdboua; + workspace->CdDeltaReduction[reductionOffset+j] += decobdboub; + } + } + } + } // for(i) + + } // omp + + data->my_en.e_bond += total_Ebond; + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEBONDSINDEX] += (endTimeBase-startTimeBase); +#endif + +} diff --git a/src/USER-OMP/reaxc_bonds_omp.h b/src/USER-OMP/reaxc_bonds_omp.h new file mode 100644 index 0000000000..8c07fd8957 --- /dev/null +++ b/src/USER-OMP/reaxc_bonds_omp.h @@ -0,0 +1,35 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#ifndef __BONDS_OMP_H_ +#define __BONDS_OMP_H_ + +#include "reaxc_types.h" + +void BondsOMP( reax_system*, control_params*, simulation_data*, + storage*, reax_list**, output_controls* ); + +#endif diff --git a/src/USER-OMP/reaxc_forces_omp.cpp b/src/USER-OMP/reaxc_forces_omp.cpp new file mode 100644 index 0000000000..1bf04129e0 --- /dev/null +++ b/src/USER-OMP/reaxc_forces_omp.cpp @@ -0,0 +1,637 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#include "pair_reaxc_omp.h" +#include "omp.h" +#include "thr_data.h" + +#include "reaxc_forces_omp.h" +#include "reaxc_bond_orders_omp.h" +#include "reaxc_bonds_omp.h" +#include "reaxc_hydrogen_bonds_omp.h" +#include "reaxc_io_tools.h" +#include "reaxc_list.h" +#include "reaxc_lookup.h" +#include "reaxc_multi_body_omp.h" +#include "reaxc_nonbonded_omp.h" +#include "reaxc_tool_box.h" +#include "reaxc_torsion_angles_omp.h" +#include "reaxc_valence_angles_omp.h" +#include "reaxc_vector.h" + +using namespace LAMMPS_NS; + +// Functions defined in reaxc_forces.cpp +extern interaction_function Interaction_Functions[]; +extern double Compute_H(double, double, double*); +extern double Compute_tabH(double, int, int); +extern void Dummy_Interaction(reax_system*, control_params*, simulation_data*, storage*, reax_list**, output_controls*); + +/* ---------------------------------------------------------------------- */ + +void Init_Force_FunctionsOMP( control_params *control ) +{ + Interaction_Functions[0] = BOOMP; + Interaction_Functions[1] = BondsOMP; //Dummy_Interaction; + Interaction_Functions[2] = Atom_EnergyOMP; //Dummy_Interaction; + Interaction_Functions[3] = Valence_AnglesOMP; //Dummy_Interaction; + Interaction_Functions[4] = Torsion_AnglesOMP; //Dummy_Interaction; + if( control->hbond_cut > 0 ) + Interaction_Functions[5] = Hydrogen_BondsOMP; + else Interaction_Functions[5] = Dummy_Interaction; + Interaction_Functions[6] = Dummy_Interaction; //empty + Interaction_Functions[7] = Dummy_Interaction; //empty + Interaction_Functions[8] = Dummy_Interaction; //empty + Interaction_Functions[9] = Dummy_Interaction; //empty +} + +/* ---------------------------------------------------------------------- */ + +// Only difference with MPI-only version is inclusion of OMP_TIMING statements +void Compute_Bonded_ForcesOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + MPI_Comm comm ) +{ + int i; + +#ifdef OMP_TIMING + double startTimeBase, endTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + /* Implement all force calls as function pointers */ + for( i = 0; i < NUM_INTRS; i++ ) { + (Interaction_Functions[i])( system, control, data, workspace, + lists, out_control ); + } + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEBFINDEX] += (endTimeBase-startTimeBase); +#endif + +} + +// Only difference with MPI-only version is inclusion of OMP_TIMING statements +void Compute_NonBonded_ForcesOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + MPI_Comm comm ) +{ + /* van der Waals and Coulomb interactions */ +#ifdef OMP_TIMING + double endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + if( control->tabulate == 0 ) + vdW_Coulomb_Energy_OMP( system, control, data, workspace, + lists, out_control ); + else + Tabulated_vdW_Coulomb_Energy_OMP( system, control, data, workspace, + lists, out_control ); + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTENBFINDEX] += (endTimeBase-startTimeBase); +#endif +} + +/* ---------------------------------------------------------------------- */ + +/* this version of Compute_Total_Force computes forces from + coefficients accumulated by all interaction functions. + Saves enormous time & space! */ +void Compute_Total_ForceOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, mpi_datatypes *mpi_data ) +{ +#ifdef OMP_TIMING + double startTimeBase,endTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + int natoms = system->N; + int nthreads = control->nthreads; + long totalReductionSize = system->N * nthreads; + reax_list *bonds = (*lists) + BONDS; + +#pragma omp parallel default(shared) //default(none) + { + int i, j, k, pj, pk, start_j, end_j; + int tid = omp_get_thread_num(); + bond_order_data *bo_jk; + + class PairReaxCOMP *pair_reax_ptr; + pair_reax_ptr = static_cast(system->pair_ptr); + class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + + pair_reax_ptr->ev_setup_thr_proxy(0, 1, natoms, system->pair_ptr->eatom, + system->pair_ptr->vatom, thr); + +#pragma omp for schedule(guided) + for (i = 0; i < system->N; ++i) { + for (j = 0; j < nthreads; ++j) + workspace->CdDelta[i] += workspace->CdDeltaReduction[system->N*j+i]; + } + +#pragma omp for schedule(dynamic,50) + for (j = 0; j < system->N; ++j) { + start_j = Start_Index(j, bonds); + end_j = End_Index(j, bonds); + + for (pk = start_j; pk < end_j; ++pk) { + bo_jk = &( bonds->select.bond_list[pk].bo_data ); + for (k = 0; k < nthreads; ++k) + bo_jk->Cdbo += bo_jk->CdboReduction[k]; + } + } + +// #pragma omp for schedule(guided) //(dynamic,50) +// for (i = 0; i < system->N; ++i) +// for (pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj) +// if (i < bonds->select.bond_list[pj].nbr) { +// if (control->virial == 0) +// Add_dBond_to_ForcesOMP( system, i, pj, workspace, lists ); +// else +// Add_dBond_to_Forces_NPTOMP(system, i, pj, data, workspace, lists ); +// } + + if(control->virial == 0) { + +#pragma omp for schedule(dynamic,50) + for (i = 0; i < system->N; ++i) { + const int startj = Start_Index(i, bonds); + const int endj = End_Index(i, bonds); + for (pj = startj; pj < endj; ++pj) + if (i < bonds->select.bond_list[pj].nbr) + Add_dBond_to_ForcesOMP( system, i, pj, workspace, lists ); + } + + } else { + +#pragma omp for schedule(dynamic,50) + for (i = 0; i < system->N; ++i) { + const int startj = Start_Index(i, bonds); + const int endj = End_Index(i, bonds); + for (pj = startj; pj < endj; ++pj) + if (i < bonds->select.bond_list[pj].nbr) + Add_dBond_to_Forces_NPTOMP(system, i, pj, data, workspace, lists ); + } + + } // if(virial == 0) + + pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, 0, 1, thr); + +#pragma omp for schedule(guided) + for (i = 0; i < system->N; ++i) { + for (j = 0; j < nthreads; ++j) + rvec_Add( workspace->f[i], workspace->forceReduction[system->N*j+i] ); + } + + +#pragma omp for schedule(guided) + for (i = 0; i < totalReductionSize; i++) { + workspace->forceReduction[i][0] = 0; + workspace->forceReduction[i][1] = 0; + workspace->forceReduction[i][2] = 0; + workspace->CdDeltaReduction[i] = 0; + } + } // parallel region + + if (control->virial) + for (int i=0; i < nthreads; ++i) { + rvec_Add(data->my_ext_press, workspace->my_ext_pressReduction[i]); + workspace->my_ext_pressReduction[i][0] = 0; + workspace->my_ext_pressReduction[i][1] = 0; + workspace->my_ext_pressReduction[i][2] = 0; + } + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTETFINDEX] += (endTimeBase-startTimeBase); +#endif +} + +/* ---------------------------------------------------------------------- */ + +void Validate_ListsOMP( reax_system *system, storage *workspace, reax_list **lists, + int step, int n, int N, int numH, MPI_Comm comm ) +{ + int i, comp, Hindex; + reax_list *bonds, *hbonds; + reallocate_data *realloc = &(workspace->realloc); + double saferzone = system->saferzone; + +#pragma omp parallel default(shared) private(i, comp, Hindex) + { + + /* bond list */ + if( N > 0 ) { + bonds = *lists + BONDS; + +#pragma omp for schedule(guided) + for( i = 0; i < N; ++i ) { + system->my_atoms[i].num_bonds = MAX(Num_Entries(i,bonds)*2, MIN_BONDS); + + if( i < N-1 ) + comp = Start_Index(i+1, bonds); + else comp = bonds->num_intrs; + + if( End_Index(i, bonds) > comp ) { + fprintf( stderr, "step%d-bondchk failed: i=%d end(i)=%d str(i+1)=%d\n", + step, i, End_Index(i,bonds), comp ); + MPI_Abort( comm, INSUFFICIENT_MEMORY ); + } + } + } + + + /* hbonds list */ + if( numH > 0 ) { + hbonds = *lists + HBONDS; + +#pragma omp for schedule(guided) + for( i = 0; i < n; ++i ) { + Hindex = system->my_atoms[i].Hindex; + if( Hindex > -1 ) { + system->my_atoms[i].num_hbonds = + (int)(MAX( Num_Entries(Hindex, hbonds)*saferzone, MIN_HBONDS )); + + if( Hindex < numH-1 ) + comp = Start_Index(Hindex+1, hbonds); + else comp = hbonds->num_intrs; + + if( End_Index(Hindex, hbonds) > comp ) { + fprintf(stderr,"step%d-hbondchk failed: H=%d end(H)=%d str(H+1)=%d\n", + step, Hindex, End_Index(Hindex,hbonds), comp ); + MPI_Abort( comm, INSUFFICIENT_MEMORY ); + } + } + } + } + + } // omp parallel +} + + +void Init_Forces_noQEq_OMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + MPI_Comm comm ) { +#ifdef OMP_TIMING + double startTimeBase, endTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + int i, j, pi, pj; + int start_i, end_i, start_j, end_j; + int type_i, type_j; + int ihb, jhb, ihb_top, jhb_top; + int local, flag; + double r_ij, cutoff; + single_body_parameters *sbp_i, *sbp_j; + two_body_parameters *twbp; + far_neighbor_data *nbr_pj; + reax_atom *atom_i, *atom_j; + bond_data *ibond, *jbond; + reax_list *far_nbrs = *lists + FAR_NBRS; + reax_list *bonds = *lists + BONDS; + reax_list *hbonds = *lists + HBONDS; + int num_bonds = 0; + int num_hbonds = 0; + int btop_i = 0; + int btop_j = 0; + int renbr = (data->step-data->prev_steps) % control->reneighbor == 0; + + // We will use CdDeltaReduction as a temporary (double) buffer to accumulate total_bond_order + // This is safe because CdDeltaReduction is currently zeroed and its accumulation doesn't start until BondsOMP() + double * tmp_bond_order = workspace->CdDeltaReduction; + + // We do the same with forceReduction as a temporary (rvec) buffer to accumulate dDeltap_self + // This is safe because forceReduction is currently zeroed and its accumulation does start until Hydrogen_BondsOMP() + rvec * tmp_ddelta = workspace->forceReduction; + + /* uncorrected bond orders */ + cutoff = control->bond_cut; + +#pragma omp parallel default(shared) \ + private(i, atom_i, type_i, pi, start_i, end_i, sbp_i, btop_i, ibond, ihb, ihb_top, \ + j, atom_j, type_j, pj, start_j, end_j, sbp_j, nbr_pj, jbond, jhb, twbp) + { + + int nthreads = control->nthreads; + int tid = omp_get_thread_num(); + long reductionOffset = system->N * tid; + long totalReductionSize = system->N * nthreads; + +#pragma omp for schedule(dynamic,50) reduction(+ : num_bonds) +//#pragma omp for schedule(guided) reduction(+ : num_bonds) + for (i = 0; i < system->N; ++i) { + atom_i = &(system->my_atoms[i]); + type_i = atom_i->type; + sbp_i = &(system->reax_param.sbp[type_i]); + + start_i = Start_Index(i, far_nbrs); + end_i = End_Index(i, far_nbrs); + + for( pj = start_i; pj < end_i; ++pj ) { + nbr_pj = &( far_nbrs->select.far_nbr_list[pj] ); + if (nbr_pj->d <= cutoff) { + j = nbr_pj->nbr; + atom_j = &(system->my_atoms[j]); + type_j = atom_j->type; + sbp_j = &(system->reax_param.sbp[type_j]); + twbp = &(system->reax_param.tbp[type_i][type_j]); + +// #pragma omp critical +// { +// btop_i = End_Index(i, bonds); +// if( BOp(workspace, bonds, control->bo_cut, i, btop_i, nbr_pj, sbp_i, sbp_j, twbp) ) { +// num_bonds++; +// btop_i++; +// Set_End_Index(i, btop_i, bonds); +// } + +// } + + // Trying to minimize time spent in critical section by moving initial part of BOp() + // outside of critical section. + + // Start top portion of BOp() + int jj = nbr_pj->nbr; + double C12, C34, C56; + double BO, BO_s, BO_pi, BO_pi2; + double bo_cut = control->bo_cut; + + if( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0 ) { + C12 = twbp->p_bo1 * pow( nbr_pj->d / twbp->r_s, twbp->p_bo2 ); + BO_s = (1.0 + bo_cut) * exp( C12 ); + } + else BO_s = C12 = 0.0; + + if( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0 ) { + C34 = twbp->p_bo3 * pow( nbr_pj->d / twbp->r_p, twbp->p_bo4 ); + BO_pi = exp( C34 ); + } + else BO_pi = C34 = 0.0; + + if( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0 ) { + C56 = twbp->p_bo5 * pow( nbr_pj->d / twbp->r_pp, twbp->p_bo6 ); + BO_pi2= exp( C56 ); + } + else BO_pi2 = C56 = 0.0; + + /* Initially BO values are the uncorrected ones, page 1 */ + BO = BO_s + BO_pi + BO_pi2; + // End top portion of BOp() + + if(BO >= bo_cut) { + int btop_j; + + // Update indices in critical section +#pragma omp critical + { + btop_i = End_Index( i, bonds ); + btop_j = End_Index( j, bonds ); + Set_End_Index( j, btop_j+1, bonds ); + Set_End_Index( i, btop_i+1, bonds ); + } // omp critical + + // Finish remaining BOp() work + BOp_OMP(workspace, bonds, bo_cut, + i , btop_i, nbr_pj, sbp_i, sbp_j, twbp, btop_j, + C12, C34, C56, BO, BO_s, BO_pi, BO_pi2); + + bond_data * ibond = &(bonds->select.bond_list[btop_i]); + bond_order_data * bo_ij = &(ibond->bo_data); + + bond_data * jbond = &(bonds->select.bond_list[btop_j]); + bond_order_data * bo_ji = &(jbond->bo_data); + + workspace->total_bond_order[i] += bo_ij->BO; + tmp_bond_order[reductionOffset + j] += bo_ji->BO; + + rvec_Add(workspace->dDeltap_self[i], bo_ij->dBOp); + rvec_Add(tmp_ddelta[reductionOffset + j], bo_ji->dBOp); + + btop_i++; + num_bonds++; + } // if(BO>=bo_cut) + + } // if(cutoff) + + } // for(pj) + } // for(i) + + // Need to wait for all indices and tmp arrays accumulated. +#pragma omp barrier + +#pragma omp for schedule(dynamic,50) + for(i=0; iN; i++) + for(int t=0; tN + i; + workspace->dDeltap_self[i][0] += tmp_ddelta[indx][0]; + workspace->dDeltap_self[i][1] += tmp_ddelta[indx][1]; + workspace->dDeltap_self[i][2] += tmp_ddelta[indx][2]; + workspace->total_bond_order[i] += tmp_bond_order[indx]; + } + + // Not needed anymore with newest BOp_OMP()? + //#pragma omp for schedule(guided) +// #pragma omp for schedule(dynamic,50) +// for (i = 0; i < system->N; ++i) { +// start_i = Start_Index(i, bonds); +// end_i = End_Index(i, bonds); + +// for (pi = start_i; pi < end_i; ++pi) { +// ibond = &(bonds->select.bond_list[pi]); +// j = ibond->nbr; + +// if (i < j) { +// start_j = Start_Index(j, bonds); +// end_j = End_Index(j, bonds); + +// for (pj = start_j; pj < end_j; ++pj) { +// jbond = &(bonds->select.bond_list[pj]); + +// if (jbond->nbr == i) { +// ibond->sym_index = pj; +// jbond->sym_index = pi; +// break; +// } +// } +// } +// } +// } + + /* hydrogen bond list */ + if (control->hbond_cut > 0) { + cutoff = control->hbond_cut; + +//#pragma omp for schedule(guided) reduction(+ : num_hbonds) +#pragma omp for schedule(dynamic,50) reduction(+ : num_hbonds) + for (i = 0; i < system->n; ++i) { + atom_i = &(system->my_atoms[i]); + type_i = atom_i->type; + sbp_i = &(system->reax_param.sbp[type_i]); + ihb = sbp_i->p_hbond; + +#pragma omp critical + { + + if (ihb == 1 || ihb == 2) { + start_i = Start_Index(i, far_nbrs); + end_i = End_Index(i, far_nbrs); + + for (pj = start_i; pj < end_i; ++pj) { + nbr_pj = &( far_nbrs->select.far_nbr_list[pj] ); + j = nbr_pj->nbr; + atom_j = &(system->my_atoms[j]); + type_j = atom_j->type; + if(type_j < 0) continue; + sbp_j = &(system->reax_param.sbp[type_j]); + jhb = sbp_j->p_hbond; + + if (nbr_pj->d <= control->hbond_cut) { + int iflag = 0; + int jflag = 0; + + if(ihb==1 && jhb==2) iflag = 1; + else if(jn && ihb == 2 && jhb == 1) jflag = 1; + + if(iflag || jflag) { + + // This critical section enforces H-bonds to be added by threads one at a time. +// #pragma omp critical +// { + if(iflag) { + ihb_top = End_Index(atom_i->Hindex, hbonds); + Set_End_Index(atom_i->Hindex, ihb_top+1, hbonds); + } else if(jflag) { + jhb_top = End_Index(atom_j->Hindex, hbonds); + Set_End_Index(atom_j->Hindex, jhb_top+1, hbonds); + } + // } // omp critical + + if(iflag) { + hbonds->select.hbond_list[ihb_top].nbr = j; + hbonds->select.hbond_list[ihb_top].scl = 1; + hbonds->select.hbond_list[ihb_top].ptr = nbr_pj; + } else if(jflag) { + hbonds->select.hbond_list[jhb_top].nbr = i; + hbonds->select.hbond_list[jhb_top].scl = -1; + hbonds->select.hbond_list[jhb_top].ptr = nbr_pj; + } + + num_hbonds++; + } // if(iflag || jflag) + + } + } + } + + } // omp critical + } + + } // if(control->hbond > 0) + + // Zero buffers for others to use as intended. +#pragma omp for schedule(guided) + for(i=0; irealloc.num_bonds = num_bonds; + workspace->realloc.num_hbonds = num_hbonds; + + Validate_ListsOMP( system, workspace, lists, data->step, + system->n, system->N, system->numH, comm ); + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEIFINDEX] += (endTimeBase-startTimeBase); +#endif +} + +/* ---------------------------------------------------------------------- */ + +void Compute_ForcesOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + mpi_datatypes *mpi_data ) +{ + int qeq_flag; + MPI_Comm comm = mpi_data->world; + + // Init Forces +#if defined(LOG_PERFORMANCE) + double t_start = 0; + if( system->my_rank == MASTER_NODE ) + t_start = Get_Time( ); +#endif + + Init_Forces_noQEq_OMP( system, control, data, workspace, + lists, out_control, comm ); + +#if defined(LOG_PERFORMANCE) + //MPI_Barrier( comm ); + if( system->my_rank == MASTER_NODE ) + Update_Timing_Info( &t_start, &(data->timing.init_forces) ); +#endif + + // Bonded Interactions + Compute_Bonded_ForcesOMP( system, control, data, workspace, + lists, out_control, mpi_data->world ); + +#if defined(LOG_PERFORMANCE) + if( system->my_rank == MASTER_NODE ) + Update_Timing_Info( &t_start, &(data->timing.bonded) ); +#endif + + // Nonbonded Interactions + Compute_NonBonded_ForcesOMP( system, control, data, workspace, + lists, out_control, mpi_data->world ); + +#if defined(LOG_PERFORMANCE) + if( system->my_rank == MASTER_NODE ) + Update_Timing_Info( &t_start, &(data->timing.nonb) ); +#endif + + // Total Force + Compute_Total_ForceOMP( system, control, data, workspace, lists, mpi_data ); + +#if defined(LOG_PERFORMANCE) + if( system->my_rank == MASTER_NODE ) + Update_Timing_Info( &t_start, &(data->timing.bonded) ); +#endif +} diff --git a/src/USER-OMP/reaxc_forces_omp.h b/src/USER-OMP/reaxc_forces_omp.h new file mode 100644 index 0000000000..f4941fc7fa --- /dev/null +++ b/src/USER-OMP/reaxc_forces_omp.h @@ -0,0 +1,36 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#ifndef __FORCES_OMP_H_ +#define __FORCES_OMP_H_ + +#include "reaxc_types.h" +#include "reaxc_defs.h" + +void Init_Force_FunctionsOMP( control_params* ); +void Compute_ForcesOMP( reax_system*, control_params*, simulation_data*, + storage*, reax_list**, output_controls*, mpi_datatypes* ); +#endif diff --git a/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp b/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp new file mode 100644 index 0000000000..5047e2775e --- /dev/null +++ b/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp @@ -0,0 +1,242 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#include "pair_reaxc_omp.h" +#include + +#include "reaxc_hydrogen_bonds_omp.h" +#include "reaxc_bond_orders_omp.h" +#include "reaxc_list.h" +#include "reaxc_valence_angles.h" // To access Calculate_Theta() +#include "reaxc_valence_angles_omp.h" // To access Calculate_dCos_ThetaOMP() +#include "reaxc_vector.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void Hydrogen_BondsOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) +{ +#ifdef OMP_TIMING + double endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + const int nthreads = control->nthreads; + long totalReductionSize = system->N; + +#pragma omp parallel default(shared) //default(none) + { + int i, j, k, pi, pk; + int type_i, type_j, type_k; + int start_j, end_j, hb_start_j, hb_end_j; + int hblist[MAX_BONDS]; + int itr, top; + int num_hb_intrs = 0; + ivec rel_jk; + double r_jk, theta, cos_theta, sin_xhz4, cos_xhz1, sin_theta2; + double e_hb, e_hb_thr = 0.0, exp_hb2, exp_hb3, CEhb1, CEhb2, CEhb3; + rvec dcos_theta_di, dcos_theta_dj, dcos_theta_dk; + rvec dvec_jk, force, ext_press; + hbond_parameters *hbp; + bond_order_data *bo_ij; + bond_data *pbond_ij; + far_neighbor_data *nbr_jk; + reax_list *bonds, *hbonds; + bond_data *bond_list; + hbond_data *hbond_list; + + // tally variables + double fi_tmp[3], fk_tmp[3], delij[3], delkj[3]; + + bonds = (*lists) + BONDS; + bond_list = bonds->select.bond_list; + hbonds = (*lists) + HBONDS; + hbond_list = hbonds->select.hbond_list; + + int natoms = system->n; + int tid = omp_get_thread_num(); + const int idelta = 1 + natoms/nthreads; + int ifrom = tid*idelta; + int ito = ((ifrom + idelta) > natoms) ? natoms : ifrom + idelta; + + long reductionOffset = (system->N * tid); + + class PairReaxCOMP *pair_reax_ptr; + pair_reax_ptr = static_cast(system->pair_ptr); + + class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, + natoms, system->pair_ptr->eatom, + system->pair_ptr->vatom, thr); + + /* loops below discover the Hydrogen bonds between i-j-k triplets. + here j is H atom and there has to be some bond between i and j. + Hydrogen bond is between j and k. + so in this function i->X, j->H, k->Z when we map + variables onto the ones in the handout.*/ + // for( j = 0; j < system->n; ++j ) + for( j = ifrom; j < ito; ++j ) { + /* j has to be of type H */ + if( system->reax_param.sbp[system->my_atoms[j].type].p_hbond == 1 ) { + /*set j's variables */ + type_j = system->my_atoms[j].type; + start_j = Start_Index(j, bonds); + end_j = End_Index(j, bonds); + hb_start_j = Start_Index( system->my_atoms[j].Hindex, hbonds ); + hb_end_j = End_Index( system->my_atoms[j].Hindex, hbonds ); + if(type_j < 0) continue; + + top = 0; + for( pi = start_j; pi < end_j; ++pi ) { + pbond_ij = &( bond_list[pi] ); + i = pbond_ij->nbr; + type_i = system->my_atoms[i].type; + if(type_i < 0) continue; + bo_ij = &(pbond_ij->bo_data); + + if( system->reax_param.sbp[type_i].p_hbond == 2 && + bo_ij->BO >= HB_THRESHOLD ) + hblist[top++] = pi; + } + + for( pk = hb_start_j; pk < hb_end_j; ++pk ) { + /* set k's varibles */ + k = hbond_list[pk].nbr; + type_k = system->my_atoms[k].type; + if(type_k < 0) continue; + nbr_jk = hbond_list[pk].ptr; + r_jk = nbr_jk->d; + rvec_Scale( dvec_jk, hbond_list[pk].scl, nbr_jk->dvec ); + + for( itr = 0; itr < top; ++itr ) { + pi = hblist[itr]; + pbond_ij = &( bonds->select.bond_list[pi] ); + i = pbond_ij->nbr; + + if( system->my_atoms[i].orig_id != system->my_atoms[k].orig_id ) { + bo_ij = &(pbond_ij->bo_data); + type_i = system->my_atoms[i].type; + if(type_i < 0) continue; + hbp = &(system->reax_param.hbp[ type_i ][ type_j ][ type_k ]); + ++num_hb_intrs; + + Calculate_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk, + &theta, &cos_theta ); + /* the derivative of cos(theta) */ + Calculate_dCos_ThetaOMP( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk, + &dcos_theta_di, &dcos_theta_dj, + &dcos_theta_dk ); + + /* hydrogen bond energy*/ + sin_theta2 = sin( theta/2.0 ); + sin_xhz4 = SQR(sin_theta2); + sin_xhz4 *= sin_xhz4; + cos_xhz1 = ( 1.0 - cos_theta ); + exp_hb2 = exp( -hbp->p_hb2 * bo_ij->BO ); + exp_hb3 = exp( -hbp->p_hb3 * ( hbp->r0_hb / r_jk + + r_jk / hbp->r0_hb - 2.0 ) ); + + e_hb_thr += e_hb = hbp->p_hb1 * (1.0 - exp_hb2) * exp_hb3 * sin_xhz4; + + CEhb1 = hbp->p_hb1 * hbp->p_hb2 * exp_hb2 * exp_hb3 * sin_xhz4; + CEhb2 = -hbp->p_hb1/2.0 * (1.0 - exp_hb2) * exp_hb3 * cos_xhz1; + CEhb3 = -hbp->p_hb3 * + (-hbp->r0_hb / SQR(r_jk) + 1.0 / hbp->r0_hb) * e_hb; + + /* hydrogen bond forces */ + bo_ij->Cdbo += CEhb1; // dbo term + + if( control->virial == 0 ) { + // dcos terms + rvec_ScaledAdd(workspace->forceReduction[reductionOffset+i], +CEhb2, dcos_theta_di ); + rvec_ScaledAdd(workspace->forceReduction[reductionOffset+j], +CEhb2, dcos_theta_dj ); + rvec_ScaledAdd(workspace->forceReduction[reductionOffset+k], +CEhb2, dcos_theta_dk ); + // dr terms + rvec_ScaledAdd(workspace->forceReduction[reductionOffset+j], -CEhb3/r_jk, dvec_jk ); + rvec_ScaledAdd(workspace->forceReduction[reductionOffset+k], +CEhb3/r_jk, dvec_jk ); + } + else { + /* for pressure coupling, terms that are not related to bond order + derivatives are added directly into pressure vector/tensor */ + rvec_Scale( force, +CEhb2, dcos_theta_di ); // dcos terms + rvec_Add(workspace->forceReduction[reductionOffset+i], force ); + rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); + rvec_ScaledAdd( workspace->my_ext_pressReduction[tid],1.0, ext_press ); + + rvec_ScaledAdd(workspace->forceReduction[reductionOffset+j], +CEhb2, dcos_theta_dj ); + + ivec_Scale( rel_jk, hbond_list[pk].scl, nbr_jk->rel_box ); + rvec_Scale( force, +CEhb2, dcos_theta_dk ); + rvec_Add(workspace->forceReduction[reductionOffset+k], force ); + rvec_iMultiply( ext_press, rel_jk, force ); + rvec_ScaledAdd( workspace->my_ext_pressReduction[tid],1.0, ext_press ); + // dr terms + rvec_ScaledAdd(workspace->forceReduction[reductionOffset+j],-CEhb3/r_jk, dvec_jk ); + + rvec_Scale( force, CEhb3/r_jk, dvec_jk ); + rvec_Add(workspace->forceReduction[reductionOffset+k], force ); + rvec_iMultiply( ext_press, rel_jk, force ); + rvec_ScaledAdd( workspace->my_ext_pressReduction[tid],1.0, ext_press ); + } + + /* tally into per-atom virials */ + if (system->pair_ptr->vflag_atom || system->pair_ptr->evflag) { + rvec_ScaledSum( delij, 1., system->my_atoms[j].x, + -1., system->my_atoms[i].x ); + rvec_ScaledSum( delkj, 1., system->my_atoms[j].x, + -1., system->my_atoms[k].x ); + + rvec_Scale(fi_tmp, CEhb2, dcos_theta_di); + rvec_Scale(fk_tmp, CEhb2, dcos_theta_dk); + rvec_ScaledAdd(fk_tmp, CEhb3/r_jk, dvec_jk); + + pair_reax_ptr->ev_tally3_thr_proxy(system->pair_ptr,i,j,k,e_hb,0.0,fi_tmp,fk_tmp,delij,delkj,thr); + } + } + } + } + + } + } +#pragma omp critical + { + data->my_en.e_hb += e_hb_thr; + } + + pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, thr); +} + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEHBONDSINDEX] += (endTimeBase-startTimeBase); +#endif +} diff --git a/src/USER-OMP/reaxc_hydrogen_bonds_omp.h b/src/USER-OMP/reaxc_hydrogen_bonds_omp.h new file mode 100644 index 0000000000..b4a2d78dbb --- /dev/null +++ b/src/USER-OMP/reaxc_hydrogen_bonds_omp.h @@ -0,0 +1,35 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#ifndef __HBONDS_OMP_H_ +#define __HBONDS_OMP_H_ + +#include "reaxc_types.h" + +void Hydrogen_BondsOMP( reax_system*, control_params*, simulation_data*, + storage*, reax_list**, output_controls* ); + +#endif diff --git a/src/USER-OMP/reaxc_init_md_omp.cpp b/src/USER-OMP/reaxc_init_md_omp.cpp new file mode 100644 index 0000000000..1584d5e53e --- /dev/null +++ b/src/USER-OMP/reaxc_init_md_omp.cpp @@ -0,0 +1,188 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#include "pair_reaxc_omp.h" +#include "reaxc_init_md_omp.h" +#include "reaxc_allocate.h" +#include "reaxc_forces.h" +#include "reaxc_forces_omp.h" +#include "reaxc_io_tools.h" +#include "reaxc_list.h" +#include "reaxc_lookup.h" +#include "reaxc_reset_tools.h" +#include "reaxc_system_props.h" +#include "reaxc_tool_box.h" +#include "reaxc_vector.h" +#include "omp.h" + +// Functions definedd in reaxc_init_md.cpp +extern int Init_MPI_Datatypes(reax_system*, storage*, mpi_datatypes*, MPI_Comm, char*); +extern int Init_System(reax_system*, control_params*, char*); +extern int Init_Simulation_Data(reax_system*, control_params*, simulation_data*, char*); +extern int Init_Workspace(reax_system*, control_params*, storage*, MPI_Comm, char*); + +/* ---------------------------------------------------------------------- */ + +int Init_ListsOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, reax_list **lists, + mpi_datatypes *mpi_data, char *msg ) +{ + int i, total_hbonds, total_bonds, bond_cap, num_3body, cap_3body, Htop; + int *hb_top, *bond_top; + MPI_Comm comm; + + int TWICE = 2; + int mincap = system->mincap; + double safezone = system->safezone; + double saferzone = system->saferzone; + + comm = mpi_data->world; + bond_top = (int*) calloc( system->total_cap, sizeof(int) ); + hb_top = (int*) calloc( system->local_cap, sizeof(int) ); + Estimate_Storages( system, control, lists, + &Htop, hb_top, bond_top, &num_3body, comm ); + + if( control->hbond_cut > 0 ) { + /* init H indexes */ + total_hbonds = 0; + for( i = 0; i < system->n; ++i ) { + system->my_atoms[i].num_hbonds = hb_top[i]; + total_hbonds += hb_top[i]; + } + total_hbonds = (int)(MAX( total_hbonds*saferzone, mincap*MIN_HBONDS )); + + if( !Make_List( system->Hcap, total_hbonds, TYP_HBOND, + *lists+HBONDS, comm ) ) { + fprintf( stderr, "not enough space for hbonds list. terminating!\n" ); + MPI_Abort( comm, INSUFFICIENT_MEMORY ); + } + } + + total_bonds = 0; + for( i = 0; i < system->N; ++i ) { + system->my_atoms[i].num_bonds = bond_top[i]; + total_bonds += bond_top[i]; + } + bond_cap = (int)(MAX( total_bonds*safezone, mincap*MIN_BONDS )); + + if( !Make_List( system->total_cap, bond_cap, TYP_BOND, + *lists+BONDS, comm ) ) { + fprintf( stderr, "not enough space for bonds list. terminating!\n" ); + MPI_Abort( comm, INSUFFICIENT_MEMORY ); + } + + int nthreads = control->nthreads; + reax_list *bonds = (*lists)+BONDS; + + for (i = 0; i < bonds->num_intrs; ++i) + bonds->select.bond_list[i].bo_data.CdboReduction = + (double*) smalloc(sizeof(double)*nthreads, "CdboReduction", comm); + + /* 3bodies list */ + cap_3body = (int)(MAX( num_3body*safezone, MIN_3BODIES )); + if( !Make_List( bond_cap, cap_3body, TYP_THREE_BODY, + *lists+THREE_BODIES, comm ) ){ + + fprintf( stderr, "Problem in initializing angles list. Terminating!\n" ); + MPI_Abort( comm, INSUFFICIENT_MEMORY ); + } + + free( hb_top ); + free( bond_top ); + + return SUCCESS; +} + +/* ---------------------------------------------------------------------- */ + +// The only difference with the MPI-only function is calls to Init_ListsOMP and Init_Force_FunctionsOMP(). +void InitializeOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + mpi_datatypes *mpi_data, MPI_Comm comm ) +{ + char msg[MAX_STR]; + + + if( Init_MPI_Datatypes(system, workspace, mpi_data, comm, msg) == FAILURE ) { + fprintf( stderr, "p%d: init_mpi_datatypes: could not create datatypes\n", + system->my_rank ); + fprintf( stderr, "p%d: mpi_data couldn't be initialized! terminating.\n", + system->my_rank ); + MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); + } + + if( Init_System(system, control, msg) == FAILURE ){ + fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); + fprintf( stderr, "p%d: system could not be initialized! terminating.\n", + system->my_rank ); + MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); + } + + if( Init_Simulation_Data( system, control, data, msg ) == FAILURE ) { + fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); + fprintf( stderr, "p%d: sim_data couldn't be initialized! terminating.\n", + system->my_rank ); + MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); + } + + if( Init_Workspace( system, control, workspace, mpi_data->world, msg ) == + FAILURE ) { + fprintf( stderr, "p%d:init_workspace: not enough memory\n", + system->my_rank ); + fprintf( stderr, "p%d:workspace couldn't be initialized! terminating.\n", + system->my_rank ); + MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); + } + + if( Init_ListsOMP( system, control, data, workspace, lists, mpi_data, msg ) == + FAILURE ) { + fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); + fprintf( stderr, "p%d: system could not be initialized! terminating.\n", + system->my_rank ); + MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); + } + + if( Init_Output_Files(system,control,out_control,mpi_data,msg)== FAILURE) { + fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); + fprintf( stderr, "p%d: could not open output files! terminating...\n", + system->my_rank ); + MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); + } + + if( control->tabulate ) { + if( Init_Lookup_Tables( system, control, workspace, mpi_data, msg ) == FAILURE ) { + fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); + fprintf( stderr, "p%d: couldn't create lookup table! terminating.\n", + system->my_rank ); + MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); + } + } + + + Init_Force_FunctionsOMP( control ); +} + diff --git a/src/USER-OMP/reaxc_init_md_omp.h b/src/USER-OMP/reaxc_init_md_omp.h new file mode 100644 index 0000000000..bb4e9c7061 --- /dev/null +++ b/src/USER-OMP/reaxc_init_md_omp.h @@ -0,0 +1,34 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#ifndef __INIT_MD_OMP_H_ +#define __INIT_MD_OMP_H_ + +#include "reaxc_types.h" + +void InitializeOMP( reax_system*, control_params*, simulation_data*, storage*, + reax_list**, output_controls*, mpi_datatypes*, MPI_Comm ); +#endif diff --git a/src/USER-OMP/reaxc_multi_body_omp.cpp b/src/USER-OMP/reaxc_multi_body_omp.cpp new file mode 100644 index 0000000000..a355ce8609 --- /dev/null +++ b/src/USER-OMP/reaxc_multi_body_omp.cpp @@ -0,0 +1,285 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#include "pair_reaxc_omp.h" +#include +#include "thr_data.h" + +#include "reaxc_multi_body_omp.h" +#include "reaxc_bond_orders_omp.h" +#include "reaxc_list.h" +#include "reaxc_vector.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void Atom_EnergyOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, reax_list **lists, + output_controls *out_control ) +{ +#ifdef OMP_TIMING + double endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + /* Initialize parameters */ + double p_lp1 = system->reax_param.gp.l[15]; + double p_lp3 = system->reax_param.gp.l[5]; + double p_ovun3 = system->reax_param.gp.l[32]; + double p_ovun4 = system->reax_param.gp.l[31]; + double p_ovun6 = system->reax_param.gp.l[6]; + double p_ovun7 = system->reax_param.gp.l[8]; + double p_ovun8 = system->reax_param.gp.l[9]; + + int natoms = system->n; + int nthreads = control->nthreads; + reax_list *bonds = (*lists) + BONDS; + + double total_Elp = 0.0; + double total_Eun = 0.0; + double total_Eov = 0.0; + +#pragma omp parallel default(shared) reduction(+:total_Elp, total_Eun, total_Eov) +{ + int i, j, pj, type_i, type_j; + double Delta_lpcorr, dfvl; + double e_lp, expvd2, inv_expvd2, dElp, CElp, DlpVi; + double e_lph, Di, vov3, deahu2dbo, deahu2dsbo; + double e_ov, CEover1, CEover2, CEover3, CEover4; + double exp_ovun1, exp_ovun2, sum_ovun1, sum_ovun2; + double exp_ovun2n, exp_ovun6, exp_ovun8; + double inv_exp_ovun1, inv_exp_ovun2, inv_exp_ovun2n, inv_exp_ovun8; + double e_un, CEunder1, CEunder2, CEunder3, CEunder4; + double eng_tmp, f_tmp; + double p_lp2, p_ovun2, p_ovun5; + int numbonds; + + single_body_parameters *sbp_i, *sbp_j; + two_body_parameters *twbp; + bond_data *pbond; + bond_order_data *bo_ij; + + int tid = omp_get_thread_num(); + + long reductionOffset = (system->N * tid); + class PairReaxCOMP *pair_reax_ptr; + pair_reax_ptr = static_cast(system->pair_ptr); + class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, natoms, + system->pair_ptr->eatom, system->pair_ptr->vatom, thr); + +#pragma omp for schedule(guided) + for ( i = 0; i < system->n; ++i) { + type_i = system->my_atoms[i].type; + if(type_i < 0) continue; + sbp_i = &(system->reax_param.sbp[ type_i ]); + + /* lone-pair Energy */ + p_lp2 = sbp_i->p_lp2; + expvd2 = exp( -75 * workspace->Delta_lp[i] ); + inv_expvd2 = 1. / (1. + expvd2 ); + + numbonds = 0; + e_lp = 0.0; + for( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) + numbonds ++; + + /* calculate the energy */ + if(numbonds > 0) + total_Elp += e_lp = + p_lp2 * workspace->Delta_lp[i] * inv_expvd2; + + dElp = p_lp2 * inv_expvd2 + + 75 * p_lp2 * workspace->Delta_lp[i] * expvd2 * SQR(inv_expvd2); + CElp = dElp * workspace->dDelta_lp[i]; + + if(numbonds > 0) workspace->CdDelta[i] += CElp; // lp - 1st term + + /* tally into per-atom energy */ + if( system->pair_ptr->evflag) + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, i, system->n, 1, + e_lp, 0.0, 0.0, 0.0, 0.0, 0.0, thr); + + /* correction for C2 */ + if( p_lp3 > 0.001 && !strcmp(system->reax_param.sbp[type_i].name, "C") ) + for( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) { + j = bonds->select.bond_list[pj].nbr; + type_j = system->my_atoms[j].type; + if(type_j < 0) continue; + + if( !strcmp( system->reax_param.sbp[type_j].name, "C" ) ) { + twbp = &( system->reax_param.tbp[type_i][type_j]); + bo_ij = &( bonds->select.bond_list[pj].bo_data ); + Di = workspace->Delta[i]; + vov3 = bo_ij->BO - Di - 0.040*pow(Di, 4.); + + if( vov3 > 3. ) { + total_Elp += e_lph = p_lp3 * SQR(vov3-3.0); + + deahu2dbo = 2.*p_lp3*(vov3 - 3.); + deahu2dsbo = 2.*p_lp3*(vov3 - 3.)*(-1. - 0.16*pow(Di, 3.)); + + bo_ij->Cdbo += deahu2dbo; + workspace->CdDelta[i] += deahu2dsbo; + + /* tally into per-atom energy */ + if( system->pair_ptr->evflag) + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, system->n, 1, + e_lph, 0.0, 0.0, 0.0, 0.0, 0.0, thr); + } + } + } + } +#pragma omp barrier + +#pragma omp for schedule(guided) + for (i = 0; i < system->n; ++i) { + type_i = system->my_atoms[i].type; + if(type_i < 0) continue; + sbp_i = &(system->reax_param.sbp[ type_i ]); + + /* over-coordination energy */ + if( sbp_i->mass > 21.0 ) + dfvl = 0.0; + else dfvl = 1.0; // only for 1st-row elements + + p_ovun2 = sbp_i->p_ovun2; + sum_ovun1 = sum_ovun2 = 0; + for (pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj) { + j = bonds->select.bond_list[pj].nbr; + type_j = system->my_atoms[j].type; + if(type_j < 0) continue; + bo_ij = &(bonds->select.bond_list[pj].bo_data); + twbp = &(system->reax_param.tbp[ type_i ][ type_j ]); + + sum_ovun1 += twbp->p_ovun1 * twbp->De_s * bo_ij->BO; + sum_ovun2 += (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j])* + ( bo_ij->BO_pi + bo_ij->BO_pi2 ); + } + + exp_ovun1 = p_ovun3 * exp( p_ovun4 * sum_ovun2 ); + inv_exp_ovun1 = 1.0 / (1 + exp_ovun1); + Delta_lpcorr = workspace->Delta[i] - + (dfvl * workspace->Delta_lp_temp[i]) * inv_exp_ovun1; + + exp_ovun2 = exp( p_ovun2 * Delta_lpcorr ); + inv_exp_ovun2 = 1.0 / (1.0 + exp_ovun2); + + DlpVi = 1.0 / (Delta_lpcorr + sbp_i->valency + 1e-8); + CEover1 = Delta_lpcorr * DlpVi * inv_exp_ovun2; + + total_Eov += e_ov = sum_ovun1 * CEover1; + + CEover2 = sum_ovun1 * DlpVi * inv_exp_ovun2 * + (1.0 - Delta_lpcorr * ( DlpVi + p_ovun2 * exp_ovun2 * inv_exp_ovun2 )); + + CEover3 = CEover2 * (1.0 - dfvl * workspace->dDelta_lp[i] * inv_exp_ovun1 ); + + CEover4 = CEover2 * (dfvl * workspace->Delta_lp_temp[i]) * + p_ovun4 * exp_ovun1 * SQR(inv_exp_ovun1); + + + /* under-coordination potential */ + p_ovun2 = sbp_i->p_ovun2; + p_ovun5 = sbp_i->p_ovun5; + + exp_ovun2n = 1.0 / exp_ovun2; + exp_ovun6 = exp( p_ovun6 * Delta_lpcorr ); + exp_ovun8 = p_ovun7 * exp(p_ovun8 * sum_ovun2); + inv_exp_ovun2n = 1.0 / (1.0 + exp_ovun2n); + inv_exp_ovun8 = 1.0 / (1.0 + exp_ovun8); + + numbonds = 0; + e_un = 0.0; + for( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) + numbonds ++; + + if(numbonds > 0) total_Eun += e_un = + -p_ovun5 * (1.0 - exp_ovun6) * inv_exp_ovun2n * inv_exp_ovun8; + + CEunder1 = inv_exp_ovun2n * + ( p_ovun5 * p_ovun6 * exp_ovun6 * inv_exp_ovun8 + + p_ovun2 * e_un * exp_ovun2n ); + CEunder2 = -e_un * p_ovun8 * exp_ovun8 * inv_exp_ovun8; + CEunder3 = CEunder1 * (1.0 - dfvl*workspace->dDelta_lp[i]*inv_exp_ovun1); + CEunder4 = CEunder1 * (dfvl*workspace->Delta_lp_temp[i]) * + p_ovun4 * exp_ovun1 * SQR(inv_exp_ovun1) + CEunder2; + + /* tally into per-atom energy */ + if (system->pair_ptr->evflag) { + eng_tmp = e_ov; + if(numbonds > 0) eng_tmp+= e_un; + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, i, system->n, 1, + eng_tmp, 0.0, 0.0, 0.0, 0.0, 0.0, thr); + } + + /* forces */ + workspace->CdDelta[i] += CEover3; // OvCoor - 2nd term + if(numbonds > 0) workspace->CdDelta[i] += CEunder3; // UnCoor - 1st term + + for (pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj) { + pbond = &(bonds->select.bond_list[pj]); + j = pbond->nbr; + bo_ij = &(pbond->bo_data); + twbp = &(system->reax_param.tbp[ system->my_atoms[i].type ] + [system->my_atoms[pbond->nbr].type]); + + bo_ij->Cdbo += CEover1 * twbp->p_ovun1 * twbp->De_s; // OvCoor-1st + workspace->CdDeltaReduction[reductionOffset+j] += + CEover4 * (1.0 - dfvl*workspace->dDelta_lp[j]) * (bo_ij->BO_pi + bo_ij->BO_pi2); // OvCoor-3a + + bo_ij->Cdbopi += CEover4 * + (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]); // OvCoor-3b + bo_ij->Cdbopi2 += CEover4 * + (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]); // OvCoor-3b + + workspace->CdDeltaReduction[reductionOffset+j] += + CEunder4 * (1.0 - dfvl*workspace->dDelta_lp[j]) * (bo_ij->BO_pi + bo_ij->BO_pi2); // UnCoor - 2a + + bo_ij->Cdbopi += CEunder4 * + (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]); // UnCoor-2b + bo_ij->Cdbopi2 += CEunder4 * + (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]); // UnCoor-2b + } + } + + pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, thr); + + } + + data->my_en.e_lp += total_Elp; + data->my_en.e_ov += total_Eov; + data->my_en.e_un += total_Eun; + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEATOMENERGYINDEX] += (endTimeBase-startTimeBase); +#endif +} diff --git a/src/USER-OMP/reaxc_multi_body_omp.h b/src/USER-OMP/reaxc_multi_body_omp.h new file mode 100644 index 0000000000..b0746569ca --- /dev/null +++ b/src/USER-OMP/reaxc_multi_body_omp.h @@ -0,0 +1,35 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#ifndef __MULTI_BODY_OMP_H_ +#define __MULTI_BODY_OMP_H_ + +#include "reaxc_types.h" + +void Atom_EnergyOMP( reax_system*, control_params*, simulation_data*, + storage*, reax_list**, output_controls* ); + +#endif diff --git a/src/USER-OMP/reaxc_nonbonded_omp.cpp b/src/USER-OMP/reaxc_nonbonded_omp.cpp new file mode 100644 index 0000000000..73dbc1dda4 --- /dev/null +++ b/src/USER-OMP/reaxc_nonbonded_omp.cpp @@ -0,0 +1,383 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#include "pair_reaxc_omp.h" +#include +#include "thr_data.h" + +#include "reaxc_types.h" + +#include "reaxc_nonbonded.h" +#include "reaxc_nonbonded_omp.h" +#include "reaxc_bond_orders_omp.h" +#include "reaxc_list.h" +#include "reaxc_vector.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void vdW_Coulomb_Energy_OMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { + + int natoms = system->n; + int nthreads = control->nthreads; + long totalReductionSize = system->N * nthreads; + reax_list *far_nbrs = (*lists) + FAR_NBRS; + double p_vdW1 = system->reax_param.gp.l[28]; + double p_vdW1i = 1.0 / p_vdW1; + double total_EvdW = 0.; + double total_Eele = 0.; + +#pragma omp parallel default(shared) reduction(+: total_EvdW, total_Eele) //default(none) + { + int tid = omp_get_thread_num(); + int i, j, pj; + int start_i, end_i, orig_i, orig_j, flag; + double powr_vdW1, powgi_vdW1; + double tmp, r_ij, fn13, exp1, exp2; + double Tap, dTap, dfn13, CEvd, CEclmb, de_core; + double dr3gamij_1, dr3gamij_3; + double e_ele, e_ele_thr, e_vdW, e_vdW_thr, e_core, SMALL = 0.0001; + double e_lg, de_lg, r_ij5, r_ij6, re6; + rvec temp, ext_press; + two_body_parameters *twbp; + far_neighbor_data *nbr_pj; + + // Tallying variables: + double pe_vdw, f_tmp, delij[3]; + + long reductionOffset = (system->N * tid); + + class PairReaxCOMP *pair_reax_ptr; + pair_reax_ptr = static_cast(system->pair_ptr); + class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, + natoms, system->pair_ptr->eatom, + system->pair_ptr->vatom, thr); + e_core = 0; + e_vdW = 0; + e_vdW_thr = 0; + e_lg = 0; + de_lg = 0.0; + +//#pragma omp for schedule(dynamic,50) +#pragma omp for schedule(guided) + for( i = 0; i < natoms; ++i ) { + if(system->my_atoms[i].type < 0) continue; + start_i = Start_Index(i, far_nbrs); + end_i = End_Index(i, far_nbrs); + orig_i = system->my_atoms[i].orig_id; + + for( pj = start_i; pj < end_i; ++pj ) { + nbr_pj = &(far_nbrs->select.far_nbr_list[pj]); + j = nbr_pj->nbr; + orig_j = system->my_atoms[j].orig_id; + + flag = 0; + if(nbr_pj->d <= control->nonb_cut) { + if(j < natoms) flag = 1; + else if (orig_i < orig_j) flag = 1; + else if (orig_i == orig_j) { + if (nbr_pj->dvec[2] > SMALL) flag = 1; + else if (fabs(nbr_pj->dvec[2]) < SMALL) { + if (nbr_pj->dvec[1] > SMALL) flag = 1; + else if (fabs(nbr_pj->dvec[1]) < SMALL && nbr_pj->dvec[0] > SMALL) + flag = 1; + } + } + } + + if (flag) { + + r_ij = nbr_pj->d; + twbp = &(system->reax_param.tbp[ system->my_atoms[i].type ] + [ system->my_atoms[j].type ]); + + /* Calculate Taper and its derivative */ + // Tap = nbr_pj->Tap; -- precomputed during compte_H + Tap = workspace->Tap[7] * r_ij + workspace->Tap[6]; + Tap = Tap * r_ij + workspace->Tap[5]; + Tap = Tap * r_ij + workspace->Tap[4]; + Tap = Tap * r_ij + workspace->Tap[3]; + Tap = Tap * r_ij + workspace->Tap[2]; + Tap = Tap * r_ij + workspace->Tap[1]; + Tap = Tap * r_ij + workspace->Tap[0]; + + dTap = 7*workspace->Tap[7] * r_ij + 6*workspace->Tap[6]; + dTap = dTap * r_ij + 5*workspace->Tap[5]; + dTap = dTap * r_ij + 4*workspace->Tap[4]; + dTap = dTap * r_ij + 3*workspace->Tap[3]; + dTap = dTap * r_ij + 2*workspace->Tap[2]; + dTap += workspace->Tap[1]/r_ij; + + /*vdWaals Calculations*/ + if(system->reax_param.gp.vdw_type==1 || system->reax_param.gp.vdw_type==3) + { // shielding + powr_vdW1 = pow(r_ij, p_vdW1); + powgi_vdW1 = pow( 1.0 / twbp->gamma_w, p_vdW1); + + fn13 = pow( powr_vdW1 + powgi_vdW1, p_vdW1i ); + exp1 = exp( twbp->alpha * (1.0 - fn13 / twbp->r_vdW) ); + exp2 = exp( 0.5 * twbp->alpha * (1.0 - fn13 / twbp->r_vdW) ); + + e_vdW = twbp->D * (exp1 - 2.0 * exp2); + total_EvdW += Tap * e_vdW; + + dfn13 = pow( powr_vdW1 + powgi_vdW1, p_vdW1i - 1.0) * + pow(r_ij, p_vdW1 - 2.0); + + CEvd = dTap * e_vdW - + Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2) * dfn13; + } + else{ // no shielding + exp1 = exp( twbp->alpha * (1.0 - r_ij / twbp->r_vdW) ); + exp2 = exp( 0.5 * twbp->alpha * (1.0 - r_ij / twbp->r_vdW) ); + + e_vdW = twbp->D * (exp1 - 2.0 * exp2); + total_EvdW += Tap * e_vdW; + + CEvd = dTap * e_vdW - + Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2) / r_ij; + } + + if(system->reax_param.gp.vdw_type==2 || system->reax_param.gp.vdw_type==3) + { // innner wall + e_core = twbp->ecore * exp(twbp->acore * (1.0-(r_ij/twbp->rcore))); + total_EvdW += Tap * e_core; + + de_core = -(twbp->acore/twbp->rcore) * e_core; + CEvd += dTap * e_core + Tap * de_core / r_ij; + + // lg correction, only if lgvdw is yes + if (control->lgflag) { + r_ij5 = pow( r_ij, 5.0 ); + r_ij6 = pow( r_ij, 6.0 ); + re6 = pow( twbp->lgre, 6.0 ); + + e_lg = -(twbp->lgcij/( r_ij6 + re6 )); + total_EvdW += Tap * e_lg; + + de_lg = -6.0 * e_lg * r_ij5 / ( r_ij6 + re6 ) ; + CEvd += dTap * e_lg + Tap * de_lg / r_ij; + } + + } + + /*Coulomb Calculations*/ + dr3gamij_1 = ( r_ij * r_ij * r_ij + twbp->gamma ); + dr3gamij_3 = pow( dr3gamij_1 , 0.33333333333333 ); + + tmp = Tap / dr3gamij_3; + total_Eele += e_ele = + C_ele * system->my_atoms[i].q * system->my_atoms[j].q * tmp; + + CEclmb = C_ele * system->my_atoms[i].q * system->my_atoms[j].q * + ( dTap - Tap * r_ij / dr3gamij_1 ) / dr3gamij_3; + + /* tally into per-atom energy */ + if( system->pair_ptr->evflag || system->pair_ptr->vflag_atom) { + pe_vdw = Tap * (e_vdW + e_core + e_lg); + rvec_ScaledSum( delij, 1., system->my_atoms[i].x, + -1., system->my_atoms[j].x ); + f_tmp = -(CEvd + CEclmb); + pair_reax_ptr->ev_tally_thr_proxy( system->pair_ptr, i, j, natoms, + 1, pe_vdw, e_ele, f_tmp, + delij[0], delij[1], delij[2], thr); + } + + if( control->virial == 0 ) { + rvec_ScaledAdd( workspace->f[i], -(CEvd + CEclmb), nbr_pj->dvec ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+j], + +(CEvd + CEclmb), nbr_pj->dvec ); + } + else { /* NPT, iNPT or sNPT */ + /* for pressure coupling, terms not related to bond order + derivatives are added directly into pressure vector/tensor */ + + rvec_Scale( temp, CEvd + CEclmb, nbr_pj->dvec ); + rvec_ScaledAdd( workspace->f[reductionOffset+i], -1., temp ); + rvec_Add( workspace->forceReduction[reductionOffset+j], temp); + + rvec_iMultiply( ext_press, nbr_pj->rel_box, temp ); + + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + } + } + } + } + + pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, thr); + } // parallel region + + data->my_en.e_vdW = total_EvdW; + data->my_en.e_ele = total_Eele; + + Compute_Polarization_Energy( system, data ); +} + +/* ---------------------------------------------------------------------- */ + +void Tabulated_vdW_Coulomb_Energy_OMP(reax_system *system,control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, + output_controls *out_control ) { + + double SMALL = 0.0001; + int natoms = system->n; + reax_list *far_nbrs = (*lists) + FAR_NBRS; + int nthreads = control->nthreads; + long totalReductionSize = system->N * nthreads; + double total_EvdW = 0.; + double total_Eele = 0.; + +#pragma omp parallel default(shared) reduction(+:total_EvdW, total_Eele) + { + int i, j, pj, r; + int type_i, type_j, tmin, tmax; + int start_i, end_i, orig_i, orig_j, flag; + double r_ij, base, dif; + double e_vdW, e_ele; + double CEvd, CEclmb; + double f_tmp, delij[3]; + rvec temp, ext_press; + far_neighbor_data *nbr_pj; + LR_lookup_table *t; + int tid = omp_get_thread_num(); + long froffset = (system->N * tid); + + class PairReaxCOMP *pair_reax_ptr; + pair_reax_ptr = static_cast(system->pair_ptr); + class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, + natoms, system->pair_ptr->eatom, + system->pair_ptr->vatom, thr); + +//#pragma omp for schedule(dynamic,50) +#pragma omp for schedule(guided) + for (i = 0; i < natoms; ++i) { + type_i = system->my_atoms[i].type; + if(type_i < 0) continue; + start_i = Start_Index(i,far_nbrs); + end_i = End_Index(i,far_nbrs); + orig_i = system->my_atoms[i].orig_id; + + for (pj = start_i; pj < end_i; ++pj) { + nbr_pj = &(far_nbrs->select.far_nbr_list[pj]); + j = nbr_pj->nbr; + type_j = system->my_atoms[j].type; + if(type_j < 0) continue; + orig_j = system->my_atoms[j].orig_id; + + flag = 0; + if(nbr_pj->d <= control->nonb_cut) { + if(j < natoms) flag = 1; + else if (orig_i < orig_j) flag = 1; + else if (orig_i == orig_j) { + if (nbr_pj->dvec[2] > SMALL) flag = 1; + else if (fabs(nbr_pj->dvec[2]) < SMALL) { + if (nbr_pj->dvec[1] > SMALL) flag = 1; + else if (fabs(nbr_pj->dvec[1]) < SMALL && nbr_pj->dvec[0] > SMALL) + flag = 1; + } + } + + } + + if (flag) { + + r_ij = nbr_pj->d; + tmin = MIN( type_i, type_j ); + tmax = MAX( type_i, type_j ); + t = &( LR[tmin][tmax] ); + + /* Cubic Spline Interpolation */ + r = (int)(r_ij * t->inv_dx); + if( r == 0 ) ++r; + base = (double)(r+1) * t->dx; + dif = r_ij - base; + + e_vdW = ((t->vdW[r].d*dif + t->vdW[r].c)*dif + t->vdW[r].b)*dif + + t->vdW[r].a; + + e_ele = ((t->ele[r].d*dif + t->ele[r].c)*dif + t->ele[r].b)*dif + + t->ele[r].a; + e_ele *= system->my_atoms[i].q * system->my_atoms[j].q; + + total_EvdW += e_vdW; + total_Eele += e_ele; + + CEvd = ((t->CEvd[r].d*dif + t->CEvd[r].c)*dif + t->CEvd[r].b)*dif + + t->CEvd[r].a; + + CEclmb = ((t->CEclmb[r].d*dif+t->CEclmb[r].c)*dif+t->CEclmb[r].b)*dif + + t->CEclmb[r].a; + CEclmb *= system->my_atoms[i].q * system->my_atoms[j].q; + + /* tally into per-atom energy */ + if( system->pair_ptr->evflag || system->pair_ptr->vflag_atom) { + rvec_ScaledSum( delij, 1., system->my_atoms[i].x, + -1., system->my_atoms[j].x ); + f_tmp = -(CEvd + CEclmb); + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, natoms, 1, e_vdW, e_ele, + f_tmp, delij[0], delij[1], delij[2], thr); + } + + if( control->virial == 0 ) { + rvec_ScaledAdd( workspace->f[i], -(CEvd + CEclmb), nbr_pj->dvec ); + rvec_ScaledAdd( workspace->forceReduction[froffset+j], + +(CEvd + CEclmb), nbr_pj->dvec ); + } + else { // NPT, iNPT or sNPT + /* for pressure coupling, terms not related to bond order derivatives + are added directly into pressure vector/tensor */ + rvec_Scale( temp, CEvd + CEclmb, nbr_pj->dvec ); + + rvec_ScaledAdd( workspace->f[i], -1., temp ); + rvec_Add( workspace->forceReduction[froffset+j], temp ); + + rvec_iMultiply( ext_press, nbr_pj->rel_box, temp ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + } + } + } + } + + pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, thr); + } // end omp parallel + + data->my_en.e_vdW = total_EvdW; + data->my_en.e_ele = total_Eele; + + Compute_Polarization_Energy( system, data ); +} diff --git a/src/USER-OMP/reaxc_nonbonded_omp.h b/src/USER-OMP/reaxc_nonbonded_omp.h new file mode 100644 index 0000000000..1ea51257cf --- /dev/null +++ b/src/USER-OMP/reaxc_nonbonded_omp.h @@ -0,0 +1,38 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#ifndef __NONBONDED_OMP_H_ +#define __NONBONDED_OMP_H_ + +#include "reaxc_types.h" + +void vdW_Coulomb_Energy_OMP( reax_system*, control_params*, simulation_data*, + storage*, reax_list**, output_controls* ); + +void Tabulated_vdW_Coulomb_Energy_OMP( reax_system*, control_params*, + simulation_data*, storage*, + reax_list**, output_controls* ); +#endif diff --git a/src/USER-OMP/reaxc_torsion_angles_omp.cpp b/src/USER-OMP/reaxc_torsion_angles_omp.cpp new file mode 100644 index 0000000000..294eeb9544 --- /dev/null +++ b/src/USER-OMP/reaxc_torsion_angles_omp.cpp @@ -0,0 +1,467 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#include "pair_reaxc_omp.h" +#include +#include "thr_data.h" + +#include "reaxc_types.h" +#include "reaxc_torsion_angles_omp.h" +#include "reaxc_bond_orders_omp.h" +#include "reaxc_list.h" +#include "reaxc_tool_box.h" +#include "reaxc_vector.h" + +#define MIN_SINE 1e-10 + +using namespace LAMMPS_NS; + +// Functions defined in reaxc_torsion_angles.cpp +extern double Calculate_Omega(rvec, double, rvec, double, rvec, double, rvec, double, + three_body_interaction_data*, three_body_interaction_data*, + rvec, rvec, rvec, rvec, output_controls*); + +/* ---------------------------------------------------------------------- */ + +void Torsion_AnglesOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) +{ +#ifdef OMP_TIMING + double endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + int natoms = system->n; + reax_list *bonds = (*lists) + BONDS; + reax_list *thb_intrs = (*lists) + THREE_BODIES; + double p_tor2 = system->reax_param.gp.l[23]; + double p_tor3 = system->reax_param.gp.l[24]; + double p_tor4 = system->reax_param.gp.l[25]; + double p_cot2 = system->reax_param.gp.l[27]; + double total_Etor = 0; + double total_Econ = 0; + int nthreads = control->nthreads; + +#pragma omp parallel default(shared) reduction(+: total_Etor, total_Econ) + { + int i, j, k, l, pi, pj, pk, pl, pij, plk; + int type_i, type_j, type_k, type_l; + int start_j, end_j, start_k, end_k; + int start_pj, end_pj, start_pk, end_pk; + int num_frb_intrs = 0; + + double Delta_j, Delta_k; + double r_ij, r_jk, r_kl, r_li; + double BOA_ij, BOA_jk, BOA_kl; + + double exp_tor2_ij, exp_tor2_jk, exp_tor2_kl; + double exp_tor1, exp_tor3_DjDk, exp_tor4_DjDk, exp_tor34_inv; + double exp_cot2_jk, exp_cot2_ij, exp_cot2_kl; + double fn10, f11_DjDk, dfn11, fn12; + double theta_ijk, theta_jkl; + double sin_ijk, sin_jkl; + double cos_ijk, cos_jkl; + double tan_ijk_i, tan_jkl_i; + double omega, cos_omega, cos2omega, cos3omega; + rvec dcos_omega_di, dcos_omega_dj, dcos_omega_dk, dcos_omega_dl; + double CV, cmn, CEtors1, CEtors2, CEtors3, CEtors4; + double CEtors5, CEtors6, CEtors7, CEtors8, CEtors9; + double Cconj, CEconj1, CEconj2, CEconj3; + double CEconj4, CEconj5, CEconj6; + double e_tor, e_con; + rvec dvec_li; + rvec force, ext_press; + ivec rel_box_jl; + // rtensor total_rtensor, temp_rtensor; + four_body_header *fbh; + four_body_parameters *fbp; + bond_data *pbond_ij, *pbond_jk, *pbond_kl; + bond_order_data *bo_ij, *bo_jk, *bo_kl; + three_body_interaction_data *p_ijk, *p_jkl; + + // Virial tallying variables + double delil[3], deljl[3], delkl[3]; + double eng_tmp, f_scaler, fi_tmp[3], fj_tmp[3], fk_tmp[3]; + + int tid = omp_get_thread_num(); + long reductionOffset = (system->N * tid); + int num_thb_intrs = 0; + class PairReaxCOMP *pair_reax_ptr; + pair_reax_ptr = static_cast(system->pair_ptr); + class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, + system->N, system->pair_ptr->eatom, + system->pair_ptr->vatom, thr); + +#pragma omp for schedule(static) + for (j = 0; j < system->N; ++j) { + start_j = Start_Index(j, bonds); + end_j = End_Index(j, bonds); + + for (pk = start_j; pk < end_j; ++pk) { + bo_jk = &( bonds->select.bond_list[pk].bo_data ); + for (k = 0; k < nthreads; ++k) + bo_jk->CdboReduction[k] = 0.; + } + } + +#pragma omp for schedule(dynamic,50) + for (j = 0; j < natoms; ++j) { + type_j = system->my_atoms[j].type; + Delta_j = workspace->Delta_boc[j]; + start_j = Start_Index(j, bonds); + end_j = End_Index(j, bonds); + + for (pk = start_j; pk < end_j; ++pk) { + pbond_jk = &( bonds->select.bond_list[pk] ); + k = pbond_jk->nbr; + bo_jk = &( pbond_jk->bo_data ); + BOA_jk = bo_jk->BO - control->thb_cut; + + /* see if there are any 3-body interactions involving j&k + where j is the central atom. Otherwise there is no point in + trying to form a 4-body interaction out of this neighborhood */ + if (system->my_atoms[j].orig_id < system->my_atoms[k].orig_id && + bo_jk->BO > control->thb_cut/*0*/ && Num_Entries(pk, thb_intrs)) { + start_k = Start_Index(k, bonds); + end_k = End_Index(k, bonds); + pj = pbond_jk->sym_index; // pj points to j on k's list + + /* do the same check as above: + are there any 3-body interactions involving k&j + where k is the central atom */ + if (Num_Entries(pj, thb_intrs)) { + type_k = system->my_atoms[k].type; + Delta_k = workspace->Delta_boc[k]; + r_jk = pbond_jk->d; + + start_pk = Start_Index(pk, thb_intrs ); + end_pk = End_Index(pk, thb_intrs ); + start_pj = Start_Index(pj, thb_intrs ); + end_pj = End_Index(pj, thb_intrs ); + + exp_tor2_jk = exp( -p_tor2 * BOA_jk ); + exp_cot2_jk = exp( -p_cot2 * SQR(BOA_jk - 1.5) ); + exp_tor3_DjDk = exp( -p_tor3 * (Delta_j + Delta_k) ); + exp_tor4_DjDk = exp( p_tor4 * (Delta_j + Delta_k) ); + exp_tor34_inv = 1.0 / (1.0 + exp_tor3_DjDk + exp_tor4_DjDk); + f11_DjDk = (2.0 + exp_tor3_DjDk) * exp_tor34_inv; + + + /* pick i up from j-k interaction where j is the central atom */ + for (pi = start_pk; pi < end_pk; ++pi) { + p_ijk = &( thb_intrs->select.three_body_list[pi] ); + pij = p_ijk->pthb; // pij is pointer to i on j's bond_list + pbond_ij = &( bonds->select.bond_list[pij] ); + bo_ij = &( pbond_ij->bo_data ); + + if (bo_ij->BO > control->thb_cut/*0*/) { + i = p_ijk->thb; + type_i = system->my_atoms[i].type; + r_ij = pbond_ij->d; + BOA_ij = bo_ij->BO - control->thb_cut; + + theta_ijk = p_ijk->theta; + sin_ijk = sin( theta_ijk ); + cos_ijk = cos( theta_ijk ); + //tan_ijk_i = 1. / tan( theta_ijk ); + if( sin_ijk >= 0 && sin_ijk <= MIN_SINE ) + tan_ijk_i = cos_ijk / MIN_SINE; + else if( sin_ijk <= 0 && sin_ijk >= -MIN_SINE ) + tan_ijk_i = cos_ijk / -MIN_SINE; + else tan_ijk_i = cos_ijk / sin_ijk; + + exp_tor2_ij = exp( -p_tor2 * BOA_ij ); + exp_cot2_ij = exp( -p_cot2 * SQR(BOA_ij -1.5) ); + + + /* pick l up from j-k interaction where k is the central atom */ + for (pl = start_pj; pl < end_pj; ++pl) { + p_jkl = &( thb_intrs->select.three_body_list[pl] ); + l = p_jkl->thb; + plk = p_jkl->pthb; //pointer to l on k's bond_list! + pbond_kl = &( bonds->select.bond_list[plk] ); + bo_kl = &( pbond_kl->bo_data ); + type_l = system->my_atoms[l].type; + fbh = &(system->reax_param.fbp[type_i][type_j] + [type_k][type_l]); + fbp = &(system->reax_param.fbp[type_i][type_j] + [type_k][type_l].prm[0]); + + if (i != l && fbh->cnt && + bo_kl->BO > control->thb_cut/*0*/ && + bo_ij->BO * bo_jk->BO * bo_kl->BO > control->thb_cut/*0*/) { + ++num_frb_intrs; + //fprintf(stderr, + // "%5d: %6d %6d %6d %6d\n", num_frb_intrs, + // system->my_atoms[i].orig_id,system->my_atoms[j].orig_id, + // system->my_atoms[k].orig_id,system->my_atoms[l].orig_id); + + r_kl = pbond_kl->d; + BOA_kl = bo_kl->BO - control->thb_cut; + + theta_jkl = p_jkl->theta; + sin_jkl = sin( theta_jkl ); + cos_jkl = cos( theta_jkl ); + //tan_jkl_i = 1. / tan( theta_jkl ); + if( sin_jkl >= 0 && sin_jkl <= MIN_SINE ) + tan_jkl_i = cos_jkl / MIN_SINE; + else if( sin_jkl <= 0 && sin_jkl >= -MIN_SINE ) + tan_jkl_i = cos_jkl / -MIN_SINE; + else tan_jkl_i = cos_jkl /sin_jkl; + + rvec_ScaledSum( dvec_li, 1., system->my_atoms[i].x, + -1., system->my_atoms[l].x ); + r_li = rvec_Norm( dvec_li ); + + + /* omega and its derivative */ + omega = Calculate_Omega( pbond_ij->dvec, r_ij, + pbond_jk->dvec, r_jk, + pbond_kl->dvec, r_kl, + dvec_li, r_li, + p_ijk, p_jkl, + dcos_omega_di, dcos_omega_dj, + dcos_omega_dk, dcos_omega_dl, + out_control ); + + cos_omega = cos( omega ); + cos2omega = cos( 2. * omega ); + cos3omega = cos( 3. * omega ); + /* end omega calculations */ + + /* torsion energy */ + exp_tor1 = exp( fbp->p_tor1 * + SQR(2.0 - bo_jk->BO_pi - f11_DjDk) ); + exp_tor2_kl = exp( -p_tor2 * BOA_kl ); + exp_cot2_kl = exp( -p_cot2 * SQR(BOA_kl - 1.5) ); + fn10 = (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_jk) * + (1.0 - exp_tor2_kl); + + CV = 0.5 * ( fbp->V1 * (1.0 + cos_omega) + + fbp->V2 * exp_tor1 * (1.0 - cos2omega) + + fbp->V3 * (1.0 + cos3omega) ); + + total_Etor += e_tor = fn10 * sin_ijk * sin_jkl * CV; + + dfn11 = (-p_tor3 * exp_tor3_DjDk + + (p_tor3 * exp_tor3_DjDk - p_tor4 * exp_tor4_DjDk) * + (2.0 + exp_tor3_DjDk) * exp_tor34_inv) * + exp_tor34_inv; + + CEtors1 = sin_ijk * sin_jkl * CV; + + CEtors2 = -fn10 * 2.0 * fbp->p_tor1 * fbp->V2 * exp_tor1 * + (2.0 - bo_jk->BO_pi - f11_DjDk) * (1.0 - SQR(cos_omega)) * + sin_ijk * sin_jkl; + CEtors3 = CEtors2 * dfn11; + + CEtors4 = CEtors1 * p_tor2 * exp_tor2_ij * + (1.0 - exp_tor2_jk) * (1.0 - exp_tor2_kl); + CEtors5 = CEtors1 * p_tor2 * + (1.0 - exp_tor2_ij) * exp_tor2_jk * (1.0 - exp_tor2_kl); + CEtors6 = CEtors1 * p_tor2 * + (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_jk) * exp_tor2_kl; + + cmn = -fn10 * CV; + CEtors7 = cmn * sin_jkl * tan_ijk_i; + CEtors8 = cmn * sin_ijk * tan_jkl_i; + + CEtors9 = fn10 * sin_ijk * sin_jkl * + (0.5 * fbp->V1 - 2.0 * fbp->V2 * exp_tor1 * cos_omega + + 1.5 * fbp->V3 * (cos2omega + 2.0 * SQR(cos_omega))); + /* end of torsion energy */ + + + /* 4-body conjugation energy */ + fn12 = exp_cot2_ij * exp_cot2_jk * exp_cot2_kl; + //data->my_en.e_con += e_con = + total_Econ += e_con = + fbp->p_cot1 * fn12 * + (1.0 + (SQR(cos_omega) - 1.0) * sin_ijk * sin_jkl); + + Cconj = -2.0 * fn12 * fbp->p_cot1 * p_cot2 * + (1.0 + (SQR(cos_omega) - 1.0) * sin_ijk * sin_jkl); + + CEconj1 = Cconj * (BOA_ij - 1.5e0); + CEconj2 = Cconj * (BOA_jk - 1.5e0); + CEconj3 = Cconj * (BOA_kl - 1.5e0); + + CEconj4 = -fbp->p_cot1 * fn12 * + (SQR(cos_omega) - 1.0) * sin_jkl * tan_ijk_i; + CEconj5 = -fbp->p_cot1 * fn12 * + (SQR(cos_omega) - 1.0) * sin_ijk * tan_jkl_i; + CEconj6 = 2.0 * fbp->p_cot1 * fn12 * + cos_omega * sin_ijk * sin_jkl; + /* end 4-body conjugation energy */ + + /* FORCES */ + bo_jk->Cdbopi += CEtors2; + workspace->CdDelta[j] += CEtors3; + //workspace->CdDelta[k] += CEtors3; + workspace->CdDeltaReduction[reductionOffset+k] += CEtors3; + bo_ij->Cdbo += (CEtors4 + CEconj1); + bo_jk->Cdbo += (CEtors5 + CEconj2); + //bo_kl->Cdbo += (CEtors6 + CEconj3); + bo_kl->CdboReduction[tid] += (CEtors6 + CEconj3); + + if( control->virial == 0 ) { + /* dcos_theta_ijk */ + rvec_ScaledAdd( workspace->f[j], + CEtors7 + CEconj4, p_ijk->dcos_dj ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+i], + CEtors7 + CEconj4, p_ijk->dcos_dk ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+k], + CEtors7 + CEconj4, p_ijk->dcos_di ); + + /* dcos_theta_jkl */ + rvec_ScaledAdd( workspace->f[j], + CEtors8 + CEconj5, p_jkl->dcos_di ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+k], + CEtors8 + CEconj5, p_jkl->dcos_dj ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+l], + CEtors8 + CEconj5, p_jkl->dcos_dk ); + + /* dcos_omega */ + rvec_ScaledAdd( workspace->f[j], + CEtors9 + CEconj6, dcos_omega_dj ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+i], + CEtors9 + CEconj6, dcos_omega_di ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+k], + CEtors9 + CEconj6, dcos_omega_dk ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+l], + CEtors9 + CEconj6, dcos_omega_dl ); + } + else { + ivec_Sum(rel_box_jl, pbond_jk->rel_box, pbond_kl->rel_box); + + /* dcos_theta_ijk */ + rvec_Scale( force, CEtors7 + CEconj4, p_ijk->dcos_dk ); + rvec_Add( workspace->forceReduction[reductionOffset+i], force ); + rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + + rvec_ScaledAdd( workspace->f[j], + CEtors7 + CEconj4, p_ijk->dcos_dj ); + + rvec_Scale( force, CEtors7 + CEconj4, p_ijk->dcos_di ); + rvec_Add( workspace->forceReduction[reductionOffset+k], force ); + rvec_iMultiply( ext_press, pbond_jk->rel_box, force ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + + /* dcos_theta_jkl */ + rvec_ScaledAdd( workspace->f[j], + CEtors8 + CEconj5, p_jkl->dcos_di ); + + rvec_Scale( force, CEtors8 + CEconj5, p_jkl->dcos_dj ); + rvec_Add( workspace->forceReduction[reductionOffset+k], force ); + rvec_iMultiply( ext_press, pbond_jk->rel_box, force ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + + rvec_Scale( force, CEtors8 + CEconj5, p_jkl->dcos_dk ); + rvec_Add( workspace->forceReduction[reductionOffset+l], force ); + rvec_iMultiply( ext_press, rel_box_jl, force ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + + /* dcos_omega */ + rvec_Scale( force, CEtors9 + CEconj6, dcos_omega_di ); + rvec_Add( workspace->forceReduction[reductionOffset+i], force ); + rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + + rvec_ScaledAdd( workspace->f[j], + CEtors9 + CEconj6, dcos_omega_dj ); + + rvec_Scale( force, CEtors9 + CEconj6, dcos_omega_dk ); + rvec_Add( workspace->forceReduction[reductionOffset+k], force ); + rvec_iMultiply( ext_press, pbond_jk->rel_box, force ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + + rvec_Scale( force, CEtors9 + CEconj6, dcos_omega_dl ); + rvec_Add( workspace->forceReduction[reductionOffset+i], force ); + rvec_iMultiply( ext_press, rel_box_jl, force ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + } + + /* tally into per-atom virials */ + if( system->pair_ptr->vflag_atom || system->pair_ptr->evflag) { + + // acquire vectors + rvec_ScaledSum( delil, 1., system->my_atoms[l].x, + -1., system->my_atoms[i].x ); + rvec_ScaledSum( deljl, 1., system->my_atoms[l].x, + -1., system->my_atoms[j].x ); + rvec_ScaledSum( delkl, 1., system->my_atoms[l].x, + -1., system->my_atoms[k].x ); + // dcos_theta_ijk + rvec_Scale( fi_tmp, CEtors7 + CEconj4, p_ijk->dcos_dk ); + rvec_Scale( fj_tmp, CEtors7 + CEconj4, p_ijk->dcos_dj ); + rvec_Scale( fk_tmp, CEtors7 + CEconj4, p_ijk->dcos_di ); + + // dcos_theta_jkl + rvec_ScaledAdd( fj_tmp, CEtors8 + CEconj5, p_jkl->dcos_di ); + rvec_ScaledAdd( fk_tmp, CEtors8 + CEconj5, p_jkl->dcos_dj ); + + // dcos_omega + rvec_ScaledAdd( fi_tmp, CEtors9 + CEconj6, dcos_omega_di ); + rvec_ScaledAdd( fj_tmp, CEtors9 + CEconj6, dcos_omega_dj ); + rvec_ScaledAdd( fk_tmp, CEtors9 + CEconj6, dcos_omega_dk ); + + // tally + eng_tmp = e_tor + e_con; + + if (system->pair_ptr->evflag) + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, j, k, system->n, 1, + eng_tmp, 0.0, 0.0, 0.0, 0.0, 0.0, thr); + + // NEED TO MAKE AN OMP VERSION OF THIS CALL! + if (system->pair_ptr->vflag_atom) + system->pair_ptr->v_tally4(i, j, k, l, fi_tmp, fj_tmp, fk_tmp, + delil, deljl, delkl ); + } + + } // pl check ends + } // pl loop ends + } // pi check ends + } // pi loop ends + } // k-j neighbor check ends + } // jmy_en.e_tor = total_Etor; + data->my_en.e_con = total_Econ; + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTETORSIONANGLESBOINDEX] += (endTimeBase-startTimeBase); +#endif +} diff --git a/src/USER-OMP/reaxc_torsion_angles_omp.h b/src/USER-OMP/reaxc_torsion_angles_omp.h new file mode 100644 index 0000000000..51b05f7ae1 --- /dev/null +++ b/src/USER-OMP/reaxc_torsion_angles_omp.h @@ -0,0 +1,36 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#ifndef __TORSION_ANGLES_OMP_H_ +#define __TORSION_ANGLES_OMP_H_ + +#include "reaxc_types.h" +#include "reaxc_torsion_angles.h" + +void Torsion_AnglesOMP( reax_system*, control_params*, simulation_data*, + storage*, reax_list**, output_controls* ); + +#endif diff --git a/src/USER-OMP/reaxc_valence_angles_omp.cpp b/src/USER-OMP/reaxc_valence_angles_omp.cpp new file mode 100644 index 0000000000..7c45db1493 --- /dev/null +++ b/src/USER-OMP/reaxc_valence_angles_omp.cpp @@ -0,0 +1,613 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#include "pair_reaxc_omp.h" +#include +#include "thr_data.h" + +#include "reaxc_types.h" +#include "reaxc_valence_angles.h" +#include "reaxc_valence_angles_omp.h" +#include "reaxc_bond_orders_omp.h" +#include "reaxc_list.h" +#include "reaxc_vector.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void Calculate_dCos_ThetaOMP( rvec dvec_ji, double d_ji, rvec dvec_jk, double d_jk, + rvec* dcos_theta_di, + rvec* dcos_theta_dj, + rvec* dcos_theta_dk ) +{ + int t; + double sqr_d_ji = SQR(d_ji); + double sqr_d_jk = SQR(d_jk); + double inv_dists = 1.0 / (d_ji * d_jk); + double inv_dists3 = inv_dists * inv_dists * inv_dists; + double dot_dvecs = dvec_ji[0]*dvec_jk[0] + dvec_ji[1]*dvec_jk[1] + dvec_ji[2]*dvec_jk[2]; + double Cdot_inv3 = dot_dvecs * inv_dists3; + + double csqr_jk = Cdot_inv3 * sqr_d_jk; + double csqr_ji = Cdot_inv3 * sqr_d_ji; + + // Try to help compiler out by unrolling + // x-component + double dinv_jk = dvec_jk[0] * inv_dists; + double dinv_ji = dvec_ji[0] * inv_dists; + + double cdev_ji = csqr_jk * dvec_ji[0]; + double cdev_jk = csqr_ji * dvec_jk[0]; + + (*dcos_theta_di)[0] = dinv_jk - cdev_ji; + (*dcos_theta_dj)[0] = -(dinv_jk + dinv_ji) + cdev_ji + cdev_jk; + (*dcos_theta_dk)[0] = dinv_ji - cdev_jk; + + // y-component + dinv_jk = dvec_jk[1] * inv_dists; + dinv_ji = dvec_ji[1] * inv_dists; + + cdev_ji = csqr_jk * dvec_ji[1]; + cdev_jk = csqr_ji * dvec_jk[1]; + + (*dcos_theta_di)[1] = dinv_jk - cdev_ji; + (*dcos_theta_dj)[1] = -(dinv_jk + dinv_ji) + cdev_ji + cdev_jk; + (*dcos_theta_dk)[1] = dinv_ji - cdev_jk; + + // z-component + dinv_jk = dvec_jk[2] * inv_dists; + dinv_ji = dvec_ji[2] * inv_dists; + + cdev_ji = csqr_jk * dvec_ji[2]; + cdev_jk = csqr_ji * dvec_jk[2]; + + (*dcos_theta_di)[2] = dinv_jk - cdev_ji; + (*dcos_theta_dj)[2] = -(dinv_jk + dinv_ji) + cdev_ji + cdev_jk; + (*dcos_theta_dk)[2] = dinv_ji - cdev_jk; +} + +/* ---------------------------------------------------------------------- */ + +/* this is a 3-body interaction in which the main role is + played by j which sits in the middle of the other two. */ +void Valence_AnglesOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) +{ + +#ifdef OMP_TIMING + double endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + reax_list *bonds = (*lists) + BONDS; + reax_list *thb_intrs = (*lists) + THREE_BODIES; + + // Precompute and store valence_angle offsets for OpenMP code. + int * _my_offset = workspace->valence_angle_atom_myoffset; + + /* global parameters used in these calculations */ + double p_val6 = system->reax_param.gp.l[14]; + double p_val8 = system->reax_param.gp.l[33]; + double p_val9 = system->reax_param.gp.l[16]; + double p_val10 = system->reax_param.gp.l[17]; + double total_Eang = 0; + double total_Epen = 0; + double total_Ecoa = 0; + + int per_atom = (thb_intrs->num_intrs / system->N); + int nthreads = control->nthreads; + int chunksize = system->N/(nthreads*10); + int num_thb_intrs = 0; + int TWICE = 2; + +#pragma omp parallel default(shared) reduction(+:total_Eang, total_Epen, total_Ecoa, num_thb_intrs) + { + int i, j, pi, k, pk, t; + int type_i, type_j, type_k; + int start_j, end_j, start_pk, end_pk; + int cnt, my_offset, mark; + + double temp, temp_bo_jt, pBOjt7; + double p_val1, p_val2, p_val3, p_val4, p_val5, p_val7; + double p_pen1, p_pen2, p_pen3, p_pen4; + double p_coa1, p_coa2, p_coa3, p_coa4; + double trm8, expval6, expval7, expval2theta, expval12theta, exp3ij, exp3jk; + double exp_pen2ij, exp_pen2jk, exp_pen3, exp_pen4, trm_pen34, exp_coa2; + double dSBO1, dSBO2, SBO, SBO2, CSBO2, SBOp, prod_SBO, vlpadj; + double CEval1, CEval2, CEval3, CEval4, CEval5, CEval6, CEval7, CEval8; + double CEpen1, CEpen2, CEpen3; + double e_ang, e_coa, e_pen; + double CEcoa1, CEcoa2, CEcoa3, CEcoa4, CEcoa5; + double Cf7ij, Cf7jk, Cf8j, Cf9j; + double f7_ij, f7_jk, f8_Dj, f9_Dj; + double Ctheta_0, theta_0, theta_00, theta, cos_theta, sin_theta; + double r_ij, r_jk; + double BOA_ij, BOA_jk; + rvec force, ext_press; + // rtensor temp_rtensor, total_rtensor; + + // Tallying variables + double eng_tmp, f_scaler, fi_tmp[3], fj_tmp[3], fk_tmp[3]; + double delij[3], delkj[3]; + + three_body_header *thbh; + three_body_parameters *thbp; + three_body_interaction_data *p_ijk, *p_kji; + bond_data *pbond_ij, *pbond_jk, *pbond_jt; + bond_order_data *bo_ij, *bo_jk, *bo_jt; + + int tid = omp_get_thread_num(); + long reductionOffset = (system->N * tid); + class PairReaxCOMP *pair_reax_ptr; + pair_reax_ptr = static_cast(system->pair_ptr); + class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, + system->N, system->pair_ptr->eatom, + system->pair_ptr->vatom, thr); + + + // Run through a minimal for(jnum_intrs / nthreads; + +#pragma omp for schedule(dynamic,50) + for (j = 0; j < system->N; ++j) { + type_j = system->my_atoms[j].type; + _my_offset[j] = 0; + if(type_j < 0) continue; + + start_j = Start_Index(j, bonds); + end_j = End_Index(j, bonds); + + // Always point to start of workspace to count angles + my_offset = tid * per_thread; + + for (pi = start_j; pi < end_j; ++pi) { + Set_Start_Index( pi, my_offset, thb_intrs ); + pbond_ij = &(bonds->select.bond_list[pi]); + bo_ij = &(pbond_ij->bo_data); + BOA_ij = bo_ij->BO - control->thb_cut; + + if (BOA_ij > 0.0) { + i = pbond_ij->nbr; + + /* first copy 3-body intrs from previously computed ones where i>k. + in the second for-loop below, + we compute only new 3-body intrs where i < k */ + for (pk = start_j; pk < pi; ++pk) { + start_pk = Start_Index( pk, thb_intrs ); + end_pk = End_Index( pk, thb_intrs ); + + for (t = start_pk; t < end_pk; ++t) + if (thb_intrs->select.three_body_list[t].thb == i) { + + p_ijk = &(thb_intrs->select.three_body_list[my_offset] ); + p_ijk->thb = bonds->select.bond_list[pk].nbr; + + ++my_offset; + break; + } + } // for(pk) + + /* and this is the second for loop mentioned above */ + for (pk = pi+1; pk < end_j; ++pk) { + pbond_jk = &(bonds->select.bond_list[pk]); + k = pbond_jk->nbr; + + if (j >= system->n && i >= system->n && k >= system->n) continue; + + p_ijk = &( thb_intrs->select.three_body_list[my_offset] ); + p_ijk->thb = k; + + ++my_offset; // add this to the list of 3-body interactions + } // for(pk) + } // if() + + Set_End_Index(pi, my_offset, thb_intrs ); + } // for(pi) + + // Confirm that thb_intrs->num_intrs / nthreads is enough to hold all angles from a single atom + if(my_offset >= (tid+1)*per_thread) { + int me; + MPI_Comm_rank(MPI_COMM_WORLD,&me); + fprintf( stderr, "step%d-ran out of space on angle_list on proc %i for atom %i:", data->step, me, j); + fprintf( stderr, " nthreads= %d, tid=%d, my_offset=%d, per_thread=%d\n", nthreads, tid, my_offset, per_thread); + fprintf( stderr, " num_intrs= %i N= %i\n",thb_intrs->num_intrs , system->N); + MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY ); + } + + // Number of angles owned by this atom + _my_offset[j] = my_offset - tid * per_thread; + } // for(j) + + // Wait for all threads to finish counting angles +#pragma omp barrier + + // Master thread uses angle counts to compute offsets + // This can be threaded +#pragma omp master + { + int current_count = 0; + int m = _my_offset[0]; + _my_offset[0] = current_count; + for(j=1; jN; j++) { + current_count+= m; + m = _my_offset[j]; + _my_offset[j] = current_count; + } + _my_offset[system->N] = current_count + m; // Used to test if last particle has any angles + } + + // All threads wait till master thread finished computing offsets +#pragma omp barrier + + // Original loop, but now using precomputed offsets + // Safe to use all threads available, regardless of threads tasked above + // We also now skip over atoms that have no angles assigned +#pragma omp for schedule(dynamic,50)//(dynamic,chunksize)//(guided) + for (j = 0; j < system->N; ++j) { // Ray: the first one with system->N + type_j = system->my_atoms[j].type; + if(type_j < 0) continue; + + // Skip if no angles for this atom + if(_my_offset[j] == _my_offset[j+1]) continue; + + start_j = Start_Index(j, bonds); + end_j = End_Index(j, bonds); + + type_j = system->my_atoms[j].type; + + my_offset = _my_offset[j]; + + p_val3 = system->reax_param.sbp[ type_j ].p_val3; + p_val5 = system->reax_param.sbp[ type_j ].p_val5; + + SBOp = 0, prod_SBO = 1; + for (t = start_j; t < end_j; ++t) { + bo_jt = &(bonds->select.bond_list[t].bo_data); + SBOp += (bo_jt->BO_pi + bo_jt->BO_pi2); + temp = SQR( bo_jt->BO ); + temp *= temp; + temp *= temp; + prod_SBO *= exp( -temp ); + } + + // modifications to match Adri's code - 09/01/09 + if( workspace->vlpex[j] >= 0 ){ + vlpadj = 0; + dSBO2 = prod_SBO - 1; + } + else{ + vlpadj = workspace->nlp[j]; + dSBO2 = (prod_SBO - 1) * (1 - p_val8 * workspace->dDelta_lp[j]); + } + + SBO = SBOp + (1 - prod_SBO) * (-workspace->Delta_boc[j] - p_val8 * vlpadj); + dSBO1 = -8 * prod_SBO * ( workspace->Delta_boc[j] + p_val8 * vlpadj ); + + if( SBO <= 0 ) + SBO2 = 0, CSBO2 = 0; + else if( SBO > 0 && SBO <= 1 ) { + SBO2 = pow( SBO, p_val9 ); + CSBO2 = p_val9 * pow( SBO, p_val9 - 1 ); + } + else if( SBO > 1 && SBO < 2 ) { + SBO2 = 2 - pow( 2-SBO, p_val9 ); + CSBO2 = p_val9 * pow( 2 - SBO, p_val9 - 1 ); + } + else + SBO2 = 2, CSBO2 = 0; + + expval6 = exp( p_val6 * workspace->Delta_boc[j] ); + + for (pi = start_j; pi < end_j; ++pi) { + Set_Start_Index( pi, my_offset, thb_intrs ); + pbond_ij = &(bonds->select.bond_list[pi]); + bo_ij = &(pbond_ij->bo_data); + BOA_ij = bo_ij->BO - control->thb_cut; + + + if (BOA_ij > 0.0) { + i = pbond_ij->nbr; + r_ij = pbond_ij->d; + type_i = system->my_atoms[i].type; + + + /* first copy 3-body intrs from previously computed ones where i>k. + in the second for-loop below, + we compute only new 3-body intrs where i < k */ + for (pk = start_j; pk < pi; ++pk) { + start_pk = Start_Index( pk, thb_intrs ); + end_pk = End_Index( pk, thb_intrs ); + + for (t = start_pk; t < end_pk; ++t) + if (thb_intrs->select.three_body_list[t].thb == i) { + p_ijk = &(thb_intrs->select.three_body_list[my_offset] ); + p_kji = &(thb_intrs->select.three_body_list[t]); + + p_ijk->thb = bonds->select.bond_list[pk].nbr; + p_ijk->pthb = pk; + p_ijk->theta = p_kji->theta; + rvec_Copy( p_ijk->dcos_di, p_kji->dcos_dk ); + rvec_Copy( p_ijk->dcos_dj, p_kji->dcos_dj ); + rvec_Copy( p_ijk->dcos_dk, p_kji->dcos_di ); + + ++my_offset; + ++num_thb_intrs; + break; + } + } // for(pk) + + + /* and this is the second for loop mentioned above */ + for (pk = pi+1; pk < end_j; ++pk) { + pbond_jk = &(bonds->select.bond_list[pk]); + bo_jk = &(pbond_jk->bo_data); + BOA_jk = bo_jk->BO - control->thb_cut; + k = pbond_jk->nbr; + type_k = system->my_atoms[k].type; + p_ijk = &( thb_intrs->select.three_body_list[my_offset] ); + + // Fix by Sudhir + // if (BOA_jk <= 0) continue; + if (j >= system->n && i >= system->n && k >= system->n) continue; + + Calculate_Theta( pbond_ij->dvec, pbond_ij->d, + pbond_jk->dvec, pbond_jk->d, + &theta, &cos_theta ); + + Calculate_dCos_ThetaOMP( pbond_ij->dvec, pbond_ij->d, + pbond_jk->dvec, pbond_jk->d, + &(p_ijk->dcos_di), &(p_ijk->dcos_dj), + &(p_ijk->dcos_dk) ); + p_ijk->thb = k; + p_ijk->pthb = pk; + p_ijk->theta = theta; + + sin_theta = sin( theta ); + if( sin_theta < 1.0e-5 ) + sin_theta = 1.0e-5; + + ++my_offset; // add this to the list of 3-body interactions + ++num_thb_intrs; + + if ((j < system->n) && (BOA_jk > 0.0) && + (bo_ij->BO > control->thb_cut) && + (bo_jk->BO > control->thb_cut) && + (bo_ij->BO * bo_jk->BO > control->thb_cutsq)) { + r_jk = pbond_jk->d; + thbh = &( system->reax_param.thbp[ type_i ][ type_j ][ type_k ] ); + + for (cnt = 0; cnt < thbh->cnt; ++cnt) { + + if( fabs(thbh->prm[cnt].p_val1) > 0.001 ) { + thbp = &( thbh->prm[cnt] ); + + /* ANGLE ENERGY */ + p_val1 = thbp->p_val1; + p_val2 = thbp->p_val2; + p_val4 = thbp->p_val4; + p_val7 = thbp->p_val7; + theta_00 = thbp->theta_00; + + exp3ij = exp( -p_val3 * pow( BOA_ij, p_val4 ) ); + f7_ij = 1.0 - exp3ij; + Cf7ij = p_val3 * p_val4 * pow( BOA_ij, p_val4 - 1.0 ) * exp3ij; + + exp3jk = exp( -p_val3 * pow( BOA_jk, p_val4 ) ); + f7_jk = 1.0 - exp3jk; + Cf7jk = p_val3 * p_val4 * pow( BOA_jk, p_val4 - 1.0 ) * exp3jk; + + expval7 = exp( -p_val7 * workspace->Delta_boc[j] ); + trm8 = 1.0 + expval6 + expval7; + f8_Dj = p_val5 - ( (p_val5 - 1.0) * (2.0 + expval6) / trm8 ); + Cf8j = ( (1.0 - p_val5) / SQR(trm8) ) * + ( p_val6 * expval6 * trm8 - + (2.0 + expval6) * ( p_val6*expval6 - p_val7*expval7 ) ); + + theta_0 = 180.0 - theta_00 * (1.0 - + exp(-p_val10 * (2.0 - SBO2))); + theta_0 = DEG2RAD( theta_0 ); + + expval2theta = exp( -p_val2 * SQR(theta_0 - theta) ); + if (p_val1 >= 0) + expval12theta = p_val1 * (1.0 - expval2theta); + else // To avoid linear Me-H-Me angles (6/6/06) + expval12theta = p_val1 * -expval2theta; + + CEval1 = Cf7ij * f7_jk * f8_Dj * expval12theta; + CEval2 = Cf7jk * f7_ij * f8_Dj * expval12theta; + CEval3 = Cf8j * f7_ij * f7_jk * expval12theta; + CEval4 = -2.0 * p_val1 * p_val2 * f7_ij * f7_jk * f8_Dj * + expval2theta * (theta_0 - theta); + + Ctheta_0 = p_val10 * DEG2RAD(theta_00) * + exp( -p_val10 * (2.0 - SBO2) ); + + CEval5 = -CEval4 * Ctheta_0 * CSBO2; + CEval6 = CEval5 * dSBO1; + CEval7 = CEval5 * dSBO2; + CEval8 = -CEval4 / sin_theta; + + total_Eang += e_ang = + f7_ij * f7_jk * f8_Dj * expval12theta; + /* END ANGLE ENERGY*/ + + + /* PENALTY ENERGY */ + p_pen1 = thbp->p_pen1; + p_pen2 = system->reax_param.gp.l[19]; + p_pen3 = system->reax_param.gp.l[20]; + p_pen4 = system->reax_param.gp.l[21]; + + exp_pen2ij = exp( -p_pen2 * SQR( BOA_ij - 2.0 ) ); + exp_pen2jk = exp( -p_pen2 * SQR( BOA_jk - 2.0 ) ); + exp_pen3 = exp( -p_pen3 * workspace->Delta[j] ); + exp_pen4 = exp( p_pen4 * workspace->Delta[j] ); + trm_pen34 = 1.0 + exp_pen3 + exp_pen4; + f9_Dj = ( 2.0 + exp_pen3 ) / trm_pen34; + Cf9j = ( -p_pen3 * exp_pen3 * trm_pen34 - + (2.0 + exp_pen3) * ( -p_pen3 * exp_pen3 + + p_pen4 * exp_pen4 ) ) / + SQR( trm_pen34 ); + + total_Epen += e_pen = + p_pen1 * f9_Dj * exp_pen2ij * exp_pen2jk; + + CEpen1 = e_pen * Cf9j / f9_Dj; + temp = -2.0 * p_pen2 * e_pen; + CEpen2 = temp * (BOA_ij - 2.0); + CEpen3 = temp * (BOA_jk - 2.0); + /* END PENALTY ENERGY */ + + + /* COALITION ENERGY */ + p_coa1 = thbp->p_coa1; + p_coa2 = system->reax_param.gp.l[2]; + p_coa3 = system->reax_param.gp.l[38]; + p_coa4 = system->reax_param.gp.l[30]; + + exp_coa2 = exp( p_coa2 * workspace->Delta_val[j] ); + total_Ecoa += e_coa = + p_coa1 / (1. + exp_coa2) * + exp( -p_coa3 * SQR(workspace->total_bond_order[i]-BOA_ij) ) * + exp( -p_coa3 * SQR(workspace->total_bond_order[k]-BOA_jk) ) * + exp( -p_coa4 * SQR(BOA_ij - 1.5) ) * + exp( -p_coa4 * SQR(BOA_jk - 1.5) ); + + CEcoa1 = -2 * p_coa4 * (BOA_ij - 1.5) * e_coa; + CEcoa2 = -2 * p_coa4 * (BOA_jk - 1.5) * e_coa; + CEcoa3 = -p_coa2 * exp_coa2 * e_coa / (1 + exp_coa2); + CEcoa4 = -2 * p_coa3 * + (workspace->total_bond_order[i]-BOA_ij) * e_coa; + CEcoa5 = -2 * p_coa3 * + (workspace->total_bond_order[k]-BOA_jk) * e_coa; + /* END COALITION ENERGY */ + + + /* FORCES */ + bo_ij->Cdbo += (CEval1 + CEpen2 + (CEcoa1 - CEcoa4)); + bo_jk->Cdbo += (CEval2 + CEpen3 + (CEcoa2 - CEcoa5)); + workspace->CdDelta[j] += ((CEval3 + CEval7) + CEpen1 + CEcoa3); + workspace->CdDeltaReduction[reductionOffset+i] += CEcoa4; + workspace->CdDeltaReduction[reductionOffset+k] += CEcoa5; + + for (t = start_j; t < end_j; ++t) { + pbond_jt = &( bonds->select.bond_list[t] ); + bo_jt = &(pbond_jt->bo_data); + temp_bo_jt = bo_jt->BO; + temp = CUBE( temp_bo_jt ); + pBOjt7 = temp * temp * temp_bo_jt; + + bo_jt->Cdbo += (CEval6 * pBOjt7); + bo_jt->Cdbopi += CEval5; + bo_jt->Cdbopi2 += CEval5; + } + + if( control->virial == 0 ) { + rvec_ScaledAdd( workspace->f[j], CEval8, p_ijk->dcos_dj ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+i], + CEval8, p_ijk->dcos_di ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+k], + CEval8, p_ijk->dcos_dk ); + } + else { + /* terms not related to bond order derivatives are + added directly into forces and pressure vector/tensor */ + rvec_Scale( force, CEval8, p_ijk->dcos_di ); + rvec_Add( workspace->forceReduction[reductionOffset+i], force ); + + rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + + rvec_ScaledAdd( workspace->f[j], CEval8, p_ijk->dcos_dj ); + + rvec_Scale( force, CEval8, p_ijk->dcos_dk ); + rvec_Add( workspace->forceReduction[reductionOffset+k], force ); + + rvec_iMultiply( ext_press, pbond_jk->rel_box, force ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + } + + /* tally into per-atom virials */ + if( system->pair_ptr->vflag_atom || system->pair_ptr->evflag) { + + /* Acquire vectors */ + rvec_ScaledSum( delij, 1., system->my_atoms[i].x, + -1., system->my_atoms[j].x ); + rvec_ScaledSum( delkj, 1., system->my_atoms[k].x, + -1., system->my_atoms[j].x ); + + rvec_Scale( fi_tmp, -CEval8, p_ijk->dcos_di ); + rvec_Scale( fj_tmp, -CEval8, p_ijk->dcos_dj ); + rvec_Scale( fk_tmp, -CEval8, p_ijk->dcos_dk ); + + eng_tmp = e_ang + e_pen + e_coa; + + if( system->pair_ptr->evflag) + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, j, j, system->N, 1, + eng_tmp, 0.0, 0.0, 0.0, 0.0, 0.0, thr); + if( system->pair_ptr->vflag_atom) + // NEED TO MAKE AN OMP VERSION OF THIS CALL! + system->pair_ptr->v_tally3( i, j, k, fi_tmp, fk_tmp, delij, delkj); + } + + } // if(p_val1>0.001) + } // for(cnt) + } // if(j0) + } // for(pk) + } // if(BOA_ij>0) + + Set_End_Index(pi, my_offset, thb_intrs ); + } // for(pi) + } // for(j) + + pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, thr); + } // end omp parallel + + data->my_en.e_ang = total_Eang; + data->my_en.e_pen = total_Epen; + data->my_en.e_coa = total_Ecoa; + + if( num_thb_intrs >= thb_intrs->num_intrs * DANGER_ZONE ) { + workspace->realloc.num_3body = num_thb_intrs * TWICE; + if( num_thb_intrs > thb_intrs->num_intrs ) { + fprintf( stderr, "step%d-ran out of space on angle_list: top=%d, max=%d", + data->step, num_thb_intrs, thb_intrs->num_intrs ); + MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY ); + } + } + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEVALENCEANGLESBOINDEX] += (endTimeBase-startTimeBase); +#endif +} diff --git a/src/USER-OMP/reaxc_valence_angles_omp.h b/src/USER-OMP/reaxc_valence_angles_omp.h new file mode 100644 index 0000000000..ce304acced --- /dev/null +++ b/src/USER-OMP/reaxc_valence_angles_omp.h @@ -0,0 +1,37 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + . + ----------------------------------------------------------------------*/ + +#ifndef __VALENCE_ANGLES_OMP_H_ +#define __VALENCE_ANGLES_OMP_H_ + +#include "reaxc_types.h" + +void Valence_AnglesOMP( reax_system*, control_params*, simulation_data*, + storage*, reax_list**, output_controls* ); + +void Calculate_dCos_ThetaOMP( rvec, double, rvec, double, rvec*, rvec*, rvec* ); + +#endif diff --git a/src/USER-REAXC/fix_qeq_reax.cpp b/src/USER-REAXC/fix_qeq_reax.cpp index 22b5382727..5f171c8768 100644 --- a/src/USER-REAXC/fix_qeq_reax.cpp +++ b/src/USER-REAXC/fix_qeq_reax.cpp @@ -64,7 +64,7 @@ static const char cite_fix_qeq_reax[] = /* ---------------------------------------------------------------------- */ FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg) + Fix(lmp, narg, arg), pertype_option(NULL) { if (lmp->citeme) lmp->citeme->add(cite_fix_qeq_reax); @@ -76,7 +76,9 @@ FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) : swa = force->numeric(FLERR,arg[4]); swb = force->numeric(FLERR,arg[5]); tolerance = force->numeric(FLERR,arg[6]); - pertype_parameters(arg[7]); + int len = strlen(arg[7]) + 1; + pertype_option = new char[len]; + strcpy(pertype_option,arg[7]); // dual CG support only available for USER-OMP variant dual_enabled = 0; @@ -135,6 +137,8 @@ FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) : FixQEqReax::~FixQEqReax() { + delete[] pertype_option; + // unregister callbacks to this fix from Atom class if (copymode) return; @@ -158,6 +162,13 @@ FixQEqReax::~FixQEqReax() /* ---------------------------------------------------------------------- */ +void FixQEqReax::post_constructor() +{ + pertype_parameters(pertype_option); +} + +/* ---------------------------------------------------------------------- */ + int FixQEqReax::setmask() { int mask = 0; diff --git a/src/USER-REAXC/fix_qeq_reax.h b/src/USER-REAXC/fix_qeq_reax.h index d82232576a..19efcd2b03 100644 --- a/src/USER-REAXC/fix_qeq_reax.h +++ b/src/USER-REAXC/fix_qeq_reax.h @@ -39,6 +39,7 @@ class FixQEqReax : public Fix { FixQEqReax(class LAMMPS *, int, char **); ~FixQEqReax(); int setmask(); + virtual void post_constructor(); virtual void init(); void init_list(int,class NeighList *); virtual void init_storage(); @@ -100,6 +101,7 @@ class FixQEqReax : public Fix { //double **h; //double *hc, *hs; + char *pertype_option; // argument to determine how per-type info is obtained virtual void pertype_parameters(char*); void init_shielding(); void init_taper(); diff --git a/src/USER-REAXC/pair_reaxc.h b/src/USER-REAXC/pair_reaxc.h index 5658648db6..91b44be661 100644 --- a/src/USER-REAXC/pair_reaxc.h +++ b/src/USER-REAXC/pair_reaxc.h @@ -42,7 +42,7 @@ class PairReaxC : public Pair { void compute(int, int); void settings(int, char **); void coeff(int, char **); - void init_style(); + virtual void init_style(); double init_one(int, int); void *extract(const char *, int &); int fixbond_flag, fixspecies_flag;