diff --git a/src/USER-OMP/angle_dipole_omp.cpp b/src/USER-OMP/angle_dipole_omp.cpp index 9a646e04b0..f582ce4c41 100644 --- a/src/USER-OMP/angle_dipole_omp.cpp +++ b/src/USER-OMP/angle_dipole_omp.cpp @@ -122,14 +122,14 @@ void AngleDipoleOMP::eval(int nfrom, int nto, ThrData * const thr) delTx = tangle * (dely*mu[iDip][2] - delz*mu[iDip][1]); delTy = tangle * (delz*mu[iDip][0] - delx*mu[iDip][2]); delTz = tangle * (delx*mu[iDip][1] - dely*mu[iDip][0]); - + torque[iDip][0] += delTx; torque[iDip][1] += delTy; torque[iDip][2] += delTz; // Force couple that counterbalances dipolar torque fx = dely*delTz - delz*delTy; // direction (fi): - r x (-T) - fy = delz*delTx - delx*delTz; + fy = delz*delTx - delx*delTz; fz = delx*delTy - dely*delTx; fmod = sqrt(delTx*delTx + delTy*delTy + delTz*delTz) / r; // magnitude @@ -142,11 +142,11 @@ void AngleDipoleOMP::eval(int nfrom, int nto, ThrData * const thr) fj[0] = -fi[0]; fj[1] = -fi[1]; fj[2] = -fi[2]; - + f[iDip][0] += fj[0]; f[iDip][1] += fj[1]; f[iDip][2] += fj[2]; - + f[iRef][0] += fi[0]; f[iRef][1] += fi[1]; f[iRef][2] += fi[2]; diff --git a/src/USER-OMP/fix_qeq_reax_omp.cpp b/src/USER-OMP/fix_qeq_reax_omp.cpp index c8e5e3cb93..a43d6cfdd1 100644 --- a/src/USER-OMP/fix_qeq_reax_omp.cpp +++ b/src/USER-OMP/fix_qeq_reax_omp.cpp @@ -168,10 +168,10 @@ void FixQEqReaxOMP::allocate_storage() /* ---------------------------------------------------------------------- */ void FixQEqReaxOMP::deallocate_storage() -{ +{ memory->destroy(b_temp); - - FixQEqReax::deallocate_storage(); + + FixQEqReax::deallocate_storage(); } /* ---------------------------------------------------------------------- */ @@ -275,7 +275,7 @@ void FixQEqReaxOMP::init() n -= 1.0; d += 1.0; } - + // fprintf(stdout,"aspc_omega= %f\n",aspc_omega); // for(int i=0; iall(FLERR,"Early Termination"); @@ -306,21 +306,21 @@ void FixQEqReaxOMP::compute_H() firstneigh = list->firstneigh; } int ai, num_nbrs; - + // sumscan of the number of neighbors per atom to determine the offsets // most likely, we are overallocating. desirable to work on this part // to reduce the memory footprint of the far_nbrs list. - + num_nbrs = 0; - + for (int itr_i = 0; itr_i < inum; ++itr_i) { ai = ilist[itr_i]; H.firstnbr[ai] = num_nbrs; num_nbrs += numneigh[ai]; } - + // fill in the H matrix - + #pragma omp parallel default(shared) { int i, j, ii, jj, mfill, jnum, flag; @@ -328,7 +328,7 @@ void FixQEqReaxOMP::compute_H() double dx, dy, dz, r_sqr; mfill = 0; - + //#pragma omp for schedule(dynamic,50) #pragma omp for schedule(guided) for (ii = 0; ii < inum; ii++) { @@ -340,12 +340,12 @@ void FixQEqReaxOMP::compute_H() for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; - + dx = x[j][0] - x[i][0]; dy = x[j][1] - x[i][1]; dz = x[j][2] - x[i][2]; r_sqr = SQR(dx) + SQR(dy) + SQR(dz); - + flag = 0; if (r_sqr <= SQR(swb)) { if (j < n) flag = 1; @@ -358,7 +358,7 @@ void FixQEqReaxOMP::compute_H() } } } - + if( flag ) { H.jlist[mfill] = j; H.val[mfill] = calculate_H( sqrt(r_sqr), shld[type[i]][type[j]] ); @@ -378,7 +378,7 @@ void FixQEqReaxOMP::compute_H() error->all(FLERR,"Fix qeq/reax/omp has insufficient QEq matrix size"); } } // omp - + } /* ---------------------------------------------------------------------- */ @@ -386,10 +386,10 @@ void FixQEqReaxOMP::compute_H() void FixQEqReaxOMP::init_storage() { int NN; - + if(reaxc) NN = reaxc->list->inum + reaxc->list->gnum; else NN = list->inum + list->gnum; - + #pragma omp parallel for schedule(static) for (int i = 0; i < NN; i++) { Hdia_inv[i] = 1. / eta[atom->type[i]]; @@ -450,9 +450,9 @@ void FixQEqReaxOMP::pre_force(int vflag) ompTimingCGCount[COMPUTECG1INDEX]+= matvecs_s; startTimeBase = endTimeBase; #endif - + matvecs_t = CG(b_t, t); // CG on t - parallel - + #ifdef OMP_TIMING endTimeBase = MPI_Wtime(); ompTimingData[COMPUTECG2INDEX] += (endTimeBase-startTimeBase); @@ -495,7 +495,7 @@ void FixQEqReaxOMP::init_matvec() long endTimeBase, startTimeBase; startTimeBase = MPI_Wtime(); #endif - + /* fill-in H matrix */ compute_H(); @@ -611,12 +611,12 @@ int FixQEqReaxOMP::CG( double *b, double *x ) if (atom->mask[i] & groupbit) { r[i] = b[i] - q[i]; d[i] = r[i] * Hdia_inv[i]; //pre-condition - + tmp1 += b[i] * b[i]; tmp2 += r[i] * d[i]; } } - + my_buf[0] = tmp1; my_buf[1] = tmp2; @@ -633,7 +633,7 @@ int FixQEqReaxOMP::CG( double *b, double *x ) tmp1 = 0.0; #pragma omp parallel { - + #pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1) for( jj = 0; jj < nn; jj++) { ii = ilist[jj]; @@ -656,7 +656,7 @@ int FixQEqReaxOMP::CG( double *b, double *x ) if(atom->mask[ii] & groupbit) { x[ii] += alpha * d[ii]; r[ii] -= alpha * q[ii]; - + // pre-conditioning p[ii] = r[ii] * Hdia_inv[ii]; tmp1 += r[ii] * p[ii]; @@ -665,12 +665,12 @@ int FixQEqReaxOMP::CG( double *b, double *x ) } // omp parallel sig_old = sig_new; - + MPI_Allreduce(&tmp1, &tmp2, 1, MPI_DOUBLE, MPI_SUM, world); - + sig_new = tmp2; beta = sig_new / sig_old; - + #pragma omp for schedule(dynamic,50) private(ii) for( jj = 0; jj < nn; jj++) { ii = ilist[jj]; @@ -699,7 +699,7 @@ void FixQEqReaxOMP::sparse_matvec( sparse_matrix *A, double *x, double *b ) int *ilist; int nthreads = comm->nthreads; int tid = omp_get_thread_num(); - + if(reaxc) { nn = reaxc->list->inum; NN = reaxc->list->inum + reaxc->list->gnum; @@ -709,21 +709,21 @@ void FixQEqReaxOMP::sparse_matvec( sparse_matrix *A, double *x, double *b ) NN = list->inum + list->gnum; ilist = list->ilist; } - + #pragma omp for schedule(dynamic,50) for (ii = 0; ii < nn; ++ii) { i = ilist[ii]; if(atom->mask[i] & groupbit) b[i] = eta[ atom->type[i] ] * x[i]; } - + #pragma omp for schedule(dynamic,50) for (ii = nn; ii < NN; ++ii) { i = ilist[ii]; if(atom->mask[i] & groupbit) b[i] = 0; } - + #pragma omp for schedule(dynamic,50) - for (i = 0; i < NN; ++i) + for (i = 0; i < NN; ++i) for(int t=0; tmask[i] & groupbit) { q[i] = s[i] - u * t[i]; - + // backup s & t for (int k = 4; k > 0; --k) { s_hist[i][k] = s_hist[i][k-1]; @@ -817,7 +817,7 @@ void FixQEqReaxOMP::calculate_Q() // double FixQEqReaxOMP::parallel_norm( double *v, int n ) // { // int i; -// double my_sum, norm_sqr; +// double my_sum, norm_sqr; // int *ilist; @@ -911,7 +911,7 @@ void FixQEqReaxOMP::vector_sum( double* dest, double c, double* v, /* ---------------------------------------------------------------------- */ void FixQEqReaxOMP::vector_add( double* dest, double c, double* v, int k ) -{ +{ int i; int *ilist; @@ -971,18 +971,18 @@ int FixQEqReaxOMP::dual_CG( double *b1, double *b2, double *x1, double *x2 ) int indxI = 2 * i; r[indxI ] = b1[i] - q[indxI ]; r[indxI+1] = b2[i] - q[indxI+1]; - + d[indxI ] = r[indxI ] * Hdia_inv[i]; //pre-condition d[indxI+1] = r[indxI+1] * Hdia_inv[i]; - + tmp1 += b1[i] * b1[i]; tmp2 += b2[i] * b2[i]; - + tmp3 += r[indxI ] * d[indxI ]; tmp4 += r[indxI+1] * d[indxI+1]; } } - + my_buf[0] = tmp1; my_buf[1] = tmp2; my_buf[2] = tmp3; @@ -1004,7 +1004,7 @@ int FixQEqReaxOMP::dual_CG( double *b1, double *b2, double *x1, double *x2 ) tmp1 = tmp2 = 0.0; #pragma omp parallel { - + #pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1,tmp2) for( jj = 0; jj < nn; jj++) { ii = ilist[jj]; @@ -1037,14 +1037,14 @@ int FixQEqReaxOMP::dual_CG( double *b1, double *b2, double *x1, double *x2 ) int indxI = 2 * ii; x1[ii] += alpha_s * d[indxI ]; x2[ii] += alpha_t * d[indxI+1]; - + r[indxI ] -= alpha_s * q[indxI ]; r[indxI+1] -= alpha_t * q[indxI+1]; - + // pre-conditioning p[indxI ] = r[indxI ] * Hdia_inv[ii]; p[indxI+1] = r[indxI+1] * Hdia_inv[ii]; - + tmp1 += r[indxI ] * p[indxI ]; tmp2 += r[indxI+1] * p[indxI+1]; } @@ -1053,20 +1053,20 @@ int FixQEqReaxOMP::dual_CG( double *b1, double *b2, double *x1, double *x2 ) my_buf[0] = tmp1; my_buf[1] = tmp2; - + sig_old_s = sig_new_s; sig_old_t = sig_new_t; - + MPI_Allreduce(&my_buf, &buf, 2, MPI_DOUBLE, MPI_SUM, world); - + sig_new_s = buf[0]; sig_new_t = buf[1]; if( sqrt(sig_new_s)/b_norm_s <= tolerance || sqrt(sig_new_t)/b_norm_t <= tolerance) break; - + beta_s = sig_new_s / sig_old_s; beta_t = sig_new_t / sig_old_t; - + #pragma omp for schedule(dynamic,50) private(ii) for( jj = 0; jj < nn; jj++) { ii = ilist[jj]; @@ -1082,7 +1082,7 @@ int FixQEqReaxOMP::dual_CG( double *b1, double *b2, double *x1, double *x2 ) i++; matvecs_s = matvecs_t = i; // The plus one makes consistent with count from CG() matvecs = i; - + // Timing info for iterating s&t together #ifdef OMP_TIMING endTimeBase = MPI_Wtime(); @@ -1140,7 +1140,7 @@ void FixQEqReaxOMP::dual_sparse_matvec( sparse_matrix *A, double *x1, double *x2 int nthreads = comm->nthreads; int tid = omp_get_thread_num(); - + if (reaxc) { nn = reaxc->list->inum; NN = reaxc->list->inum + reaxc->list->gnum; @@ -1150,7 +1150,7 @@ void FixQEqReaxOMP::dual_sparse_matvec( sparse_matrix *A, double *x1, double *x2 NN = list->inum + list->gnum; ilist = list->ilist; } - + #pragma omp for schedule(dynamic,50) for( ii = 0; ii < nn; ++ii ) { i = ilist[ii]; @@ -1160,7 +1160,7 @@ void FixQEqReaxOMP::dual_sparse_matvec( sparse_matrix *A, double *x1, double *x2 b[indxI+1] = eta[ atom->type[i] ] * x2[i]; } } - + #pragma omp for schedule(dynamic,50) for( ii = nn; ii < NN; ++ii ) { i = ilist[ii]; @@ -1179,7 +1179,7 @@ void FixQEqReaxOMP::dual_sparse_matvec( sparse_matrix *A, double *x1, double *x2 b_temp[t][indxI+1] = 0.0; } } - + // Wait for b accumulated and b_temp zeroed #pragma omp barrier #pragma omp for schedule(dynamic,50) @@ -1226,7 +1226,7 @@ void FixQEqReaxOMP::dual_sparse_matvec( sparse_matrix *A, double *x, double *b ) int nthreads = comm->nthreads; int tid = omp_get_thread_num(); - + if (reaxc) { nn = reaxc->list->inum; NN = reaxc->list->inum + reaxc->list->gnum; @@ -1236,7 +1236,7 @@ void FixQEqReaxOMP::dual_sparse_matvec( sparse_matrix *A, double *x, double *b ) NN = list->inum + list->gnum; ilist = list->ilist; } - + #pragma omp for schedule(dynamic,50) for( ii = 0; ii < nn; ++ii ) { i = ilist[ii]; @@ -1246,7 +1246,7 @@ void FixQEqReaxOMP::dual_sparse_matvec( sparse_matrix *A, double *x, double *b ) b[indxI+1] = eta[ atom->type[i] ] * x[indxI+1]; } } - + #pragma omp for schedule(dynamic,50) for( ii = nn; ii < NN; ++ii ) { i = ilist[ii]; @@ -1265,7 +1265,7 @@ void FixQEqReaxOMP::dual_sparse_matvec( sparse_matrix *A, double *x, double *b ) b_temp[t][indxI+1] = 0.0; } } - + // Wait for b accumulated and b_temp zeroed #pragma omp barrier #pragma omp for schedule(dynamic,50) diff --git a/src/USER-OMP/fix_qeq_reax_omp.h b/src/USER-OMP/fix_qeq_reax_omp.h index c06185ca2e..6d8719857d 100644 --- a/src/USER-OMP/fix_qeq_reax_omp.h +++ b/src/USER-OMP/fix_qeq_reax_omp.h @@ -43,7 +43,7 @@ class FixQEqReaxOMP : public FixQEqReax { virtual void init_storage(); virtual void pre_force(int); virtual void post_constructor(); - + protected: double **b_temp; @@ -53,7 +53,7 @@ class FixQEqReaxOMP : public FixQEqReax { int aspc_order, aspc_order_max; double aspc_omega; double * aspc_b; - + virtual void pertype_parameters(char*); virtual void allocate_storage(); virtual void deallocate_storage(); @@ -64,7 +64,7 @@ class FixQEqReaxOMP : public FixQEqReax { virtual int CG(double*,double*); virtual void sparse_matvec(sparse_matrix*,double*,double*); virtual void calculate_Q(); - + /* virtual double parallel_norm( double*, int ); */ /* virtual double parallel_dot( double*, double*, int ); */ /* virtual double parallel_vector_acc( double*, int ); */ @@ -72,7 +72,7 @@ class FixQEqReaxOMP : public FixQEqReax { virtual void vector_sum(double*,double,double*,double,double*,int); virtual void vector_add(double*, double, double*,int); - // dual CG support + // dual CG support virtual int dual_CG(double*,double*,double*,double*); virtual void dual_sparse_matvec(sparse_matrix*,double*,double*,double*); virtual void dual_sparse_matvec(sparse_matrix*,double*,double*); diff --git a/src/USER-OMP/fix_reaxc_species_omp.cpp b/src/USER-OMP/fix_reaxc_species_omp.cpp index e9ac388252..786ba9824b 100644 --- a/src/USER-OMP/fix_reaxc_species_omp.cpp +++ b/src/USER-OMP/fix_reaxc_species_omp.cpp @@ -5,7 +5,7 @@ Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under + certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. diff --git a/src/USER-OMP/fix_reaxc_species_omp.h b/src/USER-OMP/fix_reaxc_species_omp.h index 006bb3e87a..f2ef3fb266 100644 --- a/src/USER-OMP/fix_reaxc_species_omp.h +++ b/src/USER-OMP/fix_reaxc_species_omp.h @@ -5,7 +5,7 @@ Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under + certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. @@ -26,17 +26,17 @@ FixStyle(reax/c/species/omp,FixReaxCSpeciesOMP) #define BUFLEN 1000 namespace LAMMPS_NS { - + class FixReaxCSpeciesOMP : public FixReaxCSpecies { - + public: FixReaxCSpeciesOMP(class LAMMPS *, int, char **); ~FixReaxCSpeciesOMP(){}; virtual void init(); - + private: class PairReaxCOMP *reaxc; - + }; } diff --git a/src/USER-OMP/pair_reaxc_omp.cpp b/src/USER-OMP/pair_reaxc_omp.cpp index c7a6c27f48..f216b47665 100644 --- a/src/USER-OMP/pair_reaxc_omp.cpp +++ b/src/USER-OMP/pair_reaxc_omp.cpp @@ -89,7 +89,7 @@ PairReaxCOMP::~PairReaxCOMP() int myrank; MPI_Comm_rank(mpi_data->world,&myrank); - + // Write screen output if (myrank == 0 && screen) { fprintf(screen,"\n\nWrite_Lists took %11.3lf seconds", ompTimingData[COMPUTEWLINDEX]); @@ -98,11 +98,11 @@ PairReaxCOMP::~PairReaxCOMP() fprintf(screen,"\n ->Initial Forces: %11.3lf seconds", ompTimingData[COMPUTEIFINDEX]); fprintf(screen,"\n ->Bond Order: %11.3lf seconds", ompTimingData[COMPUTEBOINDEX]); fprintf(screen,"\n ->Atom Energy: %11.3lf seconds", ompTimingData[COMPUTEATOMENERGYINDEX]); - fprintf(screen,"\n ->Bond: %11.3lf seconds", ompTimingData[COMPUTEBONDSINDEX]); + fprintf(screen,"\n ->Bond: %11.3lf seconds", ompTimingData[COMPUTEBONDSINDEX]); fprintf(screen,"\n ->Hydrogen bonds: %11.3lf seconds", ompTimingData[COMPUTEHBONDSINDEX]); fprintf(screen,"\n ->Torsion Angles: %11.3lf seconds", ompTimingData[COMPUTETORSIONANGLESBOINDEX]); fprintf(screen,"\n ->Valence Angles: %11.3lf seconds", ompTimingData[COMPUTEVALENCEANGLESBOINDEX]); - fprintf(screen,"\n ->Non-Bonded For: %11.3lf seconds", ompTimingData[COMPUTENBFINDEX]); + fprintf(screen,"\n ->Non-Bonded For: %11.3lf seconds", ompTimingData[COMPUTENBFINDEX]); fprintf(screen,"\n ->Total Forces: %11.3lf seconds", ompTimingData[COMPUTETFINDEX]); fprintf(screen,"\n\nfixQEQ: %11.3lf seconds", ompTimingData[COMPUTEQEQINDEX]); @@ -124,11 +124,11 @@ PairReaxCOMP::~PairReaxCOMP() fprintf(logfile,"\n ->Initial Forces: %11.3lf seconds", ompTimingData[COMPUTEIFINDEX]); fprintf(logfile,"\n ->Bond Order: %11.3lf seconds", ompTimingData[COMPUTEBOINDEX]); fprintf(logfile,"\n ->Atom Energy: %11.3lf seconds", ompTimingData[COMPUTEATOMENERGYINDEX]); - fprintf(logfile,"\n ->Bond: %11.3lf seconds", ompTimingData[COMPUTEBONDSINDEX]); + fprintf(logfile,"\n ->Bond: %11.3lf seconds", ompTimingData[COMPUTEBONDSINDEX]); fprintf(logfile,"\n ->Hydrogen bonds: %11.3lf seconds", ompTimingData[COMPUTEHBONDSINDEX]); fprintf(logfile,"\n ->Torsion Angles: %11.3lf seconds", ompTimingData[COMPUTETORSIONANGLESBOINDEX]); fprintf(logfile,"\n ->Valence Angles: %11.3lf seconds", ompTimingData[COMPUTEVALENCEANGLESBOINDEX]); - fprintf(logfile,"\n ->Non-Bonded For: %11.3lf seconds", ompTimingData[COMPUTENBFINDEX]); + fprintf(logfile,"\n ->Non-Bonded For: %11.3lf seconds", ompTimingData[COMPUTENBFINDEX]); fprintf(logfile,"\n ->Total Forces: %11.3lf seconds", ompTimingData[COMPUTETFINDEX]); fprintf(logfile,"\n\nfixQEQ: %11.3lf seconds", ompTimingData[COMPUTEQEQINDEX]); @@ -150,41 +150,41 @@ void PairReaxCOMP::compute(int eflag, int vflag) { double evdwl,ecoul; double t_start, t_end; - + // communicate num_bonds once every reneighboring // 2 num arrays stored by fix, grab ptr to them - + if (neighbor->ago == 0) comm->forward_comm_fix(fix_reax); int *num_bonds = fix_reax->num_bonds; int *num_hbonds = fix_reax->num_hbonds; - + evdwl = ecoul = 0.0; if (eflag || vflag) ev_setup(eflag,vflag); else ev_unset(); - + if (vflag_global) control->virial = 1; else control->virial = 0; - + system->n = atom->nlocal; // my atoms system->N = atom->nlocal + atom->nghost; // mine + ghosts system->bigN = static_cast (atom->natoms); // all atoms in the system - + system->big_box.V = 0; system->big_box.box_norms[0] = 0; system->big_box.box_norms[1] = 0; system->big_box.box_norms[2] = 0; if( comm->me == 0 ) t_start = MPI_Wtime(); // setup data structures - + setup(); - + Reset( system, control, data, workspace, &lists, world ); - + // Why not update workspace like in MPI-only code? - // Using the MPI-only way messes up the hb energy + // Using the MPI-only way messes up the hb energy //workspace->realloc.num_far = write_reax_lists(); write_reax_lists(); - + // timing for filling in the reax lists if( comm->me == 0 ) { t_end = MPI_Wtime(); @@ -192,7 +192,7 @@ void PairReaxCOMP::compute(int eflag, int vflag) } // forces - + #ifdef OMP_TIMING double startTimeBase,endTimeBase; startTimeBase = MPI_Wtime(); @@ -200,20 +200,20 @@ void PairReaxCOMP::compute(int eflag, int vflag) Compute_ForcesOMP(system,control,data,workspace,&lists,out_control,mpi_data); read_reax_forces(vflag); - + #ifdef OMP_TIMING endTimeBase = MPI_Wtime(); ompTimingData[COMPUTEINDEX] += (endTimeBase-startTimeBase); #endif - + #pragma omp parallel for schedule(static) for(int k = 0; k < system->N; ++k) { num_bonds[k] = system->my_atoms[k].num_bonds; num_hbonds[k] = system->my_atoms[k].num_hbonds; } - + // energies and pressure - + if (eflag_global) { evdwl += data->my_en.e_bond; evdwl += data->my_en.e_ov; @@ -226,13 +226,13 @@ void PairReaxCOMP::compute(int eflag, int vflag) evdwl += data->my_en.e_tor; evdwl += data->my_en.e_con; evdwl += data->my_en.e_vdW; - + ecoul += data->my_en.e_ele; ecoul += data->my_en.e_pol; - + // Store the different parts of the energy // in a list for output by compute pair command - + pvector[0] = data->my_en.e_bond; pvector[1] = data->my_en.e_ov + data->my_en.e_un; pvector[2] = data->my_en.e_lp; @@ -250,16 +250,16 @@ void PairReaxCOMP::compute(int eflag, int vflag) } if (vflag_fdotr) virial_fdotr_compute(); - + // Set internal timestep counter to that of LAMMPS - + data->step = update->ntimestep; Output_Results( system, control, data, &lists, out_control, mpi_data ); - + // populate tmpid and tmpbo arrays for fix reax/c/species int i, j; - + if(fixspecies_flag) { if (system->N > nmax) { memory->destroy(tmpid); @@ -268,14 +268,14 @@ void PairReaxCOMP::compute(int eflag, int vflag) memory->create(tmpid,nmax,MAXSPECBOND,"pair:tmpid"); memory->create(tmpbo,nmax,MAXSPECBOND,"pair:tmpbo"); } - + #pragma omp parallel for collapse(2) schedule(static) default(shared) for (i = 0; i < system->N; i ++) for (j = 0; j < MAXSPECBOND; j ++) { tmpbo[i][j] = 0.0; tmpid[i][j] = 0; } - + FindBond(); } } @@ -284,7 +284,7 @@ void PairReaxCOMP::compute(int eflag, int vflag) void PairReaxCOMP::init_style( ) { - if (!atom->q_flag) + if (!atom->q_flag) error->all(FLERR,"Pair reax/c/omp requires atom attribute q"); // firstwarn = 1; @@ -330,7 +330,7 @@ void PairReaxCOMP::init_style( ) fix_reax = (FixReaxC *) modify->fix[modify->nfix-1]; } -#pragma omp parallel +#pragma omp parallel { control->nthreads = omp_get_num_threads(); } } @@ -397,11 +397,11 @@ void PairReaxCOMP::setup( ) for(int k = oldN; k < system->N; ++k) Set_End_Index( k, Start_Index( k, lists+BONDS ), lists+BONDS ); - + // estimate far neighbor list size // Not present in MPI-only version workspace->realloc.num_far = estimate_reax_lists(); - + // check if I need to shrink/extend my data-structs ReAllocate( system, control, data, workspace, &lists, mpi_data ); @@ -414,10 +414,10 @@ void PairReaxCOMP::write_reax_atoms() { int *num_bonds = fix_reax->num_bonds; int *num_hbonds = fix_reax->num_hbonds; - + if (system->N > system->total_cap) error->all(FLERR,"Too many ghost atoms"); - + #pragma omp parallel for schedule(static) default(shared) for( int i = 0; i < system->N; ++i ){ system->my_atoms[i].orig_id = atom->tag[i]; @@ -435,28 +435,28 @@ void PairReaxCOMP::write_reax_atoms() int PairReaxCOMP::estimate_reax_lists() { - int i; + int i; int *ilist = list->ilist; int *numneigh = list->numneigh; int numall = list->inum + list->gnum; int mincap = system->mincap; - + // for good performance in the OpenMP implementation, each thread needs // to know where to place the neighbors of the atoms it is responsible for. // The sumscan values for the list->numneigh will be used to determine the - // neighbor offset of each atom. Note that this may cause some significant - // memory overhead if delayed neighboring is used - so it may be desirable + // neighbor offset of each atom. Note that this may cause some significant + // memory overhead if delayed neighboring is used - so it may be desirable // to work on this part to reduce the memory footprint of the far_nbrs list. - + int num_nbrs = 0; - + for (int itr_i = 0; itr_i < numall; ++itr_i) { i = ilist[itr_i]; num_nbrs += numneigh[i]; } - + int new_estimate = MAX (num_nbrs, mincap*MIN_NBRS); - + return new_estimate; } @@ -473,51 +473,51 @@ int PairReaxCOMP::write_reax_lists() int *jlist; double d_sqr, dist, cutoff_sqr; rvec dvec; - + double **x = atom->x; int *ilist = list->ilist; int *numneigh = list->numneigh; int **firstneigh = list->firstneigh; reax_list *far_nbrs = lists + FAR_NBRS; far_neighbor_data *far_list = far_nbrs->select.far_nbr_list; - + int num_nbrs = 0; int inum = list->inum; int gnum = list->gnum; int numall = inum + gnum; - + // sumscan of the number of neighbors per atom to determine the offsets // most likely, we are overallocating. desirable to work on this part // to reduce the memory footprint of the far_nbrs list. - + num_nbrs = 0; - + for (itr_i = 0; itr_i < numall; ++itr_i) { i = ilist[itr_i]; num_nbrs_offset[i] = num_nbrs; num_nbrs += numneigh[i]; } -//#pragma omp parallel for schedule(guided) default(shared) +//#pragma omp parallel for schedule(guided) default(shared) #pragma omp parallel for schedule(dynamic,50) default(shared) \ private(itr_i, itr_j, i, j, jlist, cutoff_sqr, num_mynbrs, d_sqr, dvec, dist) for (itr_i = 0; itr_i < numall; ++itr_i) { i = ilist[itr_i]; jlist = firstneigh[i]; Set_Start_Index( i, num_nbrs_offset[i], far_nbrs ); - - if (i < inum) + + if (i < inum) cutoff_sqr = control->nonb_cut*control->nonb_cut; - else - cutoff_sqr = control->bond_cut*control->bond_cut; - + else + cutoff_sqr = control->bond_cut*control->bond_cut; + num_mynbrs = 0; - + for (itr_j = 0; itr_j < numneigh[i]; ++itr_j) { j = jlist[itr_j]; j &= NEIGHMASK; get_distance( x[j], x[i], &d_sqr, &dvec ); - + if (d_sqr <= cutoff_sqr) { dist = sqrt( d_sqr ); set_far_nbr( &far_list[num_nbrs_offset[i] + num_mynbrs], j, dist, dvec ); @@ -531,7 +531,7 @@ int PairReaxCOMP::write_reax_lists() endTimeBase = MPI_Wtime(); ompTimingData[COMPUTEWLINDEX] += (endTimeBase-startTimeBase); #endif - + return num_nbrs; } diff --git a/src/USER-OMP/pair_reaxc_omp.h b/src/USER-OMP/pair_reaxc_omp.h index 5d5c7e145b..a5e077c309 100644 --- a/src/USER-OMP/pair_reaxc_omp.h +++ b/src/USER-OMP/pair_reaxc_omp.h @@ -47,40 +47,40 @@ class PairReaxCOMP : public PairReaxC, public ThrOMP { return fix; }; - inline void ev_setup_thr_proxy(int eflagparm, int vflagparm, int nallparm, + inline void ev_setup_thr_proxy(int eflagparm, int vflagparm, int nallparm, double *eatomparm, double **vatomparm, ThrData *thrparm) { ev_setup_thr(eflagparm, vflagparm, nallparm, eatomparm, vatomparm, thrparm); }; - + // reduce per thread data as needed - inline void reduce_thr_proxy(void * const styleparm, const int eflagparm, + inline void reduce_thr_proxy(void * const styleparm, const int eflagparm, const int vflagparm, ThrData * const thrparm) { reduce_thr(styleparm, eflagparm, vflagparm, thrparm); } - inline void ev_tally_thr_proxy(Pair * const pairparm, const int iparm, const int jparm, - const int nlocalparm, const int newton_pairparm, + inline void ev_tally_thr_proxy(Pair * const pairparm, const int iparm, const int jparm, + const int nlocalparm, const int newton_pairparm, const double evdwlparm, const double ecoulparm, - const double fpairparm, const double delxparm, - const double delyparm, const double delzparm, + const double fpairparm, const double delxparm, + const double delyparm, const double delzparm, ThrData * const thrparm) { - ev_tally_thr(pairparm, iparm, jparm, nlocalparm, newton_pairparm, + ev_tally_thr(pairparm, iparm, jparm, nlocalparm, newton_pairparm, evdwlparm, ecoulparm, fpairparm, delxparm, delyparm, delzparm, thrparm); } - inline void ev_tally_xyz_thr_proxy(Pair * const pairparm, const int iparm, const int jparm, - const int nlocalparm, const int newton_pairparm, - const double evdwlparm, const double ecoulparm, + inline void ev_tally_xyz_thr_proxy(Pair * const pairparm, const int iparm, const int jparm, + const int nlocalparm, const int newton_pairparm, + const double evdwlparm, const double ecoulparm, const double fxparm, const double fyparm, const double fzparm, - const double delxparm, const double delyparm, + const double delxparm, const double delyparm, const double delzparm, ThrData * const thrparm) { - ev_tally_xyz_thr(pairparm, iparm, jparm, nlocalparm, newton_pairparm, - evdwlparm, ecoulparm, fxparm, fyparm, fzparm, + ev_tally_xyz_thr(pairparm, iparm, jparm, nlocalparm, newton_pairparm, + evdwlparm, ecoulparm, fxparm, fyparm, fzparm, delxparm, delyparm, delzparm, thrparm); } - - inline void ev_tally3_thr_proxy(Pair * const pairparm,int i, int j, int k, - double evdwl, double ecoul, double *fj, double *fk, + + inline void ev_tally3_thr_proxy(Pair * const pairparm,int i, int j, int k, + double evdwl, double ecoul, double *fj, double *fk, double *drji, double *drki, ThrData * const thrparm) { ev_tally3_thr(pairparm, i, j, k, evdwl, ecoul, fj, fk, drji, drki, thrparm); } diff --git a/src/USER-OMP/reaxc_bond_orders_omp.cpp b/src/USER-OMP/reaxc_bond_orders_omp.cpp index 1d32cbd34f..aaa311640e 100644 --- a/src/USER-OMP/reaxc_bond_orders_omp.cpp +++ b/src/USER-OMP/reaxc_bond_orders_omp.cpp @@ -44,11 +44,11 @@ void Add_dBond_to_ForcesOMP( reax_system *system, int i, int pj, int pk, k, j; PairReaxCOMP *pair_reax_ptr = static_cast(system->pair_ptr); - + int tid = omp_get_thread_num(); ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); long reductionOffset = (system->N * tid); - + /* Virial Tallying variables */ double f_scaler; rvec fi_tmp, fj_tmp, fk_tmp, delij, delji, delki, delkj, temp; @@ -154,7 +154,7 @@ void Add_dBond_to_ForcesOMP( reax_system *system, int i, int pj, for( pk = Start_Index(i, bonds); pk < End_Index(i, bonds); ++pk ) { nbr_k = &(bonds->select.bond_list[pk]); k = nbr_k->nbr; - + // rvec_Scale( temp, -coef.C2dbo, nbr_k->bo_data.dBOp); // rvec_ScaledAdd( temp, -coef.C2dDelta, nbr_k->bo_data.dBOp); // rvec_ScaledAdd( temp, -coef.C3dbopi, nbr_k->bo_data.dBOp); @@ -179,12 +179,12 @@ void Add_dBond_to_ForcesOMP( reax_system *system, int i, int pj, delkj[0],delkj[1],delkj[2],thr); } } - + // forces on k: j neighbor for( pk = Start_Index(j, bonds); pk < End_Index(j, bonds); ++pk ) { nbr_k = &(bonds->select.bond_list[pk]); k = nbr_k->nbr; - + // rvec_Scale( temp, -coef.C3dbo, nbr_k->bo_data.dBOp ); // rvec_ScaledAdd( temp, -coef.C3dDelta, nbr_k->bo_data.dBOp); // rvec_ScaledAdd( temp, -coef.C4dbopi, nbr_k->bo_data.dBOp); @@ -202,7 +202,7 @@ void Add_dBond_to_ForcesOMP( reax_system *system, int i, int pj, pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,k,i,system->N,0,0,0, fk_tmp[0],fk_tmp[1],fk_tmp[2], delki[0],delki[1],delki[2],thr); - + rvec_ScaledSum(delkj,1.,system->my_atoms[k].x,-1.,system->my_atoms[j].x); pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,k,j,system->N,0,0,0, @@ -357,7 +357,7 @@ int BOp_OMP( storage *workspace, reax_list *bonds, double bo_cut, /****** bonds i-j and j-i ******/ ibond = &( bonds->select.bond_list[btop_i] ); jbond = &( bonds->select.bond_list[btop_j] ); - + ibond->nbr = j; jbond->nbr = i; ibond->d = nbr_pj->d; @@ -370,19 +370,19 @@ int BOp_OMP( storage *workspace, reax_list *bonds, double bo_cut, jbond->dbond_index = btop_i; ibond->sym_index = btop_j; jbond->sym_index = btop_i; - + bo_ij = &( ibond->bo_data ); bo_ji = &( jbond->bo_data ); bo_ji->BO = bo_ij->BO = BO; bo_ji->BO_s = bo_ij->BO_s = BO_s; bo_ji->BO_pi = bo_ij->BO_pi = BO_pi; bo_ji->BO_pi2 = bo_ij->BO_pi2 = BO_pi2; - + /* Bond Order page2-3, derivative of total bond order prime */ Cln_BOp_s = twbp->p_bo2 * C12 * rr2; Cln_BOp_pi = twbp->p_bo4 * C34 * rr2; Cln_BOp_pi2 = twbp->p_bo6 * C56 * rr2; - + /* Only dln_BOp_xx wrt. dr_i is stored here, note that dln_BOp_xx/dr_i = -dln_BOp_xx/dr_j and all others are 0 */ rvec_Scale(bo_ij->dln_BOp_s,-bo_ij->BO_s*Cln_BOp_s,ibond->dvec); @@ -392,34 +392,34 @@ int BOp_OMP( storage *workspace, reax_list *bonds, double bo_cut, rvec_Scale(bo_ji->dln_BOp_s, -1., bo_ij->dln_BOp_s); rvec_Scale(bo_ji->dln_BOp_pi, -1., bo_ij->dln_BOp_pi ); rvec_Scale(bo_ji->dln_BOp_pi2, -1., bo_ij->dln_BOp_pi2 ); - + rvec_Scale( bo_ij->dBOp, -(bo_ij->BO_s * Cln_BOp_s + bo_ij->BO_pi * Cln_BOp_pi + bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec ); rvec_Scale( bo_ji->dBOp, -1., bo_ij->dBOp ); - + bo_ij->BO_s -= bo_cut; bo_ij->BO -= bo_cut; bo_ji->BO_s -= bo_cut; bo_ji->BO -= bo_cut; - + bo_ij->Cdbo = bo_ij->Cdbopi = bo_ij->Cdbopi2 = 0.0; bo_ji->Cdbo = bo_ji->Cdbopi = bo_ji->Cdbopi2 = 0.0; - + return 1; } /* ---------------------------------------------------------------------- */ void BOOMP( reax_system *system, control_params *control, simulation_data *data, - storage *workspace, reax_list **lists, output_controls *out_control ) + storage *workspace, reax_list **lists, output_controls *out_control ) { #ifdef OMP_TIMING double endTimeBase, startTimeBase; startTimeBase = MPI_Wtime(); #endif - + double p_lp1 = system->reax_param.gp.l[15]; int num_bonds = 0; double p_boc1 = system->reax_param.gp.l[0]; @@ -427,7 +427,7 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, reax_list *bonds = (*lists) + BONDS; int natoms = system->N; int nthreads = control->nthreads; - + #pragma omp parallel default(shared) { int i, j, pj, type_i, type_j; @@ -442,9 +442,9 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, single_body_parameters *sbp_i, *sbp_j; two_body_parameters *twbp; bond_order_data *bo_ij, *bo_ji; - + int tid = omp_get_thread_num(); - + /* Calculate Deltaprime, Deltaprime_boc values */ #pragma omp for schedule(static) for (i = 0; i < system->N; ++i) { @@ -460,7 +460,7 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, // Wait till initialization complete #pragma omp barrier - + /* Corrected Bond Order calculations */ //#pragma omp for schedule(dynamic,50) #pragma omp for schedule(guided) @@ -473,7 +473,7 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, Deltap_boc_i = workspace->Deltap_boc[i]; start_i = Start_Index(i, bonds); end_i = End_Index(i, bonds); - + for (pj = start_i; pj < end_i; ++pj) { j = bonds->select.bond_list[pj].nbr; type_j = system->my_atoms[j].type; @@ -487,23 +487,23 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, bo_ij->C1dbo = 1.000000; bo_ij->C2dbo = 0.000000; bo_ij->C3dbo = 0.000000; - + bo_ij->C1dbopi = bo_ij->BO_pi; bo_ij->C2dbopi = 0.000000; bo_ij->C3dbopi = 0.000000; bo_ij->C4dbopi = 0.000000; - + bo_ij->C1dbopi2 = bo_ij->BO_pi2; bo_ij->C2dbopi2 = 0.000000; bo_ij->C3dbopi2 = 0.000000; bo_ij->C4dbopi2 = 0.000000; - + } else { val_j = system->reax_param.sbp[type_j].valency; Deltap_j = workspace->Deltap[j]; Deltap_boc_j = workspace->Deltap_boc[j]; - + /* on page 1 */ if( twbp->ovc >= 0.001 ) { /* Correction for overcoordination */ @@ -511,12 +511,12 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, exp_p2i = exp( -p_boc2 * Deltap_i ); exp_p1j = exp( -p_boc1 * Deltap_j ); exp_p2j = exp( -p_boc2 * Deltap_j ); - + f2 = exp_p1i + exp_p1j; f3 = -1.0 / p_boc2 * log( 0.5 * ( exp_p2i + exp_p2j ) ); f1 = 0.5 * ( ( val_i + f2 )/( val_i + f2 + f3 ) + ( val_j + f2 )/( val_j + f2 + f3 ) ); - + /* Now come the derivates */ /* Bond Order pages 5-7, derivative of f1 */ temp = f2 + f3; @@ -526,7 +526,7 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, 1.0 / SQR( u1_ji )); Cf1B_ij = -0.5 * (( u1_ij - f3 ) / SQR( u1_ij ) + ( u1_ji - f3 ) / SQR( u1_ji )); - + Cf1_ij = 0.50 * ( -p_boc1 * exp_p1i / u1_ij - ((val_i+f2) / SQR(u1_ij)) * ( -p_boc1 * exp_p1i + @@ -535,8 +535,8 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, ((val_j+f2) / SQR(u1_ji)) * ( -p_boc1 * exp_p1i + exp_p2i / ( exp_p2i + exp_p2j ) )); - - + + Cf1_ji = -Cf1A_ij * p_boc1 * exp_p1j + Cf1B_ij * exp_p2j / ( exp_p2i + exp_p2j ); } @@ -552,11 +552,11 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, Deltap_boc_i) * twbp->p_boc3 + twbp->p_boc5); exp_f5 =exp(-(twbp->p_boc4 * SQR( bo_ij->BO ) - Deltap_boc_j) * twbp->p_boc3 + twbp->p_boc5); - + f4 = 1. / (1. + exp_f4); f5 = 1. / (1. + exp_f5); f4f5 = f4 * f5; - + /* Bond Order pages 8-9, derivative of f4 and f5 */ Cf45_ij = -f4 * exp_f4; Cf45_ji = -f5 * exp_f5; @@ -565,7 +565,7 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, f4 = f5 = f4f5 = 1.0; Cf45_ij = Cf45_ji = 0.0; } - + /* Bond Order page 10, derivative of total bond order */ A0_ij = f1 * f4f5; A1_ij = -2 * twbp->p_boc3 * twbp->p_boc4 * bo_ij->BO * @@ -574,28 +574,28 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, A2_ji = Cf1_ji / f1 + twbp->p_boc3 * Cf45_ji; A3_ij = A2_ij + Cf1_ij / f1; A3_ji = A2_ji + Cf1_ji / f1; - + /* find corrected bond orders and their derivative coef */ bo_ij->BO = bo_ij->BO * A0_ij; bo_ij->BO_pi = bo_ij->BO_pi * A0_ij *f1; bo_ij->BO_pi2= bo_ij->BO_pi2* A0_ij *f1; bo_ij->BO_s = bo_ij->BO - ( bo_ij->BO_pi + bo_ij->BO_pi2 ); - + bo_ij->C1dbo = A0_ij + bo_ij->BO * A1_ij; bo_ij->C2dbo = bo_ij->BO * A2_ij; bo_ij->C3dbo = bo_ij->BO * A2_ji; - + bo_ij->C1dbopi = f1*f1*f4*f5; bo_ij->C2dbopi = bo_ij->BO_pi * A1_ij; bo_ij->C3dbopi = bo_ij->BO_pi * A3_ij; bo_ij->C4dbopi = bo_ij->BO_pi * A3_ji; - + bo_ij->C1dbopi2 = f1*f1*f4*f5; bo_ij->C2dbopi2 = bo_ij->BO_pi2 * A1_ij; bo_ij->C3dbopi2 = bo_ij->BO_pi2 * A3_ij; bo_ij->C4dbopi2 = bo_ij->BO_pi2 * A3_ji; } - + /* neglect bonds that are < 1e-10 */ if( bo_ij->BO < 1e-10 ) bo_ij->BO = 0.0; @@ -605,7 +605,7 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, bo_ij->BO_pi = 0.0; if( bo_ij->BO_pi2 < 1e-10 ) bo_ij->BO_pi2 = 0.0; - + workspace->total_bond_order[i] += bo_ij->BO; //now keeps total_BO } // else { @@ -617,11 +617,11 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, // bo_ij->BO_s = bo_ji->BO_s; // bo_ij->BO_pi = bo_ji->BO_pi; // bo_ij->BO_pi2 = bo_ji->BO_pi2; - + // workspace->total_bond_order[i] += bo_ij->BO;// now keeps total_BO // } } - + } // Wait for bo_ij to be updated @@ -635,7 +635,7 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, if(type_i < 0) continue; start_i = Start_Index(i, bonds); end_i = End_Index(i, bonds); - + for (pj = start_i; pj < end_i; ++pj) { j = bonds->select.bond_list[pj].nbr; type_j = system->my_atoms[j].type; @@ -647,25 +647,25 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, /* We only need to update bond orders from bo_ji everything else is set in uncorrected_bo calculations */ sym_index = bonds->select.bond_list[pj].sym_index; - + bo_ij = &( bonds->select.bond_list[pj].bo_data ); bo_ji = &(bonds->select.bond_list[ sym_index ].bo_data); bo_ij->BO = bo_ji->BO; bo_ij->BO_s = bo_ji->BO_s; bo_ij->BO_pi = bo_ji->BO_pi; bo_ij->BO_pi2 = bo_ji->BO_pi2; - + workspace->total_bond_order[i] += bo_ij->BO;// now keeps total_BO } } - + } /*-------------------------*/ - + // Need to wait for total_bond_order to be accumulated. #pragma omp barrier - + /* Calculate some helper variables that are used at many places throughout force calculations */ #pragma omp for schedule(guided) @@ -673,14 +673,14 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, type_j = system->my_atoms[j].type; if(type_j < 0) continue; sbp_j = &(system->reax_param.sbp[ type_j ]); - + workspace->Delta[j] = workspace->total_bond_order[j] - sbp_j->valency; workspace->Delta_e[j] = workspace->total_bond_order[j] - sbp_j->valency_e; workspace->Delta_boc[j] = workspace->total_bond_order[j] - sbp_j->valency_boc; workspace->Delta_val[j] = workspace->total_bond_order[j] - sbp_j->valency_val; - + workspace->vlpex[j] = workspace->Delta_e[j] - 2.0 * (int)(workspace->Delta_e[j]/2.0); explp1 = exp(-p_lp1 * SQR(2.0 + workspace->vlpex[j])); @@ -688,7 +688,7 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, workspace->Delta_lp[j] = sbp_j->nlp_opt - workspace->nlp[j]; workspace->Clp[j] = 2.0 * p_lp1 * explp1 * (2.0 + workspace->vlpex[j]); workspace->dDelta_lp[j] = workspace->Clp[j]; - + if( sbp_j->mass > 21.0 ) { workspace->nlp_temp[j] = 0.5 * (sbp_j->valency_e - sbp_j->valency); workspace->Delta_lp_temp[j] = sbp_j->nlp_opt - workspace->nlp_temp[j]; @@ -700,7 +700,7 @@ void BOOMP( reax_system *system, control_params *control, simulation_data *data, workspace->dDelta_lp_temp[j] = workspace->Clp[j]; } } - + } // parallel region #ifdef OMP_TIMING diff --git a/src/USER-OMP/reaxc_bonds_omp.cpp b/src/USER-OMP/reaxc_bonds_omp.cpp index 4dc666b3ee..19433ce2e3 100644 --- a/src/USER-OMP/reaxc_bonds_omp.cpp +++ b/src/USER-OMP/reaxc_bonds_omp.cpp @@ -45,7 +45,7 @@ void BondsOMP( reax_system *system, control_params *control, double endTimeBase, startTimeBase; startTimeBase = MPI_Wtime(); #endif - + int natoms = system->n; int nthreads = control->nthreads; reax_list *bonds = (*lists) + BONDS; @@ -57,7 +57,7 @@ void BondsOMP( reax_system *system, control_params *control, double total_Ebond = 0.0; #pragma omp parallel default(shared) reduction(+: total_Ebond) - { + { int i, j, pj; int start_i, end_i; int type_i, type_j; @@ -70,31 +70,31 @@ void BondsOMP( reax_system *system, control_params *control, bond_order_data *bo_ij; int tid = omp_get_thread_num(); long reductionOffset = (system->N * tid); - + class PairReaxCOMP *pair_reax_ptr; pair_reax_ptr = static_cast(system->pair_ptr); class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); - - pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, - system->pair_ptr->vflag_either, natoms, + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, natoms, system->pair_ptr->eatom, system->pair_ptr->vatom, thr); - + #pragma omp for schedule(guided) for (i = 0; i < natoms; ++i) { start_i = Start_Index(i, bonds); end_i = End_Index(i, bonds); - + for (pj = start_i; pj < end_i; ++pj) { j = bonds->select.bond_list[pj].nbr; - + if( system->my_atoms[i].orig_id > system->my_atoms[j].orig_id ) continue; if( system->my_atoms[i].orig_id == system->my_atoms[j].orig_id ) { if (system->my_atoms[j].x[2] < system->my_atoms[i].x[2]) continue; - if (system->my_atoms[j].x[2] == system->my_atoms[i].x[2] && + if (system->my_atoms[j].x[2] == system->my_atoms[i].x[2] && system->my_atoms[j].x[1] < system->my_atoms[i].x[1]) continue; - if (system->my_atoms[j].x[2] == system->my_atoms[i].x[2] && - system->my_atoms[j].x[1] == system->my_atoms[i].x[1] && + if (system->my_atoms[j].x[2] == system->my_atoms[i].x[2] && + system->my_atoms[j].x[1] == system->my_atoms[i].x[1] && system->my_atoms[j].x[0] < system->my_atoms[i].x[0]) continue; } @@ -105,29 +105,29 @@ void BondsOMP( reax_system *system, control_params *control, sbp_j = &( system->reax_param.sbp[type_j] ); twbp = &( system->reax_param.tbp[type_i][type_j] ); bo_ij = &( bonds->select.bond_list[pj].bo_data ); - + /* calculate the constants */ pow_BOs_be2 = pow( bo_ij->BO_s, twbp->p_be2 ); exp_be12 = exp( twbp->p_be1 * ( 1.0 - pow_BOs_be2 ) ); CEbo = -twbp->De_s * exp_be12 * ( 1.0 - twbp->p_be1 * twbp->p_be2 * pow_BOs_be2 ); - - /* calculate the Bond Energy */ + + /* calculate the Bond Energy */ total_Ebond += ebond = -twbp->De_s * bo_ij->BO_s * exp_be12 -twbp->De_p * bo_ij->BO_pi -twbp->De_pp * bo_ij->BO_pi2; - + /* tally into per-atom energy */ if (system->pair_ptr->evflag) - pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, natoms, 1, + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, natoms, 1, ebond, 0.0, 0.0, 0.0, 0.0, 0.0, thr); - + /* calculate derivatives of Bond Orders */ bo_ij->Cdbo += CEbo; bo_ij->Cdbopi -= (CEbo + twbp->De_p); bo_ij->Cdbopi2 -= (CEbo + twbp->De_pp); - + /* Stabilisation terminal triple bond */ if (bo_ij->BO >= 1.00) { if (gp37 == 2 || @@ -138,22 +138,22 @@ void BondsOMP( reax_system *system, control_params *control, exphub1 = exp(-gp3 * (workspace->total_bond_order[j]-bo_ij->BO)); exphuov = exp(gp4 * (workspace->Delta[i] + workspace->Delta[j])); hulpov = 1.0 / (1.0 + 25.0 * exphuov); - + estriph = gp10 * exphu * hulpov * (exphua1 + exphub1); total_Ebond += estriph; - + decobdbo = gp10 * exphu * hulpov * (exphua1 + exphub1) * ( gp3 - 2.0 * gp7 * (bo_ij->BO-2.50) ); decobdboua = -gp10 * exphu * hulpov * (gp3*exphua1 + 25.0*gp4*exphuov*hulpov*(exphua1+exphub1)); decobdboub = -gp10 * exphu * hulpov * (gp3*exphub1 + 25.0*gp4*exphuov*hulpov*(exphua1+exphub1)); - + /* tally into per-atom energy */ if (system->pair_ptr->evflag) - pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, natoms, 1, + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, natoms, 1, estriph, 0.0, 0.0, 0.0, 0.0, 0.0, thr); - + bo_ij->Cdbo += decobdbo; workspace->CdDelta[i] += decobdboua; workspace->CdDeltaReduction[reductionOffset+j] += decobdboub; @@ -163,12 +163,12 @@ void BondsOMP( reax_system *system, control_params *control, } // for(i) } // omp - + data->my_en.e_bond += total_Ebond; - + #ifdef OMP_TIMING endTimeBase = MPI_Wtime(); - ompTimingData[COMPUTEBONDSINDEX] += (endTimeBase-startTimeBase); + ompTimingData[COMPUTEBONDSINDEX] += (endTimeBase-startTimeBase); #endif } diff --git a/src/USER-OMP/reaxc_forces_omp.cpp b/src/USER-OMP/reaxc_forces_omp.cpp index 1bf04129e0..47e439a3f7 100644 --- a/src/USER-OMP/reaxc_forces_omp.cpp +++ b/src/USER-OMP/reaxc_forces_omp.cpp @@ -77,7 +77,7 @@ void Compute_Bonded_ForcesOMP( reax_system *system, control_params *control, MPI_Comm comm ) { int i; - + #ifdef OMP_TIMING double startTimeBase, endTimeBase; startTimeBase = MPI_Wtime(); @@ -114,7 +114,7 @@ void Compute_NonBonded_ForcesOMP( reax_system *system, control_params *control, else Tabulated_vdW_Coulomb_Energy_OMP( system, control, data, workspace, lists, out_control ); - + #ifdef OMP_TIMING endTimeBase = MPI_Wtime(); ompTimingData[COMPUTENBFINDEX] += (endTimeBase-startTimeBase); @@ -129,48 +129,48 @@ void Compute_NonBonded_ForcesOMP( reax_system *system, control_params *control, void Compute_Total_ForceOMP( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists, mpi_datatypes *mpi_data ) -{ +{ #ifdef OMP_TIMING double startTimeBase,endTimeBase; startTimeBase = MPI_Wtime(); #endif - + int natoms = system->N; int nthreads = control->nthreads; - long totalReductionSize = system->N * nthreads; + long totalReductionSize = system->N * nthreads; reax_list *bonds = (*lists) + BONDS; - -#pragma omp parallel default(shared) //default(none) + +#pragma omp parallel default(shared) //default(none) { int i, j, k, pj, pk, start_j, end_j; int tid = omp_get_thread_num(); bond_order_data *bo_jk; - + class PairReaxCOMP *pair_reax_ptr; pair_reax_ptr = static_cast(system->pair_ptr); class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); - - pair_reax_ptr->ev_setup_thr_proxy(0, 1, natoms, system->pair_ptr->eatom, + + pair_reax_ptr->ev_setup_thr_proxy(0, 1, natoms, system->pair_ptr->eatom, system->pair_ptr->vatom, thr); - + #pragma omp for schedule(guided) for (i = 0; i < system->N; ++i) { for (j = 0; j < nthreads; ++j) workspace->CdDelta[i] += workspace->CdDeltaReduction[system->N*j+i]; } - + #pragma omp for schedule(dynamic,50) for (j = 0; j < system->N; ++j) { start_j = Start_Index(j, bonds); end_j = End_Index(j, bonds); - + for (pk = start_j; pk < end_j; ++pk) { bo_jk = &( bonds->select.bond_list[pk].bo_data ); for (k = 0; k < nthreads; ++k) bo_jk->Cdbo += bo_jk->CdboReduction[k]; } } - + // #pragma omp for schedule(guided) //(dynamic,50) // for (i = 0; i < system->N; ++i) // for (pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj) @@ -188,7 +188,7 @@ void Compute_Total_ForceOMP( reax_system *system, control_params *control, const int startj = Start_Index(i, bonds); const int endj = End_Index(i, bonds); for (pj = startj; pj < endj; ++pj) - if (i < bonds->select.bond_list[pj].nbr) + if (i < bonds->select.bond_list[pj].nbr) Add_dBond_to_ForcesOMP( system, i, pj, workspace, lists ); } @@ -199,7 +199,7 @@ void Compute_Total_ForceOMP( reax_system *system, control_params *control, const int startj = Start_Index(i, bonds); const int endj = End_Index(i, bonds); for (pj = startj; pj < endj; ++pj) - if (i < bonds->select.bond_list[pj].nbr) + if (i < bonds->select.bond_list[pj].nbr) Add_dBond_to_Forces_NPTOMP(system, i, pj, data, workspace, lists ); } @@ -213,8 +213,8 @@ void Compute_Total_ForceOMP( reax_system *system, control_params *control, rvec_Add( workspace->f[i], workspace->forceReduction[system->N*j+i] ); } - -#pragma omp for schedule(guided) + +#pragma omp for schedule(guided) for (i = 0; i < totalReductionSize; i++) { workspace->forceReduction[i][0] = 0; workspace->forceReduction[i][1] = 0; @@ -222,7 +222,7 @@ void Compute_Total_ForceOMP( reax_system *system, control_params *control, workspace->CdDeltaReduction[i] = 0; } } // parallel region - + if (control->virial) for (int i=0; i < nthreads; ++i) { rvec_Add(data->my_ext_press, workspace->my_ext_pressReduction[i]); @@ -249,7 +249,7 @@ void Validate_ListsOMP( reax_system *system, storage *workspace, reax_list **lis #pragma omp parallel default(shared) private(i, comp, Hindex) { - + /* bond list */ if( N > 0 ) { bonds = *lists + BONDS; @@ -269,8 +269,8 @@ void Validate_ListsOMP( reax_system *system, storage *workspace, reax_list **lis } } } - - + + /* hbonds list */ if( numH > 0 ) { hbonds = *lists + HBONDS; @@ -294,7 +294,7 @@ void Validate_ListsOMP( reax_system *system, storage *workspace, reax_list **lis } } } - + } // omp parallel } @@ -307,7 +307,7 @@ void Init_Forces_noQEq_OMP( reax_system *system, control_params *control, double startTimeBase, endTimeBase; startTimeBase = MPI_Wtime(); #endif - + int i, j, pi, pj; int start_i, end_i, start_j, end_j; int type_i, type_j; @@ -338,7 +338,7 @@ void Init_Forces_noQEq_OMP( reax_system *system, control_params *control, /* uncorrected bond orders */ cutoff = control->bond_cut; - + #pragma omp parallel default(shared) \ private(i, atom_i, type_i, pi, start_i, end_i, sbp_i, btop_i, ibond, ihb, ihb_top, \ j, atom_j, type_j, pj, start_j, end_j, sbp_j, nbr_pj, jbond, jhb, twbp) @@ -376,7 +376,7 @@ void Init_Forces_noQEq_OMP( reax_system *system, control_params *control, // btop_i++; // Set_End_Index(i, btop_i, bonds); // } - + // } // Trying to minimize time spent in critical section by moving initial part of BOp() @@ -412,7 +412,7 @@ void Init_Forces_noQEq_OMP( reax_system *system, control_params *control, if(BO >= bo_cut) { int btop_j; - + // Update indices in critical section #pragma omp critical { @@ -421,30 +421,30 @@ void Init_Forces_noQEq_OMP( reax_system *system, control_params *control, Set_End_Index( j, btop_j+1, bonds ); Set_End_Index( i, btop_i+1, bonds ); } // omp critical - + // Finish remaining BOp() work BOp_OMP(workspace, bonds, bo_cut, i , btop_i, nbr_pj, sbp_i, sbp_j, twbp, btop_j, C12, C34, C56, BO, BO_s, BO_pi, BO_pi2); - + bond_data * ibond = &(bonds->select.bond_list[btop_i]); bond_order_data * bo_ij = &(ibond->bo_data); - + bond_data * jbond = &(bonds->select.bond_list[btop_j]); bond_order_data * bo_ji = &(jbond->bo_data); - + workspace->total_bond_order[i] += bo_ij->BO; tmp_bond_order[reductionOffset + j] += bo_ji->BO; - + rvec_Add(workspace->dDeltap_self[i], bo_ij->dBOp); rvec_Add(tmp_ddelta[reductionOffset + j], bo_ji->dBOp); - + btop_i++; num_bonds++; } // if(BO>=bo_cut) } // if(cutoff) - + } // for(pj) } // for(i) @@ -452,7 +452,7 @@ void Init_Forces_noQEq_OMP( reax_system *system, control_params *control, #pragma omp barrier #pragma omp for schedule(dynamic,50) - for(i=0; iN; i++) + for(i=0; iN; i++) for(int t=0; tN + i; workspace->dDeltap_self[i][0] += tmp_ddelta[indx][0]; @@ -467,18 +467,18 @@ void Init_Forces_noQEq_OMP( reax_system *system, control_params *control, // for (i = 0; i < system->N; ++i) { // start_i = Start_Index(i, bonds); // end_i = End_Index(i, bonds); - + // for (pi = start_i; pi < end_i; ++pi) { // ibond = &(bonds->select.bond_list[pi]); // j = ibond->nbr; - + // if (i < j) { // start_j = Start_Index(j, bonds); // end_j = End_Index(j, bonds); // for (pj = start_j; pj < end_j; ++pj) { // jbond = &(bonds->select.bond_list[pj]); - + // if (jbond->nbr == i) { // ibond->sym_index = pj; // jbond->sym_index = pi; @@ -488,13 +488,13 @@ void Init_Forces_noQEq_OMP( reax_system *system, control_params *control, // } // } // } - + /* hydrogen bond list */ if (control->hbond_cut > 0) { cutoff = control->hbond_cut; -//#pragma omp for schedule(guided) reduction(+ : num_hbonds) -#pragma omp for schedule(dynamic,50) reduction(+ : num_hbonds) +//#pragma omp for schedule(guided) reduction(+ : num_hbonds) +#pragma omp for schedule(dynamic,50) reduction(+ : num_hbonds) for (i = 0; i < system->n; ++i) { atom_i = &(system->my_atoms[i]); type_i = atom_i->type; @@ -503,11 +503,11 @@ void Init_Forces_noQEq_OMP( reax_system *system, control_params *control, #pragma omp critical { - + if (ihb == 1 || ihb == 2) { start_i = Start_Index(i, far_nbrs); end_i = End_Index(i, far_nbrs); - + for (pj = start_i; pj < end_i; ++pj) { nbr_pj = &( far_nbrs->select.far_nbr_list[pj] ); j = nbr_pj->nbr; @@ -520,12 +520,12 @@ void Init_Forces_noQEq_OMP( reax_system *system, control_params *control, if (nbr_pj->d <= control->hbond_cut) { int iflag = 0; int jflag = 0; - + if(ihb==1 && jhb==2) iflag = 1; else if(jn && ihb == 2 && jhb == 1) jflag = 1; - + if(iflag || jflag) { - + // This critical section enforces H-bonds to be added by threads one at a time. // #pragma omp critical // { @@ -547,14 +547,14 @@ void Init_Forces_noQEq_OMP( reax_system *system, control_params *control, hbonds->select.hbond_list[jhb_top].scl = -1; hbonds->select.hbond_list[jhb_top].ptr = nbr_pj; } - + num_hbonds++; } // if(iflag || jflag) } } } - + } // omp critical } @@ -592,8 +592,8 @@ void Compute_ForcesOMP( reax_system *system, control_params *control, { int qeq_flag; MPI_Comm comm = mpi_data->world; - - // Init Forces + + // Init Forces #if defined(LOG_PERFORMANCE) double t_start = 0; if( system->my_rank == MASTER_NODE ) @@ -602,13 +602,13 @@ void Compute_ForcesOMP( reax_system *system, control_params *control, Init_Forces_noQEq_OMP( system, control, data, workspace, lists, out_control, comm ); - + #if defined(LOG_PERFORMANCE) //MPI_Barrier( comm ); if( system->my_rank == MASTER_NODE ) Update_Timing_Info( &t_start, &(data->timing.init_forces) ); #endif - + // Bonded Interactions Compute_Bonded_ForcesOMP( system, control, data, workspace, lists, out_control, mpi_data->world ); @@ -617,19 +617,19 @@ void Compute_ForcesOMP( reax_system *system, control_params *control, if( system->my_rank == MASTER_NODE ) Update_Timing_Info( &t_start, &(data->timing.bonded) ); #endif - + // Nonbonded Interactions Compute_NonBonded_ForcesOMP( system, control, data, workspace, lists, out_control, mpi_data->world ); - + #if defined(LOG_PERFORMANCE) if( system->my_rank == MASTER_NODE ) Update_Timing_Info( &t_start, &(data->timing.nonb) ); #endif - + // Total Force Compute_Total_ForceOMP( system, control, data, workspace, lists, mpi_data ); - + #if defined(LOG_PERFORMANCE) if( system->my_rank == MASTER_NODE ) Update_Timing_Info( &t_start, &(data->timing.bonded) ); diff --git a/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp b/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp index 5047e2775e..00ecb58de1 100644 --- a/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp +++ b/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp @@ -46,10 +46,10 @@ void Hydrogen_BondsOMP( reax_system *system, control_params *control, double endTimeBase, startTimeBase; startTimeBase = MPI_Wtime(); #endif - + const int nthreads = control->nthreads; long totalReductionSize = system->N; - + #pragma omp parallel default(shared) //default(none) { int i, j, k, pi, pk; @@ -92,11 +92,11 @@ void Hydrogen_BondsOMP( reax_system *system, control_params *control, class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); - pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, - system->pair_ptr->vflag_either, - natoms, system->pair_ptr->eatom, + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, + natoms, system->pair_ptr->eatom, system->pair_ptr->vatom, thr); - + /* loops below discover the Hydrogen bonds between i-j-k triplets. here j is H atom and there has to be some bond between i and j. Hydrogen bond is between j and k. @@ -113,7 +113,7 @@ void Hydrogen_BondsOMP( reax_system *system, control_params *control, hb_start_j = Start_Index( system->my_atoms[j].Hindex, hbonds ); hb_end_j = End_Index( system->my_atoms[j].Hindex, hbonds ); if(type_j < 0) continue; - + top = 0; for( pi = start_j; pi < end_j; ++pi ) { pbond_ij = &( bond_list[pi] ); @@ -230,8 +230,8 @@ void Hydrogen_BondsOMP( reax_system *system, control_params *control, { data->my_en.e_hb += e_hb_thr; } - - pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, + + pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, system->pair_ptr->vflag_either, thr); } diff --git a/src/USER-OMP/reaxc_init_md_omp.cpp b/src/USER-OMP/reaxc_init_md_omp.cpp index 1584d5e53e..9e01280545 100644 --- a/src/USER-OMP/reaxc_init_md_omp.cpp +++ b/src/USER-OMP/reaxc_init_md_omp.cpp @@ -53,7 +53,7 @@ int Init_ListsOMP( reax_system *system, control_params *control, int i, total_hbonds, total_bonds, bond_cap, num_3body, cap_3body, Htop; int *hb_top, *bond_top; MPI_Comm comm; - + int TWICE = 2; int mincap = system->mincap; double safezone = system->safezone; @@ -96,9 +96,9 @@ int Init_ListsOMP( reax_system *system, control_params *control, int nthreads = control->nthreads; reax_list *bonds = (*lists)+BONDS; - + for (i = 0; i < bonds->num_intrs; ++i) - bonds->select.bond_list[i].bo_data.CdboReduction = + bonds->select.bond_list[i].bo_data.CdboReduction = (double*) smalloc(sizeof(double)*nthreads, "CdboReduction", comm); /* 3bodies list */ diff --git a/src/USER-OMP/reaxc_multi_body_omp.cpp b/src/USER-OMP/reaxc_multi_body_omp.cpp index a355ce8609..7922d7fd0d 100644 --- a/src/USER-OMP/reaxc_multi_body_omp.cpp +++ b/src/USER-OMP/reaxc_multi_body_omp.cpp @@ -45,7 +45,7 @@ void Atom_EnergyOMP( reax_system *system, control_params *control, double endTimeBase, startTimeBase; startTimeBase = MPI_Wtime(); #endif - + /* Initialize parameters */ double p_lp1 = system->reax_param.gp.l[15]; double p_lp3 = system->reax_param.gp.l[5]; @@ -58,11 +58,11 @@ void Atom_EnergyOMP( reax_system *system, control_params *control, int natoms = system->n; int nthreads = control->nthreads; reax_list *bonds = (*lists) + BONDS; - + double total_Elp = 0.0; double total_Eun = 0.0; double total_Eov = 0.0; - + #pragma omp parallel default(shared) reduction(+:total_Elp, total_Eun, total_Eov) { int i, j, pj, type_i, type_j; @@ -77,29 +77,29 @@ void Atom_EnergyOMP( reax_system *system, control_params *control, double eng_tmp, f_tmp; double p_lp2, p_ovun2, p_ovun5; int numbonds; - + single_body_parameters *sbp_i, *sbp_j; two_body_parameters *twbp; bond_data *pbond; bond_order_data *bo_ij; - + int tid = omp_get_thread_num(); - + long reductionOffset = (system->N * tid); class PairReaxCOMP *pair_reax_ptr; pair_reax_ptr = static_cast(system->pair_ptr); class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); - - pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, - system->pair_ptr->vflag_either, natoms, + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, natoms, system->pair_ptr->eatom, system->pair_ptr->vatom, thr); - + #pragma omp for schedule(guided) for ( i = 0; i < system->n; ++i) { type_i = system->my_atoms[i].type; if(type_i < 0) continue; sbp_i = &(system->reax_param.sbp[ type_i ]); - + /* lone-pair Energy */ p_lp2 = sbp_i->p_lp2; expvd2 = exp( -75 * workspace->Delta_lp[i] ); @@ -111,21 +111,21 @@ void Atom_EnergyOMP( reax_system *system, control_params *control, numbonds ++; /* calculate the energy */ - if(numbonds > 0) + if(numbonds > 0) total_Elp += e_lp = p_lp2 * workspace->Delta_lp[i] * inv_expvd2; - + dElp = p_lp2 * inv_expvd2 + 75 * p_lp2 * workspace->Delta_lp[i] * expvd2 * SQR(inv_expvd2); CElp = dElp * workspace->dDelta_lp[i]; - + if(numbonds > 0) workspace->CdDelta[i] += CElp; // lp - 1st term - + /* tally into per-atom energy */ if( system->pair_ptr->evflag) - pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, i, system->n, 1, + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, i, system->n, 1, e_lp, 0.0, 0.0, 0.0, 0.0, 0.0, thr); - + /* correction for C2 */ if( p_lp3 > 0.001 && !strcmp(system->reax_param.sbp[type_i].name, "C") ) for( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) { @@ -138,19 +138,19 @@ void Atom_EnergyOMP( reax_system *system, control_params *control, bo_ij = &( bonds->select.bond_list[pj].bo_data ); Di = workspace->Delta[i]; vov3 = bo_ij->BO - Di - 0.040*pow(Di, 4.); - + if( vov3 > 3. ) { total_Elp += e_lph = p_lp3 * SQR(vov3-3.0); deahu2dbo = 2.*p_lp3*(vov3 - 3.); deahu2dsbo = 2.*p_lp3*(vov3 - 3.)*(-1. - 0.16*pow(Di, 3.)); - + bo_ij->Cdbo += deahu2dbo; workspace->CdDelta[i] += deahu2dsbo; - + /* tally into per-atom energy */ if( system->pair_ptr->evflag) - pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, system->n, 1, + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, system->n, 1, e_lph, 0.0, 0.0, 0.0, 0.0, 0.0, thr); } } @@ -163,12 +163,12 @@ void Atom_EnergyOMP( reax_system *system, control_params *control, type_i = system->my_atoms[i].type; if(type_i < 0) continue; sbp_i = &(system->reax_param.sbp[ type_i ]); - + /* over-coordination energy */ if( sbp_i->mass > 21.0 ) dfvl = 0.0; else dfvl = 1.0; // only for 1st-row elements - + p_ovun2 = sbp_i->p_ovun2; sum_ovun1 = sum_ovun2 = 0; for (pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj) { @@ -177,20 +177,20 @@ void Atom_EnergyOMP( reax_system *system, control_params *control, if(type_j < 0) continue; bo_ij = &(bonds->select.bond_list[pj].bo_data); twbp = &(system->reax_param.tbp[ type_i ][ type_j ]); - + sum_ovun1 += twbp->p_ovun1 * twbp->De_s * bo_ij->BO; sum_ovun2 += (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j])* ( bo_ij->BO_pi + bo_ij->BO_pi2 ); } - + exp_ovun1 = p_ovun3 * exp( p_ovun4 * sum_ovun2 ); inv_exp_ovun1 = 1.0 / (1 + exp_ovun1); Delta_lpcorr = workspace->Delta[i] - (dfvl * workspace->Delta_lp_temp[i]) * inv_exp_ovun1; - + exp_ovun2 = exp( p_ovun2 * Delta_lpcorr ); inv_exp_ovun2 = 1.0 / (1.0 + exp_ovun2); - + DlpVi = 1.0 / (Delta_lpcorr + sbp_i->valency + 1e-8); CEover1 = Delta_lpcorr * DlpVi * inv_exp_ovun2; @@ -198,17 +198,17 @@ void Atom_EnergyOMP( reax_system *system, control_params *control, CEover2 = sum_ovun1 * DlpVi * inv_exp_ovun2 * (1.0 - Delta_lpcorr * ( DlpVi + p_ovun2 * exp_ovun2 * inv_exp_ovun2 )); - + CEover3 = CEover2 * (1.0 - dfvl * workspace->dDelta_lp[i] * inv_exp_ovun1 ); - + CEover4 = CEover2 * (dfvl * workspace->Delta_lp_temp[i]) * p_ovun4 * exp_ovun1 * SQR(inv_exp_ovun1); - - + + /* under-coordination potential */ p_ovun2 = sbp_i->p_ovun2; p_ovun5 = sbp_i->p_ovun5; - + exp_ovun2n = 1.0 / exp_ovun2; exp_ovun6 = exp( p_ovun6 * Delta_lpcorr ); exp_ovun8 = p_ovun7 * exp(p_ovun8 * sum_ovun2); @@ -222,7 +222,7 @@ void Atom_EnergyOMP( reax_system *system, control_params *control, if(numbonds > 0) total_Eun += e_un = -p_ovun5 * (1.0 - exp_ovun6) * inv_exp_ovun2n * inv_exp_ovun8; - + CEunder1 = inv_exp_ovun2n * ( p_ovun5 * p_ovun6 * exp_ovun6 * inv_exp_ovun8 + p_ovun2 * e_un * exp_ovun2n ); @@ -230,15 +230,15 @@ void Atom_EnergyOMP( reax_system *system, control_params *control, CEunder3 = CEunder1 * (1.0 - dfvl*workspace->dDelta_lp[i]*inv_exp_ovun1); CEunder4 = CEunder1 * (dfvl*workspace->Delta_lp_temp[i]) * p_ovun4 * exp_ovun1 * SQR(inv_exp_ovun1) + CEunder2; - + /* tally into per-atom energy */ if (system->pair_ptr->evflag) { eng_tmp = e_ov; if(numbonds > 0) eng_tmp+= e_un; - pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, i, system->n, 1, + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, i, system->n, 1, eng_tmp, 0.0, 0.0, 0.0, 0.0, 0.0, thr); } - + /* forces */ workspace->CdDelta[i] += CEover3; // OvCoor - 2nd term if(numbonds > 0) workspace->CdDelta[i] += CEunder3; // UnCoor - 1st term @@ -249,35 +249,35 @@ void Atom_EnergyOMP( reax_system *system, control_params *control, bo_ij = &(pbond->bo_data); twbp = &(system->reax_param.tbp[ system->my_atoms[i].type ] [system->my_atoms[pbond->nbr].type]); - + bo_ij->Cdbo += CEover1 * twbp->p_ovun1 * twbp->De_s; // OvCoor-1st - workspace->CdDeltaReduction[reductionOffset+j] += + workspace->CdDeltaReduction[reductionOffset+j] += CEover4 * (1.0 - dfvl*workspace->dDelta_lp[j]) * (bo_ij->BO_pi + bo_ij->BO_pi2); // OvCoor-3a - + bo_ij->Cdbopi += CEover4 * (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]); // OvCoor-3b bo_ij->Cdbopi2 += CEover4 * (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]); // OvCoor-3b - - workspace->CdDeltaReduction[reductionOffset+j] += + + workspace->CdDeltaReduction[reductionOffset+j] += CEunder4 * (1.0 - dfvl*workspace->dDelta_lp[j]) * (bo_ij->BO_pi + bo_ij->BO_pi2); // UnCoor - 2a - + bo_ij->Cdbopi += CEunder4 * (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]); // UnCoor-2b bo_ij->Cdbopi2 += CEunder4 * (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]); // UnCoor-2b } } - - pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, + + pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, system->pair_ptr->vflag_either, thr); - + } - + data->my_en.e_lp += total_Elp; data->my_en.e_ov += total_Eov; data->my_en.e_un += total_Eun; - + #ifdef OMP_TIMING endTimeBase = MPI_Wtime(); ompTimingData[COMPUTEATOMENERGYINDEX] += (endTimeBase-startTimeBase); diff --git a/src/USER-OMP/reaxc_nonbonded_omp.cpp b/src/USER-OMP/reaxc_nonbonded_omp.cpp index 73dbc1dda4..583c02bd08 100644 --- a/src/USER-OMP/reaxc_nonbonded_omp.cpp +++ b/src/USER-OMP/reaxc_nonbonded_omp.cpp @@ -44,7 +44,7 @@ void vdW_Coulomb_Energy_OMP( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists, output_controls *out_control ) { - int natoms = system->n; + int natoms = system->n; int nthreads = control->nthreads; long totalReductionSize = system->N * nthreads; reax_list *far_nbrs = (*lists) + FAR_NBRS; @@ -52,7 +52,7 @@ void vdW_Coulomb_Energy_OMP( reax_system *system, control_params *control, double p_vdW1i = 1.0 / p_vdW1; double total_EvdW = 0.; double total_Eele = 0.; - + #pragma omp parallel default(shared) reduction(+: total_EvdW, total_Eele) //default(none) { int tid = omp_get_thread_num(); @@ -67,20 +67,20 @@ void vdW_Coulomb_Energy_OMP( reax_system *system, control_params *control, rvec temp, ext_press; two_body_parameters *twbp; far_neighbor_data *nbr_pj; - + // Tallying variables: double pe_vdw, f_tmp, delij[3]; - + long reductionOffset = (system->N * tid); - + class PairReaxCOMP *pair_reax_ptr; pair_reax_ptr = static_cast(system->pair_ptr); class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); - - pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, - system->pair_ptr->vflag_either, + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, natoms, system->pair_ptr->eatom, - system->pair_ptr->vatom, thr); + system->pair_ptr->vatom, thr); e_core = 0; e_vdW = 0; e_vdW_thr = 0; @@ -94,12 +94,12 @@ void vdW_Coulomb_Energy_OMP( reax_system *system, control_params *control, start_i = Start_Index(i, far_nbrs); end_i = End_Index(i, far_nbrs); orig_i = system->my_atoms[i].orig_id; - + for( pj = start_i; pj < end_i; ++pj ) { nbr_pj = &(far_nbrs->select.far_nbr_list[pj]); j = nbr_pj->nbr; orig_j = system->my_atoms[j].orig_id; - + flag = 0; if(nbr_pj->d <= control->nonb_cut) { if(j < natoms) flag = 1; @@ -113,7 +113,7 @@ void vdW_Coulomb_Energy_OMP( reax_system *system, control_params *control, } } } - + if (flag) { r_ij = nbr_pj->d; @@ -142,14 +142,14 @@ void vdW_Coulomb_Energy_OMP( reax_system *system, control_params *control, { // shielding powr_vdW1 = pow(r_ij, p_vdW1); powgi_vdW1 = pow( 1.0 / twbp->gamma_w, p_vdW1); - + fn13 = pow( powr_vdW1 + powgi_vdW1, p_vdW1i ); exp1 = exp( twbp->alpha * (1.0 - fn13 / twbp->r_vdW) ); exp2 = exp( 0.5 * twbp->alpha * (1.0 - fn13 / twbp->r_vdW) ); - + e_vdW = twbp->D * (exp1 - 2.0 * exp2); total_EvdW += Tap * e_vdW; - + dfn13 = pow( powr_vdW1 + powgi_vdW1, p_vdW1i - 1.0) * pow(r_ij, p_vdW1 - 2.0); @@ -159,10 +159,10 @@ void vdW_Coulomb_Energy_OMP( reax_system *system, control_params *control, else{ // no shielding exp1 = exp( twbp->alpha * (1.0 - r_ij / twbp->r_vdW) ); exp2 = exp( 0.5 * twbp->alpha * (1.0 - r_ij / twbp->r_vdW) ); - + e_vdW = twbp->D * (exp1 - 2.0 * exp2); total_EvdW += Tap * e_vdW; - + CEvd = dTap * e_vdW - Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2) / r_ij; } @@ -171,23 +171,23 @@ void vdW_Coulomb_Energy_OMP( reax_system *system, control_params *control, { // innner wall e_core = twbp->ecore * exp(twbp->acore * (1.0-(r_ij/twbp->rcore))); total_EvdW += Tap * e_core; - + de_core = -(twbp->acore/twbp->rcore) * e_core; CEvd += dTap * e_core + Tap * de_core / r_ij; - + // lg correction, only if lgvdw is yes if (control->lgflag) { r_ij5 = pow( r_ij, 5.0 ); r_ij6 = pow( r_ij, 6.0 ); re6 = pow( twbp->lgre, 6.0 ); - + e_lg = -(twbp->lgcij/( r_ij6 + re6 )); total_EvdW += Tap * e_lg; - + de_lg = -6.0 * e_lg * r_ij5 / ( r_ij6 + re6 ) ; CEvd += dTap * e_lg + Tap * de_lg / r_ij; } - + } /*Coulomb Calculations*/ @@ -207,44 +207,44 @@ void vdW_Coulomb_Energy_OMP( reax_system *system, control_params *control, rvec_ScaledSum( delij, 1., system->my_atoms[i].x, -1., system->my_atoms[j].x ); f_tmp = -(CEvd + CEclmb); - pair_reax_ptr->ev_tally_thr_proxy( system->pair_ptr, i, j, natoms, - 1, pe_vdw, e_ele, f_tmp, + pair_reax_ptr->ev_tally_thr_proxy( system->pair_ptr, i, j, natoms, + 1, pe_vdw, e_ele, f_tmp, delij[0], delij[1], delij[2], thr); } - + if( control->virial == 0 ) { rvec_ScaledAdd( workspace->f[i], -(CEvd + CEclmb), nbr_pj->dvec ); - rvec_ScaledAdd( workspace->forceReduction[reductionOffset+j], + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+j], +(CEvd + CEclmb), nbr_pj->dvec ); } else { /* NPT, iNPT or sNPT */ /* for pressure coupling, terms not related to bond order derivatives are added directly into pressure vector/tensor */ - + rvec_Scale( temp, CEvd + CEclmb, nbr_pj->dvec ); rvec_ScaledAdd( workspace->f[reductionOffset+i], -1., temp ); rvec_Add( workspace->forceReduction[reductionOffset+j], temp); - + rvec_iMultiply( ext_press, nbr_pj->rel_box, temp ); - + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); } } } } - pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, - system->pair_ptr->vflag_either, thr); + pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, thr); } // parallel region - + data->my_en.e_vdW = total_EvdW; data->my_en.e_ele = total_Eele; - + Compute_Polarization_Energy( system, data ); -} +} /* ---------------------------------------------------------------------- */ - + void Tabulated_vdW_Coulomb_Energy_OMP(reax_system *system,control_params *control, simulation_data *data, storage *workspace, reax_list **lists, @@ -257,7 +257,7 @@ void Tabulated_vdW_Coulomb_Energy_OMP(reax_system *system,control_params *contro long totalReductionSize = system->N * nthreads; double total_EvdW = 0.; double total_Eele = 0.; - + #pragma omp parallel default(shared) reduction(+:total_EvdW, total_Eele) { int i, j, pj, r; @@ -276,12 +276,12 @@ void Tabulated_vdW_Coulomb_Energy_OMP(reax_system *system,control_params *contro class PairReaxCOMP *pair_reax_ptr; pair_reax_ptr = static_cast(system->pair_ptr); class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); - - pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, - system->pair_ptr->vflag_either, + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, natoms, system->pair_ptr->eatom, system->pair_ptr->vatom, thr); - + //#pragma omp for schedule(dynamic,50) #pragma omp for schedule(guided) for (i = 0; i < natoms; ++i) { @@ -290,7 +290,7 @@ void Tabulated_vdW_Coulomb_Energy_OMP(reax_system *system,control_params *contro start_i = Start_Index(i,far_nbrs); end_i = End_Index(i,far_nbrs); orig_i = system->my_atoms[i].orig_id; - + for (pj = start_i; pj < end_i; ++pj) { nbr_pj = &(far_nbrs->select.far_nbr_list[pj]); j = nbr_pj->nbr; @@ -310,7 +310,7 @@ void Tabulated_vdW_Coulomb_Energy_OMP(reax_system *system,control_params *contro flag = 1; } } - + } if (flag) { @@ -354,30 +354,30 @@ void Tabulated_vdW_Coulomb_Energy_OMP(reax_system *system,control_params *contro if( control->virial == 0 ) { rvec_ScaledAdd( workspace->f[i], -(CEvd + CEclmb), nbr_pj->dvec ); - rvec_ScaledAdd( workspace->forceReduction[froffset+j], + rvec_ScaledAdd( workspace->forceReduction[froffset+j], +(CEvd + CEclmb), nbr_pj->dvec ); } else { // NPT, iNPT or sNPT /* for pressure coupling, terms not related to bond order derivatives are added directly into pressure vector/tensor */ rvec_Scale( temp, CEvd + CEclmb, nbr_pj->dvec ); - + rvec_ScaledAdd( workspace->f[i], -1., temp ); rvec_Add( workspace->forceReduction[froffset+j], temp ); - + rvec_iMultiply( ext_press, nbr_pj->rel_box, temp ); rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); } } } } - + pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, - system->pair_ptr->vflag_either, thr); + system->pair_ptr->vflag_either, thr); } // end omp parallel - + data->my_en.e_vdW = total_EvdW; data->my_en.e_ele = total_Eele; - + Compute_Polarization_Energy( system, data ); } diff --git a/src/USER-OMP/reaxc_torsion_angles_omp.cpp b/src/USER-OMP/reaxc_torsion_angles_omp.cpp index 294eeb9544..bb8bbe1cd7 100644 --- a/src/USER-OMP/reaxc_torsion_angles_omp.cpp +++ b/src/USER-OMP/reaxc_torsion_angles_omp.cpp @@ -65,7 +65,7 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control, double total_Etor = 0; double total_Econ = 0; int nthreads = control->nthreads; - + #pragma omp parallel default(shared) reduction(+: total_Etor, total_Econ) { int i, j, k, l, pi, pj, pk, pl, pij, plk; @@ -73,11 +73,11 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control, int start_j, end_j, start_k, end_k; int start_pj, end_pj, start_pk, end_pk; int num_frb_intrs = 0; - + double Delta_j, Delta_k; double r_ij, r_jk, r_kl, r_li; double BOA_ij, BOA_jk, BOA_kl; - + double exp_tor2_ij, exp_tor2_jk, exp_tor2_kl; double exp_tor1, exp_tor3_DjDk, exp_tor4_DjDk, exp_tor34_inv; double exp_cot2_jk, exp_cot2_ij, exp_cot2_kl; @@ -102,7 +102,7 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control, bond_data *pbond_ij, *pbond_jk, *pbond_kl; bond_order_data *bo_ij, *bo_jk, *bo_kl; three_body_interaction_data *p_ijk, *p_jkl; - + // Virial tallying variables double delil[3], deljl[3], delkl[3]; double eng_tmp, f_scaler, fi_tmp[3], fj_tmp[3], fk_tmp[3]; @@ -123,27 +123,27 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control, for (j = 0; j < system->N; ++j) { start_j = Start_Index(j, bonds); end_j = End_Index(j, bonds); - + for (pk = start_j; pk < end_j; ++pk) { bo_jk = &( bonds->select.bond_list[pk].bo_data ); for (k = 0; k < nthreads; ++k) bo_jk->CdboReduction[k] = 0.; } } - + #pragma omp for schedule(dynamic,50) for (j = 0; j < natoms; ++j) { type_j = system->my_atoms[j].type; Delta_j = workspace->Delta_boc[j]; start_j = Start_Index(j, bonds); end_j = End_Index(j, bonds); - + for (pk = start_j; pk < end_j; ++pk) { pbond_jk = &( bonds->select.bond_list[pk] ); k = pbond_jk->nbr; bo_jk = &( pbond_jk->bo_data ); BOA_jk = bo_jk->BO - control->thb_cut; - + /* see if there are any 3-body interactions involving j&k where j is the central atom. Otherwise there is no point in trying to form a 4-body interaction out of this neighborhood */ @@ -160,27 +160,27 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control, type_k = system->my_atoms[k].type; Delta_k = workspace->Delta_boc[k]; r_jk = pbond_jk->d; - + start_pk = Start_Index(pk, thb_intrs ); end_pk = End_Index(pk, thb_intrs ); start_pj = Start_Index(pj, thb_intrs ); end_pj = End_Index(pj, thb_intrs ); - + exp_tor2_jk = exp( -p_tor2 * BOA_jk ); exp_cot2_jk = exp( -p_cot2 * SQR(BOA_jk - 1.5) ); exp_tor3_DjDk = exp( -p_tor3 * (Delta_j + Delta_k) ); exp_tor4_DjDk = exp( p_tor4 * (Delta_j + Delta_k) ); exp_tor34_inv = 1.0 / (1.0 + exp_tor3_DjDk + exp_tor4_DjDk); f11_DjDk = (2.0 + exp_tor3_DjDk) * exp_tor34_inv; - - + + /* pick i up from j-k interaction where j is the central atom */ for (pi = start_pk; pi < end_pk; ++pi) { p_ijk = &( thb_intrs->select.three_body_list[pi] ); pij = p_ijk->pthb; // pij is pointer to i on j's bond_list pbond_ij = &( bonds->select.bond_list[pij] ); bo_ij = &( pbond_ij->bo_data ); - + if (bo_ij->BO > control->thb_cut/*0*/) { i = p_ijk->thb; type_i = system->my_atoms[i].type; @@ -219,13 +219,13 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control, bo_ij->BO * bo_jk->BO * bo_kl->BO > control->thb_cut/*0*/) { ++num_frb_intrs; //fprintf(stderr, - // "%5d: %6d %6d %6d %6d\n", num_frb_intrs, + // "%5d: %6d %6d %6d %6d\n", num_frb_intrs, // system->my_atoms[i].orig_id,system->my_atoms[j].orig_id, // system->my_atoms[k].orig_id,system->my_atoms[l].orig_id); r_kl = pbond_kl->d; BOA_kl = bo_kl->BO - control->thb_cut; - + theta_jkl = p_jkl->theta; sin_jkl = sin( theta_jkl ); cos_jkl = cos( theta_jkl ); @@ -235,12 +235,12 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control, else if( sin_jkl <= 0 && sin_jkl >= -MIN_SINE ) tan_jkl_i = cos_jkl / -MIN_SINE; else tan_jkl_i = cos_jkl /sin_jkl; - + rvec_ScaledSum( dvec_li, 1., system->my_atoms[i].x, -1., system->my_atoms[l].x ); r_li = rvec_Norm( dvec_li ); - - + + /* omega and its derivative */ omega = Calculate_Omega( pbond_ij->dvec, r_ij, pbond_jk->dvec, r_jk, @@ -255,7 +255,7 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control, cos2omega = cos( 2. * omega ); cos3omega = cos( 3. * omega ); /* end omega calculations */ - + /* torsion energy */ exp_tor1 = exp( fbp->p_tor1 * SQR(2.0 - bo_jk->BO_pi - f11_DjDk) ); @@ -360,35 +360,35 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control, } else { ivec_Sum(rel_box_jl, pbond_jk->rel_box, pbond_kl->rel_box); - + /* dcos_theta_ijk */ rvec_Scale( force, CEtors7 + CEconj4, p_ijk->dcos_dk ); rvec_Add( workspace->forceReduction[reductionOffset+i], force ); rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); - + rvec_ScaledAdd( workspace->f[j], CEtors7 + CEconj4, p_ijk->dcos_dj ); - + rvec_Scale( force, CEtors7 + CEconj4, p_ijk->dcos_di ); rvec_Add( workspace->forceReduction[reductionOffset+k], force ); rvec_iMultiply( ext_press, pbond_jk->rel_box, force ); rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); - + /* dcos_theta_jkl */ rvec_ScaledAdd( workspace->f[j], CEtors8 + CEconj5, p_jkl->dcos_di ); - + rvec_Scale( force, CEtors8 + CEconj5, p_jkl->dcos_dj ); rvec_Add( workspace->forceReduction[reductionOffset+k], force ); rvec_iMultiply( ext_press, pbond_jk->rel_box, force ); rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); - + rvec_Scale( force, CEtors8 + CEconj5, p_jkl->dcos_dk ); rvec_Add( workspace->forceReduction[reductionOffset+l], force ); rvec_iMultiply( ext_press, rel_box_jl, force ); - rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); - + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + /* dcos_omega */ rvec_Scale( force, CEtors9 + CEconj6, dcos_omega_di ); rvec_Add( workspace->forceReduction[reductionOffset+i], force ); @@ -411,7 +411,7 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control, /* tally into per-atom virials */ if( system->pair_ptr->vflag_atom || system->pair_ptr->evflag) { - + // acquire vectors rvec_ScaledSum( delil, 1., system->my_atoms[l].x, -1., system->my_atoms[i].x ); @@ -423,21 +423,21 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control, rvec_Scale( fi_tmp, CEtors7 + CEconj4, p_ijk->dcos_dk ); rvec_Scale( fj_tmp, CEtors7 + CEconj4, p_ijk->dcos_dj ); rvec_Scale( fk_tmp, CEtors7 + CEconj4, p_ijk->dcos_di ); - + // dcos_theta_jkl rvec_ScaledAdd( fj_tmp, CEtors8 + CEconj5, p_jkl->dcos_di ); rvec_ScaledAdd( fk_tmp, CEtors8 + CEconj5, p_jkl->dcos_dj ); - + // dcos_omega rvec_ScaledAdd( fi_tmp, CEtors9 + CEconj6, dcos_omega_di ); rvec_ScaledAdd( fj_tmp, CEtors9 + CEconj6, dcos_omega_dj ); rvec_ScaledAdd( fk_tmp, CEtors9 + CEconj6, dcos_omega_dk ); - + // tally eng_tmp = e_tor + e_con; - + if (system->pair_ptr->evflag) - pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, j, k, system->n, 1, + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, j, k, system->n, 1, eng_tmp, 0.0, 0.0, 0.0, 0.0, 0.0, thr); // NEED TO MAKE AN OMP VERSION OF THIS CALL! @@ -445,7 +445,7 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control, system->pair_ptr->v_tally4(i, j, k, l, fi_tmp, fj_tmp, fk_tmp, delil, deljl, delkl ); } - + } // pl check ends } // pl loop ends } // pi check ends @@ -454,12 +454,12 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control, } // jmy_en.e_tor = total_Etor; data->my_en.e_con = total_Econ; - + #ifdef OMP_TIMING endTimeBase = MPI_Wtime(); ompTimingData[COMPUTETORSIONANGLESBOINDEX] += (endTimeBase-startTimeBase); diff --git a/src/USER-OMP/reaxc_valence_angles_omp.cpp b/src/USER-OMP/reaxc_valence_angles_omp.cpp index 7c45db1493..c83e5de852 100644 --- a/src/USER-OMP/reaxc_valence_angles_omp.cpp +++ b/src/USER-OMP/reaxc_valence_angles_omp.cpp @@ -54,7 +54,7 @@ void Calculate_dCos_ThetaOMP( rvec dvec_ji, double d_ji, rvec dvec_jk, double d_ double csqr_jk = Cdot_inv3 * sqr_d_jk; double csqr_ji = Cdot_inv3 * sqr_d_ji; - + // Try to help compiler out by unrolling // x-component double dinv_jk = dvec_jk[0] * inv_dists; @@ -103,7 +103,7 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, double endTimeBase, startTimeBase; startTimeBase = MPI_Wtime(); #endif - + reax_list *bonds = (*lists) + BONDS; reax_list *thb_intrs = (*lists) + THREE_BODIES; @@ -118,20 +118,20 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, double total_Eang = 0; double total_Epen = 0; double total_Ecoa = 0; - + int per_atom = (thb_intrs->num_intrs / system->N); int nthreads = control->nthreads; int chunksize = system->N/(nthreads*10); int num_thb_intrs = 0; int TWICE = 2; -#pragma omp parallel default(shared) reduction(+:total_Eang, total_Epen, total_Ecoa, num_thb_intrs) +#pragma omp parallel default(shared) reduction(+:total_Eang, total_Epen, total_Ecoa, num_thb_intrs) { int i, j, pi, k, pk, t; int type_i, type_j, type_k; int start_j, end_j, start_pk, end_pk; int cnt, my_offset, mark; - + double temp, temp_bo_jt, pBOjt7; double p_val1, p_val2, p_val3, p_val4, p_val5, p_val7; double p_pen1, p_pen2, p_pen3, p_pen4; @@ -150,17 +150,17 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, double BOA_ij, BOA_jk; rvec force, ext_press; // rtensor temp_rtensor, total_rtensor; - + // Tallying variables double eng_tmp, f_scaler, fi_tmp[3], fj_tmp[3], fk_tmp[3]; double delij[3], delkj[3]; - + three_body_header *thbh; three_body_parameters *thbp; three_body_interaction_data *p_ijk, *p_kji; bond_data *pbond_ij, *pbond_jk, *pbond_jt; bond_order_data *bo_ij, *bo_jk, *bo_jt; - + int tid = omp_get_thread_num(); long reductionOffset = (system->N * tid); class PairReaxCOMP *pair_reax_ptr; @@ -174,7 +174,7 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, // Run through a minimal for(jnum_intrs / nthreads; #pragma omp for schedule(dynamic,50) @@ -188,7 +188,7 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, // Always point to start of workspace to count angles my_offset = tid * per_thread; - + for (pi = start_j; pi < end_j; ++pi) { Set_Start_Index( pi, my_offset, thb_intrs ); pbond_ij = &(bonds->select.bond_list[pi]); @@ -196,7 +196,7 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, BOA_ij = bo_ij->BO - control->thb_cut; if (BOA_ij > 0.0) { - i = pbond_ij->nbr; + i = pbond_ij->nbr; /* first copy 3-body intrs from previously computed ones where i>k. in the second for-loop below, @@ -204,13 +204,13 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, for (pk = start_j; pk < pi; ++pk) { start_pk = Start_Index( pk, thb_intrs ); end_pk = End_Index( pk, thb_intrs ); - + for (t = start_pk; t < end_pk; ++t) if (thb_intrs->select.three_body_list[t].thb == i) { p_ijk = &(thb_intrs->select.three_body_list[my_offset] ); p_ijk->thb = bonds->select.bond_list[pk].nbr; - + ++my_offset; break; } @@ -225,11 +225,11 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, p_ijk = &( thb_intrs->select.three_body_list[my_offset] ); p_ijk->thb = k; - + ++my_offset; // add this to the list of 3-body interactions } // for(pk) } // if() - + Set_End_Index(pi, my_offset, thb_intrs ); } // for(pi) @@ -246,7 +246,7 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, // Number of angles owned by this atom _my_offset[j] = my_offset - tid * per_thread; } // for(j) - + // Wait for all threads to finish counting angles #pragma omp barrier @@ -271,7 +271,7 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, // Original loop, but now using precomputed offsets // Safe to use all threads available, regardless of threads tasked above // We also now skip over atoms that have no angles assigned -#pragma omp for schedule(dynamic,50)//(dynamic,chunksize)//(guided) +#pragma omp for schedule(dynamic,50)//(dynamic,chunksize)//(guided) for (j = 0; j < system->N; ++j) { // Ray: the first one with system->N type_j = system->my_atoms[j].type; if(type_j < 0) continue; @@ -281,14 +281,14 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, start_j = Start_Index(j, bonds); end_j = End_Index(j, bonds); - + type_j = system->my_atoms[j].type; my_offset = _my_offset[j]; p_val3 = system->reax_param.sbp[ type_j ].p_val3; p_val5 = system->reax_param.sbp[ type_j ].p_val5; - + SBOp = 0, prod_SBO = 1; for (t = start_j; t < end_j; ++t) { bo_jt = &(bonds->select.bond_list[t].bo_data); @@ -298,7 +298,7 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, temp *= temp; prod_SBO *= exp( -temp ); } - + // modifications to match Adri's code - 09/01/09 if( workspace->vlpex[j] >= 0 ){ vlpadj = 0; @@ -308,10 +308,10 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, vlpadj = workspace->nlp[j]; dSBO2 = (prod_SBO - 1) * (1 - p_val8 * workspace->dDelta_lp[j]); } - + SBO = SBOp + (1 - prod_SBO) * (-workspace->Delta_boc[j] - p_val8 * vlpadj); dSBO1 = -8 * prod_SBO * ( workspace->Delta_boc[j] + p_val8 * vlpadj ); - + if( SBO <= 0 ) SBO2 = 0, CSBO2 = 0; else if( SBO > 0 && SBO <= 1 ) { @@ -324,16 +324,16 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, } else SBO2 = 2, CSBO2 = 0; - + expval6 = exp( p_val6 * workspace->Delta_boc[j] ); - + for (pi = start_j; pi < end_j; ++pi) { Set_Start_Index( pi, my_offset, thb_intrs ); pbond_ij = &(bonds->select.bond_list[pi]); bo_ij = &(pbond_ij->bo_data); BOA_ij = bo_ij->BO - control->thb_cut; - - + + if (BOA_ij > 0.0) { i = pbond_ij->nbr; r_ij = pbond_ij->d; @@ -346,19 +346,19 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, for (pk = start_j; pk < pi; ++pk) { start_pk = Start_Index( pk, thb_intrs ); end_pk = End_Index( pk, thb_intrs ); - + for (t = start_pk; t < end_pk; ++t) if (thb_intrs->select.three_body_list[t].thb == i) { p_ijk = &(thb_intrs->select.three_body_list[my_offset] ); p_kji = &(thb_intrs->select.three_body_list[t]); - + p_ijk->thb = bonds->select.bond_list[pk].nbr; p_ijk->pthb = pk; p_ijk->theta = p_kji->theta; rvec_Copy( p_ijk->dcos_di, p_kji->dcos_dk ); rvec_Copy( p_ijk->dcos_dj, p_kji->dcos_dj ); rvec_Copy( p_ijk->dcos_dk, p_kji->dcos_di ); - + ++my_offset; ++num_thb_intrs; break; @@ -374,15 +374,15 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, k = pbond_jk->nbr; type_k = system->my_atoms[k].type; p_ijk = &( thb_intrs->select.three_body_list[my_offset] ); - + // Fix by Sudhir // if (BOA_jk <= 0) continue; if (j >= system->n && i >= system->n && k >= system->n) continue; - + Calculate_Theta( pbond_ij->dvec, pbond_ij->d, pbond_jk->dvec, pbond_jk->d, &theta, &cos_theta ); - + Calculate_dCos_ThetaOMP( pbond_ij->dvec, pbond_ij->d, pbond_jk->dvec, pbond_jk->d, &(p_ijk->dcos_di), &(p_ijk->dcos_dj), @@ -390,23 +390,23 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, p_ijk->thb = k; p_ijk->pthb = pk; p_ijk->theta = theta; - + sin_theta = sin( theta ); if( sin_theta < 1.0e-5 ) sin_theta = 1.0e-5; - + ++my_offset; // add this to the list of 3-body interactions ++num_thb_intrs; - + if ((j < system->n) && (BOA_jk > 0.0) && (bo_ij->BO > control->thb_cut) && (bo_jk->BO > control->thb_cut) && (bo_ij->BO * bo_jk->BO > control->thb_cutsq)) { r_jk = pbond_jk->d; thbh = &( system->reax_param.thbp[ type_i ][ type_j ][ type_k ] ); - + for (cnt = 0; cnt < thbh->cnt; ++cnt) { - + if( fabs(thbh->prm[cnt].p_val1) > 0.001 ) { thbp = &( thbh->prm[cnt] ); @@ -456,7 +456,7 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, CEval7 = CEval5 * dSBO2; CEval8 = -CEval4 / sin_theta; - total_Eang += e_ang = + total_Eang += e_ang = f7_ij * f7_jk * f8_Dj * expval12theta; /* END ANGLE ENERGY*/ @@ -533,9 +533,9 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, if( control->virial == 0 ) { rvec_ScaledAdd( workspace->f[j], CEval8, p_ijk->dcos_dj ); - rvec_ScaledAdd( workspace->forceReduction[reductionOffset+i], + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+i], CEval8, p_ijk->dcos_di ); - rvec_ScaledAdd( workspace->forceReduction[reductionOffset+k], + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+k], CEval8, p_ijk->dcos_dk ); } else { @@ -543,36 +543,36 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, added directly into forces and pressure vector/tensor */ rvec_Scale( force, CEval8, p_ijk->dcos_di ); rvec_Add( workspace->forceReduction[reductionOffset+i], force ); - + rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); - + rvec_ScaledAdd( workspace->f[j], CEval8, p_ijk->dcos_dj ); - + rvec_Scale( force, CEval8, p_ijk->dcos_dk ); rvec_Add( workspace->forceReduction[reductionOffset+k], force ); - + rvec_iMultiply( ext_press, pbond_jk->rel_box, force ); rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); } /* tally into per-atom virials */ if( system->pair_ptr->vflag_atom || system->pair_ptr->evflag) { - + /* Acquire vectors */ rvec_ScaledSum( delij, 1., system->my_atoms[i].x, -1., system->my_atoms[j].x ); rvec_ScaledSum( delkj, 1., system->my_atoms[k].x, -1., system->my_atoms[j].x ); - + rvec_Scale( fi_tmp, -CEval8, p_ijk->dcos_di ); rvec_Scale( fj_tmp, -CEval8, p_ijk->dcos_dj ); rvec_Scale( fk_tmp, -CEval8, p_ijk->dcos_dk ); - + eng_tmp = e_ang + e_pen + e_coa; - + if( system->pair_ptr->evflag) - pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, j, j, system->N, 1, + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, j, j, system->N, 1, eng_tmp, 0.0, 0.0, 0.0, 0.0, 0.0, thr); if( system->pair_ptr->vflag_atom) // NEED TO MAKE AN OMP VERSION OF THIS CALL! @@ -588,7 +588,7 @@ void Valence_AnglesOMP( reax_system *system, control_params *control, Set_End_Index(pi, my_offset, thb_intrs ); } // for(pi) } // for(j) - + pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, system->pair_ptr->vflag_either, thr); } // end omp parallel diff --git a/src/USER-REAXC/pair_reaxc.cpp b/src/USER-REAXC/pair_reaxc.cpp index b520fc14c7..bf3b2e4467 100644 --- a/src/USER-REAXC/pair_reaxc.cpp +++ b/src/USER-REAXC/pair_reaxc.cpp @@ -230,7 +230,7 @@ void PairReaxC::settings(int narg, char **arg) system->mincap = MIN_CAP; system->safezone = SAFE_ZONE; system->saferzone = SAFER_ZONE; - + // process optional keywords int iarg = 1; diff --git a/src/USER-REAXC/reaxc_allocate.cpp b/src/USER-REAXC/reaxc_allocate.cpp index ac835e7ce3..0d9c51c878 100644 --- a/src/USER-REAXC/reaxc_allocate.cpp +++ b/src/USER-REAXC/reaxc_allocate.cpp @@ -199,7 +199,7 @@ void DeAllocate_Workspace( control_params *control, storage *workspace ) if(workspace->CdDeltaReduction) sfree( workspace->CdDeltaReduction, "cddelta_reduce" ); if(workspace->forceReduction) sfree( workspace->forceReduction, "f_reduce" ); if(workspace->valence_angle_atom_myoffset) sfree( workspace->valence_angle_atom_myoffset, "valence_angle_atom_myoffset"); - + if (control->virial && workspace->my_ext_pressReduction) sfree( workspace->my_ext_pressReduction, "ext_press_reduce"); #endif } @@ -297,12 +297,12 @@ int Allocate_Workspace( reax_system *system, control_params *control, workspace->f = (rvec*) scalloc( total_cap, sizeof(rvec), "f", comm ); workspace->CdDelta = (double*) scalloc( total_cap, sizeof(double), "CdDelta", comm ); - + // storage for reductions with multiple threads #ifdef LMP_USER_OMP workspace->CdDeltaReduction = (double *) scalloc(sizeof(double), total_cap*control->nthreads, "cddelta_reduce", comm); - + workspace->forceReduction = (rvec *) scalloc(sizeof(rvec), total_cap*control->nthreads, "forceReduction", comm); @@ -368,30 +368,30 @@ static int Reallocate_Bonds_List( reax_system *system, reax_list *bonds, *total_bonds += system->my_atoms[i].num_bonds; } *total_bonds = (int)(MAX( *total_bonds * safezone, mincap*MIN_BONDS )); - + #ifdef LMP_USER_OMP for (i = 0; i < bonds->num_intrs; ++i) sfree(bonds->select.bond_list[i].bo_data.CdboReduction, "CdboReduction"); #endif - + Delete_List( bonds, comm ); if(!Make_List(system->total_cap, *total_bonds, TYP_BOND, bonds, comm)) { fprintf( stderr, "not enough space for bonds list. terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } - + #ifdef LMP_USER_OMP #if defined(_OPENMP) int nthreads = omp_get_num_threads(); #else int nthreads = 1; #endif - + for (i = 0; i < bonds->num_intrs; ++i) - bonds->select.bond_list[i].bo_data.CdboReduction = + bonds->select.bond_list[i].bo_data.CdboReduction = (double*) smalloc(sizeof(double)*nthreads, "CdboReduction", comm); #endif - + return SUCCESS; } diff --git a/src/USER-REAXC/reaxc_types.h b/src/USER-REAXC/reaxc_types.h index 1b9ce63dc2..547602feb4 100644 --- a/src/USER-REAXC/reaxc_types.h +++ b/src/USER-REAXC/reaxc_types.h @@ -486,7 +486,7 @@ typedef struct int lgflag; int enobondsflag; - + } control_params;