From bb890941ca401edb9d5ecbb9a665cbb6b685b1fa Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 24 May 2017 00:19:36 -0400 Subject: [PATCH] first chunk of code from USER-REAXC-OMP imported and adapted into USER-REAXC --- src/USER-REAXC/fix_qeq_reax.cpp | 106 ++++++++++++++++++++++++------ src/USER-REAXC/fix_qeq_reax.h | 39 ++++++----- src/USER-REAXC/fix_reaxc.h | 1 + src/USER-REAXC/pair_reaxc.cpp | 8 ++- src/USER-REAXC/pair_reaxc.h | 29 ++++---- src/USER-REAXC/reaxc_allocate.cpp | 62 +++++++++++++++-- src/USER-REAXC/reaxc_control.cpp | 1 + src/USER-REAXC/reaxc_types.h | 43 ++++++++++++ 8 files changed, 231 insertions(+), 58 deletions(-) diff --git a/src/USER-REAXC/fix_qeq_reax.cpp b/src/USER-REAXC/fix_qeq_reax.cpp index 96df03c668..22b5382727 100644 --- a/src/USER-REAXC/fix_qeq_reax.cpp +++ b/src/USER-REAXC/fix_qeq_reax.cpp @@ -68,7 +68,7 @@ FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) : { if (lmp->citeme) lmp->citeme->add(cite_fix_qeq_reax); - if (narg != 8) error->all(FLERR,"Illegal fix qeq/reax command"); + if (narg < 8 || narg > 9) error->all(FLERR,"Illegal fix qeq/reax command"); nevery = force->inumeric(FLERR,arg[3]); if (nevery <= 0) error->all(FLERR,"Illegal fix qeq/reax command"); @@ -78,6 +78,9 @@ FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) : tolerance = force->numeric(FLERR,arg[6]); pertype_parameters(arg[7]); + // dual CG support only available for USER-OMP variant + dual_enabled = 0; + shld = NULL; n = n_cap = 0; @@ -111,16 +114,21 @@ FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) : // perform initial allocation of atom-based arrays // register with Atom class - s_hist = t_hist = NULL; - grow_arrays(atom->nmax); - atom->add_callback(0); - for( int i = 0; i < atom->nmax; i++ ) - for (int j = 0; j < nprev; ++j ) - s_hist[i][j] = t_hist[i][j] = 0; - reaxc = NULL; reaxc = (PairReaxC *) force->pair_match("reax/c",1); + if (reaxc) { + s_hist = t_hist = NULL; + grow_arrays(atom->nmax); + atom->add_callback(0); + for( int i = 0; i < atom->nmax; i++ ) + for (int j = 0; j < nprev; ++j ) + s_hist[i][j] = t_hist[i][j] = 0; + } + + // dual CG support + // Update comm sizes for this fix + if (dual_enabled) comm_forward = comm_reverse = 2; } /* ---------------------------------------------------------------------- */ @@ -180,6 +188,10 @@ void FixQEqReax::pertype_parameters(char *arg) return; } + // OMP style will use it's own pertype_parameters() + Pair * pair = force->pair_match("reax/c/omp",1); + if (pair) return; + int i,itype,ntypes; double v1,v2,v3; FILE *pf; @@ -227,10 +239,14 @@ void FixQEqReax::allocate_storage() memory->create(b_prc,nmax,"qeq:b_prc"); memory->create(b_prm,nmax,"qeq:b_prm"); - memory->create(p,nmax,"qeq:p"); - memory->create(q,nmax,"qeq:q"); - memory->create(r,nmax,"qeq:r"); - memory->create(d,nmax,"qeq:d"); + // dual CG support + int size = nmax; + if (dual_enabled) size*= 2; + + memory->create(p,size,"qeq:p"); + memory->create(q,size,"qeq:q"); + memory->create(r,size,"qeq:r"); + memory->create(d,size,"qeq:d"); } /* ---------------------------------------------------------------------- */ @@ -437,6 +453,13 @@ void FixQEqReax::setup_pre_force_respa(int vflag, int ilevel) /* ---------------------------------------------------------------------- */ +void FixQEqReax::min_setup_pre_force(int vflag) +{ + setup_pre_force(vflag); +} + +/* ---------------------------------------------------------------------- */ + void FixQEqReax::init_storage() { int NN; @@ -476,11 +499,14 @@ void FixQEqReax::pre_force(int vflag) reallocate_matrix(); init_matvec(); - matvecs = CG(b_s, s); // CG on s - parallel - matvecs += CG(b_t, t); // CG on t - parallel + + matvecs_s = CG(b_s, s); // CG on s - parallel + matvecs_t = CG(b_t, t); // CG on t - parallel + matvecs = matvecs_s + matvecs_t; + calculate_Q(); - if( comm->me == 0 ) { + if (comm->me == 0) { t_end = MPI_Wtime(); qeq_time = t_end - t_start; } @@ -702,7 +728,6 @@ int FixQEqReax::CG( double *b, double *x ) beta = sig_new / sig_old; vector_sum( d, 1., p, beta, d, nn ); - } if (i >= imax && comm->me == 0) { @@ -816,7 +841,15 @@ int FixQEqReax::pack_forward_comm(int n, int *list, double *buf, for(m = 0; m < n; m++) buf[m] = t[list[m]]; else if( pack_flag == 4 ) for(m = 0; m < n; m++) buf[m] = atom->q[list[m]]; - + else if( pack_flag == 5) { + m = 0; + for(int i = 0; i < n; i++) { + int j = 2 * list[i]; + buf[m++] = d[j ]; + buf[m++] = d[j+1]; + } + return m; + } return n; } @@ -834,6 +867,15 @@ void FixQEqReax::unpack_forward_comm(int n, int first, double *buf) for(m = 0, i = first; m < n; m++, i++) t[i] = buf[m]; else if( pack_flag == 4) for(m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m]; + else if( pack_flag == 5) { + int last = first + n; + m = 0; + for(i = first; i < last; i++) { + int j = 2 * i; + d[j ] = buf[m++]; + d[j+1] = buf[m++]; + } + } } /* ---------------------------------------------------------------------- */ @@ -841,15 +883,35 @@ void FixQEqReax::unpack_forward_comm(int n, int first, double *buf) int FixQEqReax::pack_reverse_comm(int n, int first, double *buf) { int i, m; - for(m = 0, i = first; m < n; m++, i++) buf[m] = q[i]; - return n; + if (pack_flag == 5) { + m = 0; + int last = first + n; + for(i = first; i < last; i++) { + int indxI = 2 * i; + buf[m++] = q[indxI ]; + buf[m++] = q[indxI+1]; + } + return m; + } else { + for (m = 0, i = first; m < n; m++, i++) buf[m] = q[i]; + return n; + } } /* ---------------------------------------------------------------------- */ void FixQEqReax::unpack_reverse_comm(int n, int *list, double *buf) { - for(int m = 0; m < n; m++) q[list[m]] += buf[m]; + if (pack_flag == 5) { + int m = 0; + for(int i = 0; i < n; i++) { + int indxI = 2 * list[i]; + q[indxI ] += buf[m++]; + q[indxI+1] += buf[m++]; + } + } else { + for (int m = 0; m < n; m++) q[list[m]] += buf[m]; + } } /* ---------------------------------------------------------------------- @@ -866,6 +928,9 @@ double FixQEqReax::memory_usage() bytes += m_cap * sizeof(int); bytes += m_cap * sizeof(double); + if (dual_enabled) + bytes += atom->nmax*4 * sizeof(double); // double size for q, d, r, and p + return bytes; } @@ -1034,5 +1099,4 @@ void FixQEqReax::vector_add( double* dest, double c, double* v, int k ) if (atom->mask[kk] & groupbit) dest[kk] += c * v[kk]; } - } diff --git a/src/USER-REAXC/fix_qeq_reax.h b/src/USER-REAXC/fix_qeq_reax.h index 7c3e8a8f96..d82232576a 100644 --- a/src/USER-REAXC/fix_qeq_reax.h +++ b/src/USER-REAXC/fix_qeq_reax.h @@ -39,15 +39,16 @@ class FixQEqReax : public Fix { FixQEqReax(class LAMMPS *, int, char **); ~FixQEqReax(); int setmask(); - void init(); + virtual void init(); void init_list(int,class NeighList *); - void init_storage(); + virtual void init_storage(); void setup_pre_force(int); - void pre_force(int); + virtual void pre_force(int); void setup_pre_force_respa(int, int); void pre_force_respa(int, int, int); + void min_setup_pre_force(int); void min_pre_force(int); int matvecs; @@ -99,25 +100,25 @@ class FixQEqReax : public Fix { //double **h; //double *hc, *hs; - void pertype_parameters(char*); + virtual void pertype_parameters(char*); void init_shielding(); void init_taper(); - void allocate_storage(); - void deallocate_storage(); + virtual void allocate_storage(); + virtual void deallocate_storage(); void reallocate_storage(); - void allocate_matrix(); + virtual void allocate_matrix(); void deallocate_matrix(); void reallocate_matrix(); - void init_matvec(); + virtual void init_matvec(); void init_H(); - void compute_H(); + virtual void compute_H(); double calculate_H(double,double); - void calculate_Q(); + virtual void calculate_Q(); - int CG(double*,double*); + virtual int CG(double*,double*); //int GMRES(double*,double*); - void sparse_matvec(sparse_matrix*,double*,double*); + virtual void sparse_matvec(sparse_matrix*,double*,double*); int pack_forward_comm(int, int *, double *, int, int *); void unpack_forward_comm(int, int, double *); @@ -129,12 +130,16 @@ class FixQEqReax : public Fix { int pack_exchange(int, double *); int unpack_exchange(int, double *); - double parallel_norm( double*, int ); - double parallel_dot( double*, double*, int ); - double parallel_vector_acc( double*, int ); + virtual double parallel_norm( double*, int ); + virtual double parallel_dot( double*, double*, int ); + virtual double parallel_vector_acc( double*, int ); - void vector_sum(double*,double,double*,double,double*,int); - void vector_add(double*, double, double*,int); + virtual void vector_sum(double*,double,double*,double,double*,int); + virtual void vector_add(double*, double, double*,int); + + // dual CG support + int dual_enabled; // 0: Original, separate s & t optimization; 1: dual optimization + int matvecs_s, matvecs_t; // Iteration count for each system }; } diff --git a/src/USER-REAXC/fix_reaxc.h b/src/USER-REAXC/fix_reaxc.h index e51a94e4a9..0e173f5ece 100644 --- a/src/USER-REAXC/fix_reaxc.h +++ b/src/USER-REAXC/fix_reaxc.h @@ -36,6 +36,7 @@ namespace LAMMPS_NS { class FixReaxC : public Fix { friend class PairReaxC; + friend class PairReaxCOMP; public: FixReaxC(class LAMMPS *,int, char **); diff --git a/src/USER-REAXC/pair_reaxc.cpp b/src/USER-REAXC/pair_reaxc.cpp index d51b0fc2f8..b520fc14c7 100644 --- a/src/USER-REAXC/pair_reaxc.cpp +++ b/src/USER-REAXC/pair_reaxc.cpp @@ -211,6 +211,9 @@ void PairReaxC::settings(int narg, char **arg) control->thb_cutsq = 0.00001; control->bg_cut = 0.3; + // Initialize for when omp style included + control->nthreads = 1; + out_control->write_steps = 0; out_control->traj_method = 0; strcpy( out_control->traj_title, "default_title" ); @@ -256,7 +259,7 @@ void PairReaxC::settings(int narg, char **arg) system->safezone = force->numeric(FLERR,arg[iarg+1]); if (system->safezone < 0.0) error->all(FLERR,"Illegal pair_style reax/c safezone command"); - system->saferzone = system->safezone*1.2; + system->saferzone = system->safezone*1.2 + 0.2; iarg += 2; } else if (strcmp(arg[iarg],"mincap") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal pair_style reax/c command"); @@ -457,6 +460,9 @@ void PairReaxC::setup( ) ReAllocate( system, control, data, workspace, &lists, mpi_data ); } + + bigint local_ngroup = list->inum; + MPI_Allreduce( &local_ngroup, &ngroup, 1, MPI_LMP_BIGINT, MPI_SUM, world ); } /* ---------------------------------------------------------------------- */ diff --git a/src/USER-REAXC/pair_reaxc.h b/src/USER-REAXC/pair_reaxc.h index e3c9e63bdc..5658648db6 100644 --- a/src/USER-REAXC/pair_reaxc.h +++ b/src/USER-REAXC/pair_reaxc.h @@ -37,20 +37,6 @@ namespace LAMMPS_NS { class PairReaxC : public Pair { public: - int fixbond_flag, fixspecies_flag; - int **tmpid; - double **tmpbo,**tmpr; - double *chi,*eta,*gamma; - - int *map; - control_params *control; - reax_system *system; - output_controls *out_control; - simulation_data *data; - storage *workspace; - reax_list *lists; - mpi_datatypes *mpi_data; - PairReaxC(class LAMMPS *); ~PairReaxC(); void compute(int, int); @@ -59,13 +45,28 @@ class PairReaxC : public Pair { void init_style(); double init_one(int, int); void *extract(const char *, int &); + int fixbond_flag, fixspecies_flag; + int **tmpid; + double **tmpbo,**tmpr; + + control_params *control; + reax_system *system; + output_controls *out_control; + simulation_data *data; + storage *workspace; + reax_list *lists; + mpi_datatypes *mpi_data; + + bigint ngroup; protected: double cutmax; int nelements; // # of unique elements char **elements; // names of unique elements + int *map; class FixReaxC *fix_reax; + double *chi,*eta,*gamma; int qeqflag; int setup_flag; int firstwarn; diff --git a/src/USER-REAXC/reaxc_allocate.cpp b/src/USER-REAXC/reaxc_allocate.cpp index 969912e082..ac835e7ce3 100644 --- a/src/USER-REAXC/reaxc_allocate.cpp +++ b/src/USER-REAXC/reaxc_allocate.cpp @@ -31,6 +31,10 @@ #include "reaxc_tool_box.h" #include "reaxc_vector.h" +#if defined(_OPENMP) +#include +#endif + /* allocate space for my_atoms important: we cannot know the exact number of atoms that will fall into a process's box throughout the whole simulation. therefore @@ -49,6 +53,15 @@ int PreAllocate_Space( reax_system *system, control_params *control, system->my_atoms = (reax_atom*) scalloc( system->total_cap, sizeof(reax_atom), "my_atoms", comm ); + // Nullify some arrays only used in omp styles + // Should be safe to do here since called in pair->setup(); +#ifdef LMP_USER_OMP + workspace->CdDeltaReduction = NULL; + workspace->forceReduction = NULL; + workspace->valence_angle_atom_myoffset = NULL; + workspace->my_ext_pressReduction = NULL; +#endif + return SUCCESS; } @@ -174,13 +187,21 @@ void DeAllocate_Workspace( control_params *control, storage *workspace ) sfree( workspace->q2, "q2" ); sfree( workspace->p2, "p2" ); - /* integrator */ + /* integrator storage */ sfree( workspace->v_const, "v_const" ); /* force related storage */ sfree( workspace->f, "f" ); sfree( workspace->CdDelta, "CdDelta" ); + /* reductions */ +#ifdef LMP_USER_OMP + if(workspace->CdDeltaReduction) sfree( workspace->CdDeltaReduction, "cddelta_reduce" ); + if(workspace->forceReduction) sfree( workspace->forceReduction, "f_reduce" ); + if(workspace->valence_angle_atom_myoffset) sfree( workspace->valence_angle_atom_myoffset, "valence_angle_atom_myoffset"); + + if (control->virial && workspace->my_ext_pressReduction) sfree( workspace->my_ext_pressReduction, "ext_press_reduce"); +#endif } @@ -272,10 +293,24 @@ int Allocate_Workspace( reax_system *system, control_params *control, /* integrator storage */ workspace->v_const = (rvec*) smalloc( local_rvec, "v_const", comm ); - // /* force related storage */ + /* force related storage */ workspace->f = (rvec*) scalloc( total_cap, sizeof(rvec), "f", comm ); workspace->CdDelta = (double*) scalloc( total_cap, sizeof(double), "CdDelta", comm ); + + // storage for reductions with multiple threads +#ifdef LMP_USER_OMP + workspace->CdDeltaReduction = (double *) scalloc(sizeof(double), total_cap*control->nthreads, + "cddelta_reduce", comm); + + workspace->forceReduction = (rvec *) scalloc(sizeof(rvec), total_cap*control->nthreads, + "forceReduction", comm); + + workspace->valence_angle_atom_myoffset = (int *) scalloc(sizeof(int), total_cap, "valence_angle_atom_myoffset", comm); + + if (control->virial) + workspace->my_ext_pressReduction = (rvec *) calloc(sizeof(rvec), control->nthreads); +#endif return SUCCESS; } @@ -333,13 +368,30 @@ static int Reallocate_Bonds_List( reax_system *system, reax_list *bonds, *total_bonds += system->my_atoms[i].num_bonds; } *total_bonds = (int)(MAX( *total_bonds * safezone, mincap*MIN_BONDS )); - + +#ifdef LMP_USER_OMP + for (i = 0; i < bonds->num_intrs; ++i) + sfree(bonds->select.bond_list[i].bo_data.CdboReduction, "CdboReduction"); +#endif + Delete_List( bonds, comm ); if(!Make_List(system->total_cap, *total_bonds, TYP_BOND, bonds, comm)) { fprintf( stderr, "not enough space for bonds list. terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } - + +#ifdef LMP_USER_OMP +#if defined(_OPENMP) + int nthreads = omp_get_num_threads(); +#else + int nthreads = 1; +#endif + + for (i = 0; i < bonds->num_intrs; ++i) + bonds->select.bond_list[i].bo_data.CdboReduction = + (double*) smalloc(sizeof(double)*nthreads, "CdboReduction", comm); +#endif + return SUCCESS; } @@ -438,7 +490,7 @@ void ReAllocate( reax_system *system, control_params *control, Reallocate_Bonds_List( system, (*lists)+BONDS, &num_bonds, &est_3body, comm ); realloc->bonds = 0; - realloc->num_3body = MAX( realloc->num_3body, est_3body ); + realloc->num_3body = MAX( realloc->num_3body, est_3body ) * 2; } /* 3-body list */ diff --git a/src/USER-REAXC/reaxc_control.cpp b/src/USER-REAXC/reaxc_control.cpp index 4def41bc8c..11a89020b8 100644 --- a/src/USER-REAXC/reaxc_control.cpp +++ b/src/USER-REAXC/reaxc_control.cpp @@ -48,6 +48,7 @@ char Read_Control_File( char *control_file, control_params* control, control->nsteps = 0; control->dt = 0.25; control->nprocs = 1; + control->nthreads = 1; control->procs_by_dim[0] = 1; control->procs_by_dim[1] = 1; control->procs_by_dim[2] = 1; diff --git a/src/USER-REAXC/reaxc_types.h b/src/USER-REAXC/reaxc_types.h index b3e2f40f02..1b9ce63dc2 100644 --- a/src/USER-REAXC/reaxc_types.h +++ b/src/USER-REAXC/reaxc_types.h @@ -38,6 +38,40 @@ #include "sys/time.h" #include +#if defined LMP_USER_OMP +#define OMP_TIMING 1 + +#ifdef OMP_TIMING +// pkcoff timing fields +enum { + COMPUTEINDEX=0, + COMPUTEWLINDEX, + COMPUTEBFINDEX, + COMPUTEQEQINDEX, + COMPUTENBFINDEX, + COMPUTEIFINDEX, + COMPUTETFINDEX, + COMPUTEBOINDEX, + COMPUTEBONDSINDEX, + COMPUTEATOMENERGYINDEX, + COMPUTEVALENCEANGLESBOINDEX, + COMPUTETORSIONANGLESBOINDEX, + COMPUTEHBONDSINDEX, + COMPUTECG1INDEX, + COMPUTECG2INDEX, + COMPUTECGCOMPUTEINDEX, + COMPUTECALCQINDEX, + COMPUTEINITMVINDEX, + COMPUTEMVCOMPINDEX, + LASTTIMINGINDEX +}; + +extern double ompTimingData[LASTTIMINGINDEX]; +extern int ompTimingCount[LASTTIMINGINDEX]; +extern int ompTimingCGCount[LASTTIMINGINDEX]; +#endif +#endif + /************* SOME DEFS - crucial for reax_types.h *********/ #define LAMMPS_REAX @@ -391,6 +425,7 @@ typedef struct { char sim_name[REAX_MAX_STR]; int nprocs; + int nthreads; ivec procs_by_dim; /* ensemble values: 0 : NVE @@ -616,6 +651,7 @@ typedef struct{ double C1dbopi, C2dbopi, C3dbopi, C4dbopi; double C1dbopi2, C2dbopi2, C3dbopi2, C4dbopi2; rvec dBOp, dln_BOp_s, dln_BOp_pi, dln_BOp_pi2; + double *CdboReduction; } bond_order_data; typedef struct { @@ -702,6 +738,13 @@ typedef struct double *CdDelta; // coefficient of dDelta rvec *f; + /* omp */ + rvec *forceReduction; + rvec *my_ext_pressReduction; + double *CdDeltaReduction; + int *valence_angle_atom_myoffset; + + reallocate_data realloc; } storage;