From d2fef46fb3a01fe49af3666ef600cb71a4808d42 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 23 Feb 2013 18:27:33 +0100 Subject: [PATCH] complete the implementation of respa/omp. tested ok. --- src/USER-OMP/fix_omp.cpp | 44 ++++++++++++--------------- src/USER-OMP/fix_omp.h | 12 +++++--- src/USER-OMP/respa_omp.cpp | 62 +++++++++++++++++++++++++++++++++++++- src/USER-OMP/thr_omp.cpp | 11 ++++--- 4 files changed, 94 insertions(+), 35 deletions(-) diff --git a/src/USER-OMP/fix_omp.cpp b/src/USER-OMP/fix_omp.cpp index da05f690d6..bac001795d 100644 --- a/src/USER-OMP/fix_omp.cpp +++ b/src/USER-OMP/fix_omp.cpp @@ -68,7 +68,7 @@ static int get_tid() FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), thr(NULL), last_omp_style(NULL), last_pair_hybrid(NULL), - _nthr(-1), _neighbor(true), _mixed(false) + _nthr(-1), _neighbor(true), _mixed(false), _reduced(true) { if ((narg < 4) || (narg > 7)) error->all(FLERR,"Illegal package omp command"); if (strcmp(arg[1],"all") != 0) error->all(FLERR,"fix OMP has to operate on group 'all'"); @@ -87,8 +87,10 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) if (nthreads < 1) error->all(FLERR,"Illegal number of OpenMP threads requested"); + int reset_thr = 0; if (nthreads != comm->nthreads) { #if defined(_OPENMP) + reset_thr = 1; omp_set_num_threads(nthreads); #endif comm->nthreads = nthreads; @@ -110,25 +112,21 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) // print summary of settings if (comm->me == 0) { - const char * const nmode = _neighbor ? "OpenMP capable" : "serial"; + const char * const nmode = _neighbor ? "multi-threaded" : "serial"; const char * const kmode = _mixed ? "mixed" : "double"; if (screen) { - fprintf(screen," reset %d OpenMP thread(s) per MPI task\n", nthreads); - fprintf(screen," using %s neighbor list subroutines\n", nmode); - if (_mixed) - fputs(" using mixed precision OpenMP force kernels where available\n", screen); - else - fputs(" using double precision OpenMP force kernels\n", screen); + if (reset_thr) + fprintf(screen,"set %d OpenMP thread(s) per MPI task\n", nthreads); + fprintf(screen,"using %s neighbor list subroutines\n", nmode); + fprintf(screen,"prefer %s precision OpenMP force kernels\n", kmode); } if (logfile) { - fprintf(logfile," reset %d OpenMP thread(s) per MPI task\n", nthreads); - fprintf(logfile," using %s neighbor list subroutines\n", nmode); - if (_mixed) - fputs(" using mixed precision OpenMP force kernels where available\n", logfile); - else - fputs(" using double precision OpenMP force kernels\n", logfile); + if (reset_thr) + fprintf(logfile,"set %d OpenMP thread(s) per MPI task\n", nthreads); + fprintf(logfile,"using %s neighbor list subroutines\n", nmode); + fprintf(logfile,"prefer %s precision OpenMP force kernels\n", kmode); } } @@ -137,7 +135,6 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) // encourage the OS to use storage that is "close" to each thread's CPU. thr = new ThrData *[nthreads]; _nthr = nthreads; - _clearforce = true; #if defined(_OPENMP) #pragma omp parallel default(none) #endif @@ -283,7 +280,6 @@ void FixOMP::init() fprintf(logfile,"Last active /omp style is %s_style %s\n", last_force_name, last_omp_name); } else { - _clearforce = false; if (screen) fprintf(screen,"No /omp style for force computation currently active\n"); if (logfile) @@ -323,18 +319,16 @@ void FixOMP::pre_force(int) double *de = atom->de; double *drho = atom->drho; - if (_clearforce) { #if defined(_OPENMP) #pragma omp parallel default(none) shared(f,torque,erforce,de,drho) #endif - { - const int tid = get_tid(); - thr[tid]->check_tid(tid); - thr[tid]->init_force(nall,f,torque,erforce,de,drho); - } - } else { - thr[0]->init_force(nall,f,torque,erforce,de,drho); - } + { + const int tid = get_tid(); + thr[tid]->check_tid(tid); + thr[tid]->init_force(nall,f,torque,erforce,de,drho); + } // end of omp parallel region + + _reduced = false; } /* ---------------------------------------------------------------------- */ diff --git a/src/USER-OMP/fix_omp.h b/src/USER-OMP/fix_omp.h index e651849984..656ab752a8 100644 --- a/src/USER-OMP/fix_omp.h +++ b/src/USER-OMP/fix_omp.h @@ -29,6 +29,7 @@ class ThrData; class FixOMP : public Fix { friend class ThrOMP; + friend class RespaOMP; public: FixOMP(class LAMMPS *, int, char **); @@ -51,6 +52,8 @@ class FixOMP : public Fix { // to do the general force reduction void *last_pair_hybrid; // pointer to the pair style that needs // to call virial_fdot_compute() + // signal that an /omp style did the force reduction. needed by respa/omp + void did_reduce() { _reduced = true; } public: ThrData *get_thr(int tid) { return thr[tid]; } @@ -58,12 +61,13 @@ class FixOMP : public Fix { bool get_neighbor() const { return _neighbor; } bool get_mixed() const { return _mixed; } + bool get_reduced() const { return _reduced; } private: - int _nthr; // number of currently active ThrData object - bool _neighbor; // en/disable threads for neighbor list construction - bool _clearforce; // whether to clear per thread data objects - bool _mixed; // whether to use a mixed precision compute kernel + int _nthr; // number of currently active ThrData objects + bool _neighbor; // en/disable threads for neighbor list construction + bool _mixed; // whether to prefer mixed precision compute kernels + bool _reduced; // whether forces have been reduced for this step void set_neighbor_omp(); }; diff --git a/src/USER-OMP/respa_omp.cpp b/src/USER-OMP/respa_omp.cpp index b7c7a495b6..2c545b303c 100644 --- a/src/USER-OMP/respa_omp.cpp +++ b/src/USER-OMP/respa_omp.cpp @@ -38,6 +38,10 @@ #include "memory.h" #include "error.h" +#if defined(_OPENMP) +#include +#endif + using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ @@ -55,7 +59,7 @@ void RespaOMP::init() { Respa::init(); - if (lmp->atom->torque) + if (atom->torque) error->all(FLERR,"Extended particles are not supported by respa/omp\n"); // set newton flag for each level @@ -137,6 +141,25 @@ void RespaOMP::setup() force->kspace->setup(); if (kspace_compute_flag) force->kspace->compute(eflag,vflag); } + + // reduce forces from per-thread arrays, if needed + if (!fix->get_reduced()) { + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + data_reduce_thr(atom->f[0], nall, nthreads, 3, tid); + } + fix->did_reduce(); + } + if (newton[ilevel]) comm->reverse_comm(); copy_f_flevel(ilevel); } @@ -205,6 +228,25 @@ void RespaOMP::setup_minimal(int flag) force->kspace->setup(); if (kspace_compute_flag) force->kspace->compute(eflag,vflag); } + + // reduce forces from per-thread arrays, if needed + if (!fix->get_reduced()) { + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + data_reduce_thr(atom->f[0], nall, nthreads, 3, tid); + } + fix->did_reduce(); + } + if (newton[ilevel]) comm->reverse_comm(); copy_f_flevel(ilevel); } @@ -315,6 +357,24 @@ void RespaOMP::recurse(int ilevel) timer->stamp(Timer::KSPACE); } + // reduce forces from per-thread arrays, if needed + if (!fix->get_reduced()) { + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + data_reduce_thr(atom->f[0], nall, nthreads, 3, tid); + } + fix->did_reduce(); + } + if (newton[ilevel]) { comm->reverse_comm(); timer->stamp(Timer::COMM); diff --git a/src/USER-OMP/thr_omp.cpp b/src/USER-OMP/thr_omp.cpp index 31bd558268..4aea630554 100644 --- a/src/USER-OMP/thr_omp.cpp +++ b/src/USER-OMP/thr_omp.cpp @@ -183,6 +183,7 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag, // pair_style hybrid will compute fdotr for us // but we first need to reduce the forces data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid); + fix->did_reduce(); need_force_reduce = 0; } } @@ -380,13 +381,11 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag, break; case THR_KSPACE: - // nothing to do. XXX need to add support for per-atom info + // nothing to do. XXX may need to add support for per-atom info break; case THR_INTGR: - // when running and /omp enabled integrator class. - // the reduction is run explicitly by the integrator - need_force_reduce = 0; + // nothing to do break; default: @@ -395,8 +394,10 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag, } if (style == fix->last_omp_style) { - if (need_force_reduce) + if (need_force_reduce) { data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid); + fix->did_reduce(); + } if (lmp->atom->torque) data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid);