complete the implementation of respa/omp. tested ok.

This commit is contained in:
Axel Kohlmeyer
2013-02-23 18:27:33 +01:00
parent ba8c0bb5bc
commit d2fef46fb3
4 changed files with 94 additions and 35 deletions

View File

@ -68,7 +68,7 @@ static int get_tid()
FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
: Fix(lmp, narg, arg),
thr(NULL), last_omp_style(NULL), last_pair_hybrid(NULL),
_nthr(-1), _neighbor(true), _mixed(false)
_nthr(-1), _neighbor(true), _mixed(false), _reduced(true)
{
if ((narg < 4) || (narg > 7)) error->all(FLERR,"Illegal package omp command");
if (strcmp(arg[1],"all") != 0) error->all(FLERR,"fix OMP has to operate on group 'all'");
@ -87,8 +87,10 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
if (nthreads < 1)
error->all(FLERR,"Illegal number of OpenMP threads requested");
int reset_thr = 0;
if (nthreads != comm->nthreads) {
#if defined(_OPENMP)
reset_thr = 1;
omp_set_num_threads(nthreads);
#endif
comm->nthreads = nthreads;
@ -110,25 +112,21 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
// print summary of settings
if (comm->me == 0) {
const char * const nmode = _neighbor ? "OpenMP capable" : "serial";
const char * const nmode = _neighbor ? "multi-threaded" : "serial";
const char * const kmode = _mixed ? "mixed" : "double";
if (screen) {
fprintf(screen," reset %d OpenMP thread(s) per MPI task\n", nthreads);
fprintf(screen," using %s neighbor list subroutines\n", nmode);
if (_mixed)
fputs(" using mixed precision OpenMP force kernels where available\n", screen);
else
fputs(" using double precision OpenMP force kernels\n", screen);
if (reset_thr)
fprintf(screen,"set %d OpenMP thread(s) per MPI task\n", nthreads);
fprintf(screen,"using %s neighbor list subroutines\n", nmode);
fprintf(screen,"prefer %s precision OpenMP force kernels\n", kmode);
}
if (logfile) {
fprintf(logfile," reset %d OpenMP thread(s) per MPI task\n", nthreads);
fprintf(logfile," using %s neighbor list subroutines\n", nmode);
if (_mixed)
fputs(" using mixed precision OpenMP force kernels where available\n", logfile);
else
fputs(" using double precision OpenMP force kernels\n", logfile);
if (reset_thr)
fprintf(logfile,"set %d OpenMP thread(s) per MPI task\n", nthreads);
fprintf(logfile,"using %s neighbor list subroutines\n", nmode);
fprintf(logfile,"prefer %s precision OpenMP force kernels\n", kmode);
}
}
@ -137,7 +135,6 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
// encourage the OS to use storage that is "close" to each thread's CPU.
thr = new ThrData *[nthreads];
_nthr = nthreads;
_clearforce = true;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
@ -283,7 +280,6 @@ void FixOMP::init()
fprintf(logfile,"Last active /omp style is %s_style %s\n",
last_force_name, last_omp_name);
} else {
_clearforce = false;
if (screen)
fprintf(screen,"No /omp style for force computation currently active\n");
if (logfile)
@ -323,18 +319,16 @@ void FixOMP::pre_force(int)
double *de = atom->de;
double *drho = atom->drho;
if (_clearforce) {
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(f,torque,erforce,de,drho)
#endif
{
const int tid = get_tid();
thr[tid]->check_tid(tid);
thr[tid]->init_force(nall,f,torque,erforce,de,drho);
}
} else {
thr[0]->init_force(nall,f,torque,erforce,de,drho);
}
{
const int tid = get_tid();
thr[tid]->check_tid(tid);
thr[tid]->init_force(nall,f,torque,erforce,de,drho);
} // end of omp parallel region
_reduced = false;
}
/* ---------------------------------------------------------------------- */

View File

@ -29,6 +29,7 @@ class ThrData;
class FixOMP : public Fix {
friend class ThrOMP;
friend class RespaOMP;
public:
FixOMP(class LAMMPS *, int, char **);
@ -51,6 +52,8 @@ class FixOMP : public Fix {
// to do the general force reduction
void *last_pair_hybrid; // pointer to the pair style that needs
// to call virial_fdot_compute()
// signal that an /omp style did the force reduction. needed by respa/omp
void did_reduce() { _reduced = true; }
public:
ThrData *get_thr(int tid) { return thr[tid]; }
@ -58,12 +61,13 @@ class FixOMP : public Fix {
bool get_neighbor() const { return _neighbor; }
bool get_mixed() const { return _mixed; }
bool get_reduced() const { return _reduced; }
private:
int _nthr; // number of currently active ThrData object
bool _neighbor; // en/disable threads for neighbor list construction
bool _clearforce; // whether to clear per thread data objects
bool _mixed; // whether to use a mixed precision compute kernel
int _nthr; // number of currently active ThrData objects
bool _neighbor; // en/disable threads for neighbor list construction
bool _mixed; // whether to prefer mixed precision compute kernels
bool _reduced; // whether forces have been reduced for this step
void set_neighbor_omp();
};

View File

@ -38,6 +38,10 @@
#include "memory.h"
#include "error.h"
#if defined(_OPENMP)
#include <omp.h>
#endif
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
@ -55,7 +59,7 @@ void RespaOMP::init()
{
Respa::init();
if (lmp->atom->torque)
if (atom->torque)
error->all(FLERR,"Extended particles are not supported by respa/omp\n");
// set newton flag for each level
@ -137,6 +141,25 @@ void RespaOMP::setup()
force->kspace->setup();
if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
}
// reduce forces from per-thread arrays, if needed
if (!fix->get_reduced()) {
const int nall = atom->nlocal + atom->nghost;
const int nthreads = comm->nthreads;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
int tid = omp_get_thread_num();
#else
int tid = 0;
#endif
data_reduce_thr(atom->f[0], nall, nthreads, 3, tid);
}
fix->did_reduce();
}
if (newton[ilevel]) comm->reverse_comm();
copy_f_flevel(ilevel);
}
@ -205,6 +228,25 @@ void RespaOMP::setup_minimal(int flag)
force->kspace->setup();
if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
}
// reduce forces from per-thread arrays, if needed
if (!fix->get_reduced()) {
const int nall = atom->nlocal + atom->nghost;
const int nthreads = comm->nthreads;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
int tid = omp_get_thread_num();
#else
int tid = 0;
#endif
data_reduce_thr(atom->f[0], nall, nthreads, 3, tid);
}
fix->did_reduce();
}
if (newton[ilevel]) comm->reverse_comm();
copy_f_flevel(ilevel);
}
@ -315,6 +357,24 @@ void RespaOMP::recurse(int ilevel)
timer->stamp(Timer::KSPACE);
}
// reduce forces from per-thread arrays, if needed
if (!fix->get_reduced()) {
const int nall = atom->nlocal + atom->nghost;
const int nthreads = comm->nthreads;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
int tid = omp_get_thread_num();
#else
int tid = 0;
#endif
data_reduce_thr(atom->f[0], nall, nthreads, 3, tid);
}
fix->did_reduce();
}
if (newton[ilevel]) {
comm->reverse_comm();
timer->stamp(Timer::COMM);

View File

@ -183,6 +183,7 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag,
// pair_style hybrid will compute fdotr for us
// but we first need to reduce the forces
data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
fix->did_reduce();
need_force_reduce = 0;
}
}
@ -380,13 +381,11 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag,
break;
case THR_KSPACE:
// nothing to do. XXX need to add support for per-atom info
// nothing to do. XXX may need to add support for per-atom info
break;
case THR_INTGR:
// when running and /omp enabled integrator class.
// the reduction is run explicitly by the integrator
need_force_reduce = 0;
// nothing to do
break;
default:
@ -395,8 +394,10 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag,
}
if (style == fix->last_omp_style) {
if (need_force_reduce)
if (need_force_reduce) {
data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
fix->did_reduce();
}
if (lmp->atom->torque)
data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid);