git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@9581 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -68,7 +68,7 @@ static int get_tid()
|
|||||||
FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
|
FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
|
||||||
: Fix(lmp, narg, arg),
|
: Fix(lmp, narg, arg),
|
||||||
thr(NULL), last_omp_style(NULL), last_pair_hybrid(NULL),
|
thr(NULL), last_omp_style(NULL), last_pair_hybrid(NULL),
|
||||||
_nthr(-1), _neighbor(true), _mixed(false)
|
_nthr(-1), _neighbor(true), _mixed(false), _reduced(true)
|
||||||
{
|
{
|
||||||
if ((narg < 4) || (narg > 7)) error->all(FLERR,"Illegal package omp command");
|
if ((narg < 4) || (narg > 7)) error->all(FLERR,"Illegal package omp command");
|
||||||
if (strcmp(arg[1],"all") != 0) error->all(FLERR,"fix OMP has to operate on group 'all'");
|
if (strcmp(arg[1],"all") != 0) error->all(FLERR,"fix OMP has to operate on group 'all'");
|
||||||
@ -87,8 +87,10 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
|
|||||||
if (nthreads < 1)
|
if (nthreads < 1)
|
||||||
error->all(FLERR,"Illegal number of OpenMP threads requested");
|
error->all(FLERR,"Illegal number of OpenMP threads requested");
|
||||||
|
|
||||||
|
int reset_thr = 0;
|
||||||
if (nthreads != comm->nthreads) {
|
if (nthreads != comm->nthreads) {
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP)
|
||||||
|
reset_thr = 1;
|
||||||
omp_set_num_threads(nthreads);
|
omp_set_num_threads(nthreads);
|
||||||
#endif
|
#endif
|
||||||
comm->nthreads = nthreads;
|
comm->nthreads = nthreads;
|
||||||
@ -110,25 +112,21 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
|
|||||||
|
|
||||||
// print summary of settings
|
// print summary of settings
|
||||||
if (comm->me == 0) {
|
if (comm->me == 0) {
|
||||||
const char * const nmode = _neighbor ? "OpenMP capable" : "serial";
|
const char * const nmode = _neighbor ? "multi-threaded" : "serial";
|
||||||
const char * const kmode = _mixed ? "mixed" : "double";
|
const char * const kmode = _mixed ? "mixed" : "double";
|
||||||
|
|
||||||
if (screen) {
|
if (screen) {
|
||||||
fprintf(screen," reset %d OpenMP thread(s) per MPI task\n", nthreads);
|
if (reset_thr)
|
||||||
fprintf(screen," using %s neighbor list subroutines\n", nmode);
|
fprintf(screen,"set %d OpenMP thread(s) per MPI task\n", nthreads);
|
||||||
if (_mixed)
|
fprintf(screen,"using %s neighbor list subroutines\n", nmode);
|
||||||
fputs(" using mixed precision OpenMP force kernels where available\n", screen);
|
fprintf(screen,"prefer %s precision OpenMP force kernels\n", kmode);
|
||||||
else
|
|
||||||
fputs(" using double precision OpenMP force kernels\n", screen);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (logfile) {
|
if (logfile) {
|
||||||
fprintf(logfile," reset %d OpenMP thread(s) per MPI task\n", nthreads);
|
if (reset_thr)
|
||||||
fprintf(logfile," using %s neighbor list subroutines\n", nmode);
|
fprintf(logfile,"set %d OpenMP thread(s) per MPI task\n", nthreads);
|
||||||
if (_mixed)
|
fprintf(logfile,"using %s neighbor list subroutines\n", nmode);
|
||||||
fputs(" using mixed precision OpenMP force kernels where available\n", logfile);
|
fprintf(logfile,"prefer %s precision OpenMP force kernels\n", kmode);
|
||||||
else
|
|
||||||
fputs(" using double precision OpenMP force kernels\n", logfile);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -137,7 +135,6 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
|
|||||||
// encourage the OS to use storage that is "close" to each thread's CPU.
|
// encourage the OS to use storage that is "close" to each thread's CPU.
|
||||||
thr = new ThrData *[nthreads];
|
thr = new ThrData *[nthreads];
|
||||||
_nthr = nthreads;
|
_nthr = nthreads;
|
||||||
_clearforce = true;
|
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP)
|
||||||
#pragma omp parallel default(none)
|
#pragma omp parallel default(none)
|
||||||
#endif
|
#endif
|
||||||
@ -188,8 +185,9 @@ int FixOMP::setmask()
|
|||||||
|
|
||||||
void FixOMP::init()
|
void FixOMP::init()
|
||||||
{
|
{
|
||||||
if (strstr(update->integrate_style,"respa") != NULL)
|
if ((strstr(update->integrate_style,"respa") != NULL)
|
||||||
error->all(FLERR,"Cannot use r-RESPA with /omp styles");
|
&& (strstr(update->integrate_style,"respa/omp") == NULL))
|
||||||
|
error->all(FLERR,"Need to use respa/omp for r-RESPA with /omp styles");
|
||||||
|
|
||||||
int check_hybrid, kspace_split;
|
int check_hybrid, kspace_split;
|
||||||
last_pair_hybrid = NULL;
|
last_pair_hybrid = NULL;
|
||||||
@ -282,7 +280,6 @@ void FixOMP::init()
|
|||||||
fprintf(logfile,"Last active /omp style is %s_style %s\n",
|
fprintf(logfile,"Last active /omp style is %s_style %s\n",
|
||||||
last_force_name, last_omp_name);
|
last_force_name, last_omp_name);
|
||||||
} else {
|
} else {
|
||||||
_clearforce = false;
|
|
||||||
if (screen)
|
if (screen)
|
||||||
fprintf(screen,"No /omp style for force computation currently active\n");
|
fprintf(screen,"No /omp style for force computation currently active\n");
|
||||||
if (logfile)
|
if (logfile)
|
||||||
@ -322,18 +319,16 @@ void FixOMP::pre_force(int)
|
|||||||
double *de = atom->de;
|
double *de = atom->de;
|
||||||
double *drho = atom->drho;
|
double *drho = atom->drho;
|
||||||
|
|
||||||
if (_clearforce) {
|
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP)
|
||||||
#pragma omp parallel default(none) shared(f,torque,erforce,de,drho)
|
#pragma omp parallel default(none) shared(f,torque,erforce,de,drho)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
const int tid = get_tid();
|
const int tid = get_tid();
|
||||||
thr[tid]->check_tid(tid);
|
thr[tid]->check_tid(tid);
|
||||||
thr[tid]->init_force(nall,f,torque,erforce,de,drho);
|
thr[tid]->init_force(nall,f,torque,erforce,de,drho);
|
||||||
}
|
} // end of omp parallel region
|
||||||
} else {
|
|
||||||
thr[0]->init_force(nall,f,torque,erforce,de,drho);
|
_reduced = false;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|||||||
@ -29,6 +29,7 @@ class ThrData;
|
|||||||
|
|
||||||
class FixOMP : public Fix {
|
class FixOMP : public Fix {
|
||||||
friend class ThrOMP;
|
friend class ThrOMP;
|
||||||
|
friend class RespaOMP;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
FixOMP(class LAMMPS *, int, char **);
|
FixOMP(class LAMMPS *, int, char **);
|
||||||
@ -51,6 +52,8 @@ class FixOMP : public Fix {
|
|||||||
// to do the general force reduction
|
// to do the general force reduction
|
||||||
void *last_pair_hybrid; // pointer to the pair style that needs
|
void *last_pair_hybrid; // pointer to the pair style that needs
|
||||||
// to call virial_fdot_compute()
|
// to call virial_fdot_compute()
|
||||||
|
// signal that an /omp style did the force reduction. needed by respa/omp
|
||||||
|
void did_reduce() { _reduced = true; }
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ThrData *get_thr(int tid) { return thr[tid]; }
|
ThrData *get_thr(int tid) { return thr[tid]; }
|
||||||
@ -58,12 +61,13 @@ class FixOMP : public Fix {
|
|||||||
|
|
||||||
bool get_neighbor() const { return _neighbor; }
|
bool get_neighbor() const { return _neighbor; }
|
||||||
bool get_mixed() const { return _mixed; }
|
bool get_mixed() const { return _mixed; }
|
||||||
|
bool get_reduced() const { return _reduced; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int _nthr; // number of currently active ThrData object
|
int _nthr; // number of currently active ThrData objects
|
||||||
bool _neighbor; // en/disable threads for neighbor list construction
|
bool _neighbor; // en/disable threads for neighbor list construction
|
||||||
bool _clearforce; // whether to clear per thread data objects
|
bool _mixed; // whether to prefer mixed precision compute kernels
|
||||||
bool _mixed; // whether to use a mixed precision compute kernel
|
bool _reduced; // whether forces have been reduced for this step
|
||||||
|
|
||||||
void set_neighbor_omp();
|
void set_neighbor_omp();
|
||||||
};
|
};
|
||||||
|
|||||||
@ -183,6 +183,7 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag,
|
|||||||
// pair_style hybrid will compute fdotr for us
|
// pair_style hybrid will compute fdotr for us
|
||||||
// but we first need to reduce the forces
|
// but we first need to reduce the forces
|
||||||
data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
|
data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
|
||||||
|
fix->did_reduce();
|
||||||
need_force_reduce = 0;
|
need_force_reduce = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -380,7 +381,11 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag,
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case THR_KSPACE:
|
case THR_KSPACE:
|
||||||
// nothing to do. XXX need to add support for per-atom info
|
// nothing to do. XXX may need to add support for per-atom info
|
||||||
|
break;
|
||||||
|
|
||||||
|
case THR_INTGR:
|
||||||
|
// nothing to do
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
@ -389,8 +394,10 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (style == fix->last_omp_style) {
|
if (style == fix->last_omp_style) {
|
||||||
if (need_force_reduce)
|
if (need_force_reduce) {
|
||||||
data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
|
data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
|
||||||
|
fix->did_reduce();
|
||||||
|
}
|
||||||
|
|
||||||
if (lmp->atom->torque)
|
if (lmp->atom->torque)
|
||||||
data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid);
|
data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid);
|
||||||
|
|||||||
@ -58,8 +58,8 @@ class ThrOMP {
|
|||||||
|
|
||||||
enum {THR_NONE=0,THR_PAIR=1,THR_BOND=1<<1,THR_ANGLE=1<<2,
|
enum {THR_NONE=0,THR_PAIR=1,THR_BOND=1<<1,THR_ANGLE=1<<2,
|
||||||
THR_DIHEDRAL=1<<3,THR_IMPROPER=1<<4,THR_KSPACE=1<<5,
|
THR_DIHEDRAL=1<<3,THR_IMPROPER=1<<4,THR_KSPACE=1<<5,
|
||||||
THR_CHARMM=1<<6, /*THR_PROXY=1<<7,*/ THR_HYBRID=1<<8,
|
THR_CHARMM=1<<6, /*THR_PROXY=1<<7,THR_HYBRID=1<<8, */
|
||||||
THR_FIX=1<<9};
|
THR_FIX=1<<9,THR_INTGR=1<<10};
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// extra ev_tally setup work for threaded styles
|
// extra ev_tally setup work for threaded styles
|
||||||
|
|||||||
Reference in New Issue
Block a user