git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@15931 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2016-12-14 00:00:20 +00:00
parent 89719fb171
commit 5f04559071
5 changed files with 54 additions and 12 deletions

View File

@ -63,7 +63,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
_nbor_pack_width = 1;
_precision_mode = PREC_MODE_MIXED;
_offload_balance = 1.0;
_offload_balance = -1.0;
_overflow_flag[LMP_OVERFLOW] = 0;
_off_overflow_flag[LMP_OVERFLOW] = 0;
@ -189,10 +189,18 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
offload_cores = omp_get_num_procs();
omp_set_num_threads(offload_cores);
max_offload_threads = omp_get_max_threads();
#ifdef __AVX512F__
if ( (offload_cores / 4) % 2 == 1) {
offload_cores += 4;
max_offload_threads += 4;
}
#endif
}
_max_offload_threads = max_offload_threads;
_offload_cores = offload_cores;
if (_offload_threads == 0) _offload_threads = offload_cores;
if (_offload_cores > 244 && _offload_tpc > 2)
_offload_tpc = 2;
}
#endif
@ -317,6 +325,8 @@ void FixIntel::init()
error->all(FLERR,
"Currently, cannot use more than one intel style with hybrid.");
neighbor->fix_intel = (void *)this;
check_neighbor_intel();
if (_precision_mode == PREC_MODE_SINGLE)
_single_buffers->zero_ev();
@ -1004,8 +1014,10 @@ void FixIntel::set_offload_affinity()
int offload_threads = _offload_threads;
int offload_tpc = _offload_tpc;
int offload_affinity_balanced = _offload_affinity_balanced;
int offload_cores = _offload_cores;
#pragma offload target(mic:_cop) mandatory \
in(node_rank,offload_threads,offload_tpc,offload_affinity_balanced)
in(node_rank,offload_threads,offload_tpc,offload_affinity_balanced, \
offload_cores)
{
omp_set_num_threads(offload_threads);
#pragma omp parallel
@ -1013,20 +1025,24 @@ void FixIntel::set_offload_affinity()
int tnum = omp_get_thread_num();
kmp_affinity_mask_t mask;
kmp_create_affinity_mask(&mask);
int proc;
if (offload_affinity_balanced) {
proc = offload_threads * node_rank + tnum;
int proc = offload_threads * node_rank + tnum;
#ifdef __AVX512F__
proc = (proc / offload_tpc) + (proc % offload_tpc) *
((offload_cores) / 4);
proc += 68;
#else
if (offload_affinity_balanced)
proc = proc * 4 - (proc / 60) * 240 + proc / 60 + 1;
} else {
proc = offload_threads * node_rank + tnum;
else
proc += (proc / 4) * (4 - offload_tpc) + 1;
}
#endif
kmp_set_affinity_mask_proc(proc, &mask);
if (kmp_set_affinity(&mask) != 0)
printf("Could not set affinity on rank %d thread %d to %d\n",
node_rank, tnum, proc);
}
}
if (_precision_mode == PREC_MODE_SINGLE)
_single_buffers->set_off_params(offload_threads, _cop, _separate_buffers);
else if (_precision_mode == PREC_MODE_MIXED)

View File

@ -28,7 +28,7 @@
// implementations.
// Vector classes provided with the intel compiler
#ifdef __MIC__
#if defined(__MIC__) && !defined(__AVX512F__)
#include <mic/micvec.h>
#else
#include <dvec.h> // icc-mmic hates generating movq

View File

@ -22,6 +22,11 @@
// Define _LMP_INTEL_OFFLOAD only when both __INTEL_OFFLOAD and
// LMP_INTEL_OFFLOAD are defined.
// NOTE(review): presumably __INTEL_OFFLOAD comes from the compiler and
// LMP_INTEL_OFFLOAD from the build flags -- confirm.
#ifdef __INTEL_OFFLOAD
#ifdef LMP_INTEL_OFFLOAD
#define _LMP_INTEL_OFFLOAD
// Within the offload configuration: if __TARGET_ARCH_MIC is defined but
// __MIC__ is not, define __MIC__ as 1 so that code conditioned on __MIC__
// is still selected.
#ifdef __TARGET_ARCH_MIC
#ifndef __MIC__
#define __MIC__ 1
#endif
#endif
#endif
#endif
@ -62,6 +67,7 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
#define INTEL_MAX_STENCIL_CHECK 4096
#define INTEL_P3M_MAXORDER 5
#ifdef __INTEL_COMPILER
#ifdef __AVX__
#undef INTEL_VECTOR_WIDTH
#define INTEL_VECTOR_WIDTH 8
@ -90,6 +96,13 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
#endif
#endif
#else
#undef INTEL_VECTOR_WIDTH
#define INTEL_VECTOR_WIDTH 1
#endif
#define INTEL_DATA_ALIGN 64
#define INTEL_ONEATOM_FACTOR 2
#define INTEL_MIC_NBOR_PAD INTEL_MIC_VECTOR_WIDTH
@ -97,7 +110,7 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
#define INTEL_LB_MEAN_WEIGHT 0.1
#define INTEL_BIGP 1e15
#define INTEL_MAX_HOST_CORE_COUNT 512
#define INTEL_MAX_COI_CORES 2
#define INTEL_MAX_COI_CORES 36
#define IP_PRE_get_stride(stride, n, datasize, torque) \
{ \

View File

@ -43,7 +43,20 @@ using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Constructor: forwards lmp, narg and arg unchanged to the Verlet base class.
// When _LMP_INTEL_LRT_PTHREAD is defined it additionally initializes the
// _kmutex mutex; the destructor destroys it under the same macro.
VerletLRTIntel::VerletLRTIntel(LAMMPS *lmp, int narg, char **arg) :
Verlet(lmp, narg, arg) {}
Verlet(lmp, narg, arg) {
#if defined(_LMP_INTEL_LRT_PTHREAD)
// A NULL attribute pointer requests the default mutex attributes.
pthread_mutex_init(&_kmutex,NULL);
#endif
}
/* ---------------------------------------------------------------------- */
// Destructor. When _LMP_INTEL_LRT_PTHREAD is defined, destroys the _kmutex
// mutex that the constructor initializes under the same macro; otherwise the
// body is empty.
VerletLRTIntel::~VerletLRTIntel()
{
#if defined(_LMP_INTEL_LRT_PTHREAD)
pthread_mutex_destroy(&_kmutex);
#endif
}
/* ----------------------------------------------------------------------
initialization before run

View File

@ -40,7 +40,7 @@ namespace LAMMPS_NS {
class VerletLRTIntel : public Verlet {
public:
VerletLRTIntel(class LAMMPS *, int, char **);
virtual ~VerletLRTIntel() {}
virtual ~VerletLRTIntel();
virtual void init();
virtual void setup();
virtual void run(int);