git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@15931 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -63,7 +63,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
|
||||
_nbor_pack_width = 1;
|
||||
|
||||
_precision_mode = PREC_MODE_MIXED;
|
||||
_offload_balance = 1.0;
|
||||
_offload_balance = -1.0;
|
||||
_overflow_flag[LMP_OVERFLOW] = 0;
|
||||
_off_overflow_flag[LMP_OVERFLOW] = 0;
|
||||
|
||||
@ -189,10 +189,18 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
|
||||
offload_cores = omp_get_num_procs();
|
||||
omp_set_num_threads(offload_cores);
|
||||
max_offload_threads = omp_get_max_threads();
|
||||
#ifdef __AVX512F__
|
||||
if ( (offload_cores / 4) % 2 == 1) {
|
||||
offload_cores += 4;
|
||||
max_offload_threads += 4;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
_max_offload_threads = max_offload_threads;
|
||||
_offload_cores = offload_cores;
|
||||
if (_offload_threads == 0) _offload_threads = offload_cores;
|
||||
if (_offload_cores > 244 && _offload_tpc > 2)
|
||||
_offload_tpc = 2;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -317,6 +325,8 @@ void FixIntel::init()
|
||||
error->all(FLERR,
|
||||
"Currently, cannot use more than one intel style with hybrid.");
|
||||
|
||||
neighbor->fix_intel = (void *)this;
|
||||
|
||||
check_neighbor_intel();
|
||||
if (_precision_mode == PREC_MODE_SINGLE)
|
||||
_single_buffers->zero_ev();
|
||||
@ -1004,8 +1014,10 @@ void FixIntel::set_offload_affinity()
|
||||
int offload_threads = _offload_threads;
|
||||
int offload_tpc = _offload_tpc;
|
||||
int offload_affinity_balanced = _offload_affinity_balanced;
|
||||
int offload_cores = _offload_cores;
|
||||
#pragma offload target(mic:_cop) mandatory \
|
||||
in(node_rank,offload_threads,offload_tpc,offload_affinity_balanced)
|
||||
in(node_rank,offload_threads,offload_tpc,offload_affinity_balanced, \
|
||||
offload_cores)
|
||||
{
|
||||
omp_set_num_threads(offload_threads);
|
||||
#pragma omp parallel
|
||||
@ -1013,20 +1025,24 @@ void FixIntel::set_offload_affinity()
|
||||
int tnum = omp_get_thread_num();
|
||||
kmp_affinity_mask_t mask;
|
||||
kmp_create_affinity_mask(&mask);
|
||||
int proc;
|
||||
if (offload_affinity_balanced) {
|
||||
proc = offload_threads * node_rank + tnum;
|
||||
int proc = offload_threads * node_rank + tnum;
|
||||
#ifdef __AVX512F__
|
||||
proc = (proc / offload_tpc) + (proc % offload_tpc) *
|
||||
((offload_cores) / 4);
|
||||
proc += 68;
|
||||
#else
|
||||
if (offload_affinity_balanced)
|
||||
proc = proc * 4 - (proc / 60) * 240 + proc / 60 + 1;
|
||||
} else {
|
||||
proc = offload_threads * node_rank + tnum;
|
||||
else
|
||||
proc += (proc / 4) * (4 - offload_tpc) + 1;
|
||||
}
|
||||
#endif
|
||||
kmp_set_affinity_mask_proc(proc, &mask);
|
||||
if (kmp_set_affinity(&mask) != 0)
|
||||
printf("Could not set affinity on rank %d thread %d to %d\n",
|
||||
node_rank, tnum, proc);
|
||||
}
|
||||
}
|
||||
|
||||
if (_precision_mode == PREC_MODE_SINGLE)
|
||||
_single_buffers->set_off_params(offload_threads, _cop, _separate_buffers);
|
||||
else if (_precision_mode == PREC_MODE_MIXED)
|
||||
|
||||
@ -28,7 +28,7 @@
|
||||
// implementations.
|
||||
|
||||
// Vector classes provided with the intel compiler
|
||||
#ifdef __MIC__
|
||||
#if defined(__MIC__) && !defined(__AVX512F__)
|
||||
#include <mic/micvec.h>
|
||||
#else
|
||||
#include <dvec.h> // icc-mmic hates generating movq
|
||||
|
||||
@ -22,6 +22,11 @@
|
||||
#ifdef __INTEL_OFFLOAD
|
||||
#ifdef LMP_INTEL_OFFLOAD
|
||||
#define _LMP_INTEL_OFFLOAD
|
||||
#ifdef __TARGET_ARCH_MIC
|
||||
#ifndef __MIC__
|
||||
#define __MIC__ 1
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -62,6 +67,7 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
|
||||
#define INTEL_MAX_STENCIL_CHECK 4096
|
||||
#define INTEL_P3M_MAXORDER 5
|
||||
|
||||
#ifdef __INTEL_COMPILER
|
||||
#ifdef __AVX__
|
||||
#undef INTEL_VECTOR_WIDTH
|
||||
#define INTEL_VECTOR_WIDTH 8
|
||||
@ -90,6 +96,13 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#undef INTEL_VECTOR_WIDTH
|
||||
#define INTEL_VECTOR_WIDTH 1
|
||||
|
||||
#endif
|
||||
|
||||
#define INTEL_DATA_ALIGN 64
|
||||
#define INTEL_ONEATOM_FACTOR 2
|
||||
#define INTEL_MIC_NBOR_PAD INTEL_MIC_VECTOR_WIDTH
|
||||
@ -97,7 +110,7 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
|
||||
#define INTEL_LB_MEAN_WEIGHT 0.1
|
||||
#define INTEL_BIGP 1e15
|
||||
#define INTEL_MAX_HOST_CORE_COUNT 512
|
||||
#define INTEL_MAX_COI_CORES 2
|
||||
#define INTEL_MAX_COI_CORES 36
|
||||
|
||||
#define IP_PRE_get_stride(stride, n, datasize, torque) \
|
||||
{ \
|
||||
|
||||
@ -43,7 +43,20 @@ using namespace LAMMPS_NS;
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
VerletLRTIntel::VerletLRTIntel(LAMMPS *lmp, int narg, char **arg) :
|
||||
Verlet(lmp, narg, arg) {}
|
||||
Verlet(lmp, narg, arg) {
|
||||
#if defined(_LMP_INTEL_LRT_PTHREAD)
|
||||
pthread_mutex_init(&_kmutex,NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
VerletLRTIntel::~VerletLRTIntel()
|
||||
{
|
||||
#if defined(_LMP_INTEL_LRT_PTHREAD)
|
||||
pthread_mutex_destroy(&_kmutex);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
initialization before run
|
||||
|
||||
@ -40,7 +40,7 @@ namespace LAMMPS_NS {
|
||||
class VerletLRTIntel : public Verlet {
|
||||
public:
|
||||
VerletLRTIntel(class LAMMPS *, int, char **);
|
||||
virtual ~VerletLRTIntel() {}
|
||||
virtual ~VerletLRTIntel();
|
||||
virtual void init();
|
||||
virtual void setup();
|
||||
virtual void run(int);
|
||||
|
||||
Reference in New Issue
Block a user