git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12851 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -6,6 +6,8 @@
|
||||
W. Michael Brown (Intel)
|
||||
michael.w.brown at intel.com
|
||||
|
||||
Anupama Kurpad (Intel)
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
This package is based on the USER-OMP package and provides LAMMPS styles that:
|
||||
@ -41,3 +43,10 @@ Intel compilers.
|
||||
For portability reasons, vectorization directives are currently only enabled
|
||||
for Intel compilers. Using other compilers may result in significantly
|
||||
lower performance.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
By default, when running with offload to Intel(R) coprocessors, affinity
|
||||
for host MPI tasks and OpenMP threads is set automatically within the code.
|
||||
This currently requires the use of system calls. To disable at build time,
|
||||
compile with -DINTEL_OFFLOAD_NOAFFINITY.
|
||||
|
||||
@ -13,6 +13,7 @@
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
Anupama Kurpad (Intel) - Host Affinitization
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "comm.h"
|
||||
@ -59,7 +60,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
|
||||
_off_overflow_flag[LMP_OVERFLOW] = 0;
|
||||
|
||||
_offload_affinity_balanced = 0;
|
||||
_offload_threads = 1;
|
||||
_offload_threads = 0;
|
||||
_offload_tpc = 4;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
@ -71,25 +72,12 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
|
||||
_off_ev_array_s = 0;
|
||||
_off_ev_array_d = 0;
|
||||
_balance_fixed = 0.0;
|
||||
|
||||
_cop = 0;
|
||||
|
||||
int max_offload_threads, offload_cores;
|
||||
#pragma offload target(mic:_cop) mandatory \
|
||||
out(max_offload_threads,offload_cores)
|
||||
{
|
||||
offload_cores = omp_get_num_procs();
|
||||
omp_set_num_threads(offload_cores);
|
||||
max_offload_threads = omp_get_max_threads();
|
||||
}
|
||||
_max_offload_threads = max_offload_threads;
|
||||
_offload_cores = offload_cores;
|
||||
_offload_threads = offload_cores;
|
||||
#endif
|
||||
|
||||
// optional keywords
|
||||
|
||||
int nomp = 0;
|
||||
int nomp = 0, no_affinity = 0;
|
||||
_allow_separate_buffers = 1;
|
||||
_offload_ghost = -1;
|
||||
|
||||
@ -127,6 +115,9 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command");
|
||||
_offload_threads = atoi(arg[iarg+1]);
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"no_affinity") == 0) {
|
||||
no_affinity = 1;
|
||||
iarg++;
|
||||
}
|
||||
|
||||
// undocumented options
|
||||
@ -143,10 +134,34 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
|
||||
|
||||
// error check
|
||||
|
||||
if (_offload_balance > 1.0 || _offload_threads <= 0 ||
|
||||
_offload_tpc <= 0 || _offload_tpc > 4)
|
||||
if (_offload_balance > 1.0 || _offload_threads < 0 ||
|
||||
_offload_tpc <= 0 || _offload_tpc > 4 || nomp < 0)
|
||||
error->all(FLERR,"Illegal package intel command");
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
_ncops = ncops;
|
||||
if (_offload_balance != 0.0) {
|
||||
_real_space_comm = MPI_COMM_WORLD;
|
||||
if (no_affinity == 0)
|
||||
if (set_host_affinity(nomp) != 0)
|
||||
error->all(FLERR,"Could not set host affinity for offload tasks");
|
||||
}
|
||||
|
||||
int max_offload_threads = 0, offload_cores = 0;
|
||||
if (_offload_balance != 0.0) {
|
||||
#pragma offload target(mic:_cop) mandatory \
|
||||
out(max_offload_threads,offload_cores)
|
||||
{
|
||||
offload_cores = omp_get_num_procs();
|
||||
omp_set_num_threads(offload_cores);
|
||||
max_offload_threads = omp_get_max_threads();
|
||||
}
|
||||
_max_offload_threads = max_offload_threads;
|
||||
_offload_cores = offload_cores;
|
||||
if (_offload_threads == 0) _offload_threads = offload_cores;
|
||||
}
|
||||
#endif
|
||||
|
||||
// set OpenMP threads
|
||||
// nomp is user setting, default = 0
|
||||
|
||||
@ -154,13 +169,17 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
|
||||
if (nomp != 0) {
|
||||
omp_set_num_threads(nomp);
|
||||
comm->nthreads = nomp;
|
||||
} else {
|
||||
int nthreads;
|
||||
#pragma omp parallel default(none) shared(nthreads)
|
||||
nthreads = omp_get_num_threads();
|
||||
comm->nthreads = nthreads;
|
||||
}
|
||||
#endif
|
||||
|
||||
// set offload params
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
_ncops = ncops;
|
||||
if (_offload_balance < 0.0) {
|
||||
_balance_neighbor = 0.9;
|
||||
_balance_pair = 0.9;
|
||||
@ -173,6 +192,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
|
||||
zero_timers();
|
||||
_setup_time_cleared = false;
|
||||
_timers_allocated = false;
|
||||
|
||||
#else
|
||||
_offload_balance = 0.0;
|
||||
#endif
|
||||
@ -197,7 +217,8 @@ FixIntel::~FixIntel()
|
||||
double *time1 = off_watch_pair();
|
||||
double *time2 = off_watch_neighbor();
|
||||
int *overflow = get_off_overflow_flag();
|
||||
if (time1 != NULL && time2 != NULL && overflow != NULL) {
|
||||
if (_offload_balance != 0.0 && time1 != NULL && time2 != NULL &&
|
||||
overflow != NULL) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(time1,time2,overflow:alloc_if(0) free_if(1))
|
||||
}
|
||||
@ -239,7 +260,8 @@ void FixIntel::init()
|
||||
double *time1 = off_watch_pair();
|
||||
double *time2 = off_watch_neighbor();
|
||||
int *overflow = get_off_overflow_flag();
|
||||
if (time1 != NULL && time2 != NULL && overflow != NULL) {
|
||||
if (_offload_balance !=0.0 && time1 != NULL && time2 != NULL &&
|
||||
overflow != NULL) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(time1,time2:length(1) alloc_if(1) free_if(0)) \
|
||||
in(overflow:length(5) alloc_if(1) free_if(0))
|
||||
@ -247,25 +269,6 @@ void FixIntel::init()
|
||||
_timers_allocated = true;
|
||||
}
|
||||
|
||||
char kmode[80];
|
||||
if (_precision_mode == PREC_MODE_SINGLE)
|
||||
strcpy(kmode, "single");
|
||||
else if (_precision_mode == PREC_MODE_MIXED)
|
||||
strcpy(kmode, "mixed");
|
||||
else
|
||||
strcpy(kmode, "double");
|
||||
|
||||
// print summary of settings
|
||||
if (comm->me == 0) {
|
||||
if (screen) {
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_offload_balance != 0.0) {
|
||||
fprintf(screen,"using offload with %d threads per core, ",_offload_tpc);
|
||||
fprintf(screen,"%d threads per task\n",_offload_threads);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
if (update->whichflag == 2 && _offload_balance != 0.0) {
|
||||
if (_offload_balance == 1.0 && _offload_noghost == 0)
|
||||
_sync_at_pair = 1;
|
||||
@ -279,12 +282,6 @@ void FixIntel::init()
|
||||
}
|
||||
#endif
|
||||
|
||||
if (neighbor->style != BIN)
|
||||
error->all(FLERR,
|
||||
"Currently, neighbor style BIN must be used with Intel package.");
|
||||
if (neighbor->exclude_setting() != 0)
|
||||
error->all(FLERR,
|
||||
"Currently, cannot use neigh_modify exclude with Intel package.");
|
||||
int nstyles = 0;
|
||||
if (force->pair_match("hybrid", 1) != NULL) {
|
||||
PairHybrid *hybrid = (PairHybrid *) force->pair;
|
||||
@ -315,6 +312,71 @@ void FixIntel::init()
|
||||
_double_buffers->zero_ev();
|
||||
}
|
||||
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixIntel::setup(int vflag)
|
||||
{
|
||||
if (neighbor->style != BIN)
|
||||
error->all(FLERR,
|
||||
"Currently, neighbor style BIN must be used with Intel package.");
|
||||
if (neighbor->exclude_setting() != 0)
|
||||
error->all(FLERR,
|
||||
"Currently, cannot use neigh_modify exclude with Intel package.");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixIntel::pair_init_check()
|
||||
{
|
||||
if (_offload_balance != 0.0 && comm->me == 0) {
|
||||
#ifndef __INTEL_COMPILER_BUILD_DATE
|
||||
error->warning(FLERR, "Unknown Intel Compiler Version\n");
|
||||
#else
|
||||
if (__INTEL_COMPILER_BUILD_DATE != 20131008 &&
|
||||
__INTEL_COMPILER_BUILD_DATE < 20141023)
|
||||
error->warning(FLERR, "Unsupported Intel Compiler.");
|
||||
#endif
|
||||
#if !defined(__INTEL_COMPILER)
|
||||
error->warning(FLERR, "Unsupported Intel Compiler.");
|
||||
#endif
|
||||
}
|
||||
|
||||
// Clear buffers used for pair style
|
||||
char kmode[80];
|
||||
if (_precision_mode == PREC_MODE_SINGLE) {
|
||||
strcpy(kmode, "single");
|
||||
get_single_buffers()->free_all_nbor_buffers();
|
||||
} else if (_precision_mode == PREC_MODE_MIXED) {
|
||||
strcpy(kmode, "mixed");
|
||||
get_mixed_buffers()->free_all_nbor_buffers();
|
||||
} else {
|
||||
strcpy(kmode, "double");
|
||||
get_double_buffers()->free_all_nbor_buffers();
|
||||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
set_offload_affinity();
|
||||
#endif
|
||||
|
||||
if (comm->me == 0) {
|
||||
if (screen) {
|
||||
fprintf(screen,
|
||||
"----------------------------------------------------------\n");
|
||||
if (_offload_balance != 0.0) {
|
||||
fprintf(screen,"Using Intel Coprocessor with %d threads per core, ",
|
||||
_offload_tpc);
|
||||
fprintf(screen,"%d threads per task\n",_offload_threads);
|
||||
} else {
|
||||
fprintf(screen,"Using Intel Package without Coprocessor.\n");
|
||||
}
|
||||
fprintf(screen,"Precision: %s\n",kmode);
|
||||
fprintf(screen,
|
||||
"----------------------------------------------------------\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixIntel::check_neighbor_intel()
|
||||
@ -440,6 +502,14 @@ void FixIntel::output_timing_data() {
|
||||
fprintf(_tscreen, "[%f, %f] ",timers_min[i],timers_max[i]);
|
||||
fprintf(_tscreen, "\n");
|
||||
#endif
|
||||
double ht = timers[TIME_HOST_NEIGHBOR] + timers[TIME_HOST_PAIR] +
|
||||
timers[TIME_OFFLOAD_WAIT];
|
||||
double ct = timers[TIME_OFFLOAD_NEIGHBOR] +
|
||||
timers[TIME_OFFLOAD_PAIR];
|
||||
double tt = MAX(ht,ct);
|
||||
if (timers[TIME_OFFLOAD_LATENCY] / tt > 0.07 && _separate_coi == 0)
|
||||
error->warning(FLERR,
|
||||
"Leaving a core free can improve performance for offload");
|
||||
}
|
||||
fprintf(_tscreen, "------------------------------------------------\n");
|
||||
}
|
||||
@ -549,4 +619,157 @@ void FixIntel::set_offload_affinity()
|
||||
_double_buffers->set_off_params(offload_threads, _cop, _separate_buffers);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int FixIntel::set_host_affinity(const int nomp)
|
||||
{
|
||||
#ifndef INTEL_OFFLOAD_NOAFFINITY
|
||||
_separate_coi = 1;
|
||||
int rank = comm->me;
|
||||
int node_rank;
|
||||
int ppn = get_ppn(node_rank);
|
||||
int cop = node_rank / (ppn / _ncops);
|
||||
|
||||
// Get a sorted list of logical cores
|
||||
int proc_list[INTEL_MAX_HOST_CORE_COUNT];
|
||||
int ncores;
|
||||
FILE *p;
|
||||
char cmd[512];
|
||||
char readbuf[INTEL_MAX_HOST_CORE_COUNT*5];
|
||||
sprintf(cmd, "lscpu -p=cpu,core,socket | grep -v '#' |"
|
||||
"sort -t, -k 3,3n -k 2,2n | awk -F, '{print $1}'");
|
||||
p = popen(cmd, "r");
|
||||
if (p == NULL) return -1;
|
||||
ncores = 0;
|
||||
while(fgets(readbuf, 512, p)) {
|
||||
proc_list[ncores] = atoi(readbuf);
|
||||
ncores++;
|
||||
}
|
||||
pclose(p);
|
||||
|
||||
// Sanity checks for core list
|
||||
if (ncores < 2) return -1;
|
||||
int nzero = 0;
|
||||
for (int i = 0; i < ncores; i++) {
|
||||
if (proc_list[i] == 0) nzero++;
|
||||
if (proc_list[i] < 0 || proc_list[i] >= ncores) return -1;
|
||||
}
|
||||
if (nzero > 1) return -1;
|
||||
|
||||
// Determine the OpenMP/MPI configuration
|
||||
char *estring;
|
||||
int nthreads = nomp;
|
||||
if (nthreads == 0) {
|
||||
estring = getenv("OMP_NUM_THREADS");
|
||||
if (estring != NULL) {
|
||||
nthreads = atoi(estring);
|
||||
if (nthreads < 2) nthreads = 1;
|
||||
} else
|
||||
nthreads = 1;
|
||||
}
|
||||
|
||||
// Determine how many logical cores for COI and MPI tasks
|
||||
int coi_cores = 0, mpi_cores;
|
||||
int subscription = nthreads * ppn;
|
||||
if (subscription > ncores) {
|
||||
if (rank == 0)
|
||||
error->warning(FLERR,
|
||||
"More MPI tasks/OpenMP threads than available cores");
|
||||
return 0;
|
||||
}
|
||||
if (subscription == ncores)
|
||||
_separate_coi = 0;
|
||||
|
||||
if (subscription > ncores / 2) {
|
||||
coi_cores = ncores - subscription;
|
||||
if (coi_cores > INTEL_MAX_COI_CORES) coi_cores = INTEL_MAX_COI_CORES;
|
||||
}
|
||||
mpi_cores = (ncores - coi_cores) / ppn;
|
||||
|
||||
// Get ids of all LWPs that COI spawned and affinitize
|
||||
int lwp = 0, plwp = 0, nlwp = 0, mlwp = 0, fail = 0;
|
||||
cpu_set_t cpuset;
|
||||
pid_t pid = getpid();
|
||||
if (coi_cores) {
|
||||
sprintf(cmd, "ps -Lp %d -o lwp | awk ' (NR > 2) {print}'", pid);
|
||||
p = popen(cmd, "r");
|
||||
if (p == NULL) return -1;
|
||||
|
||||
while(fgets(readbuf, 512, p)) {
|
||||
lwp = atoi(readbuf);
|
||||
int first = coi_cores + node_rank * mpi_cores;
|
||||
CPU_ZERO(&cpuset);
|
||||
for (int i = first; i < first + mpi_cores; i++)
|
||||
CPU_SET(proc_list[i], &cpuset);
|
||||
if (sched_setaffinity(lwp, sizeof(cpu_set_t), &cpuset)) {
|
||||
fail = 1;
|
||||
break;
|
||||
}
|
||||
plwp++;
|
||||
}
|
||||
pclose(p);
|
||||
|
||||
// Do async offload to create COI threads
|
||||
int sig1, sig2;
|
||||
float *buf1;
|
||||
int pragma_size = 1024;
|
||||
buf1 = (float*) malloc(sizeof(float)*pragma_size);
|
||||
|
||||
#pragma offload target (mic:0) \
|
||||
in(buf1:length(pragma_size) alloc_if(1) free_if(0)) \
|
||||
signal(&sig1)
|
||||
{}
|
||||
#pragma offload_wait target(mic:0) wait(&sig1)
|
||||
|
||||
#pragma offload target (mic:0) \
|
||||
out(buf1:length(pragma_size) alloc_if(0) free_if(1)) \
|
||||
signal(&sig2)
|
||||
{}
|
||||
#pragma offload_wait target(mic:0) wait(&sig2)
|
||||
free(buf1);
|
||||
|
||||
p = popen(cmd, "r");
|
||||
if (p == NULL) return -1;
|
||||
|
||||
while(fgets(readbuf, 512, p)) {
|
||||
lwp = atoi(readbuf);
|
||||
nlwp++;
|
||||
if (nlwp <= plwp) continue;
|
||||
|
||||
CPU_ZERO(&cpuset);
|
||||
for(int i=0; i<coi_cores; i++)
|
||||
CPU_SET(proc_list[i], &cpuset);
|
||||
|
||||
if (sched_setaffinity(lwp, sizeof(cpu_set_t), &cpuset)) {
|
||||
fail = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
pclose(p);
|
||||
nlwp -= plwp;
|
||||
|
||||
// Get stats on the number of LWPs per process
|
||||
MPI_Reduce(&nlwp, &mlwp, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
|
||||
}
|
||||
|
||||
if (screen && rank == 0) {
|
||||
if (coi_cores)
|
||||
fprintf(screen,"Intel Package: Affinitizing %d Offload Threads to %d Cores\n",
|
||||
mlwp, coi_cores);
|
||||
fprintf(screen,"Intel Package: Affinitizing MPI Tasks to %d Cores Each\n",mpi_cores);
|
||||
}
|
||||
if (fail) return -1;
|
||||
|
||||
// Affinitize MPI Ranks
|
||||
CPU_ZERO(&cpuset);
|
||||
int first = coi_cores + node_rank * mpi_cores;
|
||||
for (int i = first; i < first+mpi_cores; i++)
|
||||
CPU_SET(proc_list[i], &cpuset);
|
||||
if (sched_setaffinity(pid, sizeof(cpu_set_t), &cpuset))
|
||||
return -1;
|
||||
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -38,6 +38,8 @@ class FixIntel : public Fix {
|
||||
virtual ~FixIntel();
|
||||
virtual int setmask();
|
||||
virtual void init();
|
||||
virtual void setup(int);
|
||||
void pair_init_check();
|
||||
|
||||
// Get all forces, calculation results from coprocesser
|
||||
void sync_coprocessor();
|
||||
@ -135,7 +137,7 @@ class FixIntel : public Fix {
|
||||
int _offload_nlocal, _offload_nall, _offload_min_ghost, _offload_nghost;
|
||||
int _host_min_local, _host_min_ghost, _host_nall;
|
||||
int _host_used_local, _host_used_ghost;
|
||||
int _separate_buffers, _offload_noghost, _sync_at_pair;
|
||||
int _separate_buffers, _offload_noghost, _sync_at_pair, _separate_coi;
|
||||
bool _setup_time_cleared, _timers_allocated;
|
||||
void output_timing_data();
|
||||
FILE *_tscreen;
|
||||
@ -149,6 +151,7 @@ class FixIntel : public Fix {
|
||||
int _full_host_list, _cop, _ncops;
|
||||
|
||||
int get_ppn(int &);
|
||||
int set_host_affinity(const int);
|
||||
#endif
|
||||
void check_neighbor_intel();
|
||||
|
||||
@ -540,6 +543,14 @@ E: The 'package intel' command is required for /intel styles
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
W: Could not set host affinity for offload tasks
|
||||
|
||||
When using offload to a coprocessor, the application will try to set affinity
|
||||
for host MPI tasks and OpenMP threads and will generate a warning if unable
|
||||
to do so successfully. In the unsuccessful case, you might wish to set
|
||||
affinity outside of the application and performance might suffer if
|
||||
hyperthreading is disable on the CPU.
|
||||
|
||||
E: Neighbor list overflow, boost neigh_modify one
|
||||
|
||||
Increase the value for neigh_modify one to allow for larger allocations for
|
||||
@ -578,6 +589,17 @@ E: Currently, cannot use neigh_modify exclude with Intel package.
|
||||
|
||||
This is a current restriction of the Intel package.
|
||||
|
||||
W: Unknown Intel Compiler Version
|
||||
|
||||
The compiler version used to build LAMMPS has not been tested with
|
||||
offload to a coprocessor.
|
||||
|
||||
W: Unsupported Intel Compiler
|
||||
|
||||
The compiler version used to build LAMMPS is not supported when using
|
||||
offload to a coprocessor. There could be performance or correctness
|
||||
issues. Please use 14.0.1.106 or 15.1.133 or later.
|
||||
|
||||
E: Currently, cannot use more than one intel style with hybrid.
|
||||
|
||||
Currently, hybrid pair styles can only use the intel suffix for one of the
|
||||
@ -589,9 +611,21 @@ The hybrid pair style configuration is not yet supported by the Intel
|
||||
package. Support is limited to hybrid/overlay or a hybrid style that does
|
||||
not require a skip list.
|
||||
|
||||
W: Leaving a core/node free can improve performance for offload
|
||||
|
||||
When each CPU is fully subscribed with MPI tasks and OpenMP threads,
|
||||
context switching with threads used for offload can sometimes decrease
|
||||
performance. If you see this warning, try using fewer MPI tasks/OpenMP threads
|
||||
per node to leave a physical CPU core free on each node.
|
||||
|
||||
E: MPI tasks per node must be multiple of offload_cards
|
||||
|
||||
For offload to multiple coprocessors on a single node, the Intel package
|
||||
requires that each coprocessor is used by the same number of MPI tasks.
|
||||
|
||||
W: More MPI tasks/OpenMP threads than available cores
|
||||
|
||||
Using more MPI tasks/OpenMP threads than available cores will typically
|
||||
decrease performance.
|
||||
|
||||
*/
|
||||
|
||||
@ -385,7 +385,7 @@ void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes)
|
||||
if (_ntypes > 0) {
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
flt_t * cutneighsqo = _cutneighsq[0];
|
||||
if (cutneighsqo != 0) {
|
||||
if (_off_threads > 0 && cutneighsqo != 0) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(cutneighsqo:alloc_if(0) free_if(1))
|
||||
}
|
||||
@ -396,7 +396,7 @@ void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes)
|
||||
lmp->memory->create(_cutneighsq, ntypes, ntypes, "_cutneighsq");
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
flt_t * cutneighsqo = _cutneighsq[0];
|
||||
if (cutneighsqo != NULL) {
|
||||
if (_off_threads > 0 && cutneighsqo != NULL) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(cutneighsqo:length(ntypes * ntypes) alloc_if(1) free_if(0))
|
||||
}
|
||||
|
||||
@ -61,6 +61,8 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
|
||||
#define INTEL_NBOR_PAD INTEL_VECTOR_WIDTH
|
||||
#define INTEL_LB_MEAN_WEIGHT 0.1
|
||||
#define INTEL_BIGP 1e15
|
||||
#define INTEL_MAX_HOST_CORE_COUNT 512
|
||||
#define INTEL_MAX_COI_CORES 2
|
||||
|
||||
#define IP_PRE_get_stride(stride, n, datasize, torque) \
|
||||
{ \
|
||||
|
||||
@ -893,20 +893,17 @@ void PairGayBerneIntel::init_style()
|
||||
fix = static_cast<FixIntel *>(modify->fix[ifix]);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
fix->set_offload_affinity();
|
||||
if (force->newton_pair) fix->set_offload_noghost(1);
|
||||
_cop = fix->coprocessor_number();
|
||||
#endif
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
|
||||
fix->get_mixed_buffers()->free_all_nbor_buffers();
|
||||
|
||||
fix->pair_init_check();
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
pack_force_const(force_const_single, fix->get_mixed_buffers());
|
||||
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
|
||||
fix->get_double_buffers()->free_all_nbor_buffers();
|
||||
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
pack_force_const(force_const_double, fix->get_double_buffers());
|
||||
} else {
|
||||
fix->get_single_buffers()->free_all_nbor_buffers();
|
||||
else
|
||||
pack_force_const(force_const_single, fix->get_single_buffers());
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -489,19 +489,16 @@ void PairLJCharmmCoulLongIntel::init_style()
|
||||
fix = static_cast<FixIntel *>(modify->fix[ifix]);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
fix->set_offload_affinity();
|
||||
_cop = fix->coprocessor_number();
|
||||
#endif
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
|
||||
fix->get_mixed_buffers()->free_all_nbor_buffers();
|
||||
|
||||
fix->pair_init_check();
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
pack_force_const(force_const_single, fix->get_mixed_buffers());
|
||||
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
|
||||
fix->get_double_buffers()->free_all_nbor_buffers();
|
||||
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
pack_force_const(force_const_double, fix->get_double_buffers());
|
||||
} else {
|
||||
fix->get_single_buffers()->free_all_nbor_buffers();
|
||||
else
|
||||
pack_force_const(force_const_single, fix->get_single_buffers());
|
||||
}
|
||||
}
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
|
||||
@ -456,19 +456,16 @@ void PairLJCutCoulLongIntel::init_style()
|
||||
fix = static_cast<FixIntel *>(modify->fix[ifix]);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
fix->set_offload_affinity();
|
||||
_cop = fix->coprocessor_number();
|
||||
#endif
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
|
||||
fix->get_mixed_buffers()->free_all_nbor_buffers();
|
||||
|
||||
fix->pair_init_check();
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
pack_force_const(force_const_single, fix->get_mixed_buffers());
|
||||
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
|
||||
fix->get_double_buffers()->free_all_nbor_buffers();
|
||||
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
pack_force_const(force_const_double, fix->get_double_buffers());
|
||||
} else {
|
||||
fix->get_single_buffers()->free_all_nbor_buffers();
|
||||
else
|
||||
pack_force_const(force_const_single, fix->get_single_buffers());
|
||||
}
|
||||
}
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
|
||||
@ -336,16 +336,14 @@ void PairLJCutIntel::init_style()
|
||||
error->all(FLERR,
|
||||
"Offload for lj/cut/intel is not yet available. Set balance to 0.");
|
||||
#endif
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
|
||||
fix->get_mixed_buffers()->free_all_nbor_buffers();
|
||||
|
||||
fix->pair_init_check();
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
pack_force_const(force_const_single, fix->get_mixed_buffers());
|
||||
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
|
||||
fix->get_double_buffers()->free_all_nbor_buffers();
|
||||
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
pack_force_const(force_const_double, fix->get_double_buffers());
|
||||
} else {
|
||||
fix->get_single_buffers()->free_all_nbor_buffers();
|
||||
else
|
||||
pack_force_const(force_const_single, fix->get_single_buffers());
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Reference in New Issue
Block a user