diff --git a/doc/src/Speed_kokkos.txt b/doc/src/Speed_kokkos.txt index fd33491253..99d29864dc 100644 --- a/doc/src/Speed_kokkos.txt +++ b/doc/src/Speed_kokkos.txt @@ -46,16 +46,15 @@ software version 7.5 or later must be installed on your system. See the discussion for the "GPU package"_Speed_gpu.html for details of how to check and do this. -NOTE: Kokkos with CUDA currently implicitly assumes that the MPI -library is CUDA-aware and has support for GPU-direct. This is not -always the case, especially when using pre-compiled MPI libraries -provided by a Linux distribution. This is not a problem when using -only a single GPU and a single MPI rank on a desktop. When running -with multiple MPI ranks, you may see segmentation faults without -GPU-direct support. These can be avoided by adding the flags "-pk -kokkos gpu/direct off"_Run_options.html to the LAMMPS command line or -by using the command "package kokkos gpu/direct off"_package.html in -the input file. +NOTE: Kokkos with CUDA currently implicitly assumes that the MPI library +is CUDA-aware. This is not always the case, especially when using +pre-compiled MPI libraries provided by a Linux distribution. This is not +a problem when using only a single GPU with a single MPI rank. When +running with multiple MPI ranks, you may see segmentation faults without +CUDA-aware MPI support. These can be avoided by adding the flags "-pk +kokkos cuda/aware off"_Run_options.html to the LAMMPS command line or by +using the command "package kokkos cuda/aware off"_package.html in the +input file. [Building LAMMPS with the KOKKOS package:] @@ -217,9 +216,8 @@ case, also packing/unpacking communication buffers on the host may give speedup (see the KOKKOS "package"_package.html command). Using CUDA MPS is recommended in this scenario. -Using a CUDA-aware MPI library with -support for GPU-direct is highly recommended. GPU-direct use can be -avoided by using "-pk kokkos gpu/direct no"_package.html. 
As above for +Using a CUDA-aware MPI library is highly recommended. CUDA-aware MPI use can be +avoided by using "-pk kokkos cuda/aware off"_package.html. As above for multi-core CPUs (and no GPU), if N is the number of physical cores/node, then the number of MPI tasks/node should not exceed N. diff --git a/doc/src/package.txt b/doc/src/package.txt index 6a6d17bcbc..edd409a842 100644 --- a/doc/src/package.txt +++ b/doc/src/package.txt @@ -64,7 +64,7 @@ args = arguments specific to the style :l {no_affinity} values = none {kokkos} args = keyword value ... zero or more keyword/value pairs may be appended - keywords = {neigh} or {neigh/qeq} or {neigh/thread} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse} or {gpu/direct} + keywords = {neigh} or {neigh/qeq} or {neigh/thread} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse} or {cuda/aware} {neigh} value = {full} or {half} full = full neighbor list half = half neighbor list built in thread-safe manner @@ -87,9 +87,9 @@ args = arguments specific to the style :l no = perform communication pack/unpack in non-KOKKOS mode host = perform pack/unpack on host (e.g. with OpenMP threading) device = perform pack/unpack on device (e.g. on GPU) - {gpu/direct} = {off} or {on} - off = do not use GPU-direct - on = use GPU-direct (default) + {cuda/aware} = {off} or {on} + off = do not use CUDA-aware MPI + on = use CUDA-aware MPI (default) {omp} args = Nthreads keyword value ... Nthread = # of OpenMP threads to associate with each MPI process zero or more keyword/value pairs may be appended @@ -520,19 +520,21 @@ pack/unpack communicated data. When running small systems on a GPU, performing the exchange pack/unpack on the host CPU can give speedup since it reduces the number of CUDA kernel launches. -The {gpu/direct} keyword chooses whether GPU-direct will be used. When +The {cuda/aware} keyword chooses whether CUDA-aware MPI will be used. 
When this keyword is set to {on}, buffers in GPU memory are passed directly through MPI send/receive calls. This reduces overhead of first copying -the data to the host CPU. However GPU-direct is not supported on all +the data to the host CPU. However CUDA-aware MPI is not supported on all systems, which can lead to segmentation faults and would require using a -value of {off}. If LAMMPS can safely detect that GPU-direct is not +value of {off}. If LAMMPS can safely detect that CUDA-aware MPI is not available (currently only possible with OpenMPI v2.0.0 or later), then -the {gpu/direct} keyword is automatically set to {off} by default. When -the {gpu/direct} keyword is set to {off} while any of the {comm} +the {cuda/aware} keyword is automatically set to {off} by default. When +the {cuda/aware} keyword is set to {off} while any of the {comm} keywords are set to {device}, the value for these {comm} keywords will be automatically changed to {host}. This setting has no effect if not -running on GPUs. GPU-direct is available for OpenMPI 1.8 (or later -versions), Mvapich2 1.9 (or later), and CrayMPI. +running on GPUs. CUDA-aware MPI is available for OpenMPI 1.8 (or later +versions), Mvapich2 1.9 (or later) when the "MV2_USE_CUDA" environment +variable is set to "1", CrayMPI, and IBM Spectrum MPI when the "-gpu" +flag is used. :line @@ -641,8 +643,8 @@ switch"_Run_options.html. For the KOKKOS package, the option defaults for GPUs are neigh = full, neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default -value, comm = device, gpu/direct = on. When LAMMPS can safely detect -that GPU-direct is not available, the default value of gpu/direct +value, comm = device, cuda/aware = on. When LAMMPS can safely detect +that CUDA-aware MPI is not available, the default value of cuda/aware becomes "off". For CPUs or Xeon Phis, the option defaults are neigh = half, neigh/qeq = half, newton = on, binsize = 0.0, and comm = no. 
The option neigh/thread = on when there are 16K atoms or less on an MPI diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 87986a9ca9..d0bd978ae7 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -418,7 +418,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) if (sendproc[iswap] != me) { double* buf_send_pair; double* buf_recv_pair; - if (lmp->kokkos->gpu_direct_flag) { + if (lmp->kokkos->cuda_aware_flag) { buf_send_pair = k_buf_send_pair.view().data(); buf_recv_pair = k_buf_recv_pair.view().data(); } else { @@ -436,7 +436,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) MPI_Send(buf_send_pair,n,MPI_DOUBLE,sendproc[iswap],0,world); if (recvnum[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); - if (!lmp->kokkos->gpu_direct_flag) { + if (!lmp->kokkos->cuda_aware_flag) { k_buf_recv_pair.modify(); k_buf_recv_pair.sync(); } diff --git a/src/KOKKOS/gridcomm_kokkos.cpp b/src/KOKKOS/gridcomm_kokkos.cpp index 14b84a5733..348217634b 100644 --- a/src/KOKKOS/gridcomm_kokkos.cpp +++ b/src/KOKKOS/gridcomm_kokkos.cpp @@ -529,7 +529,7 @@ void GridCommKokkos::forward_comm(KSpace *kspace, int which) if (swap[m].sendproc != me) { FFT_SCALAR* buf1; FFT_SCALAR* buf2; - if (lmp->kokkos->gpu_direct_flag) { + if (lmp->kokkos->cuda_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); } else { @@ -545,7 +545,7 @@ void GridCommKokkos::forward_comm(KSpace *kspace, int which) swap[m].sendproc,0,gridcomm); MPI_Wait(&request,MPI_STATUS_IGNORE); - if (!lmp->kokkos->gpu_direct_flag) { + if (!lmp->kokkos->cuda_aware_flag) { k_buf2.modify(); k_buf2.sync(); } @@ -579,7 +579,7 @@ void GridCommKokkos::reverse_comm(KSpace *kspace, int which) if (swap[m].recvproc != me) { FFT_SCALAR* buf1; FFT_SCALAR* buf2; - if (lmp->kokkos->gpu_direct_flag) { + if (lmp->kokkos->cuda_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); } else { @@ -595,7 +595,7 @@ void GridCommKokkos::reverse_comm(KSpace *kspace, int 
which) swap[m].recvproc,0,gridcomm); MPI_Wait(&request,MPI_STATUS_IGNORE); - if (!lmp->kokkos->gpu_direct_flag) { + if (!lmp->kokkos->cuda_aware_flag) { k_buf2.modify(); k_buf2.sync(); } diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 3fa84d98b2..18a4a3168f 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -28,32 +28,36 @@ #ifdef KOKKOS_ENABLE_CUDA -// for detecting GPU-direct support: -// the function int have_gpu_direct() -// - returns -1 if GPU-direct support is unknown -// - returns 0 if no GPU-direct support available -// - returns 1 if GPU-direct support is available +// for detecting CUDA-aware MPI support: +// the variable int have_cuda_aware +// - is 1 if CUDA-aware MPI support is available +// - is 0 if CUDA-aware MPI support is unavailable +// - is -1 if CUDA-aware MPI support is unknown -#define GPU_DIRECT_UNKNOWN static int have_gpu_direct() {return -1;} +#define CUDA_AWARE_UNKNOWN static int have_cuda_aware = -1; -// OpenMPI supports detecting GPU-direct as of version 2.0.0 -#if OPEN_MPI +// OpenMPI supports detecting CUDA-aware MPI as of version 2.0.0 +#if (OPEN_MPI) #if (OMPI_MAJOR_VERSION >= 2) + #include -#if defined(MPIX_CUDA_AWARE_SUPPORT) -static int have_gpu_direct() { return MPIX_Query_cuda_support(); } + +#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT +static int have_cuda_aware = 1; +#elif defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT +static int have_cuda_aware = 0; #else -GPU_DIRECT_UNKNOWN -#endif +CUDA_AWARE_UNKNOWN +#endif // defined(MPIX_CUDA_AWARE_SUPPORT) #else // old OpenMPI -GPU_DIRECT_UNKNOWN -#endif +CUDA_AWARE_UNKNOWN +#endif // (OMPI_MAJOR_VERSION >=2) #else // unknown MPI library -GPU_DIRECT_UNKNOWN -#endif +CUDA_AWARE_UNKNOWN +#endif // OPEN_MPI #endif // KOKKOS_ENABLE_CUDA @@ -66,6 +70,10 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) kokkos_exists = 1; lmp->kokkos = this; + exchange_comm_changed = 0; + forward_comm_changed = 0; + 
reverse_comm_changed = 0; + delete memory; memory = new MemoryKokkos(lmp); memoryKK = (MemoryKokkos*) memory; @@ -145,29 +153,10 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) #ifdef KOKKOS_ENABLE_CUDA if (ngpus <= 0) error->all(FLERR,"Kokkos has been compiled for CUDA but no GPUs are requested"); - - // check and warn about GPU-direct availability when using multiple MPI tasks - - int nmpi = 0; - MPI_Comm_size(world,&nmpi); - if ((nmpi > 1) && (me == 0)) { - if ( 1 == have_gpu_direct() ) { - ; // all good, nothing to warn about - } else if (-1 == have_gpu_direct() ) { - error->warning(FLERR,"Kokkos with CUDA assumes GPU-direct is available," - " but cannot determine if this is the case\n try" - " '-pk kokkos gpu/direct off' when getting segmentation faults"); - } else if ( 0 == have_gpu_direct() ) { - error->warning(FLERR,"GPU-direct is NOT available, " - "using '-pk kokkos gpu/direct off' by default"); - } else { - ; // should never get here - } - } #endif #ifndef KOKKOS_ENABLE_SERIAL - if (nthreads == 1) + if (nthreads == 1 && me == 0) error->warning(FLERR,"When using a single thread, the Kokkos Serial backend " "(i.e. 
Makefile.kokkos_mpi_only) gives better performance " "than the OpenMP backend"); @@ -183,7 +172,11 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) // default settings for package kokkos command binsize = 0.0; - gpu_direct_flag = 1; +#ifdef KOKKOS_ENABLE_CUDA + cuda_aware_flag = 1; +#else + cuda_aware_flag = 0; +#endif neigh_thread = 0; neigh_thread_set = 0; neighflag_qeq_set = 0; @@ -206,10 +199,66 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; } -#if KOKKOS_USE_CUDA - // only if we can safely detect, that GPU-direct is not available, change default - if (0 == have_gpu_direct()) gpu_direct_flag = 0; +#ifdef KOKKOS_ENABLE_CUDA + + // check and warn about CUDA-aware MPI availability when using multiple MPI tasks + // change default only if we can safely detect that CUDA-aware MPI is not available + + int nmpi = 0; + MPI_Comm_size(world,&nmpi); + if (nmpi > 0) { + + // Check for IBM Spectrum MPI + + int len; + char mpi_version[MPI_MAX_LIBRARY_VERSION_STRING]; + MPI_Get_library_version(mpi_version, &len); + if (strstr(&mpi_version[0], "Spectrum") != NULL) { + cuda_aware_flag = 0; + char* str; + if (str = getenv("OMPI_MCA_pml_pami_enable_cuda")) + if((strcmp(str,"1") == 0)) { + have_cuda_aware = 1; + cuda_aware_flag = 1; + } + + if (!cuda_aware_flag) + if (me == 0) + error->warning(FLERR,"The Spectrum MPI '-gpu' flag is not set. Disabling CUDA-aware MPI"); + } + + if (cuda_aware_flag == 1 && have_cuda_aware == 0) { + if (me == 0) + error->warning(FLERR,"Turning off CUDA-aware MPI since it is not detected, " + "use '-pk kokkos cuda/aware on' to override"); + cuda_aware_flag = 0; + } else if (have_cuda_aware == -1) { // maybe we are dealing with MPICH, MVAPICH2 or some derivative? 
+ // MVAPICH2 +#if (defined MPICH) && (defined MVAPICH2_VERSION) + char* str; + cuda_aware_flag = 0; + if ((str = getenv("MV2_USE_CUDA"))) // MVAPICH2 is CUDA-aware only when MV2_USE_CUDA is "1" + if ((strcmp(str,"1") == 0)) + cuda_aware_flag = 1; + + if (!cuda_aware_flag) + if (me == 0) + error->warning(FLERR,"MVAPICH2 'MV2_USE_CUDA' environment variable is not set. Disabling CUDA-aware MPI"); + // pure MPICH or some unsupported MPICH derivative +#elif (defined MPICH) && !(defined MVAPICH2_VERSION) + if (me == 0) + error->warning(FLERR,"Detected MPICH. Disabling CUDA-aware MPI"); + cuda_aware_flag = 0; +#else + if (me == 0) + error->warning(FLERR,"Kokkos with CUDA assumes CUDA-aware MPI is available," + " but cannot determine if this is the case\n try" + " '-pk kokkos cuda/aware off' if getting segmentation faults"); + #endif + } // if (-1 == have_cuda_aware) + } // nmpi > 0 +#endif // KOKKOS_ENABLE_CUDA #ifdef KILL_KOKKOS_ON_SIGSEGV signal(SIGSEGV, my_signal_handler); @@ -290,6 +339,7 @@ void KokkosLMP::accelerator(int narg, char **arg) exchange_comm_classic = 0; exchange_comm_on_host = 0; } else error->all(FLERR,"Illegal package kokkos command"); + exchange_comm_changed = 0; iarg += 2; } else if (strcmp(arg[iarg],"comm/forward") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); @@ -301,6 +351,7 @@ void KokkosLMP::accelerator(int narg, char **arg) forward_comm_classic = 0; forward_comm_on_host = 0; } else error->all(FLERR,"Illegal package kokkos command"); + forward_comm_changed = 0; iarg += 2; } else if (strcmp(arg[iarg],"comm/reverse") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); @@ -312,11 +363,12 @@ void KokkosLMP::accelerator(int narg, char **arg) reverse_comm_classic = 0; reverse_comm_on_host = 0; } else error->all(FLERR,"Illegal package kokkos command"); + reverse_comm_changed = 0; iarg += 2; - } else if (strcmp(arg[iarg],"gpu/direct") == 0) { + } else if (strcmp(arg[iarg],"cuda/aware") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package 
kokkos command"); - if (strcmp(arg[iarg+1],"off") == 0) gpu_direct_flag = 0; - else if (strcmp(arg[iarg+1],"on") == 0) gpu_direct_flag = 1; + if (strcmp(arg[iarg+1],"off") == 0) cuda_aware_flag = 0; + else if (strcmp(arg[iarg+1],"on") == 0) cuda_aware_flag = 1; else error->all(FLERR,"Illegal package kokkos command"); iarg += 2; } else if (strcmp(arg[iarg],"neigh/thread") == 0) { @@ -329,15 +381,38 @@ void KokkosLMP::accelerator(int narg, char **arg) } else error->all(FLERR,"Illegal package kokkos command"); } - // if "gpu/direct off" and "comm device", change to "comm host" + // if "cuda/aware off" and "comm device", change to "comm host" - if (!gpu_direct_flag) { - if (exchange_comm_classic == 0 && exchange_comm_on_host == 0) - exchange_comm_on_host = 1; - if (forward_comm_classic == 0 && forward_comm_on_host == 0) - forward_comm_on_host = 1; - if (reverse_comm_classic == 0 && reverse_comm_on_host == 0) - reverse_comm_on_host = 1; + if (!cuda_aware_flag) { + if (exchange_comm_classic == 0 && exchange_comm_on_host == 0) { + exchange_comm_on_host = 1; + exchange_comm_changed = 1; + } + if (forward_comm_classic == 0 && forward_comm_on_host == 0) { + forward_comm_on_host = 1; + forward_comm_changed = 1; + } + if (reverse_comm_classic == 0 && reverse_comm_on_host == 0) { + reverse_comm_on_host = 1; + reverse_comm_changed = 1; + } + } + + // if "cuda/aware on" and comm flags were changed previously, change them back + + if (cuda_aware_flag) { + if (exchange_comm_changed) { + exchange_comm_on_host = 0; + exchange_comm_changed = 0; + } + if (forward_comm_changed) { + forward_comm_on_host = 0; + forward_comm_changed = 0; + } + if (reverse_comm_changed) { + reverse_comm_on_host = 0; + reverse_comm_changed = 0; + } } // set newton flags diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index 46044799c4..b9f1e66c68 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -32,10 +32,13 @@ class KokkosLMP : protected Pointers { int exchange_comm_on_host; int 
forward_comm_on_host; int reverse_comm_on_host; + int exchange_comm_changed; + int forward_comm_changed; + int reverse_comm_changed; int nthreads,ngpus; int numa; int auto_sync; - int gpu_direct_flag; + int cuda_aware_flag; int neigh_thread; int neigh_thread_set; int newtonflag; diff --git a/src/MAKE/MACHINES/Makefile.lassen_kokkos b/src/MAKE/MACHINES/Makefile.lassen_kokkos new file mode 100644 index 0000000000..23697bfea2 --- /dev/null +++ b/src/MAKE/MACHINES/Makefile.lassen_kokkos @@ -0,0 +1,125 @@ +# lassen_kokkos = KOKKOS/CUDA, V100 GPU and Power9, IBM Spectrum MPI, nvcc compiler with gcc 7.3.1 + +SHELL = /bin/sh + +# --------------------------------------------------------------------- +# compiler/linker settings +# specify flags and libraries needed for your compiler + +KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) +export MPICH_CXX = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper +export OMPI_CXX = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper +CC = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper +CCFLAGS = -g -O3 +SHFLAGS = -fPIC +DEPFLAGS = -M + +LINK = $(KOKKOS_ABSOLUTE_PATH)/bin/nvcc_wrapper +LINKFLAGS = -g -O3 +LIB = +SIZE = size + +ARCHIVE = ar +ARFLAGS = -rc +SHLIBFLAGS = -shared +KOKKOS_DEVICES = Cuda +KOKKOS_ARCH = Power9,Volta70 + +# --------------------------------------------------------------------- +# LAMMPS-specific settings, all OPTIONAL +# specify settings for LAMMPS features you will use +# if you change any -D setting, do full re-compile after "make clean" + +# LAMMPS ifdef settings +# see possible settings in Section 2.2 (step 4) of manual + +LMP_INC = -DLAMMPS_GZIP + +# MPI library +# see discussion in Section 2.2 (step 5) of manual +# MPI wrapper compiler/linker can provide this info +# can point to dummy MPI library in src/STUBS as in Makefile.serial +# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts +# INC = path for mpi.h, MPI compiler settings +# PATH = path for MPI library +# LIB = name of MPI library + +MPI_INC = 
-DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1 -I/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-gcc-7.3.1/include +MPI_PATH = +MPI_LIB = -L/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-gcc-7.3.1/lib -lmpi_ibm + +# FFT library +# see discussion in Section 2.2 (step 6) of manual +# can be left blank to use provided KISS FFT library +# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings +# PATH = path for FFT library +# LIB = name of FFT library + +FFT_INC = +FFT_PATH = +FFT_LIB = + +# JPEG and/or PNG library +# see discussion in Section 2.2 (step 7) of manual +# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC +# INC = path(s) for jpeglib.h and/or png.h +# PATH = path(s) for JPEG library and/or PNG library +# LIB = name(s) of JPEG library and/or PNG library + +JPG_INC = +JPG_PATH = +JPG_LIB = + +# --------------------------------------------------------------------- +# build rules and dependencies +# do not edit this section + +include Makefile.package.settings +include Makefile.package + +EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) +EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) +EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) +EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS) +EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS) + +# Path to src files + +vpath %.cpp .. +vpath %.h .. 
+ +# Link target + +$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS) + $(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE) + $(SIZE) $(EXE) + +# Library targets + +lib: $(OBJ) $(EXTRA_LINK_DEPENDS) + $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) + +shlib: $(OBJ) $(EXTRA_LINK_DEPENDS) + $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \ + $(OBJ) $(EXTRA_LIB) $(LIB) + +# Compilation rules + +%.o:%.cpp $(EXTRA_CPP_DEPENDS) + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< + +%.d:%.cpp $(EXTRA_CPP_DEPENDS) + $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ + +%.o:%.cu $(EXTRA_CPP_DEPENDS) + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< + +# Individual dependencies + +depend : fastdep.exe $(SRC) + @./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1 + +fastdep.exe: ../DEPEND/fastdep.c + cc -O -o $@ $< + +sinclude .depend diff --git a/src/STUBS/mpi.c b/src/STUBS/mpi.c index af7a489bb1..cd92750b9a 100644 --- a/src/STUBS/mpi.c +++ b/src/STUBS/mpi.c @@ -76,6 +76,23 @@ int MPI_Finalized(int *flag) /* ---------------------------------------------------------------------- */ +/* return "LAMMPS MPI STUBS" as name of the library */ + +int MPI_Get_library_version(char *version, int *resultlen) +{ + const char string[] = "LAMMPS MPI STUBS"; + int len; + + if (!version || !resultlen) return MPI_ERR_ARG; + + len = strlen(string); + memcpy(version,string,len+1); + *resultlen = len; + return MPI_SUCCESS; +} + +/* ---------------------------------------------------------------------- */ + /* return "localhost" as name of the processor */ int MPI_Get_processor_name(char *name, int *resultlen) diff --git a/src/STUBS/mpi.h b/src/STUBS/mpi.h index 1eca1ec527..063dc542be 100644 --- a/src/STUBS/mpi.h +++ b/src/STUBS/mpi.h @@ -63,6 +63,7 @@ extern "C" { #define MPI_IN_PLACE NULL #define MPI_MAX_PROCESSOR_NAME 128 +#define MPI_MAX_LIBRARY_VERSION_STRING 128 typedef void MPI_User_function(void *invec, void *inoutvec, int *len, MPI_Datatype *datatype); @@ -79,6 +80,7 @@ typedef 
struct _MPI_Status MPI_Status; int MPI_Init(int *argc, char ***argv); int MPI_Initialized(int *flag); int MPI_Finalized(int *flag); +int MPI_Get_library_version(char *version, int *resultlen); int MPI_Get_processor_name(char *name, int *resultlen); int MPI_Get_version(int *major, int *minor);