Merged upstream.
This commit is contained in:
972
lib/kokkos/Makefile.kokkos
Normal file
972
lib/kokkos/Makefile.kokkos
Normal file
@ -0,0 +1,972 @@
|
||||
# Default settings common options.
|
||||
|
||||
#LAMMPS specific settings:
|
||||
KOKKOS_PATH=../../lib/kokkos
|
||||
CXXFLAGS=$(CCFLAGS)
|
||||
|
||||
# Options: Cuda,ROCm,OpenMP,Pthreads,Qthreads,Serial
|
||||
KOKKOS_DEVICES ?= "Cuda, OpenMP"
|
||||
#KOKKOS_DEVICES ?= "Pthreads"
|
||||
# Options (vendor lists follow the default below):
# Intel: KNC,WSM,SNB,HSW,BDW,SKX,KNL
|
||||
KOKKOS_ARCH ?= "Pascal61"
|
||||
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61
|
||||
# ARM: ARMv80,ARMv81,ARMv8-ThunderX
|
||||
# IBM: BGQ,Power7,Power8,Power9
|
||||
# AMD-GPUS: Kaveri,Carrizo,Fiji,Vega,gfx901
|
||||
# AMD-CPUS: AMDAVX,Ryzen,Epyc
|
||||
# Options: yes,no
|
||||
KOKKOS_DEBUG ?= "no"
|
||||
# Options: hwloc,librt,experimental_memkind
|
||||
KOKKOS_USE_TPLS ?= ""
|
||||
# Options: c++11,c++1z
|
||||
KOKKOS_CXX_STANDARD ?= "c++11"
|
||||
# Options: compiler_warnings,aggressive_vectorization,disable_profiling,disable_dualview_modify_check,enable_profile_load_print
|
||||
KOKKOS_OPTIONS ?= ""
|
||||
|
||||
# Default settings specific options.
|
||||
# Options: force_uvm,use_ldg,rdc,enable_lambda
|
||||
KOKKOS_CUDA_OPTIONS ?= "enable_lambda"
|
||||
|
||||
# Check for general settings.
#
# Every KOKKOS_INTERNAL_* switch below is set to 1 when its keyword occurs
# anywhere in the corresponding user-facing option string, and to 0 otherwise.
# $(findstring) performs the same case-sensitive substring test that
# "echo ... | grep ... | wc -l" did, without forking a shell per switch.
KOKKOS_INTERNAL_ENABLE_DEBUG := $(if $(findstring yes,$(KOKKOS_DEBUG)),1,0)
KOKKOS_INTERNAL_ENABLE_CXX11 := $(if $(findstring c++11,$(KOKKOS_CXX_STANDARD)),1,0)
KOKKOS_INTERNAL_ENABLE_CXX1Z := $(if $(findstring c++1z,$(KOKKOS_CXX_STANDARD)),1,0)

# Check for external libraries.
KOKKOS_INTERNAL_USE_HWLOC := $(if $(findstring hwloc,$(KOKKOS_USE_TPLS)),1,0)
KOKKOS_INTERNAL_USE_LIBRT := $(if $(findstring librt,$(KOKKOS_USE_TPLS)),1,0)
KOKKOS_INTERNAL_USE_MEMKIND := $(if $(findstring experimental_memkind,$(KOKKOS_USE_TPLS)),1,0)

# Check for advanced settings.
KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(if $(findstring compiler_warnings,$(KOKKOS_OPTIONS)),1,0)
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(if $(findstring aggressive_vectorization,$(KOKKOS_OPTIONS)),1,0)
KOKKOS_INTERNAL_DISABLE_PROFILING := $(if $(findstring disable_profiling,$(KOKKOS_OPTIONS)),1,0)
KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(if $(findstring disable_dualview_modify_check,$(KOKKOS_OPTIONS)),1,0)
KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(if $(findstring enable_profile_load_print,$(KOKKOS_OPTIONS)),1,0)

# Cuda-specific options.
KOKKOS_INTERNAL_CUDA_USE_LDG := $(if $(findstring use_ldg,$(KOKKOS_CUDA_OPTIONS)),1,0)
KOKKOS_INTERNAL_CUDA_USE_UVM := $(if $(findstring force_uvm,$(KOKKOS_CUDA_OPTIONS)),1,0)
KOKKOS_INTERNAL_CUDA_USE_RELOC := $(if $(findstring rdc,$(KOKKOS_CUDA_OPTIONS)),1,0)
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(if $(findstring enable_lambda,$(KOKKOS_CUDA_OPTIONS)),1,0)
|
||||
|
||||
|
||||
# Check for Kokkos Host Execution Spaces, one of which must be on (Serial is the fallback).
|
||||
KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(subst OpenMPTarget,,$(KOKKOS_DEVICES)) | grep OpenMP | wc -l))
|
||||
KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l))
|
||||
KOKKOS_INTERNAL_USE_QTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthreads | wc -l))
|
||||
KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l))
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0)
|
||||
KOKKOS_INTERNAL_USE_SERIAL := 1
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Check for other Execution Spaces.
|
||||
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ROCM := $(strip $(shell echo $(KOKKOS_DEVICES) | grep ROCm | wc -l))
|
||||
KOKKOS_INTERNAL_USE_OPENMPTARGET := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMPTarget | wc -l))
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
|
||||
CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=)
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC_VERSION := $(shell nvcc --version 2>&1 | grep release | cut -d' ' -f5 | cut -d',' -f1 | tr -d .)
|
||||
endif
|
||||
|
||||
# Check OS.
|
||||
KOKKOS_OS := $(strip $(shell uname -s))
|
||||
KOKKOS_INTERNAL_OS_CYGWIN := $(strip $(shell uname -s | grep CYGWIN | wc -l))
|
||||
KOKKOS_INTERNAL_OS_LINUX := $(strip $(shell uname -s | grep Linux | wc -l))
|
||||
KOKKOS_INTERNAL_OS_DARWIN := $(strip $(shell uname -s | grep Darwin | wc -l))
|
||||
|
||||
# Check compiler.
|
||||
KOKKOS_INTERNAL_COMPILER_INTEL := $(strip $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_PGI := $(strip $(shell $(CXX) --version 2>&1 | grep PGI | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(CXX) --version 2>&1 | grep nvcc | wc -l))
|
||||
ifneq ($(OMPI_CXX),)
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(OMPI_CXX) --version 2>&1 | grep nvcc | wc -l))
|
||||
endif
|
||||
ifneq ($(MPICH_CXX),)
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(MPICH_CXX) --version 2>&1 | grep nvcc | wc -l))
|
||||
endif
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep clang | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep "apple-darwin" | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_HCC := $(strip $(shell $(CXX) --version 2>&1 | grep HCC | wc -l))
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG = 1
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 2)
|
||||
KOKKOS_INTERNAL_COMPILER_XL = 1
|
||||
endif
|
||||
|
||||
# Apple Clang passes both clang and apple clang tests, so turn off clang.
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG = 0
|
||||
endif
|
||||
# AMD HCC passes both the clang and the hcc test, so turn off clang.
# The variable cleared here must be KOKKOS_INTERNAL_COMPILER_CLANG (the one the
# later flag selection tests); a misspelled name would leave HCC builds on the
# Clang code paths.
ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
  KOKKOS_INTERNAL_COMPILER_CLANG = 0
endif
|
||||
|
||||
# When building with Clang, record its version (dots removed, e.g. 4.0.0 -> 400)
# and, for Cuda builds, require at least 4.0.0.
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
  # Ask $(CXX) itself: it is the compiler that was identified as Clang, and it
  # need not be the "clang" binary that happens to be first on PATH.
  KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell $(CXX) --version | grep version | cut -d ' ' -f3 | tr -d '.')

  ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
    # "test" exits 0 only when the version is numerically below 400.
    ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0)
      $(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher)
    endif

    # Cuda lambda support is switched on unconditionally for Clang Cuda builds.
    KOKKOS_INTERNAL_CUDA_USE_LAMBDA := 1
  endif
endif
|
||||
|
||||
# Set compiler warnings flags.
#
# KOKKOS_INTERNAL_COMPILER_WARNINGS stays empty unless warnings were requested;
# otherwise the first matching compiler in the chain below decides the flag set,
# with GCC-style flags as the fallback.
ifneq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1)
  KOKKOS_INTERNAL_COMPILER_WARNINGS =
else ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
  # TODO check if PGI accepts GNU style warnings
  KOKKOS_INTERNAL_COMPILER_WARNINGS =
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
  KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
else ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
  KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
else ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
  KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
  # TODO check if cray accepts GNU style warnings
  KOKKOS_INTERNAL_COMPILER_WARNINGS =
else
  # gcc
  KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
endif
|
||||
|
||||
# Set OpenMP flags.
#
# The first compiler that matches picks the flag; anything unrecognised falls
# through to the plain -fopenmp spelling.
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
  KOKKOS_INTERNAL_OPENMP_FLAG := -mp
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
  KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
else ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
  KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
else ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
  KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
  # OpenMP is turned on by default in Cray compiler environment.
  KOKKOS_INTERNAL_OPENMP_FLAG :=
else
  KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
|
||||
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_IBM_XL_OMP45_WORKAROUND -qsmp=omp -qoffload -qnoeh
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp
|
||||
endif
|
||||
endif
|
||||
|
||||
# Set C++11 flags.
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_INTERNAL_CXX11_FLAG := --c++11
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
|
||||
KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
|
||||
KOKKOS_INTERNAL_CXX11_FLAG :=
|
||||
else
|
||||
KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
|
||||
KOKKOS_INTERNAL_CXX1Z_FLAG := --std=c++1z
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Check for Kokkos Architecture settings.
|
||||
|
||||
# Intel based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_WSM := $(strip $(shell echo $(KOKKOS_ARCH) | grep WSM | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l))
|
||||
|
||||
# NVIDIA based.
|
||||
NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler37 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell50 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal61 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal60 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
|
||||
CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=)
|
||||
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) --cuda-path=$(CUDA_PATH)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
# ARM based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX) | bc))
|
||||
|
||||
# IBM based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power9 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
|
||||
|
||||
# AMD based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(strip $(shell echo $(KOKKOS_ARCH) | grep Ryzen | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_EPYC := $(strip $(shell echo $(KOKKOS_ARCH) | grep Epyc | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kaveri | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(strip $(shell echo $(KOKKOS_ARCH) | grep Carrizo | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_FIJI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Fiji | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_VEGA := $(strip $(shell echo $(KOKKOS_ARCH) | grep Vega | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(strip $(shell echo $(KOKKOS_ARCH) | grep gfx901 | wc -l))
|
||||
|
||||
# Any AVX?
|
||||
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
|
||||
|
||||
# Decide what ISA level we are able to support.
|
||||
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM)+$(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER7) | bc ))
|
||||
|
||||
# Decide whether we can support transactional memory
|
||||
KOKKOS_INTERNAL_USE_TM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
|
||||
|
||||
# Incompatible flags?
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
|
||||
$(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1)
|
||||
$(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
|
||||
endif
|
||||
|
||||
# Generating the list of Flags.
|
||||
|
||||
KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
|
||||
|
||||
KOKKOS_CXXFLAGS =
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_WARNINGS)
|
||||
endif
|
||||
|
||||
KOKKOS_LIBS = -lkokkos -ldl
|
||||
# Add the directory make is running in to the library search path.
# $(CURDIR) is make's own record of that directory, so this recursively
# expanded variable no longer forks a shell every time it is referenced.
KOKKOS_LDFLAGS = -L$(CURDIR)
|
||||
KOKKOS_SRC =
|
||||
KOKKOS_HEADERS =
|
||||
|
||||
# Generating the KokkosCore_config.h file.
#
# The header is assembled in KokkosCore_config.tmp at parse time: the first
# $(shell ...) truncates the file and every later one appends a line. The "tmp"
# variable only exists to swallow the (empty) output of each call.

# A literal '#' for use inside $(shell ...). GNU Make before 4.3 requires '#'
# to be backslash-escaped inside a function call, whereas GNU Make 4.3 and
# later pass the backslash through to the shell, which would write "\#define"
# into the header. Expanding '#' from a variable behaves the same in both.
# NOTE(review): echo lines outside this section that still spell "\#" need the
# same treatment.
KOKKOS_INTERNAL_HASH := \#

tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp)
tmp := $(shell date >> KokkosCore_config.tmp)
tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp)

# Guard: the generated header may only be pulled in through Kokkos_Macros.hpp.
tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)' >> KokkosCore_config.tmp)
tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."' >> KokkosCore_config.tmp)
tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)else' >> KokkosCore_config.tmp)
tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)define KOKKOS_CORE_CONFIG_H' >> KokkosCore_config.tmp)
tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)endif' >> KokkosCore_config.tmp)

tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp)

ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)define KOKKOS_HAVE_CUDA 1' >> KokkosCore_config.tmp)
endif

ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)define KOKKOS_ENABLE_ROCM 1' >> KokkosCore_config.tmp)
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)define KOKKOS_ENABLE_OPENMPTARGET 1' >> KokkosCore_config.tmp)
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp)
endif

ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)define KOKKOS_HAVE_PTHREAD 1' >> KokkosCore_config.tmp)
endif

ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)define KOKKOS_HAVE_QTHREADS 1' >> KokkosCore_config.tmp)
endif

ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)define KOKKOS_HAVE_SERIAL 1' >> KokkosCore_config.tmp)
endif

# The host-only defines below are wrapped in "#ifndef __CUDA_ARCH__" in the
# generated header.
ifeq ($(KOKKOS_INTERNAL_USE_TM), 1)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)ifndef __CUDA_ARCH__' >> KokkosCore_config.tmp)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)define KOKKOS_ENABLE_TM' >> KokkosCore_config.tmp)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)endif' >> KokkosCore_config.tmp)
endif

ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)ifndef __CUDA_ARCH__' >> KokkosCore_config.tmp)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)define KOKKOS_USE_ISA_X86_64' >> KokkosCore_config.tmp)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)endif' >> KokkosCore_config.tmp)
endif

ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)ifndef __CUDA_ARCH__' >> KokkosCore_config.tmp)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)define KOKKOS_USE_ISA_KNC' >> KokkosCore_config.tmp)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)endif' >> KokkosCore_config.tmp)
endif

ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)ifndef __CUDA_ARCH__' >> KokkosCore_config.tmp)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)define KOKKOS_USE_ISA_POWERPCLE' >> KokkosCore_config.tmp)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)endif' >> KokkosCore_config.tmp)
endif

ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)ifndef __CUDA_ARCH__' >> KokkosCore_config.tmp)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)define KOKKOS_USE_ISA_POWERPCBE' >> KokkosCore_config.tmp)
  tmp := $(shell echo '$(KOKKOS_INTERNAL_HASH)endif' >> KokkosCore_config.tmp)
endif
|
||||
|
||||
tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp)
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
KOKKOS_CXXFLAGS += -lineinfo
|
||||
endif
|
||||
|
||||
KOKKOS_CXXFLAGS += -g
|
||||
KOKKOS_LDFLAGS += -g -ldl
|
||||
tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0)
|
||||
tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
|
||||
KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
|
||||
KOKKOS_LIBS += -lhwloc
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_LIBS += -lrt
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
|
||||
KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib
|
||||
KOKKOS_LIBS += -lmemkind -lnuma
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0)
|
||||
tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
tmp := $(shell echo "/* Optimization Settings */" >> KokkosCore_config.tmp)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += --relocatable-device-code=true
|
||||
KOKKOS_LDFLAGS += --relocatable-device-code=true
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -expt-extended-lambda
|
||||
else
|
||||
$(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_CLANG_WORKAROUND" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
endif
|
||||
|
||||
# Add Architecture flags.
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
KOKKOS_CXXFLAGS += -march=armv8-a
|
||||
KOKKOS_LDFLAGS += -march=armv8-a
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp )
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
KOKKOS_CXXFLAGS += -march=armv8.1-a
|
||||
KOKKOS_LDFLAGS += -march=armv8.1-a
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp )
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
KOKKOS_CXXFLAGS += -march=armv8-a -mtune=thunderx
|
||||
KOKKOS_LDFLAGS += -march=armv8-a -mtune=thunderx
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_SSE42 1" >> KokkosCore_config.tmp )
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xSSE4.2
|
||||
KOKKOS_LDFLAGS += -xSSE4.2
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS += -tp=nehalem
|
||||
KOKKOS_LDFLAGS += -tp=nehalem
|
||||
else
|
||||
# Assume that this really is a GNU compiler.
|
||||
KOKKOS_CXXFLAGS += -msse4.2
|
||||
KOKKOS_LDFLAGS += -msse4.2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp )
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -mavx
|
||||
KOKKOS_LDFLAGS += -mavx
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS += -tp=sandybridge
|
||||
KOKKOS_LDFLAGS += -tp=sandybridge
|
||||
else
|
||||
# Assume that this really is a GNU compiler.
|
||||
KOKKOS_CXXFLAGS += -mavx
|
||||
KOKKOS_LDFLAGS += -mavx
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER7 1" >> KokkosCore_config.tmp )
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
else
|
||||
# Assume that this really is a GNU compiler, or it could be XL on P8.
|
||||
KOKKOS_CXXFLAGS += -mcpu=power7 -mtune=power7
|
||||
KOKKOS_LDFLAGS += -mcpu=power7 -mtune=power7
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp )
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
|
||||
KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
|
||||
KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
|
||||
else
|
||||
# Assume that this really is a GNU compiler on P8.
|
||||
KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
|
||||
KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER9 1" >> KokkosCore_config.tmp )
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
|
||||
KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9
|
||||
KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
|
||||
else
|
||||
# Assume that this really is a GNU compiler on P9.
|
||||
KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9
|
||||
KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xCORE-AVX2
|
||||
KOKKOS_LDFLAGS += -xCORE-AVX2
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS += -tp=haswell
|
||||
KOKKOS_LDFLAGS += -tp=haswell
|
||||
else
|
||||
# Assume that this really is a GNU compiler.
|
||||
KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2
|
||||
KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Intel Broadwell: record AVX2 support in the generated config header and add
# the compiler-specific architecture flags.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
  # The literal '#' is kept in a variable: GNU Make 4.3 and later no longer
  # strip the backslash from "\#" inside a function call, so the old spelling
  # would write "\#define" into the header.
  KOKKOS_INTERNAL_HASH := \#
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xCORE-AVX2
    KOKKOS_LDFLAGS += -xCORE-AVX2
  else
    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
      # Cray: no architecture flags are added.
    else
      ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
        KOKKOS_CXXFLAGS += -tp=haswell
        KOKKOS_LDFLAGS += -tp=haswell
      else
        # Assume that this really is a GNU compiler. Unlike the Haswell
        # settings, -mrtm is added here as well.
        KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 -mrtm
        KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2 -mrtm
      endif
    endif
  endif
endif
|
||||
|
||||
# AVX512-MIC target: record the architecture in the generated config header
# and add the compiler-specific architecture flags.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
  # The literal '#' is kept in a variable: GNU Make 4.3 and later no longer
  # strip the backslash from "\#" inside a function call, so the old spelling
  # would write "\#define" into the header.
  KOKKOS_INTERNAL_HASH := \#
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp )

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xMIC-AVX512
    KOKKOS_LDFLAGS += -xMIC-AVX512
  else
    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
      # Cray: no architecture flags are added.
    else
      ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
        # PGI: no architecture flags are added.
      else
        # Assume that this is really a GNU compiler.
        KOKKOS_CXXFLAGS += -march=knl -mtune=knl
        KOKKOS_LDFLAGS += -march=knl -mtune=knl
      endif
    endif
  endif
endif
|
||||
|
||||
# AVX512 Xeon target: record the architecture in the generated config header
# and add the compiler-specific architecture flags.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
  # The literal '#' is kept in a variable: GNU Make 4.3 and later no longer
  # strip the backslash from "\#" inside a function call, so the old spelling
  # would write "\#define" into the header.
  KOKKOS_INTERNAL_HASH := \#
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp )

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xCORE-AVX512
    KOKKOS_LDFLAGS += -xCORE-AVX512
  else
    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
      # Cray: no architecture flags are added.
    else
      ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
        # PGI: no architecture flags are added.
      else
        # Any other compiler is given GNU-style flags.
        KOKKOS_CXXFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm
        KOKKOS_LDFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm
      endif
    endif
  endif
endif
|
||||
|
||||
# KNC target: record the architecture in the generated config header and add
# -mmic to both compile and link flags, regardless of the compiler.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
  # The literal '#' is kept in a variable: GNU Make 4.3 and later no longer
  # strip the backslash from "\#" inside a function call, so the old spelling
  # would write "\#define" into the header.
  KOKKOS_INTERNAL_HASH := \#
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp )
  KOKKOS_CXXFLAGS += -mmic
  KOKKOS_LDFLAGS += -mmic
endif
|
||||
|
||||
# Figure out the architecture flag for Cuda.
#
# The flag name depends on the compiler (-arch for NVCC, --cuda-gpu-arch for
# Clang); the selected GPU architecture then appends "=sm_XX" to it and writes
# the matching KOKKOS_ARCH_* defines into the generated config header.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  # The literal '#' is kept in a variable: GNU Make 4.3 and later no longer
  # strip the backslash from "\#" inside a function call, so the old spelling
  # would write "\#define" into the header.
  KOKKOS_INTERNAL_HASH := \#

  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch
  endif
  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch
    KOKKOS_CXXFLAGS += -x cuda
  endif

  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61
  endif

  # Only pass the flag on when an NVIDIA architecture was actually selected.
  ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
    KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)

    # The link line receives the flag only when NVCC is the compiler.
    ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
      KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
    endif
  endif
endif
|
||||
|
||||
# Figure out the architecture flag for ROCm.
#
# Each supported AMD GPU writes KOKKOS_ARCH_ROCM (with the gfx number) and a
# device-specific define into the generated config header and selects the
# matching --amdgpu-target. Compile and link flags are then taken from
# hcc-config, and the ROCm backend sources are added to the build.
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
  # The literal '#' is kept in a variable: GNU Make 4.3 and later no longer
  # strip the backslash from "\#" inside a function call, so the old spelling
  # would write "\#define" into the header.
  KOKKOS_INTERNAL_HASH := \#

  # Lets start with adding architecture defines
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KAVERI), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_ROCM 701" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KAVERI 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx701
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_CARRIZO), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_ROCM 801" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_CARRIZO 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx801
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_FIJI), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_ROCM 803" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_FIJI 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx803
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_ROCM 900" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_VEGA 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx900
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_GFX901), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_ROCM 901" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_GFX901 1" >> KokkosCore_config.tmp )
    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx901
  endif

  # Unless ROCM_HCC_PATH is already set, derive it from the location of $(CXX)
  # by stripping a trailing /bin/clang++.
  KOKKOS_INTERNAL_HCC_PATH := $(shell which $(CXX))
  ROCM_HCC_PATH ?= $(KOKKOS_INTERNAL_HCC_PATH:/bin/clang++=)

  KOKKOS_CXXFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --cxxflags)
  KOKKOS_LDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm
  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_ROCM_ARCH_FLAG)

  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.cpp)
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.hpp)
endif
|
||||
|
||||
# Refresh KokkosCore_config.h only when the freshly generated
# KokkosCore_config.tmp differs from it in at least one "define" line, or
# when no header exists yet.
KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1)

# Start from "header must be (re)written"; an existing header replaces this
# with the number of differing define lines.
KOKKOS_INTERNAL_NEW_CONFIG := 1
ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
  KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l))
endif

ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
  tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h)
endif
|
||||
|
||||
# Headers from the core, containers and algorithms packages, collected in the
# listed directory order.
KOKKOS_HEADERS += $(foreach d,core/src core/src/impl containers/src containers/src/impl algorithms/src,$(wildcard $(KOKKOS_PATH)/$(d)/*.hpp))

# Implementation sources of the core and containers packages.
KOKKOS_SRC += $(foreach d,core/src/impl containers/src/impl,$(wildcard $(KOKKOS_PATH)/$(d)/*.cpp))
|
||||
|
||||
# Cuda backend: toolkit include/library paths, runtime libraries, and the
# backend's own sources and headers.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include
  KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
  KOKKOS_LIBS += -lcudart -lcuda

  # Clang is additionally given the toolkit location via --cuda-path.
  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
    KOKKOS_CXXFLAGS += --cuda-path=$(CUDA_PATH)
  endif

  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
endif
|
||||
|
||||
# OpenMPTarget backend: two explicitly named sources, all backend headers,
# and the OpenMPTarget flag for compiling and linking.
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
  KOKKOS_SRC += $(addprefix $(KOKKOS_PATH)/core/src/OpenMPTarget/,Kokkos_OpenMPTarget_Exec.cpp Kokkos_OpenMPTargetSpace.cpp)
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.hpp)

  # When Cuda is enabled too, the compile flag is prefixed with -Xcompiler.
  ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
    KOKKOS_CXXFLAGS += -Xcompiler
  endif
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)

  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
endif
|
||||
|
||||
# OpenMP backend: backend sources and headers plus the OpenMP flag for
# compiling and linking.
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)

  # Under NVCC the compile flag is prefixed with -Xcompiler.
  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
    KOKKOS_CXXFLAGS += -Xcompiler
  endif
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)

  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
endif
|
||||
|
||||
# Pthreads backend: link against libpthread and build the Threads sources.
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
  KOKKOS_LIBS += -lpthread
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
endif
|
||||
|
||||
# Qthreads backend: include/library paths under QTHREADS_PATH, libqthread,
# and the backend's own sources and headers.
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
  KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include
  KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib
  KOKKOS_LIBS += -lqthread
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp)
endif
|
||||
|
||||
# Explicitly set the GCC Toolchain for Clang.
#
# The toolchain root is the location of the first g++ on PATH with the
# trailing /bin/g++ removed. Both variables are simply expanded (:=) so that
# `which g++` runs once at parse time instead of every time the compile or
# link flags are expanded.
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
  KOKKOS_INTERNAL_GCC_PATH := $(shell which g++)
  KOKKOS_INTERNAL_GCC_TOOLCHAIN := $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=)
  KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN)
  KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN)
endif
|
||||
|
||||
# Drop sources for features that are not enabled; compiling them anyway
# produces link warnings.

# Kokkos_HBWSpace.cpp is only built when MEMKIND is in use.
ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
  KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC))
endif

# Kokkos_Profiling_Interface.cpp is left out when profiling is disabled.
ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 1)
  KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp,$(KOKKOS_SRC))
endif

# Kokkos_Serial.cpp and Kokkos_Serial_Task.cpp are only built for the Serial
# device.
ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
  KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp,$(KOKKOS_SRC))
endif
|
||||
|
||||
# Cygwin hides functions such as fdopen and fileno when strict ANSI mode is
# active, and --std=c++11 turns strict ANSI mode on. Force-undefine the macro
# here; whether this has bad side effects is not known. gtest needs this,
# Kokkos itself does not.
ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1)
  KOKKOS_CXXFLAGS += -U__STRICT_ANSI__
endif
|
||||
|
||||
# Setting up dependencies.

# Rule without prerequisites or recipe: makes the generated header a known
# target so that it can be listed as a prerequisite below.
KokkosCore_config.h:

# Everything a Kokkos translation unit depends on.
KOKKOS_CPP_DEPENDS := KokkosCore_config.h $(KOKKOS_HEADERS)

# Object files: once with their source directories, once as bare file names
# (the form that is archived and cleaned).
KOKKOS_OBJ = $(KOKKOS_SRC:.cpp=.o)
KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ))

include $(KOKKOS_PATH)/Makefile.targets

# kokkos-clean names an action, not a file; declare it phony so that a stray
# file called "kokkos-clean" cannot make the target look up to date.
.PHONY: kokkos-clean
kokkos-clean:
	rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a

# Static library built from all Kokkos objects.
libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
	ar cr $@ $(KOKKOS_OBJ_LINK)
	ranlib $@

KOKKOS_LINK_DEPENDS=libkokkos.a
|
||||
296
lib/kokkos/bin/nvcc_wrapper
Executable file
296
lib/kokkos/bin/nvcc_wrapper
Executable file
@ -0,0 +1,296 @@
|
||||
#!/bin/bash
#
# This shell script (nvcc_wrapper) wraps both the host compiler and
# NVCC, if you are building legacy C or C++ code with CUDA enabled.
# The script remedies some differences between the interface of NVCC
# and that of the host compiler, in particular for linking.
# It also means that a legacy code doesn't need separate .cu files;
# it can just use .cpp files.
#
# Default settings: change those according to your machine. For
# example, you may have two different wrappers with either icpc
# or g++ as their back-end compiler. The defaults can be overwritten
# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc).

default_arch="sm_61"
#default_arch="sm_50"

#
# The default C++ compiler.
#
host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
#host_compiler="icpc"
#host_compiler="/usr/local/gcc/4.8.3/bin/g++"
#host_compiler="/usr/local/gcc/4.9.1/bin/g++"

#
# Internal variables
#

# C++ files
cpp_files=""

# Host compiler arguments
xcompiler_args=""

# Cuda (NVCC) only arguments
cuda_args=""

# Arguments for both NVCC and Host compiler
shared_args=""

# Linker arguments
xlinker_args=""

# Linker arguments for the host-only command. Initialised explicitly so that
# an environment variable of the same name cannot leak into the command line.
host_linker_args=""

# Object files passable to NVCC
object_files=""

# Link objects for the host linker only
object_files_xlinker=""

# Shared libraries with version numbers are not handled correctly by NVCC
shared_versioned_libraries_host=""
shared_versioned_libraries=""

# Does the user set the architecture
arch_set=0

# Does the user overwrite the host compiler
ccbin_set=0

# Error code of compilation
error_code=0

# Do a dry run without actually compiling
dry_run=0

# Skip NVCC compilation and use host compiler directly
host_only=0

# Enable workaround for CUDA 6.5 for pragma ident
replace_pragma_ident=0

# Mark first host compiler argument
first_xcompiler_arg=1

# Directory for temporary files; TMPDIR is honoured when set
temp_dir=${TMPDIR:-/tmp}

# Check if we have an optimization argument already
optimization_applied=0

# Check if we have -std=c++X or --std=c++X already
stdcxx_applied=0

#echo "Arguments: $# $@"
|
||||
|
||||
# Walk over all command-line arguments and sort each one into the bucket it
# belongs to: source files, nvcc-only arguments, arguments shared by nvcc and
# the host compiler, host-compiler-only arguments, linker arguments and
# objects/libraries. Options that take a value consume the following
# argument with an extra shift.
while [ $# -gt 0 ]
do
  case $1 in
  #show the executed command
  --show|--nvcc-wrapper-show)
    dry_run=1
    ;;
  #run host compilation only
  --host-only)
    host_only=1
    ;;
  #replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros
  --replace-pragma-ident)
    replace_pragma_ident=1
    ;;
  #handle source files to be compiled as cuda files
  *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
    cpp_files="$cpp_files $1"
    ;;
  # Ensure we only have one optimization flag because NVCC doesn't allow multiple
  -O*)
    if [ $optimization_applied -eq 1 ]; then
      echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used because nvcc can only accept a single optimization setting."
    else
      shared_args="$shared_args $1"
      optimization_applied=1
    fi
    ;;
  #Handle shared args (valid for both nvcc and the host compiler)
  -D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
    shared_args="$shared_args $1"
    ;;
  #Handle shared args that have an argument
  -o|-MT)
    shared_args="$shared_args $1 $2"
    shift
    ;;
  #Handle known nvcc args
  -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*)
    cuda_args="$cuda_args $1"
    ;;
  #Handle more known nvcc args
  --expt-extended-lambda|--expt-relaxed-constexpr)
    cuda_args="$cuda_args $1"
    ;;
  #Handle known nvcc args that have an argument
  -rdc|-maxrregcount|--default-stream)
    cuda_args="$cuda_args $1 $2"
    shift
    ;;
  #Handle the C++ standard selection (c++11, c++14, c++1z); only the first one is kept
  --std=c++11|-std=c++11|--std=c++14|-std=c++14|--std=c++1z|-std=c++1z)
    if [ $stdcxx_applied -eq 1 ]; then
      echo "nvcc_wrapper - *warning* you have set multiple standard flags (-std=c++1* or --std=c++1*), only the first is used because nvcc can only accept a single std setting"
    else
      shared_args="$shared_args $1"
      stdcxx_applied=1
    fi
    ;;

  #strip off -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98
  -std=c++98|--std=c++98)
    ;;
  #strip off pedantic because it produces endless warnings about #LINE added by the preprocessor
  -pedantic|-Wpedantic|-ansi)
    ;;
  #strip off -Woverloaded-virtual to avoid "cc1: warning: command line option '-Woverloaded-virtual' is valid for C++/ObjC++ but not for C"
  -Woverloaded-virtual)
    ;;
  #strip -Xcompiler because we add it
  -Xcompiler)
    if [ $first_xcompiler_arg -eq 1 ]; then
      xcompiler_args="$2"
      first_xcompiler_arg=0
    else
      xcompiler_args="$xcompiler_args,$2"
    fi
    shift
    ;;
  #strip off "-x cu" because we add that; any other "-x <lang>" goes to the host compiler
  -x)
    if [[ $2 != "cu" ]]; then
      if [ $first_xcompiler_arg -eq 1 ]; then
        xcompiler_args="-x,$2"
        first_xcompiler_arg=0
      else
        xcompiler_args="$xcompiler_args,-x,$2"
      fi
    fi
    shift
    ;;
  #Handle -ccbin (if it's not set we can set it to a default value)
  -ccbin)
    cuda_args="$cuda_args $1 $2"
    ccbin_set=1
    host_compiler=$2
    shift
    ;;
  #Handle -arch argument (if it's not set use a default)
  -arch*)
    cuda_args="$cuda_args $1"
    arch_set=1
    ;;
  #Handle -Xcudafe argument
  -Xcudafe)
    cuda_args="$cuda_args -Xcudafe $2"
    shift
    ;;
  #Handle args that should be sent to the linker. The comma is part of the
  #pattern so that warning options starting with "-Wl" (e.g. -Wlogical-op)
  #are not mistaken for linker arguments. ${1:4} drops the "-Wl," prefix.
  -Wl,*)
    xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
    host_linker_args="$host_linker_args ${1:4:${#1}}"
    ;;
  #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
  *.a|*.so|*.o|*.obj)
    object_files="$object_files $1"
    object_files_xlinker="$object_files_xlinker -Xlinker $1"
    ;;
  #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking
  @*|*.dylib)
    object_files="$object_files -Xlinker $1"
    object_files_xlinker="$object_files_xlinker -Xlinker $1"
    ;;
  #Handle shared libraries with *.so.* names which nvcc can't do.
  *.so.*)
    shared_versioned_libraries_host="$shared_versioned_libraries_host $1"
    shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1"
    ;;
  #All other args are sent to the host compiler
  *)
    if [ $first_xcompiler_arg -eq 1 ]; then
      xcompiler_args=$1
      first_xcompiler_arg=0
    else
      xcompiler_args="$xcompiler_args,$1"
    fi
    ;;
  esac

  shift
done
|
||||
|
||||
# Fall back to the default host compiler and GPU architecture when the
# command line did not choose them.
[ $ccbin_set -ne 1 ] && cuda_args="$cuda_args -ccbin $host_compiler"
[ $arch_set -ne 1 ] && cuda_args="$cuda_args -arch=$default_arch"

# nvcc command line; host-compiler arguments are forwarded through a single
# comma-separated -Xcompiler list, and only if there are any.
nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries"
if [ $first_xcompiler_arg -eq 0 ]; then
  nvcc_command="$nvcc_command -Xcompiler $xcompiler_args"
fi

# Host-only command line
host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host"

# nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept
# '#ident SOME_MACRO_STRING'. On request, sources containing such a pragma
# are compiled from a rewritten copy placed in the temporary directory.
if [ $replace_pragma_ident -eq 1 ]; then
  rewritten_files=""
  for src in $cpp_files
  do
    pragma_lines=$(grep pragma ${src} | grep ident | grep "#")
    if [ -n "$pragma_lines" ]; then
      sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $src > $temp_dir/nvcc_wrapper_tmp_$src
      rewritten_files="$rewritten_files $temp_dir/nvcc_wrapper_tmp_$src"
    else
      rewritten_files="$rewritten_files $src"
    fi
  done
  cpp_files=$rewritten_files
  #echo $cpp_files
fi

# With sources present, nvcc receives the objects via -Xlinker because
# "-x cu" applies to every input file; when only linking, the objects are
# passed as they are.
if [ "$cpp_files" ]; then
  nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files"
  host_command="$host_command $object_files $cpp_files"
else
  nvcc_command="$nvcc_command $object_files"
  host_command="$host_command $object_files"
fi

# Pick the command that this invocation is about.
if [ $host_only -eq 1 ]; then
  selected_command=$host_command
else
  selected_command=$nvcc_command
fi

# Dry run: print the command and stop.
if [ $dry_run -eq 1 ]; then
  echo $selected_command
  exit 0
fi

# Run the compilation and hand its exit status back to the caller.
$selected_command
error_code=$?

exit $error_code
|
||||
Reference in New Issue
Block a user