rework Install.py for gpu library. make it consistent with other lib folders and support python3
This commit is contained in:
10
lib/gpu/.gitignore
vendored
10
lib/gpu/.gitignore
vendored
@ -1,4 +1,6 @@
|
||||
obj
|
||||
obj_ocl
|
||||
ocl_get_devices
|
||||
nvc_get_devices
|
||||
/obj
|
||||
/obj_ocl
|
||||
/ocl_get_devices
|
||||
/nvc_get_devices
|
||||
/*.cubin
|
||||
/*_cubin.h
|
||||
|
||||
@ -3,53 +3,57 @@
|
||||
# Install.py tool to build the GPU library
|
||||
# used to automate the steps described in the README file in this dir
|
||||
|
||||
import sys,os,re,commands
|
||||
from __future__ import print_function
|
||||
import sys,os,subprocess
|
||||
|
||||
# help message
|
||||
|
||||
help = """
|
||||
Syntax from src dir: make lib-gpu args="-i isuffix -h hdir -a arch -p precision -e esuffix -m -o osuffix"
|
||||
Syntax from lib dir: python Install.py -i isuffix -h hdir -a arch -p precision -e esuffix -m -o osuffix
|
||||
Syntax from src dir: make lib-gpu args="-m machine -h hdir -a arch -p precision -e esuffix -b -o osuffix"
|
||||
Syntax from lib dir: python Install.py -m machine -h hdir -a arch -p precision -e esuffix -b -o osuffix
|
||||
|
||||
specify one or more options, order does not matter
|
||||
|
||||
copies an existing Makefile.isuffix in lib/gpu to Makefile.auto
|
||||
copies an existing Makefile.machine in lib/gpu to Makefile.auto
|
||||
optionally edits these variables in Makefile.auto:
|
||||
CUDA_HOME, CUDA_ARCH, CUDA_PRECISION, EXTRAMAKE
|
||||
optionally uses Makefile.auto to build the GPU library -> libgpu.a
|
||||
and to copy a Makefile.lammps.esuffix -> Makefile.lammps
|
||||
optionally copies Makefile.auto to a new Makefile.osuffix
|
||||
|
||||
-i = use Makefile.isuffix as starting point, copy to Makefile.auto
|
||||
default isuffix = linux
|
||||
-m = use Makefile.machine as starting point, copy to Makefile.auto
|
||||
default machine = linux
|
||||
-h = set CUDA_HOME variable in Makefile.auto to hdir
|
||||
hdir = path to NVIDIA Cuda software, e.g. /usr/local/cuda
|
||||
-a = set CUDA_ARCH variable in Makefile.auto to arch
|
||||
use arch = ?? for K40 (Tesla)
|
||||
use arch = 37 for dual K80 (Tesla)
|
||||
use arch = 60 for P100 (Pascal)
|
||||
use arch = 20 for Tesla C2050/C2070 (Fermi) (deprecated as of CUDA 8.0)
|
||||
or GeForce GTX 580 or similar
|
||||
use arch = 30 for Tesla K10 (Kepler)
|
||||
use arch = 35 for Tesla K40 (Kepler) or GeForce GTX Titan or similar
|
||||
use arch = 37 for Tesla dual K80 (Kepler)
|
||||
use arch = 60 for Tesla P100 (Pascal)
|
||||
-p = set CUDA_PRECISION variable in Makefile.auto to precision
|
||||
use precision = double or mixed or single
|
||||
-e = set EXTRAMAKE variable in Makefile.auto to Makefile.lammps.esuffix
|
||||
-m = make the GPU library using Makefile.auto
|
||||
-b = make the GPU library using Makefile.auto
|
||||
first performs a "make clean"
|
||||
produces libgpu.a if successful
|
||||
then produces libgpu.a if successful
|
||||
also copies EXTRAMAKE file -> Makefile.lammps
|
||||
-e can set which Makefile.lammps.esuffix file is copied
|
||||
-o = copy final Makefile.auto to Makefile.osuffix
|
||||
|
||||
Examples:
|
||||
|
||||
make lib-gpu args="-m" # build GPU lib with default Makefile.linux
|
||||
make lib-gpu args="-i xk7 -p single -o xk7.single" # create new Makefile.xk7.single, altered for single-precision
|
||||
make lib-gpu args="-i xk7 -p single -o xk7.single -m" # ditto, also build GPU lib
|
||||
make lib-gpu args="-b" # build GPU lib with default Makefile.linux
|
||||
make lib-gpu args="-m xk7 -p single -o xk7.single" # create new Makefile.xk7.single, altered for single-precision
|
||||
make lib-gpu args="-m mpi -a 35 -p single -o mpi.mixed -b" # create new Makefile.mpi.mixed, also build GPU lib with these settings
|
||||
"""
|
||||
|
||||
# print error message or help
|
||||
|
||||
def error(str=None):
|
||||
if not str: print help
|
||||
else: print "ERROR",str
|
||||
if not str: print(help)
|
||||
else: print("ERROR",str)
|
||||
sys.exit()
|
||||
|
||||
# parse args
|
||||
@ -65,7 +69,7 @@ outflag = 0
|
||||
|
||||
iarg = 0
|
||||
while iarg < nargs:
|
||||
if args[iarg] == "-i":
|
||||
if args[iarg] == "-m":
|
||||
if iarg+2 > nargs: error()
|
||||
isuffix = args[iarg+1]
|
||||
iarg += 2
|
||||
@ -89,7 +93,7 @@ while iarg < nargs:
|
||||
eflag = 1
|
||||
lmpsuffix = args[iarg+1]
|
||||
iarg += 2
|
||||
elif args[iarg] == "-m":
|
||||
elif args[iarg] == "-b":
|
||||
makeflag = 1
|
||||
iarg += 1
|
||||
elif args[iarg] == "-o":
|
||||
@ -117,9 +121,9 @@ fp = open("Makefile.auto",'w')
|
||||
for line in lines:
|
||||
words = line.split()
|
||||
if len(words) != 3:
|
||||
print >>fp,line,
|
||||
fp.write(line)
|
||||
continue
|
||||
|
||||
|
||||
if hflag and words[0] == "CUDA_HOME" and words[1] == '=':
|
||||
line = line.replace(words[2],hdir)
|
||||
if aflag and words[0] == "CUDA_ARCH" and words[1] == '=':
|
||||
@ -128,20 +132,20 @@ for line in lines:
|
||||
line = line.replace(words[2],precstr)
|
||||
if eflag and words[0] == "EXTRAMAKE" and words[1] == '=':
|
||||
line = line.replace(words[2],"Makefile.lammps.%s" % lmpsuffix)
|
||||
|
||||
print >>fp,line,
|
||||
|
||||
fp.write(line)
|
||||
fp.close()
|
||||
|
||||
# perform make
|
||||
# the make operation also copies the EXTRAMAKE file to Makefile.lammps
|
||||
|
||||
if makeflag:
|
||||
print "Building libgpu.a ..."
|
||||
print("Building libgpu.a ...")
|
||||
cmd = "rm -f libgpu.a"
|
||||
commands.getoutput(cmd)
|
||||
subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
|
||||
cmd = "make -f Makefile.auto clean; make -f Makefile.auto"
|
||||
commands.getoutput(cmd)
|
||||
txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
|
||||
print(txt.decode('UTF-8'))
|
||||
if not os.path.exists("libgpu.a"):
|
||||
error("Build of lib/gpu/libgpu.a was NOT successful")
|
||||
if not os.path.exists("Makefile.lammps"):
|
||||
@ -150,6 +154,6 @@ if makeflag:
|
||||
# copy new Makefile.auto to Makefile.osuffix
|
||||
|
||||
if outflag:
|
||||
print "Creating new Makefile.%s" % osuffix
|
||||
print("Creating new Makefile.%s" % osuffix)
|
||||
cmd = "cp Makefile.auto Makefile.%s" % osuffix
|
||||
commands.getoutput(cmd)
|
||||
subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
|
||||
|
||||
@ -37,7 +37,7 @@ CUDA_INCLUDE = -I$(CUDA_HOME)/include
|
||||
CUDA_LIB = -L$(CUDA_HOME)/lib64
|
||||
CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC)
|
||||
|
||||
CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC
|
||||
CUDR_CPP = mpicxx -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC
|
||||
CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias
|
||||
|
||||
BIN_DIR = ./
|
||||
|
||||
@ -1,17 +0,0 @@
|
||||
CUDA_HOME = ../../tools/mingw-cross/OpenCL
|
||||
|
||||
OCL_CPP = i686-w64-mingw32-g++ -O2 -march=i686 -mtune=generic -mfpmath=387 \
|
||||
-mpc64 -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS \
|
||||
-I$(CUDA_HOME)/include
|
||||
OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw32 -Wl,-Bdynamic,-lOpenCL,-Bstatic -L../../src/STUBS -lmpi_mingw32
|
||||
OCL_PREC = -D_SINGLE_DOUBLE
|
||||
OCL_TUNE = -DFERMI_OCL
|
||||
EXTRAMAKE = Makefile.lammps.mingw-cross
|
||||
|
||||
BIN_DIR = Obj_mingw32
|
||||
OBJ_DIR = Obj_mingw32
|
||||
LIB_DIR = Obj_mingw32
|
||||
AR = i686-w64-mingw32-ar
|
||||
BSH = /bin/sh
|
||||
|
||||
include Opencl.makefile
|
||||
@ -1,19 +0,0 @@
|
||||
CUDA_HOME = ../../tools/mingw-cross/OpenCL
|
||||
|
||||
OCL_CPP = i686-w64-mingw32-g++ -O2 -march=i686 -mtune=generic -mfpmath=387 \
|
||||
-mpc64 -DMPI_GERYON -DUCL_NO_EXIT -I$(CUDA_HOME)/include \
|
||||
-I../../tools/mingw-cross/mpich2-win32/include/ \
|
||||
-DMPICH_IGNORE_CXX_SEEK
|
||||
OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw32 -Wl,-Bdynamic,-lOpenCL,-Bstatic \
|
||||
-L../../tools/mingw-cross/mpich2-win32/lib -lmpi
|
||||
OCL_PREC = -D_SINGLE_DOUBLE
|
||||
OCL_TUNE = -DFERMI_OCL
|
||||
EXTRAMAKE = Makefile.lammps.mingw-cross
|
||||
|
||||
BIN_DIR = Obj_mingw32-mpi
|
||||
OBJ_DIR = Obj_mingw32-mpi
|
||||
LIB_DIR = Obj_mingw32-mpi
|
||||
AR = i686-w64-mingw32-ar
|
||||
BSH = /bin/sh
|
||||
|
||||
include Opencl.makefile
|
||||
@ -1,18 +0,0 @@
|
||||
CUDA_HOME = ../../tools/mingw-cross/OpenCL
|
||||
|
||||
OCL_CPP = x86_64-w64-mingw32-g++ -O3 -march=core2 -mtune=core2 -mpc64 \
|
||||
-msse2 -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS \
|
||||
-I$(CUDA_HOME)/include
|
||||
OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw64 -Wl,-Bdynamic,-lOpenCL,-Bstatic \
|
||||
-L../../src/STUBS -lmpi_mingw64
|
||||
OCL_PREC = -D_SINGLE_DOUBLE
|
||||
OCL_TUNE = -DFERMI_OCL
|
||||
EXTRAMAKE = Makefile.lammps.mingw-cross
|
||||
|
||||
BIN_DIR = Obj_mingw64
|
||||
OBJ_DIR = Obj_mingw64
|
||||
LIB_DIR = Obj_mingw64
|
||||
AR = x86_64-w64-mingw32-ar
|
||||
BSH = /bin/sh
|
||||
|
||||
include Opencl.makefile
|
||||
@ -1,20 +0,0 @@
|
||||
CUDA_HOME = ../../tools/mingw-cross/OpenCL
|
||||
|
||||
OCL_CPP = x86_64-w64-mingw32-g++ -O3 -march=core2 -mtune=core2 -mpc64 \
|
||||
-msse2 -DMPI_GERYON -DUCL_NO_EXIT -I$(CUDA_HOME)/include \
|
||||
-I../../tools/mingw-cross/mpich2-win64/include/ \
|
||||
-DMPICH_IGNORE_CXX_SEEK
|
||||
|
||||
OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw64 -Wl,-Bdynamic,-lOpenCL,-Bstatic \
|
||||
-L../../tools/mingw-cross/mpich2-win64/lib -lmpi
|
||||
OCL_PREC = -D_SINGLE_DOUBLE
|
||||
OCL_TUNE = -DFERMI_OCL
|
||||
EXTRAMAKE = Makefile.lammps.mingw-cross
|
||||
|
||||
BIN_DIR = Obj_mingw64-mpi
|
||||
OBJ_DIR = Obj_mingw64-mpi
|
||||
LIB_DIR = Obj_mingw64-mpi
|
||||
AR = x86_64-w64-mingw32-ar
|
||||
BSH = /bin/sh
|
||||
|
||||
include Opencl.makefile
|
||||
1
lib/gpu/Makefile.mpi
Symbolic link
1
lib/gpu/Makefile.mpi
Symbolic link
@ -0,0 +1 @@
|
||||
Makefile.linux
|
||||
@ -1,5 +1,5 @@
|
||||
# /* ----------------------------------------------------------------------
|
||||
# Generic Makefile for CUDA using MPI STUBS library
|
||||
# Generic Linux Makefile for CUDA
|
||||
# - Change CUDA_ARCH for your GPU
|
||||
# ------------------------------------------------------------------------- */
|
||||
|
||||
@ -7,23 +7,38 @@
|
||||
|
||||
EXTRAMAKE = Makefile.lammps.standard
|
||||
|
||||
CUDA_HOME = $(HOME)/cuda
|
||||
ifeq ($(CUDA_HOME),)
|
||||
CUDA_HOME = /usr/local/cuda
|
||||
endif
|
||||
|
||||
NVCC = nvcc
|
||||
|
||||
# Tesla CUDA
|
||||
CUDA_ARCH = -arch=sm_20
|
||||
CUDA_ARCH = -arch=sm_21
|
||||
# newer CUDA
|
||||
#CUDA_ARCH = -arch=sm_13
|
||||
# older CUDA
|
||||
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
|
||||
CUDA_ARCH = -arch=sm_35
|
||||
|
||||
# this setting should match LAMMPS Makefile
|
||||
# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
|
||||
|
||||
LMP_INC = -DLAMMPS_SMALLBIG
|
||||
|
||||
# precision for GPU calculations
|
||||
# -D_SINGLE_SINGLE # Single precision for all calculations
|
||||
# -D_DOUBLE_DOUBLE # Double precision for all calculations
|
||||
# -D_SINGLE_DOUBLE # Accumulation of forces, etc. in double
|
||||
|
||||
CUDA_PRECISION = -D_SINGLE_DOUBLE
|
||||
CUDA_INCLUDE = -I$(CUDA_HOME)/include
|
||||
CUDA_LIB = -L$(CUDA_HOME)/lib64 -L../../src/STUBS -lmpi
|
||||
CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math
|
||||
|
||||
CUDR_CPP = g++ -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS
|
||||
CUDR_OPTS = -O2
|
||||
CUDA_INCLUDE = -I$(CUDA_HOME)/include
|
||||
CUDA_LIB = -L$(CUDA_HOME)/lib64 -L../../src/STUBS -lmpi_stubs
|
||||
CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC)
|
||||
|
||||
CUDR_CPP = g++ -DMPI_GERYON -DUCL_NO_EXIT -fPIC -I../../src/STUBS
|
||||
CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias
|
||||
|
||||
BIN_DIR = ./
|
||||
OBJ_DIR = ./
|
||||
@ -31,5 +46,7 @@ LIB_DIR = ./
|
||||
AR = ar
|
||||
BSH = /bin/sh
|
||||
|
||||
CUDPP_OPT = -DUSE_CUDPP -Icudpp_mini
|
||||
|
||||
include Nvidia.makefile
|
||||
|
||||
|
||||
Reference in New Issue
Block a user