rework Install.py for gpu library. make it consistent with other lib folders and support python3

This commit is contained in:
Axel Kohlmeyer
2017-08-08 16:57:27 -04:00
parent c53a84a967
commit 30431d4edb
9 changed files with 64 additions and 114 deletions

10
lib/gpu/.gitignore vendored
View File

@ -1,4 +1,6 @@
obj
obj_ocl
ocl_get_devices
nvc_get_devices
/obj
/obj_ocl
/ocl_get_devices
/nvc_get_devices
/*.cubin
/*_cubin.h

View File

@ -3,53 +3,57 @@
# Install.py tool to build the GPU library
# used to automate the steps described in the README file in this dir
import sys,os,re,commands
from __future__ import print_function
import sys,os,subprocess
# help message
help = """
Syntax from src dir: make lib-gpu args="-i isuffix -h hdir -a arch -p precision -e esuffix -m -o osuffix"
Syntax from lib dir: python Install.py -i isuffix -h hdir -a arch -p precision -e esuffix -m -o osuffix
Syntax from src dir: make lib-gpu args="-m machine -h hdir -a arch -p precision -e esuffix -m -o osuffix"
Syntax from lib dir: python Install.py -m machine -h hdir -a arch -p precision -e esuffix -m -o osuffix
specify one or more options, order does not matter
copies an existing Makefile.isuffix in lib/gpu to Makefile.auto
copies an existing Makefile.machine in lib/gpu to Makefile.auto
optionally edits these variables in Makefile.auto:
CUDA_HOME, CUDA_ARCH, CUDA_PRECISION, EXTRAMAKE
optionally uses Makefile.auto to build the GPU library -> libgpu.a
and to copy a Makefile.lammps.esuffix -> Makefile.lammps
optionally copies Makefile.auto to a new Makefile.osuffix
-i = use Makefile.isuffix as starting point, copy to Makefile.auto
default isuffix = linux
-m = use Makefile.machine as starting point, copy to Makefile.auto
default machine = linux
-h = set CUDA_HOME variable in Makefile.auto to hdir
hdir = path to NVIDIA Cuda software, e.g. /usr/local/cuda
-a = set CUDA_ARCH variable in Makefile.auto to arch
use arch = ?? for K40 (Tesla)
use arch = 37 for dual K80 (Tesla)
use arch = 60 for P100 (Pascal)
use arch = 20 for Tesla C2050/C2070 (Fermi) (deprecated as of CUDA 8.0)
or GeForce GTX 580 or similar
use arch = 30 for Tesla K10 (Kepler)
use arch = 35 for Tesla K40 (Kepler) or GeForce GTX Titan or similar
use arch = 37 for Tesla dual K80 (Kepler)
use arch = 60 for Tesla P100 (Pascal)
-p = set CUDA_PRECISION variable in Makefile.auto to precision
use precision = double or mixed or single
-e = set EXTRAMAKE variable in Makefile.auto to Makefile.lammps.esuffix
-m = make the GPU library using Makefile.auto
-b = make the GPU library using Makefile.auto
first performs a "make clean"
produces libgpu.a if successful
then produces libgpu.a if successful
also copies EXTRAMAKE file -> Makefile.lammps
-e can set which Makefile.lammps.esuffix file is copied
-o = copy final Makefile.auto to Makefile.osuffix
Examples:
make lib-gpu args="-m" # build GPU lib with default Makefile.linux
make lib-gpu args="-i xk7 -p single -o xk7.single" # create new Makefile.xk7.single, altered for single-precision
make lib-gpu args="-i xk7 -p single -o xk7.single -m" # ditto, also build GPU lib
make lib-gpu args="-b" # build GPU lib with default Makefile.linux
make lib-gpu args="-m xk7 -p single -o xk7.single" # create new Makefile.xk7.single, altered for single-precision
make lib-gpu args="-m mpi -a 35 -p single -o mpi.mixed -b" # create new Makefile.mpi.mixed, also build GPU lib with these settings
"""
# print error message or help
def error(str=None):
if not str: print help
else: print "ERROR",str
if not str: print(help)
else: print("ERROR",str)
sys.exit()
# parse args
@ -65,7 +69,7 @@ outflag = 0
iarg = 0
while iarg < nargs:
if args[iarg] == "-i":
if args[iarg] == "-m":
if iarg+2 > nargs: error()
isuffix = args[iarg+1]
iarg += 2
@ -89,7 +93,7 @@ while iarg < nargs:
eflag = 1
lmpsuffix = args[iarg+1]
iarg += 2
elif args[iarg] == "-m":
elif args[iarg] == "-b":
makeflag = 1
iarg += 1
elif args[iarg] == "-o":
@ -117,9 +121,9 @@ fp = open("Makefile.auto",'w')
for line in lines:
words = line.split()
if len(words) != 3:
print >>fp,line,
fp.write(line)
continue
if hflag and words[0] == "CUDA_HOME" and words[1] == '=':
line = line.replace(words[2],hdir)
if aflag and words[0] == "CUDA_ARCH" and words[1] == '=':
@ -128,20 +132,20 @@ for line in lines:
line = line.replace(words[2],precstr)
if eflag and words[0] == "EXTRAMAKE" and words[1] == '=':
line = line.replace(words[2],"Makefile.lammps.%s" % lmpsuffix)
print >>fp,line,
fp.write(line)
fp.close()
# perform make
# make operations copies EXTRAMAKE file to Makefile.lammps
if makeflag:
print "Building libgpu.a ..."
print("Building libgpu.a ...")
cmd = "rm -f libgpu.a"
commands.getoutput(cmd)
subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
cmd = "make -f Makefile.auto clean; make -f Makefile.auto"
commands.getoutput(cmd)
txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
print(txt.decode('UTF-8'))
if not os.path.exists("libgpu.a"):
error("Build of lib/gpu/libgpu.a was NOT successful")
if not os.path.exists("Makefile.lammps"):
@ -150,6 +154,6 @@ if makeflag:
# copy new Makefile.auto to Makefile.osuffix
if outflag:
print "Creating new Makefile.%s" % osuffix
print("Creating new Makefile.%s" % osuffix)
cmd = "cp Makefile.auto Makefile.%s" % osuffix
commands.getoutput(cmd)
subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)

View File

@ -37,7 +37,7 @@ CUDA_INCLUDE = -I$(CUDA_HOME)/include
CUDA_LIB = -L$(CUDA_HOME)/lib64
CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC)
CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC
CUDR_CPP = mpicxx -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC
CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias
BIN_DIR = ./

View File

@ -1,17 +0,0 @@
CUDA_HOME = ../../tools/mingw-cross/OpenCL
OCL_CPP = i686-w64-mingw32-g++ -O2 -march=i686 -mtune=generic -mfpmath=387 \
-mpc64 -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS \
-I$(CUDA_HOME)/include
OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw32 -Wl,-Bdynamic,-lOpenCL,-Bstatic -L../../src/STUBS -lmpi_mingw32
OCL_PREC = -D_SINGLE_DOUBLE
OCL_TUNE = -DFERMI_OCL
EXTRAMAKE = Makefile.lammps.mingw-cross
BIN_DIR = Obj_mingw32
OBJ_DIR = Obj_mingw32
LIB_DIR = Obj_mingw32
AR = i686-w64-mingw32-ar
BSH = /bin/sh
include Opencl.makefile

View File

@ -1,19 +0,0 @@
CUDA_HOME = ../../tools/mingw-cross/OpenCL
OCL_CPP = i686-w64-mingw32-g++ -O2 -march=i686 -mtune=generic -mfpmath=387 \
-mpc64 -DMPI_GERYON -DUCL_NO_EXIT -I$(CUDA_HOME)/include \
-I../../tools/mingw-cross/mpich2-win32/include/ \
-DMPICH_IGNORE_CXX_SEEK
OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw32 -Wl,-Bdynamic,-lOpenCL,-Bstatic \
-L../../tools/mingw-cross/mpich2-win32/lib -lmpi
OCL_PREC = -D_SINGLE_DOUBLE
OCL_TUNE = -DFERMI_OCL
EXTRAMAKE = Makefile.lammps.mingw-cross
BIN_DIR = Obj_mingw32-mpi
OBJ_DIR = Obj_mingw32-mpi
LIB_DIR = Obj_mingw32-mpi
AR = i686-w64-mingw32-ar
BSH = /bin/sh
include Opencl.makefile

View File

@ -1,18 +0,0 @@
CUDA_HOME = ../../tools/mingw-cross/OpenCL
OCL_CPP = x86_64-w64-mingw32-g++ -O3 -march=core2 -mtune=core2 -mpc64 \
-msse2 -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS \
-I$(CUDA_HOME)/include
OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw64 -Wl,-Bdynamic,-lOpenCL,-Bstatic \
-L../../src/STUBS -lmpi_mingw64
OCL_PREC = -D_SINGLE_DOUBLE
OCL_TUNE = -DFERMI_OCL
EXTRAMAKE = Makefile.lammps.mingw-cross
BIN_DIR = Obj_mingw64
OBJ_DIR = Obj_mingw64
LIB_DIR = Obj_mingw64
AR = x86_64-w64-mingw32-ar
BSH = /bin/sh
include Opencl.makefile

View File

@ -1,20 +0,0 @@
CUDA_HOME = ../../tools/mingw-cross/OpenCL
OCL_CPP = x86_64-w64-mingw32-g++ -O3 -march=core2 -mtune=core2 -mpc64 \
-msse2 -DMPI_GERYON -DUCL_NO_EXIT -I$(CUDA_HOME)/include \
-I../../tools/mingw-cross/mpich2-win64/include/ \
-DMPICH_IGNORE_CXX_SEEK
OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw64 -Wl,-Bdynamic,-lOpenCL,-Bstatic \
-L../../tools/mingw-cross/mpich2-win64/lib -lmpi
OCL_PREC = -D_SINGLE_DOUBLE
OCL_TUNE = -DFERMI_OCL
EXTRAMAKE = Makefile.lammps.mingw-cross
BIN_DIR = Obj_mingw64-mpi
OBJ_DIR = Obj_mingw64-mpi
LIB_DIR = Obj_mingw64-mpi
AR = x86_64-w64-mingw32-ar
BSH = /bin/sh
include Opencl.makefile

1
lib/gpu/Makefile.mpi Symbolic link
View File

@ -0,0 +1 @@
Makefile.linux

View File

@ -1,5 +1,5 @@
# /* ----------------------------------------------------------------------
# Generic Makefile for CUDA using MPI STUBS library
# Generic Linux Makefile for CUDA
# - Change CUDA_ARCH for your GPU
# ------------------------------------------------------------------------- */
@ -7,23 +7,38 @@
EXTRAMAKE = Makefile.lammps.standard
CUDA_HOME = $(HOME)/cuda
ifeq ($(CUDA_HOME),)
CUDA_HOME = /usr/local/cuda
endif
NVCC = nvcc
# Tesla CUDA
CUDA_ARCH = -arch=sm_20
CUDA_ARCH = -arch=sm_21
# newer CUDA
#CUDA_ARCH = -arch=sm_13
# older CUDA
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
CUDA_ARCH = -arch=sm_35
# this setting should match LAMMPS Makefile
# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
LMP_INC = -DLAMMPS_SMALLBIG
# precision for GPU calculations
# -D_SINGLE_SINGLE # Single precision for all calculations
# -D_DOUBLE_DOUBLE # Double precision for all calculations
# -D_SINGLE_DOUBLE # Accumulation of forces, etc. in double
CUDA_PRECISION = -D_SINGLE_DOUBLE
CUDA_INCLUDE = -I$(CUDA_HOME)/include
CUDA_LIB = -L$(CUDA_HOME)/lib64 -L../../src/STUBS -lmpi
CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math
CUDR_CPP = g++ -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS
CUDR_OPTS = -O2
CUDA_INCLUDE = -I$(CUDA_HOME)/include
CUDA_LIB = -L$(CUDA_HOME)/lib64 -L../../src/STUBS -lmpi_stubs
CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC)
CUDR_CPP = g++ -DMPI_GERYON -DUCL_NO_EXIT -fPIC -I../../src/STUBS
CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias
BIN_DIR = ./
OBJ_DIR = ./
@ -31,5 +46,7 @@ LIB_DIR = ./
AR = ar
BSH = /bin/sh
CUDPP_OPT = -DUSE_CUDPP -Icudpp_mini
include Nvidia.makefile