Rework Install.py for the GPU library: make it consistent with the other lib folders and add Python 3 support
This commit is contained in:
10
lib/gpu/.gitignore
vendored
10
lib/gpu/.gitignore
vendored
@ -1,4 +1,6 @@
|
|||||||
obj
|
/obj
|
||||||
obj_ocl
|
/obj_ocl
|
||||||
ocl_get_devices
|
/ocl_get_devices
|
||||||
nvc_get_devices
|
/nvc_get_devices
|
||||||
|
/*.cubin
|
||||||
|
/*_cubin.h
|
||||||
|
|||||||
@ -3,53 +3,57 @@
|
|||||||
# Install.py tool to build the GPU library
|
# Install.py tool to build the GPU library
|
||||||
# used to automate the steps described in the README file in this dir
|
# used to automate the steps described in the README file in this dir
|
||||||
|
|
||||||
import sys,os,re,commands
|
from __future__ import print_function
|
||||||
|
import sys,os,subprocess
|
||||||
|
|
||||||
# help message
|
# help message
|
||||||
|
|
||||||
help = """
|
help = """
|
||||||
Syntax from src dir: make lib-gpu args="-i isuffix -h hdir -a arch -p precision -e esuffix -m -o osuffix"
|
Syntax from src dir: make lib-gpu args="-m machine -h hdir -a arch -p precision -e esuffix -b -o osuffix"
|
||||||
Syntax from lib dir: python Install.py -i isuffix -h hdir -a arch -p precision -e esuffix -m -o osuffix
|
Syntax from lib dir: python Install.py -m machine -h hdir -a arch -p precision -e esuffix -b -o osuffix
|
||||||
|
|
||||||
specify one or more options, order does not matter
|
specify one or more options, order does not matter
|
||||||
|
|
||||||
copies an existing Makefile.isuffix in lib/gpu to Makefile.auto
|
copies an existing Makefile.machine in lib/gpu to Makefile.auto
|
||||||
optionally edits these variables in Makefile.auto:
|
optionally edits these variables in Makefile.auto:
|
||||||
CUDA_HOME, CUDA_ARCH, CUDA_PRECISION, EXTRAMAKE
|
CUDA_HOME, CUDA_ARCH, CUDA_PRECISION, EXTRAMAKE
|
||||||
optionally uses Makefile.auto to build the GPU library -> libgpu.a
|
optionally uses Makefile.auto to build the GPU library -> libgpu.a
|
||||||
and to copy a Makefile.lammps.esuffix -> Makefile.lammps
|
and to copy a Makefile.lammps.esuffix -> Makefile.lammps
|
||||||
optionally copies Makefile.auto to a new Makefile.osuffix
|
optionally copies Makefile.auto to a new Makefile.osuffix
|
||||||
|
|
||||||
-i = use Makefile.isuffix as starting point, copy to Makefile.auto
|
-m = use Makefile.machine as starting point, copy to Makefile.auto
|
||||||
default isuffix = linux
|
default machine = linux
|
||||||
-h = set CUDA_HOME variable in Makefile.auto to hdir
|
-h = set CUDA_HOME variable in Makefile.auto to hdir
|
||||||
hdir = path to NVIDIA Cuda software, e.g. /usr/local/cuda
|
hdir = path to NVIDIA Cuda software, e.g. /usr/local/cuda
|
||||||
-a = set CUDA_ARCH variable in Makefile.auto to arch
|
-a = set CUDA_ARCH variable in Makefile.auto to arch
|
||||||
use arch = ?? for K40 (Tesla)
|
use arch = 20 for Tesla C2050/C2070 (Fermi) (deprecated as of CUDA 8.0)
|
||||||
use arch = 37 for dual K80 (Tesla)
|
or GeForce GTX 580 or similar
|
||||||
use arch = 60 for P100 (Pascal)
|
use arch = 30 for Tesla K10 (Kepler)
|
||||||
|
use arch = 35 for Tesla K40 (Kepler) or GeForce GTX Titan or similar
|
||||||
|
use arch = 37 for Tesla dual K80 (Kepler)
|
||||||
|
use arch = 60 for Tesla P100 (Pascal)
|
||||||
-p = set CUDA_PRECISION variable in Makefile.auto to precision
|
-p = set CUDA_PRECISION variable in Makefile.auto to precision
|
||||||
use precision = double or mixed or single
|
use precision = double or mixed or single
|
||||||
-e = set EXTRAMAKE variable in Makefile.auto to Makefile.lammps.esuffix
|
-e = set EXTRAMAKE variable in Makefile.auto to Makefile.lammps.esuffix
|
||||||
-m = make the GPU library using Makefile.auto
|
-b = make the GPU library using Makefile.auto
|
||||||
first performs a "make clean"
|
first performs a "make clean"
|
||||||
produces libgpu.a if successful
|
then produces libgpu.a if successful
|
||||||
also copies EXTRAMAKE file -> Makefile.lammps
|
also copies EXTRAMAKE file -> Makefile.lammps
|
||||||
-e can set which Makefile.lammps.esuffix file is copied
|
-e can set which Makefile.lammps.esuffix file is copied
|
||||||
-o = copy final Makefile.auto to Makefile.osuffix
|
-o = copy final Makefile.auto to Makefile.osuffix
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
|
|
||||||
make lib-gpu args="-m" # build GPU lib with default Makefile.linux
|
make lib-gpu args="-b" # build GPU lib with default Makefile.linux
|
||||||
make lib-gpu args="-i xk7 -p single -o xk7.single" # create new Makefile.xk7.single, altered for single-precision
|
make lib-gpu args="-m xk7 -p single -o xk7.single" # create new Makefile.xk7.single, altered for single-precision
|
||||||
make lib-gpu args="-i xk7 -p single -o xk7.single -m" # ditto, also build GPU lib
|
make lib-gpu args="-m mpi -a 35 -p mixed -o mpi.mixed -b" # create new Makefile.mpi.mixed, also build GPU lib with these settings
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# print error message or help
|
# print error message or help
|
||||||
|
|
||||||
def error(str=None):
|
def error(str=None):
|
||||||
if not str: print help
|
if not str: print(help)
|
||||||
else: print "ERROR",str
|
else: print("ERROR",str)
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
# parse args
|
# parse args
|
||||||
@ -65,7 +69,7 @@ outflag = 0
|
|||||||
|
|
||||||
iarg = 0
|
iarg = 0
|
||||||
while iarg < nargs:
|
while iarg < nargs:
|
||||||
if args[iarg] == "-i":
|
if args[iarg] == "-m":
|
||||||
if iarg+2 > nargs: error()
|
if iarg+2 > nargs: error()
|
||||||
isuffix = args[iarg+1]
|
isuffix = args[iarg+1]
|
||||||
iarg += 2
|
iarg += 2
|
||||||
@ -89,7 +93,7 @@ while iarg < nargs:
|
|||||||
eflag = 1
|
eflag = 1
|
||||||
lmpsuffix = args[iarg+1]
|
lmpsuffix = args[iarg+1]
|
||||||
iarg += 2
|
iarg += 2
|
||||||
elif args[iarg] == "-m":
|
elif args[iarg] == "-b":
|
||||||
makeflag = 1
|
makeflag = 1
|
||||||
iarg += 1
|
iarg += 1
|
||||||
elif args[iarg] == "-o":
|
elif args[iarg] == "-o":
|
||||||
@ -117,9 +121,9 @@ fp = open("Makefile.auto",'w')
|
|||||||
for line in lines:
|
for line in lines:
|
||||||
words = line.split()
|
words = line.split()
|
||||||
if len(words) != 3:
|
if len(words) != 3:
|
||||||
print >>fp,line,
|
fp.write(line)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if hflag and words[0] == "CUDA_HOME" and words[1] == '=':
|
if hflag and words[0] == "CUDA_HOME" and words[1] == '=':
|
||||||
line = line.replace(words[2],hdir)
|
line = line.replace(words[2],hdir)
|
||||||
if aflag and words[0] == "CUDA_ARCH" and words[1] == '=':
|
if aflag and words[0] == "CUDA_ARCH" and words[1] == '=':
|
||||||
@ -128,20 +132,20 @@ for line in lines:
|
|||||||
line = line.replace(words[2],precstr)
|
line = line.replace(words[2],precstr)
|
||||||
if eflag and words[0] == "EXTRAMAKE" and words[1] == '=':
|
if eflag and words[0] == "EXTRAMAKE" and words[1] == '=':
|
||||||
line = line.replace(words[2],"Makefile.lammps.%s" % lmpsuffix)
|
line = line.replace(words[2],"Makefile.lammps.%s" % lmpsuffix)
|
||||||
|
|
||||||
print >>fp,line,
|
|
||||||
|
|
||||||
|
fp.write(line)
|
||||||
fp.close()
|
fp.close()
|
||||||
|
|
||||||
# perform make
|
# perform make
|
||||||
# make operation copies EXTRAMAKE file to Makefile.lammps
|
# make operation copies EXTRAMAKE file to Makefile.lammps
|
||||||
|
|
||||||
if makeflag:
|
if makeflag:
|
||||||
print "Building libgpu.a ..."
|
print("Building libgpu.a ...")
|
||||||
cmd = "rm -f libgpu.a"
|
cmd = "rm -f libgpu.a"
|
||||||
commands.getoutput(cmd)
|
subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
|
||||||
cmd = "make -f Makefile.auto clean; make -f Makefile.auto"
|
cmd = "make -f Makefile.auto clean; make -f Makefile.auto"
|
||||||
commands.getoutput(cmd)
|
txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
|
||||||
|
print(txt.decode('UTF-8'))
|
||||||
if not os.path.exists("libgpu.a"):
|
if not os.path.exists("libgpu.a"):
|
||||||
error("Build of lib/gpu/libgpu.a was NOT successful")
|
error("Build of lib/gpu/libgpu.a was NOT successful")
|
||||||
if not os.path.exists("Makefile.lammps"):
|
if not os.path.exists("Makefile.lammps"):
|
||||||
@ -150,6 +154,6 @@ if makeflag:
|
|||||||
# copy new Makefile.auto to Makefile.osuffix
|
# copy new Makefile.auto to Makefile.osuffix
|
||||||
|
|
||||||
if outflag:
|
if outflag:
|
||||||
print "Creating new Makefile.%s" % osuffix
|
print("Creating new Makefile.%s" % osuffix)
|
||||||
cmd = "cp Makefile.auto Makefile.%s" % osuffix
|
cmd = "cp Makefile.auto Makefile.%s" % osuffix
|
||||||
commands.getoutput(cmd)
|
subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
|
||||||
|
|||||||
@ -37,7 +37,7 @@ CUDA_INCLUDE = -I$(CUDA_HOME)/include
|
|||||||
CUDA_LIB = -L$(CUDA_HOME)/lib64
|
CUDA_LIB = -L$(CUDA_HOME)/lib64
|
||||||
CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC)
|
CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC)
|
||||||
|
|
||||||
CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC
|
CUDR_CPP = mpicxx -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC
|
||||||
CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias
|
CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias
|
||||||
|
|
||||||
BIN_DIR = ./
|
BIN_DIR = ./
|
||||||
|
|||||||
@ -1,17 +0,0 @@
|
|||||||
CUDA_HOME = ../../tools/mingw-cross/OpenCL
|
|
||||||
|
|
||||||
OCL_CPP = i686-w64-mingw32-g++ -O2 -march=i686 -mtune=generic -mfpmath=387 \
|
|
||||||
-mpc64 -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS \
|
|
||||||
-I$(CUDA_HOME)/include
|
|
||||||
OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw32 -Wl,-Bdynamic,-lOpenCL,-Bstatic -L../../src/STUBS -lmpi_mingw32
|
|
||||||
OCL_PREC = -D_SINGLE_DOUBLE
|
|
||||||
OCL_TUNE = -DFERMI_OCL
|
|
||||||
EXTRAMAKE = Makefile.lammps.mingw-cross
|
|
||||||
|
|
||||||
BIN_DIR = Obj_mingw32
|
|
||||||
OBJ_DIR = Obj_mingw32
|
|
||||||
LIB_DIR = Obj_mingw32
|
|
||||||
AR = i686-w64-mingw32-ar
|
|
||||||
BSH = /bin/sh
|
|
||||||
|
|
||||||
include Opencl.makefile
|
|
||||||
@ -1,19 +0,0 @@
|
|||||||
CUDA_HOME = ../../tools/mingw-cross/OpenCL
|
|
||||||
|
|
||||||
OCL_CPP = i686-w64-mingw32-g++ -O2 -march=i686 -mtune=generic -mfpmath=387 \
|
|
||||||
-mpc64 -DMPI_GERYON -DUCL_NO_EXIT -I$(CUDA_HOME)/include \
|
|
||||||
-I../../tools/mingw-cross/mpich2-win32/include/ \
|
|
||||||
-DMPICH_IGNORE_CXX_SEEK
|
|
||||||
OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw32 -Wl,-Bdynamic,-lOpenCL,-Bstatic \
|
|
||||||
-L../../tools/mingw-cross/mpich2-win32/lib -lmpi
|
|
||||||
OCL_PREC = -D_SINGLE_DOUBLE
|
|
||||||
OCL_TUNE = -DFERMI_OCL
|
|
||||||
EXTRAMAKE = Makefile.lammps.mingw-cross
|
|
||||||
|
|
||||||
BIN_DIR = Obj_mingw32-mpi
|
|
||||||
OBJ_DIR = Obj_mingw32-mpi
|
|
||||||
LIB_DIR = Obj_mingw32-mpi
|
|
||||||
AR = i686-w64-mingw32-ar
|
|
||||||
BSH = /bin/sh
|
|
||||||
|
|
||||||
include Opencl.makefile
|
|
||||||
@ -1,18 +0,0 @@
|
|||||||
CUDA_HOME = ../../tools/mingw-cross/OpenCL
|
|
||||||
|
|
||||||
OCL_CPP = x86_64-w64-mingw32-g++ -O3 -march=core2 -mtune=core2 -mpc64 \
|
|
||||||
-msse2 -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS \
|
|
||||||
-I$(CUDA_HOME)/include
|
|
||||||
OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw64 -Wl,-Bdynamic,-lOpenCL,-Bstatic \
|
|
||||||
-L../../src/STUBS -lmpi_mingw64
|
|
||||||
OCL_PREC = -D_SINGLE_DOUBLE
|
|
||||||
OCL_TUNE = -DFERMI_OCL
|
|
||||||
EXTRAMAKE = Makefile.lammps.mingw-cross
|
|
||||||
|
|
||||||
BIN_DIR = Obj_mingw64
|
|
||||||
OBJ_DIR = Obj_mingw64
|
|
||||||
LIB_DIR = Obj_mingw64
|
|
||||||
AR = x86_64-w64-mingw32-ar
|
|
||||||
BSH = /bin/sh
|
|
||||||
|
|
||||||
include Opencl.makefile
|
|
||||||
@ -1,20 +0,0 @@
|
|||||||
CUDA_HOME = ../../tools/mingw-cross/OpenCL
|
|
||||||
|
|
||||||
OCL_CPP = x86_64-w64-mingw32-g++ -O3 -march=core2 -mtune=core2 -mpc64 \
|
|
||||||
-msse2 -DMPI_GERYON -DUCL_NO_EXIT -I$(CUDA_HOME)/include \
|
|
||||||
-I../../tools/mingw-cross/mpich2-win64/include/ \
|
|
||||||
-DMPICH_IGNORE_CXX_SEEK
|
|
||||||
|
|
||||||
OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw64 -Wl,-Bdynamic,-lOpenCL,-Bstatic \
|
|
||||||
-L../../tools/mingw-cross/mpich2-win64/lib -lmpi
|
|
||||||
OCL_PREC = -D_SINGLE_DOUBLE
|
|
||||||
OCL_TUNE = -DFERMI_OCL
|
|
||||||
EXTRAMAKE = Makefile.lammps.mingw-cross
|
|
||||||
|
|
||||||
BIN_DIR = Obj_mingw64-mpi
|
|
||||||
OBJ_DIR = Obj_mingw64-mpi
|
|
||||||
LIB_DIR = Obj_mingw64-mpi
|
|
||||||
AR = x86_64-w64-mingw32-ar
|
|
||||||
BSH = /bin/sh
|
|
||||||
|
|
||||||
include Opencl.makefile
|
|
||||||
1
lib/gpu/Makefile.mpi
Symbolic link
1
lib/gpu/Makefile.mpi
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
Makefile.linux
|
||||||
@ -1,5 +1,5 @@
|
|||||||
# /* ----------------------------------------------------------------------
|
# /* ----------------------------------------------------------------------
|
||||||
# Generic Makefile for CUDA using MPI STUBS library
|
# Generic Linux Makefile for CUDA
|
||||||
# - Change CUDA_ARCH for your GPU
|
# - Change CUDA_ARCH for your GPU
|
||||||
# ------------------------------------------------------------------------- */
|
# ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
@ -7,23 +7,38 @@
|
|||||||
|
|
||||||
EXTRAMAKE = Makefile.lammps.standard
|
EXTRAMAKE = Makefile.lammps.standard
|
||||||
|
|
||||||
CUDA_HOME = $(HOME)/cuda
|
ifeq ($(CUDA_HOME),)
|
||||||
|
CUDA_HOME = /usr/local/cuda
|
||||||
|
endif
|
||||||
|
|
||||||
NVCC = nvcc
|
NVCC = nvcc
|
||||||
|
|
||||||
# Tesla CUDA
|
# Tesla CUDA
|
||||||
CUDA_ARCH = -arch=sm_20
|
CUDA_ARCH = -arch=sm_21
|
||||||
# newer CUDA
|
# newer CUDA
|
||||||
#CUDA_ARCH = -arch=sm_13
|
#CUDA_ARCH = -arch=sm_13
|
||||||
# older CUDA
|
# older CUDA
|
||||||
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
|
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
|
||||||
|
CUDA_ARCH = -arch=sm_35
|
||||||
|
|
||||||
|
# this setting should match LAMMPS Makefile
|
||||||
|
# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
|
||||||
|
|
||||||
|
LMP_INC = -DLAMMPS_SMALLBIG
|
||||||
|
|
||||||
|
# precision for GPU calculations
|
||||||
|
# -D_SINGLE_SINGLE # Single precision for all calculations
|
||||||
|
# -D_DOUBLE_DOUBLE # Double precision for all calculations
|
||||||
|
# -D_SINGLE_DOUBLE # Accumulation of forces, etc. in double
|
||||||
|
|
||||||
CUDA_PRECISION = -D_SINGLE_DOUBLE
|
CUDA_PRECISION = -D_SINGLE_DOUBLE
|
||||||
CUDA_INCLUDE = -I$(CUDA_HOME)/include
|
|
||||||
CUDA_LIB = -L$(CUDA_HOME)/lib64 -L../../src/STUBS -lmpi
|
|
||||||
CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math
|
|
||||||
|
|
||||||
CUDR_CPP = g++ -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS
|
CUDA_INCLUDE = -I$(CUDA_HOME)/include
|
||||||
CUDR_OPTS = -O2
|
CUDA_LIB = -L$(CUDA_HOME)/lib64 -L../../src/STUBS -lmpi_stubs
|
||||||
|
CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC)
|
||||||
|
|
||||||
|
CUDR_CPP = g++ -DMPI_GERYON -DUCL_NO_EXIT -fPIC -I../../src/STUBS
|
||||||
|
CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias
|
||||||
|
|
||||||
BIN_DIR = ./
|
BIN_DIR = ./
|
||||||
OBJ_DIR = ./
|
OBJ_DIR = ./
|
||||||
@ -31,5 +46,7 @@ LIB_DIR = ./
|
|||||||
AR = ar
|
AR = ar
|
||||||
BSH = /bin/sh
|
BSH = /bin/sh
|
||||||
|
|
||||||
|
CUDPP_OPT = -DUSE_CUDPP -Icudpp_mini
|
||||||
|
|
||||||
include Nvidia.makefile
|
include Nvidia.makefile
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user