rework Install.py for gpu library. make it consistent with other lib folders and support python3

2017-08-08 16:57:27 -04:00
parent c53a84a967
commit 30431d4edb
9 changed files with 64 additions and 114 deletions
--- a/lib/gpu/.gitignore
+++ b/lib/gpu/.gitignore
@ -1,4 +1,6 @@
-obj
-obj_ocl
-ocl_get_devices
-nvc_get_devices
+/obj
+/obj_ocl
+/ocl_get_devices
+/nvc_get_devices
+/*.cubin
+/*_cubin.h
--- a/lib/gpu/Install.py
+++ b/lib/gpu/Install.py
@ -3,53 +3,57 @@
 # Install.py tool to build the GPU library
 # used to automate the steps described in the README file in this dir

-import sys,os,re,commands
+from __future__ import print_function
+import sys,os,subprocess

 # help message

 help = """
-Syntax from src dir: make lib-gpu args="-i isuffix -h hdir -a arch -p precision -e esuffix -m -o osuffix"
-Syntax from lib dir: python Install.py -i isuffix -h hdir -a arch -p precision -e esuffix -m -o osuffix
+Syntax from src dir: make lib-gpu args="-m machine -h hdir -a arch -p precision -e esuffix -b -o osuffix"
+Syntax from lib dir: python Install.py -m machine -h hdir -a arch -p precision -e esuffix -b -o osuffix

 specify one or more options, order does not matter

-copies an existing Makefile.isuffix in lib/gpu to Makefile.auto 
+copies an existing Makefile.machine in lib/gpu to Makefile.auto 
 optionally edits these variables in Makefile.auto:
  CUDA_HOME, CUDA_ARCH, CUDA_PRECISION, EXTRAMAKE
 optionally uses Makefile.auto to build the GPU library -> libgpu.a
  and to copy a Makefile.lammps.esuffix -> Makefile.lammps
 optionally copies Makefile.auto to a new Makefile.osuffix

-  -i = use Makefile.isuffix as starting point, copy to Makefile.auto
-       default isuffix = linux
+  -m = use Makefile.machine as starting point, copy to Makefile.auto
+       default machine = linux
  -h = set CUDA_HOME variable in Makefile.auto to hdir
       hdir = path to NVIDIA Cuda software, e.g. /usr/local/cuda
  -a = set CUDA_ARCH variable in Makefile.auto to arch
-       use arch = ?? for K40 (Tesla)
-       use arch = 37 for dual K80 (Tesla)
-       use arch = 60 for P100 (Pascal)
+       use arch = 20 for Tesla C2050/C2070 (Fermi) (deprecated as of CUDA 8.0) 
+                     or GeForce GTX 580 or similar
+       use arch = 30 for Tesla K10 (Kepler)
+       use arch = 35 for Tesla K40 (Kepler) or GeForce GTX Titan or similar
+       use arch = 37 for Tesla dual K80 (Kepler)
+       use arch = 60 for Tesla P100 (Pascal)
  -p = set CUDA_PRECISION variable in Makefile.auto to precision
       use precision = double or mixed or single
  -e = set EXTRAMAKE variable in Makefile.auto to Makefile.lammps.esuffix
-  -m = make the GPU library using Makefile.auto
+  -b = make the GPU library using Makefile.auto
       first performs a "make clean"
-       produces libgpu.a if successful
+       then produces libgpu.a if successful
       also copies EXTRAMAKE file -> Makefile.lammps
         -e can set which Makefile.lammps.esuffix file is copied
  -o = copy final Makefile.auto to Makefile.osuffix

 Examples:

-make lib-gpu args="-m"      # build GPU lib with default Makefile.linux
-make lib-gpu args="-i xk7 -p single -o xk7.single"      # create new Makefile.xk7.single, altered for single-precision
-make lib-gpu args="-i xk7 -p single -o xk7.single -m"   # ditto, also build GPU lib
+make lib-gpu args="-b"      # build GPU lib with default Makefile.linux
+make lib-gpu args="-m xk7 -p single -o xk7.single"      # create new Makefile.xk7.single, altered for single-precision
+make lib-gpu args="-m mpi -a 35 -p mixed -o mpi.mixed -b" # create new Makefile.mpi.mixed, also build GPU lib with these settings
 """

 # print error message or help

 def error(str=None):
-  if not str: print help
-  else: print "ERROR",str
+  if not str: print(help)
+  else: print("ERROR",str)
  sys.exit()

 # parse args
@ -65,7 +69,7 @@ outflag = 0

 iarg = 0
 while iarg < nargs:
-  if args[iarg] == "-i":
+  if args[iarg] == "-m":
    if iarg+2 > nargs: error()
    isuffix = args[iarg+1]
    iarg += 2
@ -89,7 +93,7 @@ while iarg < nargs:
    eflag = 1
    lmpsuffix = args[iarg+1]
    iarg += 2
-  elif args[iarg] == "-m":
+  elif args[iarg] == "-b":
    makeflag = 1
    iarg += 1
  elif args[iarg] == "-o":
@ -117,9 +121,9 @@ fp = open("Makefile.auto",'w')
 for line in lines:
  words = line.split()
  if len(words) != 3:
-    print >>fp,line,
+    fp.write(line)
    continue
-  
+
  if hflag and words[0] == "CUDA_HOME" and words[1] == '=':
    line = line.replace(words[2],hdir)
  if aflag and words[0] == "CUDA_ARCH" and words[1] == '=':
@ -128,20 +132,20 @@ for line in lines:
    line = line.replace(words[2],precstr)
  if eflag and words[0] == "EXTRAMAKE" and words[1] == '=':
    line = line.replace(words[2],"Makefile.lammps.%s" % lmpsuffix)
-    
-  print >>fp,line,

+  fp.write(line)
 fp.close()

 # perform make
 # make operations copies EXTRAMAKE file to Makefile.lammps

 if makeflag:
-  print "Building libgpu.a ..."
+  print("Building libgpu.a ...")
  cmd = "rm -f libgpu.a"
-  commands.getoutput(cmd)
+  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
  cmd = "make -f Makefile.auto clean; make -f Makefile.auto"
-  commands.getoutput(cmd)
+  txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+  print(txt.decode('UTF-8'))
  if not os.path.exists("libgpu.a"):
    error("Build of lib/gpu/libgpu.a was NOT successful")
  if not os.path.exists("Makefile.lammps"):
@ -150,6 +154,6 @@ if makeflag:
 # copy new Makefile.auto to Makefile.osuffix

 if outflag:
-  print "Creating new Makefile.%s" % osuffix
+  print("Creating new Makefile.%s" % osuffix)
  cmd = "cp Makefile.auto Makefile.%s" % osuffix
-  commands.getoutput(cmd)
+  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
--- a/lib/gpu/Makefile.linux
+++ b/lib/gpu/Makefile.linux
@ -37,7 +37,7 @@ CUDA_INCLUDE = -I$(CUDA_HOME)/include
 CUDA_LIB = -L$(CUDA_HOME)/lib64
 CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC)

-CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC
+CUDR_CPP = mpicxx -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC
 CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias

 BIN_DIR = ./
--- a/lib/gpu/Makefile.mingw32-cross
+++ b/lib/gpu/Makefile.mingw32-cross
@ -1,17 +0,0 @@
-CUDA_HOME = ../../tools/mingw-cross/OpenCL
-
-OCL_CPP = i686-w64-mingw32-g++ -O2 -march=i686 -mtune=generic -mfpmath=387 \
-        -mpc64 -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS \
-        -I$(CUDA_HOME)/include
-OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw32 -Wl,-Bdynamic,-lOpenCL,-Bstatic -L../../src/STUBS -lmpi_mingw32
-OCL_PREC = -D_SINGLE_DOUBLE
-OCL_TUNE = -DFERMI_OCL
-EXTRAMAKE = Makefile.lammps.mingw-cross
-
-BIN_DIR = Obj_mingw32
-OBJ_DIR = Obj_mingw32
-LIB_DIR = Obj_mingw32
-AR = i686-w64-mingw32-ar
-BSH = /bin/sh
-
-include Opencl.makefile
--- a/lib/gpu/Makefile.mingw32-cross-mpi
+++ b/lib/gpu/Makefile.mingw32-cross-mpi
@ -1,19 +0,0 @@
-CUDA_HOME = ../../tools/mingw-cross/OpenCL
-
-OCL_CPP = i686-w64-mingw32-g++ -O2 -march=i686 -mtune=generic -mfpmath=387 \
-        -mpc64 -DMPI_GERYON -DUCL_NO_EXIT -I$(CUDA_HOME)/include \
-	-I../../tools/mingw-cross/mpich2-win32/include/ \
-        -DMPICH_IGNORE_CXX_SEEK
-OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw32 -Wl,-Bdynamic,-lOpenCL,-Bstatic \
-	-L../../tools/mingw-cross/mpich2-win32/lib -lmpi
-OCL_PREC = -D_SINGLE_DOUBLE
-OCL_TUNE = -DFERMI_OCL
-EXTRAMAKE = Makefile.lammps.mingw-cross
-
-BIN_DIR = Obj_mingw32-mpi
-OBJ_DIR = Obj_mingw32-mpi
-LIB_DIR = Obj_mingw32-mpi
-AR = i686-w64-mingw32-ar
-BSH = /bin/sh
-
-include Opencl.makefile
--- a/lib/gpu/Makefile.mingw64-cross
+++ b/lib/gpu/Makefile.mingw64-cross
@ -1,18 +0,0 @@
-CUDA_HOME = ../../tools/mingw-cross/OpenCL
-
-OCL_CPP = x86_64-w64-mingw32-g++ -O3 -march=core2 -mtune=core2 -mpc64 \
-	-msse2 -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS \
-        -I$(CUDA_HOME)/include
-OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw64 -Wl,-Bdynamic,-lOpenCL,-Bstatic \
-	-L../../src/STUBS -lmpi_mingw64
-OCL_PREC = -D_SINGLE_DOUBLE
-OCL_TUNE = -DFERMI_OCL
-EXTRAMAKE = Makefile.lammps.mingw-cross
-
-BIN_DIR = Obj_mingw64
-OBJ_DIR = Obj_mingw64
-LIB_DIR = Obj_mingw64
-AR = x86_64-w64-mingw32-ar
-BSH = /bin/sh
-
-include Opencl.makefile
--- a/lib/gpu/Makefile.mingw64-cross-mpi
+++ b/lib/gpu/Makefile.mingw64-cross-mpi
@ -1,20 +0,0 @@
-CUDA_HOME = ../../tools/mingw-cross/OpenCL
-
-OCL_CPP = x86_64-w64-mingw32-g++ -O3 -march=core2 -mtune=core2 -mpc64 \
-	-msse2 -DMPI_GERYON -DUCL_NO_EXIT -I$(CUDA_HOME)/include \
-	-I../../tools/mingw-cross/mpich2-win64/include/ \
-        -DMPICH_IGNORE_CXX_SEEK
- 
-OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw64 -Wl,-Bdynamic,-lOpenCL,-Bstatic \
-	-L../../tools/mingw-cross/mpich2-win64/lib -lmpi
-OCL_PREC = -D_SINGLE_DOUBLE
-OCL_TUNE = -DFERMI_OCL
-EXTRAMAKE = Makefile.lammps.mingw-cross
-
-BIN_DIR = Obj_mingw64-mpi
-OBJ_DIR = Obj_mingw64-mpi
-LIB_DIR = Obj_mingw64-mpi
-AR = x86_64-w64-mingw32-ar
-BSH = /bin/sh
-
-include Opencl.makefile
--- a/lib/gpu/Makefile.mpi
+++ b/lib/gpu/Makefile.mpi
@ -0,0 +1 @@
+Makefile.linux
--- a/lib/gpu/Makefile.serial
+++ b/lib/gpu/Makefile.serial
@ -1,5 +1,5 @@
 # /* ----------------------------------------------------------------------   
-#  Generic Makefile for CUDA using MPI STUBS library
+#  Generic Linux Makefile for CUDA 
 #     - Change CUDA_ARCH for your GPU
 # ------------------------------------------------------------------------- */

@ -7,23 +7,38 @@

 EXTRAMAKE = Makefile.lammps.standard

-CUDA_HOME = $(HOME)/cuda
+ifeq ($(CUDA_HOME),)
+CUDA_HOME = /usr/local/cuda
+endif
+
 NVCC = nvcc

 # Tesla CUDA
-CUDA_ARCH = -arch=sm_20
+CUDA_ARCH = -arch=sm_21
 # newer CUDA
 #CUDA_ARCH = -arch=sm_13
 # older CUDA
 #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
+CUDA_ARCH = -arch=sm_35
+
+# this setting should match LAMMPS Makefile
+# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
+
+LMP_INC = -DLAMMPS_SMALLBIG
+
+# precision for GPU calculations
+# -D_SINGLE_SINGLE  # Single precision for all calculations
+# -D_DOUBLE_DOUBLE  # Double precision for all calculations
+# -D_SINGLE_DOUBLE  # Accumulation of forces, etc. in double

 CUDA_PRECISION = -D_SINGLE_DOUBLE
-CUDA_INCLUDE = -I$(CUDA_HOME)/include
-CUDA_LIB = -L$(CUDA_HOME)/lib64 -L../../src/STUBS -lmpi
-CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math

-CUDR_CPP = g++ -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS
-CUDR_OPTS = -O2 
+CUDA_INCLUDE = -I$(CUDA_HOME)/include
+CUDA_LIB = -L$(CUDA_HOME)/lib64 -L../../src/STUBS -lmpi_stubs
+CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC)
+
+CUDR_CPP = g++ -DMPI_GERYON -DUCL_NO_EXIT -fPIC -I../../src/STUBS
+CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias

 BIN_DIR = ./
 OBJ_DIR = ./
@ -31,5 +46,7 @@ LIB_DIR = ./
 AR = ar
 BSH = /bin/sh

+CUDPP_OPT = -DUSE_CUDPP -Icudpp_mini
+
 include Nvidia.makefile