# /* ----------------------------------------------------------------------
#  Generic Linux Makefile for HIP
#     - export HIP_PLATFORM=amd (or nvcc) before execution
#     - change HIP_ARCH for your GPU
# ------------------------------------------------------------------------- */

# Remove a target whose recipe failed part-way, so a truncated generated
# header or archive is never mistaken for an up-to-date file.
.DELETE_ON_ERROR:

# These goals are commands, not files.
.PHONY: all clean

# This setting should match the LAMMPS Makefile:
# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL

LMP_INC = -DLAMMPS_SMALLBIG

# precision for GPU calculations
# -D_SINGLE_SINGLE  # Single precision for all calculations
# -D_DOUBLE_DOUBLE  # Double precision for all calculations
# -D_SINGLE_DOUBLE  # Accumulation of forces, etc. in double

HIP_PRECISION = -D_SINGLE_DOUBLE

HIP_OPTS = -O3
HIP_HOST_OPTS = -Wno-deprecated-declarations -fopenmp
HIP_HOST_INCLUDE =

# use device sort
# requires linking with hipcc and hipCUB + (rocPRIM or CUB for AMD or Nvidia respectively)
HIP_HOST_OPTS += -DUSE_HIP_DEVICE_SORT
# path to cub
HIP_HOST_INCLUDE += -I./
# path to hipcub (HIP_PATH is assigned further down; this variable is
# recursive, so the reference is resolved when it is used)
HIP_HOST_INCLUDE += -I$(HIP_PATH)/../include

# use mpi
HIP_HOST_OPTS += -DMPI_GERYON -DUCL_NO_EXIT
# These settings should match the LAMMPS Makefile.
# Simple (:=) assignment: mpicxx is queried once at parse time instead of
# once per expansion of the host compile command.
MPI_COMP_OPTS := $(shell mpicxx --showme:compile)
MPI_LINK_OPTS := $(shell mpicxx --showme:link)

HIP_PATH ?= $(wildcard /opt/rocm/hip)
# Platform and compiler as reported by hipconfig, evaluated once.
HIP_PLATFORM := $(shell $(HIP_PATH)/bin/hipconfig --platform)
HIP_COMPILER := $(shell $(HIP_PATH)/bin/hipconfig --compiler)

ifeq (hcc,$(HIP_PLATFORM))
  # possible values: gfx803,gfx900,gfx906
  HIP_ARCH = gfx906
else ifeq (amd,$(HIP_PLATFORM))
  # possible values: gfx803,gfx900,gfx906
  HIP_ARCH = gfx906
else ifeq (nvcc,$(HIP_PLATFORM))
  HIP_OPTS += --use_fast_math
  HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] \
             -gencode arch=compute_32,code=[sm_32,compute_32] \
             -gencode arch=compute_35,code=[sm_35,compute_35] \
             -gencode arch=compute_50,code=[sm_50,compute_50] \
             -gencode arch=compute_52,code=[sm_52,compute_52] \
             -gencode arch=compute_53,code=[sm_53,compute_53] \
             -gencode arch=compute_60,code=[sm_60,compute_60] \
             -gencode arch=compute_61,code=[sm_61,compute_61] \
             -gencode arch=compute_62,code=[sm_62,compute_62] \
             -gencode arch=compute_70,code=[sm_70,compute_70] \
             -gencode arch=compute_72,code=[sm_72,compute_72] \
             -gencode arch=compute_75,code=[sm_75,compute_75]
endif

BIN_DIR = .
OBJ_DIR = ./obj
LIB_DIR = .
AR = ar
BSH = /bin/sh

# /* ----------------------------------------------------------------------
# don't change section below without need
# ------------------------------------------------------------------------- */

HIP_OPTS += -DUSE_HIP $(HIP_PRECISION)
HIP_GPU_OPTS += $(HIP_OPTS) -I./

# Per-compiler settings for building the GPU kernels.
# HIP_GPU_OPTS_S / HIP_GPU_OPTS_E are placed before / after the common
# kernel options on the kernel compile command line.
# HIP_LIBS_TARGET holds a line of makefile text that is written verbatim
# into Makefile.lammps by the libgpu.a rule; the separate export line
# below it sets the same variable for this build.
ifeq (clang,$(HIP_COMPILER))
  HIP_HOST_OPTS += -fPIC
  HIP_GPU_CC = $(HIP_PATH)/bin/hipcc --genco
  HIP_GPU_OPTS_S = --offload-arch=$(HIP_ARCH)
  HIP_GPU_OPTS_E =
  HIP_KERNEL_SUFFIX = .cpp
  HIP_LIBS_TARGET = export HCC_AMDGPU_TARGET := $(HIP_ARCH)
  export HCC_AMDGPU_TARGET := $(HIP_ARCH)
else ifeq (hcc,$(HIP_COMPILER))
  HIP_HOST_OPTS += -fPIC
  HIP_GPU_CC = $(HIP_PATH)/bin/hipcc --genco
  HIP_GPU_OPTS_S = -t="$(HIP_ARCH)" -f=\"
  HIP_GPU_OPTS_E = \"
  HIP_KERNEL_SUFFIX = .cpp
  HIP_LIBS_TARGET = export HCC_AMDGPU_TARGET := $(HIP_ARCH)
  export HCC_AMDGPU_TARGET := $(HIP_ARCH)
else ifeq (nvcc,$(HIP_PLATFORM))
  HIP_GPU_CC = $(HIP_PATH)/bin/hipcc --fatbin
  HIP_GPU_OPTS += $(HIP_ARCH)
  HIP_GPU_SORT_ARCH = $(HIP_ARCH)
  # fix: nvcc can't handle the -pthread flag, so pass it through -Xcompiler
  MPI_COMP_OPTS := $(subst -pthread,-Xcompiler -pthread,$(MPI_COMP_OPTS))
  MPI_LINK_OPTS := $(subst -pthread,-Xcompiler -pthread,$(MPI_LINK_OPTS))
endif

# hipcc is essential for device sort, because hipcub is a header-only library
# and ROCm gpu code generation is deferred to the linking stage
HIP_HOST_CC = $(HIP_PATH)/bin/hipcc

HIP_HOST_OPTS += $(HIP_OPTS) $(MPI_COMP_OPTS) $(LMP_INC)
HIP_HOST_CC_CMD = $(HIP_HOST_CC) $(HIP_HOST_OPTS) $(HIP_HOST_INCLUDE)

# sources

ALL_H = $(wildcard ./geryon/ucl*.h) $(wildcard ./geryon/hip*.h) $(wildcard ./lal_*.h)
SRCS := $(wildcard ./lal_*.cpp)
OBJS := $(subst ./,$(OBJ_DIR)/,$(SRCS:%.cpp=%.o))
CUS := $(wildcard lal_*.cu)
# One generated <name>_cubin.h per lal_<name>.cu, except that lal_pppm.cu
# yields two headers (float and double grid types) instead of pppm_cubin.h.
CUHS := $(filter-out pppm_cubin.h, $(CUS:lal_%.cu=%_cubin.h)) pppm_f_cubin.h pppm_d_cubin.h
CUHS := $(addprefix $(OBJ_DIR)/, $(CUHS))

# $(OBJ_DIR) is an order-only prerequisite here and on every rule that writes
# into it: the directory must exist first, but its timestamp never triggers
# a rebuild. This keeps single-target and parallel (-j) builds working.
all: $(CUHS) $(LIB_DIR)/libgpu.a $(BIN_DIR)/hip_get_devices | $(OBJ_DIR)

$(OBJ_DIR):
	mkdir -p $@

# GPU kernels compilation
#
# Each kernel rule copies the .cu source to a temporary file carrying
# $(HIP_KERNEL_SUFFIX), compiles it to a .cubin, converts that binary to a
# C header with xxd -i, rewrites the generated identifier to the short kernel
# name with sed, and finally removes the temporary source and the .cubin.

$(OBJ_DIR)/pppm_f_cubin.h: lal_pppm.cu $(ALL_H) | $(OBJ_DIR)
	@cp $< $(OBJ_DIR)/temp_pppm_f.cu$(HIP_KERNEL_SUFFIX)
	$(HIP_GPU_CC) $(HIP_GPU_OPTS_S) $(HIP_GPU_OPTS) -Dgrdtyp=float -Dgrdtyp4=float4 $(HIP_GPU_OPTS_E) -o $(OBJ_DIR)/pppm_f.cubin $(OBJ_DIR)/temp_pppm_f.cu$(HIP_KERNEL_SUFFIX)
	@xxd -i $(OBJ_DIR)/pppm_f.cubin $@
	@sed -i "s/[a-zA-Z0-9_]*pppm_f_cubin/pppm_f/g" $@
	@rm $(OBJ_DIR)/temp_pppm_f.cu$(HIP_KERNEL_SUFFIX) $(OBJ_DIR)/pppm_f.cubin

$(OBJ_DIR)/pppm_d_cubin.h: lal_pppm.cu $(ALL_H) | $(OBJ_DIR)
	@cp $< $(OBJ_DIR)/temp_pppm_d.cu$(HIP_KERNEL_SUFFIX)
	$(HIP_GPU_CC) $(HIP_GPU_OPTS_S) $(HIP_GPU_OPTS) -Dgrdtyp=double -Dgrdtyp4=double4 $(HIP_GPU_OPTS_E) -o $(OBJ_DIR)/pppm_d.cubin $(OBJ_DIR)/temp_pppm_d.cu$(HIP_KERNEL_SUFFIX)
	@xxd -i $(OBJ_DIR)/pppm_d.cubin $@
	@sed -i "s/[a-zA-Z0-9_]*pppm_d_cubin/pppm_d/g" $@
	@rm $(OBJ_DIR)/temp_pppm_d.cu$(HIP_KERNEL_SUFFIX) $(OBJ_DIR)/pppm_d.cubin

$(OBJ_DIR)/%_cubin.h: lal_%.cu $(ALL_H) | $(OBJ_DIR)
	@cp $< $(OBJ_DIR)/temp_$*.cu$(HIP_KERNEL_SUFFIX)
	$(HIP_GPU_CC) $(HIP_GPU_OPTS_S) $(HIP_GPU_OPTS) $(HIP_GPU_OPTS_E) -o $(OBJ_DIR)/$*.cubin $(OBJ_DIR)/temp_$*.cu$(HIP_KERNEL_SUFFIX)
	@xxd -i $(OBJ_DIR)/$*.cubin $@
	@sed -i "s/[a-zA-Z0-9_]*$*_cubin/$*/g" $@
	@rm $(OBJ_DIR)/temp_$*.cu$(HIP_KERNEL_SUFFIX) $(OBJ_DIR)/$*.cubin

# host sources compilation
# Every object depends on all generated kernel headers and all library headers.

# lal_atom.cpp is additionally compiled with $(HIP_GPU_SORT_ARCH), which is
# only set in the nvcc branch above and is empty otherwise.
$(OBJ_DIR)/lal_atom.o: lal_atom.cpp $(CUHS) $(ALL_H) | $(OBJ_DIR)
	$(HIP_HOST_CC_CMD) -o $@ -c $< -I$(OBJ_DIR) $(HIP_GPU_SORT_ARCH)

$(OBJ_DIR)/lal_%.o: lal_%.cpp $(CUHS) $(ALL_H) | $(OBJ_DIR)
	$(HIP_HOST_CC_CMD) -o $@ -c $< -I$(OBJ_DIR)

# libgpu building
# Besides the archive, this recipe also writes Makefile.lammps in the current
# directory, recording the HIP platform and the HIP_LIBS_TARGET line.

$(LIB_DIR)/libgpu.a: $(OBJS)
	$(AR) -crs $@ $(OBJS)
	printf "export HIP_PLATFORM := %s\n%s\n" "$(HIP_PLATFORM)" "$(HIP_LIBS_TARGET)" > Makefile.lammps

# test app building

$(BIN_DIR)/hip_get_devices: ./geryon/ucl_get_devices.cpp $(ALL_H)
	$(HIP_HOST_CC_CMD) -o $@ $< -DUCL_HIP $(MPI_LINK_OPTS)

# Remove build products, including temp_* sources and *.cubin intermediates
# that an interrupted or failed kernel build may have left in $(OBJ_DIR).
# Makefile.lammps is intentionally left in place.
clean:
	-rm -f $(BIN_DIR)/hip_get_devices $(LIB_DIR)/libgpu.a $(OBJS) $(OBJ_DIR)/temp_* $(OBJ_DIR)/*.cubin $(CUHS)