diff --git a/.gitattributes b/.gitattributes index f81d32a235..77eb7f93f3 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3,6 +3,7 @@ .github export-ignore .lgtm.yml export-ignore SECURITY.md export-ignore +CITATION.cff export-ignore * text=auto *.jpg -text *.pdf -text diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000000..a8712ddcf2 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,91 @@ +# YAML 1.2 +--- +cff-version: 1.2.0 +title: "LAMMPS: Large-scale Atomic/Molecular Massively Parallel Simulator" +type: software +authors: + - family-names: "Plimpton" + given-names: "Steven J." + - family-names: "Kohlmeyer" + given-names: "Axel" + orcid: "https://orcid.org/0000-0001-6204-6475" + - family-names: "Thompson" + given-names: "Aidan P." + orcid: "https://orcid.org/0000-0002-0324-9114" + - family-names: "Moore" + given-names: "Stan G." + - family-names: "Berger" + given-names: "Richard" + orcid: "https://orcid.org/0000-0002-3044-8266" +doi: 10.5281/zenodo.3726416 +license: GPL-2.0-only +url: https://www.lammps.org +repository-code: https://github.com/lammps/lammps/ +keywords: + - "Molecular Dynamics" + - "Materials Modeling" +message: "If you are referencing LAMMPS in a publication, please cite the paper below." +preferred-citation: + type: article + doi: "10.1016/j.cpc.2021.108171" + url: "https://www.sciencedirect.com/science/article/pii/S0010465521002836" + authors: + - family-names: "Thompson" + given-names: "Aidan P." + orcid: "https://orcid.org/0000-0002-0324-9114" + - family-names: "Aktulga" + given-names: "H. Metin" + - family-names: "Berger" + given-names: "Richard" + orcid: "https://orcid.org/0000-0002-3044-8266" + - family-names: "Bolintineanu" + given-names: "Dan S." + - family-names: "Brown" + given-names: "W. Michael" + - family-names: "Crozier" + given-names: "Paul S." + - family-names: "in 't Veld" + given-names: "Pieter J." 
+ - family-names: "Kohlmeyer" + given-names: "Axel" + orcid: "https://orcid.org/0000-0001-6204-6475" + - family-names: "Moore" + given-names: "Stan G." + - family-names: "Nguyen" + given-names: "Trung Dac" + - family-names: "Shan" + given-names: "Ray" + - family-names: "Stevens" + given-names: "Mark J." + - family-names: "Tranchida" + given-names: "Julien" + - family-names: "Trott" + given-names: "Christian" + - family-names: "Plimpton" + given-names: "Steven J." + title: "LAMMPS - a flexible simulation tool for particle-based materials modeling at the atomic, meso, and continuum scales" + journal: "Computer Physics Communications" + keywords: + - Molecular dynamics + - Materials modeling + - Parallel algorithms + - LAMMPS + month: 2 + volume: 271 + issn: 0010-4655 + pages: 108171 + year: 2022 +references: + - title: "Fast Parallel Algorithms for Short-Range Molecular Dynamics" + type: article + journal: Journal of Computational Physics + volume: 117 + number: 1 + pages: "1-19" + year: 1995 + issn: 0021-9991 + doi: 10.1006/jcph.1995.1039 + url: https://www.sciencedirect.com/science/article/pii/S002199918571039X + authors: + - family-names: "Plimpton" + given-names: "Steve" diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake index 75569aa55d..243b5111de 100644 --- a/cmake/Modules/Packages/GPU.cmake +++ b/cmake/Modules/Packages/GPU.cmake @@ -233,7 +233,8 @@ elseif(GPU_API STREQUAL "OPENCL") elseif(GPU_API STREQUAL "HIP") if(NOT DEFINED HIP_PATH) if(NOT DEFINED ENV{HIP_PATH}) - set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to HIP installation") + message(FATAL_ERROR "GPU_API=HIP requires HIP_PATH to be defined.\n" + "Either pass the HIP_PATH as a CMake option via -DHIP_PATH=... 
or set the HIP_PATH environment variable.") else() set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to HIP installation") endif() @@ -261,6 +262,8 @@ elseif(GPU_API STREQUAL "HIP") if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "amd") set(HIP_ARCH "gfx906" CACHE STRING "HIP target architecture") + elseif(HIP_PLATFORM STREQUAL "spirv") + set(HIP_ARCH "spirv" CACHE STRING "HIP target architecture") elseif(HIP_PLATFORM STREQUAL "nvcc") find_package(CUDA REQUIRED) set(HIP_ARCH "sm_50" CACHE STRING "HIP primary CUDA architecture (e.g. sm_60)") @@ -340,7 +343,14 @@ elseif(GPU_API STREQUAL "HIP") VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} --fatbin --use_fast_math -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} ${HIP_CUDA_GENCODE} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_FILE} DEPENDS ${CU_FILE} COMMENT "Generating ${CU_NAME}.cubin") - endif() + elseif(HIP_PLATFORM STREQUAL "spirv") + configure_file(${CU_FILE} ${CU_CPP_FILE} COPYONLY) + + add_custom_command(OUTPUT ${CUBIN_FILE} + VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} -c -O3 -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_CPP_FILE} + DEPENDS ${CU_CPP_FILE} + COMMENT "Generating ${CU_NAME}.cubin") + endif() add_custom_command(OUTPUT ${CUBIN_H_FILE} COMMAND ${CMAKE_COMMAND} -D SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR} -D VARNAME=${CU_NAME} -D HEADER_FILE=${CUBIN_H_FILE} -D SOURCE_FILE=${CUBIN_FILE} -P ${CMAKE_CURRENT_SOURCE_DIR}/Modules/GenerateBinaryHeader.cmake diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst index 3dad393a52..24704d8672 100644 --- a/doc/src/Build_extras.rst +++ b/doc/src/Build_extras.rst @@ -123,6 +123,7 @@ CMake build -D GPU_API=value # value = opencl (default) or cuda or hip -D GPU_PREC=value # precision setting # value = double or mixed (default) or single + -D HIP_PATH # path to HIP installation. 
Must be set if GPU_API=HIP -D GPU_ARCH=value # primary GPU hardware choice for GPU_API=cuda # value = sm_XX, see below # default is sm_50 @@ -179,10 +180,17 @@ set appropriate environment variables. Some variables such as :code:`HCC_AMDGPU_TARGET` (for ROCm <= 4.0) or :code:`CUDA_PATH` are necessary for :code:`hipcc` and the linker to work correctly. +Using the CHIP-SPV implementation of HIP is now supported. It allows one to run HIP +code on Intel GPUs via the OpenCL or Level Zero backends. To use CHIP-SPV, you must +set :code:`-DHIP_USE_DEVICE_SORT=OFF` in your CMake command line as CHIP-SPV does not +yet support hipCUB. The use of HIP for Intel GPUs is still experimental so you +should only use this option in preparation to run on the Aurora system at ANL. + .. code:: bash # AMDGPU target (ROCm <= 4.0) export HIP_PLATFORM=hcc + export HIP_PATH=/path/to/HIP/install export HCC_AMDGPU_TARGET=gfx906 cmake -D PKG_GPU=on -D GPU_API=HIP -D HIP_ARCH=gfx906 -D CMAKE_CXX_COMPILER=hipcc .. make -j 4 @@ -191,6 +199,7 @@ and the linker to work correctly. # AMDGPU target (ROCm >= 4.1) export HIP_PLATFORM=amd + export HIP_PATH=/path/to/HIP/install cmake -D PKG_GPU=on -D GPU_API=HIP -D HIP_ARCH=gfx906 -D CMAKE_CXX_COMPILER=hipcc .. make -j 4 @@ -199,10 +208,20 @@ and the linker to work correctly. # CUDA target (not recommended, use GPU_ARCH=cuda) # !!! DO NOT set CMAKE_CXX_COMPILER !!! export HIP_PLATFORM=nvcc + export HIP_PATH=/path/to/HIP/install export CUDA_PATH=/usr/local/cuda cmake -D PKG_GPU=on -D GPU_API=HIP -D HIP_ARCH=sm_70 .. make -j 4 +.. code:: bash + + # SPIR-V target (Intel GPUs) + export HIP_PLATFORM=spirv + export HIP_PATH=/path/to/HIP/install + export CMAKE_CXX_COMPILER= + cmake -D PKG_GPU=on -D GPU_API=HIP .. 
+ make -j 4 + Traditional make ^^^^^^^^^^^^^^^^ diff --git a/doc/src/Developer.rst b/doc/src/Developer.rst index bb10fcffd7..dc3fac94ce 100644 --- a/doc/src/Developer.rst +++ b/doc/src/Developer.rst @@ -17,6 +17,7 @@ of time and requests from the LAMMPS user community. Developer_flow Developer_write Developer_notes + Developer_updating Developer_plugins Developer_unittest Classes diff --git a/doc/src/Developer_updating.rst b/doc/src/Developer_updating.rst new file mode 100644 index 0000000000..f33632d91a --- /dev/null +++ b/doc/src/Developer_updating.rst @@ -0,0 +1,324 @@ +Notes for updating code written for older LAMMPS versions +--------------------------------------------------------- + +This section documents how C++ source files that are available *outside +of the LAMMPS source distribution* (e.g. in external USER packages or as +source files provided as a supplement to a publication) that are written +for an older version of LAMMPS need to be updated to be +compatible with the current version of LAMMPS. Due to the active +development of LAMMPS it is likely to always be incomplete. Please +contact developer@lammps.org in case you run across an issue that is not +(yet) listed here. Please also review the latest information about the +LAMMPS :doc:`programming style conventions <Modify_style>`, especially +if you are considering submitting the updated version for inclusion into +the LAMMPS distribution. 
+ +Available topics in mostly chronological order are: + +- `Setting flags in the constructor`_ +- `Rename of pack/unpack_comm() to pack/unpack_forward_comm()`_ +- `Use ev_init() to initialize variables derived from eflag and vflag`_ +- `Use utils::numeric() functions instead of force->numeric()`_ +- `Use utils::open_potential() function to open potential files`_ +- `Simplify customized error messages`_ +- `Use of "override" instead of "virtual"`_ +- `Simplified and more compact neighbor list requests`_ + +---- + +Setting flags in the constructor +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As LAMMPS gains additional functionality, new flags may need to be set +in the constructor of a class to signal compatibility with such features. +Most of the time the defaults are chosen conservatively, but sometimes +the conservative choice is the uncommon choice, and then those settings +need to be made when updating code. + +Pair styles: + + - ``manybody_flag``: set to 1 if your pair style is not pair-wise additive + - ``restartinfo``: set to 0 if your pair style does not store data in restart files + + +Rename of pack/unpack_comm() to pack/unpack_forward_comm() +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. versionchanged:: 8Aug2014 + +In this change set the functions to pack data into communication buffers +and to unpack data from communication buffers for :doc:`forward +communications <Developer_comm_ops>` were renamed from ``pack_comm()`` +and ``unpack_comm()`` to ``pack_forward_comm()`` and +``unpack_forward_comm()``, respectively. Also the meaning of the return +value of these functions was changed: rather than returning the number +of items per atom stored in the buffer, now the total number of items +added (or unpacked) needs to be returned. Here is an example from the +`PairEAM` class. Of course the member function declaration in the corresponding +header file needs to be updated accordingly. + +Old: + +.. 
code-block:: C++ + + int PairEAM::pack_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) + { + int m = 0; + for (int i = 0; i < n; i++) { + int j = list[i]; + buf[m++] = fp[j]; + } + return 1; + } + +New: + +.. code-block:: C++ + + int PairEAM::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) + { + int m = 0; + for (int i = 0; i < n; i++) { + int j = list[i]; + buf[m++] = fp[j]; + } + return m; + } + +.. note:: + + Because the various "pack" and "unpack" functions are defined in the + respective base classes as dummy functions doing nothing, and because + of the name mismatch the custom versions in the derived class + will no longer be called, there will be no compilation error when + this change is not applied. Only calculations will suddenly produce + incorrect results because the required forward communication calls + will cease to function correctly. + +Use ev_init() to initialize variables derived from eflag and vflag +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. versionchanged:: 29Mar2019 + +There are several variables that need to be initialized based on +the values of the "eflag" and "vflag" variables, and sometimes +new bits are added and new variables need to be set to 1 or 0. +To make this consistent, across all styles, there is now an inline +function ``ev_init(eflag, vflag)`` that makes those settings +consistently and calls either ``ev_setup()`` or ``ev_unset()``. +Example from a pair style: + +Old: + +.. code-block:: C++ + + if (eflag || vflag) ev_setup(eflag, vflag); + else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; + +New: + +.. code-block:: C++ + + ev_init(eflag, vflag); + +Not applying this change will not cause a compilation error, but +can lead to inconsistent behavior and incorrect tallying of +energy or virial. + +Use utils::numeric() functions instead of force->numeric() +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
versionchanged:: 18Sep2020 + +The "numeric()" conversion functions (including "inumeric()", +"bnumeric()", and "tnumeric()") have been moved from the Force class to +the utils namespace. Also they take an additional argument that selects +whether the ``Error::all()`` or ``Error::one()`` function should be +called in case of an error. The former should be used when *all* MPI +processes call the conversion function and the latter *must* be used +when they are called from only one or a subset of the MPI processes. + +Old: + +.. code-block:: C++ + + val = force->numeric(FLERR, arg[1]); + num = force->inumeric(FLERR, arg[2]); + +New: + +.. code-block:: C++ + + val = utils::numeric(FLERR, true, arg[1], lmp); + num = utils::inumeric(FLERR, false, arg[2], lmp); + +.. seealso:: + + :cpp:func:`utils::numeric() <LAMMPS_NS::utils::numeric>`, + :cpp:func:`utils::inumeric() <LAMMPS_NS::utils::inumeric>`, + :cpp:func:`utils::bnumeric() <LAMMPS_NS::utils::bnumeric>`, + :cpp:func:`utils::tnumeric() <LAMMPS_NS::utils::tnumeric>` + +Use utils::open_potential() function to open potential files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. versionchanged:: 18Sep2020 + +The :cpp:func:`utils::open_potential() +<LAMMPS_NS::utils::open_potential>` function must be used to replace +calls to ``force->open_potential()`` and should be used to replace +``fopen()`` for opening potential files for reading. The custom +function does three additional steps compared to ``fopen()``: 1) it will +try to parse the ``UNITS:`` and ``DATE:`` metadata, will stop with an +error on a units mismatch and will print the date info, if present, in +the log file; 2) for pair styles that support it, it will set up +possible automatic unit conversions based on the embedded unit +information and LAMMPS' current units setting; 3) it will not only try +to open a potential file at the given path, but will also search in the +folders listed in the ``LAMMPS_POTENTIALS`` environment variable. This +allows one to keep potential files in a common location instead of having to +copy them around for simulations. + +Old: + +.. 
code-block:: C++ + + fp = force->open_potential(filename); + fp = fopen(filename, "r"); + +New: + +.. code-block:: C++ + + fp = utils::open_potential(filename, lmp); + +Simplify customized error messages +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. versionchanged:: 14May2021 + +Aided by features of the bundled {fmt} library, error messages now +can have a variable number of arguments and the string will be interpreted +as a {fmt} style format string so that custom error messages can be +easily customized without having to use temporary buffers and ``sprintf()``. +Example: + +Old: + +.. code-block:: C++ + + if (fptr == NULL) { + char str[128]; + sprintf(str,"Cannot open AEAM potential file %s",filename); + error->one(FLERR,str); + } + +New: + +.. code-block:: C++ + + if (fptr == nullptr) + error->one(FLERR, "Cannot open AEAM potential file {}: {}", filename, utils::getsyserror()); + +Use of "override" instead of "virtual" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. versionchanged:: 17Feb2022 + +Since LAMMPS requires C++11 we switched to use the "override" keyword +instead of "virtual" to indicate polymorphism in derived classes. This +allows the C++ compiler to better detect inconsistencies when an +override is intended or not. Please note that "override" has to be +added to **all** polymorph functions in derived classes and "virtual" +*only* to the function in the base class (or the destructor). Here is +an example from the ``FixWallReflect`` class: + +Old: + +.. code-block:: C++ + + FixWallReflect(class LAMMPS *, int, char **); + virtual ~FixWallReflect(); + int setmask(); + void init(); + void post_integrate(); + +New: + +.. 
code-block:: C++ + + FixWallReflect(class LAMMPS *, int, char **); + ~FixWallReflect() override; + int setmask() override; + void init() override; + void post_integrate() override; + +This change set will neither cause a compilation failure, nor will it +change functionality, but if you plan to submit the updated code for +inclusion into the LAMMPS distribution, it will be requested to achieve +a consistent :doc:`programming style <Modify_style>`. + +Simplified function names for forward and reverse communication +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. versionchanged:: 24Mar2022 + +Rather than using the function name to distinguish between the different +forward and reverse communication functions for styles, LAMMPS now uses +the type of the "this" pointer argument. + +Old: + +.. code-block:: C++ + + comm->forward_comm_pair(this); + comm->forward_comm_fix(this); + comm->forward_comm_compute(this); + comm->forward_comm_dump(this); + comm->reverse_comm_pair(this); + comm->reverse_comm_fix(this); + comm->reverse_comm_compute(this); + comm->reverse_comm_dump(this); + +New: + +.. code-block:: C++ + + comm->forward_comm(this); + comm->reverse_comm(this); + +This change is required or else the code will not compile. + +Simplified and more compact neighbor list requests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. versionchanged:: 24Mar2022 + +This change set reduces the amount of code required to request a +neighbor list. It enforces consistency and no longer requires changing +internal data of the request. More information on neighbor list +requests can be :doc:`found here <Developer_notes>`. Example from the +``ComputeRDF`` class: + +Old: + +.. 
code-block:: C++ + + int irequest = neighbor->request(this,instance_me); + neighbor->requests[irequest]->pair = 0; + neighbor->requests[irequest]->compute = 1; + neighbor->requests[irequest]->occasional = 1; + if (cutflag) { + neighbor->requests[irequest]->cut = 1; + neighbor->requests[irequest]->cutoff = mycutneigh; + } + +New: + +.. code-block:: C++ + + auto req = neighbor->add_request(this, NeighConst::REQ_OCCASIONAL); + if (cutflag) req->set_cutoff(mycutneigh); + +Public access to the ``NeighRequest`` class data members has been +removed so this update is *required* to avoid compilation failure. diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index 9f6edbec4c..9006f99e50 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -291,6 +291,7 @@ blocksize blueviolet bn bni +bnumeric bo Bochkarev Bochum @@ -1482,6 +1483,7 @@ intra intralayer intramolecular ints +inumeric inv invariants inversed @@ -2706,6 +2708,7 @@ polydispersity polyelectrolyte polyhedra Polym +polymorph polymorphism Ponder popen @@ -3463,6 +3466,7 @@ tmin Tmin tmp tN +tnumeric Tobias Toennies Tohoku diff --git a/lib/gpu/Makefile.hip b/lib/gpu/Makefile.hip index 9b6087bcc3..f5a0d03608 100644 --- a/lib/gpu/Makefile.hip +++ b/lib/gpu/Makefile.hip @@ -1,6 +1,9 @@ # /* ---------------------------------------------------------------------- # Generic Linux Makefile for HIP -# - export HIP_PLATFORM=amd (or nvcc) before execution +# - export HIP_PATH=/path/to/HIP/install path to the HIP implementation +# such as hipamd or CHIP-SPV. +# - export HIP_PLATFORM= specify the HIP platform to use. +# Optional. If not set, will be determined by ${HIP_PATH}/bin/hipconfig. 
# - change HIP_ARCH for your GPU # ------------------------------------------------------------------------- */ @@ -20,41 +23,62 @@ HIP_OPTS = -O3 HIP_HOST_OPTS = -Wno-deprecated-declarations -fopenmp HIP_HOST_INCLUDE = +ifndef HIP_PATH +$(error HIP_PATH is not set) +endif + +ifndef HIP_PLATFORM + HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform) +endif +HIP_COMPILER=$(shell $(HIP_PATH)/bin/hipconfig --compiler) + # use device sort # requires linking with hipcc and hipCUB + (rocPRIM or CUB for AMD or Nvidia respectively) +ifneq (spirv,$(HIP_PLATFORM)) +# hipCUB not available for CHIP-SPV HIP_HOST_OPTS += -DUSE_HIP_DEVICE_SORT +endif # path to cub HIP_HOST_INCLUDE += -I./ # path to hipcub HIP_HOST_INCLUDE += -I$(HIP_PATH)/../include ifeq (amd,$(HIP_PLATFORM)) - # newer version of ROCm (5.1+) require c++14 for rocprim - HIP_OPTS += -std=c++14 +# newer version of ROCm (5.1+) require c++14 for rocprim +HIP_OPTS += -std=c++14 endif # use mpi HIP_HOST_OPTS += -DMPI_GERYON -DUCL_NO_EXIT # this settings should match LAMMPS Makefile -MPI_COMP_OPTS = $(shell mpicxx --showme:compile) +# automatic flag detection for OpenMPI +ifeq ($(shell mpicxx --showme:compile >/dev/null 2>&1; echo $$?), 0) +MPI_COMP_OPTS = $(shell mpicxx --showme:compile) -DOMPI_SKIP_MPICXX=1 MPI_LINK_OPTS = $(shell mpicxx --showme:link) - -HIP_PATH ?= $(wildcard /opt/rocm/hip) -HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform) -HIP_COMPILER=$(shell $(HIP_PATH)/bin/hipconfig --compiler) +# automatic flag detection for MPICH +else ifeq ($(shell mpicxx -compile_info >/dev/null 2>&1; echo $$?),0) +MPI_COMP_OPTS = $(filter -I%,$(shell mpicxx -compile_info)) -DMPICH_IGNORE_CXX_SEEK +MPI_LINK_OPTS = $(filter -Wl%,$(shell mpicxx -link_info)) $(filter -L%,$(shell mpicxx -link_info)) $(filter -l%,$(shell mpicxx -link_info)) +# for other MPI libs: must set flags manually, if needed +else +MPI_COMP_OPTS = +MPI_LINK_OPTS = +endif ifeq (hcc,$(HIP_PLATFORM)) - # possible values: gfx803,gfx900,gfx906 - 
HIP_ARCH = gfx906 +# possible values: gfx803,gfx900,gfx906 +HIP_ARCH = gfx906 else ifeq (amd,$(HIP_PLATFORM)) - # possible values: gfx803,gfx900,gfx906 - HIP_ARCH = gfx906 +# possible values: gfx803,gfx900,gfx906 +HIP_ARCH = gfx906 else ifeq (nvcc,$(HIP_PLATFORM)) - HIP_OPTS += --use_fast_math - HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_32,code=[sm_32,compute_32] -gencode arch=compute_35,code=[sm_35,compute_35] \ +HIP_OPTS += --use_fast_math +HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_32,code=[sm_32,compute_32] -gencode arch=compute_35,code=[sm_35,compute_35] \ -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] -gencode arch=compute_53,code=[sm_53,compute_53]\ -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] -gencode arch=compute_62,code=[sm_62,compute_62]\ -gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_72,code=[sm_72,compute_72] -gencode arch=compute_75,code=[sm_75,compute_75] +else ifeq (spirv,$(HIP_PLATFORM)) +HIP_ARCH = spirv endif BIN_DIR = . 
@@ -71,7 +95,15 @@ BSH = /bin/sh HIP_OPTS += -DUSE_HIP $(HIP_PRECISION) HIP_GPU_OPTS += $(HIP_OPTS) -I./ -ifeq (clang,$(HIP_COMPILER)) +ifeq (spirv,$(HIP_PLATFORM)) + HIP_HOST_OPTS += -fPIC + HIP_GPU_CC = $(HIP_PATH)/bin/hipcc -c + HIP_GPU_OPTS_S = + HIP_GPU_OPTS_E = + HIP_KERNEL_SUFFIX = .cpp + HIP_LIBS_TARGET = + export HCC_AMDGPU_TARGET := $(HIP_ARCH) +else ifeq (clang,$(HIP_COMPILER)) HIP_HOST_OPTS += -fPIC HIP_GPU_CC = $(HIP_PATH)/bin/hipcc --genco HIP_GPU_OPTS_S = --offload-arch=$(HIP_ARCH) diff --git a/lib/gpu/lal_pre_cuda_hip.h b/lib/gpu/lal_pre_cuda_hip.h index 47a005b998..ec666a2863 100644 --- a/lib/gpu/lal_pre_cuda_hip.h +++ b/lib/gpu/lal_pre_cuda_hip.h @@ -30,7 +30,7 @@ // ------------------------------------------------------------------------- -#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) +#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__) #define CONFIG_ID 303 #define SIMD_SIZE 64 #else @@ -112,7 +112,7 @@ // KERNEL MACROS - TEXTURES // ------------------------------------------------------------------------- -#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) +#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__) #define _texture(name, type) __device__ type* name #define _texture_2d(name, type) __device__ type* name #else @@ -134,9 +134,12 @@ int2 qt = tex1Dfetch(q_tex,i); \ ans=__hiloint2double(qt.y, qt.x); \ } + #elif defined(__HIP_PLATFORM_SPIRV__) + #define fetch4(ans,i,pos_tex) tex1Dfetch(&ans, pos_tex, i); + #define fetch(ans,i,q_tex) tex1Dfetch(&ans, q_tex,i); #else - #define fetch4(ans,i,pos_tex) ans=tex1Dfetch(pos_tex, i); - #define fetch(ans,i,q_tex) ans=tex1Dfetch(q_tex,i); + #define fetch4(ans,i,pos_tex) ans=tex1Dfetch(pos_tex, i); + #define fetch(ans,i,q_tex) ans=tex1Dfetch(q_tex,i); #endif #else #define fetch4(ans,i,x) ans=x[i] @@ -152,7 +155,7 @@ #define mu_tex mu_ #endif -#if 
defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) +#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__) #undef fetch4 #undef fetch @@ -209,7 +212,7 @@ #endif #endif -#if defined(CUDA_PRE_NINE) || defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) +#if defined(CUDA_PRE_NINE) || defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__) #ifdef _SINGLE_SINGLE #define shfl_down __shfl_down