Merge branch 'develop' into collected-small-changes

This commit is contained in:
Axel Kohlmeyer
2022-07-29 06:58:22 -04:00
9 changed files with 508 additions and 23 deletions

1
.gitattributes vendored
View File

@ -3,6 +3,7 @@
.github export-ignore
.lgtm.yml export-ignore
SECURITY.md export-ignore
CITATION.cff export-ignore
* text=auto
*.jpg -text
*.pdf -text

91
CITATION.cff Normal file
View File

@ -0,0 +1,91 @@
# YAML 1.2
---
cff-version: 1.2.0
title: "LAMMPS: Large-scale Atomic/Molecular Massively Parallel Simulator"
type: software
authors:
- family-names: "Plimpton"
given-names: "Steven J."
- family-names: "Kohlmeyer"
given-names: "Axel"
orcid: "https://orcid.org/0000-0001-6204-6475"
- family-names: "Thompson"
given-names: "Aidan P."
orcid: "https://orcid.org/0000-0002-0324-9114"
- family-names: "Moore"
given-names: "Stan G."
- family-names: "Berger"
given-names: "Richard"
orcid: "https://orcid.org/0000-0002-3044-8266"
doi: 10.5281/zenodo.3726416
license: GPL-2.0-only
url: https://www.lammps.org
repository-code: https://github.com/lammps/lammps/
keywords:
- "Molecular Dynamics"
- "Materials Modeling"
message: "If you are referencing LAMMPS in a publication, please cite the paper below."
preferred-citation:
type: article
doi: "10.1016/j.cpc.2021.108171"
url: "https://www.sciencedirect.com/science/article/pii/S0010465521002836"
authors:
- family-names: "Thompson"
given-names: "Aidan P."
orcid: "https://orcid.org/0000-0002-0324-9114"
- family-names: "Aktulga"
given-names: "H. Metin"
- family-names: "Berger"
given-names: "Richard"
orcid: "https://orcid.org/0000-0002-3044-8266"
- family-names: "Bolintineanu"
given-names: "Dan S."
- family-names: "Brown"
given-names: "W. Michael"
- family-names: "Crozier"
given-names: "Paul S."
- family-names: "in 't Veld"
given-names: "Pieter J."
- family-names: "Kohlmeyer"
given-names: "Axel"
orcid: "https://orcid.org/0000-0001-6204-6475"
- family-names: "Moore"
given-names: "Stan G."
- family-names: "Nguyen"
given-names: "Trung Dac"
- family-names: "Shan"
given-names: "Ray"
- family-names: "Stevens"
given-names: "Mark J."
- family-names: "Tranchida"
given-names: "Julien"
- family-names: "Trott"
given-names: "Christian"
- family-names: "Plimpton"
given-names: "Steven J."
title: "LAMMPS - a flexible simulation tool for particle-based materials modeling at the atomic, meso, and continuum scales"
journal: "Computer Physics Communications"
keywords:
- Molecular dynamics
- Materials modeling
- Parallel algorithms
- LAMMPS
month: 2
volume: 271
issn: 0010-4655
pages: 108171
year: 2022
references:
- title: "Fast Parallel Algorithms for Short-Range Molecular Dynamics"
type: article
journal: Journal of Computational Physics
volume: 117
number: 1
pages: "1-19"
year: 1995
issn: 0021-9991
doi: 10.1006/jcph.1995.1039
url: https://www.sciencedirect.com/science/article/pii/S002199918571039X
authors:
- family-names: "Plimpton"
given-names: "Steve"

View File

@ -233,7 +233,8 @@ elseif(GPU_API STREQUAL "OPENCL")
elseif(GPU_API STREQUAL "HIP")
if(NOT DEFINED HIP_PATH)
if(NOT DEFINED ENV{HIP_PATH})
set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to HIP installation")
message(FATAL_ERROR "GPU_API=HIP requires HIP_PATH to be defined.\n"
"Either pass the HIP_PATH as a CMake option via -DHIP_PATH=... or set the HIP_PATH environment variable.")
else()
set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to HIP installation")
endif()
@ -261,6 +262,8 @@ elseif(GPU_API STREQUAL "HIP")
if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "amd")
set(HIP_ARCH "gfx906" CACHE STRING "HIP target architecture")
elseif(HIP_PLATFORM STREQUAL "spirv")
set(HIP_ARCH "spirv" CACHE STRING "HIP target architecture")
elseif(HIP_PLATFORM STREQUAL "nvcc")
find_package(CUDA REQUIRED)
set(HIP_ARCH "sm_50" CACHE STRING "HIP primary CUDA architecture (e.g. sm_60)")
@ -340,7 +343,14 @@ elseif(GPU_API STREQUAL "HIP")
VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} --fatbin --use_fast_math -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} ${HIP_CUDA_GENCODE} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_FILE}
DEPENDS ${CU_FILE}
COMMENT "Generating ${CU_NAME}.cubin")
endif()
elseif(HIP_PLATFORM STREQUAL "spirv")
configure_file(${CU_FILE} ${CU_CPP_FILE} COPYONLY)
add_custom_command(OUTPUT ${CUBIN_FILE}
VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} -c -O3 -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_CPP_FILE}
DEPENDS ${CU_CPP_FILE}
COMMENT "Gerating ${CU_NAME}.cubin")
endif()
add_custom_command(OUTPUT ${CUBIN_H_FILE}
COMMAND ${CMAKE_COMMAND} -D SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR} -D VARNAME=${CU_NAME} -D HEADER_FILE=${CUBIN_H_FILE} -D SOURCE_FILE=${CUBIN_FILE} -P ${CMAKE_CURRENT_SOURCE_DIR}/Modules/GenerateBinaryHeader.cmake

View File

@ -123,6 +123,7 @@ CMake build
-D GPU_API=value # value = opencl (default) or cuda or hip
-D GPU_PREC=value # precision setting
# value = double or mixed (default) or single
-D HIP_PATH # path to HIP installation. Must be set if GPU_API=HIP
-D GPU_ARCH=value # primary GPU hardware choice for GPU_API=cuda
# value = sm_XX, see below
# default is sm_50
@ -179,10 +180,17 @@ set appropriate environment variables. Some variables such as
:code:`HCC_AMDGPU_TARGET` (for ROCm <= 4.0) or :code:`CUDA_PATH` are necessary for :code:`hipcc`
and the linker to work correctly.
Using CHIP-SPV implementation of HIP is now supported. It allows one to run HIP
code on Intel GPUs via the OpenCL or Level Zero backends. To use CHIP-SPV, you must
set :code:`-DHIP_USE_DEVICE_SORT=OFF` in your CMake command line as CHIP-SPV does not
yet support hipCUB. The use of HIP for Intel GPUs is still experimental so you
should only use this option in preparations to run on Aurora system at ANL.
.. code:: bash
# AMDGPU target (ROCm <= 4.0)
export HIP_PLATFORM=hcc
export HIP_PATH=/path/to/HIP/install
export HCC_AMDGPU_TARGET=gfx906
cmake -D PKG_GPU=on -D GPU_API=HIP -D HIP_ARCH=gfx906 -D CMAKE_CXX_COMPILER=hipcc ..
make -j 4
@ -191,6 +199,7 @@ and the linker to work correctly.
# AMDGPU target (ROCm >= 4.1)
export HIP_PLATFORM=amd
export HIP_PATH=/path/to/HIP/install
cmake -D PKG_GPU=on -D GPU_API=HIP -D HIP_ARCH=gfx906 -D CMAKE_CXX_COMPILER=hipcc ..
make -j 4
@ -199,10 +208,20 @@ and the linker to work correctly.
# CUDA target (not recommended, use GPU_ARCH=cuda)
# !!! DO NOT set CMAKE_CXX_COMPILER !!!
export HIP_PLATFORM=nvcc
export HIP_PATH=/path/to/HIP/install
export CUDA_PATH=/usr/local/cuda
cmake -D PKG_GPU=on -D GPU_API=HIP -D HIP_ARCH=sm_70 ..
make -j 4
.. code:: bash
# SPIR-V target (Intel GPUs)
export HIP_PLATFORM=spirv
export HIP_PATH=/path/to/HIP/install
export CMAKE_CXX_COMPILER=<hipcc/clang++>
cmake -D PKG_GPU=on -D GPU_API=HIP ..
make -j 4
Traditional make
^^^^^^^^^^^^^^^^

View File

@ -17,6 +17,7 @@ of time and requests from the LAMMPS user community.
Developer_flow
Developer_write
Developer_notes
Developer_updating
Developer_plugins
Developer_unittest
Classes

View File

@ -0,0 +1,324 @@
Notes for updating code written for older LAMMPS versions
---------------------------------------------------------
This section documents how C++ source files that are available *outside
of the LAMMPS source distribution* (e.g. in external USER packages or as
source files provided as a supplement to a publication) that are written
for an older version of LAMMPS and thus need to be updated to be
compatible with the current version of LAMMPS. Due to the active
development of LAMMPS it is likely to always be incomplete. Please
contact developer@lammps.org in case you run across an issue that is not
(yet) listed here. Please also review the latest information about the
LAMMPS :doc:`programming style conventions <Modify_style>`, especially
if you are considering to submit the updated version for inclusion into
the LAMMPS distribution.
Available topics in mostly chronological order are:
- `Setting flags in the constructor`_
- `Rename of pack/unpack_comm() to pack/unpack_forward_comm()`_
- `Use ev_init() to initialize variables derived from eflag and vflag`_
- `Use utils::numeric() functions instead of force->numeric()`_
- `Use utils::open_potential() function to open potential files`_
- `Simplify customized error messages`_
- `Use of "override" instead of "virtual"`_
- `Simplified and more compact neighbor list requests`_
----
Setting flags in the constructor
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
As LAMMPS gains additional functionality, new flags may need to be set
in the constructor or a class to signal compatibility with such features.
Most of the time the defaults are chosen conservatively, but sometimes
the conservative choice is the uncommon choice, and then those settings
need to be made when updating code.
Pair styles:
- ``manybody_flag``: set to 1 if your pair style is not pair-wise additive
- ``restartinfo``: set to 0 if your pair style does not store data in restart files
Rename of pack/unpack_comm() to pack/unpack_forward_comm()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. versionchanged:: 8Aug2014
In this change set the functions to pack data into communication buffers
and to unpack data from communication buffers for :doc:`forward
communications <Developer_comm_ops>` were renamed from ``pack_comm()``
and ``unpack_comm()`` to ``pack_forward_comm()`` and
``unpack_forward_comm()``, respectively. Also the meaning of the return
value of these functions was changed: rather than returning the number
of items per atom stored in the buffer, now the total number of items
added (or unpacked) needs to be returned. Here is an example from the
`PairEAM` class. Of course the member function declaration in corresponding
header file needs to be updated accordingly.
Old:
.. code-block:: C++
int PairEAM::pack_comm(int n, int *list, double *buf, int pbc_flag, int *pbc)
{
int m = 0;
for (int i = 0; i < n; i++) {
int j = list[i];
buf[m++] = fp[j];
}
return 1;
}
New:
.. code-block:: C++
int PairEAM::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc)
{
int m = 0;
for (int i = 0; i < n; i++) {
int j = list[i];
buf[m++] = fp[j];
}
return m;
}
.. note::
Because the various "pack" and "unpack" functions are defined in the
respective base classes as dummy functions doing nothing, and because
of the the name mismatch the custom versions in the derived class
will no longer be called, there will be no compilation error when
this change is not applied. Only calculations will suddenly produce
incorrect results because the required forward communication calls
will cease to function correctly.
Use ev_init() to initialize variables derived from eflag and vflag
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. versionchanged:: 29Mar2019
There are several variables that need to be initialized based on
the values of the "eflag" and "vflag" variables and since sometimes
there are new bits added and new variables need to be set to 1 or 0.
To make this consistent, across all styles, there is now an inline
function ``ev_init(eflag, vflag)`` that makes those settings
consistently and calls either ``ev_setup()`` or ``ev_unset()``.
Example from a pair style:
Old:
.. code-block:: C++
if (eflag || vflag) ev_setup(eflag, vflag);
else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
New:
.. code-block:: C++
ev_init(eflag, vflag);
Not applying this change will not cause a compilation error, but
can lead to inconsistent behavior and incorrect tallying of
energy or virial.
Use utils::numeric() functions instead of force->numeric()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. versionchanged:: 18Sep2020
The "numeric()" conversion functions (including "inumeric()",
"bnumeric()", and "tnumeric()") have been moved from the Force class to
the utils namespace. Also they take an additional argument that selects
whether the ``Error::all()`` or ``Error::one()`` function should be
called in case of an error. The former should be used when *all* MPI
processes call the conversion function and the latter *must* be used
when they are called from only one or a subset of the MPI processes.
Old:
.. code-block:: C++
val = force->numeric(FLERR, arg[1]);
num = force->inumeric(FLERR, arg[2]);
New:
.. code-block:: C++
val = utils::numeric(FLERR, true, arg[1], lmp);
num = utils::inumeric(FLERR, false, arg[2], lmp);
.. seealso::
:cpp:func:`utils::numeric() <LAMMPS_NS::utils::numeric>`,
:cpp:func:`utils::inumeric() <LAMMPS_NS::utils::inumeric>`,
:cpp:func:`utils::bnumeric() <LAMMPS_NS::utils::bnumeric>`,
:cpp:func:`utils::tnumeric() <LAMMPS_NS::utils::tnumeric>`
Use utils::open_potential() function to open potential files
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. versionchanged:: 18Sep2020
The :cpp:func:`utils::open_potential()
<LAMMPS_NS::utils::open_potential>` function must be used to replace
calls to ``force->open_potential()`` and should be used to replace
``fopen()`` for opening potential files for reading. The custom
function does three additional steps compared to ``fopen()``: 1) it will
try to parse the ``UNITS:`` and ``DATE:`` metadata will stop with an
error on a units mismatch and will print the date info, if present, in
the log file; 2) for pair styles that support it, it will set up
possible automatic unit conversions based on the embedded unit
information and LAMMPS' current units setting; 3) it will not only try
to open a potential file at the given path, but will also search in the
folders listed in the ``LAMMPS_POTENTIALS`` environment variable. This
allows to keep potential files in a common location instead of having to
copy them around for simulations.
Old:
.. code-block:: C++
fp = force->open_potential(filename);
fp = fopen(filename, "r");
New:
.. code-block:: C++
fp = utils::open_potential(filename, lmp);
Simplify customized error messages
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. versionchanged:: 14May2021
Aided by features of the bundled {fmt} library, error messages now
can have a variable number of arguments and the string will be interpreted
as a {fmt} style format string so that custom error messages can be
easily customized without having to use temporary buffers and ``sprintf()``.
Example:
Old:
.. code-block:: C++
if (fptr == NULL) {
char str[128];
sprintf(str,"Cannot open AEAM potential file %s",filename);
error->one(FLERR,str);
}
New:
.. code-block:: C++
if (fptr == nullptr)
error->one(FLERR, "Cannot open AEAM potential file {}: {}", filename, utils::getsyserror());
Use of "override" instead of "virtual"
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. versionchanged:: 17Feb2022
Since LAMMPS requires C++11 we switched to use the "override" keyword
instead of "virtual" to indicate polymorphism in derived classes. This
allows the C++ compiler to better detect inconsistencies when an
override is intended or not. Please note that "override" has to be
added to **all** polymorph functions in derived classes and "virtual"
*only* to the function in the base class (or the destructor). Here is
an example from the ``FixWallReflect`` class:
Old:
.. code-block:: C++
FixWallReflect(class LAMMPS *, int, char **);
virtual ~FixWallReflect();
int setmask();
void init();
void post_integrate();
New:
.. code-block:: C++
FixWallReflect(class LAMMPS *, int, char **);
~FixWallReflect() override;
int setmask() override;
void init() override;
void post_integrate() override;
This change set will neither cause a compilation failure, nor will it
change functionality, but if you plan to submit the updated code for
inclusion into the LAMMPS distribution, it will be requested for achieve
a consistent :doc:`programming style <Modify_style>`.
Simplified function names for forward and reverse communication
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. versionchanged:: 24Mar2022
Rather then using the function name to distinguish between the different
forward and reverse communication functions for styles, LAMMPS now uses
the type of the "this" pointer argument.
Old:
.. code-block:: C++
comm->forward_comm_pair(this);
comm->forward_comm_fix(this);
comm->forward_comm_compute(this);
comm->forward_comm_dump(this);
comm->reverse_comm_pair(this);
comm->reverse_comm_fix(this);
comm->reverse_comm_compute(this);
comm->reverse_comm_dump(this);
New:
.. code-block:: C++
comm->forward_comm(this);
comm->reverse_comm(this);
This change is required or else the code will not compile.
Simplified and more compact neighbor list requests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. versionchanged:: 24Mar2022
This change set reduces the amount of code required to request a
neighbor list. It enforces consistency and no longer requires to change
internal data of the request. More information on neighbor list
requests can be :doc:`found here <Developer_notes>`. Example from the
``ComputeRDF`` class:
Old:
.. code-block:: C++
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->pair = 0;
neighbor->requests[irequest]->compute = 1;
neighbor->requests[irequest]->occasional = 1;
if (cutflag) {
neighbor->requests[irequest]->cut = 1;
neighbor->requests[irequest]->cutoff = mycutneigh;
}
New:
.. code-block:: C++
auto req = neighbor->add_request(this, NeighConst::REQ_OCCASIONAL);
if (cutflag) req->set_cutoff(mycutneigh);
Public access to the ``NeighRequest`` class data members has been
removed so this update is *required* to avoid compilation failure.

View File

@ -291,6 +291,7 @@ blocksize
blueviolet
bn
bni
bnumeric
bo
Bochkarev
Bochum
@ -1482,6 +1483,7 @@ intra
intralayer
intramolecular
ints
inumeric
inv
invariants
inversed
@ -2706,6 +2708,7 @@ polydispersity
polyelectrolyte
polyhedra
Polym
polymorph
polymorphism
Ponder
popen
@ -3463,6 +3466,7 @@ tmin
Tmin
tmp
tN
tnumeric
Tobias
Toennies
Tohoku

View File

@ -1,6 +1,9 @@
# /* ----------------------------------------------------------------------
# Generic Linux Makefile for HIP
# - export HIP_PLATFORM=amd (or nvcc) before execution
# - export HIP_PATH=/path/to/HIP/install path to the HIP implementation
# such as hipamd or CHIP-SPV.
# - export HIP_PLATFORM=<amd/nvcc/spirv> specify the HIP platform to use.
# Optional. If not set, will be determined by ${HIP_PATH}/bin/hipconfig.
# - change HIP_ARCH for your GPU
# ------------------------------------------------------------------------- */
@ -20,41 +23,62 @@ HIP_OPTS = -O3
HIP_HOST_OPTS = -Wno-deprecated-declarations -fopenmp
HIP_HOST_INCLUDE =
ifndef HIP_PATH
$(error HIP_PATH is not set)
endif
ifndef HIP_PLATFORM
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
endif
HIP_COMPILER=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
# use device sort
# requires linking with hipcc and hipCUB + (rocPRIM or CUB for AMD or Nvidia respectively)
ifneq (spirv,$(HIP_PLATFORM))
# hipCUB not aviable for CHIP-SPV
HIP_HOST_OPTS += -DUSE_HIP_DEVICE_SORT
endif
# path to cub
HIP_HOST_INCLUDE += -I./
# path to hipcub
HIP_HOST_INCLUDE += -I$(HIP_PATH)/../include
ifeq (amd,$(HIP_PLATFORM))
# newer version of ROCm (5.1+) require c++14 for rocprim
HIP_OPTS += -std=c++14
# newer version of ROCm (5.1+) require c++14 for rocprim
HIP_OPTS += -std=c++14
endif
# use mpi
HIP_HOST_OPTS += -DMPI_GERYON -DUCL_NO_EXIT
# this settings should match LAMMPS Makefile
MPI_COMP_OPTS = $(shell mpicxx --showme:compile)
# automatic flag detection for OpenMPI
ifeq ($(shell mpicxx --showme:compile >/dev/null 2>&1; echo $$?), 0)
MPI_COMP_OPTS = $(shell mpicxx --showme:compile) -DOMPI_SKIP_MPICXX=1
MPI_LINK_OPTS = $(shell mpicxx --showme:link)
HIP_PATH ?= $(wildcard /opt/rocm/hip)
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
HIP_COMPILER=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
# automatic flag detection for MPICH
else ifeq ($(shell mpicxx -compile_info >/dev/null 2>&1; echo $$?),0)
MPI_COMP_OPTS = $(filter -I%,$(shell mpicxx -compile_info)) -DMPICH_IGNORE_CXX_SEEK
MPI_LINK_OPTS = $(filter -Wl%,$(shell mpicxx -link_info)) $(filter -L%,$(shell mpicxx -link_info)) $(filter -l%,$(shell mpicxx -link_info))
# for other MPI libs: must set flags manually, if needed
else
MPI_COMP_OPTS =
MPI_LINK_OPTS =
endif
ifeq (hcc,$(HIP_PLATFORM))
# possible values: gfx803,gfx900,gfx906
HIP_ARCH = gfx906
# possible values: gfx803,gfx900,gfx906
HIP_ARCH = gfx906
else ifeq (amd,$(HIP_PLATFORM))
# possible values: gfx803,gfx900,gfx906
HIP_ARCH = gfx906
# possible values: gfx803,gfx900,gfx906
HIP_ARCH = gfx906
else ifeq (nvcc,$(HIP_PLATFORM))
HIP_OPTS += --use_fast_math
HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_32,code=[sm_32,compute_32] -gencode arch=compute_35,code=[sm_35,compute_35] \
HIP_OPTS += --use_fast_math
HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_32,code=[sm_32,compute_32] -gencode arch=compute_35,code=[sm_35,compute_35] \
-gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] -gencode arch=compute_53,code=[sm_53,compute_53]\
-gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] -gencode arch=compute_62,code=[sm_62,compute_62]\
-gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_72,code=[sm_72,compute_72] -gencode arch=compute_75,code=[sm_75,compute_75]
else ifeq (spirv,$(HIP_PLATFORM))
HIP_ARCH = spirv
endif
BIN_DIR = .
@ -71,7 +95,15 @@ BSH = /bin/sh
HIP_OPTS += -DUSE_HIP $(HIP_PRECISION)
HIP_GPU_OPTS += $(HIP_OPTS) -I./
ifeq (clang,$(HIP_COMPILER))
ifeq (spirv,$(HIP_PLATFORM))
HIP_HOST_OPTS += -fPIC
HIP_GPU_CC = $(HIP_PATH)/bin/hipcc -c
HIP_GPU_OPTS_S =
HIP_GPU_OPTS_E =
HIP_KERNEL_SUFFIX = .cpp
HIP_LIBS_TARGET =
export HCC_AMDGPU_TARGET := $(HIP_ARCH)
else ifeq (clang,$(HIP_COMPILER))
HIP_HOST_OPTS += -fPIC
HIP_GPU_CC = $(HIP_PATH)/bin/hipcc --genco
HIP_GPU_OPTS_S = --offload-arch=$(HIP_ARCH)

View File

@ -30,7 +30,7 @@
// -------------------------------------------------------------------------
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
#define CONFIG_ID 303
#define SIMD_SIZE 64
#else
@ -112,7 +112,7 @@
// KERNEL MACROS - TEXTURES
// -------------------------------------------------------------------------
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
#define _texture(name, type) __device__ type* name
#define _texture_2d(name, type) __device__ type* name
#else
@ -134,9 +134,12 @@
int2 qt = tex1Dfetch(q_tex,i); \
ans=__hiloint2double(qt.y, qt.x); \
}
#elseif defined(__HIP_PLATFORM_SPIRV__)
#define fetch4(ans,i,pos_tex) tex1Dfetch(&ans, pos_tex, i);
#define fetch(ans,i,q_tex) tex1Dfetch(&ans, q_tex,i);
#else
#define fetch4(ans,i,pos_tex) ans=tex1Dfetch(pos_tex, i);
#define fetch(ans,i,q_tex) ans=tex1Dfetch(q_tex,i);
#define fetch4(ans,i,pos_tex) ans=tex1Dfetch(pos_tex, i);
#define fetch(ans,i,q_tex) ans=tex1Dfetch(q_tex,i);
#endif
#else
#define fetch4(ans,i,x) ans=x[i]
@ -152,7 +155,7 @@
#define mu_tex mu_
#endif
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
#undef fetch4
#undef fetch
@ -209,7 +212,7 @@
#endif
#endif
#if defined(CUDA_PRE_NINE) || defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
#if defined(CUDA_PRE_NINE) || defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
#ifdef _SINGLE_SINGLE
#define shfl_down __shfl_down