diff --git a/doc/src/Python_properties.rst b/doc/src/Python_properties.rst index d8e772379c..031461660a 100644 --- a/doc/src/Python_properties.rst +++ b/doc/src/Python_properties.rst @@ -53,6 +53,7 @@ against invalid accesses. * :py:meth:`version() `: return the numerical version id, e.g. LAMMPS 2 Sep 2015 -> 20150902 * :py:meth:`get_thermo() `: return current value of a thermo keyword + * :py:meth:`last_thermo() `: return a dictionary of the last thermodynamic output * :py:meth:`get_natoms() `: total # of atoms as int * :py:meth:`reset_box() `: reset the simulation box size * :py:meth:`extract_setting() `: return a global setting @@ -60,6 +61,10 @@ against invalid accesses. * :py:meth:`extract_box() `: extract box info * :py:meth:`create_atoms() `: create N atoms with IDs, types, x, v, and image flags + **Properties**: + + * :py:attr:`last_thermo_step `: the last timestep thermodynamic output was computed + .. tab:: PyLammps/IPyLammps API In addition to the functions provided by :py:class:`lammps `, :py:class:`PyLammps ` objects diff --git a/doc/src/package.rst b/doc/src/package.rst index 6d425b63dd..b0549ba7d9 100644 --- a/doc/src/package.rst +++ b/doc/src/package.rst @@ -49,7 +49,7 @@ Syntax *intel* args = NPhi keyword value ... Nphi = # of co-processors per node zero or more keyword/value pairs may be appended - keywords = *mode* or *omp* or *lrt* or *balance* or *ghost* or *tpc* or *tptask* or *no_affinity* + keywords = *mode* or *omp* or *lrt* or *balance* or *ghost* or *tpc* or *tptask* or *pppm_table* or *no_affinity* *mode* value = *single* or *mixed* or *double* single = perform force calculations in single precision mixed = perform force calculations in mixed precision @@ -68,6 +68,9 @@ Syntax Ntpc = max number of co-processor threads per co-processor core (default = 4) *tptask* value = Ntptask Ntptask = max number of co-processor threads per MPI task (default = 240) + *pppm_table* value = *yes* or *no* + *yes* = Precompute pppm values in table (doesn't change accuracy) + *no* = Compute pppm values on the fly *no_affinity* values = none *kokkos* args = keyword value ... zero or more keyword/value pairs may be appended @@ -392,13 +395,13 @@ is identical to the default *verlet* style aside from supporting the LRT feature. This feature requires setting the pre-processor flag -DLMP_INTEL_USELRT in the makefile when compiling LAMMPS. -The *balance* keyword sets the fraction of :doc:`pair style ` work offloaded to the co-processor for split -values between 0.0 and 1.0 inclusive. While this fraction of work is -running on the co-processor, other calculations will run on the host, -including neighbor and pair calculations that are not offloaded, as -well as angle, bond, dihedral, kspace, and some MPI communications. -If *split* is set to -1, the fraction of work is dynamically adjusted -automatically throughout the run. This typically give performance +The *balance* keyword sets the fraction of :doc:`pair style ` work +offloaded to the co-processor for split values between 0.0 and 1.0 inclusive. +While this fraction of work is running on the co-processor, other calculations +will run on the host, including neighbor and pair calculations that are not +offloaded, as well as angle, bond, dihedral, kspace, and some MPI +communications. If *split* is set to -1, the fraction of work is dynamically +adjusted automatically throughout the run. This typically give performance within 5 to 10 percent of the optimal fixed fraction. The *ghost* keyword determines whether or not ghost atoms, i.e. atoms @@ -430,6 +433,13 @@ with 16 threads, for a total of 128. Note that the default settings for *tpc* and *tptask* are fine for most problems, regardless of how many MPI tasks you assign to a Phi. +.. versionadded:: TBD + +The *pppm_table* keyword with the argument yes allows to use a +pre-computed table to efficiently spread the charge to the PPPM grid. +This feature is enabled by default but can be turned off using the +keyword with the argument *no*. + The *no_affinity* keyword will turn off automatic setting of core affinity for MPI tasks and OpenMP threads on the host when using offload to a co-processor. Affinity settings are used when possible @@ -699,39 +709,37 @@ Related commands Default """"""" -For the GPU package, the default is Ngpu = 0 and the option defaults -are neigh = yes, newton = off, binsize = 0.0, split = 1.0, gpuID = 0 -to Ngpu-1, tpa = 1, omp = 0, and platform=-1. These settings are made -automatically if the "-sf gpu" :doc:`command-line switch ` -is used. If it is not used, you must invoke the package gpu command -in your input script or via the "-pk gpu" :doc:`command-line switch `. +For the GPU package, the default is Ngpu = 0 and the option defaults are neigh += yes, newton = off, binsize = 0.0, split = 1.0, gpuID = 0 to Ngpu-1, tpa = 1, +omp = 0, and platform=-1. These settings are made automatically if the "-sf +gpu" :doc:`command-line switch ` is used. If it is not used, you +must invoke the package gpu command in your input script or via the "-pk gpu" +:doc:`command-line switch `. -For the INTEL package, the default is Nphi = 1 and the option -defaults are omp = 0, mode = mixed, lrt = no, balance = -1, tpc = 4, -tptask = 240. The default ghost option is determined by the pair -style being used. This value is output to the screen in the offload -report at the end of each run. Note that all of these settings, -except "omp" and "mode", are ignored if LAMMPS was not built with Xeon -Phi co-processor support. These settings are made automatically if the -"-sf intel" :doc:`command-line switch ` is used. If it is -not used, you must invoke the package intel command in your input -script or via the "-pk intel" :doc:`command-line switch `. +For the INTEL package, the default is Nphi = 1 and the option defaults are omp += 0, mode = mixed, lrt = no, balance = -1, tpc = 4, tptask = 240, pppm_table = +yes. The default ghost option is determined by the pair style being used. +This value is output to the screen in the offload report at the end of each +run. Note that all of these settings, except "omp" and "mode", are ignored if +LAMMPS was not built with Xeon Phi co-processor support. These settings are +made automatically if the "-sf intel" :doc:`command-line switch ` +is used. If it is not used, you must invoke the package intel command in your +input script or via the "-pk intel" :doc:`command-line switch `. For the KOKKOS package, the option defaults for GPUs are neigh = full, -neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default -value, comm = device, sort = device, neigh/transpose = off, gpu/aware = -on. When LAMMPS can safely detect that GPU-aware MPI is not available, -the default value of gpu/aware becomes "off". For CPUs or Xeon Phis, the -option defaults are neigh = half, neigh/qeq = half, newton = on, binsize -= 0.0, comm = no, and sort = no. The option neigh/thread = on when -there are 16K atoms or less on an MPI rank, otherwise it is "off". These -settings are made automatically by the required "-k on" -:doc:`command-line switch `. You can change them by using -the package kokkos command in your input script or via the :doc:`-pk +neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default value, +comm = device, sort = device, neigh/transpose = off, gpu/aware = on. When +LAMMPS can safely detect that GPU-aware MPI is not available, the default value +of gpu/aware becomes "off". For CPUs or Xeon Phis, the option defaults are +neigh = half, neigh/qeq = half, newton = on, binsize = 0.0, comm = no, and sort += no. The option neigh/thread = on when there are 16K atoms or less on an MPI +rank, otherwise it is "off". These settings are made automatically by the +required "-k on" :doc:`command-line switch `. You can change them +by using the package kokkos command in your input script or via the :doc:`-pk kokkos command-line switch `. -For the OMP package, the default is Nthreads = 0 and the option -defaults are neigh = yes. These settings are made automatically if -the "-sf omp" :doc:`command-line switch ` is used. If it -is not used, you must invoke the package omp command in your input -script or via the "-pk omp" :doc:`command-line switch `. +For the OMP package, the default is Nthreads = 0 and the option defaults are +neigh = yes. These settings are made automatically if the "-sf omp" +:doc:`command-line switch ` is used. If it is not used, you must +invoke the package omp command in your input script or via the "-pk omp" +:doc:`command-line switch `. diff --git a/examples/COUPLE/plugin/liblammpsplugin.h b/examples/COUPLE/plugin/liblammpsplugin.h index 40fba6b9e3..1520ef638d 100644 --- a/examples/COUPLE/plugin/liblammpsplugin.h +++ b/examples/COUPLE/plugin/liblammpsplugin.h @@ -37,12 +37,13 @@ #endif /* The following enums must be kept in sync with the equivalent enums - * or constants in python/lammps/constants.py, fortran/lammps.f90, - * tools/swig/lammps.i, and examples/COUPLE/plugin/liblammpsplugin.h */ + * or constants in src/library.h, src/lmptype.h, python/lammps/constants.py, + * fortran/lammps.f90, and tools/swig/lammps.i */ /* Data type constants for extracting data from atoms, computes and fixes */ enum _LMP_DATATYPE_CONST { + LAMMPS_NONE = -1, /*!< no data type assigned (yet) */ LAMMPS_INT = 0, /*!< 32-bit integer (array) */ LAMMPS_INT_2D = 1, /*!< two-dimensional 32-bit integer array */ LAMMPS_DOUBLE = 2, /*!< 64-bit double (array) */ diff --git a/fortran/lammps.f90 b/fortran/lammps.f90 index f511e6bb60..ba3997ac8e 100644 --- a/fortran/lammps.f90 +++ b/fortran/lammps.f90 @@ -44,11 +44,12 @@ MODULE LIBLAMMPS ! Data type constants for extracting data from global, atom, compute, and fix ! ! Must be kept in sync with the equivalent declarations in - ! src/library.h, python/lammps/constants.py, tools/swig/lammps.i, + ! src/library.h, src/lmptype.h, python/lammps/constants.py, tools/swig/lammps.i, ! and examples/COUPLE/plugin/liblammpsplugin.h ! ! These are NOT part of the API (the part the user sees) INTEGER(c_int), PARAMETER :: & + LAMMPS_NONE = -1, & ! no data type assigned (yet) LAMMPS_INT = 0, & ! 32-bit integer (or array) LAMMPS_INT_2D = 1, & ! two-dimensional 32-bit integer array LAMMPS_DOUBLE = 2, & ! 64-bit double (or array) @@ -1126,11 +1127,13 @@ CONTAINS FUNCTION lmp_last_thermo(self,what,index) RESULT(thermo_data) CLASS(lammps), INTENT(IN), TARGET :: self CHARACTER(LEN=*), INTENT(IN) :: what - INTEGER(c_int) :: index + INTEGER :: index + INTEGER(c_int) :: idx TYPE(lammps_data) :: thermo_data, type_data INTEGER(c_int) :: datatype TYPE(c_ptr) :: Cname, Cptr + idx = index - 1 ! set data type for known cases SELECT CASE (what) CASE ('step') @@ -1147,7 +1150,7 @@ CONTAINS datatype = LAMMPS_STRING CASE ('data') Cname = f2c_string('type') - Cptr = lammps_last_thermo(self%handle,Cname,index-1) + Cptr = lammps_last_thermo(self%handle,Cname,idx) type_data%lammps_instance => self type_data%datatype = DATA_INT CALL C_F_POINTER(Cptr, type_data%i32) @@ -1158,7 +1161,7 @@ CONTAINS END SELECT Cname = f2c_string(what) - Cptr = lammps_last_thermo(self%handle,Cname,index-1) + Cptr = lammps_last_thermo(self%handle,Cname,idx) CALL lammps_free(Cname) thermo_data%lammps_instance => self diff --git a/python/lammps/constants.py b/python/lammps/constants.py index 8fd6a9eaf5..1d0d8adb78 100644 --- a/python/lammps/constants.py +++ b/python/lammps/constants.py @@ -13,7 +13,13 @@ # various symbolic constants to be used # in certain calls to select data formats + +# these must be kept in sync with the enums in src/library.h, src/lmptype.h, +# tools/swig/lammps.i, examples/COUPLE/plugin/liblammpsplugin.h, +# and the constants in fortran/lammps.f90 + LAMMPS_AUTODETECT = None +LAMMPS_NONE = -1 LAMMPS_INT = 0 LAMMPS_INT_2D = 1 LAMMPS_DOUBLE = 2 @@ -22,8 +28,6 @@ LAMMPS_INT64 = 4 LAMMPS_INT64_2D = 5 LAMMPS_STRING = 6 -# these must be kept in sync with the enums in src/library.h, tools/swig/lammps.i, -# examples/COUPLE/plugin/liblammpsplugin.h, and the constants in fortran/lammps.f90 LMP_STYLE_GLOBAL = 0 LMP_STYLE_ATOM = 1 LMP_STYLE_LOCAL = 2 diff --git a/python/lammps/core.py b/python/lammps/core.py index d34d5de4d4..84a80e77a3 100644 --- a/python/lammps/core.py +++ b/python/lammps/core.py @@ -746,6 +746,16 @@ class lammps(object): return self.lib.lammps_get_thermo(self.lmp,name) # ------------------------------------------------------------------------- + @property + def last_thermo_step(self): + """ Get the last timestep where thermodynamic data was computed + + :return: the timestep or a negative number if there has not been any thermo output yet + :rtype: int + """ + with ExceptionCheck(self): + ptr = self.lib.lammps_last_thermo(self.lmp, c_char_p("step".encode()), 0) + return cast(ptr, POINTER(self.c_bigint)).contents.value def last_thermo(self): """Get a dictionary of the last thermodynamic output @@ -755,14 +765,12 @@ class lammps(object): data from the last timestep into a dictionary. The return value is None, if there has not been any thermo output yet. - :return: value of thermo keyword + :return: a dictionary containing the last computed thermo output values :rtype: dict or None """ rv = dict() - with ExceptionCheck(self): - ptr = self.lib.lammps_last_thermo(self.lmp, c_char_p("step".encode()), 0) - mystep = cast(ptr, POINTER(self.c_bigint)).contents.value + mystep = self.last_thermo_step if mystep < 0: return None diff --git a/python/lammps/pylammps.py b/python/lammps/pylammps.py index d0ff7ab1aa..bdbed3e971 100644 --- a/python/lammps/pylammps.py +++ b/python/lammps/pylammps.py @@ -13,7 +13,7 @@ ################################################################################ # Alternative Python Wrapper -# Written by Richard Berger +# Written by Richard Berger ################################################################################ # for python2/3 compatibility @@ -28,6 +28,7 @@ import tempfile from collections import namedtuple from .core import lammps +from .constants import * # lgtm [py/polluting-import] # ------------------------------------------------------------------------- @@ -65,22 +66,43 @@ class OutputCapture(object): # ------------------------------------------------------------------------- class Variable(object): - def __init__(self, pylammps_instance, name, style, definition): + def __init__(self, pylammps_instance, name): self._pylmp = pylammps_instance self.name = name - self.style = style - self.definition = definition.split() + + @property + def style(self): + vartype = self._pylmp.lmp.lib.lammps_extract_variable_datatype(self._pylmp.lmp.lmp, self.name.encode()) + if vartype == LMP_VAR_EQUAL: + return "equal" + elif vartype == LMP_VAR_ATOM: + return "atom" + elif vartype == LMP_VAR_VECTOR: + return "vector" + elif vartype == LMP_VAR_STRING: + return "string" + return None @property def value(self): - if self.style == 'atom': - return list(self._pylmp.lmp.extract_variable(self.name, "all", 1)) + return self._pylmp.lmp.extract_variable(self.name) + + @value.setter + def value(self, newvalue): + style = self.style + if style == "equal" or style == "string": + self._pylmp.variable("{} {} {}".format(self.name, style, newvalue)) else: - value = self._pylmp.lmp_print('"${%s}"' % self.name).strip() - try: - return float(value) - except ValueError: - return value + raise Exception("Setter not implemented for {} style variables.".format(style)) + + def __str__(self): + value = self.value + if isinstance(value, str): + value = "\"{}\"".format(value) + return "Variable(name=\"{}\", value={})".format(self.name, value) + + def __repr__(self): + return self.__str__() # ------------------------------------------------------------------------- @@ -377,55 +399,6 @@ class variable_set: def __repr__(self): return self.__str__() -# ------------------------------------------------------------------------- - -def get_thermo_data(output): - """ traverse output of runs and extract thermo data columns """ - if isinstance(output, str): - lines = output.splitlines() - else: - lines = output - - runs = [] - columns = [] - in_run = False - current_run = {} - - for line in lines: - if line.startswith("Per MPI rank memory allocation"): - in_run = True - elif in_run and len(columns) == 0: - # first line after memory usage are column names - columns = line.split() - - current_run = {} - - for col in columns: - current_run[col] = [] - - elif line.startswith("Loop time of "): - in_run = False - columns = [] - thermo_data = variable_set('ThermoData', current_run) - r = {'thermo' : thermo_data } - runs.append(namedtuple('Run', list(r.keys()))(*list(r.values()))) - elif in_run and len(columns) > 0: - items = line.split() - # Convert thermo output and store it. - # It must have the same number of columns and - # all of them must be convertible to floats. - # Otherwise we ignore the line - if len(items) == len(columns): - try: - values = [float(x) for x in items] - for i, col in enumerate(columns): - current_run[col].append(values[i]) - except ValueError: - # cannot convert. must be a non-thermo output. ignore. - pass - - return runs - # ------------------------------------------------------------------------- # ------------------------------------------------------------------------- @@ -483,6 +456,9 @@ class PyLammps(object): self._enable_cmd_history = False self.runs = [] + if not self.lmp.has_package("PYTHON"): + print("WARNING: run thermo data not captured since PYTHON LAMMPS package is not enabled") + def __enter__(self): return self @@ -573,16 +549,49 @@ class PyLammps(object): if self.enable_cmd_history: self._cmd_history.append(cmd) + def _append_run_thermo(self, thermo): + for k, v in thermo.items(): + if k in self._current_run: + self._current_run[k].append(v) + else: + self._current_run[k] = [v] + def run(self, *args, **kwargs): """ Execute LAMMPS run command with given arguments - All thermo output during the run is captured and saved as new entry in + Thermo data of the run is recorded and saved as new entry in :py:attr:`PyLammps.runs`. The latest run can be retrieved by :py:attr:`PyLammps.last_run`. + + Note, for recording of all thermo steps during a run, the PYTHON package + needs to be enabled in LAMMPS. Otherwise, it will only capture the final + timestep. """ + self._current_run = {} + self._last_thermo_step = -1 + def end_of_step_callback(lmp): + if self.lmp.last_thermo_step == self._last_thermo_step: return + thermo = self.lmp.last_thermo() + self._append_run_thermo(thermo) + self._last_thermo_step = thermo['Step'] + + import __main__ + __main__._PyLammps_end_of_step_callback = end_of_step_callback + capture_thermo = self.lmp.has_package("PYTHON") + + if capture_thermo: + self.fix("__pylammps_internal_run_callback", "all", "python/invoke", "1", "end_of_step", "_PyLammps_end_of_step_callback") + output = self.__getattr__('run')(*args, **kwargs) - self.runs += get_thermo_data(output) + + if capture_thermo: + self.unfix("__pylammps_internal_run_callback") + self._append_run_thermo(self.lmp.last_thermo()) + + thermo_data = variable_set('ThermoData', self._current_run) + r = {'thermo' : thermo_data } + self.runs.append(namedtuple('Run', list(r.keys()))(*list(r.values()))) return output @property @@ -677,9 +686,7 @@ class PyLammps(object): :getter: Returns a list of atom groups that are currently active in this LAMMPS instance :type: list """ - output = self.lmp_info("groups") - output = output[output.index("Group information:")+1:] - return self._parse_groups(output) + return self.lmp.available_ids("group") @property def variables(self): @@ -689,11 +696,9 @@ class PyLammps(object): :getter: Returns a dictionary of all variables that are defined in this LAMMPS instance :type: dict """ - output = self.lmp_info("variables") - output = output[output.index("Variable information:")+1:] variables = {} - for v in self._parse_element_list(output): - variables[v['name']] = Variable(self, v['name'], v['style'], v['def']) + for name in self.lmp.available_ids("variable"): + variables[name] = Variable(self, name) return variables def eval(self, expr): @@ -720,66 +725,55 @@ class PyLammps(object): def _parse_info_system(self, output): system = {} + system['dimensions'] = self.lmp.extract_setting("dimension") + system['xlo'] = self.lmp.extract_global("boxxlo") + system['ylo'] = self.lmp.extract_global("boxylo") + system['zlo'] = self.lmp.extract_global("boxzlo") + system['xhi'] = self.lmp.extract_global("boxxhi") + system['yhi'] = self.lmp.extract_global("boxyhi") + system['zhi'] = self.lmp.extract_global("boxzhi") + xprd = system["xhi"] - system["xlo"] + yprd = system["yhi"] - system["ylo"] + zprd = system["zhi"] - system["zlo"] + if self.lmp.extract_setting("triclinic") == 1: + system['triclinic_box'] = (xprd, yprd, zprd) + else: + system['orthogonal_box'] = (xprd, yprd, zprd) + system['nangles'] = self.lmp.extract_global("nbonds") + system['nangletypes'] = self.lmp.extract_setting("nbondtypes") + system['angle_style'] = self.lmp.extract_global("angle_style") + system['nbonds'] = self.lmp.extract_global("nbonds") + system['nbondtypes'] = self.lmp.extract_setting("nbondtypes") + system['bond_style'] = self.lmp.extract_global("bond_style") + system['ndihedrals'] = self.lmp.extract_global("ndihedrals") + system['ndihedraltypes'] = self.lmp.extract_setting("ndihedraltypes") + system['dihedral_style'] = self.lmp.extract_global("dihedral_style") + system['nimpropers'] = self.lmp.extract_global("nimpropers") + system['nimpropertypes'] = self.lmp.extract_setting("nimpropertypes") + system['improper_style'] = self.lmp.extract_global("improper_style") + system['kspace_style'] = self.lmp.extract_global("kspace_style") + system['natoms'] = self.lmp.extract_global("natoms") + system['ntypes'] = self.lmp.extract_global("ntypes") + system['pair_style'] = self.lmp.extract_global("pair_style") + system['atom_style'] = self.lmp.extract_global("atom_style") + system['units'] = self.lmp.extract_global("units") for line in output: - if line.startswith("Units"): - system['units'] = self._get_pair(line)[1] - elif line.startswith("Atom style"): - system['atom_style'] = self._get_pair(line)[1] - elif line.startswith("Atom map"): + if line.startswith("Atom map"): system['atom_map'] = self._get_pair(line)[1] elif line.startswith("Atoms"): parts = self._split_values(line) - system['natoms'] = int(self._get_pair(parts[0])[1]) - system['ntypes'] = int(self._get_pair(parts[1])[1]) - system['style'] = self._get_pair(parts[2])[1] - elif line.startswith("Kspace style"): - system['kspace_style'] = self._get_pair(line)[1] - elif line.startswith("Dimensions"): - system['dimensions'] = int(self._get_pair(line)[1]) - elif line.startswith("Orthogonal box"): - system['orthogonal_box'] = [float(x) for x in self._get_pair(line)[1].split('x')] elif line.startswith("Boundaries"): system['boundaries'] = self._get_pair(line)[1] - elif line.startswith("xlo"): - keys, values = [self._split_values(x) for x in self._get_pair(line)] - for key, value in zip(keys, values): - system[key] = float(value) - elif line.startswith("ylo"): - keys, values = [self._split_values(x) for x in self._get_pair(line)] - for key, value in zip(keys, values): - system[key] = float(value) - elif line.startswith("zlo"): - keys, values = [self._split_values(x) for x in self._get_pair(line)] - for key, value in zip(keys, values): - system[key] = float(value) elif line.startswith("Molecule type"): system['molecule_type'] = self._get_pair(line)[1] - elif line.startswith("Bonds"): - parts = self._split_values(line) - system['nbonds'] = int(self._get_pair(parts[0])[1]) - system['nbondtypes'] = int(self._get_pair(parts[1])[1]) - system['bond_style'] = self._get_pair(parts[2])[1] - elif line.startswith("Angles"): - parts = self._split_values(line) - system['nangles'] = int(self._get_pair(parts[0])[1]) - system['nangletypes'] = int(self._get_pair(parts[1])[1]) - system['angle_style'] = self._get_pair(parts[2])[1] - elif line.startswith("Dihedrals"): - parts = self._split_values(line) - system['ndihedrals'] = int(self._get_pair(parts[0])[1]) - system['ndihedraltypes'] = int(self._get_pair(parts[1])[1]) - system['dihedral_style'] = self._get_pair(parts[2])[1] - elif line.startswith("Impropers"): - parts = self._split_values(line) - system['nimpropers'] = int(self._get_pair(parts[0])[1]) - system['nimpropertypes'] = int(self._get_pair(parts[1])[1]) - system['improper_style'] = self._get_pair(parts[2])[1] return system def _parse_info_communication(self, output): comm = {} + comm['nprocs'] = self.lmp.extract_setting("world_size") + comm['nthreads'] = self.lmp.extract_setting("nthreads") for line in output: if line.startswith("MPI library"): @@ -792,10 +786,6 @@ class PyLammps(object): comm['proc_grid'] = [int(x) for x in self._get_pair(line)[1].split('x')] elif line.startswith("Communicate velocities for ghost atoms"): comm['ghost_velocity'] = (self._get_pair(line)[1] == "yes") - elif line.startswith("Nprocs"): - parts = self._split_values(line) - comm['nprocs'] = int(self._get_pair(parts[0])[1]) - comm['nthreads'] = int(self._get_pair(parts[1])[1]) return comm def _parse_element_list(self, output): @@ -810,16 +800,6 @@ class PyLammps(object): elements.append(element) return elements - def _parse_groups(self, output): - groups = [] - group_pattern = re.compile(r"(?P.+) \((?P.+)\)") - - for line in output: - m = group_pattern.match(line.split(':')[1].strip()) - group = {'name': m.group('name'), 'type': m.group('type')} - groups.append(group) - return groups - def lmp_print(self, s): """ needed for Python2 compatibility, since print is a reserved keyword """ return self.__getattr__("print")(s) diff --git a/src/BODY/fix_wall_body_polygon.cpp b/src/BODY/fix_wall_body_polygon.cpp index 4d0517e527..6f0622cbf6 100644 --- a/src/BODY/fix_wall_body_polygon.cpp +++ b/src/BODY/fix_wall_body_polygon.cpp @@ -44,7 +44,7 @@ enum {FAR=0,XLO,XHI,YLO,YHI}; //#define _POLYGON_DEBUG #define DELTA 10000 -#define EPSILON 1e-2 +#define EPSILON 1e-2 // dimensionless threshold (dot products, end point checks, contact checks) #define BIG 1.0e20 #define MAX_CONTACTS 4 // maximum number of contacts for 2D models #define EFF_CONTACTS 2 // effective contacts for 2D models diff --git a/src/BODY/fix_wall_body_polyhedron.cpp b/src/BODY/fix_wall_body_polyhedron.cpp index 212830eb81..546ef1f0d4 100644 --- a/src/BODY/fix_wall_body_polyhedron.cpp +++ b/src/BODY/fix_wall_body_polyhedron.cpp @@ -42,7 +42,7 @@ enum {FAR=0,XLO,XHI,YLO,YHI,ZLO,ZHI}; //#define _POLYHEDRON_DEBUG #define DELTA 10000 -#define EPSILON 1e-2 +#define EPSILON 1e-3 // dimensionless threshold (dot products, end point checks) #define BIG 1.0e20 #define MAX_CONTACTS 4 // maximum number of contacts for 2D models #define EFF_CONTACTS 2 // effective contacts for 2D models diff --git a/src/BODY/pair_body_rounded_polygon.cpp b/src/BODY/pair_body_rounded_polygon.cpp index 0e83b63f99..24f38a6a0a 100644 --- a/src/BODY/pair_body_rounded_polygon.cpp +++ b/src/BODY/pair_body_rounded_polygon.cpp @@ -40,7 +40,7 @@ using namespace LAMMPS_NS; #define DELTA 10000 -#define EPSILON 1e-3 +#define EPSILON 1e-3 // dimensionless threshold (dot products, end point checks, contact checks) #define MAX_CONTACTS 4 // maximum number of contacts for 2D models #define EFF_CONTACTS 2 // effective contacts for 2D models @@ -624,7 +624,8 @@ void PairBodyRoundedPolygon::sphere_against_sphere(int i, int j, fy = dely*fpair/rij; fz = delz*fpair/rij; - if (R <= EPSILON) { // in contact + double rmin = MIN(rradi, rradj); + if (R <= EPSILON*rmin) { // in contact // relative translational velocity @@ -1019,6 +1020,7 @@ int PairBodyRoundedPolygon::compute_distance_to_vertex(int ibody, double xi1[3],xi2[3],u[3],v[3],uij[3]; double udotv, magv, magucostheta; double delx,dely,delz; + double rmin = MIN(rounded_radius, x0_rounded_radius); ifirst = dfirst[ibody]; iefirst = edfirst[ibody]; @@ -1105,17 +1107,17 @@ int PairBodyRoundedPolygon::compute_distance_to_vertex(int ibody, // x0 and xmi are on the different sides // t is the ratio to detect if x0 is closer to the vertices xi or xj - if (fabs(xi2[0] - xi1[0]) > EPSILON) + if (fabs(xi2[0] - xi1[0]) > EPSILON*rmin) t = (hi[0] - xi1[0]) / (xi2[0] - xi1[0]); - else if (fabs(xi2[1] - xi1[1]) > EPSILON) + else if (fabs(xi2[1] - xi1[1]) > EPSILON*rmin) t = (hi[1] - xi1[1]) / (xi2[1] - xi1[1]); - else if (fabs(xi2[2] - xi1[2]) > EPSILON) + else if (fabs(xi2[2] - xi1[2]) > EPSILON*rmin) t = (hi[2] - xi1[2]) / (xi2[2] - xi1[2]); double contact_dist = rounded_radius + x0_rounded_radius; if (t >= 0 && t <= 1) { mode = EDGE; - if (d < contact_dist + EPSILON) + if (d < contact_dist + EPSILON*rmin) contact = 1; } else { // t < 0 || t > 1: closer to either vertices of the edge @@ -1293,8 +1295,14 @@ double PairBodyRoundedPolygon::contact_separation(const Contact& c1, double x3 = c2.xv[0]; double y3 = c2.xv[1]; + int ibody = c1.ibody; + int jbody = c1.ibody; + double rradi = rounded_radius[ibody]; + double rradj = rounded_radius[jbody]; + double rmin = MIN(rradi, rradj); + double delta_a = 0.0; - if (fabs(x2 - x1) > EPSILON) { + if (fabs(x2 - x1) > EPSILON*rmin) { double A = (y2 - y1) / (x2 - x1); delta_a = fabs(y1 - A * x1 - y3 + A * x3) / sqrt(1 + A * A); } else { diff --git a/src/BODY/pair_body_rounded_polyhedron.cpp b/src/BODY/pair_body_rounded_polyhedron.cpp index 2c23199e33..d9a79abad0 100644 --- a/src/BODY/pair_body_rounded_polyhedron.cpp +++ b/src/BODY/pair_body_rounded_polyhedron.cpp @@ -44,7 +44,7 @@ using namespace LAMMPS_NS; using namespace MathConst; #define DELTA 10000 -#define EPSILON 1e-3 +#define EPSILON 1e-3 // dimensionless threshold (dot products, end point checks, contact checks) #define MAX_FACE_SIZE 4 // maximum number of vertices per face (same as BodyRoundedPolyhedron) #define MAX_CONTACTS 32 // for 3D models (including duplicated counts) @@ -1186,7 +1186,13 @@ int PairBodyRoundedPolyhedron::interaction_edge_to_edge(int ibody, // singularity case, ignore interactions - if (r < EPSILON) return interact; + double rmin = MIN(rounded_radius_i, rounded_radius_j); + if (r < EPSILON*rmin) { + #ifdef _POLYHEDRON_DEBUG + printf("ignore interaction: r = %0.16f\n", r); + #endif + return interact; + } // include the vertices for interactions @@ -1898,10 +1904,12 @@ void PairBodyRoundedPolyhedron::inside_polygon(int ibody, int face_index, { int i,n,ifirst,iffirst,npi1,npi2; - double xi1[3],xi2[3],u[3],v[3],costheta,anglesum1,anglesum2,magu,magv; + double xi1[3],xi2[3],u[3],v[3],costheta,anglesum1,anglesum2,magu,magv,rradi; ifirst = dfirst[ibody]; iffirst = facfirst[ibody]; + rradi = rounded_radius[ibody]; + double rradsq = rradi*rradi; anglesum1 = anglesum2 = 0;; for (i = 0; i < MAX_FACE_SIZE; i++) { npi1 = static_cast(face[iffirst+face_index][i]); @@ -1929,7 +1937,7 @@ void PairBodyRoundedPolyhedron::inside_polygon(int ibody, int face_index, // the point is at either vertices - if (magu * magv < EPSILON) inside1 = 1; + if (magu * magv < EPSILON*rradsq) inside1 = 1; else { costheta = MathExtra::dot3(u,v)/(magu*magv); anglesum1 += acos(costheta); @@ -1940,7 +1948,7 @@ void PairBodyRoundedPolyhedron::inside_polygon(int ibody, int face_index, MathExtra::sub3(xi2,q2,v); magu = MathExtra::len3(u); magv = MathExtra::len3(v); - if (magu * magv < EPSILON) inside2 = 1; + if (magu * magv < EPSILON*rradsq) inside2 = 1; else { costheta = MathExtra::dot3(u,v)/(magu*magv); anglesum2 += acos(costheta); @@ -2338,7 +2346,12 @@ void PairBodyRoundedPolyhedron::find_unique_contacts(Contact* contact_list, for (int j = i + 1; j < n; j++) { if (contact_list[i].unique == 0) continue; double d = contact_separation(contact_list[i], contact_list[j]); - if (d < EPSILON) contact_list[j].unique = 0; + int ibody = contact_list[i].ibody; + int jbody = contact_list[i].jbody; + double rradi = rounded_radius[ibody]; + double rradj = rounded_radius[jbody]; + double rmin = MIN(rradi, rradj); + if (d < EPSILON*rmin) contact_list[j].unique = 0; } } } diff --git a/src/EXTRA-DUMP/dump_yaml.cpp b/src/EXTRA-DUMP/dump_yaml.cpp index 47cab1ee02..3ca5c59edf 100644 --- a/src/EXTRA-DUMP/dump_yaml.cpp +++ b/src/EXTRA-DUMP/dump_yaml.cpp @@ -71,11 +71,11 @@ void DumpYAML::write_header(bigint ndump) thermo_data += "]\n - data: [ "; for (int i = 0; i < nfield; ++i) { - if (fields[i].type == multitype::DOUBLE) + if (fields[i].type == multitype::LAMMPS_DOUBLE) thermo_data += fmt::format("{}, ", fields[i].data.d); - else if (fields[i].type == multitype::INT) + else if (fields[i].type == multitype::LAMMPS_INT) thermo_data += fmt::format("{}, ", fields[i].data.i); - else if (fields[i].type == multitype::BIGINT) + else if (fields[i].type == multitype::LAMMPS_INT64) thermo_data += fmt::format("{}, ", fields[i].data.b); else thermo_data += ", "; diff --git a/src/INTEL/TEST/run_benchmarks.sh b/src/INTEL/TEST/run_benchmarks.sh index 01ddfbebf3..82eb51c928 100755 --- a/src/INTEL/TEST/run_benchmarks.sh +++ b/src/INTEL/TEST/run_benchmarks.sh @@ -7,12 +7,10 @@ # --------------------- MPI Launch Command export MPI="mpirun" -#export MPI="numactl -p 1 mpirun" # -- Systems w/ MCDRAM in flat mode # ------------- Name and location of the LAMMPS binary export LMP_BIN=../../lmp_intel_cpu_intelmpi -#export LMP_BIN=../../lmp_knl # ------------- Directory containing the LAMMPS installation @@ -20,21 +18,18 @@ export LMP_ROOT=../../../ # ------------- Number of physical cores (not HW threads) -export LMP_CORES=36 # -- For Intel Xeon E5-2697v4 SKU -#export LMP_CORES=68 # -- For Intel Xeon Phi x200 7250 SKU +export LMP_CORES=36 +# Set automatically with lscpu +export LMP_CORES=`lscpu | awk '$1=="Core(s)"{t=NF; cores=$t}$1=="Socket(s):"{t=NF; sockets=$t}END{print cores*sockets}'` # ------------- Number of HW threads to use in tests -export LMP_THREAD_LIST="2" # -- For 2 threads per core w/ HT enabled -#export LMP_THREAD_LIST="2 4" # -- For 2 threads per core w/ HT enabled +export LMP_THREAD_LIST="1 2" # -- Also test 2 threads per core w/ HT enabled # ------------- MPI Tuning Parameters -#export I_MPI_SHM_LMT=shm # -- Uncomment for Xeon Phi x200 series - -# ------------- Library locations for build - -#source /opt/intel/parallel_studio_xe_2017.2.050/psxevars.sh +export I_MPI_FABRICS=shm +export I_MPI_PIN_DOMAIN=core ######################################################################### # End settings for your system @@ -50,8 +45,6 @@ export DATE_STRING=`date +%s` export LOG_DIR=$LOG_DIR_HOST"_"$LOG_DIR_HEADER"_"$DATE_STRING mkdir $LOG_DIR -export I_MPI_PIN_DOMAIN=core -#export I_MPI_FABRICS=shm export KMP_BLOCKTIME=0 echo -n "Creating restart file...." @@ -73,6 +66,9 @@ do export LOGFILE=$LOG_DIR/$workload"_lrt".$LMP_CORES"c"$threads"t".log cmd="$MPI -np $LMP_CORES $LMP_BIN -in in.intel.$workload -log $LOGFILE $RLMP_ARGS"; rthreads=$((threads-1)) + if [ $rthreads -lt 1 ]; then + rthreads=1 + fi export KMP_AFFINITY=none export OMP_NUM_THREADS=$rthreads echo " $cmd" >> $LOG_DIR/commands.info @@ -82,4 +78,5 @@ do done # Performance reported by LAMMPS (Timesteps/second ignoring warm-up run) -grep Perf $LOG_DIR/*.log | awk 'BEGIN{n=1}n%2==0{print $0}{n++}' | sed 's/\/day//g' | sed 's/steps\/s/steps_s/g' | sed 's/hours\/ns//g' | sed 's/.*\///g' | sed 's/\.log:Performance://g' | awk '{c=NF-1; print $1,$c}' +grep Perf $LOG_DIR/*.log | awk 'n%2==1{c=NF-3; print $1,$c}{n++}' | sed -e 's/.*\///g' -e 's/:.*://g' + diff --git a/src/INTEL/angle_charmm_intel.cpp b/src/INTEL/angle_charmm_intel.cpp index 1ea9c8348e..e13448e28a 100644 --- a/src/INTEL/angle_charmm_intel.cpp +++ b/src/INTEL/angle_charmm_intel.cpp @@ -178,7 +178,7 @@ void AngleCharmmIntel::eval(const int vflag, const flt_t delz1 = x[i1].z - x[i2].z; const flt_t rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; - flt_t ir12 = (flt_t)1.0/sqrt(rsq1); + flt_t ir12 = (flt_t)1.0/std::sqrt(rsq1); // 2nd bond @@ -187,7 +187,7 @@ void AngleCharmmIntel::eval(const int vflag, const flt_t delz2 = x[i3].z - x[i2].z; const flt_t rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; - ir12 *= (flt_t)1.0/sqrt(rsq2); + ir12 *= (flt_t)1.0/std::sqrt(rsq2); // Urey-Bradley bond @@ -196,7 +196,7 @@ void AngleCharmmIntel::eval(const int vflag, const flt_t delzUB = x[i3].z - x[i1].z; const flt_t rsqUB = delxUB*delxUB + delyUB*delyUB + delzUB*delzUB; - const flt_t irUB = (flt_t)1.0/sqrt(rsqUB); + const flt_t irUB = (flt_t)1.0/std::sqrt(rsqUB); // Urey-Bradley force & energy @@ -219,12 +219,12 @@ void AngleCharmmIntel::eval(const int vflag, if (c < (flt_t)-1.0) c = (flt_t)-1.0; const flt_t sd = (flt_t)1.0 - c * c; - flt_t s = (flt_t)1.0 / sqrt(sd); + flt_t s = (flt_t)1.0 / std::sqrt(sd); if (sd < SMALL2) s = INVSMALL; // harmonic force & energy - const flt_t dtheta = acos(c) - fc.fc[type].theta0; + const flt_t dtheta = std::acos(c) - fc.fc[type].theta0; const flt_t tk = fc.fc[type].k * dtheta; if (EFLAG) eangle += tk*dtheta; diff --git a/src/INTEL/angle_harmonic_intel.cpp b/src/INTEL/angle_harmonic_intel.cpp index 8cdab1d43a..98125b164b 100644 --- a/src/INTEL/angle_harmonic_intel.cpp +++ b/src/INTEL/angle_harmonic_intel.cpp @@ -178,7 +178,7 @@ void AngleHarmonicIntel::eval(const int vflag, const flt_t delz1 = x[i1].z - x[i2].z; const flt_t rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; - const flt_t r1 = (flt_t)1.0/sqrt(rsq1); + const flt_t r1 = (flt_t)1.0/std::sqrt(rsq1); // 2nd bond @@ -187,7 +187,7 @@ void AngleHarmonicIntel::eval(const int vflag, const flt_t delz2 = x[i3].z - x[i2].z; const flt_t rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; - const flt_t r2 = (flt_t)1.0/sqrt(rsq2); + const flt_t r2 = (flt_t)1.0/std::sqrt(rsq2); // angle (cos and sin) @@ -199,12 +199,12 @@ void AngleHarmonicIntel::eval(const int vflag, if (c < (flt_t)-1.0) c = (flt_t)-1.0; const flt_t sd = (flt_t)1.0 - c * c; - flt_t s = (flt_t)1.0/sqrt(sd); + flt_t s = (flt_t)1.0/std::sqrt(sd); if (sd < SMALL2) s = INVSMALL; // harmonic force & energy - const flt_t dtheta = acos(c) - fc.fc[type].theta0; + const flt_t dtheta = std::acos(c) - fc.fc[type].theta0; const flt_t tk = fc.fc[type].k * dtheta; flt_t eangle; diff --git a/src/INTEL/bond_harmonic_intel.cpp b/src/INTEL/bond_harmonic_intel.cpp index a60050bb6b..ab38cefea4 100644 --- a/src/INTEL/bond_harmonic_intel.cpp +++ b/src/INTEL/bond_harmonic_intel.cpp @@ -168,7 +168,7 @@ void BondHarmonicIntel::eval(const int vflag, const flt_t delz = x[i1].z - x[i2].z; const flt_t rsq = delx*delx + dely*dely + delz*delz; - const flt_t r = sqrt(rsq); + const flt_t r = std::sqrt(rsq); const flt_t dr = r - fc.fc[type].r0; const flt_t rk = fc.fc[type].k * dr; diff --git a/src/INTEL/dihedral_charmm_intel.cpp b/src/INTEL/dihedral_charmm_intel.cpp index 6c3ae2c927..a41cfb867c 100644 --- a/src/INTEL/dihedral_charmm_intel.cpp +++ b/src/INTEL/dihedral_charmm_intel.cpp @@ -240,14 +240,14 @@ void DihedralCharmmIntel::eval(const int vflag, const flt_t rasq = ax*ax + ay*ay + az*az; const flt_t rbsq = bx*bx + by*by + bz*bz; const flt_t rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; - const flt_t rg = sqrt(rgsq); + const flt_t rg = std::sqrt(rgsq); flt_t rginv, ra2inv, rb2inv; rginv = ra2inv = rb2inv = (flt_t)0.0; if (rg > 0) rginv = (flt_t)1.0/rg; if (rasq > 0) ra2inv = (flt_t)1.0/rasq; if (rbsq > 0) rb2inv = (flt_t)1.0/rbsq; - const flt_t rabinv = sqrt(ra2inv*rb2inv); + const flt_t rabinv = std::sqrt(ra2inv*rb2inv); flt_t c = (ax*bx + ay*by + az*bz)*rabinv; const flt_t s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z); @@ -367,7 +367,7 @@ void DihedralCharmmIntel::eval(const int vflag, flt_t forcecoul; if (implicit) forcecoul = qqrd2e * q[i1]*q[i4]*r2inv; - else forcecoul = qqrd2e * q[i1]*q[i4]*sqrt(r2inv); + else forcecoul = qqrd2e * q[i1]*q[i4]*std::sqrt(r2inv); const flt_t forcelj = r6inv * (fc.ljp[itype][jtype].lj1*r6inv - fc.ljp[itype][jtype].lj2); const flt_t fpair = tweight * (forcelj+forcecoul)*r2inv; diff --git a/src/INTEL/dihedral_fourier_intel.cpp b/src/INTEL/dihedral_fourier_intel.cpp index 5448fdae98..595d747839 100644 --- a/src/INTEL/dihedral_fourier_intel.cpp +++ b/src/INTEL/dihedral_fourier_intel.cpp @@ -199,14 +199,14 @@ void DihedralFourierIntel::eval(const int vflag, const flt_t rasq = ax*ax + ay*ay + az*az; const flt_t rbsq = bx*bx + by*by + bz*bz; const flt_t rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; - const flt_t rg = sqrt(rgsq); + const flt_t rg = std::sqrt(rgsq); flt_t rginv, ra2inv, rb2inv; rginv = ra2inv = rb2inv = (flt_t)0.0; if (rg > 0) rginv = (flt_t)1.0/rg; if (rasq > 0) ra2inv = (flt_t)1.0/rasq; if (rbsq > 0) rb2inv = (flt_t)1.0/rbsq; - const flt_t rabinv = sqrt(ra2inv*rb2inv); + const flt_t rabinv = std::sqrt(ra2inv*rb2inv); flt_t c = (ax*bx + ay*by + az*bz)*rabinv; const flt_t s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z); diff --git a/src/INTEL/dihedral_harmonic_intel.cpp b/src/INTEL/dihedral_harmonic_intel.cpp index 8d91a3fd27..138545d94a 100644 --- a/src/INTEL/dihedral_harmonic_intel.cpp +++ b/src/INTEL/dihedral_harmonic_intel.cpp @@ -199,14 +199,14 @@ void DihedralHarmonicIntel::eval(const int vflag, const flt_t rasq = ax*ax + ay*ay + az*az; const flt_t rbsq = bx*bx + by*by + bz*bz; const flt_t rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; - const flt_t rg = sqrt(rgsq); + const flt_t rg = std::sqrt(rgsq); flt_t rginv, ra2inv, rb2inv; rginv = ra2inv = rb2inv = (flt_t)0.0; if (rg > 0) rginv = (flt_t)1.0/rg; if (rasq > 0) ra2inv = (flt_t)1.0/rasq; if (rbsq > 0) rb2inv = (flt_t)1.0/rbsq; - const flt_t rabinv = sqrt(ra2inv*rb2inv); + const flt_t rabinv = std::sqrt(ra2inv*rb2inv); flt_t c = (ax*bx + ay*by + az*bz)*rabinv; const flt_t s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z); diff --git a/src/INTEL/dihedral_opls_intel.cpp b/src/INTEL/dihedral_opls_intel.cpp index 82151a8585..52763e6bd8 100644 --- a/src/INTEL/dihedral_opls_intel.cpp +++ b/src/INTEL/dihedral_opls_intel.cpp @@ -195,15 +195,15 @@ void DihedralOPLSIntel::eval(const int vflag, // 1st and 2nd angle const flt_t b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z; - const flt_t rb1 = (flt_t)1.0 / sqrt(b1mag2); + const flt_t rb1 = (flt_t)1.0 / std::sqrt(b1mag2); const flt_t sb1 = (flt_t)1.0 / b1mag2; const flt_t b2mag2 = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; - const flt_t rb2 = (flt_t)1.0 / sqrt(b2mag2); + const flt_t rb2 = (flt_t)1.0 / std::sqrt(b2mag2); const flt_t sb2 = (flt_t)1.0 / b2mag2; const flt_t b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z; - const flt_t rb3 = (flt_t)1.0 / sqrt(b3mag2); + const flt_t rb3 = (flt_t)1.0 / std::sqrt(b3mag2); const flt_t sb3 = (flt_t)1.0 / b3mag2; const flt_t c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3; @@ -219,11 +219,11 @@ void DihedralOPLSIntel::eval(const int vflag, // cos and sin of 2 angles and final c flt_t sin2 = MAX((flt_t)1.0 - c1mag*c1mag,(flt_t)0.0); - flt_t sc1 = (flt_t)1.0/sqrt(sin2); + flt_t sc1 = (flt_t)1.0/std::sqrt(sin2); if (sin2 < SMALL2) sc1 = INVSMALL; sin2 = MAX((flt_t)1.0 - c2mag*c2mag,(flt_t)0.0); - flt_t sc2 = (flt_t)1.0/sqrt(sin2); + flt_t sc2 = (flt_t)1.0/std::sqrt(sin2); if (sin2 < SMALL2) sc2 = INVSMALL; const flt_t s1 = sc1 * sc1; @@ -234,7 +234,7 @@ void DihedralOPLSIntel::eval(const int vflag, const flt_t cx = vb1z*vb2ym - vb1y*vb2zm; const flt_t cy = vb1x*vb2zm - vb1z*vb2xm; const flt_t cz = vb1y*vb2xm - vb1x*vb2ym; - const flt_t cmag = (flt_t)1.0/sqrt(cx*cx + cy*cy + cz*cz); + const flt_t cmag = (flt_t)1.0/std::sqrt(cx*cx + cy*cy + cz*cz); const flt_t dx = (cx*vb3x + cy*vb3y + cz*vb3z)*cmag*rb3; // error check @@ -252,7 +252,7 @@ void DihedralOPLSIntel::eval(const int vflag, const flt_t cossq = c * c; const flt_t sinsq = (flt_t)1.0 - cossq; - flt_t siinv = (flt_t)1.0/sqrt(sinsq); + flt_t siinv = (flt_t)1.0/std::sqrt(sinsq); if (sinsq < SMALLER2 ) siinv = INVSMALLER; if (dx < (flt_t)0.0) siinv = -siinv; diff --git a/src/INTEL/fix_intel.cpp b/src/INTEL/fix_intel.cpp index 1ce0b41338..59224cc511 100644 --- a/src/INTEL/fix_intel.cpp +++ b/src/INTEL/fix_intel.cpp @@ -61,6 +61,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) _hybrid_nonpair = 0; _print_pkg_info = 1; _nthreads = comm->nthreads; + _torque_flag = 0; _precision_mode = PREC_MODE_MIXED; _offload_balance = -1.0; @@ -95,6 +96,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) _allow_separate_buffers = 1; _offload_ghost = -1; _lrt = 0; + _p3m_table = 1; int iarg = 4; while (iarg < narg) { @@ -135,11 +137,14 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command"); _lrt = utils::logical(FLERR,arg[iarg+1],false,lmp); iarg += 2; - } + } else if (strcmp(arg[iarg], "pppm_table") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command"); + _p3m_table = utils::logical(FLERR,arg[iarg+1],false,lmp); + iarg += 2; // undocumented options - else if (strcmp(arg[iarg],"offload_affinity_balanced") == 0) { + } else if (strcmp(arg[iarg],"offload_affinity_balanced") == 0) { _offload_affinity_balanced = 1; iarg++; } else if (strcmp(arg[iarg],"buffers") == 0) { @@ -275,10 +280,8 @@ int FixIntel::setmask() int mask = 0; mask |= PRE_REVERSE; mask |= MIN_PRE_REVERSE; - #ifdef _LMP_INTEL_OFFLOAD mask |= POST_FORCE; mask |= MIN_POST_FORCE; - #endif mask |= POST_RUN; return mask; } @@ -299,14 +302,29 @@ void FixIntel::init() } #endif + _torque_flag = 0; + if (force->pair_match("gayberne/intel$", 0)) _torque_flag = 1; + const int nstyles = _pair_intel_count; if (force->pair_match("^hybrid", 0) != nullptr) { _pair_hybrid_flag = 1; + + // Check if need to handle torque + auto hybrid = dynamic_cast(force->pair); + for (int i = 0; i < hybrid->nstyles; i++) + if (utils::strmatch(hybrid->keywords[i],"/intel$") && + utils::strmatch(hybrid->keywords[i],"gayberne")) + _torque_flag = 1; + if (force->newton_pair != 0 && force->pair->no_virial_fdotr_compute) error->all(FLERR,"INTEL package requires fdotr virial with newton on."); } else _pair_hybrid_flag = 0; + if (_torque_flag && nstyles > 1) + error->all(FLERR,"gayberne/intel style cannot yet be used with other " + "intel pair styles."); + if (nstyles > 1 && _pair_hybrid_flag) _pair_hybrid_flag = 2; else if (force->newton_pair == 0) _pair_hybrid_flag = 0; @@ -333,14 +351,17 @@ void FixIntel::init() int off_mode = 0; if (_offload_balance != 0.0) off_mode = 1; if (_precision_mode == PREC_MODE_SINGLE) { + _single_buffers->set_torque_flag(_torque_flag); _single_buffers->zero_ev(); _single_buffers->grow_ncache(off_mode, comm->nthreads); _single_buffers->free_list_ptrs(); } else if (_precision_mode == PREC_MODE_MIXED) { + _mixed_buffers->set_torque_flag(_torque_flag); _mixed_buffers->zero_ev(); _mixed_buffers->grow_ncache(off_mode, comm->nthreads); _mixed_buffers->free_list_ptrs(); } else { + _double_buffers->set_torque_flag(_torque_flag); _double_buffers->zero_ev(); _double_buffers->grow_ncache(off_mode, comm->nthreads); _double_buffers->free_list_ptrs(); @@ -390,7 +411,7 @@ bool FixIntel::pair_hybrid_check() void FixIntel::pair_init_check(const bool cdmessage) { #ifdef INTEL_VMASK - atom->sortfreq = 1; + if (atom->sortfreq) atom->sortfreq = 1; #endif _nbor_pack_width = 1; @@ -597,6 +618,19 @@ void FixIntel::pre_reverse(int /*eflag*/, int /*vflag*/) /* ---------------------------------------------------------------------- */ +void FixIntel::post_force(int vflag) +{ + // Redundant call to sync Intel data structs with native for methods that + // call force compute but do not call prereverse + _sync_main_arrays(1); + + #ifdef LMP_INTEL_OFFLOAD + if (_sync_mode == 2) sync_coprocessor(); + #endif +} + +/* ---------------------------------------------------------------------- */ + template void FixIntel::reduce_results(acc_t * _noalias const f_scalar) { @@ -605,7 +639,7 @@ void FixIntel::reduce_results(acc_t * _noalias const f_scalar) o_range = atom->nlocal + atom->nghost; else o_range = atom->nlocal; - IP_PRE_get_stride(f_stride, o_range, (sizeof(acc_t)*4), lmp->atom->torque); + IP_PRE_get_stride(f_stride, o_range, (sizeof(acc_t)*4), _torque_flag); o_range *= 4; const int f_stride4 = f_stride * 4; @@ -704,7 +738,7 @@ void FixIntel::add_results(const ft * _noalias const f_in, add_oresults(f_in, ev_global, eatom, vatom, 0, _offload_nlocal); const acc_t * _noalias const enull = 0; int offset = _offload_nlocal; - if (atom->torque) offset *= 2; + if (_torque_flag) offset *= 2; add_oresults(f_in + offset, enull, eatom, vatom, _offload_min_ghost, _offload_nghost); } else @@ -715,7 +749,7 @@ void FixIntel::add_results(const ft * _noalias const f_in, _host_min_local, _host_used_local); const acc_t * _noalias const enull = 0; int offset = _host_used_local; - if (atom->torque) offset *= 2; + if (_torque_flag) offset *= 2; add_oresults(f_in + offset, enull, eatom, vatom, _host_min_ghost, _host_used_ghost); } else { @@ -764,7 +798,7 @@ void FixIntel::add_oresults(const ft * _noalias const f_in, const int eatom, const int /*vatom*/, const int out_offset, const int nall) { lmp_ft * _noalias const f = (lmp_ft *) lmp->atom->f[0] + out_offset; - if (atom->torque) { + if (_torque_flag) { if (f_in[1].w) { if (f_in[1].w == 1) @@ -788,7 +822,7 @@ void FixIntel::add_oresults(const ft * _noalias const f_in, #endif int ifrom, ito; IP_PRE_omp_range_align(ifrom, ito, tid, nall, packthreads, sizeof(acc_t)); - if (atom->torque) { + if (_torque_flag) { int ii = ifrom * 2; lmp_ft * _noalias const tor = (lmp_ft *) lmp->atom->torque[0] + out_offset; @@ -883,13 +917,6 @@ double FixIntel::memory_usage() /* ---------------------------------------------------------------------- */ -void FixIntel::post_force(int vflag) -{ - if (_sync_mode == 2) sync_coprocessor(); -} - -/* ---------------------------------------------------------------------- */ - template void FixIntel::add_off_results(const ft * _noalias const f_in, const acc_t * _noalias const ev_global) { @@ -919,7 +946,7 @@ void FixIntel::add_off_results(const ft * _noalias const f_in, _offload_nlocal; } - if (atom->torque) + if (_torque_flag) if (f_in[1].w < 0.0) error->all(FLERR,"Bad matrix inversion in mldivide3"); add_results(f_in, ev_global, _off_results_eatom, _off_results_vatom, 1); diff --git a/src/INTEL/fix_intel.h b/src/INTEL/fix_intel.h index 9214fe3419..08d0a534d9 100644 --- a/src/INTEL/fix_intel.h +++ b/src/INTEL/fix_intel.h @@ -55,6 +55,7 @@ class FixIntel : public Fix { void pre_reverse(int eflag = 0, int vflag = 0) override; inline void min_pre_reverse(int eflag = 0, int vflag = 0) override { pre_reverse(eflag, vflag); } + void post_force(int vflag) override; void post_run() override { _print_pkg_info = 1; } // Get all forces, calculation results from coprocesser @@ -102,7 +103,7 @@ class FixIntel : public Fix { inline int pppm_table() { if (force->kspace_match("^pppm/.*intel$", 0)) - return INTEL_P3M_TABLE; + return _p3m_table; else return 0; } @@ -115,7 +116,7 @@ class FixIntel : public Fix { int _precision_mode, _nthreads, _nbor_pack_width, _three_body_neighbor; int _pair_intel_count, _pair_hybrid_flag, _print_pkg_info; // These should be removed in subsequent update w/ simpler hybrid arch - int _pair_hybrid_zero, _hybrid_nonpair, _zero_master; + int _pair_hybrid_zero, _hybrid_nonpair, _zero_master, _torque_flag; public: inline int *get_overflow_flag() { return _overflow_flag; } @@ -132,7 +133,6 @@ class FixIntel : public Fix { inline void get_buffern(const int offload, int &nlocal, int &nall, int &minlocal); #ifdef _LMP_INTEL_OFFLOAD - void post_force(int vflag); inline int coprocessor_number() { return _cop; } inline int full_host_list() { return _full_host_list; } void set_offload_affinity(); @@ -194,7 +194,7 @@ class FixIntel : public Fix { protected: int _overflow_flag[5]; _alignvar(int _off_overflow_flag[5], 64); - int _allow_separate_buffers, _offload_ghost, _lrt; + int _allow_separate_buffers, _offload_ghost, _lrt, _p3m_table; IntelBuffers::vec3_acc_t *_force_array_s; IntelBuffers::vec3_acc_t *_force_array_m; diff --git a/src/INTEL/fix_nh_intel.cpp b/src/INTEL/fix_nh_intel.cpp index 87ce0dbe43..2c05c3f6fa 100644 --- a/src/INTEL/fix_nh_intel.cpp +++ b/src/INTEL/fix_nh_intel.cpp @@ -166,28 +166,28 @@ void FixNHIntel::remap() if (pstyle == TRICLINIC) { if (p_flag[4]) { - expfac = exp(dto8*omega_dot[0]); + expfac = std::exp(dto8*omega_dot[0]); h[4] *= expfac; h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]); h[4] *= expfac; } if (p_flag[3]) { - expfac = exp(dto4*omega_dot[1]); + expfac = std::exp(dto4*omega_dot[1]); h[3] *= expfac; h[3] += dto2*(omega_dot[3]*h[2]); h[3] *= expfac; } if (p_flag[5]) { - expfac = exp(dto4*omega_dot[0]); + expfac = std::exp(dto4*omega_dot[0]); h[5] *= expfac; h[5] += dto2*(omega_dot[5]*h[1]); h[5] *= expfac; } if (p_flag[4]) { - expfac = exp(dto8*omega_dot[0]); + expfac = std::exp(dto8*omega_dot[0]); h[4] *= expfac; h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]); h[4] *= expfac; @@ -200,7 +200,7 @@ void FixNHIntel::remap() if (p_flag[0]) { oldlo = domain->boxlo[0]; oldhi = domain->boxhi[0]; - expfac = exp(dto*omega_dot[0]); + expfac = std::exp(dto*omega_dot[0]); domain->boxlo[0] = (oldlo-fixedpoint[0])*expfac + fixedpoint[0]; domain->boxhi[0] = (oldhi-fixedpoint[0])*expfac + fixedpoint[0]; } @@ -208,7 +208,7 @@ void FixNHIntel::remap() if (p_flag[1]) { oldlo = domain->boxlo[1]; oldhi = domain->boxhi[1]; - expfac = exp(dto*omega_dot[1]); + expfac = std::exp(dto*omega_dot[1]); domain->boxlo[1] = (oldlo-fixedpoint[1])*expfac + fixedpoint[1]; domain->boxhi[1] = (oldhi-fixedpoint[1])*expfac + fixedpoint[1]; if (scalexy) h[5] *= expfac; @@ -217,7 +217,7 @@ void FixNHIntel::remap() if (p_flag[2]) { oldlo = domain->boxlo[2]; oldhi = domain->boxhi[2]; - expfac = exp(dto*omega_dot[2]); + expfac = std::exp(dto*omega_dot[2]); domain->boxlo[2] = (oldlo-fixedpoint[2])*expfac + fixedpoint[2]; domain->boxhi[2] = (oldhi-fixedpoint[2])*expfac + fixedpoint[2]; if (scalexz) h[4] *= expfac; @@ -229,28 +229,28 @@ void FixNHIntel::remap() if (pstyle == TRICLINIC) { if (p_flag[4]) { - expfac = exp(dto8*omega_dot[0]); + expfac = std::exp(dto8*omega_dot[0]); h[4] *= expfac; h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]); h[4] *= expfac; } if (p_flag[3]) { - expfac = exp(dto4*omega_dot[1]); + expfac = std::exp(dto4*omega_dot[1]); h[3] *= expfac; h[3] += dto2*(omega_dot[3]*h[2]); h[3] *= expfac; } if (p_flag[5]) { - expfac = exp(dto4*omega_dot[0]); + expfac = std::exp(dto4*omega_dot[0]); h[5] *= expfac; h[5] += dto2*(omega_dot[5]*h[1]); h[5] *= expfac; } if (p_flag[4]) { - expfac = exp(dto8*omega_dot[0]); + expfac = std::exp(dto8*omega_dot[0]); h[4] *= expfac; h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]); h[4] *= expfac; @@ -427,9 +427,9 @@ void FixNHIntel::nh_v_press() int nlocal = atom->nlocal; if (igroup == atom->firstgroup) nlocal = atom->nfirst; - double f0 = exp(-dt4*(omega_dot[0]+mtk_term2)); - double f1 = exp(-dt4*(omega_dot[1]+mtk_term2)); - double f2 = exp(-dt4*(omega_dot[2]+mtk_term2)); + double f0 = std::exp(-dt4*(omega_dot[0]+mtk_term2)); + double f1 = std::exp(-dt4*(omega_dot[1]+mtk_term2)); + double f2 = std::exp(-dt4*(omega_dot[2]+mtk_term2)); f0 *= f0; f1 *= f1; f2 *= f2; diff --git a/src/INTEL/improper_cvff_intel.cpp b/src/INTEL/improper_cvff_intel.cpp index 09b2d42167..a503f45541 100644 --- a/src/INTEL/improper_cvff_intel.cpp +++ b/src/INTEL/improper_cvff_intel.cpp @@ -191,15 +191,15 @@ void ImproperCvffIntel::eval(const int vflag, // 1st and 2nd angle const flt_t b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z; - const flt_t rb1 = (flt_t)1.0 / sqrt(b1mag2); + const flt_t rb1 = (flt_t)1.0 / std::sqrt(b1mag2); const flt_t sb1 = (flt_t)1.0 / b1mag2; const flt_t b2mag2 = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; - const flt_t rb2 = (flt_t)1.0 / sqrt(b2mag2); + const flt_t rb2 = (flt_t)1.0 / std::sqrt(b2mag2); const flt_t sb2 = (flt_t)1.0 / b2mag2; const flt_t b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z; - const flt_t rb3 = (flt_t)1.0 / sqrt(b3mag2); + const flt_t rb3 = (flt_t)1.0 / std::sqrt(b3mag2); const flt_t sb3 = (flt_t)1.0 / b3mag2; const flt_t c0 = (vb1x * vb3x + vb1y * vb3y + vb1z * vb3z) * rb1 * rb3; @@ -215,11 +215,11 @@ void ImproperCvffIntel::eval(const int vflag, // cos and sin of 2 angles and final c const flt_t sd1 = (flt_t)1.0 - c1mag * c1mag; - flt_t sc1 = (flt_t)1.0/sqrt(sd1); + flt_t sc1 = (flt_t)1.0/std::sqrt(sd1); if (sd1 < SMALL2) sc1 = INVSMALL; const flt_t sd2 = (flt_t)1.0 - c2mag * c2mag; - flt_t sc2 = (flt_t)1.0/sqrt(sd2); + flt_t sc2 = (flt_t)1.0/std::sqrt(sd2); if (sc2 < SMALL2) sc2 = INVSMALL; const flt_t s1 = sc1 * sc1; diff --git a/src/INTEL/improper_harmonic_intel.cpp b/src/INTEL/improper_harmonic_intel.cpp index a9a152d5f0..869051ab6b 100644 --- a/src/INTEL/improper_harmonic_intel.cpp +++ b/src/INTEL/improper_harmonic_intel.cpp @@ -194,9 +194,9 @@ void ImproperHarmonicIntel::eval(const int vflag, flt_t ss2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z; flt_t ss3 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z; - const flt_t r1 = (flt_t)1.0 / sqrt(ss1); - const flt_t r2 = (flt_t)1.0 / sqrt(ss2); - const flt_t r3 = (flt_t)1.0 / sqrt(ss3); + const flt_t r1 = (flt_t)1.0 / std::sqrt(ss1); + const flt_t r2 = (flt_t)1.0 / std::sqrt(ss2); + const flt_t r3 = (flt_t)1.0 / std::sqrt(ss3); ss1 = (flt_t)1.0 / ss1; ss2 = (flt_t)1.0 / ss2; @@ -214,7 +214,7 @@ void ImproperHarmonicIntel::eval(const int vflag, flt_t s2 = (flt_t)1.0 - c2*c2; if (s2 < SMALL) s2 = SMALL; - flt_t s12 = (flt_t)1.0 / sqrt(s1*s2); + flt_t s12 = (flt_t)1.0 / std::sqrt(s1*s2); s1 = (flt_t)1.0 / s1; s2 = (flt_t)1.0 / s2; flt_t c = (c1*c2 + c0) * s12; @@ -229,12 +229,12 @@ void ImproperHarmonicIntel::eval(const int vflag, if (c < (flt_t)-1.0) c = (flt_t)-1.0; const flt_t sd = (flt_t)1.0 - c * c; - flt_t s = (flt_t)1.0 / sqrt(sd); + flt_t s = (flt_t)1.0 / std::sqrt(sd); if (sd < SMALL2) s = INVSMALL; // force & energy - const flt_t domega = acos(c) - fc.fc[type].chi; + const flt_t domega = std::acos(c) - fc.fc[type].chi; flt_t a; a = fc.fc[type].k * domega; diff --git a/src/INTEL/intel_buffers.cpp b/src/INTEL/intel_buffers.cpp index bb54577097..6d4c9bffcd 100644 --- a/src/INTEL/intel_buffers.cpp +++ b/src/INTEL/intel_buffers.cpp @@ -28,6 +28,7 @@ template IntelBuffers::IntelBuffers(class LAMMPS *lmp_in) : lmp(lmp_in), _x(nullptr), _q(nullptr), _quat(nullptr), _f(nullptr), _off_threads(0), _n_list_ptrs(1), _max_list_ptrs(4), _buf_size(0), _buf_local_size(0) { + _torque_flag = 0; _neigh_list_ptrs = new IntelNeighListPtrs[_max_list_ptrs]; _neigh_list_ptrs[0].cnumneigh = nullptr; _list_alloc_atoms = 0; @@ -695,7 +696,7 @@ double IntelBuffers::memory_usage(const int nthreads) { double tmem = sizeof(atom_t); if (lmp->atom->q) tmem += sizeof(flt_t); - if (lmp->atom->torque) tmem += sizeof(quat_t); + if (_torque_flag) tmem += sizeof(quat_t); #ifdef _LMP_INTEL_OFFLOAD if (_separate_buffers) tmem *= 2; #endif diff --git a/src/INTEL/intel_buffers.h b/src/INTEL/intel_buffers.h index 7422850184..1e9739c3b2 100644 --- a/src/INTEL/intel_buffers.h +++ b/src/INTEL/intel_buffers.h @@ -57,7 +57,7 @@ class IntelBuffers { inline int get_stride(int nall) { int stride; IP_PRE_get_stride(stride, nall, sizeof(vec3_acc_t), - lmp->atom->torque); + _torque_flag); return stride; } @@ -76,6 +76,8 @@ class IntelBuffers { _neigh_list_ptrs[0].numneighhalf = atombin; } + inline void set_torque_flag(const int in) { _torque_flag = in; } + inline void grow(const int nall, const int nlocal, const int nthreads, const int offload_end) { if (nall >= _buf_size || nlocal >= _buf_local_size) @@ -329,7 +331,7 @@ class IntelBuffers { flt_t *_q; quat_t *_quat; vec3_acc_t * _f; - int _off_threads, _off_map_listlocal; + int _torque_flag, _off_threads, _off_map_listlocal; int _list_alloc_atoms; int *_list_alloc, *_cnumneigh, *_atombin, *_binpacked; diff --git a/src/INTEL/intel_intrinsics.h b/src/INTEL/intel_intrinsics.h index 1ff405edd2..c106dfd411 100644 --- a/src/INTEL/intel_intrinsics.h +++ b/src/INTEL/intel_intrinsics.h @@ -1764,7 +1764,7 @@ struct vector_ops { return a < b; } static fvec invsqrt(const fvec &a) { - return 1. / sqrt(a); + return 1. / std::sqrt(a); } static fvec sincos(fvec *c, const fvec &a) { *c = cos(a); diff --git a/src/INTEL/intel_preprocess.h b/src/INTEL/intel_preprocess.h index a3c961f436..2c4b9a0c1b 100644 --- a/src/INTEL/intel_preprocess.h +++ b/src/INTEL/intel_preprocess.h @@ -86,8 +86,6 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR, #define INTEL_MAX_STENCIL_CHECK 4096 #define INTEL_P3M_MAXORDER 8 #define INTEL_P3M_ALIGNED_MAXORDER 8 -// PRECOMPUTE VALUES IN TABLE (DOESN'T AFFECT ACCURACY) -#define INTEL_P3M_TABLE 1 #ifdef __INTEL_COMPILER #ifdef __AVX__ diff --git a/src/INTEL/math_extra_intel.h b/src/INTEL/math_extra_intel.h index a8869f7384..556f3f4590 100644 --- a/src/INTEL/math_extra_intel.h +++ b/src/INTEL/math_extra_intel.h @@ -100,12 +100,12 @@ normalize a vector, return in ans ------------------------------------------------------------------------- */ -#define ME_normalize3(v0, v1, v2, ans) \ -{ \ - flt_t scale = (flt_t)1.0 / sqrt(v0*v0+v1*v1+v2*v2); \ - ans##_0 = v0 * scale; \ - ans##_1 = v1 * scale; \ - ans##_2 = v2 * scale; \ +#define ME_normalize3(v0, v1, v2, ans) \ +{ \ + flt_t scale = (flt_t)1.0 / std::sqrt(v0*v0+v1*v1+v2*v2); \ + ans##_0 = v0 * scale; \ + ans##_1 = v1 * scale; \ + ans##_2 = v2 * scale; \ } /* ---------------------------------------------------------------------- @@ -359,7 +359,7 @@ #define ME_qnormalize(q) \ { \ double norm = 1.0 / \ - sqrt(q##_w*q##_w + q##_i*q##_i + q##_j*q##_j + q##_k*q##_k); \ + std::sqrt(q##_w*q##_w + q##_i*q##_i + q##_j*q##_j + q##_k*q##_k); \ q##_w *= norm; \ q##_i *= norm; \ q##_j *= norm; \ diff --git a/src/INTEL/npair_full_bin_ghost_intel.cpp b/src/INTEL/npair_full_bin_ghost_intel.cpp index b7b9ee4aea..381db62c08 100644 --- a/src/INTEL/npair_full_bin_ghost_intel.cpp +++ b/src/INTEL/npair_full_bin_ghost_intel.cpp @@ -292,12 +292,13 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list, else ito -= wlocal - nlocal; int e_ito = ito; - const int list_size = (e_ito + tid * 2 + 2) * maxnbors; + const bigint list_size = (bigint)(e_ito + tid * 2 + 2) * + (bigint)maxnbors; int pack_offset = maxnbors; - int ct = (ifrom + tid * 2) * maxnbors; + bigint ct = (bigint)(ifrom + tid * 2) * (bigint)maxnbors; int * _noalias neighptr = intel_list + ct; - const int obound = pack_offset + maxnbors * 2; + const bigint obound = pack_offset + maxnbors * 2; const int toffs = tid * ncache_stride; flt_t * _noalias const tx = ncachex + toffs; diff --git a/src/INTEL/npair_intel.cpp b/src/INTEL/npair_intel.cpp index b85873f677..8b05aa38aa 100644 --- a/src/INTEL/npair_intel.cpp +++ b/src/INTEL/npair_intel.cpp @@ -287,16 +287,17 @@ void NPairIntel::bin_newton(const int offload, NeighList *list, if (imod) e_ito += pack_width - imod; } #endif - const int list_size = (e_ito + tid * 2 + 2) * maxnbors; + const bigint list_size = (bigint)(e_ito + tid * 2 + 2) * + (bigint)maxnbors; #ifdef LMP_INTEL_3BODY_FAST const int pack_offset = maxnbors * pack_width; - const int obound = pack_offset + maxnbors * 2; + const bigint obound = pack_offset + maxnbors * 2; #else const int pack_offset = 0; - const int obound = maxnbors * 3; + const bigint obound = maxnbors * 3; #endif - int ct = (ifrom + tid * 2) * maxnbors; + bigint ct = (bigint)(ifrom + tid * 2) * (bigint)maxnbors; int * _noalias neighptr = intel_list + ct; int * _noalias neighptr2; if (THREE) neighptr2 = neighptr; diff --git a/src/INTEL/pair_airebo_intel.cpp b/src/INTEL/pair_airebo_intel.cpp index 0b05447dd7..7bc2b3edb8 100644 --- a/src/INTEL/pair_airebo_intel.cpp +++ b/src/INTEL/pair_airebo_intel.cpp @@ -905,7 +905,7 @@ inline flt_t frebo_pij(KernelArgsAIREBOT * ka, int i, int j, flt_t rho_k = ka->params.rho[ktype][1]; flt_t rho_j = ka->params.rho[jtype][1]; flt_t lamdajik = 4 * itype * ((rho_k - rikmag) - (rho_j - rijmag)); - flt_t ex_lam = exp(lamdajik); + flt_t ex_lam = overloaded::exp(lamdajik); flt_t rcminik = ka->params.rcmin[itype][ktype]; flt_t rcmaxik = ka->params.rcmax[itype][ktype]; flt_t dwik; diff --git a/src/INTEL/pair_buck_coul_cut_intel.cpp b/src/INTEL/pair_buck_coul_cut_intel.cpp index 01a9f91bcf..424e38c16a 100644 --- a/src/INTEL/pair_buck_coul_cut_intel.cpp +++ b/src/INTEL/pair_buck_coul_cut_intel.cpp @@ -267,7 +267,7 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag, const flt_t delz = ztmp - x[j].z; const int jtype = IP_PRE_dword_index(x[j].w); const flt_t rsq = delx * delx + dely * dely + delz * delz; - const flt_t r = sqrt(rsq); + const flt_t r = std::sqrt(rsq); const flt_t r2inv = (flt_t)1.0 / rsq; #ifdef INTEL_VMASK diff --git a/src/INTEL/pair_buck_coul_long_intel.cpp b/src/INTEL/pair_buck_coul_long_intel.cpp index ff57b571a4..48007c30d9 100644 --- a/src/INTEL/pair_buck_coul_long_intel.cpp +++ b/src/INTEL/pair_buck_coul_long_intel.cpp @@ -322,7 +322,7 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag, const int jtype = tjtype[jj]; const flt_t rsq = trsq[jj]; const flt_t r2inv = (flt_t)1.0 / rsq; - const flt_t r = (flt_t)1.0 / sqrt(r2inv); + const flt_t r = (flt_t)1.0 / std::sqrt(r2inv); #ifdef INTEL_ALLOW_TABLE if (!ncoultablebits || rsq <= tabinnersq) { diff --git a/src/INTEL/pair_buck_intel.cpp b/src/INTEL/pair_buck_intel.cpp index 2461361788..1f2a033784 100644 --- a/src/INTEL/pair_buck_intel.cpp +++ b/src/INTEL/pair_buck_intel.cpp @@ -255,7 +255,7 @@ void PairBuckIntel::eval(const int offload, const int vflag, const flt_t delz = ztmp - x[j].z; const int jtype = IP_PRE_dword_index(x[j].w); const flt_t rsq = delx * delx + dely * dely + delz * delz; - const flt_t r = sqrt(rsq); + const flt_t r = std::sqrt(rsq); const flt_t r2inv = (flt_t)1.0 / rsq; #ifdef INTEL_VMASK diff --git a/src/INTEL/pair_dpd_intel.cpp b/src/INTEL/pair_dpd_intel.cpp index 011979223e..7b11053d41 100644 --- a/src/INTEL/pair_dpd_intel.cpp +++ b/src/INTEL/pair_dpd_intel.cpp @@ -180,7 +180,7 @@ void PairDPDIntel::eval(const int offload, const int vflag, ATOM_T * _noalias const x = buffers->get_x(offload); typedef struct { double x, y, z; } lmp_vt; auto *v = (lmp_vt *)atom->v[0]; - const flt_t dtinvsqrt = 1.0/sqrt(update->dt); + const flt_t dtinvsqrt = 1.0/std::sqrt(update->dt); const int * _noalias const ilist = list->ilist; const int * _noalias const numneigh = list->numneigh; @@ -322,7 +322,7 @@ void PairDPDIntel::eval(const int offload, const int vflag, icut = parami[jtype].icut; } const flt_t rsq = delx * delx + dely * dely + delz * delz; - const flt_t rinv = (flt_t)1.0/sqrt(rsq); + const flt_t rinv = (flt_t)1.0/std::sqrt(rsq); if (rinv > icut) { flt_t factor_dpd, factor_sqrt; diff --git a/src/INTEL/pair_eam_intel.cpp b/src/INTEL/pair_eam_intel.cpp index ba5047a15a..9c5d6da5e5 100644 --- a/src/INTEL/pair_eam_intel.cpp +++ b/src/INTEL/pair_eam_intel.cpp @@ -340,7 +340,7 @@ void PairEAMIntel::eval(const int offload, const int vflag, const int j = tj[jj] & NEIGHMASK; if (!ONETYPE) jtype = tjtype[jj]; const flt_t rsq = trsq[jj]; - flt_t p = sqrt(rsq)*frdr + (flt_t)1.0; + flt_t p = std::sqrt(rsq)*frdr + (flt_t)1.0; int m = static_cast (p); m = MIN(m,nr-1); p -= m; @@ -546,7 +546,7 @@ void PairEAMIntel::eval(const int offload, const int vflag, const int j = tj[jj] & NEIGHMASK; if (!ONETYPE) jtype = tjtype[jj]; const flt_t rsq = trsq[jj]; - const flt_t r = sqrt(rsq); + const flt_t r = std::sqrt(rsq); flt_t p = r*frdr + (flt_t)1.0; int m = static_cast (p); m = MIN(m,nr-1); diff --git a/src/INTEL/pair_gayberne_intel.cpp b/src/INTEL/pair_gayberne_intel.cpp index 5609479388..92e074e5e1 100644 --- a/src/INTEL/pair_gayberne_intel.cpp +++ b/src/INTEL/pair_gayberne_intel.cpp @@ -492,7 +492,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, flt_t r12hat_0, r12hat_1, r12hat_2; ME_normalize3(delx_form[jj], dely_form[jj], delz_form[jj], r12hat); - flt_t r = sqrt(rsq_form[jj]); + flt_t r = std::sqrt(rsq_form[jj]); // compute distance of closest approach diff --git a/src/INTEL/pair_lj_charmm_coul_charmm_intel.cpp b/src/INTEL/pair_lj_charmm_coul_charmm_intel.cpp index e920257ef4..faae6e5cbc 100644 --- a/src/INTEL/pair_lj_charmm_coul_charmm_intel.cpp +++ b/src/INTEL/pair_lj_charmm_coul_charmm_intel.cpp @@ -306,7 +306,7 @@ void PairLJCharmmCoulCharmmIntel::eval(const int offload, const int vflag, const int sbindex = tj[jj] >> SBBITS & 3; const flt_t rsq = trsq[jj]; const flt_t r2inv = (flt_t)1.0 / rsq; - const flt_t r_inv = (flt_t)1.0 / sqrt(rsq); + const flt_t r_inv = (flt_t)1.0 / std::sqrt(rsq); forcecoul = qqrd2e * qtmp * q[j] * r_inv; if (rsq > cut_coul_innersq) { const flt_t ccr = cut_coulsq - rsq; diff --git a/src/INTEL/pair_lj_charmm_coul_long_intel.cpp b/src/INTEL/pair_lj_charmm_coul_long_intel.cpp index 0a93621bcd..412af17357 100644 --- a/src/INTEL/pair_lj_charmm_coul_long_intel.cpp +++ b/src/INTEL/pair_lj_charmm_coul_long_intel.cpp @@ -339,7 +339,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, const flt_t EWALD_F = 1.12837917; const flt_t INV_EWALD_P = 1.0 / 0.3275911; - const flt_t r = (flt_t)1.0 / sqrt(r2inv); + const flt_t r = (flt_t)1.0 / std::sqrt(r2inv); const flt_t grij = g_ewald * r; const flt_t expm2 = std::exp(-grij * grij); const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij); @@ -591,10 +591,10 @@ void PairLJCharmmCoulLongIntel::pack_force_const(ForceConst &fc, for (int j = 1; j < tp1; j++) { if (i <= j) { fc.lj[i][j].x = epsilon[i][j] * 4.0; - fc.lj[i][j].y = pow(sigma[i][j],6.0); + fc.lj[i][j].y = std::pow(sigma[i][j],6.0); } else { fc.lj[i][j].x = epsilon[j][i] * 4.0; - fc.lj[i][j].y = pow(sigma[j][i],6.0); + fc.lj[i][j].y = std::pow(sigma[j][i],6.0); } fc.cutsq[i][j] = cutsq[i][j]; } diff --git a/src/INTEL/pair_lj_cut_coul_long_intel.cpp b/src/INTEL/pair_lj_cut_coul_long_intel.cpp index 89e556defa..5eb3cbc1f7 100644 --- a/src/INTEL/pair_lj_cut_coul_long_intel.cpp +++ b/src/INTEL/pair_lj_cut_coul_long_intel.cpp @@ -332,7 +332,7 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag, const flt_t EWALD_F = 1.12837917; const flt_t INV_EWALD_P = 1.0 / 0.3275911; - const flt_t r = (flt_t)1.0 / sqrt(r2inv); + const flt_t r = (flt_t)1.0 / std::sqrt(r2inv); const flt_t grij = g_ewald * r; const flt_t expm2 = std::exp(-grij * grij); const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij); diff --git a/src/INTEL/pair_sw_intel.cpp b/src/INTEL/pair_sw_intel.cpp index 07d590ee2c..fa62f499de 100644 --- a/src/INTEL/pair_sw_intel.cpp +++ b/src/INTEL/pair_sw_intel.cpp @@ -382,7 +382,7 @@ void PairSWIntel::eval(const int offload, const int vflag, const flt_t rsq1 = trsq[jj]; const flt_t rinvsq1 = (flt_t)1.0 / rsq1; - const flt_t r1 = (flt_t)1.0/sqrt(rinvsq1); + const flt_t r1 = (flt_t)1.0/std::sqrt(rinvsq1); if (!ONETYPE) cut = p2f[ijtype].cut; const flt_t rainv1 = (flt_t)1.0 / (r1 - cut); @@ -475,7 +475,7 @@ void PairSWIntel::eval(const int offload, const int vflag, const flt_t rsq2 = trsq[kk]; const flt_t rinvsq2 = (flt_t)1.0 / rsq2; - const flt_t r2 = (flt_t)1.0 / sqrt(rinvsq2); + const flt_t r2 = (flt_t)1.0 / std::sqrt(rinvsq2); const flt_t rainv2 = (flt_t)1.0 / (r2 - cut); const flt_t gsrainv2 = sigma_gamma * rainv2; const flt_t gsrainvsq2 = gsrainv2 * rainv2 / r2; diff --git a/src/INTEL/pppm_disp_intel.cpp b/src/INTEL/pppm_disp_intel.cpp index 319d070e66..cc4aa6ab93 100644 --- a/src/INTEL/pppm_disp_intel.cpp +++ b/src/INTEL/pppm_disp_intel.cpp @@ -657,8 +657,8 @@ void PPPMDispIntel::compute(int eflag, int vflag) energy_1 -= g_ewald*qsqsum/MY_PIS + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy_6 += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij + - 1.0/12.0*pow(g_ewald_6,6)*csum; + energy_6 += - MY_PI*MY_PIS/(6*volume)*std::pow(g_ewald_6,3)*csumij + + 1.0/12.0*std::pow(g_ewald_6,6)*csum; energy_1 *= qscale; } @@ -671,7 +671,7 @@ void PPPMDispIntel::compute(int eflag, int vflag) MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world); for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i]; if (function[1]+function[2]+function[3]) { - double a = MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij; + double a = MY_PI*MY_PIS/(6*volume)*std::pow(g_ewald_6,3)*csumij; virial[0] -= a; virial[1] -= a; virial[2] -= a; @@ -690,8 +690,8 @@ void PPPMDispIntel::compute(int eflag, int vflag) int tmp; for (i = 0; i < atom->nlocal; i++) { tmp = atom->type[i]; - eatom[i] += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp] + - 1.0/12.0*pow(g_ewald_6,6)*cii[tmp]; + eatom[i] += - MY_PI*MY_PIS/(6*volume)*std::pow(g_ewald_6,3)* + csumi[tmp] + 1.0/12.0*std::pow(g_ewald_6,6)*cii[tmp]; } } } @@ -703,7 +703,7 @@ void PPPMDispIntel::compute(int eflag, int vflag) tmp = atom->type[i]; //dispersion self virial correction for (int n = 0; n < 3; n++) vatom[i][n] -= MY_PI*MY_PIS/(6*volume)* - pow(g_ewald_6,3)*csumi[tmp]; + std::pow(g_ewald_6,3)*csumi[tmp]; } } } @@ -1783,18 +1783,18 @@ void PPPMDispIntel::fieldforce_c_ad(IntelBuffers * /*buffers*/) const flt_t s1 = x[i][0] * hx_inv; const flt_t s2 = x[i][1] * hy_inv; const flt_t s3 = x[i][2] * hz_inv; - flt_t sf = fsf_coeff0 * sin(ftwo_pi * s1); - sf += fsf_coeff1 * sin(ffour_pi * s1); + flt_t sf = fsf_coeff0 * std::sin(ftwo_pi * s1); + sf += fsf_coeff1 * std::sin(ffour_pi * s1); sf *= twoqsq; f[i][0] += qfactor * particle_ekx[i] - fqqrd2es * sf; - sf = fsf_coeff2 * sin(ftwo_pi * s2); - sf += fsf_coeff3 * sin(ffour_pi * s2); + sf = fsf_coeff2 * std::sin(ftwo_pi * s2); + sf += fsf_coeff3 * std::sin(ffour_pi * s2); sf *= twoqsq; f[i][1] += qfactor * particle_eky[i] - fqqrd2es * sf; - sf = fsf_coeff4 * sin(ftwo_pi * s3); - sf += fsf_coeff5 * sin(ffour_pi * s3); + sf = fsf_coeff4 * std::sin(ftwo_pi * s3); + sf += fsf_coeff5 * std::sin(ffour_pi * s3); sf *= twoqsq; if (slabflag != 2) f[i][2] += qfactor * particle_ekz[i] - fqqrd2es * sf; @@ -2155,18 +2155,18 @@ void PPPMDispIntel::fieldforce_g_ad(IntelBuffers * /*buffers*/) const flt_t s1 = x[i][0] * hx_inv; const flt_t s2 = x[i][1] * hy_inv; const flt_t s3 = x[i][2] * hz_inv; - flt_t sf = fsf_coeff0 * sin(ftwo_pi * s1); - sf += fsf_coeff1 * sin(ffour_pi * s1); + flt_t sf = fsf_coeff0 * std::sin(ftwo_pi * s1); + sf += fsf_coeff1 * std::sin(ffour_pi * s1); sf *= twoljsq; f[i][0] += lj * particle_ekx[i] - sf; - sf = fsf_coeff2 * sin(ftwo_pi * s2); - sf += fsf_coeff3 * sin(ffour_pi * s2); + sf = fsf_coeff2 * std::sin(ftwo_pi * s2); + sf += fsf_coeff3 * std::sin(ffour_pi * s2); sf *= twoljsq; f[i][1] += lj * particle_eky[i] - sf; - sf = fsf_coeff4 * sin(ftwo_pi * s3); - sf += fsf_coeff5 * sin(ffour_pi * s3); + sf = fsf_coeff4 * std::sin(ftwo_pi * s3); + sf += fsf_coeff5 * std::sin(ffour_pi * s3); sf *= twoljsq; if (slabflag != 2) f[i][2] += lj * particle_ekz[i] - sf; @@ -2702,22 +2702,22 @@ void PPPMDispIntel::fieldforce_a_ad(IntelBuffers * /*buffers*/) const flt_t s1 = x[i][0] * hx_inv; const flt_t s2 = x[i][1] * hy_inv; const flt_t s3 = x[i][2] * hz_inv; - flt_t sf = fsf_coeff0 * sin(ftwo_pi * s1); - sf += fsf_coeff1 * sin(ffour_pi * s1); + flt_t sf = fsf_coeff0 * std::sin(ftwo_pi * s1); + sf += fsf_coeff1 * std::sin(ffour_pi * s1); sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; f[i][0] += lj0*particle_ekx0[i] + lj1*particle_ekx1[i] + lj2*particle_ekx2[i] + lj3*particle_ekx3[i] + lj4*particle_ekx4[i] + lj5*particle_ekx5[i] + lj6*particle_ekx6[i] - sf; - sf = fsf_coeff2 * sin(ftwo_pi * s2); - sf += fsf_coeff3 * sin(ffour_pi * s2); + sf = fsf_coeff2 * std::sin(ftwo_pi * s2); + sf += fsf_coeff3 * std::sin(ffour_pi * s2); sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; f[i][1] += lj0*particle_eky0[i] + lj1*particle_eky1[i] + lj2*particle_eky2[i] + lj3*particle_eky3[i] + lj4*particle_eky4[i] + lj5*particle_eky5[i] + lj6*particle_eky6[i] - sf; - sf = fsf_coeff4 * sin(ftwo_pi * s3); - sf += fsf_coeff5 * sin(ffour_pi * s3); + sf = fsf_coeff4 * std::sin(ftwo_pi * s3); + sf += fsf_coeff5 * std::sin(ffour_pi * s3); sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; if (slabflag != 2) f[i][2] += lj0*particle_ekz0[i] + lj1*particle_ekz1[i] + @@ -3101,14 +3101,14 @@ void PPPMDispIntel::fieldforce_none_ad(IntelBuffers * /*buffers*/) const flt_t s1 = x[i][0] * hx_inv; const flt_t s2 = x[i][1] * hy_inv; const flt_t s3 = x[i][2] * hz_inv; - flt_t sf1 = fsf_coeff0 * sin(ftwo_pi * s1); - sf1 += fsf_coeff1 * sin(ffour_pi * s1); + flt_t sf1 = fsf_coeff0 * std::sin(ftwo_pi * s1); + sf1 += fsf_coeff1 * std::sin(ffour_pi * s1); - flt_t sf2 = fsf_coeff2 * sin(ftwo_pi * s2); - sf2 += fsf_coeff3 * sin(ffour_pi * s2); + flt_t sf2 = fsf_coeff2 * std::sin(ftwo_pi * s2); + sf2 += fsf_coeff3 * std::sin(ffour_pi * s2); - flt_t sf3 = fsf_coeff4 * sin(ftwo_pi * s3); - sf3 += fsf_coeff5 * sin(ffour_pi * s3); + flt_t sf3 = fsf_coeff4 * std::sin(ftwo_pi * s3); + sf3 += fsf_coeff5 * std::sin(ffour_pi * s3); for (int k = 0; k < nsplit; k++) { const flt_t lj = B[nsplit*type + k]; const flt_t twoljsq = lj*lj * B[k] * 2; diff --git a/src/INTEL/pppm_intel.cpp b/src/INTEL/pppm_intel.cpp index 2ceca54d29..f67b3a89b3 100644 --- a/src/INTEL/pppm_intel.cpp +++ b/src/INTEL/pppm_intel.cpp @@ -953,18 +953,18 @@ void PPPMIntel::fieldforce_ad(IntelBuffers *buffers) const flt_t s1 = x[i].x * hx_inv; const flt_t s2 = x[i].y * hy_inv; const flt_t s3 = x[i].z * hz_inv; - flt_t sf = fsf_coeff0 * sin(ftwo_pi * s1); - sf += fsf_coeff1 * sin(ffour_pi * s1); + flt_t sf = fsf_coeff0 * std::sin(ftwo_pi * s1); + sf += fsf_coeff1 * std::sin(ffour_pi * s1); sf *= twoqsq; f[i].x += qfactor * particle_ekx[i] - fqqrd2es * sf; - sf = fsf_coeff2 * sin(ftwo_pi * s2); - sf += fsf_coeff3 * sin(ffour_pi * s2); + sf = fsf_coeff2 * std::sin(ftwo_pi * s2); + sf += fsf_coeff3 * std::sin(ffour_pi * s2); sf *= twoqsq; f[i].y += qfactor * particle_eky[i] - fqqrd2es * sf; - sf = fsf_coeff4 * sin(ftwo_pi * s3); - sf += fsf_coeff5 * sin(ffour_pi * s3); + sf = fsf_coeff4 * std::sin(ftwo_pi * s3); + sf += fsf_coeff5 * std::sin(ffour_pi * s3); sf *= twoqsq; if (slabflag != 2) f[i].z += qfactor * particle_ekz[i] - fqqrd2es * sf; diff --git a/src/INTEL/verlet_lrt_intel.cpp b/src/INTEL/verlet_lrt_intel.cpp index 42414420b6..9df17d8cef 100644 --- a/src/INTEL/verlet_lrt_intel.cpp +++ b/src/INTEL/verlet_lrt_intel.cpp @@ -19,7 +19,6 @@ #include "atom_vec.h" #include "bond.h" #include "comm.h" -#include "compute.h" #include "dihedral.h" #include "domain.h" #include "error.h" @@ -27,7 +26,6 @@ #include "force.h" #include "improper.h" #include "kspace.h" -#include "memory.h" #include "modify.h" #include "neighbor.h" #include "output.h" @@ -35,9 +33,7 @@ #include "timer.h" #include "update.h" -#if defined(_OPENMP) -#include -#endif +#include using namespace LAMMPS_NS; @@ -67,7 +63,7 @@ void VerletLRTIntel::init() { Verlet::init(); - _intel_kspace = dynamic_cast(force->kspace_match("^pppm\\..*intel$", 0)); + _intel_kspace = dynamic_cast(force->kspace_match("^pppm/.*intel$", 0)); // include pppm/electrode/intel #ifndef LMP_INTEL_USELRT @@ -95,12 +91,15 @@ void VerletLRTIntel::setup(int flag) } #endif - if (comm->me == 0) { - fprintf(screen,"Setting up VerletLRTIntel run ...\n"); - fprintf(screen," Unit style : %s\n", update->unit_style); - fmt::print(screen," Current step : {}\n", update->ntimestep); - fprintf(screen," Time step : %g\n", update->dt); - timer->print_timeout(screen); + if (comm->me == 0 && screen) { + fputs("Setting up VerletLRTIntel run ...\n",screen); + if (flag) { + fmt::print(screen," Unit style : {}\n" + " Current step : {}\n" + " Time step : {}\n", + update->unit_style,update->ntimestep,update->dt); + timer->print_timeout(screen); + } } #if defined(_LMP_INTEL_LRT_PTHREAD) @@ -110,8 +109,9 @@ void VerletLRTIntel::setup(int flag) sched_getaffinity(0, sizeof(cpuset), &cpuset); int my_cpu_count = CPU_COUNT(&cpuset); if (my_cpu_count < comm->nthreads + 1) { - error->warning(FLERR, "Using {} threads per MPI rank, but only {} core(s) allocated " - "for each MPI rank", comm->nthreads + 1, my_cpu_count); + error->warning(FLERR, "Using {} threads per MPI rank, but only {} " + "core(s) allocated for each MPI rank", + comm->nthreads + 1, my_cpu_count); } } #endif @@ -144,6 +144,7 @@ void VerletLRTIntel::setup(int flag) domain->box_too_small_check(); modify->setup_pre_neighbor(); neighbor->build(1); + modify->setup_post_neighbor(); neighbor->ncalls = 0; // compute all forces @@ -189,11 +190,11 @@ void VerletLRTIntel::setup(int flag) if (kspace_compute_flag) _intel_kspace->compute_second(eflag,vflag); - modify->pre_reverse(eflag,vflag); + modify->setup_pre_reverse(eflag,vflag); if (force->newton) comm->reverse_comm(); modify->setup(vflag); - output->setup(); + output->setup(flag); update->setupflag = 0; } @@ -214,9 +215,10 @@ void VerletLRTIntel::run(int n) int n_post_integrate = modify->n_post_integrate; int n_pre_exchange = modify->n_pre_exchange; int n_pre_neighbor = modify->n_pre_neighbor; + int n_post_neighbor = modify->n_post_neighbor; int n_pre_force = modify->n_pre_force; int n_pre_reverse = modify->n_pre_reverse; - int n_post_force = modify->n_post_force_any; + int n_post_force_any = modify->n_post_force_any; int n_end_of_step = modify->n_end_of_step; if (atom->sortfreq > 0) sortflag = 1; @@ -279,6 +281,10 @@ void VerletLRTIntel::run(int n) } neighbor->build(1); timer->stamp(Timer::NEIGH); + if (n_post_neighbor) { + modify->post_neighbor(); + timer->stamp(Timer::MODIFY); + } } // force computations @@ -353,7 +359,7 @@ void VerletLRTIntel::run(int n) // force modifications, final time integration, diagnostics - if (n_post_force) modify->post_force(vflag); + if (n_post_force_any) modify->post_force(vflag); modify->final_integrate(); if (n_end_of_step) modify->end_of_step(); timer->stamp(Timer::MODIFY); diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp index ec72cabf33..2b81e21e2b 100644 --- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp +++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp @@ -32,10 +32,10 @@ #include "potential_file_reader.h" #include -#include - using namespace LAMMPS_NS; +#define MAX_CACHE_ROWS 500 + // Cannot use virtual inheritance on the GPU, so must duplicate code /* ---------------------------------------------------------------------- */ @@ -46,6 +46,7 @@ PairEAMAlloyKokkos::PairEAMAlloyKokkos(LAMMPS *lmp) : PairEAM(lmp) respa_enable = 0; single_enable = 0; one_coeff = 1; + manybody_flag = 1; kokkosable = 1; atomKK = (AtomKokkos *) atom; @@ -59,10 +60,10 @@ PairEAMAlloyKokkos::PairEAMAlloyKokkos(LAMMPS *lmp) : PairEAM(lmp) template PairEAMAlloyKokkos::~PairEAMAlloyKokkos() { - if (!copymode) { - memoryKK->destroy_kokkos(k_eatom,eatom); - memoryKK->destroy_kokkos(k_vatom,vatom); - } + if (copymode) return; + + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } /* ---------------------------------------------------------------------- */ @@ -118,7 +119,7 @@ void PairEAMAlloyKokkos::compute(int eflag_in, int vflag_in) d_numneigh = k_list->d_numneigh; d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; - int inum = list->inum; + inum = list->inum; need_dup = lmp->kokkos->need_dup(); if (need_dup) { @@ -138,9 +139,9 @@ void PairEAMAlloyKokkos::compute(int eflag_in, int vflag_in) // zero out density if (newton_pair) - Kokkos::parallel_for(Kokkos::RangePolicy(0,nall),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,nall),*this); else - Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); // loop over neighbors of my atoms @@ -152,15 +153,15 @@ void PairEAMAlloyKokkos::compute(int eflag_in, int vflag_in) if (neighflag == HALF) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } } else if (neighflag == HALFTHREAD) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } } @@ -178,18 +179,22 @@ void PairEAMAlloyKokkos::compute(int eflag_in, int vflag_in) // compute kernel B if (eflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } else if (neighflag == FULL) { // compute kernel AB if (eflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } if (eflag) { @@ -208,41 +213,65 @@ void PairEAMAlloyKokkos::compute(int eflag_in, int vflag_in) if (evflag) { if (neighflag == HALF) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } } else if (neighflag == HALFTHREAD) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } } else if (neighflag == FULL) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } } } else { if (neighflag == HALF) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } } else if (neighflag == HALFTHREAD) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } } else if (neighflag == FULL) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } } } @@ -427,7 +456,7 @@ int PairEAMAlloyKokkos::pack_forward_comm_kokkos(int n, DAT::tdual_i d_sendlist = k_sendlist.view(); iswap = iswap_in; v_buf = buf.view(); - Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); return n; } @@ -445,7 +474,7 @@ void PairEAMAlloyKokkos::unpack_forward_comm_kokkos(int n, int first { first = first_in; v_buf = buf.view(); - Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); } template @@ -588,6 +617,7 @@ template template KOKKOS_INLINE_FUNCTION void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelB, const int &ii, EV_FLOAT& ev) const { + // fp = derivative of embedding energy at each atom // phi = embedding energy at each atom // if rho > rhomax (e.g. due to close approach of two atoms), @@ -644,6 +674,7 @@ void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelAB, for (int jj = 0; jj < jnum; jj++) { int j = d_neighbors(i,jj); j &= NEIGHMASK; + const X_FLOAT delx = xtmp - x(j,0); const X_FLOAT dely = ytmp - x(j,1); const X_FLOAT delz = ztmp - x(j,2); @@ -806,6 +837,235 @@ void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelC +template +KOKKOS_INLINE_FUNCTION +void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelAB, + const typename Kokkos::TeamPolicy::member_type& team_member, + EV_FLOAT& ev) const { + int ii = team_member.league_rank()*team_member.team_size() + team_member.team_rank(); + // rho = density at each atom + // loop over neighbors of my atoms + const int m_max = d_rhor_spline.extent_int(1); + const int j_max = t_ffloat_2d_n7::static_extent(2); + const int d_rhor_spline_cached = (m_max > MAX_CACHE_ROWS) ? 0 : 1; + Kokkos::View> A(team_member.team_scratch(0), MAX_CACHE_ROWS); + + if (d_rhor_spline_cached) { + for(int i = team_member.team_rank(); i < m_max*j_max; i+= team_member.team_size()) { + int j = i%j_max; + int m = i/j_max; + A(m,j) = d_rhor_spline(0,m,j); + } + team_member.team_barrier(); + } + if (ii < inum) { + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + const int jnum = d_numneigh[i]; + + F_FLOAT rhotmp = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const int jtype = type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + F_FLOAT p = sqrt(rsq)*rdr + 1.0; + int m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + const int d_type2rhor_ji = d_type2rhor(jtype,itype); + if (d_type2rhor_ji == 0 && d_rhor_spline_cached == 1) { + rhotmp += ((A(m,3)*p + A(m,4))*p + + A(m,5))*p + A(m,6); + } else + rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + + d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6); + } + + } + d_rho[i] += rhotmp; + + // fp = derivative of embedding energy at each atom + // phi = embedding energy at each atom + // if rho > rhomax (e.g. due to close approach of two atoms), + // will exceed table, so add linear term to conserve energy + + F_FLOAT p = d_rho[i]*rdrho + 1.0; + int m = static_cast (p); + m = MAX(1,MIN(m,nrho-1)); + p -= m; + p = MIN(p,1.0); + const int d_type2frho_i = d_type2frho[itype]; + d_fp[i] = (d_frho_spline(d_type2frho_i,m,0)*p + d_frho_spline(d_type2frho_i,m,1))*p + d_frho_spline(d_type2frho_i,m,2); + if (EFLAG) { + F_FLOAT phi = ((d_frho_spline(d_type2frho_i,m,3)*p + d_frho_spline(d_type2frho_i,m,4))*p + + d_frho_spline(d_type2frho_i,m,5))*p + d_frho_spline(d_type2frho_i,m,6); + if (d_rho[i] > rhomax) phi += d_fp[i] * (d_rho[i]-rhomax); + if (eflag_global) ev.evdwl += phi; + if (eflag_atom) d_eatom[i] += phi; + } + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelAB, + const typename Kokkos::TeamPolicy::member_type& team_member) const { + EV_FLOAT ev; + this->template operator()(TagPairEAMAlloyKernelAB(), team_member, ev); +} + +/* ---------------------------------------------------------------------- */ + +////Specialisation for Neighborlist types Half, HalfThread, Full +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelC, + const typename Kokkos::TeamPolicy::member_type& team_member, + EV_FLOAT& ev) const { + + int ii = team_member.league_rank()*team_member.team_size() + team_member.team_rank(); + + // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + + auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); + auto a_f = v_f.template access>(); + + const int m_max = d_z2r_spline.extent_int(1); + const int j_max = t_ffloat_2d_n7::static_extent(2); + const int d_z2r_spline_cached = (m_max > MAX_CACHE_ROWS) ? 0 : 1; + Kokkos::View> A(team_member.team_scratch(0), MAX_CACHE_ROWS); + + if (d_z2r_spline_cached) { + for(int i = team_member.team_rank(); i < m_max*j_max; i+= team_member.team_size()) { + int j = i%j_max; + int m = i/j_max; + A(m,j) = d_z2r_spline(0,m,j); + } + team_member.team_barrier(); + } + if (ii < inum) { + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + const int jnum = d_numneigh[i]; + + F_FLOAT fxtmp = 0.0; + F_FLOAT fytmp = 0.0; + F_FLOAT fztmp = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const int jtype = type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + const F_FLOAT r = sqrt(rsq); + F_FLOAT p = r*rdr + 1.0; + int m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + + // rhoip = derivative of (density at atom j due to atom i) + // rhojp = derivative of (density at atom i due to atom j) + // phi = pair potential energy + // phip = phi' + // z2 = phi * r + // z2p = (phi * r)' = (phi' r) + phi + // psip needs both fp[i] and fp[j] terms since r_ij appears in two + // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji) + // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip + + const int d_type2rhor_ij = d_type2rhor(itype,jtype); + const F_FLOAT rhoip = (d_rhor_spline(d_type2rhor_ij,m,0)*p + d_rhor_spline(d_type2rhor_ij,m,1))*p + + d_rhor_spline(d_type2rhor_ij,m,2); + const int d_type2rhor_ji = d_type2rhor(jtype,itype); + const F_FLOAT rhojp = (d_rhor_spline(d_type2rhor_ji,m,0)*p + d_rhor_spline(d_type2rhor_ji,m,1))*p + + d_rhor_spline(d_type2rhor_ji,m,2); + const int d_type2z2r_ij = d_type2z2r(itype,jtype); + + const auto have_cache = (d_z2r_spline_cached == 1) && (0 == d_type2z2r_ij); + const auto z2r_spline_3 = (have_cache) ? A(m,3) : d_z2r_spline(d_type2z2r_ij,m,3); + const auto z2r_spline_4 = (have_cache) ? A(m,4) : d_z2r_spline(d_type2z2r_ij,m,4); + const auto z2r_spline_5 = (have_cache) ? A(m,5) : d_z2r_spline(d_type2z2r_ij,m,5); + const auto z2r_spline_6 = (have_cache) ? A(m,6) : d_z2r_spline(d_type2z2r_ij,m,6); + + const F_FLOAT z2p = (3.0*rdr*z2r_spline_3*p + 2.0*rdr*z2r_spline_4)*p + + rdr*z2r_spline_5; // the rdr and the factors of 3.0 and 2.0 come out of the interpolate function + const F_FLOAT z2 = ((z2r_spline_3*p + z2r_spline_4)*p + + z2r_spline_5)*p + z2r_spline_6; + + const F_FLOAT recip = 1.0/r; + const F_FLOAT phi = z2*recip; + const F_FLOAT phip = z2p*recip - phi*recip; + const F_FLOAT psip = d_fp[i]*rhojp + d_fp[j]*rhoip + phip; + const F_FLOAT fpair = -psip*recip; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { + a_f(j,0) -= delx*fpair; + a_f(j,1) -= dely*fpair; + a_f(j,2) -= delz*fpair; + } + + if (EVFLAG) { + if (eflag) { + ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(jtemplate ev_tally(ev,i,j,phi,fpair,delx,dely,delz); + } + + } + } + + a_f(i,0) += fxtmp; + a_f(i,1) += fytmp; + a_f(i,2) += fztmp; + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelC, + /*const int &ii*/ + const typename Kokkos::TeamPolicy::member_type& team_member) const { + EV_FLOAT ev; + this->template operator()(TagPairEAMAlloyKernelC(), team_member, ev); +} + +/* ---------------------------------------------------------------------- */ + template template KOKKOS_INLINE_FUNCTION @@ -1214,6 +1474,35 @@ void PairEAMAlloyKokkos::file2array_alloy() /* ---------------------------------------------------------------------- */ +template +template +struct PairEAMAlloyKokkos::policyInstance { + KOKKOS_INLINE_FUNCTION + static auto get(int inum) { + auto policy = Kokkos::RangePolicy(0,inum); + return policy; + } +}; + +#ifdef KOKKOS_ENABLE_HIP +template<> +template +struct PairEAMAlloyKokkos::policyInstance { + KOKKOS_INLINE_FUNCTION + static auto get(int inum) { + static_assert(t_ffloat_2d_n7::static_extent(2) == 7, + "Breaking assumption of spline dim for KernelAB and KernelC scratch caching"); + + auto policy = Kokkos::TeamPolicy((inum+1023)/1024, 1024) + .set_scratch_size(0, + Kokkos::PerTeam(MAX_CACHE_ROWS*7*sizeof(double))); + return policy; + } +}; +#endif + +/* ---------------------------------------------------------------------- */ + namespace LAMMPS_NS { template class PairEAMAlloyKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.h b/src/KOKKOS/pair_eam_alloy_kokkos.h index c6cb73ca5d..2eb40189ac 100644 --- a/src/KOKKOS/pair_eam_alloy_kokkos.h +++ b/src/KOKKOS/pair_eam_alloy_kokkos.h @@ -61,7 +61,6 @@ class PairEAMAlloyKokkos : public PairEAM, public KokkosBase { ~PairEAMAlloyKokkos() override; void compute(int, int) override; void init_style() override; - void *extract(const char *, int &) override { return nullptr; } void coeff(int, char **) override; KOKKOS_INLINE_FUNCTION @@ -93,6 +92,14 @@ class PairEAMAlloyKokkos : public PairEAM, public KokkosBase { KOKKOS_INLINE_FUNCTION void operator()(TagPairEAMAlloyKernelAB, const int&) const; + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMAlloyKernelAB, const typename Kokkos::TeamPolicy::member_type&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMAlloyKernelAB, const typename Kokkos::TeamPolicy::member_type&) const; + template KOKKOS_INLINE_FUNCTION void operator()(TagPairEAMAlloyKernelC, const int&, EV_FLOAT&) const; @@ -101,6 +108,14 @@ class PairEAMAlloyKokkos : public PairEAM, public KokkosBase { KOKKOS_INLINE_FUNCTION void operator()(TagPairEAMAlloyKernelC, const int&) const; + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMAlloyKernelC, const typename Kokkos::TeamPolicy::member_type&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMAlloyKernelC, const typename Kokkos::TeamPolicy::member_type&) const; + template KOKKOS_INLINE_FUNCTION void ev_tally(EV_FLOAT &ev, const int &i, const int &j, @@ -125,7 +140,7 @@ class PairEAMAlloyKokkos : public PairEAM, public KokkosBase { typename AT::t_efloat_1d d_eatom; typename AT::t_virial_array d_vatom; - int need_dup; + int need_dup,inum; using KKDeviceType = typename KKDevice::value; @@ -139,7 +154,6 @@ class PairEAMAlloyKokkos : public PairEAM, public KokkosBase { DupScatterView dup_f; DupScatterView dup_eatom; DupScatterView dup_vatom; - NonDupScatterView ndup_rho; NonDupScatterView ndup_f; NonDupScatterView ndup_eatom; @@ -163,17 +177,18 @@ class PairEAMAlloyKokkos : public PairEAM, public KokkosBase { t_ffloat_2d_n7 d_frho_spline; t_ffloat_2d_n7 d_rhor_spline; t_ffloat_2d_n7 d_z2r_spline; - + void interpolate(int, double, double *, t_host_ffloat_2d_n7, int); void file2array() override; void file2array_alloy(); void array2spline() override; - void interpolate(int, double, double *, t_host_ffloat_2d_n7, int); void read_file(char *) override; + template + struct policyInstance; + typename AT::t_neighbors_2d d_neighbors; typename AT::t_int_1d d_ilist; typename AT::t_int_1d d_numneigh; - //NeighListKokkos k_list; int iswap; int first; @@ -187,7 +202,6 @@ class PairEAMAlloyKokkos : public PairEAM, public KokkosBase { }; } - #endif #endif diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp index 50abbc3191..4572d14e48 100644 --- a/src/KOKKOS/pair_eam_fs_kokkos.cpp +++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp @@ -32,10 +32,10 @@ #include "potential_file_reader.h" #include -#include - using namespace LAMMPS_NS; +#define MAX_CACHE_ROWS 500 + // Cannot use virtual inheritance on the GPU, so must duplicate code /* ---------------------------------------------------------------------- */ @@ -46,6 +46,7 @@ PairEAMFSKokkos::PairEAMFSKokkos(LAMMPS *lmp) : PairEAM(lmp) respa_enable = 0; single_enable = 0; one_coeff = 1; + manybody_flag = 1; kokkosable = 1; atomKK = (AtomKokkos *) atom; @@ -59,10 +60,10 @@ PairEAMFSKokkos::PairEAMFSKokkos(LAMMPS *lmp) : PairEAM(lmp) template PairEAMFSKokkos::~PairEAMFSKokkos() { - if (!copymode) { - memoryKK->destroy_kokkos(k_eatom,eatom); - memoryKK->destroy_kokkos(k_vatom,vatom); - } + if (copymode) return; + + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } /* ---------------------------------------------------------------------- */ @@ -118,7 +119,7 @@ void PairEAMFSKokkos::compute(int eflag_in, int vflag_in) d_numneigh = k_list->d_numneigh; d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; - int inum = list->inum; + inum = list->inum; need_dup = lmp->kokkos->need_dup(); if (need_dup) { @@ -138,9 +139,9 @@ void PairEAMFSKokkos::compute(int eflag_in, int vflag_in) // zero out density if (newton_pair) - Kokkos::parallel_for(Kokkos::RangePolicy(0,nall),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,nall),*this); else - Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); // loop over neighbors of my atoms @@ -152,15 +153,15 @@ void PairEAMFSKokkos::compute(int eflag_in, int vflag_in) if (neighflag == HALF) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } } else if (neighflag == HALFTHREAD) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } } @@ -178,18 +179,22 @@ void PairEAMFSKokkos::compute(int eflag_in, int vflag_in) // compute kernel B if (eflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } else if (neighflag == FULL) { // compute kernel AB if (eflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } if (eflag) { @@ -197,7 +202,7 @@ void PairEAMFSKokkos::compute(int eflag_in, int vflag_in) ev.evdwl = 0.0; } - // communicate derivative of embedding function (on the device) + // communicate derivative of embedding function k_fp.template modify(); comm->forward_comm(this); @@ -208,41 +213,65 @@ void PairEAMFSKokkos::compute(int eflag_in, int vflag_in) if (evflag) { if (neighflag == HALF) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } } else if (neighflag == HALFTHREAD) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } } else if (neighflag == FULL) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } } } else { if (neighflag == HALF) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } } else if (neighflag == HALFTHREAD) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } } else if (neighflag == FULL) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } } } @@ -427,7 +456,7 @@ int PairEAMFSKokkos::pack_forward_comm_kokkos(int n, DAT::tdual_int_ d_sendlist = k_sendlist.view(); iswap = iswap_in; v_buf = buf.view(); - Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); return n; } @@ -445,7 +474,7 @@ void PairEAMFSKokkos::unpack_forward_comm_kokkos(int n, int first_in { first = first_in; v_buf = buf.view(); - Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); } template @@ -645,6 +674,7 @@ void PairEAMFSKokkos::operator()(TagPairEAMFSKernelAB, const for (int jj = 0; jj < jnum; jj++) { int j = d_neighbors(i,jj); j &= NEIGHMASK; + const X_FLOAT delx = xtmp - x(j,0); const X_FLOAT dely = ytmp - x(j,1); const X_FLOAT delz = ztmp - x(j,2); @@ -691,7 +721,7 @@ template template KOKKOS_INLINE_FUNCTION void PairEAMFSKokkos::operator()(TagPairEAMFSKernelAB, const int &ii) const { - EV_FLOAT ev; + EV_FLOAT ev; this->template operator()(TagPairEAMFSKernelAB(), ii, ev); this->template operator()(TagPairEAMFSKernelAB(), ii, ev); } @@ -807,6 +837,235 @@ void PairEAMFSKokkos::operator()(TagPairEAMFSKernelC +template +KOKKOS_INLINE_FUNCTION +void PairEAMFSKokkos::operator()(TagPairEAMFSKernelAB, + const typename Kokkos::TeamPolicy::member_type& team_member, + EV_FLOAT& ev) const { + int ii = team_member.league_rank()*team_member.team_size() + team_member.team_rank(); + // rho = density at each atom + // loop over neighbors of my atoms + const int m_max = d_rhor_spline.extent_int(1); + const int j_max = t_ffloat_2d_n7::static_extent(2); + const int d_rhor_spline_cached = (m_max > MAX_CACHE_ROWS) ? 0 : 1; + Kokkos::View> A(team_member.team_scratch(0), MAX_CACHE_ROWS); + + if (d_rhor_spline_cached) { + for(int i = team_member.team_rank(); i < m_max*j_max; i+= team_member.team_size()) { + int j = i%j_max; + int m = i/j_max; + A(m,j) = d_rhor_spline(0,m,j); + } + team_member.team_barrier(); + } + if (ii < inum) { + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + const int jnum = d_numneigh[i]; + + F_FLOAT rhotmp = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const int jtype = type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + F_FLOAT p = sqrt(rsq)*rdr + 1.0; + int m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + const int d_type2rhor_ji = d_type2rhor(jtype,itype); + if (d_type2rhor_ji == 0 && d_rhor_spline_cached == 1) { + rhotmp += ((A(m,3)*p + A(m,4))*p + + A(m,5))*p + A(m,6); + } else + rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + + d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6); + } + + } + d_rho[i] += rhotmp; + + // fp = derivative of embedding energy at each atom + // phi = embedding energy at each atom + // if rho > rhomax (e.g. due to close approach of two atoms), + // will exceed table, so add linear term to conserve energy + + F_FLOAT p = d_rho[i]*rdrho + 1.0; + int m = static_cast (p); + m = MAX(1,MIN(m,nrho-1)); + p -= m; + p = MIN(p,1.0); + const int d_type2frho_i = d_type2frho[itype]; + d_fp[i] = (d_frho_spline(d_type2frho_i,m,0)*p + d_frho_spline(d_type2frho_i,m,1))*p + d_frho_spline(d_type2frho_i,m,2); + if (EFLAG) { + F_FLOAT phi = ((d_frho_spline(d_type2frho_i,m,3)*p + d_frho_spline(d_type2frho_i,m,4))*p + + d_frho_spline(d_type2frho_i,m,5))*p + d_frho_spline(d_type2frho_i,m,6); + if (d_rho[i] > rhomax) phi += d_fp[i] * (d_rho[i]-rhomax); + if (eflag_global) ev.evdwl += phi; + if (eflag_atom) d_eatom[i] += phi; + } + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMFSKokkos::operator()(TagPairEAMFSKernelAB, + const typename Kokkos::TeamPolicy::member_type& team_member) const { + EV_FLOAT ev; + this->template operator()(TagPairEAMFSKernelAB(), team_member, ev); +} + +/* ---------------------------------------------------------------------- */ + +////Specialisation for Neighborlist types Half, HalfThread, Full +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMFSKokkos::operator()(TagPairEAMFSKernelC, + const typename Kokkos::TeamPolicy::member_type& team_member, + EV_FLOAT& ev) const { + + int ii = team_member.league_rank()*team_member.team_size() + team_member.team_rank(); + + // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + + auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); + auto a_f = v_f.template access>(); + + const int m_max = d_z2r_spline.extent_int(1); + const int j_max = t_ffloat_2d_n7::static_extent(2); + const int d_z2r_spline_cached = (m_max > MAX_CACHE_ROWS) ? 0 : 1; + Kokkos::View> A(team_member.team_scratch(0), MAX_CACHE_ROWS); + + if (d_z2r_spline_cached) { + for(int i = team_member.team_rank(); i < m_max*j_max; i+= team_member.team_size()) { + int j = i%j_max; + int m = i/j_max; + A(m,j) = d_z2r_spline(0,m,j); + } + team_member.team_barrier(); + } + if (ii < inum) { + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + const int jnum = d_numneigh[i]; + + F_FLOAT fxtmp = 0.0; + F_FLOAT fytmp = 0.0; + F_FLOAT fztmp = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const int jtype = type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + const F_FLOAT r = sqrt(rsq); + F_FLOAT p = r*rdr + 1.0; + int m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + + // rhoip = derivative of (density at atom j due to atom i) + // rhojp = derivative of (density at atom i due to atom j) + // phi = pair potential energy + // phip = phi' + // z2 = phi * r + // z2p = (phi * r)' = (phi' r) + phi + // psip needs both fp[i] and fp[j] terms since r_ij appears in two + // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji) + // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip + + const int d_type2rhor_ij = d_type2rhor(itype,jtype); + const F_FLOAT rhoip = (d_rhor_spline(d_type2rhor_ij,m,0)*p + d_rhor_spline(d_type2rhor_ij,m,1))*p + + d_rhor_spline(d_type2rhor_ij,m,2); + const int d_type2rhor_ji = d_type2rhor(jtype,itype); + const F_FLOAT rhojp = (d_rhor_spline(d_type2rhor_ji,m,0)*p + d_rhor_spline(d_type2rhor_ji,m,1))*p + + d_rhor_spline(d_type2rhor_ji,m,2); + const int d_type2z2r_ij = d_type2z2r(itype,jtype); + + const auto have_cache = (d_z2r_spline_cached == 1) && (0 == d_type2z2r_ij); + const auto z2r_spline_3 = (have_cache) ? A(m,3) : d_z2r_spline(d_type2z2r_ij,m,3); + const auto z2r_spline_4 = (have_cache) ? A(m,4) : d_z2r_spline(d_type2z2r_ij,m,4); + const auto z2r_spline_5 = (have_cache) ? A(m,5) : d_z2r_spline(d_type2z2r_ij,m,5); + const auto z2r_spline_6 = (have_cache) ? A(m,6) : d_z2r_spline(d_type2z2r_ij,m,6); + + const F_FLOAT z2p = (3.0*rdr*z2r_spline_3*p + 2.0*rdr*z2r_spline_4)*p + + rdr*z2r_spline_5; // the rdr and the factors of 3.0 and 2.0 come out of the interpolate function + const F_FLOAT z2 = ((z2r_spline_3*p + z2r_spline_4)*p + + z2r_spline_5)*p + z2r_spline_6; + + const F_FLOAT recip = 1.0/r; + const F_FLOAT phi = z2*recip; + const F_FLOAT phip = z2p*recip - phi*recip; + const F_FLOAT psip = d_fp[i]*rhojp + d_fp[j]*rhoip + phip; + const F_FLOAT fpair = -psip*recip; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { + a_f(j,0) -= delx*fpair; + a_f(j,1) -= dely*fpair; + a_f(j,2) -= delz*fpair; + } + + if (EVFLAG) { + if (eflag) { + ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(jtemplate ev_tally(ev,i,j,phi,fpair,delx,dely,delz); + } + + } + } + + a_f(i,0) += fxtmp; + a_f(i,1) += fytmp; + a_f(i,2) += fztmp; + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMFSKokkos::operator()(TagPairEAMFSKernelC, + /*const int &ii*/ + const typename Kokkos::TeamPolicy::member_type& team_member) const { + EV_FLOAT ev; + this->template operator()(TagPairEAMFSKernelC(), team_member, ev); +} + +/* ---------------------------------------------------------------------- */ + template template KOKKOS_INLINE_FUNCTION @@ -1225,6 +1484,35 @@ void PairEAMFSKokkos::file2array_fs() /* ---------------------------------------------------------------------- */ +template +template +struct PairEAMFSKokkos::policyInstance { + KOKKOS_INLINE_FUNCTION + static auto get(int inum) { + auto policy = Kokkos::RangePolicy(0,inum); + return policy; + } +}; + +#ifdef KOKKOS_ENABLE_HIP +template<> +template +struct PairEAMFSKokkos::policyInstance { + KOKKOS_INLINE_FUNCTION + static auto get(int inum) { + static_assert(t_ffloat_2d_n7::static_extent(2) == 7, + "Breaking assumption of spline dim for KernelAB and KernelC scratch caching"); + + auto policy = Kokkos::TeamPolicy((inum+1023)/1024, 1024) + .set_scratch_size(0, + Kokkos::PerTeam(MAX_CACHE_ROWS*7*sizeof(double))); + return policy; + } +}; +#endif + +/* ---------------------------------------------------------------------- */ + namespace LAMMPS_NS { template class PairEAMFSKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/pair_eam_fs_kokkos.h b/src/KOKKOS/pair_eam_fs_kokkos.h index 06a395cba7..bd03ab0015 100644 --- a/src/KOKKOS/pair_eam_fs_kokkos.h +++ b/src/KOKKOS/pair_eam_fs_kokkos.h @@ -61,7 +61,6 @@ class PairEAMFSKokkos : public PairEAM, public KokkosBase { ~PairEAMFSKokkos() override; void compute(int, int) override; void init_style() override; - void *extract(const char *, int &) override { return nullptr; } void coeff(int, char **) override; KOKKOS_INLINE_FUNCTION @@ -93,6 +92,14 @@ class PairEAMFSKokkos : public PairEAM, public KokkosBase { KOKKOS_INLINE_FUNCTION void operator()(TagPairEAMFSKernelAB, const int&) const; + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMFSKernelAB, const typename Kokkos::TeamPolicy::member_type&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMFSKernelAB, const typename Kokkos::TeamPolicy::member_type&) const; + template KOKKOS_INLINE_FUNCTION void operator()(TagPairEAMFSKernelC, const int&, EV_FLOAT&) const; @@ -101,6 +108,14 @@ class PairEAMFSKokkos : public PairEAM, public KokkosBase { KOKKOS_INLINE_FUNCTION void operator()(TagPairEAMFSKernelC, const int&) const; + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMFSKernelC, const typename Kokkos::TeamPolicy::member_type&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMFSKernelC, const typename Kokkos::TeamPolicy::member_type&) const; + template KOKKOS_INLINE_FUNCTION void ev_tally(EV_FLOAT &ev, const int &i, const int &j, @@ -125,7 +140,7 @@ class PairEAMFSKokkos : public PairEAM, public KokkosBase { typename AT::t_efloat_1d d_eatom; typename AT::t_virial_array d_vatom; - int need_dup; + int need_dup,inum; using KKDeviceType = typename KKDevice::value; @@ -139,7 +154,6 @@ class PairEAMFSKokkos : public PairEAM, public KokkosBase { DupScatterView dup_f; DupScatterView dup_eatom; DupScatterView dup_vatom; - NonDupScatterView ndup_rho; NonDupScatterView ndup_f; NonDupScatterView ndup_eatom; @@ -163,13 +177,15 @@ class PairEAMFSKokkos : public PairEAM, public KokkosBase { t_ffloat_2d_n7 d_frho_spline; t_ffloat_2d_n7 d_rhor_spline; t_ffloat_2d_n7 d_z2r_spline; - + void interpolate(int, double, double *, t_host_ffloat_2d_n7, int); void file2array() override; void file2array_fs(); void array2spline() override; - void interpolate(int, double, double *, t_host_ffloat_2d_n7, int); void read_file(char *) override; + template + struct policyInstance; + typename AT::t_neighbors_2d d_neighbors; typename AT::t_int_1d d_ilist; typename AT::t_int_1d d_numneigh; @@ -186,7 +202,6 @@ class PairEAMFSKokkos : public PairEAM, public KokkosBase { }; } - #endif #endif diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index 6e1cd1feea..de6d3646bf 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -31,9 +31,10 @@ #include "pair_kokkos.h" #include - using namespace LAMMPS_NS; +#define MAX_CACHE_ROWS 500 + /* ---------------------------------------------------------------------- */ template @@ -113,7 +114,7 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) d_numneigh = k_list->d_numneigh; d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; - int inum = list->inum; + inum = list->inum; need_dup = lmp->kokkos->need_dup(); if (need_dup) { @@ -147,15 +148,15 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) if (neighflag == HALF) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } } else if (neighflag == HALFTHREAD) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } } @@ -173,18 +174,22 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) // compute kernel B if (eflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } else if (neighflag == FULL) { // compute kernel AB if (eflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } if (eflag) { @@ -203,41 +208,65 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) if (evflag) { if (neighflag == HALF) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } } else if (neighflag == HALFTHREAD) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } } else if (neighflag == FULL) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce( + Kokkos::RangePolicy>(0,inum), + *this,ev); } } } else { if (neighflag == HALF) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } } else if (neighflag == HALFTHREAD) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } } else if (neighflag == FULL) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for( + policyInstance>::get(inum), + *this); } } } @@ -583,6 +612,7 @@ template template KOKKOS_INLINE_FUNCTION void PairEAMKokkos::operator()(TagPairEAMKernelB, const int &ii, EV_FLOAT& ev) const { + // fp = derivative of embedding energy at each atom // phi = embedding energy at each atom // if rho > rhomax (e.g. due to close approach of two atoms), @@ -802,6 +832,235 @@ void PairEAMKokkos::operator()(TagPairEAMKernelC +template +KOKKOS_INLINE_FUNCTION +void PairEAMKokkos::operator()(TagPairEAMKernelAB, + const typename Kokkos::TeamPolicy::member_type& team_member, + EV_FLOAT& ev) const { + int ii = team_member.league_rank()*team_member.team_size() + team_member.team_rank(); + // rho = density at each atom + // loop over neighbors of my atoms + const int m_max = d_rhor_spline.extent_int(1); + const int j_max = t_ffloat_2d_n7::static_extent(2); + const int d_rhor_spline_cached = (m_max > MAX_CACHE_ROWS) ? 0 : 1; + Kokkos::View> A(team_member.team_scratch(0), MAX_CACHE_ROWS); + + if (d_rhor_spline_cached) { + for(int i = team_member.team_rank(); i < m_max*j_max; i+= team_member.team_size()) { + int j = i%j_max; + int m = i/j_max; + A(m,j) = d_rhor_spline(0,m,j); + } + team_member.team_barrier(); + } + if (ii < inum) { + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + const int jnum = d_numneigh[i]; + + F_FLOAT rhotmp = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const int jtype = type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + F_FLOAT p = sqrt(rsq)*rdr + 1.0; + int m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + const int d_type2rhor_ji = d_type2rhor(jtype,itype); + if (d_type2rhor_ji == 0 && d_rhor_spline_cached == 1) { + rhotmp += ((A(m,3)*p + A(m,4))*p + + A(m,5))*p + A(m,6); + } else + rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + + d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6); + } + + } + d_rho[i] += rhotmp; + + // fp = derivative of embedding energy at each atom + // phi = embedding energy at each atom + // if rho > rhomax (e.g. due to close approach of two atoms), + // will exceed table, so add linear term to conserve energy + + F_FLOAT p = d_rho[i]*rdrho + 1.0; + int m = static_cast (p); + m = MAX(1,MIN(m,nrho-1)); + p -= m; + p = MIN(p,1.0); + const int d_type2frho_i = d_type2frho[itype]; + d_fp[i] = (d_frho_spline(d_type2frho_i,m,0)*p + d_frho_spline(d_type2frho_i,m,1))*p + d_frho_spline(d_type2frho_i,m,2); + if (EFLAG) { + F_FLOAT phi = ((d_frho_spline(d_type2frho_i,m,3)*p + d_frho_spline(d_type2frho_i,m,4))*p + + d_frho_spline(d_type2frho_i,m,5))*p + d_frho_spline(d_type2frho_i,m,6); + if (d_rho[i] > rhomax) phi += d_fp[i] * (d_rho[i]-rhomax); + if (eflag_global) ev.evdwl += phi; + if (eflag_atom) d_eatom[i] += phi; + } + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMKokkos::operator()(TagPairEAMKernelAB, + const typename Kokkos::TeamPolicy::member_type& team_member) const { + EV_FLOAT ev; + this->template operator()(TagPairEAMKernelAB(), team_member, ev); +} + +/* ---------------------------------------------------------------------- */ + +////Specialisation for Neighborlist types Half, HalfThread, Full +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMKokkos::operator()(TagPairEAMKernelC, + const typename Kokkos::TeamPolicy::member_type& team_member, + EV_FLOAT& ev) const { + + int ii = team_member.league_rank()*team_member.team_size() + team_member.team_rank(); + + // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + + auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); + auto a_f = v_f.template access>(); + + const int m_max = d_z2r_spline.extent_int(1); + const int j_max = t_ffloat_2d_n7::static_extent(2); + const int d_z2r_spline_cached = (m_max > MAX_CACHE_ROWS) ? 0 : 1; + Kokkos::View> A(team_member.team_scratch(0), MAX_CACHE_ROWS); + + if (d_z2r_spline_cached) { + for(int i = team_member.team_rank(); i < m_max*j_max; i+= team_member.team_size()) { + int j = i%j_max; + int m = i/j_max; + A(m,j) = d_z2r_spline(0,m,j); + } + team_member.team_barrier(); + } + if (ii < inum) { + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + const int jnum = d_numneigh[i]; + + F_FLOAT fxtmp = 0.0; + F_FLOAT fytmp = 0.0; + F_FLOAT fztmp = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const int jtype = type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + const F_FLOAT r = sqrt(rsq); + F_FLOAT p = r*rdr + 1.0; + int m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + + // rhoip = derivative of (density at atom j due to atom i) + // rhojp = derivative of (density at atom i due to atom j) + // phi = pair potential energy + // phip = phi' + // z2 = phi * r + // z2p = (phi * r)' = (phi' r) + phi + // psip needs both fp[i] and fp[j] terms since r_ij appears in two + // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji) + // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip + + const int d_type2rhor_ij = d_type2rhor(itype,jtype); + const F_FLOAT rhoip = (d_rhor_spline(d_type2rhor_ij,m,0)*p + d_rhor_spline(d_type2rhor_ij,m,1))*p + + d_rhor_spline(d_type2rhor_ij,m,2); + const int d_type2rhor_ji = d_type2rhor(jtype,itype); + const F_FLOAT rhojp = (d_rhor_spline(d_type2rhor_ji,m,0)*p + d_rhor_spline(d_type2rhor_ji,m,1))*p + + d_rhor_spline(d_type2rhor_ji,m,2); + const int d_type2z2r_ij = d_type2z2r(itype,jtype); + + const auto have_cache = (d_z2r_spline_cached == 1) && (0 == d_type2z2r_ij); + const auto z2r_spline_3 = (have_cache) ? A(m,3) : d_z2r_spline(d_type2z2r_ij,m,3); + const auto z2r_spline_4 = (have_cache) ? A(m,4) : d_z2r_spline(d_type2z2r_ij,m,4); + const auto z2r_spline_5 = (have_cache) ? A(m,5) : d_z2r_spline(d_type2z2r_ij,m,5); + const auto z2r_spline_6 = (have_cache) ? A(m,6) : d_z2r_spline(d_type2z2r_ij,m,6); + + const F_FLOAT z2p = (3.0*rdr*z2r_spline_3*p + 2.0*rdr*z2r_spline_4)*p + + rdr*z2r_spline_5; // the rdr and the factors of 3.0 and 2.0 come out of the interpolate function + const F_FLOAT z2 = ((z2r_spline_3*p + z2r_spline_4)*p + + z2r_spline_5)*p + z2r_spline_6; + + const F_FLOAT recip = 1.0/r; + const F_FLOAT phi = z2*recip; + const F_FLOAT phip = z2p*recip - phi*recip; + const F_FLOAT psip = d_fp[i]*rhojp + d_fp[j]*rhoip + phip; + const F_FLOAT fpair = -psip*recip; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { + a_f(j,0) -= delx*fpair; + a_f(j,1) -= dely*fpair; + a_f(j,2) -= delz*fpair; + } + + if (EVFLAG) { + if (eflag) { + ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(jtemplate ev_tally(ev,i,j,phi,fpair,delx,dely,delz); + } + + } + } + + a_f(i,0) += fxtmp; + a_f(i,1) += fytmp; + a_f(i,2) += fztmp; + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMKokkos::operator()(TagPairEAMKernelC, + /*const int &ii*/ + const typename Kokkos::TeamPolicy::member_type& team_member) const { + EV_FLOAT ev; + this->template operator()(TagPairEAMKernelC(), team_member, ev); +} + +/* ---------------------------------------------------------------------- */ + template template KOKKOS_INLINE_FUNCTION @@ -898,6 +1157,37 @@ void PairEAMKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int & } } +/* ---------------------------------------------------------------------- */ + +template +template +struct PairEAMKokkos::policyInstance { + KOKKOS_INLINE_FUNCTION + static auto get(int inum) { + auto policy = Kokkos::RangePolicy(0,inum); + return policy; + } +}; + +#ifdef KOKKOS_ENABLE_HIP +template<> +template +struct PairEAMKokkos::policyInstance { + KOKKOS_INLINE_FUNCTION + static auto get(int inum) { + static_assert(t_ffloat_2d_n7::static_extent(2) == 7, + "Breaking assumption of spline dim for KernelAB and KernelC scratch caching"); + + auto policy = Kokkos::TeamPolicy((inum+1023)/1024, 1024) + .set_scratch_size(0, + Kokkos::PerTeam(MAX_CACHE_ROWS*7*sizeof(double))); + return policy; + } +}; +#endif + +/* ---------------------------------------------------------------------- */ + namespace LAMMPS_NS { template class PairEAMKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/pair_eam_kokkos.h b/src/KOKKOS/pair_eam_kokkos.h index 006dceb1f2..9d066d40a0 100644 --- a/src/KOKKOS/pair_eam_kokkos.h +++ b/src/KOKKOS/pair_eam_kokkos.h @@ -60,6 +60,7 @@ class PairEAMKokkos : public PairEAM, public KokkosBase { void compute(int, int) override; void init_style() override; + KOKKOS_INLINE_FUNCTION void operator()(TagPairEAMPackForwardComm, const int&) const; @@ -89,6 +90,14 @@ class PairEAMKokkos : public PairEAM, public KokkosBase { KOKKOS_INLINE_FUNCTION void operator()(TagPairEAMKernelAB, const int&) const; + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMKernelAB, const typename Kokkos::TeamPolicy::member_type&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMKernelAB, const typename Kokkos::TeamPolicy::member_type&) const; + template KOKKOS_INLINE_FUNCTION void operator()(TagPairEAMKernelC, const int&, EV_FLOAT&) const; @@ -97,6 +106,14 @@ class PairEAMKokkos : public PairEAM, public KokkosBase { KOKKOS_INLINE_FUNCTION void operator()(TagPairEAMKernelC, const int&) const; + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMKernelC, const typename Kokkos::TeamPolicy::member_type&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMKernelC, const typename Kokkos::TeamPolicy::member_type&) const; + template KOKKOS_INLINE_FUNCTION void ev_tally(EV_FLOAT &ev, const int &i, const int &j, @@ -121,7 +138,7 @@ class PairEAMKokkos : public PairEAM, public KokkosBase { typename AT::t_efloat_1d d_eatom; typename AT::t_virial_array d_vatom; - int need_dup; + int need_dup,inum; using KKDeviceType = typename KKDevice::value; @@ -162,6 +179,9 @@ class PairEAMKokkos : public PairEAM, public KokkosBase { void file2array() override; void array2spline() override; + template + struct policyInstance; + typename AT::t_neighbors_2d d_neighbors; typename AT::t_int_1d d_ilist; typename AT::t_int_1d d_numneigh; diff --git a/src/MC/fix_gcmc.cpp b/src/MC/fix_gcmc.cpp index 5d1e318db2..ff8c5eb9c9 100644 --- a/src/MC/fix_gcmc.cpp +++ b/src/MC/fix_gcmc.cpp @@ -2351,12 +2351,6 @@ double FixGCMC::energy_full() if (force->kspace) force->kspace->compute(eflag,vflag); - // unlike Verlet, not performing a reverse_comm() or forces here - // b/c GCMC does not care about forces - // don't think it will mess up energy due to any post_force() fixes - // but Modify::pre_reverse() is needed for INTEL - - if (modify->n_pre_reverse) modify->pre_reverse(eflag,vflag); if (modify->n_post_force_any) modify->post_force(vflag); // NOTE: all fixes with energy_global_flag set and which diff --git a/src/MC/fix_widom.cpp b/src/MC/fix_widom.cpp index a98f29da5e..cc2f1bc94d 100644 --- a/src/MC/fix_widom.cpp +++ b/src/MC/fix_widom.cpp @@ -1050,13 +1050,7 @@ double FixWidom::energy_full() if (force->kspace) force->kspace->compute(eflag,vflag); - // unlike Verlet, not performing a reverse_comm() or forces here - // b/c Widom does not care about forces - // don't think it will mess up energy due to any post_force() fixes - // but Modify::pre_reverse() is needed for INTEL - - if (modify->n_pre_reverse) modify->pre_reverse(eflag,vflag); - if (modify->n_pre_force) modify->pre_force(vflag); + if (modify->n_post_force_any) modify->post_force(vflag); // NOTE: all fixes with energy_global_flag set and which // operate at pre_force() or post_force() diff --git a/src/NETCDF/dump_netcdf.cpp b/src/NETCDF/dump_netcdf.cpp index 0115c0bbe9..a4699897a3 100644 --- a/src/NETCDF/dump_netcdf.cpp +++ b/src/NETCDF/dump_netcdf.cpp @@ -442,11 +442,11 @@ void DumpNetCDF::openfile() const int nfield = *th->get_nfield(); for (int i = 0; i < nfield; i++) { - if (fields[i].type == multitype::DOUBLE) { + if (fields[i].type == multitype::LAMMPS_DOUBLE) { NCERRX( nc_def_var(ncid, keywords[i].c_str(), type_nc_real, 1, dims, &thermovar[i]), keywords[i].c_str() ); - } else if (fields[i].type == multitype::INT) { + } else if (fields[i].type == multitype::LAMMPS_INT) { NCERRX( nc_def_var(ncid, keywords[i].c_str(), NC_INT, 1, dims, &thermovar[i]), keywords[i].c_str() ); - } else if (fields[i].type == multitype::BIGINT) { + } else if (fields[i].type == multitype::LAMMPS_INT64) { NCERRX( nc_def_var(ncid, keywords[i].c_str(), NC_INT64, 1, dims, &thermovar[i]), keywords[i].c_str() ); } } @@ -624,11 +624,11 @@ void DumpNetCDF::write() int nfield = *th->get_nfield(); for (int i = 0; i < nfield; i++) { if (filewriter) { - if (fields[i].type == multitype::DOUBLE) { + if (fields[i].type == multitype::LAMMPS_DOUBLE) { NCERRX( nc_put_var1_double(ncid, thermovar[i], start, &fields[i].data.d), keywords[i].c_str() ); - } else if (fields[i].type == multitype::INT) { + } else if (fields[i].type == multitype::LAMMPS_INT) { NCERRX( nc_put_var1_int(ncid, thermovar[i], start, &fields[i].data.i), keywords[i].c_str() ); - } else if (fields[i].type == multitype::BIGINT) { + } else if (fields[i].type == multitype::LAMMPS_INT64) { NCERRX( nc_put_var1_bigint(ncid, thermovar[i], start, &fields[i].data.b), keywords[i].c_str() ); } } diff --git a/src/NETCDF/dump_netcdf_mpiio.cpp b/src/NETCDF/dump_netcdf_mpiio.cpp index f9382dacef..2407678386 100644 --- a/src/NETCDF/dump_netcdf_mpiio.cpp +++ b/src/NETCDF/dump_netcdf_mpiio.cpp @@ -432,11 +432,11 @@ void DumpNetCDFMPIIO::openfile() const int nfield = *th->get_nfield(); for (int i = 0; i < nfield; i++) { - if (fields[i].type == multitype::DOUBLE) { + if (fields[i].type == multitype::LAMMPS_DOUBLE) { NCERRX( ncmpi_def_var(ncid, keywords[i].c_str(), type_nc_real, 1, dims, &thermovar[i]), keywords[i].c_str() ); - } else if (fields[i].type == multitype::INT) { + } else if (fields[i].type == multitype::LAMMPS_INT) { NCERRX( ncmpi_def_var(ncid, keywords[i].c_str(), NC_INT, 1, dims, &thermovar[i]), keywords[i].c_str() ); - } else if (fields[i].type == multitype::BIGINT) { + } else if (fields[i].type == multitype::LAMMPS_INT64) { NCERRX( ncmpi_def_var(ncid, keywords[i].c_str(), NC_INT64, 1, dims, &thermovar[i]), keywords[i].c_str() ); } } @@ -617,11 +617,11 @@ void DumpNetCDFMPIIO::write() int nfield = *th->get_nfield(); for (int i = 0; i < nfield; i++) { if (filewriter) { - if (fields[i].type == multitype::DOUBLE) { + if (fields[i].type == multitype::LAMMPS_DOUBLE) { NCERRX( ncmpi_put_var1_double(ncid, thermovar[i], start, &fields[i].data.d), keywords[i].c_str() ); - } else if (fields[i].type == multitype::INT) { + } else if (fields[i].type == multitype::LAMMPS_INT) { NCERRX( ncmpi_put_var1_int(ncid, thermovar[i], start, &fields[i].data.i), keywords[i].c_str() ); - } else if (fields[i].type == multitype::BIGINT) { + } else if (fields[i].type == multitype::LAMMPS_INT64) { NCERRX( ncmpi_put_var1_bigint(ncid, thermovar[i], start, &fields[i].data.b), keywords[i].c_str() ); } } diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index 784c163f70..83442ecb25 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -131,6 +131,7 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : global_freq = 1; extvector = 0; rxnID = 0; + cuff = 1; maxnconstraints = 0; narrhenius = 0; status = PROCEED; @@ -250,8 +251,9 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : memory->create(ghostly_rxn_count,nreacts,"bond/react:ghostly_rxn_count"); memory->create(reaction_count_total,nreacts,"bond/react:reaction_count_total"); + rescale_charges_anyflag = 0; for (int i = 0; i < nreacts; i++) { - fraction[i] = 1; + fraction[i] = 1.0; seed[i] = 12345; max_rxn[i] = INT_MAX; for (int j = 0; j < 3; j++) @@ -261,7 +263,7 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : rescale_charges_flag[i] = 0; create_atoms_flag[i] = 0; modify_create_fragid[i] = -1; - overlapsq[i] = 0; + overlapsq[i] = 0.0; molecule_keyword[i] = OFF; nconstraints[i] = 0; // set default limit duration to 60 timesteps @@ -388,8 +390,16 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " "'rescale_charges' has too few arguments"); if (strcmp(arg[iarg+1],"no") == 0) rescale_charges_flag[rxn] = 0; //default - else if (strcmp(arg[iarg+1],"yes") == 0) rescale_charges_flag[rxn] = 1; - else error->one(FLERR,"Bond/react: Illegal option for 'rescale_charges' keyword"); + else if (strcmp(arg[iarg+1],"yes") == 0) { + if (!atom->q_flag) error->all(FLERR,"Illegal fix bond/react command: cannot use " + "'rescale_charges' without atomic charges enabled"); + twomol = atom->molecules[reacted_mol[rxn]]; + if (!twomol->qflag) error->all(FLERR,"Illegal fix bond/react command: cannot use " + "'rescale_charges' without Charges section in post-reaction template"); + rescale_charges_flag[rxn] = 1; // overloaded below to also indicate number of atoms to update + rescale_charges_anyflag = 1; + cuff = 2; // index shift for extra values carried around by mega_gloves + } else error->one(FLERR,"Bond/react: Illegal option for 'rescale_charges' keyword"); iarg += 2; } else if (strcmp(arg[iarg],"molecule") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " @@ -442,8 +452,10 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : memory->create(delete_atoms,max_natoms,nreacts,"bond/react:delete_atoms"); memory->create(create_atoms,max_natoms,nreacts,"bond/react:create_atoms"); memory->create(chiral_atoms,max_natoms,6,nreacts,"bond/react:chiral_atoms"); + memory->create(mol_total_charge,nreacts,"bond/react:mol_total_charge"); for (int j = 0; j < nreacts; j++) { + mol_total_charge[j] = 0.0; for (int i = 0; i < max_natoms; i++) { edge[i][j] = 0; custom_charges[i][j] = 1; // update all partial charges by default @@ -483,6 +495,21 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : if (custom_charges_fragid[i] >= 0) CustomCharges(custom_charges_fragid[i],i); } + // charge rescaling values must be calculated after calling CustomCharges + for (int myrxn = 0; myrxn < nreacts; myrxn++) { + if (rescale_charges_flag[myrxn]) { + rescale_charges_flag[myrxn] = 0; // will now store number of updated atoms + twomol = atom->molecules[reacted_mol[myrxn]]; + for (int j = 0; j < twomol->natoms; j++) { + int jj = equivalences[j][1][myrxn]-1; + if (custom_charges[jj][myrxn] == 1 && delete_atoms[jj][myrxn] == 0) { + mol_total_charge[myrxn] += twomol->q[j]; + rescale_charges_flag[myrxn]++; + } + } + } + } + // get the names of per-atom variables needed by 'rxn' functions of custom constraint customvarnames(); @@ -605,6 +632,7 @@ FixBondReact::~FixBondReact() memory->destroy(delete_atoms); memory->destroy(create_atoms); memory->destroy(chiral_atoms); + memory->destroy(mol_total_charge); if (vvec != nullptr) memory->destroy(vvec); memory->destroy(rxn_name); @@ -1282,11 +1310,11 @@ void FixBondReact::superimpose_algorithm() memory->create(restore_pt,MAXGUESS,4,"bond/react:restore_pt"); memory->create(pioneers,max_natoms,"bond/react:pioneers"); memory->create(restore,max_natoms,MAXGUESS*4,"bond/react:restore"); - memory->create(my_mega_glove,max_natoms+1,allnattempt,"bond/react:local_mega_glove"); + memory->create(my_mega_glove,max_natoms+cuff,allnattempt,"bond/react:my_mega_glove"); - for (int i = 0; i < max_natoms+1; i++) + for (int i = 0; i < max_natoms+cuff; i++) for (int j = 0; j < allnattempt; j++) - my_mega_glove[i][j] = 0; + my_mega_glove[i][j] = 0.0; attempted_rxn = 1; @@ -1336,9 +1364,10 @@ void FixBondReact::superimpose_algorithm() status = REJECT; } else { status = ACCEPT; - my_mega_glove[0][my_num_mega] = rxnID; + my_mega_glove[0][my_num_mega] = (double) rxnID; + if (rescale_charges_flag[rxnID]) my_mega_glove[1][my_num_mega] = get_totalcharge(); for (int i = 0; i < onemol->natoms; i++) { - my_mega_glove[i+1][my_num_mega] = glove[i][1]; + my_mega_glove[i+cuff][my_num_mega] = (double) glove[i][1]; } my_num_mega++; } @@ -1384,9 +1413,10 @@ void FixBondReact::superimpose_algorithm() if (fraction[rxnID] < 1.0 && random[rxnID]->uniform() >= fraction[rxnID]) status = REJECT; else { - my_mega_glove[0][my_num_mega] = rxnID; + my_mega_glove[0][my_num_mega] = (double) rxnID; + if (rescale_charges_flag[rxnID]) my_mega_glove[1][my_num_mega] = get_totalcharge(); for (int i = 0; i < onemol->natoms; i++) { - my_mega_glove[i+1][my_num_mega] = glove[i][1]; + my_mega_glove[i+cuff][my_num_mega] = (double) glove[i][1]; } my_num_mega++; } @@ -1405,13 +1435,13 @@ void FixBondReact::superimpose_algorithm() global_megasize = 0; - memory->create(local_mega_glove,max_natoms+1,my_num_mega,"bond/react:local_mega_glove"); - memory->create(ghostly_mega_glove,max_natoms+1,my_num_mega,"bond/react:ghostly_mega_glove"); + memory->create(local_mega_glove,max_natoms+cuff,my_num_mega,"bond/react:local_mega_glove"); + memory->create(ghostly_mega_glove,max_natoms+cuff,my_num_mega,"bond/react:ghostly_mega_glove"); - for (int i = 0; i < max_natoms+1; i++) { + for (int i = 0; i < max_natoms+cuff; i++) { for (int j = 0; j < my_num_mega; j++) { - local_mega_glove[i][j] = 0; - ghostly_mega_glove[i][j] = 0; + local_mega_glove[i][j] = 0.0; + ghostly_mega_glove[i][j] = 0.0; } } @@ -2186,6 +2216,24 @@ double FixBondReact::get_temperature(tagint **myglove, int row_offset, int col) return t; } +/* ---------------------------------------------------------------------- +compute sum of partial charges in rxn site, for updated atoms +note: currently uses global rxnID and onemol variables +------------------------------------------------------------------------- */ + +double FixBondReact::get_totalcharge() +{ + int j,jj,ilocal; + double *q = atom->q; + double sim_total_charge = 0.0; + for (j = 0; j < onemol->natoms; j++) { + jj = equivalences[j][1][rxnID]-1; + if (custom_charges[jj][rxnID] == 1) + sim_total_charge += q[atom->map(glove[jj][1])]; + } + return sim_total_charge; +} + /* ---------------------------------------------------------------------- get per-atom variable names used by custom constraint ------------------------------------------------------------------------- */ @@ -2673,18 +2721,18 @@ void FixBondReact::dedup_mega_gloves(int dedup_mode) dedup_size = global_megasize; } - tagint **dedup_glove; - memory->create(dedup_glove,max_natoms+1,dedup_size,"bond/react:dedup_glove"); + double **dedup_glove; + memory->create(dedup_glove,max_natoms+cuff,dedup_size,"bond/react:dedup_glove"); if (dedup_mode == LOCAL) { for (int i = 0; i < dedup_size; i++) { - for (int j = 0; j < max_natoms+1; j++) { + for (int j = 0; j < max_natoms+cuff; j++) { dedup_glove[j][i] = my_mega_glove[j][i]; } } } else if (dedup_mode == GLOBAL) { for (int i = 0; i < dedup_size; i++) { - for (int j = 0; j < max_natoms+1; j++) { + for (int j = 0; j < max_natoms+cuff; j++) { dedup_glove[j][i] = global_mega_glove[j][i]; } } @@ -2700,16 +2748,16 @@ void FixBondReact::dedup_mega_gloves(int dedup_mode) // let's randomly mix up our reaction instances first // then we can feel okay about ignoring ones we've already deleted (or accepted) // based off std::shuffle - int *temp_rxn = new int[max_natoms+1]; + double *temp_rxn = new double[max_natoms+cuff]; for (int i = dedup_size-1; i > 0; --i) { //dedup_size // choose random entry to swap current one with int k = floor(random[0]->uniform()*(i+1)); // swap entries - for (int j = 0; j < max_natoms+1; j++) + for (int j = 0; j < max_natoms+cuff; j++) temp_rxn[j] = dedup_glove[j][i]; - for (int j = 0; j < max_natoms+1; j++) { + for (int j = 0; j < max_natoms+cuff; j++) { dedup_glove[j][i] = dedup_glove[j][k]; dedup_glove[j][k] = temp_rxn[j]; } @@ -2721,13 +2769,13 @@ void FixBondReact::dedup_mega_gloves(int dedup_mode) int myrxnid1 = dedup_glove[0][i]; onemol = atom->molecules[unreacted_mol[myrxnid1]]; for (int j = 0; j < onemol->natoms; j++) { - int check1 = dedup_glove[j+1][i]; + int check1 = dedup_glove[j+cuff][i]; for (int ii = i + 1; ii < dedup_size; ii++) { if (dedup_mask[ii] == 0) { int myrxnid2 = dedup_glove[0][ii]; twomol = atom->molecules[unreacted_mol[myrxnid2]]; for (int jj = 0; jj < twomol->natoms; jj++) { - int check2 = dedup_glove[jj+1][ii]; + int check2 = dedup_glove[jj+cuff][ii]; if (check2 == check1) { dedup_mask[ii] = 1; break; @@ -2745,7 +2793,7 @@ void FixBondReact::dedup_mega_gloves(int dedup_mode) int my_new_megasize = 0; for (int i = 0; i < my_num_mega; i++) { if (dedup_mask[i] == 0) { - for (int j = 0; j < max_natoms+1; j++) { + for (int j = 0; j < max_natoms+cuff; j++) { my_mega_glove[j][my_new_megasize] = dedup_glove[j][i]; } my_new_megasize++; @@ -2760,8 +2808,8 @@ void FixBondReact::dedup_mega_gloves(int dedup_mode) int new_global_megasize = 0; for (int i = 0; i < global_megasize; i++) { if (dedup_mask[i] == 0) { - ghostly_rxn_count[dedup_glove[0][i]]++; - for (int j = 0; j < max_natoms + 1; j++) { + ghostly_rxn_count[(int) dedup_glove[0][i]]++; + for (int j = 0; j < max_natoms + cuff; j++) { global_mega_glove[j][new_global_megasize] = dedup_glove[j][i]; } new_global_megasize++; @@ -2830,7 +2878,7 @@ void FixBondReact::glove_ghostcheck() local_rxn_count[i] = 0; for (int i = 0; i < my_num_mega; i++) { - rxnID = my_mega_glove[0][i]; + rxnID = (int) my_mega_glove[0][i]; onemol = atom->molecules[unreacted_mol[rxnID]]; int ghostly = 0; #if !defined(MPI_STUBS) @@ -2839,7 +2887,7 @@ void FixBondReact::glove_ghostcheck() ghostly = 1; } else { for (int j = 0; j < onemol->natoms; j++) { - int ilocal = atom->map(my_mega_glove[j+1][i]); + int ilocal = atom->map((tagint) my_mega_glove[j+cuff][i]); if (ilocal >= atom->nlocal || localsendlist[ilocal] == 1) { ghostly = 1; break; @@ -2852,15 +2900,13 @@ void FixBondReact::glove_ghostcheck() #endif if (ghostly == 1) { - ghostly_mega_glove[0][ghostly_num_mega] = rxnID; - for (int j = 0; j < onemol->natoms+1; j++) { + for (int j = 0; j < onemol->natoms+cuff; j++) { ghostly_mega_glove[j][ghostly_num_mega] = my_mega_glove[j][i]; } ghostly_num_mega++; } else { - local_mega_glove[0][local_num_mega] = rxnID; local_rxn_count[rxnID]++; - for (int j = 0; j < onemol->natoms+1; j++) { + for (int j = 0; j < onemol->natoms+cuff; j++) { local_mega_glove[j][local_num_mega] = my_mega_glove[j][i]; } local_num_mega++; @@ -2899,23 +2945,23 @@ void FixBondReact::ghost_glovecast() } MPI_Allgather(&start, 1, MPI_INT, allstarts, 1, MPI_INT, world); MPI_Datatype columnunsized, column; - int sizes[2] = {max_natoms+1, global_megasize}; - int subsizes[2] = {max_natoms+1, 1}; + int sizes[2] = {max_natoms+cuff, global_megasize}; + int subsizes[2] = {max_natoms+cuff, 1}; int starts[2] = {0,0}; MPI_Type_create_subarray (2, sizes, subsizes, starts, MPI_ORDER_C, - MPI_LMP_TAGINT, &columnunsized); - MPI_Type_create_resized (columnunsized, 0, sizeof(tagint), &column); + MPI_DOUBLE, &columnunsized); + MPI_Type_create_resized (columnunsized, 0, sizeof(double), &column); MPI_Type_commit(&column); memory->destroy(global_mega_glove); - memory->create(global_mega_glove,max_natoms+1,global_megasize,"bond/react:global_mega_glove"); + memory->create(global_mega_glove,max_natoms+cuff,global_megasize,"bond/react:global_mega_glove"); - for (int i = 0; i < max_natoms+1; i++) + for (int i = 0; i < max_natoms+cuff; i++) for (int j = 0; j < global_megasize; j++) global_mega_glove[i][j] = 0; if (ghostly_num_mega > 0) { - for (int i = 0; i < max_natoms+1; i++) { + for (int i = 0; i < max_natoms+cuff; i++) { for (int j = 0; j < ghostly_num_mega; j++) { global_mega_glove[i][j+start] = ghostly_mega_glove[i][j]; } @@ -3006,7 +3052,12 @@ void FixBondReact::update_everything() int update_num_mega; tagint **update_mega_glove; - memory->create(update_mega_glove,max_natoms+1,MAX(local_num_mega,global_megasize),"bond/react:update_mega_glove"); + // for now, keeping rxnID in update_mega_glove, but not rest of cuff in update_mega_glove + int maxmega = MAX(local_num_mega,global_megasize); + memory->create(update_mega_glove,max_natoms+1,maxmega,"bond/react:update_mega_glove"); + + double *sim_total_charges; + if (rescale_charges_anyflag) memory->create(sim_total_charges,maxmega,"bond/react:sim_total_charges"); for (int pass = 0; pass < 2; pass++) { update_num_mega = 0; @@ -3014,15 +3065,20 @@ void FixBondReact::update_everything() for (int i = 0; i < nreacts; i++) iskip[i] = 0; if (pass == 0) { for (int i = 0; i < local_num_mega; i++) { - rxnID = local_mega_glove[0][i]; + rxnID = (int) local_mega_glove[0][i]; // reactions already shuffled from dedup procedure, so can skip first N if (iskip[rxnID]++ < nlocalskips[rxnID]) continue; + // this will be overwritten if reaction skipped by create_atoms below + update_mega_glove[0][update_num_mega] = (tagint) local_mega_glove[0][i]; + for (int j = 0; j < max_natoms; j++) + update_mega_glove[j+1][update_num_mega] = (tagint) local_mega_glove[j+cuff][i]; + // atoms inserted here for serial MPI_STUBS build only if (create_atoms_flag[rxnID] == 1) { onemol = atom->molecules[unreacted_mol[rxnID]]; twomol = atom->molecules[reacted_mol[rxnID]]; - if (insert_atoms(local_mega_glove,i)) { + if (insert_atoms(update_mega_glove,update_num_mega)) { inserted_atoms_flag = 1; } else { // create aborted reaction_count_total[rxnID]--; @@ -3030,23 +3086,27 @@ void FixBondReact::update_everything() } } - for (int j = 0; j < max_natoms+1; j++) - update_mega_glove[j][update_num_mega] = local_mega_glove[j][i]; + if (rescale_charges_flag[rxnID]) sim_total_charges[update_num_mega] = local_mega_glove[1][i]; update_num_mega++; } } else if (pass == 1) { for (int i = 0; i < global_megasize; i++) { - rxnID = global_mega_glove[0][i]; + rxnID = (int) global_mega_glove[0][i]; // reactions already shuffled from dedup procedure, so can skip first N if (iskip[rxnID]++ < nghostlyskips[rxnID]) continue; + // this will be overwritten if reaction skipped by create_atoms below + update_mega_glove[0][update_num_mega] = (tagint) global_mega_glove[0][i]; + for (int j = 0; j < max_natoms; j++) + update_mega_glove[j+1][update_num_mega] = (tagint) global_mega_glove[j+cuff][i]; + // we can insert atoms here, now that reactions are finalized // can't do it any earlier, due to skipped reactions (max_rxn) // for MPI build, reactions that create atoms are always treated as 'global' if (create_atoms_flag[rxnID] == 1) { onemol = atom->molecules[unreacted_mol[rxnID]]; twomol = atom->molecules[reacted_mol[rxnID]]; - if (insert_atoms(global_mega_glove,i)) { + if (insert_atoms(update_mega_glove,update_num_mega)) { inserted_atoms_flag = 1; } else { // create aborted reaction_count_total[rxnID]--; @@ -3054,8 +3114,7 @@ void FixBondReact::update_everything() } } - for (int j = 0; j < max_natoms+1; j++) - update_mega_glove[j][update_num_mega] = global_mega_glove[j][i]; + if (rescale_charges_flag[rxnID]) sim_total_charges[update_num_mega] = global_mega_glove[1][i]; update_num_mega++; } } @@ -3090,38 +3149,18 @@ void FixBondReact::update_everything() } } - // get charge rescale delta - double charge_rescale_addend = 0; - if (rescale_charges_flag[rxnID] == 1) { - double sim_total_charge = 0; - double mol_total_charge = 0; - int n_custom_charge = 0; - for (int i = 0; i < update_num_mega; i++) { - rxnID = update_mega_glove[0][i]; - twomol = atom->molecules[reacted_mol[rxnID]]; - for (int j = 0; j < twomol->natoms; j++) { - int jj = equivalences[j][1][rxnID]-1; - if (atom->map(update_mega_glove[jj+1][i]) >= 0 && - atom->map(update_mega_glove[jj+1][i]) < nlocal) { - if (landlocked_atoms[j][rxnID] == 1) - type[atom->map(update_mega_glove[jj+1][i])] = twomol->type[j]; - if (twomol->qflag && atom->q_flag && custom_charges[jj][rxnID] == 1) { - double *q = atom->q; - sim_total_charge += q[atom->map(update_mega_glove[jj+1][i])]; - mol_total_charge += twomol->q[j]; - n_custom_charge++; - } - } - } - } - charge_rescale_addend = (sim_total_charge-mol_total_charge)/n_custom_charge; - } - // update charges and types of landlocked atoms // also keep track of 'stabilization' groups here + int n_custom_charge; + double charge_rescale_addend; for (int i = 0; i < update_num_mega; i++) { + charge_rescale_addend = 0; rxnID = update_mega_glove[0][i]; twomol = atom->molecules[reacted_mol[rxnID]]; + if (rescale_charges_flag[rxnID]) { + n_custom_charge = rescale_charges_flag[rxnID]; + charge_rescale_addend = (sim_total_charges[i]-mol_total_charge[rxnID])/n_custom_charge; + } for (int j = 0; j < twomol->natoms; j++) { int jj = equivalences[j][1][rxnID]-1; int ilocal = atom->map(update_mega_glove[jj+1][i]); @@ -3555,6 +3594,7 @@ void FixBondReact::update_everything() } memory->destroy(update_mega_glove); + if (rescale_charges_anyflag) memory->destroy(sim_total_charges); // delete atoms. taken from fix_evaporate. but don't think it needs to be in pre_exchange // loop in reverse order to avoid copying marked atoms @@ -3643,7 +3683,7 @@ void FixBondReact::update_everything() insert created atoms ------------------------------------------------------------------------- */ -int FixBondReact::insert_atoms(tagint **my_mega_glove, int iupdate) +int FixBondReact::insert_atoms(tagint **my_update_mega_glove, int iupdate) { // inserting atoms based off fix_deposit->pre_exchange int flag; @@ -3692,14 +3732,15 @@ int FixBondReact::insert_atoms(tagint **my_mega_glove, int iupdate) n2superpose++; } - int ifit = atom->map(my_mega_glove[ibonding[rxnID]+1][iupdate]); // use this local ID to find fitting proc + int ifit = atom->map(my_update_mega_glove[ibonding[rxnID]+1][iupdate]); // use this local ID to find fitting proc Superpose3D superposer(n2superpose); int fitroot = 0; if (ifit >= 0 && ifit < atom->nlocal) { fitroot = comm->me; // get 'temperatere' averaged over site, used for created atoms' vels - t = get_temperature(my_mega_glove,1,iupdate); + // note: row_offset for my_update_mega_glove is unity, not 'cuff' + t = get_temperature(my_update_mega_glove,1,iupdate); double **xfrozen; // coordinates for the "frozen" target molecule double **xmobile; // coordinates for the "mobile" molecule @@ -3713,12 +3754,12 @@ int FixBondReact::insert_atoms(tagint **my_mega_glove, int iupdate) if (!twomol->fragmentmask[modify_create_fragid[rxnID]][j]) continue; int ipre = equivalences[j][1][rxnID]-1; // equiv pre-reaction template index if (!create_atoms[j][rxnID] && !delete_atoms[ipre][rxnID]) { - if (atom->map(my_mega_glove[ipre+1][iupdate]) < 0) { + if (atom->map(my_update_mega_glove[ipre+1][iupdate]) < 0) { error->warning(FLERR," eligible atoms skipped for created-atoms fit on rank {}\n", comm->me); continue; } - iatom = atom->map(my_mega_glove[ipre+1][iupdate]); + iatom = atom->map(my_update_mega_glove[ipre+1][iupdate]); if (iref == -1) iref = iatom; iatom = domain->closest_image(iref,iatom); for (int k = 0; k < 3; k++) { @@ -3838,7 +3879,7 @@ int FixBondReact::insert_atoms(tagint **my_mega_glove, int iupdate) atom->tag[n] = maxtag_all + add_count; // locally update mega_glove - my_mega_glove[preID][iupdate] = atom->tag[n]; + my_update_mega_glove[preID][iupdate] = atom->tag[n]; if (atom->molecule_flag) { if (twomol->moleculeflag) { @@ -3866,7 +3907,7 @@ int FixBondReact::insert_atoms(tagint **my_mega_glove, int iupdate) } // globally update mega_glove and equivalences MPI_Allreduce(MPI_IN_PLACE,&root,1,MPI_INT,MPI_SUM,world); - MPI_Bcast(&my_mega_glove[preID][iupdate],1,MPI_LMP_TAGINT,root,world); + MPI_Bcast(&my_update_mega_glove[preID][iupdate],1,MPI_LMP_TAGINT,root,world); equivalences[m][0][rxnID] = m+1; equivalences[m][1][rxnID] = preID; reverse_equiv[preID-1][0][rxnID] = preID; diff --git a/src/REACTION/fix_bond_react.h b/src/REACTION/fix_bond_react.h index 66a5e4d6a0..534261e11d 100644 --- a/src/REACTION/fix_bond_react.h +++ b/src/REACTION/fix_bond_react.h @@ -71,7 +71,9 @@ class FixBondReact : public Fix { int **store_rxn_count; int *stabilize_steps_flag; int *custom_charges_fragid; - int *rescale_charges_flag; + int *rescale_charges_flag; // if nonzero, indicates number of atoms whose charges are updated + int rescale_charges_anyflag; // indicates if any reactions do charge rescaling + double *mol_total_charge; // sum of charges of post-reaction atoms whose charges are updated int *create_atoms_flag; int *modify_create_fragid; double *overlapsq; @@ -156,10 +158,12 @@ class FixBondReact : public Fix { int lcl_inst; // reaction instance tagint **glove; // 1st colmn: pre-reacted template, 2nd colmn: global IDs // for all mega_gloves: first row is the ID of bond/react - tagint **my_mega_glove; // local + ghostly reaction instances - tagint **local_mega_glove; // consolidation of local reaction instances - tagint **ghostly_mega_glove; // consolidation of nonlocal reaction instances - tagint **global_mega_glove; // consolidation (inter-processor) of gloves + // 'cuff' leaves room for additional values carried around + int cuff; // default = 1, w/ rescale_charges_flag = 2 + double **my_mega_glove; // local + ghostly reaction instances + double **local_mega_glove; // consolidation of local reaction instances + double **ghostly_mega_glove; // consolidation of nonlocal reaction instances + double **global_mega_glove; // consolidation (inter-processor) of gloves // containing nonlocal atoms int *localsendlist; // indicates ghosts of other procs @@ -191,6 +195,7 @@ class FixBondReact : public Fix { int check_constraints(); void get_IDcoords(int, int, double *); double get_temperature(tagint **, int, int); + double get_totalcharge(); void customvarnames(); // get per-atom variables names used by custom constraint void get_customvars(); // evaluate local values for variables names used by custom constraint double custom_constraint(const std::string &); // evaulate expression for custom constraint diff --git a/src/library.cpp b/src/library.cpp index 0fcedfc20b..1996ebbcb4 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -794,7 +794,7 @@ argument string. - yes * - type - data type of thermo output column; see :cpp:enum:`_LMP_DATATYPE_CONST` - - pointer to static int + - pointer to int - yes * - data - actual field data for column @@ -815,7 +815,6 @@ void *lammps_last_thermo(void *handle, const char *what, int index) Thermo *th = lmp->output->thermo; if (!th) return nullptr; const int nfield = *th->get_nfield(); - static int datatype; BEGIN_CAPTURE { @@ -833,25 +832,15 @@ void *lammps_last_thermo(void *handle, const char *what, int index) } else if (strcmp(what, "type") == 0) { if ((index < 0) || (index >= nfield)) return nullptr; const auto &field = th->get_fields()[index]; - if (field.type == multitype::INT) { - datatype = LAMMPS_INT; - val = (void *) &datatype; - } else if (field.type == multitype::BIGINT) { - datatype = LAMMPS_INT64; - val = (void *) &datatype; - } else if (field.type == multitype::DOUBLE) { - datatype = LAMMPS_DOUBLE; - val = (void *) &datatype; - } - + val = (void *) &field.type; } else if (strcmp(what, "data") == 0) { if ((index < 0) || (index >= nfield)) return nullptr; const auto &field = th->get_fields()[index]; - if (field.type == multitype::INT) { + if (field.type == multitype::LAMMPS_INT) { val = (void *) &field.data.i; - } else if (field.type == multitype::BIGINT) { + } else if (field.type == multitype::LAMMPS_INT64) { val = (void *) &field.data.b; - } else if (field.type == multitype::DOUBLE) { + } else if (field.type == multitype::LAMMPS_DOUBLE) { val = (void *) &field.data.d; } @@ -1351,6 +1340,13 @@ int lammps_extract_global_datatype(void * /*handle*/, const char *name) if (strcmp(name,"q_flag") == 0) return LAMMPS_INT; if (strcmp(name,"units") == 0) return LAMMPS_STRING; + if (strcmp(name,"atom_style") == 0) return LAMMPS_STRING; + if (strcmp(name,"pair_style") == 0) return LAMMPS_STRING; + if (strcmp(name,"bond_style") == 0) return LAMMPS_STRING; + if (strcmp(name,"angle_style") == 0) return LAMMPS_STRING; + if (strcmp(name,"dihedral_style") == 0) return LAMMPS_STRING; + if (strcmp(name,"improper_style") == 0) return LAMMPS_STRING; + if (strcmp(name,"kspace_style") == 0) return LAMMPS_STRING; if (strcmp(name,"boltz") == 0) return LAMMPS_DOUBLE; if (strcmp(name,"hplanck") == 0) return LAMMPS_DOUBLE; if (strcmp(name,"mvv2e") == 0) return LAMMPS_DOUBLE; @@ -1597,6 +1593,34 @@ report the "native" data type. The following tables are provided: - int - 1 - **deprecated**. Use :cpp:func:`lammps_extract_setting` instead. + * - atom_style + - char \* + - 1 + - string with the current atom style. + * - pair_style + - char \* + - 1 + - string with the current pair style. + * - bond_style + - char \* + - 1 + - string with the current bond style. + * - angle_style + - char \* + - 1 + - string with the current angle style. + * - dihedral_style + - char \* + - 1 + - string with the current dihedral style. + * - improper_style + - char \* + - 1 + - string with the current improper style. + * - kspace_style + - char \* + - 1 + - string with the current KSpace style. .. _extract_unit_settings: @@ -1703,6 +1727,13 @@ void *lammps_extract_global(void *handle, const char *name) auto lmp = (LAMMPS *) handle; if (strcmp(name,"units") == 0) return (void *) lmp->update->unit_style; + if (strcmp(name,"atom_style") == 0) return (void *) lmp->atom->atom_style; + if (strcmp(name,"pair_style") == 0) return (void *) lmp->force->pair_style; + if (strcmp(name,"bond_style") == 0) return (void *) lmp->force->bond_style; + if (strcmp(name,"angle_style") == 0) return (void *) lmp->force->angle_style; + if (strcmp(name,"dihedral_style") == 0) return (void *) lmp->force->dihedral_style; + if (strcmp(name,"improper_style") == 0) return (void *) lmp->force->improper_style; + if (strcmp(name,"kspace_style") == 0) return (void *) lmp->force->kspace_style; if (strcmp(name,"dt") == 0) return (void *) &lmp->update->dt; if (strcmp(name,"ntimestep") == 0) return (void *) &lmp->update->ntimestep; // update->atime can be referenced as a pointer diff --git a/src/library.h b/src/library.h index 5f46e36ccf..30a12ebdef 100644 --- a/src/library.h +++ b/src/library.h @@ -41,10 +41,11 @@ /** Data type constants for extracting data from atoms, computes and fixes * * Must be kept in sync with the equivalent constants in ``python/lammps/constants.py``, - * ``fortran/lammps.f90``, ``tools/swig/lammps.i``, and + * ``fortran/lammps.f90``, ``tools/swig/lammps.i``, ``src/lmptype.h``, and *``examples/COUPLE/plugin/liblammpsplugin.h`` */ enum _LMP_DATATYPE_CONST { + LAMMPS_NONE = -1, /*!< no data type assigned (yet) */ LAMMPS_INT = 0, /*!< 32-bit integer (array) */ LAMMPS_INT_2D = 1, /*!< two-dimensional 32-bit integer array */ LAMMPS_DOUBLE = 2, /*!< 64-bit double (array) */ diff --git a/src/lmptype.h b/src/lmptype.h index 6e0b54d988..2089350f48 100644 --- a/src/lmptype.h +++ b/src/lmptype.h @@ -276,7 +276,21 @@ union ubuf { \endverbatim */ struct multitype { - enum { NONE, DOUBLE, INT, BIGINT }; + /** Data type constants for extracting data from atoms, computes and fixes + * + * This enum must be kept in sync with the corresponding enum or constants + * in ``python/lammps/constants.py``, ``fortran/lammps.f90``, ``tools/swig/lammps.i``, + * ``src/library.h``, and ``examples/COUPLE/plugin/liblammpsplugin.h`` */ + enum _LMP_DATATYPE_CONST { + LAMMPS_NONE = -1, /*!< no data type assigned (yet) */ + LAMMPS_INT = 0, /*!< 32-bit integer (array) */ + LAMMPS_INT_2D = 1, /*!< two-dimensional 32-bit integer array */ + LAMMPS_DOUBLE = 2, /*!< 64-bit double (array) */ + LAMMPS_DOUBLE_2D = 3, /*!< two-dimensional 64-bit double array */ + LAMMPS_INT64 = 4, /*!< 64-bit integer (array) */ + LAMMPS_INT64_2D = 5, /*!< two-dimensional 64-bit integer array */ + LAMMPS_STRING = 6 /*!< C-String */ + }; int type; union { @@ -285,26 +299,26 @@ struct multitype { int64_t b; } data; - multitype() : type(NONE) { data.d = 0.0; } + multitype() : type(LAMMPS_NONE) { data.d = 0.0; } multitype(const multitype &) = default; multitype(multitype &&) = default; ~multitype() = default; multitype &operator=(const double &_d) { - type = DOUBLE; + type = LAMMPS_DOUBLE; data.d = _d; return *this; } multitype &operator=(const int &_i) { - type = INT; + type = LAMMPS_INT; data.i = _i; return *this; } multitype &operator=(const int64_t &_b) { - type = BIGINT; + type = LAMMPS_INT64; data.b = _b; return *this; } diff --git a/tools/swig/lammps.i b/tools/swig/lammps.i index c4ef0a7109..91a6866107 100644 --- a/tools/swig/lammps.i +++ b/tools/swig/lammps.i @@ -28,9 +28,10 @@ * * Must be kept in sync with the equivalent constants in ``src/library.h``, * ``python/lammps/constants.py``, ``examples/COUPLE/plugin/liblammpsplugin.h``, - * and ``fortran/lammps.f90`` */ + * ``src/lmptype.h``, and ``fortran/lammps.f90`` */ enum _LMP_DATATYPE_CONST { + LAMMPS_NONE =-1, /*!< no data type assigned (yet) */ LAMMPS_INT = 0, /*!< 32-bit integer (array) */ LAMMPS_INT_2D = 1, /*!< two-dimensional 32-bit integer array */ LAMMPS_DOUBLE = 2, /*!< 64-bit double (array) */ diff --git a/unittest/fortran/test_fortran_get_thermo.f90 b/unittest/fortran/test_fortran_get_thermo.f90 index 7911ab07d5..965e6d58f7 100644 --- a/unittest/fortran/test_fortran_get_thermo.f90 +++ b/unittest/fortran/test_fortran_get_thermo.f90 @@ -152,3 +152,111 @@ FUNCTION f_lammps_get_thermo_zhi() BIND(C) f_lammps_get_thermo_zhi = lmp%get_thermo('zhi') END FUNCTION f_lammps_get_thermo_zhi + +SUBROUTINE f_lammps_last_thermo_setup() BIND(C) + USE liblammps + USE keepstuff, ONLY : lmp, big_input, cont_input, pair_input + IMPLICIT NONE + + CALL lmp%commands_list(big_input) + CALL lmp%commands_list(cont_input) + CALL lmp%commands_list(pair_input) + CALL lmp%command('thermo 10') + CALL lmp%command('run 15 post no') +END SUBROUTINE f_lammps_last_thermo_setup + +FUNCTION f_lammps_last_thermo_step() BIND(C) + USE, INTRINSIC :: ISO_C_BINDING, ONLY: c_int, c_int64_t + USE liblammps + USE keepstuff, ONLY : lmp + IMPLICIT NONE + INTEGER(c_int) :: f_lammps_last_thermo_step + INTEGER :: size_bigint + INTEGER(c_int), POINTER :: ival + INTEGER(c_int64_t), POINTER :: bval + + size_bigint = lmp%extract_setting('bigint') + IF (size_bigint == 4) THEN + ival = lmp%last_thermo('step',1) + f_lammps_last_thermo_step = INT(ival) + ELSE + bval = lmp%last_thermo('step',1) + f_lammps_last_thermo_step = INT(bval) + END IF +END FUNCTION f_lammps_last_thermo_step + +FUNCTION f_lammps_last_thermo_num() BIND(C) + USE, INTRINSIC :: ISO_C_BINDING, ONLY: c_int + USE liblammps + USE keepstuff, ONLY : lmp + IMPLICIT NONE + INTEGER(c_int) :: f_lammps_last_thermo_num + INTEGER(c_int), POINTER :: ival + + ival = lmp%last_thermo('num',1) + f_lammps_last_thermo_num = ival +END FUNCTION f_lammps_last_thermo_num + +FUNCTION f_lammps_last_thermo_type(idx) BIND(C) + USE, INTRINSIC :: ISO_C_BINDING, ONLY: c_int + USE liblammps + USE keepstuff, ONLY : lmp + IMPLICIT NONE + INTEGER(c_int), VALUE :: idx + INTEGER(c_int) :: f_lammps_last_thermo_type + INTEGER(c_int), POINTER :: ival + + ival = lmp%last_thermo('type',idx) + f_lammps_last_thermo_type = ival +END FUNCTION f_lammps_last_thermo_type + +FUNCTION f_lammps_last_thermo_string(idx) BIND(C) + USE, INTRINSIC :: ISO_C_BINDING, ONLY: c_int, c_ptr, c_null_ptr + USE liblammps + USE keepstuff, ONLY : lmp, f2c_string + IMPLICIT NONE + INTEGER(c_int), VALUE :: idx + TYPE(c_ptr) :: f_lammps_last_thermo_string + CHARACTER(LEN=12) :: buffer + + buffer = lmp%last_thermo('keyword',idx) + IF (LEN_TRIM(buffer) > 0) THEN + f_lammps_last_thermo_string = f2c_string(buffer) + ELSE + f_lammps_last_thermo_string = c_null_ptr + END IF +END FUNCTION f_lammps_last_thermo_string + +FUNCTION f_lammps_last_thermo_int(idx) BIND(C) + USE, INTRINSIC :: ISO_C_BINDING, ONLY: c_int, c_int64_t + USE liblammps + USE keepstuff, ONLY : lmp + IMPLICIT NONE + INTEGER(c_int), VALUE :: idx + INTEGER(c_int), POINTER :: ival + INTEGER(c_int64_t), POINTER :: bval + INTEGER(c_int) :: f_lammps_last_thermo_int + INTEGER :: size_bigint + + size_bigint = lmp%extract_setting('bigint') + IF (size_bigint == 4) THEN + ival = lmp%last_thermo('data',idx) + f_lammps_last_thermo_int = INT(ival) + ELSE + bval = lmp%last_thermo('data',idx) + f_lammps_last_thermo_int = INT(bval) + END IF +END FUNCTION f_lammps_last_thermo_int + +FUNCTION f_lammps_last_thermo_double(idx) BIND(C) + USE, INTRINSIC :: ISO_C_BINDING, ONLY: c_int, c_double + USE liblammps + USE keepstuff, ONLY : lmp + IMPLICIT NONE + INTEGER(c_int), VALUE :: idx + REAL(c_double), POINTER :: dval + REAL(c_double) :: f_lammps_last_thermo_double + + dval = lmp%last_thermo('data',idx) + f_lammps_last_thermo_double = dval +END FUNCTION f_lammps_last_thermo_double diff --git a/unittest/fortran/wrap_get_thermo.cpp b/unittest/fortran/wrap_get_thermo.cpp index 71b51609eb..e18697ba9b 100644 --- a/unittest/fortran/wrap_get_thermo.cpp +++ b/unittest/fortran/wrap_get_thermo.cpp @@ -1,6 +1,7 @@ // unit tests for getting thermodynamic output from a LAMMPS instance through the Fortran wrapper #include "lammps.h" +#include "lmptype.h" #include #include @@ -23,8 +24,18 @@ double f_lammps_get_thermo_ylo(); double f_lammps_get_thermo_yhi(); double f_lammps_get_thermo_zlo(); double f_lammps_get_thermo_zhi(); + +void f_lammps_last_thermo_setup(); +int f_lammps_last_thermo_step(); +int f_lammps_last_thermo_num(); +int f_lammps_last_thermo_type(int); +const char *f_lammps_last_thermo_string(int); +int f_lammps_last_thermo_int(int); +double f_lammps_last_thermo_double(int); } +using LAMMPS_NS::multitype; + class LAMMPS_thermo : public ::testing::Test { protected: LAMMPS_NS::LAMMPS *lmp; @@ -65,3 +76,33 @@ TEST_F(LAMMPS_thermo, get_thermo) EXPECT_DOUBLE_EQ(f_lammps_get_thermo_zlo(), 0.0); EXPECT_DOUBLE_EQ(f_lammps_get_thermo_zhi(), 4.0); }; + +TEST_F(LAMMPS_thermo, last_thermo) +{ + EXPECT_EQ(f_lammps_last_thermo_step(), -1); + EXPECT_EQ(f_lammps_last_thermo_type(1), multitype::LAMMPS_NONE); + EXPECT_EQ(f_lammps_last_thermo_type(2), multitype::LAMMPS_NONE); + f_lammps_last_thermo_setup(); + EXPECT_EQ(f_lammps_last_thermo_step(), 15); + EXPECT_EQ(f_lammps_last_thermo_num(), 6); + EXPECT_STREQ(f_lammps_last_thermo_string(1), "Step"); + EXPECT_STREQ(f_lammps_last_thermo_string(2), "Temp"); + EXPECT_STREQ(f_lammps_last_thermo_string(3), "E_pair"); + EXPECT_STREQ(f_lammps_last_thermo_string(6), "Press"); +#if defined(LAMMPS_SMALLSMALL) + EXPECT_EQ(f_lammps_last_thermo_type(1), multitype::LAMMPS_INT); +#else + EXPECT_EQ(f_lammps_last_thermo_type(1), multitype::LAMMPS_INT64); +#endif + EXPECT_EQ(f_lammps_last_thermo_int(1), 15); + EXPECT_EQ(f_lammps_last_thermo_type(2), multitype::LAMMPS_DOUBLE); + EXPECT_EQ(f_lammps_last_thermo_type(3), multitype::LAMMPS_DOUBLE); + EXPECT_EQ(f_lammps_last_thermo_type(4), multitype::LAMMPS_DOUBLE); + EXPECT_EQ(f_lammps_last_thermo_type(5), multitype::LAMMPS_DOUBLE); + EXPECT_EQ(f_lammps_last_thermo_type(6), multitype::LAMMPS_DOUBLE); + EXPECT_DOUBLE_EQ(f_lammps_last_thermo_double(2), 0.0); + EXPECT_DOUBLE_EQ(f_lammps_last_thermo_double(3), -0.13713425198078993); + EXPECT_DOUBLE_EQ(f_lammps_last_thermo_double(4), 0.0); + EXPECT_DOUBLE_EQ(f_lammps_last_thermo_double(5), -0.13713425198078993); + EXPECT_DOUBLE_EQ(f_lammps_last_thermo_double(6), -0.022421073321023492); +}; diff --git a/unittest/python/python-pylammps.py b/unittest/python/python-pylammps.py index 2b92f82248..9e691b1b8c 100644 --- a/unittest/python/python-pylammps.py +++ b/unittest/python/python-pylammps.py @@ -82,14 +82,26 @@ class PythonPyLammps(unittest.TestCase): self.pylmp.variable("fx atom fx") self.pylmp.run(10) - self.assertEqual(len(self.pylmp.runs), 1) - self.assertEqual(self.pylmp.last_run, self.pylmp.runs[0]) - self.assertEqual(len(self.pylmp.last_run.thermo.Step), 2) - self.assertEqual(len(self.pylmp.last_run.thermo.Temp), 2) - self.assertEqual(len(self.pylmp.last_run.thermo.E_pair), 2) - self.assertEqual(len(self.pylmp.last_run.thermo.E_mol), 2) - self.assertEqual(len(self.pylmp.last_run.thermo.TotEng), 2) - self.assertEqual(len(self.pylmp.last_run.thermo.Press), 2) + # thermo data is only captured during a run if PYTHON package is enabled + # without it, it will only capture the final thermo at completion + if self.pylmp.lmp.has_package("PYTHON"): + self.assertEqual(len(self.pylmp.runs), 1) + self.assertEqual(self.pylmp.last_run, self.pylmp.runs[0]) + self.assertEqual(len(self.pylmp.last_run.thermo.Step), 2) + self.assertEqual(len(self.pylmp.last_run.thermo.Temp), 2) + self.assertEqual(len(self.pylmp.last_run.thermo.E_pair), 2) + self.assertEqual(len(self.pylmp.last_run.thermo.E_mol), 2) + self.assertEqual(len(self.pylmp.last_run.thermo.TotEng), 2) + self.assertEqual(len(self.pylmp.last_run.thermo.Press), 2) + else: + self.assertEqual(len(self.pylmp.runs), 1) + self.assertEqual(self.pylmp.last_run, self.pylmp.runs[0]) + self.assertEqual(len(self.pylmp.last_run.thermo.Step), 1) + self.assertEqual(len(self.pylmp.last_run.thermo.Temp), 1) + self.assertEqual(len(self.pylmp.last_run.thermo.E_pair), 1) + self.assertEqual(len(self.pylmp.last_run.thermo.E_mol), 1) + self.assertEqual(len(self.pylmp.last_run.thermo.TotEng), 1) + self.assertEqual(len(self.pylmp.last_run.thermo.Press), 1) def test_info_queries(self): self.pylmp.lattice("fcc", 0.8442), diff --git a/unittest/utils/test_lmptype.cpp b/unittest/utils/test_lmptype.cpp index e5dc871953..383c9d4b2c 100644 --- a/unittest/utils/test_lmptype.cpp +++ b/unittest/utils/test_lmptype.cpp @@ -54,18 +54,18 @@ TEST(Types, multitype) m[4] = -1023; m[5] = -2.225; - EXPECT_EQ(m[0].type, multitype::BIGINT); - EXPECT_EQ(m[1].type, multitype::INT); - EXPECT_EQ(m[2].type, multitype::DOUBLE); + EXPECT_EQ(m[0].type, multitype::LAMMPS_INT64); + EXPECT_EQ(m[1].type, multitype::LAMMPS_INT); + EXPECT_EQ(m[2].type, multitype::LAMMPS_DOUBLE); #if defined(LAMMPS_SMALLSMALL) - EXPECT_EQ(m[3].type, multitype::INT); + EXPECT_EQ(m[3].type, multitype::LAMMPS_INT); #else - EXPECT_EQ(m[3].type, multitype::BIGINT); + EXPECT_EQ(m[3].type, multitype::LAMMPS_INT64); #endif - EXPECT_EQ(m[4].type, multitype::INT); - EXPECT_EQ(m[5].type, multitype::DOUBLE); - EXPECT_EQ(m[6].type, multitype::NONE); + EXPECT_EQ(m[4].type, multitype::LAMMPS_INT); + EXPECT_EQ(m[5].type, multitype::LAMMPS_DOUBLE); + EXPECT_EQ(m[6].type, multitype::LAMMPS_NONE); EXPECT_EQ(m[0].data.b, b1); EXPECT_EQ(m[1].data.i, i1);