From 10a3e857963165350c4d72d5a665678207926425 Mon Sep 17 00:00:00 2001 From: julient31 Date: Wed, 22 Apr 2020 11:53:08 -0600 Subject: [PATCH 01/64] C1 JT 042220 - added biquadratic pair/spin exchange --- .../llg_exchange.py | 2 +- src/SPIN/pair_spin_exchange.cpp | 24 +- src/SPIN/pair_spin_exchange_biquadratic.cpp | 594 ++++++++++++++++++ src/SPIN/pair_spin_exchange_biquadratic.h | 85 +++ 4 files changed, 695 insertions(+), 10 deletions(-) create mode 100644 src/SPIN/pair_spin_exchange_biquadratic.cpp create mode 100644 src/SPIN/pair_spin_exchange_biquadratic.h diff --git a/examples/SPIN/test_problems/validation_damped_exchange/llg_exchange.py b/examples/SPIN/test_problems/validation_damped_exchange/llg_exchange.py index 49eecb5b44..dd1c543bb3 100755 --- a/examples/SPIN/test_problems/validation_damped_exchange/llg_exchange.py +++ b/examples/SPIN/test_problems/validation_damped_exchange/llg_exchange.py @@ -65,6 +65,6 @@ for t in range (0,N): # calc. average magnetization Sm = (S1+S2)*0.5 # calc. energy - en = -2.0*J0*(np.dot(S1,S2)) + en = -J0*(np.dot(S1,S2)) # print res. 
in ps for comparison with LAMMPS print(t*dt/1000.0,Sm[0],Sm[1],Sm[2],en) diff --git a/src/SPIN/pair_spin_exchange.cpp b/src/SPIN/pair_spin_exchange.cpp index 5c5d5cb1a4..b23f4fa0cb 100644 --- a/src/SPIN/pair_spin_exchange.cpp +++ b/src/SPIN/pair_spin_exchange.cpp @@ -231,9 +231,15 @@ void PairSpinExchange::compute(int eflag, int vflag) if (rsq <= local_cut2) { compute_exchange(i,j,rsq,fmi,spj); - if (lattice_flag) { + + if (lattice_flag) compute_exchange_mech(i,j,rsq,eij,fi,spi,spj); - } + + if (eflag) { + evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); + evdwl *= 0.5*hbar; + emag[i] += evdwl; + } else evdwl = 0.0; } f[i][0] += fi[0]; @@ -243,11 +249,11 @@ void PairSpinExchange::compute(int eflag, int vflag) fm[i][1] += fmi[1]; fm[i][2] += fmi[2]; - if (eflag) { - evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); - evdwl *= 0.5*hbar; - emag[i] += evdwl; - } else evdwl = 0.0; + // if (eflag) { + // evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); + // evdwl *= 0.5*hbar; + // emag[i] += evdwl; + // } else evdwl = 0.0; if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz); @@ -364,8 +370,8 @@ void PairSpinExchange::compute_exchange(int i, int j, double rsq, double fmi[3], compute the mechanical force due to the exchange interaction between atom i and atom j ------------------------------------------------------------------------- */ -void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq, double eij[3], - double fi[3], double spi[3], double spj[3]) +void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq, + double eij[3], double fi[3], double spi[3], double spj[3]) { int *type = atom->type; int itype, jtype; diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp new file mode 100644 index 0000000000..a7f64690af --- /dev/null +++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp @@ -0,0 +1,594 @@ +/* 
---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ------------------------------------------------------------------------ + Contributing authors: Julien Tranchida (SNL) + Aidan Thompson (SNL) + + Please cite the related publication: + Tranchida, J., Plimpton, S. J., Thibaudeau, P., & Thompson, A. P. (2018). + Massively parallel symplectic algorithm for coupled magnetic spin dynamics + and molecular dynamics. Journal of Computational Physics. 
+------------------------------------------------------------------------- */ + +#include "pair_spin_exchange_biquadratic.h" +#include +#include +#include +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "fix.h" +#include "force.h" +#include "neigh_list.h" +#include "memory.h" +#include "modify.h" +#include "update.h" +#include "utils.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairSpinExchangeBiquadratic::~PairSpinExchangeBiquadratic() +{ + if (allocated) { + memory->destroy(setflag); + memory->destroy(cut_spin_exchange); + memory->destroy(J1_mag); + memory->destroy(J1_mech); + memory->destroy(J2); + memory->destroy(J3); + memory->destroy(K1_mag); + memory->destroy(K1_mech); + memory->destroy(K2); + memory->destroy(K3); + memory->destroy(cutsq); // to be implemented + memory->destroy(emag); + } +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void PairSpinExchangeBiquadratic::settings(int narg, char **arg) +{ + PairSpin::settings(narg,arg); + + cut_spin_exchange_global = force->numeric(FLERR,arg[0]); + + // reset cutoffs that have been explicitly set + + if (allocated) { + int i,j; + for (i = 1; i <= atom->ntypes; i++) + for (j = i+1; j <= atom->ntypes; j++) + if (setflag[i][j]) { + cut_spin_exchange[i][j] = cut_spin_exchange_global; + } + } +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type spin pairs +------------------------------------------------------------------------- */ + +void PairSpinExchangeBiquadratic::coeff(int narg, char **arg) +{ + if (!allocated) allocate(); + + // check if args correct + + if (strcmp(arg[2],"biquadratic") != 0) + error->all(FLERR,"Incorrect args in pair_style command"); + if (narg != 10) + error->all(FLERR,"Incorrect args in pair_style command"); + + 
int ilo,ihi,jlo,jhi; + force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi); + force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi); + + // get exchange arguments from input command + + const double rc = force->numeric(FLERR,arg[3]); + const double j1 = force->numeric(FLERR,arg[4]); + const double j2 = force->numeric(FLERR,arg[5]); + const double j3 = force->numeric(FLERR,arg[6]); + const double k1 = force->numeric(FLERR,arg[7]); + const double k2 = force->numeric(FLERR,arg[8]); + const double k3 = force->numeric(FLERR,arg[9]); + + int count = 0; + for (int i = ilo; i <= ihi; i++) { + for (int j = MAX(jlo,i); j <= jhi; j++) { + cut_spin_exchange[i][j] = rc; + J1_mag[i][j] = j1/hbar; + J1_mech[i][j] = j1; + J2[i][j] = j2; + J3[i][j] = j3; + K1_mag[i][j] = k1/hbar; + K1_mech[i][j] = k1; + K2[i][j] = k2; + K3[i][j] = k3; + setflag[i][j] = 1; + count++; + } + } + + if (count == 0) error->all(FLERR,"Incorrect args in pair_style command"); +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +double PairSpinExchangeBiquadratic::init_one(int i, int j) +{ + + if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set"); + + J1_mag[j][i] = J1_mag[i][j]; + J1_mech[j][i] = J1_mech[i][j]; + J2[j][i] = J2[i][j]; + J3[j][i] = J3[i][j]; + K1_mag[j][i] = K1_mag[i][j]; + K1_mech[j][i] = K1_mech[i][j]; + K2[j][i] = K2[i][j]; + K3[j][i] = K3[i][j]; + cut_spin_exchange[j][i] = cut_spin_exchange[i][j]; + + return cut_spin_exchange_global; +} + +/* ---------------------------------------------------------------------- + extract the larger cutoff +------------------------------------------------------------------------- */ + +void *PairSpinExchangeBiquadratic::extract(const char *str, int &dim) +{ + dim = 0; + if (strcmp(str,"cut") == 0) return (void *) &cut_spin_exchange_global; + return NULL; +} + +/* 
---------------------------------------------------------------------- */ + +void PairSpinExchangeBiquadratic::compute(int eflag, int vflag) +{ + int i,j,ii,jj,inum,jnum,itype,jtype; + double evdwl, ecoul; + double xi[3], eij[3]; + double delx,dely,delz; + double spi[3], spj[3]; + double fi[3], fmi[3]; + double local_cut2; + double rsq, inorm; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + ev_init(eflag,vflag); + + double **x = atom->x; + double **f = atom->f; + double **fm = atom->fm; + double **sp = atom->sp; + int *type = atom->type; + int nlocal = atom->nlocal; + int newton_pair = force->newton_pair; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // checking size of emag + + if (nlocal_max < nlocal) { // grow emag lists if necessary + nlocal_max = nlocal; + memory->grow(emag,nlocal_max,"pair/spin:emag"); + } + + // computation of the exchange interaction + // loop over atoms and their neighbors + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + itype = type[i]; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + xi[0] = x[i][0]; + xi[1] = x[i][1]; + xi[2] = x[i][2]; + spi[0] = sp[i][0]; + spi[1] = sp[i][1]; + spi[2] = sp[i][2]; + emag[i] = 0.0; + + // loop on neighbors + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = type[j]; + + spj[0] = sp[j][0]; + spj[1] = sp[j][1]; + spj[2] = sp[j][2]; + + evdwl = 0.0; + fi[0] = fi[1] = fi[2] = 0.0; + fmi[0] = fmi[1] = fmi[2] = 0.0; + + delx = xi[0] - x[j][0]; + dely = xi[1] - x[j][1]; + delz = xi[2] - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + inorm = 1.0/sqrt(rsq); + eij[0] = -inorm*delx; + eij[1] = -inorm*dely; + eij[2] = -inorm*delz; + + local_cut2 = cut_spin_exchange[itype][jtype]*cut_spin_exchange[itype][jtype]; + + // compute exchange interaction + + if (rsq <= local_cut2) { + compute_exchange(i,j,rsq,fmi,spi,spj); + if (lattice_flag) + compute_exchange_mech(i,j,rsq,eij,fi,spi,spj); + 
+ if (eflag) { + evdwl -= compute_energy(i,j,rsq,spi,spj); + emag[i] += evdwl; + } else evdwl = 0.0; + } + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + fm[i][0] += fmi[0]; + fm[i][1] += fmi[1]; + fm[i][2] += fmi[2]; + + if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, + evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz); + } + } + + if (vflag_fdotr) virial_fdotr_compute(); + +} + +/* ---------------------------------------------------------------------- + update the pair interactions fmi acting on the spin ii +------------------------------------------------------------------------- */ + +void PairSpinExchangeBiquadratic::compute_single_pair(int ii, double fmi[3]) +{ + int *type = atom->type; + double **x = atom->x; + double **sp = atom->sp; + double local_cut2; + double xi[3]; + double delx,dely,delz; + double spi[3],spj[3]; + + int j,jnum,itype,jtype,ntypes; + int k,locflag; + int *jlist,*numneigh,**firstneigh; + + double rsq; + + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // check if interaction applies to type of ii + + itype = type[ii]; + ntypes = atom->ntypes; + locflag = 0; + k = 1; + while (k <= ntypes) { + if (k <= itype) { + if (setflag[k][itype] == 1) { + locflag =1; + break; + } + k++; + } else if (k > itype) { + if (setflag[itype][k] == 1) { + locflag =1; + break; + } + k++; + } else error->all(FLERR,"Wrong type number"); + } + + // if interaction applies to type ii, + // locflag = 1 and compute pair interaction + + if (locflag == 1) { + + xi[0] = x[ii][0]; + xi[1] = x[ii][1]; + xi[2] = x[ii][2]; + spi[0] = sp[ii][0]; + spi[1] = sp[ii][1]; + spi[2] = sp[ii][2]; + + jlist = firstneigh[ii]; + jnum = numneigh[ii]; + + for (int jj = 0; jj < jnum; jj++) { + + j = jlist[jj]; + j &= NEIGHMASK; + jtype = type[j]; + local_cut2 = cut_spin_exchange[itype][jtype]*cut_spin_exchange[itype][jtype]; + + spj[0] = sp[j][0]; + spj[1] = sp[j][1]; + spj[2] = sp[j][2]; + + delx = xi[0] - x[j][0]; + dely = xi[1] - x[j][1]; + delz = xi[2] - 
x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= local_cut2) { + compute_exchange(ii,j,rsq,fmi,spi,spj); + } + } + } +} + +/* ---------------------------------------------------------------------- + compute exchange interaction between spins i and j +------------------------------------------------------------------------- */ + +void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq, + double fmi[3], double spi[3], double spj[3]) +{ + int *type = atom->type; + int itype,jtype; + double Jex,Kex,ra,sdots; + double rj,rk,r2j,r2k,ir3j,ir3k; + itype = type[i]; + jtype = type[j]; + + ra = sqrt(rsq); + rj = ra/J3[itype][jtype]; + r2j = rsq/J3[itype][jtype]/J3[itype][jtype]; + ir3j = 1.0/(rj*rj*rj); + rk = ra/K3[itype][jtype]; + r2k = rsq/K3[itype][jtype]/K3[itype][jtype]; + ir3k = 1.0/(rk*rk*rk); + + // modified Yukawa + Jex = (1.0-J2[itype][jtype]*r2j); + Jex *= J1_mag[itype][jtype]*ir3j; + Jex *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]); + + Kex = (1.0-K2[itype][jtype]*r2k); + Kex *= K1_mag[itype][jtype]*ir3k; + Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]); + + sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); + + fmi[0] += Jex*spj[0] + 2.0*Kex*spj[0]*sdots; + fmi[1] += Jex*spj[1] + 2.0*Kex*spj[1]*sdots; + fmi[2] += Jex*spj[2] + 2.0*Kex*spj[2]*sdots; +} + +/* ---------------------------------------------------------------------- + compute the mechanical force due to the exchange interaction between atom i and atom j +------------------------------------------------------------------------- */ + +void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, double rsq, + double eij[3], double fi[3], double spi[3], double spj[3]) +{ + int *type = atom->type; + int itype,jtype; + double Jex,Jex_mech,Kex,Kex_mech,ra,sdots; + double rj,rk,r2j,r2k,ir3j,ir3k; + itype = type[i]; + jtype = type[j]; + + ra = sqrt(rsq); + rj = ra/J3[itype][jtype]; + r2j = rsq/J3[itype][jtype]/J3[itype][jtype]; + ir3j = 1.0/(rj*rj*rj); + rk 
= ra/K3[itype][jtype]; + r2k = rsq/K3[itype][jtype]/K3[itype][jtype]; + ir3k = 1.0/(rk*rk*rk); + + // modified Yukawa + Jex_mech = J2[itype][jtype]*2.0*ra/(J3[itype][jtype]*J3[itype][jtype]); + Jex_mech += (3.0/ra+1.0/J3[itype][jtype])*(1.0-J2[itype][jtype]*r2j); + Jex_mech *= -J1_mech[itype][jtype]*ir3j; + Jex_mech *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]); + + Kex_mech = K2[itype][jtype]*2.0*ra/(K3[itype][jtype]*K3[itype][jtype]); + Kex_mech += (3.0/ra+1.0/K3[itype][jtype])*(1.0-K2[itype][jtype]*r2k); + Kex_mech *= -K1_mech[itype][jtype]*ir3k; + Kex_mech *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]); + + sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); + + fi[0] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[0]; + fi[1] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[1]; + fi[2] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[2]; +} + +/* ---------------------------------------------------------------------- + compute energy of spin pair i and j +------------------------------------------------------------------------- */ + +double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq, + double spi[3], double spj[3]) +{ + int *type = atom->type; + int itype,jtype; + double Jex,Kex,ra,sdots; + double rj,rk,r2j,r2k,ir3j,ir3k; + double energy = 0.0; + itype = type[i]; + jtype = type[j]; + + ra = sqrt(rsq); + rj = ra/J3[itype][jtype]; + r2j = rsq/J3[itype][jtype]/J3[itype][jtype]; + ir3j = 1.0/(rj*rj*rj); + rk = ra/K3[itype][jtype]; + r2k = rsq/K3[itype][jtype]/K3[itype][jtype]; + ir3k = 1.0/(rk*rk*rk); + + // modified Yukawa + Jex = (1.0-J2[itype][jtype]*r2j); + Jex *= J1_mech[itype][jtype]*ir3j; + Jex *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]); + + Kex = (1.0-K2[itype][jtype]*r2k); + Kex *= K1_mech[itype][jtype]*ir3k; + Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]); + + sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); + + energy = 0.5*(Jex*sdots + Kex*sdots*sdots); + return energy; +} + +/* 
---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +void PairSpinExchangeBiquadratic::allocate() +{ + allocated = 1; + int n = atom->ntypes; + + memory->create(setflag,n+1,n+1,"pair:setflag"); + for (int i = 1; i <= n; i++) + for (int j = i; j <= n; j++) + setflag[i][j] = 0; + + memory->create(cut_spin_exchange,n+1,n+1,"pair/spin/exchange:cut_spin_exchange"); + memory->create(J1_mag,n+1,n+1,"pair/spin/exchange:J1_mag"); + memory->create(J1_mech,n+1,n+1,"pair/spin/exchange:J1_mech"); + memory->create(J2,n+1,n+1,"pair/spin/exchange:J2"); + memory->create(J3,n+1,n+1,"pair/spin/exchange:J3"); + memory->create(K1_mag,n+1,n+1,"pair/spin/exchange:J1_mag"); + memory->create(K1_mech,n+1,n+1,"pair/spin/exchange:J1_mech"); + memory->create(K2,n+1,n+1,"pair/spin/exchange:J2"); + memory->create(K3,n+1,n+1,"pair/spin/exchange:J3"); + memory->create(cutsq,n+1,n+1,"pair:cutsq"); +} + +/* ---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +void PairSpinExchangeBiquadratic::write_restart(FILE *fp) +{ + write_restart_settings(fp); + + int i,j; + for (i = 1; i <= atom->ntypes; i++) { + for (j = i; j <= atom->ntypes; j++) { + fwrite(&setflag[i][j],sizeof(int),1,fp); + if (setflag[i][j]) { + fwrite(&J1_mag[i][j],sizeof(double),1,fp); + fwrite(&J1_mech[i][j],sizeof(double),1,fp); + fwrite(&J2[i][j],sizeof(double),1,fp); + fwrite(&J3[i][j],sizeof(double),1,fp); + fwrite(&K1_mag[i][j],sizeof(double),1,fp); + fwrite(&K1_mech[i][j],sizeof(double),1,fp); + fwrite(&K2[i][j],sizeof(double),1,fp); + fwrite(&K3[i][j],sizeof(double),1,fp); + fwrite(&cut_spin_exchange[i][j],sizeof(double),1,fp); + } + } + } +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts 
+------------------------------------------------------------------------- */ + +void PairSpinExchangeBiquadratic::read_restart(FILE *fp) +{ + read_restart_settings(fp); + + allocate(); + + int i,j; + int me = comm->me; + for (i = 1; i <= atom->ntypes; i++) { + for (j = i; j <= atom->ntypes; j++) { + if (me == 0) utils::sfread(FLERR,&setflag[i][j],sizeof(int),1,fp,NULL,error); + MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world); + if (setflag[i][j]) { + if (me == 0) { + utils::sfread(FLERR,&J1_mag[i][j],sizeof(double),1,fp,NULL,error); + utils::sfread(FLERR,&J1_mech[i][j],sizeof(double),1,fp,NULL,error); + utils::sfread(FLERR,&J2[i][j],sizeof(double),1,fp,NULL,error); + utils::sfread(FLERR,&J3[i][j],sizeof(double),1,fp,NULL,error); + utils::sfread(FLERR,&K1_mag[i][j],sizeof(double),1,fp,NULL,error); + utils::sfread(FLERR,&K1_mech[i][j],sizeof(double),1,fp,NULL,error); + utils::sfread(FLERR,&K2[i][j],sizeof(double),1,fp,NULL,error); + utils::sfread(FLERR,&K3[i][j],sizeof(double),1,fp,NULL,error); + utils::sfread(FLERR,&cut_spin_exchange[i][j],sizeof(double),1,fp,NULL,error); + } + MPI_Bcast(&J1_mag[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&J1_mech[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&J2[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&J3[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&K1_mag[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&K1_mech[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&K2[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&K3[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&cut_spin_exchange[i][j],1,MPI_DOUBLE,0,world); + } + } + } +} + + +/* ---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +void PairSpinExchangeBiquadratic::write_restart_settings(FILE *fp) +{ + fwrite(&cut_spin_exchange_global,sizeof(double),1,fp); + fwrite(&offset_flag,sizeof(int),1,fp); + fwrite(&mix_flag,sizeof(int),1,fp); +} + +/* 
---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts +------------------------------------------------------------------------- */ + +void PairSpinExchangeBiquadratic::read_restart_settings(FILE *fp) +{ + if (comm->me == 0) { + utils::sfread(FLERR,&cut_spin_exchange_global,sizeof(double),1,fp,NULL,error); + utils::sfread(FLERR,&offset_flag,sizeof(int),1,fp,NULL,error); + utils::sfread(FLERR,&mix_flag,sizeof(int),1,fp,NULL,error); + } + MPI_Bcast(&cut_spin_exchange_global,1,MPI_DOUBLE,0,world); + MPI_Bcast(&offset_flag,1,MPI_INT,0,world); + MPI_Bcast(&mix_flag,1,MPI_INT,0,world); +} diff --git a/src/SPIN/pair_spin_exchange_biquadratic.h b/src/SPIN/pair_spin_exchange_biquadratic.h new file mode 100644 index 0000000000..6fb9a7a94c --- /dev/null +++ b/src/SPIN/pair_spin_exchange_biquadratic.h @@ -0,0 +1,85 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(spin/exchange/biquadratic,PairSpinExchangeBiquadratic) + +#else + +#ifndef LMP_PAIR_SPIN_EXCHANGE_BIQUADRATIC_H +#define LMP_PAIR_SPIN_EXCHANGE_BIQUADRATIC_H + +#include "pair_spin.h" + +namespace LAMMPS_NS { + +class PairSpinExchangeBiquadratic : public PairSpin { + public: + PairSpinExchangeBiquadratic(LAMMPS *lmp) : PairSpin(lmp) {} + virtual ~PairSpinExchangeBiquadratic(); + void settings(int, char **); + void coeff(int, char **); + double init_one(int, int); + void *extract(const char *, int &); + + void compute(int, int); + void compute_single_pair(int, double *); + + void compute_exchange(int, int, double, double *, double *, double *); + void compute_exchange_mech(int, int, double, double *, double *, double *, double *); + double compute_energy(int , int , double , double *, double *); + + void write_restart(FILE *); + void read_restart(FILE *); + void write_restart_settings(FILE *); + void read_restart_settings(FILE *); + + double cut_spin_exchange_global; // global exchange cutoff distance + + protected: + double **J1_mag; // H exchange coeffs in eV + double **J1_mech; // mech exchange coeffs in + double **J2, **J3; // J1 in eV, J2 in Ang-1, J3 in Ang + double **K1_mag; // Bi exchange coeffs in eV + double **K1_mech; // mech exchange coeffs in + double **K2, **K3; // K1 in eV, K2 Ang-1, K3 in Ang + double **cut_spin_exchange; // cutoff distance exchange + + void allocate(); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Incorrect args in pair_spin command + +Self-explanatory. + +E: Spin simulations require metal unit style + +Self-explanatory. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: Pair spin requires atom attribute spin + +The atom style defined does not have these attributes. 
+ +*/ From e941670f2c7ae02a22ce1617a01fa967dbeaff56 Mon Sep 17 00:00:00 2001 From: julient31 Date: Mon, 13 Jul 2020 14:43:14 -0600 Subject: [PATCH 02/64] Commit modif biquad --- doc/src/fix_precession_spin.rst | 2 +- src/SPIN/pair_spin_dipole_cut.cpp | 5 +++-- src/SPIN/pair_spin_dipole_long.cpp | 12 ++++++++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/doc/src/fix_precession_spin.rst b/doc/src/fix_precession_spin.rst index 783963af72..043c5cb200 100644 --- a/doc/src/fix_precession_spin.rst +++ b/doc/src/fix_precession_spin.rst @@ -62,7 +62,7 @@ with: The field value in Tesla is multiplied by the gyromagnetic ratio, :math:`g \cdot \mu_B/\hbar`, converting it into a precession frequency in -rad.THz (in metal units and with :math:`\mu_B = 5.788 eV/T`). +rad.THz (in metal units and with :math:`\mu_B = 5.788\cdot 10^{-5}` eV/T). As a comparison, the figure below displays the simulation of a single spin (of norm :math:`\mu_i = 1.0`) submitted to an external diff --git a/src/SPIN/pair_spin_dipole_cut.cpp b/src/SPIN/pair_spin_dipole_cut.cpp index cdae3c0bab..e18c24bcc0 100644 --- a/src/SPIN/pair_spin_dipole_cut.cpp +++ b/src/SPIN/pair_spin_dipole_cut.cpp @@ -48,9 +48,10 @@ PairSpinDipoleCut::PairSpinDipoleCut(LAMMPS *lmp) : PairSpin(lmp) hbar = force->hplanck/MY_2PI; // eV/(rad.THz) mub = 9.274e-4; // in A.Ang^2 - mu_0 = 785.15; // in eV/Ang/A^2 + // mu_0 = 785.15; // in eV/Ang/A^2 + mu_0 = 784.15; // in eV/Ang/A^2 mub2mu0 = mub * mub * mu_0 / (4.0*MY_PI); // in eV.Ang^3 - //mub2mu0 = mub * mub * mu_0 / (4.0*MY_PI); // in eV + // mub2mu0 = mub * mub * mu_0 / (4.0*MY_PI); // in eV mub2mu0hbinv = mub2mu0 / hbar; // in rad.THz } diff --git a/src/SPIN/pair_spin_dipole_long.cpp b/src/SPIN/pair_spin_dipole_long.cpp index aeb916cfae..5ac3b276d2 100644 --- a/src/SPIN/pair_spin_dipole_long.cpp +++ b/src/SPIN/pair_spin_dipole_long.cpp @@ -52,7 +52,7 @@ PairSpinDipoleLong::PairSpinDipoleLong(LAMMPS *lmp) : PairSpin(lmp) hbar = force->hplanck/MY_2PI; // 
eV/(rad.THz) mub = 9.274e-4; // in A.Ang^2 - mu_0 = 785.15; // in eV/Ang/A^2 + mu_0 = 784.15; // in eV/Ang/A^2 mub2mu0 = mub * mub * mu_0 / (4.0*MY_PI); // in eV.Ang^3 //mub2mu0 = mub * mub * mu_0 / (4.0*MY_PI); // in eV mub2mu0hbinv = mub2mu0 / hbar; // in rad.THz @@ -136,10 +136,11 @@ void PairSpinDipoleLong::init_style() // insure use of KSpace long-range solver, set g_ewald - if (force->kspace == NULL) - error->all(FLERR,"Pair style requires a KSpace style"); + // if (force->kspace == NULL) + // error->all(FLERR,"Pair style requires a KSpace style"); - g_ewald = force->kspace->g_ewald; + // g_ewald = force->kspace->g_ewald; + g_ewald = 1.0; } /* ---------------------------------------------------------------------- @@ -220,6 +221,9 @@ void PairSpinDipoleLong::compute(int eflag, int vflag) memory->grow(emag,nlocal_max,"pair/spin:emag"); } + + printf("test gewald %g \n",g_ewald); + pre1 = 2.0 * g_ewald / MY_PIS; pre2 = 4.0 * pow(g_ewald,3.0) / MY_PIS; pre3 = 8.0 * pow(g_ewald,5.0) / MY_PIS; From 7054c82b679031845592e28b400c4b1a5d2c890f Mon Sep 17 00:00:00 2001 From: julient31 Date: Mon, 24 Aug 2020 09:23:31 -0600 Subject: [PATCH 03/64] added BS function to pair/spin/biquadractic --- src/SPIN/pair_spin_exchange_biquadratic.cpp | 102 ++++++++++++++------ 1 file changed, 70 insertions(+), 32 deletions(-) diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp index a7f64690af..20cea77396 100644 --- a/src/SPIN/pair_spin_exchange_biquadratic.cpp +++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp @@ -375,15 +375,24 @@ void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq, rk = ra/K3[itype][jtype]; r2k = rsq/K3[itype][jtype]/K3[itype][jtype]; ir3k = 1.0/(rk*rk*rk); + + // BS model + Jex = 4.0*J1_mag[itype][jtype]*r2j; + Jex *= (1.0-J2[itype][jtype]*r2j); + Jex *= exp(-r2j); + + Kex = 4.0*K1_mag[itype][jtype]*r2k; + Kex *= (1.0-K2[itype][jtype]*r2k); + Kex *= exp(-r2k); // modified Yukawa - Jex = 
(1.0-J2[itype][jtype]*r2j); - Jex *= J1_mag[itype][jtype]*ir3j; - Jex *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]); - - Kex = (1.0-K2[itype][jtype]*r2k); - Kex *= K1_mag[itype][jtype]*ir3k; - Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]); + // Jex = (1.0-J2[itype][jtype]*r2j); + // Jex *= J1_mag[itype][jtype]*ir3j; + // Jex *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]); + // + // Kex = (1.0-K2[itype][jtype]*r2k); + // Kex *= K1_mag[itype][jtype]*ir3k; + // Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]); sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); @@ -402,28 +411,48 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, double rsq int *type = atom->type; int itype,jtype; double Jex,Jex_mech,Kex,Kex_mech,ra,sdots; - double rj,rk,r2j,r2k,ir3j,ir3k; + // double rj,rk,r2j,r2k,ir3j,ir3k; + double rja,rka,rjr,rkr,iJ3,iK3; itype = type[i]; jtype = type[j]; - ra = sqrt(rsq); - rj = ra/J3[itype][jtype]; - r2j = rsq/J3[itype][jtype]/J3[itype][jtype]; - ir3j = 1.0/(rj*rj*rj); - rk = ra/K3[itype][jtype]; - r2k = rsq/K3[itype][jtype]/K3[itype][jtype]; - ir3k = 1.0/(rk*rk*rk); + // ra = sqrt(rsq); + // rj = ra/J3[itype][jtype]; + // r2j = rsq/J3[itype][jtype]/J3[itype][jtype]; + // ir3j = 1.0/(rj*rj*rj); + // rk = ra/K3[itype][jtype]; + // r2k = rsq/K3[itype][jtype]/K3[itype][jtype]; + // ir3k = 1.0/(rk*rk*rk); - // modified Yukawa - Jex_mech = J2[itype][jtype]*2.0*ra/(J3[itype][jtype]*J3[itype][jtype]); - Jex_mech += (3.0/ra+1.0/J3[itype][jtype])*(1.0-J2[itype][jtype]*r2j); - Jex_mech *= -J1_mech[itype][jtype]*ir3j; - Jex_mech *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]); + Jex = J1_mech[itype][jtype]; + iJ3 = 1.0/(J3[itype][jtype]*J3[itype][jtype]); + Kex = K1_mech[itype][jtype]; + iK3 = 1.0/(K3[itype][jtype]*K3[itype][jtype]); + + rja = rsq*iJ3; + rjr = sqrt(rsq)*iJ3; + rka = rsq*iK3; + rkr = sqrt(rsq)*iK3; + + // BS model + Jex_mech = 1.0-rja-J2[itype][jtype]*rja*(2.0-rja); + Jex_mech *= 8.0*Jex*rjr*exp(-rja); + Jex_mech *= 
(spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); + + Kex_mech = 1.0-rka-K2[itype][jtype]*rka*(2.0-rka); + Kex_mech *= 8.0*Kex*rkr*exp(-rka); + Kex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); - Kex_mech = K2[itype][jtype]*2.0*ra/(K3[itype][jtype]*K3[itype][jtype]); - Kex_mech += (3.0/ra+1.0/K3[itype][jtype])*(1.0-K2[itype][jtype]*r2k); - Kex_mech *= -K1_mech[itype][jtype]*ir3k; - Kex_mech *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]); + // modified Yukawa + // Jex_mech = J2[itype][jtype]*2.0*ra/(J3[itype][jtype]*J3[itype][jtype]); + // Jex_mech += (3.0/ra+1.0/J3[itype][jtype])*(1.0-J2[itype][jtype]*r2j); + // Jex_mech *= -J1_mech[itype][jtype]*ir3j; + // Jex_mech *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]); + + // Kex_mech = K2[itype][jtype]*2.0*ra/(K3[itype][jtype]*K3[itype][jtype]); + // Kex_mech += (3.0/ra+1.0/K3[itype][jtype])*(1.0-K2[itype][jtype]*r2k); + // Kex_mech *= -K1_mech[itype][jtype]*ir3k; + // Kex_mech *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]); sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); @@ -454,15 +483,24 @@ double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq, rk = ra/K3[itype][jtype]; r2k = rsq/K3[itype][jtype]/K3[itype][jtype]; ir3k = 1.0/(rk*rk*rk); - + + // BS model + Jex = 4.0*J1_mech[itype][jtype]*r2j; + Jex *= (1.0-J2[itype][jtype]*r2j); + Jex *= exp(-r2j); + + Kex = 4.0*K1_mech[itype][jtype]*r2k; + Kex *= (1.0-K2[itype][jtype]*r2k); + Kex *= exp(-r2k); + // modified Yukawa - Jex = (1.0-J2[itype][jtype]*r2j); - Jex *= J1_mech[itype][jtype]*ir3j; - Jex *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]); - - Kex = (1.0-K2[itype][jtype]*r2k); - Kex *= K1_mech[itype][jtype]*ir3k; - Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]); + // Jex = (1.0-J2[itype][jtype]*r2j); + // Jex *= J1_mech[itype][jtype]*ir3j; + // Jex *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]); + // + // Kex = (1.0-K2[itype][jtype]*r2k); + // Kex *= K1_mech[itype][jtype]*ir3k; + // Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]); 
sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); From 901fe9d3aa494f1ec92867e4fbc1a26e18222c99 Mon Sep 17 00:00:00 2001 From: julient31 Date: Tue, 15 Sep 2020 18:22:11 -0600 Subject: [PATCH 04/64] modification of pair spin exchange/biquadratic, to offset ground state spin pressure --- src/SPIN/compute_spin.cpp | 37 +++++++++++++++++++-- src/SPIN/pair_spin_exchange_biquadratic.cpp | 9 +++-- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/SPIN/compute_spin.cpp b/src/SPIN/compute_spin.cpp index 94eff27f53..ca3c40e11a 100644 --- a/src/SPIN/compute_spin.cpp +++ b/src/SPIN/compute_spin.cpp @@ -46,6 +46,7 @@ ComputeSpin::ComputeSpin(LAMMPS *lmp, int narg, char **arg) : if ((narg != 3) && (narg != 4)) error->all(FLERR,"Illegal compute compute/spin command"); vector_flag = 1; + // size_vector = 7; size_vector = 6; extvector = 0; @@ -148,15 +149,19 @@ void ComputeSpin::compute_vector() int i; int countsp, countsptot; double mag[4], magtot[4]; + double m2, m2tot; + double m4, m4tot; double magenergy, magenergytot; double tempnum, tempnumtot; double tempdenom, tempdenomtot; - double spintemperature; + double spintemperature,binder; invoked_vector = update->ntimestep; countsp = countsptot = 0.0; mag[0] = mag[1] = mag[2] = mag[3] = 0.0; + // m2 = m2tot = 0.0; + // m4 = m4tot = 0.0; magtot[0] = magtot[1] = magtot[2] = magtot[3] = 0.0; magenergy = magenergytot = 0.0; tempnum = tempnumtot = 0.0; @@ -176,10 +181,25 @@ void ComputeSpin::compute_vector() for (i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { if (atom->sp_flag) { + + // compute first moment + mag[0] += sp[i][0]; mag[1] += sp[i][1]; mag[2] += sp[i][2]; + // compute second moment + + // m2 += sp[i][0]*sp[i][0]; + // m2 += sp[i][1]*sp[i][1]; + // m2 += sp[i][2]*sp[i][2]; + + // compute fourth moment + + // m4 += sp[i][0]*sp[i][0]*sp[i][0]*sp[i][0]; + // m4 += sp[i][1]*sp[i][1]*sp[i][1]*sp[i][1]; + // m4 += sp[i][2]*sp[i][2]*sp[i][2]*sp[i][2]; + // update magnetic precession energies if 
(precession_spin_flag) { @@ -206,26 +226,39 @@ void ComputeSpin::compute_vector() } MPI_Allreduce(mag,magtot,4,MPI_DOUBLE,MPI_SUM,world); + // MPI_Allreduce(&m2,&m2tot,1,MPI_DOUBLE,MPI_SUM,world); + // MPI_Allreduce(&m4,&m4tot,1,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(&magenergy,&magenergytot,1,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(&tempnum,&tempnumtot,1,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(&tempdenom,&tempdenomtot,1,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(&countsp,&countsptot,1,MPI_INT,MPI_SUM,world); + // compute average magnetization + double scale = 1.0/countsptot; magtot[0] *= scale; magtot[1] *= scale; magtot[2] *= scale; magtot[3] = sqrt((magtot[0]*magtot[0])+(magtot[1]*magtot[1])+(magtot[2]*magtot[2])); + + // compute spin temperature + spintemperature = hbar*tempnumtot; spintemperature /= (2.0*kb*tempdenomtot); + // compute Binder cumulant + + // m2tot *= scale; + // m4tot *= scale; + // binder = 1.0 - m4tot/(3.0*m2tot*m2tot); + vector[0] = magtot[0]; vector[1] = magtot[1]; vector[2] = magtot[2]; vector[3] = magtot[3]; vector[4] = magenergytot; vector[5] = spintemperature; - + // vector[6] = binder; } /* ---------------------------------------------------------------------- diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp index 20cea77396..812ccf40ab 100644 --- a/src/SPIN/pair_spin_exchange_biquadratic.cpp +++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp @@ -454,11 +454,15 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, double rsq // Kex_mech *= -K1_mech[itype][jtype]*ir3k; // Kex_mech *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]); - sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); + // sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); + sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2] - 1.0); fi[0] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[0]; fi[1] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[1]; fi[2] -= (Jex_mech*sdots + 
Kex_mech*sdots*sdots)*eij[2]; + // fi[0] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[0]; + // fi[1] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[1]; + // fi[2] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[2]; } /* ---------------------------------------------------------------------- @@ -502,7 +506,8 @@ double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq, // Kex *= K1_mech[itype][jtype]*ir3k; // Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]); - sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); + // sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); + sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2] - 1.0); energy = 0.5*(Jex*sdots + Kex*sdots*sdots); return energy; From 7d5109454f02bde06f625065e18f4506701446ac Mon Sep 17 00:00:00 2001 From: julient31 Date: Tue, 15 Sep 2020 20:16:48 -0600 Subject: [PATCH 05/64] correcting small issue with offset of biquadratic exchange --- src/SPIN/pair_spin_exchange_biquadratic.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp index 812ccf40ab..61b3df70ce 100644 --- a/src/SPIN/pair_spin_exchange_biquadratic.cpp +++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp @@ -455,11 +455,11 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, double rsq // Kex_mech *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]); // sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); - sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2] - 1.0); + sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); - fi[0] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[0]; - fi[1] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[1]; - fi[2] -= (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[2]; + fi[0] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[0]; + fi[1] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[1]; + fi[2] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[2]; 
// fi[0] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[0]; // fi[1] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[1]; // fi[2] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[2]; @@ -507,9 +507,9 @@ double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq, // Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]); // sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); - sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2] - 1.0); + sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); - energy = 0.5*(Jex*sdots + Kex*sdots*sdots); + energy = 0.5*(Jex*(sdots-1.0) + Kex*(sdots*sdots-1.0)); return energy; } From 9aba7b00505e3d33771d308b4253f310cad9297e Mon Sep 17 00:00:00 2001 From: julient31 Date: Mon, 28 Sep 2020 15:42:26 -0600 Subject: [PATCH 06/64] adding a kokkos/spin atom style --- src/KOKKOS/kokkos_type.h | 60 +++++++++++ src/SPIN/compute_spin.cpp | 24 ----- src/SPIN/pair_spin_exchange.cpp | 41 ++++---- src/SPIN/pair_spin_exchange_biquadratic.cpp | 107 ++++++-------------- src/SPIN/pair_spin_neel.cpp | 8 +- 5 files changed, 117 insertions(+), 123 deletions(-) diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index c8fccaf409..a3ebe4f030 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -714,6 +714,39 @@ typedef tdual_virial_array::t_dev_um t_virial_array_um; typedef tdual_virial_array::t_dev_const_um t_virial_array_const_um; typedef tdual_virial_array::t_dev_const_randomread t_virial_array_randomread; +// Spin Types + +//3d SP_FLOAT array n*4 +#ifdef LMP_KOKKOS_NO_LEGACY +typedef Kokkos::DualView tdual_sp_array; +#else +typedef Kokkos::DualView tdual_sp_array; +#endif +typedef tdual_sp_array::t_dev t_sp_array; +typedef tdual_sp_array::t_dev_const t_sp_array_const; +typedef tdual_sp_array::t_dev_um t_sp_array_um; +typedef tdual_sp_array::t_dev_const_um t_sp_array_const_um; +typedef tdual_sp_array::t_dev_const_randomread t_sp_array_randomread; + +//3d FM_FLOAT array n*3 + +typedef Kokkos::DualView tdual_fm_array; 
+typedef tdual_fm_array::t_dev t_fm_array; +typedef tdual_fm_array::t_dev_const t_fm_array_const; +typedef tdual_fm_array::t_dev_um t_fm_array_um; +typedef tdual_fm_array::t_dev_const_um t_fm_array_const_um; +typedef tdual_fm_array::t_dev_const_randomread t_fm_array_randomread; + +//3d FML_FLOAT array n*3 + +typedef Kokkos::DualView tdual_fm_long_array; +typedef tdual_fm_long_array::t_dev t_fm_long_array; +typedef tdual_fm_long_array::t_dev_const t_fm_long_array_const; +typedef tdual_fm_long_array::t_dev_um t_fm_long_array_um; +typedef tdual_fm_long_array::t_dev_const_um t_fm_long_array_const_um; +typedef tdual_fm_long_array::t_dev_const_randomread t_fm_long_array_randomread; + + //Energy Types //1d E_FLOAT array n @@ -950,6 +983,33 @@ typedef tdual_virial_array::t_host_um t_virial_array_um; typedef tdual_virial_array::t_host_const_um t_virial_array_const_um; typedef tdual_virial_array::t_host_const_randomread t_virial_array_randomread; +// Spin types + +//2d X_FLOAT array n*3 +typedef Kokkos::DualView tdual_sp_array; +typedef tdual_sp_array::t_host t_sp_array; +typedef tdual_sp_array::t_host_const t_sp_array_const; +typedef tdual_sp_array::t_host_um t_sp_array_um; +typedef tdual_sp_array::t_host_const_um t_sp_array_const_um; +typedef tdual_sp_array::t_host_const_randomread t_sp_array_randomread; + +//2d F_FLOAT array n*3 +typedef Kokkos::DualView tdual_fm_array; +//typedef Kokkos::DualView tdual_f_array; +typedef tdual_fm_array::t_host t_fm_array; +typedef tdual_fm_array::t_host_const t_fm_array_const; +typedef tdual_fm_array::t_host_um t_fm_array_um; +typedef tdual_fm_array::t_host_const_um t_fm_array_const_um; +typedef tdual_fm_array::t_host_const_randomread t_fm_array_randomread; + +//2d F_FLOAT array n*3 +typedef Kokkos::DualView tdual_fm_long_array; +//typedef Kokkos::DualView tdual_f_array; +typedef tdual_fm_long_array::t_host t_fm_long_array; +typedef tdual_fm_long_array::t_host_const t_fm_long_array_const; +typedef tdual_fm_long_array::t_host_um 
t_fm_long_array_um; +typedef tdual_fm_long_array::t_host_const_um t_fm_long_array_const_um; +typedef tdual_fm_long_array::t_host_const_randomread t_fm_long_array_randomread; //Energy Types diff --git a/src/SPIN/compute_spin.cpp b/src/SPIN/compute_spin.cpp index ca3c40e11a..5edfb04645 100644 --- a/src/SPIN/compute_spin.cpp +++ b/src/SPIN/compute_spin.cpp @@ -46,7 +46,6 @@ ComputeSpin::ComputeSpin(LAMMPS *lmp, int narg, char **arg) : if ((narg != 3) && (narg != 4)) error->all(FLERR,"Illegal compute compute/spin command"); vector_flag = 1; - // size_vector = 7; size_vector = 6; extvector = 0; @@ -160,8 +159,6 @@ void ComputeSpin::compute_vector() countsp = countsptot = 0.0; mag[0] = mag[1] = mag[2] = mag[3] = 0.0; - // m2 = m2tot = 0.0; - // m4 = m4tot = 0.0; magtot[0] = magtot[1] = magtot[2] = magtot[3] = 0.0; magenergy = magenergytot = 0.0; tempnum = tempnumtot = 0.0; @@ -188,18 +185,6 @@ void ComputeSpin::compute_vector() mag[1] += sp[i][1]; mag[2] += sp[i][2]; - // compute second moment - - // m2 += sp[i][0]*sp[i][0]; - // m2 += sp[i][1]*sp[i][1]; - // m2 += sp[i][2]*sp[i][2]; - - // compute fourth moment - - // m4 += sp[i][0]*sp[i][0]*sp[i][0]*sp[i][0]; - // m4 += sp[i][1]*sp[i][1]*sp[i][1]*sp[i][1]; - // m4 += sp[i][2]*sp[i][2]*sp[i][2]*sp[i][2]; - // update magnetic precession energies if (precession_spin_flag) { @@ -226,8 +211,6 @@ void ComputeSpin::compute_vector() } MPI_Allreduce(mag,magtot,4,MPI_DOUBLE,MPI_SUM,world); - // MPI_Allreduce(&m2,&m2tot,1,MPI_DOUBLE,MPI_SUM,world); - // MPI_Allreduce(&m4,&m4tot,1,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(&magenergy,&magenergytot,1,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(&tempnum,&tempnumtot,1,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(&tempdenom,&tempdenomtot,1,MPI_DOUBLE,MPI_SUM,world); @@ -246,19 +229,12 @@ void ComputeSpin::compute_vector() spintemperature = hbar*tempnumtot; spintemperature /= (2.0*kb*tempdenomtot); - // compute Binder cumulant - - // m2tot *= scale; - // m4tot *= scale; - // binder = 1.0 - 
m4tot/(3.0*m2tot*m2tot); - vector[0] = magtot[0]; vector[1] = magtot[1]; vector[2] = magtot[2]; vector[3] = magtot[3]; vector[4] = magenergytot; vector[5] = spintemperature; - // vector[6] = binder; } /* ---------------------------------------------------------------------- diff --git a/src/SPIN/pair_spin_exchange.cpp b/src/SPIN/pair_spin_exchange.cpp index b23f4fa0cb..611230c73e 100644 --- a/src/SPIN/pair_spin_exchange.cpp +++ b/src/SPIN/pair_spin_exchange.cpp @@ -240,28 +240,26 @@ void PairSpinExchange::compute(int eflag, int vflag) evdwl *= 0.5*hbar; emag[i] += evdwl; } else evdwl = 0.0; + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + if (newton_pair || j < nlocal) { + f[j][0] -= fi[0]; + f[j][1] -= fi[1]; + f[j][2] -= fi[2]; + } + fm[i][0] += fmi[0]; + fm[i][1] += fmi[1]; + fm[i][2] += fmi[2]; + + if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, + evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz); } - - f[i][0] += fi[0]; - f[i][1] += fi[1]; - f[i][2] += fi[2]; - fm[i][0] += fmi[0]; - fm[i][1] += fmi[1]; - fm[i][2] += fmi[2]; - - // if (eflag) { - // evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); - // evdwl *= 0.5*hbar; - // emag[i] += evdwl; - // } else evdwl = 0.0; - - if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, - evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz); } } if (vflag_fdotr) virial_fdotr_compute(); - } /* ---------------------------------------------------------------------- @@ -389,9 +387,12 @@ void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq, Jex_mech *= 8.0*Jex*rr*exp(-ra); Jex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); - fi[0] -= Jex_mech*eij[0]; - fi[1] -= Jex_mech*eij[1]; - fi[2] -= Jex_mech*eij[2]; + fi[0] -= 0.5*Jex_mech*eij[0]; + fi[1] -= 0.5*Jex_mech*eij[1]; + fi[2] -= 0.5*Jex_mech*eij[2]; + // fi[0] -= Jex_mech*eij[0]; + // fi[1] -= Jex_mech*eij[1]; + // fi[2] -= Jex_mech*eij[2]; } /* ---------------------------------------------------------------------- diff --git 
a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp index 61b3df70ce..cf351e6539 100644 --- a/src/SPIN/pair_spin_exchange_biquadratic.cpp +++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp @@ -246,6 +246,7 @@ void PairSpinExchangeBiquadratic::compute(int eflag, int vflag) if (rsq <= local_cut2) { compute_exchange(i,j,rsq,fmi,spi,spj); + if (lattice_flag) compute_exchange_mech(i,j,rsq,eij,fi,spi,spj); @@ -253,22 +254,26 @@ void PairSpinExchangeBiquadratic::compute(int eflag, int vflag) evdwl -= compute_energy(i,j,rsq,spi,spj); emag[i] += evdwl; } else evdwl = 0.0; + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + if (newton_pair || j < nlocal) { + f[j][0] -= fi[0]; + f[j][1] -= fi[1]; + f[j][2] -= fi[2]; + } + fm[i][0] += fmi[0]; + fm[i][1] += fmi[1]; + fm[i][2] += fmi[2]; + + if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, + evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz); } - - f[i][0] += fi[0]; - f[i][1] += fi[1]; - f[i][2] += fi[2]; - fm[i][0] += fmi[0]; - fm[i][1] += fmi[1]; - fm[i][2] += fmi[2]; - - if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, - evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz); } } if (vflag_fdotr) virial_fdotr_compute(); - } /* ---------------------------------------------------------------------- @@ -363,20 +368,13 @@ void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq, { int *type = atom->type; int itype,jtype; - double Jex,Kex,ra,sdots; - double rj,rk,r2j,r2k,ir3j,ir3k; + double Jex,Kex,r2j,r2k,sdots; itype = type[i]; jtype = type[j]; - ra = sqrt(rsq); - rj = ra/J3[itype][jtype]; r2j = rsq/J3[itype][jtype]/J3[itype][jtype]; - ir3j = 1.0/(rj*rj*rj); - rk = ra/K3[itype][jtype]; - r2k = rsq/K3[itype][jtype]/K3[itype][jtype]; - ir3k = 1.0/(rk*rk*rk); + r2k = rsq/J3[itype][jtype]/J3[itype][jtype]; - // BS model Jex = 4.0*J1_mag[itype][jtype]*r2j; Jex *= (1.0-J2[itype][jtype]*r2j); Jex *= exp(-r2j); @@ -385,45 +383,27 @@ void 
PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq, Kex *= (1.0-K2[itype][jtype]*r2k); Kex *= exp(-r2k); - // modified Yukawa - // Jex = (1.0-J2[itype][jtype]*r2j); - // Jex *= J1_mag[itype][jtype]*ir3j; - // Jex *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]); - // - // Kex = (1.0-K2[itype][jtype]*r2k); - // Kex *= K1_mag[itype][jtype]*ir3k; - // Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]); - sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); - fmi[0] += Jex*spj[0] + 2.0*Kex*spj[0]*sdots; - fmi[1] += Jex*spj[1] + 2.0*Kex*spj[1]*sdots; - fmi[2] += Jex*spj[2] + 2.0*Kex*spj[2]*sdots; + fmi[0] += (Jex*spj[0] + 2.0*Kex*spj[0]*sdots); + fmi[1] += (Jex*spj[1] + 2.0*Kex*spj[1]*sdots); + fmi[2] += (Jex*spj[2] + 2.0*Kex*spj[2]*sdots); } /* ---------------------------------------------------------------------- compute the mechanical force due to the exchange interaction between atom i and atom j ------------------------------------------------------------------------- */ -void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, double rsq, - double eij[3], double fi[3], double spi[3], double spj[3]) +void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, + double rsq, double eij[3], double fi[3], double spi[3], double spj[3]) { int *type = atom->type; int itype,jtype; double Jex,Jex_mech,Kex,Kex_mech,ra,sdots; - // double rj,rk,r2j,r2k,ir3j,ir3k; double rja,rka,rjr,rkr,iJ3,iK3; itype = type[i]; jtype = type[j]; - // ra = sqrt(rsq); - // rj = ra/J3[itype][jtype]; - // r2j = rsq/J3[itype][jtype]/J3[itype][jtype]; - // ir3j = 1.0/(rj*rj*rj); - // rk = ra/K3[itype][jtype]; - // r2k = rsq/K3[itype][jtype]/K3[itype][jtype]; - // ir3k = 1.0/(rk*rk*rk); - Jex = J1_mech[itype][jtype]; iJ3 = 1.0/(J3[itype][jtype]*J3[itype][jtype]); Kex = K1_mech[itype][jtype]; @@ -434,35 +414,22 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, double rsq rka = rsq*iK3; rkr = sqrt(rsq)*iK3; - // BS model Jex_mech = 
1.0-rja-J2[itype][jtype]*rja*(2.0-rja); Jex_mech *= 8.0*Jex*rjr*exp(-rja); - Jex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); + // Jex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); Kex_mech = 1.0-rka-K2[itype][jtype]*rka*(2.0-rka); Kex_mech *= 8.0*Kex*rkr*exp(-rka); - Kex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); + // Kex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); - // modified Yukawa - // Jex_mech = J2[itype][jtype]*2.0*ra/(J3[itype][jtype]*J3[itype][jtype]); - // Jex_mech += (3.0/ra+1.0/J3[itype][jtype])*(1.0-J2[itype][jtype]*r2j); - // Jex_mech *= -J1_mech[itype][jtype]*ir3j; - // Jex_mech *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]); - - // Kex_mech = K2[itype][jtype]*2.0*ra/(K3[itype][jtype]*K3[itype][jtype]); - // Kex_mech += (3.0/ra+1.0/K3[itype][jtype])*(1.0-K2[itype][jtype]*r2k); - // Kex_mech *= -K1_mech[itype][jtype]*ir3k; - // Kex_mech *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]); - - // sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); - fi[0] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[0]; - fi[1] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[1]; - fi[2] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[2]; - // fi[0] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[0]; - // fi[1] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[1]; - // fi[2] += (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[2]; + fi[0] -= 0.5*(Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[0]; + fi[1] -= 0.5*(Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[1]; + fi[2] -= 0.5*(Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[2]; + // fi[0] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[0]; + // fi[1] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[1]; + // fi[2] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[2]; } /* ---------------------------------------------------------------------- @@ -488,7 +455,6 
@@ double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq, r2k = rsq/K3[itype][jtype]/K3[itype][jtype]; ir3k = 1.0/(rk*rk*rk); - // BS model Jex = 4.0*J1_mech[itype][jtype]*r2j; Jex *= (1.0-J2[itype][jtype]*r2j); Jex *= exp(-r2j); @@ -497,19 +463,10 @@ double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq, Kex *= (1.0-K2[itype][jtype]*r2k); Kex *= exp(-r2k); - // modified Yukawa - // Jex = (1.0-J2[itype][jtype]*r2j); - // Jex *= J1_mech[itype][jtype]*ir3j; - // Jex *= exp((J3[itype][jtype]-ra)/J3[itype][jtype]); - // - // Kex = (1.0-K2[itype][jtype]*r2k); - // Kex *= K1_mech[itype][jtype]*ir3k; - // Kex *= exp((K3[itype][jtype]-ra)/K3[itype][jtype]); - - // sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); energy = 0.5*(Jex*(sdots-1.0) + Kex*(sdots*sdots-1.0)); + // energy = 0.5*(Jex*(sdots) + Kex*(sdots*sdots-1.0)); return energy; } diff --git a/src/SPIN/pair_spin_neel.cpp b/src/SPIN/pair_spin_neel.cpp index 4fd8ecc215..fc7cb6ab9a 100644 --- a/src/SPIN/pair_spin_neel.cpp +++ b/src/SPIN/pair_spin_neel.cpp @@ -262,8 +262,8 @@ void PairSpinNeel::compute(int eflag, int vflag) fm[i][2] += fmi[2]; if (eflag) { - evdwl = compute_neel_energy(i,j,rsq,eij,spi,spj); - evdwl *= 0.5*hbar; + evdwl -= compute_neel_energy(i,j,rsq,eij,spi,spj); + // evdwl *= 0.5*hbar; emag[i] += evdwl; } else evdwl = 0.0; @@ -588,12 +588,12 @@ double PairSpinNeel::compute_neel_energy(int i, int j, double rsq, double eij[3] // compute Neel's functions ra = rsq/g3[itype][jtype]/g3[itype][jtype]; - gr = 4.0*g1[itype][jtype]*ra; + gr = 4.0*g1_mech[itype][jtype]*ra; gr *= (1.0-g2[itype][jtype]*ra); gr *= exp(-ra); ra = rsq/q3[itype][jtype]/q3[itype][jtype]; - qr = 4.0*q1[itype][jtype]*ra; + qr = 4.0*q1_mech[itype][jtype]*ra; qr *= (1.0-q2[itype][jtype]*ra); qr *= exp(-ra); From f0729551ae3798edccd44521cbf015e3d5d19fb7 Mon Sep 17 00:00:00 2001 From: julient31 Date: Mon, 28 Sep 2020 16:54:10 -0600 Subject: 
[PATCH 07/64] adding for good new kokkos spin style --- src/KOKKOS/atom_vec_spin_kokkos.cpp | 1297 +++++++++++++++++++++++++++ src/KOKKOS/atom_vec_spin_kokkos.h | 132 +++ 2 files changed, 1429 insertions(+) create mode 100644 src/KOKKOS/atom_vec_spin_kokkos.cpp create mode 100644 src/KOKKOS/atom_vec_spin_kokkos.h diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp new file mode 100644 index 0000000000..8a7dd3317c --- /dev/null +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -0,0 +1,1297 @@ +/* ---------------------------------------------------------------------- + + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. + +------------------------------------------------------------------------- */ + +/* ------------------------------------------------------------------------ + Contributing authors: Julien Tranchida (SNL) + Aidan Thompson (SNL) + + Please cite the related publication: + Tranchida, J., Plimpton, S. J., Thibaudeau, P., & Thompson, A. P. (2018). + Massively parallel symplectic algorithm for coupled magnetic spin dynamics + and molecular dynamics. Journal of Computational Physics. 
+------------------------------------------------------------------------- */ + +#include "atom_vec_spin_kokkos.h" +#include +#include +#include "atom_kokkos.h" +#include "comm_kokkos.h" +#include "domain.h" +#include "error.h" +#include "fix.h" +#include "memory_kokkos.h" +#include "modify.h" +#include "utils.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +AtomVecSpinKokkos::AtomVecSpinKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) +{ + molecular = 0; + mass_type = 1; + forceclearflag = 1; + + comm_x_only = comm_f_only = 0; + size_forward = 7; + size_reverse = 9; + size_border = 10; + size_velocity = 3; + size_data_atom = 9; + size_data_vel = 4; + xcol_data = 4; + + atom->sp_flag = 1; + + k_count = DAT::tdual_int_1d("atom::k_count",1); + atomKK = (AtomKokkos *) atom; + commKK = (CommKokkos *) comm; +} + +/* ---------------------------------------------------------------------- + grow atom arrays + n = 0 grows arrays by a chunk + n > 0 allocates arrays to size n +------------------------------------------------------------------------- */ + +void AtomVecSpinKokkos::grow(int n) +{ + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; + else nmax = n; + atomKK->nmax = nmax; + if (nmax < 0 || nmax > MAXSMALLINT) + error->one(FLERR,"Per-processor system is too big"); + + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); + + memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + + // allocating mech. quantities + + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,"atom:f"); + + // allocating mag. 
quantities + + memoryKK->grow_kokkos(atomKK->k_sp,atomKK->sp,nmax,"atom:sp"); + memoryKK->grow_kokkos(atomKK->k_fm,atomKK->fm,nmax,"atom:fm"); + memoryKK->grow_kokkos(atomKK->k_fm_long,atomKK->fm_long,nmax,"atom:fm_long"); + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); +} + +/* ---------------------------------------------------------------------- + reset local array ptrs +------------------------------------------------------------------------- */ + +void AtomVecSpinKokkos::grow_reset() +{ + tag = atomKK->tag; + d_tag = atomKK->k_tag.d_view; + h_tag = atomKK->k_tag.h_view; + + type = atomKK->type; + d_type = atomKK->k_type.d_view; + h_type = atomKK->k_type.h_view; + mask = atomKK->mask; + d_mask = atomKK->k_mask.d_view; + h_mask = atomKK->k_mask.h_view; + image = atomKK->image; + d_image = atomKK->k_image.d_view; + h_image = atomKK->k_image.h_view; + + x = atomKK->x; + d_x = atomKK->k_x.d_view; + h_x = atomKK->k_x.h_view; + v = atomKK->v; + d_v = atomKK->k_v.d_view; + h_v = atomKK->k_v.h_view; + f = atomKK->f; + d_f = atomKK->k_f.d_view; + h_f = atomKK->k_f.h_view; + + sp = atomKK->sp; + d_sp = atomKK->k_sp.d_view; + h_sp = atomKK->k_sp.h_view; + fm = atom->fm; + d_fm = atomKK->k_fm.d_view; + h_fm = atomKK->k_fm.h_view; + fm_long = atom->fm_long; + d_fm_long = atomKK->k_fm_long.d_view; + h_fm_long = atomKK->k_fm_long.h_view; +} + +/* ---------------------------------------------------------------------- + copy atom I info to atom J +------------------------------------------------------------------------- */ + +void AtomVecSpinKokkos::copy(int i, int j, int delflag) +{ + h_tag[j] = h_tag[i]; + h_type[j] = h_type[i]; + mask[j] = mask[i]; + h_image[j] = h_image[i]; + h_x(j,0) = h_x(i,0); + h_x(j,1) = h_x(i,1); + h_x(j,2) = h_x(i,2); + h_v(j,0) = h_v(i,0); + h_v(j,1) = h_v(i,1); + h_v(j,2) = h_v(i,2); + + h_sp(j,0) = h_sp(i,0) + h_sp(j,1) = h_sp(i,1) + h_sp(j,2) = 
h_sp(i,2) + h_sp(j,3) = h_sp(i,3) + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag); +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecSpinKokkos_PackComm { + typedef DeviceType device_type; + + typename ArrayTypes::t_x_array_randomread _x; + typename ArrayTypes::t_sp_array_randomread _sp; + typename ArrayTypes::t_xfloat_2d_um _buf; + typename ArrayTypes::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecSpinKokkos_PackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_sp_array &sp, + const typename DAT::tdual_xfloat_2d &buf, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view()),_sp(sp.view()), + _list(list.view()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; + // const size_t elements = 3; + const size_t elements = 7; + buffer_view(_buf,buf,maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + _buf(i,3) = _sp(j,0); + _buf(i,4) = _sp(j,1); + _buf(i,5) = _sp(j,2); + _buf(i,6) = _sp(j,3); + } else { + if (TRICLINIC == 0) { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + _buf(i,3) = _sp(j,0); + _buf(i,4) = _sp(j,1); + _buf(i,5) = _sp(j,2); + _buf(i,6) = _sp(j,3); + } else { + _buf(i,0) = _x(j,0) + 
_pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + _buf(i,3) = _sp(j,0); + _buf(i,4) = _sp(j,1); + _buf(i,5) = _sp(j,2); + _buf(i,6) = _sp(j,3); + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecSpinKokkos_PackBorder { + typedef DeviceType device_type; + + typename ArrayTypes::t_xfloat_2d _buf; + const typename ArrayTypes::t_int_2d_const _list; + const int _iswap; + const typename ArrayTypes::t_x_array_randomread _x; + const typename ArrayTypes::t_tagint_1d _tag; + const typename ArrayTypes::t_int_1d _type; + const typename ArrayTypes::t_int_1d _mask; + const typename ArrayTypes::t_sp_array_randomread _sp; + X_FLOAT _dx,_dy,_dz; + + AtomVecSpinKokkos_PackBorder( + const typename ArrayTypes::t_xfloat_2d &buf, + const typename ArrayTypes::t_int_2d_const &list, + const int & iswap, + const typename ArrayTypes::t_x_array &x, + const typename ArrayTypes::t_tagint_1d &tag, + const typename ArrayTypes::t_int_1d &type, + const typename ArrayTypes::t_int_1d &mask, + const typename ArrayTypes::t_sp_array &sp, + const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): + _buf(buf),_list(list),_iswap(iswap), + _x(x),_sp(sp),_tag(tag),_type(type),_mask(mask), + _dx(dx),_dy(dy),_dz(dz) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + _buf(i,6) = _sp(j,0); + _buf(i,7) = _sp(j,1); + _buf(i,8) = _sp(j,2); + _buf(i,9) = _sp(j,3); + } else { + _buf(i,0) = _x(j,0) + _dx; + _buf(i,1) = _x(j,1) + _dy; + _buf(i,2) = _x(j,2) + _dz; + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + _buf(i,6) = _sp(j,0); + _buf(i,7) = _sp(j,1); + 
_buf(i,8) = _sp(j,2); + _buf(i,9) = _sp(j,3); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecSpinKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + X_FLOAT dx,dy,dz; + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if(space==Host) { + AtomVecSpinKokkos_PackBorder f( + buf.view(), k_sendlist.view(), + iswap,h_x,h_tag,h_type,h_mask,h_sp,dx,dy,dz); + Kokkos::parallel_for(n,f); + } else { + AtomVecSpinKokkos_PackBorder f( + buf.view(), k_sendlist.view(), + iswap,d_x,d_tag,d_type,d_mask,d_sp,dx,dy,dz); + Kokkos::parallel_for(n,f); + } + + } else { + dx = dy = dz = 0; + if(space==Host) { + AtomVecSpinKokkos_PackBorder f( + buf.view(), k_sendlist.view(), + iswap,h_x,h_tag,h_type,h_mask,h_sp,dx,dy,dz); + Kokkos::parallel_for(n,f); + } else { + AtomVecSpinKokkos_PackBorder f( + buf.view(), k_sendlist.view(), + iswap,d_x,d_tag,d_type,d_mask,d_sp,dx,dy,dz); + Kokkos::parallel_for(n,f); + } + } + return n*size_border; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSpinKokkos::pack_border(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_sp(j,0); + buf[m++] = h_sp(j,1); + buf[m++] = h_sp(j,2); + buf[m++] = h_sp(j,3); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + for (i = 0; i < n; 
i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_sp(j,0); + buf[m++] = h_sp(j,1); + buf[m++] = h_sp(j,2); + buf[m++] = h_sp(j,3); + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSpinKokkos::pack_border_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_sp(j,0); + buf[m++] = h_sp(j,1); + buf[m++] = h_sp(j,2); + buf[m++] = h_sp(j,3); + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_sp(j,0); + buf[m++] = h_sp(j,1); + buf[m++] = h_sp(j,2); + buf[m++] = h_sp(j,3); + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + 
dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_sp(j,0); + buf[m++] = h_sp(j,1); + buf[m++] = h_sp(j,2); + buf[m++] = h_sp(j,3); + /* test the group bit of the atom being packed (j = list[i]), not of the loop counter i */ + if (h_mask(j) & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSpinKokkos::pack_border_hybrid(int n, int *list, double *buf) +{ + int i,j,m; + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_sp(j,0); + buf[m++] = h_sp(j,1); + buf[m++] = h_sp(j,2); + buf[m++] = h_sp(j,3); + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecSpinKokkos_UnpackBorder { + typedef DeviceType device_type; + + const typename ArrayTypes::t_xfloat_2d_const _buf; + typename ArrayTypes::t_x_array _x; + typename ArrayTypes::t_tagint_1d _tag; + typename ArrayTypes::t_int_1d _type; + typename ArrayTypes::t_int_1d _mask; + typename ArrayTypes::t_sp_array _sp; + int _first; + + + AtomVecSpinKokkos_UnpackBorder( + const typename ArrayTypes::t_xfloat_2d_const &buf, + typename ArrayTypes::t_x_array &x, + typename ArrayTypes::t_tagint_1d &tag, + typename ArrayTypes::t_int_1d &type, + typename ArrayTypes::t_int_1d &mask, + typename ArrayTypes::t_sp_array &sp, + const int& first): + _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_sp(sp),_first(first){ + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; + _type(i+_first) = (int) 
d_ubuf(_buf(i,4)).i; + _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; + /* buffer slots 6-9 hold the four sp components, in the order the PackBorder functor wrote them */ + _sp(i+_first,0) = _buf(i,6); + _sp(i+_first,1) = _buf(i,7); + _sp(i+_first,2) = _buf(i,8); + _sp(i+_first,3) = _buf(i,9); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecSpinKokkos::unpack_border_kokkos(const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { + if (first+n >= nmax) { + grow(first+n+100); + } + if(space==Host) { + struct AtomVecSpinKokkos_UnpackBorder + f(buf.view(),h_x,h_tag,h_type,h_mask,h_sp,first); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSpinKokkos_UnpackBorder + f(buf.view(),d_x,d_tag,d_type,d_mask,d_sp,first); + Kokkos::parallel_for(n,f); + } + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|SP_MASK); +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSpinKokkos::unpack_border(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + + for (i = first; i < last; i++) { + if (i == nmax) { + grow(0); + } + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|SP_MASK); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag(i) = (tagint) ubuf(buf[m++]).i; + h_type(i) = (int) ubuf(buf[m++]).i; + h_mask(i) = (int) ubuf(buf[m++]).i; + h_sp(i,0) = buf[m++]; + h_sp(i,1) = buf[m++]; + h_sp(i,2) = buf[m++]; + h_sp(i,3) = buf[m++]; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSpinKokkos::unpack_border_vel(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + if (i == nmax) grow(0); + atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|SP_MASK); + h_x(i,0) = buf[m++]; + h_x(i,1) = 
buf[m++]; + h_x(i,2) = buf[m++]; + h_tag(i) = (tagint) ubuf(buf[m++]).i; + h_type(i) = (int) ubuf(buf[m++]).i; + h_mask(i) = (int) ubuf(buf[m++]).i; + h_sp(i,0) = buf[m++]; + h_sp(i,1) = buf[m++]; + h_sp(i,2) = buf[m++]; + h_sp(i,3) = buf[m++]; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSpinKokkos::unpack_border_hybrid(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + /* read all four sp components for every atom in [first,last); returns the number of values consumed */ + for (i = first; i < last; i++) { + h_sp(i,0) = buf[m++]; + h_sp(i,1) = buf[m++]; + h_sp(i,2) = buf[m++]; + h_sp(i,3) = buf[m++]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecSpinKokkos_PackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes AT; + typename AT::t_x_array_randomread _x; + typename AT::t_v_array_randomread _v; + typename AT::t_tagint_1d_randomread _tag; + typename AT::t_int_1d_randomread _type; + typename AT::t_int_1d_randomread _mask; + typename AT::t_imageint_1d_randomread _image; + typename AT::t_sp_array_randomread _sp; + typename AT::t_x_array _xw; + typename AT::t_v_array _vw; + typename AT::t_tagint_1d _tagw; + typename AT::t_int_1d _typew; + typename AT::t_int_1d _maskw; + typename AT::t_imageint_1d _imagew; + typename AT::t_sp_array _spw; + + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _copylist; + int _nlocal,_dim; + X_FLOAT _lo,_hi; + + AtomVecSpinKokkos_PackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, + typename AT::tdual_int_1d copylist,int nlocal, int dim, + X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view()), + 
_v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _sp(atom->k_sp.view()), + _xw(atom->k_x.view()), + _vw(atom->k_v.view()), + _tagw(atom->k_tag.view()), + _typew(atom->k_type.view()), + _maskw(atom->k_mask.view()), + _imagew(atom->k_image.view()), + _spw(atom->k_sp.view()), + _sendlist(sendlist.template view()), + _copylist(copylist.template view()), + _nlocal(nlocal),_dim(dim), + _lo(lo),_hi(hi){ + const size_t elements = 15; + const int maxsendlist = (buf.template view().extent(0)* + buf.template view().extent(1))/elements; + + buffer_view(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &mysend) const { + const int i = _sendlist(mysend); + _buf(mysend,0) = 15; + _buf(mysend,1) = _x(i,0); + _buf(mysend,2) = _x(i,1); + _buf(mysend,3) = _x(i,2); + _buf(mysend,4) = _v(i,0); + _buf(mysend,5) = _v(i,1); + _buf(mysend,6) = _v(i,2); + _buf(mysend,7) = d_ubuf(_tag[i]).d; + _buf(mysend,8) = d_ubuf(_type[i]).d; + _buf(mysend,9) = d_ubuf(_mask[i]).d; + _buf(mysend,10) = d_ubuf(_image[i]).d; + _buf(mysend,11) = _sp(i,0); + _buf(mysend,12) = _sp(i,1); + _buf(mysend,13) = _sp(i,2); + _buf(mysend,14) = _sp(i,3); + const int j = _copylist(mysend); + + if(j>-1) { + _xw(i,0) = _x(j,0); + _xw(i,1) = _x(j,1); + _xw(i,2) = _x(j,2); + _vw(i,0) = _v(j,0); + _vw(i,1) = _v(j,1); + _vw(i,2) = _v(j,2); + _tagw(i) = _tag(j); + _typew(i) = _type(j); + _maskw(i) = _mask(j); + _imagew(i) = _image(j); + _spw(i,0) = _sp(j,0); + _spw(i,1) = _sp(j,1); + _spw(i,2) = _sp(j,2); + _spw(i,3) = _sp(j,3); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecSpinKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space,int dim, + X_FLOAT lo,X_FLOAT hi ) +{ + if(nsend > (int) 
(k_buf.view().extent(0)*k_buf.view().extent(1))/15) { + int newsize = nsend*15/k_buf.view().extent(1)+1; + k_buf.resize(newsize,k_buf.view().extent(1)); + } + if(space == Host) { + AtomVecSpinKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + return nsend*15; + } else { + AtomVecSpinKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + return nsend*15; + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSpinKokkos::pack_exchange(int i, double *buf) +{ + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + buf[m++] = ubuf(h_tag(i)).d; + buf[m++] = ubuf(h_type(i)).d; + buf[m++] = ubuf(h_mask(i)).d; + buf[m++] = ubuf(h_image(i)).d; + buf[m++] = h_sp(i,0); + buf[m++] = h_sp(i,1); + buf[m++] = h_sp(i,2); + buf[m++] = h_sp(i,3); + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecSpinKokkos_UnpackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes AT; + typename AT::t_x_array _x; + typename AT::t_v_array _v; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_imageint_1d _image; + typename AT::t_sp_array _sp; + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d _nlocal; + int _dim; + X_FLOAT _lo,_hi; + + AtomVecSpinKokkos_UnpackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + int dim, X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view()), + _v(atom->k_v.view()), + 
_tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _sp(atom->k_sp.view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi){ + const size_t elements = 15; + const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + + buffer_view(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &myrecv) const { + X_FLOAT x = _buf(myrecv,_dim+1); + if (x >= _lo && x < _hi) { + int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + _x(i,0) = _buf(myrecv,1); + _x(i,1) = _buf(myrecv,2); + _x(i,2) = _buf(myrecv,3); + _v(i,0) = _buf(myrecv,4); + _v(i,1) = _buf(myrecv,5); + _v(i,2) = _buf(myrecv,6); + _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i; + _type[i] = (int) d_ubuf(_buf(myrecv,8)).i; + _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; + _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; + _sp(i,0) = _buf(myrecv,11); + _sp(i,1) = _buf(myrecv,12); + _sp(i,2) = _buf(myrecv,13); + _sp(i,3) = _buf(myrecv,14); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, + int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, + ExecutionSpace space) { + if(space == Host) { + k_count.h_view(0) = nlocal; + AtomVecSpinKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/15,f); + return k_count.h_view(0); + } else { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecSpinKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/15,f); + k_count.modify(); + k_count.sync(); + + return k_count.h_view(0); + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSpinKokkos::unpack_exchange(double *buf) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + atomKK->modified(Host,X_MASK | 
V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | SP_MASK); + + int m = 1; + h_x(nlocal,0) = buf[m++]; + h_x(nlocal,1) = buf[m++]; + h_x(nlocal,2) = buf[m++]; + h_v(nlocal,0) = buf[m++]; + h_v(nlocal,1) = buf[m++]; + h_v(nlocal,2) = buf[m++]; + h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; + h_type(nlocal) = (int) ubuf(buf[m++]).i; + h_mask(nlocal) = (int) ubuf(buf[m++]).i; + h_image(nlocal) = (imageint) ubuf(buf[m++]).i; + h_sp(nlocal,0) = buf[m++]; + h_sp(nlocal,1) = buf[m++]; + h_sp(nlocal,2) = buf[m++]; + h_sp(nlocal,3) = buf[m++]; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]-> + unpack_exchange(nlocal,&buf[m]); + + atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + size of restart data for all atoms owned by this proc + include extra data stored by fixes +------------------------------------------------------------------------- */ + +int AtomVecSpinKokkos::size_restart() +{ + int i; + + int nlocal = atom->nlocal; + int n = 15 * nlocal; + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + for (i = 0; i < nlocal; i++) + n += modify->fix[atom->extra_restart[iextra]]->size_restart(i); + + return n; +} + +/* ---------------------------------------------------------------------- + pack atom I's data for restart file including extra quantities + xyz must be 1st 3 values, so that read_restart can test on them + molecular types may be negative, but write as positive +------------------------------------------------------------------------- */ + +int AtomVecSpinKokkos::pack_restart(int i, double *buf) +{ + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | SP_MASK); + + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = ubuf(h_tag(i)).d; + buf[m++] = ubuf(h_type(i)).d; + buf[m++] = ubuf(h_mask(i)).d; + 
buf[m++] = ubuf(h_image(i)).d; + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + + buf[m++] = h_sp(i,0); + buf[m++] = h_sp(i,1); + buf[m++] = h_sp(i,2); + buf[m++] = h_sp(i,3); + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- + unpack data for one atom from restart file including extra quantities +------------------------------------------------------------------------- */ + +int AtomVecSpinKokkos::unpack_restart(double *buf) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) { + grow(0); + if (atom->nextra_store) + memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); + } + + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | SP_MASK); + + int m = 1; + h_x(nlocal,0) = buf[m++]; + h_x(nlocal,1) = buf[m++]; + h_x(nlocal,2) = buf[m++]; + h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; + h_type(nlocal) = (int) ubuf(buf[m++]).i; + h_mask(nlocal) = (int) ubuf(buf[m++]).i; + h_image(nlocal) = (imageint) ubuf(buf[m++]).i; + h_v(nlocal,0) = buf[m++]; + h_v(nlocal,1) = buf[m++]; + h_v(nlocal,2) = buf[m++]; + + h_sp(nlocal,0) = buf[m++]; + h_sp(nlocal,1) = buf[m++]; + h_sp(nlocal,2) = buf[m++]; + h_sp(nlocal,3) = buf[m++]; + + double **extra = atom->extra; + if (atom->nextra_store) { + int size = static_cast (buf[0]) - m; + for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; + } + + atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + create one atom of itype at coord + set other values to defaults +------------------------------------------------------------------------- */ + +void AtomVecSpinKokkos::create_atom(int itype, double *coord) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) { + 
atomKK->modified(Host,ALL_MASK); + grow(0); + } + atomKK->sync(Host,ALL_MASK); + atomKK->modified(Host,ALL_MASK); + + tag[nlocal] = 0; + type[nlocal] = itype; + h_x(nlocal,0) = coord[0]; + h_x(nlocal,1) = coord[1]; + h_x(nlocal,2) = coord[2]; + h_mask[nlocal] = 1; + h_image[nlocal] = ((imageint) IMGMAX << IMG2BITS) | + ((imageint) IMGMAX << IMGBITS) | IMGMAX; + h_v(nlocal,0) = 0.0; + h_v(nlocal,1) = 0.0; + h_v(nlocal,2) = 0.0; + + h_sp(nlocal,0) = 0.0; + h_sp(nlocal,1) = 0.0; + h_sp(nlocal,2) = 0.0; + h_sp(nlocal,3) = 0.0; + + atom->nlocal++; +} + +/* ---------------------------------------------------------------------- + unpack one line from Atoms section of data file + initialize other atom quantities +------------------------------------------------------------------------- */ + +void AtomVecSpinKokkos::data_atom(double *coord, imageint imagetmp, + char **values) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + + h_tag[nlocal] = utils::inumeric(FLERR,values[0],true,lmp); + h_type[nlocal] = utils::inumeric(FLERR,values[1],true,lmp); + if (type[nlocal] <= 0 || type[nlocal] > atom->ntypes) + error->one(FLERR,"Invalid atom type in Atoms section of data file"); + + h_sp(nlocal,3) = utils::numeric(FLERR,values[2],true,lmp); + h_sp(nlocal,0) = utils::numeric(FLERR,values[6],true,lmp); + h_sp(nlocal,1) = utils::numeric(FLERR,values[7],true,lmp); + h_sp(nlocal,2) = utils::numeric(FLERR,values[8],true,lmp); + double inorm = 1.0/sqrt(sp[nlocal][0]*sp[nlocal][0] + + sp[nlocal][1]*sp[nlocal][1] + + sp[nlocal][2]*sp[nlocal][2]); + h_sp(nlocal,0) *= inorm; + h_sp(nlocal,1) *= inorm; + h_sp(nlocal,2) *= inorm; + + h_x(nlocal,0) = coord[0]; + h_x(nlocal,1) = coord[1]; + h_x(nlocal,2) = coord[2]; + + h_image[nlocal] = imagetmp; + + h_mask[nlocal] = 1; + h_v(nlocal,0) = 0.0; + h_v(nlocal,1) = 0.0; + h_v(nlocal,2) = 0.0; + + atomKK->modified(Host,ALL_MASK); + + atom->nlocal++; +} + +/* ---------------------------------------------------------------------- + unpack 
hybrid quantities from one line in Atoms section of data file + initialize other atom quantities for this sub-style +------------------------------------------------------------------------- */ + +int AtomVecSpinKokkos::data_atom_hybrid(int nlocal, char **values) +{ + h_sp(nlocal,3) = utils::numeric(FLERR,values[0],true,lmp); + h_sp(nlocal,0) = utils::numeric(FLERR,values[1],true,lmp); + h_sp(nlocal,1) = utils::numeric(FLERR,values[2],true,lmp); + h_sp(nlocal,2) = utils::numeric(FLERR,values[3],true,lmp); + double inorm = 1.0/sqrt(sp[nlocal][0]*sp[nlocal][0] + + sp[nlocal][1]*sp[nlocal][1] + + sp[nlocal][2]*sp[nlocal][2]); + sp[nlocal][0] *= inorm; + sp[nlocal][1] *= inorm; + sp[nlocal][2] *= inorm; + + return 4; +} + +/* ---------------------------------------------------------------------- + pack atom info for data file including 3 image flags +------------------------------------------------------------------------- */ + +void AtomVecSpinKokkos::pack_data(double **buf) +{ + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) { + /* NOTE(review): buf[i][2] is assigned four times below, so only h_sp(i,3) survives and the spin direction h_sp(i,0..2) is never stored; data_atom() reads the direction from values[6..8], where this routine stores image flags. Presumably the direction needs its own columns plus a matching write_data() format - TODO confirm against the per-atom buffer width before changing. */ + buf[i][0] = h_tag[i]; + buf[i][1] = h_type[i]; + buf[i][2] = h_sp(i,0); + buf[i][3] = h_x(i,0); + buf[i][4] = h_x(i,1); + buf[i][5] = h_x(i,2); + buf[i][2] = h_sp(i,1); + buf[i][2] = h_sp(i,2); + buf[i][2] = h_sp(i,3); + buf[i][6] = (h_image[i] & IMGMASK) - IMGMAX; + buf[i][7] = (h_image[i] >> IMGBITS & IMGMASK) - IMGMAX; + buf[i][8] = (h_image[i] >> IMG2BITS) - IMGMAX; + } +} + +/* ---------------------------------------------------------------------- + pack hybrid atom info for data file +------------------------------------------------------------------------- */ + +int AtomVecSpinKokkos::pack_data_hybrid(int i, double *buf) +{ + buf[0] = h_sp(i,3); + buf[1] = h_sp(i,0); + buf[2] = h_sp(i,1); + buf[3] = h_sp(i,2); + return 4; +} + +/* ---------------------------------------------------------------------- + write atom info to data file including 3 image flags 
+------------------------------------------------------------------------- */ + +void AtomVecSpinKokkos::write_data(FILE *fp, int n, double **buf) +{ + for (int i = 0; i < n; i++) + fprintf(fp,"%d %d %-1.16e %-1.16e %-1.16e %-1.16e %d %d %d\n", + (int) buf[i][0],(int) buf[i][1],buf[i][2],buf[i][3],buf[i][4], + buf[i][5],(int) buf[i][6],(int) buf[i][7],(int) buf[i][8]); +} + +/* ---------------------------------------------------------------------- + write hybrid atom info to data file +------------------------------------------------------------------------- */ + +int AtomVecSpinKokkos::write_data_hybrid(FILE *fp, double *buf) +{ + fprintf(fp," %-1.16e %-1.16e %-1.16e %-1.16e",buf[0],buf[1],buf[2],buf[3]); + return 4; +} + +/* ---------------------------------------------------------------------- + return # of bytes of allocated memory +------------------------------------------------------------------------- */ + +bigint AtomVecSpinKokkos::memory_usage() +{ + bigint bytes = 0; + + if (atom->memcheck("tag")) bytes += memory->usage(tag,nmax); + if (atom->memcheck("type")) bytes += memory->usage(type,nmax); + if (atom->memcheck("mask")) bytes += memory->usage(mask,nmax); + if (atom->memcheck("image")) bytes += memory->usage(image,nmax); + if (atom->memcheck("x")) bytes += memory->usage(x,nmax,3); + if (atom->memcheck("v")) bytes += memory->usage(v,nmax,3); + if (atom->memcheck("f")) bytes += memory->usage(f,nmax*commKK->nthreads,3); + + if (atom->memcheck("sp")) bytes += memory->usage(sp,nmax,4); + if (atom->memcheck("fm")) bytes += memory->usage(fm,nmax*comm->nthreads,3); + if (atom->memcheck("fm_long")) bytes += memory->usage(fm_long,nmax*comm->nthreads,3); + + return bytes; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSpinKokkos::sync(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) 
atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & SP_MASK) atomKK->k_sp.sync(); + if (mask & FM_MASK) atomKK->k_fm.sync(); + if (mask & FML_MASK) atomKK->k_fm_long.sync(); + } else { + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & SP_MASK) atomKK->k_sp.sync(); + if (mask & FM_MASK) atomKK->k_fm.sync(); + if (mask & FML_MASK) atomKK->k_fm_long.sync(); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSpinKokkos::modified(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if (mask & X_MASK) atomKK->k_x.modify(); + if (mask & V_MASK) atomKK->k_v.modify(); + if (mask & F_MASK) atomKK->k_f.modify(); + if (mask & TAG_MASK) atomKK->k_tag.modify(); + if (mask & TYPE_MASK) atomKK->k_type.modify(); + if (mask & MASK_MASK) atomKK->k_mask.modify(); + if (mask & IMAGE_MASK) atomKK->k_image.modify(); + if (mask & SP_MASK) atomKK->k_sp.modify(); + if (mask & FM_MASK) atomKK->k_fm.modify(); + if (mask & FML_MASK) atomKK->k_fm_long.modify(); + } else { + if (mask & X_MASK) atomKK->k_x.modify(); + if (mask & V_MASK) atomKK->k_v.modify(); + if (mask & F_MASK) atomKK->k_f.modify(); + if (mask & TAG_MASK) atomKK->k_tag.modify(); + if (mask & TYPE_MASK) atomKK->k_type.modify(); + if (mask & MASK_MASK) atomKK->k_mask.modify(); + if (mask & IMAGE_MASK) atomKK->k_image.modify(); + if (mask & SP_MASK) atomKK->k_sp.modify(); + if (mask & FM_MASK) atomKK->k_fm.modify(); + if (mask & FML_MASK) atomKK->k_fm_long.modify(); + } +} + +void 
AtomVecSpinKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask) +{ + /* for every field selected in mask, call perform_async_copy only when that field's own dual view reports need_sync() */ + if (space == Device) { + if ((mask & X_MASK) && atomKK->k_x.need_sync()) + perform_async_copy(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync()) + perform_async_copy(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync()) + perform_async_copy(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync()) + perform_async_copy(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync()) + perform_async_copy(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync()) + perform_async_copy(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync()) + perform_async_copy(atomKK->k_image,space); + if ((mask & SP_MASK) && atomKK->k_sp.need_sync()) + perform_async_copy(atomKK->k_sp,space); + if ((mask & FM_MASK) && atomKK->k_fm.need_sync()) + perform_async_copy(atomKK->k_fm,space); + if ((mask & FML_MASK) && atomKK->k_fm_long.need_sync()) + perform_async_copy(atomKK->k_fm_long,space); + } else { + if ((mask & X_MASK) && atomKK->k_x.need_sync()) + perform_async_copy(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync()) + perform_async_copy(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync()) + perform_async_copy(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync()) + perform_async_copy(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync()) + perform_async_copy(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync()) + perform_async_copy(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync()) + perform_async_copy(atomKK->k_image,space); + if ((mask & SP_MASK) && atomKK->k_sp.need_sync()) + perform_async_copy(atomKK->k_sp,space); + if ((mask & FM_MASK) && atomKK->k_fm.need_sync()) + perform_async_copy(atomKK->k_fm,space); + if ((mask & FML_MASK) && 
atomKK->k_fm_long.need_sync()) + perform_async_copy(atomKK->k_fm_long,space); + } +} + diff --git a/src/KOKKOS/atom_vec_spin_kokkos.h b/src/KOKKOS/atom_vec_spin_kokkos.h new file mode 100644 index 0000000000..5b57cfd8e6 --- /dev/null +++ b/src/KOKKOS/atom_vec_spin_kokkos.h @@ -0,0 +1,132 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef ATOM_CLASS + +AtomStyle(spin/kk,AtomVecSpinKokkos) +AtomStyle(spin/kk/device,AtomVecSpinKokkos) +AtomStyle(spin/kk/host,AtomVecSpinKokkos) + +#else + +#ifndef LMP_ATOM_VEC_SPIN_KOKKOS_H +#define LMP_ATOM_VEC_SPIN_KOKKOS_H + +#include "atom_vec_kokkos.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +class AtomVecSpinKokkos : public AtomVecKokkos { + public: + AtomVecSpinKokkos(class LAMMPS *); + void grow(int); + void copy(int, int, int); + int pack_border(int, int *, double *, int, int *); + int pack_border_vel(int, int *, double *, int, int *); + int pack_border_hybrid(int, int *, double *); + void unpack_border(int, int, double *); + void unpack_border_vel(int, int, double *); + int unpack_border_hybrid(int, int, double *); + int pack_exchange(int, double *); + int unpack_exchange(double *); + int size_restart(); + int pack_restart(int, double *); + int unpack_restart(double *); + void create_atom(int, double *); + void data_atom(double *, imageint, char **); + int data_atom_hybrid(int, char **); + void pack_data(double **); + int 
pack_data_hybrid(int, double *); + void write_data(FILE *, int, double **); + int write_data_hybrid(FILE *, double *); + bigint memory_usage(); + + // clear magnetic and mechanic forces + + void force_clear(int, size_t); + + void grow_reset(); + // input lists to be checked + int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, + DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space); + void unpack_border_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf, + ExecutionSpace space); + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, + X_FLOAT lo, X_FLOAT hi); + int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, + int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, + ExecutionSpace space); + + void sync(ExecutionSpace space, unsigned int mask); + void modified(ExecutionSpace space, unsigned int mask); + void sync_overlapping_device(ExecutionSpace space, unsigned int mask); + + protected: + tagint *tag; + int *type,*mask; + imageint *image; + double **x,**v,**f; // lattice quantities + + // spin quantities + double **sp; // sp[i][0-2] direction of the spin i + // sp[i][3] atomic magnetic moment of the spin i + double **fm; // fm[i][0-2] direction of magnetic precession + double **fm_long; // storage of long-range spin prec. 
components + + DAT::t_tagint_1d d_tag; + HAT::t_tagint_1d h_tag; + + DAT::t_int_1d d_type, d_mask; + HAT::t_int_1d h_type, h_mask; + + DAT::t_imageint_1d d_image; + HAT::t_imageint_1d h_image; + + DAT::t_x_array d_x; + DAT::t_v_array d_v; + DAT::t_f_array d_f; + + DAT::t_x_array d_sp; + DAT::t_x_array d_fm; + DAT::t_x_array d_fm_long; + + HAT::t_x_array h_sp; + HAT::t_x_array h_fm; + HAT::t_x_array h_fm_long; + + DAT::tdual_int_1d k_count; +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Per-processor system is too big + +The number of owned atoms plus ghost atoms on a single +processor must fit in 32-bit integer. + +E: Invalid atom type in Atoms section of data file + +Atom types must range from 1 to specified # of types. + +*/ From 735676241ff8b56bf952e67d2e9f410a674251b0 Mon Sep 17 00:00:00 2001 From: julient31 Date: Tue, 29 Sep 2020 08:06:41 -0600 Subject: [PATCH 08/64] start correcting atom spin/kk --- src/KOKKOS/Install.sh | 2 ++ src/KOKKOS/atom_kokkos.cpp | 6 ++++++ src/KOKKOS/atom_kokkos.h | 5 +++++ src/KOKKOS/atom_vec_spin_kokkos.cpp | 21 +++++++++++---------- src/atom_masks.h | 6 ++++++ 5 files changed, 30 insertions(+), 10 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 540389f599..87cddbe1de 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -63,6 +63,8 @@ action atom_vec_bond_kokkos.cpp atom_vec_bond.cpp action atom_vec_bond_kokkos.h atom_vec_bond.h action atom_vec_charge_kokkos.cpp action atom_vec_charge_kokkos.h +action atom_vec_spin_kokkos.cpp +action atom_vec_spin_kokkos.h action atom_vec_dpd_kokkos.cpp atom_vec_dpd.cpp action atom_vec_dpd_kokkos.h atom_vec_dpd.h action atom_vec_full_kokkos.cpp atom_vec_full.cpp diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index 4637a9a21c..2640c1611d 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -76,6 +76,12 @@ AtomKokkos::~AtomKokkos() memoryKK->destroy_kokkos(k_improper_atom3, improper_atom3); 
memoryKK->destroy_kokkos(k_improper_atom4, improper_atom4); + // SPIN package + + memoryKK->destroy_kokkos(k_sp, sp); + memoryKK->destroy_kokkos(k_fm, fm); + memoryKK->destroy_kokkos(k_fm_long, fm_long); + // USER-DPD package memoryKK->destroy_kokkos(k_uCond,uCond); memoryKK->destroy_kokkos(k_uMech,uMech); diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index 0ae032032a..3ed703c66a 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -54,6 +54,11 @@ class AtomKokkos : public Atom { DAT::tdual_float_2d k_dvector; + // SPIN package + + DAT::tdual_x_array k_sp; + DAT::tdual_x_array k_fm; + DAT::tdual_x_array k_fm_long; // USER-DPD package DAT::tdual_efloat_1d k_uCond, k_uMech, k_uChem, k_uCG, k_uCGnew, diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp index 8a7dd3317c..ef0b350092 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -24,19 +24,20 @@ ------------------------------------------------------------------------- */ #include "atom_vec_spin_kokkos.h" -#include -#include #include "atom_kokkos.h" #include "comm_kokkos.h" #include "domain.h" -#include "error.h" -#include "fix.h" -#include "memory_kokkos.h" #include "modify.h" +#include "fix.h" +#include "atom_masks.h" +#include "memory_kokkos.h" +#include "error.h" #include "utils.h" using namespace LAMMPS_NS; +#define DELTA 10 + /* ---------------------------------------------------------------------- */ AtomVecSpinKokkos::AtomVecSpinKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) @@ -159,10 +160,10 @@ void AtomVecSpinKokkos::copy(int i, int j, int delflag) h_v(j,1) = h_v(i,1); h_v(j,2) = h_v(i,2); - h_sp(j,0) = h_sp(i,0) - h_sp(j,1) = h_sp(i,1) - h_sp(j,2) = h_sp(i,2) - h_sp(j,3) = h_sp(i,3) + h_sp(j,0) = h_sp(i,0); + h_sp(j,1) = h_sp(i,1); + h_sp(j,2) = h_sp(i,2); + h_sp(j,3) = h_sp(i,3); if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) @@ -263,7 +264,7 @@ struct 
AtomVecSpinKokkos_PackBorder { const typename ArrayTypes::t_sp_array &sp, const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): _buf(buf),_list(list),_iswap(iswap), - _x(x),_sp(sp),_tag(tag),_type(type),_mask(mask), + _x(x),_tag(tag),_type(type),_mask(mask),_sp(sp), _dx(dx),_dy(dy),_dz(dz) {} KOKKOS_INLINE_FUNCTION diff --git a/src/atom_masks.h b/src/atom_masks.h index 8e29448488..daad323835 100644 --- a/src/atom_masks.h +++ b/src/atom_masks.h @@ -42,6 +42,12 @@ #define ENERGY_MASK 0x00010000 #define VIRIAL_MASK 0x00020000 +// SPIN + +#define SP_MASK 0x00000001 +#define FM_MASK 0x00000002 +#define FML_MASK 0x00000004 + // DPD #define DPDRHO_MASK 0x00040000 From d3aa2d1cd01c6f4fa86b3eb388130b1fe9214d26 Mon Sep 17 00:00:00 2001 From: julient31 Date: Wed, 30 Sep 2020 10:27:22 -0600 Subject: [PATCH 09/64] compilable kokkos files (still a segfault issue) --- src/KOKKOS/atom_kokkos.h | 6 +++--- src/KOKKOS/atom_vec_spin_kokkos.cpp | 22 ++++++++++++++++------ src/KOKKOS/atom_vec_spin_kokkos.h | 12 ++++++------ 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index 3ed703c66a..b66d54cbdd 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -56,9 +56,9 @@ class AtomKokkos : public Atom { // SPIN package - DAT::tdual_x_array k_sp; - DAT::tdual_x_array k_fm; - DAT::tdual_x_array k_fm_long; + DAT::tdual_sp_array k_sp; + DAT::tdual_fm_array k_fm; + DAT::tdual_fm_long_array k_fm_long; // USER-DPD package DAT::tdual_efloat_1d k_uCond, k_uMech, k_uChem, k_uCG, k_uCGnew, diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp index ef0b350092..6ed62c0242 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -135,10 +135,10 @@ void AtomVecSpinKokkos::grow_reset() sp = atomKK->sp; d_sp = atomKK->k_sp.d_view; h_sp = atomKK->k_sp.h_view; - fm = atom->fm; + fm = atomKK->fm; d_fm = atomKK->k_fm.d_view; h_fm = atomKK->k_fm.h_view; 
- fm_long = atom->fm_long; + fm_long = atomKK->fm_long; d_fm_long = atomKK->k_fm_long.d_view; h_fm_long = atomKK->k_fm_long.h_view; } @@ -537,10 +537,10 @@ struct AtomVecSpinKokkos_UnpackBorder { _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - _sp(i+_first) = _buf(i,6); - _sp(i+_first) = _buf(i,7); - _sp(i+_first) = _buf(i,8); - _sp(i+_first) = _buf(i,9); + _sp(i+_first,0) = _buf(i,6); + _sp(i+_first,1) = _buf(i,7); + _sp(i+_first,2) = _buf(i,8); + _sp(i+_first,3) = _buf(i,9); } }; @@ -1296,3 +1296,13 @@ void AtomVecSpinKokkos::sync_overlapping_device(ExecutionSpace space, unsigned i } } +/* ---------------------------------------------------------------------- + clear all forces (mech and mag) +------------------------------------------------------------------------- */ + +void AtomVecSpinKokkos::force_clear(int /*n*/, size_t nbytes) +{ + memset(&atom->f[0][0],0,3*nbytes); + memset(&atom->fm[0][0],0,3*nbytes); + memset(&atom->fm_long[0][0],0,3*nbytes); +} diff --git a/src/KOKKOS/atom_vec_spin_kokkos.h b/src/KOKKOS/atom_vec_spin_kokkos.h index 5b57cfd8e6..d439424076 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.h +++ b/src/KOKKOS/atom_vec_spin_kokkos.h @@ -102,13 +102,13 @@ class AtomVecSpinKokkos : public AtomVecKokkos { DAT::t_v_array d_v; DAT::t_f_array d_f; - DAT::t_x_array d_sp; - DAT::t_x_array d_fm; - DAT::t_x_array d_fm_long; + DAT::t_sp_array d_sp; + DAT::t_fm_array d_fm; + DAT::t_fm_long_array d_fm_long; - HAT::t_x_array h_sp; - HAT::t_x_array h_fm; - HAT::t_x_array h_fm_long; + HAT::t_sp_array h_sp; + HAT::t_fm_array h_fm; + HAT::t_fm_long_array h_fm_long; DAT::tdual_int_1d k_count; }; From a8d304405ddca36740deef2e8608d8b4c782f88a Mon Sep 17 00:00:00 2001 From: julient31 Date: Wed, 30 Sep 2020 15:55:18 -0600 Subject: [PATCH 10/64] before pull from other machine --- src/SPIN/pair_spin_exchange_biquadratic.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp index cf351e6539..3fffb8b58e 100644 --- a/src/SPIN/pair_spin_exchange_biquadratic.cpp +++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp @@ -416,11 +416,9 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, Jex_mech = 1.0-rja-J2[itype][jtype]*rja*(2.0-rja); Jex_mech *= 8.0*Jex*rjr*exp(-rja); - // Jex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); Kex_mech = 1.0-rka-K2[itype][jtype]*rka*(2.0-rka); Kex_mech *= 8.0*Kex*rkr*exp(-rka); - // Kex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); From 84c104641b4d510cfba8535085f9f17befe22926 Mon Sep 17 00:00:00 2001 From: julient31 Date: Fri, 2 Oct 2020 10:47:29 -0600 Subject: [PATCH 11/64] adding offset option and doc --- doc/src/pair_spin_exchange.rst | 159 ++++++++++++++++---- src/SPIN/pair_spin_exchange.cpp | 114 ++++++++++---- src/SPIN/pair_spin_exchange.h | 6 +- src/SPIN/pair_spin_exchange_biquadratic.cpp | 71 +++++++-- src/SPIN/pair_spin_exchange_biquadratic.h | 4 +- src/SPIN/pair_spin_neel.cpp | 2 +- 6 files changed, 279 insertions(+), 77 deletions(-) diff --git a/doc/src/pair_spin_exchange.rst b/doc/src/pair_spin_exchange.rst index 14eefaccec..32a722c5f0 100644 --- a/doc/src/pair_spin_exchange.rst +++ b/doc/src/pair_spin_exchange.rst @@ -3,12 +3,16 @@ pair_style spin/exchange command ================================ +pair_style spin/exchange/biquadratic command +================================ + Syntax """""" .. 
code-block:: LAMMPS pair_style spin/exchange cutoff + pair_style spin/exchange/biquadratic cutoff * cutoff = global cutoff pair (distance in metal units) @@ -19,7 +23,10 @@ Examples pair_style spin/exchange 4.0 pair_coeff * * exchange 4.0 0.0446928 0.003496 1.4885 - pair_coeff 1 2 exchange 6.0 -0.01575 0.0 1.965 + pair_coeff 1 2 exchange 6.0 -0.01575 0.0 1.965 offset yes + pair_style spin/exchange/biquadratic 4.0 + pair_coeff * * biquadratic 4.0 0.05 0.03 1.48 0.05 0.03 1.48 offset no + pair_coeff 1 2 biquadratic 6.0 -0.01 0.0 1.9 0.0 0.1 19 Description """"""""""" @@ -31,69 +38,163 @@ pairs of magnetic spins: H_{ex} = -\sum_{i,j}^N J_{ij} (r_{ij}) \,\vec{s}_i \cdot \vec{s}_j -where :math:`\vec{s}_i` and :math:`\vec{s}_j` are two neighboring magnetic spins of two particles, -:math:`r_{ij} = \vert \vec{r}_i - \vec{r}_j \vert` is the inter-atomic distance between the two -particles. The summation is over pairs of nearest neighbors. -:math:`J(r_{ij})` is a function defining the intensity and the sign of the exchange -interaction for different neighboring shells. This function is defined as: +where :math:`\vec{s}_i` and :math:`\vec{s}_j` are two unit vectors representing +the magnetic spins of two particles (usually atoms), and +:math:`r_{ij} = \vert \vec{r}_i - \vec{r}_j \vert` is the inter-atomic distance +between those two particles. The summation is over pairs of nearest neighbors. +:math:`J(r_{ij})` is a function defining the intensity and the sign of the +exchange interaction for different neighboring shells. + +Style *spin/exchange/biquadratic* computes a biquadratic exchange interaction +between pairs of magnetic spins: + +.. 
math:: + + H_{bi} = -\sum_{i, j}^{N} {J}_{ij} \left(r_{ij} \right)\, + \vec{s}_{i}\cdot \vec{s}_{j} + -\sum_{i, j}^{N} {K}_{ij} \left(r_{ij} \right)\, + \left(\vec{s}_{i}\cdot + \vec{s}_{j}\right)^2 + +where :math:`\vec{s}_i`, :math:`\vec{s}_j`, :math:`r_{ij}` and +:math:`J(r_{ij})` have the same definitions as above, and :math:`K(r_{ij})` is +a second function, defining the intensity and the sign of the biquadratic term. + +The interatomic dependence of :math:`J(r_{ij})` and :math:`K(r_{ij})` in both +interactions above is defined by the following function: .. math:: - {J}\left( r_{ij} \right) = 4 a \left( \frac{r_{ij}}{d} \right)^2 \left( 1 - b \left( \frac{r_{ij}}{d} \right)^2 \right) e^{-\left( \frac{r_{ij}}{d} \right)^2 }\Theta (R_c - r_{ij}) + {f}\left( r_{ij} \right) = 4 a \left( \frac{r_{ij}}{d} \right)^2 + \left( 1 - b \left( \frac{r_{ij}}{d} \right)^2 \right) + e^{-\left( \frac{r_{ij}}{d} \right)^2 }\Theta (R_c - r_{ij}) -where :math:`a`, :math:`b` and :math:`d` are the three constant coefficients defined in the associated -"pair_coeff" command, and :math:`R_c` is the radius cutoff associated to -the pair interaction (see below for more explanations). +where :math:`a`, :math:`b` and :math:`d` are the three constant coefficients +defined in the associated "pair_coeff" command, and :math:`R_c` is the radius +cutoff associated to the pair interaction (see below for more explanations). -The coefficients :math:`a`, :math:`b`, and :math:`d` need to be fitted so that the function above matches with -the value of the exchange interaction for the :math:`N` neighbor shells taken into account. -Examples and more explanations about this function and its parameterization are reported -in :ref:`(Tranchida) `. +The coefficients :math:`a`, :math:`b`, and :math:`d` need to be fitted so that +the function above matches with the value of the exchange interaction for the +:math:`N` neighbor shells taken into account. 
+Examples and more explanations about this function and its parameterization +are reported in :ref:`(Tranchida) `. + +When a *spin/exchange/biquadratic* pair style is defined, six coefficients +(three for :math:`J(r_{ij})`, and three for :math:`K(r_{ij})`) have to be +fitted. From this exchange interaction, each spin :math:`i` will be submitted -to a magnetic torque :math:`\vec{\omega}`, and its associated atom can be submitted to a -force :math:`\vec{F}` for spin-lattice calculations (see :doc:`fix nve/spin `), -such as: +to a magnetic torque :math:`\vec{\omega}_{i}`, and its associated atom can be +submitted to a force :math:`\vec{F}_{i}` for spin-lattice calculations (see +:doc:`fix nve/spin `), such as: .. math:: \vec{\omega}_{i} = \frac{1}{\hbar} \sum_{j}^{Neighb} {J} \left(r_{ij} \right)\,\vec{s}_{j} ~~{\rm and}~~ - \vec{F}_{i} = \sum_{j}^{Neighb} \frac{\partial {J} \left(r_{ij} \right)}{ \partial r_{ij}} \left( \vec{s}_{i}\cdot \vec{s}_{j} \right) \vec{e}_{ij} + \vec{F}_{i} = \sum_{j}^{Neighb} \frac{\partial {J} \left(r_{ij} \right)}{ + \partial r_{ij}} \left( \vec{s}_{i}\cdot \vec{s}_{j} \right) \vec{e}_{ij} -with :math:`\hbar` the Planck constant (in metal units), and :math:`\vec{e}_{ij} = \frac{\vec{r}_i - \vec{r}_j}{\vert \vec{r}_i-\vec{r}_j \vert}` the unit +with :math:`\hbar` the Planck constant (in metal units), and :math:`\vec{e}_{ij} += \frac{\vec{r}_i - \vec{r}_j}{\vert \vec{r}_i-\vec{r}_j \vert}` the unit vector between sites :math:`i` and :math:`j`. +Equivalent forces and magnetic torques are generated for the biquadratic term +when a *spin/exchange/biquadratic* pair style is defined. More details about the derivation of these torques/forces are reported in :ref:`(Tranchida) `. 
-For the *spin/exchange* pair style, the following coefficients must be defined -for each pair of atoms types via the :doc:`pair_coeff ` command as in -the examples above, or in the data file or restart files read by the -:doc:`read_data ` or :doc:`read_restart ` commands, and -set in the following order: +For the *spin/exchange* and *spin/exchange/biquadratic* pair styles, the +following coefficients must be defined for each pair of atoms types via the +:doc:`pair_coeff ` command as in the examples above, or in the data +file or restart files read by the :doc:`read_data ` or +:doc:`read_restart ` commands, and set in the following order: * :math:`R_c` (distance units) * :math:`a` (energy units) * :math:`b` (adim parameter) * :math:`d` (distance units) -Note that :math:`R_c` is the radius cutoff of the considered exchange interaction, -and :math:`a`, :math:`b` and :math:`d` are the three coefficients performing the parameterization -of the function :math:`J(r_{ij})` defined above. +for the *spin/exchange* pair style, and: + +* :math:`R_c` (distance units) +* :math:`a_j` (energy units) +* :math:`b_j` (adim parameter) +* :math:`d_j` (distance units) +* :math:`a_k` (energy units) +* :math:`b_k` (adim parameter) +* :math:`d_k` (distance units) + +for the *spin/exchange/biquadratic* pair style. + +Note that :math:`R_c` is the radius cutoff of the considered exchange +interaction, and :math:`a`, :math:`b` and :math:`d` are the three coefficients +performing the parameterization of the function :math:`J(r_{ij})` defined +above (in the *biquadratic* case, :math:`a_j`, :math:`b_j`, :math:`d_j` and +:math:`a_k`, :math:`b_k`, :math:`d_k` are the coefficients of :math:`J(r_{ij})` +and :math:`K(r_{ij})` respectively). + None of those coefficients is optional. If not specified, the *spin/exchange* pair style cannot be used. 
---------- +**Offsetting magnetic forces and energies**\ : + +For spin-lattice simulation, it can be useful to offset the +mechanical forces and energies generated by the exchange +interaction. +The *offset* keyword allows this offset to be applied. +By setting *offset* to *yes*, the energy definitions above are +replaced by: + +.. math:: + + H_{ex} = -\sum_{i,j}^N J_{ij} (r_{ij}) \,[ \vec{s}_i \cdot \vec{s}_j-1 ] + +for the *spin/exchange* pair style, and: + +.. math:: + + H_{bi} = -\sum_{i, j}^{N} {J}_{ij} \left(r_{ij} \right)\, + [ \vec{s}_{i}\cdot \vec{s}_{j} -1 ] + -\sum_{i, j}^{N} {K}_{ij} \left(r_{ij} \right)\, + [ \left(\vec{s}_{i}\cdot + \vec{s}_{j}\right)^2 -1] + +for the *spin/exchange/biquadratic* pair style. + +Note that this offset only affects the calculation of the energy +and mechanical forces. It does not modify the calculation of the +precession vectors (and thus does not impact the purely magnetic +properties). +This ensures that when all spins are aligned, the magnetic energy +and the associated mechanical forces (and thus the pressure +generated by the magnetic potential) are null. + +.. note:: + This offset term can be very important when calculations such as + equations of state (energy vs volume, or energy vs pressure) are + being performed. Indeed, setting the *offset* term ensures that + at the ground state of the crystal and at the equilibrium magnetic + configuration (typically ferromagnetic), the pressure is null, + as expected. + Otherwise, magnetic forces could generate a residual pressure. + +When the *offset* option is set to *no*, no offset is applied +(also corresponding to the default option). + +---------- + Restrictions """""""""""" All the *pair/spin* styles are part of the SPIN package. These styles are only enabled if LAMMPS was built with this package, and if the -atom_style "spin" was declared. See the :doc:`Build package ` doc page for more info. +atom_style "spin" was declared. 
+See the :doc:`Build package ` doc page for more info. Related commands """""""""""""""" @@ -103,7 +204,7 @@ Related commands **Default:** -none +The default *offset* keyword value is *no*. ---------- diff --git a/src/SPIN/pair_spin_exchange.cpp b/src/SPIN/pair_spin_exchange.cpp index 611230c73e..5792738fd1 100644 --- a/src/SPIN/pair_spin_exchange.cpp +++ b/src/SPIN/pair_spin_exchange.cpp @@ -40,6 +40,14 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ +PairSpinExchange::PairSpinExchange(LAMMPS *lmp) : + PairSpin(lmp) +{ + e_offset = 0; +} + +/* ---------------------------------------------------------------------- */ + PairSpinExchange::~PairSpinExchange() { if (allocated) { @@ -61,6 +69,8 @@ PairSpinExchange::~PairSpinExchange() void PairSpinExchange::settings(int narg, char **arg) { PairSpin::settings(narg,arg); + + if (narg != 1) error->all(FLERR,"Illegal pair_style command"); cut_spin_exchange_global = force->numeric(FLERR,arg[0]); @@ -87,9 +97,9 @@ void PairSpinExchange::coeff(int narg, char **arg) // check if args correct if (strcmp(arg[2],"exchange") != 0) - error->all(FLERR,"Incorrect args in pair_style command"); - if (narg != 7) - error->all(FLERR,"Incorrect args in pair_style command"); + error->all(FLERR,"Incorrect args for pair coefficients"); + if ((narg != 7) && (narg != 9)) + error->all(FLERR,"Incorrect args for pair coefficients"); int ilo,ihi,jlo,jhi; force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi); @@ -97,11 +107,25 @@ void PairSpinExchange::coeff(int narg, char **arg) // get exchange arguments from input command + int iarg = 7; const double rc = force->numeric(FLERR,arg[3]); const double j1 = force->numeric(FLERR,arg[4]); const double j2 = force->numeric(FLERR,arg[5]); const double j3 = force->numeric(FLERR,arg[6]); + // read energy offset flag if specified + + while (iarg < narg) { + if (strcmp(arg[7],"offset") == 0) { + if (strcmp(arg[8],"yes") == 0) { + e_offset = 1; + } else if 
(strcmp(arg[8],"no") == 0) { + e_offset = 0; + } else error->all(FLERR,"Incorrect args for pair coefficients"); + iarg += 2; + } else error->all(FLERR,"Incorrect args for pair coefficients"); + } + int count = 0; for (int i = ilo; i <= ihi; i++) { for (int j = MAX(jlo,i); j <= jhi; j++) { @@ -236,8 +260,7 @@ void PairSpinExchange::compute(int eflag, int vflag) compute_exchange_mech(i,j,rsq,eij,fi,spi,spj); if (eflag) { - evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); - evdwl *= 0.5*hbar; + evdwl -= compute_energy(i,j,rsq,spi,spj); emag[i] += evdwl; } else evdwl = 0.0; @@ -373,7 +396,9 @@ void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq, { int *type = atom->type; int itype, jtype; - double Jex, Jex_mech, ra, rr, iJ3; + double Jex, Jex_mech, ra, sdots; + double rr, iJ3; + double fx, fy, fz; itype = type[i]; jtype = type[j]; @@ -385,38 +410,62 @@ void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq, Jex_mech = 1.0-ra-J2[itype][jtype]*ra*(2.0-ra); Jex_mech *= 8.0*Jex*rr*exp(-ra); - Jex_mech *= (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); + + sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); + + // apply or not energy and force offset + + fx = fy = fz = 0.0; + if (e_offset == 1) { // set offset + fx = Jex_mech*(sdots-1.0)*eij[0]; + fy = Jex_mech*(sdots-1.0)*eij[1]; + fz = Jex_mech*(sdots-1.0)*eij[2]; + } else if (e_offset == 0) { // no offset ("normal" calculation) + fx = Jex_mech*sdots*eij[0]; + fy = Jex_mech*sdots*eij[1]; + fz = Jex_mech*sdots*eij[2]; + } else error->all(FLERR,"Illegal option in pair exchange/biquadratic command"); + + fi[0] -= 0.5*fx; + fi[1] -= 0.5*fy; + fi[2] -= 0.5*fz; + // fi[0] -= fx; + // fi[1] -= fy; + // fi[2] -= fz; - fi[0] -= 0.5*Jex_mech*eij[0]; - fi[1] -= 0.5*Jex_mech*eij[1]; - fi[2] -= 0.5*Jex_mech*eij[2]; - // fi[0] -= Jex_mech*eij[0]; - // fi[1] -= Jex_mech*eij[1]; - // fi[2] -= Jex_mech*eij[2]; } /* ---------------------------------------------------------------------- compute energy 
of spin pair i and j ------------------------------------------------------------------------- */ -// double PairSpinExchange::compute_energy(int i, int j, double rsq, double spi[3], double spj[3]) -// { -// int *type = atom->type; -// int itype, jtype; -// double Jex, ra; -// double energy = 0.0; -// itype = type[i]; -// jtype = type[j]; -// -// Jex = J1_mech[itype][jtype]; -// ra = rsq/J3[itype][jtype]/J3[itype][jtype]; -// Jex = 4.0*Jex*ra; -// Jex *= (1.0-J2[itype][jtype]*ra); -// Jex *= exp(-ra); -// -// energy = Jex*(spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); -// return energy; -// } +double PairSpinExchange::compute_energy(int i, int j, double rsq, double spi[3], double spj[3]) +{ + int *type = atom->type; + int itype, jtype; + double Jex, ra, sdots; + double energy = 0.0; + itype = type[i]; + jtype = type[j]; + + Jex = J1_mech[itype][jtype]; + ra = rsq/J3[itype][jtype]/J3[itype][jtype]; + Jex = 4.0*Jex*ra; + Jex *= (1.0-J2[itype][jtype]*ra); + Jex *= exp(-ra); + + sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); + + // apply or not energy and force offset + + if (e_offset == 1) { // set offset + energy = 0.5*Jex*(sdots-1.0); + } else if (e_offset == 0) { // no offset ("normal" calculation) + energy = 0.5*Jex*sdots; + } else error->all(FLERR,"Illegal option in pair exchange/biquadratic command"); + + return energy; +} /* ---------------------------------------------------------------------- allocate all arrays @@ -505,6 +554,7 @@ void PairSpinExchange::read_restart(FILE *fp) void PairSpinExchange::write_restart_settings(FILE *fp) { fwrite(&cut_spin_exchange_global,sizeof(double),1,fp); + fwrite(&e_offset,sizeof(int),1,fp); fwrite(&offset_flag,sizeof(int),1,fp); fwrite(&mix_flag,sizeof(int),1,fp); } @@ -517,10 +567,12 @@ void PairSpinExchange::read_restart_settings(FILE *fp) { if (comm->me == 0) { utils::sfread(FLERR,&cut_spin_exchange_global,sizeof(double),1,fp,NULL,error); + utils::sfread(FLERR,&e_offset,sizeof(int),1,fp,NULL,error); 
utils::sfread(FLERR,&offset_flag,sizeof(int),1,fp,NULL,error); utils::sfread(FLERR,&mix_flag,sizeof(int),1,fp,NULL,error); } MPI_Bcast(&cut_spin_exchange_global,1,MPI_DOUBLE,0,world); + MPI_Bcast(&e_offset,1,MPI_INT,0,world); MPI_Bcast(&offset_flag,1,MPI_INT,0,world); MPI_Bcast(&mix_flag,1,MPI_INT,0,world); } diff --git a/src/SPIN/pair_spin_exchange.h b/src/SPIN/pair_spin_exchange.h index 4e9e6bfac8..2a31f9516e 100644 --- a/src/SPIN/pair_spin_exchange.h +++ b/src/SPIN/pair_spin_exchange.h @@ -26,7 +26,7 @@ namespace LAMMPS_NS { class PairSpinExchange : public PairSpin { public: - PairSpinExchange(LAMMPS *lmp) : PairSpin(lmp) {} + PairSpinExchange(class LAMMPS *); virtual ~PairSpinExchange(); void settings(int, char **); void coeff(int, char **); @@ -38,8 +38,7 @@ class PairSpinExchange : public PairSpin { void compute_exchange(int, int, double, double *, double *); void compute_exchange_mech(int, int, double, double *, double *, double *, double *); - - // double compute_energy(int , int , double , double *, double *); + double compute_energy(int , int , double , double *, double *); void write_restart(FILE *); void read_restart(FILE *); @@ -49,6 +48,7 @@ class PairSpinExchange : public PairSpin { double cut_spin_exchange_global; // global exchange cutoff distance protected: + int e_offset; // apply energy offset double **J1_mag; // exchange coeffs in eV double **J1_mech; // mech exchange coeffs in double **J2, **J3; // J1 in eV, J2 adim, J3 in Ang diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp index 3fffb8b58e..4c6c3936cf 100644 --- a/src/SPIN/pair_spin_exchange_biquadratic.cpp +++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp @@ -40,6 +40,14 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ +PairSpinExchangeBiquadratic::PairSpinExchangeBiquadratic(LAMMPS *lmp) : + PairSpin(lmp) +{ + e_offset = 0; +} + +/* 
---------------------------------------------------------------------- */ + PairSpinExchangeBiquadratic::~PairSpinExchangeBiquadratic() { if (allocated) { @@ -66,6 +74,8 @@ void PairSpinExchangeBiquadratic::settings(int narg, char **arg) { PairSpin::settings(narg,arg); + if (narg != 1) error->all(FLERR,"Illegal pair_style command"); + cut_spin_exchange_global = force->numeric(FLERR,arg[0]); // reset cutoffs that have been explicitly set @@ -91,9 +101,9 @@ void PairSpinExchangeBiquadratic::coeff(int narg, char **arg) // check if args correct if (strcmp(arg[2],"biquadratic") != 0) - error->all(FLERR,"Incorrect args in pair_style command"); - if (narg != 10) - error->all(FLERR,"Incorrect args in pair_style command"); + error->all(FLERR,"Incorrect args for pair coefficients"); + if ((narg != 10) && (narg != 12)) + error->all(FLERR,"Incorrect args for pair coefficients"); int ilo,ihi,jlo,jhi; force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi); @@ -101,6 +111,7 @@ void PairSpinExchangeBiquadratic::coeff(int narg, char **arg) // get exchange arguments from input command + int iarg = 10; const double rc = force->numeric(FLERR,arg[3]); const double j1 = force->numeric(FLERR,arg[4]); const double j2 = force->numeric(FLERR,arg[5]); @@ -109,6 +120,19 @@ void PairSpinExchangeBiquadratic::coeff(int narg, char **arg) const double k2 = force->numeric(FLERR,arg[8]); const double k3 = force->numeric(FLERR,arg[9]); + // read energy offset flag if specified + + while (iarg < narg) { + if (strcmp(arg[10],"offset") == 0) { + if (strcmp(arg[11],"yes") == 0) { + e_offset = 1; + } else if (strcmp(arg[11],"no") == 0) { + e_offset = 0; + } else error->all(FLERR,"Incorrect args for pair coefficients"); + iarg += 2; + } else error->all(FLERR,"Incorrect args for pair coefficients"); + } + int count = 0; for (int i = ilo; i <= ihi; i++) { for (int j = MAX(jlo,i); j <= jhi; j++) { @@ -399,8 +423,9 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, { int *type = atom->type; int 
itype,jtype; - double Jex,Jex_mech,Kex,Kex_mech,ra,sdots; + double Jex,Jex_mech,Kex,Kex_mech,sdots; double rja,rka,rjr,rkr,iJ3,iK3; + double fx, fy, fz; itype = type[i]; jtype = type[j]; @@ -422,12 +447,25 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); - fi[0] -= 0.5*(Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[0]; - fi[1] -= 0.5*(Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[1]; - fi[2] -= 0.5*(Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[2]; - // fi[0] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[0]; - // fi[1] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[1]; - // fi[2] -= (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[2]; + // apply or not energy and force offset + + fx = fy = fz = 0.0; + if (e_offset == 1) { // set offset + fx = (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[0]; + fy = (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[1]; + fz = (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[2]; + } else if (e_offset == 0) { // no offset ("normal" calculation) + fx = (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[0]; + fy = (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[1]; + fz = (Jex_mech*sdots + Kex_mech*sdots*sdots)*eij[2]; + } else error->all(FLERR,"Illegal option in pair exchange/biquadratic command"); + + fi[0] -= 0.5*fx; + fi[1] -= 0.5*fy; + fi[2] -= 0.5*fz; + // fi[0] -= fx; + // fi[1] -= fy; + // fi[2] -= fz; } /* ---------------------------------------------------------------------- @@ -463,8 +501,14 @@ double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq, sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); - energy = 0.5*(Jex*(sdots-1.0) + Kex*(sdots*sdots-1.0)); - // energy = 0.5*(Jex*(sdots) + Kex*(sdots*sdots-1.0)); + // apply or not energy and force offset + + if (e_offset == 1) { // set offset + energy = 0.5*(Jex*(sdots-1.0) + Kex*(sdots*sdots-1.0)); + } else 
if (e_offset == 0) { // no offset ("normal" calculation) + energy = 0.5*(Jex*sdots + Kex*sdots*sdots); + } else error->all(FLERR,"Illegal option in pair exchange/biquadratic command"); + return energy; } @@ -571,6 +615,7 @@ void PairSpinExchangeBiquadratic::read_restart(FILE *fp) void PairSpinExchangeBiquadratic::write_restart_settings(FILE *fp) { fwrite(&cut_spin_exchange_global,sizeof(double),1,fp); + fwrite(&e_offset,sizeof(int),1,fp); fwrite(&offset_flag,sizeof(int),1,fp); fwrite(&mix_flag,sizeof(int),1,fp); } @@ -583,10 +628,12 @@ void PairSpinExchangeBiquadratic::read_restart_settings(FILE *fp) { if (comm->me == 0) { utils::sfread(FLERR,&cut_spin_exchange_global,sizeof(double),1,fp,NULL,error); + utils::sfread(FLERR,&e_offset,sizeof(int),1,fp,NULL,error); utils::sfread(FLERR,&offset_flag,sizeof(int),1,fp,NULL,error); utils::sfread(FLERR,&mix_flag,sizeof(int),1,fp,NULL,error); } MPI_Bcast(&cut_spin_exchange_global,1,MPI_DOUBLE,0,world); + MPI_Bcast(&e_offset,1,MPI_INT,0,world); MPI_Bcast(&offset_flag,1,MPI_INT,0,world); MPI_Bcast(&mix_flag,1,MPI_INT,0,world); } diff --git a/src/SPIN/pair_spin_exchange_biquadratic.h b/src/SPIN/pair_spin_exchange_biquadratic.h index 6fb9a7a94c..1074b50f7b 100644 --- a/src/SPIN/pair_spin_exchange_biquadratic.h +++ b/src/SPIN/pair_spin_exchange_biquadratic.h @@ -26,7 +26,7 @@ namespace LAMMPS_NS { class PairSpinExchangeBiquadratic : public PairSpin { public: - PairSpinExchangeBiquadratic(LAMMPS *lmp) : PairSpin(lmp) {} + PairSpinExchangeBiquadratic(class LAMMPS *); virtual ~PairSpinExchangeBiquadratic(); void settings(int, char **); void coeff(int, char **); @@ -48,6 +48,8 @@ class PairSpinExchangeBiquadratic : public PairSpin { double cut_spin_exchange_global; // global exchange cutoff distance protected: + + int e_offset; // apply energy offset double **J1_mag; // H exchange coeffs in eV double **J1_mech; // mech exchange coeffs in double **J2, **J3; // J1 in eV, J2 in Ang-1, J3 in Ang diff --git a/src/SPIN/pair_spin_neel.cpp 
b/src/SPIN/pair_spin_neel.cpp index fc7cb6ab9a..c2377e7aee 100644 --- a/src/SPIN/pair_spin_neel.cpp +++ b/src/SPIN/pair_spin_neel.cpp @@ -612,7 +612,7 @@ double PairSpinNeel::compute_neel_energy(int i, int j, double rsq, double eij[3] eij_sj_3 = eij_sj*eij_sj_2; epq2 = q2r*(eij_si*eij_sj_3+eij_sj*eij_si_3); - return (epd+epq1+epq2); + return 0.5*(epd+epq1+epq2); } /* ---------------------------------------------------------------------- From 1cb0b9dece6e7e07b6dc8f2ba6bbd790b1bbfe9c Mon Sep 17 00:00:00 2001 From: julient31 Date: Mon, 5 Oct 2020 17:11:53 -0600 Subject: [PATCH 12/64] - modified all pairs (if on rcut) - clean KOKKOS from atom spin/kk (other PR) --- src/KOKKOS/Install.sh | 2 - src/KOKKOS/atom_kokkos.cpp | 6 - src/KOKKOS/atom_kokkos.h | 5 - src/KOKKOS/atom_vec_spin_kokkos.cpp | 1308 ------------------- src/KOKKOS/atom_vec_spin_kokkos.h | 132 -- src/KOKKOS/kokkos_type.h | 60 - src/SPIN/pair_spin_dipole_cut.cpp | 48 +- src/SPIN/pair_spin_dipole_long.cpp | 46 +- src/SPIN/pair_spin_dmi.cpp | 49 +- src/SPIN/pair_spin_exchange_biquadratic.cpp | 33 +- src/SPIN/pair_spin_magelec.cpp | 46 +- src/SPIN/pair_spin_neel.cpp | 44 +- 12 files changed, 143 insertions(+), 1636 deletions(-) delete mode 100644 src/KOKKOS/atom_vec_spin_kokkos.cpp delete mode 100644 src/KOKKOS/atom_vec_spin_kokkos.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 4c5c9d7e1d..03508578ae 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -63,8 +63,6 @@ action atom_vec_bond_kokkos.cpp atom_vec_bond.cpp action atom_vec_bond_kokkos.h atom_vec_bond.h action atom_vec_charge_kokkos.cpp action atom_vec_charge_kokkos.h -action atom_vec_spin_kokkos.cpp -action atom_vec_spin_kokkos.h action atom_vec_dpd_kokkos.cpp atom_vec_dpd.cpp action atom_vec_dpd_kokkos.h atom_vec_dpd.h action atom_vec_full_kokkos.cpp atom_vec_full.cpp diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index b85b063190..a587494d09 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ 
b/src/KOKKOS/atom_kokkos.cpp @@ -76,12 +76,6 @@ AtomKokkos::~AtomKokkos() memoryKK->destroy_kokkos(k_improper_atom3, improper_atom3); memoryKK->destroy_kokkos(k_improper_atom4, improper_atom4); - // SPIN package - - memoryKK->destroy_kokkos(k_sp, sp); - memoryKK->destroy_kokkos(k_fm, fm); - memoryKK->destroy_kokkos(k_fm_long, fm_long); - // USER-DPD package memoryKK->destroy_kokkos(k_uCond,uCond); memoryKK->destroy_kokkos(k_uMech,uMech); diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index e2c666fea5..6eebbad661 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -54,11 +54,6 @@ class AtomKokkos : public Atom { DAT::tdual_float_2d k_dvector; - // SPIN package - - DAT::tdual_sp_array k_sp; - DAT::tdual_fm_array k_fm; - DAT::tdual_fm_long_array k_fm_long; // USER-DPD package DAT::tdual_efloat_1d k_uCond, k_uMech, k_uChem, k_uCG, k_uCGnew, diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp deleted file mode 100644 index 6ed62c0242..0000000000 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ /dev/null @@ -1,1308 +0,0 @@ -/* ---------------------------------------------------------------------- - - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. - -------------------------------------------------------------------------- */ - -/* ------------------------------------------------------------------------ - Contributing authors: Julien Tranchida (SNL) - Aidan Thompson (SNL) - - Please cite the related publication: - Tranchida, J., Plimpton, S. J., Thibaudeau, P., & Thompson, A. P. (2018). 
- Massively parallel symplectic algorithm for coupled magnetic spin dynamics - and molecular dynamics. Journal of Computational Physics. -------------------------------------------------------------------------- */ - -#include "atom_vec_spin_kokkos.h" -#include "atom_kokkos.h" -#include "comm_kokkos.h" -#include "domain.h" -#include "modify.h" -#include "fix.h" -#include "atom_masks.h" -#include "memory_kokkos.h" -#include "error.h" -#include "utils.h" - -using namespace LAMMPS_NS; - -#define DELTA 10 - -/* ---------------------------------------------------------------------- */ - -AtomVecSpinKokkos::AtomVecSpinKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) -{ - molecular = 0; - mass_type = 1; - forceclearflag = 1; - - comm_x_only = comm_f_only = 0; - size_forward = 7; - size_reverse = 9; - size_border = 10; - size_velocity = 3; - size_data_atom = 9; - size_data_vel = 4; - xcol_data = 4; - - atom->sp_flag = 1; - - k_count = DAT::tdual_int_1d("atom::k_count",1); - atomKK = (AtomKokkos *) atom; - commKK = (CommKokkos *) comm; -} - -/* ---------------------------------------------------------------------- - grow atom arrays - n = 0 grows arrays by a chunk - n > 0 allocates arrays to size n -------------------------------------------------------------------------- */ - -void AtomVecSpinKokkos::grow(int n) -{ - int step = MAX(DELTA,nmax*0.01); - if (n == 0) nmax += step; - else nmax = n; - atomKK->nmax = nmax; - if (nmax < 0 || nmax > MAXSMALLINT) - error->one(FLERR,"Per-processor system is too big"); - - atomKK->sync(Device,ALL_MASK); - atomKK->modified(Device,ALL_MASK); - - memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); - memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); - memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); - memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - - // allocating mech. 
quantities - - memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,"atom:x"); - memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,"atom:v"); - memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,"atom:f"); - - // allocating mag. quantities - - memoryKK->grow_kokkos(atomKK->k_sp,atomKK->sp,nmax,"atom:sp"); - memoryKK->grow_kokkos(atomKK->k_fm,atomKK->fm,nmax,"atom:fm"); - memoryKK->grow_kokkos(atomKK->k_fm_long,atomKK->fm_long,nmax,"atom:fm_long"); - - if (atom->nextra_grow) - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) - modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); -} - -/* ---------------------------------------------------------------------- - reset local array ptrs -------------------------------------------------------------------------- */ - -void AtomVecSpinKokkos::grow_reset() -{ - tag = atomKK->tag; - d_tag = atomKK->k_tag.d_view; - h_tag = atomKK->k_tag.h_view; - - type = atomKK->type; - d_type = atomKK->k_type.d_view; - h_type = atomKK->k_type.h_view; - mask = atomKK->mask; - d_mask = atomKK->k_mask.d_view; - h_mask = atomKK->k_mask.h_view; - image = atomKK->image; - d_image = atomKK->k_image.d_view; - h_image = atomKK->k_image.h_view; - - x = atomKK->x; - d_x = atomKK->k_x.d_view; - h_x = atomKK->k_x.h_view; - v = atomKK->v; - d_v = atomKK->k_v.d_view; - h_v = atomKK->k_v.h_view; - f = atomKK->f; - d_f = atomKK->k_f.d_view; - h_f = atomKK->k_f.h_view; - - sp = atomKK->sp; - d_sp = atomKK->k_sp.d_view; - h_sp = atomKK->k_sp.h_view; - fm = atomKK->fm; - d_fm = atomKK->k_fm.d_view; - h_fm = atomKK->k_fm.h_view; - fm_long = atomKK->fm_long; - d_fm_long = atomKK->k_fm_long.d_view; - h_fm_long = atomKK->k_fm_long.h_view; -} - -/* ---------------------------------------------------------------------- - copy atom I info to atom J -------------------------------------------------------------------------- */ - -void AtomVecSpinKokkos::copy(int i, int j, int delflag) -{ - h_tag[j] = h_tag[i]; - h_type[j] = h_type[i]; - mask[j] = mask[i]; - 
h_image[j] = h_image[i]; - h_x(j,0) = h_x(i,0); - h_x(j,1) = h_x(i,1); - h_x(j,2) = h_x(i,2); - h_v(j,0) = h_v(i,0); - h_v(j,1) = h_v(i,1); - h_v(j,2) = h_v(i,2); - - h_sp(j,0) = h_sp(i,0); - h_sp(j,1) = h_sp(i,1); - h_sp(j,2) = h_sp(i,2); - h_sp(j,3) = h_sp(i,3); - - if (atom->nextra_grow) - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) - modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSpinKokkos_PackComm { - typedef DeviceType device_type; - - typename ArrayTypes::t_x_array_randomread _x; - typename ArrayTypes::t_sp_array_randomread _sp; - typename ArrayTypes::t_xfloat_2d_um _buf; - typename ArrayTypes::t_int_2d_const _list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - AtomVecSpinKokkos_PackComm( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_sp_array &sp, - const typename DAT::tdual_xfloat_2d &buf, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view()),_sp(sp.view()), - _list(list.view()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - // const size_t elements = 3; - const size_t elements = 7; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = _sp(j,0); - _buf(i,4) = _sp(j,1); - _buf(i,5) = _sp(j,2); - _buf(i,6) = _sp(j,3); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + 
_pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - _buf(i,3) = _sp(j,0); - _buf(i,4) = _sp(j,1); - _buf(i,5) = _sp(j,2); - _buf(i,6) = _sp(j,3); - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - _buf(i,3) = _sp(j,0); - _buf(i,4) = _sp(j,1); - _buf(i,5) = _sp(j,2); - _buf(i,6) = _sp(j,3); - } - } - } -}; - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSpinKokkos_PackBorder { - typedef DeviceType device_type; - - typename ArrayTypes::t_xfloat_2d _buf; - const typename ArrayTypes::t_int_2d_const _list; - const int _iswap; - const typename ArrayTypes::t_x_array_randomread _x; - const typename ArrayTypes::t_tagint_1d _tag; - const typename ArrayTypes::t_int_1d _type; - const typename ArrayTypes::t_int_1d _mask; - const typename ArrayTypes::t_sp_array_randomread _sp; - X_FLOAT _dx,_dy,_dz; - - AtomVecSpinKokkos_PackBorder( - const typename ArrayTypes::t_xfloat_2d &buf, - const typename ArrayTypes::t_int_2d_const &list, - const int & iswap, - const typename ArrayTypes::t_x_array &x, - const typename ArrayTypes::t_tagint_1d &tag, - const typename ArrayTypes::t_int_1d &type, - const typename ArrayTypes::t_int_1d &mask, - const typename ArrayTypes::t_sp_array &sp, - const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): - _buf(buf),_list(list),_iswap(iswap), - _x(x),_tag(tag),_type(type),_mask(mask),_sp(sp), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _sp(j,0); - _buf(i,7) = _sp(j,1); - _buf(i,8) = _sp(j,2); - _buf(i,9) = _sp(j,3); - } else { - _buf(i,0) = _x(j,0) 
+ _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _sp(j,0); - _buf(i,7) = _sp(j,1); - _buf(i,8) = _sp(j,2); - _buf(i,9) = _sp(j,3); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - X_FLOAT dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if(space==Host) { - AtomVecSpinKokkos_PackBorder f( - buf.view(), k_sendlist.view(), - iswap,h_x,h_tag,h_type,h_mask,h_sp,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecSpinKokkos_PackBorder f( - buf.view(), k_sendlist.view(), - iswap,d_x,d_tag,d_type,d_mask,d_sp,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if(space==Host) { - AtomVecSpinKokkos_PackBorder f( - buf.view(), k_sendlist.view(), - iswap,h_x,h_tag,h_type,h_mask,h_sp,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecSpinKokkos_PackBorder f( - buf.view(), k_sendlist.view(), - iswap,d_x,d_tag,d_type,d_mask,d_sp,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::pack_border(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - buf[m++] = ubuf(h_tag(j)).d; - buf[m++] = ubuf(h_type(j)).d; - buf[m++] = ubuf(h_mask(j)).d; - buf[m++] = h_sp(j,0); - buf[m++] = h_sp(j,1); - buf[m++] = h_sp(j,2); - buf[m++] = 
h_sp(j,3); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - buf[m++] = ubuf(h_tag(j)).d; - buf[m++] = ubuf(h_type(j)).d; - buf[m++] = ubuf(h_mask(j)).d; - buf[m++] = h_sp(j,0); - buf[m++] = h_sp(j,1); - buf[m++] = h_sp(j,2); - buf[m++] = h_sp(j,3); - } - } - - if (atom->nextra_border) - for (int iextra = 0; iextra < atom->nextra_border; iextra++) - m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); - - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::pack_border_vel(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz,dvx,dvy,dvz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - buf[m++] = ubuf(h_tag(j)).d; - buf[m++] = ubuf(h_type(j)).d; - buf[m++] = ubuf(h_mask(j)).d; - buf[m++] = h_sp(j,0); - buf[m++] = h_sp(j,1); - buf[m++] = h_sp(j,2); - buf[m++] = h_sp(j,3); - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (!deform_vremap) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - buf[m++] = ubuf(h_tag(j)).d; - buf[m++] = ubuf(h_type(j)).d; - buf[m++] = ubuf(h_mask(j)).d; - buf[m++] = h_sp(j,0); - buf[m++] = h_sp(j,1); - buf[m++] = h_sp(j,2); - buf[m++] = h_sp(j,3); - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } else { - dvx = pbc[0]*h_rate[0] + 
pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; - dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; - dvz = pbc[2]*h_rate[2]; - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - buf[m++] = ubuf(h_tag(j)).d; - buf[m++] = ubuf(h_type(j)).d; - buf[m++] = ubuf(h_mask(j)).d; - buf[m++] = h_sp(j,0); - buf[m++] = h_sp(j,1); - buf[m++] = h_sp(j,2); - buf[m++] = h_sp(j,3); - if (mask[i] & deform_groupbit) { - buf[m++] = h_v(j,0) + dvx; - buf[m++] = h_v(j,1) + dvy; - buf[m++] = h_v(j,2) + dvz; - } else { - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - } - } - } - - if (atom->nextra_border) - for (int iextra = 0; iextra < atom->nextra_border; iextra++) - m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); - - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::pack_border_hybrid(int n, int *list, double *buf) -{ - int i,j,m; - - m = 0; - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_sp(j,0); - buf[m++] = h_sp(j,1); - buf[m++] = h_sp(j,2); - buf[m++] = h_sp(j,3); - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSpinKokkos_UnpackBorder { - typedef DeviceType device_type; - - const typename ArrayTypes::t_xfloat_2d_const _buf; - typename ArrayTypes::t_x_array _x; - typename ArrayTypes::t_tagint_1d _tag; - typename ArrayTypes::t_int_1d _type; - typename ArrayTypes::t_int_1d _mask; - typename ArrayTypes::t_sp_array _sp; - int _first; - - - AtomVecSpinKokkos_UnpackBorder( - const typename ArrayTypes::t_xfloat_2d_const &buf, - typename ArrayTypes::t_x_array &x, - typename ArrayTypes::t_tagint_1d &tag, - typename ArrayTypes::t_int_1d &type, - typename ArrayTypes::t_int_1d &mask, - typename ArrayTypes::t_sp_array &sp, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_sp(sp),_first(first){ - }; - - 
KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - _sp(i+_first,0) = _buf(i,6); - _sp(i+_first,1) = _buf(i,7); - _sp(i+_first,2) = _buf(i,8); - _sp(i+_first,3) = _buf(i,9); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecSpinKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { - if (first+n >= nmax) { - grow(first+n+100); - } - if(space==Host) { - struct AtomVecSpinKokkos_UnpackBorder - f(buf.view(),h_x,h_tag,h_type,h_mask,h_sp,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSpinKokkos_UnpackBorder - f(buf.view(),d_x,d_tag,d_type,d_mask,d_sp,first); - Kokkos::parallel_for(n,f); - } - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|SP_MASK); -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecSpinKokkos::unpack_border(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - - for (i = first; i < last; i++) { - if (i == nmax) { - grow(0); - } - atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|SP_MASK); - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - h_tag(i) = (tagint) ubuf(buf[m++]).i; - h_type(i) = (int) ubuf(buf[m++]).i; - h_mask(i) = (int) ubuf(buf[m++]).i; - h_sp(i,0) = buf[m++]; - h_sp(i,1) = buf[m++]; - h_sp(i,2) = buf[m++]; - h_sp(i,3) = buf[m++]; - } - - if (atom->nextra_border) - for (int iextra = 0; iextra < atom->nextra_border; iextra++) - m += modify->fix[atom->extra_border[iextra]]-> - unpack_border(n,first,&buf[m]); -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecSpinKokkos::unpack_border_vel(int n, int first, 
double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - if (i == nmax) grow(0); - atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|SP_MASK); - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - h_tag(i) = (tagint) ubuf(buf[m++]).i; - h_type(i) = (int) ubuf(buf[m++]).i; - h_mask(i) = (int) ubuf(buf[m++]).i; - h_sp(i,0) = buf[m++]; - h_sp(i,1) = buf[m++]; - h_sp(i,2) = buf[m++]; - h_sp(i,3) = buf[m++]; - h_v(i,0) = buf[m++]; - h_v(i,1) = buf[m++]; - h_v(i,2) = buf[m++]; - } - - if (atom->nextra_border) - for (int iextra = 0; iextra < atom->nextra_border; iextra++) - m += modify->fix[atom->extra_border[iextra]]-> - unpack_border(n,first,&buf[m]); -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::unpack_border_hybrid(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) - h_sp(i,0) = buf[m++]; - h_sp(i,1) = buf[m++]; - h_sp(i,2) = buf[m++]; - h_sp(i,3) = buf[m++]; - return m; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSpinKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_x_array_randomread _x; - typename AT::t_v_array_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_sp_array_randomread _sp; - typename AT::t_x_array _xw; - typename AT::t_v_array _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_sp_array _spw; - - typename AT::t_xfloat_2d_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; - - 
AtomVecSpinKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _sp(atom->k_sp.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _spw(atom->k_sp.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi){ - const size_t elements = 15; - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/elements; - - buffer_view(_buf,buf,maxsendlist,elements); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - const int i = _sendlist(mysend); - _buf(mysend,0) = 15; - _buf(mysend,1) = _x(i,0); - _buf(mysend,2) = _x(i,1); - _buf(mysend,3) = _x(i,2); - _buf(mysend,4) = _v(i,0); - _buf(mysend,5) = _v(i,1); - _buf(mysend,6) = _v(i,2); - _buf(mysend,7) = d_ubuf(_tag[i]).d; - _buf(mysend,8) = d_ubuf(_type[i]).d; - _buf(mysend,9) = d_ubuf(_mask[i]).d; - _buf(mysend,10) = d_ubuf(_image[i]).d; - _buf(mysend,11) = _sp(i,0); - _buf(mysend,12) = _sp(i,1); - _buf(mysend,13) = _sp(i,2); - _buf(mysend,14) = _sp(i,3); - const int j = _copylist(mysend); - - if(j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - _maskw(i) = _mask(j); - _imagew(i) = _image(j); - _spw(i,0) = _sp(j,0); - _spw(i,1) = _sp(j,1); - _spw(i,2) = _sp(j,2); - _spw(i,3) = _sp(j,3); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int 
AtomVecSpinKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space,int dim, - X_FLOAT lo,X_FLOAT hi ) -{ - if(nsend > (int) (k_buf.view().extent(0)*k_buf.view().extent(1))/15) { - int newsize = nsend*15/k_buf.view().extent(1)+1; - k_buf.resize(newsize,k_buf.view().extent(1)); - } - if(space == Host) { - AtomVecSpinKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); - Kokkos::parallel_for(nsend,f); - return nsend*15; - } else { - AtomVecSpinKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); - Kokkos::parallel_for(nsend,f); - return nsend*15; - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::pack_exchange(int i, double *buf) -{ - int m = 1; - buf[m++] = h_x(i,0); - buf[m++] = h_x(i,1); - buf[m++] = h_x(i,2); - buf[m++] = h_v(i,0); - buf[m++] = h_v(i,1); - buf[m++] = h_v(i,2); - buf[m++] = ubuf(h_tag(i)).d; - buf[m++] = ubuf(h_type(i)).d; - buf[m++] = ubuf(h_mask(i)).d; - buf[m++] = ubuf(h_image(i)).d; - buf[m++] = h_sp(i,0); - buf[m++] = h_sp(i,1); - buf[m++] = h_sp(i,2); - buf[m++] = h_sp(i,3); - - if (atom->nextra_grow) - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) - m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]); - - buf[0] = m; - return m; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSpinKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_x_array _x; - typename AT::t_v_array _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_sp_array _sp; - typename AT::t_xfloat_2d_um _buf; - typename AT::t_int_1d _nlocal; - int _dim; - X_FLOAT _lo,_hi; - - 
AtomVecSpinKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d nlocal, - int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _sp(atom->k_sp.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi){ - const size_t elements = 15; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/elements; - - buffer_view(_buf,buf,maxsendlist,elements); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - X_FLOAT x = _buf(myrecv,_dim+1); - if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - _x(i,0) = _buf(myrecv,1); - _x(i,1) = _buf(myrecv,2); - _x(i,2) = _buf(myrecv,3); - _v(i,0) = _buf(myrecv,4); - _v(i,1) = _buf(myrecv,5); - _v(i,2) = _buf(myrecv,6); - _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i; - _type[i] = (int) d_ubuf(_buf(myrecv,8)).i; - _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; - _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; - _sp(i,0) = _buf(myrecv,11); - _sp(i,1) = _buf(myrecv,12); - _sp(i,2) = _buf(myrecv,13); - _sp(i,3) = _buf(myrecv,14); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, - int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, - ExecutionSpace space) { - if(space == Host) { - k_count.h_view(0) = nlocal; - AtomVecSpinKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/15,f); - return k_count.h_view(0); - } else { - k_count.h_view(0) = nlocal; - k_count.modify(); - k_count.sync(); - AtomVecSpinKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/15,f); - k_count.modify(); - k_count.sync(); - - return k_count.h_view(0); - } -} - 
-/* ---------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::unpack_exchange(double *buf) -{ - int nlocal = atom->nlocal; - if (nlocal == nmax) grow(0); - atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK | SP_MASK); - - int m = 1; - h_x(nlocal,0) = buf[m++]; - h_x(nlocal,1) = buf[m++]; - h_x(nlocal,2) = buf[m++]; - h_v(nlocal,0) = buf[m++]; - h_v(nlocal,1) = buf[m++]; - h_v(nlocal,2) = buf[m++]; - h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; - h_type(nlocal) = (int) ubuf(buf[m++]).i; - h_mask(nlocal) = (int) ubuf(buf[m++]).i; - h_image(nlocal) = (imageint) ubuf(buf[m++]).i; - h_sp(nlocal,0) = buf[m++]; - h_sp(nlocal,1) = buf[m++]; - h_sp(nlocal,2) = buf[m++]; - h_sp(nlocal,3) = buf[m++]; - - if (atom->nextra_grow) - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) - m += modify->fix[atom->extra_grow[iextra]]-> - unpack_exchange(nlocal,&buf[m]); - - atom->nlocal++; - return m; -} - -/* ---------------------------------------------------------------------- - size of restart data for all atoms owned by this proc - include extra data stored by fixes -------------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::size_restart() -{ - int i; - - int nlocal = atom->nlocal; - int n = 15 * nlocal; - - if (atom->nextra_restart) - for (int iextra = 0; iextra < atom->nextra_restart; iextra++) - for (i = 0; i < nlocal; i++) - n += modify->fix[atom->extra_restart[iextra]]->size_restart(i); - - return n; -} - -/* ---------------------------------------------------------------------- - pack atom I's data for restart file including extra quantities - xyz must be 1st 3 values, so that read_restart can test on them - molecular types may be negative, but write as positive -------------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::pack_restart(int i, double *buf) -{ - atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | 
TYPE_MASK | - MASK_MASK | IMAGE_MASK | SP_MASK); - - int m = 1; - buf[m++] = h_x(i,0); - buf[m++] = h_x(i,1); - buf[m++] = h_x(i,2); - buf[m++] = ubuf(h_tag(i)).d; - buf[m++] = ubuf(h_type(i)).d; - buf[m++] = ubuf(h_mask(i)).d; - buf[m++] = ubuf(h_image(i)).d; - buf[m++] = h_v(i,0); - buf[m++] = h_v(i,1); - buf[m++] = h_v(i,2); - - buf[m++] = h_sp(i,0); - buf[m++] = h_sp(i,1); - buf[m++] = h_sp(i,2); - buf[m++] = h_sp(i,3); - - if (atom->nextra_restart) - for (int iextra = 0; iextra < atom->nextra_restart; iextra++) - m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]); - - buf[0] = m; - return m; -} - -/* ---------------------------------------------------------------------- - unpack data for one atom from restart file including extra quantities -------------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::unpack_restart(double *buf) -{ - int nlocal = atom->nlocal; - if (nlocal == nmax) { - grow(0); - if (atom->nextra_store) - memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); - } - - atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK | SP_MASK); - - int m = 1; - h_x(nlocal,0) = buf[m++]; - h_x(nlocal,1) = buf[m++]; - h_x(nlocal,2) = buf[m++]; - h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; - h_type(nlocal) = (int) ubuf(buf[m++]).i; - h_mask(nlocal) = (int) ubuf(buf[m++]).i; - h_image(nlocal) = (imageint) ubuf(buf[m++]).i; - h_v(nlocal,0) = buf[m++]; - h_v(nlocal,1) = buf[m++]; - h_v(nlocal,2) = buf[m++]; - - h_sp(nlocal,0) = buf[m++]; - h_sp(nlocal,1) = buf[m++]; - h_sp(nlocal,2) = buf[m++]; - h_sp(nlocal,3) = buf[m++]; - - double **extra = atom->extra; - if (atom->nextra_store) { - int size = static_cast (buf[0]) - m; - for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; - } - - atom->nlocal++; - return m; -} - -/* ---------------------------------------------------------------------- - create one atom of itype at coord - set other values to 
defaults -------------------------------------------------------------------------- */ - -void AtomVecSpinKokkos::create_atom(int itype, double *coord) -{ - int nlocal = atom->nlocal; - if (nlocal == nmax) { - atomKK->modified(Host,ALL_MASK); - grow(0); - } - atomKK->sync(Host,ALL_MASK); - atomKK->modified(Host,ALL_MASK); - - tag[nlocal] = 0; - type[nlocal] = itype; - h_x(nlocal,0) = coord[0]; - h_x(nlocal,1) = coord[1]; - h_x(nlocal,2) = coord[2]; - h_mask[nlocal] = 1; - h_image[nlocal] = ((imageint) IMGMAX << IMG2BITS) | - ((imageint) IMGMAX << IMGBITS) | IMGMAX; - h_v(nlocal,0) = 0.0; - h_v(nlocal,1) = 0.0; - h_v(nlocal,2) = 0.0; - - h_sp(nlocal,0) = 0.0; - h_sp(nlocal,1) = 0.0; - h_sp(nlocal,2) = 0.0; - h_sp(nlocal,3) = 0.0; - - atom->nlocal++; -} - -/* ---------------------------------------------------------------------- - unpack one line from Atoms section of data file - initialize other atom quantities -------------------------------------------------------------------------- */ - -void AtomVecSpinKokkos::data_atom(double *coord, imageint imagetmp, - char **values) -{ - int nlocal = atom->nlocal; - if (nlocal == nmax) grow(0); - - h_tag[nlocal] = utils::inumeric(FLERR,values[0],true,lmp); - h_type[nlocal] = utils::inumeric(FLERR,values[1],true,lmp); - if (type[nlocal] <= 0 || type[nlocal] > atom->ntypes) - error->one(FLERR,"Invalid atom type in Atoms section of data file"); - - h_sp(nlocal,3) = utils::numeric(FLERR,values[2],true,lmp); - h_sp(nlocal,0) = utils::numeric(FLERR,values[6],true,lmp); - h_sp(nlocal,1) = utils::numeric(FLERR,values[7],true,lmp); - h_sp(nlocal,2) = utils::numeric(FLERR,values[8],true,lmp); - double inorm = 1.0/sqrt(sp[nlocal][0]*sp[nlocal][0] + - sp[nlocal][1]*sp[nlocal][1] + - sp[nlocal][2]*sp[nlocal][2]); - h_sp(nlocal,0) *= inorm; - h_sp(nlocal,1) *= inorm; - h_sp(nlocal,2) *= inorm; - - h_x(nlocal,0) = coord[0]; - h_x(nlocal,1) = coord[1]; - h_x(nlocal,2) = coord[2]; - - h_image[nlocal] = imagetmp; - - h_mask[nlocal] = 1; - 
h_v(nlocal,0) = 0.0; - h_v(nlocal,1) = 0.0; - h_v(nlocal,2) = 0.0; - - atomKK->modified(Host,ALL_MASK); - - atom->nlocal++; -} - -/* ---------------------------------------------------------------------- - unpack hybrid quantities from one line in Atoms section of data file - initialize other atom quantities for this sub-style -------------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::data_atom_hybrid(int nlocal, char **values) -{ - h_sp(nlocal,3) = utils::numeric(FLERR,values[0],true,lmp); - h_sp(nlocal,0) = utils::numeric(FLERR,values[1],true,lmp); - h_sp(nlocal,1) = utils::numeric(FLERR,values[2],true,lmp); - h_sp(nlocal,2) = utils::numeric(FLERR,values[3],true,lmp); - double inorm = 1.0/sqrt(sp[nlocal][0]*sp[nlocal][0] + - sp[nlocal][1]*sp[nlocal][1] + - sp[nlocal][2]*sp[nlocal][2]); - sp[nlocal][0] *= inorm; - sp[nlocal][1] *= inorm; - sp[nlocal][2] *= inorm; - - return 4; -} - -/* ---------------------------------------------------------------------- - pack atom info for data file including 3 image flags -------------------------------------------------------------------------- */ - -void AtomVecSpinKokkos::pack_data(double **buf) -{ - int nlocal = atom->nlocal; - for (int i = 0; i < nlocal; i++) { - buf[i][0] = h_tag[i]; - buf[i][1] = h_type[i]; - buf[i][2] = h_sp(i,0); - buf[i][3] = h_x(i,0); - buf[i][4] = h_x(i,1); - buf[i][5] = h_x(i,2); - buf[i][2] = h_sp(i,1); - buf[i][2] = h_sp(i,2); - buf[i][2] = h_sp(i,3); - buf[i][6] = (h_image[i] & IMGMASK) - IMGMAX; - buf[i][7] = (h_image[i] >> IMGBITS & IMGMASK) - IMGMAX; - buf[i][8] = (h_image[i] >> IMG2BITS) - IMGMAX; - } -} - -/* ---------------------------------------------------------------------- - pack hybrid atom info for data file -------------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::pack_data_hybrid(int i, double *buf) -{ - buf[0] = h_sp(i,3); - buf[1] = h_sp(i,0); - buf[2] = h_sp(i,1); - buf[3] = h_sp(i,2); - 
return 4; -} - -/* ---------------------------------------------------------------------- - write atom info to data file including 3 image flags -------------------------------------------------------------------------- */ - -void AtomVecSpinKokkos::write_data(FILE *fp, int n, double **buf) -{ - for (int i = 0; i < n; i++) - fprintf(fp,"%d %d %-1.16e %-1.16e %-1.16e %-1.16e %d %d %d\n", - (int) buf[i][0],(int) buf[i][1],buf[i][2],buf[i][3],buf[i][4], - buf[i][5],(int) buf[i][6],(int) buf[i][7],(int) buf[i][8]); -} - -/* ---------------------------------------------------------------------- - write hybrid atom info to data file -------------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::write_data_hybrid(FILE *fp, double *buf) -{ - fprintf(fp," %-1.16e %-1.16e %-1.16e %-1.16e",buf[0],buf[1],buf[2],buf[3]); - return 4; -} - -/* ---------------------------------------------------------------------- - return # of bytes of allocated memory -------------------------------------------------------------------------- */ - -bigint AtomVecSpinKokkos::memory_usage() -{ - bigint bytes = 0; - - if (atom->memcheck("tag")) bytes += memory->usage(tag,nmax); - if (atom->memcheck("type")) bytes += memory->usage(type,nmax); - if (atom->memcheck("mask")) bytes += memory->usage(mask,nmax); - if (atom->memcheck("image")) bytes += memory->usage(image,nmax); - if (atom->memcheck("x")) bytes += memory->usage(x,nmax,3); - if (atom->memcheck("v")) bytes += memory->usage(v,nmax,3); - if (atom->memcheck("f")) bytes += memory->usage(f,nmax*commKK->nthreads,3); - - if (atom->memcheck("sp")) bytes += memory->usage(sp,nmax,4); - if (atom->memcheck("fm")) bytes += memory->usage(fm,nmax*comm->nthreads,3); - if (atom->memcheck("fm_long")) bytes += memory->usage(fm_long,nmax*comm->nthreads,3); - - return bytes; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecSpinKokkos::sync(ExecutionSpace space, unsigned int 
mask) -{ - if (space == Device) { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - if (mask & SP_MASK) atomKK->k_sp.sync(); - if (mask & FM_MASK) atomKK->k_fm.sync(); - if (mask & FML_MASK) atomKK->k_fm_long.sync(); - } else { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - if (mask & SP_MASK) atomKK->k_sp.sync(); - if (mask & FM_MASK) atomKK->k_fm.sync(); - if (mask & FML_MASK) atomKK->k_fm_long.sync(); - } -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecSpinKokkos::modified(ExecutionSpace space, unsigned int mask) -{ - if (space == Device) { - if (mask & X_MASK) atomKK->k_x.modify(); - if (mask & V_MASK) atomKK->k_v.modify(); - if (mask & F_MASK) atomKK->k_f.modify(); - if (mask & TAG_MASK) atomKK->k_tag.modify(); - if (mask & TYPE_MASK) atomKK->k_type.modify(); - if (mask & MASK_MASK) atomKK->k_mask.modify(); - if (mask & IMAGE_MASK) atomKK->k_image.modify(); - if (mask & SP_MASK) atomKK->k_sp.modify(); - if (mask & FM_MASK) atomKK->k_fm.modify(); - if (mask & FML_MASK) atomKK->k_fm_long.modify(); - } else { - if (mask & X_MASK) atomKK->k_x.modify(); - if (mask & V_MASK) atomKK->k_v.modify(); - if (mask & F_MASK) atomKK->k_f.modify(); - if (mask & TAG_MASK) atomKK->k_tag.modify(); - if (mask & TYPE_MASK) atomKK->k_type.modify(); - if (mask & MASK_MASK) atomKK->k_mask.modify(); - if (mask & IMAGE_MASK) atomKK->k_image.modify(); - if (mask & SP_MASK) atomKK->k_sp.modify(); - if (mask & FM_MASK) 
atomKK->k_fm.modify(); - if (mask & FML_MASK) atomKK->k_fm_long.modify(); - } -} - -void AtomVecSpinKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask) -{ - if (space == Device) { - if ((mask & X_MASK) && atomKK->k_x.need_sync()) - perform_async_copy(atomKK->k_x,space); - if ((mask & V_MASK) && atomKK->k_v.need_sync()) - perform_async_copy(atomKK->k_v,space); - if ((mask & F_MASK) && atomKK->k_f.need_sync()) - perform_async_copy(atomKK->k_f,space); - if ((mask & TAG_MASK) && atomKK->k_tag.need_sync()) - perform_async_copy(atomKK->k_tag,space); - if ((mask & TYPE_MASK) && atomKK->k_type.need_sync()) - perform_async_copy(atomKK->k_type,space); - if ((mask & MASK_MASK) && atomKK->k_mask.need_sync()) - perform_async_copy(atomKK->k_mask,space); - if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync()) - perform_async_copy(atomKK->k_image,space); - if ((mask & SP_MASK) && atomKK->k_sp.need_sync()) - perform_async_copy(atomKK->k_sp,space); - if ((mask & FM_MASK) && atomKK->k_sp.need_sync()) - perform_async_copy(atomKK->k_fm,space); - if ((mask & FML_MASK) && atomKK->k_fm_long.need_sync()) - perform_async_copy(atomKK->k_fm_long,space); - } else { - if ((mask & X_MASK) && atomKK->k_x.need_sync()) - perform_async_copy(atomKK->k_x,space); - if ((mask & V_MASK) && atomKK->k_v.need_sync()) - perform_async_copy(atomKK->k_v,space); - if ((mask & F_MASK) && atomKK->k_f.need_sync()) - perform_async_copy(atomKK->k_f,space); - if ((mask & TAG_MASK) && atomKK->k_tag.need_sync()) - perform_async_copy(atomKK->k_tag,space); - if ((mask & TYPE_MASK) && atomKK->k_type.need_sync()) - perform_async_copy(atomKK->k_type,space); - if ((mask & MASK_MASK) && atomKK->k_mask.need_sync()) - perform_async_copy(atomKK->k_mask,space); - if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync()) - perform_async_copy(atomKK->k_image,space); - if ((mask & SP_MASK) && atomKK->k_sp.need_sync()) - perform_async_copy(atomKK->k_sp,space); - if ((mask & FM_MASK) && atomKK->k_fm.need_sync()) - 
perform_async_copy(atomKK->k_fm,space); - if ((mask & FML_MASK) && atomKK->k_fm_long.need_sync()) - perform_async_copy(atomKK->k_fm_long,space); - } -} - -/* ---------------------------------------------------------------------- - clear all forces (mech and mag) -------------------------------------------------------------------------- */ - -void AtomVecSpinKokkos::force_clear(int /*n*/, size_t nbytes) -{ - memset(&atom->f[0][0],0,3*nbytes); - memset(&atom->fm[0][0],0,3*nbytes); - memset(&atom->fm_long[0][0],0,3*nbytes); -} diff --git a/src/KOKKOS/atom_vec_spin_kokkos.h b/src/KOKKOS/atom_vec_spin_kokkos.h deleted file mode 100644 index d439424076..0000000000 --- a/src/KOKKOS/atom_vec_spin_kokkos.h +++ /dev/null @@ -1,132 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#ifdef ATOM_CLASS - -AtomStyle(spin/kk,AtomVecSpinKokkos) -AtomStyle(spin/kk/device,AtomVecSpinKokkos) -AtomStyle(spin/kk/host,AtomVecSpinKokkos) - -#else - -#ifndef LMP_ATOM_VEC_SPIN_KOKKOS_H -#define LMP_ATOM_VEC_SPIN_KOKKOS_H - -#include "atom_vec_kokkos.h" -#include "kokkos_type.h" - -namespace LAMMPS_NS { - -class AtomVecSpinKokkos : public AtomVecKokkos { - public: - AtomVecSpinKokkos(class LAMMPS *); - void grow(int); - void copy(int, int, int); - int pack_border(int, int *, double *, int, int *); - int pack_border_vel(int, int *, double *, int, int *); - int pack_border_hybrid(int, int *, double *); - void unpack_border(int, int, double *); - void unpack_border_vel(int, int, double *); - int unpack_border_hybrid(int, int, double *); - int pack_exchange(int, double *); - int unpack_exchange(double *); - int size_restart(); - int pack_restart(int, double *); - int unpack_restart(double *); - void create_atom(int, double *); - void data_atom(double *, imageint, char **); - int data_atom_hybrid(int, char **); - void pack_data(double **); - int pack_data_hybrid(int, double *); - void write_data(FILE *, int, double **); - int write_data_hybrid(FILE *, double *); - bigint memory_usage(); - - // clear magnetic and mechanic forces - - void force_clear(int, size_t); - - void grow_reset(); - // input lists to be checked - int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, - DAT::tdual_xfloat_2d buf,int iswap, - int pbc_flag, int *pbc, ExecutionSpace space); - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_xfloat_2d &buf, - ExecutionSpace space); - int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi); - int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, - int nlocal, int dim, X_FLOAT lo, X_FLOAT 
hi, - ExecutionSpace space); - - void sync(ExecutionSpace space, unsigned int mask); - void modified(ExecutionSpace space, unsigned int mask); - void sync_overlapping_device(ExecutionSpace space, unsigned int mask); - - protected: - tagint *tag; - int *type,*mask; - imageint *image; - double **x,**v,**f; // lattice quantities - - // spin quantities - double **sp; // sp[i][0-2] direction of the spin i - // sp[i][3] atomic magnetic moment of the spin i - double **fm; // fm[i][0-2] direction of magnetic precession - double **fm_long; // storage of long-range spin prec. components - - DAT::t_tagint_1d d_tag; - HAT::t_tagint_1d h_tag; - - DAT::t_int_1d d_type, d_mask; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - - DAT::t_x_array d_x; - DAT::t_v_array d_v; - DAT::t_f_array d_f; - - DAT::t_sp_array d_sp; - DAT::t_fm_array d_fm; - DAT::t_fm_long_array d_fm_long; - - HAT::t_sp_array h_sp; - HAT::t_fm_array h_fm; - HAT::t_fm_long_array h_fm_long; - - DAT::tdual_int_1d k_count; -}; - -} - -#endif -#endif - -/* ERROR/WARNING messages: - -E: Per-processor system is too big - -The number of owned atoms plus ghost atoms on a single -processor must fit in 32-bit integer. - -E: Invalid atom type in Atoms section of data file - -Atom types must range from 1 to specified # of types. 
- -*/ diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 7a575ecf28..5930a9e207 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -758,39 +758,6 @@ typedef tdual_virial_array::t_dev_um t_virial_array_um; typedef tdual_virial_array::t_dev_const_um t_virial_array_const_um; typedef tdual_virial_array::t_dev_const_randomread t_virial_array_randomread; -// Spin Types - -//3d SP_FLOAT array n*4 -#ifdef LMP_KOKKOS_NO_LEGACY -typedef Kokkos::DualView tdual_sp_array; -#else -typedef Kokkos::DualView tdual_sp_array; -#endif -typedef tdual_sp_array::t_dev t_sp_array; -typedef tdual_sp_array::t_dev_const t_sp_array_const; -typedef tdual_sp_array::t_dev_um t_sp_array_um; -typedef tdual_sp_array::t_dev_const_um t_sp_array_const_um; -typedef tdual_sp_array::t_dev_const_randomread t_sp_array_randomread; - -//3d FM_FLOAT array n*3 - -typedef Kokkos::DualView tdual_fm_array; -typedef tdual_fm_array::t_dev t_fm_array; -typedef tdual_fm_array::t_dev_const t_fm_array_const; -typedef tdual_fm_array::t_dev_um t_fm_array_um; -typedef tdual_fm_array::t_dev_const_um t_fm_array_const_um; -typedef tdual_fm_array::t_dev_const_randomread t_fm_array_randomread; - -//3d FML_FLOAT array n*3 - -typedef Kokkos::DualView tdual_fm_long_array; -typedef tdual_fm_long_array::t_dev t_fm_long_array; -typedef tdual_fm_long_array::t_dev_const t_fm_long_array_const; -typedef tdual_fm_long_array::t_dev_um t_fm_long_array_um; -typedef tdual_fm_long_array::t_dev_const_um t_fm_long_array_const_um; -typedef tdual_fm_long_array::t_dev_const_randomread t_fm_long_array_randomread; - - //Energy Types //1d E_FLOAT array n @@ -1027,33 +994,6 @@ typedef tdual_virial_array::t_host_um t_virial_array_um; typedef tdual_virial_array::t_host_const_um t_virial_array_const_um; typedef tdual_virial_array::t_host_const_randomread t_virial_array_randomread; -// Spin types - -//2d X_FLOAT array n*3 -typedef Kokkos::DualView tdual_sp_array; -typedef tdual_sp_array::t_host t_sp_array; -typedef 
tdual_sp_array::t_host_const t_sp_array_const; -typedef tdual_sp_array::t_host_um t_sp_array_um; -typedef tdual_sp_array::t_host_const_um t_sp_array_const_um; -typedef tdual_sp_array::t_host_const_randomread t_sp_array_randomread; - -//2d F_FLOAT array n*3 -typedef Kokkos::DualView tdual_fm_array; -//typedef Kokkos::DualView tdual_f_array; -typedef tdual_fm_array::t_host t_fm_array; -typedef tdual_fm_array::t_host_const t_fm_array_const; -typedef tdual_fm_array::t_host_um t_fm_array_um; -typedef tdual_fm_array::t_host_const_um t_fm_array_const_um; -typedef tdual_fm_array::t_host_const_randomread t_fm_array_randomread; - -//2d F_FLOAT array n*3 -typedef Kokkos::DualView tdual_fm_long_array; -//typedef Kokkos::DualView tdual_f_array; -typedef tdual_fm_long_array::t_host t_fm_long_array; -typedef tdual_fm_long_array::t_host_const t_fm_long_array_const; -typedef tdual_fm_long_array::t_host_um t_fm_long_array_um; -typedef tdual_fm_long_array::t_host_const_um t_fm_long_array_const_um; -typedef tdual_fm_long_array::t_host_const_randomread t_fm_long_array_randomread; //Energy Types diff --git a/src/SPIN/pair_spin_dipole_cut.cpp b/src/SPIN/pair_spin_dipole_cut.cpp index 41bb1a7755..7eb81e7a03 100644 --- a/src/SPIN/pair_spin_dipole_cut.cpp +++ b/src/SPIN/pair_spin_dipole_cut.cpp @@ -233,36 +233,44 @@ void PairSpinDipoleCut::compute(int eflag, int vflag) local_cut2 = cut_spin_long[itype][jtype]*cut_spin_long[itype][jtype]; + // compute dipolar interaction + if (rsq < local_cut2) { r2inv = 1.0/rsq; r3inv = r2inv*rinv; compute_dipolar(i,j,eij,fmi,spi,spj,r3inv); - if (lattice_flag) compute_dipolar_mech(i,j,eij,fi,spi,spj,r2inv); - } + + if (lattice_flag) + compute_dipolar_mech(i,j,eij,fi,spi,spj,r2inv); - // force accumulation + if (eflag) { + if (rsq <= local_cut2) { + evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); + evdwl *= 0.5*hbar; + emag[i] += evdwl; + } + } else evdwl = 0.0; - f[i][0] += fi[0]; - f[i][1] += fi[1]; - f[i][2] += fi[2]; - fm[i][0] += fmi[0]; - 
fm[i][1] += fmi[1]; - fm[i][2] += fmi[2]; - - if (eflag) { - if (rsq <= local_cut2) { - evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); - evdwl *= 0.5*hbar; - emag[i] += evdwl; + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + if (newton_pair || j < nlocal) { + f[j][0] -= fi[0]; + f[j][1] -= fi[1]; + f[j][2] -= fi[2]; } - } else evdwl = 0.0; - - if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, - evdwl,ecoul,fi[0],fi[1],fi[2],rij[0],rij[1],rij[2]); + fm[i][0] += fmi[0]; + fm[i][1] += fmi[1]; + fm[i][2] += fmi[2]; + if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, + evdwl,ecoul,fi[0],fi[1],fi[2],rij[0],rij[1],rij[2]); + } } } + + if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- @@ -391,7 +399,7 @@ void PairSpinDipoleCut::compute_dipolar_mech(int /* i */, int /* j */, double ei sjeij = spj[0]*eij[0] + spj[1]*eij[1] + spj[2]*eij[2]; bij = sisj - 5.0*sieij*sjeij; - pre = 3.0*mub2mu0*gigjri4; + pre = 0.5*3.0*mub2mu0*gigjri4; fi[0] -= pre * (eij[0] * bij + (sjeij*spi[0] + sieij*spj[0])); fi[1] -= pre * (eij[1] * bij + (sjeij*spi[1] + sieij*spj[1])); diff --git a/src/SPIN/pair_spin_dipole_long.cpp b/src/SPIN/pair_spin_dipole_long.cpp index 85b3c1d7bb..e90a43d1cf 100644 --- a/src/SPIN/pair_spin_dipole_long.cpp +++ b/src/SPIN/pair_spin_dipole_long.cpp @@ -281,32 +281,37 @@ void PairSpinDipoleLong::compute(int eflag, int vflag) bij[3] = (5.0*bij[2] + pre3*expm2) * r2inv; compute_long(i,j,eij,bij,fmi,spi,spj); - compute_long_mech(i,j,eij,bij,fmi,spi,spj); - } + if (lattice_flag) + compute_long_mech(i,j,eij,bij,fmi,spi,spj); - // force accumulation + if (eflag) { + if (rsq <= local_cut2) { + evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); + evdwl *= 0.5*hbar; + emag[i] += evdwl; + } + } else evdwl = 0.0; - f[i][0] += fi[0]; - f[i][1] += fi[1]; - f[i][2] += fi[2]; - fm[i][0] += fmi[0]; - fm[i][1] += fmi[1]; - fm[i][2] += fmi[2]; - - if (eflag) { - if (rsq <= local_cut2) { - evdwl 
-= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); - evdwl *= 0.5*hbar; - emag[i] += evdwl; + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + if (newton_pair || j < nlocal) { + f[j][0] -= fi[0]; + f[j][1] -= fi[1]; + f[j][2] -= fi[2]; } - } else evdwl = 0.0; + fm[i][0] += fmi[0]; + fm[i][1] += fmi[1]; + fm[i][2] += fmi[2]; + if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, + evdwl,ecoul,fi[0],fi[1],fi[2],rij[0],rij[1],rij[2]); - if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, - evdwl,ecoul,fi[0],fi[1],fi[2],rij[0],rij[1],rij[2]); - + } } } + + if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- @@ -373,7 +378,6 @@ void PairSpinDipoleLong::compute_single_pair(int ii, double fmi[3]) spi[3] = sp[ii][3]; jlist = firstneigh[ii]; jnum = numneigh[ii]; - //itype = type[i]; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; @@ -459,7 +463,7 @@ void PairSpinDipoleLong::compute_long_mech(int /* i */, int /* j */, double eij[ double g1,g2,g1b2_g2b3,gigj,pre; gigj = spi[3] * spj[3]; - pre = gigj*mub2mu0; + pre = 0.5 * gigj*mub2mu0; sisj = spi[0]*spj[0] + spi[1]*spj[1] + spi[2]*spj[2]; sieij = spi[0]*eij[0] + spi[1]*eij[1] + spi[2]*eij[2]; sjeij = spj[0]*eij[0] + spj[1]*eij[1] + spj[2]*eij[2]; diff --git a/src/SPIN/pair_spin_dmi.cpp b/src/SPIN/pair_spin_dmi.cpp index e2ddd708df..8d43a3a870 100644 --- a/src/SPIN/pair_spin_dmi.cpp +++ b/src/SPIN/pair_spin_dmi.cpp @@ -244,31 +244,36 @@ void PairSpinDmi::compute(int eflag, int vflag) if (rsq <= local_cut2) { compute_dmi(i,j,eij,fmi,spj); - if (lattice_flag) { + + if (lattice_flag) compute_dmi_mech(i,j,rsq,eij,fi,spi,spj); - } + + if (eflag) { + evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); + evdwl *= 0.5*hbar; + emag[i] += evdwl; + } else evdwl = 0.0; + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + if (newton_pair || j < nlocal) { + f[j][0] -= fi[0]; + f[j][1] -= fi[1]; + f[j][2] -= fi[2]; + } + fm[i][0] += fmi[0]; + fm[i][1] += 
fmi[1]; + fm[i][2] += fmi[2]; + + + if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, + evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz); } - - f[i][0] += fi[0]; - f[i][1] += fi[1]; - f[i][2] += fi[2]; - fm[i][0] += fmi[0]; - fm[i][1] += fmi[1]; - fm[i][2] += fmi[2]; - - if (eflag) { - evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); - evdwl *= 0.5*hbar; - emag[i] += evdwl; - } else evdwl = 0.0; - - if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, - evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz); } } if (vflag_fdotr) virial_fdotr_compute(); - } /* ---------------------------------------------------------------------- @@ -405,9 +410,9 @@ void PairSpinDmi::compute_dmi_mech(int i, int j, double rsq, double /*eij*/[3], cdmy = (dmiz*csx - dmix*csz); cdmz = (dmix*csy - dmiy*csz); - fi[0] += irij*cdmx; - fi[1] += irij*cdmy; - fi[2] += irij*cdmz; + fi[0] += 0.5*irij*cdmx; + fi[1] += 0.5*irij*cdmy; + fi[2] += 0.5*irij*cdmz; } /* ---------------------------------------------------------------------- diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp index 4c6c3936cf..36f3dbcf5e 100644 --- a/src/SPIN/pair_spin_exchange_biquadratic.cpp +++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp @@ -22,19 +22,16 @@ ------------------------------------------------------------------------- */ #include "pair_spin_exchange_biquadratic.h" -#include -#include -#include + #include "atom.h" #include "comm.h" #include "error.h" -#include "fix.h" #include "force.h" -#include "neigh_list.h" #include "memory.h" -#include "modify.h" -#include "update.h" -#include "utils.h" +#include "neigh_list.h" + +#include +#include using namespace LAMMPS_NS; @@ -76,7 +73,7 @@ void PairSpinExchangeBiquadratic::settings(int narg, char **arg) if (narg != 1) error->all(FLERR,"Illegal pair_style command"); - cut_spin_exchange_global = force->numeric(FLERR,arg[0]); + cut_spin_exchange_global = utils::numeric(FLERR,arg[0],false,lmp); // reset cutoffs that have been 
explicitly set @@ -106,19 +103,19 @@ void PairSpinExchangeBiquadratic::coeff(int narg, char **arg) error->all(FLERR,"Incorrect args for pair coefficients"); int ilo,ihi,jlo,jhi; - force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi); - force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi); + utils::bounds(FLERR,arg[0],1,atom->ntypes,ilo,ihi,error); + utils::bounds(FLERR,arg[1],1,atom->ntypes,jlo,jhi,error); // get exchange arguments from input command int iarg = 10; - const double rc = force->numeric(FLERR,arg[3]); - const double j1 = force->numeric(FLERR,arg[4]); - const double j2 = force->numeric(FLERR,arg[5]); - const double j3 = force->numeric(FLERR,arg[6]); - const double k1 = force->numeric(FLERR,arg[7]); - const double k2 = force->numeric(FLERR,arg[8]); - const double k3 = force->numeric(FLERR,arg[9]); + const double rc = utils::numeric(FLERR,arg[3],false,lmp); + const double j1 = utils::numeric(FLERR,arg[4],false,lmp); + const double j2 = utils::numeric(FLERR,arg[5],false,lmp); + const double j3 = utils::numeric(FLERR,arg[6],false,lmp); + const double k1 = utils::numeric(FLERR,arg[7],false,lmp); + const double k2 = utils::numeric(FLERR,arg[8],false,lmp); + const double k3 = utils::numeric(FLERR,arg[9],false,lmp); // read energy offset flag if specified diff --git a/src/SPIN/pair_spin_magelec.cpp b/src/SPIN/pair_spin_magelec.cpp index 849590bad2..2a672416b9 100644 --- a/src/SPIN/pair_spin_magelec.cpp +++ b/src/SPIN/pair_spin_magelec.cpp @@ -237,31 +237,35 @@ void PairSpinMagelec::compute(int eflag, int vflag) if (rsq <= local_cut2) { compute_magelec(i,j,eij,fmi,spj); - if (lattice_flag) { + + if (lattice_flag) compute_magelec_mech(i,j,fi,spi,spj); + + if (eflag) { + evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); + evdwl *= 0.5*hbar; + emag[i] += evdwl; + } else evdwl = 0.0; + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + if (newton_pair || j < nlocal) { + f[j][0] -= fi[0]; + f[j][1] -= fi[1]; + f[j][2] -= fi[2]; } + fm[i][0] += fmi[0]; + 
fm[i][1] += fmi[1]; + fm[i][2] += fmi[2]; + + if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, + evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz); } - - f[i][0] += fi[0]; - f[i][1] += fi[1]; - f[i][2] += fi[2]; - fm[i][0] += fmi[0]; - fm[i][1] += fmi[1]; - fm[i][2] += fmi[2]; - - if (eflag) { - evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); - evdwl *= 0.5*hbar; - emag[i] += evdwl; - } else evdwl = 0.0; - - if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, - evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz); } } if (vflag_fdotr) virial_fdotr_compute(); - } /* ---------------------------------------------------------------------- @@ -400,9 +404,9 @@ void PairSpinMagelec::compute_magelec_mech(int i, int j, double fi[3], double sp meiy *= ME_mech[itype][jtype]; meiz *= ME_mech[itype][jtype]; - fi[0] += (meiy*vz - meiz*vy); - fi[1] += (meiz*vx - meix*vz); - fi[2] += (meix*vy - meiy*vx); + fi[0] += 0.5*(meiy*vz - meiz*vy); + fi[1] += 0.5*(meiz*vx - meix*vz); + fi[2] += 0.5*(meix*vy - meiy*vx); } diff --git a/src/SPIN/pair_spin_neel.cpp b/src/SPIN/pair_spin_neel.cpp index c09b5ac191..5c05bef525 100644 --- a/src/SPIN/pair_spin_neel.cpp +++ b/src/SPIN/pair_spin_neel.cpp @@ -246,31 +246,33 @@ void PairSpinNeel::compute(int eflag, int vflag) if (rsq <= local_cut2) { compute_neel(i,j,rsq,eij,fmi,spi,spj); - if (lattice_flag) { + if (lattice_flag) compute_neel_mech(i,j,rsq,eij,fi,spi,spj); + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + if (newton_pair || j < nlocal) { + f[j][0] -= fi[0]; + f[j][1] -= fi[1]; + f[j][2] -= fi[2]; } + fm[i][0] += fmi[0]; + fm[i][1] += fmi[1]; + fm[i][2] += fmi[2]; + + if (eflag) { + evdwl -= compute_neel_energy(i,j,rsq,eij,spi,spj); + emag[i] += evdwl; + } else evdwl = 0.0; + + if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, + evdwl,ecoul,fi[0],fi[1],fi[2],rij[0],rij[1],rij[2]); } - - f[i][0] += fi[0]; - f[i][1] += fi[1]; - f[i][2] += fi[2]; - fm[i][0] += fmi[0]; - fm[i][1] += fmi[1]; - fm[i][2] += fmi[2]; - - if (eflag) { - 
evdwl -= compute_neel_energy(i,j,rsq,eij,spi,spj); - // evdwl *= 0.5*hbar; - emag[i] += evdwl; - } else evdwl = 0.0; - - if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, - evdwl,ecoul,fi[0],fi[1],fi[2],rij[0],rij[1],rij[2]); } } if (vflag_fdotr) virial_fdotr_compute(); - } /* ---------------------------------------------------------------------- @@ -563,9 +565,9 @@ void PairSpinNeel::compute_neel_mech(int i, int j, double rsq, double eij[3], do // adding three contributions - fi[0] = pdx + pq1x + pq2x; - fi[1] = pdy + pq1y + pq2y; - fi[2] = pdz + pq1z + pq2z; + fi[0] = 0.5*(pdx + pq1x + pq2x); + fi[1] = 0.5*(pdy + pq1y + pq2y); + fi[2] = 0.5*(pdz + pq1z + pq2z); } /* ---------------------------------------------------------------------- */ From 2825abb0284a164b368d1dda18a62140c807b000 Mon Sep 17 00:00:00 2001 From: julient31 Date: Mon, 5 Oct 2020 17:13:54 -0600 Subject: [PATCH 13/64] Improved scripts validaton problems --- .../validation_damped_exchange/run-test-exchange.sh | 4 ++-- examples/SPIN/test_problems/validation_nve/run-test-nve.sh | 2 +- examples/SPIN/test_problems/validation_nvt/plot_nvt.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/SPIN/test_problems/validation_damped_exchange/run-test-exchange.sh b/examples/SPIN/test_problems/validation_damped_exchange/run-test-exchange.sh index 599730fe7b..bd878a52de 100755 --- a/examples/SPIN/test_problems/validation_damped_exchange/run-test-exchange.sh +++ b/examples/SPIN/test_problems/validation_damped_exchange/run-test-exchange.sh @@ -13,7 +13,7 @@ en="$(echo "$en-$in" | bc -l)" tail -n +$in log.lammps | head -n $en > res_lammps.dat # compute Langevin -python3 -m llg_exchange.py > res_llg.dat +python3 llg_exchange.py > res_llg.dat # plot results -python3 -m plot_precession.py res_lammps.dat res_llg.dat +python3 plot_precession.py res_lammps.dat res_llg.dat diff --git a/examples/SPIN/test_problems/validation_nve/run-test-nve.sh 
b/examples/SPIN/test_problems/validation_nve/run-test-nve.sh index 441e7cf46d..18cedd9503 100755 --- a/examples/SPIN/test_problems/validation_nve/run-test-nve.sh +++ b/examples/SPIN/test_problems/validation_nve/run-test-nve.sh @@ -13,4 +13,4 @@ en="$(echo "$en-$in" | bc -l)" tail -n +$in log.lammps | head -n $en > res_lammps.dat # plot results -python3 -m plot_nve.py res_lammps.dat res_llg.dat +python3 plot_nve.py res_lammps.dat res_llg.dat diff --git a/examples/SPIN/test_problems/validation_nvt/plot_nvt.py b/examples/SPIN/test_problems/validation_nvt/plot_nvt.py index 06c48b4c28..4109d60245 100755 --- a/examples/SPIN/test_problems/validation_nvt/plot_nvt.py +++ b/examples/SPIN/test_problems/validation_nvt/plot_nvt.py @@ -39,5 +39,5 @@ plt.xlabel('Time (in ps)') plt.legend() plt.show() -fig.savefig(os.path.join(os.getcwd(), "nve_spin_lattice.pdf"), bbox_inches="tight") +fig.savefig(os.path.join(os.getcwd(), "nvt_spin_lattice.pdf"), bbox_inches="tight") plt.close(fig) From 2d7494186c053e96d85440dd4892566072f9d90e Mon Sep 17 00:00:00 2001 From: julient31 Date: Mon, 5 Oct 2020 19:37:24 -0600 Subject: [PATCH 14/64] rerun all validations tests (modifed one) --- .../test-spin-precession.in | 18 ++++++++------- .../validation_damped_exchange/two_spins.data | 22 ------------------- .../validation_nvt/in.spin.nvt_lattice | 2 +- .../validation_nvt/in.spin.nvt_spin | 2 +- src/SPIN/pair_spin_exchange.cpp | 4 ---- 5 files changed, 12 insertions(+), 36 deletions(-) delete mode 100644 examples/SPIN/test_problems/validation_damped_exchange/two_spins.data diff --git a/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in b/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in index 0ca49364d2..86da20e6f9 100644 --- a/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in +++ b/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in @@ -5,22 +5,24 @@ atom_style spin atom_modify map array 
boundary f f f -read_data two_spins.data +atom_modify map array +lattice sc 3.0 +region box block 0 2 0 1 0 1 +create_box 1 box +create_atoms 1 box + +mass 1 55.845 +set atom 1 spin 2.0 1.0 0.0 0.0 +set atom 2 spin 2.0 0.0 1.0 0.0 pair_style spin/exchange 3.1 pair_coeff * * exchange 3.1 11.254 0.0 1.0 -group bead type 1 - -variable H equal 0.0 -variable Kan equal 0.0 variable Temperature equal 0.0 variable RUN equal 30000 fix 1 all nve/spin lattice no -fix 2 all precession/spin zeeman ${H} 0.0 0.0 1.0 anisotropy ${Kan} 0.0 0.0 1.0 -fix_modify 2 energy yes -fix 3 all langevin/spin ${Temperature} 0.01 12345 +fix 2 all langevin/spin ${Temperature} 0.01 12345 compute out_mag all spin compute out_pe all pe diff --git a/examples/SPIN/test_problems/validation_damped_exchange/two_spins.data b/examples/SPIN/test_problems/validation_damped_exchange/two_spins.data deleted file mode 100644 index 013f813751..0000000000 --- a/examples/SPIN/test_problems/validation_damped_exchange/two_spins.data +++ /dev/null @@ -1,22 +0,0 @@ -LAMMPS data file via write_data, version 19 Sep 2019, timestep = 0 - -2 atoms -1 atom types - -0.0 6.0 xlo xhi -0.0 3.0 ylo yhi -0.0 3.0 zlo zhi - -Masses - -1 1 - -Atoms # spin - -1 1 2.0 0.0 0.0 0.0 1.0 0.0 0.0 0 0 0 -2 1 2.0 3.0 0.0 0.0 0.0 1.0 0.0 0 0 0 - -Velocities - -1 0.0 0.0 0.0 -2 0.0 0.0 0.0 diff --git a/examples/SPIN/test_problems/validation_nvt/in.spin.nvt_lattice b/examples/SPIN/test_problems/validation_nvt/in.spin.nvt_lattice index 1d63f01d43..2375c0ff8d 100644 --- a/examples/SPIN/test_problems/validation_nvt/in.spin.nvt_lattice +++ b/examples/SPIN/test_problems/validation_nvt/in.spin.nvt_lattice @@ -30,7 +30,7 @@ neighbor 0.1 bin neigh_modify every 10 check yes delay 20 fix 1 all precession/spin zeeman 0.0 0.0 0.0 1.0 -fix 2 all langevin 200.0 200.0 10.0 48279 +fix 2 all langevin 200.0 200.0 1.0 48279 fix 3 all langevin/spin 0.0 0.00001 321 fix 4 all nve/spin lattice moving timestep 0.001 diff --git 
a/examples/SPIN/test_problems/validation_nvt/in.spin.nvt_spin b/examples/SPIN/test_problems/validation_nvt/in.spin.nvt_spin index 435e877bdf..6b65df7109 100644 --- a/examples/SPIN/test_problems/validation_nvt/in.spin.nvt_spin +++ b/examples/SPIN/test_problems/validation_nvt/in.spin.nvt_spin @@ -29,7 +29,7 @@ neighbor 0.1 bin neigh_modify every 10 check yes delay 20 fix 1 all precession/spin zeeman 0.0 0.0 0.0 1.0 -fix 2 all langevin/spin 200.0 0.1 321 +fix 2 all langevin/spin 200.0 0.01 321 fix 3 all nve/spin lattice moving timestep 0.001 diff --git a/src/SPIN/pair_spin_exchange.cpp b/src/SPIN/pair_spin_exchange.cpp index e6b6db375f..e35408e9ec 100644 --- a/src/SPIN/pair_spin_exchange.cpp +++ b/src/SPIN/pair_spin_exchange.cpp @@ -426,10 +426,6 @@ void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq, fi[0] -= 0.5*fx; fi[1] -= 0.5*fy; fi[2] -= 0.5*fz; - // fi[0] -= fx; - // fi[1] -= fy; - // fi[2] -= fz; - } /* ---------------------------------------------------------------------- From 3147dd850c53be305c776b1b5ff76fce7c1b4b0f Mon Sep 17 00:00:00 2001 From: julient31 Date: Mon, 5 Oct 2020 20:01:34 -0600 Subject: [PATCH 15/64] adding corrections to doc page --- doc/src/Commands_pair.rst | 1 + doc/src/Packages_details.rst | 2 ++ doc/src/pair_spin_exchange.rst | 1 + doc/src/pair_style.rst | 1 + 4 files changed, 5 insertions(+) diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst index 888a445daa..4f3b164c98 100644 --- a/doc/src/Commands_pair.rst +++ b/doc/src/Commands_pair.rst @@ -240,6 +240,7 @@ OPT. * :doc:`spin/dipole/long ` * :doc:`spin/dmi ` * :doc:`spin/exchange ` + * :doc:`spin/exchange/biquadratic ` * :doc:`spin/magelec ` * :doc:`spin/neel ` * :doc:`srp ` diff --git a/doc/src/Packages_details.rst b/doc/src/Packages_details.rst index 1beeeff5b4..d9e1e31470 100644 --- a/doc/src/Packages_details.rst +++ b/doc/src/Packages_details.rst @@ -1036,9 +1036,11 @@ the usual manner via MD. Various pair, fix, and compute styles. 
* :doc:`pair_style spin/dipole/long ` * :doc:`pair_style spin/dmi ` * :doc:`pair_style spin/exchange ` +* :doc:`pair_style spin/exchange/biquadratic ` * :doc:`pair_style spin/magelec ` * :doc:`pair_style spin/neel ` * :doc:`fix nve/spin ` +* :doc:`fix langevin/spin ` * :doc:`fix precession/spin ` * :doc:`compute spin ` * :doc:`neb/spin ` diff --git a/doc/src/pair_spin_exchange.rst b/doc/src/pair_spin_exchange.rst index 85cf6d3aa8..38e59eed19 100644 --- a/doc/src/pair_spin_exchange.rst +++ b/doc/src/pair_spin_exchange.rst @@ -1,4 +1,5 @@ .. index:: pair_style spin/exchange +.. index:: pair_style spin/exchange/biquadratic pair_style spin/exchange command ================================ diff --git a/doc/src/pair_style.rst b/doc/src/pair_style.rst index 4feaeacad0..2a6c81c0f8 100644 --- a/doc/src/pair_style.rst +++ b/doc/src/pair_style.rst @@ -304,6 +304,7 @@ accelerated styles exist. * :doc:`spin/dipole/long ` - * :doc:`spin/dmi ` - * :doc:`spin/exchange ` - +* :doc:`spin/exchange/biquadratic ` - * :doc:`spin/magelec ` - * :doc:`spin/neel ` - * :doc:`srp ` - From 4baf60ffd1393e063b9332c3c0cfca94d986da7a Mon Sep 17 00:00:00 2001 From: julient31 Date: Mon, 5 Oct 2020 20:47:07 -0600 Subject: [PATCH 16/64] adding examples of the biquadratic pair_style and offset option --- examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp | 2 +- examples/SPIN/iron/in.spin.iron | 2 +- examples/SPIN/iron/in.spin.iron_cubic | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp b/examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp index 2bfa8393f3..6429cec349 100644 --- a/examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp +++ b/examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp @@ -26,7 +26,7 @@ velocity all create 100 4928459 rot yes dist gaussian #pair_style hybrid/overlay eam/alloy spin/exchange 4.0 spin/neel 4.0 pair_style hybrid/overlay eam/alloy spin/exchange 4.0 pair_coeff * * eam/alloy Co_PurjaPun_2012.eam.alloy Co -pair_coeff * * spin/exchange 
exchange 4.0 -0.3593 1.135028015e-05 1.064568567 +pair_coeff * * spin/exchange exchange 4.0 -0.3593 1.135028015e-05 1.0645 offset yes #pair_coeff * * spin/neel neel 4.0 0.0048 0.234 1.168 2.6905 0.705 0.652 neighbor 0.1 bin diff --git a/examples/SPIN/iron/in.spin.iron b/examples/SPIN/iron/in.spin.iron index 58c0537af7..f678d39f56 100644 --- a/examples/SPIN/iron/in.spin.iron +++ b/examples/SPIN/iron/in.spin.iron @@ -25,7 +25,7 @@ velocity all create 100 4928459 rot yes dist gaussian pair_style hybrid/overlay eam/alloy spin/exchange 3.5 pair_coeff * * eam/alloy Fe_Mishin2006.eam.alloy Fe -pair_coeff * * spin/exchange exchange 3.4 0.02726 0.2171 1.841 +pair_coeff * * spin/exchange exchange 3.4 0.02726 0.2171 1.841 offset yes neighbor 0.1 bin neigh_modify every 10 check yes delay 20 diff --git a/examples/SPIN/iron/in.spin.iron_cubic b/examples/SPIN/iron/in.spin.iron_cubic index 30a3e0e97c..35011e796f 100644 --- a/examples/SPIN/iron/in.spin.iron_cubic +++ b/examples/SPIN/iron/in.spin.iron_cubic @@ -21,9 +21,9 @@ mass 1 55.845 set group all spin 2.2 -1.0 0.0 0.0 velocity all create 100 4928459 rot yes dist gaussian -pair_style hybrid/overlay eam/alloy spin/exchange 3.5 +pair_style hybrid/overlay eam/alloy spin/exchange/biquadratic 3.5 pair_coeff * * eam/alloy Fe_Mishin2006.eam.alloy Fe -pair_coeff * * spin/exchange exchange 3.4 0.02726 0.2171 1.841 +pair_coeff * * spin/exchange/biquadratic biquadratic 3.4 0.02726 0.2171 1.841 0.0 0.0 2.0 offset yes neighbor 0.1 bin neigh_modify every 10 check yes delay 20 From e3b8563ed9785455a7211e9933cd5daa6ff88d7c Mon Sep 17 00:00:00 2001 From: julient31 Date: Mon, 5 Oct 2020 21:28:26 -0600 Subject: [PATCH 17/64] correcting spelling errors --- doc/src/pair_spin_exchange.rst | 2 +- doc/utils/sphinx-config/false_positives.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/src/pair_spin_exchange.rst b/doc/src/pair_spin_exchange.rst index 38e59eed19..72c416ac72 100644 --- a/doc/src/pair_spin_exchange.rst +++ 
b/doc/src/pair_spin_exchange.rst @@ -132,7 +132,7 @@ for the *spin/exchange/biquadratic* pair style. Note that :math:`R_c` is the radius cutoff of the considered exchange interaction, and :math:`a`, :math:`b` and :math:`d` are the three coefficients performing the parameterization of the function :math:`J(r_{ij})` defined -above (in the *biquadratic* ase, :math:`a_j`, :math:`b_j`, :math:`d_j` and +above (in the *biquadratic* style, :math:`a_j`, :math:`b_j`, :math:`d_j` and :math:`a_k`, :math:`b_k`, :math:`d_k` are the coefficients of :math:`J(r_{ij})` and :math:`K(r_{ij})` respectively). diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index b276933a88..a06f72fde5 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -240,6 +240,7 @@ bigint Bij bilayer bilayers +biquadratic binsize binstyle binutils From 73b2ad0acce681b5203ffe8c67d7f8f3a906ee26 Mon Sep 17 00:00:00 2001 From: julient31 Date: Mon, 12 Oct 2020 11:38:52 -0600 Subject: [PATCH 18/64] - slight modifs of the damped exchange example --- .../validation_damped_exchange/llg_exchange.py | 18 +++++++++++++++++- .../test-spin-precession.in | 5 ++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/examples/SPIN/test_problems/validation_damped_exchange/llg_exchange.py b/examples/SPIN/test_problems/validation_damped_exchange/llg_exchange.py index dd1c543bb3..5b93ac5c2d 100755 --- a/examples/SPIN/test_problems/validation_damped_exchange/llg_exchange.py +++ b/examples/SPIN/test_problems/validation_damped_exchange/llg_exchange.py @@ -6,9 +6,17 @@ import matplotlib.pyplot as plt import mpmath as mp hbar=0.658212 # Planck's constant (eV.fs/rad) -J0=0.05 # per-neighbor exchange interaction (eV) +# J0=0.05 # per-neighbor exchange interaction (eV) + +# exchange interaction parameters +J1 = 11.254 # in eV +J2 = 0.0 # adim +J3 = 1.0 # in Ang. 
+ +# initial spins S1 = np.array([1.0, 0.0, 0.0]) S2 = np.array([0.0, 1.0, 0.0]) + alpha=0.01 # damping coefficient pi=math.pi @@ -30,6 +38,14 @@ def rotation_matrix(axis, theta): [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)], [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]]) +#Definition of the Bethe-Slater function +def func_BS(x,a,b,c): + return 4*a*((x/c)**2)*(1-b*(x/c)**2)*np.exp(-(x/c)**2) + +#Definition of the derivative of the Bethe-Slater function +def func_dBS(x,a,b,c): + return 4*a*((x/c)**2)*(1-b*(x/c)**2)*np.exp(-(x/c)**2) + # calculating precession field of spin Sr def calc_rot_vector(Sr,Sf): rot = (J0/hbar)*(Sf-alpha*np.cross(Sf,Sr))/(1.0+alpha**2) diff --git a/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in b/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in index 86da20e6f9..9dfb4a98d6 100644 --- a/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in +++ b/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in @@ -21,7 +21,7 @@ pair_coeff * * exchange 3.1 11.254 0.0 1.0 variable Temperature equal 0.0 variable RUN equal 30000 -fix 1 all nve/spin lattice no +fix 1 all nve/spin lattice frozen fix 2 all langevin/spin ${Temperature} 0.01 12345 compute out_mag all spin @@ -36,6 +36,9 @@ variable emag equal c_out_mag[5] thermo_style custom step time v_magx v_magy v_magz v_emag pe etotal thermo 10 +compute outsp all property/atom spx spy spz sp fmx fmy fmz +dump 1 all custom 10 dump.data type x y z c_outsp[1] c_outsp[2] c_outsp[3] fx fy fz + timestep 0.0001 run ${RUN} From 5159d255a74bffef78aee32ec5d05c514618a26e Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 26 Oct 2020 11:02:09 -0400 Subject: [PATCH 19/64] update bundled fmtlib to version 7.1.0 --- src/fmt/chrono.h | 79 +- src/fmt/color.h | 56 +- src/fmt/compile.h | 75 +- src/fmt/core.h | 432 +++-- src/fmt/format-inl.h | 1890 +++++++++++++++++--- src/fmt/format.h | 1137 
+++++++----- src/fmt/locale.h | 40 +- src/fmt/os.h | 106 +- src/fmt/ostream.h | 28 +- src/fmt/printf.h | 4 +- src/fmt/ranges.h | 19 +- src/fmtlib_format.cpp | 4 +- src/fmtlib_os.cpp | 17 +- unittest/force-styles/test_error_stats.cpp | 2 +- 14 files changed, 2897 insertions(+), 992 deletions(-) diff --git a/src/fmt/chrono.h b/src/fmt/chrono.h index e70b8053a6..1a3b8d5e5c 100644 --- a/src/fmt/chrono.h +++ b/src/fmt/chrono.h @@ -72,43 +72,27 @@ FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { static_assert(F::is_integer, "From must be integral"); static_assert(T::is_integer, "To must be integral"); - if (F::is_signed && !T::is_signed) { + if (detail::const_check(F::is_signed && !T::is_signed)) { // From may be negative, not allowed! if (fmt::detail::is_negative(from)) { ec = 1; return {}; } - // From is positive. Can it always fit in To? - if (F::digits <= T::digits) { - // yes, From always fits in To. - } else { - // from may not fit in To, we have to do a dynamic check - if (from > static_cast((T::max)())) { - ec = 1; - return {}; - } + if (F::digits > T::digits && + from > static_cast(detail::max_value())) { + ec = 1; + return {}; } } - if (!F::is_signed && T::is_signed) { - // can from be held in To? - if (F::digits < T::digits) { - // yes, From always fits in To. - } else { - // from may not fit in To, we have to do a dynamic check - if (from > static_cast((T::max)())) { - // outside range. - ec = 1; - return {}; - } - } + if (!F::is_signed && T::is_signed && F::digits >= T::digits && + from > static_cast(detail::max_value())) { + ec = 1; + return {}; } - - // reaching here means all is ok for lossless conversion. - return static_cast(from); - -} // function + return static_cast(from); // Lossless conversion. 
+} template ::value)> @@ -190,11 +174,9 @@ To safe_duration_cast(std::chrono::duration from, // safe conversion to IntermediateRep IntermediateRep count = lossless_integral_conversion(from.count(), ec); - if (ec) { - return {}; - } + if (ec) return {}; // multiply with Factor::num without overflow or underflow - if (Factor::num != 1) { + if (detail::const_check(Factor::num != 1)) { const auto max1 = detail::max_value() / Factor::num; if (count > max1) { ec = 1; @@ -209,17 +191,9 @@ To safe_duration_cast(std::chrono::duration from, count *= Factor::num; } - // this can't go wrong, right? den>0 is checked earlier. - if (Factor::den != 1) { - count /= Factor::den; - } - // convert to the to type, safely - using ToRep = typename To::rep; - const ToRep tocount = lossless_integral_conversion(count, ec); - if (ec) { - return {}; - } - return To{tocount}; + if (detail::const_check(Factor::den != 1)) count /= Factor::den; + auto tocount = lossless_integral_conversion(count, ec); + return ec ? To() : To(tocount); } /** @@ -351,6 +325,11 @@ inline std::tm localtime(std::time_t time) { return lt.tm_; } +inline std::tm localtime( + std::chrono::time_point time_point) { + return localtime(std::chrono::system_clock::to_time_t(time_point)); +} + // Thread-safe replacement for std::gmtime inline std::tm gmtime(std::time_t time) { struct dispatcher { @@ -387,6 +366,11 @@ inline std::tm gmtime(std::time_t time) { return gt.tm_; } +inline std::tm gmtime( + std::chrono::time_point time_point) { + return gmtime(std::chrono::system_clock::to_time_t(time_point)); +} + namespace detail { inline size_t strftime(char* str, size_t count, const char* format, const std::tm* time) { @@ -399,6 +383,17 @@ inline size_t strftime(wchar_t* str, size_t count, const wchar_t* format, } } // namespace detail +template +struct formatter, Char> + : formatter { + template + auto format(std::chrono::time_point val, + FormatContext& ctx) -> decltype(ctx.out()) { + std::tm time = localtime(val); + return 
formatter::format(time, ctx); + } +}; + template struct formatter { template auto parse(ParseContext& ctx) -> decltype(ctx.begin()) { diff --git a/src/fmt/color.h b/src/fmt/color.h index b65f892afc..7891058950 100644 --- a/src/fmt/color.h +++ b/src/fmt/color.h @@ -463,16 +463,16 @@ template <> inline void reset_color(FILE* stream) FMT_NOEXCEPT { } template -inline void reset_color(basic_memory_buffer& buffer) FMT_NOEXCEPT { +inline void reset_color(buffer& buffer) FMT_NOEXCEPT { const char* begin = data::reset_color; const char* end = begin + sizeof(data::reset_color) - 1; buffer.append(begin, end); } template -void vformat_to(basic_memory_buffer& buf, const text_style& ts, +void vformat_to(buffer& buf, const text_style& ts, basic_string_view format_str, - basic_format_args> args) { + basic_format_args>> args) { bool has_style = false; if (ts.has_emphasis()) { has_style = true; @@ -496,7 +496,7 @@ void vformat_to(basic_memory_buffer& buf, const text_style& ts, template > void vprint(std::FILE* f, const text_style& ts, const S& format, - basic_format_args> args) { + basic_format_args>> args) { basic_memory_buffer buf; detail::vformat_to(buf, ts, to_string_view(format), args); buf.push_back(Char(0)); @@ -504,20 +504,22 @@ void vprint(std::FILE* f, const text_style& ts, const S& format, } /** + \rst Formats a string and prints it to the specified file stream using ANSI escape sequences to specify text formatting. - Example: + + **Example**:: + fmt::print(fmt::emphasis::bold | fg(fmt::color::red), "Elapsed time: {0:.2f} seconds", 1.23); + \endrst */ template ::value)> void print(std::FILE* f, const text_style& ts, const S& format_str, const Args&... 
args) { - detail::check_format_string(format_str); - using context = buffer_context>; - format_arg_store as{args...}; - vprint(f, ts, format_str, basic_format_args(as)); + vprint(f, ts, format_str, + fmt::make_args_checked(format_str, args...)); } /** @@ -558,7 +560,41 @@ template > inline std::basic_string format(const text_style& ts, const S& format_str, const Args&... args) { return vformat(ts, to_string_view(format_str), - detail::make_args_checked(format_str, args...)); + fmt::make_args_checked(format_str, args...)); +} + +/** + Formats a string with the given text_style and writes the output to ``out``. + */ +template ::value)> +OutputIt vformat_to( + OutputIt out, const text_style& ts, basic_string_view format_str, + basic_format_args>> args) { + decltype(detail::get_buffer(out)) buf(detail::get_buffer_init(out)); + detail::vformat_to(buf, ts, format_str, args); + return detail::get_iterator(buf); +} + +/** + \rst + Formats arguments with the given text_style, writes the result to the output + iterator ``out`` and returns the iterator past the end of the output range. + + **Example**:: + + std::vector out; + fmt::format_to(std::back_inserter(out), + fmt::emphasis::bold | fg(fmt::color::red), "{}", 42); + \endrst +*/ +template >::value&& + detail::is_string::value)> +inline OutputIt format_to(OutputIt out, const text_style& ts, + const S& format_str, Args&&... args) { + return vformat_to(out, ts, to_string_view(format_str), + fmt::make_args_checked(format_str, args...)); } FMT_END_NAMESPACE diff --git a/src/fmt/compile.h b/src/fmt/compile.h index d7e6449ebb..7db610d90f 100644 --- a/src/fmt/compile.h +++ b/src/fmt/compile.h @@ -368,7 +368,8 @@ template struct type_list {}; // Returns a reference to the argument at index N from [first, rest...]. template -constexpr const auto& get(const T& first, const Args&... rest) { +constexpr const auto& get([[maybe_unused]] const T& first, + [[maybe_unused]] const Args&... 
rest) { static_assert(N < 1 + sizeof...(Args), "index is out of bounds"); if constexpr (N == 0) return first; @@ -406,6 +407,19 @@ constexpr text make_text(basic_string_view s, size_t pos, return {{&s[pos], size}}; } +template struct code_unit { + Char value; + using char_type = Char; + + template + OutputIt format(OutputIt out, const Args&...) const { + return write(out, value); + } +}; + +template +struct is_compiled_format> : std::true_type {}; + // A replacement field that refers to argument N. template struct field { using char_type = Char; @@ -430,7 +444,9 @@ template struct spec_field { OutputIt format(OutputIt out, const Args&... args) const { // This ensures that the argument type is convertile to `const T&`. const T& arg = get(args...); - basic_format_context ctx(out, {}); + const auto& vargs = + make_format_args>(args...); + basic_format_context ctx(out, vargs); return fmt.format(arg, ctx); } }; @@ -489,16 +505,17 @@ constexpr auto parse_tail(T head, S format_str) { template struct parse_specs_result { formatter fmt; size_t end; + int next_arg_id; }; template constexpr parse_specs_result parse_specs(basic_string_view str, - size_t pos) { + size_t pos, int arg_id) { str.remove_prefix(pos); - auto ctx = basic_format_parse_context(str); + auto ctx = basic_format_parse_context(str, {}, arg_id + 1); auto f = formatter(); auto end = f.parse(ctx); - return {f, pos + (end - str.data()) + 1}; + return {f, pos + (end - str.data()) + 1, ctx.next_arg_id()}; } // Compiles a non-empty format string and returns the compiled representation @@ -518,8 +535,8 @@ constexpr auto compile_format_string(S format_str) { format_str); } else if constexpr (str[POS + 1] == ':') { using type = get_type; - constexpr auto result = parse_specs(str, POS + 2); - return parse_tail( + constexpr auto result = parse_specs(str, POS + 2, ID); + return parse_tail( spec_field{result.fmt}, format_str); } else { return unknown_format(); @@ -530,8 +547,13 @@ constexpr auto compile_format_string(S 
format_str) { return parse_tail(make_text(str, POS, 1), format_str); } else { constexpr auto end = parse_text(str, POS + 1); - return parse_tail(make_text(str, POS, end - POS), - format_str); + if constexpr (end - POS > 1) { + return parse_tail(make_text(str, POS, end - POS), + format_str); + } else { + return parse_tail(code_unit{str[POS]}, + format_str); + } } } @@ -587,8 +609,7 @@ template format(const CompiledFormat& cf, const Args&... args) { basic_memory_buffer buffer; - detail::buffer& base = buffer; - cf.format(std::back_inserter(base), args...); + cf.format(detail::buffer_appender(buffer), args...); return to_string(buffer); } @@ -608,8 +629,7 @@ template format(const CompiledFormat& cf, const Args&... args) { basic_memory_buffer buffer; using context = buffer_context; - detail::buffer& base = buffer; - detail::cf::vformat_to(std::back_inserter(base), cf, + detail::cf::vformat_to(detail::buffer_appender(buffer), cf, make_format_args(args...)); return to_string(buffer); } @@ -618,9 +638,13 @@ template ::value)> FMT_INLINE std::basic_string format(const S&, Args&&... args) { - constexpr basic_string_view str = S(); - if (str.size() == 2 && str[0] == '{' && str[1] == '}') - return fmt::to_string(detail::first(args...)); +#ifdef __cpp_if_constexpr + if constexpr (std::is_same::value) { + constexpr basic_string_view str = S(); + if (str.size() == 2 && str[0] == '{' && str[1] == '}') + return fmt::to_string(detail::first(args...)); + } +#endif constexpr auto compiled = detail::compile(S()); return format(compiled, std::forward(args)...); } @@ -643,10 +667,11 @@ OutputIt format_to(OutputIt out, const S&, const Args&... args) { return format_to(out, compiled, args...); } -template < - typename OutputIt, typename CompiledFormat, typename... 
Args, - FMT_ENABLE_IF(detail::is_output_iterator::value&& std::is_base_of< - detail::basic_compiled_format, CompiledFormat>::value)> +template ::value&& + std::is_base_of::value)> format_to_n_result format_to_n(OutputIt out, size_t n, const CompiledFormat& cf, const Args&... args) { @@ -655,6 +680,16 @@ format_to_n_result format_to_n(OutputIt out, size_t n, return {it.base(), it.count()}; } +template ::value)> +format_to_n_result format_to_n(OutputIt out, size_t n, const S&, + const Args&... args) { + constexpr auto compiled = detail::compile(S()); + auto it = format_to(detail::truncating_iterator(out, n), compiled, + args...); + return {it.base(), it.count()}; +} + template size_t formatted_size(const CompiledFormat& cf, const Args&... args) { return format_to(detail::counting_iterator(), cf, args...).count(); diff --git a/src/fmt/core.h b/src/fmt/core.h index 6d87ab290a..317292288d 100644 --- a/src/fmt/core.h +++ b/src/fmt/core.h @@ -18,7 +18,7 @@ #include // The fmt library version in the form major * 10000 + minor * 100 + patch. 
-#define FMT_VERSION 70003 +#define FMT_VERSION 70100 #ifdef __clang__ # define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) @@ -57,6 +57,7 @@ # define FMT_MSC_VER 0 # define FMT_SUPPRESS_MSC_WARNING(n) #endif + #ifdef __has_feature # define FMT_HAS_FEATURE(x) __has_feature(x) #else @@ -64,7 +65,7 @@ #endif #if defined(__has_include) && !defined(__INTELLISENSE__) && \ - !(FMT_ICC_VERSION && FMT_ICC_VERSION < 1600) + (!FMT_ICC_VERSION || FMT_ICC_VERSION >= 1600) # define FMT_HAS_INCLUDE(x) __has_include(x) #else # define FMT_HAS_INCLUDE(x) 0 @@ -99,7 +100,7 @@ #endif #ifndef FMT_OVERRIDE -# if FMT_HAS_FEATURE(cxx_override) || \ +# if FMT_HAS_FEATURE(cxx_override_control) || \ (FMT_GCC_VERSION >= 408 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1900 # define FMT_OVERRIDE override # else @@ -152,7 +153,7 @@ # if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VER >= 1900 # define FMT_DEPRECATED [[deprecated]] # else -# if defined(__GNUC__) || defined(__clang__) +# if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__) # define FMT_DEPRECATED __attribute__((deprecated)) # elif FMT_MSC_VER # define FMT_DEPRECATED __declspec(deprecated) @@ -177,6 +178,15 @@ # endif #endif +#ifndef FMT_USE_INLINE_NAMESPACES +# if FMT_HAS_FEATURE(cxx_inline_namespaces) || FMT_GCC_VERSION >= 404 || \ + (FMT_MSC_VER >= 1900 && !_MANAGED) +# define FMT_USE_INLINE_NAMESPACES 1 +# else +# define FMT_USE_INLINE_NAMESPACES 0 +# endif +#endif + // LAMMPS customization // use 'v7_lmp' namespace instead of 'v7' so that our // bundled copy does not collide with linking other code @@ -184,8 +194,7 @@ // a different version. 
#ifndef FMT_BEGIN_NAMESPACE -# if FMT_HAS_FEATURE(cxx_inline_namespaces) || FMT_GCC_VERSION >= 404 || \ - FMT_MSC_VER >= 1900 +# if FMT_USE_INLINE_NAMESPACES # define FMT_INLINE_NAMESPACE inline namespace # define FMT_END_NAMESPACE \ } \ @@ -275,8 +284,7 @@ struct monostate {}; namespace detail { -// A helper function to suppress bogus "conditional expression is constant" -// warnings. +// A helper function to suppress "conditional expression is constant" warnings. template constexpr T const_check(T value) { return value; } FMT_NORETURN FMT_API void assert_fail(const char* file, int line, @@ -305,7 +313,8 @@ template struct std_string_view {}; #ifdef FMT_USE_INT128 // Do nothing. -#elif defined(__SIZEOF_INT128__) && !FMT_NVCC && !(FMT_CLANG_VERSION && FMT_MSC_VER) +#elif defined(__SIZEOF_INT128__) && !FMT_NVCC && \ + !(FMT_CLANG_VERSION && FMT_MSC_VER) # define FMT_USE_INT128 1 using int128_t = __int128_t; using uint128_t = __uint128_t; @@ -514,6 +523,18 @@ template struct char_t_impl::value>> { using type = typename result::value_type; }; +// Reports a compile-time error if S is not a valid format string. 
+template ::value)> +FMT_INLINE void check_format_string(const S&) { +#ifdef FMT_ENFORCE_COMPILE_STRING + static_assert(is_compile_string::value, + "FMT_ENFORCE_COMPILE_STRING requires all format strings to use " + "FMT_STRING."); +#endif +} +template ::value)> +void check_format_string(S); + struct error_handler { constexpr error_handler() = default; constexpr error_handler(const error_handler&) = default; @@ -553,8 +574,9 @@ class basic_format_parse_context : private ErrorHandler { using iterator = typename basic_string_view::iterator; explicit constexpr basic_format_parse_context( - basic_string_view format_str, ErrorHandler eh = {}) - : ErrorHandler(eh), format_str_(format_str), next_arg_id_(0) {} + basic_string_view format_str, ErrorHandler eh = {}, + int next_arg_id = 0) + : ErrorHandler(eh), format_str_(format_str), next_arg_id_(next_arg_id) {} /** Returns an iterator to the beginning of the format string range being @@ -624,8 +646,24 @@ template using has_formatter = std::is_constructible>; +// Checks whether T is a container with contiguous storage. +template struct is_contiguous : std::false_type {}; +template +struct is_contiguous> : std::true_type {}; + namespace detail { +// Extracts a reference to the container from back_insert_iterator. +template +inline Container& get_container(std::back_insert_iterator it) { + using bi_iterator = std::back_insert_iterator; + struct accessor : bi_iterator { + accessor(bi_iterator iter) : bi_iterator(iter) {} + using bi_iterator::container; + }; + return *accessor(it).container; +} + /** \rst A contiguous memory buffer with an optional growing ability. It is an internal @@ -648,6 +686,8 @@ template class buffer { size_(sz), capacity_(cap) {} + ~buffer() = default; + /** Sets the buffer data and capacity. 
*/ void set(T* buf_data, size_t buf_capacity) FMT_NOEXCEPT { ptr_ = buf_data; @@ -663,7 +703,6 @@ template class buffer { buffer(const buffer&) = delete; void operator=(const buffer&) = delete; - virtual ~buffer() = default; T* begin() FMT_NOEXCEPT { return ptr_; } T* end() FMT_NOEXCEPT { return ptr_ + size_; } @@ -683,24 +722,26 @@ template class buffer { /** Returns a pointer to the buffer data. */ const T* data() const FMT_NOEXCEPT { return ptr_; } - /** - Resizes the buffer. If T is a POD type new elements may not be initialized. - */ - void resize(size_t new_size) { - reserve(new_size); - size_ = new_size; - } - /** Clears this buffer. */ void clear() { size_ = 0; } - /** Reserves space to store at least *capacity* elements. */ - void reserve(size_t new_capacity) { + // Tries resizing the buffer to contain *count* elements. If T is a POD type + // the new elements may not be initialized. + void try_resize(size_t count) { + try_reserve(count); + size_ = count <= capacity_ ? count : capacity_; + } + + // Tries increasing the buffer capacity to *new_capacity*. It can increase the + // capacity by a smaller amount than requested but guarantees there is space + // for at least one additional element either by increasing the capacity or by + // flushing the buffer if it is full. + void try_reserve(size_t new_capacity) { if (new_capacity > capacity_) grow(new_capacity); } void push_back(const T& value) { - reserve(size_ + 1); + try_reserve(size_ + 1); ptr_[size_++] = value; } @@ -713,32 +754,150 @@ template class buffer { } }; -// A container-backed buffer. 
+struct buffer_traits { + explicit buffer_traits(size_t) {} + size_t count() const { return 0; } + size_t limit(size_t size) { return size; } +}; + +class fixed_buffer_traits { + private: + size_t count_ = 0; + size_t limit_; + + public: + explicit fixed_buffer_traits(size_t limit) : limit_(limit) {} + size_t count() const { return count_; } + size_t limit(size_t size) { + size_t n = limit_ - count_; + count_ += size; + return size < n ? size : n; + } +}; + +// A buffer that writes to an output iterator when flushed. +template +class iterator_buffer final : public Traits, public buffer { + private: + OutputIt out_; + enum { buffer_size = 256 }; + T data_[buffer_size]; + + protected: + void grow(size_t) final FMT_OVERRIDE { + if (this->size() == buffer_size) flush(); + } + void flush(); + + public: + explicit iterator_buffer(OutputIt out, size_t n = buffer_size) + : Traits(n), + buffer(data_, 0, n < size_t(buffer_size) ? n : size_t(buffer_size)), + out_(out) {} + ~iterator_buffer() { flush(); } + + OutputIt out() { + flush(); + return out_; + } + size_t count() const { return Traits::count() + this->size(); } +}; + +template class iterator_buffer final : public buffer { + protected: + void grow(size_t) final FMT_OVERRIDE {} + + public: + explicit iterator_buffer(T* out, size_t = 0) : buffer(out, 0, ~size_t()) {} + + T* out() { return &*this->end(); } +}; + +// A buffer that writes to a container with the contiguous storage. 
template -class container_buffer : public buffer { +class iterator_buffer, + enable_if_t::value, + typename Container::value_type>> + final : public buffer { private: Container& container_; protected: - void grow(size_t capacity) FMT_OVERRIDE { + void grow(size_t capacity) final FMT_OVERRIDE { container_.resize(capacity); this->set(&container_[0], capacity); } public: - explicit container_buffer(Container& c) + explicit iterator_buffer(Container& c) : buffer(c.size()), container_(c) {} + explicit iterator_buffer(std::back_insert_iterator out, size_t = 0) + : iterator_buffer(get_container(out)) {} + std::back_insert_iterator out() { + return std::back_inserter(container_); + } }; -// Extracts a reference to the container from back_insert_iterator. -template -inline Container& get_container(std::back_insert_iterator it) { - using bi_iterator = std::back_insert_iterator; - struct accessor : bi_iterator { - accessor(bi_iterator iter) : bi_iterator(iter) {} - using bi_iterator::container; - }; - return *accessor(it).container; +// A buffer that counts the number of code units written discarding the output. +template class counting_buffer final : public buffer { + private: + enum { buffer_size = 256 }; + T data_[buffer_size]; + size_t count_ = 0; + + protected: + void grow(size_t) final FMT_OVERRIDE { + if (this->size() != buffer_size) return; + count_ += this->size(); + this->clear(); + } + + public: + counting_buffer() : buffer(data_, 0, buffer_size) {} + + size_t count() { return count_ + this->size(); } +}; + +// An output iterator that appends to the buffer. +// It is used to reduce symbol sizes for the common case. 
+template +class buffer_appender : public std::back_insert_iterator> { + using base = std::back_insert_iterator>; + + public: + explicit buffer_appender(buffer& buf) : base(buf) {} + buffer_appender(base it) : base(it) {} + + buffer_appender& operator++() { + base::operator++(); + return *this; + } + + buffer_appender operator++(int) { + buffer_appender tmp = *this; + ++*this; + return tmp; + } +}; + +// Maps an output iterator into a buffer. +template +iterator_buffer get_buffer(OutputIt); +template buffer& get_buffer(buffer_appender); + +template OutputIt get_buffer_init(OutputIt out) { + return out; +} +template buffer& get_buffer_init(buffer_appender out) { + return get_container(out); +} + +template +auto get_iterator(Buffer& buf) -> decltype(buf.out()) { + return buf.out(); +} +template buffer_appender get_iterator(buffer& buf) { + return buffer_appender(buf); } template @@ -767,7 +926,8 @@ template struct named_arg_info { template struct arg_data { // args_[0].named_args points to named_args_ to avoid bloating format_args. - T args_[1 + (NUM_ARGS != 0 ? NUM_ARGS : 1)]; + // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning. + T args_[1 + (NUM_ARGS != 0 ? NUM_ARGS : +1)]; named_arg_info named_args_[NUM_NAMED_ARGS]; template @@ -779,7 +939,8 @@ struct arg_data { template struct arg_data { - T args_[NUM_ARGS != 0 ? NUM_ARGS : 1]; + // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning. + T args_[NUM_ARGS != 0 ? NUM_ARGS : +1]; template FMT_INLINE arg_data(const U&... init) : args_{init...} {} @@ -967,6 +1128,8 @@ enum { long_short = sizeof(long) == sizeof(int) }; using long_type = conditional_t; using ulong_type = conditional_t; +struct unformattable {}; + // Maps formatting arguments to core types. template struct arg_mapper { using char_type = typename Context::char_type; @@ -1075,15 +1238,7 @@ template struct arg_mapper { return map(val.value); } - int map(...) 
{ - constexpr bool formattable = sizeof(Context) == 0; - static_assert( - formattable, - "Cannot format argument. To make type T formattable provide a " - "formatter specialization: " - "https://fmt.dev/latest/api.html#formatting-user-defined-types"); - return 0; - } + unformattable map(...) { return {}; } }; // A type constant after applying arg_mapper. @@ -1207,15 +1362,25 @@ FMT_CONSTEXPR_DECL FMT_INLINE auto visit_format_arg( return vis(monostate()); } -// Checks whether T is a container with contiguous storage. -template struct is_contiguous : std::false_type {}; -template -struct is_contiguous> : std::true_type {}; -template -struct is_contiguous> : std::true_type {}; +template struct formattable : std::false_type {}; namespace detail { +// A workaround for gcc 4.8 to make void_t work in a SFINAE context. +template struct void_t_impl { using type = void; }; +template +using void_t = typename detail::void_t_impl::type; + +template +struct is_output_iterator : std::false_type {}; + +template +struct is_output_iterator< + It, T, + void_t::iterator_category, + decltype(*std::declval() = std::declval())>> + : std::true_type {}; + template struct is_back_insert_iterator : std::false_type {}; template @@ -1227,6 +1392,9 @@ struct is_contiguous_back_insert_iterator : std::false_type {}; template struct is_contiguous_back_insert_iterator> : is_contiguous {}; +template +struct is_contiguous_back_insert_iterator> + : std::true_type {}; // A type-erased reference to an std::locale to avoid heavy include. class locale_ref { @@ -1258,13 +1426,24 @@ FMT_CONSTEXPR basic_format_arg make_arg(const T& value) { return arg; } +template int check(unformattable) { + static_assert( + formattable(), + "Cannot format an argument. 
To make type T formattable provide a " + "formatter specialization: https://fmt.dev/latest/api.html#udt"); + return 0; +} +template inline const U& check(const U& val) { + return val; +} + // The type template parameter is there to avoid an ODR violation when using // a fallback formatter in one translation unit and an implicit conversion in // another (not recommended). template inline value make_arg(const T& val) { - return arg_mapper().map(val); + return check(arg_mapper().map(val)); } template class basic_format_context { template using buffer_context = - basic_format_context>, Char>; + basic_format_context, Char>; using format_context = buffer_context; using wformat_context = buffer_context; -// Workaround a bug in gcc: https://stackoverflow.com/q/62767544/471164. +// Workaround an alias issue: https://stackoverflow.com/q/62767544/471164. #define FMT_BUFFER_CONTEXT(Char) \ - basic_format_context>, Char> + basic_format_context, Char> /** \rst @@ -1422,7 +1601,7 @@ class format_arg_store /** \rst - Constructs an `~fmt::format_arg_store` object that contains references to + Constructs a `~fmt::format_arg_store` object that contains references to arguments and can be implicitly converted to `~fmt::format_args`. `Context` can be omitted in which case it defaults to `~fmt::context`. See `~fmt::arg` for lifetime considerations. @@ -1434,6 +1613,27 @@ inline format_arg_store make_format_args( return {args...}; } +/** + \rst + Constructs a `~fmt::format_arg_store` object that contains references + to arguments and can be implicitly converted to `~fmt::format_args`. + If ``format_str`` is a compile-time string then `make_args_checked` checks + its validity at compile time. + \endrst + */ +template > +inline auto make_args_checked(const S& format_str, + const remove_reference_t&... 
args) + -> format_arg_store, remove_reference_t...> { + static_assert( + detail::count<( + std::is_base_of>::value && + std::is_reference::value)...>() == 0, + "passing views as lvalues is disallowed"); + detail::check_format_string(format_str); + return {args...}; +} + /** \rst Returns a named argument to be used in a formatting function. It should only @@ -1749,29 +1949,6 @@ struct wformat_args : basic_format_args { namespace detail { -// Reports a compile-time error if S is not a valid format string. -template ::value)> -FMT_INLINE void check_format_string(const S&) { -#ifdef FMT_ENFORCE_COMPILE_STRING - static_assert(is_compile_string::value, - "FMT_ENFORCE_COMPILE_STRING requires all format strings to use " - "FMT_STRING."); -#endif -} -template ::value)> -void check_format_string(S); - -template > -inline format_arg_store, remove_reference_t...> -make_args_checked(const S& format_str, - const remove_reference_t&... args) { - static_assert(count<(std::is_base_of>::value && - std::is_reference::value)...>() == 0, - "passing views as lvalues is disallowed"); - check_format_string(format_str); - return {args...}; -} - template ::value)> std::basic_string vformat( basic_string_view format_str, @@ -1780,9 +1957,10 @@ std::basic_string vformat( FMT_API std::string vformat(string_view format_str, format_args args); template -typename FMT_BUFFER_CONTEXT(Char)::iterator vformat_to( +void vformat_to( buffer& buf, basic_string_view format_str, - basic_format_args)> args); + basic_format_args)> args, + detail::locale_ref loc = {}); template ::value)> @@ -1797,26 +1975,80 @@ inline void vprint_mojibake(std::FILE*, string_view, format_args) {} /** Formats a string and writes the output to ``out``. */ // GCC 8 and earlier cannot handle std::back_insert_iterator with // vformat_to(...) overload, so SFINAE on iterator type instead. 
-template < - typename OutputIt, typename S, typename Char = char_t, - FMT_ENABLE_IF(detail::is_contiguous_back_insert_iterator::value)> +template , + FMT_ENABLE_IF(detail::is_output_iterator::value)> OutputIt vformat_to( OutputIt out, const S& format_str, basic_format_args>> args) { - auto& c = detail::get_container(out); - detail::container_buffer> buf(c); + decltype(detail::get_buffer(out)) buf(detail::get_buffer_init(out)); detail::vformat_to(buf, to_string_view(format_str), args); - return out; + return detail::get_iterator(buf); } -template ::value&& detail::is_string::value)> -inline std::back_insert_iterator format_to( - std::back_insert_iterator out, const S& format_str, - Args&&... args) { - return vformat_to(out, to_string_view(format_str), - detail::make_args_checked(format_str, args...)); +/** + \rst + Formats arguments, writes the result to the output iterator ``out`` and returns + the iterator past the end of the output range. + + **Example**:: + + std::vector out; + fmt::format_to(std::back_inserter(out), "{}", 42); + \endrst + */ +// We cannot use FMT_ENABLE_IF because of a bug in gcc 8.3. +template >::value> +inline auto format_to(OutputIt out, const S& format_str, Args&&... args) -> + typename std::enable_if::type { + const auto& vargs = fmt::make_args_checked(format_str, args...); + return vformat_to(out, to_string_view(format_str), vargs); +} + +template struct format_to_n_result { + /** Iterator past the end of the output range. */ + OutputIt out; + /** Total (not truncated) output size. 
*/ + size_t size; +}; + +template ::value)> +inline format_to_n_result vformat_to_n( + OutputIt out, size_t n, basic_string_view format_str, + basic_format_args>> args) { + detail::iterator_buffer buf(out, + n); + detail::vformat_to(buf, format_str, args); + return {buf.out(), buf.count()}; +} + +/** + \rst + Formats arguments, writes up to ``n`` characters of the result to the output + iterator ``out`` and returns the total output size and the iterator past the + end of the output range. + \endrst + */ +template >::value)> +inline format_to_n_result format_to_n(OutputIt out, size_t n, + const S& format_str, + const Args&... args) { + const auto& vargs = fmt::make_args_checked(format_str, args...); + return vformat_to_n(out, n, to_string_view(format_str), vargs); +} + +/** + Returns the number of characters in the output of + ``format(format_str, args...)``. + */ +template +inline size_t formatted_size(string_view format_str, Args&&... args) { + const auto& vargs = fmt::make_args_checked(format_str, args...); + detail::counting_buffer<> buf; + detail::vformat_to(buf, format_str, vargs); + return buf.count(); } template > @@ -1840,7 +2072,7 @@ FMT_INLINE std::basic_string vformat( // std::basic_string> to reduce the symbol size. template > FMT_INLINE std::basic_string format(const S& format_str, Args&&... args) { - const auto& vargs = detail::make_args_checked(format_str, args...); + const auto& vargs = fmt::make_args_checked(format_str, args...); return detail::vformat(to_string_view(format_str), vargs); } @@ -1860,7 +2092,7 @@ FMT_API void vprint(std::FILE*, string_view, format_args); */ template > inline void print(std::FILE* f, const S& format_str, Args&&... args) { - const auto& vargs = detail::make_args_checked(format_str, args...); + const auto& vargs = fmt::make_args_checked(format_str, args...); return detail::is_unicode() ? 
vprint(f, to_string_view(format_str), vargs) : detail::vprint_mojibake(f, to_string_view(format_str), vargs); @@ -1879,7 +2111,7 @@ inline void print(std::FILE* f, const S& format_str, Args&&... args) { */ template > inline void print(const S& format_str, Args&&... args) { - const auto& vargs = detail::make_args_checked(format_str, args...); + const auto& vargs = fmt::make_args_checked(format_str, args...); return detail::is_unicode() ? vprint(to_string_view(format_str), vargs) : detail::vprint_mojibake(stdout, to_string_view(format_str), diff --git a/src/fmt/format-inl.h b/src/fmt/format-inl.h index d8c9c8a5ee..b7cb3209c8 100644 --- a/src/fmt/format-inl.h +++ b/src/fmt/format-inl.h @@ -13,32 +13,19 @@ #include #include #include -#include // for std::memmove +#include // std::memmove #include #include -#include "format.h" -#if !defined(FMT_STATIC_THOUSANDS_SEPARATOR) +#ifndef FMT_STATIC_THOUSANDS_SEPARATOR # include #endif #ifdef _WIN32 -# if !defined(NOMINMAX) && !defined(WIN32_LEAN_AND_MEAN) -# define NOMINMAX -# define WIN32_LEAN_AND_MEAN -# include -# undef WIN32_LEAN_AND_MEAN -# undef NOMINMAX -# else -# include -# endif -# include +# include // _isatty #endif -#ifdef _MSC_VER -# pragma warning(push) -# pragma warning(disable : 4702) // unreachable code -#endif +#include "format.h" // Dummy implementations of strerror_r and strerror_s called if corresponding // system functions are not available. @@ -79,8 +66,8 @@ inline int fmt_snprintf(char* buffer, size_t size, const char* format, ...) { // ERANGE - buffer is not large enough to store the error message // other - failure // Buffer should be at least of size 1. 
-FMT_FUNC int safe_strerror(int error_code, char*& buffer, - size_t buffer_size) FMT_NOEXCEPT { +inline int safe_strerror(int error_code, char*& buffer, + size_t buffer_size) FMT_NOEXCEPT { FMT_ASSERT(buffer != nullptr && buffer_size != 0, "invalid buffer"); class dispatcher { @@ -145,7 +132,7 @@ FMT_FUNC void format_error_code(detail::buffer& out, int error_code, // Report error code making sure that the output fits into // inline_buffer_size to avoid dynamic memory allocation and potential // bad_alloc. - out.resize(0); + out.try_resize(0); static const char SEP[] = ": "; static const char ERROR_STR[] = "error "; // Subtract 2 to account for terminating null characters in SEP and ERROR_STR. @@ -156,7 +143,7 @@ FMT_FUNC void format_error_code(detail::buffer& out, int error_code, ++error_code_size; } error_code_size += detail::to_unsigned(detail::count_digits(abs_value)); - auto it = std::back_inserter(out); + auto it = buffer_appender(out); if (message.size() <= inline_buffer_size - error_code_size) format_to(it, "{}{}", message, SEP); format_to(it, "{}{}", ERROR_STR, error_code); @@ -173,8 +160,8 @@ FMT_FUNC void report_error(format_func func, int error_code, } // A wrapper around fwrite that throws on error. 
-FMT_FUNC void fwrite_fully(const void* ptr, size_t size, size_t count, - FILE* stream) { +inline void fwrite_fully(const void* ptr, size_t size, size_t count, + FILE* stream) { size_t written = std::fwrite(ptr, size, count, stream); if (written < count) FMT_THROW(system_error(errno, "cannot write to file")); } @@ -242,26 +229,23 @@ template <> FMT_FUNC int count_digits<4>(detail::fallback_uintptr n) { template const typename basic_data::digit_pair basic_data::digits[] = { - {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, - {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, - {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'}, - {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'}, - {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'}, - {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'}, - {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, - {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, - {'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'}, - {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'}, - {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'}, - {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, - {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, - {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, - {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'}, - {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'}, - {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'}, - {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, - {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, - {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}}; + {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'}, + {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'}, + {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'}, + {'1', '8'}, {'1', '9'}, {'2', '0'}, {'2', '1'}, {'2', 
'2'}, {'2', '3'}, + {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'}, + {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'}, + {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'}, + {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'}, + {'4', '8'}, {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, + {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, + {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'}, + {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, {'7', '0'}, {'7', '1'}, + {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'}, + {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, + {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, + {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'}, + {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}}; template const char basic_data::hex_digits[] = "0123456789abcdef"; @@ -277,18 +261,18 @@ const uint64_t basic_data::powers_of_10_64[] = { 10000000000000000000ULL}; template -const uint32_t basic_data::zero_or_powers_of_10_32[] = {0, +const uint32_t basic_data::zero_or_powers_of_10_32[] = {0, 0, FMT_POWERS_OF_10(1)}; template const uint64_t basic_data::zero_or_powers_of_10_64[] = { - 0, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL), + 0, 0, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL), 10000000000000000000ULL}; // Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340. // These are generated by support/compute-powers.py. 
template -const uint64_t basic_data::pow10_significands[] = { +const uint64_t basic_data::grisu_pow10_significands[] = { 0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76, 0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df, 0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c, @@ -323,7 +307,7 @@ const uint64_t basic_data::pow10_significands[] = { // Binary exponents of pow(10, k), for k = -348, -340, ..., 340, corresponding // to significands above. template -const int16_t basic_data::pow10_exponents[] = { +const int16_t basic_data::grisu_pow10_exponents[] = { -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, -927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661, -635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369, @@ -333,6 +317,744 @@ const int16_t basic_data::pow10_exponents[] = { 534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800, 827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066}; +template +const divtest_table_entry basic_data::divtest_table_for_pow5_32[] = + {{0x00000001, 0xffffffff}, {0xcccccccd, 0x33333333}, + {0xc28f5c29, 0x0a3d70a3}, {0x26e978d5, 0x020c49ba}, + {0x3afb7e91, 0x0068db8b}, {0x0bcbe61d, 0x0014f8b5}, + {0x68c26139, 0x000431bd}, {0xae8d46a5, 0x0000d6bf}, + {0x22e90e21, 0x00002af3}, {0x3a2e9c6d, 0x00000897}, + {0x3ed61f49, 0x000001b7}}; + +template +const divtest_table_entry basic_data::divtest_table_for_pow5_64[] = + {{0x0000000000000001, 0xffffffffffffffff}, + {0xcccccccccccccccd, 0x3333333333333333}, + {0x8f5c28f5c28f5c29, 0x0a3d70a3d70a3d70}, + {0x1cac083126e978d5, 0x020c49ba5e353f7c}, + {0xd288ce703afb7e91, 0x0068db8bac710cb2}, + {0x5d4e8fb00bcbe61d, 0x0014f8b588e368f0}, + {0x790fb65668c26139, 0x000431bde82d7b63}, + {0xe5032477ae8d46a5, 0x0000d6bf94d5e57a}, + {0xc767074b22e90e21, 0x00002af31dc46118}, + {0x8e47ce423a2e9c6d, 0x0000089705f4136b}, + {0x4fa7f60d3ed61f49, 0x000001b7cdfd9d7b}, + {0x0fee64690c913975, 0x00000057f5ff85e5}, + {0x3662e0e1cf503eb1, 
0x000000119799812d}, + {0xa47a2cf9f6433fbd, 0x0000000384b84d09}, + {0x54186f653140a659, 0x00000000b424dc35}, + {0x7738164770402145, 0x0000000024075f3d}, + {0xe4a4d1417cd9a041, 0x000000000734aca5}, + {0xc75429d9e5c5200d, 0x000000000170ef54}, + {0xc1773b91fac10669, 0x000000000049c977}, + {0x26b172506559ce15, 0x00000000000ec1e4}, + {0xd489e3a9addec2d1, 0x000000000002f394}, + {0x90e860bb892c8d5d, 0x000000000000971d}, + {0x502e79bf1b6f4f79, 0x0000000000001e39}, + {0xdcd618596be30fe5, 0x000000000000060b}}; + +template +const uint64_t basic_data::dragonbox_pow10_significands_64[] = { + 0x81ceb32c4b43fcf5, 0xa2425ff75e14fc32, 0xcad2f7f5359a3b3f, + 0xfd87b5f28300ca0e, 0x9e74d1b791e07e49, 0xc612062576589ddb, + 0xf79687aed3eec552, 0x9abe14cd44753b53, 0xc16d9a0095928a28, + 0xf1c90080baf72cb2, 0x971da05074da7bef, 0xbce5086492111aeb, + 0xec1e4a7db69561a6, 0x9392ee8e921d5d08, 0xb877aa3236a4b44a, + 0xe69594bec44de15c, 0x901d7cf73ab0acda, 0xb424dc35095cd810, + 0xe12e13424bb40e14, 0x8cbccc096f5088cc, 0xafebff0bcb24aaff, + 0xdbe6fecebdedd5bf, 0x89705f4136b4a598, 0xabcc77118461cefd, + 0xd6bf94d5e57a42bd, 0x8637bd05af6c69b6, 0xa7c5ac471b478424, + 0xd1b71758e219652c, 0x83126e978d4fdf3c, 0xa3d70a3d70a3d70b, + 0xcccccccccccccccd, 0x8000000000000000, 0xa000000000000000, + 0xc800000000000000, 0xfa00000000000000, 0x9c40000000000000, + 0xc350000000000000, 0xf424000000000000, 0x9896800000000000, + 0xbebc200000000000, 0xee6b280000000000, 0x9502f90000000000, + 0xba43b74000000000, 0xe8d4a51000000000, 0x9184e72a00000000, + 0xb5e620f480000000, 0xe35fa931a0000000, 0x8e1bc9bf04000000, + 0xb1a2bc2ec5000000, 0xde0b6b3a76400000, 0x8ac7230489e80000, + 0xad78ebc5ac620000, 0xd8d726b7177a8000, 0x878678326eac9000, + 0xa968163f0a57b400, 0xd3c21bcecceda100, 0x84595161401484a0, + 0xa56fa5b99019a5c8, 0xcecb8f27f4200f3a, 0x813f3978f8940984, + 0xa18f07d736b90be5, 0xc9f2c9cd04674ede, 0xfc6f7c4045812296, + 0x9dc5ada82b70b59d, 0xc5371912364ce305, 0xf684df56c3e01bc6, + 0x9a130b963a6c115c, 0xc097ce7bc90715b3, 
0xf0bdc21abb48db20, + 0x96769950b50d88f4, 0xbc143fa4e250eb31, 0xeb194f8e1ae525fd, + 0x92efd1b8d0cf37be, 0xb7abc627050305ad, 0xe596b7b0c643c719, + 0x8f7e32ce7bea5c6f, 0xb35dbf821ae4f38b, 0xe0352f62a19e306e}; + +template +const uint128_wrapper basic_data::dragonbox_pow10_significands_128[] = { +#if FMT_USE_FULL_CACHE_DRAGONBOX + {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, + {0x9faacf3df73609b1, 0x77b191618c54e9ad}, + {0xc795830d75038c1d, 0xd59df5b9ef6a2418}, + {0xf97ae3d0d2446f25, 0x4b0573286b44ad1e}, + {0x9becce62836ac577, 0x4ee367f9430aec33}, + {0xc2e801fb244576d5, 0x229c41f793cda740}, + {0xf3a20279ed56d48a, 0x6b43527578c11110}, + {0x9845418c345644d6, 0x830a13896b78aaaa}, + {0xbe5691ef416bd60c, 0x23cc986bc656d554}, + {0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa9}, + {0x94b3a202eb1c3f39, 0x7bf7d71432f3d6aa}, + {0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc54}, + {0xe858ad248f5c22c9, 0xd1b3400f8f9cff69}, + {0x91376c36d99995be, 0x23100809b9c21fa2}, + {0xb58547448ffffb2d, 0xabd40a0c2832a78b}, + {0xe2e69915b3fff9f9, 0x16c90c8f323f516d}, + {0x8dd01fad907ffc3b, 0xae3da7d97f6792e4}, + {0xb1442798f49ffb4a, 0x99cd11cfdf41779d}, + {0xdd95317f31c7fa1d, 0x40405643d711d584}, + {0x8a7d3eef7f1cfc52, 0x482835ea666b2573}, + {0xad1c8eab5ee43b66, 0xda3243650005eed0}, + {0xd863b256369d4a40, 0x90bed43e40076a83}, + {0x873e4f75e2224e68, 0x5a7744a6e804a292}, + {0xa90de3535aaae202, 0x711515d0a205cb37}, + {0xd3515c2831559a83, 0x0d5a5b44ca873e04}, + {0x8412d9991ed58091, 0xe858790afe9486c3}, + {0xa5178fff668ae0b6, 0x626e974dbe39a873}, + {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, + {0x80fa687f881c7f8e, 0x7ce66634bc9d0b9a}, + {0xa139029f6a239f72, 0x1c1fffc1ebc44e81}, + {0xc987434744ac874e, 0xa327ffb266b56221}, + {0xfbe9141915d7a922, 0x4bf1ff9f0062baa9}, + {0x9d71ac8fada6c9b5, 0x6f773fc3603db4aa}, + {0xc4ce17b399107c22, 0xcb550fb4384d21d4}, + {0xf6019da07f549b2b, 0x7e2a53a146606a49}, + {0x99c102844f94e0fb, 0x2eda7444cbfc426e}, + {0xc0314325637a1939, 0xfa911155fefb5309}, + {0xf03d93eebc589f88, 0x793555ab7eba27cb}, 
+ {0x96267c7535b763b5, 0x4bc1558b2f3458df}, + {0xbbb01b9283253ca2, 0x9eb1aaedfb016f17}, + {0xea9c227723ee8bcb, 0x465e15a979c1cadd}, + {0x92a1958a7675175f, 0x0bfacd89ec191eca}, + {0xb749faed14125d36, 0xcef980ec671f667c}, + {0xe51c79a85916f484, 0x82b7e12780e7401b}, + {0x8f31cc0937ae58d2, 0xd1b2ecb8b0908811}, + {0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa16}, + {0xdfbdcece67006ac9, 0x67a791e093e1d49b}, + {0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e1}, + {0xaecc49914078536d, 0x58fae9f773886e19}, + {0xda7f5bf590966848, 0xaf39a475506a899f}, + {0x888f99797a5e012d, 0x6d8406c952429604}, + {0xaab37fd7d8f58178, 0xc8e5087ba6d33b84}, + {0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a65}, + {0x855c3be0a17fcd26, 0x5cf2eea09a550680}, + {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f}, + {0xd0601d8efc57b08b, 0xf13b94daf124da27}, + {0x823c12795db6ce57, 0x76c53d08d6b70859}, + {0xa2cb1717b52481ed, 0x54768c4b0c64ca6f}, + {0xcb7ddcdda26da268, 0xa9942f5dcf7dfd0a}, + {0xfe5d54150b090b02, 0xd3f93b35435d7c4d}, + {0x9efa548d26e5a6e1, 0xc47bc5014a1a6db0}, + {0xc6b8e9b0709f109a, 0x359ab6419ca1091c}, + {0xf867241c8cc6d4c0, 0xc30163d203c94b63}, + {0x9b407691d7fc44f8, 0x79e0de63425dcf1e}, + {0xc21094364dfb5636, 0x985915fc12f542e5}, + {0xf294b943e17a2bc4, 0x3e6f5b7b17b2939e}, + {0x979cf3ca6cec5b5a, 0xa705992ceecf9c43}, + {0xbd8430bd08277231, 0x50c6ff782a838354}, + {0xece53cec4a314ebd, 0xa4f8bf5635246429}, + {0x940f4613ae5ed136, 0x871b7795e136be9a}, + {0xb913179899f68584, 0x28e2557b59846e40}, + {0xe757dd7ec07426e5, 0x331aeada2fe589d0}, + {0x9096ea6f3848984f, 0x3ff0d2c85def7622}, + {0xb4bca50b065abe63, 0x0fed077a756b53aa}, + {0xe1ebce4dc7f16dfb, 0xd3e8495912c62895}, + {0x8d3360f09cf6e4bd, 0x64712dd7abbbd95d}, + {0xb080392cc4349dec, 0xbd8d794d96aacfb4}, + {0xdca04777f541c567, 0xecf0d7a0fc5583a1}, + {0x89e42caaf9491b60, 0xf41686c49db57245}, + {0xac5d37d5b79b6239, 0x311c2875c522ced6}, + {0xd77485cb25823ac7, 0x7d633293366b828c}, + {0x86a8d39ef77164bc, 0xae5dff9c02033198}, + {0xa8530886b54dbdeb, 0xd9f57f830283fdfd}, + 
{0xd267caa862a12d66, 0xd072df63c324fd7c}, + {0x8380dea93da4bc60, 0x4247cb9e59f71e6e}, + {0xa46116538d0deb78, 0x52d9be85f074e609}, + {0xcd795be870516656, 0x67902e276c921f8c}, + {0x806bd9714632dff6, 0x00ba1cd8a3db53b7}, + {0xa086cfcd97bf97f3, 0x80e8a40eccd228a5}, + {0xc8a883c0fdaf7df0, 0x6122cd128006b2ce}, + {0xfad2a4b13d1b5d6c, 0x796b805720085f82}, + {0x9cc3a6eec6311a63, 0xcbe3303674053bb1}, + {0xc3f490aa77bd60fc, 0xbedbfc4411068a9d}, + {0xf4f1b4d515acb93b, 0xee92fb5515482d45}, + {0x991711052d8bf3c5, 0x751bdd152d4d1c4b}, + {0xbf5cd54678eef0b6, 0xd262d45a78a0635e}, + {0xef340a98172aace4, 0x86fb897116c87c35}, + {0x9580869f0e7aac0e, 0xd45d35e6ae3d4da1}, + {0xbae0a846d2195712, 0x8974836059cca10a}, + {0xe998d258869facd7, 0x2bd1a438703fc94c}, + {0x91ff83775423cc06, 0x7b6306a34627ddd0}, + {0xb67f6455292cbf08, 0x1a3bc84c17b1d543}, + {0xe41f3d6a7377eeca, 0x20caba5f1d9e4a94}, + {0x8e938662882af53e, 0x547eb47b7282ee9d}, + {0xb23867fb2a35b28d, 0xe99e619a4f23aa44}, + {0xdec681f9f4c31f31, 0x6405fa00e2ec94d5}, + {0x8b3c113c38f9f37e, 0xde83bc408dd3dd05}, + {0xae0b158b4738705e, 0x9624ab50b148d446}, + {0xd98ddaee19068c76, 0x3badd624dd9b0958}, + {0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d7}, + {0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4d}, + {0xd47487cc8470652b, 0x7647c32000696720}, + {0x84c8d4dfd2c63f3b, 0x29ecd9f40041e074}, + {0xa5fb0a17c777cf09, 0xf468107100525891}, + {0xcf79cc9db955c2cc, 0x7182148d4066eeb5}, + {0x81ac1fe293d599bf, 0xc6f14cd848405531}, + {0xa21727db38cb002f, 0xb8ada00e5a506a7d}, + {0xca9cf1d206fdc03b, 0xa6d90811f0e4851d}, + {0xfd442e4688bd304a, 0x908f4a166d1da664}, + {0x9e4a9cec15763e2e, 0x9a598e4e043287ff}, + {0xc5dd44271ad3cdba, 0x40eff1e1853f29fe}, + {0xf7549530e188c128, 0xd12bee59e68ef47d}, + {0x9a94dd3e8cf578b9, 0x82bb74f8301958cf}, + {0xc13a148e3032d6e7, 0xe36a52363c1faf02}, + {0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac2}, + {0x96f5600f15a7b7e5, 0x29ab103a5ef8c0ba}, + {0xbcb2b812db11a5de, 0x7415d448f6b6f0e8}, + {0xebdf661791d60f56, 0x111b495b3464ad22}, + 
{0x936b9fcebb25c995, 0xcab10dd900beec35}, + {0xb84687c269ef3bfb, 0x3d5d514f40eea743}, + {0xe65829b3046b0afa, 0x0cb4a5a3112a5113}, + {0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ac}, + {0xb3f4e093db73a093, 0x59ed216765690f57}, + {0xe0f218b8d25088b8, 0x306869c13ec3532d}, + {0x8c974f7383725573, 0x1e414218c73a13fc}, + {0xafbd2350644eeacf, 0xe5d1929ef90898fb}, + {0xdbac6c247d62a583, 0xdf45f746b74abf3a}, + {0x894bc396ce5da772, 0x6b8bba8c328eb784}, + {0xab9eb47c81f5114f, 0x066ea92f3f326565}, + {0xd686619ba27255a2, 0xc80a537b0efefebe}, + {0x8613fd0145877585, 0xbd06742ce95f5f37}, + {0xa798fc4196e952e7, 0x2c48113823b73705}, + {0xd17f3b51fca3a7a0, 0xf75a15862ca504c6}, + {0x82ef85133de648c4, 0x9a984d73dbe722fc}, + {0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebbb}, + {0xcc963fee10b7d1b3, 0x318df905079926a9}, + {0xffbbcfe994e5c61f, 0xfdf17746497f7053}, + {0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa634}, + {0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc1}, + {0xf9bd690a1b68637b, 0x3dfdce7aa3c673b1}, + {0x9c1661a651213e2d, 0x06bea10ca65c084f}, + {0xc31bfa0fe5698db8, 0x486e494fcff30a63}, + {0xf3e2f893dec3f126, 0x5a89dba3c3efccfb}, + {0x986ddb5c6b3a76b7, 0xf89629465a75e01d}, + {0xbe89523386091465, 0xf6bbb397f1135824}, + {0xee2ba6c0678b597f, 0x746aa07ded582e2d}, + {0x94db483840b717ef, 0xa8c2a44eb4571cdd}, + {0xba121a4650e4ddeb, 0x92f34d62616ce414}, + {0xe896a0d7e51e1566, 0x77b020baf9c81d18}, + {0x915e2486ef32cd60, 0x0ace1474dc1d122f}, + {0xb5b5ada8aaff80b8, 0x0d819992132456bb}, + {0xe3231912d5bf60e6, 0x10e1fff697ed6c6a}, + {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2}, + {0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb3}, + {0xddd0467c64bce4a0, 0xac7cb3f6d05ddbdf}, + {0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96c}, + {0xad4ab7112eb3929d, 0x86c16c98d2c953c7}, + {0xd89d64d57a607744, 0xe871c7bf077ba8b8}, + {0x87625f056c7c4a8b, 0x11471cd764ad4973}, + {0xa93af6c6c79b5d2d, 0xd598e40d3dd89bd0}, + {0xd389b47879823479, 0x4aff1d108d4ec2c4}, + {0x843610cb4bf160cb, 0xcedf722a585139bb}, + {0xa54394fe1eedb8fe, 0xc2974eb4ee658829}, + 
{0xce947a3da6a9273e, 0x733d226229feea33}, + {0x811ccc668829b887, 0x0806357d5a3f5260}, + {0xa163ff802a3426a8, 0xca07c2dcb0cf26f8}, + {0xc9bcff6034c13052, 0xfc89b393dd02f0b6}, + {0xfc2c3f3841f17c67, 0xbbac2078d443ace3}, + {0x9d9ba7832936edc0, 0xd54b944b84aa4c0e}, + {0xc5029163f384a931, 0x0a9e795e65d4df12}, + {0xf64335bcf065d37d, 0x4d4617b5ff4a16d6}, + {0x99ea0196163fa42e, 0x504bced1bf8e4e46}, + {0xc06481fb9bcf8d39, 0xe45ec2862f71e1d7}, + {0xf07da27a82c37088, 0x5d767327bb4e5a4d}, + {0x964e858c91ba2655, 0x3a6a07f8d510f870}, + {0xbbe226efb628afea, 0x890489f70a55368c}, + {0xeadab0aba3b2dbe5, 0x2b45ac74ccea842f}, + {0x92c8ae6b464fc96f, 0x3b0b8bc90012929e}, + {0xb77ada0617e3bbcb, 0x09ce6ebb40173745}, + {0xe55990879ddcaabd, 0xcc420a6a101d0516}, + {0x8f57fa54c2a9eab6, 0x9fa946824a12232e}, + {0xb32df8e9f3546564, 0x47939822dc96abfa}, + {0xdff9772470297ebd, 0x59787e2b93bc56f8}, + {0x8bfbea76c619ef36, 0x57eb4edb3c55b65b}, + {0xaefae51477a06b03, 0xede622920b6b23f2}, + {0xdab99e59958885c4, 0xe95fab368e45ecee}, + {0x88b402f7fd75539b, 0x11dbcb0218ebb415}, + {0xaae103b5fcd2a881, 0xd652bdc29f26a11a}, + {0xd59944a37c0752a2, 0x4be76d3346f04960}, + {0x857fcae62d8493a5, 0x6f70a4400c562ddc}, + {0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb953}, + {0xd097ad07a71f26b2, 0x7e2000a41346a7a8}, + {0x825ecc24c873782f, 0x8ed400668c0c28c9}, + {0xa2f67f2dfa90563b, 0x728900802f0f32fb}, + {0xcbb41ef979346bca, 0x4f2b40a03ad2ffba}, + {0xfea126b7d78186bc, 0xe2f610c84987bfa9}, + {0x9f24b832e6b0f436, 0x0dd9ca7d2df4d7ca}, + {0xc6ede63fa05d3143, 0x91503d1c79720dbc}, + {0xf8a95fcf88747d94, 0x75a44c6397ce912b}, + {0x9b69dbe1b548ce7c, 0xc986afbe3ee11abb}, + {0xc24452da229b021b, 0xfbe85badce996169}, + {0xf2d56790ab41c2a2, 0xfae27299423fb9c4}, + {0x97c560ba6b0919a5, 0xdccd879fc967d41b}, + {0xbdb6b8e905cb600f, 0x5400e987bbc1c921}, + {0xed246723473e3813, 0x290123e9aab23b69}, + {0x9436c0760c86e30b, 0xf9a0b6720aaf6522}, + {0xb94470938fa89bce, 0xf808e40e8d5b3e6a}, + {0xe7958cb87392c2c2, 0xb60b1d1230b20e05}, + 
{0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c3}, + {0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af4}, + {0xe2280b6c20dd5232, 0x25c6da63c38de1b1}, + {0x8d590723948a535f, 0x579c487e5a38ad0f}, + {0xb0af48ec79ace837, 0x2d835a9df0c6d852}, + {0xdcdb1b2798182244, 0xf8e431456cf88e66}, + {0x8a08f0f8bf0f156b, 0x1b8e9ecb641b5900}, + {0xac8b2d36eed2dac5, 0xe272467e3d222f40}, + {0xd7adf884aa879177, 0x5b0ed81dcc6abb10}, + {0x86ccbb52ea94baea, 0x98e947129fc2b4ea}, + {0xa87fea27a539e9a5, 0x3f2398d747b36225}, + {0xd29fe4b18e88640e, 0x8eec7f0d19a03aae}, + {0x83a3eeeef9153e89, 0x1953cf68300424ad}, + {0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd8}, + {0xcdb02555653131b6, 0x3792f412cb06794e}, + {0x808e17555f3ebf11, 0xe2bbd88bbee40bd1}, + {0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec5}, + {0xc8de047564d20a8b, 0xf245825a5a445276}, + {0xfb158592be068d2e, 0xeed6e2f0f0d56713}, + {0x9ced737bb6c4183d, 0x55464dd69685606c}, + {0xc428d05aa4751e4c, 0xaa97e14c3c26b887}, + {0xf53304714d9265df, 0xd53dd99f4b3066a9}, + {0x993fe2c6d07b7fab, 0xe546a8038efe402a}, + {0xbf8fdb78849a5f96, 0xde98520472bdd034}, + {0xef73d256a5c0f77c, 0x963e66858f6d4441}, + {0x95a8637627989aad, 0xdde7001379a44aa9}, + {0xbb127c53b17ec159, 0x5560c018580d5d53}, + {0xe9d71b689dde71af, 0xaab8f01e6e10b4a7}, + {0x9226712162ab070d, 0xcab3961304ca70e9}, + {0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d23}, + {0xe45c10c42a2b3b05, 0x8cb89a7db77c506b}, + {0x8eb98a7a9a5b04e3, 0x77f3608e92adb243}, + {0xb267ed1940f1c61c, 0x55f038b237591ed4}, + {0xdf01e85f912e37a3, 0x6b6c46dec52f6689}, + {0x8b61313bbabce2c6, 0x2323ac4b3b3da016}, + {0xae397d8aa96c1b77, 0xabec975e0a0d081b}, + {0xd9c7dced53c72255, 0x96e7bd358c904a22}, + {0x881cea14545c7575, 0x7e50d64177da2e55}, + {0xaa242499697392d2, 0xdde50bd1d5d0b9ea}, + {0xd4ad2dbfc3d07787, 0x955e4ec64b44e865}, + {0x84ec3c97da624ab4, 0xbd5af13bef0b113f}, + {0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58f}, + {0xcfb11ead453994ba, 0x67de18eda5814af3}, + {0x81ceb32c4b43fcf4, 0x80eacf948770ced8}, + {0xa2425ff75e14fc31, 0xa1258379a94d028e}, + 
{0xcad2f7f5359a3b3e, 0x096ee45813a04331}, + {0xfd87b5f28300ca0d, 0x8bca9d6e188853fd}, + {0x9e74d1b791e07e48, 0x775ea264cf55347e}, + {0xc612062576589dda, 0x95364afe032a819e}, + {0xf79687aed3eec551, 0x3a83ddbd83f52205}, + {0x9abe14cd44753b52, 0xc4926a9672793543}, + {0xc16d9a0095928a27, 0x75b7053c0f178294}, + {0xf1c90080baf72cb1, 0x5324c68b12dd6339}, + {0x971da05074da7bee, 0xd3f6fc16ebca5e04}, + {0xbce5086492111aea, 0x88f4bb1ca6bcf585}, + {0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6}, + {0x9392ee8e921d5d07, 0x3aff322e62439fd0}, + {0xb877aa3236a4b449, 0x09befeb9fad487c3}, + {0xe69594bec44de15b, 0x4c2ebe687989a9b4}, + {0x901d7cf73ab0acd9, 0x0f9d37014bf60a11}, + {0xb424dc35095cd80f, 0x538484c19ef38c95}, + {0xe12e13424bb40e13, 0x2865a5f206b06fba}, + {0x8cbccc096f5088cb, 0xf93f87b7442e45d4}, + {0xafebff0bcb24aafe, 0xf78f69a51539d749}, + {0xdbe6fecebdedd5be, 0xb573440e5a884d1c}, + {0x89705f4136b4a597, 0x31680a88f8953031}, + {0xabcc77118461cefc, 0xfdc20d2b36ba7c3e}, + {0xd6bf94d5e57a42bc, 0x3d32907604691b4d}, + {0x8637bd05af6c69b5, 0xa63f9a49c2c1b110}, + {0xa7c5ac471b478423, 0x0fcf80dc33721d54}, + {0xd1b71758e219652b, 0xd3c36113404ea4a9}, + {0x83126e978d4fdf3b, 0x645a1cac083126ea}, + {0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4}, + {0xcccccccccccccccc, 0xcccccccccccccccd}, + {0x8000000000000000, 0x0000000000000000}, + {0xa000000000000000, 0x0000000000000000}, + {0xc800000000000000, 0x0000000000000000}, + {0xfa00000000000000, 0x0000000000000000}, + {0x9c40000000000000, 0x0000000000000000}, + {0xc350000000000000, 0x0000000000000000}, + {0xf424000000000000, 0x0000000000000000}, + {0x9896800000000000, 0x0000000000000000}, + {0xbebc200000000000, 0x0000000000000000}, + {0xee6b280000000000, 0x0000000000000000}, + {0x9502f90000000000, 0x0000000000000000}, + {0xba43b74000000000, 0x0000000000000000}, + {0xe8d4a51000000000, 0x0000000000000000}, + {0x9184e72a00000000, 0x0000000000000000}, + {0xb5e620f480000000, 0x0000000000000000}, + {0xe35fa931a0000000, 0x0000000000000000}, + 
{0x8e1bc9bf04000000, 0x0000000000000000}, + {0xb1a2bc2ec5000000, 0x0000000000000000}, + {0xde0b6b3a76400000, 0x0000000000000000}, + {0x8ac7230489e80000, 0x0000000000000000}, + {0xad78ebc5ac620000, 0x0000000000000000}, + {0xd8d726b7177a8000, 0x0000000000000000}, + {0x878678326eac9000, 0x0000000000000000}, + {0xa968163f0a57b400, 0x0000000000000000}, + {0xd3c21bcecceda100, 0x0000000000000000}, + {0x84595161401484a0, 0x0000000000000000}, + {0xa56fa5b99019a5c8, 0x0000000000000000}, + {0xcecb8f27f4200f3a, 0x0000000000000000}, + {0x813f3978f8940984, 0x4000000000000000}, + {0xa18f07d736b90be5, 0x5000000000000000}, + {0xc9f2c9cd04674ede, 0xa400000000000000}, + {0xfc6f7c4045812296, 0x4d00000000000000}, + {0x9dc5ada82b70b59d, 0xf020000000000000}, + {0xc5371912364ce305, 0x6c28000000000000}, + {0xf684df56c3e01bc6, 0xc732000000000000}, + {0x9a130b963a6c115c, 0x3c7f400000000000}, + {0xc097ce7bc90715b3, 0x4b9f100000000000}, + {0xf0bdc21abb48db20, 0x1e86d40000000000}, + {0x96769950b50d88f4, 0x1314448000000000}, + {0xbc143fa4e250eb31, 0x17d955a000000000}, + {0xeb194f8e1ae525fd, 0x5dcfab0800000000}, + {0x92efd1b8d0cf37be, 0x5aa1cae500000000}, + {0xb7abc627050305ad, 0xf14a3d9e40000000}, + {0xe596b7b0c643c719, 0x6d9ccd05d0000000}, + {0x8f7e32ce7bea5c6f, 0xe4820023a2000000}, + {0xb35dbf821ae4f38b, 0xdda2802c8a800000}, + {0xe0352f62a19e306e, 0xd50b2037ad200000}, + {0x8c213d9da502de45, 0x4526f422cc340000}, + {0xaf298d050e4395d6, 0x9670b12b7f410000}, + {0xdaf3f04651d47b4c, 0x3c0cdd765f114000}, + {0x88d8762bf324cd0f, 0xa5880a69fb6ac800}, + {0xab0e93b6efee0053, 0x8eea0d047a457a00}, + {0xd5d238a4abe98068, 0x72a4904598d6d880}, + {0x85a36366eb71f041, 0x47a6da2b7f864750}, + {0xa70c3c40a64e6c51, 0x999090b65f67d924}, + {0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d}, + {0x82818f1281ed449f, 0xbff8f10e7a8921a4}, + {0xa321f2d7226895c7, 0xaff72d52192b6a0d}, + {0xcbea6f8ceb02bb39, 0x9bf4f8a69f764490}, + {0xfee50b7025c36a08, 0x02f236d04753d5b4}, + {0x9f4f2726179a2245, 0x01d762422c946590}, + 
{0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef5}, + {0xf8ebad2b84e0d58b, 0xd2e0898765a7deb2}, + {0x9b934c3b330c8577, 0x63cc55f49f88eb2f}, + {0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fb}, + {0xf316271c7fc3908a, 0x8bef464e3945ef7a}, + {0x97edd871cfda3a56, 0x97758bf0e3cbb5ac}, + {0xbde94e8e43d0c8ec, 0x3d52eeed1cbea317}, + {0xed63a231d4c4fb27, 0x4ca7aaa863ee4bdd}, + {0x945e455f24fb1cf8, 0x8fe8caa93e74ef6a}, + {0xb975d6b6ee39e436, 0xb3e2fd538e122b44}, + {0xe7d34c64a9c85d44, 0x60dbbca87196b616}, + {0x90e40fbeea1d3a4a, 0xbc8955e946fe31cd}, + {0xb51d13aea4a488dd, 0x6babab6398bdbe41}, + {0xe264589a4dcdab14, 0xc696963c7eed2dd1}, + {0x8d7eb76070a08aec, 0xfc1e1de5cf543ca2}, + {0xb0de65388cc8ada8, 0x3b25a55f43294bcb}, + {0xdd15fe86affad912, 0x49ef0eb713f39ebe}, + {0x8a2dbf142dfcc7ab, 0x6e3569326c784337}, + {0xacb92ed9397bf996, 0x49c2c37f07965404}, + {0xd7e77a8f87daf7fb, 0xdc33745ec97be906}, + {0x86f0ac99b4e8dafd, 0x69a028bb3ded71a3}, + {0xa8acd7c0222311bc, 0xc40832ea0d68ce0c}, + {0xd2d80db02aabd62b, 0xf50a3fa490c30190}, + {0x83c7088e1aab65db, 0x792667c6da79e0fa}, + {0xa4b8cab1a1563f52, 0x577001b891185938}, + {0xcde6fd5e09abcf26, 0xed4c0226b55e6f86}, + {0x80b05e5ac60b6178, 0x544f8158315b05b4}, + {0xa0dc75f1778e39d6, 0x696361ae3db1c721}, + {0xc913936dd571c84c, 0x03bc3a19cd1e38e9}, + {0xfb5878494ace3a5f, 0x04ab48a04065c723}, + {0x9d174b2dcec0e47b, 0x62eb0d64283f9c76}, + {0xc45d1df942711d9a, 0x3ba5d0bd324f8394}, + {0xf5746577930d6500, 0xca8f44ec7ee36479}, + {0x9968bf6abbe85f20, 0x7e998b13cf4e1ecb}, + {0xbfc2ef456ae276e8, 0x9e3fedd8c321a67e}, + {0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101e}, + {0x95d04aee3b80ece5, 0xbba1f1d158724a12}, + {0xbb445da9ca61281f, 0x2a8a6e45ae8edc97}, + {0xea1575143cf97226, 0xf52d09d71a3293bd}, + {0x924d692ca61be758, 0x593c2626705f9c56}, + {0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836c}, + {0xe498f455c38b997a, 0x0b6dfb9c0f956447}, + {0x8edf98b59a373fec, 0x4724bd4189bd5eac}, + {0xb2977ee300c50fe7, 0x58edec91ec2cb657}, + {0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ed}, + 
{0x8b865b215899f46c, 0xbd79e0d20082ee74}, + {0xae67f1e9aec07187, 0xecd8590680a3aa11}, + {0xda01ee641a708de9, 0xe80e6f4820cc9495}, + {0x884134fe908658b2, 0x3109058d147fdcdd}, + {0xaa51823e34a7eede, 0xbd4b46f0599fd415}, + {0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91a}, + {0x850fadc09923329e, 0x03e2cf6bc604ddb0}, + {0xa6539930bf6bff45, 0x84db8346b786151c}, + {0xcfe87f7cef46ff16, 0xe612641865679a63}, + {0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07e}, + {0xa26da3999aef7749, 0xe3be5e330f38f09d}, + {0xcb090c8001ab551c, 0x5cadf5bfd3072cc5}, + {0xfdcb4fa002162a63, 0x73d9732fc7c8f7f6}, + {0x9e9f11c4014dda7e, 0x2867e7fddcdd9afa}, + {0xc646d63501a1511d, 0xb281e1fd541501b8}, + {0xf7d88bc24209a565, 0x1f225a7ca91a4226}, + {0x9ae757596946075f, 0x3375788de9b06958}, + {0xc1a12d2fc3978937, 0x0052d6b1641c83ae}, + {0xf209787bb47d6b84, 0xc0678c5dbd23a49a}, + {0x9745eb4d50ce6332, 0xf840b7ba963646e0}, + {0xbd176620a501fbff, 0xb650e5a93bc3d898}, + {0xec5d3fa8ce427aff, 0xa3e51f138ab4cebe}, + {0x93ba47c980e98cdf, 0xc66f336c36b10137}, + {0xb8a8d9bbe123f017, 0xb80b0047445d4184}, + {0xe6d3102ad96cec1d, 0xa60dc059157491e5}, + {0x9043ea1ac7e41392, 0x87c89837ad68db2f}, + {0xb454e4a179dd1877, 0x29babe4598c311fb}, + {0xe16a1dc9d8545e94, 0xf4296dd6fef3d67a}, + {0x8ce2529e2734bb1d, 0x1899e4a65f58660c}, + {0xb01ae745b101e9e4, 0x5ec05dcff72e7f8f}, + {0xdc21a1171d42645d, 0x76707543f4fa1f73}, + {0x899504ae72497eba, 0x6a06494a791c53a8}, + {0xabfa45da0edbde69, 0x0487db9d17636892}, + {0xd6f8d7509292d603, 0x45a9d2845d3c42b6}, + {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b2}, + {0xa7f26836f282b732, 0x8e6cac7768d7141e}, + {0xd1ef0244af2364ff, 0x3207d795430cd926}, + {0x8335616aed761f1f, 0x7f44e6bd49e807b8}, + {0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a6}, + {0xcd036837130890a1, 0x36dba887c37a8c0f}, + {0x802221226be55a64, 0xc2494954da2c9789}, + {0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6c}, + {0xc83553c5c8965d3d, 0x6f92829494e5acc7}, + {0xfa42a8b73abbf48c, 0xcb772339ba1f17f9}, + {0x9c69a97284b578d7, 0xff2a760414536efb}, + 
{0xc38413cf25e2d70d, 0xfef5138519684aba}, + {0xf46518c2ef5b8cd1, 0x7eb258665fc25d69}, + {0x98bf2f79d5993802, 0xef2f773ffbd97a61}, + {0xbeeefb584aff8603, 0xaafb550ffacfd8fa}, + {0xeeaaba2e5dbf6784, 0x95ba2a53f983cf38}, + {0x952ab45cfa97a0b2, 0xdd945a747bf26183}, + {0xba756174393d88df, 0x94f971119aeef9e4}, + {0xe912b9d1478ceb17, 0x7a37cd5601aab85d}, + {0x91abb422ccb812ee, 0xac62e055c10ab33a}, + {0xb616a12b7fe617aa, 0x577b986b314d6009}, + {0xe39c49765fdf9d94, 0xed5a7e85fda0b80b}, + {0x8e41ade9fbebc27d, 0x14588f13be847307}, + {0xb1d219647ae6b31c, 0x596eb2d8ae258fc8}, + {0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bb}, + {0x8aec23d680043bee, 0x25de7bb9480d5854}, + {0xada72ccc20054ae9, 0xaf561aa79a10ae6a}, + {0xd910f7ff28069da4, 0x1b2ba1518094da04}, + {0x87aa9aff79042286, 0x90fb44d2f05d0842}, + {0xa99541bf57452b28, 0x353a1607ac744a53}, + {0xd3fa922f2d1675f2, 0x42889b8997915ce8}, + {0x847c9b5d7c2e09b7, 0x69956135febada11}, + {0xa59bc234db398c25, 0x43fab9837e699095}, + {0xcf02b2c21207ef2e, 0x94f967e45e03f4bb}, + {0x8161afb94b44f57d, 0x1d1be0eebac278f5}, + {0xa1ba1ba79e1632dc, 0x6462d92a69731732}, + {0xca28a291859bbf93, 0x7d7b8f7503cfdcfe}, + {0xfcb2cb35e702af78, 0x5cda735244c3d43e}, + {0x9defbf01b061adab, 0x3a0888136afa64a7}, + {0xc56baec21c7a1916, 0x088aaa1845b8fdd0}, + {0xf6c69a72a3989f5b, 0x8aad549e57273d45}, + {0x9a3c2087a63f6399, 0x36ac54e2f678864b}, + {0xc0cb28a98fcf3c7f, 0x84576a1bb416a7dd}, + {0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d5}, + {0x969eb7c47859e743, 0x9f644ae5a4b1b325}, + {0xbc4665b596706114, 0x873d5d9f0dde1fee}, + {0xeb57ff22fc0c7959, 0xa90cb506d155a7ea}, + {0x9316ff75dd87cbd8, 0x09a7f12442d588f2}, + {0xb7dcbf5354e9bece, 0x0c11ed6d538aeb2f}, + {0xe5d3ef282a242e81, 0x8f1668c8a86da5fa}, + {0x8fa475791a569d10, 0xf96e017d694487bc}, + {0xb38d92d760ec4455, 0x37c981dcc395a9ac}, + {0xe070f78d3927556a, 0x85bbe253f47b1417}, + {0x8c469ab843b89562, 0x93956d7478ccec8e}, + {0xaf58416654a6babb, 0x387ac8d1970027b2}, + {0xdb2e51bfe9d0696a, 0x06997b05fcc0319e}, + 
{0x88fcf317f22241e2, 0x441fece3bdf81f03}, + {0xab3c2fddeeaad25a, 0xd527e81cad7626c3}, + {0xd60b3bd56a5586f1, 0x8a71e223d8d3b074}, + {0x85c7056562757456, 0xf6872d5667844e49}, + {0xa738c6bebb12d16c, 0xb428f8ac016561db}, + {0xd106f86e69d785c7, 0xe13336d701beba52}, + {0x82a45b450226b39c, 0xecc0024661173473}, + {0xa34d721642b06084, 0x27f002d7f95d0190}, + {0xcc20ce9bd35c78a5, 0x31ec038df7b441f4}, + {0xff290242c83396ce, 0x7e67047175a15271}, + {0x9f79a169bd203e41, 0x0f0062c6e984d386}, + {0xc75809c42c684dd1, 0x52c07b78a3e60868}, + {0xf92e0c3537826145, 0xa7709a56ccdf8a82}, + {0x9bbcc7a142b17ccb, 0x88a66076400bb691}, + {0xc2abf989935ddbfe, 0x6acff893d00ea435}, + {0xf356f7ebf83552fe, 0x0583f6b8c4124d43}, + {0x98165af37b2153de, 0xc3727a337a8b704a}, + {0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5c}, + {0xeda2ee1c7064130c, 0x1162def06f79df73}, + {0x9485d4d1c63e8be7, 0x8addcb5645ac2ba8}, + {0xb9a74a0637ce2ee1, 0x6d953e2bd7173692}, + {0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0437}, + {0x910ab1d4db9914a0, 0x1d9c9892400a22a2}, + {0xb54d5e4a127f59c8, 0x2503beb6d00cab4b}, + {0xe2a0b5dc971f303a, 0x2e44ae64840fd61d}, + {0x8da471a9de737e24, 0x5ceaecfed289e5d2}, + {0xb10d8e1456105dad, 0x7425a83e872c5f47}, + {0xdd50f1996b947518, 0xd12f124e28f77719}, + {0x8a5296ffe33cc92f, 0x82bd6b70d99aaa6f}, + {0xace73cbfdc0bfb7b, 0x636cc64d1001550b}, + {0xd8210befd30efa5a, 0x3c47f7e05401aa4e}, + {0x8714a775e3e95c78, 0x65acfaec34810a71}, + {0xa8d9d1535ce3b396, 0x7f1839a741a14d0d}, + {0xd31045a8341ca07c, 0x1ede48111209a050}, + {0x83ea2b892091e44d, 0x934aed0aab460432}, + {0xa4e4b66b68b65d60, 0xf81da84d5617853f}, + {0xce1de40642e3f4b9, 0x36251260ab9d668e}, + {0x80d2ae83e9ce78f3, 0xc1d72b7c6b426019}, + {0xa1075a24e4421730, 0xb24cf65b8612f81f}, + {0xc94930ae1d529cfc, 0xdee033f26797b627}, + {0xfb9b7cd9a4a7443c, 0x169840ef017da3b1}, + {0x9d412e0806e88aa5, 0x8e1f289560ee864e}, + {0xc491798a08a2ad4e, 0xf1a6f2bab92a27e2}, + {0xf5b5d7ec8acb58a2, 0xae10af696774b1db}, + {0x9991a6f3d6bf1765, 0xacca6da1e0a8ef29}, + 
{0xbff610b0cc6edd3f, 0x17fd090a58d32af3}, + {0xeff394dcff8a948e, 0xddfc4b4cef07f5b0}, + {0x95f83d0a1fb69cd9, 0x4abdaf101564f98e}, + {0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f1}, + {0xea53df5fd18d5513, 0x84c86189216dc5ed}, + {0x92746b9be2f8552c, 0x32fd3cf5b4e49bb4}, + {0xb7118682dbb66a77, 0x3fbc8c33221dc2a1}, + {0xe4d5e82392a40515, 0x0fabaf3feaa5334a}, + {0x8f05b1163ba6832d, 0x29cb4d87f2a7400e}, + {0xb2c71d5bca9023f8, 0x743e20e9ef511012}, + {0xdf78e4b2bd342cf6, 0x914da9246b255416}, + {0x8bab8eefb6409c1a, 0x1ad089b6c2f7548e}, + {0xae9672aba3d0c320, 0xa184ac2473b529b1}, + {0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741e}, + {0x8865899617fb1871, 0x7e2fa67c7a658892}, + {0xaa7eebfb9df9de8d, 0xddbb901b98feeab7}, + {0xd51ea6fa85785631, 0x552a74227f3ea565}, + {0x8533285c936b35de, 0xd53a88958f87275f}, + {0xa67ff273b8460356, 0x8a892abaf368f137}, + {0xd01fef10a657842c, 0x2d2b7569b0432d85}, + {0x8213f56a67f6b29b, 0x9c3b29620e29fc73}, + {0xa298f2c501f45f42, 0x8349f3ba91b47b8f}, + {0xcb3f2f7642717713, 0x241c70a936219a73}, + {0xfe0efb53d30dd4d7, 0xed238cd383aa0110}, + {0x9ec95d1463e8a506, 0xf4363804324a40aa}, + {0xc67bb4597ce2ce48, 0xb143c6053edcd0d5}, + {0xf81aa16fdc1b81da, 0xdd94b7868e94050a}, + {0x9b10a4e5e9913128, 0xca7cf2b4191c8326}, + {0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f0}, + {0xf24a01a73cf2dccf, 0xbc633b39673c8cec}, + {0x976e41088617ca01, 0xd5be0503e085d813}, + {0xbd49d14aa79dbc82, 0x4b2d8644d8a74e18}, + {0xec9c459d51852ba2, 0xddf8e7d60ed1219e}, + {0x93e1ab8252f33b45, 0xcabb90e5c942b503}, + {0xb8da1662e7b00a17, 0x3d6a751f3b936243}, + {0xe7109bfba19c0c9d, 0x0cc512670a783ad4}, + {0x906a617d450187e2, 0x27fb2b80668b24c5}, + {0xb484f9dc9641e9da, 0xb1f9f660802dedf6}, + {0xe1a63853bbd26451, 0x5e7873f8a0396973}, + {0x8d07e33455637eb2, 0xdb0b487b6423e1e8}, + {0xb049dc016abc5e5f, 0x91ce1a9a3d2cda62}, + {0xdc5c5301c56b75f7, 0x7641a140cc7810fb}, + {0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9d}, + {0xac2820d9623bf429, 0x546345fa9fbdcd44}, + {0xd732290fbacaf133, 0xa97c177947ad4095}, + 
{0x867f59a9d4bed6c0, 0x49ed8eabcccc485d}, + {0xa81f301449ee8c70, 0x5c68f256bfff5a74}, + {0xd226fc195c6a2f8c, 0x73832eec6fff3111}, + {0x83585d8fd9c25db7, 0xc831fd53c5ff7eab}, + {0xa42e74f3d032f525, 0xba3e7ca8b77f5e55}, + {0xcd3a1230c43fb26f, 0x28ce1bd2e55f35eb}, + {0x80444b5e7aa7cf85, 0x7980d163cf5b81b3}, + {0xa0555e361951c366, 0xd7e105bcc332621f}, + {0xc86ab5c39fa63440, 0x8dd9472bf3fefaa7}, + {0xfa856334878fc150, 0xb14f98f6f0feb951}, + {0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d3}, + {0xc3b8358109e84f07, 0x0a862f80ec4700c8}, + {0xf4a642e14c6262c8, 0xcd27bb612758c0fa}, + {0x98e7e9cccfbd7dbd, 0x8038d51cb897789c}, + {0xbf21e44003acdd2c, 0xe0470a63e6bd56c3}, + {0xeeea5d5004981478, 0x1858ccfce06cac74}, + {0x95527a5202df0ccb, 0x0f37801e0c43ebc8}, + {0xbaa718e68396cffd, 0xd30560258f54e6ba}, + {0xe950df20247c83fd, 0x47c6b82ef32a2069}, + {0x91d28b7416cdd27e, 0x4cdc331d57fa5441}, + {0xb6472e511c81471d, 0xe0133fe4adf8e952}, + {0xe3d8f9e563a198e5, 0x58180fddd97723a6}, + {0x8e679c2f5e44ff8f, 0x570f09eaa7ea7648}, + {0xb201833b35d63f73, 0x2cd2cc6551e513da}, + {0xde81e40a034bcf4f, 0xf8077f7ea65e58d1}, + {0x8b112e86420f6191, 0xfb04afaf27faf782}, + {0xadd57a27d29339f6, 0x79c5db9af1f9b563}, + {0xd94ad8b1c7380874, 0x18375281ae7822bc}, + {0x87cec76f1c830548, 0x8f2293910d0b15b5}, + {0xa9c2794ae3a3c69a, 0xb2eb3875504ddb22}, + {0xd433179d9c8cb841, 0x5fa60692a46151eb}, + {0x849feec281d7f328, 0xdbc7c41ba6bcd333}, + {0xa5c7ea73224deff3, 0x12b9b522906c0800}, + {0xcf39e50feae16bef, 0xd768226b34870a00}, + {0x81842f29f2cce375, 0xe6a1158300d46640}, + {0xa1e53af46f801c53, 0x60495ae3c1097fd0}, + {0xca5e89b18b602368, 0x385bb19cb14bdfc4}, + {0xfcf62c1dee382c42, 0x46729e03dd9ed7b5}, + {0x9e19db92b4e31ba9, 0x6c07a2c26a8346d1}, + {0xc5a05277621be293, 0xc7098b7305241885}, + {0xf70867153aa2db38, 0xb8cbee4fc66d1ea7} +#else + {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, + {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, + {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f}, + {0x86a8d39ef77164bc, 0xae5dff9c02033198}, + 
{0xd98ddaee19068c76, 0x3badd624dd9b0958}, + {0xafbd2350644eeacf, 0xe5d1929ef90898fb}, + {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2}, + {0xe55990879ddcaabd, 0xcc420a6a101d0516}, + {0xb94470938fa89bce, 0xf808e40e8d5b3e6a}, + {0x95a8637627989aad, 0xdde7001379a44aa9}, + {0xf1c90080baf72cb1, 0x5324c68b12dd6339}, + {0xc350000000000000, 0x0000000000000000}, + {0x9dc5ada82b70b59d, 0xf020000000000000}, + {0xfee50b7025c36a08, 0x02f236d04753d5b4}, + {0xcde6fd5e09abcf26, 0xed4c0226b55e6f86}, + {0xa6539930bf6bff45, 0x84db8346b786151c}, + {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b2}, + {0xd910f7ff28069da4, 0x1b2ba1518094da04}, + {0xaf58416654a6babb, 0x387ac8d1970027b2}, + {0x8da471a9de737e24, 0x5ceaecfed289e5d2}, + {0xe4d5e82392a40515, 0x0fabaf3feaa5334a}, + {0xb8da1662e7b00a17, 0x3d6a751f3b936243}, + {0x95527a5202df0ccb, 0x0f37801e0c43ebc8} +#endif +}; + +#if !FMT_USE_FULL_CACHE_DRAGONBOX +template +const uint64_t basic_data::powers_of_5_64[] = { + 0x0000000000000001, 0x0000000000000005, 0x0000000000000019, + 0x000000000000007d, 0x0000000000000271, 0x0000000000000c35, + 0x0000000000003d09, 0x000000000001312d, 0x000000000005f5e1, + 0x00000000001dcd65, 0x00000000009502f9, 0x0000000002e90edd, + 0x000000000e8d4a51, 0x0000000048c27395, 0x000000016bcc41e9, + 0x000000071afd498d, 0x0000002386f26fc1, 0x000000b1a2bc2ec5, + 0x000003782dace9d9, 0x00001158e460913d, 0x000056bc75e2d631, + 0x0001b1ae4d6e2ef5, 0x000878678326eac9, 0x002a5a058fc295ed, + 0x00d3c21bcecceda1, 0x0422ca8b0a00a425, 0x14adf4b7320334b9}; + +template +const uint32_t basic_data::dragonbox_pow10_recovery_errors[] = { + 0x50001400, 0x54044100, 0x54014555, 0x55954415, 0x54115555, 0x00000001, + 0x50000000, 0x00104000, 0x54010004, 0x05004001, 0x55555544, 0x41545555, + 0x54040551, 0x15445545, 0x51555514, 0x10000015, 0x00101100, 0x01100015, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04450514, 0x45414110, + 0x55555145, 0x50544050, 0x15040155, 0x11054140, 0x50111514, 0x11451454, + 0x00400541, 0x00000000, 0x55555450, 0x10056551, 
0x10054011, 0x55551014, + 0x69514555, 0x05151109, 0x00155555}; +#endif + template const char basic_data::foreground_color[] = "\x1b[38;2;"; template @@ -366,6 +1088,10 @@ class fp { private: using significand_type = uint64_t; + template + using is_supported_float = bool_constant; + public: significand_type f; int e; @@ -388,63 +1114,38 @@ class fp { template explicit fp(Double d) { assign(d); } // Assigns d to this and return true iff predecessor is closer than successor. - template - bool assign(Double d) { - // Assume double is in the format [sign][exponent][significand]. - using limits = std::numeric_limits; + template ::value)> + bool assign(Float d) { + // Assume float is in the format [sign][exponent][significand]. + using limits = std::numeric_limits; + const int float_significand_size = limits::digits - 1; const int exponent_size = - bits::value - double_significand_size - 1; // -1 for sign - const uint64_t significand_mask = implicit_bit - 1; + bits::value - float_significand_size - 1; // -1 for sign + const uint64_t float_implicit_bit = 1ULL << float_significand_size; + const uint64_t significand_mask = float_implicit_bit - 1; const uint64_t exponent_mask = (~0ULL >> 1) & ~significand_mask; const int exponent_bias = (1 << exponent_size) - limits::max_exponent - 1; - auto u = bit_cast(d); + constexpr bool is_double = sizeof(Float) == sizeof(uint64_t); + auto u = bit_cast>(d); f = u & significand_mask; int biased_e = - static_cast((u & exponent_mask) >> double_significand_size); + static_cast((u & exponent_mask) >> float_significand_size); // Predecessor is closer if d is a normalized power of 2 (f == 0) other than // the smallest normalized number (biased_e > 1). bool is_predecessor_closer = f == 0 && biased_e > 1; if (biased_e != 0) - f += implicit_bit; + f += float_implicit_bit; else biased_e = 1; // Subnormals use biased exponent 1 (min exponent). 
- e = biased_e - exponent_bias - double_significand_size; + e = biased_e - exponent_bias - float_significand_size; return is_predecessor_closer; } - template - bool assign(Double) { + template ::value)> + bool assign(Float) { *this = fp(); return false; } - - // Assigns d to this together with computing lower and upper boundaries, - // where a boundary is a value half way between the number and its predecessor - // (lower) or successor (upper). The upper boundary is normalized and lower - // has the same exponent but may be not normalized. - template boundaries assign_with_boundaries(Double d) { - bool is_lower_closer = assign(d); - fp lower = - is_lower_closer ? fp((f << 2) - 1, e - 2) : fp((f << 1) - 1, e - 1); - // 1 in normalize accounts for the exponent shift above. - fp upper = normalize<1>(fp((f << 1) + 1, e - 1)); - lower.f <<= lower.e - upper.e; - return boundaries{lower.f, upper.f}; - } - - template boundaries assign_float_with_boundaries(Double d) { - assign(d); - constexpr int min_normal_e = std::numeric_limits::min_exponent - - std::numeric_limits::digits; - significand_type half_ulp = 1 << (std::numeric_limits::digits - - std::numeric_limits::digits - 1); - if (min_normal_e > e) half_ulp <<= min_normal_e - e; - fp upper = normalize<0>(fp(f + half_ulp, e)); - fp lower = fp( - f - (half_ulp >> ((f == implicit_bit && e > min_normal_e) ? 1 : 0)), e); - lower.f <<= lower.e - upper.e; - return boundaries{lower.f, upper.f}; - } }; // Normalizes the value converted from double and multiplied by (1 << SHIFT). @@ -488,11 +1189,12 @@ inline fp operator*(fp x, fp y) { return {multiply(x.f, y.f), x.e + y.e + 64}; } // Returns a cached power of 10 `c_k = c_k.f * pow(2, c_k.e)` such that its // (binary) exponent satisfies `min_exponent <= c_k.e <= min_exponent + 28`. 
inline fp get_cached_power(int min_exponent, int& pow10_exponent) { - const int64_t one_over_log2_10 = 0x4d104d42; // round(pow(2, 32) / log2(10)) + const int shift = 32; + const auto significand = static_cast(data::log10_2_significand); int index = static_cast( - ((min_exponent + fp::significand_size - 1) * one_over_log2_10 + - ((int64_t(1) << 32) - 1)) // ceil - >> 32 // arithmetic shift + ((min_exponent + fp::significand_size - 1) * (significand >> shift) + + ((int64_t(1) << shift) - 1)) // ceil + >> 32 // arithmetic shift ); // Decimal exponent of the first (smallest) cached power of 10. const int first_dec_exp = -348; @@ -500,7 +1202,8 @@ inline fp get_cached_power(int min_exponent, int& pow10_exponent) { const int dec_exp_step = 8; index = (index - first_dec_exp - 1) / dec_exp_step + 1; pow10_exponent = first_dec_exp + index * dec_exp_step; - return {data::pow10_significands[index], data::pow10_exponents[index]}; + return {data::grisu_pow10_significands[index], + data::grisu_pow10_exponents[index]}; } // A simple accumulator to hold the sums of terms in bigint::square if uint128_t @@ -559,9 +1262,8 @@ class bigint { FMT_ASSERT(compare(*this, other) >= 0, ""); bigit borrow = 0; int i = other.exp_ - exp_; - for (size_t j = 0, n = other.bigits_.size(); j != n; ++i, ++j) { + for (size_t j = 0, n = other.bigits_.size(); j != n; ++i, ++j) subtract_bigits(i, other.bigits_[j], borrow); - } while (borrow > 0) subtract_bigits(i, 0, borrow); remove_leading_zeros(); } @@ -733,22 +1435,26 @@ class bigint { exp_ *= 2; } + // If this bigint has a bigger exponent than other, adds trailing zero to make + // exponents equal. This simplifies some operations such as subtraction. 
+ void align(const bigint& other) { + int exp_difference = exp_ - other.exp_; + if (exp_difference <= 0) return; + int num_bigits = static_cast(bigits_.size()); + bigits_.resize(to_unsigned(num_bigits + exp_difference)); + for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j) + bigits_[j] = bigits_[i]; + std::uninitialized_fill_n(bigits_.data(), exp_difference, 0); + exp_ -= exp_difference; + } + // Divides this bignum by divisor, assigning the remainder to this and // returning the quotient. int divmod_assign(const bigint& divisor) { FMT_ASSERT(this != &divisor, ""); if (compare(*this, divisor) < 0) return 0; - int num_bigits = static_cast(bigits_.size()); FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, ""); - int exp_difference = exp_ - divisor.exp_; - if (exp_difference > 0) { - // Align bigints by adding trailing zeros to simplify subtraction. - bigits_.resize(to_unsigned(num_bigits + exp_difference)); - for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j) - bigits_[j] = bigits_[i]; - std::uninitialized_fill_n(bigits_.data(), exp_difference, 0); - exp_ -= exp_difference; - } + align(divisor); int quotient = 0; do { subtract_aligned(divisor); @@ -788,20 +1494,6 @@ enum result { }; } -// A version of count_digits optimized for grisu_gen_digits. -inline int grisu_count_digits(uint32_t n) { - if (n < 10) return 1; - if (n < 100) return 2; - if (n < 1000) return 3; - if (n < 10000) return 4; - if (n < 100000) return 5; - if (n < 1000000) return 6; - if (n < 10000000) return 7; - if (n < 100000000) return 8; - if (n < 1000000000) return 9; - return 10; -} - // Generates output using the Grisu digit-gen algorithm. // error: the size of the region (lower, upper) outside of which numbers // definitely do not round to value (Delta in Grisu3). 
@@ -817,7 +1509,7 @@ FMT_ALWAYS_INLINE digits::result grisu_gen_digits(fp value, uint64_t error, FMT_ASSERT(integral == value.f >> -one.e, ""); // The fractional part of scaled value (p2 in Grisu) c = value % one. uint64_t fractional = value.f & (one.f - 1); - exp = grisu_count_digits(integral); // kappa in Grisu. + exp = count_digits(integral); // kappa in Grisu. // Divide by 10 to prevent overflow. auto result = handler.on_start(data::powers_of_10_64[exp - 1] << -one.e, value.f / 10, error * 10, exp); @@ -867,8 +1559,7 @@ FMT_ALWAYS_INLINE digits::result grisu_gen_digits(fp value, uint64_t error, FMT_ASSERT(false, "invalid number of digits"); } --exp; - uint64_t remainder = - (static_cast(integral) << -one.e) + fractional; + auto remainder = (static_cast(integral) << -one.e) + fractional; result = handler.on_digit(static_cast('0' + digit), data::powers_of_10_64[exp] << -one.e, remainder, error, exp, true); @@ -878,8 +1569,7 @@ FMT_ALWAYS_INLINE digits::result grisu_gen_digits(fp value, uint64_t error, for (;;) { fractional *= 10; error *= 10; - char digit = - static_cast('0' + static_cast(fractional >> -one.e)); + char digit = static_cast('0' + (fractional >> -one.e)); fractional &= one.f - 1; --exp; result = handler.on_digit(digit, one.f, fractional, error, exp, false); @@ -916,6 +1606,7 @@ struct fixed_handler { uint64_t error, int, bool integral) { FMT_ASSERT(remainder < divisor, ""); buf[size++] = digit; + if (!integral && error >= remainder) return digits::error; if (size < precision) return digits::more; if (!integral) { // Check if error * 2 < divisor with overflow prevention. @@ -935,59 +1626,684 @@ struct fixed_handler { } if (buf[0] > '9') { buf[0] = '1'; - buf[size++] = '0'; + if (fixed) + buf[size++] = '0'; + else + ++exp10; } return digits::done; } }; -// The shortest representation digit handler. -struct grisu_shortest_handler { - char* buf; - int size; - // Distance between scaled value and upper bound (wp_W in Grisu3). 
- uint64_t diff; +// Implementation of Dragonbox algorithm: https://github.com/jk-jeon/dragonbox. +namespace dragonbox { +// Computes 128-bit result of multiplication of two 64-bit unsigned integers. +FMT_SAFEBUFFERS inline uint128_wrapper umul128(uint64_t x, + uint64_t y) FMT_NOEXCEPT { +#if FMT_USE_INT128 + return static_cast(x) * static_cast(y); +#elif defined(_MSC_VER) && defined(_M_X64) + uint128_wrapper result; + result.low_ = _umul128(x, y, &result.high_); + return result; +#else + const uint64_t mask = (uint64_t(1) << 32) - uint64_t(1); - digits::result on_start(uint64_t, uint64_t, uint64_t, int&) { - return digits::more; + uint64_t a = x >> 32; + uint64_t b = x & mask; + uint64_t c = y >> 32; + uint64_t d = y & mask; + + uint64_t ac = a * c; + uint64_t bc = b * c; + uint64_t ad = a * d; + uint64_t bd = b * d; + + uint64_t intermediate = (bd >> 32) + (ad & mask) + (bc & mask); + + return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32), + (intermediate << 32) + (bd & mask)}; +#endif +} + +// Computes upper 64 bits of multiplication of two 64-bit unsigned integers. +FMT_SAFEBUFFERS inline uint64_t umul128_upper64(uint64_t x, + uint64_t y) FMT_NOEXCEPT { +#if FMT_USE_INT128 + auto p = static_cast(x) * static_cast(y); + return static_cast(p >> 64); +#elif defined(_MSC_VER) && defined(_M_X64) + return __umulh(x, y); +#else + return umul128(x, y).high(); +#endif +} + +// Computes upper 64 bits of multiplication of a 64-bit unsigned integer and a +// 128-bit unsigned integer. +FMT_SAFEBUFFERS inline uint64_t umul192_upper64(uint64_t x, uint128_wrapper y) + FMT_NOEXCEPT { + uint128_wrapper g0 = umul128(x, y.high()); + g0 += umul128_upper64(x, y.low()); + return g0.high(); +} + +// Computes upper 32 bits of multiplication of a 32-bit unsigned integer and a +// 64-bit unsigned integer. 
+inline uint32_t umul96_upper32(uint32_t x, uint64_t y) FMT_NOEXCEPT { + return static_cast(umul128_upper64(x, y)); +} + +// Computes middle 64 bits of multiplication of a 64-bit unsigned integer and a +// 128-bit unsigned integer. +FMT_SAFEBUFFERS inline uint64_t umul192_middle64(uint64_t x, uint128_wrapper y) + FMT_NOEXCEPT { + uint64_t g01 = x * y.high(); + uint64_t g10 = umul128_upper64(x, y.low()); + return g01 + g10; +} + +// Computes lower 64 bits of multiplication of a 32-bit unsigned integer and a +// 64-bit unsigned integer. +inline uint64_t umul96_lower64(uint32_t x, uint64_t y) FMT_NOEXCEPT { + return x * y; +} + +// Computes floor(log10(pow(2, e))) for e in [-1700, 1700] using the method from +// https://fmt.dev/papers/Grisu-Exact.pdf#page=5, section 3.4. +inline int floor_log10_pow2(int e) FMT_NOEXCEPT { + FMT_ASSERT(e <= 1700 && e >= -1700, "too large exponent"); + const int shift = 22; + return (e * static_cast(data::log10_2_significand >> (64 - shift))) >> + shift; +} + +// Various fast log computations. +inline int floor_log2_pow10(int e) FMT_NOEXCEPT { + FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent"); + const uint64_t log2_10_integer_part = 3; + const uint64_t log2_10_fractional_digits = 0x5269e12f346e2bf9; + const int shift_amount = 19; + return (e * static_cast( + (log2_10_integer_part << shift_amount) | + (log2_10_fractional_digits >> (64 - shift_amount)))) >> + shift_amount; +} +inline int floor_log10_pow2_minus_log10_4_over_3(int e) FMT_NOEXCEPT { + FMT_ASSERT(e <= 1700 && e >= -1700, "too large exponent"); + const uint64_t log10_4_over_3_fractional_digits = 0x1ffbfc2bbc780375; + const int shift_amount = 22; + return (e * static_cast(data::log10_2_significand >> + (64 - shift_amount)) - + static_cast(log10_4_over_3_fractional_digits >> + (64 - shift_amount))) >> + shift_amount; +} + +// Returns true iff x is divisible by pow(2, exp). 
+inline bool divisible_by_power_of_2(uint32_t x, int exp) FMT_NOEXCEPT { + FMT_ASSERT(exp >= 1, ""); + FMT_ASSERT(x != 0, ""); +#ifdef FMT_BUILTIN_CTZ + return FMT_BUILTIN_CTZ(x) >= exp; +#else + return exp < num_bits() && x == ((x >> exp) << exp); +#endif +} +inline bool divisible_by_power_of_2(uint64_t x, int exp) FMT_NOEXCEPT { + FMT_ASSERT(exp >= 1, ""); + FMT_ASSERT(x != 0, ""); +#ifdef FMT_BUILTIN_CTZLL + return FMT_BUILTIN_CTZLL(x) >= exp; +#else + return exp < num_bits()) && x == ((x >> exp) << exp); +#endif +} + +// Returns true iff x is divisible by pow(5, exp). +inline bool divisible_by_power_of_5(uint32_t x, int exp) FMT_NOEXCEPT { + FMT_ASSERT(exp <= 10, "too large exponent"); + return x * data::divtest_table_for_pow5_32[exp].mod_inv <= + data::divtest_table_for_pow5_32[exp].max_quotient; +} +inline bool divisible_by_power_of_5(uint64_t x, int exp) FMT_NOEXCEPT { + FMT_ASSERT(exp <= 23, "too large exponent"); + return x * data::divtest_table_for_pow5_64[exp].mod_inv <= + data::divtest_table_for_pow5_64[exp].max_quotient; +} + +// Replaces n by floor(n / pow(5, N)) returning true if and only if n is +// divisible by pow(5, N). +// Precondition: n <= 2 * pow(5, N + 1). +template +bool check_divisibility_and_divide_by_pow5(uint32_t& n) FMT_NOEXCEPT { + static constexpr struct { + uint32_t magic_number; + int bits_for_comparison; + uint32_t threshold; + int shift_amount; + } infos[] = {{0xcccd, 16, 0x3333, 18}, {0xa429, 8, 0x0a, 20}}; + constexpr auto info = infos[N - 1]; + n *= info.magic_number; + const uint32_t comparison_mask = (1u << info.bits_for_comparison) - 1; + bool result = (n & comparison_mask) <= info.threshold; + n >>= info.shift_amount; + return result; +} + +// Computes floor(n / pow(10, N)) for small n and N. +// Precondition: n <= pow(10, N + 1). 
+template uint32_t small_division_by_pow10(uint32_t n) FMT_NOEXCEPT { + static constexpr struct { + uint32_t magic_number; + int shift_amount; + uint32_t divisor_times_10; + } infos[] = {{0xcccd, 19, 100}, {0xa3d8, 22, 1000}}; + constexpr auto info = infos[N - 1]; + FMT_ASSERT(n <= info.divisor_times_10, "n is too large"); + return n * info.magic_number >> info.shift_amount; +} + +// Computes floor(n / 10^(kappa + 1)) (float) +inline uint32_t divide_by_10_to_kappa_plus_1(uint32_t n) FMT_NOEXCEPT { + return n / float_info::big_divisor; +} +// Computes floor(n / 10^(kappa + 1)) (double) +inline uint64_t divide_by_10_to_kappa_plus_1(uint64_t n) FMT_NOEXCEPT { + return umul128_upper64(n, 0x83126e978d4fdf3c) >> 9; +} + +// Various subroutines using pow10 cache +template struct cache_accessor; + +template <> struct cache_accessor { + using carrier_uint = float_info::carrier_uint; + using cache_entry_type = uint64_t; + + static uint64_t get_cached_power(int k) FMT_NOEXCEPT { + FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, + "k is out of range"); + return data::dragonbox_pow10_significands_64[k - float_info::min_k]; } - // Decrement the generated number approaching value from above. - void round(uint64_t d, uint64_t divisor, uint64_t& remainder, - uint64_t error) { - while ( - remainder < d && error - remainder >= divisor && - (remainder + divisor < d || d - remainder >= remainder + divisor - d)) { - --buf[size - 1]; - remainder += divisor; - } + static carrier_uint compute_mul(carrier_uint u, + const cache_entry_type& cache) FMT_NOEXCEPT { + return umul96_upper32(u, cache); } - // Implements Grisu's round_weed. - digits::result on_digit(char digit, uint64_t divisor, uint64_t remainder, - uint64_t error, int exp, bool integral) { - buf[size++] = digit; - if (remainder >= error) return digits::more; - uint64_t unit = integral ? 
1 : data::powers_of_10_64[-exp]; - uint64_t up = (diff - 1) * unit; // wp_Wup - round(up, divisor, remainder, error); - uint64_t down = (diff + 1) * unit; // wp_Wdown - if (remainder < down && error - remainder >= divisor && - (remainder + divisor < down || - down - remainder > remainder + divisor - down)) { - return digits::error; - } - return 2 * unit <= remainder && remainder <= error - 4 * unit - ? digits::done - : digits::error; + static uint32_t compute_delta(const cache_entry_type& cache, + int beta_minus_1) FMT_NOEXCEPT { + return static_cast(cache >> (64 - 1 - beta_minus_1)); + } + + static bool compute_mul_parity(carrier_uint two_f, + const cache_entry_type& cache, + int beta_minus_1) FMT_NOEXCEPT { + FMT_ASSERT(beta_minus_1 >= 1, ""); + FMT_ASSERT(beta_minus_1 < 64, ""); + + return ((umul96_lower64(two_f, cache) >> (64 - beta_minus_1)) & 1) != 0; + } + + static carrier_uint compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { + return static_cast( + (cache - (cache >> (float_info::significand_bits + 2))) >> + (64 - float_info::significand_bits - 1 - beta_minus_1)); + } + + static carrier_uint compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { + return static_cast( + (cache + (cache >> (float_info::significand_bits + 1))) >> + (64 - float_info::significand_bits - 1 - beta_minus_1)); + } + + static carrier_uint compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { + return (static_cast( + cache >> + (64 - float_info::significand_bits - 2 - beta_minus_1)) + + 1) / + 2; } }; +template <> struct cache_accessor { + using carrier_uint = float_info::carrier_uint; + using cache_entry_type = uint128_wrapper; + + static uint128_wrapper get_cached_power(int k) FMT_NOEXCEPT { + FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, + "k is out of range"); + +#if 
FMT_USE_FULL_CACHE_DRAGONBOX + return data::dragonbox_pow10_significands_128[k - + float_info::min_k]; +#else + static const int compression_ratio = 27; + + // Compute base index. + int cache_index = (k - float_info::min_k) / compression_ratio; + int kb = cache_index * compression_ratio + float_info::min_k; + int offset = k - kb; + + // Get base cache. + uint128_wrapper base_cache = + data::dragonbox_pow10_significands_128[cache_index]; + if (offset == 0) return base_cache; + + // Compute the required amount of bit-shift. + int alpha = floor_log2_pow10(kb + offset) - floor_log2_pow10(kb) - offset; + FMT_ASSERT(alpha > 0 && alpha < 64, "shifting error detected"); + + // Try to recover the real cache. + uint64_t pow5 = data::powers_of_5_64[offset]; + uint128_wrapper recovered_cache = umul128(base_cache.high(), pow5); + uint128_wrapper middle_low = + umul128(base_cache.low() - (kb < 0 ? 1 : 0), pow5); + + recovered_cache += middle_low.high(); + + uint64_t high_to_middle = recovered_cache.high() << (64 - alpha); + uint64_t middle_to_low = recovered_cache.low() << (64 - alpha); + + recovered_cache = + uint128_wrapper{(recovered_cache.low() >> alpha) | high_to_middle, + ((middle_low.low() >> alpha) | middle_to_low)}; + + if (kb < 0) recovered_cache += 1; + + // Get error. + int error_idx = (k - float_info::min_k) / 16; + uint32_t error = (data::dragonbox_pow10_recovery_errors[error_idx] >> + ((k - float_info::min_k) % 16) * 2) & + 0x3; + + // Add the error back. 
+ FMT_ASSERT(recovered_cache.low() + error >= recovered_cache.low(), ""); + return {recovered_cache.high(), recovered_cache.low() + error}; +#endif + } + + static carrier_uint compute_mul(carrier_uint u, + const cache_entry_type& cache) FMT_NOEXCEPT { + return umul192_upper64(u, cache); + } + + static uint32_t compute_delta(cache_entry_type const& cache, + int beta_minus_1) FMT_NOEXCEPT { + return static_cast(cache.high() >> (64 - 1 - beta_minus_1)); + } + + static bool compute_mul_parity(carrier_uint two_f, + const cache_entry_type& cache, + int beta_minus_1) FMT_NOEXCEPT { + FMT_ASSERT(beta_minus_1 >= 1, ""); + FMT_ASSERT(beta_minus_1 < 64, ""); + + return ((umul192_middle64(two_f, cache) >> (64 - beta_minus_1)) & 1) != 0; + } + + static carrier_uint compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { + return (cache.high() - + (cache.high() >> (float_info::significand_bits + 2))) >> + (64 - float_info::significand_bits - 1 - beta_minus_1); + } + + static carrier_uint compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { + return (cache.high() + + (cache.high() >> (float_info::significand_bits + 1))) >> + (64 - float_info::significand_bits - 1 - beta_minus_1); + } + + static carrier_uint compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT { + return ((cache.high() >> + (64 - float_info::significand_bits - 2 - beta_minus_1)) + + 1) / + 2; + } +}; + +// Various integer checks +template +bool is_left_endpoint_integer_shorter_interval(int exponent) FMT_NOEXCEPT { + return exponent >= + float_info< + T>::case_shorter_interval_left_endpoint_lower_threshold && + exponent <= + float_info::case_shorter_interval_left_endpoint_upper_threshold; +} +template +bool is_endpoint_integer(typename float_info::carrier_uint two_f, + int exponent, int minus_k) FMT_NOEXCEPT { + if (exponent < 
float_info::case_fc_pm_half_lower_threshold) return false; + // For k >= 0. + if (exponent <= float_info::case_fc_pm_half_upper_threshold) return true; + // For k < 0. + if (exponent > float_info::divisibility_check_by_5_threshold) return false; + return divisible_by_power_of_5(two_f, minus_k); +} + +template +bool is_center_integer(typename float_info::carrier_uint two_f, int exponent, + int minus_k) FMT_NOEXCEPT { + // Exponent for 5 is negative. + if (exponent > float_info::divisibility_check_by_5_threshold) return false; + if (exponent > float_info::case_fc_upper_threshold) + return divisible_by_power_of_5(two_f, minus_k); + // Both exponents are nonnegative. + if (exponent >= float_info::case_fc_lower_threshold) return true; + // Exponent for 2 is negative. + return divisible_by_power_of_2(two_f, minus_k - exponent + 1); +} + +// Remove trailing zeros from n and return the number of zeros removed (float) +FMT_ALWAYS_INLINE int remove_trailing_zeros(uint32_t& n) FMT_NOEXCEPT { +#ifdef FMT_BUILTIN_CTZ + int t = FMT_BUILTIN_CTZ(n); +#else + int t = ctz(n); +#endif + if (t > float_info::max_trailing_zeros) + t = float_info::max_trailing_zeros; + + const uint32_t mod_inv1 = 0xcccccccd; + const uint32_t max_quotient1 = 0x33333333; + const uint32_t mod_inv2 = 0xc28f5c29; + const uint32_t max_quotient2 = 0x0a3d70a3; + + int s = 0; + for (; s < t - 1; s += 2) { + if (n * mod_inv2 > max_quotient2) break; + n *= mod_inv2; + } + if (s < t && n * mod_inv1 <= max_quotient1) { + n *= mod_inv1; + ++s; + } + n >>= s; + return s; +} + +// Removes trailing zeros and returns the number of zeros removed (double) +FMT_ALWAYS_INLINE int remove_trailing_zeros(uint64_t& n) FMT_NOEXCEPT { +#ifdef FMT_BUILTIN_CTZLL + int t = FMT_BUILTIN_CTZLL(n); +#else + int t = ctzll(n); +#endif + if (t > float_info::max_trailing_zeros) + t = float_info::max_trailing_zeros; + // Divide by 10^8 and reduce to 32-bits + // Since ret_value.significand <= (2^64 - 1) / 1000 < 10^17, + // both of the 
quotient and the r should fit in 32-bits + + const uint32_t mod_inv1 = 0xcccccccd; + const uint32_t max_quotient1 = 0x33333333; + const uint64_t mod_inv8 = 0xc767074b22e90e21; + const uint64_t max_quotient8 = 0x00002af31dc46118; + + // If the number is divisible by 1'0000'0000, work with the quotient + if (t >= 8) { + auto quotient_candidate = n * mod_inv8; + + if (quotient_candidate <= max_quotient8) { + auto quotient = static_cast(quotient_candidate >> 8); + + int s = 8; + for (; s < t; ++s) { + if (quotient * mod_inv1 > max_quotient1) break; + quotient *= mod_inv1; + } + quotient >>= (s - 8); + n = quotient; + return s; + } + } + + // Otherwise, work with the remainder + auto quotient = static_cast(n / 100000000); + auto remainder = static_cast(n - 100000000 * quotient); + + if (t == 0 || remainder * mod_inv1 > max_quotient1) { + return 0; + } + remainder *= mod_inv1; + + if (t == 1 || remainder * mod_inv1 > max_quotient1) { + n = (remainder >> 1) + quotient * 10000000ull; + return 1; + } + remainder *= mod_inv1; + + if (t == 2 || remainder * mod_inv1 > max_quotient1) { + n = (remainder >> 2) + quotient * 1000000ull; + return 2; + } + remainder *= mod_inv1; + + if (t == 3 || remainder * mod_inv1 > max_quotient1) { + n = (remainder >> 3) + quotient * 100000ull; + return 3; + } + remainder *= mod_inv1; + + if (t == 4 || remainder * mod_inv1 > max_quotient1) { + n = (remainder >> 4) + quotient * 10000ull; + return 4; + } + remainder *= mod_inv1; + + if (t == 5 || remainder * mod_inv1 > max_quotient1) { + n = (remainder >> 5) + quotient * 1000ull; + return 5; + } + remainder *= mod_inv1; + + if (t == 6 || remainder * mod_inv1 > max_quotient1) { + n = (remainder >> 6) + quotient * 100ull; + return 6; + } + remainder *= mod_inv1; + + n = (remainder >> 7) + quotient * 10ull; + return 7; +} + +// The main algorithm for shorter interval case +template +FMT_ALWAYS_INLINE FMT_SAFEBUFFERS decimal_fp shorter_interval_case( + int exponent) FMT_NOEXCEPT { + decimal_fp 
ret_value; + // Compute k and beta + const int minus_k = floor_log10_pow2_minus_log10_4_over_3(exponent); + const int beta_minus_1 = exponent + floor_log2_pow10(-minus_k); + + // Compute xi and zi + using cache_entry_type = typename cache_accessor::cache_entry_type; + const cache_entry_type cache = cache_accessor::get_cached_power(-minus_k); + + auto xi = cache_accessor::compute_left_endpoint_for_shorter_interval_case( + cache, beta_minus_1); + auto zi = cache_accessor::compute_right_endpoint_for_shorter_interval_case( + cache, beta_minus_1); + + // If the left endpoint is not an integer, increase it + if (!is_left_endpoint_integer_shorter_interval(exponent)) ++xi; + + // Try bigger divisor + ret_value.significand = zi / 10; + + // If succeed, remove trailing zeros if necessary and return + if (ret_value.significand * 10 >= xi) { + ret_value.exponent = minus_k + 1; + ret_value.exponent += remove_trailing_zeros(ret_value.significand); + return ret_value; + } + + // Otherwise, compute the round-up of y + ret_value.significand = + cache_accessor::compute_round_up_for_shorter_interval_case( + cache, beta_minus_1); + ret_value.exponent = minus_k; + + // When tie occurs, choose one of them according to the rule + if (exponent >= float_info::shorter_interval_tie_lower_threshold && + exponent <= float_info::shorter_interval_tie_upper_threshold) { + ret_value.significand = ret_value.significand % 2 == 0 + ? ret_value.significand + : ret_value.significand - 1; + } else if (ret_value.significand < xi) { + ++ret_value.significand; + } + return ret_value; +} + +template +FMT_SAFEBUFFERS decimal_fp to_decimal(T x) FMT_NOEXCEPT { + // Step 1: integer promotion & Schubfach multiplier calculation. + + using carrier_uint = typename float_info::carrier_uint; + using cache_entry_type = typename cache_accessor::cache_entry_type; + auto br = bit_cast(x); + + // Extract significand bits and exponent bits. 
+ const carrier_uint significand_mask = + (static_cast(1) << float_info::significand_bits) - 1; + carrier_uint significand = (br & significand_mask); + int exponent = static_cast((br & exponent_mask()) >> + float_info::significand_bits); + + if (exponent != 0) { // Check if normal. + exponent += float_info::exponent_bias - float_info::significand_bits; + + // Shorter interval case; proceed like Schubfach. + if (significand == 0) return shorter_interval_case(exponent); + + significand |= + (static_cast(1) << float_info::significand_bits); + } else { + // Subnormal case; the interval is always regular. + if (significand == 0) return {0, 0}; + exponent = float_info::min_exponent - float_info::significand_bits; + } + + const bool include_left_endpoint = (significand % 2 == 0); + const bool include_right_endpoint = include_left_endpoint; + + // Compute k and beta. + const int minus_k = floor_log10_pow2(exponent) - float_info::kappa; + const cache_entry_type cache = cache_accessor::get_cached_power(-minus_k); + const int beta_minus_1 = exponent + floor_log2_pow10(-minus_k); + + // Compute zi and deltai + // 10^kappa <= deltai < 10^(kappa + 1) + const uint32_t deltai = cache_accessor::compute_delta(cache, beta_minus_1); + const carrier_uint two_fc = significand << 1; + const carrier_uint two_fr = two_fc | 1; + const carrier_uint zi = + cache_accessor::compute_mul(two_fr << beta_minus_1, cache); + + // Step 2: Try larger divisor; remove trailing zeros if necessary + + // Using an upper bound on zi, we might be able to optimize the division + // better than the compiler; we are computing zi / big_divisor here + decimal_fp ret_value; + ret_value.significand = divide_by_10_to_kappa_plus_1(zi); + uint32_t r = static_cast(zi - float_info::big_divisor * + ret_value.significand); + + if (r > deltai) { + goto small_divisor_case_label; + } else if (r < deltai) { + // Exclude the right endpoint if necessary + if (r == 0 && !include_right_endpoint && + is_endpoint_integer(two_fr, 
exponent, minus_k)) { + --ret_value.significand; + r = float_info::big_divisor; + goto small_divisor_case_label; + } + } else { + // r == deltai; compare fractional parts + // Check conditions in the order different from the paper + // to take advantage of short-circuiting + const carrier_uint two_fl = two_fc - 1; + if ((!include_left_endpoint || + !is_endpoint_integer(two_fl, exponent, minus_k)) && + !cache_accessor::compute_mul_parity(two_fl, cache, beta_minus_1)) { + goto small_divisor_case_label; + } + } + ret_value.exponent = minus_k + float_info::kappa + 1; + + // We may need to remove trailing zeros + ret_value.exponent += remove_trailing_zeros(ret_value.significand); + return ret_value; + + // Step 3: Find the significand with the smaller divisor + +small_divisor_case_label: + ret_value.significand *= 10; + ret_value.exponent = minus_k + float_info::kappa; + + const uint32_t mask = (1u << float_info::kappa) - 1; + auto dist = r - (deltai / 2) + (float_info::small_divisor / 2); + + // Is dist divisible by 2^kappa? + if ((dist & mask) == 0) { + const bool approx_y_parity = + ((dist ^ (float_info::small_divisor / 2)) & 1) != 0; + dist >>= float_info::kappa; + + // Is dist divisible by 5^kappa? + if (check_divisibility_and_divide_by_pow5::kappa>(dist)) { + ret_value.significand += dist; + + // Check z^(f) >= epsilon^(f) + // We have either yi == zi - epsiloni or yi == (zi - epsiloni) - 1, + // where yi == zi - epsiloni if and only if z^(f) >= epsilon^(f) + // Since there are only 2 possibilities, we only need to care about the + // parity. 
Also, zi and r should have the same parity since the divisor + // is an even number + if (cache_accessor::compute_mul_parity(two_fc, cache, beta_minus_1) != + approx_y_parity) { + --ret_value.significand; + } else { + // If z^(f) >= epsilon^(f), we might have a tie + // when z^(f) == epsilon^(f), or equivalently, when y is an integer + if (is_center_integer(two_fc, exponent, minus_k)) { + ret_value.significand = ret_value.significand % 2 == 0 + ? ret_value.significand + : ret_value.significand - 1; + } + } + } + // Is dist not divisible by 5^kappa? + else { + ret_value.significand += dist; + } + } + // Is dist not divisible by 2^kappa? + else { + // Since we know dist is small, we might be able to optimize the division + // better than the compiler; we are computing dist / small_divisor here + ret_value.significand += + small_division_by_pow10::kappa>(dist); + } + return ret_value; +} +} // namespace dragonbox + // Formats value using a variation of the Fixed-Precision Positive // Floating-Point Printout ((FPP)^2) algorithm by Steele & White: // https://fmt.dev/p372-steele.pdf. template -void fallback_format(Double d, buffer& buf, int& exp10) { +void fallback_format(Double d, int num_digits, bool binary32, buffer& buf, + int& exp10) { bigint numerator; // 2 * R in (FPP)^2. bigint denominator; // 2 * S in (FPP)^2. // lower and upper are differences between value and corresponding boundaries. @@ -998,8 +2314,9 @@ void fallback_format(Double d, buffer& buf, int& exp10) { // Shift numerator and denominator by an extra bit or two (if lower boundary // is closer) to make lower and upper integers. This eliminates multiplication // by 2 during later computations. - // TODO: handle float - int shift = value.assign(d) ? 2 : 1; + const bool is_predecessor_closer = + binary32 ? value.assign(static_cast(d)) : value.assign(d); + int shift = is_predecessor_closer ? 
2 : 1; uint64_t significand = value.f << shift; if (value.e >= 0) { numerator.assign(significand); @@ -1034,39 +2351,73 @@ void fallback_format(Double d, buffer& buf, int& exp10) { upper = &upper_store; } } - if (!upper) upper = &lower; // Invariant: value == (numerator / denominator) * pow(10, exp10). - bool even = (value.f & 1) == 0; - int num_digits = 0; - char* data = buf.data(); - for (;;) { - int digit = numerator.divmod_assign(denominator); - bool low = compare(numerator, lower) - even < 0; // numerator <[=] lower. - // numerator + upper >[=] pow10: - bool high = add_compare(numerator, *upper, denominator) + even > 0; - data[num_digits++] = static_cast('0' + digit); - if (low || high) { - if (!low) { - ++data[num_digits - 1]; - } else if (high) { - int result = add_compare(numerator, numerator, denominator); - // Round half to even. - if (result > 0 || (result == 0 && (digit % 2) != 0)) + if (num_digits < 0) { + // Generate the shortest representation. + if (!upper) upper = &lower; + bool even = (value.f & 1) == 0; + num_digits = 0; + char* data = buf.data(); + for (;;) { + int digit = numerator.divmod_assign(denominator); + bool low = compare(numerator, lower) - even < 0; // numerator <[=] lower. + // numerator + upper >[=] pow10: + bool high = add_compare(numerator, *upper, denominator) + even > 0; + data[num_digits++] = static_cast('0' + digit); + if (low || high) { + if (!low) { ++data[num_digits - 1]; + } else if (high) { + int result = add_compare(numerator, numerator, denominator); + // Round half to even. + if (result > 0 || (result == 0 && (digit % 2) != 0)) + ++data[num_digits - 1]; + } + buf.try_resize(to_unsigned(num_digits)); + exp10 -= num_digits - 1; + return; + } + numerator *= 10; + lower *= 10; + if (upper != &lower) *upper *= 10; + } + } + // Generate the given number of digits. + exp10 -= num_digits - 1; + if (num_digits == 0) { + buf.try_resize(1); + denominator *= 10; + buf[0] = add_compare(numerator, numerator, denominator) > 0 ? 
'1' : '0'; + return; + } + buf.try_resize(to_unsigned(num_digits)); + for (int i = 0; i < num_digits - 1; ++i) { + int digit = numerator.divmod_assign(denominator); + buf[i] = static_cast('0' + digit); + numerator *= 10; + } + int digit = numerator.divmod_assign(denominator); + auto result = add_compare(numerator, numerator, denominator); + if (result > 0 || (result == 0 && (digit % 2) != 0)) { + if (digit == 9) { + const auto overflow = '0' + 10; + buf[num_digits - 1] = overflow; + // Propagate the carry. + for (int i = num_digits - 1; i > 0 && buf[i] == overflow; --i) { + buf[i] = '0'; + ++buf[i - 1]; + } + if (buf[0] == overflow) { + buf[0] = '1'; + ++exp10; } - buf.resize(to_unsigned(num_digits)); - exp10 -= num_digits - 1; return; } - numerator *= 10; - lower *= 10; - if (upper != &lower) *upper *= 10; + ++digit; } + buf[num_digits - 1] = static_cast('0' + digit); } -// Formats value using the Grisu algorithm -// (https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf) -// if T is a IEEE754 binary32 or binary64 and snprintf otherwise. template int format_float(T value, int precision, float_specs specs, buffer& buf) { static_assert(!std::is_same::value, ""); @@ -1078,66 +2429,57 @@ int format_float(T value, int precision, float_specs specs, buffer& buf) { buf.push_back('0'); return 0; } - buf.resize(to_unsigned(precision)); + buf.try_resize(to_unsigned(precision)); std::uninitialized_fill_n(buf.data(), precision, '0'); return -precision; } if (!specs.use_grisu) return snprintf_float(value, precision, specs, buf); + if (precision < 0) { + // Use Dragonbox for the shortest format. 
+ if (specs.binary32) { + auto dec = dragonbox::to_decimal(static_cast(value)); + write(buffer_appender(buf), dec.significand); + return dec.exponent; + } + auto dec = dragonbox::to_decimal(static_cast(value)); + write(buffer_appender(buf), dec.significand); + return dec.exponent; + } + + // Use Grisu + Dragon4 for the given precision: + // https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf. int exp = 0; const int min_exp = -60; // alpha in Grisu. int cached_exp10 = 0; // K in Grisu. - if (precision < 0) { - fp fp_value; - auto boundaries = specs.binary32 - ? fp_value.assign_float_with_boundaries(value) - : fp_value.assign_with_boundaries(value); - fp_value = normalize(fp_value); - // Find a cached power of 10 such that multiplying value by it will bring - // the exponent in the range [min_exp, -32]. - const fp cached_pow = get_cached_power( - min_exp - (fp_value.e + fp::significand_size), cached_exp10); - // Multiply value and boundaries by the cached power of 10. - fp_value = fp_value * cached_pow; - boundaries.lower = multiply(boundaries.lower, cached_pow.f); - boundaries.upper = multiply(boundaries.upper, cached_pow.f); - assert(min_exp <= fp_value.e && fp_value.e <= -32); - --boundaries.lower; // \tilde{M}^- - 1 ulp -> M^-_{\downarrow}. - ++boundaries.upper; // \tilde{M}^+ + 1 ulp -> M^+_{\uparrow}. - // Numbers outside of (lower, upper) definitely do not round to value. 
- grisu_shortest_handler handler{buf.data(), 0, - boundaries.upper - fp_value.f}; - auto result = - grisu_gen_digits(fp(boundaries.upper, fp_value.e), - boundaries.upper - boundaries.lower, exp, handler); - if (result == digits::error) { - exp += handler.size - cached_exp10 - 1; - fallback_format(value, buf, exp); - return exp; - } - buf.resize(to_unsigned(handler.size)); + fp normalized = normalize(fp(value)); + const auto cached_pow = get_cached_power( + min_exp - (normalized.e + fp::significand_size), cached_exp10); + normalized = normalized * cached_pow; + // Limit precision to the maximum possible number of significant digits in an + // IEEE754 double because we don't need to generate zeros. + const int max_double_digits = 767; + if (precision > max_double_digits) precision = max_double_digits; + fixed_handler handler{buf.data(), 0, precision, -cached_exp10, fixed}; + if (grisu_gen_digits(normalized, 1, exp, handler) == digits::error) { + exp += handler.size - cached_exp10 - 1; + fallback_format(value, handler.precision, specs.binary32, buf, exp); } else { - if (precision > 17) return snprintf_float(value, precision, specs, buf); - fp normalized = normalize(fp(value)); - const auto cached_pow = get_cached_power( - min_exp - (normalized.e + fp::significand_size), cached_exp10); - normalized = normalized * cached_pow; - fixed_handler handler{buf.data(), 0, precision, -cached_exp10, fixed}; - if (grisu_gen_digits(normalized, 1, exp, handler) == digits::error) - return snprintf_float(value, precision, specs, buf); - int num_digits = handler.size; - if (!fixed) { - // Remove trailing zeros. - while (num_digits > 0 && buf[num_digits - 1] == '0') { - --num_digits; - ++exp; - } - } - buf.resize(to_unsigned(num_digits)); + exp += handler.exp10; + buf.try_resize(to_unsigned(handler.size)); } - return exp - cached_exp10; -} + if (!fixed && !specs.showpoint) { + // Remove trailing zeros. 
+ auto num_digits = buf.size(); + while (num_digits > 0 && buf[num_digits - 1] == '0') { + --num_digits; + ++exp; + } + buf.try_resize(num_digits); + } + return exp; +} // namespace detail template int snprintf_float(T value, int precision, float_specs specs, @@ -1185,19 +2527,20 @@ int snprintf_float(T value, int precision, float_specs specs, ? snprintf_ptr(begin, capacity, format, precision, value) : snprintf_ptr(begin, capacity, format, value); if (result < 0) { - buf.reserve(buf.capacity() + 1); // The buffer will grow exponentially. + // The buffer will grow exponentially. + buf.try_reserve(buf.capacity() + 1); continue; } auto size = to_unsigned(result); // Size equal to capacity means that the last character was truncated. if (size >= capacity) { - buf.reserve(size + offset + 1); // Add 1 for the terminating '\0'. + buf.try_reserve(size + offset + 1); // Add 1 for the terminating '\0'. continue; } auto is_digit = [](char c) { return c >= '0' && c <= '9'; }; if (specs.format == float_format::fixed) { if (precision == 0) { - buf.resize(size); + buf.try_resize(size); return 0; } // Find and remove the decimal point. @@ -1207,11 +2550,11 @@ int snprintf_float(T value, int precision, float_specs specs, } while (is_digit(*p)); int fraction_size = static_cast(end - p - 1); std::memmove(p, p + 1, to_unsigned(fraction_size)); - buf.resize(size - 1); + buf.try_resize(size - 1); return -fraction_size; } if (specs.format == float_format::hex) { - buf.resize(size + offset); + buf.try_resize(size + offset); return 0; } // Find and parse the exponent. 
@@ -1237,7 +2580,7 @@ int snprintf_float(T value, int precision, float_specs specs, fraction_size = static_cast(fraction_end - begin - 1); std::memmove(begin + 1, begin + 2, to_unsigned(fraction_size)); } - buf.resize(to_unsigned(fraction_size) + offset + 1); + buf.try_resize(to_unsigned(fraction_size) + offset + 1); return exp - fraction_size; } } @@ -1259,25 +2602,18 @@ int snprintf_float(T value, int precision, float_specs specs, * occurs, this pointer will be a guess that depends on the particular * error, but it will always advance at least one byte. */ -FMT_FUNC const char* utf8_decode(const char* buf, uint32_t* c, int* e) { - static const char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 2, 2, 2, 2, 3, 3, 4, 0}; +inline const char* utf8_decode(const char* buf, uint32_t* c, int* e) { static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07}; static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536}; static const int shiftc[] = {0, 18, 12, 6, 0}; static const int shifte[] = {0, 6, 4, 2, 0}; - auto s = reinterpret_cast(buf); - int len = lengths[s[0] >> 3]; - - // Compute the pointer to the next character early so that the next - // iteration can start working on the next character. Neither Clang - // nor GCC figure out this reordering on their own. - const char* next = buf + len + !len; + int len = code_point_length(buf); + const char* next = buf + len; // Assume a four-byte character and load four bytes. Unused bits are // shifted out. 
+ auto s = reinterpret_cast(buf); *c = uint32_t(s[0] & masks[len]) << 18; *c |= uint32_t(s[1] & 0x3f) << 12; *c |= uint32_t(s[2] & 0x3f) << 6; @@ -1296,6 +2632,19 @@ FMT_FUNC const char* utf8_decode(const char* buf, uint32_t* c, int* e) { return next; } + +struct stringifier { + template FMT_INLINE std::string operator()(T value) const { + return to_string(value); + } + std::string operator()(basic_format_arg::handle h) const { + memory_buffer buf; + format_parse_context parse_ctx({}); + format_context format_ctx(buffer_appender(buf), {}, {}); + h.format(parse_ctx, format_ctx); + return to_string(buf); + } +}; } // namespace detail template <> struct formatter { @@ -1363,7 +2712,8 @@ FMT_FUNC void format_system_error(detail::buffer& out, int error_code, int result = detail::safe_strerror(error_code, system_message, buf.size()); if (result == 0) { - format_to(std::back_inserter(out), "{}: {}", message, system_message); + format_to(detail::buffer_appender(out), "{}: {}", message, + system_message); return; } if (result != ERANGE) @@ -1384,20 +2734,6 @@ FMT_FUNC void report_system_error(int error_code, report_error(format_system_error, error_code, message); } -struct stringifier { - template FMT_INLINE std::string operator()(T value) const { - return to_string(value); - } - std::string operator()(basic_format_arg::handle h) const { - memory_buffer buf; - detail::buffer& base = buf; - format_parse_context parse_ctx({}); - format_context format_ctx(std::back_inserter(base), {}, {}); - h.format(parse_ctx, format_ctx); - return to_string(buf); - } -}; - FMT_FUNC std::string detail::vformat(string_view format_str, format_args args) { if (format_str.size() == 2 && equal2(format_str.data(), "{}")) { auto arg = args.get(0); @@ -1409,6 +2745,14 @@ FMT_FUNC std::string detail::vformat(string_view format_str, format_args args) { return to_string(buffer); } +#ifdef _WIN32 +namespace detail { +using dword = conditional_t; +extern "C" __declspec(dllimport) int __stdcall 
WriteConsoleW( // + void*, const void*, dword, dword*, void*); +} // namespace detail +#endif + FMT_FUNC void vprint(std::FILE* f, string_view format_str, format_args args) { memory_buffer buffer; detail::vformat_to(buffer, format_str, @@ -1417,10 +2761,10 @@ FMT_FUNC void vprint(std::FILE* f, string_view format_str, format_args args) { auto fd = _fileno(f); if (_isatty(fd)) { detail::utf8_to_utf16 u16(string_view(buffer.data(), buffer.size())); - auto written = DWORD(); - if (!WriteConsoleW(reinterpret_cast(_get_osfhandle(fd)), - u16.c_str(), static_cast(u16.size()), &written, - nullptr)) { + auto written = detail::dword(); + if (!detail::WriteConsoleW(reinterpret_cast(_get_osfhandle(fd)), + u16.c_str(), static_cast(u16.size()), + &written, nullptr)) { FMT_THROW(format_error("failed to write to console")); } return; @@ -1446,8 +2790,4 @@ FMT_FUNC void vprint(string_view format_str, format_args args) { FMT_END_NAMESPACE -#ifdef _MSC_VER -# pragma warning(pop) -#endif - #endif // FMT_FORMAT_INL_H_ diff --git a/src/fmt/format.h b/src/fmt/format.h index a4911b9fdb..fbe5045068 100644 --- a/src/fmt/format.h +++ b/src/fmt/format.h @@ -69,16 +69,12 @@ # define FMT_NOINLINE #endif -// LAMMPS customizations: -// 1) Intel compilers on MacOS have __clang__ defined -// but fail to recognize [[clang::fallthrough]] -// 2) Intel compilers on Linux identify as GCC compatible -// but fail to recognize [[gnu::fallthrough]] - #if __cplusplus == 201103L || __cplusplus == 201402L -# if defined(__clang__) && !defined(__INTEL_COMPILER) +# if defined(__INTEL_COMPILER) || defined(__PGI) +# define FMT_FALLTHROUGH +# elif defined(__clang__) # define FMT_FALLTHROUGH [[clang::fallthrough]] -# elif FMT_GCC_VERSION >= 700 && !defined(__PGI) && !defined(__INTEL_COMPILER) && \ +# elif FMT_GCC_VERSION >= 700 && \ (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 520) # define FMT_FALLTHROUGH [[gnu::fallthrough]] # else @@ -145,12 +141,13 @@ FMT_END_NAMESPACE #endif #ifndef FMT_USE_UDL_TEMPLATE -// 
EDG frontend based compilers (icc, nvcc, etc) and GCC < 6.4 do not properly -// support UDL templates and GCC >= 9 warns about them. +// EDG frontend based compilers (icc, nvcc, PGI, etc) and GCC < 6.4 do not +// properly support UDL templates and GCC >= 9 warns about them. # if FMT_USE_USER_DEFINED_LITERALS && \ (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 501) && \ ((FMT_GCC_VERSION >= 604 && __cplusplus >= 201402L) || \ - FMT_CLANG_VERSION >= 304) + FMT_CLANG_VERSION >= 304) && \ + !defined(__PGI) && !defined(__NVCC__) # define FMT_USE_UDL_TEMPLATE 1 # else # define FMT_USE_UDL_TEMPLATE 0 @@ -169,6 +166,14 @@ FMT_END_NAMESPACE # define FMT_USE_LONG_DOUBLE 1 #endif +// Defining FMT_REDUCE_INT_INSTANTIATIONS to 1, will reduce the number of +// int_writer template instances to just one by only using the largest integer +// type. This results in a reduction in binary size but will cause a decrease in +// integer formatting performance. +#if !defined(FMT_REDUCE_INT_INSTANTIATIONS) +# define FMT_REDUCE_INT_INSTANTIATIONS 0 +#endif + // __builtin_clz is broken in clang with Microsoft CodeGen: // https://github.com/fmtlib/fmt/issues/519 #if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clz)) && !FMT_MSC_VER @@ -177,56 +182,87 @@ FMT_END_NAMESPACE #if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clzll)) && !FMT_MSC_VER # define FMT_BUILTIN_CLZLL(n) __builtin_clzll(n) #endif +#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_ctz)) +# define FMT_BUILTIN_CTZ(n) __builtin_ctz(n) +#endif +#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_ctzll)) +# define FMT_BUILTIN_CTZLL(n) __builtin_ctzll(n) +#endif + +#if FMT_MSC_VER +# include // _BitScanReverse[64], _BitScanForward[64], _umul128 +#endif // Some compilers masquerade as both MSVC and GCC-likes or otherwise support // __builtin_clz and __builtin_clzll, so only define FMT_BUILTIN_CLZ using the // MSVC intrinsics if the clz and clzll builtins are not available. 
-#if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && !defined(_MANAGED) -# include // _BitScanReverse, _BitScanReverse64 - +#if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && \ + !defined(FMT_BUILTIN_CTZLL) && !defined(_MANAGED) FMT_BEGIN_NAMESPACE namespace detail { // Avoid Clang with Microsoft CodeGen's -Wunknown-pragmas warning. # ifndef __clang__ +# pragma intrinsic(_BitScanForward) # pragma intrinsic(_BitScanReverse) # endif -inline uint32_t clz(uint32_t x) { +# if defined(_WIN64) && !defined(__clang__) +# pragma intrinsic(_BitScanForward64) +# pragma intrinsic(_BitScanReverse64) +# endif + +inline int clz(uint32_t x) { unsigned long r = 0; _BitScanReverse(&r, x); - FMT_ASSERT(x != 0, ""); // Static analysis complains about using uninitialized data // "r", but the only way that can happen is if "x" is 0, // which the callers guarantee to not happen. FMT_SUPPRESS_MSC_WARNING(6102) - return 31 - r; + return 31 ^ static_cast(r); } # define FMT_BUILTIN_CLZ(n) detail::clz(n) -# if defined(_WIN64) && !defined(__clang__) -# pragma intrinsic(_BitScanReverse64) -# endif - -inline uint32_t clzll(uint64_t x) { +inline int clzll(uint64_t x) { unsigned long r = 0; # ifdef _WIN64 _BitScanReverse64(&r, x); # else // Scan the high 32 bits. - if (_BitScanReverse(&r, static_cast(x >> 32))) return 63 - (r + 32); - + if (_BitScanReverse(&r, static_cast(x >> 32))) return 63 ^ (r + 32); // Scan the low 32 bits. _BitScanReverse(&r, static_cast(x)); # endif - FMT_ASSERT(x != 0, ""); - // Static analysis complains about using uninitialized data - // "r", but the only way that can happen is if "x" is 0, - // which the callers guarantee to not happen. - FMT_SUPPRESS_MSC_WARNING(6102) - return 63 - r; + FMT_SUPPRESS_MSC_WARNING(6102) // Suppress a bogus static analysis warning. 
+ return 63 ^ static_cast(r); } # define FMT_BUILTIN_CLZLL(n) detail::clzll(n) + +inline int ctz(uint32_t x) { + unsigned long r = 0; + _BitScanForward(&r, x); + FMT_ASSERT(x != 0, ""); + FMT_SUPPRESS_MSC_WARNING(6102) // Suppress a bogus static analysis warning. + return static_cast(r); +} +# define FMT_BUILTIN_CTZ(n) detail::ctz(n) + +inline int ctzll(uint64_t x) { + unsigned long r = 0; + FMT_ASSERT(x != 0, ""); + FMT_SUPPRESS_MSC_WARNING(6102) // Suppress a bogus static analysis warning. +# ifdef _WIN64 + _BitScanForward64(&r, x); +# else + // Scan the low 32 bits. + if (_BitScanForward(&r, static_cast(x))) return static_cast(r); + // Scan the high 32 bits. + _BitScanForward(&r, static_cast(x >> 32)); + r += 32; +# endif + return static_cast(r); +} +# define FMT_BUILTIN_CTZLL(n) detail::ctzll(n) } // namespace detail FMT_END_NAMESPACE #endif @@ -304,50 +340,11 @@ FMT_INLINE void assume(bool condition) { #endif } -// A workaround for gcc 4.8 to make void_t work in a SFINAE context. -template struct void_t_impl { using type = void; }; - -template -using void_t = typename detail::void_t_impl::type; - // An approximation of iterator_t for pre-C++20 systems. template using iterator_t = decltype(std::begin(std::declval())); template using sentinel_t = decltype(std::end(std::declval())); -// Detect the iterator category of *any* given type in a SFINAE-friendly way. -// Unfortunately, older implementations of std::iterator_traits are not safe -// for use in a SFINAE-context. -template -struct iterator_category : std::false_type {}; - -template struct iterator_category { - using type = std::random_access_iterator_tag; -}; - -template -struct iterator_category> { - using type = typename It::iterator_category; -}; - -// Detect if *any* given type models the OutputIterator concept. 
-template class is_output_iterator { - // Check for mutability because all iterator categories derived from - // std::input_iterator_tag *may* also meet the requirements of an - // OutputIterator, thereby falling into the category of 'mutable iterators' - // [iterator.requirements.general] clause 4. The compiler reveals this - // property only at the point of *actually dereferencing* the iterator! - template - static decltype(*(std::declval())) test(std::input_iterator_tag); - template static char& test(std::output_iterator_tag); - template static const char& test(...); - - using type = decltype(test(typename iterator_category::type{})); - - public: - enum { value = !std::is_const>::value }; -}; - // A workaround for std::string not having mutable data() until C++17. template inline Char* get_data(std::basic_string& s) { return &s[0]; @@ -380,10 +377,29 @@ reserve(std::back_insert_iterator it, size_t n) { return make_checked(get_data(c) + size, n); } +template +inline buffer_appender reserve(buffer_appender it, size_t n) { + buffer& buf = get_container(it); + buf.try_reserve(buf.size() + n); + return it; +} + template inline Iterator& reserve(Iterator& it, size_t) { return it; } +template +constexpr T* to_pointer(OutputIt, size_t) { + return nullptr; +} +template T* to_pointer(buffer_appender it, size_t n) { + buffer& buf = get_container(it); + auto size = buf.size(); + if (buf.capacity() < size + n) return nullptr; + buf.try_resize(size + n); + return buf.data() + size; +} + template ::value)> inline std::back_insert_iterator base_iterator( std::back_insert_iterator& it, @@ -421,13 +437,17 @@ class counting_iterator { ++count_; return *this; } - counting_iterator operator++(int) { auto it = *this; ++*this; return it; } + friend counting_iterator operator+(counting_iterator it, difference_type n) { + it.count_ += static_cast(n); + return it; + } + value_type operator*() const { return {}; } }; @@ -561,23 +581,38 @@ OutputIt copy_str(InputIt begin, InputIt end, 
OutputIt it) { [](char c) { return static_cast(c); }); } -#ifndef FMT_USE_GRISU -# define FMT_USE_GRISU 1 -#endif - -template constexpr bool use_grisu() { - return FMT_USE_GRISU && std::numeric_limits::is_iec559 && - sizeof(T) <= sizeof(double); +template +inline counting_iterator copy_str(InputIt begin, InputIt end, + counting_iterator it) { + return it + (end - begin); } +template +using is_fast_float = bool_constant::is_iec559 && + sizeof(T) <= sizeof(double)>; + +#ifndef FMT_USE_FULL_CACHE_DRAGONBOX +# define FMT_USE_FULL_CACHE_DRAGONBOX 0 +#endif + template template void buffer::append(const U* begin, const U* end) { - size_t new_size = size_ + to_unsigned(end - begin); - reserve(new_size); - std::uninitialized_copy(begin, end, - make_checked(ptr_ + size_, capacity_ - size_)); - size_ = new_size; + do { + auto count = to_unsigned(end - begin); + try_reserve(size_ + count); + auto free_cap = capacity_ - size_; + if (free_cap < count) count = free_cap; + std::uninitialized_copy_n(begin, count, make_checked(ptr_ + size_, count)); + size_ += count; + begin += count; + } while (begin != end); +} + +template +void iterator_buffer::flush() { + out_ = std::copy_n(data_, this->limit(this->size()), out_); + this->clear(); } } // namespace detail @@ -616,7 +651,7 @@ enum { inline_buffer_size = 500 }; */ template > -class basic_memory_buffer : public detail::buffer { +class basic_memory_buffer final : public detail::buffer { private: T store_[SIZE]; @@ -630,7 +665,7 @@ class basic_memory_buffer : public detail::buffer { } protected: - void grow(size_t size) FMT_OVERRIDE; + void grow(size_t size) final FMT_OVERRIDE; public: using value_type = T; @@ -640,7 +675,7 @@ class basic_memory_buffer : public detail::buffer { : alloc_(alloc) { this->set(store_, SIZE); } - ~basic_memory_buffer() FMT_OVERRIDE { deallocate(); } + ~basic_memory_buffer() { deallocate(); } private: // Move data from other to this buffer. 
@@ -684,6 +719,22 @@ class basic_memory_buffer : public detail::buffer { // Returns a copy of the allocator associated with this buffer. Allocator get_allocator() const { return alloc_; } + + /** + Resizes the buffer to contain *count* elements. If T is a POD type new + elements may not be initialized. + */ + void resize(size_t count) { this->try_resize(count); } + + /** Increases the buffer capacity to *new_capacity*. */ + void reserve(size_t new_capacity) { this->try_reserve(new_capacity); } + + // Directly append data into the buffer + using detail::buffer::append; + template + void append(const ContiguousRange& range) { + append(range.data(), range.data() + range.size()); + } }; template @@ -754,19 +805,81 @@ FMT_CONSTEXPR bool is_supported_floating_point(T) { } // Smallest of uint32_t, uint64_t, uint128_t that is large enough to -// represent all values of T. +// represent all values of an integral type T. template using uint32_or_64_or_128_t = - conditional_t() <= 32, uint32_t, + conditional_t() <= 32 && !FMT_REDUCE_INT_INSTANTIATIONS, + uint32_t, conditional_t() <= 64, uint64_t, uint128_t>>; +// 128-bit integer type used internally +struct FMT_EXTERN_TEMPLATE_API uint128_wrapper { + uint128_wrapper() = default; + +#if FMT_USE_INT128 + uint128_t internal_; + + uint128_wrapper(uint64_t high, uint64_t low) FMT_NOEXCEPT + : internal_{static_cast(low) | + (static_cast(high) << 64)} {} + + uint128_wrapper(uint128_t u) : internal_{u} {} + + uint64_t high() const FMT_NOEXCEPT { return uint64_t(internal_ >> 64); } + uint64_t low() const FMT_NOEXCEPT { return uint64_t(internal_); } + + uint128_wrapper& operator+=(uint64_t n) FMT_NOEXCEPT { + internal_ += n; + return *this; + } +#else + uint64_t high_; + uint64_t low_; + + uint128_wrapper(uint64_t high, uint64_t low) FMT_NOEXCEPT : high_{high}, + low_{low} {} + + uint64_t high() const FMT_NOEXCEPT { return high_; } + uint64_t low() const FMT_NOEXCEPT { return low_; } + + uint128_wrapper& operator+=(uint64_t n) 
FMT_NOEXCEPT { +# if defined(_MSC_VER) && defined(_M_X64) + unsigned char carry = _addcarry_u64(0, low_, n, &low_); + _addcarry_u64(carry, high_, 0, &high_); + return *this; +# else + uint64_t sum = low_ + n; + high_ += (sum < low_ ? 1 : 0); + low_ = sum; + return *this; +# endif + } +#endif +}; + +// Table entry type for divisibility test used internally +template struct FMT_EXTERN_TEMPLATE_API divtest_table_entry { + T mod_inv; + T max_quotient; +}; + // Static data is placed in this class template for the header-only config. template struct FMT_EXTERN_TEMPLATE_API basic_data { static const uint64_t powers_of_10_64[]; static const uint32_t zero_or_powers_of_10_32[]; static const uint64_t zero_or_powers_of_10_64[]; - static const uint64_t pow10_significands[]; - static const int16_t pow10_exponents[]; + static const uint64_t grisu_pow10_significands[]; + static const int16_t grisu_pow10_exponents[]; + static const divtest_table_entry divtest_table_for_pow5_32[]; + static const divtest_table_entry divtest_table_for_pow5_64[]; + static const uint64_t dragonbox_pow10_significands_64[]; + static const uint128_wrapper dragonbox_pow10_significands_128[]; + // log10(2) = 0x0.4d104d427de7fbcc... + static const uint64_t log10_2_significand = 0x4d104d427de7fbcc; +#if !FMT_USE_FULL_CACHE_DRAGONBOX + static const uint64_t powers_of_5_64[]; + static const uint32_t dragonbox_pow10_recovery_errors[]; +#endif // GCC generates slightly better code for pairs than chars. using digit_pair = char[2]; static const digit_pair digits[]; @@ -780,6 +893,17 @@ template struct FMT_EXTERN_TEMPLATE_API basic_data { static const char right_padding_shifts[5]; }; +// Maps bsr(n) to ceil(log10(pow(2, bsr(n) + 1) - 1)). +// This is a function instead of an array to workaround a bug in GCC10 (#1810). 
+FMT_INLINE uint16_t bsr2log10(int bsr) { + static constexpr uint16_t data[] = { + 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, + 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, + 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15, + 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 19, 20}; + return data[bsr]; +} + #ifndef FMT_EXPORTED FMT_EXTERN template struct basic_data; #endif @@ -791,10 +915,9 @@ struct data : basic_data<> {}; // Returns the number of decimal digits in n. Leading zeros are not counted // except for n == 0 in which case count_digits returns 1. inline int count_digits(uint64_t n) { - // Based on http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 - // and the benchmark https://github.com/localvoid/cxx-benchmark-count-digits. - int t = (64 - FMT_BUILTIN_CLZLL(n | 1)) * 1233 >> 12; - return t - (n < data::zero_or_powers_of_10_64[t]) + 1; + // https://github.com/fmtlib/format-benchmark/blob/master/digits10 + auto t = bsr2log10(FMT_BUILTIN_CLZLL(n | 1) ^ 63); + return t - (n < data::zero_or_powers_of_10_64[t]); } #else // Fallback version of count_digits used when __builtin_clz is not available. @@ -844,15 +967,24 @@ template <> int count_digits<4>(detail::fallback_uintptr n); #if FMT_GCC_VERSION || FMT_CLANG_VERSION # define FMT_ALWAYS_INLINE inline __attribute__((always_inline)) +#elif FMT_MSC_VER +# define FMT_ALWAYS_INLINE __forceinline #else -# define FMT_ALWAYS_INLINE +# define FMT_ALWAYS_INLINE inline +#endif + +// To suppress unnecessary security cookie checks +#if FMT_MSC_VER && !FMT_CLANG_VERSION +# define FMT_SAFEBUFFERS __declspec(safebuffers) +#else +# define FMT_SAFEBUFFERS #endif #ifdef FMT_BUILTIN_CLZ // Optional version of count_digits for better performance on 32-bit platforms. 
inline int count_digits(uint32_t n) { - int t = (32 - FMT_BUILTIN_CLZ(n | 1)) * 1233 >> 12; - return t - (n < data::zero_or_powers_of_10_32[t]) + 1; + auto t = bsr2log10(FMT_BUILTIN_CLZ(n | 1) ^ 31); + return t - (n < data::zero_or_powers_of_10_32[t]); } #endif @@ -899,7 +1031,7 @@ template void copy2(Char* dst, const char* src) { *dst++ = static_cast(*src++); *dst = static_cast(*src); } -inline void copy2(char* dst, const char* src) { memcpy(dst, src, 2); } +FMT_INLINE void copy2(char* dst, const char* src) { memcpy(dst, src, 2); } template struct format_decimal_result { Iterator begin; @@ -935,11 +1067,10 @@ inline format_decimal_result format_decimal(Char* out, UInt value, template >::value)> inline format_decimal_result format_decimal(Iterator out, UInt value, - int num_digits) { - // Buffer should be large enough to hold all digits (<= digits10 + 1). - enum { max_size = digits10() + 1 }; - Char buffer[2 * max_size]; - auto end = format_decimal(buffer, value, num_digits).end; + int size) { + // Buffer is large enough to hold all digits (digits10 + 1). + Char buffer[digits10() + 1]; + auto end = format_decimal(buffer, value, size).end; return {out, detail::copy_str(buffer, end, out)}; } @@ -981,6 +1112,10 @@ Char* format_uint(Char* buffer, detail::fallback_uintptr n, int num_digits, template inline It format_uint(It out, UInt value, int num_digits, bool upper = false) { + if (auto ptr = to_pointer(out, to_unsigned(num_digits))) { + format_uint(ptr, value, num_digits, upper); + return out; + } // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1). char buffer[num_bits() / BASE_BITS + 1]; format_uint(buffer, value, num_digits, upper); @@ -1072,6 +1207,78 @@ template struct basic_format_specs { using format_specs = basic_format_specs; namespace detail { +namespace dragonbox { + +// Type-specific information that Dragonbox uses. 
+template struct float_info; + +template <> struct float_info { + using carrier_uint = uint32_t; + static const int significand_bits = 23; + static const int exponent_bits = 8; + static const int min_exponent = -126; + static const int max_exponent = 127; + static const int exponent_bias = -127; + static const int decimal_digits = 9; + static const int kappa = 1; + static const int big_divisor = 100; + static const int small_divisor = 10; + static const int min_k = -31; + static const int max_k = 46; + static const int cache_bits = 64; + static const int divisibility_check_by_5_threshold = 39; + static const int case_fc_pm_half_lower_threshold = -1; + static const int case_fc_pm_half_upper_threshold = 6; + static const int case_fc_lower_threshold = -2; + static const int case_fc_upper_threshold = 6; + static const int case_shorter_interval_left_endpoint_lower_threshold = 2; + static const int case_shorter_interval_left_endpoint_upper_threshold = 3; + static const int shorter_interval_tie_lower_threshold = -35; + static const int shorter_interval_tie_upper_threshold = -35; + static const int max_trailing_zeros = 7; +}; + +template <> struct float_info { + using carrier_uint = uint64_t; + static const int significand_bits = 52; + static const int exponent_bits = 11; + static const int min_exponent = -1022; + static const int max_exponent = 1023; + static const int exponent_bias = -1023; + static const int decimal_digits = 17; + static const int kappa = 2; + static const int big_divisor = 1000; + static const int small_divisor = 100; + static const int min_k = -292; + static const int max_k = 326; + static const int cache_bits = 128; + static const int divisibility_check_by_5_threshold = 86; + static const int case_fc_pm_half_lower_threshold = -2; + static const int case_fc_pm_half_upper_threshold = 9; + static const int case_fc_lower_threshold = -4; + static const int case_fc_upper_threshold = 9; + static const int case_shorter_interval_left_endpoint_lower_threshold 
= 2; + static const int case_shorter_interval_left_endpoint_upper_threshold = 3; + static const int shorter_interval_tie_lower_threshold = -77; + static const int shorter_interval_tie_upper_threshold = -77; + static const int max_trailing_zeros = 16; +}; + +template struct decimal_fp { + using significand_type = typename float_info::carrier_uint; + significand_type significand; + int exponent; +}; + +template decimal_fp to_decimal(T x) FMT_NOEXCEPT; +} // namespace dragonbox + +template +constexpr typename dragonbox::float_info::carrier_uint exponent_mask() { + using uint = typename dragonbox::float_info::carrier_uint; + return ((uint(1) << dragonbox::float_info::exponent_bits) - 1) + << dragonbox::float_info::significand_bits; +} // A floating-point presentation format. enum class float_format : unsigned char { @@ -1113,113 +1320,6 @@ template It write_exponent(int exp, It it) { return it; } -template class float_writer { - private: - // The number is given as v = digits_ * pow(10, exp_). - const char* digits_; - int num_digits_; - int exp_; - size_t size_; - float_specs specs_; - Char decimal_point_; - - template It prettify(It it) const { - // pow(10, full_exp - 1) <= v <= pow(10, full_exp). - int full_exp = num_digits_ + exp_; - if (specs_.format == float_format::exp) { - // Insert a decimal point after the first digit and add an exponent. - *it++ = static_cast(*digits_); - int num_zeros = specs_.precision - num_digits_; - if (num_digits_ > 1 || specs_.showpoint) *it++ = decimal_point_; - it = copy_str(digits_ + 1, digits_ + num_digits_, it); - if (num_zeros > 0 && specs_.showpoint) - it = std::fill_n(it, num_zeros, static_cast('0')); - *it++ = static_cast(specs_.upper ? 
'E' : 'e'); - return write_exponent(full_exp - 1, it); - } - if (num_digits_ <= full_exp) { - // 1234e7 -> 12340000000[.0+] - it = copy_str(digits_, digits_ + num_digits_, it); - it = std::fill_n(it, full_exp - num_digits_, static_cast('0')); - if (specs_.showpoint || specs_.precision < 0) { - *it++ = decimal_point_; - int num_zeros = specs_.precision - full_exp; - if (num_zeros <= 0) { - if (specs_.format != float_format::fixed) - *it++ = static_cast('0'); - return it; - } -#ifdef FMT_FUZZ - if (num_zeros > 5000) - throw std::runtime_error("fuzz mode - avoiding excessive cpu use"); -#endif - it = std::fill_n(it, num_zeros, static_cast('0')); - } - } else if (full_exp > 0) { - // 1234e-2 -> 12.34[0+] - it = copy_str(digits_, digits_ + full_exp, it); - if (!specs_.showpoint) { - // Remove trailing zeros. - int num_digits = num_digits_; - while (num_digits > full_exp && digits_[num_digits - 1] == '0') - --num_digits; - if (num_digits != full_exp) *it++ = decimal_point_; - return copy_str(digits_ + full_exp, digits_ + num_digits, it); - } - *it++ = decimal_point_; - it = copy_str(digits_ + full_exp, digits_ + num_digits_, it); - if (specs_.precision > num_digits_) { - // Add trailing zeros. - int num_zeros = specs_.precision - num_digits_; - it = std::fill_n(it, num_zeros, static_cast('0')); - } - } else { - // 1234e-6 -> 0.001234 - *it++ = static_cast('0'); - int num_zeros = -full_exp; - int num_digits = num_digits_; - if (num_digits == 0 && specs_.precision >= 0 && - specs_.precision < num_zeros) { - num_zeros = specs_.precision; - } - // Remove trailing zeros. 
- if (!specs_.showpoint) - while (num_digits > 0 && digits_[num_digits - 1] == '0') --num_digits; - if (num_zeros != 0 || num_digits != 0 || specs_.showpoint) { - *it++ = decimal_point_; - it = std::fill_n(it, num_zeros, static_cast('0')); - it = copy_str(digits_, digits_ + num_digits, it); - } - } - return it; - } - - public: - float_writer(const char* digits, int num_digits, int exp, float_specs specs, - Char decimal_point) - : digits_(digits), - num_digits_(num_digits), - exp_(exp), - specs_(specs), - decimal_point_(decimal_point) { - int full_exp = num_digits + exp - 1; - int precision = specs.precision > 0 ? specs.precision : 16; - if (specs_.format == float_format::general && - !(full_exp >= -4 && full_exp < precision)) { - specs_.format = float_format::exp; - } - size_ = prettify(counting_iterator()).count(); - size_ += specs.sign ? 1 : 0; - } - - size_t size() const { return size_; } - - template It operator()(It it) const { - if (specs_.sign) *it++ = static_cast(data::signs[specs_.sign]); - return prettify(it); - } -}; - template int format_float(T value, int precision, float_specs specs, buffer& buf); @@ -1398,7 +1498,7 @@ template inline OutputIt write_padded(OutputIt out, const basic_format_specs& specs, size_t size, - size_t width, const F& f) { + size_t width, F&& f) { static_assert(align == align::left || align == align::right, ""); unsigned spec_width = to_unsigned(specs.width); size_t padding = spec_width > width ? 
spec_width - width : 0; @@ -1416,7 +1516,7 @@ template inline OutputIt write_padded(OutputIt out, const basic_format_specs& specs, size_t size, - const F& f) { + F&& f) { return write_padded(out, specs, size, size, f); } @@ -1583,15 +1683,16 @@ template struct int_writer { char digits[40]; format_decimal(digits, abs_value, num_digits); basic_memory_buffer buffer; - size += prefix_size; - buffer.resize(size); + size += static_cast(prefix_size); + const auto usize = to_unsigned(size); + buffer.resize(usize); basic_string_view s(&sep, sep_size); // Index of a decimal digit with the least significant digit having index 0. int digit_index = 0; group = groups.cbegin(); - auto p = buffer.data() + size; - for (int i = num_digits - 1; i >= 0; --i) { - *--p = static_cast(digits[i]); + auto p = buffer.data() + size - 1; + for (int i = num_digits - 1; i > 0; --i) { + *p-- = static_cast(digits[i]); if (*group <= 0 || ++digit_index % *group != 0 || *group == max_value()) continue; @@ -1599,16 +1700,16 @@ template struct int_writer { digit_index = 0; ++group; } - p -= s.size(); std::uninitialized_copy(s.data(), s.data() + s.size(), make_checked(p, s.size())); + p -= s.size(); } - if (prefix_size != 0) p[-1] = static_cast('-'); - using iterator = remove_reference_t; + *p-- = static_cast(*digits); + if (prefix_size != 0) *p = static_cast('-'); auto data = buffer.data(); - out = write_padded(out, specs, size, size, [=](iterator it) { - return copy_str(data, data + size, it); - }); + out = write_padded( + out, specs, usize, usize, + [=](iterator it) { return copy_str(data, data + size, it); }); } void on_chr() { *out++ = static_cast(abs_value); } @@ -1634,6 +1735,168 @@ OutputIt write_nonfinite(OutputIt out, bool isinf, }); } +// A decimal floating-point number significand * pow(10, exp). 
+struct big_decimal_fp { + const char* significand; + int significand_size; + int exponent; +}; + +inline int get_significand_size(const big_decimal_fp& fp) { + return fp.significand_size; +} +template +inline int get_significand_size(const dragonbox::decimal_fp& fp) { + return count_digits(fp.significand); +} + +template +inline OutputIt write_significand(OutputIt out, const char* significand, + int& significand_size) { + return copy_str(significand, significand + significand_size, out); +} +template +inline OutputIt write_significand(OutputIt out, UInt significand, + int significand_size) { + return format_decimal(out, significand, significand_size).end; +} + +template ::value)> +inline Char* write_significand(Char* out, UInt significand, + int significand_size, int integral_size, + Char decimal_point) { + if (!decimal_point) + return format_decimal(out, significand, significand_size).end; + auto end = format_decimal(out + 1, significand, significand_size).end; + if (integral_size == 1) + out[0] = out[1]; + else + std::copy_n(out + 1, integral_size, out); + out[integral_size] = decimal_point; + return end; +} + +template >::value)> +inline OutputIt write_significand(OutputIt out, UInt significand, + int significand_size, int integral_size, + Char decimal_point) { + // Buffer is large enough to hold digits (digits10 + 1) and a decimal point. 
+ Char buffer[digits10() + 2]; + auto end = write_significand(buffer, significand, significand_size, + integral_size, decimal_point); + return detail::copy_str(buffer, end, out); +} + +template +inline OutputIt write_significand(OutputIt out, const char* significand, + int significand_size, int integral_size, + Char decimal_point) { + out = detail::copy_str(significand, significand + integral_size, out); + if (!decimal_point) return out; + *out++ = decimal_point; + return detail::copy_str(significand + integral_size, + significand + significand_size, out); +} + +template +OutputIt write_float(OutputIt out, const DecimalFP& fp, + const basic_format_specs& specs, float_specs fspecs, + Char decimal_point) { + auto significand = fp.significand; + int significand_size = get_significand_size(fp); + static const Char zero = static_cast('0'); + auto sign = fspecs.sign; + size_t size = to_unsigned(significand_size) + (sign ? 1 : 0); + using iterator = remove_reference_t; + + int output_exp = fp.exponent + significand_size - 1; + auto use_exp_format = [=]() { + if (fspecs.format == float_format::exp) return true; + if (fspecs.format != float_format::general) return false; + // Use the fixed notation if the exponent is in [exp_lower, exp_upper), + // e.g. 0.0001 instead of 1e-04. Otherwise use the exponent notation. + const int exp_lower = -4, exp_upper = 16; + return output_exp < exp_lower || + output_exp >= (fspecs.precision > 0 ? fspecs.precision : exp_upper); + }; + if (use_exp_format()) { + int num_zeros = 0; + if (fspecs.showpoint) { + num_zeros = (std::max)(fspecs.precision - significand_size, 0); + size += to_unsigned(num_zeros); + } else if (significand_size == 1) { + decimal_point = Char(); + } + auto abs_output_exp = output_exp >= 0 ? output_exp : -output_exp; + int exp_digits = 2; + if (abs_output_exp >= 100) exp_digits = abs_output_exp >= 1000 ? 4 : 3; + + size += to_unsigned((decimal_point ? 1 : 0) + 2 + exp_digits); + char exp_char = fspecs.upper ? 
'E' : 'e'; + auto write = [=](iterator it) { + if (sign) *it++ = static_cast(data::signs[sign]); + // Insert a decimal point after the first digit and add an exponent. + it = write_significand(it, significand, significand_size, 1, + decimal_point); + if (num_zeros > 0) it = std::fill_n(it, num_zeros, zero); + *it++ = static_cast(exp_char); + return write_exponent(output_exp, it); + }; + return specs.width > 0 ? write_padded(out, specs, size, write) + : base_iterator(out, write(reserve(out, size))); + } + + int exp = fp.exponent + significand_size; + if (fp.exponent >= 0) { + // 1234e5 -> 123400000[.0+] + size += to_unsigned(fp.exponent); + int num_zeros = fspecs.precision - exp; +#ifdef FMT_FUZZ + if (num_zeros > 5000) + throw std::runtime_error("fuzz mode - avoiding excessive cpu use"); +#endif + if (fspecs.showpoint) { + if (num_zeros <= 0 && fspecs.format != float_format::fixed) num_zeros = 1; + if (num_zeros > 0) size += to_unsigned(num_zeros); + } + return write_padded(out, specs, size, [&](iterator it) { + if (sign) *it++ = static_cast(data::signs[sign]); + it = write_significand(it, significand, significand_size); + it = std::fill_n(it, fp.exponent, zero); + if (!fspecs.showpoint) return it; + *it++ = decimal_point; + return num_zeros > 0 ? std::fill_n(it, num_zeros, zero) : it; + }); + } else if (exp > 0) { + // 1234e-2 -> 12.34[0+] + int num_zeros = fspecs.showpoint ? fspecs.precision - significand_size : 0; + size += 1 + to_unsigned(num_zeros > 0 ? num_zeros : 0); + return write_padded(out, specs, size, [&](iterator it) { + if (sign) *it++ = static_cast(data::signs[sign]); + it = write_significand(it, significand, significand_size, exp, + decimal_point); + return num_zeros > 0 ? 
std::fill_n(it, num_zeros, zero) : it; + }); + } + // 1234e-6 -> 0.001234 + int num_zeros = -exp; + if (significand_size == 0 && fspecs.precision >= 0 && + fspecs.precision < num_zeros) { + num_zeros = fspecs.precision; + } + size += 2 + to_unsigned(num_zeros); + return write_padded(out, specs, size, [&](iterator it) { + if (sign) *it++ = static_cast(data::signs[sign]); + *it++ = zero; + if (num_zeros == 0 && significand_size == 0 && !fspecs.showpoint) return it; + *it++ = decimal_point; + it = std::fill_n(it, num_zeros, zero); + return write_significand(it, significand, significand_size); + }); +} + template ::value)> OutputIt write(OutputIt out, T value, basic_format_specs specs, @@ -1673,39 +1936,45 @@ OutputIt write(OutputIt out, T value, basic_format_specs specs, ++precision; } if (const_check(std::is_same())) fspecs.binary32 = true; - fspecs.use_grisu = use_grisu(); + fspecs.use_grisu = is_fast_float(); int exp = format_float(promote_float(value), precision, fspecs, buffer); fspecs.precision = precision; Char point = fspecs.locale ? decimal_point(loc) : static_cast('.'); - float_writer w(buffer.data(), static_cast(buffer.size()), exp, - fspecs, point); - return write_padded(out, specs, w.size(), w); + auto fp = big_decimal_fp{buffer.data(), static_cast(buffer.size()), exp}; + return write_float(out, fp, specs, fspecs, point); } template ::value)> + FMT_ENABLE_IF(is_fast_float::value)> OutputIt write(OutputIt out, T value) { if (const_check(!is_supported_floating_point(value))) return out; + + using floaty = conditional_t::value, double, T>; + using uint = typename dragonbox::float_info::carrier_uint; + auto bits = bit_cast(value); + auto fspecs = float_specs(); - if (std::signbit(value)) { // value < 0 is false for NaN so use signbit. 
+ auto sign_bit = bits & (uint(1) << (num_bits() - 1)); + if (sign_bit != 0) { fspecs.sign = sign::minus; value = -value; } - auto specs = basic_format_specs(); - if (!std::isfinite(value)) + static const auto specs = basic_format_specs(); + uint mask = exponent_mask(); + if ((bits & mask) == mask) return write_nonfinite(out, std::isinf(value), specs, fspecs); - memory_buffer buffer; - int precision = -1; - if (const_check(std::is_same())) fspecs.binary32 = true; - fspecs.use_grisu = use_grisu(); - int exp = format_float(promote_float(value), precision, fspecs, buffer); - fspecs.precision = precision; - float_writer w(buffer.data(), static_cast(buffer.size()), exp, - fspecs, static_cast('.')); - return base_iterator(out, w(reserve(out, w.size()))); + auto dec = dragonbox::to_decimal(static_cast(value)); + return write_float(out, dec, specs, fspecs, static_cast('.')); +} + +template ::value && + !is_fast_float::value)> +inline OutputIt write(OutputIt out, T value) { + return write(out, value, basic_format_specs()); } template @@ -1758,6 +2027,13 @@ OutputIt write(OutputIt out, basic_string_view value) { return base_iterator(out, it); } +template +buffer_appender write(buffer_appender out, + basic_string_view value) { + get_container(out).append(value.begin(), value.end()); + return out; +} + template ::value && !std::is_same::value && @@ -1768,7 +2044,13 @@ OutputIt write(OutputIt out, T value) { // Don't do -abs_value since it trips unsigned-integer-overflow sanitizer. if (negative) abs_value = ~abs_value + 1; int num_digits = count_digits(abs_value); - auto it = reserve(out, (negative ? 1 : 0) + static_cast(num_digits)); + auto size = (negative ? 
1 : 0) + static_cast(num_digits); + auto it = reserve(out, size); + if (auto ptr = to_pointer(it, size)) { + if (negative) *ptr++ = static_cast('-'); + format_decimal(ptr, abs_value, num_digits); + return out; + } if (negative) *it++ = static_cast('-'); it = format_decimal(it, abs_value, num_digits).end; return base_iterator(out, it); @@ -1807,8 +2089,13 @@ auto write(OutputIt out, const T& value) -> typename std::enable_if< mapped_type_constant>::value == type::custom_type, OutputIt>::type { - basic_format_context ctx(out, {}, {}); - return formatter().format(value, ctx); + using context_type = basic_format_context; + using formatter_type = + conditional_t::value, + typename context_type::template formatter_type, + fallback_formatter>; + context_type ctx(out, {}, {}); + return formatter_type().format(value, ctx); } // An argument visitor that formats the argument and writes it via the output @@ -2014,6 +2301,48 @@ class arg_formatter_base { } }; +/** The default argument formatter. */ +template +class arg_formatter : public arg_formatter_base { + private: + using char_type = Char; + using base = arg_formatter_base; + using context_type = basic_format_context; + + context_type& ctx_; + basic_format_parse_context* parse_ctx_; + const Char* ptr_; + + public: + using iterator = typename base::iterator; + using format_specs = typename base::format_specs; + + /** + \rst + Constructs an argument formatter object. + *ctx* is a reference to the formatting context, + *specs* contains format specifier information for standard argument types. + \endrst + */ + explicit arg_formatter( + context_type& ctx, + basic_format_parse_context* parse_ctx = nullptr, + format_specs* specs = nullptr, const Char* ptr = nullptr) + : base(ctx.out(), specs, ctx.locale()), + ctx_(ctx), + parse_ctx_(parse_ctx), + ptr_(ptr) {} + + using base::operator(); + + /** Formats an argument of a user-defined type. 
*/ + iterator operator()(typename basic_format_arg::handle handle) { + if (ptr_) advance_to(*parse_ctx_, ptr_); + handle.format(*parse_ctx_, ctx_); + return ctx_.out(); + } +}; + template FMT_CONSTEXPR bool is_name_start(Char c) { return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || '_' == c; } @@ -2053,12 +2382,11 @@ template class custom_formatter { Context& ctx) : parse_ctx_(parse_ctx), ctx_(ctx) {} - bool operator()(typename basic_format_arg::handle h) const { + void operator()(typename basic_format_arg::handle h) const { h.format(parse_ctx_, ctx_); - return true; } - template bool operator()(T) const { return false; } + template void operator()(T) const {} }; template @@ -2440,12 +2768,30 @@ template struct precision_adapter { }; template -FMT_CONSTEXPR const Char* next_code_point(const Char* begin, const Char* end) { - if (const_check(sizeof(Char) != 1) || (*begin & 0x80) == 0) return begin + 1; - do { - ++begin; - } while (begin != end && (*begin & 0xc0) == 0x80); - return begin; +FMT_CONSTEXPR int code_point_length(const Char* begin) { + if (const_check(sizeof(Char) != 1)) return 1; + constexpr char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0}; + int len = lengths[static_cast(*begin) >> 3]; + + // Compute the pointer to the next character early so that the next + // iteration can start working on the next character. Neither Clang + // nor GCC figure out this reordering on their own. + return len + !len; +} + +template constexpr bool is_ascii_letter(Char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +// Converts a character to ASCII. Returns a number > 127 on conversion failure. +template ::value)> +constexpr Char to_ascii(Char value) { + return value; +} +template ::value)> +constexpr typename std::underlying_type::type to_ascii(Char value) { + return value; } // Parses fill and alignment. 
@@ -2454,10 +2800,10 @@ FMT_CONSTEXPR const Char* parse_align(const Char* begin, const Char* end, Handler&& handler) { FMT_ASSERT(begin != end, ""); auto align = align::none; - auto p = next_code_point(begin, end); - if (p == end) p = begin; + auto p = begin + code_point_length(begin); + if (p >= end) p = begin; for (;;) { - switch (static_cast(*p)) { + switch (to_ascii(*p)) { case '<': align = align::left; break; @@ -2536,13 +2882,13 @@ FMT_CONSTEXPR const Char* parse_precision(const Char* begin, const Char* end, template FMT_CONSTEXPR const Char* parse_format_specs(const Char* begin, const Char* end, SpecHandler&& handler) { - if (begin == end || *begin == '}') return begin; + if (begin == end) return begin; begin = parse_align(begin, end, handler); if (begin == end) return begin; // Parse sign. - switch (static_cast(*begin)) { + switch (to_ascii(*begin)) { case '+': handler.on_plus(); ++begin; @@ -2619,7 +2965,7 @@ FMT_CONSTEXPR const Char* parse_replacement_field(const Char* begin, Handler&& handler) { ++begin; if (begin == end) return handler.on_error("invalid format string"), end; - if (static_cast(*begin) == '}') { + if (*begin == '}') { handler.on_replacement_field(handler.on_arg_id(), begin); } else if (*begin == '{') { handler.on_text(begin, begin + 1); @@ -2664,17 +3010,17 @@ FMT_CONSTEXPR_DECL FMT_INLINE void parse_format_string( return; } struct writer { - FMT_CONSTEXPR void operator()(const Char* begin, const Char* end) { - if (begin == end) return; + FMT_CONSTEXPR void operator()(const Char* pbegin, const Char* pend) { + if (pbegin == pend) return; for (;;) { const Char* p = nullptr; - if (!find(begin, end, '}', p)) - return handler_.on_text(begin, end); + if (!find(pbegin, pend, '}', p)) + return handler_.on_text(pbegin, pend); ++p; - if (p == end || *p != '}') + if (p == pend || *p != '}') return handler_.on_error("unmatched '}' in format string"); - handler_.on_text(begin, p); - begin = p + 1; + handler_.on_text(pbegin, p); + pbegin = p + 1; } } 
Handler& handler_; @@ -2705,12 +3051,12 @@ FMT_CONSTEXPR const typename ParseContext::char_type* parse_format_specs( return f.parse(ctx); } -template +template struct format_handler : detail::error_handler { basic_format_parse_context parse_context; Context context; - format_handler(typename ArgFormatter::iterator out, + format_handler(OutputIt out, basic_string_view str, basic_format_args format_args, detail::locale_ref loc) : parse_context(str), context(out, format_args, loc) {} @@ -2734,26 +3080,33 @@ struct format_handler : detail::error_handler { FMT_INLINE void on_replacement_field(int id, const Char*) { auto arg = get_arg(context, id); context.advance_to(visit_format_arg( - default_arg_formatter{ + default_arg_formatter{ context.out(), context.args(), context.locale()}, arg)); } const Char* on_format_specs(int id, const Char* begin, const Char* end) { - advance_to(parse_context, begin); auto arg = get_arg(context, id); - custom_formatter f(parse_context, context); - if (visit_format_arg(f, arg)) return parse_context.begin(); - basic_format_specs specs; - using parse_context_t = basic_format_parse_context; - specs_checker> handler( - specs_handler(specs, parse_context, context), - arg.type()); - begin = parse_format_specs(begin, end, handler); - if (begin == end || *begin != '}') on_error("missing '}' in format string"); - advance_to(parse_context, begin); + if (arg.type() == type::custom_type) { + advance_to(parse_context, begin); + visit_format_arg(custom_formatter(parse_context, context), arg); + return parse_context.begin(); + } + auto specs = basic_format_specs(); + if (begin + 1 < end && begin[1] == '}' && is_ascii_letter(*begin)) { + specs.type = static_cast(*begin++); + } else { + using parse_context_t = basic_format_parse_context; + specs_checker> handler( + specs_handler(specs, parse_context, + context), + arg.type()); + begin = parse_format_specs(begin, end, handler); + if (begin == end || *begin != '}') + on_error("missing '}' in format string"); 
+ } context.advance_to( - visit_format_arg(ArgFormatter(context, &parse_context, &specs), arg)); + visit_format_arg(arg_formatter(context, &parse_context, &specs), arg)); return begin; } }; @@ -2905,53 +3258,11 @@ FMT_API void format_error_code(buffer& out, int error_code, FMT_API void report_error(format_func func, int error_code, string_view message) FMT_NOEXCEPT; - -/** The default argument formatter. */ -template -class arg_formatter : public arg_formatter_base { - private: - using char_type = Char; - using base = arg_formatter_base; - using context_type = basic_format_context; - - context_type& ctx_; - basic_format_parse_context* parse_ctx_; - const Char* ptr_; - - public: - using iterator = typename base::iterator; - using format_specs = typename base::format_specs; - - /** - \rst - Constructs an argument formatter object. - *ctx* is a reference to the formatting context, - *specs* contains format specifier information for standard argument types. - \endrst - */ - explicit arg_formatter( - context_type& ctx, - basic_format_parse_context* parse_ctx = nullptr, - format_specs* specs = nullptr, const Char* ptr = nullptr) - : base(ctx.out(), specs, ctx.locale()), - ctx_(ctx), - parse_ctx_(parse_ctx), - ptr_(ptr) {} - - using base::operator(); - - /** Formats an argument of a user-defined type. 
*/ - iterator operator()(typename basic_format_arg::handle handle) { - if (ptr_) advance_to(*parse_ctx_, ptr_); - handle.format(*parse_ctx_, ctx_); - return ctx_.out(); - } -}; } // namespace detail template using arg_formatter FMT_DEPRECATED_ALIAS = - detail::arg_formatter; + detail::arg_formatter; /** An error returned by an operating system or a language runtime, @@ -3214,8 +3525,10 @@ struct formatter : formatter, Char> { // using variant = std::variant; // template <> // struct formatter: dynamic_formatter<> { -// void format(buffer &buf, const variant &v, context &ctx) { -// visit([&](const auto &val) { format(buf, val, ctx); }, v); +// auto format(const variant& v, format_context& ctx) { +// return visit([&](const auto& val) { +// return dynamic_formatter<>::format(val, ctx); +// }, v); // } // }; template class dynamic_formatter { @@ -3283,28 +3596,15 @@ FMT_CONSTEXPR void advance_to( ctx.advance_to(ctx.begin() + (p - &*ctx.begin())); } -/** Formats arguments and writes the output to the range. */ -template -typename Context::iterator vformat_to( - typename ArgFormatter::iterator out, basic_string_view format_str, - basic_format_args args, - detail::locale_ref loc = detail::locale_ref()) { - if (format_str.size() == 2 && detail::equal2(format_str.data(), "{}")) { - auto arg = args.get(0); - if (!arg) detail::error_handler().on_error("argument not found"); - using iterator = typename ArgFormatter::iterator; - return visit_format_arg( - detail::default_arg_formatter{out, args, loc}, arg); - } - detail::format_handler h(out, format_str, args, - loc); - detail::parse_format_string(format_str, h); - return h.context.out(); -} +/** + \rst + Converts ``p`` to ``const void*`` for pointer formatting. -// Casts ``p`` to ``const void*`` for pointer formatting. 
-// Example: -// auto s = format("{}", ptr(p)); + **Example**:: + + auto s = fmt::format("{}", fmt::ptr(p)); + \endrst + */ template inline const void* ptr(const T* p) { return p; } template inline const void* ptr(const std::unique_ptr& p) { return p.get(); @@ -3323,6 +3623,10 @@ class bytes { }; template <> struct formatter { + private: + detail::dynamic_format_specs specs_; + + public: template FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) { using handler_type = detail::dynamic_specs_handler; @@ -3341,9 +3645,6 @@ template <> struct formatter { specs_.precision, specs_.precision_ref, ctx); return detail::write_bytes(ctx.out(), b.data_, specs_); } - - private: - detail::dynamic_format_specs specs_; }; template @@ -3408,15 +3709,14 @@ arg_join join(It begin, Sentinel end, wstring_view sep) { \endrst */ template -arg_join, detail::sentinel_t, char> -join(const Range& range, string_view sep) { +arg_join, detail::sentinel_t, char> join( + Range&& range, string_view sep) { return join(std::begin(range), std::end(range), sep); } template -arg_join, detail::sentinel_t, - wchar_t> -join(const Range& range, wstring_view sep) { +arg_join, detail::sentinel_t, wchar_t> join( + Range&& range, wstring_view sep) { return join(std::begin(range), std::end(range), sep); } @@ -3443,7 +3743,7 @@ inline std::string to_string(T value) { // The buffer should be large enough to store the number including the sign or // "false" for bool. constexpr int max_size = detail::digits10() + 2; - char buffer[max_size > 5 ? max_size : 5]; + char buffer[max_size > 5 ? 
static_cast(max_size) : 5]; char* begin = buffer; return std::string(begin, detail::write(begin, value)); } @@ -3463,17 +3763,28 @@ std::basic_string to_string(const basic_memory_buffer& buf) { } template -typename buffer_context::iterator detail::vformat_to( +void detail::vformat_to( detail::buffer& buf, basic_string_view format_str, - basic_format_args>> args) { - using af = arg_formatter::iterator, Char>; - return vformat_to(std::back_inserter(buf), to_string_view(format_str), - args); + basic_format_args>> args, + detail::locale_ref loc) { + using iterator = typename buffer_context::iterator; + auto out = buffer_appender(buf); + if (format_str.size() == 2 && equal2(format_str.data(), "{}")) { + auto arg = args.get(0); + if (!arg) error_handler().on_error("argument not found"); + visit_format_arg(default_arg_formatter{out, args, loc}, + arg); + return; + } + format_handler> h( + out, format_str, args, loc); + parse_format_string(format_str, h); } #ifndef FMT_HEADER_ONLY -extern template format_context::iterator detail::vformat_to( - detail::buffer&, string_view, basic_format_args); +extern template void detail::vformat_to(detail::buffer&, string_view, + basic_format_args, + detail::locale_ref); namespace detail { extern template FMT_API std::string grouping_impl(locale_ref loc); extern template FMT_API std::string grouping_impl(locale_ref loc); @@ -3500,7 +3811,7 @@ extern template int snprintf_float(long double value, template , FMT_ENABLE_IF(detail::is_string::value)> -inline typename FMT_BUFFER_CONTEXT(Char)::iterator vformat_to( +inline void vformat_to( detail::buffer& buf, const S& format_str, basic_format_args)> args) { return detail::vformat_to(buf, to_string_view(format_str), args); @@ -3510,10 +3821,9 @@ template ::value, char_t>> inline typename buffer_context::iterator format_to( basic_memory_buffer& buf, const S& format_str, Args&&... 
args) { - detail::check_format_string(format_str); - using context = buffer_context; - return detail::vformat_to(buf, to_string_view(format_str), - make_format_args(args...)); + const auto& vargs = fmt::make_args_checked(format_str, args...); + detail::vformat_to(buf, to_string_view(format_str), vargs); + return detail::buffer_appender(buf); } template @@ -3522,88 +3832,17 @@ using format_context_t = basic_format_context; template using format_args_t = basic_format_args>; -template < - typename S, typename OutputIt, typename... Args, - FMT_ENABLE_IF(detail::is_output_iterator::value && - !detail::is_contiguous_back_insert_iterator::value)> -inline OutputIt vformat_to( - OutputIt out, const S& format_str, - format_args_t, char_t> args) { - using af = detail::arg_formatter>; - return vformat_to(out, to_string_view(format_str), args); -} - -/** - \rst - Formats arguments, writes the result to the output iterator ``out`` and returns - the iterator past the end of the output range. - - **Example**:: - - std::vector out; - fmt::format_to(std::back_inserter(out), "{}", 42); - \endrst - */ -template ::value && - !detail::is_contiguous_back_insert_iterator::value && - detail::is_string::value)> -inline OutputIt format_to(OutputIt out, const S& format_str, Args&&... args) { - detail::check_format_string(format_str); - using context = format_context_t>; - return vformat_to(out, to_string_view(format_str), - make_format_args(args...)); -} - -template struct format_to_n_result { - /** Iterator past the end of the output range. */ - OutputIt out; - /** Total (not truncated) output size. 
*/ - size_t size; -}; +template +using format_to_n_context FMT_DEPRECATED_ALIAS = buffer_context; template -using format_to_n_context = - format_context_t, Char>; - -template -using format_to_n_args = basic_format_args>; +using format_to_n_args FMT_DEPRECATED_ALIAS = + basic_format_args>; template -inline format_arg_store, Args...> +FMT_DEPRECATED format_arg_store, Args...> make_format_to_n_args(const Args&... args) { - return format_arg_store, Args...>( - args...); -} - -template ::value)> -inline format_to_n_result vformat_to_n( - OutputIt out, size_t n, basic_string_view format_str, - format_to_n_args, type_identity_t> args) { - auto it = vformat_to(detail::truncating_iterator(out, n), - format_str, args); - return {it.base(), it.count()}; -} - -/** - \rst - Formats arguments, writes up to ``n`` characters of the result to the output - iterator ``out`` and returns the total output size and the iterator past the - end of the output range. - \endrst - */ -template ::value&& - detail::is_output_iterator::value)> -inline format_to_n_result format_to_n(OutputIt out, size_t n, - const S& format_str, - const Args&... args) { - detail::check_format_string(format_str); - using context = format_to_n_context>; - return vformat_to_n(out, n, to_string_view(format_str), - make_format_args(args...)); + return format_arg_store, Args...>(args...); } template ::value), int>> @@ -3615,15 +3854,6 @@ std::basic_string detail::vformat( return to_string(buffer); } -/** - Returns the number of characters in the output of - ``format(format_str, args...)``. - */ -template -inline size_t formatted_size(string_view format_str, const Args&... args) { - return format_to(detail::counting_iterator(), format_str, args...).count(); -} - template ::value)> void vprint(std::FILE* f, basic_string_view format_str, wformat_args args) { @@ -3648,8 +3878,7 @@ template class udl_formatter { template std::basic_string operator()(Args&&... 
args) const { static FMT_CONSTEXPR_DECL Char s[] = {CHARS..., '\0'}; - check_format_string...>(FMT_STRING(s)); - return format(s, std::forward(args)...); + return format(FMT_STRING(s), std::forward(args)...); } }; # else diff --git a/src/fmt/locale.h b/src/fmt/locale.h index 988d15cdf8..517f65054f 100644 --- a/src/fmt/locale.h +++ b/src/fmt/locale.h @@ -15,22 +15,12 @@ FMT_BEGIN_NAMESPACE namespace detail { -template -typename buffer_context::iterator vformat_to( - const std::locale& loc, buffer& buf, - basic_string_view format_str, - basic_format_args>> args) { - using af = arg_formatter::iterator, Char>; - return vformat_to(std::back_inserter(buf), to_string_view(format_str), - args, detail::locale_ref(loc)); -} - template std::basic_string vformat( const std::locale& loc, basic_string_view format_str, basic_format_args>> args) { basic_memory_buffer buffer; - detail::vformat_to(loc, buffer, format_str, args); + detail::vformat_to(buffer, format_str, args, detail::locale_ref(loc)); return fmt::to_string(buffer); } } // namespace detail @@ -45,32 +35,28 @@ inline std::basic_string vformat( template > inline std::basic_string format(const std::locale& loc, const S& format_str, Args&&... 
args) { - return detail::vformat( - loc, to_string_view(format_str), - detail::make_args_checked(format_str, args...)); + return detail::vformat(loc, to_string_view(format_str), + fmt::make_args_checked(format_str, args...)); } template ::value, char_t>> + typename Char = char_t, + FMT_ENABLE_IF(detail::is_output_iterator::value)> inline OutputIt vformat_to( OutputIt out, const std::locale& loc, const S& format_str, - format_args_t, Char> args) { - using af = detail::arg_formatter; - return vformat_to(out, to_string_view(format_str), args, - detail::locale_ref(loc)); + basic_format_args>> args) { + decltype(detail::get_buffer(out)) buf(detail::get_buffer_init(out)); + vformat_to(buf, to_string_view(format_str), args, detail::locale_ref(loc)); + return detail::get_iterator(buf); } template ::value&& - detail::is_string::value)> + typename Char = char_t, + FMT_ENABLE_IF(detail::is_output_iterator::value)> inline OutputIt format_to(OutputIt out, const std::locale& loc, const S& format_str, Args&&... args) { - detail::check_format_string(format_str); - using context = format_context_t>; - format_arg_store as{args...}; - return vformat_to(out, loc, to_string_view(format_str), - basic_format_args(as)); + const auto& vargs = fmt::make_args_checked(format_str, args...); + return vformat_to(out, loc, to_string_view(format_str), vargs); } FMT_END_NAMESPACE diff --git a/src/fmt/os.h b/src/fmt/os.h index a9517ef800..881510065a 100644 --- a/src/fmt/os.h +++ b/src/fmt/os.h @@ -29,7 +29,8 @@ #if FMT_HAS_INCLUDE("winapifamily.h") # include #endif -#if FMT_HAS_INCLUDE("fcntl.h") && \ +#if (FMT_HAS_INCLUDE() || defined(__APPLE__) || \ + defined(__linux__)) && \ (!defined(WINAPI_FAMILY) || (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP)) # include // for O_RDONLY # define FMT_USE_FCNTL 1 @@ -278,7 +279,8 @@ class file { RDONLY = FMT_POSIX(O_RDONLY), // Open for reading only. WRONLY = FMT_POSIX(O_WRONLY), // Open for writing only. 
RDWR = FMT_POSIX(O_RDWR), // Open for reading and writing. - CREATE = FMT_POSIX(O_CREAT) // Create if the file doesn't exist. + CREATE = FMT_POSIX(O_CREAT), // Create if the file doesn't exist. + APPEND = FMT_POSIX(O_APPEND) // Open in append mode. }; // Constructs a file object which doesn't represent any file. @@ -343,36 +345,69 @@ class file { // Returns the memory page size. long getpagesize(); -class direct_buffered_file; +namespace detail { -template -void print(direct_buffered_file& f, const S& format_str, - const Args&... args); +struct buffer_size { + size_t value = 0; + buffer_size operator=(size_t val) const { + auto bs = buffer_size(); + bs.value = val; + return bs; + } +}; -// A buffered file with a direct buffer access and no synchronization. -class direct_buffered_file { +struct ostream_params { + int oflag = file::WRONLY | file::CREATE; + size_t buffer_size = BUFSIZ > 32768 ? BUFSIZ : 32768; + + ostream_params() {} + + template + ostream_params(T... params, int oflag) : ostream_params(params...) { + this->oflag = oflag; + } + + template + ostream_params(T... params, detail::buffer_size bs) + : ostream_params(params...) { + this->buffer_size = bs.value; + } +}; +} // namespace detail + +static constexpr detail::buffer_size buffer_size; + +// A fast output stream which is not thread-safe. 
+class ostream final : private detail::buffer { private: file file_; - enum { buffer_size = 4096 }; - char buffer_[buffer_size]; - int pos_; - void flush() { - if (pos_ == 0) return; - file_.write(buffer_, pos_); - pos_ = 0; + if (size() == 0) return; + file_.write(data(), size()); + clear(); } - int free_capacity() const { return buffer_size - pos_; } + void grow(size_t) final; + + ostream(cstring_view path, const detail::ostream_params& params) + : file_(path, params.oflag) { + set(new char[params.buffer_size], params.buffer_size); + } public: - direct_buffered_file(cstring_view path, int oflag) - : file_(path, oflag), pos_(0) {} - - ~direct_buffered_file() { - flush(); + ostream(ostream&& other) + : detail::buffer(other.data(), other.size(), other.capacity()), + file_(std::move(other.file_)) { + other.set(nullptr, 0); } + ~ostream() { + flush(); + delete[] data(); + } + + template + friend ostream output_file(cstring_view path, T... params); void close() { flush(); @@ -380,25 +415,20 @@ class direct_buffered_file { } template - friend void print(direct_buffered_file& f, const S& format_str, - const Args&... args) { - // We could avoid double buffering. - auto buf = fmt::memory_buffer(); - fmt::format_to(std::back_inserter(buf), format_str, args...); - auto remaining_pos = 0; - auto remaining_size = buf.size(); - while (remaining_size > detail::to_unsigned(f.free_capacity())) { - auto size = f.free_capacity(); - memcpy(f.buffer_ + f.pos_, buf.data() + remaining_pos, size); - f.pos_ += size; - f.flush(); - remaining_pos += size; - remaining_size -= size; - } - memcpy(f.buffer_ + f.pos_, buf.data() + remaining_pos, remaining_size); - f.pos_ += static_cast(remaining_size); + void print(const S& format_str, const Args&... args) { + format_to(detail::buffer_appender(*this), format_str, args...); } }; + +/** + Opens a file for writing. 
Supported parameters passed in `params`: + * ````: Output flags (``file::WRONLY | file::CREATE`` by default) + * ``buffer_size=``: Output buffer size + */ +template +inline ostream output_file(cstring_view path, T... params) { + return {path, detail::ostream_params(params...)}; +} #endif // FMT_USE_FCNTL #ifdef FMT_LOCALE diff --git a/src/fmt/ostream.h b/src/fmt/ostream.h index c16107f79b..29c58ec13b 100644 --- a/src/fmt/ostream.h +++ b/src/fmt/ostream.h @@ -49,17 +49,27 @@ template class formatbuf : public std::basic_streambuf { } }; +struct converter { + template ::value)> converter(T); +}; + template struct test_stream : std::basic_ostream { private: - // Hide all operator<< from std::basic_ostream. - void_t<> operator<<(null<>); - void_t<> operator<<(const Char*); - - template ::value && - !std::is_enum::value)> - void_t<> operator<<(T); + void_t<> operator<<(converter); }; +// Hide insertion operators for built-in types. +template +void_t<> operator<<(std::basic_ostream&, Char); +template +void_t<> operator<<(std::basic_ostream&, char); +template +void_t<> operator<<(std::basic_ostream&, char); +template +void_t<> operator<<(std::basic_ostream&, signed char); +template +void_t<> operator<<(std::basic_ostream&, unsigned char); + // Checks if T has a user-defined operator<< (e.g. not a member of // std::ostream). template class is_streamable { @@ -103,7 +113,7 @@ void format_value(buffer& buf, const T& value, #endif output << value; output.exceptions(std::ios_base::failbit | std::ios_base::badbit); - buf.resize(buf.size()); + buf.try_resize(buf.size()); } // Formats an object of type T that has an overloaded ostream operator<<. @@ -160,7 +170,7 @@ template ::value, char_t>> void print(std::basic_ostream& os, const S& format_str, Args&&... 
args) { vprint(os, to_string_view(format_str), - detail::make_args_checked(format_str, args...)); + fmt::make_args_checked(format_str, args...)); } FMT_END_NAMESPACE diff --git a/src/fmt/printf.h b/src/fmt/printf.h index d4440ed168..8c28ac2327 100644 --- a/src/fmt/printf.h +++ b/src/fmt/printf.h @@ -181,7 +181,7 @@ template class printf_width_handler { template void vprintf(buffer& buf, basic_string_view format, basic_format_args args) { - Context(std::back_inserter(buf), format, args).format(); + Context(buffer_appender(buf), format, args).format(); } } // namespace detail @@ -598,7 +598,7 @@ OutputIt basic_printf_context::format() { template using basic_printf_context_t = - basic_printf_context>, Char>; + basic_printf_context, Char>; using printf_context = basic_printf_context_t; using wprintf_context = basic_printf_context_t; diff --git a/src/fmt/ranges.h b/src/fmt/ranges.h index c48f1727d5..b603d637d7 100644 --- a/src/fmt/ranges.h +++ b/src/fmt/ranges.h @@ -157,6 +157,9 @@ template void for_each(Tuple&& tup, F&& f) { for_each(indexes, std::forward(tup), std::forward(f)); } +template +using value_type = remove_cvref_t().begin())>; + template ::type>::value)> FMT_CONSTEXPR const char* format_str_quoted(bool add_space, const Arg&) { @@ -182,7 +185,6 @@ FMT_CONSTEXPR const char* format_str_quoted(bool add_space, const char) { FMT_CONSTEXPR const wchar_t* format_str_quoted(bool add_space, const wchar_t) { return add_space ? L" '{}'" : L"'{}'"; } - } // namespace detail template struct is_tuple_like { @@ -246,9 +248,15 @@ template struct is_range { !std::is_constructible, T>::value; }; -template -struct formatter::value>> { +template +struct formatter< + T, Char, + enable_if_t::value +// Workaround a bug in MSVC 2017 and earlier. 
+#if !FMT_MSC_VER || FMT_MSC_VER >= 1927 + && has_formatter, format_context>::value +#endif + >> { formatting_range formatting; template @@ -257,8 +265,7 @@ struct formatter - typename FormatContext::iterator format(const RangeT& values, - FormatContext& ctx) { + typename FormatContext::iterator format(const T& values, FormatContext& ctx) { auto out = detail::copy(formatting.prefix, ctx.out()); size_t i = 0; auto it = values.begin(); diff --git a/src/fmtlib_format.cpp b/src/fmtlib_format.cpp index a64a1f3893..7271341664 100644 --- a/src/fmtlib_format.cpp +++ b/src/fmtlib_format.cpp @@ -44,9 +44,9 @@ template FMT_API char detail::decimal_point_impl(locale_ref); template FMT_API void detail::buffer::append(const char*, const char*); -template FMT_API FMT_BUFFER_CONTEXT(char)::iterator detail::vformat_to( +template FMT_API void detail::vformat_to( detail::buffer&, string_view, - basic_format_args); + basic_format_args, detail::locale_ref); template FMT_API int detail::snprintf_float(double, int, detail::float_specs, detail::buffer&); diff --git a/src/fmtlib_os.cpp b/src/fmtlib_os.cpp index 386119db17..a07e782441 100644 --- a/src/fmtlib_os.cpp +++ b/src/fmtlib_os.cpp @@ -62,7 +62,7 @@ using RWResult = int; inline unsigned convert_rwcount(std::size_t count) { return count <= UINT_MAX ? static_cast(count) : UINT_MAX; } -#else +#elif FMT_USE_FCNTL // Return type of read and write functions. using RWResult = ssize_t; @@ -124,7 +124,8 @@ void detail::format_windows_error(detail::buffer& out, int error_code, if (result != 0) { utf16_to_utf8 utf8_message; if (utf8_message.convert(system_message) == ERROR_SUCCESS) { - format_to(std::back_inserter(out), "{}: {}", message, utf8_message); + format_to(buffer_appender(out), "{}: {}", message, + utf8_message); return; } break; @@ -288,12 +289,12 @@ void file::pipe(file& read_end, file& write_end) { } buffered_file file::fdopen(const char* mode) { - // Don't retry as fdopen doesn't return EINTR. 
- #if defined(__MINGW32__) && defined(_POSIX_) +// Don't retry as fdopen doesn't return EINTR. +# if defined(__MINGW32__) && defined(_POSIX_) FILE* f = ::fdopen(fd_, mode); - #else +# else FILE* f = FMT_POSIX_CALL(fdopen(fd_, mode)); - #endif +# endif if (!f) FMT_THROW( system_error(errno, "cannot associate stream with file descriptor")); @@ -313,5 +314,9 @@ long getpagesize() { return size; # endif } + +void ostream::grow(size_t) { + if (this->size() == this->capacity()) flush(); +} #endif // FMT_USE_FCNTL FMT_END_NAMESPACE diff --git a/unittest/force-styles/test_error_stats.cpp b/unittest/force-styles/test_error_stats.cpp index 0873757acd..52ba3baae3 100644 --- a/unittest/force-styles/test_error_stats.cpp +++ b/unittest/force-styles/test_error_stats.cpp @@ -26,7 +26,7 @@ TEST(ErrorStats, test) std::stringstream out; out << stats; - ASSERT_EQ(out.str(), "Average: 5.800e-01 StdDev: 7.305e-01 MaxErr: 2.000e+00 @ item: 3.0"); + ASSERT_EQ(out.str(), "Average: 5.800e-01 StdDev: 7.305e-01 MaxErr: 2.000e+00 @ item: 3"); stats.reset(); ASSERT_EQ(stats.has_data(), false); From 756e97954542356417dac9e4a7cb71fefa4fc24e Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 29 Oct 2020 21:38:01 -0400 Subject: [PATCH 20/64] update DOIs for new stable release. fix URLs. --- doc/src/Intro_citing.rst | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/src/Intro_citing.rst b/doc/src/Intro_citing.rst index 4346e00bc0..a74d3134f3 100644 --- a/doc/src/Intro_citing.rst +++ b/doc/src/Intro_citing.rst @@ -24,13 +24,15 @@ DOI for the LAMMPS code LAMMPS developers use the `Zenodo service at CERN `_ to create digital object identifies (DOI) for stable releases of the LAMMPS code. 
There are two types of DOIs for the -LAMMPS source code: 1) the canonical DOI for **all** versions of LAMMPS, -which will always point to the latest stable release version is: +LAMMPS source code: the canonical DOI for **all** versions of LAMMPS, +which will always point to the **latest** stable release version is: - `DOI: 10.5281/zenodo.3726416 `_ +- DOI: `10.5281/zenodo.3726416 `_ -In addition there are DOIs for individual stable releases starting with -the `3 March 2020 version, DOI:10.5281/zenodo.3726417 `_ +In addition there are DOIs for individual stable releases. Currently there are: + +- 3 March 2020 version: `DOI:10.5281/zenodo.3726417 `_ +- 29 October 2020 version: `DOI:10.5281/zenodo.4157471 `_ Home page From 980fce06de53ffb864e655a89f6d08887c6faad6 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 30 Oct 2020 13:21:01 -0400 Subject: [PATCH 21/64] pretty --- src/atom_vec_ellipsoid.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/atom_vec_ellipsoid.cpp b/src/atom_vec_ellipsoid.cpp index 3bdeff67af..b4cb4616a7 100644 --- a/src/atom_vec_ellipsoid.cpp +++ b/src/atom_vec_ellipsoid.cpp @@ -533,8 +533,7 @@ void AtomVecEllipsoid::write_data_bonus(FILE *fp, int n, double *buf, int /*flag this may create or delete entry in bonus data ------------------------------------------------------------------------- */ -void AtomVecEllipsoid:: -set_shape(int i, double shapex, double shapey, double shapez) +void AtomVecEllipsoid::set_shape(int i, double shapex, double shapey, double shapez) { if (ellipsoid[i] < 0) { if (shapex == 0.0 && shapey == 0.0 && shapez == 0.0) return; From 66ed16760f93a928196d49e7d60697b93cb9e412 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 31 Oct 2020 11:48:29 -0400 Subject: [PATCH 22/64] do not allow access to rigid body properties before they are fully set up --- src/RIGID/fix_rigid.cpp | 4 ++++ src/RIGID/fix_rigid_small.cpp | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/RIGID/fix_rigid.cpp 
b/src/RIGID/fix_rigid.cpp index 68c44b3e26..9ce28438d3 100644 --- a/src/RIGID/fix_rigid.cpp +++ b/src/RIGID/fix_rigid.cpp @@ -2710,11 +2710,15 @@ double FixRigid::compute_scalar() void *FixRigid::extract(const char *str, int &dim) { + dim = 0; + if (strcmp(str,"body") == 0) { + if (!setupflag) return nullptr; dim = 1; return body; } if (strcmp(str,"masstotal") == 0) { + if (!setupflag) return nullptr; dim = 1; return masstotal; } diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index e867e5bb68..1b022f35c4 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -3424,7 +3424,10 @@ int FixRigidSmall::modify_param(int narg, char **arg) void *FixRigidSmall::extract(const char *str, int &dim) { + dim = 0; + if (strcmp(str,"body") == 0) { + if (!setupflag) return nullptr; dim = 1; return atom2body; } @@ -3438,6 +3441,7 @@ void *FixRigidSmall::extract(const char *str, int &dim) // used by granular pair styles, indexed by atom2body if (strcmp(str,"masstotal") == 0) { + if (!setupflag) return nullptr; dim = 1; if (nmax_mass < nmax_body) { From 4dac7625c565658413b4c0e2fb341e40bd1ef411 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 1 Nov 2020 14:53:33 -0500 Subject: [PATCH 23/64] error out instead of segfaulting when calling Pair::single() on pair style granular without active history --- src/GRANULAR/pair_granular.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/GRANULAR/pair_granular.cpp b/src/GRANULAR/pair_granular.cpp index 270234821c..41dbee3394 100644 --- a/src/GRANULAR/pair_granular.cpp +++ b/src/GRANULAR/pair_granular.cpp @@ -1536,6 +1536,8 @@ double PairGranular::single(int i, int j, int itype, int jtype, jlist = list->firstneigh[i]; if (use_history) { + if (fix_history == nullptr) + error->one(FLERR,"Pair::single() computation needs history"); allhistory = fix_history->firstvalue[i]; for (int jj = 0; jj < jnum; jj++) { neighprev++; From 2dc80e9521e2d9b6001b5f38f3f0637f4d2974ff Mon Sep 17 
00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 1 Nov 2020 15:19:52 -0500 Subject: [PATCH 24/64] avoid crash in the case of checking an empty potential file --- src/utils.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/utils.cpp b/src/utils.cpp index 5ae76ed00d..8bd36a8065 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -862,6 +862,7 @@ std::string utils::get_potential_date(const std::string &path, const std::string reader.ignore_comments = false; char *line = reader.next_line(); + if (line == nullptr) return ""; Tokenizer words(line); while (words.has_next()) { if (words.next() == "DATE:") { @@ -881,6 +882,7 @@ std::string utils::get_potential_units(const std::string &path, const std::strin reader.ignore_comments = false; char *line = reader.next_line(); + if (line == nullptr) return ""; Tokenizer words(line); while (words.has_next()) { if (words.next() == "UNITS:") { From 6e64ce7228d29d51aaaf18cd1a32cb0a3aa8044e Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 1 Nov 2020 16:05:41 -0500 Subject: [PATCH 25/64] add Modify::replace_fix() convenience function --- src/modify.cpp | 17 +++++++++++++++++ src/modify.h | 1 + 2 files changed, 18 insertions(+) diff --git a/src/modify.cpp b/src/modify.cpp index a88d6d54cc..2d0c23d125 100644 --- a/src/modify.cpp +++ b/src/modify.cpp @@ -998,6 +998,23 @@ void Modify::replace_fix(const char *replaceID, add_fix(narg,arg,trysuffix); } +/* ---------------------------------------------------------------------- + convenience function to allow replacing a fix from a single string +------------------------------------------------------------------------- */ + +void Modify::replace_fix(const std::string &oldfix, + const std::string &fixcmd, int trysuffix) +{ + auto args = utils::split_words(fixcmd); + char **newarg = new char*[args.size()]; + int i=0; + for (const auto &arg : args) { + newarg[i++] = (char *)arg.c_str(); + } + replace_fix(oldfix.c_str(),args.size(),newarg,trysuffix); + delete[] newarg; +} + /* 
---------------------------------------------------------------------- one instance per fix in style_fix.h ------------------------------------------------------------------------- */ diff --git a/src/modify.h b/src/modify.h index ee14baf638..a347e8486d 100644 --- a/src/modify.h +++ b/src/modify.h @@ -101,6 +101,7 @@ class Modify : protected Pointers { void add_fix(int, char **, int trysuffix=1); void add_fix(const std::string &, int trysuffix=1); void replace_fix(const char *, int, char **, int trysuffix=1); + void replace_fix(const std::string &, const std::string &, int trysuffix=1); void modify_fix(int, char **); void delete_fix(const std::string &); void delete_fix(int); From 4e147632bea1f2d28b6c36c252c2b92d754dcc47 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 1 Nov 2020 16:08:57 -0500 Subject: [PATCH 26/64] simplify call to replace_fix() --- src/GRANULAR/pair_granular.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/GRANULAR/pair_granular.cpp b/src/GRANULAR/pair_granular.cpp index 41dbee3394..a11ba0bf3d 100644 --- a/src/GRANULAR/pair_granular.cpp +++ b/src/GRANULAR/pair_granular.cpp @@ -1102,15 +1102,8 @@ void PairGranular::init_style() // this is so its order in the fix list is preserved if (use_history && fix_history == nullptr) { - char dnumstr[16]; - sprintf(dnumstr,"%d",size_history); - char **fixarg = new char*[4]; - fixarg[0] = (char *) "NEIGH_HISTORY_GRANULAR"; - fixarg[1] = (char *) "all"; - fixarg[2] = (char *) "NEIGH_HISTORY"; - fixarg[3] = dnumstr; - modify->replace_fix("NEIGH_HISTORY_GRANULAR_DUMMY",4,fixarg,1); - delete [] fixarg; + modify->replace_fix("NEIGH_HISTORY_GRANULAR_DUMMY","NEIGH_HISTORY_GRANULAR" + " all NEIGH_HISTORY " + std::to_string(size_history),1); int ifix = modify->find_fix("NEIGH_HISTORY_GRANULAR"); fix_history = (FixNeighHistory *) modify->fix[ifix]; fix_history->pair = this; From 0b51bba75ca1e58730bc2d7aa1f46fed9b090c47 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 
1 Nov 2020 16:09:30 -0500 Subject: [PATCH 27/64] avoid division by zero --- src/GRANULAR/pair_granular.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GRANULAR/pair_granular.cpp b/src/GRANULAR/pair_granular.cpp index a11ba0bf3d..da1e7a12ad 100644 --- a/src/GRANULAR/pair_granular.cpp +++ b/src/GRANULAR/pair_granular.cpp @@ -1402,7 +1402,7 @@ double PairGranular::single(int i, int j, int itype, int jtype, radi = radius[i]; radj = radius[j]; radsum = radi + radj; - Reff = radi*radj/radsum; + Reff = (radsum > 0.0) ? radi*radj/radsum : 0.0; bool touchflag; E = normal_coeffs[itype][jtype][0]; From b1de97a3cdbdd81659b9671234e8ca1e9d6e86d1 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 1 Nov 2020 16:10:16 -0500 Subject: [PATCH 28/64] abort with an error if Pair::single() cannot work for granular pair style --- src/GRANULAR/pair_granular.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/GRANULAR/pair_granular.cpp b/src/GRANULAR/pair_granular.cpp index da1e7a12ad..1d2c7c3627 100644 --- a/src/GRANULAR/pair_granular.cpp +++ b/src/GRANULAR/pair_granular.cpp @@ -1398,6 +1398,10 @@ double PairGranular::single(int i, int j, int itype, int jtype, int *jlist; double *history,*allhistory; + int nall = atom->nlocal + atom->nghost; + if ((i >= nall) || (j >= nall)) + error->all(FLERR,"Not enough atoms for pair granular single function"); + double *radius = atom->radius; radi = radius[i]; radj = radius[j]; @@ -1529,8 +1533,8 @@ double PairGranular::single(int i, int j, int itype, int jtype, jlist = list->firstneigh[i]; if (use_history) { - if (fix_history == nullptr) - error->one(FLERR,"Pair::single() computation needs history"); + if ((fix_history == nullptr) || (fix_history->firstvalue == nullptr)) + error->one(FLERR,"Pair granular single computation needs history"); allhistory = fix_history->firstvalue[i]; for (int jj = 0; jj < jnum; jj++) { neighprev++; From 702041858983b844289c34f53121ac324694f3f6 Mon Sep 17 
00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 2 Nov 2020 10:31:46 -0500 Subject: [PATCH 29/64] update fmtlib version 7.1.0 to 7.1.1 --- src/fmt/color.h | 9 +++++---- src/fmt/compile.h | 17 +++++++++-------- src/fmt/core.h | 28 ++++++++++++++++++---------- src/fmt/format-inl.h | 16 ++++++++++++---- src/fmt/format.h | 28 ++++++++++++++++------------ src/fmt/locale.h | 8 ++++---- src/fmtlib_format.cpp | 30 ++++++++++++++++++++++++++++++ 7 files changed, 94 insertions(+), 42 deletions(-) diff --git a/src/fmt/color.h b/src/fmt/color.h index 7891058950..94e3419d1d 100644 --- a/src/fmt/color.h +++ b/src/fmt/color.h @@ -589,10 +589,11 @@ OutputIt vformat_to( \endrst */ template >::value&& - detail::is_string::value)> -inline OutputIt format_to(OutputIt out, const text_style& ts, - const S& format_str, Args&&... args) { + bool enable = detail::is_output_iterator>::value&& + detail::is_string::value> +inline auto format_to(OutputIt out, const text_style& ts, const S& format_str, + Args&&... args) -> + typename std::enable_if::type { return vformat_to(out, ts, to_string_view(format_str), fmt::make_args_checked(format_str, args...)); } diff --git a/src/fmt/compile.h b/src/fmt/compile.h index 7db610d90f..3a33b02014 100644 --- a/src/fmt/compile.h +++ b/src/fmt/compile.h @@ -667,14 +667,15 @@ OutputIt format_to(OutputIt out, const S&, const Args&... args) { return format_to(out, compiled, args...); } -template ::value&& - std::is_base_of::value)> -format_to_n_result format_to_n(OutputIt out, size_t n, - const CompiledFormat& cf, - const Args&... args) { +template +auto format_to_n(OutputIt out, size_t n, const CompiledFormat& cf, + const Args&... 
args) -> + typename std::enable_if< + detail::is_output_iterator::value && + std::is_base_of::value, + format_to_n_result>::type { auto it = format_to(detail::truncating_iterator(out, n), cf, args...); return {it.base(), it.count()}; diff --git a/src/fmt/core.h b/src/fmt/core.h index 317292288d..9bd2003b28 100644 --- a/src/fmt/core.h +++ b/src/fmt/core.h @@ -18,7 +18,7 @@ #include // The fmt library version in the form major * 10000 + minor * 100 + patch. -#define FMT_VERSION 70100 +#define FMT_VERSION 70101 #ifdef __clang__ # define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) @@ -1937,7 +1937,14 @@ template class basic_format_args { } }; -/** An alias to ``basic_format_args``. */ +#ifdef FMT_ARM_ABI_COMPATIBILITY +/** An alias to ``basic_format_args``. */ +// Separate types would result in shorter symbols but break ABI compatibility +// between clang and gcc on ARM (#1919). +using format_args = basic_format_args; +using wformat_args = basic_format_args; +#else +// DEPRECATED! These are kept for ABI compatibility. // It is a separate type rather than an alias to make symbols readable. struct format_args : basic_format_args { template @@ -1946,6 +1953,7 @@ struct format_args : basic_format_args { struct wformat_args : basic_format_args { using basic_format_args::basic_format_args; }; +#endif namespace detail { @@ -1976,10 +1984,10 @@ inline void vprint_mojibake(std::FILE*, string_view, format_args) {} // GCC 8 and earlier cannot handle std::back_insert_iterator with // vformat_to(...) overload, so SFINAE on iterator type instead. 
template , - FMT_ENABLE_IF(detail::is_output_iterator::value)> -OutputIt vformat_to( - OutputIt out, const S& format_str, - basic_format_args>> args) { + bool enable = detail::is_output_iterator::value> +auto vformat_to(OutputIt out, const S& format_str, + basic_format_args>> args) + -> typename std::enable_if::type { decltype(detail::get_buffer(out)) buf(detail::get_buffer_init(out)); detail::vformat_to(buf, to_string_view(format_str), args); return detail::get_iterator(buf); @@ -2031,10 +2039,10 @@ inline format_to_n_result vformat_to_n( \endrst */ template >::value)> -inline format_to_n_result format_to_n(OutputIt out, size_t n, - const S& format_str, - const Args&... args) { + bool enable = detail::is_output_iterator>::value> +inline auto format_to_n(OutputIt out, size_t n, const S& format_str, + const Args&... args) -> + typename std::enable_if>::type { const auto& vargs = fmt::make_args_checked(format_str, args...); return vformat_to_n(out, n, to_string_view(format_str), vargs); } diff --git a/src/fmt/format-inl.h b/src/fmt/format-inl.h index b7cb3209c8..5d466eebbc 100644 --- a/src/fmt/format-inl.h +++ b/src/fmt/format-inl.h @@ -261,11 +261,19 @@ const uint64_t basic_data::powers_of_10_64[] = { 10000000000000000000ULL}; template -const uint32_t basic_data::zero_or_powers_of_10_32[] = {0, 0, +const uint32_t basic_data::zero_or_powers_of_10_32[] = {0, FMT_POWERS_OF_10(1)}; - template const uint64_t basic_data::zero_or_powers_of_10_64[] = { + 0, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL), + 10000000000000000000ULL}; + +template +const uint32_t basic_data::zero_or_powers_of_10_32_new[] = { + 0, 0, FMT_POWERS_OF_10(1)}; + +template +const uint64_t basic_data::zero_or_powers_of_10_64_new[] = { 0, 0, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL), 10000000000000000000ULL}; @@ -1756,7 +1764,7 @@ inline bool divisible_by_power_of_2(uint64_t x, int exp) FMT_NOEXCEPT { #ifdef FMT_BUILTIN_CTZLL return FMT_BUILTIN_CTZLL(x) >= exp; #else - return exp < 
num_bits()) && x == ((x >> exp) << exp); + return exp < num_bits() && x == ((x >> exp) << exp); #endif } @@ -1901,7 +1909,7 @@ template <> struct cache_accessor { uint64_t pow5 = data::powers_of_5_64[offset]; uint128_wrapper recovered_cache = umul128(base_cache.high(), pow5); uint128_wrapper middle_low = - umul128(base_cache.low() - (kb < 0 ? 1 : 0), pow5); + umul128(base_cache.low() - (kb < 0 ? 1u : 0u), pow5); recovered_cache += middle_low.high(); diff --git a/src/fmt/format.h b/src/fmt/format.h index fbe5045068..13b8da3028 100644 --- a/src/fmt/format.h +++ b/src/fmt/format.h @@ -866,8 +866,8 @@ template struct FMT_EXTERN_TEMPLATE_API divtest_table_entry { // Static data is placed in this class template for the header-only config. template struct FMT_EXTERN_TEMPLATE_API basic_data { static const uint64_t powers_of_10_64[]; - static const uint32_t zero_or_powers_of_10_32[]; - static const uint64_t zero_or_powers_of_10_64[]; + static const uint32_t zero_or_powers_of_10_32_new[]; + static const uint64_t zero_or_powers_of_10_64_new[]; static const uint64_t grisu_pow10_significands[]; static const int16_t grisu_pow10_exponents[]; static const divtest_table_entry divtest_table_for_pow5_32[]; @@ -891,6 +891,10 @@ template struct FMT_EXTERN_TEMPLATE_API basic_data { static const char signs[]; static const char left_padding_shifts[5]; static const char right_padding_shifts[5]; + + // DEPRECATED! These are for ABI compatibility. + static const uint32_t zero_or_powers_of_10_32[]; + static const uint64_t zero_or_powers_of_10_64[]; }; // Maps bsr(n) to ceil(log10(pow(2, bsr(n) + 1) - 1)). 
@@ -917,7 +921,7 @@ struct data : basic_data<> {}; inline int count_digits(uint64_t n) { // https://github.com/fmtlib/format-benchmark/blob/master/digits10 auto t = bsr2log10(FMT_BUILTIN_CLZLL(n | 1) ^ 63); - return t - (n < data::zero_or_powers_of_10_64[t]); + return t - (n < data::zero_or_powers_of_10_64_new[t]); } #else // Fallback version of count_digits used when __builtin_clz is not available. @@ -984,7 +988,7 @@ template <> int count_digits<4>(detail::fallback_uintptr n); // Optional version of count_digits for better performance on 32-bit platforms. inline int count_digits(uint32_t n) { auto t = bsr2log10(FMT_BUILTIN_CLZ(n | 1) ^ 31); - return t - (n < data::zero_or_powers_of_10_32[t]); + return t - (n < data::zero_or_powers_of_10_32_new[t]); } #endif @@ -3056,8 +3060,7 @@ struct format_handler : detail::error_handler { basic_format_parse_context parse_context; Context context; - format_handler(OutputIt out, - basic_string_view str, + format_handler(OutputIt out, basic_string_view str, basic_format_args format_args, detail::locale_ref loc) : parse_context(str), context(out, format_args, loc) {} @@ -3080,8 +3083,8 @@ struct format_handler : detail::error_handler { FMT_INLINE void on_replacement_field(int id, const Char*) { auto arg = get_arg(context, id); context.advance_to(visit_format_arg( - default_arg_formatter{ - context.out(), context.args(), context.locale()}, + default_arg_formatter{context.out(), context.args(), + context.locale()}, arg)); } @@ -3105,8 +3108,8 @@ struct format_handler : detail::error_handler { if (begin == end || *begin != '}') on_error("missing '}' in format string"); } - context.advance_to( - visit_format_arg(arg_formatter(context, &parse_context, &specs), arg)); + context.advance_to(visit_format_arg( + arg_formatter(context, &parse_context, &specs), arg)); return begin; } }; @@ -3776,8 +3779,8 @@ void detail::vformat_to( arg); return; } - format_handler> h( - out, format_str, args, loc); + format_handler> h(out, format_str, args, 
+ loc); parse_format_string(format_str, h); } @@ -3786,6 +3789,7 @@ extern template void detail::vformat_to(detail::buffer&, string_view, basic_format_args, detail::locale_ref); namespace detail { + extern template FMT_API std::string grouping_impl(locale_ref loc); extern template FMT_API std::string grouping_impl(locale_ref loc); extern template FMT_API char thousands_sep_impl(locale_ref loc); diff --git a/src/fmt/locale.h b/src/fmt/locale.h index 517f65054f..7301bf92a2 100644 --- a/src/fmt/locale.h +++ b/src/fmt/locale.h @@ -51,10 +51,10 @@ inline OutputIt vformat_to( } template , - FMT_ENABLE_IF(detail::is_output_iterator::value)> -inline OutputIt format_to(OutputIt out, const std::locale& loc, - const S& format_str, Args&&... args) { + bool enable = detail::is_output_iterator>::value> +inline auto format_to(OutputIt out, const std::locale& loc, + const S& format_str, Args&&... args) -> + typename std::enable_if::type { const auto& vargs = fmt::make_args_checked(format_str, args...); return vformat_to(out, loc, to_string_view(format_str), vargs); } diff --git a/src/fmtlib_format.cpp b/src/fmtlib_format.cpp index 7271341664..bca87b033b 100644 --- a/src/fmtlib_format.cpp +++ b/src/fmtlib_format.cpp @@ -23,6 +23,36 @@ int format_float(char* buf, std::size_t size, const char* format, int precision, return precision < 0 ? snprintf_ptr(buf, size, format, value) : snprintf_ptr(buf, size, format, precision, value); } + +template dragonbox::decimal_fp dragonbox::to_decimal(float x) + FMT_NOEXCEPT; +template dragonbox::decimal_fp dragonbox::to_decimal(double x) + FMT_NOEXCEPT; + +// DEPRECATED! This function exists for ABI compatibility. 
+template +typename basic_format_context>, + Char>::iterator +vformat_to(buffer& buf, basic_string_view format_str, + basic_format_args>>, + type_identity_t>> + args) { + using iterator = std::back_insert_iterator>; + using context = basic_format_context< + std::back_insert_iterator>>, + type_identity_t>; + auto out = iterator(buf); + format_handler h(out, format_str, args, {}); + parse_format_string(format_str, h); + return out; +} +template basic_format_context>, + char>::iterator +vformat_to(buffer&, string_view, + basic_format_args>>, + type_identity_t>>); } // namespace detail template struct FMT_INSTANTIATION_DEF_API detail::basic_data; From e8337fd128f61c8af971897e2b28b9a6a0e90ce6 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Mon, 2 Nov 2020 10:52:59 -0500 Subject: [PATCH 30/64] Safeguard against possible string overflow --- src/USER-NETCDF/dump_netcdf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/USER-NETCDF/dump_netcdf.cpp b/src/USER-NETCDF/dump_netcdf.cpp index a50e01ce95..e1a8dfab69 100644 --- a/src/USER-NETCDF/dump_netcdf.cpp +++ b/src/USER-NETCDF/dump_netcdf.cpp @@ -183,7 +183,7 @@ DumpNetCDF::DumpNetCDF(LAMMPS *lmp, int narg, char **arg) : for (int j = 0; j < DUMP_NC_MAX_DIMS; j++) { perat[inc].field[j] = -1; } - strcpy(perat[inc].name, mangled); + strncpy(perat[inc].name, mangled, NC_FIELD_NAME_MAX); n_perat++; } From 0e8e93b2a0c9bbc1ef9e62cdf13fcc660970fd45 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Mon, 2 Nov 2020 10:59:33 -0500 Subject: [PATCH 31/64] Add space for readability --- src/integrate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/integrate.cpp b/src/integrate.cpp index 7fc74c5db2..7d4bf36929 100644 --- a/src/integrate.cpp +++ b/src/integrate.cpp @@ -23,7 +23,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -Integrate::Integrate(LAMMPS *lmp, int /*narg*/, char **/*arg*/) : Pointers(lmp) 
+Integrate::Integrate(LAMMPS *lmp, int /*narg*/, char ** /*arg*/) : Pointers(lmp) { elist_global = elist_atom = nullptr; vlist_global = vlist_atom = cvlist_atom = nullptr; From c24f7acdd0d031944e90187149faba0e54cfb9eb Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Mon, 2 Nov 2020 11:00:36 -0500 Subject: [PATCH 32/64] Avoid passing invalid pointer during integrator creation --- src/update.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/update.cpp b/src/update.cpp index eebebb91be..a8a698a7f3 100644 --- a/src/update.cpp +++ b/src/update.cpp @@ -323,7 +323,12 @@ void Update::create_integrate(int narg, char **arg, int trysuffix) delete integrate; int sflag; - new_integrate(arg[0],narg-1,&arg[1],trysuffix,sflag); + + if(narg-1 > 0) { + new_integrate(arg[0],narg-1,&arg[1],trysuffix,sflag); + } else { + new_integrate(arg[0],0,nullptr,trysuffix,sflag); + } std::string estyle = arg[0]; if (sflag) { From e86b4d3a78dbaaf454bd734491c4dcc4c4bfb449 Mon Sep 17 00:00:00 2001 From: iafoss <49990208+iafoss@users.noreply.github.com> Date: Mon, 2 Nov 2020 11:54:11 -0500 Subject: [PATCH 33/64] bug fix in pair_mesont_tpm.cpp --- src/USER-MESONT/pair_mesont_tpm.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp index b15a1e5b1f..9185786341 100644 --- a/src/USER-MESONT/pair_mesont_tpm.cpp +++ b/src/USER-MESONT/pair_mesont_tpm.cpp @@ -510,6 +510,12 @@ void PairMESONTTPM::compute(int eflag, int vflag){ // set per atom values and accumulators // reallocate per-atom arrays if necessary + if (eatom_s == nullptr) + memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s"); + if (eatom_b == nullptr) + memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b"); + if (eatom_t == nullptr) + memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t"); if (atom->nmax > maxeatom) { maxeatom = atom->nmax; memory->destroy(eatom); From 
e6643979516195965c7261053878f4b88d9aaa2b Mon Sep 17 00:00:00 2001 From: iafoss <49990208+iafoss@users.noreply.github.com> Date: Mon, 2 Nov 2020 16:12:57 -0500 Subject: [PATCH 34/64] Add files via upload --- src/USER-MESONT/pair_mesont_tpm.cpp | 1611 ++++++++++++++------------- src/USER-MESONT/pair_mesont_tpm.h | 197 ++-- 2 files changed, 907 insertions(+), 901 deletions(-) diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp index 9185786341..a58f9892ed 100644 --- a/src/USER-MESONT/pair_mesont_tpm.cpp +++ b/src/USER-MESONT/pair_mesont_tpm.cpp @@ -1,803 +1,808 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://lammps.sandia.gov/, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
- - Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu -------------------------------------------------------------------------- */ - -#include "pair_mesont_tpm.h" -#include "export_mesont.h" - - -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "memory.h" -#include "error.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" - -#include -#include -#include - -#include -#include -#include - -using namespace LAMMPS_NS; - -//since LAMMPS is compiled with C++ 2003, define a substitution for std::array -template -class array2003{ -public: - T& operator[] (int idx){ return data[idx];}; - const T& operator[] (int idx) const{ return data[idx];}; -private: - T data[N]; -}; - - -class MESONTList { -public: - MESONTList(const Atom* atom, const NeighList* nblist, double rc2); - ~MESONTList() {}; - //list of segments - const std::vector >& get_segments() const; - //list of triplets - const std::vector >& get_triplets() const; - //list of neighbor chains [start,end] for segments - //(use idx() to get real indexes) - const std::vector > >& get_nbs() const; - //convert idx from sorted representation to real idx - int get_idx(int idx) const; - //return list of indexes for conversion from sorted representation - const std::vector& get_idx_list() const; - //convert idx from real idx to sorted representation - int get_idxb(int idx) const; - //return list of indexes for conversion to sorted representation - const std::vector& get_idxb_list() const; - //check if the node is the end of the tube - bool is_end(int idx) const; - - array2003 get_segment(int idx) const; - array2003 get_triplet(int idx) const; - - static const int cnt_end = -1; - static const int domain_end = -2; - static const int not_cnt = -3; -private: - std::vector > chain_list, segments; - std::vector > triplets; - std::vector > > nb_chains; - std::vector index_list, index_list_b; -}; - -//============================================================================= - 
-inline const std::vector > > & - MESONTList::get_nbs() const { - return nb_chains; -} - -inline int MESONTList::get_idx(int idx) const { - return index_list[idx]; -} - -inline const std::vector& MESONTList::get_idx_list() const { - return index_list; -}; - - -inline int MESONTList::get_idxb(int idx) const { - return index_list_b[idx]; -} - -inline const std::vector& MESONTList::get_idxb_list() const { - return index_list_b; -}; - -inline const std::vector > & MESONTList::get_segments() - const { - return segments; -} - -inline const std::vector > & MESONTList::get_triplets() - const { - return triplets; -} - -inline array2003 MESONTList::get_segment(int idx) const { - array2003 result; - result[0] = chain_list[idx][0]; - result[1] = idx; - return result; -} - -inline array2003 MESONTList::get_triplet(int idx) const { - array2003 result; - result[0] = chain_list[idx][0]; - result[1] = idx; - result[2] = chain_list[idx][1]; - return result; -} - -inline bool MESONTList::is_end(int idx) const { - return chain_list[idx][0] == cnt_end || chain_list[idx][1] == cnt_end; -}; - -template -void vector_union(std::vector& v1, std::vector& v2, - std::vector& merged) { - std::sort(v1.begin(), v1.end()); - std::sort(v2.begin(), v2.end()); - merged.reserve(v1.size() + v2.size()); - typename std::vector::iterator it1 = v1.begin(); - typename std::vector::iterator it2 = v2.begin(); - - while (it1 != v1.end() && it2 != v2.end()) { - if (*it1 < *it2) { - if (merged.empty() || merged.back() < *it1) merged.push_back(*it1); - ++it1; - } - else { - if (merged.empty() || merged.back() < *it2) merged.push_back(*it2); - ++it2; - } - } - while (it1 != v1.end()) { - if (merged.empty() || merged.back() < *it1) merged.push_back(*it1); - ++it1; - } - - while (it2 != v2.end()) { - if (merged.empty() || merged.back() < *it2) merged.push_back(*it2); - ++it2; - } -} - -MESONTList::MESONTList(const Atom* atom, const NeighList* nblist, double /* rc2 */){ - if (atom == nullptr || nblist == nullptr) 
return; - //number of local atoms at the node - int nlocal = atom->nlocal; - //total number of atoms in the node and ghost shell - int nall = nblist->inum + nblist->gnum; - int ntot = atom->nlocal + atom->nghost; - tagint* const g_id = atom->tag; - tagint** const bonds = atom->bond_nt; - tagint* const chain_id = atom->molecule; - int* ilist = nblist->ilist; - - //convert bonds to local id representation - array2003 tmp_arr; - tmp_arr[0] = not_cnt; tmp_arr[1] = not_cnt; - chain_list.resize(ntot, tmp_arr); - for (int ii = 0; ii < nall; ii++) { - int i = ilist[ii]; - chain_list[i][0] = domain_end; - chain_list[i][1] = domain_end; - } - for (int ii = 0; ii < nall; ii++) { - int i = ilist[ii]; - int nnb = nblist->numneigh[i]; - for (int m = 0; m < 2; m++) - if (bonds[i][m] == cnt_end) chain_list[i][m] = cnt_end; - for (int j = 0; j < nnb; j++) { - int nb = nblist->firstneigh[i][j]; - if (bonds[i][0] == g_id[nb]){ - chain_list[i][0] = nb; - chain_list[nb][1] = i; - break; - } - } - } - - //reorder chains: index list - //list of indexes for conversion FROM reordered representation - index_list.reserve(nall); - index_list_b.resize(ntot, -1); // convert index TO reordered representation - for (int i = 0; i < ntot; i++) { - if (chain_list[i][0] == cnt_end || chain_list[i][0] == domain_end) { - index_list.push_back(i); - index_list_b[i] = index_list.size() - 1; - int idx = i; - while (1) { - idx = chain_list[idx][1]; - if (idx == cnt_end || idx == domain_end) break; - else index_list.push_back(idx); - index_list_b[idx] = index_list.size() - 1; - } - } - } - - //segment list - for (int i = 0; i < nlocal; i++) { - if (chain_list[i][0] == not_cnt) continue; - if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end && - g_id[i] < g_id[chain_list[i][0]]){ - array2003 tmp_c; - tmp_c[0] = i; tmp_c[1] = chain_list[i][0]; - segments.push_back(tmp_c); - } - if (chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end && - g_id[i] < g_id[chain_list[i][1]]){ - array2003 
tmp_c; - tmp_c[0] = i; tmp_c[1] = chain_list[i][1]; - segments.push_back(tmp_c); - } - } - int nbonds = segments.size(); - - //triplets - for (int i = 0; i < nlocal; i++){ - if (chain_list[i][0] == not_cnt) continue; - if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end && - chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end) - triplets.push_back(get_triplet(i)); - } - - //segment neighbor list - nb_chains.resize(nbonds); - std::vector nb_list_i[2], nb_list; - for (int i = 0; i < nbonds; i++) { - //union of nb lists - for (int m = 0; m < 2; m++) { - nb_list_i[m].resize(0); - int idx = segments[i][m]; - if (idx >= nlocal) continue; - int nnb = nblist->numneigh[idx]; - for (int j = 0; j < nnb; j++) { - int jdx = nblist->firstneigh[idx][j]; - //no self interactions for nbs within the same tube - if (chain_id[jdx] == chain_id[idx] && - std::abs(index_list_b[idx] - index_list_b[jdx]) <= 5) continue; - nb_list_i[m].push_back(index_list_b[jdx]); - } - } - vector_union(nb_list_i[0], nb_list_i[1], nb_list); - - int nnb = nb_list.size(); - if (nnb > 0) { - int idx_s = nb_list[0]; - for (int j = 0; j < nnb; j++) { - //if nodes are not continuous in the sorted representation - //or represent chain ends, create a new neighbor chain - int idx_next = chain_list[index_list[nb_list[j]]][1]; - if ((j == nnb - 1) || (nb_list[j] + 1 != nb_list[j+1]) || - (idx_next == cnt_end) || (idx_next == domain_end)) { - array2003 chain; - chain[0] = idx_s; - chain[1] = nb_list[j]; - //make sure that segments having at least one node - //in the neighbor list are included - int idx0 = index_list[chain[0]]; // real id of the ends - int idx1 = index_list[chain[1]]; - if (chain_list[idx0][0] != cnt_end && - chain_list[idx0][0] != domain_end) chain[0] -= 1; - if (chain_list[idx1][1] != cnt_end && - chain_list[idx1][1] != domain_end) chain[1] += 1; - if(chain[0] != chain[1]) nb_chains[i].push_back(chain); - idx_s = (j == nnb - 1) ? 
-1 : nb_list[j + 1]; - } - } - } - nb_list.resize(0); - } -} - -/* ---------------------------------------------------------------------- */ - -// the cutoff distance between walls of tubes -static const double TPBRcutoff = 3.0*3.4; -int PairMESONTTPM::instance_count = 0; -/* ---------------------------------------------------------------------- */ - -PairMESONTTPM::PairMESONTTPM(LAMMPS *lmp) : Pair(lmp) { - writedata=1; - BendingMode = 0; // Harmonic bending model - TPMType = 0; // Inter-tube segment-segment interaction - tab_path = nullptr; - tab_path_length = 0; - - eatom_s = nullptr; - eatom_b = nullptr; - eatom_t = nullptr; - instance_count++; - if(instance_count > 1) error->all(FLERR, - "only a single instance of mesont/tpm pair style can be created"); -} - -/* ---------------------------------------------------------------------- */ - -PairMESONTTPM::~PairMESONTTPM() -{ - if (allocated) { - memory->destroy(setflag); - memory->destroy(cutsq); - memory->destroy(cut); - - memory->destroy(eatom_s); - memory->destroy(eatom_b); - memory->destroy(eatom_t); - } - instance_count--; - if (tab_path != nullptr) memory->destroy(tab_path); -} - -/* ---------------------------------------------------------------------- */ - -void PairMESONTTPM::compute(int eflag, int vflag){ - ev_init(eflag,vflag); - //total number of atoms in the node and ghost shell - int nall = list->inum + list->gnum; - int ntot = atom->nlocal + atom->nghost; - int newton_pair = force->newton_pair; - if(!newton_pair) - error->all(FLERR,"Pair style mesont/tpm requires newton pair on"); - - double **x = atom->x; - double **f = atom->f; - double *r = atom->radius; - double *l = atom->length; - int *buckling = atom->buckling; - tagint *g_id = atom->tag; - - //check if cutoff is chosen correctly - double RT = mesont_lib_get_R(); - double Lmax = 0.0; - for (int ii = 0; ii < list->inum; ii++) { - int i = list->ilist[ii]; - if (Lmax < l[i]) Lmax = l[i]; - } - double Rcut_min = std::max(2.0*Lmax, 
std::sqrt(0.5*Lmax*Lmax + - std::pow((2.0*RT + TPBRcutoff),2))); - if (cut_global < Rcut_min){ - std::stringstream err; - err << "The selected cutoff is too small for the current system : " << - "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global << - ", Rcut_min = " << Rcut_min; - error->all(FLERR, err.str().c_str()); - } - - //generate bonds and chain nblist - MESONTList ntlist(atom, list, cut_global*cut_global); - - //reorder data to make it contiguous within tubes - //and compatible with Fortran functions - std::vector x_sort(3*nall), f_sort(3*nall), s_sort(9*nall); - std::vector u_ts_sort(nall), u_tb_sort(nall), u_tt_sort(nall); - std::vector b_sort(nall); - for (int i = 0; i < nall; i++){ - int idx = ntlist.get_idx(i); - for (int j = 0; j < 3; j++) x_sort[3*i+j] = x[idx][j]; - b_sort[i] = buckling[idx]; - } - - //bending potential - int n_triplets = ntlist.get_triplets().size(); - for (int i = 0; i < n_triplets; i++) { - const array2003& t = ntlist.get_triplets()[i]; - //idx of nodes of a triplet in sorted representation - int idx_s0 = ntlist.get_idxb(t[0]); - int idx_s1 = ntlist.get_idxb(t[1]); - int idx_s2 = ntlist.get_idxb(t[2]); - - double* X1 = &(x_sort[3*idx_s0]); - double* X2 = &(x_sort[3*idx_s1]); - double* X3 = &(x_sort[3*idx_s2]); - double& U1b = u_tb_sort[idx_s0]; - double& U2b = u_tb_sort[idx_s1]; - double& U3b = u_tb_sort[idx_s2]; - double* F1 = &(f_sort[3*idx_s0]); - double* F2 = &(f_sort[3*idx_s1]); - double* F3 = &(f_sort[3*idx_s2]); - double* S1 = &(s_sort[9*idx_s0]); - double* S2 = &(s_sort[9*idx_s1]); - double* S3 = &(s_sort[9*idx_s2]); - double& R123 = r[t[1]]; - double& L123 = l[t[1]]; - int& BBF2 = b_sort[idx_s1]; - - mesont_lib_TubeBendingForceField(U1b, U2b, U3b, F1, F2, F3, S1, S2, S3, - X1, X2, X3, R123, L123, BBF2); - } - - //share new values of buckling - if (BendingMode == 1){ - for (int i = 0; i < nall; i++){ - int idx = ntlist.get_idx(i); - buckling[idx] = b_sort[i]; - } - comm->forward_comm_pair(this); - for 
(int i = 0; i < nall; i++){ - int idx = ntlist.get_idx(i); - b_sort[i] = buckling[idx]; - } - } - - //segment-segment and segment-tube interactions - int n_segments = ntlist.get_segments().size(); - double Rmax = 0.0; - Lmax = 0.0; - for (int i = 0; i < n_segments; i++) { - const array2003& s = ntlist.get_segments()[i]; - //idx of a segment end 1 in sorted representation - int idx_s0 = ntlist.get_idxb(s[0]); - //idx of a segment end 2 in sorted representation - int idx_s1 = ntlist.get_idxb(s[1]); - double* X1 = &(x_sort[3*idx_s0]); - double* X2 = &(x_sort[3*idx_s1]); - double length = std::sqrt(std::pow(X1[0]-X2[0],2) + - std::pow(X1[1]-X2[1],2) + std::pow(X1[2]-X2[2],2)); - if (length > Lmax) Lmax = length; - double& U1t = u_tt_sort[idx_s0]; - double& U2t = u_tt_sort[idx_s1]; - double& U1s = u_ts_sort[idx_s0]; - double& U2s = u_ts_sort[idx_s1]; - double* F1 = &(f_sort[3*idx_s0]); - double* F2 = &(f_sort[3*idx_s1]); - double* S1 = &(s_sort[9*idx_s0]); - double* S2 = &(s_sort[9*idx_s1]); - double R12 = r[s[0]]; if (R12 > Rmax) Rmax = R12; - if (std::abs(R12 - RT) > 1e-3) - error->all(FLERR,"Inconsistent input and potential table"); - //assume that the length of the segment is defined by the node with - //smallest global id - double L12 = (g_id[s[0]] > g_id[s[1]]) ? 
l[s[1]] : l[s[0]]; - mesont_lib_TubeStretchingForceField(U1s, U2s, F1, F2, S1, S2, X1, X2, - R12, L12); - - for (int nc = 0; nc < (int)ntlist.get_nbs()[i].size(); nc++){ - //id of the beginning and end of the chain in the sorted representation - const array2003& chain = ntlist.get_nbs()[i][nc]; - int N = chain[1] - chain[0] + 1; //number of elements in the chain - int end1 = ntlist.get_idx(chain[0]); //chain ends (real representation) - int end2 = ntlist.get_idx(chain[1]); - double* X = &(x_sort[3*chain[0]]); - double* Ut = &(u_tt_sort[chain[0]]); - double* F = &(f_sort[3*chain[0]]); - double* S = &(s_sort[9*chain[0]]); - double R = r[end1]; - int* BBF = &(b_sort[chain[0]]); - int E1 = ntlist.is_end(end1); - int E2 = ntlist.is_end(end2); - - int Ee = 0; - double* Xe = X; double* Fe = F; double* Se = S; - if (!E1 && ntlist.get_triplet(end1)[0] != MESONTList::domain_end && - ntlist.get_triplet(ntlist.get_triplet(end1)[0])[0] == - MESONTList::cnt_end){ - Ee = 1; - int idx = ntlist.get_idxb(ntlist.get_triplet(end1)[0]); - Xe = &(x_sort[3*idx]); - Fe = &(f_sort[3*idx]); - Se = &(s_sort[9*idx]); - } - else if (!E2 && ntlist.get_triplet(end2)[2] != MESONTList::domain_end && - ntlist.get_triplet(ntlist.get_triplet(end2)[2])[2] == - MESONTList::cnt_end){ - Ee = 2; - int idx = ntlist.get_idxb(ntlist.get_triplet(end2)[2]); - Xe = &(x_sort[3*idx]); - Fe = &(f_sort[3*idx]); - Se = &(s_sort[9*idx]); - } - - mesont_lib_SegmentTubeForceField(U1t, U2t, Ut, F1, F2, F, Fe, S1, S2, S, - Se, X1, X2, R12, N, X, Xe, BBF, R, E1, E2, Ee, TPMType); - } - } - - //check if cutoff is chosen correctly - Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax + - std::pow((2.0*Rmax + TPBRcutoff),2))); - if (cut_global < Rcut_min){ - std::stringstream err; - err << "The selected cutoff is too small for the current system : " << - "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global << - ", Rcut_min = " << Rcut_min; - error->all(FLERR, err.str().c_str()); - } - - // set per atom 
values and accumulators - // reallocate per-atom arrays if necessary - if (eatom_s == nullptr) - memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s"); - if (eatom_b == nullptr) - memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b"); - if (eatom_t == nullptr) - memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t"); - if (atom->nmax > maxeatom) { - maxeatom = atom->nmax; - memory->destroy(eatom); - memory->create(eatom,comm->nthreads*maxeatom,"pair:eatom"); - memory->destroy(eatom_s); - memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s"); - memory->destroy(eatom_b); - memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b"); - memory->destroy(eatom_t); - memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t"); - } - - if (atom->nmax > maxvatom) { - maxvatom = atom->nmax; - memory->destroy(vatom); - memory->create(vatom,comm->nthreads*maxvatom,6,"pair:vatom"); - } - - // zero accumulators - eng_vdwl = 0.0; energy_s = 0.0; - energy_b = 0.0; energy_t = 0.0; - for (int i = 0; i < 6; i++) virial[i] = 0.0; - for (int i = 0; i < ntot; i++){ - eatom[i] = 0.0; eatom_s[i] = 0.0; - eatom_b[i] = 0.0; eatom_t[i] = 0.0; - } - for (int i = 0; i < ntot; i++) - for (int j = 0; j < 6; j++) vatom[i][j] = 0.0; - - //convert from sorted representation - for (int i = 0; i < nall; i++){ - int idx = ntlist.get_idx(i); - for (int j = 0; j < 3; j++) f[idx][j] += f_sort[3*i+j]; - eatom_s[idx] = u_ts_sort[i]; - eatom_b[idx] = u_tb_sort[i]; - eatom_t[idx] = u_tt_sort[i]; - eatom[idx] = u_ts_sort[i] + u_tb_sort[i] + u_tt_sort[i]; - energy_s += u_ts_sort[i]; - energy_b += u_tb_sort[i]; - energy_t += u_tt_sort[i]; - vatom[idx][0] = s_sort[9*i+0]; //xx - vatom[idx][1] = s_sort[9*i+4]; //yy - vatom[idx][2] = s_sort[9*i+8]; //zz - vatom[idx][3] = s_sort[9*i+1]; //xy - vatom[idx][4] = s_sort[9*i+2]; //xz - vatom[idx][5] = s_sort[9*i+5]; //yz - for (int j = 0; j < 6; j++) virial[j] += vatom[idx][j]; - buckling[idx] = b_sort[i]; - } - eng_vdwl = 
energy_s + energy_b + energy_t; -} - -/* ---------------------------------------------------------------------- - allocate all arrays -------------------------------------------------------------------------- */ - -void PairMESONTTPM::allocate(){ - allocated = 1; - int n = atom->ntypes; - - memory->create(setflag,n+1,n+1,"pair:setflag"); - for (int i = 1; i <= n; i++) - for (int j = i; j <= n; j++) - setflag[i][j] = 0; - - memory->create(cutsq,n+1,n+1,"pair:cutsq"); - memory->create(cut,n+1,n+1,"pair:cut"); -} - -/* ---------------------------------------------------------------------- - global settings -------------------------------------------------------------------------- */ - -void PairMESONTTPM::settings(int narg, char **arg){ - if ((narg == 0) || (narg > 4)) - error->all(FLERR,"Illegal pair_style command"); - cut_global = utils::numeric(FLERR,arg[0],false,lmp); - - // reset cutoffs that have been explicitly set - if (allocated) { - int i,j; - for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) - cut[i][j] = cut_global; - } - std::string TPMAFile = (narg > 1) ? 
arg[1] : "MESONT-TABTP.xrs"; - tab_path_length = TPMAFile.length(); - if (tab_path != nullptr) memory->destroy(tab_path); - //c_str returns '\0' terminated string - memory->create(tab_path,tab_path_length+1,"pair:path"); - std::memcpy(tab_path, TPMAFile.c_str(), tab_path_length+1); - mesont_lib_SetTablePath(tab_path, tab_path_length); - - if (narg > 2) { - BendingMode = utils::numeric(FLERR,arg[2],false,lmp); - if ((BendingMode < 0) || (BendingMode > 1)) - error->all(FLERR,"Incorrect BendingMode"); - } - if (narg > 3) { - TPMType = utils::numeric(FLERR,arg[3],false,lmp); - if ((TPMType < 0) || (TPMType > 1)) - error->all(FLERR,"Incorrect TPMType"); - } - - mesont_lib_TPBInit(); - int M, N; - std::ifstream in(TPMAFile); - if (!in.is_open()) error->all(FLERR,"Incorrect table path"); - std::string tmp; - std::getline(in,tmp); - std::getline(in,tmp); - std::getline(in,tmp); - in >> M >> N; - in.close(); - mesont_lib_TPMInit(M, N); - mesont_lib_InitCNTPotModule(1, 3, 0, BendingMode, mesont_lib_get_R()); -} - -/* ---------------------------------------------------------------------- - set coeffs for one or more type pairs -------------------------------------------------------------------------- */ - -void PairMESONTTPM::coeff(int narg, char **arg){ - if ((narg < 2) || (narg > 3)) - error->all(FLERR,"Incorrect args for pair coefficients"); - - if (!allocated) allocate(); - - int ilo,ihi,jlo,jhi; - utils::bounds(FLERR,arg[0],1,atom->ntypes,ilo,ihi,error); - utils::bounds(FLERR,arg[1],1,atom->ntypes,jlo,jhi,error); - - double cut_one = cut_global; - if (narg == 3) cut_one = utils::numeric(FLERR,arg[2],false,lmp); - - int count = 0; - for (int i = ilo; i <= ihi; i++) { - for (int j = MAX(jlo,i); j <= jhi; j++) { - cut[i][j] = cut_one; - setflag[i][j] = 1; - count++; - } - } - - if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); -} - -/* ---------------------------------------------------------------------- - init for one type pair i,j and 
corresponding j,i -------------------------------------------------------------------------- */ - -double PairMESONTTPM::init_one(int i, int j){ - if (setflag[i][j] == 0) { - cut[i][j] = mix_distance(cut[i][i],cut[j][j]); - } - - return cut[i][j]; -} - -/* ---------------------------------------------------------------------- - proc 0 writes to restart file -------------------------------------------------------------------------- */ - -void PairMESONTTPM::write_restart(FILE *fp){ - write_restart_settings(fp); - - int i,j; - for (i = 1; i <= atom->ntypes; i++) - for (j = i; j <= atom->ntypes; j++) { - fwrite(&setflag[i][j],sizeof(int),1,fp); - if (setflag[i][j]) { - fwrite(&cut[i][j],sizeof(double),1,fp); - } - } -} - -/* ---------------------------------------------------------------------- - proc 0 reads from restart file, bcasts -------------------------------------------------------------------------- */ - -void PairMESONTTPM::read_restart(FILE *fp){ - read_restart_settings(fp); - allocate(); - - int i,j; - int me = comm->me; - for (i = 1; i <= atom->ntypes; i++) - for (j = i; j <= atom->ntypes; j++) { - if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp); - MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world); - if (setflag[i][j]) { - if (me == 0) { - fread(&cut[i][j],sizeof(double),1,fp); - } - MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world); - } - } -} - -/* ---------------------------------------------------------------------- - proc 0 writes to restart file -------------------------------------------------------------------------- */ - -void PairMESONTTPM::write_restart_settings(FILE *fp){ - fwrite(&BendingMode,sizeof(int),1,fp); - fwrite(&TPMType,sizeof(int),1,fp); - fwrite(&cut_global,sizeof(double),1,fp); - fwrite(&tab_path_length,sizeof(int),1,fp); - fwrite(tab_path,tab_path_length+1,1,fp); -} - -/* ---------------------------------------------------------------------- - proc 0 reads from restart file, bcasts 
-------------------------------------------------------------------------- */ - -void PairMESONTTPM::read_restart_settings(FILE *fp){ - int me = comm->me; - if (me == 0) { - fread(&BendingMode,sizeof(int),1,fp); - fread(&TPMType,sizeof(int),1,fp); - fread(&cut_global,sizeof(double),1,fp); - fread(&tab_path_length,sizeof(int),1,fp); - } - MPI_Bcast(&BendingMode,1,MPI_INT,0,world); - MPI_Bcast(&TPMType,1,MPI_INT,0,world); - MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world); - MPI_Bcast(&tab_path_length,1,MPI_INT,0,world); - - if (tab_path != nullptr) memory->destroy(tab_path); - memory->create(tab_path,tab_path_length+1,"pair:path"); - if (me == 0) fread(tab_path,tab_path_length+1,1,fp); - MPI_Bcast(tab_path,tab_path_length+1,MPI_CHAR,0,world); - mesont_lib_SetTablePath(tab_path,tab_path_length); - mesont_lib_TPBInit(); - int M, N; - std::ifstream in(tab_path); - if (!in.is_open()) error->all(FLERR,"Incorrect table path"); - std::string tmp; - std::getline(in,tmp); - std::getline(in,tmp); - std::getline(in,tmp); - in >> M >> N; - in.close(); - mesont_lib_TPMInit(M, N); - mesont_lib_InitCNTPotModule(1, 3, 0, BendingMode, mesont_lib_get_R()); -} - -/* ---------------------------------------------------------------------- - proc 0 writes to data file -------------------------------------------------------------------------- */ - -void PairMESONTTPM::write_data(FILE *fp){ - for (int i = 1; i <= atom->ntypes; i++) - fprintf(fp,"%d\n",i); -} - -/* ---------------------------------------------------------------------- - proc 0 writes all pairs to data file -------------------------------------------------------------------------- */ - -void PairMESONTTPM::write_data_all(FILE *fp){ - for (int i = 1; i <= atom->ntypes; i++) - for (int j = i; j <= atom->ntypes; j++) - fprintf(fp,"%d %d %g\n",i,j,cut[i][j]); -} - -/* ---------------------------------------------------------------------- */ - -void PairMESONTTPM::init_style(){ - //make sure that a full list is created (including ghost 
nodes) - int r = neighbor->request(this,instance_me); - neighbor->requests[r]->half = false; - neighbor->requests[r]->full = true; - neighbor->requests[r]->ghost = true; -} - -void* PairMESONTTPM::extract(const char *str, int &){ - if (strcmp(str,"mesonttpm_Es_tot") == 0) return &energy_s; - else if (strcmp(str,"mesonttpm_Eb_tot") == 0) return &energy_b; - else if (strcmp(str,"mesonttpm_Et_tot") == 0) return &energy_t; - else if (strcmp(str,"mesonttpm_Es") == 0) return eatom_s; - else if (strcmp(str,"mesonttpm_Eb") == 0) return eatom_b; - else if (strcmp(str,"mesonttpm_Et") == 0) return eatom_t; - else return nullptr; -}; +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+ + Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu +------------------------------------------------------------------------- */ + +#include "pair_mesont_tpm.h" +#include "export_mesont.h" + + +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "error.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" + +#include +#include +#include + +#include +#include +#include + +using namespace LAMMPS_NS; + +//since LAMMPS is compiled with C++ 2003, define a substitution for std::array +template +class array2003{ +public: + T& operator[] (int idx){ return data[idx];}; + const T& operator[] (int idx) const{ return data[idx];}; +private: + T data[N]; +}; + + +class MESONTList { +public: + MESONTList(const Atom* atom, const NeighList* nblist, double rc2); + ~MESONTList() {}; + //list of segments + const std::vector >& get_segments() const; + //list of triplets + const std::vector >& get_triplets() const; + //list of neighbor chains [start,end] for segments + //(use idx() to get real indexes) + const std::vector > >& get_nbs() const; + //convert idx from sorted representation to real idx + int get_idx(int idx) const; + //return list of indexes for conversion from sorted representation + const std::vector& get_idx_list() const; + //convert idx from real idx to sorted representation + int get_idxb(int idx) const; + //return list of indexes for conversion to sorted representation + const std::vector& get_idxb_list() const; + //check if the node is the end of the tube + bool is_end(int idx) const; + + array2003 get_segment(int idx) const; + array2003 get_triplet(int idx) const; + + static const int cnt_end = -1; + static const int domain_end = -2; + static const int not_cnt = -3; +private: + std::vector > chain_list, segments; + std::vector > triplets; + std::vector > > nb_chains; + std::vector index_list, index_list_b; +}; + +//============================================================================= + 
+inline const std::vector > > & + MESONTList::get_nbs() const { + return nb_chains; +} + +inline int MESONTList::get_idx(int idx) const { + return index_list[idx]; +} + +inline const std::vector& MESONTList::get_idx_list() const { + return index_list; +}; + + +inline int MESONTList::get_idxb(int idx) const { + return index_list_b[idx]; +} + +inline const std::vector& MESONTList::get_idxb_list() const { + return index_list_b; +}; + +inline const std::vector > & MESONTList::get_segments() + const { + return segments; +} + +inline const std::vector > & MESONTList::get_triplets() + const { + return triplets; +} + +inline array2003 MESONTList::get_segment(int idx) const { + array2003 result; + result[0] = chain_list[idx][0]; + result[1] = idx; + return result; +} + +inline array2003 MESONTList::get_triplet(int idx) const { + array2003 result; + result[0] = chain_list[idx][0]; + result[1] = idx; + result[2] = chain_list[idx][1]; + return result; +} + +inline bool MESONTList::is_end(int idx) const { + return chain_list[idx][0] == cnt_end || chain_list[idx][1] == cnt_end; +}; + +template +void vector_union(std::vector& v1, std::vector& v2, + std::vector& merged) { + std::sort(v1.begin(), v1.end()); + std::sort(v2.begin(), v2.end()); + merged.reserve(v1.size() + v2.size()); + typename std::vector::iterator it1 = v1.begin(); + typename std::vector::iterator it2 = v2.begin(); + + while (it1 != v1.end() && it2 != v2.end()) { + if (*it1 < *it2) { + if (merged.empty() || merged.back() < *it1) merged.push_back(*it1); + ++it1; + } + else { + if (merged.empty() || merged.back() < *it2) merged.push_back(*it2); + ++it2; + } + } + while (it1 != v1.end()) { + if (merged.empty() || merged.back() < *it1) merged.push_back(*it1); + ++it1; + } + + while (it2 != v2.end()) { + if (merged.empty() || merged.back() < *it2) merged.push_back(*it2); + ++it2; + } +} + +MESONTList::MESONTList(const Atom* atom, const NeighList* nblist, double /* rc2 */){ + if (atom == nullptr || nblist == nullptr) 
return; + //number of local atoms at the node + int nlocal = atom->nlocal; + //total number of atoms in the node and ghost shell + int nall = nblist->inum + nblist->gnum; + int ntot = atom->nlocal + atom->nghost; + tagint* const g_id = atom->tag; + tagint** const bonds = atom->bond_nt; + tagint* const chain_id = atom->molecule; + int* ilist = nblist->ilist; + + //convert bonds to local id representation + array2003 tmp_arr; + tmp_arr[0] = not_cnt; tmp_arr[1] = not_cnt; + chain_list.resize(ntot, tmp_arr); + for (int ii = 0; ii < nall; ii++) { + int i = ilist[ii]; + chain_list[i][0] = domain_end; + chain_list[i][1] = domain_end; + } + for (int ii = 0; ii < nall; ii++) { + int i = ilist[ii]; + int nnb = nblist->numneigh[i]; + for (int m = 0; m < 2; m++) + if (bonds[i][m] == cnt_end) chain_list[i][m] = cnt_end; + for (int j = 0; j < nnb; j++) { + int nb = nblist->firstneigh[i][j]; + if (bonds[i][0] == g_id[nb]){ + chain_list[i][0] = nb; + chain_list[nb][1] = i; + break; + } + } + } + + //reorder chains: index list + //list of indexes for conversion FROM reordered representation + index_list.reserve(nall); + index_list_b.resize(ntot, -1); // convert index TO reordered representation + for (int i = 0; i < ntot; i++) { + if (chain_list[i][0] == cnt_end || chain_list[i][0] == domain_end) { + index_list.push_back(i); + index_list_b[i] = index_list.size() - 1; + int idx = i; + while (1) { + idx = chain_list[idx][1]; + if (idx == cnt_end || idx == domain_end) break; + else index_list.push_back(idx); + index_list_b[idx] = index_list.size() - 1; + } + } + } + + //segment list + for (int i = 0; i < nlocal; i++) { + if (chain_list[i][0] == not_cnt) continue; + if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end && + g_id[i] < g_id[chain_list[i][0]]){ + array2003 tmp_c; + tmp_c[0] = i; tmp_c[1] = chain_list[i][0]; + segments.push_back(tmp_c); + } + if (chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end && + g_id[i] < g_id[chain_list[i][1]]){ + array2003 
tmp_c; + tmp_c[0] = i; tmp_c[1] = chain_list[i][1]; + segments.push_back(tmp_c); + } + } + int nbonds = segments.size(); + + //triplets + for (int i = 0; i < nlocal; i++){ + if (chain_list[i][0] == not_cnt) continue; + if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end && + chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end) + triplets.push_back(get_triplet(i)); + } + + //segment neighbor list + nb_chains.resize(nbonds); + std::vector nb_list_i[2], nb_list; + for (int i = 0; i < nbonds; i++) { + //union of nb lists + for (int m = 0; m < 2; m++) { + nb_list_i[m].resize(0); + int idx = segments[i][m]; + if (idx >= nlocal) continue; + int nnb = nblist->numneigh[idx]; + for (int j = 0; j < nnb; j++) { + int jdx = nblist->firstneigh[idx][j]; + //no self interactions for nbs within the same tube + if (chain_id[jdx] == chain_id[idx] && + std::abs(index_list_b[idx] - index_list_b[jdx]) <= 5) continue; + nb_list_i[m].push_back(index_list_b[jdx]); + } + } + vector_union(nb_list_i[0], nb_list_i[1], nb_list); + + int nnb = nb_list.size(); + if (nnb > 0) { + int idx_s = nb_list[0]; + for (int j = 0; j < nnb; j++) { + //if nodes are not continuous in the sorted representation + //or represent chain ends, create a new neighbor chain + int idx_next = chain_list[index_list[nb_list[j]]][1]; + if ((j == nnb - 1) || (nb_list[j] + 1 != nb_list[j+1]) || + (idx_next == cnt_end) || (idx_next == domain_end)) { + array2003 chain; + chain[0] = idx_s; + chain[1] = nb_list[j]; + //make sure that segments having at least one node + //in the neighbor list are included + int idx0 = index_list[chain[0]]; // real id of the ends + int idx1 = index_list[chain[1]]; + if (chain_list[idx0][0] != cnt_end && + chain_list[idx0][0] != domain_end) chain[0] -= 1; + if (chain_list[idx1][1] != cnt_end && + chain_list[idx1][1] != domain_end) chain[1] += 1; + if(chain[0] != chain[1]) nb_chains[i].push_back(chain); + idx_s = (j == nnb - 1) ? 
-1 : nb_list[j + 1]; + } + } + } + nb_list.resize(0); + } +} + +/* ---------------------------------------------------------------------- */ + +// the cutoff distance between walls of tubes +static const double TPBRcutoff = 3.0*3.4; +int PairMESONTTPM::instance_count = 0; +/* ---------------------------------------------------------------------- */ + +PairMESONTTPM::PairMESONTTPM(LAMMPS *lmp) : Pair(lmp) { + writedata=1; + BendingMode = 0; // Harmonic bending model + TPMType = 0; // Inter-tube segment-segment interaction + tab_path = nullptr; + tab_path_length = 0; + + eatom_s = nullptr; + eatom_b = nullptr; + eatom_t = nullptr; + nmax = 0; + instance_count++; + if(instance_count > 1) error->all(FLERR, + "only a single instance of mesont/tpm pair style can be created"); +} + +/* ---------------------------------------------------------------------- */ + +PairMESONTTPM::~PairMESONTTPM() +{ + if (allocated) { + memory->destroy(setflag); + memory->destroy(cutsq); + memory->destroy(cut); + + memory->destroy(eatom_s); + memory->destroy(eatom_b); + memory->destroy(eatom_t); + } + instance_count--; + if (tab_path != nullptr) memory->destroy(tab_path); +} + +/* ---------------------------------------------------------------------- */ + +void PairMESONTTPM::compute(int eflag, int vflag){ + // set per atom values and accumulators + // reallocate per-atom arrays if necessary + ev_init(eflag,vflag); + if (atom->nmax > nmax) { + memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s"); + memory->destroy(eatom_b); + memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b"); + memory->destroy(eatom_t); + memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t"); + nmax = atom->nmax; + } + //total number of atoms in the node and ghost shell + int nall = list->inum + list->gnum; + int ntot = atom->nlocal + atom->nghost; + int newton_pair = force->newton_pair; + if(!newton_pair) + error->all(FLERR,"Pair style mesont/tpm requires newton pair on"); + + double **x = 
atom->x; + double **f = atom->f; + double *r = atom->radius; + double *l = atom->length; + int *buckling = atom->buckling; + tagint *g_id = atom->tag; + + //check if cutoff is chosen correctly + double RT = mesont_lib_get_R(); + double Lmax = 0.0; + for (int ii = 0; ii < list->inum; ii++) { + int i = list->ilist[ii]; + if (Lmax < l[i]) Lmax = l[i]; + } + double Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax + + std::pow((2.0*RT + TPBRcutoff),2))); + if (cut_global < Rcut_min){ + std::stringstream err; + err << "The selected cutoff is too small for the current system : " << + "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global << + ", Rcut_min = " << Rcut_min; + error->all(FLERR, err.str().c_str()); + } + + //generate bonds and chain nblist + MESONTList ntlist(atom, list, cut_global*cut_global); + + //reorder data to make it contiguous within tubes + //and compatible with Fortran functions + std::vector x_sort(3*nall), f_sort(3*nall), s_sort(9*nall); + std::vector u_ts_sort(nall), u_tb_sort(nall), u_tt_sort(nall); + std::vector b_sort(nall); + for (int i = 0; i < nall; i++){ + int idx = ntlist.get_idx(i); + for (int j = 0; j < 3; j++) x_sort[3*i+j] = x[idx][j]; + b_sort[i] = buckling[idx]; + } + + //bending potential + int n_triplets = ntlist.get_triplets().size(); + for (int i = 0; i < n_triplets; i++) { + const array2003& t = ntlist.get_triplets()[i]; + //idx of nodes of a triplet in sorted representation + int idx_s0 = ntlist.get_idxb(t[0]); + int idx_s1 = ntlist.get_idxb(t[1]); + int idx_s2 = ntlist.get_idxb(t[2]); + + double* X1 = &(x_sort[3*idx_s0]); + double* X2 = &(x_sort[3*idx_s1]); + double* X3 = &(x_sort[3*idx_s2]); + double& U1b = u_tb_sort[idx_s0]; + double& U2b = u_tb_sort[idx_s1]; + double& U3b = u_tb_sort[idx_s2]; + double* F1 = &(f_sort[3*idx_s0]); + double* F2 = &(f_sort[3*idx_s1]); + double* F3 = &(f_sort[3*idx_s2]); + double* S1 = &(s_sort[9*idx_s0]); + double* S2 = &(s_sort[9*idx_s1]); + double* S3 = &(s_sort[9*idx_s2]); + 
double& R123 = r[t[1]]; + double& L123 = l[t[1]]; + int& BBF2 = b_sort[idx_s1]; + + mesont_lib_TubeBendingForceField(U1b, U2b, U3b, F1, F2, F3, S1, S2, S3, + X1, X2, X3, R123, L123, BBF2); + } + + //share new values of buckling + if (BendingMode == 1){ + for (int i = 0; i < nall; i++){ + int idx = ntlist.get_idx(i); + buckling[idx] = b_sort[i]; + } + comm->forward_comm_pair(this); + for (int i = 0; i < nall; i++){ + int idx = ntlist.get_idx(i); + b_sort[i] = buckling[idx]; + } + } + + //segment-segment and segment-tube interactions + int n_segments = ntlist.get_segments().size(); + double Rmax = 0.0; + Lmax = 0.0; + for (int i = 0; i < n_segments; i++) { + const array2003& s = ntlist.get_segments()[i]; + //idx of a segment end 1 in sorted representation + int idx_s0 = ntlist.get_idxb(s[0]); + //idx of a segment end 2 in sorted representation + int idx_s1 = ntlist.get_idxb(s[1]); + double* X1 = &(x_sort[3*idx_s0]); + double* X2 = &(x_sort[3*idx_s1]); + double length = std::sqrt(std::pow(X1[0]-X2[0],2) + + std::pow(X1[1]-X2[1],2) + std::pow(X1[2]-X2[2],2)); + if (length > Lmax) Lmax = length; + double& U1t = u_tt_sort[idx_s0]; + double& U2t = u_tt_sort[idx_s1]; + double& U1s = u_ts_sort[idx_s0]; + double& U2s = u_ts_sort[idx_s1]; + double* F1 = &(f_sort[3*idx_s0]); + double* F2 = &(f_sort[3*idx_s1]); + double* S1 = &(s_sort[9*idx_s0]); + double* S2 = &(s_sort[9*idx_s1]); + double R12 = r[s[0]]; if (R12 > Rmax) Rmax = R12; + if (std::abs(R12 - RT) > 1e-3) + error->all(FLERR,"Inconsistent input and potential table"); + //assume that the length of the segment is defined by the node with + //smallest global id + double L12 = (g_id[s[0]] > g_id[s[1]]) ? 
l[s[1]] : l[s[0]]; + mesont_lib_TubeStretchingForceField(U1s, U2s, F1, F2, S1, S2, X1, X2, + R12, L12); + + for (int nc = 0; nc < (int)ntlist.get_nbs()[i].size(); nc++){ + //id of the beginning and end of the chain in the sorted representation + const array2003& chain = ntlist.get_nbs()[i][nc]; + int N = chain[1] - chain[0] + 1; //number of elements in the chain + int end1 = ntlist.get_idx(chain[0]); //chain ends (real representation) + int end2 = ntlist.get_idx(chain[1]); + double* X = &(x_sort[3*chain[0]]); + double* Ut = &(u_tt_sort[chain[0]]); + double* F = &(f_sort[3*chain[0]]); + double* S = &(s_sort[9*chain[0]]); + double R = r[end1]; + int* BBF = &(b_sort[chain[0]]); + int E1 = ntlist.is_end(end1); + int E2 = ntlist.is_end(end2); + + int Ee = 0; + double* Xe = X; double* Fe = F; double* Se = S; + if (!E1 && ntlist.get_triplet(end1)[0] != MESONTList::domain_end && + ntlist.get_triplet(ntlist.get_triplet(end1)[0])[0] == + MESONTList::cnt_end){ + Ee = 1; + int idx = ntlist.get_idxb(ntlist.get_triplet(end1)[0]); + Xe = &(x_sort[3*idx]); + Fe = &(f_sort[3*idx]); + Se = &(s_sort[9*idx]); + } + else if (!E2 && ntlist.get_triplet(end2)[2] != MESONTList::domain_end && + ntlist.get_triplet(ntlist.get_triplet(end2)[2])[2] == + MESONTList::cnt_end){ + Ee = 2; + int idx = ntlist.get_idxb(ntlist.get_triplet(end2)[2]); + Xe = &(x_sort[3*idx]); + Fe = &(f_sort[3*idx]); + Se = &(s_sort[9*idx]); + } + + mesont_lib_SegmentTubeForceField(U1t, U2t, Ut, F1, F2, F, Fe, S1, S2, S, + Se, X1, X2, R12, N, X, Xe, BBF, R, E1, E2, Ee, TPMType); + } + } + + //check if cutoff is chosen correctly + Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax + + std::pow((2.0*Rmax + TPBRcutoff),2))); + if (cut_global < Rcut_min){ + std::stringstream err; + err << "The selected cutoff is too small for the current system : " << + "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global << + ", Rcut_min = " << Rcut_min; + error->all(FLERR, err.str().c_str()); + } + + //convert from sorted 
representation + for (int i = 0; i < nall; i++){ + int idx = ntlist.get_idx(i); + for (int j = 0; j < 3; j++) f[idx][j] += f_sort[3*i+j]; + buckling[idx] = b_sort[i]; + } + if(eflag){ + eng_vdwl = 0.0; energy_s = 0.0; + energy_b = 0.0; energy_t = 0.0; + for (int i = 0; i < ntot; i++){ + eatom[i] = 0.0; eatom_s[i] = 0.0; + eatom_b[i] = 0.0; eatom_t[i] = 0.0; + } + for (int i = 0; i < nall; i++){ + int idx = ntlist.get_idx(i); + eatom_s[idx] = u_ts_sort[i]; + eatom_b[idx] = u_tb_sort[i]; + eatom_t[idx] = u_tt_sort[i]; + eatom[idx] = u_ts_sort[i] + u_tb_sort[i] + u_tt_sort[i]; + energy_s += u_ts_sort[i]; + energy_b += u_tb_sort[i]; + energy_t += u_tt_sort[i]; + } + eng_vdwl = energy_s + energy_b + energy_t; + } + if(vflag){ + for (int i = 0; i < 6; i++) virial[i] = 0.0; + for (int i = 0; i < nall; i++){ + int idx = ntlist.get_idx(i); + virial[0] += s_sort[9*i+0]; //xx + virial[1] += s_sort[9*i+4]; //yy + virial[2] += s_sort[9*i+8]; //zz + virial[3] += s_sort[9*i+1]; //xy + virial[4] += s_sort[9*i+2]; //xz + virial[5] += s_sort[9*i+5]; //yz + } + } + int vflag_atom = vflag & 4; + if(vflag_atom){ + for (int i = 0; i < ntot; i++) + for (int j = 0; j < 6; j++) vatom[i][j] = 0.0; + for (int i = 0; i < nall; i++){ + int idx = ntlist.get_idx(i); + vatom[idx][0] = s_sort[9*i+0]; //xx + vatom[idx][1] = s_sort[9*i+4]; //yy + vatom[idx][2] = s_sort[9*i+8]; //zz + vatom[idx][3] = s_sort[9*i+1]; //xy + vatom[idx][4] = s_sort[9*i+2]; //xz + vatom[idx][5] = s_sort[9*i+5]; //yz + } + } + +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +void PairMESONTTPM::allocate(){ + allocated = 1; + int n = atom->ntypes; + + memory->create(setflag,n+1,n+1,"pair:setflag"); + for (int i = 1; i <= n; i++) + for (int j = i; j <= n; j++) + setflag[i][j] = 0; + + memory->create(cutsq,n+1,n+1,"pair:cutsq"); + memory->create(cut,n+1,n+1,"pair:cut"); +} + +/* 
---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void PairMESONTTPM::settings(int narg, char **arg){ + if ((narg == 0) || (narg > 4)) + error->all(FLERR,"Illegal pair_style command"); + cut_global = utils::numeric(FLERR,arg[0],false,lmp); + + // reset cutoffs that have been explicitly set + if (allocated) { + int i,j; + for (i = 1; i <= atom->ntypes; i++) + for (j = i+1; j <= atom->ntypes; j++) + cut[i][j] = cut_global; + } + std::string TPMAFile = (narg > 1) ? arg[1] : "MESONT-TABTP.xrs"; + tab_path_length = TPMAFile.length(); + if (tab_path != nullptr) memory->destroy(tab_path); + //c_str returns '\0' terminated string + memory->create(tab_path,tab_path_length+1,"pair:path"); + std::memcpy(tab_path, TPMAFile.c_str(), tab_path_length+1); + mesont_lib_SetTablePath(tab_path, tab_path_length); + + if (narg > 2) { + BendingMode = utils::numeric(FLERR,arg[2],false,lmp); + if ((BendingMode < 0) || (BendingMode > 1)) + error->all(FLERR,"Incorrect BendingMode"); + } + if (narg > 3) { + TPMType = utils::numeric(FLERR,arg[3],false,lmp); + if ((TPMType < 0) || (TPMType > 1)) + error->all(FLERR,"Incorrect TPMType"); + } + + mesont_lib_TPBInit(); + int M, N; + std::ifstream in(TPMAFile); + if (!in.is_open()) error->all(FLERR,"Incorrect table path"); + std::string tmp; + std::getline(in,tmp); + std::getline(in,tmp); + std::getline(in,tmp); + in >> M >> N; + in.close(); + mesont_lib_TPMInit(M, N); + mesont_lib_InitCNTPotModule(1, 3, 0, BendingMode, mesont_lib_get_R()); +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +void PairMESONTTPM::coeff(int narg, char **arg){ + if ((narg < 2) || (narg > 3)) + error->all(FLERR,"Incorrect args for pair coefficients"); + + if (!allocated) allocate(); + + int ilo,ihi,jlo,jhi; 
+ utils::bounds(FLERR,arg[0],1,atom->ntypes,ilo,ihi,error); + utils::bounds(FLERR,arg[1],1,atom->ntypes,jlo,jhi,error); + + double cut_one = cut_global; + if (narg == 3) cut_one = utils::numeric(FLERR,arg[2],false,lmp); + + int count = 0; + for (int i = ilo; i <= ihi; i++) { + for (int j = MAX(jlo,i); j <= jhi; j++) { + cut[i][j] = cut_one; + setflag[i][j] = 1; + count++; + } + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +double PairMESONTTPM::init_one(int i, int j){ + if (setflag[i][j] == 0) { + cut[i][j] = mix_distance(cut[i][i],cut[j][j]); + } + + return cut[i][j]; +} + +/* ---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +void PairMESONTTPM::write_restart(FILE *fp){ + write_restart_settings(fp); + + int i,j; + for (i = 1; i <= atom->ntypes; i++) + for (j = i; j <= atom->ntypes; j++) { + fwrite(&setflag[i][j],sizeof(int),1,fp); + if (setflag[i][j]) { + fwrite(&cut[i][j],sizeof(double),1,fp); + } + } +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts +------------------------------------------------------------------------- */ + +void PairMESONTTPM::read_restart(FILE *fp){ + read_restart_settings(fp); + allocate(); + + int i,j; + int me = comm->me; + for (i = 1; i <= atom->ntypes; i++) + for (j = i; j <= atom->ntypes; j++) { + if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp); + MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world); + if (setflag[i][j]) { + if (me == 0) { + fread(&cut[i][j],sizeof(double),1,fp); + } + MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world); + } + } +} + +/* 
---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +void PairMESONTTPM::write_restart_settings(FILE *fp){ + fwrite(&BendingMode,sizeof(int),1,fp); + fwrite(&TPMType,sizeof(int),1,fp); + fwrite(&cut_global,sizeof(double),1,fp); + fwrite(&tab_path_length,sizeof(int),1,fp); + fwrite(tab_path,tab_path_length+1,1,fp); +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts +------------------------------------------------------------------------- */ + +void PairMESONTTPM::read_restart_settings(FILE *fp){ + int me = comm->me; + if (me == 0) { + fread(&BendingMode,sizeof(int),1,fp); + fread(&TPMType,sizeof(int),1,fp); + fread(&cut_global,sizeof(double),1,fp); + fread(&tab_path_length,sizeof(int),1,fp); + } + MPI_Bcast(&BendingMode,1,MPI_INT,0,world); + MPI_Bcast(&TPMType,1,MPI_INT,0,world); + MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world); + MPI_Bcast(&tab_path_length,1,MPI_INT,0,world); + + if (tab_path != nullptr) memory->destroy(tab_path); + memory->create(tab_path,tab_path_length+1,"pair:path"); + if (me == 0) fread(tab_path,tab_path_length+1,1,fp); + MPI_Bcast(tab_path,tab_path_length+1,MPI_CHAR,0,world); + mesont_lib_SetTablePath(tab_path,tab_path_length); + mesont_lib_TPBInit(); + int M, N; + std::ifstream in(tab_path); + if (!in.is_open()) error->all(FLERR,"Incorrect table path"); + std::string tmp; + std::getline(in,tmp); + std::getline(in,tmp); + std::getline(in,tmp); + in >> M >> N; + in.close(); + mesont_lib_TPMInit(M, N); + mesont_lib_InitCNTPotModule(1, 3, 0, BendingMode, mesont_lib_get_R()); +} + +/* ---------------------------------------------------------------------- + proc 0 writes to data file +------------------------------------------------------------------------- */ + +void PairMESONTTPM::write_data(FILE *fp){ + for (int i = 1; i <= atom->ntypes; i++) + 
fprintf(fp,"%d\n",i); +} + +/* ---------------------------------------------------------------------- + proc 0 writes all pairs to data file +------------------------------------------------------------------------- */ + +void PairMESONTTPM::write_data_all(FILE *fp){ + for (int i = 1; i <= atom->ntypes; i++) + for (int j = i; j <= atom->ntypes; j++) + fprintf(fp,"%d %d %g\n",i,j,cut[i][j]); +} + +/* ---------------------------------------------------------------------- */ + +void PairMESONTTPM::init_style(){ + //make sure that a full list is created (including ghost nodes) + int r = neighbor->request(this,instance_me); + neighbor->requests[r]->half = false; + neighbor->requests[r]->full = true; + neighbor->requests[r]->ghost = true; +} + +void* PairMESONTTPM::extract(const char *str, int &){ + if (strcmp(str,"mesonttpm_Es_tot") == 0) return &energy_s; + else if (strcmp(str,"mesonttpm_Eb_tot") == 0) return &energy_b; + else if (strcmp(str,"mesonttpm_Et_tot") == 0) return &energy_t; + else if (strcmp(str,"mesonttpm_Es") == 0) return eatom_s; + else if (strcmp(str,"mesonttpm_Eb") == 0) return eatom_b; + else if (strcmp(str,"mesonttpm_Et") == 0) return eatom_t; + else return nullptr; +}; diff --git a/src/USER-MESONT/pair_mesont_tpm.h b/src/USER-MESONT/pair_mesont_tpm.h index 704556d75e..c3d71ae953 100644 --- a/src/USER-MESONT/pair_mesont_tpm.h +++ b/src/USER-MESONT/pair_mesont_tpm.h @@ -1,98 +1,99 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
- - Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(mesont/tpm,PairMESONTTPM) - -#else - -#ifndef LMP_PAIR_MESONT_TPM_H -#define LMP_PAIR_MESONT_TPM_H - -#include "pair.h" - -namespace LAMMPS_NS { - -class PairMESONTTPM : public Pair { - public: - PairMESONTTPM(class LAMMPS *); - virtual ~PairMESONTTPM(); - virtual void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - double init_one(int, int); - void write_restart(FILE *); - void read_restart(FILE *); - void write_restart_settings(FILE *); - void read_restart_settings(FILE *); - void write_data(FILE *); - void write_data_all(FILE *); - virtual void init_style(); - - double energy_s; // accumulated energies for stretching - double energy_b; // accumulated energies for bending - double energy_t; // accumulated energies for tube-tube interaction - double *eatom_s, *eatom_b, *eatom_t; // accumulated per-atom values - - protected: - int BendingMode, TPMType; - char* tab_path; - int tab_path_length; - double cut_global; - double **cut; - static int instance_count; - - virtual void allocate(); - virtual void *extract(const char *, int &); -}; - -} - -#endif -#endif - -/* ERROR/WARNING messages: - -E: Pair style mesont/tpm requires newton pair on - -newton_pair must be set to on - -E: The selected cutoff is too small for the current system - -cutoff must be increased. - -E: Illegal pair_style command - -Incorrect argument list in the style init. - -E: Incorrect table path - -Incorrect path to the table files. - -E: Incorrect BendingMode - -Self-explanatory. - -E: Incorrect TPMType - -Self-explanatory. - -E: Inconsistent input and potential table - -The tube diameter is inconsistent with the chirality specified -during generation of the potential table. 
- -*/ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. + + Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(mesont/tpm,PairMESONTTPM) + +#else + +#ifndef LMP_PAIR_MESONT_TPM_H +#define LMP_PAIR_MESONT_TPM_H + +#include "pair.h" + +namespace LAMMPS_NS { + +class PairMESONTTPM : public Pair { + public: + PairMESONTTPM(class LAMMPS *); + virtual ~PairMESONTTPM(); + virtual void compute(int, int); + void settings(int, char **); + void coeff(int, char **); + double init_one(int, int); + void write_restart(FILE *); + void read_restart(FILE *); + void write_restart_settings(FILE *); + void read_restart_settings(FILE *); + void write_data(FILE *); + void write_data_all(FILE *); + virtual void init_style(); + + double energy_s; // accumulated energies for stretching + double energy_b; // accumulated energies for bending + double energy_t; // accumulated energies for tube-tube interaction + double *eatom_s, *eatom_b, *eatom_t; // accumulated per-atom values + + protected: + int BendingMode, TPMType; + char* tab_path; + int tab_path_length; + double cut_global; + double **cut; + static int instance_count; + int nmax; + + virtual void allocate(); + virtual void *extract(const char *, int &); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Pair style mesont/tpm requires newton pair on + +newton_pair must be set to on + +E: The selected cutoff is too small 
for the current system + +cutoff must be increased. + +E: Illegal pair_style command + +Incorrect argument list in the style init. + +E: Incorrect table path + +Incorrect path to the table files. + +E: Incorrect BendingMode + +Self-explanatory. + +E: Incorrect TPMType + +Self-explanatory. + +E: Inconsistent input and potential table + +The tube diameter is inconsistent with the chirality specified +during generation of the potential table. + +*/ From 769e7a099511f15e857e079722520576bf7ad940 Mon Sep 17 00:00:00 2001 From: iafoss Date: Mon, 2 Nov 2020 16:24:57 -0500 Subject: [PATCH 35/64] Revert "Add files via upload" This reverts commit e6643979516195965c7261053878f4b88d9aaa2b. --- src/USER-MESONT/pair_mesont_tpm.cpp | 1611 +++++++++++++-------------- src/USER-MESONT/pair_mesont_tpm.h | 197 ++-- 2 files changed, 901 insertions(+), 907 deletions(-) diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp index a58f9892ed..9185786341 100644 --- a/src/USER-MESONT/pair_mesont_tpm.cpp +++ b/src/USER-MESONT/pair_mesont_tpm.cpp @@ -1,808 +1,803 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
- - Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu -------------------------------------------------------------------------- */ - -#include "pair_mesont_tpm.h" -#include "export_mesont.h" - - -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "memory.h" -#include "error.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" - -#include -#include -#include - -#include -#include -#include - -using namespace LAMMPS_NS; - -//since LAMMPS is compiled with C++ 2003, define a substitution for std::array -template -class array2003{ -public: - T& operator[] (int idx){ return data[idx];}; - const T& operator[] (int idx) const{ return data[idx];}; -private: - T data[N]; -}; - - -class MESONTList { -public: - MESONTList(const Atom* atom, const NeighList* nblist, double rc2); - ~MESONTList() {}; - //list of segments - const std::vector >& get_segments() const; - //list of triplets - const std::vector >& get_triplets() const; - //list of neighbor chains [start,end] for segments - //(use idx() to get real indexes) - const std::vector > >& get_nbs() const; - //convert idx from sorted representation to real idx - int get_idx(int idx) const; - //return list of indexes for conversion from sorted representation - const std::vector& get_idx_list() const; - //convert idx from real idx to sorted representation - int get_idxb(int idx) const; - //return list of indexes for conversion to sorted representation - const std::vector& get_idxb_list() const; - //check if the node is the end of the tube - bool is_end(int idx) const; - - array2003 get_segment(int idx) const; - array2003 get_triplet(int idx) const; - - static const int cnt_end = -1; - static const int domain_end = -2; - static const int not_cnt = -3; -private: - std::vector > chain_list, segments; - std::vector > triplets; - std::vector > > nb_chains; - std::vector index_list, index_list_b; -}; - -//============================================================================= - 
-inline const std::vector > > & - MESONTList::get_nbs() const { - return nb_chains; -} - -inline int MESONTList::get_idx(int idx) const { - return index_list[idx]; -} - -inline const std::vector& MESONTList::get_idx_list() const { - return index_list; -}; - - -inline int MESONTList::get_idxb(int idx) const { - return index_list_b[idx]; -} - -inline const std::vector& MESONTList::get_idxb_list() const { - return index_list_b; -}; - -inline const std::vector > & MESONTList::get_segments() - const { - return segments; -} - -inline const std::vector > & MESONTList::get_triplets() - const { - return triplets; -} - -inline array2003 MESONTList::get_segment(int idx) const { - array2003 result; - result[0] = chain_list[idx][0]; - result[1] = idx; - return result; -} - -inline array2003 MESONTList::get_triplet(int idx) const { - array2003 result; - result[0] = chain_list[idx][0]; - result[1] = idx; - result[2] = chain_list[idx][1]; - return result; -} - -inline bool MESONTList::is_end(int idx) const { - return chain_list[idx][0] == cnt_end || chain_list[idx][1] == cnt_end; -}; - -template -void vector_union(std::vector& v1, std::vector& v2, - std::vector& merged) { - std::sort(v1.begin(), v1.end()); - std::sort(v2.begin(), v2.end()); - merged.reserve(v1.size() + v2.size()); - typename std::vector::iterator it1 = v1.begin(); - typename std::vector::iterator it2 = v2.begin(); - - while (it1 != v1.end() && it2 != v2.end()) { - if (*it1 < *it2) { - if (merged.empty() || merged.back() < *it1) merged.push_back(*it1); - ++it1; - } - else { - if (merged.empty() || merged.back() < *it2) merged.push_back(*it2); - ++it2; - } - } - while (it1 != v1.end()) { - if (merged.empty() || merged.back() < *it1) merged.push_back(*it1); - ++it1; - } - - while (it2 != v2.end()) { - if (merged.empty() || merged.back() < *it2) merged.push_back(*it2); - ++it2; - } -} - -MESONTList::MESONTList(const Atom* atom, const NeighList* nblist, double /* rc2 */){ - if (atom == nullptr || nblist == nullptr) 
return; - //number of local atoms at the node - int nlocal = atom->nlocal; - //total number of atoms in the node and ghost shell - int nall = nblist->inum + nblist->gnum; - int ntot = atom->nlocal + atom->nghost; - tagint* const g_id = atom->tag; - tagint** const bonds = atom->bond_nt; - tagint* const chain_id = atom->molecule; - int* ilist = nblist->ilist; - - //convert bonds to local id representation - array2003 tmp_arr; - tmp_arr[0] = not_cnt; tmp_arr[1] = not_cnt; - chain_list.resize(ntot, tmp_arr); - for (int ii = 0; ii < nall; ii++) { - int i = ilist[ii]; - chain_list[i][0] = domain_end; - chain_list[i][1] = domain_end; - } - for (int ii = 0; ii < nall; ii++) { - int i = ilist[ii]; - int nnb = nblist->numneigh[i]; - for (int m = 0; m < 2; m++) - if (bonds[i][m] == cnt_end) chain_list[i][m] = cnt_end; - for (int j = 0; j < nnb; j++) { - int nb = nblist->firstneigh[i][j]; - if (bonds[i][0] == g_id[nb]){ - chain_list[i][0] = nb; - chain_list[nb][1] = i; - break; - } - } - } - - //reorder chains: index list - //list of indexes for conversion FROM reordered representation - index_list.reserve(nall); - index_list_b.resize(ntot, -1); // convert index TO reordered representation - for (int i = 0; i < ntot; i++) { - if (chain_list[i][0] == cnt_end || chain_list[i][0] == domain_end) { - index_list.push_back(i); - index_list_b[i] = index_list.size() - 1; - int idx = i; - while (1) { - idx = chain_list[idx][1]; - if (idx == cnt_end || idx == domain_end) break; - else index_list.push_back(idx); - index_list_b[idx] = index_list.size() - 1; - } - } - } - - //segment list - for (int i = 0; i < nlocal; i++) { - if (chain_list[i][0] == not_cnt) continue; - if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end && - g_id[i] < g_id[chain_list[i][0]]){ - array2003 tmp_c; - tmp_c[0] = i; tmp_c[1] = chain_list[i][0]; - segments.push_back(tmp_c); - } - if (chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end && - g_id[i] < g_id[chain_list[i][1]]){ - array2003 
tmp_c; - tmp_c[0] = i; tmp_c[1] = chain_list[i][1]; - segments.push_back(tmp_c); - } - } - int nbonds = segments.size(); - - //triplets - for (int i = 0; i < nlocal; i++){ - if (chain_list[i][0] == not_cnt) continue; - if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end && - chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end) - triplets.push_back(get_triplet(i)); - } - - //segment neighbor list - nb_chains.resize(nbonds); - std::vector nb_list_i[2], nb_list; - for (int i = 0; i < nbonds; i++) { - //union of nb lists - for (int m = 0; m < 2; m++) { - nb_list_i[m].resize(0); - int idx = segments[i][m]; - if (idx >= nlocal) continue; - int nnb = nblist->numneigh[idx]; - for (int j = 0; j < nnb; j++) { - int jdx = nblist->firstneigh[idx][j]; - //no self interactions for nbs within the same tube - if (chain_id[jdx] == chain_id[idx] && - std::abs(index_list_b[idx] - index_list_b[jdx]) <= 5) continue; - nb_list_i[m].push_back(index_list_b[jdx]); - } - } - vector_union(nb_list_i[0], nb_list_i[1], nb_list); - - int nnb = nb_list.size(); - if (nnb > 0) { - int idx_s = nb_list[0]; - for (int j = 0; j < nnb; j++) { - //if nodes are not continuous in the sorted representation - //or represent chain ends, create a new neighbor chain - int idx_next = chain_list[index_list[nb_list[j]]][1]; - if ((j == nnb - 1) || (nb_list[j] + 1 != nb_list[j+1]) || - (idx_next == cnt_end) || (idx_next == domain_end)) { - array2003 chain; - chain[0] = idx_s; - chain[1] = nb_list[j]; - //make sure that segments having at least one node - //in the neighbor list are included - int idx0 = index_list[chain[0]]; // real id of the ends - int idx1 = index_list[chain[1]]; - if (chain_list[idx0][0] != cnt_end && - chain_list[idx0][0] != domain_end) chain[0] -= 1; - if (chain_list[idx1][1] != cnt_end && - chain_list[idx1][1] != domain_end) chain[1] += 1; - if(chain[0] != chain[1]) nb_chains[i].push_back(chain); - idx_s = (j == nnb - 1) ? 
-1 : nb_list[j + 1]; - } - } - } - nb_list.resize(0); - } -} - -/* ---------------------------------------------------------------------- */ - -// the cutoff distance between walls of tubes -static const double TPBRcutoff = 3.0*3.4; -int PairMESONTTPM::instance_count = 0; -/* ---------------------------------------------------------------------- */ - -PairMESONTTPM::PairMESONTTPM(LAMMPS *lmp) : Pair(lmp) { - writedata=1; - BendingMode = 0; // Harmonic bending model - TPMType = 0; // Inter-tube segment-segment interaction - tab_path = nullptr; - tab_path_length = 0; - - eatom_s = nullptr; - eatom_b = nullptr; - eatom_t = nullptr; - nmax = 0; - instance_count++; - if(instance_count > 1) error->all(FLERR, - "only a single instance of mesont/tpm pair style can be created"); -} - -/* ---------------------------------------------------------------------- */ - -PairMESONTTPM::~PairMESONTTPM() -{ - if (allocated) { - memory->destroy(setflag); - memory->destroy(cutsq); - memory->destroy(cut); - - memory->destroy(eatom_s); - memory->destroy(eatom_b); - memory->destroy(eatom_t); - } - instance_count--; - if (tab_path != nullptr) memory->destroy(tab_path); -} - -/* ---------------------------------------------------------------------- */ - -void PairMESONTTPM::compute(int eflag, int vflag){ - // set per atom values and accumulators - // reallocate per-atom arrays if necessary - ev_init(eflag,vflag); - if (atom->nmax > nmax) { - memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s"); - memory->destroy(eatom_b); - memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b"); - memory->destroy(eatom_t); - memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t"); - nmax = atom->nmax; - } - //total number of atoms in the node and ghost shell - int nall = list->inum + list->gnum; - int ntot = atom->nlocal + atom->nghost; - int newton_pair = force->newton_pair; - if(!newton_pair) - error->all(FLERR,"Pair style mesont/tpm requires newton pair on"); - - double **x = 
atom->x; - double **f = atom->f; - double *r = atom->radius; - double *l = atom->length; - int *buckling = atom->buckling; - tagint *g_id = atom->tag; - - //check if cutoff is chosen correctly - double RT = mesont_lib_get_R(); - double Lmax = 0.0; - for (int ii = 0; ii < list->inum; ii++) { - int i = list->ilist[ii]; - if (Lmax < l[i]) Lmax = l[i]; - } - double Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax + - std::pow((2.0*RT + TPBRcutoff),2))); - if (cut_global < Rcut_min){ - std::stringstream err; - err << "The selected cutoff is too small for the current system : " << - "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global << - ", Rcut_min = " << Rcut_min; - error->all(FLERR, err.str().c_str()); - } - - //generate bonds and chain nblist - MESONTList ntlist(atom, list, cut_global*cut_global); - - //reorder data to make it contiguous within tubes - //and compatible with Fortran functions - std::vector x_sort(3*nall), f_sort(3*nall), s_sort(9*nall); - std::vector u_ts_sort(nall), u_tb_sort(nall), u_tt_sort(nall); - std::vector b_sort(nall); - for (int i = 0; i < nall; i++){ - int idx = ntlist.get_idx(i); - for (int j = 0; j < 3; j++) x_sort[3*i+j] = x[idx][j]; - b_sort[i] = buckling[idx]; - } - - //bending potential - int n_triplets = ntlist.get_triplets().size(); - for (int i = 0; i < n_triplets; i++) { - const array2003& t = ntlist.get_triplets()[i]; - //idx of nodes of a triplet in sorted representation - int idx_s0 = ntlist.get_idxb(t[0]); - int idx_s1 = ntlist.get_idxb(t[1]); - int idx_s2 = ntlist.get_idxb(t[2]); - - double* X1 = &(x_sort[3*idx_s0]); - double* X2 = &(x_sort[3*idx_s1]); - double* X3 = &(x_sort[3*idx_s2]); - double& U1b = u_tb_sort[idx_s0]; - double& U2b = u_tb_sort[idx_s1]; - double& U3b = u_tb_sort[idx_s2]; - double* F1 = &(f_sort[3*idx_s0]); - double* F2 = &(f_sort[3*idx_s1]); - double* F3 = &(f_sort[3*idx_s2]); - double* S1 = &(s_sort[9*idx_s0]); - double* S2 = &(s_sort[9*idx_s1]); - double* S3 = &(s_sort[9*idx_s2]); - 
double& R123 = r[t[1]]; - double& L123 = l[t[1]]; - int& BBF2 = b_sort[idx_s1]; - - mesont_lib_TubeBendingForceField(U1b, U2b, U3b, F1, F2, F3, S1, S2, S3, - X1, X2, X3, R123, L123, BBF2); - } - - //share new values of buckling - if (BendingMode == 1){ - for (int i = 0; i < nall; i++){ - int idx = ntlist.get_idx(i); - buckling[idx] = b_sort[i]; - } - comm->forward_comm_pair(this); - for (int i = 0; i < nall; i++){ - int idx = ntlist.get_idx(i); - b_sort[i] = buckling[idx]; - } - } - - //segment-segment and segment-tube interactions - int n_segments = ntlist.get_segments().size(); - double Rmax = 0.0; - Lmax = 0.0; - for (int i = 0; i < n_segments; i++) { - const array2003& s = ntlist.get_segments()[i]; - //idx of a segment end 1 in sorted representation - int idx_s0 = ntlist.get_idxb(s[0]); - //idx of a segment end 2 in sorted representation - int idx_s1 = ntlist.get_idxb(s[1]); - double* X1 = &(x_sort[3*idx_s0]); - double* X2 = &(x_sort[3*idx_s1]); - double length = std::sqrt(std::pow(X1[0]-X2[0],2) + - std::pow(X1[1]-X2[1],2) + std::pow(X1[2]-X2[2],2)); - if (length > Lmax) Lmax = length; - double& U1t = u_tt_sort[idx_s0]; - double& U2t = u_tt_sort[idx_s1]; - double& U1s = u_ts_sort[idx_s0]; - double& U2s = u_ts_sort[idx_s1]; - double* F1 = &(f_sort[3*idx_s0]); - double* F2 = &(f_sort[3*idx_s1]); - double* S1 = &(s_sort[9*idx_s0]); - double* S2 = &(s_sort[9*idx_s1]); - double R12 = r[s[0]]; if (R12 > Rmax) Rmax = R12; - if (std::abs(R12 - RT) > 1e-3) - error->all(FLERR,"Inconsistent input and potential table"); - //assume that the length of the segment is defined by the node with - //smallest global id - double L12 = (g_id[s[0]] > g_id[s[1]]) ? 
l[s[1]] : l[s[0]]; - mesont_lib_TubeStretchingForceField(U1s, U2s, F1, F2, S1, S2, X1, X2, - R12, L12); - - for (int nc = 0; nc < (int)ntlist.get_nbs()[i].size(); nc++){ - //id of the beginning and end of the chain in the sorted representation - const array2003& chain = ntlist.get_nbs()[i][nc]; - int N = chain[1] - chain[0] + 1; //number of elements in the chain - int end1 = ntlist.get_idx(chain[0]); //chain ends (real representation) - int end2 = ntlist.get_idx(chain[1]); - double* X = &(x_sort[3*chain[0]]); - double* Ut = &(u_tt_sort[chain[0]]); - double* F = &(f_sort[3*chain[0]]); - double* S = &(s_sort[9*chain[0]]); - double R = r[end1]; - int* BBF = &(b_sort[chain[0]]); - int E1 = ntlist.is_end(end1); - int E2 = ntlist.is_end(end2); - - int Ee = 0; - double* Xe = X; double* Fe = F; double* Se = S; - if (!E1 && ntlist.get_triplet(end1)[0] != MESONTList::domain_end && - ntlist.get_triplet(ntlist.get_triplet(end1)[0])[0] == - MESONTList::cnt_end){ - Ee = 1; - int idx = ntlist.get_idxb(ntlist.get_triplet(end1)[0]); - Xe = &(x_sort[3*idx]); - Fe = &(f_sort[3*idx]); - Se = &(s_sort[9*idx]); - } - else if (!E2 && ntlist.get_triplet(end2)[2] != MESONTList::domain_end && - ntlist.get_triplet(ntlist.get_triplet(end2)[2])[2] == - MESONTList::cnt_end){ - Ee = 2; - int idx = ntlist.get_idxb(ntlist.get_triplet(end2)[2]); - Xe = &(x_sort[3*idx]); - Fe = &(f_sort[3*idx]); - Se = &(s_sort[9*idx]); - } - - mesont_lib_SegmentTubeForceField(U1t, U2t, Ut, F1, F2, F, Fe, S1, S2, S, - Se, X1, X2, R12, N, X, Xe, BBF, R, E1, E2, Ee, TPMType); - } - } - - //check if cutoff is chosen correctly - Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax + - std::pow((2.0*Rmax + TPBRcutoff),2))); - if (cut_global < Rcut_min){ - std::stringstream err; - err << "The selected cutoff is too small for the current system : " << - "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global << - ", Rcut_min = " << Rcut_min; - error->all(FLERR, err.str().c_str()); - } - - //convert from sorted 
representation - for (int i = 0; i < nall; i++){ - int idx = ntlist.get_idx(i); - for (int j = 0; j < 3; j++) f[idx][j] += f_sort[3*i+j]; - buckling[idx] = b_sort[i]; - } - if(eflag){ - eng_vdwl = 0.0; energy_s = 0.0; - energy_b = 0.0; energy_t = 0.0; - for (int i = 0; i < ntot; i++){ - eatom[i] = 0.0; eatom_s[i] = 0.0; - eatom_b[i] = 0.0; eatom_t[i] = 0.0; - } - for (int i = 0; i < nall; i++){ - int idx = ntlist.get_idx(i); - eatom_s[idx] = u_ts_sort[i]; - eatom_b[idx] = u_tb_sort[i]; - eatom_t[idx] = u_tt_sort[i]; - eatom[idx] = u_ts_sort[i] + u_tb_sort[i] + u_tt_sort[i]; - energy_s += u_ts_sort[i]; - energy_b += u_tb_sort[i]; - energy_t += u_tt_sort[i]; - } - eng_vdwl = energy_s + energy_b + energy_t; - } - if(vflag){ - for (int i = 0; i < 6; i++) virial[i] = 0.0; - for (int i = 0; i < nall; i++){ - int idx = ntlist.get_idx(i); - virial[0] += s_sort[9*i+0]; //xx - virial[1] += s_sort[9*i+4]; //yy - virial[2] += s_sort[9*i+8]; //zz - virial[3] += s_sort[9*i+1]; //xy - virial[4] += s_sort[9*i+2]; //xz - virial[5] += s_sort[9*i+5]; //yz - } - } - int vflag_atom = vflag & 4; - if(vflag_atom){ - for (int i = 0; i < ntot; i++) - for (int j = 0; j < 6; j++) vatom[i][j] = 0.0; - for (int i = 0; i < nall; i++){ - int idx = ntlist.get_idx(i); - vatom[idx][0] = s_sort[9*i+0]; //xx - vatom[idx][1] = s_sort[9*i+4]; //yy - vatom[idx][2] = s_sort[9*i+8]; //zz - vatom[idx][3] = s_sort[9*i+1]; //xy - vatom[idx][4] = s_sort[9*i+2]; //xz - vatom[idx][5] = s_sort[9*i+5]; //yz - } - } - -} - -/* ---------------------------------------------------------------------- - allocate all arrays -------------------------------------------------------------------------- */ - -void PairMESONTTPM::allocate(){ - allocated = 1; - int n = atom->ntypes; - - memory->create(setflag,n+1,n+1,"pair:setflag"); - for (int i = 1; i <= n; i++) - for (int j = i; j <= n; j++) - setflag[i][j] = 0; - - memory->create(cutsq,n+1,n+1,"pair:cutsq"); - memory->create(cut,n+1,n+1,"pair:cut"); -} - -/* 
---------------------------------------------------------------------- - global settings -------------------------------------------------------------------------- */ - -void PairMESONTTPM::settings(int narg, char **arg){ - if ((narg == 0) || (narg > 4)) - error->all(FLERR,"Illegal pair_style command"); - cut_global = utils::numeric(FLERR,arg[0],false,lmp); - - // reset cutoffs that have been explicitly set - if (allocated) { - int i,j; - for (i = 1; i <= atom->ntypes; i++) - for (j = i+1; j <= atom->ntypes; j++) - cut[i][j] = cut_global; - } - std::string TPMAFile = (narg > 1) ? arg[1] : "MESONT-TABTP.xrs"; - tab_path_length = TPMAFile.length(); - if (tab_path != nullptr) memory->destroy(tab_path); - //c_str returns '\0' terminated string - memory->create(tab_path,tab_path_length+1,"pair:path"); - std::memcpy(tab_path, TPMAFile.c_str(), tab_path_length+1); - mesont_lib_SetTablePath(tab_path, tab_path_length); - - if (narg > 2) { - BendingMode = utils::numeric(FLERR,arg[2],false,lmp); - if ((BendingMode < 0) || (BendingMode > 1)) - error->all(FLERR,"Incorrect BendingMode"); - } - if (narg > 3) { - TPMType = utils::numeric(FLERR,arg[3],false,lmp); - if ((TPMType < 0) || (TPMType > 1)) - error->all(FLERR,"Incorrect TPMType"); - } - - mesont_lib_TPBInit(); - int M, N; - std::ifstream in(TPMAFile); - if (!in.is_open()) error->all(FLERR,"Incorrect table path"); - std::string tmp; - std::getline(in,tmp); - std::getline(in,tmp); - std::getline(in,tmp); - in >> M >> N; - in.close(); - mesont_lib_TPMInit(M, N); - mesont_lib_InitCNTPotModule(1, 3, 0, BendingMode, mesont_lib_get_R()); -} - -/* ---------------------------------------------------------------------- - set coeffs for one or more type pairs -------------------------------------------------------------------------- */ - -void PairMESONTTPM::coeff(int narg, char **arg){ - if ((narg < 2) || (narg > 3)) - error->all(FLERR,"Incorrect args for pair coefficients"); - - if (!allocated) allocate(); - - int ilo,ihi,jlo,jhi; 
- utils::bounds(FLERR,arg[0],1,atom->ntypes,ilo,ihi,error); - utils::bounds(FLERR,arg[1],1,atom->ntypes,jlo,jhi,error); - - double cut_one = cut_global; - if (narg == 3) cut_one = utils::numeric(FLERR,arg[2],false,lmp); - - int count = 0; - for (int i = ilo; i <= ihi; i++) { - for (int j = MAX(jlo,i); j <= jhi; j++) { - cut[i][j] = cut_one; - setflag[i][j] = 1; - count++; - } - } - - if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); -} - -/* ---------------------------------------------------------------------- - init for one type pair i,j and corresponding j,i -------------------------------------------------------------------------- */ - -double PairMESONTTPM::init_one(int i, int j){ - if (setflag[i][j] == 0) { - cut[i][j] = mix_distance(cut[i][i],cut[j][j]); - } - - return cut[i][j]; -} - -/* ---------------------------------------------------------------------- - proc 0 writes to restart file -------------------------------------------------------------------------- */ - -void PairMESONTTPM::write_restart(FILE *fp){ - write_restart_settings(fp); - - int i,j; - for (i = 1; i <= atom->ntypes; i++) - for (j = i; j <= atom->ntypes; j++) { - fwrite(&setflag[i][j],sizeof(int),1,fp); - if (setflag[i][j]) { - fwrite(&cut[i][j],sizeof(double),1,fp); - } - } -} - -/* ---------------------------------------------------------------------- - proc 0 reads from restart file, bcasts -------------------------------------------------------------------------- */ - -void PairMESONTTPM::read_restart(FILE *fp){ - read_restart_settings(fp); - allocate(); - - int i,j; - int me = comm->me; - for (i = 1; i <= atom->ntypes; i++) - for (j = i; j <= atom->ntypes; j++) { - if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp); - MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world); - if (setflag[i][j]) { - if (me == 0) { - fread(&cut[i][j],sizeof(double),1,fp); - } - MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world); - } - } -} - -/* 
---------------------------------------------------------------------- - proc 0 writes to restart file -------------------------------------------------------------------------- */ - -void PairMESONTTPM::write_restart_settings(FILE *fp){ - fwrite(&BendingMode,sizeof(int),1,fp); - fwrite(&TPMType,sizeof(int),1,fp); - fwrite(&cut_global,sizeof(double),1,fp); - fwrite(&tab_path_length,sizeof(int),1,fp); - fwrite(tab_path,tab_path_length+1,1,fp); -} - -/* ---------------------------------------------------------------------- - proc 0 reads from restart file, bcasts -------------------------------------------------------------------------- */ - -void PairMESONTTPM::read_restart_settings(FILE *fp){ - int me = comm->me; - if (me == 0) { - fread(&BendingMode,sizeof(int),1,fp); - fread(&TPMType,sizeof(int),1,fp); - fread(&cut_global,sizeof(double),1,fp); - fread(&tab_path_length,sizeof(int),1,fp); - } - MPI_Bcast(&BendingMode,1,MPI_INT,0,world); - MPI_Bcast(&TPMType,1,MPI_INT,0,world); - MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world); - MPI_Bcast(&tab_path_length,1,MPI_INT,0,world); - - if (tab_path != nullptr) memory->destroy(tab_path); - memory->create(tab_path,tab_path_length+1,"pair:path"); - if (me == 0) fread(tab_path,tab_path_length+1,1,fp); - MPI_Bcast(tab_path,tab_path_length+1,MPI_CHAR,0,world); - mesont_lib_SetTablePath(tab_path,tab_path_length); - mesont_lib_TPBInit(); - int M, N; - std::ifstream in(tab_path); - if (!in.is_open()) error->all(FLERR,"Incorrect table path"); - std::string tmp; - std::getline(in,tmp); - std::getline(in,tmp); - std::getline(in,tmp); - in >> M >> N; - in.close(); - mesont_lib_TPMInit(M, N); - mesont_lib_InitCNTPotModule(1, 3, 0, BendingMode, mesont_lib_get_R()); -} - -/* ---------------------------------------------------------------------- - proc 0 writes to data file -------------------------------------------------------------------------- */ - -void PairMESONTTPM::write_data(FILE *fp){ - for (int i = 1; i <= atom->ntypes; i++) - 
fprintf(fp,"%d\n",i); -} - -/* ---------------------------------------------------------------------- - proc 0 writes all pairs to data file -------------------------------------------------------------------------- */ - -void PairMESONTTPM::write_data_all(FILE *fp){ - for (int i = 1; i <= atom->ntypes; i++) - for (int j = i; j <= atom->ntypes; j++) - fprintf(fp,"%d %d %g\n",i,j,cut[i][j]); -} - -/* ---------------------------------------------------------------------- */ - -void PairMESONTTPM::init_style(){ - //make sure that a full list is created (including ghost nodes) - int r = neighbor->request(this,instance_me); - neighbor->requests[r]->half = false; - neighbor->requests[r]->full = true; - neighbor->requests[r]->ghost = true; -} - -void* PairMESONTTPM::extract(const char *str, int &){ - if (strcmp(str,"mesonttpm_Es_tot") == 0) return &energy_s; - else if (strcmp(str,"mesonttpm_Eb_tot") == 0) return &energy_b; - else if (strcmp(str,"mesonttpm_Et_tot") == 0) return &energy_t; - else if (strcmp(str,"mesonttpm_Es") == 0) return eatom_s; - else if (strcmp(str,"mesonttpm_Eb") == 0) return eatom_b; - else if (strcmp(str,"mesonttpm_Et") == 0) return eatom_t; - else return nullptr; -}; +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://lammps.sandia.gov/, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+ + Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu +------------------------------------------------------------------------- */ + +#include "pair_mesont_tpm.h" +#include "export_mesont.h" + + +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "error.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" + +#include +#include +#include + +#include +#include +#include + +using namespace LAMMPS_NS; + +//since LAMMPS is compiled with C++ 2003, define a substitution for std::array +template +class array2003{ +public: + T& operator[] (int idx){ return data[idx];}; + const T& operator[] (int idx) const{ return data[idx];}; +private: + T data[N]; +}; + + +class MESONTList { +public: + MESONTList(const Atom* atom, const NeighList* nblist, double rc2); + ~MESONTList() {}; + //list of segments + const std::vector >& get_segments() const; + //list of triplets + const std::vector >& get_triplets() const; + //list of neighbor chains [start,end] for segments + //(use idx() to get real indexes) + const std::vector > >& get_nbs() const; + //convert idx from sorted representation to real idx + int get_idx(int idx) const; + //return list of indexes for conversion from sorted representation + const std::vector& get_idx_list() const; + //convert idx from real idx to sorted representation + int get_idxb(int idx) const; + //return list of indexes for conversion to sorted representation + const std::vector& get_idxb_list() const; + //check if the node is the end of the tube + bool is_end(int idx) const; + + array2003 get_segment(int idx) const; + array2003 get_triplet(int idx) const; + + static const int cnt_end = -1; + static const int domain_end = -2; + static const int not_cnt = -3; +private: + std::vector > chain_list, segments; + std::vector > triplets; + std::vector > > nb_chains; + std::vector index_list, index_list_b; +}; + +//============================================================================= + 
+inline const std::vector > > & + MESONTList::get_nbs() const { + return nb_chains; +} + +inline int MESONTList::get_idx(int idx) const { + return index_list[idx]; +} + +inline const std::vector& MESONTList::get_idx_list() const { + return index_list; +}; + + +inline int MESONTList::get_idxb(int idx) const { + return index_list_b[idx]; +} + +inline const std::vector& MESONTList::get_idxb_list() const { + return index_list_b; +}; + +inline const std::vector > & MESONTList::get_segments() + const { + return segments; +} + +inline const std::vector > & MESONTList::get_triplets() + const { + return triplets; +} + +inline array2003 MESONTList::get_segment(int idx) const { + array2003 result; + result[0] = chain_list[idx][0]; + result[1] = idx; + return result; +} + +inline array2003 MESONTList::get_triplet(int idx) const { + array2003 result; + result[0] = chain_list[idx][0]; + result[1] = idx; + result[2] = chain_list[idx][1]; + return result; +} + +inline bool MESONTList::is_end(int idx) const { + return chain_list[idx][0] == cnt_end || chain_list[idx][1] == cnt_end; +}; + +template +void vector_union(std::vector& v1, std::vector& v2, + std::vector& merged) { + std::sort(v1.begin(), v1.end()); + std::sort(v2.begin(), v2.end()); + merged.reserve(v1.size() + v2.size()); + typename std::vector::iterator it1 = v1.begin(); + typename std::vector::iterator it2 = v2.begin(); + + while (it1 != v1.end() && it2 != v2.end()) { + if (*it1 < *it2) { + if (merged.empty() || merged.back() < *it1) merged.push_back(*it1); + ++it1; + } + else { + if (merged.empty() || merged.back() < *it2) merged.push_back(*it2); + ++it2; + } + } + while (it1 != v1.end()) { + if (merged.empty() || merged.back() < *it1) merged.push_back(*it1); + ++it1; + } + + while (it2 != v2.end()) { + if (merged.empty() || merged.back() < *it2) merged.push_back(*it2); + ++it2; + } +} + +MESONTList::MESONTList(const Atom* atom, const NeighList* nblist, double /* rc2 */){ + if (atom == nullptr || nblist == nullptr) 
return; + //number of local atoms at the node + int nlocal = atom->nlocal; + //total number of atoms in the node and ghost shell + int nall = nblist->inum + nblist->gnum; + int ntot = atom->nlocal + atom->nghost; + tagint* const g_id = atom->tag; + tagint** const bonds = atom->bond_nt; + tagint* const chain_id = atom->molecule; + int* ilist = nblist->ilist; + + //convert bonds to local id representation + array2003 tmp_arr; + tmp_arr[0] = not_cnt; tmp_arr[1] = not_cnt; + chain_list.resize(ntot, tmp_arr); + for (int ii = 0; ii < nall; ii++) { + int i = ilist[ii]; + chain_list[i][0] = domain_end; + chain_list[i][1] = domain_end; + } + for (int ii = 0; ii < nall; ii++) { + int i = ilist[ii]; + int nnb = nblist->numneigh[i]; + for (int m = 0; m < 2; m++) + if (bonds[i][m] == cnt_end) chain_list[i][m] = cnt_end; + for (int j = 0; j < nnb; j++) { + int nb = nblist->firstneigh[i][j]; + if (bonds[i][0] == g_id[nb]){ + chain_list[i][0] = nb; + chain_list[nb][1] = i; + break; + } + } + } + + //reorder chains: index list + //list of indexes for conversion FROM reordered representation + index_list.reserve(nall); + index_list_b.resize(ntot, -1); // convert index TO reordered representation + for (int i = 0; i < ntot; i++) { + if (chain_list[i][0] == cnt_end || chain_list[i][0] == domain_end) { + index_list.push_back(i); + index_list_b[i] = index_list.size() - 1; + int idx = i; + while (1) { + idx = chain_list[idx][1]; + if (idx == cnt_end || idx == domain_end) break; + else index_list.push_back(idx); + index_list_b[idx] = index_list.size() - 1; + } + } + } + + //segment list + for (int i = 0; i < nlocal; i++) { + if (chain_list[i][0] == not_cnt) continue; + if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end && + g_id[i] < g_id[chain_list[i][0]]){ + array2003 tmp_c; + tmp_c[0] = i; tmp_c[1] = chain_list[i][0]; + segments.push_back(tmp_c); + } + if (chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end && + g_id[i] < g_id[chain_list[i][1]]){ + array2003 
tmp_c; + tmp_c[0] = i; tmp_c[1] = chain_list[i][1]; + segments.push_back(tmp_c); + } + } + int nbonds = segments.size(); + + //triplets + for (int i = 0; i < nlocal; i++){ + if (chain_list[i][0] == not_cnt) continue; + if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end && + chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end) + triplets.push_back(get_triplet(i)); + } + + //segment neighbor list + nb_chains.resize(nbonds); + std::vector nb_list_i[2], nb_list; + for (int i = 0; i < nbonds; i++) { + //union of nb lists + for (int m = 0; m < 2; m++) { + nb_list_i[m].resize(0); + int idx = segments[i][m]; + if (idx >= nlocal) continue; + int nnb = nblist->numneigh[idx]; + for (int j = 0; j < nnb; j++) { + int jdx = nblist->firstneigh[idx][j]; + //no self interactions for nbs within the same tube + if (chain_id[jdx] == chain_id[idx] && + std::abs(index_list_b[idx] - index_list_b[jdx]) <= 5) continue; + nb_list_i[m].push_back(index_list_b[jdx]); + } + } + vector_union(nb_list_i[0], nb_list_i[1], nb_list); + + int nnb = nb_list.size(); + if (nnb > 0) { + int idx_s = nb_list[0]; + for (int j = 0; j < nnb; j++) { + //if nodes are not continuous in the sorted representation + //or represent chain ends, create a new neighbor chain + int idx_next = chain_list[index_list[nb_list[j]]][1]; + if ((j == nnb - 1) || (nb_list[j] + 1 != nb_list[j+1]) || + (idx_next == cnt_end) || (idx_next == domain_end)) { + array2003 chain; + chain[0] = idx_s; + chain[1] = nb_list[j]; + //make sure that segments having at least one node + //in the neighbor list are included + int idx0 = index_list[chain[0]]; // real id of the ends + int idx1 = index_list[chain[1]]; + if (chain_list[idx0][0] != cnt_end && + chain_list[idx0][0] != domain_end) chain[0] -= 1; + if (chain_list[idx1][1] != cnt_end && + chain_list[idx1][1] != domain_end) chain[1] += 1; + if(chain[0] != chain[1]) nb_chains[i].push_back(chain); + idx_s = (j == nnb - 1) ? 
-1 : nb_list[j + 1]; + } + } + } + nb_list.resize(0); + } +} + +/* ---------------------------------------------------------------------- */ + +// the cutoff distance between walls of tubes +static const double TPBRcutoff = 3.0*3.4; +int PairMESONTTPM::instance_count = 0; +/* ---------------------------------------------------------------------- */ + +PairMESONTTPM::PairMESONTTPM(LAMMPS *lmp) : Pair(lmp) { + writedata=1; + BendingMode = 0; // Harmonic bending model + TPMType = 0; // Inter-tube segment-segment interaction + tab_path = nullptr; + tab_path_length = 0; + + eatom_s = nullptr; + eatom_b = nullptr; + eatom_t = nullptr; + instance_count++; + if(instance_count > 1) error->all(FLERR, + "only a single instance of mesont/tpm pair style can be created"); +} + +/* ---------------------------------------------------------------------- */ + +PairMESONTTPM::~PairMESONTTPM() +{ + if (allocated) { + memory->destroy(setflag); + memory->destroy(cutsq); + memory->destroy(cut); + + memory->destroy(eatom_s); + memory->destroy(eatom_b); + memory->destroy(eatom_t); + } + instance_count--; + if (tab_path != nullptr) memory->destroy(tab_path); +} + +/* ---------------------------------------------------------------------- */ + +void PairMESONTTPM::compute(int eflag, int vflag){ + ev_init(eflag,vflag); + //total number of atoms in the node and ghost shell + int nall = list->inum + list->gnum; + int ntot = atom->nlocal + atom->nghost; + int newton_pair = force->newton_pair; + if(!newton_pair) + error->all(FLERR,"Pair style mesont/tpm requires newton pair on"); + + double **x = atom->x; + double **f = atom->f; + double *r = atom->radius; + double *l = atom->length; + int *buckling = atom->buckling; + tagint *g_id = atom->tag; + + //check if cutoff is chosen correctly + double RT = mesont_lib_get_R(); + double Lmax = 0.0; + for (int ii = 0; ii < list->inum; ii++) { + int i = list->ilist[ii]; + if (Lmax < l[i]) Lmax = l[i]; + } + double Rcut_min = std::max(2.0*Lmax, 
std::sqrt(0.5*Lmax*Lmax + + std::pow((2.0*RT + TPBRcutoff),2))); + if (cut_global < Rcut_min){ + std::stringstream err; + err << "The selected cutoff is too small for the current system : " << + "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global << + ", Rcut_min = " << Rcut_min; + error->all(FLERR, err.str().c_str()); + } + + //generate bonds and chain nblist + MESONTList ntlist(atom, list, cut_global*cut_global); + + //reorder data to make it contiguous within tubes + //and compatible with Fortran functions + std::vector x_sort(3*nall), f_sort(3*nall), s_sort(9*nall); + std::vector u_ts_sort(nall), u_tb_sort(nall), u_tt_sort(nall); + std::vector b_sort(nall); + for (int i = 0; i < nall; i++){ + int idx = ntlist.get_idx(i); + for (int j = 0; j < 3; j++) x_sort[3*i+j] = x[idx][j]; + b_sort[i] = buckling[idx]; + } + + //bending potential + int n_triplets = ntlist.get_triplets().size(); + for (int i = 0; i < n_triplets; i++) { + const array2003& t = ntlist.get_triplets()[i]; + //idx of nodes of a triplet in sorted representation + int idx_s0 = ntlist.get_idxb(t[0]); + int idx_s1 = ntlist.get_idxb(t[1]); + int idx_s2 = ntlist.get_idxb(t[2]); + + double* X1 = &(x_sort[3*idx_s0]); + double* X2 = &(x_sort[3*idx_s1]); + double* X3 = &(x_sort[3*idx_s2]); + double& U1b = u_tb_sort[idx_s0]; + double& U2b = u_tb_sort[idx_s1]; + double& U3b = u_tb_sort[idx_s2]; + double* F1 = &(f_sort[3*idx_s0]); + double* F2 = &(f_sort[3*idx_s1]); + double* F3 = &(f_sort[3*idx_s2]); + double* S1 = &(s_sort[9*idx_s0]); + double* S2 = &(s_sort[9*idx_s1]); + double* S3 = &(s_sort[9*idx_s2]); + double& R123 = r[t[1]]; + double& L123 = l[t[1]]; + int& BBF2 = b_sort[idx_s1]; + + mesont_lib_TubeBendingForceField(U1b, U2b, U3b, F1, F2, F3, S1, S2, S3, + X1, X2, X3, R123, L123, BBF2); + } + + //share new values of buckling + if (BendingMode == 1){ + for (int i = 0; i < nall; i++){ + int idx = ntlist.get_idx(i); + buckling[idx] = b_sort[i]; + } + comm->forward_comm_pair(this); + for 
(int i = 0; i < nall; i++){ + int idx = ntlist.get_idx(i); + b_sort[i] = buckling[idx]; + } + } + + //segment-segment and segment-tube interactions + int n_segments = ntlist.get_segments().size(); + double Rmax = 0.0; + Lmax = 0.0; + for (int i = 0; i < n_segments; i++) { + const array2003& s = ntlist.get_segments()[i]; + //idx of a segment end 1 in sorted representation + int idx_s0 = ntlist.get_idxb(s[0]); + //idx of a segment end 2 in sorted representation + int idx_s1 = ntlist.get_idxb(s[1]); + double* X1 = &(x_sort[3*idx_s0]); + double* X2 = &(x_sort[3*idx_s1]); + double length = std::sqrt(std::pow(X1[0]-X2[0],2) + + std::pow(X1[1]-X2[1],2) + std::pow(X1[2]-X2[2],2)); + if (length > Lmax) Lmax = length; + double& U1t = u_tt_sort[idx_s0]; + double& U2t = u_tt_sort[idx_s1]; + double& U1s = u_ts_sort[idx_s0]; + double& U2s = u_ts_sort[idx_s1]; + double* F1 = &(f_sort[3*idx_s0]); + double* F2 = &(f_sort[3*idx_s1]); + double* S1 = &(s_sort[9*idx_s0]); + double* S2 = &(s_sort[9*idx_s1]); + double R12 = r[s[0]]; if (R12 > Rmax) Rmax = R12; + if (std::abs(R12 - RT) > 1e-3) + error->all(FLERR,"Inconsistent input and potential table"); + //assume that the length of the segment is defined by the node with + //smallest global id + double L12 = (g_id[s[0]] > g_id[s[1]]) ? 
l[s[1]] : l[s[0]]; + mesont_lib_TubeStretchingForceField(U1s, U2s, F1, F2, S1, S2, X1, X2, + R12, L12); + + for (int nc = 0; nc < (int)ntlist.get_nbs()[i].size(); nc++){ + //id of the beginning and end of the chain in the sorted representation + const array2003& chain = ntlist.get_nbs()[i][nc]; + int N = chain[1] - chain[0] + 1; //number of elements in the chain + int end1 = ntlist.get_idx(chain[0]); //chain ends (real representation) + int end2 = ntlist.get_idx(chain[1]); + double* X = &(x_sort[3*chain[0]]); + double* Ut = &(u_tt_sort[chain[0]]); + double* F = &(f_sort[3*chain[0]]); + double* S = &(s_sort[9*chain[0]]); + double R = r[end1]; + int* BBF = &(b_sort[chain[0]]); + int E1 = ntlist.is_end(end1); + int E2 = ntlist.is_end(end2); + + int Ee = 0; + double* Xe = X; double* Fe = F; double* Se = S; + if (!E1 && ntlist.get_triplet(end1)[0] != MESONTList::domain_end && + ntlist.get_triplet(ntlist.get_triplet(end1)[0])[0] == + MESONTList::cnt_end){ + Ee = 1; + int idx = ntlist.get_idxb(ntlist.get_triplet(end1)[0]); + Xe = &(x_sort[3*idx]); + Fe = &(f_sort[3*idx]); + Se = &(s_sort[9*idx]); + } + else if (!E2 && ntlist.get_triplet(end2)[2] != MESONTList::domain_end && + ntlist.get_triplet(ntlist.get_triplet(end2)[2])[2] == + MESONTList::cnt_end){ + Ee = 2; + int idx = ntlist.get_idxb(ntlist.get_triplet(end2)[2]); + Xe = &(x_sort[3*idx]); + Fe = &(f_sort[3*idx]); + Se = &(s_sort[9*idx]); + } + + mesont_lib_SegmentTubeForceField(U1t, U2t, Ut, F1, F2, F, Fe, S1, S2, S, + Se, X1, X2, R12, N, X, Xe, BBF, R, E1, E2, Ee, TPMType); + } + } + + //check if cutoff is chosen correctly + Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax + + std::pow((2.0*Rmax + TPBRcutoff),2))); + if (cut_global < Rcut_min){ + std::stringstream err; + err << "The selected cutoff is too small for the current system : " << + "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global << + ", Rcut_min = " << Rcut_min; + error->all(FLERR, err.str().c_str()); + } + + // set per atom 
values and accumulators + // reallocate per-atom arrays if necessary + if (eatom_s == nullptr) + memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s"); + if (eatom_b == nullptr) + memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b"); + if (eatom_t == nullptr) + memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t"); + if (atom->nmax > maxeatom) { + maxeatom = atom->nmax; + memory->destroy(eatom); + memory->create(eatom,comm->nthreads*maxeatom,"pair:eatom"); + memory->destroy(eatom_s); + memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s"); + memory->destroy(eatom_b); + memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b"); + memory->destroy(eatom_t); + memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t"); + } + + if (atom->nmax > maxvatom) { + maxvatom = atom->nmax; + memory->destroy(vatom); + memory->create(vatom,comm->nthreads*maxvatom,6,"pair:vatom"); + } + + // zero accumulators + eng_vdwl = 0.0; energy_s = 0.0; + energy_b = 0.0; energy_t = 0.0; + for (int i = 0; i < 6; i++) virial[i] = 0.0; + for (int i = 0; i < ntot; i++){ + eatom[i] = 0.0; eatom_s[i] = 0.0; + eatom_b[i] = 0.0; eatom_t[i] = 0.0; + } + for (int i = 0; i < ntot; i++) + for (int j = 0; j < 6; j++) vatom[i][j] = 0.0; + + //convert from sorted representation + for (int i = 0; i < nall; i++){ + int idx = ntlist.get_idx(i); + for (int j = 0; j < 3; j++) f[idx][j] += f_sort[3*i+j]; + eatom_s[idx] = u_ts_sort[i]; + eatom_b[idx] = u_tb_sort[i]; + eatom_t[idx] = u_tt_sort[i]; + eatom[idx] = u_ts_sort[i] + u_tb_sort[i] + u_tt_sort[i]; + energy_s += u_ts_sort[i]; + energy_b += u_tb_sort[i]; + energy_t += u_tt_sort[i]; + vatom[idx][0] = s_sort[9*i+0]; //xx + vatom[idx][1] = s_sort[9*i+4]; //yy + vatom[idx][2] = s_sort[9*i+8]; //zz + vatom[idx][3] = s_sort[9*i+1]; //xy + vatom[idx][4] = s_sort[9*i+2]; //xz + vatom[idx][5] = s_sort[9*i+5]; //yz + for (int j = 0; j < 6; j++) virial[j] += vatom[idx][j]; + buckling[idx] = b_sort[i]; + } + eng_vdwl = 
energy_s + energy_b + energy_t; +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +void PairMESONTTPM::allocate(){ + allocated = 1; + int n = atom->ntypes; + + memory->create(setflag,n+1,n+1,"pair:setflag"); + for (int i = 1; i <= n; i++) + for (int j = i; j <= n; j++) + setflag[i][j] = 0; + + memory->create(cutsq,n+1,n+1,"pair:cutsq"); + memory->create(cut,n+1,n+1,"pair:cut"); +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void PairMESONTTPM::settings(int narg, char **arg){ + if ((narg == 0) || (narg > 4)) + error->all(FLERR,"Illegal pair_style command"); + cut_global = utils::numeric(FLERR,arg[0],false,lmp); + + // reset cutoffs that have been explicitly set + if (allocated) { + int i,j; + for (i = 1; i <= atom->ntypes; i++) + for (j = i+1; j <= atom->ntypes; j++) + cut[i][j] = cut_global; + } + std::string TPMAFile = (narg > 1) ? 
arg[1] : "MESONT-TABTP.xrs"; + tab_path_length = TPMAFile.length(); + if (tab_path != nullptr) memory->destroy(tab_path); + //c_str returns '\0' terminated string + memory->create(tab_path,tab_path_length+1,"pair:path"); + std::memcpy(tab_path, TPMAFile.c_str(), tab_path_length+1); + mesont_lib_SetTablePath(tab_path, tab_path_length); + + if (narg > 2) { + BendingMode = utils::numeric(FLERR,arg[2],false,lmp); + if ((BendingMode < 0) || (BendingMode > 1)) + error->all(FLERR,"Incorrect BendingMode"); + } + if (narg > 3) { + TPMType = utils::numeric(FLERR,arg[3],false,lmp); + if ((TPMType < 0) || (TPMType > 1)) + error->all(FLERR,"Incorrect TPMType"); + } + + mesont_lib_TPBInit(); + int M, N; + std::ifstream in(TPMAFile); + if (!in.is_open()) error->all(FLERR,"Incorrect table path"); + std::string tmp; + std::getline(in,tmp); + std::getline(in,tmp); + std::getline(in,tmp); + in >> M >> N; + in.close(); + mesont_lib_TPMInit(M, N); + mesont_lib_InitCNTPotModule(1, 3, 0, BendingMode, mesont_lib_get_R()); +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +void PairMESONTTPM::coeff(int narg, char **arg){ + if ((narg < 2) || (narg > 3)) + error->all(FLERR,"Incorrect args for pair coefficients"); + + if (!allocated) allocate(); + + int ilo,ihi,jlo,jhi; + utils::bounds(FLERR,arg[0],1,atom->ntypes,ilo,ihi,error); + utils::bounds(FLERR,arg[1],1,atom->ntypes,jlo,jhi,error); + + double cut_one = cut_global; + if (narg == 3) cut_one = utils::numeric(FLERR,arg[2],false,lmp); + + int count = 0; + for (int i = ilo; i <= ihi; i++) { + for (int j = MAX(jlo,i); j <= jhi; j++) { + cut[i][j] = cut_one; + setflag[i][j] = 1; + count++; + } + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and 
corresponding j,i +------------------------------------------------------------------------- */ + +double PairMESONTTPM::init_one(int i, int j){ + if (setflag[i][j] == 0) { + cut[i][j] = mix_distance(cut[i][i],cut[j][j]); + } + + return cut[i][j]; +} + +/* ---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +void PairMESONTTPM::write_restart(FILE *fp){ + write_restart_settings(fp); + + int i,j; + for (i = 1; i <= atom->ntypes; i++) + for (j = i; j <= atom->ntypes; j++) { + fwrite(&setflag[i][j],sizeof(int),1,fp); + if (setflag[i][j]) { + fwrite(&cut[i][j],sizeof(double),1,fp); + } + } +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts +------------------------------------------------------------------------- */ + +void PairMESONTTPM::read_restart(FILE *fp){ + read_restart_settings(fp); + allocate(); + + int i,j; + int me = comm->me; + for (i = 1; i <= atom->ntypes; i++) + for (j = i; j <= atom->ntypes; j++) { + if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp); + MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world); + if (setflag[i][j]) { + if (me == 0) { + fread(&cut[i][j],sizeof(double),1,fp); + } + MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world); + } + } +} + +/* ---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +void PairMESONTTPM::write_restart_settings(FILE *fp){ + fwrite(&BendingMode,sizeof(int),1,fp); + fwrite(&TPMType,sizeof(int),1,fp); + fwrite(&cut_global,sizeof(double),1,fp); + fwrite(&tab_path_length,sizeof(int),1,fp); + fwrite(tab_path,tab_path_length+1,1,fp); +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts 
+------------------------------------------------------------------------- */ + +void PairMESONTTPM::read_restart_settings(FILE *fp){ + int me = comm->me; + if (me == 0) { + fread(&BendingMode,sizeof(int),1,fp); + fread(&TPMType,sizeof(int),1,fp); + fread(&cut_global,sizeof(double),1,fp); + fread(&tab_path_length,sizeof(int),1,fp); + } + MPI_Bcast(&BendingMode,1,MPI_INT,0,world); + MPI_Bcast(&TPMType,1,MPI_INT,0,world); + MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world); + MPI_Bcast(&tab_path_length,1,MPI_INT,0,world); + + if (tab_path != nullptr) memory->destroy(tab_path); + memory->create(tab_path,tab_path_length+1,"pair:path"); + if (me == 0) fread(tab_path,tab_path_length+1,1,fp); + MPI_Bcast(tab_path,tab_path_length+1,MPI_CHAR,0,world); + mesont_lib_SetTablePath(tab_path,tab_path_length); + mesont_lib_TPBInit(); + int M, N; + std::ifstream in(tab_path); + if (!in.is_open()) error->all(FLERR,"Incorrect table path"); + std::string tmp; + std::getline(in,tmp); + std::getline(in,tmp); + std::getline(in,tmp); + in >> M >> N; + in.close(); + mesont_lib_TPMInit(M, N); + mesont_lib_InitCNTPotModule(1, 3, 0, BendingMode, mesont_lib_get_R()); +} + +/* ---------------------------------------------------------------------- + proc 0 writes to data file +------------------------------------------------------------------------- */ + +void PairMESONTTPM::write_data(FILE *fp){ + for (int i = 1; i <= atom->ntypes; i++) + fprintf(fp,"%d\n",i); +} + +/* ---------------------------------------------------------------------- + proc 0 writes all pairs to data file +------------------------------------------------------------------------- */ + +void PairMESONTTPM::write_data_all(FILE *fp){ + for (int i = 1; i <= atom->ntypes; i++) + for (int j = i; j <= atom->ntypes; j++) + fprintf(fp,"%d %d %g\n",i,j,cut[i][j]); +} + +/* ---------------------------------------------------------------------- */ + +void PairMESONTTPM::init_style(){ + //make sure that a full list is created (including ghost 
nodes) + int r = neighbor->request(this,instance_me); + neighbor->requests[r]->half = false; + neighbor->requests[r]->full = true; + neighbor->requests[r]->ghost = true; +} + +void* PairMESONTTPM::extract(const char *str, int &){ + if (strcmp(str,"mesonttpm_Es_tot") == 0) return &energy_s; + else if (strcmp(str,"mesonttpm_Eb_tot") == 0) return &energy_b; + else if (strcmp(str,"mesonttpm_Et_tot") == 0) return &energy_t; + else if (strcmp(str,"mesonttpm_Es") == 0) return eatom_s; + else if (strcmp(str,"mesonttpm_Eb") == 0) return eatom_b; + else if (strcmp(str,"mesonttpm_Et") == 0) return eatom_t; + else return nullptr; +}; diff --git a/src/USER-MESONT/pair_mesont_tpm.h b/src/USER-MESONT/pair_mesont_tpm.h index c3d71ae953..704556d75e 100644 --- a/src/USER-MESONT/pair_mesont_tpm.h +++ b/src/USER-MESONT/pair_mesont_tpm.h @@ -1,99 +1,98 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
- - Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(mesont/tpm,PairMESONTTPM) - -#else - -#ifndef LMP_PAIR_MESONT_TPM_H -#define LMP_PAIR_MESONT_TPM_H - -#include "pair.h" - -namespace LAMMPS_NS { - -class PairMESONTTPM : public Pair { - public: - PairMESONTTPM(class LAMMPS *); - virtual ~PairMESONTTPM(); - virtual void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - double init_one(int, int); - void write_restart(FILE *); - void read_restart(FILE *); - void write_restart_settings(FILE *); - void read_restart_settings(FILE *); - void write_data(FILE *); - void write_data_all(FILE *); - virtual void init_style(); - - double energy_s; // accumulated energies for stretching - double energy_b; // accumulated energies for bending - double energy_t; // accumulated energies for tube-tube interaction - double *eatom_s, *eatom_b, *eatom_t; // accumulated per-atom values - - protected: - int BendingMode, TPMType; - char* tab_path; - int tab_path_length; - double cut_global; - double **cut; - static int instance_count; - int nmax; - - virtual void allocate(); - virtual void *extract(const char *, int &); -}; - -} - -#endif -#endif - -/* ERROR/WARNING messages: - -E: Pair style mesont/tpm requires newton pair on - -newton_pair must be set to on - -E: The selected cutoff is too small for the current system - -cutoff must be increased. - -E: Illegal pair_style command - -Incorrect argument list in the style init. - -E: Incorrect table path - -Incorrect path to the table files. - -E: Incorrect BendingMode - -Self-explanatory. - -E: Incorrect TPMType - -Self-explanatory. - -E: Inconsistent input and potential table - -The tube diameter is inconsistent with the chirality specified -during generation of the potential table. 
- -*/ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. + + Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(mesont/tpm,PairMESONTTPM) + +#else + +#ifndef LMP_PAIR_MESONT_TPM_H +#define LMP_PAIR_MESONT_TPM_H + +#include "pair.h" + +namespace LAMMPS_NS { + +class PairMESONTTPM : public Pair { + public: + PairMESONTTPM(class LAMMPS *); + virtual ~PairMESONTTPM(); + virtual void compute(int, int); + void settings(int, char **); + void coeff(int, char **); + double init_one(int, int); + void write_restart(FILE *); + void read_restart(FILE *); + void write_restart_settings(FILE *); + void read_restart_settings(FILE *); + void write_data(FILE *); + void write_data_all(FILE *); + virtual void init_style(); + + double energy_s; // accumulated energies for stretching + double energy_b; // accumulated energies for bending + double energy_t; // accumulated energies for tube-tube interaction + double *eatom_s, *eatom_b, *eatom_t; // accumulated per-atom values + + protected: + int BendingMode, TPMType; + char* tab_path; + int tab_path_length; + double cut_global; + double **cut; + static int instance_count; + + virtual void allocate(); + virtual void *extract(const char *, int &); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Pair style mesont/tpm requires newton pair on + +newton_pair must be set to on + +E: The selected cutoff is too small for the 
current system + +cutoff must be increased. + +E: Illegal pair_style command + +Incorrect argument list in the style init. + +E: Incorrect table path + +Incorrect path to the table files. + +E: Incorrect BendingMode + +Self-explanatory. + +E: Incorrect TPMType + +Self-explanatory. + +E: Inconsistent input and potential table + +The tube diameter is inconsistent with the chirality specified +during generation of the potential table. + +*/ From 62c7aca26fb5eab910382bcefdff0ea66f38af94 Mon Sep 17 00:00:00 2001 From: iafoss Date: Mon, 2 Nov 2020 16:35:50 -0500 Subject: [PATCH 36/64] fix bug with memory allocation fix bug with eatom_s, eatom_b, eatom_t allocation --- src/USER-MESONT/pair_mesont_tpm.cpp | 115 +++++++++++++++------------- src/USER-MESONT/pair_mesont_tpm.h | 1 + 2 files changed, 61 insertions(+), 55 deletions(-) diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp index 9185786341..f341a73e23 100644 --- a/src/USER-MESONT/pair_mesont_tpm.cpp +++ b/src/USER-MESONT/pair_mesont_tpm.cpp @@ -311,6 +311,7 @@ PairMESONTTPM::PairMESONTTPM(LAMMPS *lmp) : Pair(lmp) { eatom_s = nullptr; eatom_b = nullptr; eatom_t = nullptr; + nmax = 0; instance_count++; if(instance_count > 1) error->all(FLERR, "only a single instance of mesont/tpm pair style can be created"); @@ -336,7 +337,17 @@ PairMESONTTPM::~PairMESONTTPM() /* ---------------------------------------------------------------------- */ void PairMESONTTPM::compute(int eflag, int vflag){ + // set per atom values and accumulators + // reallocate per-atom arrays if necessary ev_init(eflag,vflag); + if (atom->nmax > nmax) { + memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s"); + memory->destroy(eatom_b); + memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b"); + memory->destroy(eatom_t); + memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t"); + nmax = atom->nmax; + } //total number of atoms in the node and ghost shell int nall = list->inum + list->gnum; int 
ntot = atom->nlocal + atom->nghost; @@ -508,64 +519,58 @@ void PairMESONTTPM::compute(int eflag, int vflag){ error->all(FLERR, err.str().c_str()); } - // set per atom values and accumulators - // reallocate per-atom arrays if necessary - if (eatom_s == nullptr) - memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s"); - if (eatom_b == nullptr) - memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b"); - if (eatom_t == nullptr) - memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t"); - if (atom->nmax > maxeatom) { - maxeatom = atom->nmax; - memory->destroy(eatom); - memory->create(eatom,comm->nthreads*maxeatom,"pair:eatom"); - memory->destroy(eatom_s); - memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s"); - memory->destroy(eatom_b); - memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b"); - memory->destroy(eatom_t); - memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t"); - } - - if (atom->nmax > maxvatom) { - maxvatom = atom->nmax; - memory->destroy(vatom); - memory->create(vatom,comm->nthreads*maxvatom,6,"pair:vatom"); - } - - // zero accumulators - eng_vdwl = 0.0; energy_s = 0.0; - energy_b = 0.0; energy_t = 0.0; - for (int i = 0; i < 6; i++) virial[i] = 0.0; - for (int i = 0; i < ntot; i++){ - eatom[i] = 0.0; eatom_s[i] = 0.0; - eatom_b[i] = 0.0; eatom_t[i] = 0.0; - } - for (int i = 0; i < ntot; i++) - for (int j = 0; j < 6; j++) vatom[i][j] = 0.0; - //convert from sorted representation for (int i = 0; i < nall; i++){ - int idx = ntlist.get_idx(i); - for (int j = 0; j < 3; j++) f[idx][j] += f_sort[3*i+j]; - eatom_s[idx] = u_ts_sort[i]; - eatom_b[idx] = u_tb_sort[i]; - eatom_t[idx] = u_tt_sort[i]; - eatom[idx] = u_ts_sort[i] + u_tb_sort[i] + u_tt_sort[i]; - energy_s += u_ts_sort[i]; - energy_b += u_tb_sort[i]; - energy_t += u_tt_sort[i]; - vatom[idx][0] = s_sort[9*i+0]; //xx - vatom[idx][1] = s_sort[9*i+4]; //yy - vatom[idx][2] = s_sort[9*i+8]; //zz - vatom[idx][3] = s_sort[9*i+1]; //xy - vatom[idx][4] = 
s_sort[9*i+2]; //xz - vatom[idx][5] = s_sort[9*i+5]; //yz - for (int j = 0; j < 6; j++) virial[j] += vatom[idx][j]; - buckling[idx] = b_sort[i]; + int idx = ntlist.get_idx(i); + for (int j = 0; j < 3; j++) f[idx][j] += f_sort[3*i+j]; + buckling[idx] = b_sort[i]; } - eng_vdwl = energy_s + energy_b + energy_t; + if(eflag){ + eng_vdwl = 0.0; energy_s = 0.0; + energy_b = 0.0; energy_t = 0.0; + for (int i = 0; i < ntot; i++){ + eatom[i] = 0.0; eatom_s[i] = 0.0; + eatom_b[i] = 0.0; eatom_t[i] = 0.0; + } + for (int i = 0; i < nall; i++){ + int idx = ntlist.get_idx(i); + eatom_s[idx] = u_ts_sort[i]; + eatom_b[idx] = u_tb_sort[i]; + eatom_t[idx] = u_tt_sort[i]; + eatom[idx] = u_ts_sort[i] + u_tb_sort[i] + u_tt_sort[i]; + energy_s += u_ts_sort[i]; + energy_b += u_tb_sort[i]; + energy_t += u_tt_sort[i]; + } + eng_vdwl = energy_s + energy_b + energy_t; + } + if(vflag){ + for (int i = 0; i < 6; i++) virial[i] = 0.0; + for (int i = 0; i < nall; i++){ + int idx = ntlist.get_idx(i); + virial[0] += s_sort[9*i+0]; //xx + virial[1] += s_sort[9*i+4]; //yy + virial[2] += s_sort[9*i+8]; //zz + virial[3] += s_sort[9*i+1]; //xy + virial[4] += s_sort[9*i+2]; //xz + virial[5] += s_sort[9*i+5]; //yz + } + } + int vflag_atom = vflag & 4; + if(vflag_atom){ + for (int i = 0; i < ntot; i++) + for (int j = 0; j < 6; j++) vatom[i][j] = 0.0; + for (int i = 0; i < nall; i++){ + int idx = ntlist.get_idx(i); + vatom[idx][0] = s_sort[9*i+0]; //xx + vatom[idx][1] = s_sort[9*i+4]; //yy + vatom[idx][2] = s_sort[9*i+8]; //zz + vatom[idx][3] = s_sort[9*i+1]; //xy + vatom[idx][4] = s_sort[9*i+2]; //xz + vatom[idx][5] = s_sort[9*i+5]; //yz + } + } + } /* ---------------------------------------------------------------------- diff --git a/src/USER-MESONT/pair_mesont_tpm.h b/src/USER-MESONT/pair_mesont_tpm.h index 704556d75e..a18e555349 100644 --- a/src/USER-MESONT/pair_mesont_tpm.h +++ b/src/USER-MESONT/pair_mesont_tpm.h @@ -54,6 +54,7 @@ class PairMESONTTPM : public Pair { double cut_global; double **cut; 
static int instance_count; + int nmax; virtual void allocate(); virtual void *extract(const char *, int &); From 559d6b10cfb93f3a59b0318b991b70ed092db429 Mon Sep 17 00:00:00 2001 From: iafoss Date: Mon, 2 Nov 2020 16:39:13 -0500 Subject: [PATCH 37/64] fix bug with memory allocation --- src/USER-MESONT/pair_mesont_tpm.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp index f341a73e23..720a821aa6 100644 --- a/src/USER-MESONT/pair_mesont_tpm.cpp +++ b/src/USER-MESONT/pair_mesont_tpm.cpp @@ -341,6 +341,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){ // reallocate per-atom arrays if necessary ev_init(eflag,vflag); if (atom->nmax > nmax) { + memory->destroy(eatom_s); memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s"); memory->destroy(eatom_b); memory->create(eatom_b,comm->nthreads*maxeatom,"pair:eatom_b"); From aff54e948a5890c96864cc2d5a31b318f1909961 Mon Sep 17 00:00:00 2001 From: iafoss Date: Mon, 2 Nov 2020 18:39:34 -0500 Subject: [PATCH 38/64] eflag fix --- src/USER-MESONT/pair_mesont_tpm.cpp | 32 +++++++++++++++++------------ 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp index 720a821aa6..08b43b122f 100644 --- a/src/USER-MESONT/pair_mesont_tpm.cpp +++ b/src/USER-MESONT/pair_mesont_tpm.cpp @@ -49,7 +49,7 @@ private: class MESONTList { public: - MESONTList(const Atom* atom, const NeighList* nblist, double rc2); + MESONTList(const Atom* atom, const NeighList* nblist); ~MESONTList() {}; //list of segments const std::vector >& get_segments() const; @@ -165,12 +165,13 @@ void vector_union(std::vector& v1, std::vector& v2, } } -MESONTList::MESONTList(const Atom* atom, const NeighList* nblist, double /* rc2 */){ +MESONTList::MESONTList(const Atom* atom, const NeighList* nblist){ if (atom == nullptr || nblist == nullptr) return; //number of local atoms at the node int nlocal = 
atom->nlocal; - //total number of atoms in the node and ghost shell + //total number of atoms in the node and ghost shell treated as NTs int nall = nblist->inum + nblist->gnum; + //total number of atoms in the node and ghost shell int ntot = atom->nlocal + atom->nghost; tagint* const g_id = atom->tag; tagint** const bonds = atom->bond_nt; @@ -340,7 +341,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){ // set per atom values and accumulators // reallocate per-atom arrays if necessary ev_init(eflag,vflag); - if (atom->nmax > nmax) { + if (atom->nmax > nmax && eflag_atom) { memory->destroy(eatom_s); memory->create(eatom_s,comm->nthreads*maxeatom,"pair:eatom_s"); memory->destroy(eatom_b); @@ -349,8 +350,9 @@ void PairMESONTTPM::compute(int eflag, int vflag){ memory->create(eatom_t,comm->nthreads*maxeatom,"pair:eatom_t"); nmax = atom->nmax; } - //total number of atoms in the node and ghost shell + //total number of atoms in the node and ghost shell treated as NTs int nall = list->inum + list->gnum; + //total number of atoms in the node and ghost shell int ntot = atom->nlocal + atom->nghost; int newton_pair = force->newton_pair; if(!newton_pair) @@ -381,7 +383,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){ } //generate bonds and chain nblist - MESONTList ntlist(atom, list, cut_global*cut_global); + MESONTList ntlist(atom, list); //reorder data to make it contiguous within tubes //and compatible with Fortran functions @@ -526,9 +528,18 @@ void PairMESONTTPM::compute(int eflag, int vflag){ for (int j = 0; j < 3; j++) f[idx][j] += f_sort[3*i+j]; buckling[idx] = b_sort[i]; } - if(eflag){ + if(eflag_global){ eng_vdwl = 0.0; energy_s = 0.0; energy_b = 0.0; energy_t = 0.0; + for (int i = 0; i < nall; i++){ + int idx = ntlist.get_idx(i); + energy_s += u_ts_sort[i]; + energy_b += u_tb_sort[i]; + energy_t += u_tt_sort[i]; + } + eng_vdwl = energy_s + energy_b + energy_t; + } + if(eflag_atom){ for (int i = 0; i < ntot; i++){ eatom[i] = 0.0; eatom_s[i] = 0.0; 
eatom_b[i] = 0.0; eatom_t[i] = 0.0; @@ -539,13 +550,9 @@ void PairMESONTTPM::compute(int eflag, int vflag){ eatom_b[idx] = u_tb_sort[i]; eatom_t[idx] = u_tt_sort[i]; eatom[idx] = u_ts_sort[i] + u_tb_sort[i] + u_tt_sort[i]; - energy_s += u_ts_sort[i]; - energy_b += u_tb_sort[i]; - energy_t += u_tt_sort[i]; } - eng_vdwl = energy_s + energy_b + energy_t; } - if(vflag){ + if(vflag_global){ for (int i = 0; i < 6; i++) virial[i] = 0.0; for (int i = 0; i < nall; i++){ int idx = ntlist.get_idx(i); @@ -557,7 +564,6 @@ void PairMESONTTPM::compute(int eflag, int vflag){ virial[5] += s_sort[9*i+5]; //yz } } - int vflag_atom = vflag & 4; if(vflag_atom){ for (int i = 0; i < ntot; i++) for (int j = 0; j < 6; j++) vatom[i][j] = 0.0; From 4d19b8bf3ad955837740ce7901510c3034a881f2 Mon Sep 17 00:00:00 2001 From: iafoss Date: Mon, 2 Nov 2020 19:38:56 -0500 Subject: [PATCH 39/64] stype adjustment --- src/USER-MESONT/pair_mesont_tpm.cpp | 82 ++++++++++++++--------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp index 08b43b122f..2fe5b2036f 100644 --- a/src/USER-MESONT/pair_mesont_tpm.cpp +++ b/src/USER-MESONT/pair_mesont_tpm.cpp @@ -40,7 +40,7 @@ using namespace LAMMPS_NS; template class array2003{ public: - T& operator[] (int idx){ return data[idx];}; + T& operator[] (int idx) { return data[idx];}; const T& operator[] (int idx) const{ return data[idx];}; private: T data[N]; @@ -165,7 +165,7 @@ void vector_union(std::vector& v1, std::vector& v2, } } -MESONTList::MESONTList(const Atom* atom, const NeighList* nblist){ +MESONTList::MESONTList(const Atom* atom, const NeighList* nblist) { if (atom == nullptr || nblist == nullptr) return; //number of local atoms at the node int nlocal = atom->nlocal; @@ -194,7 +194,7 @@ MESONTList::MESONTList(const Atom* atom, const NeighList* nblist){ if (bonds[i][m] == cnt_end) chain_list[i][m] = cnt_end; for (int j = 0; j < nnb; j++) { int nb = 
nblist->firstneigh[i][j]; - if (bonds[i][0] == g_id[nb]){ + if (bonds[i][0] == g_id[nb]) { chain_list[i][0] = nb; chain_list[nb][1] = i; break; @@ -224,13 +224,13 @@ MESONTList::MESONTList(const Atom* atom, const NeighList* nblist){ for (int i = 0; i < nlocal; i++) { if (chain_list[i][0] == not_cnt) continue; if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end && - g_id[i] < g_id[chain_list[i][0]]){ + g_id[i] < g_id[chain_list[i][0]]) { array2003 tmp_c; tmp_c[0] = i; tmp_c[1] = chain_list[i][0]; segments.push_back(tmp_c); } if (chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end && - g_id[i] < g_id[chain_list[i][1]]){ + g_id[i] < g_id[chain_list[i][1]]) { array2003 tmp_c; tmp_c[0] = i; tmp_c[1] = chain_list[i][1]; segments.push_back(tmp_c); @@ -239,7 +239,7 @@ MESONTList::MESONTList(const Atom* atom, const NeighList* nblist){ int nbonds = segments.size(); //triplets - for (int i = 0; i < nlocal; i++){ + for (int i = 0; i < nlocal; i++) { if (chain_list[i][0] == not_cnt) continue; if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end && chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end) @@ -286,7 +286,7 @@ MESONTList::MESONTList(const Atom* atom, const NeighList* nblist){ chain_list[idx0][0] != domain_end) chain[0] -= 1; if (chain_list[idx1][1] != cnt_end && chain_list[idx1][1] != domain_end) chain[1] += 1; - if(chain[0] != chain[1]) nb_chains[i].push_back(chain); + if (chain[0] != chain[1]) nb_chains[i].push_back(chain); idx_s = (j == nnb - 1) ? 
-1 : nb_list[j + 1]; } } @@ -314,7 +314,7 @@ PairMESONTTPM::PairMESONTTPM(LAMMPS *lmp) : Pair(lmp) { eatom_t = nullptr; nmax = 0; instance_count++; - if(instance_count > 1) error->all(FLERR, + if (instance_count > 1) error->all(FLERR, "only a single instance of mesont/tpm pair style can be created"); } @@ -337,7 +337,7 @@ PairMESONTTPM::~PairMESONTTPM() /* ---------------------------------------------------------------------- */ -void PairMESONTTPM::compute(int eflag, int vflag){ +void PairMESONTTPM::compute(int eflag, int vflag) { // set per atom values and accumulators // reallocate per-atom arrays if necessary ev_init(eflag,vflag); @@ -355,7 +355,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){ //total number of atoms in the node and ghost shell int ntot = atom->nlocal + atom->nghost; int newton_pair = force->newton_pair; - if(!newton_pair) + if (!newton_pair) error->all(FLERR,"Pair style mesont/tpm requires newton pair on"); double **x = atom->x; @@ -374,7 +374,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){ } double Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax + std::pow((2.0*RT + TPBRcutoff),2))); - if (cut_global < Rcut_min){ + if (cut_global < Rcut_min) { std::stringstream err; err << "The selected cutoff is too small for the current system : " << "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global << @@ -390,7 +390,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){ std::vector x_sort(3*nall), f_sort(3*nall), s_sort(9*nall); std::vector u_ts_sort(nall), u_tb_sort(nall), u_tt_sort(nall); std::vector b_sort(nall); - for (int i = 0; i < nall; i++){ + for (int i = 0; i < nall; i++) { int idx = ntlist.get_idx(i); for (int j = 0; j < 3; j++) x_sort[3*i+j] = x[idx][j]; b_sort[i] = buckling[idx]; @@ -426,13 +426,13 @@ void PairMESONTTPM::compute(int eflag, int vflag){ } //share new values of buckling - if (BendingMode == 1){ - for (int i = 0; i < nall; i++){ + if (BendingMode == 1) { + for (int i = 0; i < nall; i++) { 
int idx = ntlist.get_idx(i); buckling[idx] = b_sort[i]; } comm->forward_comm_pair(this); - for (int i = 0; i < nall; i++){ + for (int i = 0; i < nall; i++) { int idx = ntlist.get_idx(i); b_sort[i] = buckling[idx]; } @@ -470,7 +470,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){ mesont_lib_TubeStretchingForceField(U1s, U2s, F1, F2, S1, S2, X1, X2, R12, L12); - for (int nc = 0; nc < (int)ntlist.get_nbs()[i].size(); nc++){ + for (int nc = 0; nc < (int)ntlist.get_nbs()[i].size(); nc++) { //id of the beginning and end of the chain in the sorted representation const array2003& chain = ntlist.get_nbs()[i][nc]; int N = chain[1] - chain[0] + 1; //number of elements in the chain @@ -489,7 +489,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){ double* Xe = X; double* Fe = F; double* Se = S; if (!E1 && ntlist.get_triplet(end1)[0] != MESONTList::domain_end && ntlist.get_triplet(ntlist.get_triplet(end1)[0])[0] == - MESONTList::cnt_end){ + MESONTList::cnt_end) { Ee = 1; int idx = ntlist.get_idxb(ntlist.get_triplet(end1)[0]); Xe = &(x_sort[3*idx]); @@ -498,7 +498,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){ } else if (!E2 && ntlist.get_triplet(end2)[2] != MESONTList::domain_end && ntlist.get_triplet(ntlist.get_triplet(end2)[2])[2] == - MESONTList::cnt_end){ + MESONTList::cnt_end) { Ee = 2; int idx = ntlist.get_idxb(ntlist.get_triplet(end2)[2]); Xe = &(x_sort[3*idx]); @@ -514,7 +514,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){ //check if cutoff is chosen correctly Rcut_min = std::max(2.0*Lmax, std::sqrt(0.5*Lmax*Lmax + std::pow((2.0*Rmax + TPBRcutoff),2))); - if (cut_global < Rcut_min){ + if (cut_global < Rcut_min) { std::stringstream err; err << "The selected cutoff is too small for the current system : " << "L_max = " << Lmax << ", R_max = " << RT << ", Rc = " << cut_global << @@ -523,15 +523,15 @@ void PairMESONTTPM::compute(int eflag, int vflag){ } //convert from sorted representation - for (int i = 0; i < nall; i++){ + for (int i = 0; i < 
nall; i++) { int idx = ntlist.get_idx(i); for (int j = 0; j < 3; j++) f[idx][j] += f_sort[3*i+j]; buckling[idx] = b_sort[i]; } - if(eflag_global){ + if (eflag_global) { eng_vdwl = 0.0; energy_s = 0.0; energy_b = 0.0; energy_t = 0.0; - for (int i = 0; i < nall; i++){ + for (int i = 0; i < nall; i++) { int idx = ntlist.get_idx(i); energy_s += u_ts_sort[i]; energy_b += u_tb_sort[i]; @@ -539,12 +539,12 @@ void PairMESONTTPM::compute(int eflag, int vflag){ } eng_vdwl = energy_s + energy_b + energy_t; } - if(eflag_atom){ - for (int i = 0; i < ntot; i++){ + if (eflag_atom) { + for (int i = 0; i < ntot; i++) { eatom[i] = 0.0; eatom_s[i] = 0.0; eatom_b[i] = 0.0; eatom_t[i] = 0.0; } - for (int i = 0; i < nall; i++){ + for (int i = 0; i < nall; i++) { int idx = ntlist.get_idx(i); eatom_s[idx] = u_ts_sort[i]; eatom_b[idx] = u_tb_sort[i]; @@ -552,9 +552,9 @@ void PairMESONTTPM::compute(int eflag, int vflag){ eatom[idx] = u_ts_sort[i] + u_tb_sort[i] + u_tt_sort[i]; } } - if(vflag_global){ + if (vflag_global) { for (int i = 0; i < 6; i++) virial[i] = 0.0; - for (int i = 0; i < nall; i++){ + for (int i = 0; i < nall; i++) { int idx = ntlist.get_idx(i); virial[0] += s_sort[9*i+0]; //xx virial[1] += s_sort[9*i+4]; //yy @@ -564,10 +564,10 @@ void PairMESONTTPM::compute(int eflag, int vflag){ virial[5] += s_sort[9*i+5]; //yz } } - if(vflag_atom){ + if (vflag_atom) { for (int i = 0; i < ntot; i++) for (int j = 0; j < 6; j++) vatom[i][j] = 0.0; - for (int i = 0; i < nall; i++){ + for (int i = 0; i < nall; i++) { int idx = ntlist.get_idx(i); vatom[idx][0] = s_sort[9*i+0]; //xx vatom[idx][1] = s_sort[9*i+4]; //yy @@ -584,7 +584,7 @@ void PairMESONTTPM::compute(int eflag, int vflag){ allocate all arrays ------------------------------------------------------------------------- */ -void PairMESONTTPM::allocate(){ +void PairMESONTTPM::allocate() { allocated = 1; int n = atom->ntypes; @@ -601,7 +601,7 @@ void PairMESONTTPM::allocate(){ global settings 
------------------------------------------------------------------------- */ -void PairMESONTTPM::settings(int narg, char **arg){ +void PairMESONTTPM::settings(int narg, char **arg) { if ((narg == 0) || (narg > 4)) error->all(FLERR,"Illegal pair_style command"); cut_global = utils::numeric(FLERR,arg[0],false,lmp); @@ -650,7 +650,7 @@ void PairMESONTTPM::settings(int narg, char **arg){ set coeffs for one or more type pairs ------------------------------------------------------------------------- */ -void PairMESONTTPM::coeff(int narg, char **arg){ +void PairMESONTTPM::coeff(int narg, char **arg) { if ((narg < 2) || (narg > 3)) error->all(FLERR,"Incorrect args for pair coefficients"); @@ -679,7 +679,7 @@ void PairMESONTTPM::coeff(int narg, char **arg){ init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ -double PairMESONTTPM::init_one(int i, int j){ +double PairMESONTTPM::init_one(int i, int j) { if (setflag[i][j] == 0) { cut[i][j] = mix_distance(cut[i][i],cut[j][j]); } @@ -691,7 +691,7 @@ double PairMESONTTPM::init_one(int i, int j){ proc 0 writes to restart file ------------------------------------------------------------------------- */ -void PairMESONTTPM::write_restart(FILE *fp){ +void PairMESONTTPM::write_restart(FILE *fp) { write_restart_settings(fp); int i,j; @@ -708,7 +708,7 @@ void PairMESONTTPM::write_restart(FILE *fp){ proc 0 reads from restart file, bcasts ------------------------------------------------------------------------- */ -void PairMESONTTPM::read_restart(FILE *fp){ +void PairMESONTTPM::read_restart(FILE *fp) { read_restart_settings(fp); allocate(); @@ -731,7 +731,7 @@ void PairMESONTTPM::read_restart(FILE *fp){ proc 0 writes to restart file ------------------------------------------------------------------------- */ -void PairMESONTTPM::write_restart_settings(FILE *fp){ +void PairMESONTTPM::write_restart_settings(FILE *fp) { fwrite(&BendingMode,sizeof(int),1,fp); 
fwrite(&TPMType,sizeof(int),1,fp); fwrite(&cut_global,sizeof(double),1,fp); @@ -743,7 +743,7 @@ void PairMESONTTPM::write_restart_settings(FILE *fp){ proc 0 reads from restart file, bcasts ------------------------------------------------------------------------- */ -void PairMESONTTPM::read_restart_settings(FILE *fp){ +void PairMESONTTPM::read_restart_settings(FILE *fp) { int me = comm->me; if (me == 0) { fread(&BendingMode,sizeof(int),1,fp); @@ -779,7 +779,7 @@ void PairMESONTTPM::read_restart_settings(FILE *fp){ proc 0 writes to data file ------------------------------------------------------------------------- */ -void PairMESONTTPM::write_data(FILE *fp){ +void PairMESONTTPM::write_data(FILE *fp) { for (int i = 1; i <= atom->ntypes; i++) fprintf(fp,"%d\n",i); } @@ -788,7 +788,7 @@ void PairMESONTTPM::write_data(FILE *fp){ proc 0 writes all pairs to data file ------------------------------------------------------------------------- */ -void PairMESONTTPM::write_data_all(FILE *fp){ +void PairMESONTTPM::write_data_all(FILE *fp) { for (int i = 1; i <= atom->ntypes; i++) for (int j = i; j <= atom->ntypes; j++) fprintf(fp,"%d %d %g\n",i,j,cut[i][j]); @@ -796,7 +796,7 @@ void PairMESONTTPM::write_data_all(FILE *fp){ /* ---------------------------------------------------------------------- */ -void PairMESONTTPM::init_style(){ +void PairMESONTTPM::init_style() { //make sure that a full list is created (including ghost nodes) int r = neighbor->request(this,instance_me); neighbor->requests[r]->half = false; @@ -804,7 +804,7 @@ void PairMESONTTPM::init_style(){ neighbor->requests[r]->ghost = true; } -void* PairMESONTTPM::extract(const char *str, int &){ +void* PairMESONTTPM::extract(const char *str, int &) { if (strcmp(str,"mesonttpm_Es_tot") == 0) return &energy_s; else if (strcmp(str,"mesonttpm_Eb_tot") == 0) return &energy_b; else if (strcmp(str,"mesonttpm_Et_tot") == 0) return &energy_t; From 3e7df13203a7cb39712930f114bb8f8de413b2d1 Mon Sep 17 00:00:00 2001 From: 
iafoss Date: Mon, 2 Nov 2020 21:22:14 -0500 Subject: [PATCH 40/64] c++11 --- src/USER-MESONT/pair_mesont_tpm.cpp | 70 +++++++++++------------------ 1 file changed, 26 insertions(+), 44 deletions(-) diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp index 2fe5b2036f..b92fc16750 100644 --- a/src/USER-MESONT/pair_mesont_tpm.cpp +++ b/src/USER-MESONT/pair_mesont_tpm.cpp @@ -9,7 +9,7 @@ the GNU General Public License. See the README file in the top-level LAMMPS directory. - + 2 Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu ------------------------------------------------------------------------- */ @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -36,28 +37,17 @@ using namespace LAMMPS_NS; -//since LAMMPS is compiled with C++ 2003, define a substitution for std::array -template -class array2003{ -public: - T& operator[] (int idx) { return data[idx];}; - const T& operator[] (int idx) const{ return data[idx];}; -private: - T data[N]; -}; - - class MESONTList { public: MESONTList(const Atom* atom, const NeighList* nblist); ~MESONTList() {}; //list of segments - const std::vector >& get_segments() const; + const std::vector>& get_segments() const; //list of triplets - const std::vector >& get_triplets() const; + const std::vector>& get_triplets() const; //list of neighbor chains [start,end] for segments //(use idx() to get real indexes) - const std::vector > >& get_nbs() const; + const std::vector>>& get_nbs() const; //convert idx from sorted representation to real idx int get_idx(int idx) const; //return list of indexes for conversion from sorted representation @@ -69,22 +59,22 @@ public: //check if the node is the end of the tube bool is_end(int idx) const; - array2003 get_segment(int idx) const; - array2003 get_triplet(int idx) const; + std::array get_segment(int idx) const; + std::array get_triplet(int idx) const; static const int cnt_end = -1; static const int domain_end = -2; static const 
int not_cnt = -3; private: - std::vector > chain_list, segments; - std::vector > triplets; - std::vector > > nb_chains; + std::vector> chain_list, segments; + std::vector> triplets; + std::vector>> nb_chains; std::vector index_list, index_list_b; }; //============================================================================= -inline const std::vector > > & +inline const std::vector>> & MESONTList::get_nbs() const { return nb_chains; } @@ -106,25 +96,25 @@ inline const std::vector& MESONTList::get_idxb_list() const { return index_list_b; }; -inline const std::vector > & MESONTList::get_segments() +inline const std::vector> & MESONTList::get_segments() const { return segments; } -inline const std::vector > & MESONTList::get_triplets() +inline const std::vector> & MESONTList::get_triplets() const { return triplets; } -inline array2003 MESONTList::get_segment(int idx) const { - array2003 result; +inline std::array MESONTList::get_segment(int idx) const { + std::array result; result[0] = chain_list[idx][0]; result[1] = idx; return result; } -inline array2003 MESONTList::get_triplet(int idx) const { - array2003 result; +inline std::array MESONTList::get_triplet(int idx) const { + std::array result; result[0] = chain_list[idx][0]; result[1] = idx; result[2] = chain_list[idx][1]; @@ -179,9 +169,7 @@ MESONTList::MESONTList(const Atom* atom, const NeighList* nblist) { int* ilist = nblist->ilist; //convert bonds to local id representation - array2003 tmp_arr; - tmp_arr[0] = not_cnt; tmp_arr[1] = not_cnt; - chain_list.resize(ntot, tmp_arr); + chain_list.resize(ntot, {not_cnt,not_cnt}); for (int ii = 0; ii < nall; ii++) { int i = ilist[ii]; chain_list[i][0] = domain_end; @@ -224,17 +212,11 @@ MESONTList::MESONTList(const Atom* atom, const NeighList* nblist) { for (int i = 0; i < nlocal; i++) { if (chain_list[i][0] == not_cnt) continue; if (chain_list[i][0] != cnt_end && chain_list[i][0] != domain_end && - g_id[i] < g_id[chain_list[i][0]]) { - array2003 tmp_c; - tmp_c[0] = i; 
tmp_c[1] = chain_list[i][0]; - segments.push_back(tmp_c); - } + g_id[i] < g_id[chain_list[i][0]]) + segments.push_back({i,chain_list[i][0]}); if (chain_list[i][1] != cnt_end && chain_list[i][1] != domain_end && - g_id[i] < g_id[chain_list[i][1]]) { - array2003 tmp_c; - tmp_c[0] = i; tmp_c[1] = chain_list[i][1]; - segments.push_back(tmp_c); - } + g_id[i] < g_id[chain_list[i][1]]) + segments.push_back({i,chain_list[i][1]}); } int nbonds = segments.size(); @@ -275,7 +257,7 @@ MESONTList::MESONTList(const Atom* atom, const NeighList* nblist) { int idx_next = chain_list[index_list[nb_list[j]]][1]; if ((j == nnb - 1) || (nb_list[j] + 1 != nb_list[j+1]) || (idx_next == cnt_end) || (idx_next == domain_end)) { - array2003 chain; + std::array chain; chain[0] = idx_s; chain[1] = nb_list[j]; //make sure that segments having at least one node @@ -399,7 +381,7 @@ void PairMESONTTPM::compute(int eflag, int vflag) { //bending potential int n_triplets = ntlist.get_triplets().size(); for (int i = 0; i < n_triplets; i++) { - const array2003& t = ntlist.get_triplets()[i]; + const std::array& t = ntlist.get_triplets()[i]; //idx of nodes of a triplet in sorted representation int idx_s0 = ntlist.get_idxb(t[0]); int idx_s1 = ntlist.get_idxb(t[1]); @@ -443,7 +425,7 @@ void PairMESONTTPM::compute(int eflag, int vflag) { double Rmax = 0.0; Lmax = 0.0; for (int i = 0; i < n_segments; i++) { - const array2003& s = ntlist.get_segments()[i]; + const std::array& s = ntlist.get_segments()[i]; //idx of a segment end 1 in sorted representation int idx_s0 = ntlist.get_idxb(s[0]); //idx of a segment end 2 in sorted representation @@ -472,7 +454,7 @@ void PairMESONTTPM::compute(int eflag, int vflag) { for (int nc = 0; nc < (int)ntlist.get_nbs()[i].size(); nc++) { //id of the beginning and end of the chain in the sorted representation - const array2003& chain = ntlist.get_nbs()[i][nc]; + const std::array& chain = ntlist.get_nbs()[i][nc]; int N = chain[1] - chain[0] + 1; //number of elements in the chain 
int end1 = ntlist.get_idx(chain[0]); //chain ends (real representation) int end2 = ntlist.get_idx(chain[1]); From af14739541c9b96d29d7ddfa03dc09de988f21ee Mon Sep 17 00:00:00 2001 From: iafoss Date: Mon, 2 Nov 2020 22:09:55 -0500 Subject: [PATCH 41/64] typo --- src/USER-MESONT/pair_mesont_tpm.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/USER-MESONT/pair_mesont_tpm.cpp b/src/USER-MESONT/pair_mesont_tpm.cpp index b92fc16750..1271ebddb6 100644 --- a/src/USER-MESONT/pair_mesont_tpm.cpp +++ b/src/USER-MESONT/pair_mesont_tpm.cpp @@ -9,7 +9,6 @@ the GNU General Public License. See the README file in the top-level LAMMPS directory. - 2 Contributing author: Maxim Shugaev (UVA), mvs9t@virginia.edu ------------------------------------------------------------------------- */ From 3ea395615a891b1f56156e4eb97e65956331343c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 4 Nov 2020 10:54:20 -0500 Subject: [PATCH 42/64] update fmtlib version 7.1.1 to 7.1.2 --- src/fmt/core.h | 2 +- src/fmt/format-inl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fmt/core.h b/src/fmt/core.h index 9bd2003b28..b4fc461011 100644 --- a/src/fmt/core.h +++ b/src/fmt/core.h @@ -18,7 +18,7 @@ #include // The fmt library version in the form major * 10000 + minor * 100 + patch. 
-#define FMT_VERSION 70101 +#define FMT_VERSION 70102 #ifdef __clang__ # define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) diff --git a/src/fmt/format-inl.h b/src/fmt/format-inl.h index 5d466eebbc..8f2fe7354a 100644 --- a/src/fmt/format-inl.h +++ b/src/fmt/format-inl.h @@ -2337,7 +2337,7 @@ void fallback_format(Double d, int num_digits, bool binary32, buffer& buf, upper = &upper_store; } denominator.assign_pow10(exp10); - denominator <<= 1; + denominator <<= shift; } else if (exp10 < 0) { numerator.assign_pow10(-exp10); lower.assign(numerator); From c2b9b6d57b7c49d57ce143d1182e7aab82c933a2 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 6 Nov 2020 17:14:48 -0500 Subject: [PATCH 43/64] fix bug using the wrong flag variable and print warning only if a change was made --- src/domain.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/domain.cpp b/src/domain.cpp index b0a32ce114..1becba5e0b 100644 --- a/src/domain.cpp +++ b/src/domain.cpp @@ -1873,6 +1873,7 @@ void Domain::set_boundary(int narg, char **arg, int flag) else zperiodic = 0; // record if we changed a periodic boundary to a non-periodic one + int pflag=0; if ((periodicity[0] && !xperiodic) || (periodicity[1] && !yperiodic) @@ -1889,23 +1890,27 @@ void Domain::set_boundary(int narg, char **arg, int flag) boundary[1][0] >= 2 || boundary[1][1] >= 2 || boundary[2][0] >= 2 || boundary[2][1] >= 2) nonperiodic = 2; } + + // force non-zero image flags to zero for non-periodic dimensions + // keep track if a change was made, so we can print a warning message + if (pflag) { pflag = 0; for (int i=0; i < atom->nlocal; ++i) { int xbox = (atom->image[i] & IMGMASK) - IMGMAX; int ybox = (atom->image[i] >> IMGBITS & IMGMASK) - IMGMAX; int zbox = (atom->image[i] >> IMG2BITS) - IMGMAX; - if (!xperiodic) { xbox = 0; pflag = 1; } - if (!yperiodic) { ybox = 0; pflag = 1; } - if (!zperiodic) { zbox = 0; pflag = 1; } + if ((!xperiodic) && (xbox != 0)) { xbox = 0; 
pflag = 1; } + if ((!yperiodic) && (ybox != 0)) { ybox = 0; pflag = 1; } + if ((!zperiodic) && (zbox != 0)) { zbox = 0; pflag = 1; } atom->image[i] = ((imageint) (xbox + IMGMAX) & IMGMASK) | (((imageint) (ybox + IMGMAX) & IMGMASK) << IMGBITS) | (((imageint) (zbox + IMGMAX) & IMGMASK) << IMG2BITS); } int flag_all; - MPI_Allreduce(&flag,&flag_all, 1, MPI_INT, MPI_SUM, world); + MPI_Allreduce(&pflag,&flag_all, 1, MPI_INT, MPI_SUM, world); if ((flag_all > 0) && (comm->me == 0)) - error->warning(FLERR,"Reset image flags for non-periodic boundary"); + error->warning(FLERR,"Resetting image flags for non-periodic dimensions"); } } From c68829f17d59c9e247613d9d32f41a402ecb8f18 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 6 Nov 2020 17:17:18 -0500 Subject: [PATCH 44/64] update granular pair style example to comply to tighter history fix requirements --- examples/granular/in.pour.drum | 111 ++++++------ examples/granular/in.pour.flatwall | 76 ++++----- ...drum.g++.1 => log.29Oct20.pour.drum.g++.1} | 150 ++++++++-------- ...drum.g++.4 => log.29Oct20.pour.drum.g++.4} | 156 ++++++++--------- ....g++.1 => log.29Oct20.pour.flatwall.g++.1} | 120 +++++++++---- ....g++.4 => log.29Oct20.pour.flatwall.g++.4} | 161 +++++++++--------- 6 files changed, 411 insertions(+), 363 deletions(-) rename examples/granular/{log.29Mar19.pour.drum.g++.1 => log.29Oct20.pour.drum.g++.1} (69%) rename examples/granular/{log.29Mar19.pour.drum.g++.4 => log.29Oct20.pour.drum.g++.4} (69%) rename examples/granular/{log.29Mar19.pour.flatwall.g++.1 => log.29Oct20.pour.flatwall.g++.1} (56%) rename examples/granular/{log.29Mar19.pour.flatwall.g++.4 => log.29Oct20.pour.flatwall.g++.4} (52%) diff --git a/examples/granular/in.pour.drum b/examples/granular/in.pour.drum index 54372cd391..e0a0455f61 100644 --- a/examples/granular/in.pour.drum +++ b/examples/granular/in.pour.drum @@ -2,99 +2,98 @@ # 'turn' cylinder by changing direction of gravity, then rotate it. 
# This simulates a rotating drum powder characterization experiment. -variable name string rotating_drum_two_types +variable name string rotating_drum_two_types -atom_style sphere -units lj +atom_style sphere +units lj ############################################### # Geometry-related parameters ############################################### -variable boxx equal 30 -variable boxy equal 30 -variable boxz equal 50 +variable boxx equal 30 +variable boxy equal 30 +variable boxz equal 50 -variable drum_rad equal ${boxx}*0.5 -variable drum_height equal 20 +variable drum_rad equal ${boxx}*0.5 +variable drum_height equal 20 -variable xc equal 0.5*${boxx} -variable yc equal 0.5*${boxx} -variable zc equal 0.5*${boxz} +variable xc equal 0.5*${boxx} +variable yc equal 0.5*${boxx} +variable zc equal 0.5*${boxz} ############################################### # Particle-related parameters ############################################### -variable rlo equal 0.25 -variable rhi equal 0.5 -variable dlo equal 2.0*${rlo} -variable dhi equal 2.0*${rhi} +variable rlo equal 0.25 +variable rhi equal 0.5 +variable dlo equal 2.0*${rlo} +variable dhi equal 2.0*${rhi} -variable cyl_rad_inner equal ${drum_rad}-1.1*${rhi} +variable cyl_rad_inner equal ${drum_rad}-1.1*${rhi} -variable dens equal 1.0 +variable dens equal 1.0 variable skin equal 0.4*${rhi} ############# processors * * 1 -region boxreg block 0 ${boxx} 0 ${boxy} 0 ${boxz} -create_box 2 boxreg -change_box all boundary p p f -comm_modify vel yes +region boxreg block 0 ${boxx} 0 ${boxy} 0 ${boxz} +create_box 2 boxreg +change_box all boundary p p f -variable theta equal 0 +pair_style granular +pair_coeff 1 * hertz/material 1e5 0.2 0.3 tangential mindlin NULL 1.0 0.5 damping tsuji +pair_coeff 2 2 jkr 1e5 0.1 0.3 50 tangential mindlin NULL 1.0 0.5 rolling sds 1e3 1e3 0.1 twisting marshall damping tsuji -region curved_wall cylinder z ${xc} ${yc} ${drum_rad} 0 ${drum_height} side in rotate v_theta ${xc} ${yc} 0 0 0 1 -region bottom_wall 
plane ${xc} ${yc} 0 0 0 1 side in rotate v_theta ${xc} ${yc} 0 0 0 1 +variable theta equal 0 -region insreg cylinder z ${xc} ${yc} ${cyl_rad_inner} ${drum_height} ${boxz} +region curved_wall cylinder z ${xc} ${yc} ${drum_rad} 0 ${drum_height} side in rotate v_theta ${xc} ${yc} 0 0 0 1 +region bottom_wall plane ${xc} ${yc} 0 0 0 1 side in rotate v_theta ${xc} ${yc} 0 0 0 1 -fix 0 all balance 100 1.0 shift xy 5 1.1 -fix 1 all nve/sphere -fix grav all gravity 10 vector 0 0 -1 -fix ins1 all pour 2000 1 1234 region insreg diam range ${dlo} ${dhi} dens ${dens} ${dens} -fix ins2 all pour 2000 2 1234 region insreg diam range ${dlo} ${dhi} dens ${dens} ${dens} +region insreg cylinder z ${xc} ${yc} ${cyl_rad_inner} ${drum_height} ${boxz} -comm_modify vel yes +fix 0 all balance 100 1.0 shift xy 5 1.1 +fix 1 all nve/sphere +fix grav all gravity 10 vector 0 0 -1 +fix ins1 all pour 2000 1 1234 region insreg diam range ${dlo} ${dhi} dens ${dens} ${dens} +fix ins2 all pour 2000 2 1234 region insreg diam range ${dlo} ${dhi} dens ${dens} ${dens} -neighbor ${skin} bin -neigh_modify delay 0 every 1 check yes +comm_modify vel yes -pair_style granular -pair_coeff 1 * hertz/material 1e5 0.2 0.3 tangential mindlin NULL 1.0 0.5 damping tsuji -pair_coeff 2 2 jkr 1e5 0.1 0.3 50 tangential mindlin NULL 1.0 0.5 rolling sds 1e3 1e3 0.1 twisting marshall damping tsuji +neighbor ${skin} bin +neigh_modify delay 0 every 1 check yes -fix 3 all wall/gran/region granular hertz/material 1e5 0.1 0.3 tangential mindlin NULL 1.0 0.5 damping tsuji region curved_wall -fix 4 all wall/gran/region granular hertz/material 1e5 0.1 0.3 tangential mindlin NULL 1.0 0.5 damping tsuji region bottom_wall +fix 3 all wall/gran/region granular hertz/material 1e5 0.1 0.3 tangential mindlin NULL 1.0 0.5 damping tsuji region curved_wall +fix 4 all wall/gran/region granular hertz/material 1e5 0.1 0.3 tangential mindlin NULL 1.0 0.5 damping tsuji region bottom_wall -thermo_style custom step atoms ke v_theta -thermo_modify 
lost warn -thermo 100 +thermo_style custom step atoms ke v_theta +thermo_modify lost warn +thermo 100 -timestep 0.001 +timestep 0.001 -#dump 1 all custom 100 ${name}.dump id type radius mass x y z +#dump 1 all custom 100 ${name}.dump id type radius mass x y z #For removal later -compute 1 all property/atom radius -variable zmax atom z+c_1>0.5*${drum_height} -group delgroup dynamic all var zmax every 10000 +compute 1 all property/atom radius +variable zmax atom z+c_1>0.5*${drum_height} +group delgroup dynamic all var zmax every 10000 -run 2000 +run 2000 #Remove any particles that are above z > 0.5*drum_height -delete_atoms group delgroup +delete_atoms group delgroup #Add top lid -region top_wall plane ${xc} ${yc} ${drum_height} 0 0 -1 side in rotate v_theta ${xc} ${yc} 0 0 0 1 -fix 5 all wall/gran/region granular hertz/material 1e5 0.1 0.3 tangential mindlin NULL 1.0 0.5 damping tsuji region top_wall +region top_wall plane ${xc} ${yc} ${drum_height} 0 0 -1 side in rotate v_theta ${xc} ${yc} 0 0 0 1 +fix 5 all wall/gran/region granular hertz/material 1e5 0.1 0.3 tangential mindlin NULL 1.0 0.5 damping tsuji region top_wall # 'Turn' drum by switching the direction of gravity -unfix grav -unfix ins1 -unfix ins2 -fix grav all gravity 10 vector 0 -1 0 +unfix grav +unfix ins1 +unfix ins2 +fix grav all gravity 10 vector 0 -1 0 -variable theta equal 2*PI*elapsed/20000.0 -run 3000 +variable theta equal 2*PI*elapsed/20000.0 +run 3000 diff --git a/examples/granular/in.pour.flatwall b/examples/granular/in.pour.flatwall index cfa70e2d84..74d7c7370e 100644 --- a/examples/granular/in.pour.flatwall +++ b/examples/granular/in.pour.flatwall @@ -1,67 +1,65 @@ # pour two types of particles (cohesive and non-cohesive) on flat wall -variable name string pour_two_types +variable name string pour_two_types -atom_style sphere -units lj +atom_style sphere +units lj ############################################### # Geometry-related parameters ############################################### 
-variable boxx equal 20 -variable boxy equal 20 -variable boxz equal 30 +variable boxx equal 20 +variable boxy equal 20 +variable boxz equal 30 -variable xc1 equal 0.3*${boxx} -variable xc2 equal 0.7*${boxx} -variable yc equal 0.5*${boxy} +variable xc1 equal 0.3*${boxx} +variable xc2 equal 0.7*${boxx} +variable yc equal 0.5*${boxy} ############################################### # Particle-related parameters ############################################### -variable rlo equal 0.25 -variable rhi equal 0.5 -variable dlo equal 2.0*${rlo} -variable dhi equal 2.0*${rhi} +variable rlo equal 0.25 +variable rhi equal 0.5 +variable dlo equal 2.0*${rlo} +variable dhi equal 2.0*${rhi} -variable dens equal 1.0 +variable dens equal 1.0 variable skin equal 0.3*${rhi} ############# -processors * * 1 -region boxreg block 0 ${boxx} 0 ${boxy} 0 ${boxz} -create_box 2 boxreg -change_box all boundary p p f +processors * * 1 +region boxreg block 0 ${boxx} 0 ${boxy} 0 ${boxz} +create_box 2 boxreg +change_box all boundary p p f -comm_modify vel yes +pair_style granular +pair_coeff 1 * jkr 1000.0 50.0 0.3 10 tangential mindlin 800.0 1.0 0.5 rolling sds 500.0 200.0 0.5 twisting marshall +pair_coeff 2 2 hertz 200.0 20.0 tangential linear_history 300.0 1.0 0.1 rolling sds 200.0 100.0 0.1 twisting marshall -region insreg1 cylinder z ${xc1} ${yc} 5 15 ${boxz} -region insreg2 cylinder z ${xc2} ${yc} 5 15 ${boxz} +comm_modify vel yes -fix 1 all nve/sphere -fix grav all gravity 10.0 vector 0 0 -1 -fix ins1 all pour 1500 1 3123 region insreg1 diam range ${dlo} ${dhi} dens ${dens} ${dens} -fix ins2 all pour 1500 2 3123 region insreg2 diam range ${dlo} ${dhi} dens ${dens} ${dens} +region insreg1 cylinder z ${xc1} ${yc} 5 15 ${boxz} +region insreg2 cylinder z ${xc2} ${yc} 5 15 ${boxz} -comm_modify vel yes +fix 1 all nve/sphere +fix grav all gravity 10.0 vector 0 0 -1 +fix ins1 all pour 1500 1 3123 region insreg1 diam range ${dlo} ${dhi} dens ${dens} ${dens} +fix ins2 all pour 1500 2 3123 region insreg2 
diam range ${dlo} ${dhi} dens ${dens} ${dens} -neighbor ${skin} bin -neigh_modify delay 0 every 1 check yes +neighbor ${skin} bin +neigh_modify delay 0 every 1 check yes -pair_style granular -pair_coeff 1 * jkr 1000.0 50.0 0.3 10 tangential mindlin 800.0 1.0 0.5 rolling sds 500.0 200.0 0.5 twisting marshall -pair_coeff 2 2 hertz 200.0 20.0 tangential linear_history 300.0 1.0 0.1 rolling sds 200.0 100.0 0.1 twisting marshall +fix 3 all wall/gran granular hertz/material 1e5 1e3 0.3 tangential mindlin NULL 1.0 0.5 zplane 0 NULL -fix 3 all wall/gran granular hertz/material 1e5 1e3 0.3 tangential mindlin NULL 1.0 0.5 zplane 0 NULL +thermo_style custom step atoms ke +thermo_modify lost warn +thermo 100 -thermo_style custom step cpu atoms ke -thermo_modify lost warn -thermo 100 +timestep 0.001 -timestep 0.001 +#dump 1 all custom 100 ${name}.dump id type radius mass x y z -#dump 1 all custom 100 ${name}.dump id type radius mass x y z - -run 5000 +run 5000 diff --git a/examples/granular/log.29Mar19.pour.drum.g++.1 b/examples/granular/log.29Oct20.pour.drum.g++.1 similarity index 69% rename from examples/granular/log.29Mar19.pour.drum.g++.1 rename to examples/granular/log.29Oct20.pour.drum.g++.1 index 71232ed8dd..8c4b828488 100644 --- a/examples/granular/log.29Mar19.pour.drum.g++.1 +++ b/examples/granular/log.29Oct20.pour.drum.g++.1 @@ -1,5 +1,4 @@ -LAMMPS (29 Mar 2019) -OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:88) +LAMMPS (29 Oct 2020) using 1 OpenMP thread(s) per MPI task # pour two types of particles (cohesive and non-cohesive) into cylinder # 'turn' cylinder by changing direction of gravity, then rotate it. 
@@ -55,10 +54,14 @@ region boxreg block 0 30 0 ${boxy} 0 ${boxz} region boxreg block 0 30 0 30 0 ${boxz} region boxreg block 0 30 0 30 0 50 create_box 2 boxreg -Created orthogonal box = (0 0 0) to (30 30 50) +Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (30.000000 30.000000 50.000000) 1 by 1 by 1 MPI processor grid change_box all boundary p p f -comm_modify vel yes +Changing box ... + +pair_style granular +pair_coeff 1 * hertz/material 1e5 0.2 0.3 tangential mindlin NULL 1.0 0.5 damping tsuji +pair_coeff 2 2 jkr 1e5 0.1 0.3 50 tangential mindlin NULL 1.0 0.5 rolling sds 1e3 1e3 0.1 twisting marshall damping tsuji variable theta equal 0 @@ -104,10 +107,6 @@ neighbor ${skin} bin neighbor 0.2 bin neigh_modify delay 0 every 1 check yes -pair_style granular -pair_coeff 1 * hertz/material 1e5 0.2 0.3 tangential mindlin NULL 1.0 0.5 damping tsuji -pair_coeff 2 2 jkr 1e5 0.1 0.3 50 tangential mindlin NULL 1.0 0.5 rolling sds 1e3 1e3 0.1 twisting marshall damping tsuji - fix 3 all wall/gran/region granular hertz/material 1e5 0.1 0.3 tangential mindlin NULL 1.0 0.5 damping tsuji region curved_wall fix 4 all wall/gran/region granular hertz/material 1e5 0.1 0.3 tangential mindlin NULL 1.0 0.5 damping tsuji region bottom_wall @@ -117,8 +116,7 @@ thermo 100 timestep 0.001 -dump 1 all custom 100 ${name}.dump id type radius mass x y z -dump 1 all custom 100 rotating_drum_two_types.dump id type radius mass x y z +#dump 1 all custom 100 ${name}.dump id type radius mass x y z #For removal later compute 1 all property/atom radius @@ -140,7 +138,7 @@ Neighbor list info ... 
pair build: half/size/bin/newton stencil: half/bin/3d/newton bin: standard -Per MPI rank memory allocation (min/avg/max) = 13.02 | 13.02 | 13.02 Mbytes +Per MPI rank memory allocation (min/avg/max) = 13.03 | 13.03 | 13.03 Mbytes Step Atoms KinEng v_theta 0 0 -0 0 100 4000 -0 0 @@ -163,32 +161,32 @@ Step Atoms KinEng v_theta 1800 4000 -0 0 1900 4000 -0 0 2000 4000 -0 0 -Loop time of 3.54461 on 1 procs for 2000 steps with 4000 atoms +Loop time of 10.5178 on 1 procs for 2000 steps with 4000 atoms -Performance: 48750.057 tau/day, 564.237 timesteps/s -99.5% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 16429.309 tau/day, 190.154 timesteps/s +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.61949 | 0.61949 | 0.61949 | 0.0 | 17.48 -Neigh | 1.2492 | 1.2492 | 1.2492 | 0.0 | 35.24 -Comm | 0.046404 | 0.046404 | 0.046404 | 0.0 | 1.31 -Output | 0.15901 | 0.15901 | 0.15901 | 0.0 | 4.49 -Modify | 1.4165 | 1.4165 | 1.4165 | 0.0 | 39.96 -Other | | 0.05391 | | | 1.52 +Pair | 1.0701 | 1.0701 | 1.0701 | 0.0 | 10.17 +Neigh | 4.2135 | 4.2135 | 4.2135 | 0.0 | 40.06 +Comm | 0.38276 | 0.38276 | 0.38276 | 0.0 | 3.64 +Output | 0.0013647 | 0.0013647 | 0.0013647 | 0.0 | 0.01 +Modify | 4.7076 | 4.7076 | 4.7076 | 0.0 | 44.76 +Other | | 0.1424 | | | 1.35 -Nlocal: 4000 ave 4000 max 4000 min +Nlocal: 4000.00 ave 4000 max 4000 min Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 166 ave 166 max 166 min +Nghost: 171.000 ave 171 max 171 min Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 8195 ave 8195 max 8195 min +Neighs: 8093.00 ave 8093 max 8093 min Histogram: 1 0 0 0 0 0 0 0 0 0 -Total # of neighbors = 8195 -Ave neighs/atom = 2.04875 +Total # of neighbors = 8093 +Ave neighs/atom = 2.0232500 Neighbor list builds = 1004 -Dangerous builds = 3 +Dangerous builds = 4 #Remove any particles that are above z > 0.5*drum_height delete_atoms group delgroup 
@@ -205,67 +203,69 @@ fix 5 all wall/gran/region granular hertz/material 1e5 0.1 0.3 tangential mindl # 'Turn' drum by switching the direction of gravity unfix grav +unfix ins1 +unfix ins2 fix grav all gravity 10 vector 0 -1 0 variable theta equal 2*PI*elapsed/20000.0 run 3000 -Per MPI rank memory allocation (min/avg/max) = 24.81 | 24.81 | 24.81 Mbytes +Per MPI rank memory allocation (min/avg/max) = 19.37 | 19.37 | 19.37 Mbytes Step Atoms KinEng v_theta - 2000 4000 64.333531 0 - 2100 4000 106.69182 0.031415927 - 2200 4000 121.8461 0.062831853 - 2300 4000 88.767952 0.09424778 - 2400 4000 82.850721 0.12566371 - 2500 4000 91.683284 0.15707963 - 2600 4000 31.56344 0.18849556 - 2700 4000 4.5697672 0.21991149 - 2800 4000 3.9879051 0.25132741 - 2900 4000 4.4394235 0.28274334 - 3000 4000 5.1212931 0.31415927 - 3100 4000 5.8608892 0.34557519 - 3200 4000 6.600714 0.37699112 - 3300 4000 7.3497851 0.40840704 - 3400 4000 8.0490988 0.43982297 - 3500 4000 8.6712396 0.4712389 - 3600 4000 9.1328667 0.50265482 - 3700 4000 9.4683561 0.53407075 - 3800 4000 9.5878145 0.56548668 - 3900 4000 9.387745 0.5969026 - 4000 4000 8.9117631 0.62831853 - 4100 4000 8.2344368 0.65973446 - 4200 4000 7.5335088 0.69115038 - 4300 4000 6.8426179 0.72256631 - 4400 4000 6.0567247 0.75398224 - 4500 4000 5.4166132 0.78539816 - 4600 4000 4.6012409 0.81681409 - 4700 4000 3.8314982 0.84823002 - 4800 4000 3.1916415 0.87964594 - 4900 4000 2.7833964 0.91106187 - 5000 4000 2.5051362 0.9424778 -Loop time of 11.9545 on 1 procs for 3000 steps with 4000 atoms + 2000 4000 65.647582 0 + 2100 4000 105.60001 0.031415927 + 2200 4000 112.27573 0.062831853 + 2300 4000 92.758671 0.09424778 + 2400 4000 88.925835 0.12566371 + 2500 4000 81.369163 0.15707963 + 2600 4000 32.046943 0.18849556 + 2700 4000 4.1926368 0.21991149 + 2800 4000 3.9933453 0.25132741 + 2900 4000 4.5062193 0.28274334 + 3000 4000 5.3409521 0.31415927 + 3100 4000 6.0165991 0.34557519 + 3200 4000 6.606767 0.37699112 + 3300 4000 7.3997751 0.40840704 + 3400 4000 
8.1098807 0.43982297 + 3500 4000 8.6552424 0.4712389 + 3600 4000 9.8445204 0.50265482 + 3700 4000 10.098753 0.53407075 + 3800 4000 10.039489 0.56548668 + 3900 4000 9.6376278 0.5969026 + 4000 4000 9.2598836 0.62831853 + 4100 4000 8.7116037 0.65973446 + 4200 4000 8.1274117 0.69115038 + 4300 4000 7.1487627 0.72256631 + 4400 4000 6.2253778 0.75398224 + 4500 4000 5.3061398 0.78539816 + 4600 4000 4.4319316 0.81681409 + 4700 4000 4.205607 0.84823002 + 4800 4000 3.2112987 0.87964594 + 4900 4000 2.6449777 0.91106187 + 5000 4000 2.3475497 0.9424778 +Loop time of 32.4926 on 1 procs for 3000 steps with 4000 atoms -Performance: 21682.142 tau/day, 250.951 timesteps/s -99.7% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 7977.205 tau/day, 92.329 timesteps/s +99.9% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 4.8291 | 4.8291 | 4.8291 | 0.0 | 40.40 -Neigh | 2.7489 | 2.7489 | 2.7489 | 0.0 | 22.99 -Comm | 0.071249 | 0.071249 | 0.071249 | 0.0 | 0.60 -Output | 0.20547 | 0.20547 | 0.20547 | 0.0 | 1.72 -Modify | 4.0179 | 4.0179 | 4.0179 | 0.0 | 33.61 -Other | | 0.0819 | | | 0.69 +Pair | 8.0124 | 8.0124 | 8.0124 | 0.0 | 24.66 +Neigh | 10.993 | 10.993 | 10.993 | 0.0 | 33.83 +Comm | 0.86697 | 0.86697 | 0.86697 | 0.0 | 2.67 +Output | 0.0021827 | 0.0021827 | 0.0021827 | 0.0 | 0.01 +Modify | 12.367 | 12.367 | 12.367 | 0.0 | 38.06 +Other | | 0.2515 | | | 0.77 -Nlocal: 4000 ave 4000 max 4000 min +Nlocal: 4000.00 ave 4000 max 4000 min Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 322 ave 322 max 322 min +Nghost: 318.000 ave 318 max 318 min Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 14849 ave 14849 max 14849 min +Neighs: 14807.0 ave 14807 max 14807 min Histogram: 1 0 0 0 0 0 0 0 0 0 -Total # of neighbors = 14849 -Ave neighs/atom = 3.71225 -Neighbor list builds = 1290 -Dangerous builds = 672 -Total wall time: 0:00:15 +Total # of neighbors = 
14807 +Ave neighs/atom = 3.7017500 +Neighbor list builds = 2189 +Dangerous builds = 1536 +Total wall time: 0:00:43 diff --git a/examples/granular/log.29Mar19.pour.drum.g++.4 b/examples/granular/log.29Oct20.pour.drum.g++.4 similarity index 69% rename from examples/granular/log.29Mar19.pour.drum.g++.4 rename to examples/granular/log.29Oct20.pour.drum.g++.4 index ccd4365a38..e53da50b9b 100644 --- a/examples/granular/log.29Mar19.pour.drum.g++.4 +++ b/examples/granular/log.29Oct20.pour.drum.g++.4 @@ -1,5 +1,4 @@ -LAMMPS (29 Mar 2019) -OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:88) +LAMMPS (29 Oct 2020) using 1 OpenMP thread(s) per MPI task # pour two types of particles (cohesive and non-cohesive) into cylinder # 'turn' cylinder by changing direction of gravity, then rotate it. @@ -55,10 +54,14 @@ region boxreg block 0 30 0 ${boxy} 0 ${boxz} region boxreg block 0 30 0 30 0 ${boxz} region boxreg block 0 30 0 30 0 50 create_box 2 boxreg -Created orthogonal box = (0 0 0) to (30 30 50) +Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (30.000000 30.000000 50.000000) 2 by 2 by 1 MPI processor grid change_box all boundary p p f -comm_modify vel yes +Changing box ... 
+ +pair_style granular +pair_coeff 1 * hertz/material 1e5 0.2 0.3 tangential mindlin NULL 1.0 0.5 damping tsuji +pair_coeff 2 2 jkr 1e5 0.1 0.3 50 tangential mindlin NULL 1.0 0.5 rolling sds 1e3 1e3 0.1 twisting marshall damping tsuji variable theta equal 0 @@ -104,10 +107,6 @@ neighbor ${skin} bin neighbor 0.2 bin neigh_modify delay 0 every 1 check yes -pair_style granular -pair_coeff 1 * hertz/material 1e5 0.2 0.3 tangential mindlin NULL 1.0 0.5 damping tsuji -pair_coeff 2 2 jkr 1e5 0.1 0.3 50 tangential mindlin NULL 1.0 0.5 rolling sds 1e3 1e3 0.1 twisting marshall damping tsuji - fix 3 all wall/gran/region granular hertz/material 1e5 0.1 0.3 tangential mindlin NULL 1.0 0.5 damping tsuji region curved_wall fix 4 all wall/gran/region granular hertz/material 1e5 0.1 0.3 tangential mindlin NULL 1.0 0.5 damping tsuji region bottom_wall @@ -117,8 +116,7 @@ thermo 100 timestep 0.001 -dump 1 all custom 100 ${name}.dump id type radius mass x y z -dump 1 all custom 100 rotating_drum_two_types.dump id type radius mass x y z +#dump 1 all custom 100 ${name}.dump id type radius mass x y z #For removal later compute 1 all property/atom radius @@ -163,32 +161,32 @@ Step Atoms KinEng v_theta 1800 4000 -0 0 1900 4000 -0 0 2000 4000 -0 0 -Loop time of 2.0709 on 4 procs for 2000 steps with 4000 atoms +Loop time of 3.86825 on 4 procs for 2000 steps with 4000 atoms -Performance: 83442.024 tau/day, 965.764 timesteps/s -97.7% CPU use with 4 MPI tasks x 1 OpenMP threads +Performance: 44671.398 tau/day, 517.030 timesteps/s +96.7% CPU use with 4 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.24679 | 0.26336 | 0.28853 | 3.0 | 12.72 -Neigh | 0.52279 | 0.5332 | 0.53858 | 0.9 | 25.75 -Comm | 0.17418 | 0.20253 | 0.23266 | 4.7 | 9.78 -Output | 0.092897 | 0.093531 | 0.09515 | 0.3 | 4.52 -Modify | 0.88151 | 0.89571 | 0.90582 | 0.9 | 43.25 -Other | | 0.08257 
| | | 3.99 +Pair | 0.26114 | 0.27918 | 0.28728 | 2.0 | 7.22 +Neigh | 1.2044 | 1.2414 | 1.3105 | 3.7 | 32.09 +Comm | 0.38592 | 0.47065 | 0.51052 | 7.4 | 12.17 +Output | 0.0007236 | 0.0013456 | 0.0024846 | 1.8 | 0.03 +Modify | 1.6217 | 1.6723 | 1.7801 | 5.0 | 43.23 +Other | | 0.2034 | | | 5.26 -Nlocal: 1000 ave 1001 max 999 min +Nlocal: 1000.00 ave 1012 max 988 min Histogram: 2 0 0 0 0 0 0 0 0 2 -Nghost: 267.75 ave 276 max 262 min -Histogram: 1 0 1 0 1 0 0 0 0 1 -Neighs: 2031.5 ave 2091 max 1958 min -Histogram: 1 0 0 0 1 0 0 1 0 1 +Nghost: 269.250 ave 278 max 256 min +Histogram: 1 0 0 0 0 0 1 1 0 1 +Neighs: 2060.50 ave 2156 max 1921 min +Histogram: 1 0 0 1 0 0 0 0 0 2 -Total # of neighbors = 8126 -Ave neighs/atom = 2.0315 +Total # of neighbors = 8242 +Ave neighs/atom = 2.0605000 Neighbor list builds = 1004 -Dangerous builds = 3 +Dangerous builds = 4 #Remove any particles that are above z > 0.5*drum_height delete_atoms group delgroup @@ -205,67 +203,69 @@ fix 5 all wall/gran/region granular hertz/material 1e5 0.1 0.3 tangential mindl # 'Turn' drum by switching the direction of gravity unfix grav +unfix ins1 +unfix ins2 fix grav all gravity 10 vector 0 -1 0 variable theta equal 2*PI*elapsed/20000.0 run 3000 -Per MPI rank memory allocation (min/avg/max) = 21.6 | 22.6 | 23.82 Mbytes +Per MPI rank memory allocation (min/avg/max) = 18.55 | 18.55 | 18.55 Mbytes Step Atoms KinEng v_theta - 2000 4000 64.255821 0 - 2100 4000 106.47082 0.031415927 - 2200 4000 121.52634 0.062831853 - 2300 4000 87.748818 0.09424778 - 2400 4000 82.712784 0.12566371 - 2500 4000 90.618713 0.15707963 - 2600 4000 30.096031 0.18849556 - 2700 4000 4.0838611 0.21991149 - 2800 4000 3.7485959 0.25132741 - 2900 4000 4.2159774 0.28274334 - 3000 4000 4.8730048 0.31415927 - 3100 4000 5.6109465 0.34557519 - 3200 4000 6.4290528 0.37699112 - 3300 4000 7.2699677 0.40840704 - 3400 4000 8.0895944 0.43982297 - 3500 4000 8.7222781 0.4712389 - 3600 4000 9.133205 0.50265482 - 3700 4000 9.3404584 0.53407075 - 3800 4000 
9.3359844 0.56548668 - 3900 4000 9.0916854 0.5969026 - 4000 4000 8.5596424 0.62831853 - 4100 4000 7.9734883 0.65973446 - 4200 4000 7.2154383 0.69115038 - 4300 4000 6.7039232 0.72256631 - 4400 4000 6.1542738 0.75398224 - 4500 4000 5.4049454 0.78539816 - 4600 4000 4.4603192 0.81681409 - 4700 4000 3.6197985 0.84823002 - 4800 4000 2.9895571 0.87964594 - 4900 4000 2.5314553 0.91106187 - 5000 4000 2.2645533 0.9424778 -Loop time of 6.64209 on 4 procs for 3000 steps with 4000 atoms + 2000 4000 65.819213 0 + 2100 4000 105.02389 0.031415927 + 2200 4000 112.02469 0.062831853 + 2300 4000 92.271262 0.09424778 + 2400 4000 89.369506 0.12566371 + 2500 4000 80.910925 0.15707963 + 2600 4000 31.620722 0.18849556 + 2700 4000 4.3019937 0.21991149 + 2800 4000 3.9913967 0.25132741 + 2900 4000 4.5203726 0.28274334 + 3000 4000 5.484886 0.31415927 + 3100 4000 6.1085958 0.34557519 + 3200 4000 6.7085635 0.37699112 + 3300 4000 7.4787777 0.40840704 + 3400 4000 8.2116413 0.43982297 + 3500 4000 8.7979302 0.4712389 + 3600 4000 9.871649 0.50265482 + 3700 4000 10.012426 0.53407075 + 3800 4000 9.9067754 0.56548668 + 3900 4000 9.725458 0.5969026 + 4000 4000 9.3350056 0.62831853 + 4100 4000 8.8337295 0.65973446 + 4200 4000 8.2712493 0.69115038 + 4300 4000 6.9609934 0.72256631 + 4400 4000 6.0120294 0.75398224 + 4500 4000 5.0490036 0.78539816 + 4600 4000 4.2796544 0.81681409 + 4700 4000 4.1736483 0.84823002 + 4800 4000 3.0860106 0.87964594 + 4900 4000 2.6670909 0.91106187 + 5000 4000 2.2901814 0.9424778 +Loop time of 10.7627 on 4 procs for 3000 steps with 4000 atoms -Performance: 39023.861 tau/day, 451.665 timesteps/s -96.6% CPU use with 4 MPI tasks x 1 OpenMP threads +Performance: 24083.252 tau/day, 278.741 timesteps/s +97.9% CPU use with 4 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 1.8376 | 2.126 | 2.3131 | 12.6 | 32.01 -Neigh | 0.97762 | 1.0518 | 1.1337 | 5.4 | 
15.84 -Comm | 0.53699 | 0.84265 | 1.2325 | 27.6 | 12.69 -Output | 0.13922 | 0.14159 | 0.14388 | 0.4 | 2.13 -Modify | 1.8815 | 2.1026 | 2.3368 | 11.2 | 31.66 -Other | | 0.3774 | | | 5.68 +Pair | 1.6731 | 2.0701 | 2.3327 | 18.9 | 19.23 +Neigh | 2.7389 | 3.1706 | 3.5146 | 15.7 | 29.46 +Comm | 0.93507 | 1.5441 | 2.1182 | 39.1 | 14.35 +Output | 0.0021682 | 0.0044412 | 0.006026 | 2.2 | 0.04 +Modify | 3.0031 | 3.4223 | 3.9262 | 18.3 | 31.80 +Other | | 0.5511 | | | 5.12 -Nlocal: 1000 ave 1256 max 744 min +Nlocal: 1000.00 ave 1277 max 723 min Histogram: 2 0 0 0 0 0 0 0 0 2 -Nghost: 579.5 ave 789 max 498 min -Histogram: 2 1 0 0 0 0 0 0 0 1 -Neighs: 3696.25 ave 4853 max 2590 min -Histogram: 2 0 0 0 0 0 0 0 1 1 +Nghost: 569.750 ave 809 max 454 min +Histogram: 1 2 0 0 0 0 0 0 0 1 +Neighs: 3690.50 ave 4937 max 2426 min +Histogram: 1 1 0 0 0 0 0 0 0 2 -Total # of neighbors = 14785 -Ave neighs/atom = 3.69625 -Neighbor list builds = 1230 -Dangerous builds = 676 -Total wall time: 0:00:08 +Total # of neighbors = 14762 +Ave neighs/atom = 3.6905000 +Neighbor list builds = 2187 +Dangerous builds = 1610 +Total wall time: 0:00:14 diff --git a/examples/granular/log.29Mar19.pour.flatwall.g++.1 b/examples/granular/log.29Oct20.pour.flatwall.g++.1 similarity index 56% rename from examples/granular/log.29Mar19.pour.flatwall.g++.1 rename to examples/granular/log.29Oct20.pour.flatwall.g++.1 index daab5efce3..00fc8e31a1 100644 --- a/examples/granular/log.29Mar19.pour.flatwall.g++.1 +++ b/examples/granular/log.29Oct20.pour.flatwall.g++.1 @@ -1,5 +1,4 @@ -LAMMPS (29 Mar 2019) -OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. 
(../comm.cpp:88) +LAMMPS (29 Oct 2020) using 1 OpenMP thread(s) per MPI task # pour two types of particles (cohesive and non-cohesive) on flat wall @@ -45,9 +44,14 @@ region boxreg block 0 20 0 ${boxy} 0 ${boxz} region boxreg block 0 20 0 20 0 ${boxz} region boxreg block 0 20 0 20 0 30 create_box 2 boxreg -Created orthogonal box = (0 0 0) to (20 20 30) +Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (20.000000 20.000000 30.000000) 1 by 1 by 1 MPI processor grid change_box all boundary p p f +Changing box ... + +pair_style granular +pair_coeff 1 * jkr 1000.0 50.0 0.3 10 tangential mindlin 800.0 1.0 0.5 rolling sds 500.0 200.0 0.5 twisting marshall +pair_coeff 2 2 hertz 200.0 20.0 tangential linear_history 300.0 1.0 0.1 rolling sds 200.0 100.0 0.1 twisting marshall comm_modify vel yes @@ -75,19 +79,13 @@ fix ins2 all pour 1500 2 3123 region insreg2 diam range 0.5 1 dens 1 ${dens} fix ins2 all pour 1500 2 3123 region insreg2 diam range 0.5 1 dens 1 1 Particle insertion: 562 every 346 steps, 1500 by step 693 -comm_modify vel yes - neighbor ${skin} bin neighbor 0.15 bin neigh_modify delay 0 every 1 check yes -pair_style granular -pair_coeff 1 * jkr 1000.0 50.0 0.3 10 tangential mindlin 800.0 1.0 0.5 rolling sds 500.0 200.0 0.5 twisting marshall -pair_coeff 2 2 hertz 200.0 20.0 tangential linear_history 300.0 1.0 0.1 rolling sds 200.0 100.0 0.1 twisting marshall - fix 3 all wall/gran granular hertz/material 1e5 1e3 0.3 tangential mindlin NULL 1.0 0.5 zplane 0 NULL -thermo_style custom step cpu atoms ke +thermo_style custom step atoms ke thermo_modify lost warn thermo 100 @@ -109,26 +107,82 @@ Neighbor list info ... 
stencil: half/bin/3d/newton bin: standard Per MPI rank memory allocation (min/avg/max) = 12.22 | 12.22 | 12.22 Mbytes -Step CPU Atoms KinEng - 0 0 0 -0 - 100 3.8153191 855 -0 - 200 4.195287 855 -0 - 300 4.5890362 855 -0 - 400 10.636087 1500 -0 - 500 11.306909 1500 -0 - 600 11.968198 1500 -0 - 700 22.631892 2288 -0 - 800 23.711387 2288 -0 - 900 24.754344 2288 -0 - 1000 25.811778 2288 -0 - 1100 35.368869 2845 -0 - 1200 37.149843 2845 -0 - 1300 39.026458 2845 -0 - 1400 41.757583 3000 -0 - 1500 45.155503 3000 -0 - 1600 48.570241 3000 -0 - 1700 52.839322 3000 -0 - 1800 59.772697 3000 -0 - 1900 69.493305 3000 -0 - 2000 114.61886 3000 -0 - 2100 152.89232 3000 -0 +Step Atoms KinEng + 0 0 -0 + 100 926 -0 + 200 926 -0 + 300 926 -0 + 400 1498 -0 + 500 1498 -0 + 600 1498 -0 + 700 2275 -0 + 800 2275 -0 + 900 2275 -0 + 1000 2275 -0 + 1100 2954 -0 + 1200 2954 -0 + 1300 2954 -0 + 1400 3000 -0 + 1500 3000 -0 + 1600 3000 -0 + 1700 3000 -0 + 1800 3000 -0 + 1900 3000 -0 + 2000 3000 -0 + 2100 3000 -0 + 2200 3000 -0 + 2300 3000 -0 + 2400 3000 -0 + 2500 3000 -0 + 2600 3000 -0 + 2700 3000 -0 + 2800 3000 -0 + 2900 3000 -0 + 3000 3000 -0 + 3100 3000 -0 + 3200 3000 -0 + 3300 3000 -0 + 3400 3000 -0 + 3500 3000 -0 + 3600 3000 -0 + 3700 3000 -0 + 3800 3000 -0 + 3900 3000 -0 + 4000 3000 -0 + 4100 3000 -0 + 4200 3000 -0 + 4300 3000 -0 + 4400 3000 -0 + 4500 3000 -0 + 4600 3000 -0 + 4700 3000 -0 + 4800 3000 -0 + 4900 3000 -0 + 5000 3000 -0 +Loop time of 24.3889 on 1 procs for 5000 steps with 3000 atoms + +Performance: 17713.003 tau/day, 205.012 timesteps/s +99.9% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 14.362 | 14.362 | 14.362 | 0.0 | 58.89 +Neigh | 3.3483 | 3.3483 | 3.3483 | 0.0 | 13.73 +Comm | 0.42893 | 0.42893 | 0.42893 | 0.0 | 1.76 +Output | 0.0025065 | 0.0025065 | 0.0025065 | 0.0 | 0.01 +Modify | 6.059 | 6.059 | 6.059 | 0.0 | 
24.84 +Other | | 0.1876 | | | 0.77 + +Nlocal: 3000.00 ave 3000 max 3000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 462.000 ave 462 max 462 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 17694.0 ave 17694 max 17694 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 17694 +Ave neighs/atom = 5.8980000 +Neighbor list builds = 1133 +Dangerous builds = 0 +Total wall time: 0:00:24 diff --git a/examples/granular/log.29Mar19.pour.flatwall.g++.4 b/examples/granular/log.29Oct20.pour.flatwall.g++.4 similarity index 52% rename from examples/granular/log.29Mar19.pour.flatwall.g++.4 rename to examples/granular/log.29Oct20.pour.flatwall.g++.4 index 62a8b96c05..1688e52b43 100644 --- a/examples/granular/log.29Mar19.pour.flatwall.g++.4 +++ b/examples/granular/log.29Oct20.pour.flatwall.g++.4 @@ -1,5 +1,4 @@ -LAMMPS (29 Mar 2019) -OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:88) +LAMMPS (29 Oct 2020) using 1 OpenMP thread(s) per MPI task # pour two types of particles (cohesive and non-cohesive) on flat wall @@ -45,9 +44,14 @@ region boxreg block 0 20 0 ${boxy} 0 ${boxz} region boxreg block 0 20 0 20 0 ${boxz} region boxreg block 0 20 0 20 0 30 create_box 2 boxreg -Created orthogonal box = (0 0 0) to (20 20 30) +Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (20.000000 20.000000 30.000000) 2 by 2 by 1 MPI processor grid change_box all boundary p p f +Changing box ... 
+ +pair_style granular +pair_coeff 1 * jkr 1000.0 50.0 0.3 10 tangential mindlin 800.0 1.0 0.5 rolling sds 500.0 200.0 0.5 twisting marshall +pair_coeff 2 2 hertz 200.0 20.0 tangential linear_history 300.0 1.0 0.1 rolling sds 200.0 100.0 0.1 twisting marshall comm_modify vel yes @@ -75,26 +79,19 @@ fix ins2 all pour 1500 2 3123 region insreg2 diam range 0.5 1 dens 1 ${dens} fix ins2 all pour 1500 2 3123 region insreg2 diam range 0.5 1 dens 1 1 Particle insertion: 562 every 346 steps, 1500 by step 693 -comm_modify vel yes - neighbor ${skin} bin neighbor 0.15 bin neigh_modify delay 0 every 1 check yes -pair_style granular -pair_coeff 1 * jkr 1000.0 50.0 0.3 10 tangential mindlin 800.0 1.0 0.5 rolling sds 500.0 200.0 0.5 twisting marshall -pair_coeff 2 2 hertz 200.0 20.0 tangential linear_history 300.0 1.0 0.1 rolling sds 200.0 100.0 0.1 twisting marshall - fix 3 all wall/gran granular hertz/material 1e5 1e3 0.3 tangential mindlin NULL 1.0 0.5 zplane 0 NULL -thermo_style custom step cpu atoms ke +thermo_style custom step atoms ke thermo_modify lost warn thermo 100 timestep 0.001 -dump 1 all custom 100 ${name}.dump id type radius mass x y z -dump 1 all custom 100 pour_two_types.dump id type radius mass x y z +#dump 1 all custom 100 ${name}.dump id type radius mass x y z run 5000 Neighbor list info ... @@ -110,82 +107,82 @@ Neighbor list info ... 
stencil: half/bin/3d/newton bin: standard Per MPI rank memory allocation (min/avg/max) = 11.98 | 11.98 | 11.98 Mbytes -Step CPU Atoms KinEng - 0 0 0 -0 - 100 0.11584234 855 -0 - 200 0.12743592 855 -0 - 300 0.13925815 855 -0 - 400 0.35203671 1500 -0 - 500 0.37055922 1500 -0 - 600 0.38671875 1500 -0 - 700 0.71736908 2288 -0 - 800 0.74506783 2288 -0 - 900 0.77112222 2288 -0 - 1000 0.79632139 2288 -0 - 1100 1.0384252 2845 -0 - 1200 1.08093 2845 -0 - 1300 1.1224561 2845 -0 - 1400 1.1811485 3000 -0 - 1500 1.2414908 3000 -0 - 1600 1.3105879 3000 -0 - 1700 1.390928 3000 -0 - 1800 1.4869275 3000 -0 - 1900 1.5958266 3000 -0 - 2000 1.7172487 3000 -0 - 2100 1.851155 3000 -0 - 2200 1.9957182 3000 -0 - 2300 2.1593764 3000 -0 - 2400 2.3433132 3000 -0 - 2500 2.532742 3000 -0 - 2600 2.7376895 3000 -0 - 2700 2.9463468 3000 -0 - 2800 3.1645725 3000 -0 - 2900 3.3879526 3000 -0 - 3000 3.6152103 3000 -0 - 3100 3.8467371 3000 -0 - 3200 4.0787683 3000 -0 - 3300 4.3097105 3000 -0 - 3400 4.5423617 3000 -0 - 3500 4.7773693 3000 -0 - 3600 5.0127218 3000 -0 - 3700 5.2519271 3000 -0 - 3800 5.4951298 3000 -0 - 3900 5.7210469 3000 -0 - 4000 5.9432652 3000 -0 - 4100 6.1687591 3000 -0 - 4200 6.3942792 3000 -0 - 4300 6.6331475 3000 -0 - 4400 6.8632154 3000 -0 - 4500 7.0979366 3000 -0 - 4600 7.3305347 3000 -0 - 4700 7.5670528 3000 -0 - 4800 7.8086057 3000 -0 - 4900 8.0407174 3000 -0 - 5000 8.2765219 3000 -0 -Loop time of 8.27669 on 4 procs for 5000 steps with 3000 atoms +Step Atoms KinEng + 0 0 -0 + 100 926 -0 + 200 926 -0 + 300 926 -0 + 400 1498 -0 + 500 1498 -0 + 600 1498 -0 + 700 2275 -0 + 800 2275 -0 + 900 2275 -0 + 1000 2275 -0 + 1100 2954 -0 + 1200 2954 -0 + 1300 2954 -0 + 1400 3000 -0 + 1500 3000 -0 + 1600 3000 -0 + 1700 3000 -0 + 1800 3000 -0 + 1900 3000 -0 + 2000 3000 -0 + 2100 3000 -0 + 2200 3000 -0 + 2300 3000 -0 + 2400 3000 -0 + 2500 3000 -0 + 2600 3000 -0 + 2700 3000 -0 + 2800 3000 -0 + 2900 3000 -0 + 3000 3000 -0 + 3100 3000 -0 + 3200 3000 -0 + 3300 3000 -0 + 3400 3000 -0 + 3500 3000 -0 
+ 3600 3000 -0 + 3700 3000 -0 + 3800 3000 -0 + 3900 3000 -0 + 4000 3000 -0 + 4100 3000 -0 + 4200 3000 -0 + 4300 3000 -0 + 4400 3000 -0 + 4500 3000 -0 + 4600 3000 -0 + 4700 3000 -0 + 4800 3000 -0 + 4900 3000 -0 + 5000 3000 -0 +Loop time of 12.1982 on 4 procs for 5000 steps with 3000 atoms -Performance: 52194.788 tau/day, 604.106 timesteps/s -97.7% CPU use with 4 MPI tasks x 1 OpenMP threads +Performance: 35414.923 tau/day, 409.895 timesteps/s +97.0% CPU use with 4 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 1.6106 | 3.4073 | 5.4191 | 95.7 | 41.17 -Neigh | 0.51456 | 0.64572 | 0.81542 | 16.6 | 7.80 -Comm | 0.2808 | 2.5222 | 4.4998 | 121.9 | 30.47 -Output | 0.15695 | 0.15919 | 0.16502 | 0.8 | 1.92 -Modify | 1.3517 | 1.4192 | 1.4904 | 4.9 | 17.15 -Other | | 0.123 | | | 1.49 +Pair | 1.7141 | 3.8131 | 6.2143 | 107.3 | 31.26 +Neigh | 0.77648 | 0.96585 | 1.1892 | 18.3 | 7.92 +Comm | 0.7427 | 3.5566 | 5.9731 | 128.4 | 29.16 +Output | 0.0067544 | 0.0086352 | 0.011408 | 1.8 | 0.07 +Modify | 3.3476 | 3.5826 | 3.8235 | 11.5 | 29.37 +Other | | 0.2715 | | | 2.23 -Nlocal: 750 ave 1036 max 482 min -Histogram: 2 0 0 0 0 0 0 0 1 1 -Nghost: 429.75 ave 475 max 386 min +Nlocal: 750.000 ave 1033 max 463 min +Histogram: 2 0 0 0 0 0 0 0 0 2 +Nghost: 435.000 ave 492 max 378 min +Histogram: 2 0 0 0 0 0 0 0 0 2 +Neighs: 4434.50 ave 7028 max 1967 min Histogram: 2 0 0 0 0 0 0 0 0 2 -Neighs: 4051.75 ave 6274 max 2057 min -Histogram: 2 0 0 0 0 0 0 0 1 1 -Total # of neighbors = 16207 -Ave neighs/atom = 5.40233 -Neighbor list builds = 1165 +Total # of neighbors = 17738 +Ave neighs/atom = 5.9126667 +Neighbor list builds = 1139 Dangerous builds = 0 -Total wall time: 0:00:08 +Total wall time: 0:00:12 From 773a31a628e41a21f5379ab428dfab763c08853b Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 6 Nov 2020 17:30:08 -0500 Subject: [PATCH 45/64] improve 
read_dump and rerun documentation. mention that native binary dumps are not supported --- doc/src/read_dump.rst | 2 ++ doc/src/rerun.rst | 29 ++++++++++++++++++----------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/doc/src/read_dump.rst b/doc/src/read_dump.rst index c46c12c951..f9a8c87c86 100644 --- a/doc/src/read_dump.rst +++ b/doc/src/read_dump.rst @@ -370,6 +370,8 @@ needed to generate absolute, unscaled coordinates. Restrictions """""""""""" +The *native* dump file reader does not support binary .bin dump files. + To read gzipped dump files, you must compile LAMMPS with the -DLAMMPS_GZIP option. See the :doc:`Build settings ` doc page for details. diff --git a/doc/src/rerun.rst b/doc/src/rerun.rst index 16b036b449..7d51fba868 100644 --- a/doc/src/rerun.rst +++ b/doc/src/rerun.rst @@ -99,14 +99,15 @@ files do not match the specified output frequency. ---------- If more than one dump file is specified, the dump files are read one -after the other. It is assumed that snapshot timesteps will be in -ascending order. If a snapshot is encountered that is not in -ascending order, it will skip the snapshot until it reads one that is. +after the other in the order specified. It is assumed that snapshot +timesteps will be in ascending order. If a snapshot is encountered that +is not in ascending order, it will skip the snapshot until it reads one +that is. This allows skipping of a duplicate snapshot (same timestep), e.g. that appeared at the end of one file and beginning of the next. However if you specify a series of dump files in an incorrect order (with respect to the timesteps they contain), you may skip large -numbers of snapshots +numbers of snapshots. Note that the dump files specified as part of the *dump* keyword can be parallel files, i.e. written as multiple files either per processor @@ -118,17 +119,24 @@ and write parallel dump files. 
The *first*\ , *last*\ , *every*\ , *skip* keywords determine which snapshots are read from the dump file(s). Snapshots are skipped until -they have a timestamp >= *Nfirst*\ . When a snapshot with a timestamp > -*Nlast* is encountered, the rerun command finishes. Note below that +they have a timestep >= *Nfirst*\ . When a snapshot with a timestep > +*Nlast* is encountered, the rerun command finishes. Note that the defaults for *first* and *last* are to read all snapshots. If the *every* keyword is set to a value > 0, then only snapshots with -timestamps that are a multiple of *Nevery* are read (the first +timesteps that are a multiple of *Nevery* are read (the first snapshot is always read). If *Nevery* = 0, then this criterion is ignored, i.e. every snapshot is read that meets the other criteria. If the *skip* keyword is used, then after the first snapshot is read, every Nth snapshot is read, where N = *Nskip*\ . E.g. if *Nskip* = 3, then only 1 out of every 3 snapshots is read, assuming the snapshot -timestamp is also consistent with the other criteria. +timestep is also consistent with the other criteria. + +.. note:: + + Not all dump formats contain the timestep and not all dump readers + support reading it. In that case individual snapshots are assigned + consecutive timestep numbers starting at 1. + The *start* and *stop* keywords do not affect which snapshots are read from the dump file(s). Rather, they have the same meaning that they @@ -205,9 +213,8 @@ thermodynamic output or new dump file output. Restrictions """""""""""" -To read gzipped dump files, you must compile LAMMPS with the --DLAMMPS_GZIP option. See the :doc:`Build settings ` -doc page for details. +The *rerun* command is subject to all restrictions of +the :doc:`read_dump ` command. 
Related commands """""""""""""""" From d55eeefc323792f0bd1561a1b835ee68742f18b6 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Mon, 9 Nov 2020 13:47:27 -0500 Subject: [PATCH 46/64] Undo change in library interface breaking compatibility --- src/library.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/library.cpp b/src/library.cpp index 2fd1486bc2..074cb3cffa 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -1976,7 +1976,7 @@ void lammps_gather_atoms(void *handle, char *name, int type, int count, void *da // use atom ID to insert each atom's values into copy // MPI_Allreduce with MPI_SUM to merge into data, ordered by atom ID - if (type == LAMMPS_INT) { + if (type == 0) { int *vector = nullptr; int **array = nullptr; const int imgunpack = (count == 3) && (strcmp(name,"image") == 0); @@ -2015,7 +2015,7 @@ void lammps_gather_atoms(void *handle, char *name, int type, int count, void *da MPI_Allreduce(copy,data,count*natoms,MPI_INT,MPI_SUM,lmp->world); lmp->memory->destroy(copy); - } else if (type == LAMMPS_DOUBLE) { + } else if (type == 1) { double *vector = nullptr; double **array = nullptr; if (count == 1) vector = (double *) vptr; From df672fe7d4f8e0d229ef2e72f12f38cb75ad005a Mon Sep 17 00:00:00 2001 From: julient31 Date: Mon, 9 Nov 2020 12:42:12 -0700 Subject: [PATCH 47/64] Correcting indentation issue in pair_spin_dmi.cpp --- src/SPIN/pair_spin_dmi.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/SPIN/pair_spin_dmi.cpp b/src/SPIN/pair_spin_dmi.cpp index d7b7d1b3d9..69a9873303 100644 --- a/src/SPIN/pair_spin_dmi.cpp +++ b/src/SPIN/pair_spin_dmi.cpp @@ -257,16 +257,15 @@ void PairSpinDmi::compute(int eflag, int vflag) f[i][0] += fi[0]; f[i][1] += fi[1]; f[i][2] += fi[2]; - if (newton_pair || j < nlocal) { - f[j][0] -= fi[0]; - f[j][1] -= fi[1]; - f[j][2] -= fi[2]; - } + if (newton_pair || j < nlocal) { + f[j][0] -= fi[0]; + f[j][1] -= fi[1]; + f[j][2] -= fi[2]; + } fm[i][0] += fmi[0]; 
fm[i][1] += fmi[1]; fm[i][2] += fmi[2]; - if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, evdwl,ecoul,fi[0],fi[1],fi[2],delx,dely,delz); } From 2acb0aaedd435390565c65093bc556f8aff7ad5f Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 10 Nov 2020 05:00:56 -0500 Subject: [PATCH 48/64] fix typo that has propagated to multiple pair style doc files --- doc/src/pair_atm.rst | 2 +- doc/src/pair_buck_long.rst | 2 +- doc/src/pair_gayberne.rst | 2 +- doc/src/pair_lcbop.rst | 9 +++++---- doc/src/pair_polymorphic.rst | 2 +- doc/src/pair_resquared.rst | 2 +- doc/src/pair_srp.rst | 2 +- 7 files changed, 11 insertions(+), 10 deletions(-) diff --git a/doc/src/pair_atm.rst b/doc/src/pair_atm.rst index c0dfb64c50..0bdfecd517 100644 --- a/doc/src/pair_atm.rst +++ b/doc/src/pair_atm.rst @@ -143,7 +143,7 @@ combinations, else an error will result. Mixing, shift, table, tail correction, restart, rRESPA info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" -This pair styles do not support the :doc:`pair_modify ` +This pair style does not support the :doc:`pair_modify ` mix, shift, table, and tail options. This pair style writes its information to :doc:`binary restart files diff --git a/doc/src/pair_buck_long.rst b/doc/src/pair_buck_long.rst index 1883cee637..0e19873500 100644 --- a/doc/src/pair_buck_long.rst +++ b/doc/src/pair_buck_long.rst @@ -117,7 +117,7 @@ global Coulombic cutoff is allowed. Mixing, shift, table, tail correction, restart, rRESPA info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" -This pair styles does not support mixing. Thus, coefficients for all +This pair style does not support mixing. Thus, coefficients for all I,J pairs must be specified explicitly.
This pair style supports the :doc:`pair_modify ` shift diff --git a/doc/src/pair_gayberne.rst b/doc/src/pair_gayberne.rst index 309e949f97..19597b9018 100644 --- a/doc/src/pair_gayberne.rst +++ b/doc/src/pair_gayberne.rst @@ -160,7 +160,7 @@ For atom type pairs I,J and I != J, the epsilon and sigma coefficients and cutoff distance for this pair style can be mixed. The default mix value is *geometric*\ . See the "pair_modify" command for details. -This pair styles supports the :doc:`pair_modify ` shift +This pair style supports the :doc:`pair_modify ` shift option for the energy of the Lennard-Jones portion of the pair interaction, but only for sphere-sphere interactions. There is no shifting performed for ellipsoidal interactions due to the anisotropic diff --git a/doc/src/pair_lcbop.rst b/doc/src/pair_lcbop.rst index fa2d3c0609..c44ad1f8a4 100644 --- a/doc/src/pair_lcbop.rst +++ b/doc/src/pair_lcbop.rst @@ -75,14 +75,15 @@ This pair style can only be used via the *pair* keyword of the Restrictions """""""""""" -This pair styles is part of the MANYBODY package. It is only enabled -if LAMMPS was built with that package. See the :doc:`Build package ` doc page for more info. +This pair style is part of the MANYBODY package. It is only enabled +if LAMMPS was built with that package. +See the :doc:`Build package ` doc page for more info. This pair potential requires the :doc:`newton ` setting to be "on" for pair interactions. -The C.lcbop potential file provided with LAMMPS (see the potentials -directory) is parameterized for metal :doc:`units `. You can use +The ``C.lcbop`` potential file provided with LAMMPS (see the potentials +directory) is parameterized for :doc:`metal units `. You can use the LCBOP potential with any LAMMPS units, but you would need to create your own LCBOP potential file with coefficients listed in the appropriate units if your simulation does not use "metal" units. 
diff --git a/doc/src/pair_polymorphic.rst b/doc/src/pair_polymorphic.rst index 04be107e02..6abe037581 100644 --- a/doc/src/pair_polymorphic.rst +++ b/doc/src/pair_polymorphic.rst @@ -298,7 +298,7 @@ described above. For each of the F functions, nx values are listed. Mixing, shift, table, tail correction, restart, rRESPA info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" -This pair styles does not support the :doc:`pair_modify ` +This pair style does not support the :doc:`pair_modify ` shift, table, and tail options. This pair style does not write their information to :doc:`binary restart diff --git a/doc/src/pair_resquared.rst b/doc/src/pair_resquared.rst index d34588682c..0932730469 100644 --- a/doc/src/pair_resquared.rst +++ b/doc/src/pair_resquared.rst @@ -173,7 +173,7 @@ equation for the Hamaker constant presented here. Mixing of sigma and epsilon followed by calculation of the energy prefactors using the equations above is recommended. -This pair styles supports the :doc:`pair_modify ` shift +This pair style supports the :doc:`pair_modify ` shift option for the energy of the Lennard-Jones portion of the pair interaction, but only for sphere-sphere interactions. There is no shifting performed for ellipsoidal interactions due to the anisotropic diff --git a/doc/src/pair_srp.rst b/doc/src/pair_srp.rst index 620c74d515..59df8be1e2 100644 --- a/doc/src/pair_srp.rst +++ b/doc/src/pair_srp.rst @@ -124,7 +124,7 @@ at the cutoff distance :math:`r_c`. Mixing, shift, table, tail correction, restart, rRESPA info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" -This pair styles does not support mixing. +This pair style does not support mixing. This pair style does not support the :doc:`pair_modify ` shift option for the energy of the pair interaction. 
Note that as From 5aae2cb44ded9af4596ca10505e9da130747cc48 Mon Sep 17 00:00:00 2001 From: Tim Bernhard Date: Tue, 10 Nov 2020 14:03:16 +0100 Subject: [PATCH 49/64] Fix typo in Howto Walls --- doc/src/Howto_walls.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/Howto_walls.rst b/doc/src/Howto_walls.rst index 4d35cd66b3..6e3e22a3f0 100644 --- a/doc/src/Howto_walls.rst +++ b/doc/src/Howto_walls.rst @@ -67,5 +67,5 @@ rotate. The only frictional idealized walls currently in LAMMPS are flat or curved surfaces specified by the :doc:`fix wall/gran ` -command. At some point we plan to allow regoin surfaces to be used as +command. At some point we plan to allow region surfaces to be used as frictional walls, as well as triangulated surfaces. From eae9fea02615b0aaba3e0b92350e78e70f302e94 Mon Sep 17 00:00:00 2001 From: Tim Bernhard Date: Tue, 10 Nov 2020 14:04:49 +0100 Subject: [PATCH 50/64] Consistently use timesteps instead of time-steps --- doc/src/atc_output.rst | 2 +- doc/src/fix_filter_corotate.rst | 2 +- doc/src/fix_rx.rst | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/src/atc_output.rst b/doc/src/atc_output.rst index 3fb1491be1..5003817daa 100644 --- a/doc/src/atc_output.rst +++ b/doc/src/atc_output.rst @@ -14,7 +14,7 @@ Syntax * AtC fixID = ID of :doc:`fix atc ` instance * *output* or *output index* = name of the AtC sub-command * filename_prefix = prefix for data files (for *output*) -* frequency = frequency of output in time-steps (for *output*) +* frequency = frequency of output in timesteps (for *output*) * optional keywords for *output*: - text = creates text output of index, step and nodal variable values for unique nodes diff --git a/doc/src/fix_filter_corotate.rst b/doc/src/fix_filter_corotate.rst index ee608e5361..e33fc0ac4a 100644 --- a/doc/src/fix_filter_corotate.rst +++ b/doc/src/fix_filter_corotate.rst @@ -56,7 +56,7 @@ is slightly modified only for the computation of long-range forces.
A good cluster decomposition constitutes in building clusters which contain the fastest covalent bonds inside clusters. -If the clusters are chosen suitably, the :doc:`run_style respa ` is stable for outer time-steps of at least 8fs. +If the clusters are chosen suitably, the :doc:`run_style respa ` is stable for outer timesteps of at least 8fs. ---------- diff --git a/doc/src/fix_rx.rst b/doc/src/fix_rx.rst index c1a1d0950c..9eab06ffad 100644 --- a/doc/src/fix_rx.rst +++ b/doc/src/fix_rx.rst @@ -90,10 +90,10 @@ accepted, *h* is increased by a proportional amount, and the next ODE step is be Otherwise, *h* is shrunk and the ODE step is repeated. Run-time diagnostics are available for the rkf45 ODE solver. The frequency -(in time-steps) that diagnostics are reported is controlled by the last (optional) +(in timesteps) that diagnostics are reported is controlled by the last (optional) 12th argument. A negative frequency means that diagnostics are reported once at the end of each run. A positive value N means that the diagnostics are reported once -per N time-steps. +per N timesteps. The diagnostics report the average # of integrator steps and RHS function evaluations and run-time per ODE as well as the average/RMS/min/max per process. 
If the From ad56e0ca9ff75b7129c1386dc615e490aefcb6f6 Mon Sep 17 00:00:00 2001 From: Tim Bernhard Date: Tue, 10 Nov 2020 14:16:12 +0100 Subject: [PATCH 51/64] Fix casing of the word --- .github/CONTRIBUTING.md | 2 +- doc/github-development-workflow.md | 2 +- doc/src/Howto_github.rst | 4 ++-- lib/kokkos/README.md | 4 ++-- lib/quip/README | 2 +- lib/scafacos/README | 2 +- src/USER-PLUMED/README | 2 +- tools/replica/reorder_remd_traj.py | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 60fe82d86c..62e7186360 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -108,7 +108,7 @@ For bug reports, the next step is that one of the core LAMMPS developers will se For submitting pull requests, there is a [detailed tutorial](https://lammps.sandia.gov/doc/Howto_github.html) in the LAMMPS manual. Thus only a brief breakdown of the steps is presented here. Please note, that the LAMMPS developers are still reviewing and trying to improve the process. If you are unsure about something, do not hesitate to post a question on the lammps-users mailing list or contact one fo the core LAMMPS developers. Immediately after the submission, the LAMMPS continuing integration server at ci.lammps.org will download your submitted branch and perform a simple compilation test, i.e. will test whether your submitted code can be compiled under various conditions. It will also do a check on whether your included documentation translates cleanly. Whether these tests are successful or fail will be recorded. If a test fails, please inspect the corresponding output on the CI server and take the necessary steps, if needed, so that the code can compile cleanly again. The test will be re-run each the pull request is updated with a push to the remote branch on GitHub. -Next a LAMMPS core developer will self-assign and do an overall technical assessment of the submission. 
If you are not yet registered as a LAMMPS collaborator, you will receive an invitation for that. As part of the assesment, the pull request will be categorized with labels. There are two special labels: `needs_work` (indicates that work from the submitter of the pull request is needed) and `work_in_progress` (indicates, that the assigned LAMMPS developer will make changes, if not done by the contributor who made the submit). +Next a LAMMPS core developer will self-assign and do an overall technical assessment of the submission. If you are not yet registered as a LAMMPS collaborator, you will receive an invitation for that. As part of the assessment, the pull request will be categorized with labels. There are two special labels: `needs_work` (indicates that work from the submitter of the pull request is needed) and `work_in_progress` (indicates, that the assigned LAMMPS developer will make changes, if not done by the contributor who made the submit). You may also receive comments and suggestions on the overall submission or specific details and on occasion specific requests for changes as part of the review. If permitted, also additional changes may be pushed into your pull request branch or a pull request may be filed in your LAMMPS fork on GitHub to include those changes. The LAMMPS developer may then decide to assign the pull request to another developer (e.g. when that developer is more knowledgeable about the submitted feature or enhancement or has written the modified code). It may also happen, that additional developers are requested to provide a review and approve the changes. For submissions, that may change the general behavior of LAMMPS, or where a possibility of unwanted side effects exists, additional tests may be requested by the assigned developer. 
If the assigned developer is satisfied and considers the submission ready for inclusion into LAMMPS, the pull request will receive approvals and be merged into the master branch by one of the core LAMMPS developers. After the pull request is merged, you may delete the feature branch used for the pull request in your personal LAMMPS fork. diff --git a/doc/github-development-workflow.md b/doc/github-development-workflow.md index a7d41dd32a..503a33be4e 100644 --- a/doc/github-development-workflow.md +++ b/doc/github-development-workflow.md @@ -95,7 +95,7 @@ on the pull request discussion page on GitHub, so that other developers can later review the entire discussion after the fact and understand the rationale behind choices made. Exceptions to this policy are technical discussions, that are centered on tools or policies themselves -(git, github, c++) rather than on the content of the pull request. +(git, c++) rather than on the content of the pull request. ### Checklist for Pull Requests diff --git a/doc/src/Howto_github.rst b/doc/src/Howto_github.rst index 63cb8945e8..6303feb407 100644 --- a/doc/src/Howto_github.rst +++ b/doc/src/Howto_github.rst @@ -72,7 +72,7 @@ explained in more detail here: `feature branch workflow Date: Tue, 10 Nov 2020 14:20:52 +0100 Subject: [PATCH 52/64] Fix casing of the word GitHub --- .github/CONTRIBUTING.md | 2 +- doc/github-development-workflow.md | 2 +- doc/src/Howto_github.rst | 4 +- lib/kokkos/README.md | 4 +- lib/quip/README | 2 +- lib/scafacos/README | 2 +- src/USER-PLUMED/README | 2 +- tools/replica/reorder_remd_traj.py | 231 +++++++++++++++-------------- 8 files changed, 131 insertions(+), 118 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 60fe82d86c..62e7186360 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -108,7 +108,7 @@ For bug reports, the next step is that one of the core LAMMPS developers will se For submitting pull requests, there is a [detailed 
tutorial](https://lammps.sandia.gov/doc/Howto_github.html) in the LAMMPS manual. Thus only a brief breakdown of the steps is presented here. Please note, that the LAMMPS developers are still reviewing and trying to improve the process. If you are unsure about something, do not hesitate to post a question on the lammps-users mailing list or contact one fo the core LAMMPS developers. Immediately after the submission, the LAMMPS continuing integration server at ci.lammps.org will download your submitted branch and perform a simple compilation test, i.e. will test whether your submitted code can be compiled under various conditions. It will also do a check on whether your included documentation translates cleanly. Whether these tests are successful or fail will be recorded. If a test fails, please inspect the corresponding output on the CI server and take the necessary steps, if needed, so that the code can compile cleanly again. The test will be re-run each the pull request is updated with a push to the remote branch on GitHub. -Next a LAMMPS core developer will self-assign and do an overall technical assessment of the submission. If you are not yet registered as a LAMMPS collaborator, you will receive an invitation for that. As part of the assesment, the pull request will be categorized with labels. There are two special labels: `needs_work` (indicates that work from the submitter of the pull request is needed) and `work_in_progress` (indicates, that the assigned LAMMPS developer will make changes, if not done by the contributor who made the submit). +Next a LAMMPS core developer will self-assign and do an overall technical assessment of the submission. If you are not yet registered as a LAMMPS collaborator, you will receive an invitation for that. As part of the assessment, the pull request will be categorized with labels. 
There are two special labels: `needs_work` (indicates that work from the submitter of the pull request is needed) and `work_in_progress` (indicates, that the assigned LAMMPS developer will make changes, if not done by the contributor who made the submit). You may also receive comments and suggestions on the overall submission or specific details and on occasion specific requests for changes as part of the review. If permitted, also additional changes may be pushed into your pull request branch or a pull request may be filed in your LAMMPS fork on GitHub to include those changes. The LAMMPS developer may then decide to assign the pull request to another developer (e.g. when that developer is more knowledgeable about the submitted feature or enhancement or has written the modified code). It may also happen, that additional developers are requested to provide a review and approve the changes. For submissions, that may change the general behavior of LAMMPS, or where a possibility of unwanted side effects exists, additional tests may be requested by the assigned developer. If the assigned developer is satisfied and considers the submission ready for inclusion into LAMMPS, the pull request will receive approvals and be merged into the master branch by one of the core LAMMPS developers. After the pull request is merged, you may delete the feature branch used for the pull request in your personal LAMMPS fork. diff --git a/doc/github-development-workflow.md b/doc/github-development-workflow.md index a7d41dd32a..c34a67dfcf 100644 --- a/doc/github-development-workflow.md +++ b/doc/github-development-workflow.md @@ -95,7 +95,7 @@ on the pull request discussion page on GitHub, so that other developers can later review the entire discussion after the fact and understand the rationale behind choices made. Exceptions to this policy are technical discussions, that are centered on tools or policies themselves -(git, github, c++) rather than on the content of the pull request. 
+(git, GitHub, c++) rather than on the content of the pull request. ### Checklist for Pull Requests diff --git a/doc/src/Howto_github.rst b/doc/src/Howto_github.rst index 63cb8945e8..311d716f18 100644 --- a/doc/src/Howto_github.rst +++ b/doc/src/Howto_github.rst @@ -72,7 +72,7 @@ explained in more detail here: `feature branch workflow .%%d.lammpstrj. \ Can be in compressed (.gz or .bz2) format. \ This is a required argument") - parser.add_argument("-logfn", "--logfn", default = "log.lammps", - help = "LAMMPS log file that contains swap history \ + parser.add_argument("-logfn", "--logfn", default="log.lammps", + help="LAMMPS log file that contains swap history \ of temperatures among replicas. \ Default = 'lammps.log'") - parser.add_argument("-tfn", "--tempfn", default = "temps.txt", - help = "ascii file (readable by numpy.loadtxt) with \ + parser.add_argument("-tfn", "--tempfn", default="temps.txt", + help="ascii file (readable by numpy.loadtxt) with \ the temperatures used in the REMD simulation.") - parser.add_argument("-ns", "--nswap", type = int, - help = "Swap frequency used in LAMMPS temper command") + parser.add_argument("-ns", "--nswap", type=int, + help="Swap frequency used in LAMMPS temper command") - parser.add_argument("-nw", "--nwrite", type = int, default = 1, - help = "Trajectory writing frequency used \ + parser.add_argument("-nw", "--nwrite", type=int, default=1, + help="Trajectory writing frequency used \ in LAMMPS dump command") - parser.add_argument("-np", "--nprod", type = int, default = 0, - help = "Number of timesteps to save in the reordered\ + parser.add_argument("-np", "--nprod", type=int, default=0, + help="Number of timesteps to save in the reordered\ trajectories.\ This should be in units of the LAMMPS timestep") - parser.add_argument("-logw", "--logw", action = 'store_true', - help = "Supplying this flag \ + parser.add_argument("-logw", "--logw", action='store_true', + help="Supplying this flag \ calculates *canonical* (NVT ensemble) 
log weights") parser.add_argument("-e", "--enefn", - help = "File that has n_replica x n_frames array\ + help="File that has n_replica x n_frames array\ of total potential energies") parser.add_argument("-kB", "--boltzmann_const", - type = float, default = 0.001987, - help = "Boltzmann constant in appropriate units. \ + type=float, default=0.001987, + help="Boltzmann constant in appropriate units. \ Default is kcal/mol") - parser.add_argument("-ot", "--out_temps", nargs = '+', type = np.float64, - help = "Reorder trajectories at these temperatures.\n \ + parser.add_argument("-ot", "--out_temps", nargs='+', type=np.float64, + help="Reorder trajectories at these temperatures.\n \ Default is all temperatures used in the simulation") - parser.add_argument("-od", "--outdir", default = ".", - help = "All output will be saved to this directory") + parser.add_argument("-od", "--outdir", default=".", + help="All output will be saved to this directory") # parse inputs args = parser.parse_args() @@ -438,14 +449,16 @@ if __name__ == "__main__": nprod = args.nprod enefn = args.enefn - if not enefn is None: enefn = os.path.abspath(enefn) + if not enefn is None: + enefn = os.path.abspath(enefn) get_logw = args.logw kB = args.boltzmann_const out_temps = args.out_temps outdir = os.path.abspath(args.outdir) if not os.path.isdir(outdir): - if me == ROOT: os.mkdir(outdir) + if me == ROOT: + os.mkdir(outdir) # check that all input files are present (only on the ROOT proc) if me == ROOT: @@ -465,7 +478,8 @@ if __name__ == "__main__": for i in range(ntemps): this_intrajfn = intrajfns[i] x = this_intrajfn + ".gz" - if os.path.isfile(this_intrajfn): continue + if os.path.isfile(this_intrajfn): + continue elif os.path.isfile(this_intrajfn + ".gz"): intrajfns[i] = this_intrajfn + ".gz" elif os.path.isfile(this_intrajfn + ".bz2"): @@ -476,42 +490,41 @@ if __name__ == "__main__": # set output filenames outprefix = os.path.join(outdir, traj_prefix.split('/')[-1]) - outtrajfns = 
["%s.%3.2f.lammpstrj.gz" % \ - (outprefix, _get_nearest_temp(temps, t)) \ + outtrajfns = ["%s.%3.2f.lammpstrj.gz" % + (outprefix, _get_nearest_temp(temps, t)) for t in out_temps] - byteindfns = [os.path.join(outdir, ".byteind_%d.gz" % k) \ + byteindfns = [os.path.join(outdir, ".byteind_%d.gz" % k) for k in range(ntemps)] frametuplefn = outprefix + '.frametuple.pickle' if get_logw: logwfn = outprefix + ".logw.pickle" - # get a list of all frames at a particular temp visited by each replica # this is fast so run only on ROOT proc. master_frametuple_dict = {} if me == ROOT: - master_frametuple_dict = get_replica_frames(logfn = logfn, - temps = temps, - nswap = nswap, - writefreq = writefreq) + master_frametuple_dict = get_replica_frames(logfn=logfn, + temps=temps, + nswap=nswap, + writefreq=writefreq) # save to a pickle from the ROOT proc with open(frametuplefn, 'wb') as of: pickle.dump(master_frametuple_dict, of) # broadcast to all procs - master_frametuple_dict = comm.bcast(master_frametuple_dict, root = ROOT) + master_frametuple_dict = comm.bcast(master_frametuple_dict, root=ROOT) # define a chunk of replicas to process on each proc CHUNKSIZE_1 = int(ntemps/nproc) if me < nproc - 1: - my_rep_inds = range( (me*CHUNKSIZE_1), (me+1)*CHUNKSIZE_1 ) + my_rep_inds = range((me*CHUNKSIZE_1), (me+1)*CHUNKSIZE_1) else: - my_rep_inds = range( (me*CHUNKSIZE_1), ntemps ) + my_rep_inds = range((me*CHUNKSIZE_1), ntemps) # get byte indices from replica (un-ordered) trajs. 
in parallel - get_byte_index(rep_inds = my_rep_inds, - byteindfns = byteindfns, - intrajfns = intrajfns) + get_byte_index(rep_inds=my_rep_inds, + byteindfns=byteindfns, + intrajfns=intrajfns) # block until all procs have finished comm.barrier() @@ -520,7 +533,7 @@ if __name__ == "__main__": infobjs = [readwrite(i, "rb") for i in intrajfns] # open all byteindex files - byte_inds = dict( (i, np.loadtxt(fn)) for i, fn in enumerate(byteindfns) ) + byte_inds = dict((i, np.loadtxt(fn)) for i, fn in enumerate(byteindfns)) # define a chunk of output trajs. to process for each proc. # # of reordered trajs. to write may be less than the total # of replicas @@ -536,38 +549,38 @@ if __name__ == "__main__": else: nproc_active = nproc if me < nproc_active-1: - my_temp_inds = range( (me*CHUNKSIZE_2), (me+1)*CHUNKSIZE_1 ) + my_temp_inds = range((me*CHUNKSIZE_2), (me+1)*CHUNKSIZE_1) else: - my_temp_inds = range( (me*CHUNKSIZE_2), n_out_temps) + my_temp_inds = range((me*CHUNKSIZE_2), n_out_temps) # retire the excess procs # dont' forget to close any open file objects if me >= nproc_active: - for fobj in infobjs: fobj.close() + for fobj in infobjs: + fobj.close() exit() # write reordered trajectories to disk from active procs in parallel - write_reordered_traj(temp_inds = my_temp_inds, - byte_inds = byte_inds, - outtemps = out_temps, temps = temps, - frametuple_dict = master_frametuple_dict, - nprod = nprod, writefreq = writefreq, - outtrajfns = outtrajfns, - infobjs = infobjs) + write_reordered_traj(temp_inds=my_temp_inds, + byte_inds=byte_inds, + outtemps=out_temps, temps=temps, + frametuple_dict=master_frametuple_dict, + nprod=nprod, writefreq=writefreq, + outtrajfns=outtrajfns, + infobjs=infobjs) # calculate canonical log-weights if requested # usually this is very fast so retire all but the ROOT proc - if not get_logw: exit() - if not me == ROOT: exit() - - logw = get_canonical_logw(enefn = enefn, temps = temps, - frametuple_dict = master_frametuple_dict, - nprod = nprod, 
writefreq = writefreq, - kB = kB) + if not get_logw: + exit() + if not me == ROOT: + exit() + logw = get_canonical_logw(enefn=enefn, temps=temps, + frametuple_dict=master_frametuple_dict, + nprod=nprod, writefreq=writefreq, + kB=kB) # save the logweights to a pickle with open(logwfn, 'wb') as of: pickle.dump(logw, of) - - From 2c65df1bc2efd9c39aae3a3ceeca06fecf25b698 Mon Sep 17 00:00:00 2001 From: Tim Bernhard Date: Tue, 10 Nov 2020 16:29:02 +0100 Subject: [PATCH 53/64] Revert typo fix in python due to auto-formatter changing too much --- tools/replica/reorder_remd_traj.py | 231 ++++++++++++++--------------- 1 file changed, 109 insertions(+), 122 deletions(-) diff --git a/tools/replica/reorder_remd_traj.py b/tools/replica/reorder_remd_traj.py index 6eee4770ab..5033ae1e53 100644 --- a/tools/replica/reorder_remd_traj.py +++ b/tools/replica/reorder_remd_traj.py @@ -37,17 +37,13 @@ StringIO (or io if in Python 3.x) """ -import os -import numpy as np -import argparse -import time -import pickle + +import os, numpy as np, argparse, time, pickle from scipy.special import logsumexp from mpi4py import MPI from tqdm import tqdm -import gzip -import bz2 +import gzip, bz2 try: # python-2 from StringIO import StringIO as IOBuffer @@ -56,11 +52,12 @@ except ImportError: from io import BytesIO as IOBuffer + #### INITIALIZE MPI #### # (note that all output on screen will be printed only on the ROOT proc) ROOT = 0 comm = MPI.COMM_WORLD -me = comm.rank # my proc id +me = comm.rank # my proc id nproc = comm.size @@ -80,8 +77,7 @@ def _get_nearest_temp(temps, query_temp): out_temp: nearest temp from the list """ - if isinstance(temps, list): - temps = np.array(temps) + if isinstance(temps, list): temps = np.array(temps) return temps[np.argmin(np.abs(temps-query_temp))] @@ -99,10 +95,10 @@ def readwrite(trajfn, mode): if trajfn.endswith(".gz"): of = gzip.open(trajfn, mode) - # return gzip.GzipFile(trajfn, mode) + #return gzip.GzipFile(trajfn, mode) elif trajfn.endswith(".bz2"): of = 
bz2.open(trajfn, mode) - # return bz2.BZ2File(trajfn, mode) + #return bz2.BZ2File(trajfn, mode) else: of = open(trajfn, mode) return of @@ -127,8 +123,8 @@ def get_replica_frames(logfn, temps, nswap, writefreq): """ n_rep = len(temps) - swap_history = np.loadtxt(logfn, skiprows=3) - master_frametuple_dict = dict((n, []) for n in range(n_rep)) + swap_history = np.loadtxt(logfn, skiprows = 3) + master_frametuple_dict = dict( (n, []) for n in range(n_rep) ) # walk through the replicas print("Getting frames from all replicas at temperature:") @@ -140,15 +136,15 @@ def get_replica_frames(logfn, temps, nswap, writefreq): if writefreq <= nswap: for ii, i in enumerate(rep_inds[:-1]): start = int(ii * nswap / writefreq) - stop = int((ii+1) * nswap / writefreq) - [master_frametuple_dict[n].append((i, x)) - for x in range(start, stop)] + stop = int( (ii+1) * nswap / writefreq) + [master_frametuple_dict[n].append( (i,x) ) \ + for x in range(start, stop)] # case-2: when temps. are swapped faster than dumping frames else: nskip = int(writefreq / nswap) - [master_frametuple_dict[n].append((i, ii)) - for ii, i in enumerate(rep_inds[0::nskip])] + [master_frametuple_dict[n].append( (i,ii) ) \ + for ii, i in enumerate(rep_inds[0::nskip])] return master_frametuple_dict @@ -165,12 +161,11 @@ def get_byte_index(rep_inds, byteindfns, intrajfns): """ for n in rep_inds: # check if the byte indices for this traj has already been computed - if os.path.isfile(byteindfns[n]): - continue + if os.path.isfile(byteindfns[n]): continue # extract bytes fobj = readwrite(intrajfns[n], "rb") - byteinds = [[0, 0]] + byteinds = [ [0,0] ] # place file pointer at first line nframe = 0 @@ -180,37 +175,33 @@ def get_byte_index(rep_inds, byteindfns, intrajfns): # status printed only for replica read on root proc # this assumes that each proc takes roughly the same time if me == ROOT: - pb = tqdm(desc="Reading replicas", leave=True, - position=ROOT + 2*me, - unit="B/replica", unit_scale=True, - 
unit_divisor=1024) + pb = tqdm(desc = "Reading replicas", leave = True, + position = ROOT + 2*me, + unit = "B/replica", unit_scale = True, + unit_divisor = 1024) # start crawling through the bytes while True: next_line = fobj.readline() - if len(next_line) == 0: - break + if len(next_line) == 0: break # this will only work with lammpstrj traj format. # this condition essentially checks periodic recurrences # of the token TIMESTEP. Each time it is found, # we have crawled through a frame (snapshot) if next_line == first_line: nframe += 1 - byteinds.append([nframe, cur_pos]) - if me == ROOT: - pb.update() + byteinds.append( [nframe, cur_pos] ) + if me == ROOT: pb.update() cur_pos = fobj.tell() - if me == ROOT: - pb.update(0) - if me == ROOT: - pb.close() + if me == ROOT: pb.update(0) + if me == ROOT: pb.close() # take care of the EOF cur_pos = fobj.tell() - byteinds.append([nframe+1, cur_pos]) # dummy index for the EOF + byteinds.append( [nframe+1, cur_pos] ) # dummy index for the EOF # write to file - np.savetxt(byteindfns[n], np.array(byteinds), fmt="%d") + np.savetxt(byteindfns[n], np.array(byteinds), fmt = "%d") # close the trajfile object fobj.close() @@ -256,15 +247,15 @@ def write_reordered_traj(temp_inds, byte_inds, outtemps, temps, of = readwrite(outtrajfns[n], "wb") # get frames - abs_temp_ind = np.argmin(abs(temps - outtemps[n])) + abs_temp_ind = np.argmin( abs(temps - outtemps[n]) ) frametuple = frametuple_dict[abs_temp_ind][-nframes:] # write frames to buffer if me == ROOT: pb = tqdm(frametuple, - desc=("Buffering trajectories for writing"), - leave=True, position=ROOT + 2*me, - unit='frame/replica', unit_scale=True) + desc = ("Buffering trajectories for writing"), + leave = True, position = ROOT + 2*me, + unit = 'frame/replica', unit_scale = True) iterable = pb else: @@ -272,23 +263,20 @@ def write_reordered_traj(temp_inds, byte_inds, outtemps, temps, for i, (rep, frame) in enumerate(iterable): infobj = infobjs[rep] - start_ptr = 
int(byte_inds[rep][frame, 1]) - stop_ptr = int(byte_inds[rep][frame+1, 1]) + start_ptr = int(byte_inds[rep][frame,1]) + stop_ptr = int(byte_inds[rep][frame+1,1]) byte_len = stop_ptr - start_ptr infobj.seek(start_ptr) buf.write(infobj.read(byte_len)) - if me == ROOT: - pb.close() + if me == ROOT: pb.close() # write buffer to disk - if me == ROOT: - print("Writing buffer to file") + if me == ROOT: print("Writing buffer to file") of.write(buf.getvalue()) of.close() buf.close() - for i in infobjs: - i.close() + for i in infobjs: i.close() return @@ -337,13 +325,13 @@ def get_canonical_logw(enefn, frametuple_dict, temps, nprod, writefreq, pip install --user pymbar sudo pip install pymbar - To install the dev. version directly from GitHub, use: + To install the dev. version directly from github, use: pip install pip install git+https://github.com/choderalab/pymbar.git """) u_rn = np.loadtxt(enefn) - ntemps = u_rn.shape[0] # number of temps. - nframes = int(nprod / writefreq) # number of frames at each temp. + ntemps = u_rn.shape[0] # number of temps. + nframes = int(nprod / writefreq) # number of frames at each temp. # reorder the temps u_kn = np.zeros([ntemps, nframes], float) @@ -353,90 +341,91 @@ def get_canonical_logw(enefn, frametuple_dict, temps, nprod, writefreq, u_kn[k, i] = u_rn[rep, frame] # prep input for pymbar - # 1) array of frames at each temp. + #1) array of frames at each temp. nframes_k = nframes * np.ones(ntemps, np.uint8) - # 2) inverse temps. for chosen energy scale + #2) inverse temps. 
for chosen energy scale beta_k = 1.0 / (kB * temps) - # 3) get reduced energies (*ONLY FOR THE CANONICAL ENSEMBLE*) + #3) get reduced energies (*ONLY FOR THE CANONICAL ENSEMBLE*) u_kln = np.zeros([ntemps, ntemps, nframes], float) for k in range(ntemps): u_kln[k] = np.outer(beta_k, u_kn[k]) # run pymbar and extract the free energies print("\nRunning pymbar...") - mbar = pymbar.mbar.MBAR(u_kln, nframes_k, verbose=True) - f_k = mbar.f_k # (1 x k array) + mbar = pymbar.mbar.MBAR(u_kln, nframes_k, verbose = True) + f_k = mbar.f_k # (1 x k array) # calculate the log-weights print("\nExtracting log-weights...") log_nframes = np.log(nframes) - logw = dict((k, np.zeros([ntemps, nframes], float)) for k in range(ntemps)) + logw = dict( (k, np.zeros([ntemps, nframes], float)) for k in range(ntemps) ) # get log-weights to reweight to this temp. for k in range(ntemps): for n in range(nframes): - num = -beta_k[k] * u_kn[k, n] - denom = f_k - beta_k[k] * u_kn[k, n] + num = -beta_k[k] * u_kn[k,n] + denom = f_k - beta_k[k] * u_kn[k,n] for l in range(ntemps): - logw[l][k, n] = num - logsumexp(denom) - log_nframes + logw[l][k,n] = num - logsumexp(denom) - log_nframes return logw + #### MAIN WORKFLOW #### if __name__ == "__main__": # accept user inputs - parser = argparse.ArgumentParser(description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) + parser = argparse.ArgumentParser(description = __doc__, + formatter_class = argparse.RawDescriptionHelpFormatter) parser.add_argument("prefix", - help="Prefix of REMD LAMMPS trajectories.\ + help = "Prefix of REMD LAMMPS trajectories.\ Supply full path. Trajectories assumed to be named as \ .%%d.lammpstrj. \ Can be in compressed (.gz or .bz2) format. 
\ This is a required argument") - parser.add_argument("-logfn", "--logfn", default="log.lammps", - help="LAMMPS log file that contains swap history \ + parser.add_argument("-logfn", "--logfn", default = "log.lammps", + help = "LAMMPS log file that contains swap history \ of temperatures among replicas. \ Default = 'lammps.log'") - parser.add_argument("-tfn", "--tempfn", default="temps.txt", - help="ascii file (readable by numpy.loadtxt) with \ + parser.add_argument("-tfn", "--tempfn", default = "temps.txt", + help = "ascii file (readable by numpy.loadtxt) with \ the temperatures used in the REMD simulation.") - parser.add_argument("-ns", "--nswap", type=int, - help="Swap frequency used in LAMMPS temper command") + parser.add_argument("-ns", "--nswap", type = int, + help = "Swap frequency used in LAMMPS temper command") - parser.add_argument("-nw", "--nwrite", type=int, default=1, - help="Trajectory writing frequency used \ + parser.add_argument("-nw", "--nwrite", type = int, default = 1, + help = "Trajectory writing frequency used \ in LAMMPS dump command") - parser.add_argument("-np", "--nprod", type=int, default=0, - help="Number of timesteps to save in the reordered\ + parser.add_argument("-np", "--nprod", type = int, default = 0, + help = "Number of timesteps to save in the reordered\ trajectories.\ This should be in units of the LAMMPS timestep") - parser.add_argument("-logw", "--logw", action='store_true', - help="Supplying this flag \ + parser.add_argument("-logw", "--logw", action = 'store_true', + help = "Supplying this flag \ calculates *canonical* (NVT ensemble) log weights") parser.add_argument("-e", "--enefn", - help="File that has n_replica x n_frames array\ + help = "File that has n_replica x n_frames array\ of total potential energies") parser.add_argument("-kB", "--boltzmann_const", - type=float, default=0.001987, - help="Boltzmann constant in appropriate units. 
\ + type = float, default = 0.001987, + help = "Boltzmann constant in appropriate units. \ Default is kcal/mol") - parser.add_argument("-ot", "--out_temps", nargs='+', type=np.float64, - help="Reorder trajectories at these temperatures.\n \ + parser.add_argument("-ot", "--out_temps", nargs = '+', type = np.float64, + help = "Reorder trajectories at these temperatures.\n \ Default is all temperatures used in the simulation") - parser.add_argument("-od", "--outdir", default=".", - help="All output will be saved to this directory") + parser.add_argument("-od", "--outdir", default = ".", + help = "All output will be saved to this directory") # parse inputs args = parser.parse_args() @@ -449,16 +438,14 @@ if __name__ == "__main__": nprod = args.nprod enefn = args.enefn - if not enefn is None: - enefn = os.path.abspath(enefn) + if not enefn is None: enefn = os.path.abspath(enefn) get_logw = args.logw kB = args.boltzmann_const out_temps = args.out_temps outdir = os.path.abspath(args.outdir) if not os.path.isdir(outdir): - if me == ROOT: - os.mkdir(outdir) + if me == ROOT: os.mkdir(outdir) # check that all input files are present (only on the ROOT proc) if me == ROOT: @@ -478,8 +465,7 @@ if __name__ == "__main__": for i in range(ntemps): this_intrajfn = intrajfns[i] x = this_intrajfn + ".gz" - if os.path.isfile(this_intrajfn): - continue + if os.path.isfile(this_intrajfn): continue elif os.path.isfile(this_intrajfn + ".gz"): intrajfns[i] = this_intrajfn + ".gz" elif os.path.isfile(this_intrajfn + ".bz2"): @@ -490,41 +476,42 @@ if __name__ == "__main__": # set output filenames outprefix = os.path.join(outdir, traj_prefix.split('/')[-1]) - outtrajfns = ["%s.%3.2f.lammpstrj.gz" % - (outprefix, _get_nearest_temp(temps, t)) + outtrajfns = ["%s.%3.2f.lammpstrj.gz" % \ + (outprefix, _get_nearest_temp(temps, t)) \ for t in out_temps] - byteindfns = [os.path.join(outdir, ".byteind_%d.gz" % k) + byteindfns = [os.path.join(outdir, ".byteind_%d.gz" % k) \ for k in range(ntemps)] 
frametuplefn = outprefix + '.frametuple.pickle' if get_logw: logwfn = outprefix + ".logw.pickle" + # get a list of all frames at a particular temp visited by each replica # this is fast so run only on ROOT proc. master_frametuple_dict = {} if me == ROOT: - master_frametuple_dict = get_replica_frames(logfn=logfn, - temps=temps, - nswap=nswap, - writefreq=writefreq) + master_frametuple_dict = get_replica_frames(logfn = logfn, + temps = temps, + nswap = nswap, + writefreq = writefreq) # save to a pickle from the ROOT proc with open(frametuplefn, 'wb') as of: pickle.dump(master_frametuple_dict, of) # broadcast to all procs - master_frametuple_dict = comm.bcast(master_frametuple_dict, root=ROOT) + master_frametuple_dict = comm.bcast(master_frametuple_dict, root = ROOT) # define a chunk of replicas to process on each proc CHUNKSIZE_1 = int(ntemps/nproc) if me < nproc - 1: - my_rep_inds = range((me*CHUNKSIZE_1), (me+1)*CHUNKSIZE_1) + my_rep_inds = range( (me*CHUNKSIZE_1), (me+1)*CHUNKSIZE_1 ) else: - my_rep_inds = range((me*CHUNKSIZE_1), ntemps) + my_rep_inds = range( (me*CHUNKSIZE_1), ntemps ) # get byte indices from replica (un-ordered) trajs. in parallel - get_byte_index(rep_inds=my_rep_inds, - byteindfns=byteindfns, - intrajfns=intrajfns) + get_byte_index(rep_inds = my_rep_inds, + byteindfns = byteindfns, + intrajfns = intrajfns) # block until all procs have finished comm.barrier() @@ -533,7 +520,7 @@ if __name__ == "__main__": infobjs = [readwrite(i, "rb") for i in intrajfns] # open all byteindex files - byte_inds = dict((i, np.loadtxt(fn)) for i, fn in enumerate(byteindfns)) + byte_inds = dict( (i, np.loadtxt(fn)) for i, fn in enumerate(byteindfns) ) # define a chunk of output trajs. to process for each proc. # # of reordered trajs. 
to write may be less than the total # of replicas @@ -549,38 +536,38 @@ if __name__ == "__main__": else: nproc_active = nproc if me < nproc_active-1: - my_temp_inds = range((me*CHUNKSIZE_2), (me+1)*CHUNKSIZE_1) + my_temp_inds = range( (me*CHUNKSIZE_2), (me+1)*CHUNKSIZE_1 ) else: - my_temp_inds = range((me*CHUNKSIZE_2), n_out_temps) + my_temp_inds = range( (me*CHUNKSIZE_2), n_out_temps) # retire the excess procs # dont' forget to close any open file objects if me >= nproc_active: - for fobj in infobjs: - fobj.close() + for fobj in infobjs: fobj.close() exit() # write reordered trajectories to disk from active procs in parallel - write_reordered_traj(temp_inds=my_temp_inds, - byte_inds=byte_inds, - outtemps=out_temps, temps=temps, - frametuple_dict=master_frametuple_dict, - nprod=nprod, writefreq=writefreq, - outtrajfns=outtrajfns, - infobjs=infobjs) + write_reordered_traj(temp_inds = my_temp_inds, + byte_inds = byte_inds, + outtemps = out_temps, temps = temps, + frametuple_dict = master_frametuple_dict, + nprod = nprod, writefreq = writefreq, + outtrajfns = outtrajfns, + infobjs = infobjs) # calculate canonical log-weights if requested # usually this is very fast so retire all but the ROOT proc - if not get_logw: - exit() - if not me == ROOT: - exit() + if not get_logw: exit() + if not me == ROOT: exit() + + logw = get_canonical_logw(enefn = enefn, temps = temps, + frametuple_dict = master_frametuple_dict, + nprod = nprod, writefreq = writefreq, + kB = kB) - logw = get_canonical_logw(enefn=enefn, temps=temps, - frametuple_dict=master_frametuple_dict, - nprod=nprod, writefreq=writefreq, - kB=kB) # save the logweights to a pickle with open(logwfn, 'wb') as of: pickle.dump(logw, of) + + From d1ce362fca80f5240ad9e36c6bd5d65e0c76fea4 Mon Sep 17 00:00:00 2001 From: Tim Bernhard Date: Tue, 10 Nov 2020 17:15:42 +0100 Subject: [PATCH 54/64] Remove wrong word 'regoin' from false positive list --- doc/utils/sphinx-config/false_positives.txt | 1 - 1 file changed, 1 deletion(-) 
diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index 3ef0b904eb..6843118686 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -2614,7 +2614,6 @@ Ree refactored refactoring reflectionstyle -regoin Reinders reinit relaxbox From 2f3cbfed1304d9c263ed52698fa2ea263f776a40 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 10 Nov 2020 17:58:26 -0500 Subject: [PATCH 55/64] add CMake code to download and compile libyaml if not found locally --- cmake/Modules/YAML.cmake | 32 ++++++++++++++++++++++++++++ unittest/force-styles/CMakeLists.txt | 4 ++-- 2 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 cmake/Modules/YAML.cmake diff --git a/cmake/Modules/YAML.cmake b/cmake/Modules/YAML.cmake new file mode 100644 index 0000000000..05163675df --- /dev/null +++ b/cmake/Modules/YAML.cmake @@ -0,0 +1,32 @@ +message(STATUS "Downloading and building YAML library") + +include(ExternalProject) +set(YAML_URL "https://pyyaml.org/download/libyaml/yaml-0.2.5.tar.gz" CACHE STRING "URL for libyaml tarball") +mark_as_advanced(YAML_URL) +ExternalProject_Add(libyaml + URL ${YAML_URL} + URL_MD5 bb15429d8fb787e7d3f1c83ae129a999 + SOURCE_DIR "${CMAKE_BINARY_DIR}/yaml-src" + BINARY_DIR "${CMAKE_BINARY_DIR}/yaml-build" + CONFIGURE_COMMAND <SOURCE_DIR>/configure ${CONFIGURE_REQUEST_PIC} + CXX=${CMAKE_CXX_COMPILER} + CC=${CMAKE_C_COMPILER} + --prefix=<INSTALL_DIR> --disable-shared + BUILD_BYPRODUCTS <INSTALL_DIR>/lib/${CMAKE_FIND_LIBRARY_PREFIXES}yaml.a + TEST_COMMAND "") + +ExternalProject_Get_Property(libyaml INSTALL_DIR) +set(YAML_INCLUDE_DIR ${INSTALL_DIR}/include) +set(YAML_LIBRARY_DIR ${INSTALL_DIR}/lib) + +# workaround for CMake 3.10 on ubuntu 18.04 +file(MAKE_DIRECTORY ${YAML_INCLUDE_DIR}) +file(MAKE_DIRECTORY ${YAML_LIBRARY_DIR}) + +set(YAML_LIBRARY_PATH ${INSTALL_DIR}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}yaml.a) + +add_library(Yaml::Yaml UNKNOWN IMPORTED) +set_target_properties(Yaml::Yaml 
PROPERTIES + IMPORTED_LOCATION ${YAML_LIBRARY_PATH} + INTERFACE_INCLUDE_DIRECTORIES ${YAML_INCLUDE_DIR}) +add_dependencies(Yaml::Yaml libyaml) diff --git a/unittest/force-styles/CMakeLists.txt b/unittest/force-styles/CMakeLists.txt index 128dc62cff..1d7dc937eb 100644 --- a/unittest/force-styles/CMakeLists.txt +++ b/unittest/force-styles/CMakeLists.txt @@ -1,8 +1,8 @@ find_package(YAML) if(NOT YAML_FOUND) - message(STATUS "Skipping tests because libyaml is not found") - return() + # download and build a local copy of libyaml + include(YAML) endif() if(CMAKE_VERSION VERSION_LESS 3.12) From 2c6ccf0d0f0da1b63221dc34f34457c3480c8223 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 10 Nov 2020 18:04:00 -0500 Subject: [PATCH 56/64] update docs for download and compilation of yaml sources --- doc/src/Build_development.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/src/Build_development.rst b/doc/src/Build_development.rst index cf3e2fb750..1b076caac0 100644 --- a/doc/src/Build_development.rst +++ b/doc/src/Build_development.rst @@ -111,8 +111,10 @@ error margin). The status of this automated testing can be viewed on The unit testing facility is integrated into the CMake build process of the LAMMPS source code distribution itself. It can be enabled by setting ``-D ENABLE_TESTING=on`` during the CMake configuration step. -It requires the `PyYAML `_ library and development -headers to compile and will download and compile a recent version of the +It requires the `YAML `_ library and development +headers (if not found locally a recent version will be downloaded +and compiled transparently) to compile and will download and compile +a specific recent version of the `Googletest `_ C++ test framework for implementing the tests. 
From 552dc7fba90af230c811abaedd96f180e95a2f02 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 10 Nov 2020 18:05:06 -0500 Subject: [PATCH 57/64] whitespace --- cmake/Modules/YAML.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Modules/YAML.cmake b/cmake/Modules/YAML.cmake index 05163675df..a080b566be 100644 --- a/cmake/Modules/YAML.cmake +++ b/cmake/Modules/YAML.cmake @@ -5,7 +5,7 @@ set(YAML_URL "https://pyyaml.org/download/libyaml/yaml-0.2.5.tar.gz" CACHE STRIN mark_as_advanced(YAML_URL) ExternalProject_Add(libyaml URL ${YAML_URL} - URL_MD5 bb15429d8fb787e7d3f1c83ae129a999 + URL_MD5 bb15429d8fb787e7d3f1c83ae129a999 SOURCE_DIR "${CMAKE_BINARY_DIR}/yaml-src" BINARY_DIR "${CMAKE_BINARY_DIR}/yaml-build" CONFIGURE_COMMAND <SOURCE_DIR>/configure ${CONFIGURE_REQUEST_PIC} From 39bc47a4da261b5d7a5db1057cf806d1aae62664 Mon Sep 17 00:00:00 2001 From: Tim Bernhard Date: Thu, 12 Nov 2020 13:35:04 +0100 Subject: [PATCH 58/64] Fix inconsistent formatting in Error & Warning doc --- doc/src/Errors_warnings.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/src/Errors_warnings.rst b/doc/src/Errors_warnings.rst index 306c9b7b31..4f29fad9dd 100644 --- a/doc/src/Errors_warnings.rst +++ b/doc/src/Errors_warnings.rst @@ -119,7 +119,6 @@ Doc page with :doc:`ERROR messages <Errors_messages>` :doc:`pair style zero <pair_zero>` with a suitable cutoff or use :doc:`comm_modify cutoff <comm_modify>`. *Communication cutoff is shorter than a bond length based estimate. 
This may lead to errors.* - Since LAMMPS stores topology data with individual atoms, all atoms comprising a bond, angle, dihedral or improper must be present on any sub-domain that "owns" the atom with the information, either as a From 3991f704e1990b827d0bfa69ef5fc425430799e0 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Thu, 12 Nov 2020 10:42:09 -0500 Subject: [PATCH 59/64] Fix whitespace errors --- doc/src/pair_spin_exchange.rst | 78 ++++++++++----------- src/SPIN/compute_spin.cpp | 6 +- src/SPIN/pair_spin_dipole_cut.cpp | 8 +-- src/SPIN/pair_spin_dipole_long.cpp | 2 +- src/SPIN/pair_spin_dmi.cpp | 4 +- src/SPIN/pair_spin_exchange.cpp | 34 ++++----- src/SPIN/pair_spin_exchange_biquadratic.cpp | 40 +++++------ src/SPIN/pair_spin_exchange_biquadratic.h | 2 +- src/SPIN/pair_spin_magelec.cpp | 4 +- 9 files changed, 89 insertions(+), 89 deletions(-) diff --git a/doc/src/pair_spin_exchange.rst b/doc/src/pair_spin_exchange.rst index 72c416ac72..9e6e534280 100644 --- a/doc/src/pair_spin_exchange.rst +++ b/doc/src/pair_spin_exchange.rst @@ -40,53 +40,53 @@ pairs of magnetic spins: H_{ex} = -\sum_{i,j}^N J_{ij} (r_{ij}) \,\vec{s}_i \cdot \vec{s}_j where :math:`\vec{s}_i` and :math:`\vec{s}_j` are two unit vectors representing -the magnetic spins of two particles (usually atoms), and -:math:`r_{ij} = \vert \vec{r}_i - \vec{r}_j \vert` is the inter-atomic distance -between those two particles. The summation is over pairs of nearest neighbors. -:math:`J(r_{ij})` is a function defining the intensity and the sign of the -exchange interaction for different neighboring shells. +the magnetic spins of two particles (usually atoms), and +:math:`r_{ij} = \vert \vec{r}_i - \vec{r}_j \vert` is the inter-atomic distance +between those two particles. The summation is over pairs of nearest neighbors. +:math:`J(r_{ij})` is a function defining the intensity and the sign of the +exchange interaction for different neighboring shells. 
-Style *spin/exchange/biquadratic* computes a biquadratic exchange interaction +Style *spin/exchange/biquadratic* computes a biquadratic exchange interaction between pairs of magnetic spins: .. math:: - + H_{bi} = -\sum_{i, j}^{N} {J}_{ij} \left(r_{ij} \right)\, - \vec{s}_{i}\cdot \vec{s}_{j} + \vec{s}_{i}\cdot \vec{s}_{j} -\sum_{i, j}^{N} {K}_{ij} \left(r_{ij} \right)\, - \left(\vec{s}_{i}\cdot + \left(\vec{s}_{i}\cdot \vec{s}_{j}\right)^2 -where :math:`\vec{s}_i`, :math:`\vec{s}_j`, :math:`r_{ij}` and -:math:`J(r_{ij})` have the same definitions as above, and :math:`K(r_{ij})` is +where :math:`\vec{s}_i`, :math:`\vec{s}_j`, :math:`r_{ij}` and +:math:`J(r_{ij})` have the same definitions as above, and :math:`K(r_{ij})` is a second function, defining the intensity and the sign of the biquadratic term. -The interatomic dependence of :math:`J(r_{ij})` and :math:`K(r_{ij})` in both +The interatomic dependence of :math:`J(r_{ij})` and :math:`K(r_{ij})` in both interactions above is defined by the following function: .. math:: - {f}\left( r_{ij} \right) = 4 a \left( \frac{r_{ij}}{d} \right)^2 - \left( 1 - b \left( \frac{r_{ij}}{d} \right)^2 \right) + {f}\left( r_{ij} \right) = 4 a \left( \frac{r_{ij}}{d} \right)^2 + \left( 1 - b \left( \frac{r_{ij}}{d} \right)^2 \right) e^{-\left( \frac{r_{ij}}{d} \right)^2 }\Theta (R_c - r_{ij}) -where :math:`a`, :math:`b` and :math:`d` are the three constant coefficients -defined in the associated "pair_coeff" command, and :math:`R_c` is the radius +where :math:`a`, :math:`b` and :math:`d` are the three constant coefficients +defined in the associated "pair_coeff" command, and :math:`R_c` is the radius cutoff associated to the pair interaction (see below for more explanations). 
-The coefficients :math:`a`, :math:`b`, and :math:`d` need to be fitted so that -the function above matches with the value of the exchange interaction for the +The coefficients :math:`a`, :math:`b`, and :math:`d` need to be fitted so that +the function above matches with the value of the exchange interaction for the :math:`N` neighbor shells taken into account. -Examples and more explanations about this function and its parameterization +Examples and more explanations about this function and its parameterization are reported in :ref:`(Tranchida) `. -When a *spin/exchange/biquadratic* pair style is defined, six coefficients -(three for :math:`J(r_{ij})`, and three for :math:`K(r_{ij})`) have to be +When a *spin/exchange/biquadratic* pair style is defined, six coefficients +(three for :math:`J(r_{ij})`, and three for :math:`K(r_{ij})`) have to be fitted. From this exchange interaction, each spin :math:`i` will be submitted -to a magnetic torque :math:`\vec{\omega}_{i}`, and its associated atom can be -submitted to a force :math:`\vec{F}_{i}` for spin-lattice calculations (see +to a magnetic torque :math:`\vec{\omega}_{i}`, and its associated atom can be +submitted to a force :math:`\vec{F}_{i}` for spin-lattice calculations (see :doc:`fix nve/spin `), such as: .. math:: @@ -94,22 +94,22 @@ submitted to a force :math:`\vec{F}_{i}` for spin-lattice calculations (see \vec{\omega}_{i} = \frac{1}{\hbar} \sum_{j}^{Neighb} {J} \left(r_{ij} \right)\,\vec{s}_{j} ~~{\rm and}~~ - \vec{F}_{i} = \sum_{j}^{Neighb} \frac{\partial {J} \left(r_{ij} \right)}{ + \vec{F}_{i} = \sum_{j}^{Neighb} \frac{\partial {J} \left(r_{ij} \right)}{ \partial r_{ij}} \left( \vec{s}_{i}\cdot \vec{s}_{j} \right) \vec{e}_{ij} with :math:`\hbar` the Planck constant (in metal units), and :math:`\vec{e}_{ij} = \frac{\vec{r}_i - \vec{r}_j}{\vert \vec{r}_i-\vec{r}_j \vert}` the unit vector between sites :math:`i` and :math:`j`. 
-Equivalent forces and magnetic torques are generated for the biquadratic term +Equivalent forces and magnetic torques are generated for the biquadratic term when a *spin/exchange/biquadratic* pair style is defined. More details about the derivation of these torques/forces are reported in :ref:`(Tranchida) `. -For the *spin/exchange* and *spin/exchange/biquadratic* pair styles, the -following coefficients must be defined for each pair of atoms types via the -:doc:`pair_coeff ` command as in the examples above, or in the data -file or restart files read by the :doc:`read_data ` or +For the *spin/exchange* and *spin/exchange/biquadratic* pair styles, the +following coefficients must be defined for each pair of atoms types via the +:doc:`pair_coeff ` command as in the examples above, or in the data +file or restart files read by the :doc:`read_data ` or :doc:`read_restart ` commands, and set in the following order: * :math:`R_c` (distance units) @@ -129,10 +129,10 @@ for the *spin/exchange* pair style, and: for the *spin/exchange/biquadratic* pair style. -Note that :math:`R_c` is the radius cutoff of the considered exchange -interaction, and :math:`a`, :math:`b` and :math:`d` are the three coefficients -performing the parameterization of the function :math:`J(r_{ij})` defined -above (in the *biquadratic* style, :math:`a_j`, :math:`b_j`, :math:`d_j` and +Note that :math:`R_c` is the radius cutoff of the considered exchange +interaction, and :math:`a`, :math:`b` and :math:`d` are the three coefficients +performing the parameterization of the function :math:`J(r_{ij})` defined +above (in the *biquadratic* style, :math:`a_j`, :math:`b_j`, :math:`d_j` and :math:`a_k`, :math:`b_k`, :math:`d_k` are the coefficients of :math:`J(r_{ij})` and :math:`K(r_{ij})` respectively). @@ -147,7 +147,7 @@ None of those coefficients is optional. 
If not specified, the For spin-lattice simulation, it can be useful to offset the mechanical forces and energies generated by the exchange interaction. -The *offset* keyword allows to apply this offset. +The *offset* keyword allows to apply this offset. By setting *offset* to *yes*, the energy definitions above are replaced by: @@ -155,14 +155,14 @@ replaced by: H_{ex} = -\sum_{i,j}^N J_{ij} (r_{ij}) \,[ \vec{s}_i \cdot \vec{s}_j-1 ] -for the *spin/exchange* pair style, and: +for the *spin/exchange* pair style, and: .. math:: - + H_{bi} = -\sum_{i, j}^{N} {J}_{ij} \left(r_{ij} \right)\, [ \vec{s}_{i}\cdot \vec{s}_{j} -1 ] -\sum_{i, j}^{N} {K}_{ij} \left(r_{ij} \right)\, - [ \left(\vec{s}_{i}\cdot + [ \left(\vec{s}_{i}\cdot \vec{s}_{j}\right)^2 -1] for the *spin/exchange/biquadratic* pair style. @@ -173,7 +173,7 @@ precession vectors (and thus does no impact the purely magnetic properties). This ensures that when all spins are aligned, the magnetic energy and the associated mechanical forces (and thus the pressure -generated by the magnetic potential) are null. +generated by the magnetic potential) are null. .. note:: This offset term can be very important when calculations such as @@ -194,7 +194,7 @@ Restrictions All the *pair/spin* styles are part of the SPIN package. These styles are only enabled if LAMMPS was built with this package, and if the -atom_style "spin" was declared. +atom_style "spin" was declared. See the :doc:`Build package ` doc page for more info. 
Related commands diff --git a/src/SPIN/compute_spin.cpp b/src/SPIN/compute_spin.cpp index 3e4970a62b..8e44ea7b84 100644 --- a/src/SPIN/compute_spin.cpp +++ b/src/SPIN/compute_spin.cpp @@ -178,7 +178,7 @@ void ComputeSpin::compute_vector() for (i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { if (atom->sp_flag) { - + // compute first moment mag[0] += sp[i][0]; @@ -223,9 +223,9 @@ void ComputeSpin::compute_vector() magtot[1] *= scale; magtot[2] *= scale; magtot[3] = sqrt((magtot[0]*magtot[0])+(magtot[1]*magtot[1])+(magtot[2]*magtot[2])); - + // compute spin temperature - + spintemperature = hbar*tempnumtot; spintemperature /= (2.0*kb*tempdenomtot); diff --git a/src/SPIN/pair_spin_dipole_cut.cpp b/src/SPIN/pair_spin_dipole_cut.cpp index b4355fd640..7ba81d93f8 100644 --- a/src/SPIN/pair_spin_dipole_cut.cpp +++ b/src/SPIN/pair_spin_dipole_cut.cpp @@ -234,14 +234,14 @@ void PairSpinDipoleCut::compute(int eflag, int vflag) local_cut2 = cut_spin_long[itype][jtype]*cut_spin_long[itype][jtype]; // compute dipolar interaction - + if (rsq < local_cut2) { r2inv = 1.0/rsq; r3inv = r2inv*rinv; compute_dipolar(i,j,eij,fmi,spi,spj,r3inv); - - if (lattice_flag) + + if (lattice_flag) compute_dipolar_mech(i,j,eij,fi,spi,spj,r2inv); if (eflag) { @@ -269,7 +269,7 @@ void PairSpinDipoleCut::compute(int eflag, int vflag) } } } - + if (vflag_fdotr) virial_fdotr_compute(); } diff --git a/src/SPIN/pair_spin_dipole_long.cpp b/src/SPIN/pair_spin_dipole_long.cpp index 836b889513..3b4c861e0c 100644 --- a/src/SPIN/pair_spin_dipole_long.cpp +++ b/src/SPIN/pair_spin_dipole_long.cpp @@ -310,7 +310,7 @@ void PairSpinDipoleLong::compute(int eflag, int vflag) } } } - + if (vflag_fdotr) virial_fdotr_compute(); } diff --git a/src/SPIN/pair_spin_dmi.cpp b/src/SPIN/pair_spin_dmi.cpp index 69a9873303..e6ed5e4609 100644 --- a/src/SPIN/pair_spin_dmi.cpp +++ b/src/SPIN/pair_spin_dmi.cpp @@ -244,7 +244,7 @@ void PairSpinDmi::compute(int eflag, int vflag) if (rsq <= local_cut2) { compute_dmi(i,j,eij,fmi,spj); 
- + if (lattice_flag) compute_dmi_mech(i,j,rsq,eij,fi,spi,spj); @@ -253,7 +253,7 @@ void PairSpinDmi::compute(int eflag, int vflag) evdwl *= 0.5*hbar; emag[i] += evdwl; } else evdwl = 0.0; - + f[i][0] += fi[0]; f[i][1] += fi[1]; f[i][2] += fi[2]; diff --git a/src/SPIN/pair_spin_exchange.cpp b/src/SPIN/pair_spin_exchange.cpp index bccde3f66b..b7dd6ffc17 100644 --- a/src/SPIN/pair_spin_exchange.cpp +++ b/src/SPIN/pair_spin_exchange.cpp @@ -37,8 +37,8 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairSpinExchange::PairSpinExchange(LAMMPS *lmp) : - PairSpin(lmp) +PairSpinExchange::PairSpinExchange(LAMMPS *lmp) : + PairSpin(lmp) { e_offset = 0; } @@ -66,7 +66,7 @@ PairSpinExchange::~PairSpinExchange() void PairSpinExchange::settings(int narg, char **arg) { PairSpin::settings(narg,arg); - + if (narg != 1) error->all(FLERR,"Illegal pair_style command"); cut_spin_exchange_global = utils::numeric(FLERR,arg[0],false,lmp); @@ -112,17 +112,17 @@ void PairSpinExchange::coeff(int narg, char **arg) // read energy offset flag if specified - while (iarg < narg) { - if (strcmp(arg[7],"offset") == 0) { + while (iarg < narg) { + if (strcmp(arg[7],"offset") == 0) { if (strcmp(arg[8],"yes") == 0) { e_offset = 1; } else if (strcmp(arg[8],"no") == 0) { e_offset = 0; } else error->all(FLERR,"Incorrect args for pair coefficients"); - iarg += 2; + iarg += 2; } else error->all(FLERR,"Incorrect args for pair coefficients"); } - + int count = 0; for (int i = ilo; i <= ihi; i++) { for (int j = MAX(jlo,i); j <= jhi; j++) { @@ -252,10 +252,10 @@ void PairSpinExchange::compute(int eflag, int vflag) if (rsq <= local_cut2) { compute_exchange(i,j,rsq,fmi,spj); - + if (lattice_flag) compute_exchange_mech(i,j,rsq,eij,fi,spi,spj); - + if (eflag) { evdwl -= compute_energy(i,j,rsq,spi,spj); emag[i] += evdwl; @@ -388,7 +388,7 @@ void PairSpinExchange::compute_exchange(int i, int j, double rsq, double fmi[3], compute the mechanical force due to 
the exchange interaction between atom i and atom j ------------------------------------------------------------------------- */ -void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq, +void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq, double eij[3], double fi[3], double spi[3], double spj[3]) { int *type = atom->type; @@ -407,11 +407,11 @@ void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq, Jex_mech = 1.0-ra-J2[itype][jtype]*ra*(2.0-ra); Jex_mech *= 8.0*Jex*rr*exp(-ra); - + sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); - + // apply or not energy and force offset - + fx = fy = fz = 0.0; if (e_offset == 1) { // set offset fx = Jex_mech*(sdots-1.0)*eij[0]; @@ -446,17 +446,17 @@ double PairSpinExchange::compute_energy(int i, int j, double rsq, double spi[3], Jex = 4.0*Jex*ra; Jex *= (1.0-J2[itype][jtype]*ra); Jex *= exp(-ra); - - sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); + + sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); // apply or not energy and force offset - + if (e_offset == 1) { // set offset energy = 0.5*Jex*(sdots-1.0); } else if (e_offset == 0) { // no offset ("normal" calculation) energy = 0.5*Jex*sdots; } else error->all(FLERR,"Illegal option in pair exchange/biquadratic command"); - + return energy; } diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp index 36f3dbcf5e..59b959f4cc 100644 --- a/src/SPIN/pair_spin_exchange_biquadratic.cpp +++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp @@ -37,8 +37,8 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairSpinExchangeBiquadratic::PairSpinExchangeBiquadratic(LAMMPS *lmp) : - PairSpin(lmp) +PairSpinExchangeBiquadratic::PairSpinExchangeBiquadratic(LAMMPS *lmp) : + PairSpin(lmp) { e_offset = 0; } @@ -119,14 +119,14 @@ void PairSpinExchangeBiquadratic::coeff(int narg, char **arg) // read energy offset flag if specified - while 
(iarg < narg) { - if (strcmp(arg[10],"offset") == 0) { + while (iarg < narg) { + if (strcmp(arg[10],"offset") == 0) { if (strcmp(arg[11],"yes") == 0) { e_offset = 1; } else if (strcmp(arg[11],"no") == 0) { e_offset = 0; } else error->all(FLERR,"Incorrect args for pair coefficients"); - iarg += 2; + iarg += 2; } else error->all(FLERR,"Incorrect args for pair coefficients"); } @@ -267,10 +267,10 @@ void PairSpinExchangeBiquadratic::compute(int eflag, int vflag) if (rsq <= local_cut2) { compute_exchange(i,j,rsq,fmi,spi,spj); - + if (lattice_flag) compute_exchange_mech(i,j,rsq,eij,fi,spi,spj); - + if (eflag) { evdwl -= compute_energy(i,j,rsq,spi,spj); emag[i] += evdwl; @@ -384,7 +384,7 @@ void PairSpinExchangeBiquadratic::compute_single_pair(int ii, double fmi[3]) compute exchange interaction between spins i and j ------------------------------------------------------------------------- */ -void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq, +void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq, double fmi[3], double spi[3], double spj[3]) { int *type = atom->type; @@ -395,7 +395,7 @@ void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq, r2j = rsq/J3[itype][jtype]/J3[itype][jtype]; r2k = rsq/J3[itype][jtype]/J3[itype][jtype]; - + Jex = 4.0*J1_mag[itype][jtype]*r2j; Jex *= (1.0-J2[itype][jtype]*r2j); Jex *= exp(-r2j); @@ -403,7 +403,7 @@ void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq, Kex = 4.0*K1_mag[itype][jtype]*r2k; Kex *= (1.0-K2[itype][jtype]*r2k); Kex *= exp(-r2k); - + sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); fmi[0] += (Jex*spj[0] + 2.0*Kex*spj[0]*sdots); @@ -415,7 +415,7 @@ void PairSpinExchangeBiquadratic::compute_exchange(int i, int j, double rsq, compute the mechanical force due to the exchange interaction between atom i and atom j ------------------------------------------------------------------------- */ -void 
PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, +void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, double rsq, double eij[3], double fi[3], double spi[3], double spj[3]) { int *type = atom->type; @@ -430,22 +430,22 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, iJ3 = 1.0/(J3[itype][jtype]*J3[itype][jtype]); Kex = K1_mech[itype][jtype]; iK3 = 1.0/(K3[itype][jtype]*K3[itype][jtype]); - + rja = rsq*iJ3; rjr = sqrt(rsq)*iJ3; rka = rsq*iK3; rkr = sqrt(rsq)*iK3; - + Jex_mech = 1.0-rja-J2[itype][jtype]*rja*(2.0-rja); Jex_mech *= 8.0*Jex*rjr*exp(-rja); - + Kex_mech = 1.0-rka-K2[itype][jtype]*rka*(2.0-rka); Kex_mech *= 8.0*Kex*rkr*exp(-rka); sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); // apply or not energy and force offset - + fx = fy = fz = 0.0; if (e_offset == 1) { // set offset fx = (Jex_mech*(sdots-1.0) + Kex_mech*(sdots*sdots-1.0))*eij[0]; @@ -469,7 +469,7 @@ void PairSpinExchangeBiquadratic::compute_exchange_mech(int i, int j, compute energy of spin pair i and j ------------------------------------------------------------------------- */ -double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq, +double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq, double spi[3], double spj[3]) { int *type = atom->type; @@ -487,7 +487,7 @@ double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq, rk = ra/K3[itype][jtype]; r2k = rsq/K3[itype][jtype]/K3[itype][jtype]; ir3k = 1.0/(rk*rk*rk); - + Jex = 4.0*J1_mech[itype][jtype]*r2j; Jex *= (1.0-J2[itype][jtype]*r2j); Jex *= exp(-r2j); @@ -496,16 +496,16 @@ double PairSpinExchangeBiquadratic::compute_energy(int i, int j, double rsq, Kex *= (1.0-K2[itype][jtype]*r2k); Kex *= exp(-r2k); - sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); + sdots = (spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); // apply or not energy and force offset - + if (e_offset == 1) { // set offset energy = 0.5*(Jex*(sdots-1.0) + 
Kex*(sdots*sdots-1.0)); } else if (e_offset == 0) { // no offset ("normal" calculation) energy = 0.5*(Jex*sdots + Kex*sdots*sdots); } else error->all(FLERR,"Illegal option in pair exchange/biquadratic command"); - + return energy; } diff --git a/src/SPIN/pair_spin_exchange_biquadratic.h b/src/SPIN/pair_spin_exchange_biquadratic.h index 1074b50f7b..9619416f2e 100644 --- a/src/SPIN/pair_spin_exchange_biquadratic.h +++ b/src/SPIN/pair_spin_exchange_biquadratic.h @@ -48,7 +48,7 @@ class PairSpinExchangeBiquadratic : public PairSpin { double cut_spin_exchange_global; // global exchange cutoff distance protected: - + int e_offset; // apply energy offset double **J1_mag; // H exchange coeffs in eV double **J1_mech; // mech exchange coeffs in diff --git a/src/SPIN/pair_spin_magelec.cpp b/src/SPIN/pair_spin_magelec.cpp index 72a52c1340..33ad364aaa 100644 --- a/src/SPIN/pair_spin_magelec.cpp +++ b/src/SPIN/pair_spin_magelec.cpp @@ -237,7 +237,7 @@ void PairSpinMagelec::compute(int eflag, int vflag) if (rsq <= local_cut2) { compute_magelec(i,j,eij,fmi,spj); - + if (lattice_flag) compute_magelec_mech(i,j,fi,spi,spj); @@ -246,7 +246,7 @@ void PairSpinMagelec::compute(int eflag, int vflag) evdwl *= 0.5*hbar; emag[i] += evdwl; } else evdwl = 0.0; - + f[i][0] += fi[0]; f[i][1] += fi[1]; f[i][2] += fi[2]; From e7ccbd0ce61fa6bfea47333fc17361b1c753bab3 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Thu, 12 Nov 2020 10:44:04 -0500 Subject: [PATCH 60/64] Replace NULL with nullptr --- src/SPIN/pair_spin_exchange_biquadratic.cpp | 30 ++++++++++----------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp index 59b959f4cc..7cdd8d0c19 100644 --- a/src/SPIN/pair_spin_exchange_biquadratic.cpp +++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp @@ -180,7 +180,7 @@ void *PairSpinExchangeBiquadratic::extract(const char *str, int &dim) { dim = 0; if (strcmp(str,"cut") == 0) return (void *) 
&cut_spin_exchange_global; - return NULL; + return nullptr; } /* ---------------------------------------------------------------------- */ @@ -576,19 +576,19 @@ void PairSpinExchangeBiquadratic::read_restart(FILE *fp) int me = comm->me; for (i = 1; i <= atom->ntypes; i++) { for (j = i; j <= atom->ntypes; j++) { - if (me == 0) utils::sfread(FLERR,&setflag[i][j],sizeof(int),1,fp,NULL,error); + if (me == 0) utils::sfread(FLERR,&setflag[i][j],sizeof(int),1,fp,nullptr,error); MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world); if (setflag[i][j]) { if (me == 0) { - utils::sfread(FLERR,&J1_mag[i][j],sizeof(double),1,fp,NULL,error); - utils::sfread(FLERR,&J1_mech[i][j],sizeof(double),1,fp,NULL,error); - utils::sfread(FLERR,&J2[i][j],sizeof(double),1,fp,NULL,error); - utils::sfread(FLERR,&J3[i][j],sizeof(double),1,fp,NULL,error); - utils::sfread(FLERR,&K1_mag[i][j],sizeof(double),1,fp,NULL,error); - utils::sfread(FLERR,&K1_mech[i][j],sizeof(double),1,fp,NULL,error); - utils::sfread(FLERR,&K2[i][j],sizeof(double),1,fp,NULL,error); - utils::sfread(FLERR,&K3[i][j],sizeof(double),1,fp,NULL,error); - utils::sfread(FLERR,&cut_spin_exchange[i][j],sizeof(double),1,fp,NULL,error); + utils::sfread(FLERR,&J1_mag[i][j],sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR,&J1_mech[i][j],sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR,&J2[i][j],sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR,&J3[i][j],sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR,&K1_mag[i][j],sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR,&K1_mech[i][j],sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR,&K2[i][j],sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR,&K3[i][j],sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR,&cut_spin_exchange[i][j],sizeof(double),1,fp,nullptr,error); } MPI_Bcast(&J1_mag[i][j],1,MPI_DOUBLE,0,world); MPI_Bcast(&J1_mech[i][j],1,MPI_DOUBLE,0,world); @@ -624,10 +624,10 @@ void 
PairSpinExchangeBiquadratic::write_restart_settings(FILE *fp) void PairSpinExchangeBiquadratic::read_restart_settings(FILE *fp) { if (comm->me == 0) { - utils::sfread(FLERR,&cut_spin_exchange_global,sizeof(double),1,fp,NULL,error); - utils::sfread(FLERR,&e_offset,sizeof(int),1,fp,NULL,error); - utils::sfread(FLERR,&offset_flag,sizeof(int),1,fp,NULL,error); - utils::sfread(FLERR,&mix_flag,sizeof(int),1,fp,NULL,error); + utils::sfread(FLERR,&cut_spin_exchange_global,sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR,&e_offset,sizeof(int),1,fp,nullptr,error); + utils::sfread(FLERR,&offset_flag,sizeof(int),1,fp,nullptr,error); + utils::sfread(FLERR,&mix_flag,sizeof(int),1,fp,nullptr,error); } MPI_Bcast(&cut_spin_exchange_global,1,MPI_DOUBLE,0,world); MPI_Bcast(&e_offset,1,MPI_INT,0,world); From c407d547cd702f63a1b8d0d59a0231bebb13b82e Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Thu, 12 Nov 2020 10:54:20 -0500 Subject: [PATCH 61/64] Whitespace --- src/SPIN/pair_spin_exchange_biquadratic.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/SPIN/pair_spin_exchange_biquadratic.cpp b/src/SPIN/pair_spin_exchange_biquadratic.cpp index 7cdd8d0c19..f2baf1333b 100644 --- a/src/SPIN/pair_spin_exchange_biquadratic.cpp +++ b/src/SPIN/pair_spin_exchange_biquadratic.cpp @@ -156,8 +156,7 @@ void PairSpinExchangeBiquadratic::coeff(int narg, char **arg) double PairSpinExchangeBiquadratic::init_one(int i, int j) { - - if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set"); + if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set"); J1_mag[j][i] = J1_mag[i][j]; J1_mech[j][i] = J1_mech[i][j]; From aadc66877120af9fac50d6e647bbae3dd72e9525 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Thu, 12 Nov 2020 10:58:59 -0500 Subject: [PATCH 62/64] Fix pair_spin_exchange doc page title --- doc/src/pair_spin_exchange.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/src/pair_spin_exchange.rst 
b/doc/src/pair_spin_exchange.rst index 9e6e534280..630ec6608e 100644 --- a/doc/src/pair_spin_exchange.rst +++ b/doc/src/pair_spin_exchange.rst @@ -5,7 +5,7 @@ pair_style spin/exchange command ================================ pair_style spin/exchange/biquadratic command -================================ +============================================ Syntax """""" @@ -25,6 +25,7 @@ Examples pair_style spin/exchange 4.0 pair_coeff * * exchange 4.0 0.0446928 0.003496 1.4885 pair_coeff 1 2 exchange 6.0 -0.01575 0.0 1.965 offset yes + pair_style spin/exchange/biquadratic 4.0 pair_coeff * * biquadratic 4.0 0.05 0.03 1.48 0.05 0.03 1.48 offset no pair_coeff 1 2 biquadratic 6.0 -0.01 0.0 1.9 0.0 0.1 19 From 497f0dd59358093e11e157e960a6d238ae02df37 Mon Sep 17 00:00:00 2001 From: julient31 Date: Thu, 12 Nov 2020 09:43:38 -0700 Subject: [PATCH 63/64] Removing binder and m2,m4 declarations from compute/spin --- src/SPIN/compute_spin.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/SPIN/compute_spin.cpp b/src/SPIN/compute_spin.cpp index 3e4970a62b..c92f24f1ae 100644 --- a/src/SPIN/compute_spin.cpp +++ b/src/SPIN/compute_spin.cpp @@ -148,12 +148,10 @@ void ComputeSpin::compute_vector() int i; int countsp, countsptot; double mag[4], magtot[4]; - double m2, m2tot; - double m4, m4tot; double magenergy, magenergytot; double tempnum, tempnumtot; double tempdenom, tempdenomtot; - double spintemperature,binder; + double spintemperature; invoked_vector = update->ntimestep; From a48f463faf26cdf9af2f4af589f05138ea30b46a Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 13 Nov 2020 13:12:50 -0700 Subject: [PATCH 64/64] Fix memory bug in Kokkos KISS FFT --- src/KOKKOS/fft3d_kokkos.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 02f55e11fa..04a5512cc7 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -228,7 +228,7 @@ void 
FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag); #else typename FFT_AT::t_FFT_DATA_1d d_tmp = - typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.extent(0)); + typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); kiss_fft_functor f; if (flag == -1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length); @@ -236,7 +236,6 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_backward,length); Kokkos::parallel_for(total/length,f); d_data = d_tmp; - d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.extent(0)); #endif @@ -273,13 +272,13 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, #elif defined(FFT_CUFFT) cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag); #else + d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == -1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_mid_forward,length); else f = kiss_fft_functor(d_data,d_tmp,plan->cfg_mid_backward,length); Kokkos::parallel_for(total/length,f); d_data = d_tmp; - d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.extent(0)); #endif // 2nd mid-remap to prepare for 3rd FFTs @@ -315,6 +314,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, #elif defined(FFT_CUFFT) cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag); #else + d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == -1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_slow_forward,length); else @@ -866,7 +866,8 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ 
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag); #else kiss_fft_functor f; - typename FFT_AT::t_FFT_DATA_1d d_tmp = typename FFT_AT::t_FFT_DATA_1d("fft_3d:tmp",d_data.extent(0)); + typename FFT_AT::t_FFT_DATA_1d d_tmp = + typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == -1) { f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length1); Kokkos::parallel_for(total1/length1,f);